Merge netsim mig to master (#17241)

* swarm: merged stream-tests migration to develop

* swarm/network: expose simulation RandomUpNode to use in stream tests

* swarm/network: wait for subs in PeerEvents and fix stream.runSyncTest

* swarm: enforce waitkademlia for snapshot tests

* swarm: fixed syncer tests and snapshot_sync_test

* swarm: linting of simulation package

* swarm: address review comments

* swarm/network/stream: fix delivery_test bugs and refactor

* swarm/network/stream: addressed PR comments @janos

* swarm/network/stream: enforce waitKademlia, improve TestIntervals

* swarm/network/stream: TestIntervals not waiting for chunk to be stored
This commit is contained in:
holisticode
2018-07-30 15:55:25 -05:00
committed by Balint Gabor
parent 3ea8ac6a9a
commit d6efa69187
14 changed files with 1411 additions and 2479 deletions

View File

@@ -17,20 +17,19 @@ package stream
import (
"context"
crand "crypto/rand"
"fmt"
"math/rand"
"strings"
"os"
"sync"
"testing"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/node"
"github.com/ethereum/go-ethereum/p2p/discover"
"github.com/ethereum/go-ethereum/p2p/simulations"
"github.com/ethereum/go-ethereum/p2p/simulations/adapters"
"github.com/ethereum/go-ethereum/swarm/log"
"github.com/ethereum/go-ethereum/swarm/network"
streamTesting "github.com/ethereum/go-ethereum/swarm/network/stream/testing"
"github.com/ethereum/go-ethereum/swarm/network/simulation"
"github.com/ethereum/go-ethereum/swarm/state"
"github.com/ethereum/go-ethereum/swarm/storage"
)
@@ -40,40 +39,6 @@ const (
maxFileSize = 40
)
func initRetrievalTest() {
//global func to get overlay address from discover ID
toAddr = func(id discover.NodeID) *network.BzzAddr {
addr := network.NewAddrFromNodeID(id)
return addr
}
//global func to create local store
createStoreFunc = createTestLocalStorageForId
//local stores
stores = make(map[discover.NodeID]storage.ChunkStore)
//data directories for each node and store
datadirs = make(map[discover.NodeID]string)
//deliveries for each node
deliveries = make(map[discover.NodeID]*Delivery)
//global retrieve func
getRetrieveFunc = func(id discover.NodeID) func(ctx context.Context, chunk *storage.Chunk) error {
return func(ctx context.Context, chunk *storage.Chunk) error {
skipCheck := true
return deliveries[id].RequestFromPeers(ctx, chunk.Addr[:], skipCheck)
}
}
//registries, map of discover.NodeID to its streamer
registries = make(map[discover.NodeID]*TestRegistry)
//not needed for this test but required from common_test for NewStreamService
waitPeerErrC = make(chan error)
//also not needed for this test but required for NewStreamService
peerCount = func(id discover.NodeID) int {
if ids[0] == id || ids[len(ids)-1] == id {
return 1
}
return 2
}
}
//This test is a retrieval test for nodes.
//A configurable number of nodes can be
//provided to the test.
@@ -81,7 +46,10 @@ func initRetrievalTest() {
//Number of nodes can be provided via commandline too.
func TestFileRetrieval(t *testing.T) {
if *nodes != 0 {
fileRetrievalTest(t, *nodes)
err := runFileRetrievalTest(*nodes)
if err != nil {
t.Fatal(err)
}
} else {
nodeCnt := []int{16}
//if the `longrunning` flag has been provided
@@ -90,7 +58,10 @@ func TestFileRetrieval(t *testing.T) {
nodeCnt = append(nodeCnt, 32, 64, 128)
}
for _, n := range nodeCnt {
fileRetrievalTest(t, n)
err := runFileRetrievalTest(n)
if err != nil {
t.Fatal(err)
}
}
}
}
@@ -105,7 +76,10 @@ func TestRetrieval(t *testing.T) {
//if nodes/chunks have been provided via commandline,
//run the tests with these values
if *nodes != 0 && *chunks != 0 {
retrievalTest(t, *chunks, *nodes)
err := runRetrievalTest(*chunks, *nodes)
if err != nil {
t.Fatal(err)
}
} else {
var nodeCnt []int
var chnkCnt []int
@@ -121,76 +95,17 @@ func TestRetrieval(t *testing.T) {
}
for _, n := range nodeCnt {
for _, c := range chnkCnt {
retrievalTest(t, c, n)
err := runRetrievalTest(c, n)
if err != nil {
t.Fatal(err)
}
}
}
}
}
//Every test runs 3 times, a live, a history, and a live AND history
func fileRetrievalTest(t *testing.T, nodeCount int) {
//test live and NO history
log.Info("Testing live and no history", "nodeCount", nodeCount)
live = true
history = false
err := runFileRetrievalTest(nodeCount)
if err != nil {
t.Fatal(err)
}
//test history only
log.Info("Testing history only", "nodeCount", nodeCount)
live = false
history = true
err = runFileRetrievalTest(nodeCount)
if err != nil {
t.Fatal(err)
}
//finally test live and history
log.Info("Testing live and history", "nodeCount", nodeCount)
live = true
err = runFileRetrievalTest(nodeCount)
if err != nil {
t.Fatal(err)
}
}
//Every test runs 3 times, a live, a history, and a live AND history
func retrievalTest(t *testing.T, chunkCount int, nodeCount int) {
//test live and NO history
log.Info("Testing live and no history", "chunkCount", chunkCount, "nodeCount", nodeCount)
live = true
history = false
err := runRetrievalTest(chunkCount, nodeCount)
if err != nil {
t.Fatal(err)
}
//test history only
log.Info("Testing history only", "chunkCount", chunkCount, "nodeCount", nodeCount)
live = false
history = true
err = runRetrievalTest(chunkCount, nodeCount)
if err != nil {
t.Fatal(err)
}
//finally test live and history
log.Info("Testing live and history", "chunkCount", chunkCount, "nodeCount", nodeCount)
live = true
err = runRetrievalTest(chunkCount, nodeCount)
if err != nil {
t.Fatal(err)
}
}
/*
The upload is done by dependency to the global
`live` and `history` variables;
If `live` is set, first stream subscriptions are established,
then files are uploaded to nodes.
If `history` is enabled, first upload files, then build up subscriptions.
The test loads a snapshot file to construct the swarm network,
assuming that the snapshot file identifies a healthy
kademlia network. Nevertheless a health check runs in the
@@ -199,261 +114,129 @@ simulation's `action` function.
The snapshot should have 'streamer' in its service list.
*/
func runFileRetrievalTest(nodeCount int) error {
//for every run (live, history), int the variables
initRetrievalTest()
//the ids of the snapshot nodes, initiate only now as we need nodeCount
ids = make([]discover.NodeID, nodeCount)
//channel to check for disconnection errors
disconnectC := make(chan error)
//channel to close disconnection watcher routine
quitC := make(chan struct{})
//the test conf (using same as in `snapshot_sync_test`
conf = &synctestConfig{}
sim := simulation.New(map[string]simulation.ServiceFunc{
"streamer": func(ctx *adapters.ServiceContext, bucket *sync.Map) (s node.Service, cleanup func(), err error) {
id := ctx.Config.ID
addr := network.NewAddrFromNodeID(id)
store, datadir, err := createTestLocalStorageForID(id, addr)
if err != nil {
return nil, nil, err
}
bucket.Store(bucketKeyStore, store)
cleanup = func() {
os.RemoveAll(datadir)
store.Close()
}
localStore := store.(*storage.LocalStore)
db := storage.NewDBAPI(localStore)
kad := network.NewKademlia(addr.Over(), network.NewKadParams())
delivery := NewDelivery(kad, db)
r := NewRegistry(addr, delivery, db, state.NewInmemoryStore(), &RegistryOptions{
DoSync: true,
SyncUpdateDelay: 3 * time.Second,
})
fileStore := storage.NewFileStore(storage.NewNetStore(localStore, nil), storage.NewFileStoreParams())
bucket.Store(bucketKeyFileStore, fileStore)
return r, cleanup, nil
},
})
defer sim.Close()
log.Info("Initializing test config")
conf := &synctestConfig{}
//map of discover ID to indexes of chunks expected at that ID
conf.idToChunksMap = make(map[discover.NodeID][]int)
//map of overlay address to discover ID
conf.addrToIdMap = make(map[string]discover.NodeID)
conf.addrToIDMap = make(map[string]discover.NodeID)
//array where the generated chunk hashes will be stored
conf.hashes = make([]storage.Address, 0)
//load nodes from the snapshot file
net, err := initNetWithSnapshot(nodeCount)
err := sim.UploadSnapshot(fmt.Sprintf("testing/snapshot_%d.json", nodeCount))
if err != nil {
return err
}
var rpcSubscriptionsWg sync.WaitGroup
//do cleanup after test is terminated
defer func() {
//shutdown the snapshot network
net.Shutdown()
//after the test, clean up local stores initialized with createLocalStoreForId
localStoreCleanup()
//finally clear all data directories
datadirsCleanup()
}()
//get the nodes of the network
nodes := net.GetNodes()
//iterate over all nodes...
for c := 0; c < len(nodes); c++ {
//create an array of discovery nodeIDS
ids[c] = nodes[c].ID()
a := network.ToOverlayAddr(ids[c].Bytes())
//append it to the array of all overlay addresses
conf.addrs = append(conf.addrs, a)
conf.addrToIdMap[string(a)] = ids[c]
}
//needed for healthy call
ppmap = network.NewPeerPotMap(testMinProxBinSize, conf.addrs)
ctx, cancelSimRun := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancelSimRun()
//an array for the random files
var randomFiles []string
//channel to signal when the upload has finished
uploadFinished := make(chan struct{})
//channel to trigger new node checks
trigger := make(chan discover.NodeID)
//simulation action
action := func(ctx context.Context) error {
//first run the health check on all nodes,
//wait until nodes are all healthy
ticker := time.NewTicker(200 * time.Millisecond)
defer ticker.Stop()
for range ticker.C {
healthy := true
for _, id := range ids {
r := registries[id]
//PeerPot for this node
addr := common.Bytes2Hex(r.addr.OAddr)
pp := ppmap[addr]
//call Healthy RPC
h := r.delivery.overlay.Healthy(pp)
//print info
log.Debug(r.delivery.overlay.String())
log.Debug(fmt.Sprintf("IS HEALTHY: %t", h.GotNN && h.KnowNN && h.Full))
if !h.GotNN || !h.Full {
healthy = false
break
result := sim.Run(ctx, func(ctx context.Context, sim *simulation.Simulation) error {
nodeIDs := sim.UpNodeIDs()
for _, n := range nodeIDs {
//get the kademlia overlay address from this ID
a := network.ToOverlayAddr(n.Bytes())
//append it to the array of all overlay addresses
conf.addrs = append(conf.addrs, a)
//the proximity calculation is on overlay addr,
//the p2p/simulations check func triggers on discover.NodeID,
//so we need to know which overlay addr maps to which nodeID
conf.addrToIDMap[string(a)] = n
}
//an array for the random files
var randomFiles []string
//channel to signal when the upload has finished
//uploadFinished := make(chan struct{})
//channel to trigger new node checks
conf.hashes, randomFiles, err = uploadFilesToNodes(sim)
if err != nil {
return err
}
if _, err := sim.WaitTillHealthy(ctx, 2); err != nil {
return err
}
// File retrieval check is repeated until all uploaded files are retrieved from all nodes
// or until the timeout is reached.
allSuccess := false
for !allSuccess {
for _, id := range nodeIDs {
//for each expected chunk, check if it is in the local store
localChunks := conf.idToChunksMap[id]
localSuccess := true
for _, ch := range localChunks {
//get the real chunk by the index in the index array
chunk := conf.hashes[ch]
log.Trace(fmt.Sprintf("node has chunk: %s:", chunk))
//check if the expected chunk is indeed in the localstore
var err error
//check on the node's FileStore (netstore)
item, ok := sim.NodeItem(id, bucketKeyFileStore)
if !ok {
return fmt.Errorf("No registry")
}
fileStore := item.(*storage.FileStore)
//check all chunks
for i, hash := range conf.hashes {
reader, _ := fileStore.Retrieve(context.TODO(), hash)
//check that we can read the file size and that it corresponds to the generated file size
if s, err := reader.Size(ctx, nil); err != nil || s != int64(len(randomFiles[i])) {
allSuccess = false
log.Warn("Retrieve error", "err", err, "hash", hash, "nodeId", id)
} else {
log.Debug(fmt.Sprintf("File with root hash %x successfully retrieved", hash))
}
}
if err != nil {
log.Warn(fmt.Sprintf("Chunk %s NOT found for id %s", chunk, id))
localSuccess = false
} else {
log.Debug(fmt.Sprintf("Chunk %s IS FOUND for id %s", chunk, id))
}
}
}
if healthy {
break
allSuccess = localSuccess
}
}
if history {
log.Info("Uploading for history")
//If testing only history, we upload the chunk(s) first
conf.hashes, randomFiles, err = uploadFilesToNodes(nodes)
if err != nil {
return err
}
if !allSuccess {
return fmt.Errorf("Not all chunks succeeded!")
}
//variables needed to wait for all subscriptions established before uploading
errc := make(chan error)
//now setup and start event watching in order to know when we can upload
ctx, watchCancel := context.WithTimeout(context.Background(), MaxTimeout*time.Second)
defer watchCancel()
log.Info("Setting up stream subscription")
//We need two iterations, one to subscribe to the subscription events
//(so we know when setup phase is finished), and one to
//actually run the stream subscriptions. We can't do it in the same iteration,
//because while the first nodes in the loop are setting up subscriptions,
//the latter ones have not subscribed to listen to peer events yet,
//and then we miss events.
//first iteration: setup disconnection watcher and subscribe to peer events
for j, id := range ids {
log.Trace(fmt.Sprintf("Subscribe to subscription events: %d", j))
client, err := net.GetNode(id).Client()
if err != nil {
return err
}
wsDoneC := watchSubscriptionEvents(ctx, id, client, errc, quitC)
// doneC is nil, the error happened which is sent to errc channel, already
if wsDoneC == nil {
continue
}
rpcSubscriptionsWg.Add(1)
go func() {
<-wsDoneC
rpcSubscriptionsWg.Done()
}()
//watch for peers disconnecting
wdDoneC, err := streamTesting.WatchDisconnections(id, client, disconnectC, quitC)
if err != nil {
return err
}
rpcSubscriptionsWg.Add(1)
go func() {
<-wdDoneC
rpcSubscriptionsWg.Done()
}()
}
//second iteration: start syncing and setup stream subscriptions
for j, id := range ids {
log.Trace(fmt.Sprintf("Start syncing and stream subscriptions: %d", j))
client, err := net.GetNode(id).Client()
if err != nil {
return err
}
//start syncing!
var cnt int
err = client.CallContext(ctx, &cnt, "stream_startSyncing")
if err != nil {
return err
}
//increment the number of subscriptions we need to wait for
//by the count returned from startSyncing (SYNC subscriptions)
subscriptionCount += cnt
//now also add the number of RETRIEVAL_REQUEST subscriptions
for snid := range registries[id].peers {
subscriptionCount++
err = client.CallContext(ctx, nil, "stream_subscribeStream", snid, NewStream(swarmChunkServerStreamName, "", false), nil, Top)
if err != nil {
return err
}
}
}
//now wait until the number of expected subscriptions has been finished
//`watchSubscriptionEvents` will write with a `nil` value to errc
//every time a `SubscriptionMsg` has been received
for err := range errc {
if err != nil {
return err
}
//`nil` received, decrement count
subscriptionCount--
//all subscriptions received
if subscriptionCount == 0 {
break
}
}
log.Info("Stream subscriptions successfully requested, action terminated")
if live {
//upload generated files to nodes
var hashes []storage.Address
var rfiles []string
hashes, rfiles, err = uploadFilesToNodes(nodes)
if err != nil {
return err
}
conf.hashes = append(conf.hashes, hashes...)
randomFiles = append(randomFiles, rfiles...)
//signal to the trigger loop that the upload has finished
uploadFinished <- struct{}{}
}
return nil
}
//check defines what will be checked during the test
check := func(ctx context.Context, id discover.NodeID) (bool, error) {
select {
case <-ctx.Done():
return false, ctx.Err()
case e := <-disconnectC:
log.Error(e.Error())
return false, fmt.Errorf("Disconnect event detected, network unhealthy")
default:
}
log.Trace(fmt.Sprintf("Checking node: %s", id))
//if there are more than one chunk, test only succeeds if all expected chunks are found
allSuccess := true
//check on the node's FileStore (netstore)
fileStore := registries[id].fileStore
//check all chunks
for i, hash := range conf.hashes {
reader, _ := fileStore.Retrieve(context.TODO(), hash)
//check that we can read the file size and that it corresponds to the generated file size
if s, err := reader.Size(context.TODO(), nil); err != nil || s != int64(len(randomFiles[i])) {
allSuccess = false
log.Warn("Retrieve error", "err", err, "hash", hash, "nodeId", id)
} else {
log.Debug(fmt.Sprintf("File with root hash %x successfully retrieved", hash))
}
}
return allSuccess, nil
}
//for each tick, run the checks on all nodes
timingTicker := time.NewTicker(5 * time.Second)
defer timingTicker.Stop()
go func() {
//for live upload, we should wait for uploads to have finished
//before starting to trigger the checks, due to file size
if live {
<-uploadFinished
}
for range timingTicker.C {
for i := 0; i < len(ids); i++ {
log.Trace(fmt.Sprintf("triggering step %d, id %s", i, ids[i]))
trigger <- ids[i]
}
}
}()
log.Info("Starting simulation run...")
timeout := MaxTimeout * time.Second
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
//run the simulation
result := simulations.NewSimulation(net).Run(ctx, &simulations.Step{
Action: action,
Trigger: trigger,
Expect: &simulations.Expectation{
Nodes: ids,
Check: check,
},
})
if result.Error != nil {
@@ -466,14 +249,6 @@ func runFileRetrievalTest(nodeCount int) error {
/*
The test generates the given number of chunks.
The upload is done by dependency to the global
`live` and `history` variables;
If `live` is set, first stream subscriptions are established, then
upload to a random node.
If `history` is enabled, first upload then build up subscriptions.
The test loads a snapshot file to construct the swarm network,
assuming that the snapshot file identifies a healthy
kademlia network. Nevertheless a health check runs in the
@@ -482,259 +257,129 @@ simulation's `action` function.
The snapshot should have 'streamer' in its service list.
*/
func runRetrievalTest(chunkCount int, nodeCount int) error {
//for every run (live, history), int the variables
initRetrievalTest()
//the ids of the snapshot nodes, initiate only now as we need nodeCount
ids = make([]discover.NodeID, nodeCount)
//channel to check for disconnection errors
disconnectC := make(chan error)
//channel to close disconnection watcher routine
quitC := make(chan struct{})
//the test conf (using same as in `snapshot_sync_test`
conf = &synctestConfig{}
sim := simulation.New(map[string]simulation.ServiceFunc{
"streamer": func(ctx *adapters.ServiceContext, bucket *sync.Map) (s node.Service, cleanup func(), err error) {
id := ctx.Config.ID
addr := network.NewAddrFromNodeID(id)
store, datadir, err := createTestLocalStorageForID(id, addr)
if err != nil {
return nil, nil, err
}
bucket.Store(bucketKeyStore, store)
cleanup = func() {
os.RemoveAll(datadir)
store.Close()
}
localStore := store.(*storage.LocalStore)
db := storage.NewDBAPI(localStore)
kad := network.NewKademlia(addr.Over(), network.NewKadParams())
delivery := NewDelivery(kad, db)
r := NewRegistry(addr, delivery, db, state.NewInmemoryStore(), &RegistryOptions{
DoSync: true,
SyncUpdateDelay: 0,
})
fileStore := storage.NewFileStore(storage.NewNetStore(localStore, nil), storage.NewFileStoreParams())
bucketKeyFileStore = simulation.BucketKey("filestore")
bucket.Store(bucketKeyFileStore, fileStore)
return r, cleanup, nil
},
})
defer sim.Close()
conf := &synctestConfig{}
//map of discover ID to indexes of chunks expected at that ID
conf.idToChunksMap = make(map[discover.NodeID][]int)
//map of overlay address to discover ID
conf.addrToIdMap = make(map[string]discover.NodeID)
conf.addrToIDMap = make(map[string]discover.NodeID)
//array where the generated chunk hashes will be stored
conf.hashes = make([]storage.Address, 0)
//load nodes from the snapshot file
net, err := initNetWithSnapshot(nodeCount)
err := sim.UploadSnapshot(fmt.Sprintf("testing/snapshot_%d.json", nodeCount))
if err != nil {
return err
}
var rpcSubscriptionsWg sync.WaitGroup
//do cleanup after test is terminated
defer func() {
//shutdown the snapshot network
net.Shutdown()
//after the test, clean up local stores initialized with createLocalStoreForId
localStoreCleanup()
//finally clear all data directories
datadirsCleanup()
}()
//get the nodes of the network
nodes := net.GetNodes()
//select one index at random...
idx := rand.Intn(len(nodes))
//...and get the the node at that index
//this is the node selected for upload
uploadNode := nodes[idx]
//iterate over all nodes...
for c := 0; c < len(nodes); c++ {
//create an array of discovery nodeIDS
ids[c] = nodes[c].ID()
a := network.ToOverlayAddr(ids[c].Bytes())
//append it to the array of all overlay addresses
conf.addrs = append(conf.addrs, a)
conf.addrToIdMap[string(a)] = ids[c]
}
//needed for healthy call
ppmap = network.NewPeerPotMap(testMinProxBinSize, conf.addrs)
ctx := context.Background()
result := sim.Run(ctx, func(ctx context.Context, sim *simulation.Simulation) error {
nodeIDs := sim.UpNodeIDs()
for _, n := range nodeIDs {
//get the kademlia overlay address from this ID
a := network.ToOverlayAddr(n.Bytes())
//append it to the array of all overlay addresses
conf.addrs = append(conf.addrs, a)
//the proximity calculation is on overlay addr,
//the p2p/simulations check func triggers on discover.NodeID,
//so we need to know which overlay addr maps to which nodeID
conf.addrToIDMap[string(a)] = n
}
trigger := make(chan discover.NodeID)
//simulation action
action := func(ctx context.Context) error {
//first run the health check on all nodes,
//wait until nodes are all healthy
ticker := time.NewTicker(200 * time.Millisecond)
defer ticker.Stop()
for range ticker.C {
healthy := true
for _, id := range ids {
r := registries[id]
//PeerPot for this node
addr := common.Bytes2Hex(network.ToOverlayAddr(id.Bytes()))
pp := ppmap[addr]
//call Healthy RPC
h := r.delivery.overlay.Healthy(pp)
//print info
log.Debug(r.delivery.overlay.String())
log.Debug(fmt.Sprintf("IS HEALTHY: %t", h.GotNN && h.KnowNN && h.Full))
if !h.GotNN || !h.Full {
healthy = false
break
//an array for the random files
var randomFiles []string
//this is the node selected for upload
node := sim.RandomUpNode()
item, ok := sim.NodeItem(node.ID, bucketKeyStore)
if !ok {
return fmt.Errorf("No localstore")
}
lstore := item.(*storage.LocalStore)
conf.hashes, err = uploadFileToSingleNodeStore(node.ID, chunkCount, lstore)
if err != nil {
return err
}
if _, err := sim.WaitTillHealthy(ctx, 2); err != nil {
return err
}
// File retrieval check is repeated until all uploaded files are retrieved from all nodes
// or until the timeout is reached.
allSuccess := false
for !allSuccess {
for _, id := range nodeIDs {
//for each expected chunk, check if it is in the local store
localChunks := conf.idToChunksMap[id]
localSuccess := true
for _, ch := range localChunks {
//get the real chunk by the index in the index array
chunk := conf.hashes[ch]
log.Trace(fmt.Sprintf("node has chunk: %s:", chunk))
//check if the expected chunk is indeed in the localstore
var err error
//check on the node's FileStore (netstore)
item, ok := sim.NodeItem(id, bucketKeyFileStore)
if !ok {
return fmt.Errorf("No registry")
}
fileStore := item.(*storage.FileStore)
//check all chunks
for i, hash := range conf.hashes {
reader, _ := fileStore.Retrieve(context.TODO(), hash)
//check that we can read the file size and that it corresponds to the generated file size
if s, err := reader.Size(ctx, nil); err != nil || s != int64(len(randomFiles[i])) {
allSuccess = false
log.Warn("Retrieve error", "err", err, "hash", hash, "nodeId", id)
} else {
log.Debug(fmt.Sprintf("File with root hash %x successfully retrieved", hash))
}
}
if err != nil {
log.Warn(fmt.Sprintf("Chunk %s NOT found for id %s", chunk, id))
localSuccess = false
} else {
log.Debug(fmt.Sprintf("Chunk %s IS FOUND for id %s", chunk, id))
}
}
}
if healthy {
break
allSuccess = localSuccess
}
}
if history {
log.Info("Uploading for history")
//If testing only history, we upload the chunk(s) first
conf.hashes, err = uploadFileToSingleNodeStore(uploadNode.ID(), chunkCount)
if err != nil {
return err
}
if !allSuccess {
return fmt.Errorf("Not all chunks succeeded!")
}
//variables needed to wait for all subscriptions established before uploading
errc := make(chan error)
//now setup and start event watching in order to know when we can upload
ctx, watchCancel := context.WithTimeout(context.Background(), MaxTimeout*time.Second)
defer watchCancel()
log.Info("Setting up stream subscription")
//We need two iterations, one to subscribe to the subscription events
//(so we know when setup phase is finished), and one to
//actually run the stream subscriptions. We can't do it in the same iteration,
//because while the first nodes in the loop are setting up subscriptions,
//the latter ones have not subscribed to listen to peer events yet,
//and then we miss events.
//first iteration: setup disconnection watcher and subscribe to peer events
for j, id := range ids {
log.Trace(fmt.Sprintf("Subscribe to subscription events: %d", j))
client, err := net.GetNode(id).Client()
if err != nil {
return err
}
//check for `SubscribeMsg` events to know when setup phase is complete
wsDoneC := watchSubscriptionEvents(ctx, id, client, errc, quitC)
// doneC is nil, the error happened which is sent to errc channel, already
if wsDoneC == nil {
continue
}
rpcSubscriptionsWg.Add(1)
go func() {
<-wsDoneC
rpcSubscriptionsWg.Done()
}()
//watch for peers disconnecting
wdDoneC, err := streamTesting.WatchDisconnections(id, client, disconnectC, quitC)
if err != nil {
return err
}
rpcSubscriptionsWg.Add(1)
go func() {
<-wdDoneC
rpcSubscriptionsWg.Done()
}()
}
//second iteration: start syncing and setup stream subscriptions
for j, id := range ids {
log.Trace(fmt.Sprintf("Start syncing and stream subscriptions: %d", j))
client, err := net.GetNode(id).Client()
if err != nil {
return err
}
//start syncing!
var cnt int
err = client.CallContext(ctx, &cnt, "stream_startSyncing")
if err != nil {
return err
}
//increment the number of subscriptions we need to wait for
//by the count returned from startSyncing (SYNC subscriptions)
subscriptionCount += cnt
//now also add the number of RETRIEVAL_REQUEST subscriptions
for snid := range registries[id].peers {
subscriptionCount++
err = client.CallContext(ctx, nil, "stream_subscribeStream", snid, NewStream(swarmChunkServerStreamName, "", false), nil, Top)
if err != nil {
return err
}
}
}
//now wait until the number of expected subscriptions has been finished
//`watchSubscriptionEvents` will write with a `nil` value to errc
//every time a `SubscriptionMsg` has been received
for err := range errc {
if err != nil {
return err
}
//`nil` received, decrement count
subscriptionCount--
//all subscriptions received
if subscriptionCount == 0 {
break
}
}
log.Info("Stream subscriptions successfully requested, action terminated")
if live {
//now upload the chunks to the selected random single node
chnks, err := uploadFileToSingleNodeStore(uploadNode.ID(), chunkCount)
if err != nil {
return err
}
conf.hashes = append(conf.hashes, chnks...)
}
return nil
}
chunkSize := storage.DefaultChunkSize
//check defines what will be checked during the test
check := func(ctx context.Context, id discover.NodeID) (bool, error) {
//don't check the uploader node
if id == uploadNode.ID() {
return true, nil
}
select {
case <-ctx.Done():
return false, ctx.Err()
case e := <-disconnectC:
log.Error(e.Error())
return false, fmt.Errorf("Disconnect event detected, network unhealthy")
default:
}
log.Trace(fmt.Sprintf("Checking node: %s", id))
//if there are more than one chunk, test only succeeds if all expected chunks are found
allSuccess := true
//check on the node's FileStore (netstore)
fileStore := registries[id].fileStore
//check all chunks
for _, chnk := range conf.hashes {
reader, _ := fileStore.Retrieve(context.TODO(), chnk)
//assuming that reading the Size of the chunk is enough to know we found it
if s, err := reader.Size(context.TODO(), nil); err != nil || s != chunkSize {
allSuccess = false
log.Warn("Retrieve error", "err", err, "chunk", chnk, "nodeId", id)
} else {
log.Debug(fmt.Sprintf("Chunk %x found", chnk))
}
}
return allSuccess, nil
}
//for each tick, run the checks on all nodes
timingTicker := time.NewTicker(5 * time.Second)
defer timingTicker.Stop()
go func() {
for range timingTicker.C {
for i := 0; i < len(ids); i++ {
log.Trace(fmt.Sprintf("triggering step %d, id %s", i, ids[i]))
trigger <- ids[i]
}
}
}()
log.Info("Starting simulation run...")
timeout := MaxTimeout * time.Second
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
//run the simulation
result := simulations.NewSimulation(net).Run(ctx, &simulations.Step{
Action: action,
Trigger: trigger,
Expect: &simulations.Expectation{
Nodes: ids,
Check: check,
},
})
if result.Error != nil {
@@ -743,53 +388,3 @@ func runRetrievalTest(chunkCount int, nodeCount int) error {
return nil
}
//upload generated files to nodes
//every node gets one file uploaded
func uploadFilesToNodes(nodes []*simulations.Node) ([]storage.Address, []string, error) {
nodeCnt := len(nodes)
log.Debug(fmt.Sprintf("Uploading %d files to nodes", nodeCnt))
//array holding generated files
rfiles := make([]string, nodeCnt)
//array holding the root hashes of the files
rootAddrs := make([]storage.Address, nodeCnt)
var err error
//for every node, generate a file and upload
for i, n := range nodes {
id := n.ID()
fileStore := registries[id].fileStore
//generate a file
rfiles[i], err = generateRandomFile()
if err != nil {
return nil, nil, err
}
//store it (upload it) on the FileStore
ctx := context.TODO()
rk, wait, err := fileStore.Store(ctx, strings.NewReader(rfiles[i]), int64(len(rfiles[i])), false)
log.Debug("Uploaded random string file to node")
if err != nil {
return nil, nil, err
}
err = wait(ctx)
if err != nil {
return nil, nil, err
}
rootAddrs[i] = rk
}
return rootAddrs, rfiles, nil
}
//generate a random file (string)
func generateRandomFile() (string, error) {
//generate a random file size between minFileSize and maxFileSize
fileSize := rand.Intn(maxFileSize-minFileSize) + minFileSize
log.Debug(fmt.Sprintf("Generated file with filesize %d kB", fileSize))
b := make([]byte, fileSize*1024)
_, err := crand.Read(b)
if err != nil {
log.Error("Error generating random file.", "err", err)
return "", err
}
return string(b), nil
}