Merge netsim mig to master (#17241)

* swarm: merged stream-tests migration to develop
* swarm/network: expose simulation RandomUpNode to use in stream tests
* swarm/network: wait for subs in PeerEvents and fix stream.runSyncTest
* swarm: enforce waitkademlia for snapshot tests
* swarm: fixed syncer tests and snapshot_sync_test
* swarm: linting of simulation package
* swarm: address review comments
* swarm/network/stream: fix delivery_test bugs and refactor
* swarm/network/stream: addressed PR comments @janos
* swarm/network/stream: enforce waitKademlia, improve TestIntervals
* swarm/network/stream: TestIntervals not waiting for chunk to be stored
2018-07-30 15:55:25 -05:00
parent 3ea8ac6a9a
commit d6efa69187
14 changed files with 1411 additions and 2479 deletions
--- a/swarm/network/stream/snapshot_retrieval_test.go
+++ b/swarm/network/stream/snapshot_retrieval_test.go
@@ -17,20 +17,19 @@ package stream

 import (
 	"context"
-	crand "crypto/rand"
 	"fmt"
-	"math/rand"
-	"strings"
+	"os"
 	"sync"
 	"testing"
 	"time"

-	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/node"
 	"github.com/ethereum/go-ethereum/p2p/discover"
-	"github.com/ethereum/go-ethereum/p2p/simulations"
+	"github.com/ethereum/go-ethereum/p2p/simulations/adapters"
 	"github.com/ethereum/go-ethereum/swarm/log"
 	"github.com/ethereum/go-ethereum/swarm/network"
-	streamTesting "github.com/ethereum/go-ethereum/swarm/network/stream/testing"
+	"github.com/ethereum/go-ethereum/swarm/network/simulation"
+	"github.com/ethereum/go-ethereum/swarm/state"
 	"github.com/ethereum/go-ethereum/swarm/storage"
 )

@@ -40,40 +39,6 @@ const (
 	maxFileSize = 40
 )

-func initRetrievalTest() {
-	//global func to get overlay address from discover ID
-	toAddr = func(id discover.NodeID) *network.BzzAddr {
-		addr := network.NewAddrFromNodeID(id)
-		return addr
-	}
-	//global func to create local store
-	createStoreFunc = createTestLocalStorageForId
-	//local stores
-	stores = make(map[discover.NodeID]storage.ChunkStore)
-	//data directories for each node and store
-	datadirs = make(map[discover.NodeID]string)
-	//deliveries for each node
-	deliveries = make(map[discover.NodeID]*Delivery)
-	//global retrieve func
-	getRetrieveFunc = func(id discover.NodeID) func(ctx context.Context, chunk *storage.Chunk) error {
-		return func(ctx context.Context, chunk *storage.Chunk) error {
-			skipCheck := true
-			return deliveries[id].RequestFromPeers(ctx, chunk.Addr[:], skipCheck)
-		}
-	}
-	//registries, map of discover.NodeID to its streamer
-	registries = make(map[discover.NodeID]*TestRegistry)
-	//not needed for this test but required from common_test for NewStreamService
-	waitPeerErrC = make(chan error)
-	//also not needed for this test but required for NewStreamService
-	peerCount = func(id discover.NodeID) int {
-		if ids[0] == id || ids[len(ids)-1] == id {
-			return 1
-		}
-		return 2
-	}
-}
-
 //This test is a retrieval test for nodes.
 //A configurable number of nodes can be
 //provided to the test.
@@ -81,7 +46,10 @@ func initRetrievalTest() {
 //Number of nodes can be provided via commandline too.
 func TestFileRetrieval(t *testing.T) {
 	if *nodes != 0 {
-		fileRetrievalTest(t, *nodes)
+		err := runFileRetrievalTest(*nodes)
+		if err != nil {
+			t.Fatal(err)
+		}
 	} else {
 		nodeCnt := []int{16}
 		//if the `longrunning` flag has been provided
@@ -90,7 +58,10 @@ func TestFileRetrieval(t *testing.T) {
 			nodeCnt = append(nodeCnt, 32, 64, 128)
 		}
 		for _, n := range nodeCnt {
-			fileRetrievalTest(t, n)
+			err := runFileRetrievalTest(n)
+			if err != nil {
+				t.Fatal(err)
+			}
 		}
 	}
 }
@@ -105,7 +76,10 @@ func TestRetrieval(t *testing.T) {
 	//if nodes/chunks have been provided via commandline,
 	//run the tests with these values
 	if *nodes != 0 && *chunks != 0 {
-		retrievalTest(t, *chunks, *nodes)
+		err := runRetrievalTest(*chunks, *nodes)
+		if err != nil {
+			t.Fatal(err)
+		}
 	} else {
 		var nodeCnt []int
 		var chnkCnt []int
@@ -121,76 +95,17 @@ func TestRetrieval(t *testing.T) {
 		}
 		for _, n := range nodeCnt {
 			for _, c := range chnkCnt {
-				retrievalTest(t, c, n)
+				err := runRetrievalTest(c, n)
+				if err != nil {
+					t.Fatal(err)
+				}
 			}
 		}
 	}
 }

-//Every test runs 3 times, a live, a history, and a live AND history
-func fileRetrievalTest(t *testing.T, nodeCount int) {
-	//test live and NO history
-	log.Info("Testing live and no history", "nodeCount", nodeCount)
-	live = true
-	history = false
-	err := runFileRetrievalTest(nodeCount)
-	if err != nil {
-		t.Fatal(err)
-	}
-	//test history only
-	log.Info("Testing history only", "nodeCount", nodeCount)
-	live = false
-	history = true
-	err = runFileRetrievalTest(nodeCount)
-	if err != nil {
-		t.Fatal(err)
-	}
-	//finally test live and history
-	log.Info("Testing live and history", "nodeCount", nodeCount)
-	live = true
-	err = runFileRetrievalTest(nodeCount)
-	if err != nil {
-		t.Fatal(err)
-	}
-}
-
-//Every test runs 3 times, a live, a history, and a live AND history
-func retrievalTest(t *testing.T, chunkCount int, nodeCount int) {
-	//test live and NO history
-	log.Info("Testing live and no history", "chunkCount", chunkCount, "nodeCount", nodeCount)
-	live = true
-	history = false
-	err := runRetrievalTest(chunkCount, nodeCount)
-	if err != nil {
-		t.Fatal(err)
-	}
-	//test history only
-	log.Info("Testing history only", "chunkCount", chunkCount, "nodeCount", nodeCount)
-	live = false
-	history = true
-	err = runRetrievalTest(chunkCount, nodeCount)
-	if err != nil {
-		t.Fatal(err)
-	}
-	//finally test live and history
-	log.Info("Testing live and history", "chunkCount", chunkCount, "nodeCount", nodeCount)
-	live = true
-	err = runRetrievalTest(chunkCount, nodeCount)
-	if err != nil {
-		t.Fatal(err)
-	}
-}
-
 /*

-The upload is done by dependency to the global
-`live` and `history` variables;
-
-If `live` is set, first stream subscriptions are established,
-then files are uploaded to nodes.
-
-If `history` is enabled, first upload files, then build up subscriptions.
-
 The test loads a snapshot file to construct the swarm network,
 assuming that the snapshot file identifies a healthy
 kademlia network. Nevertheless a health check runs in the
@@ -199,261 +114,129 @@ simulation's `action` function.
 The snapshot should have 'streamer' in its service list.
 */
 func runFileRetrievalTest(nodeCount int) error {
-	//for every run (live, history), int the variables
-	initRetrievalTest()
-	//the ids of the snapshot nodes, initiate only now as we need nodeCount
-	ids = make([]discover.NodeID, nodeCount)
-	//channel to check for disconnection errors
-	disconnectC := make(chan error)
-	//channel to close disconnection watcher routine
-	quitC := make(chan struct{})
-	//the test conf (using same as in `snapshot_sync_test`
-	conf = &synctestConfig{}
+	sim := simulation.New(map[string]simulation.ServiceFunc{
+		"streamer": func(ctx *adapters.ServiceContext, bucket *sync.Map) (s node.Service, cleanup func(), err error) {
+
+			id := ctx.Config.ID
+			addr := network.NewAddrFromNodeID(id)
+			store, datadir, err := createTestLocalStorageForID(id, addr)
+			if err != nil {
+				return nil, nil, err
+			}
+			bucket.Store(bucketKeyStore, store)
+			cleanup = func() {
+				os.RemoveAll(datadir)
+				store.Close()
+			}
+			localStore := store.(*storage.LocalStore)
+			db := storage.NewDBAPI(localStore)
+			kad := network.NewKademlia(addr.Over(), network.NewKadParams())
+			delivery := NewDelivery(kad, db)
+
+			r := NewRegistry(addr, delivery, db, state.NewInmemoryStore(), &RegistryOptions{
+				DoSync:          true,
+				SyncUpdateDelay: 3 * time.Second,
+			})
+
+			fileStore := storage.NewFileStore(storage.NewNetStore(localStore, nil), storage.NewFileStoreParams())
+			bucket.Store(bucketKeyFileStore, fileStore)
+
+			return r, cleanup, nil
+
+		},
+	})
+	defer sim.Close()
+
+	log.Info("Initializing test config")
+
+	conf := &synctestConfig{}
+	//map of discover ID to indexes of chunks expected at that ID
+	conf.idToChunksMap = make(map[discover.NodeID][]int)
 	//map of overlay address to discover ID
-	conf.addrToIdMap = make(map[string]discover.NodeID)
+	conf.addrToIDMap = make(map[string]discover.NodeID)
 	//array where the generated chunk hashes will be stored
 	conf.hashes = make([]storage.Address, 0)
-	//load nodes from the snapshot file
-	net, err := initNetWithSnapshot(nodeCount)
+
+	err := sim.UploadSnapshot(fmt.Sprintf("testing/snapshot_%d.json", nodeCount))
 	if err != nil {
 		return err
 	}
-	var rpcSubscriptionsWg sync.WaitGroup
-	//do cleanup after test is terminated
-	defer func() {
-		//shutdown the snapshot network
-		net.Shutdown()
-		//after the test, clean up local stores initialized with createLocalStoreForId
-		localStoreCleanup()
-		//finally clear all data directories
-		datadirsCleanup()
-	}()
-	//get the nodes of the network
-	nodes := net.GetNodes()
-	//iterate over all nodes...
-	for c := 0; c < len(nodes); c++ {
-		//create an array of discovery nodeIDS
-		ids[c] = nodes[c].ID()
-		a := network.ToOverlayAddr(ids[c].Bytes())
-		//append it to the array of all overlay addresses
-		conf.addrs = append(conf.addrs, a)
-		conf.addrToIdMap[string(a)] = ids[c]
-	}

-	//needed for healthy call
-	ppmap = network.NewPeerPotMap(testMinProxBinSize, conf.addrs)
+	ctx, cancelSimRun := context.WithTimeout(context.Background(), 1*time.Minute)
+	defer cancelSimRun()

-	//an array for the random files
-	var randomFiles []string
-	//channel to signal when the upload has finished
-	uploadFinished := make(chan struct{})
-	//channel to trigger new node checks
-	trigger := make(chan discover.NodeID)
-	//simulation action
-	action := func(ctx context.Context) error {
-		//first run the health check on all nodes,
-		//wait until nodes are all healthy
-		ticker := time.NewTicker(200 * time.Millisecond)
-		defer ticker.Stop()
-		for range ticker.C {
-			healthy := true
-			for _, id := range ids {
-				r := registries[id]
-				//PeerPot for this node
-				addr := common.Bytes2Hex(r.addr.OAddr)
-				pp := ppmap[addr]
-				//call Healthy RPC
-				h := r.delivery.overlay.Healthy(pp)
-				//print info
-				log.Debug(r.delivery.overlay.String())
-				log.Debug(fmt.Sprintf("IS HEALTHY: %t", h.GotNN && h.KnowNN && h.Full))
-				if !h.GotNN || !h.Full {
-					healthy = false
-					break
+	result := sim.Run(ctx, func(ctx context.Context, sim *simulation.Simulation) error {
+		nodeIDs := sim.UpNodeIDs()
+		for _, n := range nodeIDs {
+			//get the kademlia overlay address from this ID
+			a := network.ToOverlayAddr(n.Bytes())
+			//append it to the array of all overlay addresses
+			conf.addrs = append(conf.addrs, a)
+			//the proximity calculation is on overlay addr,
+			//the p2p/simulations check func triggers on discover.NodeID,
+			//so we need to know which overlay addr maps to which nodeID
+			conf.addrToIDMap[string(a)] = n
+		}
+
+		//an array for the random files
+		var randomFiles []string
+		//channel to signal when the upload has finished
+		//uploadFinished := make(chan struct{})
+		//channel to trigger new node checks
+
+		conf.hashes, randomFiles, err = uploadFilesToNodes(sim)
+		if err != nil {
+			return err
+		}
+		if _, err := sim.WaitTillHealthy(ctx, 2); err != nil {
+			return err
+		}
+
+		// File retrieval check is repeated until all uploaded files are retrieved from all nodes
+		// or until the timeout is reached.
+		allSuccess := false
+		for !allSuccess {
+			for _, id := range nodeIDs {
+				//for each expected chunk, check if it is in the local store
+				localChunks := conf.idToChunksMap[id]
+				localSuccess := true
+				for _, ch := range localChunks {
+					//get the real chunk by the index in the index array
+					chunk := conf.hashes[ch]
+					log.Trace(fmt.Sprintf("node has chunk: %s:", chunk))
+					//check if the expected chunk is indeed in the localstore
+					var err error
+					//check on the node's FileStore (netstore)
+					item, ok := sim.NodeItem(id, bucketKeyFileStore)
+					if !ok {
+						return fmt.Errorf("No registry")
+					}
+					fileStore := item.(*storage.FileStore)
+					//check all chunks
+					for i, hash := range conf.hashes {
+						reader, _ := fileStore.Retrieve(context.TODO(), hash)
+						//check that we can read the file size and that it corresponds to the generated file size
+						if s, err := reader.Size(ctx, nil); err != nil || s != int64(len(randomFiles[i])) {
+							allSuccess = false
+							log.Warn("Retrieve error", "err", err, "hash", hash, "nodeId", id)
+						} else {
+							log.Debug(fmt.Sprintf("File with root hash %x successfully retrieved", hash))
+						}
+					}
+					if err != nil {
+						log.Warn(fmt.Sprintf("Chunk %s NOT found for id %s", chunk, id))
+						localSuccess = false
+					} else {
+						log.Debug(fmt.Sprintf("Chunk %s IS FOUND for id %s", chunk, id))
+					}
 				}
-			}
-			if healthy {
-				break
+				allSuccess = localSuccess
 			}
 		}
-
-		if history {
-			log.Info("Uploading for history")
-			//If testing only history, we upload the chunk(s) first
-			conf.hashes, randomFiles, err = uploadFilesToNodes(nodes)
-			if err != nil {
-				return err
-			}
+		if !allSuccess {
+			return fmt.Errorf("Not all chunks succeeded!")
 		}
-
-		//variables needed to wait for all subscriptions established before uploading
-		errc := make(chan error)
-
-		//now setup and start event watching in order to know when we can upload
-		ctx, watchCancel := context.WithTimeout(context.Background(), MaxTimeout*time.Second)
-		defer watchCancel()
-
-		log.Info("Setting up stream subscription")
-		//We need two iterations, one to subscribe to the subscription events
-		//(so we know when setup phase is finished), and one to
-		//actually run the stream subscriptions. We can't do it in the same iteration,
-		//because while the first nodes in the loop are setting up subscriptions,
-		//the latter ones have not subscribed to listen to peer events yet,
-		//and then we miss events.
-
-		//first iteration: setup disconnection watcher and subscribe to peer events
-		for j, id := range ids {
-			log.Trace(fmt.Sprintf("Subscribe to subscription events: %d", j))
-			client, err := net.GetNode(id).Client()
-			if err != nil {
-				return err
-			}
-			wsDoneC := watchSubscriptionEvents(ctx, id, client, errc, quitC)
-			// doneC is nil, the error happened which is sent to errc channel, already
-			if wsDoneC == nil {
-				continue
-			}
-			rpcSubscriptionsWg.Add(1)
-			go func() {
-				<-wsDoneC
-				rpcSubscriptionsWg.Done()
-			}()
-
-			//watch for peers disconnecting
-			wdDoneC, err := streamTesting.WatchDisconnections(id, client, disconnectC, quitC)
-			if err != nil {
-				return err
-			}
-			rpcSubscriptionsWg.Add(1)
-			go func() {
-				<-wdDoneC
-				rpcSubscriptionsWg.Done()
-			}()
-		}
-
-		//second iteration: start syncing and setup stream subscriptions
-		for j, id := range ids {
-			log.Trace(fmt.Sprintf("Start syncing and stream subscriptions: %d", j))
-			client, err := net.GetNode(id).Client()
-			if err != nil {
-				return err
-			}
-			//start syncing!
-			var cnt int
-			err = client.CallContext(ctx, &cnt, "stream_startSyncing")
-			if err != nil {
-				return err
-			}
-			//increment the number of subscriptions we need to wait for
-			//by the count returned from startSyncing (SYNC subscriptions)
-			subscriptionCount += cnt
-			//now also add the number of RETRIEVAL_REQUEST subscriptions
-			for snid := range registries[id].peers {
-				subscriptionCount++
-				err = client.CallContext(ctx, nil, "stream_subscribeStream", snid, NewStream(swarmChunkServerStreamName, "", false), nil, Top)
-				if err != nil {
-					return err
-				}
-			}
-		}
-
-		//now wait until the number of expected subscriptions has been finished
-		//`watchSubscriptionEvents` will write with a `nil` value to errc
-		//every time a `SubscriptionMsg` has been received
-		for err := range errc {
-			if err != nil {
-				return err
-			}
-			//`nil` received, decrement count
-			subscriptionCount--
-			//all subscriptions received
-			if subscriptionCount == 0 {
-				break
-			}
-		}
-
-		log.Info("Stream subscriptions successfully requested, action terminated")
-
-		if live {
-			//upload generated files to nodes
-			var hashes []storage.Address
-			var rfiles []string
-			hashes, rfiles, err = uploadFilesToNodes(nodes)
-			if err != nil {
-				return err
-			}
-			conf.hashes = append(conf.hashes, hashes...)
-			randomFiles = append(randomFiles, rfiles...)
-			//signal to the trigger loop that the upload has finished
-			uploadFinished <- struct{}{}
-		}
-
 		return nil
-	}
-
-	//check defines what will be checked during the test
-	check := func(ctx context.Context, id discover.NodeID) (bool, error) {
-
-		select {
-		case <-ctx.Done():
-			return false, ctx.Err()
-		case e := <-disconnectC:
-			log.Error(e.Error())
-			return false, fmt.Errorf("Disconnect event detected, network unhealthy")
-		default:
-		}
-		log.Trace(fmt.Sprintf("Checking node: %s", id))
-		//if there are more than one chunk, test only succeeds if all expected chunks are found
-		allSuccess := true
-
-		//check on the node's FileStore (netstore)
-		fileStore := registries[id].fileStore
-		//check all chunks
-		for i, hash := range conf.hashes {
-			reader, _ := fileStore.Retrieve(context.TODO(), hash)
-			//check that we can read the file size and that it corresponds to the generated file size
-			if s, err := reader.Size(context.TODO(), nil); err != nil || s != int64(len(randomFiles[i])) {
-				allSuccess = false
-				log.Warn("Retrieve error", "err", err, "hash", hash, "nodeId", id)
-			} else {
-				log.Debug(fmt.Sprintf("File with root hash %x successfully retrieved", hash))
-			}
-		}
-
-		return allSuccess, nil
-	}
-
-	//for each tick, run the checks on all nodes
-	timingTicker := time.NewTicker(5 * time.Second)
-	defer timingTicker.Stop()
-	go func() {
-		//for live upload, we should wait for uploads to have finished
-		//before starting to trigger the checks, due to file size
-		if live {
-			<-uploadFinished
-		}
-		for range timingTicker.C {
-			for i := 0; i < len(ids); i++ {
-				log.Trace(fmt.Sprintf("triggering step %d, id %s", i, ids[i]))
-				trigger <- ids[i]
-			}
-		}
-	}()
-
-	log.Info("Starting simulation run...")
-
-	timeout := MaxTimeout * time.Second
-	ctx, cancel := context.WithTimeout(context.Background(), timeout)
-	defer cancel()
-
-	//run the simulation
-	result := simulations.NewSimulation(net).Run(ctx, &simulations.Step{
-		Action:  action,
-		Trigger: trigger,
-		Expect: &simulations.Expectation{
-			Nodes: ids,
-			Check: check,
-		},
 	})

 	if result.Error != nil {
@@ -466,14 +249,6 @@ func runFileRetrievalTest(nodeCount int) error {
 /*
 The test generates the given number of chunks.

-The upload is done by dependency to the global
-`live` and `history` variables;
-
-If `live` is set, first stream subscriptions are established, then
-upload to a random node.
-
-If `history` is enabled, first upload then build up subscriptions.
-
 The test loads a snapshot file to construct the swarm network,
 assuming that the snapshot file identifies a healthy
 kademlia network. Nevertheless a health check runs in the
@@ -482,259 +257,129 @@ simulation's `action` function.
 The snapshot should have 'streamer' in its service list.
 */
 func runRetrievalTest(chunkCount int, nodeCount int) error {
-	//for every run (live, history), int the variables
-	initRetrievalTest()
-	//the ids of the snapshot nodes, initiate only now as we need nodeCount
-	ids = make([]discover.NodeID, nodeCount)
-	//channel to check for disconnection errors
-	disconnectC := make(chan error)
-	//channel to close disconnection watcher routine
-	quitC := make(chan struct{})
-	//the test conf (using same as in `snapshot_sync_test`
-	conf = &synctestConfig{}
+	sim := simulation.New(map[string]simulation.ServiceFunc{
+		"streamer": func(ctx *adapters.ServiceContext, bucket *sync.Map) (s node.Service, cleanup func(), err error) {
+
+			id := ctx.Config.ID
+			addr := network.NewAddrFromNodeID(id)
+			store, datadir, err := createTestLocalStorageForID(id, addr)
+			if err != nil {
+				return nil, nil, err
+			}
+			bucket.Store(bucketKeyStore, store)
+			cleanup = func() {
+				os.RemoveAll(datadir)
+				store.Close()
+			}
+			localStore := store.(*storage.LocalStore)
+			db := storage.NewDBAPI(localStore)
+			kad := network.NewKademlia(addr.Over(), network.NewKadParams())
+			delivery := NewDelivery(kad, db)
+
+			r := NewRegistry(addr, delivery, db, state.NewInmemoryStore(), &RegistryOptions{
+				DoSync:          true,
+				SyncUpdateDelay: 0,
+			})
+
+			fileStore := storage.NewFileStore(storage.NewNetStore(localStore, nil), storage.NewFileStoreParams())
+			bucketKeyFileStore = simulation.BucketKey("filestore")
+			bucket.Store(bucketKeyFileStore, fileStore)
+
+			return r, cleanup, nil
+
+		},
+	})
+	defer sim.Close()
+
+	conf := &synctestConfig{}
+	//map of discover ID to indexes of chunks expected at that ID
+	conf.idToChunksMap = make(map[discover.NodeID][]int)
 	//map of overlay address to discover ID
-	conf.addrToIdMap = make(map[string]discover.NodeID)
+	conf.addrToIDMap = make(map[string]discover.NodeID)
 	//array where the generated chunk hashes will be stored
 	conf.hashes = make([]storage.Address, 0)
-	//load nodes from the snapshot file
-	net, err := initNetWithSnapshot(nodeCount)
+
+	err := sim.UploadSnapshot(fmt.Sprintf("testing/snapshot_%d.json", nodeCount))
 	if err != nil {
 		return err
 	}
-	var rpcSubscriptionsWg sync.WaitGroup
-	//do cleanup after test is terminated
-	defer func() {
-		//shutdown the snapshot network
-		net.Shutdown()
-		//after the test, clean up local stores initialized with createLocalStoreForId
-		localStoreCleanup()
-		//finally clear all data directories
-		datadirsCleanup()
-	}()
-	//get the nodes of the network
-	nodes := net.GetNodes()
-	//select one index at random...
-	idx := rand.Intn(len(nodes))
-	//...and get the the node at that index
-	//this is the node selected for upload
-	uploadNode := nodes[idx]
-	//iterate over all nodes...
-	for c := 0; c < len(nodes); c++ {
-		//create an array of discovery nodeIDS
-		ids[c] = nodes[c].ID()
-		a := network.ToOverlayAddr(ids[c].Bytes())
-		//append it to the array of all overlay addresses
-		conf.addrs = append(conf.addrs, a)
-		conf.addrToIdMap[string(a)] = ids[c]
-	}

-	//needed for healthy call
-	ppmap = network.NewPeerPotMap(testMinProxBinSize, conf.addrs)
+	ctx := context.Background()
+	result := sim.Run(ctx, func(ctx context.Context, sim *simulation.Simulation) error {
+		nodeIDs := sim.UpNodeIDs()
+		for _, n := range nodeIDs {
+			//get the kademlia overlay address from this ID
+			a := network.ToOverlayAddr(n.Bytes())
+			//append it to the array of all overlay addresses
+			conf.addrs = append(conf.addrs, a)
+			//the proximity calculation is on overlay addr,
+			//the p2p/simulations check func triggers on discover.NodeID,
+			//so we need to know which overlay addr maps to which nodeID
+			conf.addrToIDMap[string(a)] = n
+		}

-	trigger := make(chan discover.NodeID)
-	//simulation action
-	action := func(ctx context.Context) error {
-		//first run the health check on all nodes,
-		//wait until nodes are all healthy
-		ticker := time.NewTicker(200 * time.Millisecond)
-		defer ticker.Stop()
-		for range ticker.C {
-			healthy := true
-			for _, id := range ids {
-				r := registries[id]
-				//PeerPot for this node
-				addr := common.Bytes2Hex(network.ToOverlayAddr(id.Bytes()))
-				pp := ppmap[addr]
-				//call Healthy RPC
-				h := r.delivery.overlay.Healthy(pp)
-				//print info
-				log.Debug(r.delivery.overlay.String())
-				log.Debug(fmt.Sprintf("IS HEALTHY: %t", h.GotNN && h.KnowNN && h.Full))
-				if !h.GotNN || !h.Full {
-					healthy = false
-					break
+		//an array for the random files
+		var randomFiles []string
+		//this is the node selected for upload
+		node := sim.RandomUpNode()
+		item, ok := sim.NodeItem(node.ID, bucketKeyStore)
+		if !ok {
+			return fmt.Errorf("No localstore")
+		}
+		lstore := item.(*storage.LocalStore)
+		conf.hashes, err = uploadFileToSingleNodeStore(node.ID, chunkCount, lstore)
+		if err != nil {
+			return err
+		}
+		if _, err := sim.WaitTillHealthy(ctx, 2); err != nil {
+			return err
+		}
+
+		// File retrieval check is repeated until all uploaded files are retrieved from all nodes
+		// or until the timeout is reached.
+		allSuccess := false
+		for !allSuccess {
+			for _, id := range nodeIDs {
+				//for each expected chunk, check if it is in the local store
+				localChunks := conf.idToChunksMap[id]
+				localSuccess := true
+				for _, ch := range localChunks {
+					//get the real chunk by the index in the index array
+					chunk := conf.hashes[ch]
+					log.Trace(fmt.Sprintf("node has chunk: %s:", chunk))
+					//check if the expected chunk is indeed in the localstore
+					var err error
+					//check on the node's FileStore (netstore)
+					item, ok := sim.NodeItem(id, bucketKeyFileStore)
+					if !ok {
+						return fmt.Errorf("No registry")
+					}
+					fileStore := item.(*storage.FileStore)
+					//check all chunks
+					for i, hash := range conf.hashes {
+						reader, _ := fileStore.Retrieve(context.TODO(), hash)
+						//check that we can read the file size and that it corresponds to the generated file size
+						if s, err := reader.Size(ctx, nil); err != nil || s != int64(len(randomFiles[i])) {
+							allSuccess = false
+							log.Warn("Retrieve error", "err", err, "hash", hash, "nodeId", id)
+						} else {
+							log.Debug(fmt.Sprintf("File with root hash %x successfully retrieved", hash))
+						}
+					}
+					if err != nil {
+						log.Warn(fmt.Sprintf("Chunk %s NOT found for id %s", chunk, id))
+						localSuccess = false
+					} else {
+						log.Debug(fmt.Sprintf("Chunk %s IS FOUND for id %s", chunk, id))
+					}
 				}
-			}
-			if healthy {
-				break
+				allSuccess = localSuccess
 			}
 		}
-
-		if history {
-			log.Info("Uploading for history")
-			//If testing only history, we upload the chunk(s) first
-			conf.hashes, err = uploadFileToSingleNodeStore(uploadNode.ID(), chunkCount)
-			if err != nil {
-				return err
-			}
+		if !allSuccess {
+			return fmt.Errorf("Not all chunks succeeded!")
 		}
-
-		//variables needed to wait for all subscriptions established before uploading
-		errc := make(chan error)
-
-		//now setup and start event watching in order to know when we can upload
-		ctx, watchCancel := context.WithTimeout(context.Background(), MaxTimeout*time.Second)
-		defer watchCancel()
-
-		log.Info("Setting up stream subscription")
-		//We need two iterations, one to subscribe to the subscription events
-		//(so we know when setup phase is finished), and one to
-		//actually run the stream subscriptions. We can't do it in the same iteration,
-		//because while the first nodes in the loop are setting up subscriptions,
-		//the latter ones have not subscribed to listen to peer events yet,
-		//and then we miss events.
-
-		//first iteration: setup disconnection watcher and subscribe to peer events
-		for j, id := range ids {
-			log.Trace(fmt.Sprintf("Subscribe to subscription events: %d", j))
-			client, err := net.GetNode(id).Client()
-			if err != nil {
-				return err
-			}
-
-			//check for `SubscribeMsg` events to know when setup phase is complete
-			wsDoneC := watchSubscriptionEvents(ctx, id, client, errc, quitC)
-			// doneC is nil, the error happened which is sent to errc channel, already
-			if wsDoneC == nil {
-				continue
-			}
-			rpcSubscriptionsWg.Add(1)
-			go func() {
-				<-wsDoneC
-				rpcSubscriptionsWg.Done()
-			}()
-
-			//watch for peers disconnecting
-			wdDoneC, err := streamTesting.WatchDisconnections(id, client, disconnectC, quitC)
-			if err != nil {
-				return err
-			}
-			rpcSubscriptionsWg.Add(1)
-			go func() {
-				<-wdDoneC
-				rpcSubscriptionsWg.Done()
-			}()
-		}
-
-		//second iteration: start syncing and setup stream subscriptions
-		for j, id := range ids {
-			log.Trace(fmt.Sprintf("Start syncing and stream subscriptions: %d", j))
-			client, err := net.GetNode(id).Client()
-			if err != nil {
-				return err
-			}
-			//start syncing!
-			var cnt int
-			err = client.CallContext(ctx, &cnt, "stream_startSyncing")
-			if err != nil {
-				return err
-			}
-			//increment the number of subscriptions we need to wait for
-			//by the count returned from startSyncing (SYNC subscriptions)
-			subscriptionCount += cnt
-			//now also add the number of RETRIEVAL_REQUEST subscriptions
-			for snid := range registries[id].peers {
-				subscriptionCount++
-				err = client.CallContext(ctx, nil, "stream_subscribeStream", snid, NewStream(swarmChunkServerStreamName, "", false), nil, Top)
-				if err != nil {
-					return err
-				}
-			}
-		}
-
-		//now wait until the number of expected subscriptions has been finished
-		//`watchSubscriptionEvents` will write with a `nil` value to errc
-		//every time a `SubscriptionMsg` has been received
-		for err := range errc {
-			if err != nil {
-				return err
-			}
-			//`nil` received, decrement count
-			subscriptionCount--
-			//all subscriptions received
-			if subscriptionCount == 0 {
-				break
-			}
-		}
-
-		log.Info("Stream subscriptions successfully requested, action terminated")
-
-		if live {
-			//now upload the chunks to the selected random single node
-			chnks, err := uploadFileToSingleNodeStore(uploadNode.ID(), chunkCount)
-			if err != nil {
-				return err
-			}
-			conf.hashes = append(conf.hashes, chnks...)
-		}
-
 		return nil
-	}
-
-	chunkSize := storage.DefaultChunkSize
-
-	//check defines what will be checked during the test
-	check := func(ctx context.Context, id discover.NodeID) (bool, error) {
-
-		//don't check the uploader node
-		if id == uploadNode.ID() {
-			return true, nil
-		}
-
-		select {
-		case <-ctx.Done():
-			return false, ctx.Err()
-		case e := <-disconnectC:
-			log.Error(e.Error())
-			return false, fmt.Errorf("Disconnect event detected, network unhealthy")
-		default:
-		}
-		log.Trace(fmt.Sprintf("Checking node: %s", id))
-		//if there are more than one chunk, test only succeeds if all expected chunks are found
-		allSuccess := true
-
-		//check on the node's FileStore (netstore)
-		fileStore := registries[id].fileStore
-		//check all chunks
-		for _, chnk := range conf.hashes {
-			reader, _ := fileStore.Retrieve(context.TODO(), chnk)
-			//assuming that reading the Size of the chunk is enough to know we found it
-			if s, err := reader.Size(context.TODO(), nil); err != nil || s != chunkSize {
-				allSuccess = false
-				log.Warn("Retrieve error", "err", err, "chunk", chnk, "nodeId", id)
-			} else {
-				log.Debug(fmt.Sprintf("Chunk %x found", chnk))
-			}
-		}
-		return allSuccess, nil
-	}
-
-	//for each tick, run the checks on all nodes
-	timingTicker := time.NewTicker(5 * time.Second)
-	defer timingTicker.Stop()
-	go func() {
-		for range timingTicker.C {
-			for i := 0; i < len(ids); i++ {
-				log.Trace(fmt.Sprintf("triggering step %d, id %s", i, ids[i]))
-				trigger <- ids[i]
-			}
-		}
-	}()
-
-	log.Info("Starting simulation run...")
-
-	timeout := MaxTimeout * time.Second
-	ctx, cancel := context.WithTimeout(context.Background(), timeout)
-	defer cancel()
-
-	//run the simulation
-	result := simulations.NewSimulation(net).Run(ctx, &simulations.Step{
-		Action:  action,
-		Trigger: trigger,
-		Expect: &simulations.Expectation{
-			Nodes: ids,
-			Check: check,
-		},
 	})

 	if result.Error != nil {
@@ -743,53 +388,3 @@ func runRetrievalTest(chunkCount int, nodeCount int) error {

 	return nil
 }
-
-//upload generated files to nodes
-//every node gets one file uploaded
-func uploadFilesToNodes(nodes []*simulations.Node) ([]storage.Address, []string, error) {
-	nodeCnt := len(nodes)
-	log.Debug(fmt.Sprintf("Uploading %d files to nodes", nodeCnt))
-	//array holding generated files
-	rfiles := make([]string, nodeCnt)
-	//array holding the root hashes of the files
-	rootAddrs := make([]storage.Address, nodeCnt)
-
-	var err error
-	//for every node, generate a file and upload
-	for i, n := range nodes {
-		id := n.ID()
-		fileStore := registries[id].fileStore
-		//generate a file
-		rfiles[i], err = generateRandomFile()
-		if err != nil {
-			return nil, nil, err
-		}
-		//store it (upload it) on the FileStore
-		ctx := context.TODO()
-		rk, wait, err := fileStore.Store(ctx, strings.NewReader(rfiles[i]), int64(len(rfiles[i])), false)
-		log.Debug("Uploaded random string file to node")
-		if err != nil {
-			return nil, nil, err
-		}
-		err = wait(ctx)
-		if err != nil {
-			return nil, nil, err
-		}
-		rootAddrs[i] = rk
-	}
-	return rootAddrs, rfiles, nil
-}
-
-//generate a random file (string)
-func generateRandomFile() (string, error) {
-	//generate a random file size between minFileSize and maxFileSize
-	fileSize := rand.Intn(maxFileSize-minFileSize) + minFileSize
-	log.Debug(fmt.Sprintf("Generated file with filesize %d kB", fileSize))
-	b := make([]byte, fileSize*1024)
-	_, err := crand.Read(b)
-	if err != nil {
-		log.Error("Error generating random file.", "err", err)
-		return "", err
-	}
-	return string(b), nil
-}