swarm: fix network/stream data races (#19051)

* swarm/network/stream: newStreamerTester cleanup only if err is nil

* swarm/network/stream: raise newStreamerTester waitForPeers timeout

* swarm/network/stream: fix data races in GetPeerSubscriptions

* swarm/storage: prevent data race on LDBStore.batchesC

https://github.com/ethersphere/go-ethereum/issues/1198#issuecomment-461775049

* swarm/network/stream: fix TestGetSubscriptionsRPC data race

https://github.com/ethersphere/go-ethereum/issues/1198#issuecomment-461768477

* swarm/network/stream: correctly use Simulation.Run callback

https://github.com/ethersphere/go-ethereum/issues/1198#issuecomment-461783804

* swarm/network: protect addrCountC in Kademlia.AddrCountC function

https://github.com/ethersphere/go-ethereum/issues/1198#issuecomment-462273444

* p2p/simulations: fix a deadlock calling getRandomNode with lock

https://github.com/ethersphere/go-ethereum/issues/1198#issuecomment-462317407

* swarm/network/stream: terminate disconnect goruotines in tests

* swarm/network/stream: reduce memory consumption when testing data races

* swarm/network/stream: add watchDisconnections helper function

* swarm/network/stream: add concurrent counter for tests

* swarm/network/stream: rename race/norace test files and use const

* swarm/network/stream: remove watchSim and its panic

* swarm/network/stream: pass context in watchDisconnections

* swarm/network/stream: add concurrent safe bool for watchDisconnections

* swarm/storage: fix LDBStore.batchesC data race by not closing it
This commit is contained in:
Janoš Guljaš
2019-02-13 13:03:23 +01:00
committed by Viktor Trón
parent d596bea2d5
commit 3fd6db2bf6
14 changed files with 274 additions and 197 deletions

View File

@ -17,11 +17,12 @@ package stream
import (
"context"
"errors"
"fmt"
"io/ioutil"
"os"
"runtime"
"sync"
"sync/atomic"
"testing"
"time"
@ -92,6 +93,15 @@ func TestSyncingViaGlobalSync(t *testing.T) {
if *longrunning {
chnkCnt = []int{1, 8, 32, 256, 1024}
nodeCnt = []int{16, 32, 64, 128, 256}
} else if raceTest {
// TestSyncingViaGlobalSync allocates a lot of memory
// with race detector. By reducing the number of chunks
// and nodes, memory consumption is lower and data races
// are still checked, while correctness of syncing is
// tested with more chunks and nodes in regular (!race)
// tests.
chnkCnt = []int{4}
nodeCnt = []int{16}
} else {
//default test
chnkCnt = []int{4, 32}
@ -113,7 +123,23 @@ var simServiceMap = map[string]simulation.ServiceFunc{
return nil, nil, err
}
r := NewRegistry(addr.ID(), delivery, netStore, state.NewInmemoryStore(), &RegistryOptions{
var dir string
var store *state.DBStore
if raceTest {
// Use on-disk DBStore to reduce memory consumption in race tests.
dir, err = ioutil.TempDir("", "swarm-stream-")
if err != nil {
return nil, nil, err
}
store, err = state.NewDBStore(dir)
if err != nil {
return nil, nil, err
}
} else {
store = state.NewInmemoryStore()
}
r := NewRegistry(addr.ID(), delivery, netStore, store, &RegistryOptions{
Retrieval: RetrievalDisabled,
Syncing: SyncingAutoSubscribe,
SyncUpdateDelay: 3 * time.Second,
@ -156,36 +182,24 @@ func testSyncingViaGlobalSync(t *testing.T, chunkCount int, nodeCount int) {
t.Fatal(err)
}
disconnections := sim.PeerEvents(
context.Background(),
sim.NodeIDs(),
simulation.NewPeerEventsFilter().Drop(),
)
var disconnected atomic.Value
go func() {
for d := range disconnections {
if d.Error != nil {
log.Error("peer drop", "node", d.NodeID, "peer", d.PeerID)
disconnected.Store(true)
}
}
}()
result := runSim(conf, ctx, sim, chunkCount)
if result.Error != nil {
t.Fatal(result.Error)
}
if yes, ok := disconnected.Load().(bool); ok && yes {
t.Fatal("disconnect events received")
}
log.Info("Simulation ended")
}
func runSim(conf *synctestConfig, ctx context.Context, sim *simulation.Simulation, chunkCount int) simulation.Result {
return sim.Run(ctx, func(ctx context.Context, sim *simulation.Simulation) error {
return sim.Run(ctx, func(ctx context.Context, sim *simulation.Simulation) (err error) {
disconnected := watchDisconnections(ctx, sim)
defer func() {
if err != nil && disconnected.bool() {
err = errors.New("disconnect events received")
}
}()
nodeIDs := sim.UpNodeIDs()
for _, n := range nodeIDs {
//get the kademlia overlay address from this ID