CI race detector: handle failing tests (#19143)

* swarm/storage: increase mget timeout in common_test.go

 TestDbStoreCorrect_1k sometimes timed out with -race on Travis.

--- FAIL: TestDbStoreCorrect_1k (24.63s)
    common_test.go:194: testStore failed: timed out after 10s

* swarm: remove unused vars from TestSnapshotSyncWithServer

nodeCount and chunkCount are returned from setupSim, and those are the
values we use.

* swarm: move race/norace helpers from stream to testutil

As we will need to use the flag in other packages, too.

* swarm: refactor TestSwarmNetwork case

Extract long running test cases for better visibility.

* swarm/network: skip TestSyncingViaGlobalSync with -race

As it panics on Travis.

panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x7e351b]

* swarm: run TestSwarmNetwork with fewer nodes with -race

As otherwise we always get test failure with `network_test.go:374:
context deadline exceeded` even with raised `Timeout`.

* swarm/network: run TestDeliveryFromNodes with fewer nodes with -race

Test on Travis times out with 8 or more nodes if -race flag is present.

* swarm/network: smaller node count for discovery tests with -race

TestDiscoveryPersistenceSimulationSimAdapters failed on Travis with
`-race` flag present. The failure was due to extensive memory usage,
coming from the CGO runtime. Using a smaller node count resolves the
issue.

=== RUN   TestDiscoveryPersistenceSimulationSimAdapter
==7227==ERROR: ThreadSanitizer failed to allocate 0x80000 (524288) bytes of clock allocator (error code: 12)
FATAL: ThreadSanitizer CHECK failed: ./gotsan.cc:6976 "((0 && "unable to mmap")) != (0)" (0x0, 0x0)
FAIL    github.com/ethereum/go-ethereum/swarm/network/simulations/discovery     804.826s

* swarm/network: run TestFileRetrieval with fewer nodes with -race

Otherwise we get a failure due to extensive memory usage, as the CGO
runtime cannot allocate more bytes.

=== RUN   TestFileRetrieval
==7366==ERROR: ThreadSanitizer failed to allocate 0x80000 (524288) bytes of clock allocator (error code: 12)
FATAL: ThreadSanitizer CHECK failed: ./gotsan.cc:6976 "((0 && "unable to mmap")) != (0)" (0x0, 0x0)
FAIL	github.com/ethereum/go-ethereum/swarm/network/stream	155.165s

* swarm/network: run TestRetrieval with fewer nodes with -race

Otherwise we get a failure due to extensive memory usage, as the CGO
runtime cannot allocate more bytes ("ThreadSanitizer failed to
allocate").

* swarm/network: skip flaky TestGetSubscriptionsRPC on Travis w/ -race

Test fails a lot with something like:
 streamer_test.go:1332: Real subscriptions and expected amount don't match; real: 0, expected: 20

* swarm/storage: skip TestDB_SubscribePull* tests on Travis w/ -race

Travis just hangs...

ok  	github.com/ethereum/go-ethereum/swarm/storage/feed/lookup	1.307s
keepalive
keepalive
keepalive

or panics after a while.

Without these tests the race detector job is now stable. Let's
investigate these tests in a separate issue:
https://github.com/ethersphere/go-ethereum/issues/1245
This commit is contained in:
Ferenc Szabo
2019-02-20 22:57:42 +01:00
committed by Viktor Trón
parent d36e974ba3
commit e38b227ce6
13 changed files with 187 additions and 132 deletions

View File

@@ -28,6 +28,8 @@ import (
"testing"
"time"
"github.com/ethereum/go-ethereum/swarm/testutil"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/node"
@@ -36,7 +38,7 @@ import (
"github.com/ethereum/go-ethereum/swarm/api"
"github.com/ethereum/go-ethereum/swarm/network/simulation"
"github.com/ethereum/go-ethereum/swarm/storage"
colorable "github.com/mattn/go-colorable"
"github.com/mattn/go-colorable"
)
var (
@@ -57,12 +59,7 @@ func init() {
// static and dynamic Swarm nodes in network simulation, by
// uploading files to every node and retrieving them.
func TestSwarmNetwork(t *testing.T) {
for _, tc := range []struct {
name string
steps []testSwarmNetworkStep
options *testSwarmNetworkOptions
disabled bool
}{
var tests = []testSwarmNetworkCase{
{
name: "10_nodes",
steps: []testSwarmNetworkStep{
@@ -86,6 +83,61 @@ func TestSwarmNetwork(t *testing.T) {
SkipCheck: true,
},
},
{
name: "dec_inc_node_count",
steps: []testSwarmNetworkStep{
{
nodeCount: 3,
},
{
nodeCount: 1,
},
{
nodeCount: 5,
},
},
options: &testSwarmNetworkOptions{
Timeout: 90 * time.Second,
},
},
}
if *longrunning {
tests = append(tests, longRunningCases()...)
} else if testutil.RaceEnabled {
tests = shortCaseForRace()
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
testSwarmNetwork(t, tc.options, tc.steps...)
})
}
}
// testSwarmNetworkCase describes a single named sub-test of
// TestSwarmNetwork: TestSwarmNetwork runs every case through t.Run
// using its name, and hands its options and steps to testSwarmNetwork.
type testSwarmNetworkCase struct {
// name is used as the sub-test name passed to t.Run.
name string
// steps are the simulation network states, passed on in order
// to testSwarmNetwork.
steps []testSwarmNetworkStep
// options are the optional parameters forwarded to testSwarmNetwork.
options *testSwarmNetworkOptions
}
// testSwarmNetworkStep is the configuration for one state of the
// simulation network. A test case lists one or more steps.
type testSwarmNetworkStep struct {
// number of swarm nodes that must be in the Up state
nodeCount int
}
// testSwarmNetworkOptions contains optional parameters for running
// testSwarmNetwork. A nil pointer is accepted by testSwarmNetwork,
// which then substitutes a zero-valued options struct.
type testSwarmNetworkOptions struct {
// NOTE(review): presumably the overall time limit for one
// testSwarmNetwork run; confirm against the testSwarmNetwork body.
Timeout time.Duration
// NOTE(review): the exact check being skipped is not visible in
// this excerpt; confirm against the testSwarmNetwork body.
SkipCheck bool
}
func longRunningCases() []testSwarmNetworkCase {
return []testSwarmNetworkCase{
{
name: "50_nodes",
steps: []testSwarmNetworkStep{
@@ -96,7 +148,6 @@ func TestSwarmNetwork(t *testing.T) {
options: &testSwarmNetworkOptions{
Timeout: 3 * time.Minute,
},
disabled: !*longrunning,
},
{
name: "50_nodes_skip_check",
@@ -109,7 +160,6 @@ func TestSwarmNetwork(t *testing.T) {
Timeout: 3 * time.Minute,
SkipCheck: true,
},
disabled: !*longrunning,
},
{
name: "inc_node_count",
@@ -127,7 +177,6 @@ func TestSwarmNetwork(t *testing.T) {
options: &testSwarmNetworkOptions{
Timeout: 90 * time.Second,
},
disabled: !*longrunning,
},
{
name: "dec_node_count",
@@ -145,24 +194,6 @@ func TestSwarmNetwork(t *testing.T) {
options: &testSwarmNetworkOptions{
Timeout: 90 * time.Second,
},
disabled: !*longrunning,
},
{
name: "dec_inc_node_count",
steps: []testSwarmNetworkStep{
{
nodeCount: 3,
},
{
nodeCount: 1,
},
{
nodeCount: 5,
},
},
options: &testSwarmNetworkOptions{
Timeout: 90 * time.Second,
},
},
{
name: "inc_dec_node_count",
@@ -186,7 +217,6 @@ func TestSwarmNetwork(t *testing.T) {
options: &testSwarmNetworkOptions{
Timeout: 5 * time.Minute,
},
disabled: !*longrunning,
},
{
name: "inc_dec_node_count_skip_check",
@@ -211,23 +241,25 @@ func TestSwarmNetwork(t *testing.T) {
Timeout: 5 * time.Minute,
SkipCheck: true,
},
disabled: !*longrunning,
},
} {
if tc.disabled {
continue
}
t.Run(tc.name, func(t *testing.T) {
testSwarmNetwork(t, tc.options, tc.steps...)
})
}
}
// testSwarmNetworkStep is the configuration
// for the state of the simulation network.
type testSwarmNetworkStep struct {
// number of swarm nodes that must be in the Up state
nodeCount int
// shortCaseForRace returns the reduced set of test cases that
// TestSwarmNetwork runs instead of its default cases when the race
// detector is enabled (testutil.RaceEnabled) and the longrunning flag
// is not set. It contains a single 8-node case with a one minute
// timeout.
func shortCaseForRace() []testSwarmNetworkCase {
// For now, Travis with -race can only cope with 8 nodes; larger
// node counts fail with "context deadline exceeded" even with a
// raised Timeout.
return []testSwarmNetworkCase{
{
name: "8_nodes",
steps: []testSwarmNetworkStep{
{
nodeCount: 8,
},
},
options: &testSwarmNetworkOptions{
Timeout: 1 * time.Minute,
},
},
}
}
// file represents the file uploaded on a particular node.
@@ -244,13 +276,6 @@ type check struct {
nodeID enode.ID
}
// testSwarmNetworkOptions contains optional parameters for running
// testSwarmNetwork.
type testSwarmNetworkOptions struct {
Timeout time.Duration
SkipCheck bool
}
// testSwarmNetwork is a helper function used for testing different
// static and dynamic Swarm network simulations.
// It is responsible for:
@@ -259,6 +284,7 @@ type testSwarmNetworkOptions struct {
// - May wait for Kademlia on every node to be healthy.
// - Checking if a file is retrievable from all nodes.
func testSwarmNetwork(t *testing.T, o *testSwarmNetworkOptions, steps ...testSwarmNetworkStep) {
t.Helper()
if o == nil {
o = new(testSwarmNetworkOptions)