CI race detector: handle failing tests (#19143)

* swarm/storage: increase mget timeout in common_test.go TestDbStoreCorrect_1k sometimes timed out with -race on Travis. --- FAIL: TestDbStoreCorrect_1k (24.63s) common_test.go:194: testStore failed: timed out after 10s * swarm: remove unused vars from TestSnapshotSyncWithServer nodeCount and chunkCount are returned from setupSim, and those are the values we use. * swarm: move race/norace helpers from stream to testutil As we will need to use the flag in other packages, too. * swarm: refactor TestSwarmNetwork case Extract long running test cases for better visibility. * swarm/network: skip TestSyncingViaGlobalSync with -race As it panics on Travis. panic: runtime error: invalid memory address or nil pointer dereference [signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x7e351b] * swarm: run TestSwarmNetwork with fewer nodes with -race As otherwise we always get a test failure with `network_test.go:374: context deadline exceeded` even with raised `Timeout`. * swarm/network: run TestDeliveryFromNodes with fewer nodes with -race The test on Travis times out with 8 or more nodes if the -race flag is present. * swarm/network: smaller node count for discovery tests with -race TestDiscoveryPersistenceSimulationSimAdapters failed on Travis with the `-race` flag present. The failure was due to extensive memory usage, coming from the CGO runtime. Using a smaller node count resolves the issue. === RUN TestDiscoveryPersistenceSimulationSimAdapter ==7227==ERROR: ThreadSanitizer failed to allocate 0x80000 (524288) bytes of clock allocator (error code: 12) FATAL: ThreadSanitizer CHECK failed: ./gotsan.cc:6976 "((0 && "unable to mmap")) != (0)" (0x0, 0x0) FAIL github.com/ethereum/go-ethereum/swarm/network/simulations/discovery 804.826s * swarm/network: run TestFileRetrieval with fewer nodes with -race Otherwise we get a failure due to extensive memory usage, as the CGO runtime cannot allocate more bytes. 
=== RUN TestFileRetrieval ==7366==ERROR: ThreadSanitizer failed to allocate 0x80000 (524288) bytes of clock allocator (error code: 12) FATAL: ThreadSanitizer CHECK failed: ./gotsan.cc:6976 "((0 && "unable to mmap")) != (0)" (0x0, 0x0) FAIL github.com/ethereum/go-ethereum/swarm/network/stream 155.165s * swarm/network: run TestRetrieval with fewer nodes with -race Otherwise we get a failure due to extensive memory usage, as the CGO runtime cannot allocate more bytes ("ThreadSanitizer failed to allocate"). * swarm/network: skip flaky TestGetSubscriptionsRPC on Travis w/ -race The test fails a lot with something like: streamer_test.go:1332: Real subscriptions and expected amount don't match; real: 0, expected: 20 * swarm/storage: skip TestDB_SubscribePull* tests on Travis w/ -race Travis just hangs... ok github.com/ethereum/go-ethereum/swarm/storage/feed/lookup 1.307s keepalive keepalive keepalive or panics after a while. Without these tests the race detector job is now stable. Let's investigate these tests in a separate issue: https://github.com/ethersphere/go-ethereum/issues/1245
2019-02-20 22:57:42 +01:00
parent d36e974ba3
commit e38b227ce6
13 changed files with 187 additions and 132 deletions
--- a/swarm/network_test.go
+++ b/swarm/network_test.go
@@ -28,6 +28,8 @@ import (
 	"testing"
 	"time"

+	"github.com/ethereum/go-ethereum/swarm/testutil"
+
 	"github.com/ethereum/go-ethereum/crypto"
 	"github.com/ethereum/go-ethereum/log"
 	"github.com/ethereum/go-ethereum/node"
@@ -36,7 +38,7 @@ import (
 	"github.com/ethereum/go-ethereum/swarm/api"
 	"github.com/ethereum/go-ethereum/swarm/network/simulation"
 	"github.com/ethereum/go-ethereum/swarm/storage"
-	colorable "github.com/mattn/go-colorable"
+	"github.com/mattn/go-colorable"
 )

 var (
@@ -57,12 +59,7 @@ func init() {
 // static and dynamic Swarm nodes in network simulation, by
 // uploading files to every node and retrieving them.
 func TestSwarmNetwork(t *testing.T) {
-	for _, tc := range []struct {
-		name     string
-		steps    []testSwarmNetworkStep
-		options  *testSwarmNetworkOptions
-		disabled bool
-	}{
+	var tests = []testSwarmNetworkCase{
 		{
 			name: "10_nodes",
 			steps: []testSwarmNetworkStep{
@@ -86,6 +83,61 @@ func TestSwarmNetwork(t *testing.T) {
 				SkipCheck: true,
 			},
 		},
+		{
+			name: "dec_inc_node_count",
+			steps: []testSwarmNetworkStep{
+				{
+					nodeCount: 3,
+				},
+				{
+					nodeCount: 1,
+				},
+				{
+					nodeCount: 5,
+				},
+			},
+			options: &testSwarmNetworkOptions{
+				Timeout: 90 * time.Second,
+			},
+		},
+	}
+
+	if *longrunning {
+		tests = append(tests, longRunningCases()...)
+	} else if testutil.RaceEnabled {
+		tests = shortCaseForRace()
+
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			testSwarmNetwork(t, tc.options, tc.steps...)
+		})
+	}
+}
+
+type testSwarmNetworkCase struct {
+	name    string
+	steps   []testSwarmNetworkStep
+	options *testSwarmNetworkOptions
+}
+
+// testSwarmNetworkStep is the configuration
+// for the state of the simulation network.
+type testSwarmNetworkStep struct {
+	// number of swarm nodes that must be in the Up state
+	nodeCount int
+}
+
+// testSwarmNetworkOptions contains optional parameters for running
+// testSwarmNetwork.
+type testSwarmNetworkOptions struct {
+	Timeout   time.Duration
+	SkipCheck bool
+}
+
+func longRunningCases() []testSwarmNetworkCase {
+	return []testSwarmNetworkCase{
 		{
 			name: "50_nodes",
 			steps: []testSwarmNetworkStep{
@@ -96,7 +148,6 @@ func TestSwarmNetwork(t *testing.T) {
 			options: &testSwarmNetworkOptions{
 				Timeout: 3 * time.Minute,
 			},
-			disabled: !*longrunning,
 		},
 		{
 			name: "50_nodes_skip_check",
@@ -109,7 +160,6 @@ func TestSwarmNetwork(t *testing.T) {
 				Timeout:   3 * time.Minute,
 				SkipCheck: true,
 			},
-			disabled: !*longrunning,
 		},
 		{
 			name: "inc_node_count",
@@ -127,7 +177,6 @@ func TestSwarmNetwork(t *testing.T) {
 			options: &testSwarmNetworkOptions{
 				Timeout: 90 * time.Second,
 			},
-			disabled: !*longrunning,
 		},
 		{
 			name: "dec_node_count",
@@ -145,24 +194,6 @@ func TestSwarmNetwork(t *testing.T) {
 			options: &testSwarmNetworkOptions{
 				Timeout: 90 * time.Second,
 			},
-			disabled: !*longrunning,
-		},
-		{
-			name: "dec_inc_node_count",
-			steps: []testSwarmNetworkStep{
-				{
-					nodeCount: 3,
-				},
-				{
-					nodeCount: 1,
-				},
-				{
-					nodeCount: 5,
-				},
-			},
-			options: &testSwarmNetworkOptions{
-				Timeout: 90 * time.Second,
-			},
 		},
 		{
 			name: "inc_dec_node_count",
@@ -186,7 +217,6 @@ func TestSwarmNetwork(t *testing.T) {
 			options: &testSwarmNetworkOptions{
 				Timeout: 5 * time.Minute,
 			},
-			disabled: !*longrunning,
 		},
 		{
 			name: "inc_dec_node_count_skip_check",
@@ -211,23 +241,25 @@ func TestSwarmNetwork(t *testing.T) {
 				Timeout:   5 * time.Minute,
 				SkipCheck: true,
 			},
-			disabled: !*longrunning,
 		},
-	} {
-		if tc.disabled {
-			continue
-		}
-		t.Run(tc.name, func(t *testing.T) {
-			testSwarmNetwork(t, tc.options, tc.steps...)
-		})
 	}
 }

-// testSwarmNetworkStep is the configuration
-// for the state of the simulation network.
-type testSwarmNetworkStep struct {
-	// number of swarm nodes that must be in the Up state
-	nodeCount int
+func shortCaseForRace() []testSwarmNetworkCase {
+	// For now, Travis with -race can only run 8 nodes
+	return []testSwarmNetworkCase{
+		{
+			name: "8_nodes",
+			steps: []testSwarmNetworkStep{
+				{
+					nodeCount: 8,
+				},
+			},
+			options: &testSwarmNetworkOptions{
+				Timeout: 1 * time.Minute,
+			},
+		},
+	}
 }

 // file represents the file uploaded on a particular node.
@@ -244,13 +276,6 @@ type check struct {
 	nodeID enode.ID
 }

-// testSwarmNetworkOptions contains optional parameters for running
-// testSwarmNetwork.
-type testSwarmNetworkOptions struct {
-	Timeout   time.Duration
-	SkipCheck bool
-}
-
 // testSwarmNetwork is a helper function used for testing different
 // static and dynamic Swarm network simulations.
 // It is responsible for:
@@ -259,6 +284,7 @@ type testSwarmNetworkOptions struct {
 //  - May wait for Kademlia on every node to be healthy.
 //  - Checking if a file is retrievable from all nodes.
 func testSwarmNetwork(t *testing.T, o *testSwarmNetworkOptions, steps ...testSwarmNetworkStep) {
+	t.Helper()

 	if o == nil {
 		o = new(testSwarmNetworkOptions)