Ci race detector handle failing tests (#19143)

* swarm/storage: increase mget timeout in common_test.go

 TestDbStoreCorrect_1k sometimes timed out with -race on Travis.

--- FAIL: TestDbStoreCorrect_1k (24.63s)
    common_test.go:194: testStore failed: timed out after 10s

* swarm: remove unused vars from TestSnapshotSyncWithServer

nodeCount and chunkCount is returned from setupSim and those values
we use.

* swarm: move race/norace helpers from stream to testutil

As we will need to use the flag in other packages, too.

* swarm: refactor TestSwarmNetwork case

Extract long running test cases for better visibility.

* swarm/network: skip TestSyncingViaGlobalSync with -race

As panics on Travis.

panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x7e351b]

* swarm: run TestSwarmNetwork with fewer nodes with -race

As otherwise we always get test failure with `network_test.go:374:
context deadline exceeded` even with raised `Timeout`.

* swarm/network: run TestDeliveryFromNodes with fewer nodes with -race

Test on Travis times out with 8 or more nodes if -race flag is present.

* swarm/network: smaller node count for discovery tests with -race

TestDiscoveryPersistenceSimulationSimAdapters failed on Travis with
`-race` flag present. The failure was due to extensive memory usage,
coming from the CGO runtime. Using a smaller node count resolves the
issue.

=== RUN   TestDiscoveryPersistenceSimulationSimAdapter
==7227==ERROR: ThreadSanitizer failed to allocate 0x80000 (524288) bytes of clock allocator (error code: 12)
FATAL: ThreadSanitizer CHECK failed: ./gotsan.cc:6976 "((0 && "unable to mmap")) != (0)" (0x0, 0x0)
FAIL    github.com/ethereum/go-ethereum/swarm/network/simulations/discovery     804.826s

* swarm/network: run TestFileRetrieval with fewer nodes with -race

Otherwise we get a failure due to extensive memory usage, as the CGO
runtime cannot allocate more bytes.

=== RUN   TestFileRetrieval
==7366==ERROR: ThreadSanitizer failed to allocate 0x80000 (524288) bytes of clock allocator (error code: 12)
FATAL: ThreadSanitizer CHECK failed: ./gotsan.cc:6976 "((0 && "unable to mmap")) != (0)" (0x0, 0x0)
FAIL	github.com/ethereum/go-ethereum/swarm/network/stream	155.165s

* swarm/network: run TestRetrieval with fewer nodes with -race

Otherwise we get a failure due to extensive memory usage, as the CGO
runtime cannot allocate more bytes ("ThreadSanitizer failed to
allocate").

* swarm/network: skip flaky TestGetSubscriptionsRPC on Travis w/ -race

Test fails a lot with something like:
 streamer_test.go:1332: Real subscriptions and expected amount don't match; real: 0, expected: 20

* swarm/storage: skip TestDB_SubscribePull* tests on Travis w/ -race

Travis just hangs...

ok  	github.com/ethereum/go-ethereum/swarm/storage/feed/lookup	1.307s
keepalive
keepalive
keepalive

or panics after a while.

Without these tests the race detector job is now stable. Let's
invetigate these tests in a separate issue:
https://github.com/ethersphere/go-ethereum/issues/1245
This commit is contained in:
Ferenc Szabo
2019-02-20 22:57:42 +01:00
committed by Viktor Trón
parent d36e974ba3
commit e38b227ce6
13 changed files with 187 additions and 132 deletions

View File

@@ -27,6 +27,8 @@ import (
"testing"
"time"
"github.com/ethereum/go-ethereum/swarm/testutil"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/node"
@@ -82,12 +84,19 @@ func getDbStore(nodeID string) (*state.DBStore, error) {
}
var (
nodeCount = flag.Int("nodes", 32, "number of nodes to create (default 32)")
nodeCount = flag.Int("nodes", defaultNodeCount(), "number of nodes to create (default 32)")
initCount = flag.Int("conns", 1, "number of originally connected peers (default 1)")
loglevel = flag.Int("loglevel", 3, "verbosity of logs")
rawlog = flag.Bool("rawlog", false, "remove terminal formatting from logs")
)
func defaultNodeCount() int {
if testutil.RaceEnabled {
return 8
}
return 32
}
func init() {
flag.Parse()
// register the discovery service which will run as a devp2p

View File

@@ -446,6 +446,12 @@ func TestDeliveryFromNodes(t *testing.T) {
testDeliveryFromNodes(t, 2, dataChunkCount, false)
testDeliveryFromNodes(t, 4, dataChunkCount, true)
testDeliveryFromNodes(t, 4, dataChunkCount, false)
if testutil.RaceEnabled {
// Travis cannot handle more nodes with -race; would time out.
return
}
testDeliveryFromNodes(t, 8, dataChunkCount, true)
testDeliveryFromNodes(t, 8, dataChunkCount, false)
testDeliveryFromNodes(t, 16, dataChunkCount, true)

View File

@@ -1,24 +0,0 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// +build !race
package stream
// Provide a flag to reduce the scope of tests when running them
// with race detector. Some of the tests are doing a lot of allocations
// on the heap, and race detector uses much more memory to track them.
const raceTest = false

View File

@@ -1,23 +0,0 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// +build race
package stream
// Reduce the scope of some tests when running with race detector,
// as it raises the memory consumption significantly.
const raceTest = true

View File

@@ -22,6 +22,8 @@ import (
"testing"
"time"
"github.com/ethereum/go-ethereum/swarm/testutil"
"github.com/ethereum/go-ethereum/node"
"github.com/ethereum/go-ethereum/p2p/enode"
"github.com/ethereum/go-ethereum/p2p/simulations/adapters"
@@ -43,23 +45,24 @@ const (
//Files are uploaded to nodes, other nodes try to retrieve the file
//Number of nodes can be provided via commandline too.
func TestFileRetrieval(t *testing.T) {
var nodeCount []int
if *nodes != 0 {
err := runFileRetrievalTest(*nodes)
if err != nil {
t.Fatal(err)
}
nodeCount = []int{*nodes}
} else {
nodeCnt := []int{16}
//if the `longrunning` flag has been provided
//run more test combinations
nodeCount = []int{16}
if *longrunning {
nodeCnt = append(nodeCnt, 32, 64, 128)
nodeCount = append(nodeCount, 32, 64, 128)
} else if testutil.RaceEnabled {
nodeCount = []int{4}
}
for _, n := range nodeCnt {
err := runFileRetrievalTest(n)
if err != nil {
t.Fatal(err)
}
}
for _, nc := range nodeCount {
if err := runFileRetrievalTest(nc); err != nil {
t.Error(err)
}
}
}
@@ -79,18 +82,17 @@ func TestRetrieval(t *testing.T) {
t.Fatal(err)
}
} else {
var nodeCnt []int
var chnkCnt []int
//if the `longrunning` flag has been provided
//run more test combinations
nodeCnt := []int{16}
chnkCnt := []int{32}
if *longrunning {
nodeCnt = []int{16, 32, 128}
chnkCnt = []int{4, 32, 256}
} else {
//default test
nodeCnt = []int{16}
chnkCnt = []int{32}
} else if testutil.RaceEnabled {
nodeCnt = []int{4}
chnkCnt = []int{4}
}
for _, n := range nodeCnt {
for _, c := range chnkCnt {
t.Run(fmt.Sprintf("TestRetrieval_%d_%d", n, c), func(t *testing.T) {

View File

@@ -19,7 +19,6 @@ import (
"context"
"errors"
"fmt"
"io/ioutil"
"os"
"runtime"
"sync"
@@ -42,8 +41,6 @@ import (
"github.com/ethereum/go-ethereum/swarm/testutil"
)
const MaxTimeout = 600
type synctestConfig struct {
addrs [][]byte
hashes []storage.Address
@@ -80,37 +77,31 @@ func TestSyncingViaGlobalSync(t *testing.T) {
if runtime.GOOS == "darwin" && os.Getenv("TRAVIS") == "true" {
t.Skip("Flaky on mac on travis")
}
if testutil.RaceEnabled {
t.Skip("Segfaults on Travis with -race")
}
//if nodes/chunks have been provided via commandline,
//run the tests with these values
if *nodes != 0 && *chunks != 0 {
log.Info(fmt.Sprintf("Running test with %d chunks and %d nodes...", *chunks, *nodes))
testSyncingViaGlobalSync(t, *chunks, *nodes)
} else {
var nodeCnt []int
var chnkCnt []int
chunkCounts := []int{4, 32}
nodeCounts := []int{32, 16}
//if the `longrunning` flag has been provided
//run more test combinations
if *longrunning {
chnkCnt = []int{1, 8, 32, 256, 1024}
nodeCnt = []int{16, 32, 64, 128, 256}
} else if raceTest {
// TestSyncingViaGlobalSync allocates a lot of memory
// with race detector. By reducing the number of chunks
// and nodes, memory consumption is lower and data races
// are still checked, while correctness of syncing is
// tested with more chunks and nodes in regular (!race)
// tests.
chnkCnt = []int{4}
nodeCnt = []int{16}
} else {
//default test
chnkCnt = []int{4, 32}
nodeCnt = []int{32, 16}
chunkCounts = []int{1, 8, 32, 256, 1024}
nodeCounts = []int{16, 32, 64, 128, 256}
}
for _, chnk := range chnkCnt {
for _, n := range nodeCnt {
log.Info(fmt.Sprintf("Long running test with %d chunks and %d nodes...", chnk, n))
testSyncingViaGlobalSync(t, chnk, n)
for _, chunkCount := range chunkCounts {
for _, n := range nodeCounts {
log.Info(fmt.Sprintf("Long running test with %d chunks and %d nodes...", chunkCount, n))
testSyncingViaGlobalSync(t, chunkCount, n)
}
}
}
@@ -123,21 +114,7 @@ var simServiceMap = map[string]simulation.ServiceFunc{
return nil, nil, err
}
var dir string
var store *state.DBStore
if raceTest {
// Use on-disk DBStore to reduce memory consumption in race tests.
dir, err = ioutil.TempDir("", "swarm-stream-")
if err != nil {
return nil, nil, err
}
store, err = state.NewDBStore(dir)
if err != nil {
return nil, nil, err
}
} else {
store = state.NewInmemoryStore()
}
store := state.NewInmemoryStore()
r := NewRegistry(addr.ID(), delivery, netStore, store, &RegistryOptions{
Retrieval: RetrievalDisabled,

View File

@@ -21,12 +21,15 @@ import (
"context"
"errors"
"fmt"
"os"
"strconv"
"strings"
"sync"
"testing"
"time"
"github.com/ethereum/go-ethereum/swarm/testutil"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/node"
@@ -1178,6 +1181,11 @@ stream registration, then tests that there are subscriptions.
*/
func TestGetSubscriptionsRPC(t *testing.T) {
if testutil.RaceEnabled && os.Getenv("TRAVIS") == "true" {
t.Skip("flaky with -race on Travis")
// Note: related ticket https://github.com/ethersphere/go-ethereum/issues/1234
}
// arbitrarily set to 4
nodeCount := 4
// run with more nodes if `longrunning` flag is set

View File

@@ -48,7 +48,7 @@ func TestSyncerSimulation(t *testing.T) {
// race detector. Allow it to finish successfully by
// reducing its scope, and still check for data races
// with the smallest number of nodes.
if !raceTest {
if !testutil.RaceEnabled {
testSyncBetweenNodes(t, 4, dataChunkCount, true, 1)
testSyncBetweenNodes(t, 8, dataChunkCount, true, 1)
testSyncBetweenNodes(t, 16, dataChunkCount, true, 1)
@@ -88,7 +88,7 @@ func testSyncBetweenNodes(t *testing.T, nodes, chunkCount int, skipCheck bool, p
var dir string
var store *state.DBStore
if raceTest {
if testutil.RaceEnabled {
// Use on-disk DBStore to reduce memory consumption in race tests.
dir, err = ioutil.TempDir("", "swarm-stream-")
if err != nil {

View File

@@ -136,16 +136,6 @@ func TestSnapshotSyncWithServer(t *testing.T) {
//define a wrapper object to be able to pass around data
wrapper := &netWrapper{}
nodeCount := *nodes
chunkCount := *chunks
if nodeCount == 0 || chunkCount == 0 {
nodeCount = 32
chunkCount = 1
}
log.Info(fmt.Sprintf("Running the simulation with %d nodes and %d chunks", nodeCount, chunkCount))
sim := simulation.New(map[string]simulation.ServiceFunc{
"streamer": func(ctx *adapters.ServiceContext, bucket *sync.Map) (s node.Service, cleanup func(), err error) {
addr, netStore, delivery, clean, err := newNetStoreAndDeliveryWithRequestFunc(ctx, bucket, dummyRequestFromPeers)
@@ -178,6 +168,7 @@ func TestSnapshotSyncWithServer(t *testing.T) {
nodeCount, chunkCount, sim := setupSim(simServiceMap)
defer sim.Close()
log.Info(fmt.Sprintf("Running the simulation with %d nodes and %d chunks", nodeCount, chunkCount))
log.Info("Initializing test config")
conf := &synctestConfig{}