p2p/discover: fix race involving the seed node iterator

nodeDB.querySeeds was not safe for concurrent use but could be called
concurrenty on multiple goroutines in the following case:

- the table was empty
- a timed refresh started
- a lookup was started and initiated refresh

These conditions are unlikely to coincide during normal use, but are
much more likely to occur all at once when the user's machine just woke
from sleep. The root cause of the issue is that querySeeds reused the
same leveldb iterator until it was exhausted.

This commit moves the refresh scheduling logic into its own goroutine
(so only one refresh is ever active) and changes querySeeds to not use
a persistent iterator. The seed node selection is now more random and
ignores nodes that have not been contacted in the last 5 days.
This commit is contained in:
Felix Lange
2015-09-30 05:01:49 +02:00
parent 7977e87ce1
commit b4374436f3
5 changed files with 204 additions and 178 deletions

View File

@ -162,9 +162,33 @@ var nodeDBSeedQueryNodes = []struct {
node *Node
pong time.Time
}{
// This one should not be in the result set because its last
// pong time is too far in the past.
{
node: newNode(
MustHexID("0x01d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
MustHexID("0x84d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
net.IP{127, 0, 0, 3},
30303,
30303,
),
pong: time.Now().Add(-3 * time.Hour),
},
// This one shouldn't be in in the result set because its
// nodeID is the local node's ID.
{
node: newNode(
MustHexID("0x57d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
net.IP{127, 0, 0, 3},
30303,
30303,
),
pong: time.Now().Add(-4 * time.Second),
},
// These should be in the result set.
{
node: newNode(
MustHexID("0x22d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
net.IP{127, 0, 0, 1},
30303,
30303,
@ -173,7 +197,7 @@ var nodeDBSeedQueryNodes = []struct {
},
{
node: newNode(
MustHexID("0x02d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
MustHexID("0x44d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
net.IP{127, 0, 0, 2},
30303,
30303,
@ -182,7 +206,7 @@ var nodeDBSeedQueryNodes = []struct {
},
{
node: newNode(
MustHexID("0x03d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
MustHexID("0xe2d9d65c4552b5eb43d5ad55a2ee3f56c6cbc1c64a5c8d659f51fcd51bace24351232b8d7821617d2b29b54b81cdefb9b3e9c37d7fd5f63270bcc9e1a6f6a439"),
net.IP{127, 0, 0, 3},
30303,
30303,
@ -192,7 +216,7 @@ var nodeDBSeedQueryNodes = []struct {
}
func TestNodeDBSeedQuery(t *testing.T) {
db, _ := newNodeDB("", Version, NodeID{})
db, _ := newNodeDB("", Version, nodeDBSeedQueryNodes[1].node.ID)
defer db.close()
// Insert a batch of nodes for querying
@ -200,20 +224,24 @@ func TestNodeDBSeedQuery(t *testing.T) {
if err := db.updateNode(seed.node); err != nil {
t.Fatalf("node %d: failed to insert: %v", i, err)
}
if err := db.updateLastPong(seed.node.ID, seed.pong); err != nil {
t.Fatalf("node %d: failed to insert lastPong: %v", i, err)
}
}
// Retrieve the entire batch and check for duplicates
seeds := db.querySeeds(2 * len(nodeDBSeedQueryNodes))
if len(seeds) != len(nodeDBSeedQueryNodes) {
t.Errorf("seed count mismatch: have %v, want %v", len(seeds), len(nodeDBSeedQueryNodes))
}
seeds := db.querySeeds(len(nodeDBSeedQueryNodes)*2, time.Hour)
have := make(map[NodeID]struct{})
for _, seed := range seeds {
have[seed.ID] = struct{}{}
}
want := make(map[NodeID]struct{})
for _, seed := range nodeDBSeedQueryNodes {
for _, seed := range nodeDBSeedQueryNodes[2:] {
want[seed.node.ID] = struct{}{}
}
if len(seeds) != len(want) {
t.Errorf("seed count mismatch: have %v, want %v", len(seeds), len(want))
}
for id, _ := range have {
if _, ok := want[id]; !ok {
t.Errorf("extra seed: %v", id)
@ -224,63 +252,6 @@ func TestNodeDBSeedQuery(t *testing.T) {
t.Errorf("missing seed: %v", id)
}
}
// Make sure the next batch is empty (seed EOF)
seeds = db.querySeeds(2 * len(nodeDBSeedQueryNodes))
if len(seeds) != 0 {
t.Errorf("seed count mismatch: have %v, want %v", len(seeds), 0)
}
}
func TestNodeDBSeedQueryContinuation(t *testing.T) {
db, _ := newNodeDB("", Version, NodeID{})
defer db.close()
// Insert a batch of nodes for querying
for i, seed := range nodeDBSeedQueryNodes {
if err := db.updateNode(seed.node); err != nil {
t.Fatalf("node %d: failed to insert: %v", i, err)
}
}
// Iteratively retrieve the batch, checking for an empty batch on reset
for i := 0; i < len(nodeDBSeedQueryNodes); i++ {
if seeds := db.querySeeds(1); len(seeds) != 1 {
t.Errorf("1st iteration %d: seed count mismatch: have %v, want %v", i, len(seeds), 1)
}
}
if seeds := db.querySeeds(1); len(seeds) != 0 {
t.Errorf("reset: seed count mismatch: have %v, want %v", len(seeds), 0)
}
for i := 0; i < len(nodeDBSeedQueryNodes); i++ {
if seeds := db.querySeeds(1); len(seeds) != 1 {
t.Errorf("2nd iteration %d: seed count mismatch: have %v, want %v", i, len(seeds), 1)
}
}
}
func TestNodeDBSelfSeedQuery(t *testing.T) {
// Assign a node as self to verify evacuation
self := nodeDBSeedQueryNodes[0].node.ID
db, _ := newNodeDB("", Version, self)
defer db.close()
// Insert a batch of nodes for querying
for i, seed := range nodeDBSeedQueryNodes {
if err := db.updateNode(seed.node); err != nil {
t.Fatalf("node %d: failed to insert: %v", i, err)
}
}
// Retrieve the entire batch and check that self was evacuated
seeds := db.querySeeds(2 * len(nodeDBSeedQueryNodes))
if len(seeds) != len(nodeDBSeedQueryNodes)-1 {
t.Errorf("seed count mismatch: have %v, want %v", len(seeds), len(nodeDBSeedQueryNodes)-1)
}
have := make(map[NodeID]struct{})
for _, seed := range seeds {
have[seed.ID] = struct{}{}
}
if _, ok := have[self]; ok {
t.Errorf("self not evacuated")
}
}
func TestNodeDBPersistency(t *testing.T) {