p2p/discover: fix race involving the seed node iterator

nodeDB.querySeeds was not safe for concurrent use but could be called concurrently on multiple goroutines in the following case:

- the table was empty
- a timed refresh started
- a lookup was started and initiated a refresh

These conditions are unlikely to coincide during normal use, but are much more likely to occur all at once when the user's machine just woke from sleep. The root cause of the issue is that querySeeds reused the same leveldb iterator until it was exhausted.

This commit moves the refresh scheduling logic into its own goroutine (so only one refresh is ever active) and changes querySeeds to not use a persistent iterator. The seed node selection is now more random and ignores nodes that have not been contacted in the last 5 days.
2015-09-30 05:01:49 +02:00
parent 7977e87ce1
commit b4374436f3
5 changed files with 204 additions and 178 deletions
--- a/p2p/discover/database.go
+++ b/p2p/discover/database.go
@ -21,6 +21,7 @@ package discover

 import (
 	"bytes"
+	"crypto/rand"
 	"encoding/binary"
 	"os"
 	"sync"
@ -46,11 +47,8 @@ var (

 // nodeDB stores all nodes we know about.
 type nodeDB struct {
-	lvl    *leveldb.DB       // Interface to the database itself
-	seeder iterator.Iterator // Iterator for fetching possible seed nodes
-
-	self NodeID // Own node id to prevent adding it into the database
-
+	lvl    *leveldb.DB   // Interface to the database itself
+	self   NodeID        // Own node id to prevent adding it into the database
 	runner sync.Once     // Ensures we can start at most one expirer
 	quit   chan struct{} // Channel to signal the expiring thread to stop
 }
@ -302,52 +300,70 @@ func (db *nodeDB) updateFindFails(id NodeID, fails int) error {
 	return db.storeInt64(makeKey(id, nodeDBDiscoverFindFails), int64(fails))
 }

-// querySeeds retrieves a batch of nodes to be used as potential seed servers
-// during bootstrapping the node into the network.
-//
-// Ideal seeds are the most recently seen nodes (highest probability to be still
-// alive), but yet untried. However, since leveldb only supports dumb iteration
-// we will instead start pulling in potential seeds that haven't been yet pinged
-// since the start of the boot procedure.
-//
-// If the database runs out of potential seeds, we restart the startup counter
-// and start iterating over the peers again.
-func (db *nodeDB) querySeeds(n int) []*Node {
-	// Create a new seed iterator if none exists
-	if db.seeder == nil {
-		db.seeder = db.lvl.NewIterator(nil, nil)
-	}
-	// Iterate over the nodes and find suitable seeds
-	nodes := make([]*Node, 0, n)
-	for len(nodes) < n && db.seeder.Next() {
-		// Iterate until a discovery node is found
-		id, field := splitKey(db.seeder.Key())
-		if field != nodeDBDiscoverRoot {
-			continue
+// querySeeds retrieves up to n random nodes to be used as potential seed
+// nodes for bootstrapping, skipping nodes whose lastPong is older than maxAge.
+func (db *nodeDB) querySeeds(n int, maxAge time.Duration) []*Node {
+	var (
+		now   = time.Now()
+		nodes = make([]*Node, 0, n)
+		it    = db.lvl.NewIterator(nil, nil)
+		id    NodeID
+	)
+	defer it.Release()
+
+seek:
+	for seeks := 0; len(nodes) < n && seeks < n*5; seeks++ {
+		// Seek to a random entry. The first byte is incremented by a
+		// random amount each time in order to increase the likelihood
+		// of hitting all existing nodes in very small databases.
+		ctr := id[0]
+		rand.Read(id[:])
+		id[0] = ctr + id[0]%16
+		it.Seek(makeKey(id, nodeDBDiscoverRoot))
+
+		n := nextNode(it)
+		if n == nil {
+			id[0] = 0
+			continue seek // iterator exhausted
 		}
-		// Dump it if its a self reference
-		if bytes.Compare(id[:], db.self[:]) == 0 {
-			db.deleteNode(id)
-			continue
+		if n.ID == db.self {
+			continue seek
 		}
-		// Load it as a potential seed
-		if node := db.node(id); node != nil {
-			nodes = append(nodes, node)
+		if now.Sub(db.lastPong(n.ID)) > maxAge {
+			continue seek
 		}
-	}
-	// Release the iterator if we reached the end
-	if len(nodes) == 0 {
-		db.seeder.Release()
-		db.seeder = nil
+		for i := range nodes {
+			if nodes[i].ID == n.ID {
+				continue seek // duplicate
+			}
+		}
+		nodes = append(nodes, n)
 	}
 	return nodes
 }

+// nextNode reads the next node record from the iterator, skipping over other
+// database entries. It returns nil if no further node record is found.
+func nextNode(it iterator.Iterator) *Node {
+	for end := false; !end; end = !it.Next() {
+		id, field := splitKey(it.Key())
+		if field != nodeDBDiscoverRoot {
+			continue
+		}
+		var n Node
+		if err := rlp.DecodeBytes(it.Value(), &n); err != nil {
+			if glog.V(logger.Warn) {
+				glog.Errorf("invalid node %x: %v", id, err)
+			}
+			continue
+		}
+		return &n
+	}
+	return nil
+}
+
 // close flushes and closes the database files.
 func (db *nodeDB) close() {
-	if db.seeder != nil {
-		db.seeder.Release()
-	}
 	close(db.quit)
 	db.lvl.Close()
 }