p2p/discover: avoid dropping unverified nodes when table is almost empty (#21396)

This change improves discovery behavior in small networks. Very small networks would often fail to bootstrap because all member nodes were dropping table content due to findnode failure. The check is now changed to avoid dropping nodes on findnode failure when their bucket is almost empty. It also relaxes the liveness check requirement for FINDNODE/v4 response nodes, returning unverified nodes as results when there aren't any verified nodes yet. The "findnode failed" log now reports whether the node was dropped instead of the number of results. The value of the "results" was always zero by definition. Co-authored-by: Felix Lange <fjl@twurst.com>
2020-08-24 14:42:39 +02:00
parent bdde616f23
commit 7b5107b73f
5 changed files with 140 additions and 25 deletions
--- a/p2p/discover/table.go
+++ b/p2p/discover/table.go
@@ -392,22 +392,35 @@ func (tab *Table) copyLiveNodes() {
 	}
 }

-// closest returns the n nodes in the table that are closest to the
-// given id. The caller must hold tab.mutex.
-func (tab *Table) closest(target enode.ID, nresults int, checklive bool) *nodesByDistance {
-	// This is a very wasteful way to find the closest nodes but
-	// obviously correct. I believe that tree-based buckets would make
-	// this easier to implement efficiently.
-	close := &nodesByDistance{target: target}
+// findnodeByID returns the n nodes in the table that are closest to the given id.
+// This is used by the FINDNODE/v4 handler.
+//
+// The preferLive parameter says whether the caller wants liveness-checked results. If
+// preferLive is true and the table contains any verified nodes, the result will not
+// contain unverified nodes. However, if there are no verified nodes at all, the result
+// will contain unverified nodes.
+func (tab *Table) findnodeByID(target enode.ID, nresults int, preferLive bool) *nodesByDistance {
+	tab.mutex.Lock()
+	defer tab.mutex.Unlock()
+
+	// Scan all buckets. There might be a better way to do this, but there aren't that many
+	// buckets, so this solution should be fine. The worst-case complexity of this loop
+	// is O(tab.len() * nresults).
+	nodes := &nodesByDistance{target: target}
+	liveNodes := &nodesByDistance{target: target}
 	for _, b := range &tab.buckets {
 		for _, n := range b.entries {
-			if checklive && n.livenessChecks == 0 {
-				continue
+			nodes.push(n, nresults)
+			if preferLive && n.livenessChecks > 0 {
+				liveNodes.push(n, nresults)
 			}
-			close.push(n, nresults)
 		}
 	}
-	return close
+
+	if preferLive && len(liveNodes.entries) > 0 {
+		return liveNodes
+	}
+	return nodes
 }

 // len returns the number of nodes in the table.
@@ -421,6 +434,14 @@ func (tab *Table) len() (n int) {
 	return n
 }

+// bucketLen returns the number of nodes in the bucket for the given ID.
+func (tab *Table) bucketLen(id enode.ID) int {
+	tab.mutex.Lock()
+	defer tab.mutex.Unlock()
+
+	return len(tab.bucket(id).entries)
+}
+
 // bucket returns the bucket for the given node ID hash.
 func (tab *Table) bucket(id enode.ID) *bucket {
 	d := enode.LogDist(tab.self().ID(), id)