p2p/discover: avoid dropping unverified nodes when table is almost empty (#21396)
This change improves discovery behavior in small networks. Very small networks would often fail to bootstrap because all member nodes were dropping table content due to findnode failure. The check is now changed to avoid dropping nodes on findnode failure when their bucket is almost empty. It also relaxes the liveness check requirement for FINDNODE/v4 response nodes, returning unverified nodes as results when there aren't any verified nodes yet. The "findnode failed" log now reports whether the node was dropped instead of the number of results. The value of the "results" was always zero by definition. Co-authored-by: Felix Lange <fjl@twurst.com>
This commit is contained in:
@ -104,9 +104,7 @@ func (it *lookup) startQueries() bool {
|
||||
|
||||
// The first query returns nodes from the local table.
|
||||
if it.queries == -1 {
|
||||
it.tab.mutex.Lock()
|
||||
closest := it.tab.closest(it.result.target, bucketSize, false)
|
||||
it.tab.mutex.Unlock()
|
||||
closest := it.tab.findnodeByID(it.result.target, bucketSize, false)
|
||||
// Avoid finishing the lookup too quickly if table is empty. It'd be better to wait
|
||||
// for the table to fill in this case, but there is no good mechanism for that
|
||||
// yet.
|
||||
@ -150,11 +148,14 @@ func (it *lookup) query(n *node, reply chan<- []*node) {
|
||||
} else if len(r) == 0 {
|
||||
fails++
|
||||
it.tab.db.UpdateFindFails(n.ID(), n.IP(), fails)
|
||||
it.tab.log.Trace("Findnode failed", "id", n.ID(), "failcount", fails, "results", len(r), "err", err)
|
||||
if fails >= maxFindnodeFailures {
|
||||
it.tab.log.Trace("Too many findnode failures, dropping", "id", n.ID(), "failcount", fails)
|
||||
// Remove the node from the local table if it fails to return anything useful too
|
||||
// many times, but only if there are enough other nodes in the bucket.
|
||||
dropped := false
|
||||
if fails >= maxFindnodeFailures && it.tab.bucketLen(n.ID()) >= bucketSize/2 {
|
||||
dropped = true
|
||||
it.tab.delete(n)
|
||||
}
|
||||
it.tab.log.Trace("FINDNODE failed", "id", n.ID(), "failcount", fails, "dropped", dropped, "err", err)
|
||||
} else if fails > 0 {
|
||||
// Reset failure counter because it counts _consecutive_ failures.
|
||||
it.tab.db.UpdateFindFails(n.ID(), n.IP(), 0)
|
||||
|
Reference in New Issue
Block a user