p2p, p2p/discover: misc connectivity improvements (#16069)

* p2p: add DialRatio for configuration of inbound vs. dialed connections

* p2p: add connection flags to PeerInfo

* p2p/netutil: add SameNet, DistinctNetSet

* p2p/discover: improve revalidation and seeding

This changes node revalidation to be periodic instead of on-demand. This
should prevent issues where dead nodes get stuck in closer buckets
because no other node will ever come along to replace them.

Every 5 seconds (on average), the last node in a random bucket is
checked and moved to the front of the bucket if it is still responding.
If revalidation fails, the last node is replaced by an entry of the
'replacement list' containing recently-seen nodes.

Most close buckets are removed because it's very unlikely we'll ever
encounter a node that would fall into any of those buckets.

Table seeding is also improved: we now require a few minutes of table
membership before considering a node as a potential seed node. This
should make it less likely to store short-lived nodes as potential
seeds.

* p2p/discover: fix nits in UDP transport

We would skip sending neighbors replies if there were fewer than
maxNeighbors results and CheckRelayIP returned an error for the last
one. While here, also resolve a TODO about pong reply tokens.
This commit is contained in:
Felix Lange
2018-02-12 13:36:09 +01:00
committed by Péter Szilágyi
parent 1d39912a9b
commit 9123eceb0f
10 changed files with 806 additions and 282 deletions

View File

@ -23,10 +23,11 @@
package discover
import (
"crypto/rand"
crand "crypto/rand"
"encoding/binary"
"errors"
"fmt"
mrand "math/rand"
"net"
"sort"
"sync"
@ -35,29 +36,45 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/p2p/netutil"
)
const (
alpha = 3 // Kademlia concurrency factor
bucketSize = 16 // Kademlia bucket size
hashBits = len(common.Hash{}) * 8
nBuckets = hashBits + 1 // Number of buckets
alpha = 3 // Kademlia concurrency factor
bucketSize = 16 // Kademlia bucket size
maxReplacements = 10 // Size of per-bucket replacement list
maxBondingPingPongs = 16
maxFindnodeFailures = 5
// We keep buckets for the upper 1/15 of distances because
// it's very unlikely we'll ever encounter a node that's closer.
hashBits = len(common.Hash{}) * 8
nBuckets = hashBits / 15 // Number of buckets
bucketMinDistance = hashBits - nBuckets // Log distance of closest bucket
autoRefreshInterval = 1 * time.Hour
seedCount = 30
seedMaxAge = 5 * 24 * time.Hour
// IP address limits.
bucketIPLimit, bucketSubnet = 2, 24 // at most 2 addresses from the same /24
tableIPLimit, tableSubnet = 10, 24
maxBondingPingPongs = 16 // Limit on the number of concurrent ping/pong interactions
maxFindnodeFailures = 5 // Nodes exceeding this limit are dropped
refreshInterval = 30 * time.Minute
revalidateInterval = 10 * time.Second
copyNodesInterval = 30 * time.Second
seedMinTableTime = 5 * time.Minute
seedCount = 30
seedMaxAge = 5 * 24 * time.Hour
)
type Table struct {
mutex sync.Mutex // protects buckets, their content, and nursery
mutex sync.Mutex // protects buckets, bucket content, nursery, rand
buckets [nBuckets]*bucket // index of known nodes by distance
nursery []*Node // bootstrap nodes
db *nodeDB // database of known nodes
rand *mrand.Rand // source of randomness, periodically reseeded
ips netutil.DistinctNetSet
db *nodeDB // database of known nodes
refreshReq chan chan struct{}
initDone chan struct{}
closeReq chan struct{}
closed chan struct{}
@ -89,9 +106,13 @@ type transport interface {
// bucket contains nodes, ordered by their last activity. the entry
// that was most recently active is the first element in entries.
type bucket struct{ entries []*Node }
type bucket struct {
entries []*Node // live entries, sorted by time of last contact
replacements []*Node // recently seen nodes to be used if revalidation fails
ips netutil.DistinctNetSet
}
func newTable(t transport, ourID NodeID, ourAddr *net.UDPAddr, nodeDBPath string) (*Table, error) {
func newTable(t transport, ourID NodeID, ourAddr *net.UDPAddr, nodeDBPath string, bootnodes []*Node) (*Table, error) {
// If no node database was given, use an in-memory one
db, err := newNodeDB(nodeDBPath, Version, ourID)
if err != nil {
@ -104,19 +125,42 @@ func newTable(t transport, ourID NodeID, ourAddr *net.UDPAddr, nodeDBPath string
bonding: make(map[NodeID]*bondproc),
bondslots: make(chan struct{}, maxBondingPingPongs),
refreshReq: make(chan chan struct{}),
initDone: make(chan struct{}),
closeReq: make(chan struct{}),
closed: make(chan struct{}),
rand: mrand.New(mrand.NewSource(0)),
ips: netutil.DistinctNetSet{Subnet: tableSubnet, Limit: tableIPLimit},
}
if err := tab.setFallbackNodes(bootnodes); err != nil {
return nil, err
}
for i := 0; i < cap(tab.bondslots); i++ {
tab.bondslots <- struct{}{}
}
for i := range tab.buckets {
tab.buckets[i] = new(bucket)
tab.buckets[i] = &bucket{
ips: netutil.DistinctNetSet{Subnet: bucketSubnet, Limit: bucketIPLimit},
}
}
go tab.refreshLoop()
tab.seedRand()
tab.loadSeedNodes(false)
// Start the background expiration goroutine after loading seeds so that the search for
// seed nodes also considers older nodes that would otherwise be removed by the
// expiration.
tab.db.ensureExpirer()
go tab.loop()
return tab, nil
}
func (tab *Table) seedRand() {
var b [8]byte
crand.Read(b[:])
tab.mutex.Lock()
tab.rand.Seed(int64(binary.BigEndian.Uint64(b[:])))
tab.mutex.Unlock()
}
// Self returns the local node.
// The returned node should not be modified by the caller.
func (tab *Table) Self() *Node {
@ -127,9 +171,12 @@ func (tab *Table) Self() *Node {
// table. It will not write the same node more than once. The nodes in
// the slice are copies and can be modified by the caller.
func (tab *Table) ReadRandomNodes(buf []*Node) (n int) {
if !tab.isInitDone() {
return 0
}
tab.mutex.Lock()
defer tab.mutex.Unlock()
// TODO: tree-based buckets would help here
// Find all non-empty buckets and get a fresh slice of their entries.
var buckets [][]*Node
for _, b := range tab.buckets {
@ -141,8 +188,8 @@ func (tab *Table) ReadRandomNodes(buf []*Node) (n int) {
return 0
}
// Shuffle the buckets.
for i := uint32(len(buckets)) - 1; i > 0; i-- {
j := randUint(i)
for i := len(buckets) - 1; i > 0; i-- {
j := tab.rand.Intn(len(buckets))
buckets[i], buckets[j] = buckets[j], buckets[i]
}
// Move head of each bucket into buf, removing buckets that become empty.
@ -161,15 +208,6 @@ func (tab *Table) ReadRandomNodes(buf []*Node) (n int) {
return i + 1
}
func randUint(max uint32) uint32 {
if max == 0 {
return 0
}
var b [4]byte
rand.Read(b[:])
return binary.BigEndian.Uint32(b[:]) % max
}
// Close terminates the network listener and flushes the node database.
func (tab *Table) Close() {
select {
@ -180,16 +218,15 @@ func (tab *Table) Close() {
}
}
// SetFallbackNodes sets the initial points of contact. These nodes
// setFallbackNodes sets the initial points of contact. These nodes
// are used to connect to the network if the table is empty and there
// are no known nodes in the database.
func (tab *Table) SetFallbackNodes(nodes []*Node) error {
func (tab *Table) setFallbackNodes(nodes []*Node) error {
for _, n := range nodes {
if err := n.validateComplete(); err != nil {
return fmt.Errorf("bad bootstrap/fallback node %q (%v)", n, err)
}
}
tab.mutex.Lock()
tab.nursery = make([]*Node, 0, len(nodes))
for _, n := range nodes {
cpy := *n
@ -198,11 +235,19 @@ func (tab *Table) SetFallbackNodes(nodes []*Node) error {
cpy.sha = crypto.Keccak256Hash(n.ID[:])
tab.nursery = append(tab.nursery, &cpy)
}
tab.mutex.Unlock()
tab.refresh()
return nil
}
// isInitDone returns whether the table's initial seeding procedure has completed.
func (tab *Table) isInitDone() bool {
select {
case <-tab.initDone:
return true
default:
return false
}
}
// Resolve searches for a specific node with the given ID.
// It returns nil if the node could not be found.
func (tab *Table) Resolve(targetID NodeID) *Node {
@ -314,33 +359,49 @@ func (tab *Table) refresh() <-chan struct{} {
return done
}
// refreshLoop schedules doRefresh runs and coordinates shutdown.
func (tab *Table) refreshLoop() {
// loop schedules refresh, revalidate runs and coordinates shutdown.
func (tab *Table) loop() {
var (
timer = time.NewTicker(autoRefreshInterval)
waiting []chan struct{} // accumulates waiting callers while doRefresh runs
done chan struct{} // where doRefresh reports completion
revalidate = time.NewTimer(tab.nextRevalidateTime())
refresh = time.NewTicker(refreshInterval)
copyNodes = time.NewTicker(copyNodesInterval)
revalidateDone = make(chan struct{})
refreshDone = make(chan struct{}) // where doRefresh reports completion
waiting = []chan struct{}{tab.initDone} // holds waiting callers while doRefresh runs
)
defer refresh.Stop()
defer revalidate.Stop()
defer copyNodes.Stop()
// Start initial refresh.
go tab.doRefresh(refreshDone)
loop:
for {
select {
case <-timer.C:
if done == nil {
done = make(chan struct{})
go tab.doRefresh(done)
case <-refresh.C:
tab.seedRand()
if refreshDone == nil {
refreshDone = make(chan struct{})
go tab.doRefresh(refreshDone)
}
case req := <-tab.refreshReq:
waiting = append(waiting, req)
if done == nil {
done = make(chan struct{})
go tab.doRefresh(done)
if refreshDone == nil {
refreshDone = make(chan struct{})
go tab.doRefresh(refreshDone)
}
case <-done:
case <-refreshDone:
for _, ch := range waiting {
close(ch)
}
waiting = nil
done = nil
waiting, refreshDone = nil, nil
case <-revalidate.C:
go tab.doRevalidate(revalidateDone)
case <-revalidateDone:
revalidate.Reset(tab.nextRevalidateTime())
case <-copyNodes.C:
go tab.copyBondedNodes()
case <-tab.closeReq:
break loop
}
@ -349,8 +410,8 @@ loop:
if tab.net != nil {
tab.net.close()
}
if done != nil {
<-done
if refreshDone != nil {
<-refreshDone
}
for _, ch := range waiting {
close(ch)
@ -365,38 +426,109 @@ loop:
func (tab *Table) doRefresh(done chan struct{}) {
defer close(done)
// Load nodes from the database and insert
// them. This should yield a few previously seen nodes that are
// (hopefully) still alive.
tab.loadSeedNodes(true)
// Run self lookup to discover new neighbor nodes.
tab.lookup(tab.self.ID, false)
// The Kademlia paper specifies that the bucket refresh should
// perform a lookup in the least recently used bucket. We cannot
// adhere to this because the findnode target is a 512bit value
// (not hash-sized) and it is not easily possible to generate a
// sha3 preimage that falls into a chosen bucket.
// We perform a lookup with a random target instead.
var target NodeID
rand.Read(target[:])
result := tab.lookup(target, false)
if len(result) > 0 {
// We perform a few lookups with a random target instead.
for i := 0; i < 3; i++ {
var target NodeID
crand.Read(target[:])
tab.lookup(target, false)
}
}
func (tab *Table) loadSeedNodes(bond bool) {
seeds := tab.db.querySeeds(seedCount, seedMaxAge)
seeds = append(seeds, tab.nursery...)
if bond {
seeds = tab.bondall(seeds)
}
for i := range seeds {
seed := seeds[i]
age := log.Lazy{Fn: func() interface{} { return time.Since(tab.db.lastPong(seed.ID)) }}
log.Debug("Found seed node in database", "id", seed.ID, "addr", seed.addr(), "age", age)
tab.add(seed)
}
}
// doRevalidate checks that the last node in a random bucket is still live
// and replaces or deletes the node if it isn't.
func (tab *Table) doRevalidate(done chan<- struct{}) {
defer func() { done <- struct{}{} }()
last, bi := tab.nodeToRevalidate()
if last == nil {
// No non-empty bucket found.
return
}
// The table is empty. Load nodes from the database and insert
// them. This should yield a few previously seen nodes that are
// (hopefully) still alive.
seeds := tab.db.querySeeds(seedCount, seedMaxAge)
seeds = tab.bondall(append(seeds, tab.nursery...))
// Ping the selected node and wait for a pong.
err := tab.ping(last.ID, last.addr())
if len(seeds) == 0 {
log.Debug("No discv4 seed nodes found")
}
for _, n := range seeds {
age := log.Lazy{Fn: func() time.Duration { return time.Since(tab.db.lastPong(n.ID)) }}
log.Trace("Found seed node in database", "id", n.ID, "addr", n.addr(), "age", age)
}
tab.mutex.Lock()
tab.stuff(seeds)
tab.mutex.Unlock()
defer tab.mutex.Unlock()
b := tab.buckets[bi]
if err == nil {
// The node responded, move it to the front.
log.Debug("Revalidated node", "b", bi, "id", last.ID)
b.bump(last)
return
}
// No reply received, pick a replacement or delete the node if there aren't
// any replacements.
if r := tab.replace(b, last); r != nil {
log.Debug("Replaced dead node", "b", bi, "id", last.ID, "ip", last.IP, "r", r.ID, "rip", r.IP)
} else {
log.Debug("Removed dead node", "b", bi, "id", last.ID, "ip", last.IP)
}
}
// Finally, do a self lookup to fill up the buckets.
tab.lookup(tab.self.ID, false)
// nodeToRevalidate returns the last node in a random, non-empty bucket.
func (tab *Table) nodeToRevalidate() (n *Node, bi int) {
tab.mutex.Lock()
defer tab.mutex.Unlock()
for _, bi = range tab.rand.Perm(len(tab.buckets)) {
b := tab.buckets[bi]
if len(b.entries) > 0 {
last := b.entries[len(b.entries)-1]
return last, bi
}
}
return nil, 0
}
func (tab *Table) nextRevalidateTime() time.Duration {
tab.mutex.Lock()
defer tab.mutex.Unlock()
return time.Duration(tab.rand.Int63n(int64(revalidateInterval)))
}
// copyBondedNodes adds nodes from the table to the database if they have been in the table
// longer then minTableTime.
func (tab *Table) copyBondedNodes() {
tab.mutex.Lock()
defer tab.mutex.Unlock()
now := time.Now()
for _, b := range tab.buckets {
for _, n := range b.entries {
if now.Sub(n.addedAt) >= seedMinTableTime {
tab.db.updateNode(n)
}
}
}
}
// closest returns the n nodes in the table that are closest to the
@ -459,15 +591,14 @@ func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16
if id == tab.self.ID {
return nil, errors.New("is self")
}
// Retrieve a previously known node and any recent findnode failures
node, fails := tab.db.node(id), 0
if node != nil {
fails = tab.db.findFails(id)
if pinged && !tab.isInitDone() {
return nil, errors.New("still initializing")
}
// If the node is unknown (non-bonded) or failed (remotely unknown), bond from scratch
var result error
// Start bonding if we haven't seen this node for a while or if it failed findnode too often.
node, fails := tab.db.node(id), tab.db.findFails(id)
age := time.Since(tab.db.lastPong(id))
if node == nil || fails > 0 || age > nodeDBNodeExpiration {
var result error
if fails > 0 || age > nodeDBNodeExpiration {
log.Trace("Starting bonding ping/pong", "id", id, "known", node != nil, "failcount", fails, "age", age)
tab.bondmu.Lock()
@ -494,10 +625,10 @@ func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16
node = w.n
}
}
// Add the node to the table even if the bonding ping/pong
// fails. It will be relaced quickly if it continues to be
// unresponsive.
if node != nil {
// Add the node to the table even if the bonding ping/pong
// fails. It will be relaced quickly if it continues to be
// unresponsive.
tab.add(node)
tab.db.updateFindFails(id, 0)
}
@ -522,7 +653,6 @@ func (tab *Table) pingpong(w *bondproc, pinged bool, id NodeID, addr *net.UDPAdd
}
// Bonding succeeded, update the node database.
w.n = NewNode(id, addr.IP, uint16(addr.Port), tcpPort)
tab.db.updateNode(w.n)
close(w.done)
}
@ -534,16 +664,18 @@ func (tab *Table) ping(id NodeID, addr *net.UDPAddr) error {
return err
}
tab.db.updateLastPong(id, time.Now())
// Start the background expiration goroutine after the first
// successful communication. Subsequent calls have no effect if it
// is already running. We do this here instead of somewhere else
// so that the search for seed nodes also considers older nodes
// that would otherwise be removed by the expiration.
tab.db.ensureExpirer()
return nil
}
// bucket returns the bucket for the given node ID hash.
func (tab *Table) bucket(sha common.Hash) *bucket {
d := logdist(tab.self.sha, sha)
if d <= bucketMinDistance {
return tab.buckets[0]
}
return tab.buckets[d-bucketMinDistance-1]
}
// add attempts to add the given node its corresponding bucket. If the
// bucket has space available, adding the node succeeds immediately.
// Otherwise, the node is added if the least recently active node in
@ -551,57 +683,29 @@ func (tab *Table) ping(id NodeID, addr *net.UDPAddr) error {
//
// The caller must not hold tab.mutex.
func (tab *Table) add(new *Node) {
b := tab.buckets[logdist(tab.self.sha, new.sha)]
tab.mutex.Lock()
defer tab.mutex.Unlock()
if b.bump(new) {
return
}
var oldest *Node
if len(b.entries) == bucketSize {
oldest = b.entries[bucketSize-1]
if oldest.contested {
// The node is already being replaced, don't attempt
// to replace it.
return
}
oldest.contested = true
// Let go of the mutex so other goroutines can access
// the table while we ping the least recently active node.
tab.mutex.Unlock()
err := tab.ping(oldest.ID, oldest.addr())
tab.mutex.Lock()
oldest.contested = false
if err == nil {
// The node responded, don't replace it.
return
}
}
added := b.replace(new, oldest)
if added && tab.nodeAddedHook != nil {
tab.nodeAddedHook(new)
b := tab.bucket(new.sha)
if !tab.bumpOrAdd(b, new) {
// Node is not in table. Add it to the replacement list.
tab.addReplacement(b, new)
}
}
// stuff adds nodes the table to the end of their corresponding bucket
// if the bucket is not full. The caller must hold tab.mutex.
// if the bucket is not full. The caller must not hold tab.mutex.
func (tab *Table) stuff(nodes []*Node) {
outer:
tab.mutex.Lock()
defer tab.mutex.Unlock()
for _, n := range nodes {
if n.ID == tab.self.ID {
continue // don't add self
}
bucket := tab.buckets[logdist(tab.self.sha, n.sha)]
for i := range bucket.entries {
if bucket.entries[i].ID == n.ID {
continue outer // already in bucket
}
}
if len(bucket.entries) < bucketSize {
bucket.entries = append(bucket.entries, n)
if tab.nodeAddedHook != nil {
tab.nodeAddedHook(n)
}
b := tab.bucket(n.sha)
if len(b.entries) < bucketSize {
tab.bumpOrAdd(b, n)
}
}
}
@ -611,36 +715,72 @@ outer:
func (tab *Table) delete(node *Node) {
tab.mutex.Lock()
defer tab.mutex.Unlock()
bucket := tab.buckets[logdist(tab.self.sha, node.sha)]
for i := range bucket.entries {
if bucket.entries[i].ID == node.ID {
bucket.entries = append(bucket.entries[:i], bucket.entries[i+1:]...)
return
}
}
tab.deleteInBucket(tab.bucket(node.sha), node)
}
func (b *bucket) replace(n *Node, last *Node) bool {
// Don't add if b already contains n.
for i := range b.entries {
if b.entries[i].ID == n.ID {
return false
}
func (tab *Table) addIP(b *bucket, ip net.IP) bool {
if netutil.IsLAN(ip) {
return true
}
// Replace last if it is still the last entry or just add n if b
// isn't full. If is no longer the last entry, it has either been
// replaced with someone else or became active.
if len(b.entries) == bucketSize && (last == nil || b.entries[bucketSize-1].ID != last.ID) {
if !tab.ips.Add(ip) {
log.Debug("IP exceeds table limit", "ip", ip)
return false
}
if len(b.entries) < bucketSize {
b.entries = append(b.entries, nil)
if !b.ips.Add(ip) {
log.Debug("IP exceeds bucket limit", "ip", ip)
tab.ips.Remove(ip)
return false
}
copy(b.entries[1:], b.entries)
b.entries[0] = n
return true
}
func (tab *Table) removeIP(b *bucket, ip net.IP) {
if netutil.IsLAN(ip) {
return
}
tab.ips.Remove(ip)
b.ips.Remove(ip)
}
func (tab *Table) addReplacement(b *bucket, n *Node) {
for _, e := range b.replacements {
if e.ID == n.ID {
return // already in list
}
}
if !tab.addIP(b, n.IP) {
return
}
var removed *Node
b.replacements, removed = pushNode(b.replacements, n, maxReplacements)
if removed != nil {
tab.removeIP(b, removed.IP)
}
}
// replace removes n from the replacement list and replaces 'last' with it if it is the
// last entry in the bucket. If 'last' isn't the last entry, it has either been replaced
// with someone else or became active.
func (tab *Table) replace(b *bucket, last *Node) *Node {
if len(b.entries) >= 0 && b.entries[len(b.entries)-1].ID != last.ID {
// Entry has moved, don't replace it.
return nil
}
// Still the last entry.
if len(b.replacements) == 0 {
tab.deleteInBucket(b, last)
return nil
}
r := b.replacements[tab.rand.Intn(len(b.replacements))]
b.replacements = deleteNode(b.replacements, r)
b.entries[len(b.entries)-1] = r
tab.removeIP(b, last.IP)
return r
}
// bump moves the given node to the front of the bucket entry list
// if it is contained in that list.
func (b *bucket) bump(n *Node) bool {
for i := range b.entries {
if b.entries[i].ID == n.ID {
@ -653,6 +793,50 @@ func (b *bucket) bump(n *Node) bool {
return false
}
// bumpOrAdd moves n to the front of the bucket entry list or adds it if the list isn't
// full. The return value is true if n is in the bucket.
func (tab *Table) bumpOrAdd(b *bucket, n *Node) bool {
if b.bump(n) {
return true
}
if len(b.entries) >= bucketSize || !tab.addIP(b, n.IP) {
return false
}
b.entries, _ = pushNode(b.entries, n, bucketSize)
b.replacements = deleteNode(b.replacements, n)
n.addedAt = time.Now()
if tab.nodeAddedHook != nil {
tab.nodeAddedHook(n)
}
return true
}
func (tab *Table) deleteInBucket(b *bucket, n *Node) {
b.entries = deleteNode(b.entries, n)
tab.removeIP(b, n.IP)
}
// pushNode adds n to the front of list, keeping at most max items.
func pushNode(list []*Node, n *Node, max int) ([]*Node, *Node) {
if len(list) < max {
list = append(list, nil)
}
removed := list[len(list)-1]
copy(list[1:], list)
list[0] = n
return list, removed
}
// deleteNode removes n from list.
func deleteNode(list []*Node, n *Node) []*Node {
for i := range list {
if list[i].ID == n.ID {
return append(list[:i], list[i+1:]...)
}
}
return list
}
// nodesByDistance is a list of nodes, ordered by
// distance to target.
type nodesByDistance struct {