p2p/discover: new distance metric based on sha3(id)

The previous metric was pubkey1^pubkey2, as specified in the Kademlia
paper. We missed that EC public keys are not uniformly distributed.
Using the hash of the public keys addresses that. It also makes it
a bit harder to generate node IDs that are close to a particular node.
This commit is contained in:
Felix Lange
2015-04-27 00:50:18 +02:00
parent d457a1187d
commit 2adcc31bb4
6 changed files with 354 additions and 106 deletions

View File

@ -7,20 +7,24 @@
package discover
import (
"crypto/rand"
"net"
"sort"
"sync"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/logger"
"github.com/ethereum/go-ethereum/logger/glog"
)
const (
alpha = 3 // Kademlia concurrency factor
bucketSize = 16 // Kademlia bucket size
nBuckets = nodeIDBits + 1 // Number of buckets
alpha = 3 // Kademlia concurrency factor
bucketSize = 16 // Kademlia bucket size
hashBits = len(common.Hash{}) * 8
nBuckets = hashBits + 1 // Number of buckets
maxBondingPingPongs = 10
)
@ -116,21 +120,23 @@ func (tab *Table) Bootstrap(nodes []*Node) {
// Lookup performs a network search for nodes close
// to the given target. It approaches the target by querying
// nodes that are closer to it on each iteration.
func (tab *Table) Lookup(target NodeID) []*Node {
// The given target does not need to be an actual node
// identifier.
func (tab *Table) Lookup(targetID NodeID) []*Node {
var (
target = crypto.Sha3Hash(targetID[:])
asked = make(map[NodeID]bool)
seen = make(map[NodeID]bool)
reply = make(chan []*Node, alpha)
pendingQueries = 0
)
// don't query further if we hit the target or ourself.
// don't query further if we hit ourself.
// unlikely to happen often in practice.
asked[target] = true
asked[tab.self.ID] = true
tab.mutex.Lock()
// update last lookup stamp (for refresh logic)
tab.buckets[logdist(tab.self.ID, target)].lastLookup = time.Now()
tab.buckets[logdist(tab.self.sha, target)].lastLookup = time.Now()
// generate initial result set
result := tab.closest(target, bucketSize)
tab.mutex.Unlock()
@ -143,7 +149,7 @@ func (tab *Table) Lookup(target NodeID) []*Node {
asked[n.ID] = true
pendingQueries++
go func() {
r, _ := tab.net.findnode(n.ID, n.addr(), target)
r, _ := tab.net.findnode(n.ID, n.addr(), targetID)
reply <- tab.bondall(r)
}()
}
@ -166,17 +172,16 @@ func (tab *Table) Lookup(target NodeID) []*Node {
// refresh performs a lookup for a random target to keep buckets full.
func (tab *Table) refresh() {
ld := -1 // logdist of chosen bucket
tab.mutex.Lock()
for i, b := range tab.buckets {
if i > 0 && b.lastLookup.Before(time.Now().Add(-1*time.Hour)) {
ld = i
break
}
}
tab.mutex.Unlock()
result := tab.Lookup(randomID(tab.self.ID, ld))
// The Kademlia paper specifies that the bucket refresh should
// perform a lookup in the least recently used bucket. We cannot
// adhere to this because the findnode target is a 512-bit value
// (not hash-sized) and it is not easily possible to generate a
// sha3 preimage that falls into a chosen bucket.
//
// We perform a lookup with a random target instead.
var target NodeID
rand.Read(target[:])
result := tab.Lookup(target)
if len(result) == 0 {
// Pick a batch of previously known seeds to lookup with
seeds := tab.db.querySeeds(10)
@ -196,7 +201,7 @@ func (tab *Table) refresh() {
// closest returns the n nodes in the table that are closest to the
// given id. The caller must hold tab.mutex.
func (tab *Table) closest(target NodeID, nresults int) *nodesByDistance {
func (tab *Table) closest(target common.Hash, nresults int) *nodesByDistance {
// This is a very wasteful way to find the closest nodes but
// obviously correct. I believe that tree-based buckets would make
// this easier to implement efficiently.
@ -278,7 +283,8 @@ func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16
}
tab.mutex.Lock()
defer tab.mutex.Unlock()
if b := tab.buckets[logdist(tab.self.ID, n.ID)]; !b.bump(n) {
b := tab.buckets[logdist(tab.self.sha, n.sha)]
if !b.bump(n) {
tab.pingreplace(n, b)
}
return n, nil
@ -346,7 +352,7 @@ outer:
// don't add self.
continue
}
bucket := tab.buckets[logdist(tab.self.ID, n.ID)]
bucket := tab.buckets[logdist(tab.self.sha, n.sha)]
for i := range bucket.entries {
if bucket.entries[i].ID == n.ID {
// already in bucket
@ -375,13 +381,13 @@ func (b *bucket) bump(n *Node) bool {
// distance to target.
type nodesByDistance struct {
entries []*Node
target NodeID
target common.Hash
}
// push adds the given node to the list, keeping the total size at most maxElems.
func (h *nodesByDistance) push(n *Node, maxElems int) {
ix := sort.Search(len(h.entries), func(i int) bool {
return distcmp(h.target, h.entries[i].ID, n.ID) > 0
return distcmp(h.target, h.entries[i].sha, n.sha) > 0
})
if len(h.entries) < maxElems {
h.entries = append(h.entries, n)