p2p: enforce connection retry limit on server side (#19684)

The dialer limits itself to one attempt every 30s. Apply the same limit
in Server and reject peers which try to connect too eagerly. The check
against the limit happens right after accepting the connection.

Further changes in this commit ensure we pass the Server logger
down to Peer instances, discovery and dialState. Unit test logging now
works in all Server tests.
This commit is contained in:
Felix Lange
2019-06-11 12:45:33 +02:00
committed by GitHub
parent c0a034ec89
commit c420dcb39c
9 changed files with 518 additions and 292 deletions

View File

@ -17,7 +17,6 @@
package p2p
import (
"container/heap"
"errors"
"fmt"
"net"
@ -29,9 +28,10 @@ import (
)
const (
// This is the amount of time spent waiting in between
// redialing a certain node.
dialHistoryExpiration = 30 * time.Second
// This is the amount of time spent waiting in between redialing a certain node. The
// limit is a bit higher than inboundThrottleTime to prevent failing dials in small
// private networks.
dialHistoryExpiration = inboundThrottleTime + 5*time.Second
// Discovery lookups are throttled and can only run
// once every few seconds.
@ -72,16 +72,16 @@ type dialstate struct {
ntab discoverTable
netrestrict *netutil.Netlist
self enode.ID
bootnodes []*enode.Node // default dials when there are no peers
log log.Logger
start time.Time // time when the dialer was first used
lookupRunning bool
dialing map[enode.ID]connFlag
lookupBuf []*enode.Node // current discovery lookup results
randomNodes []*enode.Node // filled from Table
static map[enode.ID]*dialTask
hist *dialHistory
start time.Time // time when the dialer was first used
bootnodes []*enode.Node // default dials when there are no peers
hist expHeap
}
type discoverTable interface {
@ -91,15 +91,6 @@ type discoverTable interface {
ReadRandomNodes([]*enode.Node) int
}
// the dial history remembers recent dials.
type dialHistory []pastDial
// pastDial is an entry in the dial history.
type pastDial struct {
id enode.ID
exp time.Time
}
type task interface {
Do(*Server)
}
@ -126,20 +117,23 @@ type waitExpireTask struct {
time.Duration
}
func newDialState(self enode.ID, static []*enode.Node, bootnodes []*enode.Node, ntab discoverTable, maxdyn int, netrestrict *netutil.Netlist) *dialstate {
func newDialState(self enode.ID, ntab discoverTable, maxdyn int, cfg *Config) *dialstate {
s := &dialstate{
maxDynDials: maxdyn,
ntab: ntab,
self: self,
netrestrict: netrestrict,
netrestrict: cfg.NetRestrict,
log: cfg.Logger,
static: make(map[enode.ID]*dialTask),
dialing: make(map[enode.ID]connFlag),
bootnodes: make([]*enode.Node, len(bootnodes)),
bootnodes: make([]*enode.Node, len(cfg.BootstrapNodes)),
randomNodes: make([]*enode.Node, maxdyn/2),
hist: new(dialHistory),
}
copy(s.bootnodes, bootnodes)
for _, n := range static {
copy(s.bootnodes, cfg.BootstrapNodes)
if s.log == nil {
s.log = log.Root()
}
for _, n := range cfg.StaticNodes {
s.addStatic(n)
}
return s
@ -154,9 +148,6 @@ func (s *dialstate) addStatic(n *enode.Node) {
func (s *dialstate) removeStatic(n *enode.Node) {
// This removes a task so future attempts to connect will not be made.
delete(s.static, n.ID())
// This removes a previous dial timestamp so that application
// can force a server to reconnect with chosen peer immediately.
s.hist.remove(n.ID())
}
func (s *dialstate) newTasks(nRunning int, peers map[enode.ID]*Peer, now time.Time) []task {
@ -167,7 +158,7 @@ func (s *dialstate) newTasks(nRunning int, peers map[enode.ID]*Peer, now time.Ti
var newtasks []task
addDial := func(flag connFlag, n *enode.Node) bool {
if err := s.checkDial(n, peers); err != nil {
log.Trace("Skipping dial candidate", "id", n.ID(), "addr", &net.TCPAddr{IP: n.IP(), Port: n.TCP()}, "err", err)
s.log.Trace("Skipping dial candidate", "id", n.ID(), "addr", &net.TCPAddr{IP: n.IP(), Port: n.TCP()}, "err", err)
return false
}
s.dialing[n.ID()] = flag
@ -196,7 +187,7 @@ func (s *dialstate) newTasks(nRunning int, peers map[enode.ID]*Peer, now time.Ti
err := s.checkDial(t.dest, peers)
switch err {
case errNotWhitelisted, errSelf:
log.Warn("Removing static dial candidate", "id", t.dest.ID, "addr", &net.TCPAddr{IP: t.dest.IP(), Port: t.dest.TCP()}, "err", err)
s.log.Warn("Removing static dial candidate", "id", t.dest.ID, "addr", &net.TCPAddr{IP: t.dest.IP(), Port: t.dest.TCP()}, "err", err)
delete(s.static, t.dest.ID())
case nil:
s.dialing[id] = t.flags
@ -246,7 +237,7 @@ func (s *dialstate) newTasks(nRunning int, peers map[enode.ID]*Peer, now time.Ti
// This should prevent cases where the dialer logic is not ticked
// because there are no pending events.
if nRunning == 0 && len(newtasks) == 0 && s.hist.Len() > 0 {
t := &waitExpireTask{s.hist.min().exp.Sub(now)}
t := &waitExpireTask{s.hist.nextExpiry().Sub(now)}
newtasks = append(newtasks, t)
}
return newtasks
@ -271,7 +262,7 @@ func (s *dialstate) checkDial(n *enode.Node, peers map[enode.ID]*Peer) error {
return errSelf
case s.netrestrict != nil && !s.netrestrict.Contains(n.IP()):
return errNotWhitelisted
case s.hist.contains(n.ID()):
case s.hist.contains(string(n.ID().Bytes())):
return errRecentlyDialed
}
return nil
@ -280,7 +271,7 @@ func (s *dialstate) checkDial(n *enode.Node, peers map[enode.ID]*Peer) error {
func (s *dialstate) taskDone(t task, now time.Time) {
switch t := t.(type) {
case *dialTask:
s.hist.add(t.dest.ID(), now.Add(dialHistoryExpiration))
s.hist.add(string(t.dest.ID().Bytes()), now.Add(dialHistoryExpiration))
delete(s.dialing, t.dest.ID())
case *discoverTask:
s.lookupRunning = false
@ -296,7 +287,7 @@ func (t *dialTask) Do(srv *Server) {
}
err := t.dial(srv, t.dest)
if err != nil {
log.Trace("Dial error", "task", t, "err", err)
srv.log.Trace("Dial error", "task", t, "err", err)
// Try resolving the ID of static nodes if dialing failed.
if _, ok := err.(*dialError); ok && t.flags&staticDialedConn != 0 {
if t.resolve(srv) {
@ -314,7 +305,7 @@ func (t *dialTask) Do(srv *Server) {
// The backoff delay resets when the node is found.
func (t *dialTask) resolve(srv *Server) bool {
if srv.ntab == nil {
log.Debug("Can't resolve node", "id", t.dest.ID, "err", "discovery is disabled")
srv.log.Debug("Can't resolve node", "id", t.dest.ID, "err", "discovery is disabled")
return false
}
if t.resolveDelay == 0 {
@ -330,13 +321,13 @@ func (t *dialTask) resolve(srv *Server) bool {
if t.resolveDelay > maxResolveDelay {
t.resolveDelay = maxResolveDelay
}
log.Debug("Resolving node failed", "id", t.dest.ID, "newdelay", t.resolveDelay)
srv.log.Debug("Resolving node failed", "id", t.dest.ID, "newdelay", t.resolveDelay)
return false
}
// The node was found.
t.resolveDelay = initialResolveDelay
t.dest = resolved
log.Debug("Resolved node", "id", t.dest.ID, "addr", &net.TCPAddr{IP: t.dest.IP(), Port: t.dest.TCP()})
srv.log.Debug("Resolved node", "id", t.dest.ID, "addr", &net.TCPAddr{IP: t.dest.IP(), Port: t.dest.TCP()})
return true
}
@ -385,49 +376,3 @@ func (t waitExpireTask) Do(*Server) {
func (t waitExpireTask) String() string {
return fmt.Sprintf("wait for dial hist expire (%v)", t.Duration)
}
// Use only these methods to access or modify dialHistory.
func (h dialHistory) min() pastDial {
return h[0]
}
func (h *dialHistory) add(id enode.ID, exp time.Time) {
heap.Push(h, pastDial{id, exp})
}
func (h *dialHistory) remove(id enode.ID) bool {
for i, v := range *h {
if v.id == id {
heap.Remove(h, i)
return true
}
}
return false
}
func (h dialHistory) contains(id enode.ID) bool {
for _, v := range h {
if v.id == id {
return true
}
}
return false
}
func (h *dialHistory) expire(now time.Time) {
for h.Len() > 0 && h.min().exp.Before(now) {
heap.Pop(h)
}
}
// heap.Interface boilerplate
func (h dialHistory) Len() int { return len(h) }
func (h dialHistory) Less(i, j int) bool { return h[i].exp.Before(h[j].exp) }
func (h dialHistory) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
func (h *dialHistory) Push(x interface{}) {
*h = append(*h, x.(pastDial))
}
func (h *dialHistory) Pop() interface{} {
old := *h
n := len(old)
x := old[n-1]
*h = old[0 : n-1]
return x
}