p2p: new dial scheduler (#20592)
* p2p: new dial scheduler

This change replaces the peer-to-peer dial scheduler with a new and improved
implementation. The new code is better than the previous implementation in two
key aspects:

- The time between discovery of a node and dialing that node is significantly
  lower in the new version. The old dialState kept a buffer of nodes and
  launched a task to refill it whenever the buffer became empty. This worked
  well with the discovery interface we used to have, but doesn't really work
  with the new iterator-based discovery API.

- Selection of static dial candidates (created by Server.AddPeer or through
  static-nodes.json) performs much better for large numbers of static peers.
  Connections to static nodes are now limited like dynamic dials and can no
  longer overstep MaxPeers or the dial ratio.

* p2p/simulations/adapters: adapt to new NodeDialer interface

* p2p: re-add check for self in checkDial

* p2p: remove peersetCh

* p2p: allow static dials when discovery is disabled

* p2p: add test for dialScheduler.removeStatic

* p2p: remove blank line

* p2p: fix documentation of maxDialPeers

* p2p: change "ok" to "added" in static node log

* p2p: improve dialTask docs

  Also increase log level for "Can't resolve node".

* p2p: ensure dial resolver is truly nil without discovery

* p2p: add "looking for peers" log message

* p2p: clean up Server.run comments

* p2p: fix maxDialedConns for maxpeers < dialRatio

  Always allocate at least one dial slot unless dialing is disabled using
  NoDial or MaxPeers == 0. Most importantly, this fixes MaxPeers == 1 to
  dedicate the sole slot to dialing instead of listening.

* p2p: fix RemovePeer to disconnect the peer again

  Also make RemovePeer synchronous and add a test.

* p2p: remove "Connection set up" log message

* p2p: clean up connection logging

  We previously logged outgoing connection failures up to three times:

  - in SetupConn() as "Setting up connection failed addr=..."
  - in setupConn() with an error-specific message and "id=... addr=..."
  - in dial() as "Dial error task=..."

  This commit ensures a single log message is emitted per failure and adds
  "id=... addr=... conn=..." everywhere (id= omitted when the ID isn't known
  yet).

  Also avoid printing a log message when a static dial fails but can't be
  resolved because discv4 is disabled. The light client hit this case all the
  time, increasing the message count to four lines per failed connection.

* p2p: document that RemovePeer blocks
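The first bullet of the message refers to the iterator-based discovery API: discovery sources are exposed as enode.Iterator values (Next/Node/Close), so the dialer can consume candidates as soon as they are produced instead of refilling a buffer of nodes. Below is a rough sketch of that consumption pattern, assuming the go-ethereum enode package; the helper name and the cap on n are illustrative, not part of the patch.

package p2psketch

import "github.com/ethereum/go-ethereum/p2p/enode"

// readCandidates pulls up to n dial candidates from an iterator-based
// discovery source. Candidates reach the caller as soon as the iterator
// yields them; there is no intermediate buffer to refill.
func readCandidates(it enode.Iterator, n int) []*enode.Node {
    nodes := make([]*enode.Node, 0, n)
    for len(nodes) < n && it.Next() {
        nodes = append(nodes, it.Node())
    }
    return nodes
}

Several such iterators can be combined with enode.NewFairMix (the diff below wires a FairMix into the test Server as discmix), and closing the iterator ends the loop.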
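The "fix maxDialedConns" item is easiest to see as a small piece of arithmetic. The following is a minimal, self-contained sketch of that slot calculation, assuming a default dial ratio of 3; the type and function names are illustrative and not the identifiers used in the patch.

package main

import "fmt"

// Assumed default: roughly one third of MaxPeers is reserved for dialing.
const defaultDialRatio = 3

type config struct {
    MaxPeers  int
    DialRatio int
    NoDial    bool
}

// maxDialedSlots returns how many peer slots are reserved for dialed
// (outbound) connections. At least one slot is allocated unless dialing
// is disabled via NoDial or MaxPeers == 0.
func maxDialedSlots(cfg config) int {
    if cfg.NoDial || cfg.MaxPeers == 0 {
        return 0
    }
    ratio := cfg.DialRatio
    if ratio == 0 {
        ratio = defaultDialRatio
    }
    slots := cfg.MaxPeers / ratio
    if slots == 0 {
        slots = 1
    }
    return slots
}

func main() {
    // MaxPeers == 1 now yields one dial slot instead of zero, so the sole
    // slot is used for dialing rather than listening.
    fmt.Println(maxDialedSlots(config{MaxPeers: 1}))                // 1
    fmt.Println(maxDialedSlots(config{MaxPeers: 10}))               // 3
    fmt.Println(maxDialedSlots(config{MaxPeers: 10, NoDial: true})) // 0
}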
@@ -34,10 +34,6 @@ import (
 	"golang.org/x/crypto/sha3"
 )
 
-// func init() {
-// 	log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(false))))
-// }
-
 type testTransport struct {
 	rpub *ecdsa.PublicKey
 	*rlpx
@@ -72,11 +68,12 @@ func (c *testTransport) close(err error) {
 
 func startTestServer(t *testing.T, remoteKey *ecdsa.PublicKey, pf func(*Peer)) *Server {
 	config := Config{
-		Name: "test",
-		MaxPeers: 10,
-		ListenAddr: "127.0.0.1:0",
-		PrivateKey: newkey(),
-		Logger: testlog.Logger(t, log.LvlTrace),
+		Name: "test",
+		MaxPeers: 10,
+		ListenAddr: "127.0.0.1:0",
+		NoDiscovery: true,
+		PrivateKey: newkey(),
+		Logger: testlog.Logger(t, log.LvlTrace),
 	}
 	server := &Server{
 		Config: config,
@@ -131,11 +128,10 @@ func TestServerDial(t *testing.T) {
 		t.Fatalf("could not setup listener: %v", err)
 	}
 	defer listener.Close()
-	accepted := make(chan net.Conn)
+	accepted := make(chan net.Conn, 1)
 	go func() {
 		conn, err := listener.Accept()
 		if err != nil {
 			t.Error("accept error:", err)
 			return
 		}
 		accepted <- conn
@@ -205,155 +201,38 @@ func TestServerDial(t *testing.T) {
 	}
 }
 
-// This test checks that tasks generated by dialstate are
-// actually executed and taskdone is called for them.
-func TestServerTaskScheduling(t *testing.T) {
-	var (
-		done = make(chan *testTask)
-		quit, returned = make(chan struct{}), make(chan struct{})
-		tc = 0
-		tg = taskgen{
-			newFunc: func(running int, peers map[enode.ID]*Peer) []task {
-				tc++
-				return []task{&testTask{index: tc - 1}}
-			},
-			doneFunc: func(t task) {
-				select {
-				case done <- t.(*testTask):
-				case <-quit:
-				}
-			},
-		}
-	)
+// This test checks that RemovePeer disconnects the peer if it is connected.
+func TestServerRemovePeerDisconnect(t *testing.T) {
+	srv1 := &Server{Config: Config{
+		PrivateKey: newkey(),
+		MaxPeers: 1,
+		NoDiscovery: true,
+		Logger: testlog.Logger(t, log.LvlTrace).New("server", "1"),
+	}}
+	srv2 := &Server{Config: Config{
+		PrivateKey: newkey(),
+		MaxPeers: 1,
+		NoDiscovery: true,
+		NoDial: true,
+		ListenAddr: "127.0.0.1:0",
+		Logger: testlog.Logger(t, log.LvlTrace).New("server", "2"),
+	}}
+	srv1.Start()
+	defer srv1.Stop()
+	srv2.Start()
+	defer srv2.Stop()
 
-	// The Server in this test isn't actually running
-	// because we're only interested in what run does.
-	db, _ := enode.OpenDB("")
-	srv := &Server{
-		Config: Config{MaxPeers: 10},
-		localnode: enode.NewLocalNode(db, newkey()),
-		nodedb: db,
-		discmix: enode.NewFairMix(0),
-		quit: make(chan struct{}),
-		running: true,
-		log: log.New(),
+	if !syncAddPeer(srv1, srv2.Self()) {
+		t.Fatal("peer not connected")
 	}
-	srv.loopWG.Add(1)
-	go func() {
-		srv.run(tg)
-		close(returned)
-	}()
-
-	var gotdone []*testTask
-	for i := 0; i < 100; i++ {
-		gotdone = append(gotdone, <-done)
-	}
-	for i, task := range gotdone {
-		if task.index != i {
-			t.Errorf("task %d has wrong index, got %d", i, task.index)
-			break
-		}
-		if !task.called {
-			t.Errorf("task %d was not called", i)
-			break
-		}
-	}
-
-	close(quit)
-	srv.Stop()
-	select {
-	case <-returned:
-	case <-time.After(500 * time.Millisecond):
-		t.Error("Server.run did not return within 500ms")
+	srv1.RemovePeer(srv2.Self())
+	if srv1.PeerCount() > 0 {
+		t.Fatal("removed peer still connected")
 	}
 }
 
-// This test checks that Server doesn't drop tasks,
-// even if newTasks returns more than the maximum number of tasks.
-func TestServerManyTasks(t *testing.T) {
-	alltasks := make([]task, 300)
-	for i := range alltasks {
-		alltasks[i] = &testTask{index: i}
-	}
-
-	var (
-		db, _ = enode.OpenDB("")
-		srv = &Server{
-			quit: make(chan struct{}),
-			localnode: enode.NewLocalNode(db, newkey()),
-			nodedb: db,
-			running: true,
-			log: log.New(),
-			discmix: enode.NewFairMix(0),
-		}
-		done = make(chan *testTask)
-		start, end = 0, 0
-	)
-	defer srv.Stop()
-	srv.loopWG.Add(1)
-	go srv.run(taskgen{
-		newFunc: func(running int, peers map[enode.ID]*Peer) []task {
-			start, end = end, end+maxActiveDialTasks+10
-			if end > len(alltasks) {
-				end = len(alltasks)
-			}
-			return alltasks[start:end]
-		},
-		doneFunc: func(tt task) {
-			done <- tt.(*testTask)
-		},
-	})
-
-	doneset := make(map[int]bool)
-	timeout := time.After(2 * time.Second)
-	for len(doneset) < len(alltasks) {
-		select {
-		case tt := <-done:
-			if doneset[tt.index] {
-				t.Errorf("task %d got done more than once", tt.index)
-			} else {
-				doneset[tt.index] = true
-			}
-		case <-timeout:
-			t.Errorf("%d of %d tasks got done within 2s", len(doneset), len(alltasks))
-			for i := 0; i < len(alltasks); i++ {
-				if !doneset[i] {
-					t.Logf("task %d not done", i)
-				}
-			}
-			return
-		}
-	}
-}
-
-type taskgen struct {
-	newFunc func(running int, peers map[enode.ID]*Peer) []task
-	doneFunc func(task)
-}
-
-func (tg taskgen) newTasks(running int, peers map[enode.ID]*Peer, now time.Time) []task {
-	return tg.newFunc(running, peers)
-}
-func (tg taskgen) taskDone(t task, now time.Time) {
-	tg.doneFunc(t)
-}
-func (tg taskgen) addStatic(*enode.Node) {
-}
-func (tg taskgen) removeStatic(*enode.Node) {
-}
-
-type testTask struct {
-	index int
-	called bool
-}
-
-func (t *testTask) Do(srv *Server) {
-	t.called = true
-}
-
-// This test checks that connections are disconnected
-// just after the encryption handshake when the server is
-// at capacity. Trusted connections should still be accepted.
+// This test checks that connections are disconnected just after the encryption handshake
+// when the server is at capacity. Trusted connections should still be accepted.
 func TestServerAtCap(t *testing.T) {
 	trustedNode := newkey()
 	trustedID := enode.PubkeyToIDV4(&trustedNode.PublicKey)
@@ -363,7 +242,8 @@ func TestServerAtCap(t *testing.T) {
 			MaxPeers: 10,
 			NoDial: true,
 			NoDiscovery: true,
-			TrustedNodes: []*enode.Node{newNode(trustedID, nil)},
+			TrustedNodes: []*enode.Node{newNode(trustedID, "")},
+			Logger: testlog.Logger(t, log.LvlTrace),
 		},
 	}
 	if err := srv.Start(); err != nil {
@@ -401,14 +281,14 @@ func TestServerAtCap(t *testing.T) {
 	}
 
 	// Remove from trusted set and try again
-	srv.RemoveTrustedPeer(newNode(trustedID, nil))
+	srv.RemoveTrustedPeer(newNode(trustedID, ""))
 	c = newconn(trustedID)
 	if err := srv.checkpoint(c, srv.checkpointPostHandshake); err != DiscTooManyPeers {
 		t.Error("wrong error for insert:", err)
 	}
 
 	// Add anotherID to trusted set and try again
-	srv.AddTrustedPeer(newNode(anotherID, nil))
+	srv.AddTrustedPeer(newNode(anotherID, ""))
 	c = newconn(anotherID)
 	if err := srv.checkpoint(c, srv.checkpointPostHandshake); err != nil {
 		t.Error("unexpected error for trusted conn @posthandshake:", err)
@@ -439,9 +319,9 @@ func TestServerPeerLimits(t *testing.T) {
 			NoDial: true,
 			NoDiscovery: true,
 			Protocols: []Protocol{discard},
 			Logger: testlog.Logger(t, log.LvlTrace),
 		},
 		newTransport: func(fd net.Conn) transport { return tp },
 		log: log.New(),
 	}
 	if err := srv.Start(); err != nil {
 		t.Fatalf("couldn't start server: %v", err)
@@ -724,3 +604,23 @@ func (l *fakeAddrListener) Accept() (net.Conn, error) {
 func (c *fakeAddrConn) RemoteAddr() net.Addr {
 	return c.remoteAddr
 }
+
+func syncAddPeer(srv *Server, node *enode.Node) bool {
+	var (
+		ch = make(chan *PeerEvent)
+		sub = srv.SubscribeEvents(ch)
+		timeout = time.After(2 * time.Second)
+	)
+	defer sub.Unsubscribe()
+	srv.AddPeer(node)
+	for {
+		select {
+		case ev := <-ch:
+			if ev.Type == PeerEventTypeAdd && ev.Peer == node.ID() {
+				return true
+			}
+		case <-timeout:
+			return false
+		}
+	}
+}