eth: request id dispatcher and direct req/reply APIs (#23576)

* eth: request ID based message dispatcher

* eth: fix dispatcher cancellation, rework fetchers idleness tracker

* eth/downloader: drop peers who refuse to serve advertised chains
Péter Szilágyi (committed by GitHub), 2021-11-26 13:26:03 +02:00
parent 3038e480f5, commit c10a0a62c3
52 changed files with 3213 additions and 3400 deletions
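The mechanism named in the title, a request-ID based message dispatcher, tags every outbound request with a unique id, parks the caller's reply channel under that id, and routes each inbound response straight back to whoever issued the matching request. The snippet below is a minimal, self-contained sketch of that pattern in Go; the `dispatcher`, `request` and `response` names are illustrative stand-ins, not the actual types this commit adds to the eth protocol code.

```go
package main

import (
	"errors"
	"fmt"
	"sync"
)

// response is a hypothetical reply type, carrying the id of the request it answers.
type response struct {
	ID   uint64
	Data interface{}
}

// dispatcher pairs inbound replies with pending requests by request id. It is a
// simplified stand-in for the request-id dispatcher this commit introduces.
type dispatcher struct {
	mu      sync.Mutex
	nextID  uint64
	pending map[uint64]chan *response // request id -> waiting caller
}

func newDispatcher() *dispatcher {
	return &dispatcher{pending: make(map[uint64]chan *response)}
}

// request allocates a fresh request id and registers the channel on which the
// eventual reply must be delivered.
func (d *dispatcher) request() (uint64, chan *response) {
	d.mu.Lock()
	defer d.mu.Unlock()

	d.nextID++
	ch := make(chan *response, 1)
	d.pending[d.nextID] = ch
	return d.nextID, ch
}

// deliver routes an inbound reply to the caller that issued the matching
// request, and rejects replies whose id is unknown (stale or unsolicited).
func (d *dispatcher) deliver(res *response) error {
	d.mu.Lock()
	ch, ok := d.pending[res.ID]
	delete(d.pending, res.ID)
	d.mu.Unlock()

	if !ok {
		return errors.New("unsolicited reply")
	}
	ch <- res
	return nil
}

func main() {
	d := newDispatcher()

	// Caller side: issue a request and keep its private reply channel.
	id, resCh := d.request()

	// Network side: a reply tagged with the same id is routed straight back.
	if err := d.deliver(&response{ID: id, Data: "block headers"}); err != nil {
		fmt.Println("delivery failed:", err)
		return
	}
	fmt.Println((<-resCh).Data)
}
```

With this kind of pairing in place, a caller can block on its own channel for exactly the reply it asked for instead of funnelling every inbound message through shared handler state, which is the style the handler changes below move towards.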

eth/handler.go

@@ -83,8 +83,8 @@ type handlerConfig struct {
TxPool txPool // Transaction pool to propagate from
Merger *consensus.Merger // The manager for eth1/2 transition
Network uint64 // Network identifier to advertise
- Sync downloader.SyncMode // Whether to fast or full sync
- BloomCache uint64 // Megabytes to alloc for fast sync bloom
+ Sync downloader.SyncMode // Whether to snap or full sync
+ BloomCache uint64 // Megabytes to alloc for snap sync bloom
EventMux *event.TypeMux // Legacy event mux, deprecate for `feed`
Checkpoint *params.TrustedCheckpoint // Hard coded checkpoint for sync challenges
Whitelist map[uint64]common.Hash // Hard coded whitelist for sync challenges
@@ -94,8 +94,7 @@ type handler struct {
networkID uint64
forkFilter forkid.Filter // Fork ID filter, constant across the lifetime of the node
- fastSync uint32 // Flag whether fast sync is enabled (gets disabled if we already have blocks)
- snapSync uint32 // Flag whether fast sync should operate on top of the snap protocol
+ snapSync uint32 // Flag whether snap sync is enabled (gets disabled if we already have blocks)
acceptTxs uint32 // Flag whether we're considered synchronised (enables transaction processing)
checkpointNumber uint64 // Block number for the sync progress validator to cross reference
@@ -147,29 +146,26 @@ func newHandler(config *handlerConfig) (*handler, error) {
quitSync: make(chan struct{}),
}
if config.Sync == downloader.FullSync {
- // The database seems empty as the current block is the genesis. Yet the fast
- // block is ahead, so fast sync was enabled for this node at a certain point.
+ // The database seems empty as the current block is the genesis. Yet the snap
+ // block is ahead, so snap sync was enabled for this node at a certain point.
// The scenarios where this can happen are:
- // * if the user manually (or via a bad block) rolled back a fast sync node
+ // * if the user manually (or via a bad block) rolled back a snap sync node
// below the sync point.
- // * the last fast sync is not finished while user specifies a full sync this
+ // * the last snap sync is not finished while user specifies a full sync this
// time. But we don't have any recent state for full sync.
- // In these cases however it's safe to reenable fast sync.
+ // In these cases however it's safe to reenable snap sync.
fullBlock, fastBlock := h.chain.CurrentBlock(), h.chain.CurrentFastBlock()
if fullBlock.NumberU64() == 0 && fastBlock.NumberU64() > 0 {
- h.fastSync = uint32(1)
- log.Warn("Switch sync mode from full sync to fast sync")
+ h.snapSync = uint32(1)
+ log.Warn("Switch sync mode from full sync to snap sync")
}
} else {
if h.chain.CurrentBlock().NumberU64() > 0 {
- // Print warning log if database is not empty to run fast sync.
- log.Warn("Switch sync mode from fast sync to full sync")
+ // Print warning log if database is not empty to run snap sync.
+ log.Warn("Switch sync mode from snap sync to full sync")
} else {
- // If fast sync was requested and our database is empty, grant it
- h.fastSync = uint32(1)
- if config.Sync == downloader.SnapSync {
- h.snapSync = uint32(1)
- }
+ // If snap sync was requested and our database is empty, grant it
+ h.snapSync = uint32(1)
}
}
// If we have trusted checkpoints, enforce them on the chain
@@ -177,14 +173,14 @@ func newHandler(config *handlerConfig) (*handler, error) {
h.checkpointNumber = (config.Checkpoint.SectionIndex+1)*params.CHTFrequency - 1
h.checkpointHash = config.Checkpoint.SectionHead
}
- // Construct the downloader (long sync) and its backing state bloom if fast
+ // Construct the downloader (long sync) and its backing state bloom if snap
// sync is requested. The downloader is responsible for deallocating the state
// bloom when it's done.
// Note: we don't enable it if snap-sync is performed, since it's very heavy
- // and the heal-portion of the snap sync is much lighter than fast. What we particularly
+ // and the heal-portion of the snap sync is much lighter than snap. What we particularly
// want to avoid, is a 90%-finished (but restarted) snap-sync to begin
// indexing the entire trie
- if atomic.LoadUint32(&h.fastSync) == 1 && atomic.LoadUint32(&h.snapSync) == 0 {
+ if atomic.LoadUint32(&h.snapSync) == 1 && atomic.LoadUint32(&h.snapSync) == 0 {
h.stateBloom = trie.NewSyncBloom(config.BloomCache, config.Database)
}
h.downloader = downloader.New(h.checkpointNumber, config.Database, h.stateBloom, h.eventMux, h.chain, nil, h.removePeer)
@@ -236,12 +232,12 @@ func newHandler(config *handlerConfig) (*handler, error) {
log.Warn("Unsynced yet, discarded propagated block", "number", blocks[0].Number(), "hash", blocks[0].Hash())
return 0, nil
}
- // If fast sync is running, deny importing weird blocks. This is a problematic
- // clause when starting up a new network, because fast-syncing miners might not
+ // If snap sync is running, deny importing weird blocks. This is a problematic
+ // clause when starting up a new network, because snap-syncing miners might not
// accept each others' blocks until a restart. Unfortunately we haven't figured
// out a way yet where nodes can decide unilaterally whether the network is new
// or not. This should be fixed if we figure out a solution.
- if atomic.LoadUint32(&h.fastSync) == 1 {
+ if atomic.LoadUint32(&h.snapSync) == 1 {
log.Warn("Fast syncing, discarded propagated block", "number", blocks[0].Number(), "hash", blocks[0].Hash())
return 0, nil
}
@@ -365,30 +361,93 @@ func (h *handler) runEthPeer(peer *eth.Peer, handler eth.Handler) error {
// after this will be sent via broadcasts.
h.syncTransactions(peer)
+ // Create a notification channel for pending requests if the peer goes down
+ dead := make(chan struct{})
+ defer close(dead)
// If we have a trusted CHT, reject all peers below that (avoid fast sync eclipse)
if h.checkpointHash != (common.Hash{}) {
// Request the peer's checkpoint header for chain height/weight validation
- if err := peer.RequestHeadersByNumber(h.checkpointNumber, 1, 0, false); err != nil {
+ resCh := make(chan *eth.Response)
+ if _, err := peer.RequestHeadersByNumber(h.checkpointNumber, 1, 0, false, resCh); err != nil {
return err
}
- // Start a timer to disconnect if the peer doesn't reply in time
- p.syncDrop = time.AfterFunc(syncChallengeTimeout, func() {
- peer.Log().Warn("Checkpoint challenge timed out, dropping", "addr", peer.RemoteAddr(), "type", peer.Name())
- h.removePeer(peer.ID())
- })
- // Make sure it's cleaned up if the peer dies off
- defer func() {
- if p.syncDrop != nil {
- p.syncDrop.Stop()
- p.syncDrop = nil
- }
- }()
+ go func() {
+ timeout := time.NewTimer(syncChallengeTimeout)
+ defer timeout.Stop()
+ select {
+ case res := <-resCh:
+ headers := ([]*types.Header)(*res.Res.(*eth.BlockHeadersPacket))
+ if len(headers) == 0 {
+ // If we're doing a snap sync, we must enforce the checkpoint
+ // block to avoid eclipse attacks. Unsynced nodes are welcome
+ // to connect after we're done joining the network.
+ if atomic.LoadUint32(&h.snapSync) == 1 {
+ peer.Log().Warn("Dropping unsynced node during sync", "addr", peer.RemoteAddr(), "type", peer.Name())
+ res.Done <- errors.New("unsynced node cannot serve sync")
+ return
+ }
+ res.Done <- nil
+ return
+ }
+ // Validate the header and either drop the peer or continue
+ if len(headers) > 1 {
+ res.Done <- errors.New("too many headers in checkpoint response")
+ return
+ }
+ if headers[0].Hash() != h.checkpointHash {
+ res.Done <- errors.New("checkpoint hash mismatch")
+ return
+ }
+ res.Done <- nil
+ case <-timeout.C:
+ peer.Log().Warn("Checkpoint challenge timed out, dropping", "addr", peer.RemoteAddr(), "type", peer.Name())
+ h.removePeer(peer.ID())
+ case <-dead:
+ // Peer handler terminated, abort all goroutines
+ }
+ }()
}
// If we have any explicit whitelist block hashes, request them
- for number := range h.whitelist {
- if err := peer.RequestHeadersByNumber(number, 1, 0, false); err != nil {
+ for number, hash := range h.whitelist {
+ resCh := make(chan *eth.Response)
+ if _, err := peer.RequestHeadersByNumber(number, 1, 0, false, resCh); err != nil {
return err
}
+ go func(number uint64, hash common.Hash) {
+ timeout := time.NewTimer(syncChallengeTimeout)
+ defer timeout.Stop()
+ select {
+ case res := <-resCh:
+ headers := ([]*types.Header)(*res.Res.(*eth.BlockHeadersPacket))
+ if len(headers) == 0 {
+ // Whitelisted blocks are allowed to be missing if the remote
+ // node is not yet synced
+ res.Done <- nil
+ return
+ }
+ // Validate the header and either drop the peer or continue
+ if len(headers) > 1 {
+ res.Done <- errors.New("too many headers in whitelist response")
+ return
+ }
+ if headers[0].Number.Uint64() != number || headers[0].Hash() != hash {
+ peer.Log().Info("Whitelist mismatch, dropping peer", "number", number, "hash", headers[0].Hash(), "want", hash)
+ res.Done <- errors.New("whitelist block mismatch")
+ return
+ }
+ peer.Log().Debug("Whitelist block verified", "number", number, "hash", hash)
+ case <-timeout.C:
+ peer.Log().Warn("Whitelist challenge timed out, dropping", "addr", peer.RemoteAddr(), "type", peer.Name())
+ h.removePeer(peer.ID())
+ }
+ }(number, hash)
}
// Handle incoming messages until the connection is torn down
return handler(peer)
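The checkpoint and whitelist challenges added above share one caller-side recipe: make a fresh response channel, hand it to the request method, wait in a goroutine for the reply or a timeout, and report the verdict on the response's Done channel. Below is a compact, self-contained sketch of that recipe using hypothetical `reply` and `challenge` types; it mirrors the shape of the code above rather than the exact eth.Response API.

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

// reply is a hypothetical response object: a payload plus a Done channel on
// which the consumer reports whether the delivery was acceptable.
type reply struct {
	Headers []uint64   // stand-in for the returned block headers
	Done    chan error // consumer verdict, read back by the delivering side
}

// challenge mirrors the wait-validate-report flow used above for the checkpoint
// and whitelist requests: wait for the reply or a timeout, then either accept
// it or signal that the peer misbehaved.
func challenge(resCh chan *reply, want uint64, timeout time.Duration, drop func()) {
	timer := time.NewTimer(timeout)
	defer timer.Stop()

	select {
	case res := <-resCh:
		switch {
		case len(res.Headers) != 1:
			res.Done <- errors.New("unexpected number of headers")
		case res.Headers[0] != want:
			res.Done <- errors.New("header mismatch")
		default:
			res.Done <- nil // reply accepted
		}
	case <-timer.C:
		drop() // the peer never answered, disconnect it
	}
}

func main() {
	resCh := make(chan *reply, 1)
	done := make(chan error, 1)

	// Pretend the dispatcher delivered a matching reply before the deadline.
	resCh <- &reply{Headers: []uint64{42}, Done: done}

	go challenge(resCh, 42, time.Second, func() { fmt.Println("dropping peer") })
	fmt.Println("verdict:", <-done) // prints: verdict: <nil>
}
```

The Done channel is what carries the caller's verdict back to whatever delivered the response, so bad replies can be acted on rather than silently discarded, in line with the commit message's "drop peers who refuse to serve advertised chains".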