eth: request id dispatcher and direct req/reply APIs (#23576)
* eth: request ID based message dispatcher
* eth: fix dispatcher cancellation, rework fetchers idleness tracker
* eth/downloader: drop peers who refuse to serve advertised chains
eth/handler.go (131 changed lines)
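The core of the change in this file is the move from fire-and-forget requests to the new direct request/reply API: the caller now supplies a response channel, receives the answer as an *eth.Response whose Res field holds the decoded packet, and reports its verdict back on res.Done. A minimal sketch of the pattern, distilled from the hunks below (the handler receiver h, the peer, syncChallengeTimeout and the requested number are assumed from the surrounding code):

    resCh := make(chan *eth.Response)
    if _, err := peer.RequestHeadersByNumber(number, 1, 0, false, resCh); err != nil {
        return err // the request could not be dispatched at all
    }
    go func() {
        timeout := time.NewTimer(syncChallengeTimeout)
        defer timeout.Stop()

        select {
        case res := <-resCh:
            // The reply arrives pre-decoded; sending on res.Done reports
            // whether the answer was acceptable (a non-nil error penalizes
            // the peer).
            headers := ([]*types.Header)(*res.Res.(*eth.BlockHeadersPacket))
            if len(headers) != 1 {
                res.Done <- errors.New("unexpected header count")
                return
            }
            res.Done <- nil
        case <-timeout.C:
            // No reply within the challenge window: drop the peer.
            h.removePeer(peer.ID())
        }
    }()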
@@ -83,8 +83,8 @@ type handlerConfig struct {
     TxPool     txPool                    // Transaction pool to propagate from
     Merger     *consensus.Merger         // The manager for eth1/2 transition
     Network    uint64                    // Network identifier to advertise
-    Sync       downloader.SyncMode       // Whether to fast or full sync
-    BloomCache uint64                    // Megabytes to alloc for fast sync bloom
+    Sync       downloader.SyncMode       // Whether to snap or full sync
+    BloomCache uint64                    // Megabytes to alloc for snap sync bloom
     EventMux   *event.TypeMux            // Legacy event mux, deprecate for `feed`
     Checkpoint *params.TrustedCheckpoint // Hard coded checkpoint for sync challenges
     Whitelist  map[uint64]common.Hash    // Hard coded whitelist for sync challenged
@@ -94,8 +94,7 @@ type handler struct {
     networkID  uint64
     forkFilter forkid.Filter // Fork ID filter, constant across the lifetime of the node

-    fastSync  uint32 // Flag whether fast sync is enabled (gets disabled if we already have blocks)
-    snapSync  uint32 // Flag whether fast sync should operate on top of the snap protocol
+    snapSync  uint32 // Flag whether snap sync is enabled (gets disabled if we already have blocks)
     acceptTxs uint32 // Flag whether we're considered synchronised (enables transaction processing)

     checkpointNumber uint64 // Block number for the sync progress validator to cross reference
@@ -147,29 +146,26 @@ func newHandler(config *handlerConfig) (*handler, error) {
         quitSync: make(chan struct{}),
     }
     if config.Sync == downloader.FullSync {
-        // The database seems empty as the current block is the genesis. Yet the fast
-        // block is ahead, so fast sync was enabled for this node at a certain point.
+        // The database seems empty as the current block is the genesis. Yet the snap
+        // block is ahead, so snap sync was enabled for this node at a certain point.
         // The scenarios where this can happen is
-        // * if the user manually (or via a bad block) rolled back a fast sync node
+        // * if the user manually (or via a bad block) rolled back a snap sync node
         //   below the sync point.
-        // * the last fast sync is not finished while user specifies a full sync this
+        // * the last snap sync is not finished while user specifies a full sync this
         //   time. But we don't have any recent state for full sync.
-        // In these cases however it's safe to reenable fast sync.
+        // In these cases however it's safe to reenable snap sync.
         fullBlock, fastBlock := h.chain.CurrentBlock(), h.chain.CurrentFastBlock()
         if fullBlock.NumberU64() == 0 && fastBlock.NumberU64() > 0 {
-            h.fastSync = uint32(1)
-            log.Warn("Switch sync mode from full sync to fast sync")
+            h.snapSync = uint32(1)
+            log.Warn("Switch sync mode from full sync to snap sync")
         }
     } else {
         if h.chain.CurrentBlock().NumberU64() > 0 {
-            // Print warning log if database is not empty to run fast sync.
-            log.Warn("Switch sync mode from fast sync to full sync")
+            // Print warning log if database is not empty to run snap sync.
+            log.Warn("Switch sync mode from snap sync to full sync")
         } else {
-            // If fast sync was requested and our database is empty, grant it
-            h.fastSync = uint32(1)
-            if config.Sync == downloader.SnapSync {
-                h.snapSync = uint32(1)
-            }
+            // If snap sync was requested and our database is empty, grant it
+            h.snapSync = uint32(1)
         }
     }
     // If we have trusted checkpoints, enforce them on the chain
@@ -177,14 +173,14 @@ func newHandler(config *handlerConfig) (*handler, error) {
         h.checkpointNumber = (config.Checkpoint.SectionIndex+1)*params.CHTFrequency - 1
         h.checkpointHash = config.Checkpoint.SectionHead
     }
-    // Construct the downloader (long sync) and its backing state bloom if fast
+    // Construct the downloader (long sync) and its backing state bloom if snap
     // sync is requested. The downloader is responsible for deallocating the state
     // bloom when it's done.
     // Note: we don't enable it if snap-sync is performed, since it's very heavy
-    // and the heal-portion of the snap sync is much lighter than fast. What we particularly
+    // and the heal-portion of the snap sync is much lighter than snap. What we particularly
    // want to avoid, is a 90%-finished (but restarted) snap-sync to begin
    // indexing the entire trie
-    if atomic.LoadUint32(&h.fastSync) == 1 && atomic.LoadUint32(&h.snapSync) == 0 {
+    if atomic.LoadUint32(&h.snapSync) == 1 && atomic.LoadUint32(&h.snapSync) == 0 {
         h.stateBloom = trie.NewSyncBloom(config.BloomCache, config.Database)
     }
     h.downloader = downloader.New(h.checkpointNumber, config.Database, h.stateBloom, h.eventMux, h.chain, nil, h.removePeer)
@@ -236,12 +232,12 @@ func newHandler(config *handlerConfig) (*handler, error) {
             log.Warn("Unsynced yet, discarded propagated block", "number", blocks[0].Number(), "hash", blocks[0].Hash())
             return 0, nil
         }
-        // If fast sync is running, deny importing weird blocks. This is a problematic
-        // clause when starting up a new network, because fast-syncing miners might not
+        // If snap sync is running, deny importing weird blocks. This is a problematic
+        // clause when starting up a new network, because snap-syncing miners might not
         // accept each others' blocks until a restart. Unfortunately we haven't figured
         // out a way yet where nodes can decide unilaterally whether the network is new
         // or not. This should be fixed if we figure out a solution.
-        if atomic.LoadUint32(&h.fastSync) == 1 {
+        if atomic.LoadUint32(&h.snapSync) == 1 {
             log.Warn("Fast syncing, discarded propagated block", "number", blocks[0].Number(), "hash", blocks[0].Hash())
             return 0, nil
         }
@@ -365,30 +361,93 @@ func (h *handler) runEthPeer(peer *eth.Peer, handler eth.Handler) error {
     // after this will be sent via broadcasts.
     h.syncTransactions(peer)

+    // Create a notification channel for pending requests if the peer goes down
+    dead := make(chan struct{})
+    defer close(dead)
+
     // If we have a trusted CHT, reject all peers below that (avoid fast sync eclipse)
     if h.checkpointHash != (common.Hash{}) {
         // Request the peer's checkpoint header for chain height/weight validation
-        if err := peer.RequestHeadersByNumber(h.checkpointNumber, 1, 0, false); err != nil {
+        resCh := make(chan *eth.Response)
+        if _, err := peer.RequestHeadersByNumber(h.checkpointNumber, 1, 0, false, resCh); err != nil {
             return err
         }
-        // Start a timer to disconnect if the peer doesn't reply in time
-        p.syncDrop = time.AfterFunc(syncChallengeTimeout, func() {
-            peer.Log().Warn("Checkpoint challenge timed out, dropping", "addr", peer.RemoteAddr(), "type", peer.Name())
-            h.removePeer(peer.ID())
-        })
-        // Make sure it's cleaned up if the peer dies off
-        defer func() {
-            if p.syncDrop != nil {
-                p.syncDrop.Stop()
-                p.syncDrop = nil
+        go func() {
+            timeout := time.NewTimer(syncChallengeTimeout)
+            defer timeout.Stop()
+
+            select {
+            case res := <-resCh:
+                headers := ([]*types.Header)(*res.Res.(*eth.BlockHeadersPacket))
+                if len(headers) == 0 {
+                    // If we're doing a snap sync, we must enforce the checkpoint
+                    // block to avoid eclipse attacks. Unsynced nodes are welcome
+                    // to connect after we're done joining the network.
+                    if atomic.LoadUint32(&h.snapSync) == 1 {
+                        peer.Log().Warn("Dropping unsynced node during sync", "addr", peer.RemoteAddr(), "type", peer.Name())
+                        res.Done <- errors.New("unsynced node cannot serve sync")
+                        return
+                    }
+                    res.Done <- nil
+                    return
+                }
+                // Validate the header and either drop the peer or continue
+                if len(headers) > 1 {
+                    res.Done <- errors.New("too many headers in checkpoint response")
+                    return
+                }
+                if headers[0].Hash() != h.checkpointHash {
+                    res.Done <- errors.New("checkpoint hash mismatch")
+                    return
+                }
+                res.Done <- nil
+
+            case <-timeout.C:
+                peer.Log().Warn("Checkpoint challenge timed out, dropping", "addr", peer.RemoteAddr(), "type", peer.Name())
+                h.removePeer(peer.ID())
+
+            case <-dead:
+                // Peer handler terminated, abort all goroutines
             }
         }()
     }
     // If we have any explicit whitelist block hashes, request them
-    for number := range h.whitelist {
-        if err := peer.RequestHeadersByNumber(number, 1, 0, false); err != nil {
+    for number, hash := range h.whitelist {
+        resCh := make(chan *eth.Response)
+        if _, err := peer.RequestHeadersByNumber(number, 1, 0, false, resCh); err != nil {
             return err
         }
+        go func(number uint64, hash common.Hash) {
+            timeout := time.NewTimer(syncChallengeTimeout)
+            defer timeout.Stop()
+
+            select {
+            case res := <-resCh:
+                headers := ([]*types.Header)(*res.Res.(*eth.BlockHeadersPacket))
+                if len(headers) == 0 {
+                    // Whitelisted blocks are allowed to be missing if the remote
+                    // node is not yet synced
+                    res.Done <- nil
+                    return
+                }
+                // Validate the header and either drop the peer or continue
+                if len(headers) > 1 {
+                    res.Done <- errors.New("too many headers in whitelist response")
+                    return
+                }
+                if headers[0].Number.Uint64() != number || headers[0].Hash() != hash {
+                    peer.Log().Info("Whitelist mismatch, dropping peer", "number", number, "hash", headers[0].Hash(), "want", hash)
+                    res.Done <- errors.New("whitelist block mismatch")
+                    return
+                }
+                peer.Log().Debug("Whitelist block verified", "number", number, "hash", hash)
+                res.Done <- nil
+
+            case <-timeout.C:
+                peer.Log().Warn("Whitelist challenge timed out, dropping", "addr", peer.RemoteAddr(), "type", peer.Name())
+                h.removePeer(peer.ID())
+            }
+        }(number, hash)
     }
     // Handle incoming messages until the connection is torn down
     return handler(peer)
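Worth noting in the new flow above: the error sent on res.Done is how the handler tells the dispatcher that a peer served an unacceptable answer, which is what ultimately backs the commit-message bullet about dropping peers who refuse to serve their advertised chains. The dead channel, closed when runEthPeer returns, unblocks a checkpoint-challenge goroutine whose peer disconnected before answering; the whitelist goroutine has no such case and presumably relies on its timeout for the same cleanup.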