eth, p2p/msgrate: move peer QoS tracking to its own package and use it for snap (#22876)
This change extracts the peer QoS tracking logic from eth/downloader, moving it into the new package p2p/msgrate. The job of msgrate.Tracker is determining suitable timeout values and request sizes per peer. The snap sync scheduler now uses msgrate.Tracker instead of the hard-coded 15s timeout. This should make the sync work better on network links with high latency.
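For orientation, the following is a condensed sketch of the tracker lifecycle this change introduces, pieced together from the calls that appear in the diff below. It is illustrative only: the message-code constant, peer id and item count are stand-ins, and the real integration is the Syncer code shown in the diff.

// Condensed, illustrative sketch of the msgrate wiring visible in the diff
// below. Constants and ids are stand-ins, not part of the change.
package main

import (
	"time"

	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/p2p/msgrate"
)

const AccountRangeMsg = 0x01 // stand-in for the snap response message code

func main() {
	// One Trackers instance per protocol, shared by all peers.
	rates := msgrate.NewTrackers(log.New("proto", "snap"))

	// Peer joins: seed its tracker from the current mean capacities and
	// median round trip time of the existing peer set.
	id := "peer-1"
	rates.Track(id, msgrate.NewTracker(rates.MeanCapacities(), rates.MedianRoundTrip()))

	// Assigning a request: ask how much this peer can serve within the
	// current target timeout, and arm the request timer with that timeout
	// instead of the old fixed 15s.
	ttl := rates.TargetTimeout()
	items := rates.Capacity(id, AccountRangeMsg, ttl)
	_ = items

	start := time.Now()
	timer := time.AfterFunc(ttl, func() {
		rates.Update(id, AccountRangeMsg, 0, 0) // a timeout counts as zero throughput
	})

	// Delivery arrives: stop the timer and feed the measurement back.
	timer.Stop()
	rates.Update(id, AccountRangeMsg, time.Since(start), 128 /* items delivered, stand-in */)

	// Peer drops: stop tracking it.
	rates.Untrack(id)
}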
@@ -37,6 +37,7 @@ import (
 	"github.com/ethereum/go-ethereum/event"
 	"github.com/ethereum/go-ethereum/light"
 	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/p2p/msgrate"
 	"github.com/ethereum/go-ethereum/rlp"
 	"github.com/ethereum/go-ethereum/trie"
 	"golang.org/x/crypto/sha3"
@@ -51,14 +52,15 @@ var (
 )
 
 const (
-	// maxRequestSize is the maximum number of bytes to request from a remote peer.
-	maxRequestSize = 128 * 1024
+	// minRequestSize is the minimum number of bytes to request from a remote peer.
+	// This number is used as the low cap for account and storage range requests.
+	// Bytecode and trienode are limited inherently by item count (1).
+	minRequestSize = 64 * 1024
 
-	// maxStorageSetRequestCount is the maximum number of contracts to request the
-	// storage of in a single query. If this number is too low, we're not filling
-	// responses fully and waste round trip times. If it's too high, we're capping
-	// responses and waste bandwidth.
-	maxStorageSetRequestCount = maxRequestSize / 1024
+	// maxRequestSize is the maximum number of bytes to request from a remote peer.
+	// This number is used as the high cap for account and storage range requests.
+	// Bytecode and trienode are limited more explicitly by the caps below.
+	maxRequestSize = 512 * 1024
 
 	// maxCodeRequestCount is the maximum number of bytecode blobs to request in a
 	// single query. If this number is too low, we're not filling responses fully
@@ -74,7 +76,7 @@ const (
 	// a single query. If this number is too low, we're not filling responses fully
 	// and waste round trip times. If it's too high, we're capping responses and
 	// waste bandwidth.
-	maxTrieRequestCount = 256
+	maxTrieRequestCount = maxRequestSize / 512
 )
 
 var (
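With maxRequestSize raised from 128 KiB to 512 KiB, the derived cap works out to 512 * 1024 / 512 = 1024 trie nodes per query, four times the previous fixed 256. The number actually requested is still clamped further down by the peer's measured capacity.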
@@ -85,10 +87,6 @@ var (
 	// storageConcurrency is the number of chunks to split the a large contract
 	// storage trie into to allow concurrent retrievals.
 	storageConcurrency = 16
-
-	// requestTimeout is the maximum time a peer is allowed to spend on serving
-	// a single network request.
-	requestTimeout = 15 * time.Second // TODO(karalabe): Make it dynamic ala fast-sync?
 )
 
 // ErrCancelled is returned from snap syncing if the operation was prematurely
@@ -105,8 +103,9 @@ var ErrCancelled = errors.New("sync cancelled")
 // is only included to allow the runloop to match a response to the task being
 // synced without having yet another set of maps.
 type accountRequest struct {
-	peer string // Peer to which this request is assigned
-	id   uint64 // Request ID of this request
+	peer string    // Peer to which this request is assigned
+	id   uint64    // Request ID of this request
+	time time.Time // Timestamp when the request was sent
 
 	deliver chan *accountResponse // Channel to deliver successful response on
 	revert  chan *accountRequest  // Channel to deliver request failure on
@@ -142,8 +141,9 @@ type accountResponse struct {
 // is only included to allow the runloop to match a response to the task being
 // synced without having yet another set of maps.
 type bytecodeRequest struct {
-	peer string // Peer to which this request is assigned
-	id   uint64 // Request ID of this request
+	peer string    // Peer to which this request is assigned
+	id   uint64    // Request ID of this request
+	time time.Time // Timestamp when the request was sent
 
 	deliver chan *bytecodeResponse // Channel to deliver successful response on
 	revert  chan *bytecodeRequest  // Channel to deliver request failure on
@@ -173,8 +173,9 @@ type bytecodeResponse struct {
 // is only included to allow the runloop to match a response to the task being
 // synced without having yet another set of maps.
 type storageRequest struct {
-	peer string // Peer to which this request is assigned
-	id   uint64 // Request ID of this request
+	peer string    // Peer to which this request is assigned
+	id   uint64    // Request ID of this request
+	time time.Time // Timestamp when the request was sent
 
 	deliver chan *storageResponse // Channel to deliver successful response on
 	revert  chan *storageRequest  // Channel to deliver request failure on
@@ -218,8 +219,9 @@ type storageResponse struct {
 // is only included to allow the runloop to match a response to the task being
 // synced without having yet another set of maps.
 type trienodeHealRequest struct {
-	peer string // Peer to which this request is assigned
-	id   uint64 // Request ID of this request
+	peer string    // Peer to which this request is assigned
+	id   uint64    // Request ID of this request
+	time time.Time // Timestamp when the request was sent
 
 	deliver chan *trienodeHealResponse // Channel to deliver successful response on
 	revert  chan *trienodeHealRequest  // Channel to deliver request failure on
@@ -252,8 +254,9 @@ type trienodeHealResponse struct {
 // is only included to allow the runloop to match a response to the task being
 // synced without having yet another set of maps.
 type bytecodeHealRequest struct {
-	peer string // Peer to which this request is assigned
-	id   uint64 // Request ID of this request
+	peer string    // Peer to which this request is assigned
+	id   uint64    // Request ID of this request
+	time time.Time // Timestamp when the request was sent
 
 	deliver chan *bytecodeHealResponse // Channel to deliver successful response on
 	revert  chan *bytecodeHealRequest  // Channel to deliver request failure on
@@ -396,6 +399,7 @@ type Syncer struct {
 	peers    map[string]SyncPeer // Currently active peers to download from
 	peerJoin *event.Feed         // Event feed to react to peers joining
 	peerDrop *event.Feed         // Event feed to react to peers dropping
+	rates    *msgrate.Trackers   // Message throughput rates for peers
 
 	// Request tracking during syncing phase
 	statelessPeers map[string]struct{} // Peers that failed to deliver state data
@@ -452,6 +456,7 @@ func NewSyncer(db ethdb.KeyValueStore) *Syncer {
 		peers:    make(map[string]SyncPeer),
 		peerJoin: new(event.Feed),
 		peerDrop: new(event.Feed),
+		rates:    msgrate.NewTrackers(log.New("proto", "snap")),
 		update:   make(chan struct{}, 1),
 
 		accountIdlers: make(map[string]struct{}),
@@ -484,6 +489,7 @@ func (s *Syncer) Register(peer SyncPeer) error {
 		return errors.New("already registered")
 	}
 	s.peers[id] = peer
+	s.rates.Track(id, msgrate.NewTracker(s.rates.MeanCapacities(), s.rates.MedianRoundTrip()))
 
 	// Mark the peer as idle, even if no sync is running
 	s.accountIdlers[id] = struct{}{}
@@ -509,6 +515,7 @@ func (s *Syncer) Unregister(id string) error {
 		return errors.New("not registered")
 	}
 	delete(s.peers, id)
+	s.rates.Untrack(id)
 
 	// Remove status markers, even if no sync is running
 	delete(s.statelessPeers, id)
@@ -851,10 +858,24 @@ func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *ac
 	s.lock.Lock()
 	defer s.lock.Unlock()
 
-	// If there are no idle peers, short circuit assignment
-	if len(s.accountIdlers) == 0 {
+	// Sort the peers by download capacity to use faster ones if many available
+	idlers := &capacitySort{
+		ids:  make([]string, 0, len(s.accountIdlers)),
+		caps: make([]float64, 0, len(s.accountIdlers)),
+	}
+	targetTTL := s.rates.TargetTimeout()
+	for id := range s.accountIdlers {
+		if _, ok := s.statelessPeers[id]; ok {
+			continue
+		}
+		idlers.ids = append(idlers.ids, id)
+		idlers.caps = append(idlers.caps, s.rates.Capacity(id, AccountRangeMsg, targetTTL))
+	}
+	if len(idlers.ids) == 0 {
 		return
 	}
+	sort.Sort(sort.Reverse(idlers))
+
 	// Iterate over all the tasks and try to find a pending one
 	for _, task := range s.tasks {
 		// Skip any tasks already filling
@@ -864,20 +885,15 @@ func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *ac
 		// Task pending retrieval, try to find an idle peer. If no such peer
 		// exists, we probably assigned tasks for all (or they are stateless).
 		// Abort the entire assignment mechanism.
-		var idle string
-		for id := range s.accountIdlers {
-			// If the peer rejected a query in this sync cycle, don't bother asking
-			// again for anything, it's either out of sync or already pruned
-			if _, ok := s.statelessPeers[id]; ok {
-				continue
-			}
-			idle = id
-			break
-		}
-		if idle == "" {
+		if len(idlers.ids) == 0 {
 			return
 		}
-		peer := s.peers[idle]
+		var (
+			idle = idlers.ids[0]
+			peer = s.peers[idle]
+			cap  = idlers.caps[0]
+		)
+		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
 
 		// Matched a pending task to an idle peer, allocate a unique request id
 		var reqid uint64
@@ -895,6 +911,7 @@ func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *ac
 		req := &accountRequest{
 			peer:    idle,
 			id:      reqid,
+			time:    time.Now(),
 			deliver: success,
 			revert:  fail,
 			cancel:  cancel,
@@ -903,8 +920,9 @@ func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *ac
 			limit:   task.Last,
 			task:    task,
 		}
-		req.timeout = time.AfterFunc(requestTimeout, func() {
+		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
 			peer.Log().Debug("Account range request timed out", "reqid", reqid)
+			s.rates.Update(idle, AccountRangeMsg, 0, 0)
 			s.scheduleRevertAccountRequest(req)
 		})
 		s.accountReqs[reqid] = req
@@ -915,7 +933,13 @@ func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *ac
 			defer s.pend.Done()
 
 			// Attempt to send the remote request and revert if it fails
-			if err := peer.RequestAccountRange(reqid, root, req.origin, req.limit, maxRequestSize); err != nil {
+			if cap > maxRequestSize {
+				cap = maxRequestSize
+			}
+			if cap < minRequestSize { // Don't bother with peers below a bare minimum performance
+				cap = minRequestSize
+			}
+			if err := peer.RequestAccountRange(reqid, root, req.origin, req.limit, uint64(cap)); err != nil {
 				peer.Log().Debug("Failed to request account range", "err", err)
 				s.scheduleRevertAccountRequest(req)
 			}
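The per-peer capacity estimate is a float that can fall outside any sane request size, so the account and storage range paths clamp it into the [minRequestSize, maxRequestSize] band before using it as the byte cap. A minimal helper capturing that pattern (illustrative only; the diff inlines this logic at each call site):

// clampRequestBytes bounds a per-peer capacity estimate to the byte band
// defined by the minRequestSize and maxRequestSize constants above.
// Illustrative sketch, not part of the commit.
func clampRequestBytes(cap float64) uint64 {
	if cap > maxRequestSize {
		cap = maxRequestSize
	}
	if cap < minRequestSize { // don't bother with peers below a bare minimum performance
		cap = minRequestSize
	}
	return uint64(cap)
}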
@@ -931,10 +955,24 @@ func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan *
 	s.lock.Lock()
 	defer s.lock.Unlock()
 
-	// If there are no idle peers, short circuit assignment
-	if len(s.bytecodeIdlers) == 0 {
+	// Sort the peers by download capacity to use faster ones if many available
+	idlers := &capacitySort{
+		ids:  make([]string, 0, len(s.bytecodeIdlers)),
+		caps: make([]float64, 0, len(s.bytecodeIdlers)),
+	}
+	targetTTL := s.rates.TargetTimeout()
+	for id := range s.bytecodeIdlers {
+		if _, ok := s.statelessPeers[id]; ok {
+			continue
+		}
+		idlers.ids = append(idlers.ids, id)
+		idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL))
+	}
+	if len(idlers.ids) == 0 {
 		return
 	}
+	sort.Sort(sort.Reverse(idlers))
+
 	// Iterate over all the tasks and try to find a pending one
 	for _, task := range s.tasks {
 		// Skip any tasks not in the bytecode retrieval phase
@@ -948,20 +986,15 @@ func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan *
 		// Task pending retrieval, try to find an idle peer. If no such peer
 		// exists, we probably assigned tasks for all (or they are stateless).
 		// Abort the entire assignment mechanism.
-		var idle string
-		for id := range s.bytecodeIdlers {
-			// If the peer rejected a query in this sync cycle, don't bother asking
-			// again for anything, it's either out of sync or already pruned
-			if _, ok := s.statelessPeers[id]; ok {
-				continue
-			}
-			idle = id
-			break
-		}
-		if idle == "" {
+		if len(idlers.ids) == 0 {
 			return
 		}
-		peer := s.peers[idle]
+		var (
+			idle = idlers.ids[0]
+			peer = s.peers[idle]
+			cap  = idlers.caps[0]
+		)
+		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
 
 		// Matched a pending task to an idle peer, allocate a unique request id
 		var reqid uint64
@@ -976,17 +1009,21 @@ func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan *
 			break
 		}
 		// Generate the network query and send it to the peer
-		hashes := make([]common.Hash, 0, maxCodeRequestCount)
+		if cap > maxCodeRequestCount {
+			cap = maxCodeRequestCount
+		}
+		hashes := make([]common.Hash, 0, int(cap))
 		for hash := range task.codeTasks {
 			delete(task.codeTasks, hash)
 			hashes = append(hashes, hash)
-			if len(hashes) >= maxCodeRequestCount {
+			if len(hashes) >= int(cap) {
 				break
 			}
 		}
 		req := &bytecodeRequest{
 			peer:    idle,
 			id:      reqid,
+			time:    time.Now(),
 			deliver: success,
 			revert:  fail,
 			cancel:  cancel,
@@ -994,8 +1031,9 @@ func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan *
 			hashes: hashes,
 			task:   task,
 		}
-		req.timeout = time.AfterFunc(requestTimeout, func() {
+		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
 			peer.Log().Debug("Bytecode request timed out", "reqid", reqid)
+			s.rates.Update(idle, ByteCodesMsg, 0, 0)
 			s.scheduleRevertBytecodeRequest(req)
 		})
 		s.bytecodeReqs[reqid] = req
@@ -1020,10 +1058,24 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st
 	s.lock.Lock()
 	defer s.lock.Unlock()
 
-	// If there are no idle peers, short circuit assignment
-	if len(s.storageIdlers) == 0 {
+	// Sort the peers by download capacity to use faster ones if many available
+	idlers := &capacitySort{
+		ids:  make([]string, 0, len(s.storageIdlers)),
+		caps: make([]float64, 0, len(s.storageIdlers)),
+	}
+	targetTTL := s.rates.TargetTimeout()
+	for id := range s.storageIdlers {
+		if _, ok := s.statelessPeers[id]; ok {
+			continue
+		}
+		idlers.ids = append(idlers.ids, id)
+		idlers.caps = append(idlers.caps, s.rates.Capacity(id, StorageRangesMsg, targetTTL))
+	}
+	if len(idlers.ids) == 0 {
 		return
 	}
+	sort.Sort(sort.Reverse(idlers))
+
 	// Iterate over all the tasks and try to find a pending one
 	for _, task := range s.tasks {
 		// Skip any tasks not in the storage retrieval phase
@@ -1037,20 +1089,15 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st
 		// Task pending retrieval, try to find an idle peer. If no such peer
 		// exists, we probably assigned tasks for all (or they are stateless).
 		// Abort the entire assignment mechanism.
-		var idle string
-		for id := range s.storageIdlers {
-			// If the peer rejected a query in this sync cycle, don't bother asking
-			// again for anything, it's either out of sync or already pruned
-			if _, ok := s.statelessPeers[id]; ok {
-				continue
-			}
-			idle = id
-			break
-		}
-		if idle == "" {
+		if len(idlers.ids) == 0 {
 			return
 		}
-		peer := s.peers[idle]
+		var (
+			idle = idlers.ids[0]
+			peer = s.peers[idle]
+			cap  = idlers.caps[0]
+		)
+		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
 
 		// Matched a pending task to an idle peer, allocate a unique request id
 		var reqid uint64
@@ -1067,9 +1114,17 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st
 		// Generate the network query and send it to the peer. If there are
 		// large contract tasks pending, complete those before diving into
 		// even more new contracts.
+		if cap > maxRequestSize {
+			cap = maxRequestSize
+		}
+		if cap < minRequestSize { // Don't bother with peers below a bare minimum performance
+			cap = minRequestSize
+		}
+		storageSets := int(cap / 1024)
+
 		var (
-			accounts = make([]common.Hash, 0, maxStorageSetRequestCount)
-			roots    = make([]common.Hash, 0, maxStorageSetRequestCount)
+			accounts = make([]common.Hash, 0, storageSets)
+			roots    = make([]common.Hash, 0, storageSets)
 			subtask  *storageTask
 		)
 		for account, subtasks := range task.SubTasks {
@@ -1096,7 +1151,7 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st
 			accounts = append(accounts, acccount)
 			roots = append(roots, root)
 
-			if len(accounts) >= maxStorageSetRequestCount {
+			if len(accounts) >= storageSets {
 				break
 			}
 		}
@@ -1109,6 +1164,7 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st
 		req := &storageRequest{
 			peer:    idle,
 			id:      reqid,
+			time:    time.Now(),
 			deliver: success,
 			revert:  fail,
 			cancel:  cancel,
@@ -1122,8 +1178,9 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st
 			req.origin = subtask.Next
 			req.limit = subtask.Last
 		}
-		req.timeout = time.AfterFunc(requestTimeout, func() {
+		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
 			peer.Log().Debug("Storage request timed out", "reqid", reqid)
+			s.rates.Update(idle, StorageRangesMsg, 0, 0)
 			s.scheduleRevertStorageRequest(req)
 		})
 		s.storageReqs[reqid] = req
@@ -1138,7 +1195,7 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st
 			if subtask != nil {
 				origin, limit = req.origin[:], req.limit[:]
 			}
-			if err := peer.RequestStorageRanges(reqid, root, accounts, origin, limit, maxRequestSize); err != nil {
+			if err := peer.RequestStorageRanges(reqid, root, accounts, origin, limit, uint64(cap)); err != nil {
 				log.Debug("Failed to request storage", "err", err)
 				s.scheduleRevertStorageRequest(req)
 			}
@@ -1157,10 +1214,24 @@ func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fai
 	s.lock.Lock()
 	defer s.lock.Unlock()
 
-	// If there are no idle peers, short circuit assignment
-	if len(s.trienodeHealIdlers) == 0 {
+	// Sort the peers by download capacity to use faster ones if many available
+	idlers := &capacitySort{
+		ids:  make([]string, 0, len(s.trienodeHealIdlers)),
+		caps: make([]float64, 0, len(s.trienodeHealIdlers)),
+	}
+	targetTTL := s.rates.TargetTimeout()
+	for id := range s.trienodeHealIdlers {
+		if _, ok := s.statelessPeers[id]; ok {
+			continue
+		}
+		idlers.ids = append(idlers.ids, id)
+		idlers.caps = append(idlers.caps, s.rates.Capacity(id, TrieNodesMsg, targetTTL))
+	}
+	if len(idlers.ids) == 0 {
 		return
 	}
+	sort.Sort(sort.Reverse(idlers))
+
 	// Iterate over pending tasks and try to find a peer to retrieve with
 	for len(s.healer.trieTasks) > 0 || s.healer.scheduler.Pending() > 0 {
 		// If there are not enough trie tasks queued to fully assign, fill the
@@ -1186,20 +1257,15 @@ func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fai
 		// Task pending retrieval, try to find an idle peer. If no such peer
 		// exists, we probably assigned tasks for all (or they are stateless).
 		// Abort the entire assignment mechanism.
-		var idle string
-		for id := range s.trienodeHealIdlers {
-			// If the peer rejected a query in this sync cycle, don't bother asking
-			// again for anything, it's either out of sync or already pruned
-			if _, ok := s.statelessPeers[id]; ok {
-				continue
-			}
-			idle = id
-			break
-		}
-		if idle == "" {
+		if len(idlers.ids) == 0 {
 			return
 		}
-		peer := s.peers[idle]
+		var (
+			idle = idlers.ids[0]
+			peer = s.peers[idle]
+			cap  = idlers.caps[0]
+		)
+		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
 
 		// Matched a pending task to an idle peer, allocate a unique request id
 		var reqid uint64
@@ -1214,10 +1280,13 @@ func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fai
 			break
 		}
 		// Generate the network query and send it to the peer
+		if cap > maxTrieRequestCount {
+			cap = maxTrieRequestCount
+		}
 		var (
-			hashes   = make([]common.Hash, 0, maxTrieRequestCount)
-			paths    = make([]trie.SyncPath, 0, maxTrieRequestCount)
-			pathsets = make([]TrieNodePathSet, 0, maxTrieRequestCount)
+			hashes   = make([]common.Hash, 0, int(cap))
+			paths    = make([]trie.SyncPath, 0, int(cap))
+			pathsets = make([]TrieNodePathSet, 0, int(cap))
 		)
 		for hash, pathset := range s.healer.trieTasks {
 			delete(s.healer.trieTasks, hash)
@@ -1226,13 +1295,14 @@ func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fai
 			paths = append(paths, pathset)
 			pathsets = append(pathsets, [][]byte(pathset)) // TODO(karalabe): group requests by account hash
 
-			if len(hashes) >= maxTrieRequestCount {
+			if len(hashes) >= int(cap) {
 				break
 			}
 		}
 		req := &trienodeHealRequest{
 			peer:    idle,
 			id:      reqid,
+			time:    time.Now(),
 			deliver: success,
 			revert:  fail,
 			cancel:  cancel,
@@ -1241,8 +1311,9 @@ func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fai
 			paths:   paths,
 			task:    s.healer,
 		}
-		req.timeout = time.AfterFunc(requestTimeout, func() {
+		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
 			peer.Log().Debug("Trienode heal request timed out", "reqid", reqid)
+			s.rates.Update(idle, TrieNodesMsg, 0, 0)
 			s.scheduleRevertTrienodeHealRequest(req)
 		})
 		s.trienodeHealReqs[reqid] = req
@@ -1267,10 +1338,24 @@ func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fai
 	s.lock.Lock()
 	defer s.lock.Unlock()
 
-	// If there are no idle peers, short circuit assignment
-	if len(s.bytecodeHealIdlers) == 0 {
+	// Sort the peers by download capacity to use faster ones if many available
+	idlers := &capacitySort{
+		ids:  make([]string, 0, len(s.bytecodeHealIdlers)),
+		caps: make([]float64, 0, len(s.bytecodeHealIdlers)),
+	}
+	targetTTL := s.rates.TargetTimeout()
+	for id := range s.bytecodeHealIdlers {
+		if _, ok := s.statelessPeers[id]; ok {
+			continue
+		}
+		idlers.ids = append(idlers.ids, id)
+		idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL))
+	}
+	if len(idlers.ids) == 0 {
 		return
 	}
+	sort.Sort(sort.Reverse(idlers))
+
 	// Iterate over pending tasks and try to find a peer to retrieve with
 	for len(s.healer.codeTasks) > 0 || s.healer.scheduler.Pending() > 0 {
 		// If there are not enough trie tasks queued to fully assign, fill the
@@ -1296,20 +1381,15 @@ func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fai
 		// Task pending retrieval, try to find an idle peer. If no such peer
 		// exists, we probably assigned tasks for all (or they are stateless).
 		// Abort the entire assignment mechanism.
-		var idle string
-		for id := range s.bytecodeHealIdlers {
-			// If the peer rejected a query in this sync cycle, don't bother asking
-			// again for anything, it's either out of sync or already pruned
-			if _, ok := s.statelessPeers[id]; ok {
-				continue
-			}
-			idle = id
-			break
-		}
-		if idle == "" {
+		if len(idlers.ids) == 0 {
 			return
 		}
-		peer := s.peers[idle]
+		var (
+			idle = idlers.ids[0]
+			peer = s.peers[idle]
+			cap  = idlers.caps[0]
+		)
+		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
 
 		// Matched a pending task to an idle peer, allocate a unique request id
 		var reqid uint64
@@ -1324,18 +1404,22 @@ func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fai
 			break
 		}
 		// Generate the network query and send it to the peer
-		hashes := make([]common.Hash, 0, maxCodeRequestCount)
+		if cap > maxCodeRequestCount {
+			cap = maxCodeRequestCount
+		}
+		hashes := make([]common.Hash, 0, int(cap))
 		for hash := range s.healer.codeTasks {
 			delete(s.healer.codeTasks, hash)
 
 			hashes = append(hashes, hash)
-			if len(hashes) >= maxCodeRequestCount {
+			if len(hashes) >= int(cap) {
 				break
 			}
 		}
 		req := &bytecodeHealRequest{
 			peer:    idle,
 			id:      reqid,
+			time:    time.Now(),
 			deliver: success,
 			revert:  fail,
 			cancel:  cancel,
@@ -1343,8 +1427,9 @@ func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fai
 			hashes: hashes,
 			task:   s.healer,
 		}
-		req.timeout = time.AfterFunc(requestTimeout, func() {
+		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
 			peer.Log().Debug("Bytecode heal request timed out", "reqid", reqid)
+			s.rates.Update(idle, ByteCodesMsg, 0, 0)
 			s.scheduleRevertBytecodeHealRequest(req)
 		})
 		s.bytecodeHealReqs[reqid] = req
@@ -2142,6 +2227,7 @@ func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, acco
 		return nil
 	}
 	delete(s.accountReqs, id)
+	s.rates.Update(peer.ID(), AccountRangeMsg, time.Since(req.time), int(size))
 
 	// Clean up the request timeout timer, we'll see how to proceed further based
 	// on the actual delivered content
@@ -2253,6 +2339,7 @@ func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error
 		return nil
 	}
 	delete(s.bytecodeReqs, id)
+	s.rates.Update(peer.ID(), ByteCodesMsg, time.Since(req.time), len(bytecodes))
 
 	// Clean up the request timeout timer, we'll see how to proceed further based
 	// on the actual delivered content
@@ -2361,6 +2448,7 @@ func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slo
 		return nil
 	}
 	delete(s.storageReqs, id)
+	s.rates.Update(peer.ID(), StorageRangesMsg, time.Since(req.time), int(size))
 
 	// Clean up the request timeout timer, we'll see how to proceed further based
 	// on the actual delivered content
@@ -2487,6 +2575,7 @@ func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error
 		return nil
 	}
 	delete(s.trienodeHealReqs, id)
+	s.rates.Update(peer.ID(), TrieNodesMsg, time.Since(req.time), len(trienodes))
 
 	// Clean up the request timeout timer, we'll see how to proceed further based
 	// on the actual delivered content
@@ -2581,6 +2670,7 @@ func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) e
 		return nil
 	}
 	delete(s.bytecodeHealReqs, id)
+	s.rates.Update(peer.ID(), ByteCodesMsg, time.Since(req.time), len(bytecodes))
 
 	// Clean up the request timeout timer, we'll see how to proceed further based
 	// on the actual delivered content
@@ -2756,3 +2846,24 @@ func estimateRemainingSlots(hashes int, last common.Hash) (uint64, error) {
 	}
 	return space.Uint64() - uint64(hashes), nil
 }
+
+// capacitySort implements the Sort interface, allowing sorting by peer message
+// throughput. Note, callers should use sort.Reverse to get the desired effect
+// of highest capacity being at the front.
+type capacitySort struct {
+	ids  []string
+	caps []float64
+}
+
+func (s *capacitySort) Len() int {
+	return len(s.ids)
+}
+
+func (s *capacitySort) Less(i, j int) bool {
+	return s.caps[i] < s.caps[j]
+}
+
+func (s *capacitySort) Swap(i, j int) {
+	s.ids[i], s.ids[j] = s.ids[j], s.ids[i]
+	s.caps[i], s.caps[j] = s.caps[j], s.caps[i]
+}
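Less orders ascending by capacity, which is why every assignment loop above wraps the sorter in sort.Reverse before popping idlers.ids[0]. A short usage sketch, assuming the capacitySort type above and the standard library sort package:

	idlers := &capacitySort{
		ids:  []string{"slow", "fast", "medium"},
		caps: []float64{1, 3, 2},
	}
	sort.Sort(sort.Reverse(idlers))
	// idlers.ids is now ["fast", "medium", "slow"]; the assign* functions take
	// ids[0]/caps[0] and re-slice both in lockstep as peers are handed work.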