eth, p2p/msgrate: move peer QoS tracking to its own package and use it for snap (#22876)
This change extracts the peer QoS tracking logic from eth/downloader, moving it into the new package p2p/msgrate. The job of msgrate.Tracker is determining suitable timeout values and request sizes per peer. The snap sync scheduler now uses msgrate.Tracker instead of the hard-coded 15s timeout. This should make the sync work better on network links with high latency.
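For orientation, the following is a condensed sketch of the tracker lifecycle this change introduces, pieced together from the calls that appear in the diff below. It is illustrative only: the message-code constant, peer id and item count are stand-ins, and the real integration is the Syncer code shown in the diff.

// Condensed, illustrative sketch of the msgrate wiring visible in the diff
// below. Constants and ids are stand-ins, not part of the change.
package main

import (
	"time"

	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/p2p/msgrate"
)

const AccountRangeMsg = 0x01 // stand-in for the snap response message code

func main() {
	// One Trackers instance per protocol, shared by all peers.
	rates := msgrate.NewTrackers(log.New("proto", "snap"))

	// Peer joins: seed its tracker from the current mean capacities and
	// median round trip time of the existing peer set.
	id := "peer-1"
	rates.Track(id, msgrate.NewTracker(rates.MeanCapacities(), rates.MedianRoundTrip()))

	// Assigning a request: ask how much this peer can serve within the
	// current target timeout, and arm the request timer with that timeout
	// instead of the old fixed 15s.
	ttl := rates.TargetTimeout()
	items := rates.Capacity(id, AccountRangeMsg, ttl)
	_ = items

	start := time.Now()
	timer := time.AfterFunc(ttl, func() {
		rates.Update(id, AccountRangeMsg, 0, 0) // a timeout counts as zero throughput
	})

	// Delivery arrives: stop the timer and feed the measurement back.
	timer.Stop()
	rates.Update(id, AccountRangeMsg, time.Since(start), 128 /* items delivered, stand-in */)

	// Peer drops: stop tracking it.
	rates.Untrack(id)
}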
@@ -37,6 +37,7 @@ import (
 	"github.com/ethereum/go-ethereum/event"
 	"github.com/ethereum/go-ethereum/light"
 	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/p2p/msgrate"
 	"github.com/ethereum/go-ethereum/rlp"
 	"github.com/ethereum/go-ethereum/trie"
 	"golang.org/x/crypto/sha3"
@@ -51,14 +52,15 @@ var (
 )
 
 const (
-	// maxRequestSize is the maximum number of bytes to request from a remote peer.
-	maxRequestSize = 128 * 1024
+	// minRequestSize is the minimum number of bytes to request from a remote peer.
+	// This number is used as the low cap for account and storage range requests.
+	// Bytecode and trienode are limited inherently by item count (1).
+	minRequestSize = 64 * 1024
 
-	// maxStorageSetRequestCount is the maximum number of contracts to request the
-	// storage of in a single query. If this number is too low, we're not filling
-	// responses fully and waste round trip times. If it's too high, we're capping
-	// responses and waste bandwidth.
-	maxStorageSetRequestCount = maxRequestSize / 1024
+	// maxRequestSize is the maximum number of bytes to request from a remote peer.
+	// This number is used as the high cap for account and storage range requests.
+	// Bytecode and trienode are limited more explicitly by the caps below.
+	maxRequestSize = 512 * 1024
 
 	// maxCodeRequestCount is the maximum number of bytecode blobs to request in a
 	// single query. If this number is too low, we're not filling responses fully
@@ -74,7 +76,7 @@ const (
 	// a single query. If this number is too low, we're not filling responses fully
 	// and waste round trip times. If it's too high, we're capping responses and
 	// waste bandwidth.
-	maxTrieRequestCount = 256
+	maxTrieRequestCount = maxRequestSize / 512
 )
 
 var (
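With maxRequestSize raised from 128 KiB to 512 KiB, the derived cap works out to 512 * 1024 / 512 = 1024 trie nodes per query, four times the previous fixed 256. The number actually requested is still clamped further down by the peer's measured capacity.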
@@ -85,10 +87,6 @@ var (
 	// storageConcurrency is the number of chunks to split the a large contract
 	// storage trie into to allow concurrent retrievals.
 	storageConcurrency = 16
-
-	// requestTimeout is the maximum time a peer is allowed to spend on serving
-	// a single network request.
-	requestTimeout = 15 * time.Second // TODO(karalabe): Make it dynamic ala fast-sync?
 )
 
 // ErrCancelled is returned from snap syncing if the operation was prematurely
@@ -105,8 +103,9 @@ var ErrCancelled = errors.New("sync cancelled")
 // is only included to allow the runloop to match a response to the task being
 // synced without having yet another set of maps.
 type accountRequest struct {
-	peer string // Peer to which this request is assigned
-	id   uint64 // Request ID of this request
+	peer string    // Peer to which this request is assigned
+	id   uint64    // Request ID of this request
+	time time.Time // Timestamp when the request was sent
 
 	deliver chan *accountResponse // Channel to deliver successful response on
 	revert  chan *accountRequest  // Channel to deliver request failure on
@@ -142,8 +141,9 @@ type accountResponse struct {
 // is only included to allow the runloop to match a response to the task being
 // synced without having yet another set of maps.
 type bytecodeRequest struct {
-	peer string // Peer to which this request is assigned
-	id   uint64 // Request ID of this request
+	peer string    // Peer to which this request is assigned
+	id   uint64    // Request ID of this request
+	time time.Time // Timestamp when the request was sent
 
 	deliver chan *bytecodeResponse // Channel to deliver successful response on
 	revert  chan *bytecodeRequest  // Channel to deliver request failure on
@@ -173,8 +173,9 @@ type bytecodeResponse struct {
 // is only included to allow the runloop to match a response to the task being
 // synced without having yet another set of maps.
 type storageRequest struct {
-	peer string // Peer to which this request is assigned
-	id   uint64 // Request ID of this request
+	peer string    // Peer to which this request is assigned
+	id   uint64    // Request ID of this request
+	time time.Time // Timestamp when the request was sent
 
 	deliver chan *storageResponse // Channel to deliver successful response on
 	revert  chan *storageRequest  // Channel to deliver request failure on
@@ -218,8 +219,9 @@ type storageResponse struct {
 // is only included to allow the runloop to match a response to the task being
 // synced without having yet another set of maps.
 type trienodeHealRequest struct {
-	peer string // Peer to which this request is assigned
-	id   uint64 // Request ID of this request
+	peer string    // Peer to which this request is assigned
+	id   uint64    // Request ID of this request
+	time time.Time // Timestamp when the request was sent
 
 	deliver chan *trienodeHealResponse // Channel to deliver successful response on
 	revert  chan *trienodeHealRequest  // Channel to deliver request failure on
@@ -252,8 +254,9 @@ type trienodeHealResponse struct {
 // is only included to allow the runloop to match a response to the task being
 // synced without having yet another set of maps.
 type bytecodeHealRequest struct {
-	peer string // Peer to which this request is assigned
-	id   uint64 // Request ID of this request
+	peer string    // Peer to which this request is assigned
+	id   uint64    // Request ID of this request
+	time time.Time // Timestamp when the request was sent
 
 	deliver chan *bytecodeHealResponse // Channel to deliver successful response on
 	revert  chan *bytecodeHealRequest  // Channel to deliver request failure on
@@ -396,6 +399,7 @@ type Syncer struct {
 	peers    map[string]SyncPeer // Currently active peers to download from
 	peerJoin *event.Feed         // Event feed to react to peers joining
 	peerDrop *event.Feed         // Event feed to react to peers dropping
+	rates    *msgrate.Trackers   // Message throughput rates for peers
 
 	// Request tracking during syncing phase
 	statelessPeers map[string]struct{} // Peers that failed to deliver state data
@@ -452,6 +456,7 @@ func NewSyncer(db ethdb.KeyValueStore) *Syncer {
 		peers:    make(map[string]SyncPeer),
 		peerJoin: new(event.Feed),
 		peerDrop: new(event.Feed),
+		rates:    msgrate.NewTrackers(log.New("proto", "snap")),
 		update:   make(chan struct{}, 1),
 
 		accountIdlers: make(map[string]struct{}),
@@ -484,6 +489,7 @@ func (s *Syncer) Register(peer SyncPeer) error {
 		return errors.New("already registered")
 	}
 	s.peers[id] = peer
+	s.rates.Track(id, msgrate.NewTracker(s.rates.MeanCapacities(), s.rates.MedianRoundTrip()))
 
 	// Mark the peer as idle, even if no sync is running
 	s.accountIdlers[id] = struct{}{}
@@ -509,6 +515,7 @@ func (s *Syncer) Unregister(id string) error {
 		return errors.New("not registered")
 	}
 	delete(s.peers, id)
+	s.rates.Untrack(id)
 
 	// Remove status markers, even if no sync is running
 	delete(s.statelessPeers, id)
@@ -851,10 +858,24 @@ func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *ac
 	s.lock.Lock()
 	defer s.lock.Unlock()
 
-	// If there are no idle peers, short circuit assignment
-	if len(s.accountIdlers) == 0 {
+	// Sort the peers by download capacity to use faster ones if many available
+	idlers := &capacitySort{
+		ids:  make([]string, 0, len(s.accountIdlers)),
+		caps: make([]float64, 0, len(s.accountIdlers)),
+	}
+	targetTTL := s.rates.TargetTimeout()
+	for id := range s.accountIdlers {
+		if _, ok := s.statelessPeers[id]; ok {
+			continue
+		}
+		idlers.ids = append(idlers.ids, id)
+		idlers.caps = append(idlers.caps, s.rates.Capacity(id, AccountRangeMsg, targetTTL))
+	}
+	if len(idlers.ids) == 0 {
 		return
 	}
+	sort.Sort(sort.Reverse(idlers))
+
 	// Iterate over all the tasks and try to find a pending one
 	for _, task := range s.tasks {
 		// Skip any tasks already filling
@@ -864,20 +885,15 @@ func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *ac
 		// Task pending retrieval, try to find an idle peer. If no such peer
 		// exists, we probably assigned tasks for all (or they are stateless).
 		// Abort the entire assignment mechanism.
-		var idle string
-		for id := range s.accountIdlers {
-			// If the peer rejected a query in this sync cycle, don't bother asking
-			// again for anything, it's either out of sync or already pruned
-			if _, ok := s.statelessPeers[id]; ok {
-				continue
-			}
-			idle = id
-			break
-		}
-		if idle == "" {
+		if len(idlers.ids) == 0 {
 			return
 		}
-		peer := s.peers[idle]
+		var (
+			idle = idlers.ids[0]
+			peer = s.peers[idle]
+			cap  = idlers.caps[0]
+		)
+		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
 
 		// Matched a pending task to an idle peer, allocate a unique request id
 		var reqid uint64
@@ -895,6 +911,7 @@ func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *ac
 		req := &accountRequest{
 			peer:    idle,
 			id:      reqid,
+			time:    time.Now(),
 			deliver: success,
 			revert:  fail,
 			cancel:  cancel,
@@ -903,8 +920,9 @@ func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *ac
 			limit:   task.Last,
 			task:    task,
 		}
-		req.timeout = time.AfterFunc(requestTimeout, func() {
+		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
 			peer.Log().Debug("Account range request timed out", "reqid", reqid)
+			s.rates.Update(idle, AccountRangeMsg, 0, 0)
 			s.scheduleRevertAccountRequest(req)
 		})
 		s.accountReqs[reqid] = req
@@ -915,7 +933,13 @@ func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *ac
 			defer s.pend.Done()
 
 			// Attempt to send the remote request and revert if it fails
-			if err := peer.RequestAccountRange(reqid, root, req.origin, req.limit, maxRequestSize); err != nil {
+			if cap > maxRequestSize {
+				cap = maxRequestSize
+			}
+			if cap < minRequestSize { // Don't bother with peers below a bare minimum performance
+				cap = minRequestSize
+			}
+			if err := peer.RequestAccountRange(reqid, root, req.origin, req.limit, uint64(cap)); err != nil {
 				peer.Log().Debug("Failed to request account range", "err", err)
 				s.scheduleRevertAccountRequest(req)
 			}
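The per-peer capacity estimate is a float that can fall outside any sane request size, so the account and storage range paths clamp it into the [minRequestSize, maxRequestSize] band before using it as the byte cap. A minimal helper capturing that pattern (illustrative only; the diff inlines this logic at each call site):

// clampRequestBytes bounds a per-peer capacity estimate to the byte band
// defined by the minRequestSize and maxRequestSize constants above.
// Illustrative sketch, not part of the commit.
func clampRequestBytes(cap float64) uint64 {
	if cap > maxRequestSize {
		cap = maxRequestSize
	}
	if cap < minRequestSize { // don't bother with peers below a bare minimum performance
		cap = minRequestSize
	}
	return uint64(cap)
}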
@@ -931,10 +955,24 @@ func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan *
 	s.lock.Lock()
 	defer s.lock.Unlock()
 
-	// If there are no idle peers, short circuit assignment
-	if len(s.bytecodeIdlers) == 0 {
+	// Sort the peers by download capacity to use faster ones if many available
+	idlers := &capacitySort{
+		ids:  make([]string, 0, len(s.bytecodeIdlers)),
+		caps: make([]float64, 0, len(s.bytecodeIdlers)),
+	}
+	targetTTL := s.rates.TargetTimeout()
+	for id := range s.bytecodeIdlers {
+		if _, ok := s.statelessPeers[id]; ok {
+			continue
+		}
+		idlers.ids = append(idlers.ids, id)
+		idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL))
+	}
+	if len(idlers.ids) == 0 {
 		return
 	}
+	sort.Sort(sort.Reverse(idlers))
+
 	// Iterate over all the tasks and try to find a pending one
 	for _, task := range s.tasks {
 		// Skip any tasks not in the bytecode retrieval phase
@@ -948,20 +986,15 @@ func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan *
 		// Task pending retrieval, try to find an idle peer. If no such peer
 		// exists, we probably assigned tasks for all (or they are stateless).
 		// Abort the entire assignment mechanism.
-		var idle string
-		for id := range s.bytecodeIdlers {
-			// If the peer rejected a query in this sync cycle, don't bother asking
-			// again for anything, it's either out of sync or already pruned
-			if _, ok := s.statelessPeers[id]; ok {
-				continue
-			}
-			idle = id
-			break
-		}
-		if idle == "" {
+		if len(idlers.ids) == 0 {
 			return
 		}
-		peer := s.peers[idle]
+		var (
+			idle = idlers.ids[0]
+			peer = s.peers[idle]
+			cap  = idlers.caps[0]
+		)
+		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
 
 		// Matched a pending task to an idle peer, allocate a unique request id
 		var reqid uint64
@@ -976,17 +1009,21 @@ func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan *
 			break
 		}
 		// Generate the network query and send it to the peer
-		hashes := make([]common.Hash, 0, maxCodeRequestCount)
+		if cap > maxCodeRequestCount {
+			cap = maxCodeRequestCount
+		}
+		hashes := make([]common.Hash, 0, int(cap))
 		for hash := range task.codeTasks {
 			delete(task.codeTasks, hash)
 			hashes = append(hashes, hash)
-			if len(hashes) >= maxCodeRequestCount {
+			if len(hashes) >= int(cap) {
 				break
 			}
 		}
 		req := &bytecodeRequest{
 			peer:    idle,
 			id:      reqid,
+			time:    time.Now(),
 			deliver: success,
 			revert:  fail,
 			cancel:  cancel,
@@ -994,8 +1031,9 @@ func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan *
 			hashes: hashes,
 			task:   task,
 		}
-		req.timeout = time.AfterFunc(requestTimeout, func() {
+		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
 			peer.Log().Debug("Bytecode request timed out", "reqid", reqid)
+			s.rates.Update(idle, ByteCodesMsg, 0, 0)
 			s.scheduleRevertBytecodeRequest(req)
 		})
 		s.bytecodeReqs[reqid] = req
@@ -1020,10 +1058,24 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st
 	s.lock.Lock()
 	defer s.lock.Unlock()
 
-	// If there are no idle peers, short circuit assignment
-	if len(s.storageIdlers) == 0 {
+	// Sort the peers by download capacity to use faster ones if many available
+	idlers := &capacitySort{
+		ids:  make([]string, 0, len(s.storageIdlers)),
+		caps: make([]float64, 0, len(s.storageIdlers)),
+	}
+	targetTTL := s.rates.TargetTimeout()
+	for id := range s.storageIdlers {
+		if _, ok := s.statelessPeers[id]; ok {
+			continue
+		}
+		idlers.ids = append(idlers.ids, id)
+		idlers.caps = append(idlers.caps, s.rates.Capacity(id, StorageRangesMsg, targetTTL))
+	}
+	if len(idlers.ids) == 0 {
 		return
 	}
+	sort.Sort(sort.Reverse(idlers))
+
 	// Iterate over all the tasks and try to find a pending one
 	for _, task := range s.tasks {
 		// Skip any tasks not in the storage retrieval phase
@@ -1037,20 +1089,15 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st
 		// Task pending retrieval, try to find an idle peer. If no such peer
 		// exists, we probably assigned tasks for all (or they are stateless).
 		// Abort the entire assignment mechanism.
-		var idle string
-		for id := range s.storageIdlers {
-			// If the peer rejected a query in this sync cycle, don't bother asking
-			// again for anything, it's either out of sync or already pruned
-			if _, ok := s.statelessPeers[id]; ok {
-				continue
-			}
-			idle = id
-			break
-		}
-		if idle == "" {
+		if len(idlers.ids) == 0 {
 			return
 		}
-		peer := s.peers[idle]
+		var (
+			idle = idlers.ids[0]
+			peer = s.peers[idle]
+			cap  = idlers.caps[0]
+		)
+		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
 
 		// Matched a pending task to an idle peer, allocate a unique request id
 		var reqid uint64
@@ -1067,9 +1114,17 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st
 		// Generate the network query and send it to the peer. If there are
 		// large contract tasks pending, complete those before diving into
 		// even more new contracts.
+		if cap > maxRequestSize {
+			cap = maxRequestSize
+		}
+		if cap < minRequestSize { // Don't bother with peers below a bare minimum performance
+			cap = minRequestSize
+		}
+		storageSets := int(cap / 1024)
+
 		var (
-			accounts = make([]common.Hash, 0, maxStorageSetRequestCount)
-			roots    = make([]common.Hash, 0, maxStorageSetRequestCount)
+			accounts = make([]common.Hash, 0, storageSets)
+			roots    = make([]common.Hash, 0, storageSets)
 			subtask  *storageTask
 		)
 		for account, subtasks := range task.SubTasks {
@@ -1096,7 +1151,7 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st
 			accounts = append(accounts, acccount)
 			roots = append(roots, root)
 
-			if len(accounts) >= maxStorageSetRequestCount {
+			if len(accounts) >= storageSets {
 				break
 			}
 		}
@@ -1109,6 +1164,7 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st
 		req := &storageRequest{
 			peer:    idle,
 			id:      reqid,
+			time:    time.Now(),
 			deliver: success,
 			revert:  fail,
 			cancel:  cancel,
@@ -1122,8 +1178,9 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st
 			req.origin = subtask.Next
 			req.limit = subtask.Last
 		}
-		req.timeout = time.AfterFunc(requestTimeout, func() {
+		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
 			peer.Log().Debug("Storage request timed out", "reqid", reqid)
+			s.rates.Update(idle, StorageRangesMsg, 0, 0)
 			s.scheduleRevertStorageRequest(req)
 		})
 		s.storageReqs[reqid] = req
@@ -1138,7 +1195,7 @@ func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *st
 			if subtask != nil {
 				origin, limit = req.origin[:], req.limit[:]
 			}
-			if err := peer.RequestStorageRanges(reqid, root, accounts, origin, limit, maxRequestSize); err != nil {
+			if err := peer.RequestStorageRanges(reqid, root, accounts, origin, limit, uint64(cap)); err != nil {
 				log.Debug("Failed to request storage", "err", err)
 				s.scheduleRevertStorageRequest(req)
 			}
@@ -1157,10 +1214,24 @@ func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fai
 	s.lock.Lock()
 	defer s.lock.Unlock()
 
-	// If there are no idle peers, short circuit assignment
-	if len(s.trienodeHealIdlers) == 0 {
+	// Sort the peers by download capacity to use faster ones if many available
+	idlers := &capacitySort{
+		ids:  make([]string, 0, len(s.trienodeHealIdlers)),
+		caps: make([]float64, 0, len(s.trienodeHealIdlers)),
+	}
+	targetTTL := s.rates.TargetTimeout()
+	for id := range s.trienodeHealIdlers {
+		if _, ok := s.statelessPeers[id]; ok {
+			continue
+		}
+		idlers.ids = append(idlers.ids, id)
+		idlers.caps = append(idlers.caps, s.rates.Capacity(id, TrieNodesMsg, targetTTL))
+	}
+	if len(idlers.ids) == 0 {
 		return
 	}
+	sort.Sort(sort.Reverse(idlers))
+
 	// Iterate over pending tasks and try to find a peer to retrieve with
 	for len(s.healer.trieTasks) > 0 || s.healer.scheduler.Pending() > 0 {
 		// If there are not enough trie tasks queued to fully assign, fill the
@@ -1186,20 +1257,15 @@ func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fai
 		// Task pending retrieval, try to find an idle peer. If no such peer
 		// exists, we probably assigned tasks for all (or they are stateless).
 		// Abort the entire assignment mechanism.
-		var idle string
-		for id := range s.trienodeHealIdlers {
-			// If the peer rejected a query in this sync cycle, don't bother asking
-			// again for anything, it's either out of sync or already pruned
-			if _, ok := s.statelessPeers[id]; ok {
-				continue
-			}
-			idle = id
-			break
-		}
-		if idle == "" {
+		if len(idlers.ids) == 0 {
 			return
 		}
-		peer := s.peers[idle]
+		var (
+			idle = idlers.ids[0]
+			peer = s.peers[idle]
+			cap  = idlers.caps[0]
+		)
+		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
 
 		// Matched a pending task to an idle peer, allocate a unique request id
 		var reqid uint64
@@ -1214,10 +1280,13 @@ func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fai
 			break
 		}
 		// Generate the network query and send it to the peer
+		if cap > maxTrieRequestCount {
+			cap = maxTrieRequestCount
+		}
 		var (
-			hashes   = make([]common.Hash, 0, maxTrieRequestCount)
-			paths    = make([]trie.SyncPath, 0, maxTrieRequestCount)
-			pathsets = make([]TrieNodePathSet, 0, maxTrieRequestCount)
+			hashes   = make([]common.Hash, 0, int(cap))
+			paths    = make([]trie.SyncPath, 0, int(cap))
+			pathsets = make([]TrieNodePathSet, 0, int(cap))
 		)
 		for hash, pathset := range s.healer.trieTasks {
 			delete(s.healer.trieTasks, hash)
@@ -1226,13 +1295,14 @@ func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fai
 			paths = append(paths, pathset)
 			pathsets = append(pathsets, [][]byte(pathset)) // TODO(karalabe): group requests by account hash
 
-			if len(hashes) >= maxTrieRequestCount {
+			if len(hashes) >= int(cap) {
 				break
 			}
 		}
 		req := &trienodeHealRequest{
 			peer:    idle,
 			id:      reqid,
+			time:    time.Now(),
 			deliver: success,
 			revert:  fail,
 			cancel:  cancel,
@@ -1241,8 +1311,9 @@ func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fai
 			paths:   paths,
 			task:    s.healer,
 		}
-		req.timeout = time.AfterFunc(requestTimeout, func() {
+		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
 			peer.Log().Debug("Trienode heal request timed out", "reqid", reqid)
+			s.rates.Update(idle, TrieNodesMsg, 0, 0)
 			s.scheduleRevertTrienodeHealRequest(req)
 		})
 		s.trienodeHealReqs[reqid] = req
@@ -1267,10 +1338,24 @@ func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fai
 	s.lock.Lock()
 	defer s.lock.Unlock()
 
-	// If there are no idle peers, short circuit assignment
-	if len(s.bytecodeHealIdlers) == 0 {
+	// Sort the peers by download capacity to use faster ones if many available
+	idlers := &capacitySort{
+		ids:  make([]string, 0, len(s.bytecodeHealIdlers)),
+		caps: make([]float64, 0, len(s.bytecodeHealIdlers)),
+	}
+	targetTTL := s.rates.TargetTimeout()
+	for id := range s.bytecodeHealIdlers {
+		if _, ok := s.statelessPeers[id]; ok {
+			continue
+		}
+		idlers.ids = append(idlers.ids, id)
+		idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL))
+	}
+	if len(idlers.ids) == 0 {
 		return
 	}
+	sort.Sort(sort.Reverse(idlers))
+
 	// Iterate over pending tasks and try to find a peer to retrieve with
 	for len(s.healer.codeTasks) > 0 || s.healer.scheduler.Pending() > 0 {
 		// If there are not enough trie tasks queued to fully assign, fill the
@@ -1296,20 +1381,15 @@ func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fai
 		// Task pending retrieval, try to find an idle peer. If no such peer
 		// exists, we probably assigned tasks for all (or they are stateless).
 		// Abort the entire assignment mechanism.
-		var idle string
-		for id := range s.bytecodeHealIdlers {
-			// If the peer rejected a query in this sync cycle, don't bother asking
-			// again for anything, it's either out of sync or already pruned
-			if _, ok := s.statelessPeers[id]; ok {
-				continue
-			}
-			idle = id
-			break
-		}
-		if idle == "" {
+		if len(idlers.ids) == 0 {
 			return
 		}
-		peer := s.peers[idle]
+		var (
+			idle = idlers.ids[0]
+			peer = s.peers[idle]
+			cap  = idlers.caps[0]
+		)
+		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
 
 		// Matched a pending task to an idle peer, allocate a unique request id
 		var reqid uint64
@@ -1324,18 +1404,22 @@ func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fai
 			break
 		}
 		// Generate the network query and send it to the peer
-		hashes := make([]common.Hash, 0, maxCodeRequestCount)
+		if cap > maxCodeRequestCount {
+			cap = maxCodeRequestCount
+		}
+		hashes := make([]common.Hash, 0, int(cap))
 		for hash := range s.healer.codeTasks {
 			delete(s.healer.codeTasks, hash)
 
 			hashes = append(hashes, hash)
-			if len(hashes) >= maxCodeRequestCount {
+			if len(hashes) >= int(cap) {
 				break
 			}
 		}
 		req := &bytecodeHealRequest{
 			peer:    idle,
 			id:      reqid,
+			time:    time.Now(),
 			deliver: success,
 			revert:  fail,
 			cancel:  cancel,
@@ -1343,8 +1427,9 @@ func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fai
 			hashes: hashes,
 			task:   s.healer,
 		}
-		req.timeout = time.AfterFunc(requestTimeout, func() {
+		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
 			peer.Log().Debug("Bytecode heal request timed out", "reqid", reqid)
+			s.rates.Update(idle, ByteCodesMsg, 0, 0)
 			s.scheduleRevertBytecodeHealRequest(req)
 		})
 		s.bytecodeHealReqs[reqid] = req
@@ -2142,6 +2227,7 @@ func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, acco
 		return nil
 	}
 	delete(s.accountReqs, id)
+	s.rates.Update(peer.ID(), AccountRangeMsg, time.Since(req.time), int(size))
 
 	// Clean up the request timeout timer, we'll see how to proceed further based
 	// on the actual delivered content
@@ -2253,6 +2339,7 @@ func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error
 		return nil
 	}
 	delete(s.bytecodeReqs, id)
+	s.rates.Update(peer.ID(), ByteCodesMsg, time.Since(req.time), len(bytecodes))
 
 	// Clean up the request timeout timer, we'll see how to proceed further based
 	// on the actual delivered content
@@ -2361,6 +2448,7 @@ func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slo
 		return nil
 	}
 	delete(s.storageReqs, id)
+	s.rates.Update(peer.ID(), StorageRangesMsg, time.Since(req.time), int(size))
 
 	// Clean up the request timeout timer, we'll see how to proceed further based
 	// on the actual delivered content
@@ -2487,6 +2575,7 @@ func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error
 		return nil
 	}
 	delete(s.trienodeHealReqs, id)
+	s.rates.Update(peer.ID(), TrieNodesMsg, time.Since(req.time), len(trienodes))
 
 	// Clean up the request timeout timer, we'll see how to proceed further based
 	// on the actual delivered content
@@ -2581,6 +2670,7 @@ func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) e
 		return nil
 	}
 	delete(s.bytecodeHealReqs, id)
+	s.rates.Update(peer.ID(), ByteCodesMsg, time.Since(req.time), len(bytecodes))
 
 	// Clean up the request timeout timer, we'll see how to proceed further based
 	// on the actual delivered content
@@ -2756,3 +2846,24 @@ func estimateRemainingSlots(hashes int, last common.Hash) (uint64, error) {
 	}
 	return space.Uint64() - uint64(hashes), nil
 }
+
+// capacitySort implements the Sort interface, allowing sorting by peer message
+// throughput. Note, callers should use sort.Reverse to get the desired effect
+// of highest capacity being at the front.
+type capacitySort struct {
+	ids  []string
+	caps []float64
+}
+
+func (s *capacitySort) Len() int {
+	return len(s.ids)
+}
+
+func (s *capacitySort) Less(i, j int) bool {
+	return s.caps[i] < s.caps[j]
+}
+
+func (s *capacitySort) Swap(i, j int) {
+	s.ids[i], s.ids[j] = s.ids[j], s.ids[i]
+	s.caps[i], s.caps[j] = s.caps[j], s.caps[i]
+}
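Less orders ascending by capacity, which is why every assignment loop above wraps the sorter in sort.Reverse before popping idlers.ids[0]. A short usage sketch, assuming the capacitySort type above and the standard library sort package:

	idlers := &capacitySort{
		ids:  []string{"slow", "fast", "medium"},
		caps: []float64{1, 3, 2},
	}
	sort.Sort(sort.Reverse(idlers))
	// idlers.ids is now ["fast", "medium", "slow"]; the assign* functions take
	// ids[0]/caps[0] and re-slice both in lockstep as peers are handed work.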