eth/downloader: prioritize block fetch based on chain position, cap memory use

This commit is contained in:
Péter Szilágyi
2015-05-06 15:32:53 +03:00
parent 97c37356fd
commit 4800c94392
11 changed files with 808 additions and 283 deletions

View File

@ -11,11 +11,10 @@ import (
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/logger"
"github.com/ethereum/go-ethereum/logger/glog"
"gopkg.in/fatih/set.v0"
)
const (
maxBlockFetch = 256 // Amount of max blocks to be fetched per chunk
maxBlockFetch = 128 // Amount of max blocks to be fetched per chunk
peerCountTimeout = 12 * time.Second // Amount of time it takes for the peer handler to ignore minDesiredPeerCount
hashTtl = 20 * time.Second // The amount of time it takes for a hash request to time out
)
@ -80,7 +79,7 @@ type Downloader struct {
func New(hasBlock hashCheckFn, getBlock getBlockFn) *Downloader {
downloader := &Downloader{
queue: newqueue(),
queue: newQueue(),
peers: make(peers),
hasBlock: hasBlock,
getBlock: getBlock,
@ -93,7 +92,7 @@ func New(hasBlock hashCheckFn, getBlock getBlockFn) *Downloader {
}
func (d *Downloader) Stats() (current int, max int) {
return d.queue.blockHashes.Size(), d.queue.fetchPool.Size() + d.queue.hashPool.Size()
return d.queue.Size()
}
func (d *Downloader) RegisterPeer(id string, hash common.Hash, getHashes hashFetcherFn, getBlocks blockFetcherFn) error {
@ -111,7 +110,7 @@ func (d *Downloader) RegisterPeer(id string, hash common.Hash, getHashes hashFet
return nil
}
// UnregisterPeer unregister's a peer. This will prevent any action from the specified peer.
// UnregisterPeer unregisters a peer. This will prevent any action from the specified peer.
func (d *Downloader) UnregisterPeer(id string) {
d.mu.Lock()
defer d.mu.Unlock()
@ -121,20 +120,20 @@ func (d *Downloader) UnregisterPeer(id string) {
delete(d.peers, id)
}
// SynchroniseWithPeer will select the peer and use it for synchronising. If an empty string is given
// it will use the best peer possible and synchronise if it's TD is higher than our own. If any of the
// SynchroniseWithPeer will select the peer and use it for synchronizing. If an empty string is given
// it will use the best peer possible and synchronize if its TD is higher than our own. If any of the
// checks fail an error will be returned. This method is synchronous
func (d *Downloader) Synchronise(id string, hash common.Hash) error {
// Make sure it's doing neither. Once done we can restart the
// downloading process if the TD is higher. For now just get on
// with whatever is going on. This prevents unecessary switching.
// with whatever is going on. This prevents unnecessary switching.
if d.isBusy() {
return errBusy
}
// When a synchronisation attempt is made while the queue stil
// When a synchronization attempt is made while the queue still
// contains items we abort the sync attempt
if d.queue.size() > 0 {
if done, pend := d.queue.Size(); done+pend > 0 {
return errPendingQueue
}
@ -157,56 +156,23 @@ func (d *Downloader) Synchronise(id string, hash common.Hash) error {
// are processed. If the block count reaches zero and done is called
// we reset the queue for the next batch of incoming hashes and blocks.
func (d *Downloader) Done() {
d.queue.mu.Lock()
defer d.queue.mu.Unlock()
if len(d.queue.blocks) == 0 {
d.queue.resetNoTS()
}
d.queue.Done()
}
// TakeBlocks takes blocks from the queue and yields them to the blockTaker handler
// it's possible it yields no blocks
func (d *Downloader) TakeBlocks() types.Blocks {
d.queue.mu.Lock()
defer d.queue.mu.Unlock()
var blocks types.Blocks
if len(d.queue.blocks) > 0 {
// Make sure the parent hash is known
if d.queue.blocks[0] != nil && !d.hasBlock(d.queue.blocks[0].ParentHash()) {
return nil
}
for _, block := range d.queue.blocks {
if block == nil {
break
}
blocks = append(blocks, block)
}
d.queue.blockOffset += len(blocks)
// delete the blocks from the slice and let them be garbage collected
// without this slice trick the blocks would stay in memory until nil
// would be assigned to d.queue.blocks
copy(d.queue.blocks, d.queue.blocks[len(blocks):])
for k, n := len(d.queue.blocks)-len(blocks), len(d.queue.blocks); k < n; k++ {
d.queue.blocks[k] = nil
}
d.queue.blocks = d.queue.blocks[:len(d.queue.blocks)-len(blocks)]
//d.queue.blocks = d.queue.blocks[len(blocks):]
if len(d.queue.blocks) == 0 {
d.queue.blocks = nil
}
// Check that there are blocks available and its parents are known
head := d.queue.GetHeadBlock()
if head == nil || !d.hasBlock(head.ParentHash()) {
return nil
}
return blocks
// Retrieve a full batch of blocks
return d.queue.TakeBlocks(head)
}
func (d *Downloader) Has(hash common.Hash) bool {
return d.queue.has(hash)
return d.queue.Has(hash)
}
func (d *Downloader) getFromPeer(p *peer, hash common.Hash, ignoreInitial bool) (err error) {
@ -214,7 +180,7 @@ func (d *Downloader) getFromPeer(p *peer, hash common.Hash, ignoreInitial bool)
defer func() {
// reset on error
if err != nil {
d.queue.reset()
d.queue.Reset()
}
}()
@ -244,7 +210,7 @@ func (d *Downloader) startFetchingHashes(p *peer, h common.Hash, ignoreInitial b
atomic.StoreInt32(&d.fetchingHashes, 1)
defer atomic.StoreInt32(&d.fetchingHashes, 0)
if d.queue.has(h) {
if d.queue.Has(h) { // TODO: Is this possible? Shouldn't queue be empty for startFetchingHashes to be even called?
return errAlreadyInPool
}
@ -256,7 +222,7 @@ func (d *Downloader) startFetchingHashes(p *peer, h common.Hash, ignoreInitial b
// In such circumstances we don't need to download the block so don't add it to the queue.
if !ignoreInitial {
// Add the hash to the queue first
d.queue.hashPool.Add(h)
d.queue.Insert([]common.Hash{h})
}
// Get the first batch of hashes
p.getHashes(h)
@ -273,7 +239,7 @@ out:
for {
select {
case hashPack := <-d.hashCh:
// make sure the active peer is giving us the hashes
// Make sure the active peer is giving us the hashes
if hashPack.peerId != activePeer.id {
glog.V(logger.Debug).Infof("Received hashes from incorrect peer(%s)\n", hashPack.peerId)
break
@ -281,43 +247,37 @@ out:
failureResponseTimer.Reset(hashTtl)
var (
hashes = hashPack.hashes
done bool // determines whether we're done fetching hashes (i.e. common hash found)
)
hashSet := set.New()
for _, hash = range hashes {
if d.hasBlock(hash) || d.queue.blockHashes.Has(hash) {
glog.V(logger.Debug).Infof("Found common hash %x\n", hash[:4])
// Make sure the peer actually gave something valid
if len(hashPack.hashes) == 0 {
glog.V(logger.Debug).Infof("Peer (%s) responded with empty hash set\n", activePeer.id)
d.queue.Reset()
return errEmptyHashSet
}
// Determine if we're done fetching hashes (queue up all pending), and continue if not done
done, index := false, 0
for index, hash = range hashPack.hashes {
if d.hasBlock(hash) || d.queue.GetBlock(hash) != nil {
glog.V(logger.Debug).Infof("Found common hash %x\n", hash[:4])
hashPack.hashes = hashPack.hashes[:index]
done = true
break
}
hashSet.Add(hash)
}
d.queue.put(hashSet)
d.queue.Insert(hashPack.hashes)
// Add hashes to the chunk set
if len(hashes) == 0 { // Make sure the peer actually gave you something valid
glog.V(logger.Debug).Infof("Peer (%s) responded with empty hash set\n", activePeer.id)
d.queue.reset()
return errEmptyHashSet
} else if !done { // Check if we're done fetching
// Get the next set of hashes
if !done {
activePeer.getHashes(hash)
} else { // we're done
// The offset of the queue is determined by the highest known block
var offset int
if block := d.getBlock(hash); block != nil {
offset = int(block.NumberU64() + 1)
}
// allocate proper size for the queueue
d.queue.alloc(offset, d.queue.hashPool.Size())
break out
continue
}
// We're done, allocate the download cache and proceed pulling the blocks
offset := 0
if block := d.getBlock(hash); block != nil {
offset = int(block.NumberU64() + 1)
}
d.queue.Alloc(offset)
break out
case <-failureResponseTimer.C:
glog.V(logger.Debug).Infof("Peer (%s) didn't respond in time for hash request\n", p.id)
@ -326,7 +286,7 @@ out:
// already fetched hash list. This can't guarantee 100% correctness but does
// a fair job. This is always either correct or false incorrect.
for id, peer := range d.peers {
if d.queue.hashPool.Has(peer.recentHash) && !attemptedPeers[id] {
if d.queue.Has(peer.recentHash) && !attemptedPeers[id] {
p = peer
break
}
@ -335,7 +295,7 @@ out:
// if all peers have been tried, abort the process entirely or if the hash is
// the zero hash.
if p == nil || (hash == common.Hash{}) {
d.queue.reset()
d.queue.Reset()
return errTimeout
}
@ -346,13 +306,14 @@ out:
glog.V(logger.Debug).Infof("Hash fetching switched to new peer(%s)\n", p.id)
}
}
glog.V(logger.Detail).Infof("Downloaded hashes (%d) in %v\n", d.queue.hashPool.Size(), time.Since(start))
glog.V(logger.Detail).Infof("Downloaded hashes (%d) in %v\n", d.queue.Pending(), time.Since(start))
return nil
}
func (d *Downloader) startFetchingBlocks(p *peer) error {
glog.V(logger.Detail).Infoln("Downloading", d.queue.hashPool.Size(), "block(s)")
glog.V(logger.Detail).Infoln("Downloading", d.queue.Pending(), "block(s)")
atomic.StoreInt32(&d.downloadingBlocks, 1)
defer atomic.StoreInt32(&d.downloadingBlocks, 0)
// Defer the peer reset. This will empty the peer requested set
@ -362,7 +323,7 @@ func (d *Downloader) startFetchingBlocks(p *peer) error {
start := time.Now()
// default ticker for re-fetching blocks everynow and then
// default ticker for re-fetching blocks every now and then
ticker := time.NewTicker(20 * time.Millisecond)
out:
for {
@ -371,7 +332,7 @@ out:
// If the peer was previously banned and failed to deliver its pack
// in a reasonable time frame, ignore its message.
if d.peers[blockPack.peerId] != nil {
err := d.queue.deliver(blockPack.peerId, blockPack.blocks)
err := d.queue.Deliver(blockPack.peerId, blockPack.blocks)
if err != nil {
glog.V(logger.Debug).Infof("deliver failed for peer %s: %v\n", blockPack.peerId, err)
// FIXME d.UnregisterPeer(blockPack.peerId)
@ -385,46 +346,49 @@ out:
d.peers.setState(blockPack.peerId, idleState)
}
case <-ticker.C:
// after removing bad peers make sure we actually have suffucient peer left to keep downlading
// after removing bad peers make sure we actually have sufficient peers left to keep downloading
if len(d.peers) == 0 {
d.queue.reset()
d.queue.Reset()
return errNoPeers
}
// If there are unrequested hashes left start fetching
// from the available peers.
if d.queue.hashPool.Size() > 0 {
if d.queue.Pending() > 0 {
// Throttle the download if block cache is full and waiting processing
if d.queue.Throttle() {
continue
}
availablePeers := d.peers.get(idleState)
for _, peer := range availablePeers {
// Get a possible chunk. If nil is returned no chunk
// could be returned due to no hashes available.
chunk := d.queue.get(peer, maxBlockFetch)
if chunk == nil {
request := d.queue.Reserve(peer, maxBlockFetch)
if request == nil {
continue
}
// XXX make fetch blocking.
// Fetch the chunk and check for error. If the peer was somehow
// already fetching a chunk due to a bug, it will be returned to
// the queue
if err := peer.fetch(chunk); err != nil {
if err := peer.fetch(request); err != nil {
// log for tracing
glog.V(logger.Debug).Infof("peer %s received double work (state = %v)\n", peer.id, peer.state)
d.queue.put(chunk.hashes)
d.queue.Cancel(request)
}
}
// make sure that we have peers available for fetching. If all peers have been tried
// and all failed throw an error
if len(d.queue.fetching) == 0 {
d.queue.reset()
if d.queue.InFlight() == 0 {
d.queue.Reset()
return fmt.Errorf("%v peers avaialable = %d. total peers = %d. hashes needed = %d", errPeersUnavailable, len(availablePeers), len(d.peers), d.queue.hashPool.Size())
return fmt.Errorf("%v peers avaialable = %d. total peers = %d. hashes needed = %d", errPeersUnavailable, len(availablePeers), len(d.peers), d.queue.Pending())
}
} else if len(d.queue.fetching) == 0 {
// When there are no more queue and no more `fetching`. We can
} else if d.queue.InFlight() == 0 {
// When nothing is pending in the queue and nothing is in flight, we can
// safely assume we're done. Another part of the process will check
// for parent errors and will re-request anything that's missing
break out
@ -434,27 +398,13 @@ out:
// that badly or poorly behave are removed from the peer set (not banned).
// Bad peers are excluded from the available peer set and therefore won't be
// reused. XXX We could re-introduce peers after X time.
d.queue.mu.Lock()
var badPeers []string
for pid, chunk := range d.queue.fetching {
if time.Since(chunk.itime) > blockTtl {
badPeers = append(badPeers, pid)
// remove peer as good peer from peer list
// FIXME d.UnregisterPeer(pid)
}
}
d.queue.mu.Unlock()
badPeers := d.queue.Expire(blockTtl)
for _, pid := range badPeers {
// A nil chunk is delivered so that the chunk's hashes are given
// back to the queue objects. When hashes are put back in the queue
// other (decent) peers can pick them up.
// XXX We could make use of a reputation system here ranking peers
// in their performance
// 1) Time for them to respond;
// 2) Measure their speed;
// 3) Amount and availability.
d.queue.deliver(pid, nil)
if peer := d.peers[pid]; peer != nil {
peer.demote()
peer.reset()
@ -486,7 +436,7 @@ func (d *Downloader) AddHashes(id string, hashes []common.Hash) error {
if glog.V(logger.Detail) && len(hashes) != 0 {
from, to := hashes[0], hashes[len(hashes)-1]
glog.Infof("adding %d (T=%d) hashes [ %x / %x ] from: %s\n", len(hashes), d.queue.hashPool.Size(), from[:4], to[:4], id)
glog.Infof("adding %d (T=%d) hashes [ %x / %x ] from: %s\n", len(hashes), d.queue.Pending(), from[:4], to[:4], id)
}
d.hashCh <- hashPack{id, hashes}