core, eth, trie: fix data races and merge/review issues

This commit is contained in:
Péter Szilágyi
2015-10-13 12:04:25 +03:00
parent aa0538db0b
commit 5b0ee8ec30
27 changed files with 765 additions and 465 deletions

View File

@ -18,7 +18,9 @@
package downloader
import (
"crypto/rand"
"errors"
"fmt"
"math"
"math/big"
"strings"
@ -59,9 +61,11 @@ var (
maxQueuedStates = 256 * 1024 // [eth/63] Maximum number of state requests to queue (DOS protection)
maxResultsProcess = 256 // Number of download results to import at once into the chain
headerCheckFrequency = 100 // Verification frequency of the downloaded headers during fast sync
minCheckedHeaders = 2048 // Number of headers to verify fully when approaching the chain head
minFullBlocks = 1024 // Number of blocks to retrieve fully even in fast sync
fsHeaderCheckFrequency = 100 // Verification frequency of the downloaded headers during fast sync
fsHeaderSafetyNet = 2048 // Number of headers to discard in case a chain violation is detected
fsHeaderForceVerify = 24 // Number of headers to verify before and after the pivot to accept it
fsPivotInterval = 512 // Number of headers out of which to randomize the pivot point
fsMinFullBlocks = 1024 // Number of blocks to retrieve fully even in fast sync
)
var (
@ -85,12 +89,14 @@ var (
errCancelHeaderFetch = errors.New("block header download canceled (requested)")
errCancelBodyFetch = errors.New("block body download canceled (requested)")
errCancelReceiptFetch = errors.New("receipt download canceled (requested)")
errCancelStateFetch = errors.New("state data download canceled (requested)")
errNoSyncActive = errors.New("no sync active")
)
type Downloader struct {
mode SyncMode // Synchronisation mode defining the strategies used
mux *event.TypeMux // Event multiplexer to announce sync operation events
mode SyncMode // Synchronisation mode defining the strategy used (per sync cycle)
noFast bool // Flag to disable fast syncing in case of a security error
mux *event.TypeMux // Event multiplexer to announce sync operation events
queue *queue // Scheduler for selecting the hashes to download
peers *peerSet // Set of active peers from which download can proceed
@ -150,13 +156,13 @@ type Downloader struct {
}
// New creates a new downloader to fetch hashes and blocks from remote peers.
func New(mode SyncMode, stateDb ethdb.Database, mux *event.TypeMux, hasHeader headerCheckFn, hasBlock blockCheckFn, getHeader headerRetrievalFn,
func New(stateDb ethdb.Database, mux *event.TypeMux, hasHeader headerCheckFn, hasBlock blockCheckFn, getHeader headerRetrievalFn,
getBlock blockRetrievalFn, headHeader headHeaderRetrievalFn, headBlock headBlockRetrievalFn, headFastBlock headFastBlockRetrievalFn,
commitHeadBlock headBlockCommitterFn, getTd tdRetrievalFn, insertHeaders headerChainInsertFn, insertBlocks blockChainInsertFn,
insertReceipts receiptChainInsertFn, rollback chainRollbackFn, dropPeer peerDropFn) *Downloader {
return &Downloader{
mode: mode,
mode: FullSync,
mux: mux,
queue: newQueue(stateDb),
peers: newPeerSet(),
@ -188,19 +194,28 @@ func New(mode SyncMode, stateDb ethdb.Database, mux *event.TypeMux, hasHeader he
}
}
// Boundaries retrieves the synchronisation boundaries, specifically the origin
// block where synchronisation started at (may have failed/suspended) and the
// latest known block which the synchonisation targets.
func (d *Downloader) Boundaries() (uint64, uint64) {
// Progress retrieves the synchronisation boundaries, specifically the origin
// block where synchronisation started at (may have failed/suspended); the block
// or header sync is currently at; and the latest known block which the sync targets.
func (d *Downloader) Progress() (uint64, uint64, uint64) {
d.syncStatsLock.RLock()
defer d.syncStatsLock.RUnlock()
return d.syncStatsChainOrigin, d.syncStatsChainHeight
current := uint64(0)
switch d.mode {
case FullSync:
current = d.headBlock().NumberU64()
case FastSync:
current = d.headFastBlock().NumberU64()
case LightSync:
current = d.headHeader().Number.Uint64()
}
return d.syncStatsChainOrigin, current, d.syncStatsChainHeight
}
// Synchronising returns whether the downloader is currently retrieving blocks.
func (d *Downloader) Synchronising() bool {
return atomic.LoadInt32(&d.synchronising) > 0
return atomic.LoadInt32(&d.synchronising) > 0 || atomic.LoadInt32(&d.processing) > 0
}
// RegisterPeer injects a new download peer into the set of block sources to be
@ -233,10 +248,10 @@ func (d *Downloader) UnregisterPeer(id string) error {
// Synchronise tries to sync up our local block chain with a remote peer, both
// adding various sanity checks as well as wrapping it with various log entries.
func (d *Downloader) Synchronise(id string, head common.Hash, td *big.Int) {
func (d *Downloader) Synchronise(id string, head common.Hash, td *big.Int, mode SyncMode) {
glog.V(logger.Detail).Infof("Attempting synchronisation: %v, head [%x…], TD %v", id, head[:4], td)
switch err := d.synchronise(id, head, td); err {
switch err := d.synchronise(id, head, td, mode); err {
case nil:
glog.V(logger.Detail).Infof("Synchronisation completed")
@ -258,7 +273,7 @@ func (d *Downloader) Synchronise(id string, head common.Hash, td *big.Int) {
// synchronise will select the peer and use it for synchronising. If an empty string is given
// it will use the best peer possible and synchronize if its TD is higher than our own. If any of the
// checks fail an error will be returned. This method is synchronous
func (d *Downloader) synchronise(id string, hash common.Hash, td *big.Int) error {
func (d *Downloader) synchronise(id string, hash common.Hash, td *big.Int, mode SyncMode) error {
// Mock out the synchronisation if testing
if d.synchroniseMock != nil {
return d.synchroniseMock(id, hash)
@ -298,6 +313,11 @@ func (d *Downloader) synchronise(id string, hash common.Hash, td *big.Int) error
d.cancelCh = make(chan struct{})
d.cancelLock.Unlock()
// Set the requested sync mode, unless it's forbidden
d.mode = mode
if d.mode == FastSync && d.noFast {
d.mode = FullSync
}
// Retrieve the origin peer and initiate the downloading process
p := d.peers.Peer(id)
if p == nil {
@ -306,13 +326,6 @@ func (d *Downloader) synchronise(id string, hash common.Hash, td *big.Int) error
return d.syncWithPeer(p, hash, td)
}
/*
// Has checks if the downloader knows about a particular hash, meaning that its
// either already downloaded of pending retrieval.
func (d *Downloader) Has(hash common.Hash) bool {
return d.queue.Has(hash)
}
*/
// syncWithPeer starts a block synchronization based on the hash chain from the
// specified peer and head hash.
func (d *Downloader) syncWithPeer(p *peer, hash common.Hash, td *big.Int) (err error) {
@ -387,8 +400,28 @@ func (d *Downloader) syncWithPeer(p *peer, hash common.Hash, td *big.Int) (err e
// Initiate the sync using a concurrent header and content retrieval algorithm
pivot := uint64(0)
if latest > uint64(minFullBlocks) {
pivot = latest - uint64(minFullBlocks)
switch d.mode {
case LightSync:
pivot = latest
case FastSync:
// Calculate the new fast/slow sync pivot point
pivotOffset, err := rand.Int(rand.Reader, big.NewInt(int64(fsPivotInterval)))
if err != nil {
panic(fmt.Sprintf("Failed to access crypto random source: %v", err))
}
if latest > uint64(fsMinFullBlocks)+pivotOffset.Uint64() {
pivot = latest - uint64(fsMinFullBlocks) - pivotOffset.Uint64()
}
// If the point is below the origin, move origin back to ensure state download
if pivot < origin {
if pivot > 0 {
origin = pivot - 1
} else {
origin = 0
}
}
glog.V(logger.Debug).Infof("Fast syncing until pivot block #%d", pivot)
}
d.queue.Prepare(origin+1, d.mode, pivot)
@ -396,10 +429,10 @@ func (d *Downloader) syncWithPeer(p *peer, hash common.Hash, td *big.Int) (err e
d.syncInitHook(origin, latest)
}
errc := make(chan error, 4)
go func() { errc <- d.fetchHeaders(p, td, origin+1, latest) }() // Headers are always retrieved
go func() { errc <- d.fetchBodies(origin + 1) }() // Bodies are retrieved during normal and fast sync
go func() { errc <- d.fetchReceipts(origin + 1) }() // Receipts are retrieved during fast sync
go func() { errc <- d.fetchNodeData() }() // Node state data is retrieved during fast sync
go func() { errc <- d.fetchHeaders(p, td, origin+1) }() // Headers are always retrieved
go func() { errc <- d.fetchBodies(origin + 1) }() // Bodies are retrieved during normal and fast sync
go func() { errc <- d.fetchReceipts(origin + 1) }() // Receipts are retrieved during fast sync
go func() { errc <- d.fetchNodeData() }() // Node state data is retrieved during fast sync
// If any fetcher fails, cancel the others
var fail error
@ -844,7 +877,7 @@ func (d *Downloader) fetchBlocks61(from uint64) error {
for _, peer := range idles {
// Short circuit if throttling activated
if d.queue.ThrottleBlocks() {
if d.queue.ShouldThrottleBlocks() {
throttled = true
break
}
@ -860,8 +893,13 @@ func (d *Downloader) fetchBlocks61(from uint64) error {
}
// Fetch the chunk and make sure any errors return the hashes to the queue
if err := peer.Fetch61(request); err != nil {
glog.V(logger.Error).Infof("%v: fetch failed, rescheduling", peer)
d.queue.CancelBlocks(request)
// Although we could try and make an attempt to fix this, this error really
// means that we've double allocated a fetch task to a peer. If that is the
// case, the internal state of the downloader and the queue is very wrong so
// better hard crash and note the error instead of silently accumulating into
// a much bigger issue.
panic(fmt.Sprintf("%v: fetch assignment failed, hard panic", peer))
d.queue.CancelBlocks(request) // noop for now
}
}
// Make sure that we have peers available for fetching. If all peers have been tried
@ -1051,28 +1089,34 @@ func (d *Downloader) findAncestor(p *peer) (uint64, error) {
//
// The queue parameter can be used to switch between queuing headers for block
// body download too, or directly import as pure header chains.
func (d *Downloader) fetchHeaders(p *peer, td *big.Int, from, latest uint64) error {
func (d *Downloader) fetchHeaders(p *peer, td *big.Int, from uint64) error {
glog.V(logger.Debug).Infof("%v: downloading headers from #%d", p, from)
defer glog.V(logger.Debug).Infof("%v: header download terminated", p)
// Calculate the pivoting point for switching from fast to slow sync
pivot := d.queue.FastSyncPivot()
// Keep a count of uncertain headers to roll back
rollback := []*types.Header{}
defer func() {
if len(rollback) > 0 {
// Flatten the headers and roll them back
hashes := make([]common.Hash, len(rollback))
for i, header := range rollback {
hashes[i] = header.Hash()
}
lh, lfb, lb := d.headHeader().Number, d.headFastBlock().Number(), d.headBlock().Number()
d.rollback(hashes)
glog.V(logger.Warn).Infof("Rolled back %d headers (LH: %d->%d, FB: %d->%d, LB: %d->%d)",
len(hashes), lh, d.headHeader().Number, lfb, d.headFastBlock().Number(), lb, d.headBlock().Number())
// If we're already past the pivot point, this could be an attack, disable fast sync
if rollback[len(rollback)-1].Number.Uint64() > pivot {
d.noFast = true
}
}
}()
// Calculate the pivoting point for switching from fast to slow sync
pivot := uint64(0)
if d.mode == FastSync && latest > uint64(minFullBlocks) {
pivot = latest - uint64(minFullBlocks)
} else if d.mode == LightSync {
pivot = latest
}
// Create a timeout timer, and the associated hash fetcher
request := time.Now() // time of the last fetch request
timeout := time.NewTimer(0) // timer to dump a non-responsive active peer
@ -1135,6 +1179,19 @@ func (d *Downloader) fetchHeaders(p *peer, td *big.Int, from, latest uint64) err
if !gotHeaders && td.Cmp(d.getTd(d.headBlock().Hash())) > 0 {
return errStallingPeer
}
// If fast or light syncing, ensure promised headers are indeed delivered. This is
// needed to detect scenarios where an attacker feeds a bad pivot and then bails out
// of delivering the post-pivot blocks that would flag the invalid content.
//
// This check cannot be executed "as is" for full imports, since blocks may still be
// queued for processing when the header download completes. However, as long as the
// peer gave us something useful, we're already happy/progressed (above check).
if d.mode == FastSync || d.mode == LightSync {
if td.Cmp(d.getTd(d.headHeader().Hash())) > 0 {
return errStallingPeer
}
}
rollback = nil
return nil
}
gotHeaders = true
@ -1152,8 +1209,8 @@ func (d *Downloader) fetchHeaders(p *peer, td *big.Int, from, latest uint64) err
}
}
// If we're importing pure headers, verify based on their recentness
frequency := headerCheckFrequency
if headers[len(headers)-1].Number.Uint64()+uint64(minCheckedHeaders) > pivot {
frequency := fsHeaderCheckFrequency
if headers[len(headers)-1].Number.Uint64()+uint64(fsHeaderForceVerify) > pivot {
frequency = 1
}
if n, err := d.insertHeaders(headers, frequency); err != nil {
@ -1162,11 +1219,8 @@ func (d *Downloader) fetchHeaders(p *peer, td *big.Int, from, latest uint64) err
}
// All verifications passed, store newly found uncertain headers
rollback = append(rollback, unknown...)
if len(rollback) > minCheckedHeaders {
rollback = append(rollback[:0], rollback[len(rollback)-minCheckedHeaders:]...)
}
if headers[len(headers)-1].Number.Uint64() >= pivot {
rollback = rollback[:0]
if len(rollback) > fsHeaderSafetyNet {
rollback = append(rollback[:0], rollback[len(rollback)-fsHeaderSafetyNet:]...)
}
}
if d.mode == FullSync || d.mode == FastSync {
@ -1230,12 +1284,11 @@ func (d *Downloader) fetchBodies(from uint64) error {
expire = func() []string { return d.queue.ExpireBodies(bodyHardTTL) }
fetch = func(p *peer, req *fetchRequest) error { return p.FetchBodies(req) }
capacity = func(p *peer) int { return p.BlockCapacity() }
getIdles = func() ([]*peer, int) { return d.peers.BodyIdlePeers() }
setIdle = func(p *peer) { p.SetBlocksIdle() }
setIdle = func(p *peer) { p.SetBodiesIdle() }
)
err := d.fetchParts(errCancelBodyFetch, d.bodyCh, deliver, d.bodyWakeCh, expire,
d.queue.PendingBlocks, d.queue.InFlightBlocks, d.queue.ThrottleBlocks, d.queue.ReserveBodies,
d.bodyFetchHook, fetch, d.queue.CancelBodies, capacity, getIdles, setIdle, "Body")
d.queue.PendingBlocks, d.queue.InFlightBlocks, d.queue.ShouldThrottleBlocks, d.queue.ReserveBodies,
d.bodyFetchHook, fetch, d.queue.CancelBodies, capacity, d.peers.BodyIdlePeers, setIdle, "Body")
glog.V(logger.Debug).Infof("Block body download terminated: %v", err)
return err
@ -1252,13 +1305,13 @@ func (d *Downloader) fetchReceipts(from uint64) error {
pack := packet.(*receiptPack)
return d.queue.DeliverReceipts(pack.peerId, pack.receipts)
}
expire = func() []string { return d.queue.ExpireReceipts(bodyHardTTL) }
expire = func() []string { return d.queue.ExpireReceipts(receiptHardTTL) }
fetch = func(p *peer, req *fetchRequest) error { return p.FetchReceipts(req) }
capacity = func(p *peer) int { return p.ReceiptCapacity() }
setIdle = func(p *peer) { p.SetReceiptsIdle() }
)
err := d.fetchParts(errCancelReceiptFetch, d.receiptCh, deliver, d.receiptWakeCh, expire,
d.queue.PendingReceipts, d.queue.InFlightReceipts, d.queue.ThrottleReceipts, d.queue.ReserveReceipts,
d.queue.PendingReceipts, d.queue.InFlightReceipts, d.queue.ShouldThrottleReceipts, d.queue.ReserveReceipts,
d.receiptFetchHook, fetch, d.queue.CancelReceipts, capacity, d.peers.ReceiptIdlePeers, setIdle, "Receipt")
glog.V(logger.Debug).Infof("Receipt download terminated: %v", err)
@ -1307,9 +1360,9 @@ func (d *Downloader) fetchNodeData() error {
capacity = func(p *peer) int { return p.NodeDataCapacity() }
setIdle = func(p *peer) { p.SetNodeDataIdle() }
)
err := d.fetchParts(errCancelReceiptFetch, d.stateCh, deliver, d.stateWakeCh, expire,
err := d.fetchParts(errCancelStateFetch, d.stateCh, deliver, d.stateWakeCh, expire,
d.queue.PendingNodeData, d.queue.InFlightNodeData, throttle, reserve, nil, fetch,
d.queue.CancelNodeData, capacity, d.peers.ReceiptIdlePeers, setIdle, "State")
d.queue.CancelNodeData, capacity, d.peers.NodeDataIdlePeers, setIdle, "State")
glog.V(logger.Debug).Infof("Node state data download terminated: %v", err)
return err
@ -1323,7 +1376,7 @@ func (d *Downloader) fetchParts(errCancel error, deliveryCh chan dataPack, deliv
fetchHook func([]*types.Header), fetch func(*peer, *fetchRequest) error, cancel func(*fetchRequest), capacity func(*peer) int,
idle func() ([]*peer, int), setIdle func(*peer), kind string) error {
// Create a ticker to detect expired retreival tasks
// Create a ticker to detect expired retrieval tasks
ticker := time.NewTicker(100 * time.Millisecond)
defer ticker.Stop()
@ -1366,11 +1419,6 @@ func (d *Downloader) fetchParts(errCancel error, deliveryCh chan dataPack, deliv
// The hash chain is invalid (blocks are not ordered properly), abort
return err
case errInvalidBody, errInvalidReceipt:
// The peer delivered something very bad, drop immediately
glog.V(logger.Error).Infof("%s: delivered invalid %s, dropping", peer, strings.ToLower(kind))
d.dropPeer(peer.id)
case errNoFetchesPending:
// Peer probably timed out with its delivery but came through
// in the end, demote, but allow it to pull from this peer.
@ -1475,8 +1523,13 @@ func (d *Downloader) fetchParts(errCancel error, deliveryCh chan dataPack, deliv
fetchHook(request.Headers)
}
if err := fetch(peer, request); err != nil {
glog.V(logger.Error).Infof("%v: %s fetch failed, rescheduling", peer, strings.ToLower(kind))
cancel(request)
// Although we could try and make an attempt to fix this, this error really
// means that we've double allocated a fetch task to a peer. If that is the
// case, the internal state of the downloader and the queue is very wrong so
// better hard crash and note the error instead of silently accumulating into
// a much bigger issue.
panic(fmt.Sprintf("%v: %s fetch assignment failed, hard panic", peer, strings.ToLower(kind)))
cancel(request) // noop for now
}
running = true
}
@ -1526,6 +1579,7 @@ func (d *Downloader) process() {
// Repeat the processing as long as there are results to process
for {
// Fetch the next batch of results
pivot := d.queue.FastSyncPivot() // Fetch pivot before results to prevent reset race
results := d.queue.TakeResults()
if len(results) == 0 {
return
@ -1545,7 +1599,6 @@ func (d *Downloader) process() {
}
// Retrieve a batch of results to import
var (
headers = make([]*types.Header, 0, maxResultsProcess)
blocks = make([]*types.Block, 0, maxResultsProcess)
receipts = make([]types.Receipts, 0, maxResultsProcess)
)
@ -1556,11 +1609,9 @@ func (d *Downloader) process() {
blocks = append(blocks, types.NewBlockWithHeader(result.Header).WithBody(result.Transactions, result.Uncles))
case d.mode == FastSync:
blocks = append(blocks, types.NewBlockWithHeader(result.Header).WithBody(result.Transactions, result.Uncles))
if result.Header.Number.Uint64() <= d.queue.fastSyncPivot {
if result.Header.Number.Uint64() <= pivot {
receipts = append(receipts, result.Receipts)
}
case d.mode == LightSync:
headers = append(headers, result.Header)
}
}
// Try to process the results, aborting if there's an error
@ -1569,12 +1620,10 @@ func (d *Downloader) process() {
index int
)
switch {
case len(headers) > 0:
index, err = d.insertHeaders(headers, headerCheckFrequency)
case len(receipts) > 0:
index, err = d.insertReceipts(blocks, receipts)
if err == nil && blocks[len(blocks)-1].NumberU64() == d.queue.fastSyncPivot {
if err == nil && blocks[len(blocks)-1].NumberU64() == pivot {
glog.V(logger.Debug).Infof("Committing block #%d [%x…] as the new head", blocks[len(blocks)-1].Number(), blocks[len(blocks)-1].Hash().Bytes()[:4])
index, err = len(blocks)-1, d.commitHeadBlock(blocks[len(blocks)-1].Hash())
}
default: