core, eth, trie: fix data races and merge/review issues

This commit is contained in:
Péter Szilágyi
2015-10-13 12:04:25 +03:00
parent aa0538db0b
commit 5b0ee8ec30
27 changed files with 765 additions and 465 deletions

View File

@ -56,9 +56,8 @@ type fetchRequest struct {
Time time.Time // Time when the request was made
}
// fetchResult is the assembly collecting partial results from potentially more
// than one fetcher routines, until all outstanding retrievals complete and the
// result as a whole can be processed.
// fetchResult is a struct collecting partial results from data fetchers until
// all outstanding pieces complete and the result as a whole can be processed.
type fetchResult struct {
Pending int // Number of data fetches still pending
@ -89,7 +88,7 @@ type queue struct {
receiptPendPool map[string]*fetchRequest // [eth/63] Currently pending receipt retrieval operations
receiptDonePool map[common.Hash]struct{} // [eth/63] Set of the completed receipt fetches
stateTaskIndex int // [eth/63] Counter indexing the added hashes to ensure prioritized retrieval order
stateTaskIndex int // [eth/63] Counter indexing the added hashes to ensure prioritised retrieval order
stateTaskPool map[common.Hash]int // [eth/63] Pending node data retrieval tasks, mapping to their priority
stateTaskQueue *prque.Prque // [eth/63] Priority queue of the hashes to fetch the node data for
statePendPool map[string]*fetchRequest // [eth/63] Currently pending node data retrieval operations
@ -97,10 +96,10 @@ type queue struct {
stateDatabase ethdb.Database // [eth/63] Trie database to populate during state reassembly
stateScheduler *state.StateSync // [eth/63] State trie synchronisation scheduler and integrator
stateProcessors int32 // [eth/63] Number of currently running state processors
stateSchedLock sync.RWMutex // [eth/63] Lock serializing access to the state scheduler
stateSchedLock sync.RWMutex // [eth/63] Lock serialising access to the state scheduler
resultCache []*fetchResult // Downloaded but not yet delivered fetch results
resultOffset uint64 // Offset of the first cached fetch result in the block-chain
resultOffset uint64 // Offset of the first cached fetch result in the block chain
lock sync.RWMutex
}
@ -131,6 +130,9 @@ func (q *queue) Reset() {
q.lock.Lock()
defer q.lock.Unlock()
q.stateSchedLock.Lock()
defer q.stateSchedLock.Unlock()
q.mode = FullSync
q.fastSyncPivot = 0
@ -233,9 +235,17 @@ func (q *queue) Idle() bool {
return (queued + pending + cached) == 0
}
// ThrottleBlocks checks if the download should be throttled (active block (body)
// FastSyncPivot retrieves the currently used fast sync pivot point.
func (q *queue) FastSyncPivot() uint64 {
q.lock.RLock()
defer q.lock.RUnlock()
return q.fastSyncPivot
}
// ShouldThrottleBlocks checks if the download should be throttled (active block (body)
// fetches exceed block cache).
func (q *queue) ThrottleBlocks() bool {
func (q *queue) ShouldThrottleBlocks() bool {
q.lock.RLock()
defer q.lock.RUnlock()
@ -248,9 +258,9 @@ func (q *queue) ThrottleBlocks() bool {
return pending >= len(q.resultCache)-len(q.blockDonePool)
}
// ThrottleReceipts checks if the download should be throttled (active receipt
// ShouldThrottleReceipts checks if the download should be throttled (active receipt
// fetches exceed block cache).
func (q *queue) ThrottleReceipts() bool {
func (q *queue) ShouldThrottleReceipts() bool {
q.lock.RLock()
defer q.lock.RUnlock()
@ -269,7 +279,7 @@ func (q *queue) Schedule61(hashes []common.Hash, fifo bool) []common.Hash {
q.lock.Lock()
defer q.lock.Unlock()
// Insert all the hashes prioritized in the arrival order
// Insert all the hashes prioritised in the arrival order
inserts := make([]common.Hash, 0, len(hashes))
for _, hash := range hashes {
// Skip anything we already have
@ -297,10 +307,10 @@ func (q *queue) Schedule(headers []*types.Header, from uint64) []*types.Header {
q.lock.Lock()
defer q.lock.Unlock()
// Insert all the headers prioritized by the contained block number
// Insert all the headers prioritised by the contained block number
inserts := make([]*types.Header, 0, len(headers))
for _, header := range headers {
// Make sure chain order is honored and preserved throughout
// Make sure chain order is honoured and preserved throughout
hash := header.Hash()
if header.Number == nil || header.Number.Uint64() != from {
glog.V(logger.Warn).Infof("Header #%v [%x] broke chain ordering, expected %d", header.Number, hash[:4], from)
@ -347,19 +357,29 @@ func (q *queue) GetHeadResult() *fetchResult {
q.lock.RLock()
defer q.lock.RUnlock()
// If there are no results pending, return nil
if len(q.resultCache) == 0 || q.resultCache[0] == nil {
return nil
}
// If the next result is still incomplete, return nil
if q.resultCache[0].Pending > 0 {
return nil
}
// If the next result is the fast sync pivot...
if q.mode == FastSync && q.resultCache[0].Header.Number.Uint64() == q.fastSyncPivot {
// If the pivot state trie is still being pulled, return nil
if len(q.stateTaskPool) > 0 {
return nil
}
if q.PendingNodeData() > 0 {
return nil
}
// If the state is done, but not enough post-pivot headers were verified, stall...
for i := 0; i < fsHeaderForceVerify; i++ {
if i+1 >= len(q.resultCache) || q.resultCache[i+1] == nil {
return nil
}
}
}
return q.resultCache[0]
}
@ -372,7 +392,7 @@ func (q *queue) TakeResults() []*fetchResult {
// Accumulate all available results
results := []*fetchResult{}
for _, result := range q.resultCache {
for i, result := range q.resultCache {
// Stop if no more results are ready
if result == nil || result.Pending > 0 {
break
@ -385,6 +405,16 @@ func (q *queue) TakeResults() []*fetchResult {
if q.PendingNodeData() > 0 {
break
}
// Even is state fetch is done, ensure post-pivot headers passed verifications
safe := true
for j := 0; j < fsHeaderForceVerify; j++ {
if i+j+1 >= len(q.resultCache) || q.resultCache[i+j+1] == nil {
safe = false
}
}
if !safe {
break
}
}
// If we've just inserted the fast sync pivot, stop as the following batch needs different insertion
if q.mode == FastSync && result.Header.Number.Uint64() == q.fastSyncPivot+1 && len(results) > 0 {
@ -411,6 +441,9 @@ func (q *queue) TakeResults() []*fetchResult {
// ReserveBlocks reserves a set of block hashes for the given peer, skipping any
// previously failed download.
func (q *queue) ReserveBlocks(p *peer, count int) *fetchRequest {
q.lock.Lock()
defer q.lock.Unlock()
return q.reserveHashes(p, count, q.hashQueue, nil, q.blockPendPool, len(q.resultCache)-len(q.blockDonePool))
}
@ -430,17 +463,21 @@ func (q *queue) ReserveNodeData(p *peer, count int) *fetchRequest {
}
}
}
q.lock.Lock()
defer q.lock.Unlock()
return q.reserveHashes(p, count, q.stateTaskQueue, generator, q.statePendPool, count)
}
// reserveHashes reserves a set of hashes for the given peer, skipping previously
// failed ones.
//
// Note, this method expects the queue lock to be already held for writing. The
// reason the lock is not obtained in here is because the parameters already need
// to access the queue, so they already need a lock anyway.
func (q *queue) reserveHashes(p *peer, count int, taskQueue *prque.Prque, taskGen func(int), pendPool map[string]*fetchRequest, maxPending int) *fetchRequest {
q.lock.Lock()
defer q.lock.Unlock()
// Short circuit if the peer's already downloading something (sanity check not
// to corrupt state)
// Short circuit if the peer's already downloading something (sanity check to
// not corrupt state)
if _, ok := pendPool[p.id]; ok {
return nil
}
@ -492,30 +529,37 @@ func (q *queue) reserveHashes(p *peer, count int, taskQueue *prque.Prque, taskGe
// previously failed downloads. Beside the next batch of needed fetches, it also
// returns a flag whether empty blocks were queued requiring processing.
func (q *queue) ReserveBodies(p *peer, count int) (*fetchRequest, bool, error) {
noop := func(header *types.Header) bool {
isNoop := func(header *types.Header) bool {
return header.TxHash == types.EmptyRootHash && header.UncleHash == types.EmptyUncleHash
}
return q.reserveHeaders(p, count, q.blockTaskPool, q.blockTaskQueue, q.blockPendPool, q.blockDonePool, noop)
q.lock.Lock()
defer q.lock.Unlock()
return q.reserveHeaders(p, count, q.blockTaskPool, q.blockTaskQueue, q.blockPendPool, q.blockDonePool, isNoop)
}
// ReserveReceipts reserves a set of receipt fetches for the given peer, skipping
// any previously failed downloads. Beside the next batch of needed fetches, it
// also returns a flag whether empty receipts were queued requiring importing.
func (q *queue) ReserveReceipts(p *peer, count int) (*fetchRequest, bool, error) {
noop := func(header *types.Header) bool {
isNoop := func(header *types.Header) bool {
return header.ReceiptHash == types.EmptyRootHash
}
return q.reserveHeaders(p, count, q.receiptTaskPool, q.receiptTaskQueue, q.receiptPendPool, q.receiptDonePool, noop)
q.lock.Lock()
defer q.lock.Unlock()
return q.reserveHeaders(p, count, q.receiptTaskPool, q.receiptTaskQueue, q.receiptPendPool, q.receiptDonePool, isNoop)
}
// reserveHeaders reserves a set of data download operations for a given peer,
// skipping any previously failed ones. This method is a generic version used
// by the individual special reservation functions.
//
// Note, this method expects the queue lock to be already held for writing. The
// reason the lock is not obtained in here is because the parameters already need
// to access the queue, so they already need a lock anyway.
func (q *queue) reserveHeaders(p *peer, count int, taskPool map[common.Hash]*types.Header, taskQueue *prque.Prque,
pendPool map[string]*fetchRequest, donePool map[common.Hash]struct{}, noop func(*types.Header) bool) (*fetchRequest, bool, error) {
q.lock.Lock()
defer q.lock.Unlock()
pendPool map[string]*fetchRequest, donePool map[common.Hash]struct{}, isNoop func(*types.Header) bool) (*fetchRequest, bool, error) {
// Short circuit if the pool has been depleted, or if the peer's already
// downloading something (sanity check not to corrupt state)
if taskQueue.Empty() {
@ -537,7 +581,7 @@ func (q *queue) reserveHeaders(p *peer, count int, taskPool map[common.Hash]*typ
for proc := 0; proc < space && len(send) < count && !taskQueue.Empty(); proc++ {
header := taskQueue.PopItem().(*types.Header)
// If we're the first to request this task, initialize the result container
// If we're the first to request this task, initialise the result container
index := int(header.Number.Int64() - int64(q.resultOffset))
if index >= len(q.resultCache) || index < 0 {
return nil, false, errInvalidChain
@ -553,7 +597,7 @@ func (q *queue) reserveHeaders(p *peer, count int, taskPool map[common.Hash]*typ
}
}
// If this fetch task is a noop, skip this fetch operation
if noop(header) {
if isNoop(header) {
donePool[header.Hash()] = struct{}{}
delete(taskPool, header.Hash())
@ -562,7 +606,7 @@ func (q *queue) reserveHeaders(p *peer, count int, taskPool map[common.Hash]*typ
progress = true
continue
}
// Otherwise if not a known unknown block, add to the retrieve list
// Otherwise unless the peer is known not to have the data, add to the retrieve list
if p.ignored.Has(header.Hash()) {
skip = append(skip, header)
} else {
@ -655,35 +699,48 @@ func (q *queue) Revoke(peerId string) {
}
// ExpireBlocks checks for in flight requests that exceeded a timeout allowance,
// canceling them and returning the responsible peers for penalization.
// canceling them and returning the responsible peers for penalisation.
func (q *queue) ExpireBlocks(timeout time.Duration) []string {
q.lock.Lock()
defer q.lock.Unlock()
return q.expire(timeout, q.blockPendPool, q.hashQueue, blockTimeoutMeter)
}
// ExpireBodies checks for in flight block body requests that exceeded a timeout
// allowance, canceling them and returning the responsible peers for penalization.
// allowance, canceling them and returning the responsible peers for penalisation.
func (q *queue) ExpireBodies(timeout time.Duration) []string {
q.lock.Lock()
defer q.lock.Unlock()
return q.expire(timeout, q.blockPendPool, q.blockTaskQueue, bodyTimeoutMeter)
}
// ExpireReceipts checks for in flight receipt requests that exceeded a timeout
// allowance, canceling them and returning the responsible peers for penalization.
// allowance, canceling them and returning the responsible peers for penalisation.
func (q *queue) ExpireReceipts(timeout time.Duration) []string {
q.lock.Lock()
defer q.lock.Unlock()
return q.expire(timeout, q.receiptPendPool, q.receiptTaskQueue, receiptTimeoutMeter)
}
// ExpireNodeData checks for in flight node data requests that exceeded a timeout
// allowance, canceling them and returning the responsible peers for penalization.
// allowance, canceling them and returning the responsible peers for penalisation.
func (q *queue) ExpireNodeData(timeout time.Duration) []string {
q.lock.Lock()
defer q.lock.Unlock()
return q.expire(timeout, q.statePendPool, q.stateTaskQueue, stateTimeoutMeter)
}
// expire is the generic check that move expired tasks from a pending pool back
// into a task pool, returning all entities caught with expired tasks.
//
// Note, this method expects the queue lock to be already held for writing. The
// reason the lock is not obtained in here is because the parameters already need
// to access the queue, so they already need a lock anyway.
func (q *queue) expire(timeout time.Duration, pendPool map[string]*fetchRequest, taskQueue *prque.Prque, timeoutMeter metrics.Meter) []string {
q.lock.Lock()
defer q.lock.Unlock()
// Iterate over the expired requests and return each to the queue
peers := []string{}
for id, request := range pendPool {
@ -764,7 +821,7 @@ func (q *queue) DeliverBlocks(id string, blocks []*types.Block) error {
case len(errs) == 1 && (errs[0] == errInvalidChain || errs[0] == errInvalidBlock):
return errs[0]
case len(errs) == len(request.Headers):
case len(errs) == len(blocks):
return errStaleDelivery
default:
@ -774,6 +831,9 @@ func (q *queue) DeliverBlocks(id string, blocks []*types.Block) error {
// DeliverBodies injects a block body retrieval response into the results queue.
func (q *queue) DeliverBodies(id string, txLists [][]*types.Transaction, uncleLists [][]*types.Header) error {
q.lock.Lock()
defer q.lock.Unlock()
reconstruct := func(header *types.Header, index int, result *fetchResult) error {
if types.DeriveSha(types.Transactions(txLists[index])) != header.TxHash || types.CalcUncleHash(uncleLists[index]) != header.UncleHash {
return errInvalidBody
@ -787,6 +847,9 @@ func (q *queue) DeliverBodies(id string, txLists [][]*types.Transaction, uncleLi
// DeliverReceipts injects a receipt retrieval response into the results queue.
func (q *queue) DeliverReceipts(id string, receiptList [][]*types.Receipt) error {
q.lock.Lock()
defer q.lock.Unlock()
reconstruct := func(header *types.Header, index int, result *fetchResult) error {
if types.DeriveSha(types.Receipts(receiptList[index])) != header.ReceiptHash {
return errInvalidReceipt
@ -798,11 +861,12 @@ func (q *queue) DeliverReceipts(id string, receiptList [][]*types.Receipt) error
}
// deliver injects a data retrieval response into the results queue.
//
// Note, this method expects the queue lock to be already held for writing. The
// reason the lock is not obtained in here is because the parameters already need
// to access the queue, so they already need a lock anyway.
func (q *queue) deliver(id string, taskPool map[common.Hash]*types.Header, taskQueue *prque.Prque, pendPool map[string]*fetchRequest,
donePool map[common.Hash]struct{}, reqTimer metrics.Timer, results int, reconstruct func(header *types.Header, index int, result *fetchResult) error) error {
q.lock.Lock()
defer q.lock.Unlock()
// Short circuit if the data was never requested
request := pendPool[id]
if request == nil {
@ -818,7 +882,10 @@ func (q *queue) deliver(id string, taskPool map[common.Hash]*types.Header, taskQ
}
}
// Assemble each of the results with their headers and retrieved data parts
errs := make([]error, 0)
var (
failure error
useful bool
)
for i, header := range request.Headers {
// Short circuit assembly if no more fetch results are found
if i >= results {
@ -827,15 +894,16 @@ func (q *queue) deliver(id string, taskPool map[common.Hash]*types.Header, taskQ
// Reconstruct the next result if contents match up
index := int(header.Number.Int64() - int64(q.resultOffset))
if index >= len(q.resultCache) || index < 0 || q.resultCache[index] == nil {
errs = []error{errInvalidChain}
failure = errInvalidChain
break
}
if err := reconstruct(header, i, q.resultCache[index]); err != nil {
errs = []error{err}
failure = err
break
}
donePool[header.Hash()] = struct{}{}
q.resultCache[index].Pending--
useful = true
// Clean up a successful fetch
request.Headers[i] = nil
@ -847,19 +915,16 @@ func (q *queue) deliver(id string, taskPool map[common.Hash]*types.Header, taskQ
taskQueue.Push(header, -float32(header.Number.Uint64()))
}
}
// If none of the blocks were good, it's a stale delivery
// If none of the data was good, it's a stale delivery
switch {
case len(errs) == 0:
return nil
case failure == nil || failure == errInvalidChain:
return failure
case len(errs) == 1 && (errs[0] == errInvalidChain || errs[0] == errInvalidBody || errs[0] == errInvalidReceipt):
return errs[0]
case len(errs) == len(request.Headers):
return errStaleDelivery
case useful:
return fmt.Errorf("partial failure: %v", failure)
default:
return fmt.Errorf("multiple failures: %v", errs)
return errStaleDelivery
}
}
@ -876,7 +941,7 @@ func (q *queue) DeliverNodeData(id string, data [][]byte, callback func(error, i
stateReqTimer.UpdateSince(request.Time)
delete(q.statePendPool, id)
// If no data was retrieved, mark them as unavailable for the origin peer
// If no data was retrieved, mark their hashes as unavailable for the origin peer
if len(data) == 0 {
for hash, _ := range request.Hashes {
request.Peer.ignored.Add(hash)
@ -955,9 +1020,6 @@ func (q *queue) Prepare(offset uint64, mode SyncMode, pivot uint64) {
if q.resultOffset < offset {
q.resultOffset = offset
}
q.fastSyncPivot = 0
if mode == FastSync {
q.fastSyncPivot = pivot
}
q.fastSyncPivot = pivot
q.mode = mode
}