core, eth/downloader: commit block data using batches (#15115)

* ethdb: add Putter interface and Has method

* ethdb: improve docs and add IdealBatchSize

* ethdb: remove memory batch lock

Batches are not safe for concurrent use.

* core: use ethdb.Putter for Write* functions

This covers the easy cases.

* core/state: simplify StateSync

* trie: optimize local node check

* ethdb: add ValueSize to Batch

* core: optimize HasHeader check

This avoids one random database read get the block number. For many uses
of HasHeader, the expectation is that it's actually there. Using Has
avoids a load + decode of the value.

* core: write fast sync block data in batches

Collect writes into batches up to the ideal size instead of issuing many
small, concurrent writes.

* eth/downloader: commit larger state batches

Collect nodes into a batch up to the ideal size instead of committing
whenever a node is received.

* core: optimize HasBlock check

This avoids a random database read to get the number.

* core: use numberCache in HasHeader

numberCache has higher capacity, increasing the odds of finding the
header without a database lookup.

* core: write imported block data using a batch

Restore batch writes of state and add blocks, tx entries, receipts to
the same batch. The change also simplifies the miner.

This commit also removes posting of logs when a forked block is imported.

* core: fix DB write error handling

* ethdb: use RLock for Has

* core: fix HasBlock comment
This commit is contained in:
Felix Lange
2017-09-09 18:03:07 +02:00
committed by Péter Szilágyi
parent ac193e36ce
commit 10181b57a9
18 changed files with 247 additions and 280 deletions

View File

@ -26,6 +26,7 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/state"
"github.com/ethereum/go-ethereum/crypto/sha3"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/trie"
)
@ -187,10 +188,13 @@ func (d *Downloader) runStateSync(s *stateSync) *stateSync {
type stateSync struct {
d *Downloader // Downloader instance to access and manage current peerset
sched *state.StateSync // State trie sync scheduler defining the tasks
sched *trie.TrieSync // State trie sync scheduler defining the tasks
keccak hash.Hash // Keccak256 hasher to verify deliveries with
tasks map[common.Hash]*stateTask // Set of tasks currently queued for retrieval
numUncommitted int
bytesUncommitted int
deliver chan *stateReq // Delivery channel multiplexing peer responses
cancel chan struct{} // Channel to signal a termination request
cancelOnce sync.Once // Ensures cancel only ever gets called once
@ -252,9 +256,10 @@ func (s *stateSync) loop() error {
// Keep assigning new tasks until the sync completes or aborts
for s.sched.Pending() > 0 {
if err := s.assignTasks(); err != nil {
if err := s.commit(false); err != nil {
return err
}
s.assignTasks()
// Tasks assigned, wait for something to happen
select {
case <-newPeer:
@ -284,12 +289,28 @@ func (s *stateSync) loop() error {
}
}
}
return s.commit(true)
}
func (s *stateSync) commit(force bool) error {
if !force && s.bytesUncommitted < ethdb.IdealBatchSize {
return nil
}
start := time.Now()
b := s.d.stateDB.NewBatch()
s.sched.Commit(b)
if err := b.Write(); err != nil {
return fmt.Errorf("DB write error: %v", err)
}
s.updateStats(s.numUncommitted, 0, 0, time.Since(start))
s.numUncommitted = 0
s.bytesUncommitted = 0
return nil
}
// assignTasks attempts to assing new tasks to all idle peers, either from the
// batch currently being retried, or fetching new data from the trie sync itself.
func (s *stateSync) assignTasks() error {
func (s *stateSync) assignTasks() {
// Iterate over all idle peers and try to assign them state fetches
peers, _ := s.d.peers.NodeDataIdlePeers()
for _, p := range peers {
@ -301,7 +322,6 @@ func (s *stateSync) assignTasks() error {
// If the peer was assigned tasks to fetch, send the network request
if len(req.items) > 0 {
req.peer.log.Trace("Requesting new batch of data", "type", "state", "count", len(req.items))
select {
case s.d.trackStateReq <- req:
req.peer.FetchNodeData(req.items)
@ -309,7 +329,6 @@ func (s *stateSync) assignTasks() error {
}
}
}
return nil
}
// fillTasks fills the given request object with a maximum of n state download
@ -347,11 +366,11 @@ func (s *stateSync) fillTasks(n int, req *stateReq) {
// delivered.
func (s *stateSync) process(req *stateReq) (bool, error) {
// Collect processing stats and update progress if valid data was received
processed, written, duplicate, unexpected := 0, 0, 0, 0
duplicate, unexpected := 0, 0
defer func(start time.Time) {
if processed+written+duplicate+unexpected > 0 {
s.updateStats(processed, written, duplicate, unexpected, time.Since(start))
if duplicate > 0 || unexpected > 0 {
s.updateStats(0, duplicate, unexpected, time.Since(start))
}
}(time.Now())
@ -362,7 +381,9 @@ func (s *stateSync) process(req *stateReq) (bool, error) {
prog, hash, err := s.processNodeData(blob)
switch err {
case nil:
processed++
s.numUncommitted++
s.bytesUncommitted += len(blob)
progress = progress || prog
case trie.ErrNotRequested:
unexpected++
case trie.ErrAlreadyProcessed:
@ -370,38 +391,20 @@ func (s *stateSync) process(req *stateReq) (bool, error) {
default:
return stale, fmt.Errorf("invalid state node %s: %v", hash.TerminalString(), err)
}
if prog {
progress = true
}
// If the node delivered a requested item, mark the delivery non-stale
if _, ok := req.tasks[hash]; ok {
delete(req.tasks, hash)
stale = false
}
}
// If some data managed to hit the database, flush and reset failure counters
if progress {
// Flush any accumulated data out to disk
batch := s.d.stateDB.NewBatch()
count, err := s.sched.Commit(batch)
if err != nil {
return stale, err
}
if err := batch.Write(); err != nil {
return stale, err
}
written = count
// If we're inside the critical section, reset fail counter since we progressed
if atomic.LoadUint32(&s.d.fsPivotFails) > 1 {
log.Trace("Fast-sync progressed, resetting fail counter", "previous", atomic.LoadUint32(&s.d.fsPivotFails))
atomic.StoreUint32(&s.d.fsPivotFails, 1) // Don't ever reset to 0, as that will unlock the pivot block
}
// If we're inside the critical section, reset fail counter since we progressed.
if progress && atomic.LoadUint32(&s.d.fsPivotFails) > 1 {
log.Trace("Fast-sync progressed, resetting fail counter", "previous", atomic.LoadUint32(&s.d.fsPivotFails))
atomic.StoreUint32(&s.d.fsPivotFails, 1) // Don't ever reset to 0, as that will unlock the pivot block
}
// Put unfulfilled tasks back into the retry queue
npeers := s.d.peers.Len()
for hash, task := range req.tasks {
// If the node did deliver something, missing items may be due to a protocol
// limit or a previous timeout + delayed delivery. Both cases should permit
@ -425,25 +428,25 @@ func (s *stateSync) process(req *stateReq) (bool, error) {
// error occurred.
func (s *stateSync) processNodeData(blob []byte) (bool, common.Hash, error) {
res := trie.SyncResult{Data: blob}
s.keccak.Reset()
s.keccak.Write(blob)
s.keccak.Sum(res.Hash[:0])
committed, _, err := s.sched.Process([]trie.SyncResult{res})
return committed, res.Hash, err
}
// updateStats bumps the various state sync progress counters and displays a log
// message for the user to see.
func (s *stateSync) updateStats(processed, written, duplicate, unexpected int, duration time.Duration) {
func (s *stateSync) updateStats(written, duplicate, unexpected int, duration time.Duration) {
s.d.syncStatsLock.Lock()
defer s.d.syncStatsLock.Unlock()
s.d.syncStatsState.pending = uint64(s.sched.Pending())
s.d.syncStatsState.processed += uint64(processed)
s.d.syncStatsState.processed += uint64(written)
s.d.syncStatsState.duplicate += uint64(duplicate)
s.d.syncStatsState.unexpected += uint64(unexpected)
log.Info("Imported new state entries", "count", processed, "flushed", written, "elapsed", common.PrettyDuration(duration), "processed", s.d.syncStatsState.processed, "pending", s.d.syncStatsState.pending, "retry", len(s.tasks), "duplicate", s.d.syncStatsState.duplicate, "unexpected", s.d.syncStatsState.unexpected)
if written > 0 || duplicate > 0 || unexpected > 0 {
log.Info("Imported new state entries", "count", written, "elapsed", common.PrettyDuration(duration), "processed", s.d.syncStatsState.processed, "pending", s.d.syncStatsState.pending, "retry", len(s.tasks), "duplicate", s.d.syncStatsState.duplicate, "unexpected", s.d.syncStatsState.unexpected)
}
}