core, eth/downloader: commit block data using batches (#15115)

* ethdb: add Putter interface and Has method * ethdb: improve docs and add IdealBatchSize * ethdb: remove memory batch lock Batches are not safe for concurrent use. * core: use ethdb.Putter for Write* functions This covers the easy cases. * core/state: simplify StateSync * trie: optimize local node check * ethdb: add ValueSize to Batch * core: optimize HasHeader check This avoids one random database read get the block number. For many uses of HasHeader, the expectation is that it's actually there. Using Has avoids a load + decode of the value. * core: write fast sync block data in batches Collect writes into batches up to the ideal size instead of issuing many small, concurrent writes. * eth/downloader: commit larger state batches Collect nodes into a batch up to the ideal size instead of committing whenever a node is received. * core: optimize HasBlock check This avoids a random database read to get the number. * core: use numberCache in HasHeader numberCache has higher capacity, increasing the odds of finding the header without a database lookup. * core: write imported block data using a batch Restore batch writes of state and add blocks, tx entries, receipts to the same batch. The change also simplifies the miner. This commit also removes posting of logs when a forked block is imported. * core: fix DB write error handling * ethdb: use RLock for Has * core: fix HasBlock comment
2017-09-09 18:03:07 +02:00
parent ac193e36ce
commit 10181b57a9
18 changed files with 247 additions and 280 deletions
--- a/eth/downloader/statesync.go
+++ b/eth/downloader/statesync.go
@ -26,6 +26,7 @@ import (
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/core/state"
 	"github.com/ethereum/go-ethereum/crypto/sha3"
+	"github.com/ethereum/go-ethereum/ethdb"
 	"github.com/ethereum/go-ethereum/log"
 	"github.com/ethereum/go-ethereum/trie"
 )
@ -187,10 +188,13 @@ func (d *Downloader) runStateSync(s *stateSync) *stateSync {
 type stateSync struct {
 	d *Downloader // Downloader instance to access and manage current peerset

-	sched  *state.StateSync           // State trie sync scheduler defining the tasks
+	sched  *trie.TrieSync             // State trie sync scheduler defining the tasks
 	keccak hash.Hash                  // Keccak256 hasher to verify deliveries with
 	tasks  map[common.Hash]*stateTask // Set of tasks currently queued for retrieval

+	numUncommitted   int
+	bytesUncommitted int
+
 	deliver    chan *stateReq // Delivery channel multiplexing peer responses
 	cancel     chan struct{}  // Channel to signal a termination request
 	cancelOnce sync.Once      // Ensures cancel only ever gets called once
@ -252,9 +256,10 @@ func (s *stateSync) loop() error {

 	// Keep assigning new tasks until the sync completes or aborts
 	for s.sched.Pending() > 0 {
-		if err := s.assignTasks(); err != nil {
+		if err := s.commit(false); err != nil {
 			return err
 		}
+		s.assignTasks()
 		// Tasks assigned, wait for something to happen
 		select {
 		case <-newPeer:
@ -284,12 +289,28 @@ func (s *stateSync) loop() error {
 			}
 		}
 	}
+	return s.commit(true)
+}
+
+func (s *stateSync) commit(force bool) error {
+	if !force && s.bytesUncommitted < ethdb.IdealBatchSize {
+		return nil
+	}
+	start := time.Now()
+	b := s.d.stateDB.NewBatch()
+	s.sched.Commit(b)
+	if err := b.Write(); err != nil {
+		return fmt.Errorf("DB write error: %v", err)
+	}
+	s.updateStats(s.numUncommitted, 0, 0, time.Since(start))
+	s.numUncommitted = 0
+	s.bytesUncommitted = 0
 	return nil
 }

 // assignTasks attempts to assing new tasks to all idle peers, either from the
 // batch currently being retried, or fetching new data from the trie sync itself.
-func (s *stateSync) assignTasks() error {
+func (s *stateSync) assignTasks() {
 	// Iterate over all idle peers and try to assign them state fetches
 	peers, _ := s.d.peers.NodeDataIdlePeers()
 	for _, p := range peers {
@ -301,7 +322,6 @@ func (s *stateSync) assignTasks() error {
 		// If the peer was assigned tasks to fetch, send the network request
 		if len(req.items) > 0 {
 			req.peer.log.Trace("Requesting new batch of data", "type", "state", "count", len(req.items))
-
 			select {
 			case s.d.trackStateReq <- req:
 				req.peer.FetchNodeData(req.items)
@ -309,7 +329,6 @@ func (s *stateSync) assignTasks() error {
 			}
 		}
 	}
-	return nil
 }

 // fillTasks fills the given request object with a maximum of n state download
@ -347,11 +366,11 @@ func (s *stateSync) fillTasks(n int, req *stateReq) {
 // delivered.
 func (s *stateSync) process(req *stateReq) (bool, error) {
 	// Collect processing stats and update progress if valid data was received
-	processed, written, duplicate, unexpected := 0, 0, 0, 0
+	duplicate, unexpected := 0, 0

 	defer func(start time.Time) {
-		if processed+written+duplicate+unexpected > 0 {
-			s.updateStats(processed, written, duplicate, unexpected, time.Since(start))
+		if duplicate > 0 || unexpected > 0 {
+			s.updateStats(0, duplicate, unexpected, time.Since(start))
 		}
 	}(time.Now())

@ -362,7 +381,9 @@ func (s *stateSync) process(req *stateReq) (bool, error) {
 		prog, hash, err := s.processNodeData(blob)
 		switch err {
 		case nil:
-			processed++
+			s.numUncommitted++
+			s.bytesUncommitted += len(blob)
+			progress = progress || prog
 		case trie.ErrNotRequested:
 			unexpected++
 		case trie.ErrAlreadyProcessed:
@ -370,38 +391,20 @@ func (s *stateSync) process(req *stateReq) (bool, error) {
 		default:
 			return stale, fmt.Errorf("invalid state node %s: %v", hash.TerminalString(), err)
 		}
-		if prog {
-			progress = true
-		}
 		// If the node delivered a requested item, mark the delivery non-stale
 		if _, ok := req.tasks[hash]; ok {
 			delete(req.tasks, hash)
 			stale = false
 		}
 	}
-	// If some data managed to hit the database, flush and reset failure counters
-	if progress {
-		// Flush any accumulated data out to disk
-		batch := s.d.stateDB.NewBatch()
-
-		count, err := s.sched.Commit(batch)
-		if err != nil {
-			return stale, err
-		}
-		if err := batch.Write(); err != nil {
-			return stale, err
-		}
-		written = count
-
-		// If we're inside the critical section, reset fail counter since we progressed
-		if atomic.LoadUint32(&s.d.fsPivotFails) > 1 {
-			log.Trace("Fast-sync progressed, resetting fail counter", "previous", atomic.LoadUint32(&s.d.fsPivotFails))
-			atomic.StoreUint32(&s.d.fsPivotFails, 1) // Don't ever reset to 0, as that will unlock the pivot block
-		}
+	// If we're inside the critical section, reset fail counter since we progressed.
+	if progress && atomic.LoadUint32(&s.d.fsPivotFails) > 1 {
+		log.Trace("Fast-sync progressed, resetting fail counter", "previous", atomic.LoadUint32(&s.d.fsPivotFails))
+		atomic.StoreUint32(&s.d.fsPivotFails, 1) // Don't ever reset to 0, as that will unlock the pivot block
 	}
+
 	// Put unfulfilled tasks back into the retry queue
 	npeers := s.d.peers.Len()
-
 	for hash, task := range req.tasks {
 		// If the node did deliver something, missing items may be due to a protocol
 		// limit or a previous timeout + delayed delivery. Both cases should permit
@ -425,25 +428,25 @@ func (s *stateSync) process(req *stateReq) (bool, error) {
 // error occurred.
 func (s *stateSync) processNodeData(blob []byte) (bool, common.Hash, error) {
 	res := trie.SyncResult{Data: blob}
-
 	s.keccak.Reset()
 	s.keccak.Write(blob)
 	s.keccak.Sum(res.Hash[:0])
-
 	committed, _, err := s.sched.Process([]trie.SyncResult{res})
 	return committed, res.Hash, err
 }

 // updateStats bumps the various state sync progress counters and displays a log
 // message for the user to see.
-func (s *stateSync) updateStats(processed, written, duplicate, unexpected int, duration time.Duration) {
+func (s *stateSync) updateStats(written, duplicate, unexpected int, duration time.Duration) {
 	s.d.syncStatsLock.Lock()
 	defer s.d.syncStatsLock.Unlock()

 	s.d.syncStatsState.pending = uint64(s.sched.Pending())
-	s.d.syncStatsState.processed += uint64(processed)
+	s.d.syncStatsState.processed += uint64(written)
 	s.d.syncStatsState.duplicate += uint64(duplicate)
 	s.d.syncStatsState.unexpected += uint64(unexpected)

-	log.Info("Imported new state entries", "count", processed, "flushed", written, "elapsed", common.PrettyDuration(duration), "processed", s.d.syncStatsState.processed, "pending", s.d.syncStatsState.pending, "retry", len(s.tasks), "duplicate", s.d.syncStatsState.duplicate, "unexpected", s.d.syncStatsState.unexpected)
+	if written > 0 || duplicate > 0 || unexpected > 0 {
+		log.Info("Imported new state entries", "count", written, "elapsed", common.PrettyDuration(duration), "processed", s.d.syncStatsState.processed, "pending", s.d.syncStatsState.pending, "retry", len(s.tasks), "duplicate", s.d.syncStatsState.duplicate, "unexpected", s.d.syncStatsState.unexpected)
+	}
 }