core, trie: intermediate mempool between trie and database (#15857)

This commit reduces database I/O by not writing every state trie to disk.
This commit is contained in:
Péter Szilágyi
2018-02-05 18:40:32 +02:00
committed by Felix Lange
parent 59336283c0
commit 55599ee95d
69 changed files with 1958 additions and 1164 deletions

View File

@ -24,7 +24,6 @@ import (
"io/ioutil"
"runtime"
"sync"
"sync/atomic"
"time"
"github.com/ethereum/go-ethereum/common"
@ -34,7 +33,6 @@ import (
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/core/vm"
"github.com/ethereum/go-ethereum/eth/tracers"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/internal/ethapi"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rlp"
@ -72,6 +70,7 @@ type txTraceResult struct {
type blockTraceTask struct {
statedb *state.StateDB // Intermediate state prepped for tracing
block *types.Block // Block to trace the transactions from
rootref common.Hash // Trie root reference held for this task
results []*txTraceResult // Trace results procudes by the task
}
@ -90,59 +89,6 @@ type txTraceTask struct {
index int // Transaction offset in the block
}
// ephemeralDatabase is a memory wrapper around a proper database, which acts as
// an ephemeral write layer. This construct is used by the chain tracer to write
// state tries for intermediate blocks without serializing to disk, but at the
// same time to allow disk fallback for reads that do no hit the memory layer.
type ephemeralDatabase struct {
diskdb ethdb.Database // Persistent disk database to fall back to with reads
memdb *ethdb.MemDatabase // Ephemeral memory database for primary reads and writes
}
func (db *ephemeralDatabase) Put(key []byte, value []byte) error { return db.memdb.Put(key, value) }
func (db *ephemeralDatabase) Delete(key []byte) error { return errors.New("delete not supported") }
func (db *ephemeralDatabase) Close() { db.memdb.Close() }
func (db *ephemeralDatabase) NewBatch() ethdb.Batch {
return db.memdb.NewBatch()
}
func (db *ephemeralDatabase) Has(key []byte) (bool, error) {
if has, _ := db.memdb.Has(key); has {
return has, nil
}
return db.diskdb.Has(key)
}
func (db *ephemeralDatabase) Get(key []byte) ([]byte, error) {
if blob, _ := db.memdb.Get(key); blob != nil {
return blob, nil
}
return db.diskdb.Get(key)
}
// Prune does a state sync into a new memory write layer and replaces the old one.
// This allows us to discard entries that are no longer referenced from the current
// state.
func (db *ephemeralDatabase) Prune(root common.Hash) {
// Pull the still relevant state data into memory
sync := state.NewStateSync(root, db.diskdb)
for sync.Pending() > 0 {
hash := sync.Missing(1)[0]
// Move the next trie node from the memory layer into a sync struct
node, err := db.memdb.Get(hash[:])
if err != nil {
panic(err) // memdb must have the data
}
if _, _, err := sync.Process([]trie.SyncResult{{Hash: hash, Data: node}}); err != nil {
panic(err) // it's not possible to fail processing a node
}
}
// Discard the old memory layer and write a new one
db.memdb, _ = ethdb.NewMemDatabaseWithCap(db.memdb.Len())
if _, err := sync.Commit(db); err != nil {
panic(err) // writing into a memdb cannot fail
}
}
// TraceChain returns the structured logs created during the execution of EVM
// between two blocks (excluding start) and returns them as a JSON object.
func (api *PrivateDebugAPI) TraceChain(ctx context.Context, start, end rpc.BlockNumber, config *TraceConfig) (*rpc.Subscription, error) {
@ -188,19 +134,15 @@ func (api *PrivateDebugAPI) traceChain(ctx context.Context, start, end *types.Bl
// Ensure we have a valid starting state before doing any work
origin := start.NumberU64()
database := state.NewDatabase(api.eth.ChainDb())
memdb, _ := ethdb.NewMemDatabase()
db := &ephemeralDatabase{
diskdb: api.eth.ChainDb(),
memdb: memdb,
}
if number := start.NumberU64(); number > 0 {
start = api.eth.blockchain.GetBlock(start.ParentHash(), start.NumberU64()-1)
if start == nil {
return nil, fmt.Errorf("parent block #%d not found", number-1)
}
}
statedb, err := state.New(start.Root(), state.NewDatabase(db))
statedb, err := state.New(start.Root(), database)
if err != nil {
// If the starting state is missing, allow some number of blocks to be reexecuted
reexec := defaultTraceReexec
@ -213,7 +155,7 @@ func (api *PrivateDebugAPI) traceChain(ctx context.Context, start, end *types.Bl
if start == nil {
break
}
if statedb, err = state.New(start.Root(), state.NewDatabase(db)); err == nil {
if statedb, err = state.New(start.Root(), database); err == nil {
break
}
}
@ -256,7 +198,7 @@ func (api *PrivateDebugAPI) traceChain(ctx context.Context, start, end *types.Bl
res, err := api.traceTx(ctx, msg, vmctx, task.statedb, config)
if err != nil {
task.results[i] = &txTraceResult{Error: err.Error()}
log.Warn("Tracing failed", "err", err)
log.Warn("Tracing failed", "hash", tx.Hash(), "block", task.block.NumberU64(), "err", err)
break
}
task.statedb.DeleteSuicides()
@ -273,7 +215,6 @@ func (api *PrivateDebugAPI) traceChain(ctx context.Context, start, end *types.Bl
}
// Start a goroutine to feed all the blocks into the tracers
begin := time.Now()
complete := start.NumberU64()
go func() {
var (
@ -281,6 +222,7 @@ func (api *PrivateDebugAPI) traceChain(ctx context.Context, start, end *types.Bl
number uint64
traced uint64
failed error
proot common.Hash
)
// Ensure everything is properly cleaned up on any exit path
defer func() {
@ -308,7 +250,7 @@ func (api *PrivateDebugAPI) traceChain(ctx context.Context, start, end *types.Bl
// Print progress logs if long enough time elapsed
if time.Since(logged) > 8*time.Second {
if number > origin {
log.Info("Tracing chain segment", "start", origin, "end", end.NumberU64(), "current", number, "transactions", traced, "elapsed", time.Since(begin))
log.Info("Tracing chain segment", "start", origin, "end", end.NumberU64(), "current", number, "transactions", traced, "elapsed", time.Since(begin), "memory", database.TrieDB().Size())
} else {
log.Info("Preparing state for chain trace", "block", number, "start", origin, "elapsed", time.Since(begin))
}
@ -325,13 +267,11 @@ func (api *PrivateDebugAPI) traceChain(ctx context.Context, start, end *types.Bl
txs := block.Transactions()
select {
case tasks <- &blockTraceTask{statedb: statedb.Copy(), block: block, results: make([]*txTraceResult, len(txs))}:
case tasks <- &blockTraceTask{statedb: statedb.Copy(), block: block, rootref: proot, results: make([]*txTraceResult, len(txs))}:
case <-notifier.Closed():
return
}
traced += uint64(len(txs))
} else {
atomic.StoreUint64(&complete, number)
}
// Generate the next state snapshot fast without tracing
_, _, _, err := api.eth.blockchain.Processor().Process(block, statedb, vm.Config{})
@ -340,7 +280,7 @@ func (api *PrivateDebugAPI) traceChain(ctx context.Context, start, end *types.Bl
break
}
// Finalize the state so any modifications are written to the trie
root, err := statedb.CommitTo(db, true)
root, err := statedb.Commit(true)
if err != nil {
failed = err
break
@ -349,26 +289,14 @@ func (api *PrivateDebugAPI) traceChain(ctx context.Context, start, end *types.Bl
failed = err
break
}
// After every N blocks, prune the database to only retain relevant data
if (number-start.NumberU64())%4096 == 0 {
// Wait until currently pending trace jobs finish
for atomic.LoadUint64(&complete) != number {
select {
case <-time.After(100 * time.Millisecond):
case <-notifier.Closed():
return
}
}
// No more concurrent access at this point, prune the database
var (
nodes = db.memdb.Len()
start = time.Now()
)
db.Prune(root)
log.Info("Pruned tracer state entries", "deleted", nodes-db.memdb.Len(), "left", db.memdb.Len(), "elapsed", time.Since(start))
statedb, _ = state.New(root, state.NewDatabase(db))
// Reference the trie twice, once for us, once for the trancer
database.TrieDB().Reference(root, common.Hash{})
if number >= origin {
database.TrieDB().Reference(root, common.Hash{})
}
// Dereference all past tries we ourselves are done working with
database.TrieDB().Dereference(proot, common.Hash{})
proot = root
}
}()
@ -387,12 +315,14 @@ func (api *PrivateDebugAPI) traceChain(ctx context.Context, start, end *types.Bl
}
done[uint64(result.Block)] = result
// Dereference any paret tries held in memory by this task
database.TrieDB().Dereference(res.rootref, common.Hash{})
// Stream completed traces to the user, aborting on the first error
for result, ok := done[next]; ok; result, ok = done[next] {
if len(result.Traces) > 0 || next == end.NumberU64() {
notifier.Notify(sub.ID, result)
}
atomic.StoreUint64(&complete, next)
delete(done, next)
next++
}
@ -544,18 +474,14 @@ func (api *PrivateDebugAPI) computeStateDB(block *types.Block, reexec uint64) (*
}
// Otherwise try to reexec blocks until we find a state or reach our limit
origin := block.NumberU64()
database := state.NewDatabase(api.eth.ChainDb())
memdb, _ := ethdb.NewMemDatabase()
db := &ephemeralDatabase{
diskdb: api.eth.ChainDb(),
memdb: memdb,
}
for i := uint64(0); i < reexec; i++ {
block = api.eth.blockchain.GetBlock(block.ParentHash(), block.NumberU64()-1)
if block == nil {
break
}
if statedb, err = state.New(block.Root(), state.NewDatabase(db)); err == nil {
if statedb, err = state.New(block.Root(), database); err == nil {
break
}
}
@ -571,6 +497,7 @@ func (api *PrivateDebugAPI) computeStateDB(block *types.Block, reexec uint64) (*
var (
start = time.Now()
logged time.Time
proot common.Hash
)
for block.NumberU64() < origin {
// Print progress logs if long enough time elapsed
@ -587,26 +514,18 @@ func (api *PrivateDebugAPI) computeStateDB(block *types.Block, reexec uint64) (*
return nil, err
}
// Finalize the state so any modifications are written to the trie
root, err := statedb.CommitTo(db, true)
root, err := statedb.Commit(true)
if err != nil {
return nil, err
}
if err := statedb.Reset(root); err != nil {
return nil, err
}
// After every N blocks, prune the database to only retain relevant data
if block.NumberU64()%4096 == 0 || block.NumberU64() == origin {
var (
nodes = db.memdb.Len()
begin = time.Now()
)
db.Prune(root)
log.Info("Pruned tracer state entries", "deleted", nodes-db.memdb.Len(), "left", db.memdb.Len(), "elapsed", time.Since(begin))
statedb, _ = state.New(root, state.NewDatabase(db))
}
database.TrieDB().Reference(root, common.Hash{})
database.TrieDB().Dereference(proot, common.Hash{})
proot = root
}
log.Info("Historical state regenerated", "block", block.NumberU64(), "elapsed", time.Since(start))
log.Info("Historical state regenerated", "block", block.NumberU64(), "elapsed", time.Since(start), "size", database.TrieDB().Size())
return statedb, nil
}