core, trie: intermediate mempool between trie and database (#15857)

This commit reduces database I/O by not writing every state trie to disk.
This commit is contained in:
Péter Szilágyi
2018-02-05 18:40:32 +02:00
committed by Felix Lange
parent 59336283c0
commit 55599ee95d
69 changed files with 1958 additions and 1164 deletions

View File

@ -40,16 +40,23 @@ const (
// Database wraps access to tries and contract code.
type Database interface {
// Accessing tries:
// OpenTrie opens the main account trie.
// OpenStorageTrie opens the storage trie of an account.
OpenTrie(root common.Hash) (Trie, error)
// OpenStorageTrie opens the storage trie of an account.
OpenStorageTrie(addrHash, root common.Hash) (Trie, error)
// Accessing contract code:
ContractCode(addrHash, codeHash common.Hash) ([]byte, error)
ContractCodeSize(addrHash, codeHash common.Hash) (int, error)
// CopyTrie returns an independent copy of the given trie.
CopyTrie(Trie) Trie
// ContractCode retrieves a particular contract's code.
ContractCode(addrHash, codeHash common.Hash) ([]byte, error)
// ContractCodeSize retrieves a particular contracts code's size.
ContractCodeSize(addrHash, codeHash common.Hash) (int, error)
// TrieDB retrieves the low level trie database used for data storage.
TrieDB() *trie.Database
}
// Trie is a Ethereum Merkle Trie.
@ -57,26 +64,33 @@ type Trie interface {
TryGet(key []byte) ([]byte, error)
TryUpdate(key, value []byte) error
TryDelete(key []byte) error
CommitTo(trie.DatabaseWriter) (common.Hash, error)
Commit(onleaf trie.LeafCallback) (common.Hash, error)
Hash() common.Hash
NodeIterator(startKey []byte) trie.NodeIterator
GetKey([]byte) []byte // TODO(fjl): remove this when SecureTrie is removed
Prove(key []byte, fromLevel uint, proofDb ethdb.Putter) error
}
// NewDatabase creates a backing store for state. The returned database is safe for
// concurrent use and retains cached trie nodes in memory.
// concurrent use and retains cached trie nodes in memory. The pool is an optional
// intermediate trie-node memory pool between the low level storage layer and the
// high level trie abstraction.
func NewDatabase(db ethdb.Database) Database {
csc, _ := lru.New(codeSizeCacheSize)
return &cachingDB{db: db, codeSizeCache: csc}
return &cachingDB{
db: trie.NewDatabase(db),
codeSizeCache: csc,
}
}
type cachingDB struct {
db ethdb.Database
db *trie.Database
mu sync.Mutex
pastTries []*trie.SecureTrie
codeSizeCache *lru.Cache
}
// OpenTrie opens the main account trie.
func (db *cachingDB) OpenTrie(root common.Hash) (Trie, error) {
db.mu.Lock()
defer db.mu.Unlock()
@ -105,10 +119,12 @@ func (db *cachingDB) pushTrie(t *trie.SecureTrie) {
}
}
// OpenStorageTrie opens the storage trie of an account.
func (db *cachingDB) OpenStorageTrie(addrHash, root common.Hash) (Trie, error) {
return trie.NewSecure(root, db.db, 0)
}
// CopyTrie returns an independent copy of the given trie.
func (db *cachingDB) CopyTrie(t Trie) Trie {
switch t := t.(type) {
case cachedTrie:
@ -120,14 +136,16 @@ func (db *cachingDB) CopyTrie(t Trie) Trie {
}
}
// ContractCode retrieves a particular contract's code.
func (db *cachingDB) ContractCode(addrHash, codeHash common.Hash) ([]byte, error) {
code, err := db.db.Get(codeHash[:])
code, err := db.db.Node(codeHash)
if err == nil {
db.codeSizeCache.Add(codeHash, len(code))
}
return code, err
}
// ContractCodeSize retrieves a particular contracts code's size.
func (db *cachingDB) ContractCodeSize(addrHash, codeHash common.Hash) (int, error) {
if cached, ok := db.codeSizeCache.Get(codeHash); ok {
return cached.(int), nil
@ -139,16 +157,25 @@ func (db *cachingDB) ContractCodeSize(addrHash, codeHash common.Hash) (int, erro
return len(code), err
}
// TrieDB retrieves any intermediate trie-node caching layer.
func (db *cachingDB) TrieDB() *trie.Database {
return db.db
}
// cachedTrie inserts its trie into a cachingDB on commit.
type cachedTrie struct {
*trie.SecureTrie
db *cachingDB
}
func (m cachedTrie) CommitTo(dbw trie.DatabaseWriter) (common.Hash, error) {
root, err := m.SecureTrie.CommitTo(dbw)
func (m cachedTrie) Commit(onleaf trie.LeafCallback) (common.Hash, error) {
root, err := m.SecureTrie.Commit(onleaf)
if err == nil {
m.db.pushTrie(m.SecureTrie)
}
return root, err
}
func (m cachedTrie) Prove(key []byte, fromLevel uint, proofDb ethdb.Putter) error {
return m.SecureTrie.Prove(key, fromLevel, proofDb)
}

View File

@ -21,12 +21,13 @@ import (
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/ethdb"
)
// Tests that the node iterator indeed walks over the entire database contents.
func TestNodeIteratorCoverage(t *testing.T) {
// Create some arbitrary test state to iterate
db, mem, root, _ := makeTestState()
db, root, _ := makeTestState()
state, err := New(root, db)
if err != nil {
@ -39,14 +40,18 @@ func TestNodeIteratorCoverage(t *testing.T) {
hashes[it.Hash] = struct{}{}
}
}
// Cross check the hashes and the database itself
// Cross check the iterated hashes and the database/nodepool content
for hash := range hashes {
if _, err := mem.Get(hash.Bytes()); err != nil {
t.Errorf("failed to retrieve reported node %x: %v", hash, err)
if _, err := db.TrieDB().Node(hash); err != nil {
t.Errorf("failed to retrieve reported node %x", hash)
}
}
for _, key := range mem.Keys() {
for _, hash := range db.TrieDB().Nodes() {
if _, ok := hashes[hash]; !ok {
t.Errorf("state entry not reported %x", hash)
}
}
for _, key := range db.TrieDB().DiskDB().(*ethdb.MemDatabase).Keys() {
if bytes.HasPrefix(key, []byte("secure-key-")) {
continue
}

View File

@ -25,7 +25,6 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie"
)
var emptyCodeHash = crypto.Keccak256(nil)
@ -238,12 +237,12 @@ func (self *stateObject) updateRoot(db Database) {
// CommitTrie the storage trie of the object to dwb.
// This updates the trie root.
func (self *stateObject) CommitTrie(db Database, dbw trie.DatabaseWriter) error {
func (self *stateObject) CommitTrie(db Database) error {
self.updateTrie(db)
if self.dbErr != nil {
return self.dbErr
}
root, err := self.trie.CommitTo(dbw)
root, err := self.trie.Commit(nil)
if err == nil {
self.data.Root = root
}

View File

@ -48,7 +48,7 @@ func (s *StateSuite) TestDump(c *checker.C) {
// write some of them to the trie
s.state.updateStateObject(obj1)
s.state.updateStateObject(obj2)
s.state.CommitTo(s.db, false)
s.state.Commit(false)
// check that dump contains the state objects that are in trie
got := string(s.state.Dump())
@ -97,7 +97,7 @@ func (s *StateSuite) TestNull(c *checker.C) {
//value := common.FromHex("0x823140710bf13990e4500136726d8b55")
var value common.Hash
s.state.SetState(address, common.Hash{}, value)
s.state.CommitTo(s.db, false)
s.state.Commit(false)
value = s.state.GetState(address, common.Hash{})
if !common.EmptyHash(value) {
c.Errorf("expected empty hash. got %x", value)
@ -155,7 +155,7 @@ func TestSnapshot2(t *testing.T) {
so0.deleted = false
state.setStateObject(so0)
root, _ := state.CommitTo(db, false)
root, _ := state.Commit(false)
state.Reset(root)
// and one with deleted == true

View File

@ -36,6 +36,14 @@ type revision struct {
journalIndex int
}
var (
// emptyState is the known hash of an empty state trie entry.
emptyState = crypto.Keccak256Hash(nil)
// emptyCode is the known hash of the empty EVM bytecode.
emptyCode = crypto.Keccak256Hash(nil)
)
// StateDBs within the ethereum protocol are used to store anything
// within the merkle trie. StateDBs take care of caching and storing
// nested states. It's the general query interface to retrieve:
@ -235,6 +243,11 @@ func (self *StateDB) GetState(a common.Address, b common.Hash) common.Hash {
return common.Hash{}
}
// Database retrieves the low level database supporting the lower level trie ops.
func (self *StateDB) Database() Database {
return self.db
}
// StorageTrie returns the storage trie of an account.
// The return value is a copy and is nil for non-existent accounts.
func (self *StateDB) StorageTrie(a common.Address) Trie {
@ -568,8 +581,8 @@ func (s *StateDB) clearJournalAndRefund() {
s.refund = 0
}
// CommitTo writes the state to the given database.
func (s *StateDB) CommitTo(dbw trie.DatabaseWriter, deleteEmptyObjects bool) (root common.Hash, err error) {
// Commit writes the state to the underlying in-memory trie database.
func (s *StateDB) Commit(deleteEmptyObjects bool) (root common.Hash, err error) {
defer s.clearJournalAndRefund()
// Commit objects to the trie.
@ -583,13 +596,11 @@ func (s *StateDB) CommitTo(dbw trie.DatabaseWriter, deleteEmptyObjects bool) (ro
case isDirty:
// Write any contract code associated with the state object
if stateObject.code != nil && stateObject.dirtyCode {
if err := dbw.Put(stateObject.CodeHash(), stateObject.code); err != nil {
return common.Hash{}, err
}
s.db.TrieDB().Insert(common.BytesToHash(stateObject.CodeHash()), stateObject.code)
stateObject.dirtyCode = false
}
// Write any storage changes in the state object to its storage trie.
if err := stateObject.CommitTrie(s.db, dbw); err != nil {
if err := stateObject.CommitTrie(s.db); err != nil {
return common.Hash{}, err
}
// Update the object in the main account trie.
@ -598,7 +609,20 @@ func (s *StateDB) CommitTo(dbw trie.DatabaseWriter, deleteEmptyObjects bool) (ro
delete(s.stateObjectsDirty, addr)
}
// Write trie changes.
root, err = s.trie.CommitTo(dbw)
root, err = s.trie.Commit(func(leaf []byte, parent common.Hash) error {
var account Account
if err := rlp.DecodeBytes(leaf, &account); err != nil {
return nil
}
if account.Root != emptyState {
s.db.TrieDB().Reference(account.Root, parent)
}
code := common.BytesToHash(account.CodeHash)
if code != emptyCode {
s.db.TrieDB().Reference(code, parent)
}
return nil
})
log.Debug("Trie cache stats after commit", "misses", trie.CacheMisses(), "unloads", trie.CacheUnloads())
return root, err
}

View File

@ -97,10 +97,10 @@ func TestIntermediateLeaks(t *testing.T) {
}
// Commit and cross check the databases.
if _, err := transState.CommitTo(transDb, false); err != nil {
if _, err := transState.Commit(false); err != nil {
t.Fatalf("failed to commit transition state: %v", err)
}
if _, err := finalState.CommitTo(finalDb, false); err != nil {
if _, err := finalState.Commit(false); err != nil {
t.Fatalf("failed to commit final state: %v", err)
}
for _, key := range finalDb.Keys() {
@ -122,8 +122,8 @@ func TestIntermediateLeaks(t *testing.T) {
// https://github.com/ethereum/go-ethereum/pull/15549.
func TestCopy(t *testing.T) {
// Create a random state test to copy and modify "independently"
mem, _ := ethdb.NewMemDatabase()
orig, _ := New(common.Hash{}, NewDatabase(mem))
db, _ := ethdb.NewMemDatabase()
orig, _ := New(common.Hash{}, NewDatabase(db))
for i := byte(0); i < 255; i++ {
obj := orig.GetOrNewStateObject(common.BytesToAddress([]byte{i}))
@ -346,11 +346,10 @@ func (test *snapshotTest) run() bool {
}
action.fn(action, state)
}
// Revert all snapshots in reverse order. Each revert must yield a state
// that is equivalent to fresh state with all actions up the snapshot applied.
for sindex--; sindex >= 0; sindex-- {
checkstate, _ := New(common.Hash{}, NewDatabase(db))
checkstate, _ := New(common.Hash{}, state.Database())
for _, action := range test.actions[:test.snapshots[sindex]] {
action.fn(action, checkstate)
}
@ -409,7 +408,7 @@ func (test *snapshotTest) checkEqual(state, checkstate *StateDB) error {
func (s *StateSuite) TestTouchDelete(c *check.C) {
s.state.GetOrNewStateObject(common.Address{})
root, _ := s.state.CommitTo(s.db, false)
root, _ := s.state.Commit(false)
s.state.Reset(root)
snapshot := s.state.Snapshot()
@ -417,7 +416,6 @@ func (s *StateSuite) TestTouchDelete(c *check.C) {
if len(s.state.stateObjectsDirty) != 1 {
c.Fatal("expected one dirty state object")
}
s.state.RevertToSnapshot(snapshot)
if len(s.state.stateObjectsDirty) != 0 {
c.Fatal("expected no dirty state object")

View File

@ -36,10 +36,10 @@ type testAccount struct {
}
// makeTestState create a sample test state to test node-wise reconstruction.
func makeTestState() (Database, *ethdb.MemDatabase, common.Hash, []*testAccount) {
func makeTestState() (Database, common.Hash, []*testAccount) {
// Create an empty state
mem, _ := ethdb.NewMemDatabase()
db := NewDatabase(mem)
diskdb, _ := ethdb.NewMemDatabase()
db := NewDatabase(diskdb)
state, _ := New(common.Hash{}, db)
// Fill it with some arbitrary data
@ -61,10 +61,10 @@ func makeTestState() (Database, *ethdb.MemDatabase, common.Hash, []*testAccount)
state.updateStateObject(obj)
accounts = append(accounts, acc)
}
root, _ := state.CommitTo(mem, false)
root, _ := state.Commit(false)
// Return the generated state
return db, mem, root, accounts
return db, root, accounts
}
// checkStateAccounts cross references a reconstructed state with an expected
@ -96,7 +96,7 @@ func checkTrieConsistency(db ethdb.Database, root common.Hash) error {
if v, _ := db.Get(root[:]); v == nil {
return nil // Consider a non existent state consistent.
}
trie, err := trie.New(root, db)
trie, err := trie.New(root, trie.NewDatabase(db))
if err != nil {
return err
}
@ -138,7 +138,7 @@ func TestIterativeStateSyncBatched(t *testing.T) { testIterativeStateSync(t,
func testIterativeStateSync(t *testing.T, batch int) {
// Create a random state to copy
_, srcMem, srcRoot, srcAccounts := makeTestState()
srcDb, srcRoot, srcAccounts := makeTestState()
// Create a destination state and sync with the scheduler
dstDb, _ := ethdb.NewMemDatabase()
@ -148,9 +148,9 @@ func testIterativeStateSync(t *testing.T, batch int) {
for len(queue) > 0 {
results := make([]trie.SyncResult, len(queue))
for i, hash := range queue {
data, err := srcMem.Get(hash.Bytes())
data, err := srcDb.TrieDB().Node(hash)
if err != nil {
t.Fatalf("failed to retrieve node data for %x: %v", hash, err)
t.Fatalf("failed to retrieve node data for %x", hash)
}
results[i] = trie.SyncResult{Hash: hash, Data: data}
}
@ -170,7 +170,7 @@ func testIterativeStateSync(t *testing.T, batch int) {
// partial results are returned, and the others sent only later.
func TestIterativeDelayedStateSync(t *testing.T) {
// Create a random state to copy
_, srcMem, srcRoot, srcAccounts := makeTestState()
srcDb, srcRoot, srcAccounts := makeTestState()
// Create a destination state and sync with the scheduler
dstDb, _ := ethdb.NewMemDatabase()
@ -181,9 +181,9 @@ func TestIterativeDelayedStateSync(t *testing.T) {
// Sync only half of the scheduled nodes
results := make([]trie.SyncResult, len(queue)/2+1)
for i, hash := range queue[:len(results)] {
data, err := srcMem.Get(hash.Bytes())
data, err := srcDb.TrieDB().Node(hash)
if err != nil {
t.Fatalf("failed to retrieve node data for %x: %v", hash, err)
t.Fatalf("failed to retrieve node data for %x", hash)
}
results[i] = trie.SyncResult{Hash: hash, Data: data}
}
@ -207,7 +207,7 @@ func TestIterativeRandomStateSyncBatched(t *testing.T) { testIterativeRandomS
func testIterativeRandomStateSync(t *testing.T, batch int) {
// Create a random state to copy
_, srcMem, srcRoot, srcAccounts := makeTestState()
srcDb, srcRoot, srcAccounts := makeTestState()
// Create a destination state and sync with the scheduler
dstDb, _ := ethdb.NewMemDatabase()
@ -221,9 +221,9 @@ func testIterativeRandomStateSync(t *testing.T, batch int) {
// Fetch all the queued nodes in a random order
results := make([]trie.SyncResult, 0, len(queue))
for hash := range queue {
data, err := srcMem.Get(hash.Bytes())
data, err := srcDb.TrieDB().Node(hash)
if err != nil {
t.Fatalf("failed to retrieve node data for %x: %v", hash, err)
t.Fatalf("failed to retrieve node data for %x", hash)
}
results = append(results, trie.SyncResult{Hash: hash, Data: data})
}
@ -247,7 +247,7 @@ func testIterativeRandomStateSync(t *testing.T, batch int) {
// partial results are returned (Even those randomly), others sent only later.
func TestIterativeRandomDelayedStateSync(t *testing.T) {
// Create a random state to copy
_, srcMem, srcRoot, srcAccounts := makeTestState()
srcDb, srcRoot, srcAccounts := makeTestState()
// Create a destination state and sync with the scheduler
dstDb, _ := ethdb.NewMemDatabase()
@ -263,9 +263,9 @@ func TestIterativeRandomDelayedStateSync(t *testing.T) {
for hash := range queue {
delete(queue, hash)
data, err := srcMem.Get(hash.Bytes())
data, err := srcDb.TrieDB().Node(hash)
if err != nil {
t.Fatalf("failed to retrieve node data for %x: %v", hash, err)
t.Fatalf("failed to retrieve node data for %x", hash)
}
results = append(results, trie.SyncResult{Hash: hash, Data: data})
@ -292,9 +292,9 @@ func TestIterativeRandomDelayedStateSync(t *testing.T) {
// the database.
func TestIncompleteStateSync(t *testing.T) {
// Create a random state to copy
_, srcMem, srcRoot, srcAccounts := makeTestState()
srcDb, srcRoot, srcAccounts := makeTestState()
checkTrieConsistency(srcMem, srcRoot)
checkTrieConsistency(srcDb.TrieDB().DiskDB().(ethdb.Database), srcRoot)
// Create a destination state and sync with the scheduler
dstDb, _ := ethdb.NewMemDatabase()
@ -306,9 +306,9 @@ func TestIncompleteStateSync(t *testing.T) {
// Fetch a batch of state nodes
results := make([]trie.SyncResult, len(queue))
for i, hash := range queue {
data, err := srcMem.Get(hash.Bytes())
data, err := srcDb.TrieDB().Node(hash)
if err != nil {
t.Fatalf("failed to retrieve node data for %x: %v", hash, err)
t.Fatalf("failed to retrieve node data for %x", hash)
}
results[i] = trie.SyncResult{Hash: hash, Data: data}
}