all: bloom-filter based pruning mechanism (#21724)

* cmd, core, tests: initial state pruner

core: fix db inspector

cmd/geth: add verify-state

cmd/geth: add verification tool

core/rawdb: implement flatdb

cmd, core: fix rebase

core/state: use new contract code layout

core/state/pruner: avoid deleting genesis state

cmd/geth: add helper function

core, cmd: fix extract genesis

core: minor fixes

contracts: remove useless

core/state/snapshot: plugin stacktrie

core: polish

core/state/snapshot: iterate storage concurrently

core/state/snapshot: fix iteration

core: add comments

core/state/snapshot: polish code

core/state: polish

core/state/snapshot: rebase

core/rawdb: add comments

core/rawdb: fix tests

core/rawdb: improve tests

core/state/snapshot: fix concurrent iteration

core/state: run pruning during the recovery

core, trie: implement martin's idea

core, eth: delete flatdb and polish pruner

trie: fix import

core/state/pruner: add log

core/state/pruner: fix issues

core/state/pruner: don't read back

core/state/pruner: fix contract code write

core/state/pruner: check root node presence

cmd, core: polish log

core/state: use HEAD-127 as the target

core/state/snapshot: improve tests

cmd/geth: fix verification tool

cmd/geth: use HEAD as the verification default target

all: replace the bloomfilter with martin's fork

cmd, core: polish code

core, cmd: forcibly delete state root

core/state/pruner: add hash64

core/state/pruner: fix blacklist

core/state: remove blacklist

cmd, core: delete trie clean cache before pruning

cmd, core: fix lint

cmd, core: fix rebase

core/state: fix the special case for clique networks

core/state/snapshot: remove useless code

core/state/pruner: capping the snapshot after pruning

cmd, core, eth: fixes

core/rawdb: update db inspector

cmd/geth: polish code

core/state/pruner: fsync bloom filter

cmd, core: print warning log

core/state/pruner: adjust the parameters for bloom filter

cmd, core: create the bloom filter by size

core: polish

core/state/pruner: sanitize invalid bloomfilter size

cmd: address comments

cmd/geth: address comments

cmd/geth: address comment

core/state/pruner: address comments

core/state/pruner: rename homedir to datadir

cmd, core: address comments

core/state/pruner: address comment

core/state: address comments

core, cmd, tests: address comments

core: address comments

core/state/pruner: release the iterator after each commit

core/state/pruner: improve pruner

cmd, core: adjust bloom paramters

core/state/pruner: fix lint

core/state/pruner: fix tests

core: fix rebase

core/state/pruner: remove atomic rename

core/state/pruner: address comments

all: run go mod tidy

core/state/pruner: avoid false-positive for the middle state roots

core/state/pruner: add checks for middle roots

cmd/geth: replace crit with error

* core/state/pruner: fix lint

* core: drop legacy bloom filter

* core/state/snapshot: improve pruner

* core/state/snapshot: polish concurrent logs to report ETA vs. hashes

* core/state/pruner: add progress report for pruning and compaction too

* core: fix snapshot test API

* core/state: fix some pruning logs

* core/state/pruner: support recovering from bloom flush fail

Co-authored-by: Péter Szilágyi <peterke@gmail.com>
This commit is contained in:
gary rong
2021-02-08 19:16:30 +08:00
committed by GitHub
parent bbe694fc52
commit f566dd305e
20 changed files with 1491 additions and 148 deletions

View File

@ -178,7 +178,7 @@ type Tree struct {
// store, on a background thread. If the memory layers from the journal is not
// continuous with disk layer or the journal is missing, all diffs will be discarded
// iff it's in "recovery" mode, otherwise rebuild is mandatory.
func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, async bool, recovery bool) *Tree {
func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, async bool, rebuild bool, recovery bool) (*Tree, error) {
// Create a new, empty snapshot tree
snap := &Tree{
diskdb: diskdb,
@ -192,16 +192,19 @@ func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root comm
// Attempt to load a previously persisted snapshot and rebuild one if failed
head, err := loadSnapshot(diskdb, triedb, cache, root, recovery)
if err != nil {
log.Warn("Failed to load snapshot, regenerating", "err", err)
snap.Rebuild(root)
return snap
if rebuild {
log.Warn("Failed to load snapshot, regenerating", "err", err)
snap.Rebuild(root)
return snap, nil
}
return nil, err // Bail out the error, don't rebuild automatically.
}
// Existing snapshot loaded, seed all the layers
for head != nil {
snap.layers[head.Root()] = head
head = head.Parent()
}
return snap
return snap, nil
}
// waitBuild blocks until the snapshot finishes rebuilding. This method is meant
@ -234,6 +237,39 @@ func (t *Tree) Snapshot(blockRoot common.Hash) Snapshot {
return t.layers[blockRoot]
}
// Snapshots returns all visited layers from the topmost layer with specific
// root and traverses downward. The layer amount is limited by the given number.
// If nodisk is set, then disk layer is excluded.
func (t *Tree) Snapshots(root common.Hash, limits int, nodisk bool) []Snapshot {
t.lock.RLock()
defer t.lock.RUnlock()
if limits == 0 {
return nil
}
layer := t.layers[root]
if layer == nil {
return nil
}
var ret []Snapshot
for {
if _, isdisk := layer.(*diskLayer); isdisk && nodisk {
break
}
ret = append(ret, layer)
limits -= 1
if limits == 0 {
break
}
parent := layer.Parent()
if parent == nil {
break
}
layer = parent
}
return ret
}
// Update adds a new snapshot into the tree, if that can be linked to an existing
// old parent. It is disallowed to insert a disk layer (the origin of all).
func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error {
@ -681,6 +717,38 @@ func (t *Tree) StorageIterator(root common.Hash, account common.Hash, seek commo
return newFastStorageIterator(t, root, account, seek)
}
// Verify iterates the whole state(all the accounts as well as the corresponding storages)
// with the specific root and compares the re-computed hash with the original one.
func (t *Tree) Verify(root common.Hash) error {
acctIt, err := t.AccountIterator(root, common.Hash{})
if err != nil {
return err
}
defer acctIt.Release()
got, err := generateTrieRoot(nil, acctIt, common.Hash{}, stackTrieGenerate, func(db ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) {
storageIt, err := t.StorageIterator(root, accountHash, common.Hash{})
if err != nil {
return common.Hash{}, err
}
defer storageIt.Release()
hash, err := generateTrieRoot(nil, storageIt, accountHash, stackTrieGenerate, nil, stat, false)
if err != nil {
return common.Hash{}, err
}
return hash, nil
}, newGenerateStats(), true)
if err != nil {
return err
}
if got != root {
return fmt.Errorf("state root hash mismatch: got %x, want %x", got, root)
}
return nil
}
// disklayer is an internal helper function to return the disk layer.
// The lock of snapTree is assumed to be held already.
func (t *Tree) disklayer() *diskLayer {