all: bloom-filter based pruning mechanism (#21724)

* cmd, core, tests: initial state pruner core: fix db inspector cmd/geth: add verify-state cmd/geth: add verification tool core/rawdb: implement flatdb cmd, core: fix rebase core/state: use new contract code layout core/state/pruner: avoid deleting genesis state cmd/geth: add helper function core, cmd: fix extract genesis core: minor fixes contracts: remove useless core/state/snapshot: plugin stacktrie core: polish core/state/snapshot: iterate storage concurrently core/state/snapshot: fix iteration core: add comments core/state/snapshot: polish code core/state: polish core/state/snapshot: rebase core/rawdb: add comments core/rawdb: fix tests core/rawdb: improve tests core/state/snapshot: fix concurrent iteration core/state: run pruning during the recovery core, trie: implement martin's idea core, eth: delete flatdb and polish pruner trie: fix import core/state/pruner: add log core/state/pruner: fix issues core/state/pruner: don't read back core/state/pruner: fix contract code write core/state/pruner: check root node presence cmd, core: polish log core/state: use HEAD-127 as the target core/state/snapshot: improve tests cmd/geth: fix verification tool cmd/geth: use HEAD as the verification default target all: replace the bloomfilter with martin's fork cmd, core: polish code core, cmd: forcibly delete state root core/state/pruner: add hash64 core/state/pruner: fix blacklist core/state: remove blacklist cmd, core: delete trie clean cache before pruning cmd, core: fix lint cmd, core: fix rebase core/state: fix the special case for clique networks core/state/snapshot: remove useless code core/state/pruner: capping the snapshot after pruning cmd, core, eth: fixes core/rawdb: update db inspector cmd/geth: polish code core/state/pruner: fsync bloom filter cmd, core: print warning log core/state/pruner: adjust the parameters for bloom filter cmd, core: create the bloom filter by size core: polish core/state/pruner: sanitize invalid bloomfilter size cmd: address comments cmd/geth: address comments cmd/geth: address comment core/state/pruner: address comments core/state/pruner: rename homedir to datadir cmd, core: address comments core/state/pruner: address comment core/state: address comments core, cmd, tests: address comments core: address comments core/state/pruner: release the iterator after each commit core/state/pruner: improve pruner cmd, core: adjust bloom paramters core/state/pruner: fix lint core/state/pruner: fix tests core: fix rebase core/state/pruner: remove atomic rename core/state/pruner: address comments all: run go mod tidy core/state/pruner: avoid false-positive for the middle state roots core/state/pruner: add checks for middle roots cmd/geth: replace crit with error * core/state/pruner: fix lint * core: drop legacy bloom filter * core/state/snapshot: improve pruner * core/state/snapshot: polish concurrent logs to report ETA vs. hashes * core/state/pruner: add progress report for pruning and compaction too * core: fix snapshot test API * core/state: fix some pruning logs * core/state/pruner: support recovering from bloom flush fail Co-authored-by: Péter Szilágyi <peterke@gmail.com>
2021-02-08 19:16:30 +08:00
parent bbe694fc52
commit f566dd305e
20 changed files with 1491 additions and 148 deletions
--- a/core/state/snapshot/snapshot.go
+++ b/core/state/snapshot/snapshot.go
@ -178,7 +178,7 @@ type Tree struct {
 // store, on a background thread. If the memory layers from the journal is not
 // continuous with disk layer or the journal is missing, all diffs will be discarded
 // iff it's in "recovery" mode, otherwise rebuild is mandatory.
-func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, async bool, recovery bool) *Tree {
+func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, async bool, rebuild bool, recovery bool) (*Tree, error) {
 	// Create a new, empty snapshot tree
 	snap := &Tree{
 		diskdb: diskdb,
@ -192,16 +192,19 @@ func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root comm
 	// Attempt to load a previously persisted snapshot and rebuild one if failed
 	head, err := loadSnapshot(diskdb, triedb, cache, root, recovery)
 	if err != nil {
-		log.Warn("Failed to load snapshot, regenerating", "err", err)
-		snap.Rebuild(root)
-		return snap
+		if rebuild {
+			log.Warn("Failed to load snapshot, regenerating", "err", err)
+			snap.Rebuild(root)
+			return snap, nil
+		}
+		return nil, err // Bail out the error, don't rebuild automatically.
 	}
 	// Existing snapshot loaded, seed all the layers
 	for head != nil {
 		snap.layers[head.Root()] = head
 		head = head.Parent()
 	}
-	return snap
+	return snap, nil
 }

 // waitBuild blocks until the snapshot finishes rebuilding. This method is meant
@ -234,6 +237,39 @@ func (t *Tree) Snapshot(blockRoot common.Hash) Snapshot {
 	return t.layers[blockRoot]
 }

+// Snapshots returns all visited layers from the topmost layer with specific
+// root and traverses downward. The layer amount is limited by the given number.
+// If nodisk is set, then disk layer is excluded.
+func (t *Tree) Snapshots(root common.Hash, limits int, nodisk bool) []Snapshot {
+	t.lock.RLock()
+	defer t.lock.RUnlock()
+
+	if limits == 0 {
+		return nil
+	}
+	layer := t.layers[root]
+	if layer == nil {
+		return nil
+	}
+	var ret []Snapshot
+	for {
+		if _, isdisk := layer.(*diskLayer); isdisk && nodisk {
+			break
+		}
+		ret = append(ret, layer)
+		limits -= 1
+		if limits == 0 {
+			break
+		}
+		parent := layer.Parent()
+		if parent == nil {
+			break
+		}
+		layer = parent
+	}
+	return ret
+}
+
 // Update adds a new snapshot into the tree, if that can be linked to an existing
 // old parent. It is disallowed to insert a disk layer (the origin of all).
 func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storage map[common.Hash]map[common.Hash][]byte) error {
@ -681,6 +717,38 @@ func (t *Tree) StorageIterator(root common.Hash, account common.Hash, seek commo
 	return newFastStorageIterator(t, root, account, seek)
 }

+// Verify iterates the whole state(all the accounts as well as the corresponding storages)
+// with the specific root and compares the re-computed hash with the original one.
+func (t *Tree) Verify(root common.Hash) error {
+	acctIt, err := t.AccountIterator(root, common.Hash{})
+	if err != nil {
+		return err
+	}
+	defer acctIt.Release()
+
+	got, err := generateTrieRoot(nil, acctIt, common.Hash{}, stackTrieGenerate, func(db ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) {
+		storageIt, err := t.StorageIterator(root, accountHash, common.Hash{})
+		if err != nil {
+			return common.Hash{}, err
+		}
+		defer storageIt.Release()
+
+		hash, err := generateTrieRoot(nil, storageIt, accountHash, stackTrieGenerate, nil, stat, false)
+		if err != nil {
+			return common.Hash{}, err
+		}
+		return hash, nil
+	}, newGenerateStats(), true)
+
+	if err != nil {
+		return err
+	}
+	if got != root {
+		return fmt.Errorf("state root hash mismatch: got %x, want %x", got, root)
+	}
+	return nil
+}
+
 // disklayer is an internal helper function to return the disk layer.
 // The lock of snapTree is assumed to be held already.
 func (t *Tree) disklayer() *diskLayer {