all: bloom-filter based pruning mechanism (#21724)

* cmd, core, tests: initial state pruner

core: fix db inspector

cmd/geth: add verify-state

cmd/geth: add verification tool

core/rawdb: implement flatdb

cmd, core: fix rebase

core/state: use new contract code layout

core/state/pruner: avoid deleting genesis state

cmd/geth: add helper function

core, cmd: fix extract genesis

core: minor fixes

contracts: remove useless code

core/state/snapshot: plugin stacktrie

core: polish

core/state/snapshot: iterate storage concurrently

core/state/snapshot: fix iteration

core: add comments

core/state/snapshot: polish code

core/state: polish

core/state/snapshot: rebase

core/rawdb: add comments

core/rawdb: fix tests

core/rawdb: improve tests

core/state/snapshot: fix concurrent iteration

core/state: run pruning during the recovery

core, trie: implement martin's idea

core, eth: delete flatdb and polish pruner

trie: fix import

core/state/pruner: add log

core/state/pruner: fix issues

core/state/pruner: don't read back

core/state/pruner: fix contract code write

core/state/pruner: check root node presence

cmd, core: polish log

core/state: use HEAD-127 as the target

core/state/snapshot: improve tests

cmd/geth: fix verification tool

cmd/geth: use HEAD as the verification default target

all: replace the bloomfilter with martin's fork

cmd, core: polish code

core, cmd: forcibly delete state root

core/state/pruner: add hash64

core/state/pruner: fix blacklist

core/state: remove blacklist

cmd, core: delete trie clean cache before pruning

cmd, core: fix lint

cmd, core: fix rebase

core/state: fix the special case for clique networks

core/state/snapshot: remove useless code

core/state/pruner: capping the snapshot after pruning

cmd, core, eth: fixes

core/rawdb: update db inspector

cmd/geth: polish code

core/state/pruner: fsync bloom filter

cmd, core: print warning log

core/state/pruner: adjust the parameters for bloom filter

cmd, core: create the bloom filter by size

core: polish

core/state/pruner: sanitize invalid bloomfilter size

cmd: address comments

cmd/geth: address comments

cmd/geth: address comment

core/state/pruner: address comments

core/state/pruner: rename homedir to datadir

cmd, core: address comments

core/state/pruner: address comment

core/state: address comments

core, cmd, tests: address comments

core: address comments

core/state/pruner: release the iterator after each commit

core/state/pruner: improve pruner

cmd, core: adjust bloom parameters

core/state/pruner: fix lint

core/state/pruner: fix tests

core: fix rebase

core/state/pruner: remove atomic rename

core/state/pruner: address comments

all: run go mod tidy

core/state/pruner: avoid false positives for the middle state roots

core/state/pruner: add checks for middle roots

cmd/geth: replace crit with error

* core/state/pruner: fix lint

* core: drop legacy bloom filter

* core/state/snapshot: improve pruner

* core/state/snapshot: polish concurrent logs to report ETA vs. hashes

* core/state/pruner: add progress report for pruning and compaction too

* core: fix snapshot test API

* core/state: fix some pruning logs

* core/state/pruner: support recovering from bloom flush fail

Co-authored-by: Péter Szilágyi <peterke@gmail.com>
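
The squash log above only names the moving parts, so a rough sketch of the mechanism may help. The pruner walks the state trie of the target root (HEAD-127 per the log, with the genesis state always retained), records every reachable node in a bloom filter, then sweeps the database and deletes any node whose hash misses the filter. Everything below is a hypothetical stand-in (the toy filter, the string keys, the hard-coded lists), not geth's code; it only illustrates the mark-and-sweep shape and why false positives are harmless:

```go
package main

import (
	"fmt"
	"hash/fnv"
)

// toyBloom is a tiny fixed-size bloom filter using double hashing; the real
// pruner uses a production filter (per the log, a fork of an existing
// bloomfilter package) and fsyncs it to disk so an interrupted pruning run
// can recover ("support recovering from bloom flush fail").
type toyBloom struct {
	bits []uint64
	k    uint64 // number of probe positions per key
}

func newToyBloom(nbits, k uint64) *toyBloom {
	return &toyBloom{bits: make([]uint64, (nbits+63)/64), k: k}
}

// probes derives two independent hashes for double hashing.
func probes(key string) (uint64, uint64) {
	h := fnv.New64a()
	h.Write([]byte(key))
	h1 := h.Sum64()
	h.Write([]byte{0xff})
	return h1, h.Sum64()
}

func (b *toyBloom) Add(key string) {
	n := uint64(len(b.bits)) * 64
	h1, h2 := probes(key)
	for i := uint64(0); i < b.k; i++ {
		bit := (h1 + i*h2) % n
		b.bits[bit/64] |= 1 << (bit % 64)
	}
}

func (b *toyBloom) Contains(key string) bool {
	n := uint64(len(b.bits)) * 64
	h1, h2 := probes(key)
	for i := uint64(0); i < b.k; i++ {
		bit := (h1 + i*h2) % n
		if b.bits[bit/64]&(1<<(bit%64)) == 0 {
			return false
		}
	}
	return true
}

func main() {
	// Phase 1 (mark): walk the trie of the target root and record every node.
	// The key lists stand in for a real trie iterator and key-value store.
	live := []string{"root", "branchA", "leaf1", "leaf2"}
	filter := newToyBloom(1<<16, 4)
	for _, node := range live {
		filter.Add(node)
	}
	// Phase 2 (sweep): scan the whole database and delete anything unmarked.
	// A false positive only keeps extra garbage around; it never deletes
	// live data, which is what makes a bloom filter safe for pruning.
	db := []string{"root", "branchA", "leaf1", "leaf2", "oldLeaf", "oldBranch"}
	for _, key := range db {
		if !filter.Contains(key) {
			fmt.Println("pruning stale node:", key)
		}
	}
}
```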
Author: gary rong
Date: 2021-02-08 19:16:30 +08:00
Committed by: GitHub
Parent: bbe694fc52
Commit: f566dd305e
20 changed files with 1491 additions and 148 deletions
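
Several entries in the log deal with sizing the filter ("create the bloom filter by size", "adjust the bloom parameters", "sanitize invalid bloomfilter size"). The patch text here doesn't show the formulas, but the standard bloom relations apply: a filter of m bits holding n keys with k hash functions yields a false-positive rate of roughly (1 - e^(-kn/m))^k, minimized at k = (m/n)·ln 2. A small sketch with made-up defaults and inputs:

```go
package main

import (
	"fmt"
	"math"
)

// bloomParams derives a hash count and the expected false-positive rate for
// a filter of sizeMB megabytes expected to hold n keys. The 2048 MB fallback
// and the numbers in main are illustrative assumptions, not geth's values.
func bloomParams(sizeMB int, n float64) (k int, fpRate float64) {
	if sizeMB <= 0 {
		sizeMB = 2048 // sanitize an invalid size with a default
	}
	m := float64(sizeMB) * 8 * 1024 * 1024 // filter size in bits
	k = int(math.Round(m / n * math.Ln2))  // optimal probe count
	if k < 1 {
		k = 1
	}
	fpRate = math.Pow(1-math.Exp(-float64(k)*n/m), float64(k))
	return k, fpRate
}

func main() {
	k, p := bloomParams(2048, 600e6) // ~600M trie nodes, 2 GiB filter
	fmt.Printf("probes: %d, false-positive rate: %.3g\n", k, p)
}
```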

@@ -17,6 +17,7 @@
package snapshot

import (
	"encoding/binary"
	"fmt"
	"math/big"
	"math/rand"
@@ -369,3 +370,69 @@ func TestPostCapBasicDataAccess(t *testing.T) {
		t.Error("expected error capping the disk layer, got none")
	}
}

// TestSnaphots tests the functionality for retrieving the snapshot
// with a given head root and the desired depth.
func TestSnaphots(t *testing.T) {
	// setAccount is a helper to construct a random account entry and assign it to
	// an account slot in a snapshot
	setAccount := func(accKey string) map[common.Hash][]byte {
		return map[common.Hash][]byte{
			common.HexToHash(accKey): randomAccount(),
		}
	}
	makeRoot := func(height uint64) common.Hash {
		var buffer [8]byte
		binary.BigEndian.PutUint64(buffer[:], height)
		return common.BytesToHash(buffer[:])
	}
	// Create a starting base layer and a snapshot tree out of it
	base := &diskLayer{
		diskdb: rawdb.NewMemoryDatabase(),
		root:   common.HexToHash("0x01"),
		cache:  fastcache.New(1024 * 500),
	}
	snaps := &Tree{
		layers: map[common.Hash]snapshot{
			base.root: base,
		},
	}
	// Construct the snapshots with 128 layers
	var (
		last = common.HexToHash("0x01")
		head common.Hash
	)
	// Flush another 128 layers, one diff will be flattened into the parent.
	for i := 0; i < 128; i++ {
		head = makeRoot(uint64(i + 2))
		snaps.Update(head, last, nil, setAccount(fmt.Sprintf("%d", i+2)), nil)
		last = head
		snaps.Cap(head, 128) // 129 layers (128 diffs + 1 disk) are allowed, the 129th is the persistent layer
	}
	var cases = []struct {
		headRoot     common.Hash
		limit        int
		nodisk       bool
		expected     int
		expectBottom common.Hash
	}{
		{head, 0, false, 0, common.Hash{}},
		{head, 64, false, 64, makeRoot(127 + 2 - 63)},
		{head, 128, false, 128, makeRoot(2)},              // All diff layers
		{head, 129, true, 128, makeRoot(2)},               // All diff layers
		{head, 129, false, 129, common.HexToHash("0x01")}, // All diff layers + disk layer
	}
	for _, c := range cases {
		layers := snaps.Snapshots(c.headRoot, c.limit, c.nodisk)
		if len(layers) != c.expected {
			t.Fatalf("Returned snapshot layers are mismatched, want %v, got %v", c.expected, len(layers))
		}
		if len(layers) == 0 {
			continue
		}
		bottommost := layers[len(layers)-1]
		if bottommost.Root() != c.expectBottom {
			t.Fatalf("Snapshot mismatch, want %v, got %v", c.expectBottom, bottommost.Root())
		}
	}
}
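
A note on the cap arithmetic the test pins down: the tree retains at most 128 diff layers above a single persistent disk layer, so asking Snapshots for 64 layers bottoms out 63 diffs below head (makeRoot(127 + 2 - 63) = makeRoot(66)), and only a limit of 129 with nodisk=false reaches the disk layer itself. Since no state older than 128 blocks survives as a diff layer, HEAD-127 is the deepest root still recoverable from the snapshot tree, which is presumably why the pruner targets it ("core/state: use HEAD-127 as the target" in the log above).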