core: improve snapshot journal recovery (#21594)
* core/state/snapshot: introduce snapshot journal version
* core: update the disk layer in an atomic way
* core: persist the disk layer generator periodically
* core/state/snapshot: improve logging
* core/state/snapshot: forcibly ensure the legacy snapshot is matched
* core/state/snapshot: add debug logs
* core, tests: fix tests and special recovery case
* core: polish
* core: add more blockchain tests for snapshot recovery
* core/state: fix comment
* core: add recovery flag for snapshot
* core: add restart after start-after-crash tests
* core/rawdb: fix imports
* core: fix tests
* core: remove log
* core/state/snapshot: fix snapshot
* core: avoid callbacks in SetHead
* core: fix setHead cornercase where the threshold root has state
* core: small docs for the test cases

Co-authored-by: Péter Szilágyi <peterke@gmail.com>
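The most visible API change in this commit is the extra recovery parameter on snapshot.New (see the snapshot.go hunks below). The following is a minimal wiring sketch only: the in-memory database, the cache size and the placeholder root are illustrative assumptions, not taken from this commit.

```go
package main

import (
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/core/state/snapshot"
	"github.com/ethereum/go-ethereum/trie"
)

func main() {
	// Illustrative setup: an in-memory database stands in for the node's real
	// chain database.
	diskdb := rawdb.NewMemoryDatabase()
	triedb := trie.NewDatabase(diskdb)

	// root is assumed to be the persistent state root the snapshot should track.
	var root common.Hash

	// The new trailing parameter is the recovery flag: when true, a journal that
	// is missing or not continuous with the disk layer only discards the diff
	// layers; when false, such a mismatch forces a full rebuild.
	snaps := snapshot.New(diskdb, triedb, 256, root, true /* async */, false /* recovery */)
	_ = snaps
}
```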
@@ -29,6 +29,7 @@ import (
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/metrics"
	"github.com/ethereum/go-ethereum/rlp"
	"github.com/ethereum/go-ethereum/trie"
)

@@ -136,6 +137,10 @@ type snapshot interface {
	// flattening everything down (bad for reorgs).
	Journal(buffer *bytes.Buffer) (common.Hash, error)

	// LegacyJournal is basically identical to Journal. It's the legacy version for
	// flushing the legacy journal. Now the only purpose of this function is testing.
	LegacyJournal(buffer *bytes.Buffer) (common.Hash, error)

	// Stale returns whether this layer has become stale (was flattened across) or
	// if it's still live.
	Stale() bool
@@ -168,10 +173,12 @@ type Tree struct {
// store (with a number of memory layers from a journal), ensuring that the head
// of the snapshot matches the expected one.
//
// If the snapshot is missing or inconsistent, the entirety is deleted and will
// be reconstructed from scratch based on the tries in the key-value store, on a
// background thread.
func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, async bool) *Tree {
// If the snapshot is missing or the disk layer is broken, the entirety is deleted
// and will be reconstructed from scratch based on the tries in the key-value
// store, on a background thread. If the memory layers from the journal are not
// continuous with the disk layer or the journal is missing, all diffs will be
// discarded iff it's in "recovery" mode, otherwise a rebuild is mandatory.
func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, async bool, recovery bool) *Tree {
	// Create a new, empty snapshot tree
	snap := &Tree{
		diskdb: diskdb,
@@ -183,7 +190,7 @@ func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root comm
		defer snap.waitBuild()
	}
	// Attempt to load a previously persisted snapshot and rebuild one if failed
	head, err := loadSnapshot(diskdb, triedb, cache, root)
	head, err := loadSnapshot(diskdb, triedb, cache, root, recovery)
	if err != nil {
		log.Warn("Failed to load snapshot, regenerating", "err", err)
		snap.Rebuild(root)
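The new doc comment above encodes a three-way policy at load time. As a plain-Go restatement (a hypothetical helper for illustration, not the actual loader code in this package):

```go
package main

import "fmt"

// resolveJournal sketches the load-time policy documented above; it is a
// hypothetical helper, not the actual loadSnapshot implementation.
func resolveJournal(journalContinuous, recovery bool) (keepDiffs, keepDiskLayer bool) {
	if journalContinuous {
		return true, true // journal matches the disk layer: reuse everything
	}
	if recovery {
		return false, true // drop the diff layers, keep the (older) disk layer
	}
	return false, false // broken journal and no recovery marker: full rebuild
}

func main() {
	fmt.Println(resolveJournal(false, true)) // false true
}
```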
@@ -198,7 +205,7 @@ func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root comm
}

// waitBuild blocks until the snapshot finishes rebuilding. This method is meant
// to be used by tests to ensure we're testing what we believe we are.
func (t *Tree) waitBuild() {
	// Find the rebuild termination channel
	var done chan struct{}
@@ -415,6 +422,9 @@ func (t *Tree) cap(diff *diffLayer, layers int) *diskLayer {

// diffToDisk merges a bottom-most diff into the persistent disk layer underneath
// it. The method will panic if called onto a non-bottom-most diff layer.
//
// The disk layer persistence should be operated in an atomic way. All updates should
// be discarded if the whole transition is not finished.
func diffToDisk(bottom *diffLayer) *diskLayer {
	var (
		base = bottom.parent.(*diskLayer)
@@ -427,8 +437,7 @@ func diffToDisk(bottom *diffLayer) *diskLayer {
		base.genAbort <- abort
		stats = <-abort
	}
	// Start by temporarily deleting the current snapshot block marker. This
	// ensures that in the case of a crash, the entire snapshot is invalidated.
	// Put the deletion in the batch writer, flush all updates in the final step.
	rawdb.DeleteSnapshotRoot(batch)

	// Mark the original base as stale as we're going to create a new wrapper
@@ -471,12 +480,6 @@ func diffToDisk(bottom *diffLayer) *diskLayer {
		base.cache.Set(hash[:], data)
		snapshotCleanAccountWriteMeter.Mark(int64(len(data)))

		if batch.ValueSize() > ethdb.IdealBatchSize {
			if err := batch.Write(); err != nil {
				log.Crit("Failed to write account snapshot", "err", err)
			}
			batch.Reset()
		}
		snapshotFlushAccountItemMeter.Mark(1)
		snapshotFlushAccountSizeMeter.Mark(int64(len(data)))
	}
@@ -505,18 +508,33 @@ func diffToDisk(bottom *diffLayer) *diskLayer {
			snapshotFlushStorageItemMeter.Mark(1)
			snapshotFlushStorageSizeMeter.Mark(int64(len(data)))
		}
		if batch.ValueSize() > ethdb.IdealBatchSize {
			if err := batch.Write(); err != nil {
				log.Crit("Failed to write storage snapshot", "err", err)
			}
			batch.Reset()
		}
	}
	// Update the snapshot block marker and write any remainder data
	rawdb.WriteSnapshotRoot(batch, bottom.root)

	// Write out the generator marker
	entry := journalGenerator{
		Done:   base.genMarker == nil,
		Marker: base.genMarker,
	}
	if stats != nil {
		entry.Wiping = (stats.wiping != nil)
		entry.Accounts = stats.accounts
		entry.Slots = stats.slots
		entry.Storage = uint64(stats.storage)
	}
	blob, err := rlp.EncodeToBytes(entry)
	if err != nil {
		panic(fmt.Sprintf("Failed to RLP encode generator %v", err))
	}
	rawdb.WriteSnapshotGenerator(batch, blob)

	// Flush all the updates in a single db operation. Ensure the
	// disk layer transition is atomic.
	if err := batch.Write(); err != nil {
		log.Crit("Failed to write leftover snapshot", "err", err)
	}
	log.Debug("Journalled disk layer", "root", bottom.root, "complete", base.genMarker == nil)
	res := &diskLayer{
		root:  bottom.root,
		cache: base.cache,
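In the hunk above the generator progress is written into the same batch as the new snapshot root, so a crash either leaves the old disk layer untouched or lands on the new one together with a matching generator marker. The sketch below illustrates the read side on startup: the struct mirrors the fields visible in this hunk (the real journalGenerator declaration lives in journal.go and its field order governs RLP decoding, so treat the layout here as an assumption), and ReadSnapshotGenerator is assumed to be the rawdb counterpart of the WriteSnapshotGenerator call used above.

```go
package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/rlp"
)

// generatorEntry mirrors the journalGenerator fields used in the hunk above.
// The field order is an assumption; RLP decoding only works if it matches the
// real declaration in core/state/snapshot/journal.go.
type generatorEntry struct {
	Wiping   bool   // Whether the database was in the middle of being wiped
	Done     bool   // Whether generation finished
	Marker   []byte // Last account/slot key reached by the generator
	Accounts uint64
	Slots    uint64
	Storage  uint64
}

func main() {
	db := rawdb.NewMemoryDatabase()

	// In a real node the blob would have been written by diffToDisk, or by the
	// periodic generator persistence added in this commit.
	blob := rawdb.ReadSnapshotGenerator(db)
	if len(blob) == 0 {
		fmt.Println("no generator entry, snapshot must be (re)generated from scratch")
		return
	}
	var entry generatorEntry
	if err := rlp.DecodeBytes(blob, &entry); err != nil {
		fmt.Println("corrupted generator entry:", err)
		return
	}
	if !entry.Done {
		fmt.Printf("resume generation from marker %x (%d accounts done)\n", entry.Marker, entry.Accounts)
	}
}
```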
@@ -554,7 +572,21 @@ func (t *Tree) Journal(root common.Hash) (common.Hash, error) {
	t.lock.Lock()
	defer t.lock.Unlock()

	// Firstly write out the metadata of the journal
	journal := new(bytes.Buffer)
	if err := rlp.Encode(journal, journalVersion); err != nil {
		return common.Hash{}, err
	}
	diskroot := t.diskRoot()
	if diskroot == (common.Hash{}) {
		return common.Hash{}, errors.New("invalid disk root")
	}
	// Secondly write out the disk layer root, ensuring the
	// diff journal is continuous with the disk layer.
	if err := rlp.Encode(journal, diskroot); err != nil {
		return common.Hash{}, err
	}
	// Finally write out the journal of each layer in reverse order.
	base, err := snap.(snapshot).Journal(journal)
	if err != nil {
		return common.Hash{}, err
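With this change the journal starts with a small header, written before any diff layer data: the journal version followed by the root of the disk layer it was built on. A loader can therefore reject an outdated or discontinuous journal before touching the diffs. The following is a minimal sketch of such a check, not the actual loader in journal.go; the version value and the expected disk root are assumptions.

```go
package main

import (
	"bytes"
	"errors"
	"fmt"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/rlp"
)

// checkJournalHeader validates the header written by Tree.Journal above: an
// RLP-encoded version number followed by the disk layer root. The expected
// version and root are supplied by the caller in this sketch.
func checkJournalHeader(blob []byte, wantVersion uint64, wantDiskRoot common.Hash) (*rlp.Stream, error) {
	r := rlp.NewStream(bytes.NewReader(blob), 0)

	var version uint64
	if err := r.Decode(&version); err != nil {
		return nil, fmt.Errorf("failed to read journal version: %v", err)
	}
	if version != wantVersion {
		return nil, fmt.Errorf("unsupported journal version %d", version)
	}
	var root common.Hash
	if err := r.Decode(&root); err != nil {
		return nil, fmt.Errorf("failed to read disk layer root: %v", err)
	}
	if root != wantDiskRoot {
		// The journal was built on top of a different disk layer; the diff
		// layers it contains cannot be applied.
		return nil, errors.New("journal not continuous with disk layer")
	}
	// The remainder of the stream holds the diff layer journals.
	return r, nil
}

func main() {
	// Build a header-only journal the same way Tree.Journal does.
	buf := new(bytes.Buffer)
	rlp.Encode(buf, uint64(0))      // journalVersion is assumed to start at 0
	rlp.Encode(buf, common.Hash{1}) // pretend disk layer root

	if _, err := checkJournalHeader(buf.Bytes(), 0, common.Hash{1}); err != nil {
		fmt.Println("rejected:", err)
		return
	}
	fmt.Println("journal header accepted")
}
```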
@@ -564,6 +596,29 @@ func (t *Tree) Journal(root common.Hash) (common.Hash, error) {
	return base, nil
}

// LegacyJournal is basically identical to Journal. It's the legacy
// version for flushing the legacy journal. Now the only purpose of this
// function is testing.
func (t *Tree) LegacyJournal(root common.Hash) (common.Hash, error) {
	// Retrieve the head snapshot to journal
	snap := t.Snapshot(root)
	if snap == nil {
		return common.Hash{}, fmt.Errorf("snapshot [%#x] missing", root)
	}
	// Run the journaling
	t.lock.Lock()
	defer t.lock.Unlock()

	journal := new(bytes.Buffer)
	base, err := snap.(snapshot).LegacyJournal(journal)
	if err != nil {
		return common.Hash{}, err
	}
	// Store the journal into the database and return
	rawdb.WriteSnapshotJournal(t.diskdb, journal.Bytes())
	return base, nil
}

// Rebuild wipes all available snapshot data from the persistent database and
// discards all caches and diff layers. Afterwards, it starts a new snapshot
// generator with the given root hash.
@@ -571,6 +626,10 @@ func (t *Tree) Rebuild(root common.Hash) {
	t.lock.Lock()
	defer t.lock.Unlock()

	// Firstly delete any recovery flag in the database, since we are now
	// building a brand new snapshot.
	rawdb.DeleteSnapshotRecoveryNumber(t.diskdb)

	// Track whether there's a wipe currently running and keep it alive if so
	var wiper chan struct{}
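Rebuild drops the recovery marker because a freshly generated snapshot no longer needs the lenient load path; the marker is presumably set on the SetHead path referenced in the commit message, when a rewind leaves the persisted snapshot ahead of the new chain head. A small sketch of the intended round trip, assuming WriteSnapshotRecoveryNumber and ReadSnapshotRecoveryNumber are the rawdb counterparts of the DeleteSnapshotRecoveryNumber call shown above:

```go
package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/core/rawdb"
)

func main() {
	db := rawdb.NewMemoryDatabase()

	// Assumed write/read counterparts of DeleteSnapshotRecoveryNumber: record
	// the block number at which the snapshot was left dangling, so the next
	// startup knows to load it in recovery mode.
	rawdb.WriteSnapshotRecoveryNumber(db, 1_234_567)

	recovery := rawdb.ReadSnapshotRecoveryNumber(db) != nil
	fmt.Println("load snapshot with recovery =", recovery)

	// Once a brand new snapshot is generated, the marker is dropped again
	// (this is the call shown in the Rebuild hunk above).
	rawdb.DeleteSnapshotRecoveryNumber(db)
}
```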
@@ -657,6 +716,16 @@ func (t *Tree) disklayer() *diskLayer {
	}
}

// diskRoot is an internal helper function to return the disk layer root.
// The lock of snapTree is assumed to be held already.
func (t *Tree) diskRoot() common.Hash {
	disklayer := t.disklayer()
	if disklayer == nil {
		return common.Hash{}
	}
	return disklayer.Root()
}

// generating is an internal helper function which reports whether the snapshot
// is still under construction.
func (t *Tree) generating() (bool, error) {
@@ -671,3 +740,11 @@ func (t *Tree) generating() (bool, error) {
	defer layer.lock.RUnlock()
	return layer.genMarker != nil, nil
}

// DiskRoot is an external helper function to return the disk layer root.
func (t *Tree) DiskRoot() common.Hash {
	t.lock.Lock()
	defer t.lock.Unlock()

	return t.diskRoot()
}