core: improve snapshot journal recovery (#21594)

* core/state/snapshot: introduce snapshot journal version

* core: update the disk layer in an atomic way

* core: persist the disk layer generator periodically

* core/state/snapshot: improve logging

* core/state/snapshot: forcibly ensure the legacy snapshot is matched

* core/state/snapshot: add debug logs

* core, tests: fix tests and special recovery case

* core: polish

* core: add more blockchain tests for snapshot recovery

* core/state: fix comment

* core: add recovery flag for snapshot

* core: add restart after start-after-crash tests

* core/rawdb: fix imports

* core: fix tests

* core: remove log

* core/state/snapshot: fix snapshot

* core: avoid callbacks in SetHead

* core: fix setHead cornercase where the threshold root has state

* core: small docs for the test cases

Co-authored-by: Péter Szilágyi <peterke@gmail.com>
Author: gary rong
Date: 2020-10-30 03:01:58 +08:00
Committed by: GitHub
Parent: 43c278cdf9
Commit: b63e3c37a6
11 changed files with 1792 additions and 159 deletions


@@ -29,6 +29,7 @@ import (
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/metrics"
"github.com/ethereum/go-ethereum/rlp"
"github.com/ethereum/go-ethereum/trie"
)
@@ -136,6 +137,10 @@ type snapshot interface {
// flattening everything down (bad for reorgs).
Journal(buffer *bytes.Buffer) (common.Hash, error)
// LegacyJournal is basically identical to Journal. It's the legacy version for
// flushing the legacy journal. Currently the only purpose of this function is testing.
LegacyJournal(buffer *bytes.Buffer) (common.Hash, error)
// Stale returns whether this layer has become stale (was flattened across) or
// if it's still live.
Stale() bool
@@ -168,10 +173,12 @@ type Tree struct {
// store (with a number of memory layers from a journal), ensuring that the head
// of the snapshot matches the expected one.
//
// If the snapshot is missing or inconsistent, the entirety is deleted and will
// be reconstructed from scratch based on the tries in the key-value store, on a
// background thread.
func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, async bool) *Tree {
// If the snapshot is missing or the disk layer is broken, the entire snapshot
// is deleted and will be reconstructed from scratch based on the tries in the
// key-value store, on a background thread. If the memory layers from the journal
// are not continuous with the disk layer, or the journal is missing, all diffs
// will be discarded iff it's in "recovery" mode, otherwise a rebuild is mandatory.
func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, async bool, recovery bool) *Tree {
// Create a new, empty snapshot tree
snap := &Tree{
diskdb: diskdb,
@@ -183,7 +190,7 @@ func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root comm
defer snap.waitBuild()
}
// Attempt to load a previously persisted snapshot and rebuild one if failed
head, err := loadSnapshot(diskdb, triedb, cache, root)
head, err := loadSnapshot(diskdb, triedb, cache, root, recovery)
if err != nil {
log.Warn("Failed to load snapshot, regenerating", "err", err)
snap.Rebuild(root)
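
As a usage sketch only (not part of this diff; the helper name and the in-memory database are illustrative assumptions), the new constructor signature with the recovery flag might be wired up like this:

package snapsketch

import (
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/core/state/snapshot"
	"github.com/ethereum/go-ethereum/trie"
)

// openSnaps shows how a caller might pass the new recovery flag. In the real
// blockchain code the flag would be derived from a persisted recovery marker
// left behind by an unclean-shutdown rewind; here it is just a parameter.
func openSnaps(headRoot common.Hash, recovered bool) *snapshot.Tree {
	diskdb := rawdb.NewMemoryDatabase() // illustrative; normally the chain's key-value store
	triedb := trie.NewDatabase(diskdb)

	// async=true generates the snapshot on a background thread; recovery=recovered
	// decides whether a non-continuous or missing diff journal merely discards the
	// diffs or forces a full rebuild.
	return snapshot.New(diskdb, triedb, 256, headRoot, true, recovered)
}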
@@ -198,7 +205,7 @@ func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root comm
}
// waitBuild blocks until the snapshot finishes rebuilding. This method is meant
// to be used by tests to ensure we're testing what we believe we are.
func (t *Tree) waitBuild() {
// Find the rebuild termination channel
var done chan struct{}
@@ -415,6 +422,9 @@ func (t *Tree) cap(diff *diffLayer, layers int) *diskLayer {
// diffToDisk merges a bottom-most diff into the persistent disk layer underneath
// it. The method will panic if called on a non-bottom-most diff layer.
//
// The disk layer persistence should be operated in an atomic way. All updates should
// be discarded if the whole transition is not finished.
func diffToDisk(bottom *diffLayer) *diskLayer {
var (
base = bottom.parent.(*diskLayer)
@@ -427,8 +437,7 @@ func diffToDisk(bottom *diffLayer) *diskLayer {
base.genAbort <- abort
stats = <-abort
}
// Start by temporarily deleting the current snapshot block marker. This
// ensures that in the case of a crash, the entire snapshot is invalidated.
// Put the deletion in the batch writer and flush all updates in the final step.
rawdb.DeleteSnapshotRoot(batch)
// Mark the original base as stale as we're going to create a new wrapper
@@ -471,12 +480,6 @@ func diffToDisk(bottom *diffLayer) *diskLayer {
base.cache.Set(hash[:], data)
snapshotCleanAccountWriteMeter.Mark(int64(len(data)))
if batch.ValueSize() > ethdb.IdealBatchSize {
if err := batch.Write(); err != nil {
log.Crit("Failed to write account snapshot", "err", err)
}
batch.Reset()
}
snapshotFlushAccountItemMeter.Mark(1)
snapshotFlushAccountSizeMeter.Mark(int64(len(data)))
}
@@ -505,18 +508,33 @@ func diffToDisk(bottom *diffLayer) *diskLayer {
snapshotFlushStorageItemMeter.Mark(1)
snapshotFlushStorageSizeMeter.Mark(int64(len(data)))
}
if batch.ValueSize() > ethdb.IdealBatchSize {
if err := batch.Write(); err != nil {
log.Crit("Failed to write storage snapshot", "err", err)
}
batch.Reset()
}
}
// Update the snapshot block marker and write any remaining data
rawdb.WriteSnapshotRoot(batch, bottom.root)
// Write out the generator marker
entry := journalGenerator{
Done: base.genMarker == nil,
Marker: base.genMarker,
}
if stats != nil {
entry.Wiping = (stats.wiping != nil)
entry.Accounts = stats.accounts
entry.Slots = stats.slots
entry.Storage = uint64(stats.storage)
}
blob, err := rlp.EncodeToBytes(entry)
if err != nil {
panic(fmt.Sprintf("Failed to RLP encode generator %v", err))
}
rawdb.WriteSnapshotGenerator(batch, blob)
// Flush all the updates in a single db operation. Ensure the
// disk layer transition is atomic.
if err := batch.Write(); err != nil {
log.Crit("Failed to write leftover snapshot", "err", err)
}
log.Debug("Journalled disk layer", "root", bottom.root, "complete", base.genMarker == nil)
res := &diskLayer{
root: bottom.root,
cache: base.cache,
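
A stripped-down sketch (illustrative, not the PR's code) of the atomic write pattern described above: every mutation of the disk layer is staged in one batch and committed with a single Write, so a crash leaves either the old or the new disk layer on disk, never a half-updated mix:

package snapsketch

import (
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/log"
)

// atomicDiskUpdate stages every mutation of the disk layer in one batch, so the
// transition from the old root marker to the new one is all-or-nothing.
func atomicDiskUpdate(db ethdb.KeyValueStore, newRoot common.Hash, generatorBlob []byte) {
	batch := db.NewBatch()

	// Stage the invalidation of the old root marker...
	rawdb.DeleteSnapshotRoot(batch)

	// ...the account/storage snapshot entries would be staged here (omitted)...

	// ...and finally the new root marker plus the generator progress marker.
	rawdb.WriteSnapshotRoot(batch, newRoot)
	rawdb.WriteSnapshotGenerator(batch, generatorBlob)

	// A single write makes the disk layer transition atomic.
	if err := batch.Write(); err != nil {
		log.Crit("Failed to write snapshot batch", "err", err)
	}
}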
@@ -554,7 +572,21 @@ func (t *Tree) Journal(root common.Hash) (common.Hash, error) {
t.lock.Lock()
defer t.lock.Unlock()
// Firstly write out the metadata of the journal
journal := new(bytes.Buffer)
if err := rlp.Encode(journal, journalVersion); err != nil {
return common.Hash{}, err
}
diskroot := t.diskRoot()
if diskroot == (common.Hash{}) {
return common.Hash{}, errors.New("invalid disk root")
}
// Secondly write out the disk layer root, ensuring the
// diff journal is continuous with the disk layer.
if err := rlp.Encode(journal, diskroot); err != nil {
return common.Hash{}, err
}
// Finally write out the journal of each layer in reverse order.
base, err := snap.(snapshot).Journal(journal)
if err != nil {
return common.Hash{}, err
@@ -564,6 +596,29 @@ func (t *Tree) Journal(root common.Hash) (common.Hash, error) {
return base, nil
}
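
For illustration (a sketch under the assumption that the journal is an RLP stream of version, disk root, then per-layer journals, as written above; the function name is hypothetical), a loader could validate the new header before trusting any diff layers:

package snapsketch

import (
	"bytes"
	"errors"
	"fmt"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/rlp"
)

// checkJournalHeader decodes the two metadata entries the Journal method writes
// first (version, then disk layer root) and verifies the diff journal is usable.
func checkJournalHeader(blob []byte, haveDiskRoot common.Hash, wantVersion uint64) error {
	r := rlp.NewStream(bytes.NewReader(blob), 0)

	// First entry: the journal version. Anything unknown means the journal
	// format changed and the diffs cannot be trusted.
	var version uint64
	if err := r.Decode(&version); err != nil {
		return fmt.Errorf("failed to decode journal version: %v", err)
	}
	if version != wantVersion {
		return fmt.Errorf("journal version mismatch: have %d, want %d", version, wantVersion)
	}
	// Second entry: the disk layer root. The diff layers are only meaningful
	// if they are continuous with the disk layer actually present on disk.
	var root common.Hash
	if err := r.Decode(&root); err != nil {
		return fmt.Errorf("failed to decode disk root: %v", err)
	}
	if root != haveDiskRoot {
		return errors.New("journal is not continuous with the disk layer")
	}
	return nil
}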
// LegacyJournal is basically identical to Journal. It's the legacy
// version for flushing the legacy journal. Currently the only purpose
// of this function is testing.
func (t *Tree) LegacyJournal(root common.Hash) (common.Hash, error) {
// Retrieve the head snapshot to journal
snap := t.Snapshot(root)
if snap == nil {
return common.Hash{}, fmt.Errorf("snapshot [%#x] missing", root)
}
// Run the journaling
t.lock.Lock()
defer t.lock.Unlock()
journal := new(bytes.Buffer)
base, err := snap.(snapshot).LegacyJournal(journal)
if err != nil {
return common.Hash{}, err
}
// Store the journal into the database and return
rawdb.WriteSnapshotJournal(t.diskdb, journal.Bytes())
return base, nil
}
// Rebuild wipes all available snapshot data from the persistent database and
// discards all caches and diff layers. Afterwards, it starts a new snapshot
// generator with the given root hash.
@@ -571,6 +626,10 @@ func (t *Tree) Rebuild(root common.Hash) {
t.lock.Lock()
defer t.lock.Unlock()
// Firstly delete any recovery flag in the database, because now we are
// building a brand new snapshot.
rawdb.DeleteSnapshotRecoveryNumber(t.diskdb)
// Track whether there's a wipe currently running and keep it alive if so
var wiper chan struct{}
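
For context on the recovery flag cleared above, here is a sketch of its assumed lifecycle using the rawdb recovery-number accessors this commit introduces (exact signatures are assumptions of this sketch, not quoted from the diff):

package snapsketch

import (
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/ethdb"
)

// recoveryMarkerLifecycle sketches how the recovery marker is assumed to flow:
// written when the head is rewound without a matching snapshot, consulted on
// startup to decide whether journalled diffs may simply be discarded, and
// deleted (as Rebuild does above) once a brand new snapshot generation begins.
func recoveryMarkerLifecycle(db ethdb.KeyValueStore, rewoundTo uint64) bool {
	// Unclean rewind: remember the block number the snapshot was left at.
	rawdb.WriteSnapshotRecoveryNumber(db, rewoundTo)

	// Startup: a present marker means "recovery mode", so a non-continuous
	// journal only drops the diffs instead of forcing a full rebuild.
	recovering := rawdb.ReadSnapshotRecoveryNumber(db) != nil

	// Rebuild: the marker is dropped because the new snapshot starts clean.
	rawdb.DeleteSnapshotRecoveryNumber(db)
	return recovering
}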
@@ -657,6 +716,16 @@ func (t *Tree) disklayer() *diskLayer {
}
}
// diskRoot is an internal helper function to return the disk layer root.
// The lock of snapTree is assumed to be held already.
func (t *Tree) diskRoot() common.Hash {
disklayer := t.disklayer()
if disklayer == nil {
return common.Hash{}
}
return disklayer.Root()
}
// generating is an internal helper function which reports whether the snapshot
// is still under construction.
func (t *Tree) generating() (bool, error) {
@@ -671,3 +740,11 @@ func (t *Tree) generating() (bool, error) {
defer layer.lock.RUnlock()
return layer.genMarker != nil, nil
}
// DiskRoot is an external helper function to return the disk layer root.
func (t *Tree) DiskRoot() common.Hash {
t.lock.Lock()
defer t.lock.Unlock()
return t.diskRoot()
}