core: improve snapshot journal recovery (#21594)
* core/state/snapshot: introduce snapshot journal version * core: update the disk layer in an atomic way * core: persist the disk layer generator periodically * core/state/snapshot: improve logging * core/state/snapshot: forcibly ensure the legacy snapshot is matched * core/state/snapshot: add debug logs * core, tests: fix tests and special recovery case * core: polish * core: add more blockchain tests for snapshot recovery * core/state: fix comment * core: add recovery flag for snapshot * core: add restart after start-after-crash tests * core/rawdb: fix imports * core: fix tests * core: remove log * core/state/snapshot: fix snapshot * core: avoid callbacks in SetHead * core: fix setHead cornercase where the threshold root has state * core: small docs for the test cases Co-authored-by: Péter Szilágyi <peterke@gmail.com>
This commit is contained in:
@ -33,6 +33,8 @@ import (
|
||||
"github.com/ethereum/go-ethereum/trie"
|
||||
)
|
||||
|
||||
const journalVersion uint64 = 0
|
||||
|
||||
// journalGenerator is a disk layer entry containing the generator progress marker.
|
||||
type journalGenerator struct {
|
||||
Wiping bool // Whether the database was in progress of being wiped
|
||||
@ -61,8 +63,87 @@ type journalStorage struct {
|
||||
Vals [][]byte
|
||||
}
|
||||
|
||||
// loadAndParseLegacyJournal tries to parse the snapshot journal in legacy format.
|
||||
func loadAndParseLegacyJournal(db ethdb.KeyValueStore, base *diskLayer) (snapshot, journalGenerator, error) {
|
||||
// Retrieve the journal, for legacy journal it must exist since even for
|
||||
// 0 layer it stores whether we've already generated the snapshot or are
|
||||
// in progress only.
|
||||
journal := rawdb.ReadSnapshotJournal(db)
|
||||
if len(journal) == 0 {
|
||||
return nil, journalGenerator{}, errors.New("missing or corrupted snapshot journal")
|
||||
}
|
||||
r := rlp.NewStream(bytes.NewReader(journal), 0)
|
||||
|
||||
// Read the snapshot generation progress for the disk layer
|
||||
var generator journalGenerator
|
||||
if err := r.Decode(&generator); err != nil {
|
||||
return nil, journalGenerator{}, fmt.Errorf("failed to load snapshot progress marker: %v", err)
|
||||
}
|
||||
// Load all the snapshot diffs from the journal
|
||||
snapshot, err := loadDiffLayer(base, r)
|
||||
if err != nil {
|
||||
return nil, generator, err
|
||||
}
|
||||
return snapshot, generator, nil
|
||||
}
|
||||
|
||||
// loadAndParseJournal tries to parse the snapshot journal in latest format.
|
||||
func loadAndParseJournal(db ethdb.KeyValueStore, base *diskLayer) (snapshot, journalGenerator, error) {
|
||||
// Retrieve the disk layer generator. It must exist, no matter the
|
||||
// snapshot is fully generated or not. Otherwise the entire disk
|
||||
// layer is invalid.
|
||||
generatorBlob := rawdb.ReadSnapshotGenerator(db)
|
||||
if len(generatorBlob) == 0 {
|
||||
return nil, journalGenerator{}, errors.New("missing snapshot generator")
|
||||
}
|
||||
var generator journalGenerator
|
||||
if err := rlp.DecodeBytes(generatorBlob, &generator); err != nil {
|
||||
return nil, journalGenerator{}, fmt.Errorf("failed to decode snapshot generator: %v", err)
|
||||
}
|
||||
// Retrieve the diff layer journal. It's possible that the journal is
|
||||
// not existent, e.g. the disk layer is generating while that the Geth
|
||||
// crashes without persisting the diff journal.
|
||||
// So if there is no journal, or the journal is not matched with disk
|
||||
// layer, we just discard all diffs and try to recover them later.
|
||||
journal := rawdb.ReadSnapshotJournal(db)
|
||||
if len(journal) == 0 {
|
||||
log.Warn("Loaded snapshot journal", "diskroot", base.root, "diffs", "missing")
|
||||
return base, generator, nil
|
||||
}
|
||||
r := rlp.NewStream(bytes.NewReader(journal), 0)
|
||||
|
||||
// Firstly, resolve the first element as the journal version
|
||||
version, err := r.Uint()
|
||||
if err != nil {
|
||||
return nil, journalGenerator{}, err
|
||||
}
|
||||
if version != journalVersion {
|
||||
return nil, journalGenerator{}, fmt.Errorf("journal version mismatch, want %d got %v", journalVersion, version)
|
||||
}
|
||||
// Secondly, resolve the disk layer root, ensure it's continuous
|
||||
// with disk layer.
|
||||
var root common.Hash
|
||||
if err := r.Decode(&root); err != nil {
|
||||
return nil, journalGenerator{}, errors.New("missing disk layer root")
|
||||
}
|
||||
// The diff journal is not matched with disk, discard them.
|
||||
// It can happen that Geth crashes without persisting the latest
|
||||
// diff journal.
|
||||
if !bytes.Equal(root.Bytes(), base.root.Bytes()) {
|
||||
log.Warn("Loaded snapshot journal", "diskroot", base.root, "diffs", "unmatched")
|
||||
return base, generator, nil
|
||||
}
|
||||
// Load all the snapshot diffs from the journal
|
||||
snapshot, err := loadDiffLayer(base, r)
|
||||
if err != nil {
|
||||
return nil, journalGenerator{}, err
|
||||
}
|
||||
log.Debug("Loaded snapshot journal", "diskroot", base.root, "diffhead", snapshot.Root())
|
||||
return snapshot, generator, nil
|
||||
}
|
||||
|
||||
// loadSnapshot loads a pre-existing state snapshot backed by a key-value store.
|
||||
func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash) (snapshot, error) {
|
||||
func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, recovery bool) (snapshot, error) {
|
||||
// Retrieve the block number and hash of the snapshot, failing if no snapshot
|
||||
// is present in the database (or crashed mid-update).
|
||||
baseRoot := rawdb.ReadSnapshotRoot(diskdb)
|
||||
@ -75,28 +156,36 @@ func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int,
|
||||
cache: fastcache.New(cache * 1024 * 1024),
|
||||
root: baseRoot,
|
||||
}
|
||||
// Retrieve the journal, it must exist since even for 0 layer it stores whether
|
||||
// we've already generated the snapshot or are in progress only
|
||||
journal := rawdb.ReadSnapshotJournal(diskdb)
|
||||
if len(journal) == 0 {
|
||||
return nil, errors.New("missing or corrupted snapshot journal")
|
||||
var legacy bool
|
||||
snapshot, generator, err := loadAndParseJournal(diskdb, base)
|
||||
if err != nil {
|
||||
log.Debug("Failed to load new-format journal", "error", err)
|
||||
snapshot, generator, err = loadAndParseLegacyJournal(diskdb, base)
|
||||
legacy = true
|
||||
}
|
||||
r := rlp.NewStream(bytes.NewReader(journal), 0)
|
||||
|
||||
// Read the snapshot generation progress for the disk layer
|
||||
var generator journalGenerator
|
||||
if err := r.Decode(&generator); err != nil {
|
||||
return nil, fmt.Errorf("failed to load snapshot progress marker: %v", err)
|
||||
}
|
||||
// Load all the snapshot diffs from the journal
|
||||
snapshot, err := loadDiffLayer(base, r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Entire snapshot journal loaded, sanity check the head and return
|
||||
// Journal doesn't exist, don't worry if it's not supposed to
|
||||
// Entire snapshot journal loaded, sanity check the head. If the loaded
|
||||
// snapshot is not matched with current state root, print a warning log
|
||||
// or discard the entire snapshot it's legacy snapshot.
|
||||
//
|
||||
// Possible scenario: Geth was crashed without persisting journal and then
|
||||
// restart, the head is rewound to the point with available state(trie)
|
||||
// which is below the snapshot. In this case the snapshot can be recovered
|
||||
// by re-executing blocks but right now it's unavailable.
|
||||
if head := snapshot.Root(); head != root {
|
||||
return nil, fmt.Errorf("head doesn't match snapshot: have %#x, want %#x", head, root)
|
||||
// If it's legacy snapshot, or it's new-format snapshot but
|
||||
// it's not in recovery mode, returns the error here for
|
||||
// rebuilding the entire snapshot forcibly.
|
||||
if legacy || !recovery {
|
||||
return nil, fmt.Errorf("head doesn't match snapshot: have %#x, want %#x", head, root)
|
||||
}
|
||||
// It's in snapshot recovery, the assumption is held that
|
||||
// the disk layer is always higher than chain head. It can
|
||||
// be eventually recovered when the chain head beyonds the
|
||||
// disk layer.
|
||||
log.Warn("Snapshot is not continuous with chain", "snaproot", head, "chainroot", root)
|
||||
}
|
||||
// Everything loaded correctly, resume any suspended operations
|
||||
if !generator.Done {
|
||||
@ -203,7 +292,9 @@ func (dl *diskLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) {
|
||||
if dl.stale {
|
||||
return common.Hash{}, ErrSnapshotStale
|
||||
}
|
||||
// Write out the generator marker
|
||||
// Write out the generator marker. Note it's a standalone disk layer generator
|
||||
// which is not mixed with journal. It's ok if the generator is persisted while
|
||||
// journal is not.
|
||||
entry := journalGenerator{
|
||||
Done: dl.genMarker == nil,
|
||||
Marker: dl.genMarker,
|
||||
@ -214,9 +305,12 @@ func (dl *diskLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) {
|
||||
entry.Slots = stats.slots
|
||||
entry.Storage = uint64(stats.storage)
|
||||
}
|
||||
if err := rlp.Encode(buffer, entry); err != nil {
|
||||
blob, err := rlp.EncodeToBytes(entry)
|
||||
if err != nil {
|
||||
return common.Hash{}, err
|
||||
}
|
||||
log.Debug("Journalled disk layer", "root", dl.root, "complete", dl.genMarker == nil)
|
||||
rawdb.WriteSnapshotGenerator(dl.diskdb, blob)
|
||||
return dl.root, nil
|
||||
}
|
||||
|
||||
@ -266,5 +360,97 @@ func (dl *diffLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) {
|
||||
if err := rlp.Encode(buffer, storage); err != nil {
|
||||
return common.Hash{}, err
|
||||
}
|
||||
log.Debug("Journalled diff layer", "root", dl.root, "parent", dl.parent.Root())
|
||||
return base, nil
|
||||
}
|
||||
|
||||
// LegacyJournal writes the persistent layer generator stats into a buffer
|
||||
// to be stored in the database as the snapshot journal.
|
||||
//
|
||||
// Note it's the legacy version which is only used in testing right now.
|
||||
func (dl *diskLayer) LegacyJournal(buffer *bytes.Buffer) (common.Hash, error) {
|
||||
// If the snapshot is currently being generated, abort it
|
||||
var stats *generatorStats
|
||||
if dl.genAbort != nil {
|
||||
abort := make(chan *generatorStats)
|
||||
dl.genAbort <- abort
|
||||
|
||||
if stats = <-abort; stats != nil {
|
||||
stats.Log("Journalling in-progress snapshot", dl.root, dl.genMarker)
|
||||
}
|
||||
}
|
||||
// Ensure the layer didn't get stale
|
||||
dl.lock.RLock()
|
||||
defer dl.lock.RUnlock()
|
||||
|
||||
if dl.stale {
|
||||
return common.Hash{}, ErrSnapshotStale
|
||||
}
|
||||
// Write out the generator marker
|
||||
entry := journalGenerator{
|
||||
Done: dl.genMarker == nil,
|
||||
Marker: dl.genMarker,
|
||||
}
|
||||
if stats != nil {
|
||||
entry.Wiping = (stats.wiping != nil)
|
||||
entry.Accounts = stats.accounts
|
||||
entry.Slots = stats.slots
|
||||
entry.Storage = uint64(stats.storage)
|
||||
}
|
||||
if err := rlp.Encode(buffer, entry); err != nil {
|
||||
return common.Hash{}, err
|
||||
}
|
||||
return dl.root, nil
|
||||
}
|
||||
|
||||
// Journal writes the memory layer contents into a buffer to be stored in the
|
||||
// database as the snapshot journal.
|
||||
//
|
||||
// Note it's the legacy version which is only used in testing right now.
|
||||
func (dl *diffLayer) LegacyJournal(buffer *bytes.Buffer) (common.Hash, error) {
|
||||
// Journal the parent first
|
||||
base, err := dl.parent.LegacyJournal(buffer)
|
||||
if err != nil {
|
||||
return common.Hash{}, err
|
||||
}
|
||||
// Ensure the layer didn't get stale
|
||||
dl.lock.RLock()
|
||||
defer dl.lock.RUnlock()
|
||||
|
||||
if dl.Stale() {
|
||||
return common.Hash{}, ErrSnapshotStale
|
||||
}
|
||||
// Everything below was journalled, persist this layer too
|
||||
if err := rlp.Encode(buffer, dl.root); err != nil {
|
||||
return common.Hash{}, err
|
||||
}
|
||||
destructs := make([]journalDestruct, 0, len(dl.destructSet))
|
||||
for hash := range dl.destructSet {
|
||||
destructs = append(destructs, journalDestruct{Hash: hash})
|
||||
}
|
||||
if err := rlp.Encode(buffer, destructs); err != nil {
|
||||
return common.Hash{}, err
|
||||
}
|
||||
accounts := make([]journalAccount, 0, len(dl.accountData))
|
||||
for hash, blob := range dl.accountData {
|
||||
accounts = append(accounts, journalAccount{Hash: hash, Blob: blob})
|
||||
}
|
||||
if err := rlp.Encode(buffer, accounts); err != nil {
|
||||
return common.Hash{}, err
|
||||
}
|
||||
storage := make([]journalStorage, 0, len(dl.storageData))
|
||||
for hash, slots := range dl.storageData {
|
||||
keys := make([]common.Hash, 0, len(slots))
|
||||
vals := make([][]byte, 0, len(slots))
|
||||
for key, val := range slots {
|
||||
keys = append(keys, key)
|
||||
vals = append(vals, val)
|
||||
}
|
||||
storage = append(storage, journalStorage{Hash: hash, Keys: keys, Vals: vals})
|
||||
}
|
||||
if err := rlp.Encode(buffer, storage); err != nil {
|
||||
return common.Hash{}, err
|
||||
}
|
||||
log.Debug("Journalled diff layer", "root", dl.root, "parent", dl.parent.Root())
|
||||
return base, nil
|
||||
}
|
||||
|
Reference in New Issue
Block a user