core: define and test chain rewind corner cases (#21409)

* core: define and test chain reparation cornercases

* core: write up a variety of set-head tests

* core, eth: unify chain rollbacks, handle all the cases

* core: make linter smile

* core: remove commented out legacy code

* core, eth/downloader: fix review comments

* core: revert a removed recovery mechanism
This commit is contained in:
Péter Szilágyi
2020-08-20 13:01:24 +03:00
committed by GitHub
parent 0bdd295cc0
commit 8cbdc8638f
13 changed files with 3953 additions and 215 deletions

View File

@ -187,6 +187,32 @@ func WriteHeadFastBlockHash(db ethdb.KeyValueWriter, hash common.Hash) {
}
}
// ReadLastPivotNumber retrieves the number of the last pivot block. If the node
// full synced, the last pivot will always be nil.
func ReadLastPivotNumber(db ethdb.KeyValueReader) *uint64 {
data, _ := db.Get(lastPivotKey)
if len(data) == 0 {
return nil
}
var pivot uint64
if err := rlp.DecodeBytes(data, &pivot); err != nil {
log.Error("Invalid pivot block number in database", "err", err)
return nil
}
return &pivot
}
// WriteLastPivotNumber stores the number of the last pivot block.
func WriteLastPivotNumber(db ethdb.KeyValueWriter, pivot uint64) {
enc, err := rlp.EncodeToBytes(pivot)
if err != nil {
log.Crit("Failed to encode pivot block number", "err", err)
}
if err := db.Put(lastPivotKey, enc); err != nil {
log.Crit("Failed to store pivot block number", "err", err)
}
}
// ReadFastTrieProgress retrieves the number of tries nodes fast synced to allow
// reporting correct numbers across restarts.
func ReadFastTrieProgress(db ethdb.KeyValueReader) uint64 {

View File

@ -21,6 +21,7 @@ import (
"errors"
"fmt"
"os"
"sync/atomic"
"time"
"github.com/ethereum/go-ethereum/common"
@ -53,6 +54,22 @@ func (frdb *freezerdb) Close() error {
return nil
}
// Freeze is a helper method used for external testing to trigger and block until
// a freeze cycle completes, without having to sleep for a minute to trigger the
// automatic background run.
func (frdb *freezerdb) Freeze(threshold uint64) {
// Set the freezer threshold to a temporary value
defer func(old uint64) {
atomic.StoreUint64(&frdb.AncientStore.(*freezer).threshold, old)
}(atomic.LoadUint64(&frdb.AncientStore.(*freezer).threshold))
atomic.StoreUint64(&frdb.AncientStore.(*freezer).threshold, threshold)
// Trigger a freeze cycle and block until it's done
trigger := make(chan struct{}, 1)
frdb.AncientStore.(*freezer).trigger <- trigger
<-trigger
}
// nofreezedb is a database wrapper that disables freezer data retrievals.
type nofreezedb struct {
ethdb.KeyValueStore

View File

@ -70,12 +70,16 @@ type freezer struct {
// WARNING: The `frozen` field is accessed atomically. On 32 bit platforms, only
// 64-bit aligned fields can be atomic. The struct is guaranteed to be so aligned,
// so take advantage of that (https://golang.org/pkg/sync/atomic/#pkg-note-BUG).
frozen uint64 // Number of blocks already frozen
frozen uint64 // Number of blocks already frozen
threshold uint64 // Number of recent blocks not to freeze (params.FullImmutabilityThreshold apart from tests)
tables map[string]*freezerTable // Data tables for storing everything
instanceLock fileutil.Releaser // File-system lock to prevent double opens
quit chan struct{}
closeOnce sync.Once
trigger chan chan struct{} // Manual blocking freeze trigger, test determinism
quit chan struct{}
closeOnce sync.Once
}
// newFreezer creates a chain freezer that moves ancient chain data into
@ -102,8 +106,10 @@ func newFreezer(datadir string, namespace string) (*freezer, error) {
}
// Open all the supported data tables
freezer := &freezer{
threshold: params.FullImmutabilityThreshold,
tables: make(map[string]*freezerTable),
instanceLock: lock,
trigger: make(chan chan struct{}),
quit: make(chan struct{}),
}
for name, disableSnappy := range freezerNoSnappy {
@ -261,7 +267,10 @@ func (f *freezer) Sync() error {
func (f *freezer) freeze(db ethdb.KeyValueStore) {
nfdb := &nofreezedb{KeyValueStore: db}
backoff := false
var (
backoff bool
triggered chan struct{} // Used in tests
)
for {
select {
case <-f.quit:
@ -270,9 +279,16 @@ func (f *freezer) freeze(db ethdb.KeyValueStore) {
default:
}
if backoff {
// If we were doing a manual trigger, notify it
if triggered != nil {
triggered <- struct{}{}
triggered = nil
}
select {
case <-time.NewTimer(freezerRecheckInterval).C:
backoff = false
case triggered = <-f.trigger:
backoff = false
case <-f.quit:
return
}
@ -285,18 +301,20 @@ func (f *freezer) freeze(db ethdb.KeyValueStore) {
continue
}
number := ReadHeaderNumber(nfdb, hash)
threshold := atomic.LoadUint64(&f.threshold)
switch {
case number == nil:
log.Error("Current full block number unavailable", "hash", hash)
backoff = true
continue
case *number < params.FullImmutabilityThreshold:
log.Debug("Current full block not old enough", "number", *number, "hash", hash, "delay", params.FullImmutabilityThreshold)
case *number < threshold:
log.Debug("Current full block not old enough", "number", *number, "hash", hash, "delay", threshold)
backoff = true
continue
case *number-params.FullImmutabilityThreshold <= f.frozen:
case *number-threshold <= f.frozen:
log.Debug("Ancient blocks frozen already", "number", *number, "hash", hash, "frozen", f.frozen)
backoff = true
continue
@ -308,7 +326,7 @@ func (f *freezer) freeze(db ethdb.KeyValueStore) {
continue
}
// Seems we have data ready to be frozen, process in usable batches
limit := *number - params.FullImmutabilityThreshold
limit := *number - threshold
if limit-f.frozen > freezerBatchLimit {
limit = f.frozen + freezerBatchLimit
}
@ -317,7 +335,7 @@ func (f *freezer) freeze(db ethdb.KeyValueStore) {
first = f.frozen
ancients = make([]common.Hash, 0, limit-f.frozen)
)
for f.frozen < limit {
for f.frozen <= limit {
// Retrieves all the components of the canonical block
hash := ReadCanonicalHash(nfdb, f.frozen)
if hash == (common.Hash{}) {
@ -368,11 +386,15 @@ func (f *freezer) freeze(db ethdb.KeyValueStore) {
log.Crit("Failed to delete frozen canonical blocks", "err", err)
}
batch.Reset()
// Wipe out side chain also.
// Wipe out side chains also and track dangling side chians
var dangling []common.Hash
for number := first; number < f.frozen; number++ {
// Always keep the genesis block in active database
if number != 0 {
for _, hash := range ReadAllHashes(db, number) {
dangling = ReadAllHashes(db, number)
for _, hash := range dangling {
log.Trace("Deleting side chain", "number", number, "hash", hash)
DeleteBlock(batch, hash, number)
}
}
@ -380,6 +402,41 @@ func (f *freezer) freeze(db ethdb.KeyValueStore) {
if err := batch.Write(); err != nil {
log.Crit("Failed to delete frozen side blocks", "err", err)
}
batch.Reset()
// Step into the future and delete and dangling side chains
if f.frozen > 0 {
tip := f.frozen
for len(dangling) > 0 {
drop := make(map[common.Hash]struct{})
for _, hash := range dangling {
log.Debug("Dangling parent from freezer", "number", tip-1, "hash", hash)
drop[hash] = struct{}{}
}
children := ReadAllHashes(db, tip)
for i := 0; i < len(children); i++ {
// Dig up the child and ensure it's dangling
child := ReadHeader(nfdb, children[i], tip)
if child == nil {
log.Error("Missing dangling header", "number", tip, "hash", children[i])
continue
}
if _, ok := drop[child.ParentHash]; !ok {
children = append(children[:i], children[i+1:]...)
i--
continue
}
// Delete all block data associated with the child
log.Debug("Deleting dangling block", "number", tip, "hash", children[i], "parent", child.ParentHash)
DeleteBlock(batch, children[i], tip)
}
dangling = children
tip++
}
if err := batch.Write(); err != nil {
log.Crit("Failed to delete dangling side blocks", "err", err)
}
}
// Log something friendly for the user
context := []interface{}{
"blocks", f.frozen - first, "elapsed", common.PrettyDuration(time.Since(start)), "number", f.frozen - 1,

View File

@ -38,6 +38,9 @@ var (
// headFastBlockKey tracks the latest known incomplete block's hash during fast sync.
headFastBlockKey = []byte("LastFast")
// lastPivotKey tracks the last pivot block used by fast sync (to reenable on sethead).
lastPivotKey = []byte("LastPivot")
// fastTrieProgressKey tracks the number of trie entries imported during fast sync.
fastTrieProgressKey = []byte("TrieSync")