core/rawdb: freezer batch write (#23462)

This change is a rewrite of the freezer code.

When writing ancient chain data to the freezer, the previous version first encoded each
individual item to a temporary buffer, then wrote the buffer. For small item sizes (for
example, in the block hash freezer table), this strategy caused a lot of system calls for
writing tiny chunks of data. It also allocated a lot of temporary []byte buffers.

In the new version, we instead encode multiple items into a reusable batch buffer, which
is then written to the file all at once. This avoids performing a system call for every
inserted item.

To make the internal batching work, the ancient database API had to be changed. While
integrating this new API in BlockChain.InsertReceiptChain, additional optimizations were
also added there.

Co-authored-by: Felix Lange <fjl@twurst.com>
This commit is contained in:
Martin Holst Swende
2021-09-07 12:31:17 +02:00
committed by GitHub
parent 9a0df80bbc
commit 794c6133ef
13 changed files with 1349 additions and 590 deletions

View File

@ -61,6 +61,9 @@ const (
// freezerBatchLimit is the maximum number of blocks to freeze in one batch
// before doing an fsync and deleting it from the key-value store.
freezerBatchLimit = 30000
// freezerTableSize defines the maximum size of freezer data files.
freezerTableSize = 2 * 1000 * 1000 * 1000
)
// freezer is a memory mapped append-only database to store immutable chain data
@ -77,6 +80,10 @@ type freezer struct {
frozen uint64 // Number of blocks already frozen
threshold uint64 // Number of recent blocks not to freeze (params.FullImmutabilityThreshold apart from tests)
// This lock synchronizes writers and the truncate operation.
writeLock sync.Mutex
writeBatch *freezerBatch
readonly bool
tables map[string]*freezerTable // Data tables for storing everything
instanceLock fileutil.Releaser // File-system lock to prevent double opens
@ -90,7 +97,10 @@ type freezer struct {
// newFreezer creates a chain freezer that moves ancient chain data into
// append-only flat file containers.
func newFreezer(datadir string, namespace string, readonly bool) (*freezer, error) {
//
// The 'tables' argument defines the data tables. If the value of a map
// entry is true, snappy compression is disabled for the table.
func newFreezer(datadir string, namespace string, readonly bool, maxTableSize uint32, tables map[string]bool) (*freezer, error) {
// Create the initial freezer object
var (
readMeter = metrics.NewRegisteredMeter(namespace+"ancient/read", nil)
@ -119,8 +129,10 @@ func newFreezer(datadir string, namespace string, readonly bool) (*freezer, erro
trigger: make(chan chan struct{}),
quit: make(chan struct{}),
}
for name, disableSnappy := range FreezerNoSnappy {
table, err := newTable(datadir, name, readMeter, writeMeter, sizeGauge, disableSnappy)
// Create the tables.
for name, disableSnappy := range tables {
table, err := newTable(datadir, name, readMeter, writeMeter, sizeGauge, maxTableSize, disableSnappy)
if err != nil {
for _, table := range freezer.tables {
table.Close()
@ -130,6 +142,8 @@ func newFreezer(datadir string, namespace string, readonly bool) (*freezer, erro
}
freezer.tables[name] = table
}
// Truncate all tables to common length.
if err := freezer.repair(); err != nil {
for _, table := range freezer.tables {
table.Close()
@ -137,12 +151,19 @@ func newFreezer(datadir string, namespace string, readonly bool) (*freezer, erro
lock.Release()
return nil, err
}
// Create the write batch.
freezer.writeBatch = newFreezerBatch(freezer)
log.Info("Opened ancient database", "database", datadir, "readonly", readonly)
return freezer, nil
}
// Close terminates the chain freezer, unmapping all the data files.
func (f *freezer) Close() error {
f.writeLock.Lock()
defer f.writeLock.Unlock()
var errs []error
f.closeOnce.Do(func() {
close(f.quit)
@ -199,60 +220,49 @@ func (f *freezer) Ancients() (uint64, error) {
// AncientSize returns the ancient size of the specified category.
//
// It reports errUnknownTable when no table is registered under kind.
func (f *freezer) AncientSize(kind string) (uint64, error) {
	// The write lock is held for the whole call so that table fields are not
	// read while a writer is changing them. The cost of locking is acceptable
	// because AncientSize is only meant for debugging.
	f.writeLock.Lock()
	defer f.writeLock.Unlock()

	// A missing map entry yields a nil table, which is treated as unknown.
	table := f.tables[kind]
	if table == nil {
		return 0, errUnknownTable
	}
	return table.size()
}
// AppendAncient injects all binary blobs belong to block at the end of the
// append-only immutable table files.
//
// Notably, this function is lock free but kind of thread-safe. All out-of-order
// injection will be rejected. But if two injections with same number happen at
// the same time, we can get into the trouble.
func (f *freezer) AppendAncient(number uint64, hash, header, body, receipts, td []byte) (err error) {
// ModifyAncients runs the given write operation.
func (f *freezer) ModifyAncients(fn func(ethdb.AncientWriteOp) error) (writeSize int64, err error) {
if f.readonly {
return errReadOnly
return 0, errReadOnly
}
// Ensure the binary blobs we are appending is continuous with freezer.
if atomic.LoadUint64(&f.frozen) != number {
return errOutOrderInsertion
}
// Rollback all inserted data if any insertion below failed to ensure
// the tables won't out of sync.
f.writeLock.Lock()
defer f.writeLock.Unlock()
// Roll back all tables to the starting position in case of error.
prevItem := f.frozen
defer func() {
if err != nil {
rerr := f.repair()
if rerr != nil {
log.Crit("Failed to repair freezer", "err", rerr)
// The write operation has failed. Go back to the previous item position.
for name, table := range f.tables {
err := table.truncate(prevItem)
if err != nil {
log.Error("Freezer table roll-back failed", "table", name, "index", prevItem, "err", err)
}
}
log.Info("Append ancient failed", "number", number, "err", err)
}
}()
// Inject all the components into the relevant data tables
if err := f.tables[freezerHashTable].Append(f.frozen, hash[:]); err != nil {
log.Error("Failed to append ancient hash", "number", f.frozen, "hash", hash, "err", err)
return err
f.writeBatch.reset()
if err := fn(f.writeBatch); err != nil {
return 0, err
}
if err := f.tables[freezerHeaderTable].Append(f.frozen, header); err != nil {
log.Error("Failed to append ancient header", "number", f.frozen, "hash", hash, "err", err)
return err
item, writeSize, err := f.writeBatch.commit()
if err != nil {
return 0, err
}
if err := f.tables[freezerBodiesTable].Append(f.frozen, body); err != nil {
log.Error("Failed to append ancient body", "number", f.frozen, "hash", hash, "err", err)
return err
}
if err := f.tables[freezerReceiptTable].Append(f.frozen, receipts); err != nil {
log.Error("Failed to append ancient receipts", "number", f.frozen, "hash", hash, "err", err)
return err
}
if err := f.tables[freezerDifficultyTable].Append(f.frozen, td); err != nil {
log.Error("Failed to append ancient difficulty", "number", f.frozen, "hash", hash, "err", err)
return err
}
atomic.AddUint64(&f.frozen, 1) // Only modify atomically
return nil
atomic.StoreUint64(&f.frozen, item)
return writeSize, nil
}
// TruncateAncients discards any recent data above the provided threshold number.
@ -260,6 +270,9 @@ func (f *freezer) TruncateAncients(items uint64) error {
if f.readonly {
return errReadOnly
}
f.writeLock.Lock()
defer f.writeLock.Unlock()
if atomic.LoadUint64(&f.frozen) <= items {
return nil
}
@ -286,6 +299,24 @@ func (f *freezer) Sync() error {
return nil
}
// repair truncates all data tables to the same length.
//
// The common length is the smallest item count found among the tables. Once
// every table has been truncated to it, that count is stored as the number of
// frozen items. The first truncation error aborts the repair and is returned
// without updating the frozen counter.
func (f *freezer) repair() error {
	// Start from the largest possible value and lower it to the smallest
	// item count reported by any table.
	var lowest uint64 = math.MaxUint64
	for _, t := range f.tables {
		if n := atomic.LoadUint64(&t.items); n < lowest {
			lowest = n
		}
	}
	// Cut every table back to that count.
	for _, t := range f.tables {
		err := t.truncate(lowest)
		if err != nil {
			return err
		}
	}
	atomic.StoreUint64(&f.frozen, lowest)
	return nil
}
// freeze is a background thread that periodically checks the blockchain for any
// import progress and moves ancient data from the fast database into the freezer.
//
@ -352,54 +383,28 @@ func (f *freezer) freeze(db ethdb.KeyValueStore) {
backoff = true
continue
}
// Seems we have data ready to be frozen, process in usable batches
limit := *number - threshold
if limit-f.frozen > freezerBatchLimit {
limit = f.frozen + freezerBatchLimit
}
var (
start = time.Now()
first = f.frozen
ancients = make([]common.Hash, 0, limit-f.frozen)
first, _ = f.Ancients()
limit = *number - threshold
)
for f.frozen <= limit {
// Retrieves all the components of the canonical block
hash := ReadCanonicalHash(nfdb, f.frozen)
if hash == (common.Hash{}) {
log.Error("Canonical hash missing, can't freeze", "number", f.frozen)
break
}
header := ReadHeaderRLP(nfdb, hash, f.frozen)
if len(header) == 0 {
log.Error("Block header missing, can't freeze", "number", f.frozen, "hash", hash)
break
}
body := ReadBodyRLP(nfdb, hash, f.frozen)
if len(body) == 0 {
log.Error("Block body missing, can't freeze", "number", f.frozen, "hash", hash)
break
}
receipts := ReadReceiptsRLP(nfdb, hash, f.frozen)
if len(receipts) == 0 {
log.Error("Block receipts missing, can't freeze", "number", f.frozen, "hash", hash)
break
}
td := ReadTdRLP(nfdb, hash, f.frozen)
if len(td) == 0 {
log.Error("Total difficulty missing, can't freeze", "number", f.frozen, "hash", hash)
break
}
log.Trace("Deep froze ancient block", "number", f.frozen, "hash", hash)
// Inject all the components into the relevant data tables
if err := f.AppendAncient(f.frozen, hash[:], header, body, receipts, td); err != nil {
break
}
ancients = append(ancients, hash)
if limit-first > freezerBatchLimit {
limit = first + freezerBatchLimit
}
ancients, err := f.freezeRange(nfdb, first, limit)
if err != nil {
log.Error("Error in block freeze operation", "err", err)
backoff = true
continue
}
// Batch of blocks have been frozen, flush them before wiping from leveldb
if err := f.Sync(); err != nil {
log.Crit("Failed to flush frozen tables", "err", err)
}
// Wipe out all data from the active database
batch := db.NewBatch()
for i := 0; i < len(ancients); i++ {
@ -464,6 +469,7 @@ func (f *freezer) freeze(db ethdb.KeyValueStore) {
log.Crit("Failed to delete dangling side blocks", "err", err)
}
}
// Log something friendly for the user
context := []interface{}{
"blocks", f.frozen - first, "elapsed", common.PrettyDuration(time.Since(start)), "number", f.frozen - 1,
@ -480,20 +486,54 @@ func (f *freezer) freeze(db ethdb.KeyValueStore) {
}
}
// repair truncates all data tables to the same length.
func (f *freezer) repair() error {
min := uint64(math.MaxUint64)
for _, table := range f.tables {
items := atomic.LoadUint64(&table.items)
if min > items {
min = items
func (f *freezer) freezeRange(nfdb *nofreezedb, number, limit uint64) (hashes []common.Hash, err error) {
hashes = make([]common.Hash, 0, limit-number)
_, err = f.ModifyAncients(func(op ethdb.AncientWriteOp) error {
for ; number <= limit; number++ {
// Retrieve all the components of the canonical block.
hash := ReadCanonicalHash(nfdb, number)
if hash == (common.Hash{}) {
return fmt.Errorf("canonical hash missing, can't freeze block %d", number)
}
header := ReadHeaderRLP(nfdb, hash, number)
if len(header) == 0 {
return fmt.Errorf("block header missing, can't freeze block %d", number)
}
body := ReadBodyRLP(nfdb, hash, number)
if len(body) == 0 {
return fmt.Errorf("block body missing, can't freeze block %d", number)
}
receipts := ReadReceiptsRLP(nfdb, hash, number)
if len(receipts) == 0 {
return fmt.Errorf("block receipts missing, can't freeze block %d", number)
}
td := ReadTdRLP(nfdb, hash, number)
if len(td) == 0 {
return fmt.Errorf("total difficulty missing, can't freeze block %d", number)
}
// Write to the batch.
if err := op.AppendRaw(freezerHashTable, number, hash[:]); err != nil {
return fmt.Errorf("can't write hash to freezer: %v", err)
}
if err := op.AppendRaw(freezerHeaderTable, number, header); err != nil {
return fmt.Errorf("can't write header to freezer: %v", err)
}
if err := op.AppendRaw(freezerBodiesTable, number, body); err != nil {
return fmt.Errorf("can't write body to freezer: %v", err)
}
if err := op.AppendRaw(freezerReceiptTable, number, receipts); err != nil {
return fmt.Errorf("can't write receipts to freezer: %v", err)
}
if err := op.AppendRaw(freezerDifficultyTable, number, td); err != nil {
return fmt.Errorf("can't write td to freezer: %v", err)
}
hashes = append(hashes, hash)
}
}
for _, table := range f.tables {
if err := table.truncate(min); err != nil {
return err
}
}
atomic.StoreUint64(&f.frozen, min)
return nil
return nil
})
return hashes, err
}