core/rawdb: freezer batch write (#23462)

This change is a rewrite of the freezer code.

When writing ancient chain data to the freezer, the previous version first encoded each
individual item to a temporary buffer, then wrote the buffer. For small item sizes (for
example, in the block hash freezer table), this strategy caused a lot of system calls for
writing tiny chunks of data. It also allocated a lot of temporary []byte buffers.
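
For illustration, the old write path behaved roughly like the sketch below; the function
and variable names are hypothetical, only the per-item encode-and-write pattern matters:

    // writeItemsOneByOne sketches the pre-change behaviour: every item gets its own
    // temporary buffer and its own write (i.e. its own system call) on the data file.
    // Assumes: import "io" and "github.com/golang/snappy".
    func writeItemsOneByOne(dataFile io.Writer, items [][]byte) error {
        for _, item := range items {
            blob := snappy.Encode(nil, item) // fresh temporary buffer per item
            if _, err := dataFile.Write(blob); err != nil { // one write per item
                return err
            }
        }
        return nil
    }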

In the new version, we instead encode multiple items into a reusable batch buffer, which
is then written to the file all at once. This avoids performing a system call for every
inserted item.
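
The batched approach can be sketched like this (again illustrative only; the real logic
lives in the new freezer batch code added by this change):

    // itemBatch sketches the new behaviour: items are appended to one reusable buffer,
    // which is flushed to the data file with a single write.
    // Assumes: import "io" and "github.com/golang/snappy".
    type itemBatch struct {
        buf []byte // reused across flushes, so per-item allocations are amortized away
    }

    func (b *itemBatch) appendItem(item []byte) {
        b.buf = append(b.buf, snappy.Encode(nil, item)...)
    }

    func (b *itemBatch) flush(dataFile io.Writer) error {
        _, err := dataFile.Write(b.buf) // one write for many items
        b.buf = b.buf[:0]               // keep capacity, drop contents
        return err
    }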

To make the internal batching work, the ancient database API had to be changed. While
integrating the new API in BlockChain.InsertReceiptChain, additional optimizations were
also made there.
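
The reworked API is batch-oriented: callers open one write operation and append all items
for a range of blocks before the batch is committed. A hedged sketch of what a caller
could look like, assuming an interface in the spirit of ethdb.AncientWriteOp with
Append/AppendRaw methods (treat the exact names, signatures and table kinds as
assumptions for illustration, not a reference):

    // writeAncientRange sketches batched ancient writes.
    // Assumes: import "github.com/ethereum/go-ethereum/core/types" and ".../ethdb".
    func writeAncientRange(db ethdb.AncientWriter, blocks []*types.Block, receipts []types.Receipts) error {
        _, err := db.ModifyAncients(func(op ethdb.AncientWriteOp) error {
            for i, block := range blocks {
                num := block.NumberU64()
                if err := op.AppendRaw("hashes", num, block.Hash().Bytes()); err != nil {
                    return err
                }
                if err := op.Append("bodies", num, block.Body()); err != nil {
                    return err
                }
                if err := op.Append("receipts", num, receipts[i]); err != nil {
                    return err
                }
            }
            return nil // the whole range is flushed together when the function returns
        })
        return err
    }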

Co-authored-by: Felix Lange <fjl@twurst.com>
Author: Martin Holst Swende
Date: 2021-09-07 12:31:17 +02:00
Committed by: GitHub
Parent: 9a0df80bbc
Commit: 794c6133ef
13 changed files with 1349 additions and 590 deletions

core/rawdb/freezer_table.go

@@ -17,6 +17,7 @@
package rawdb
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
@@ -55,19 +56,20 @@ type indexEntry struct {
const indexEntrySize = 6
// unmarshallBinary deserializes binary b into the rawIndex entry.
// unmarshalBinary deserializes binary b into the rawIndex entry.
func (i *indexEntry) unmarshalBinary(b []byte) error {
i.filenum = uint32(binary.BigEndian.Uint16(b[:2]))
i.offset = binary.BigEndian.Uint32(b[2:6])
return nil
}
// marshallBinary serializes the rawIndex entry into binary.
func (i *indexEntry) marshallBinary() []byte {
b := make([]byte, indexEntrySize)
binary.BigEndian.PutUint16(b[:2], uint16(i.filenum))
binary.BigEndian.PutUint32(b[2:6], i.offset)
return b
// append adds the encoded entry to the end of b.
func (i *indexEntry) append(b []byte) []byte {
offset := len(b)
out := append(b, make([]byte, indexEntrySize)...)
binary.BigEndian.PutUint16(out[offset:], uint16(i.filenum))
binary.BigEndian.PutUint32(out[offset+2:], i.offset)
return out
}
// bounds returns the start- and end- offsets, and the file number of where to
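
As an aside, the new append helper above makes it cheap to collect many index entries
into a single buffer and write the index file once per batch, roughly like this
(illustrative sketch; 'entries' and 'indexFile' are hypothetical):

    var idxBuf []byte
    for _, entry := range entries { // entries []indexEntry, one per appended item
        idxBuf = entry.append(idxBuf)
    }
    if _, err := indexFile.Write(idxBuf); err != nil {
        return err
    }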
@@ -107,7 +109,7 @@ type freezerTable struct {
// to count how many historic items have gone missing.
itemOffset uint32 // Offset (number of discarded items)
headBytes uint32 // Number of bytes written to the head file
headBytes int64 // Number of bytes written to the head file
readMeter metrics.Meter // Meter for measuring the effective amount of data read
writeMeter metrics.Meter // Meter for measuring the effective amount of data written
sizeGauge metrics.Gauge // Gauge for tracking the combined size of all freezer tables
@@ -118,12 +120,7 @@ type freezerTable struct {
// NewFreezerTable opens the given path as a freezer table.
func NewFreezerTable(path, name string, disableSnappy bool) (*freezerTable, error) {
return newTable(path, name, metrics.NilMeter{}, metrics.NilMeter{}, metrics.NilGauge{}, disableSnappy)
}
// newTable opens a freezer table with default settings - 2G files
func newTable(path string, name string, readMeter metrics.Meter, writeMeter metrics.Meter, sizeGauge metrics.Gauge, disableSnappy bool) (*freezerTable, error) {
return newCustomTable(path, name, readMeter, writeMeter, sizeGauge, 2*1000*1000*1000, disableSnappy)
return newTable(path, name, metrics.NilMeter{}, metrics.NilMeter{}, metrics.NilGauge{}, freezerTableSize, disableSnappy)
}
// openFreezerFileForAppend opens a freezer table file and seeks to the end
@@ -164,10 +161,10 @@ func truncateFreezerFile(file *os.File, size int64) error {
return nil
}
// newCustomTable opens a freezer table, creating the data and index files if they are
// newTable opens a freezer table, creating the data and index files if they are
// non existent. Both files are truncated to the shortest common length to ensure
// they don't go out of sync.
func newCustomTable(path string, name string, readMeter metrics.Meter, writeMeter metrics.Meter, sizeGauge metrics.Gauge, maxFilesize uint32, noCompression bool) (*freezerTable, error) {
func newTable(path string, name string, readMeter metrics.Meter, writeMeter metrics.Meter, sizeGauge metrics.Gauge, maxFilesize uint32, noCompression bool) (*freezerTable, error) {
// Ensure the containing directory exists and open the indexEntry file
if err := os.MkdirAll(path, 0755); err != nil {
return nil, err
@@ -313,7 +310,7 @@ func (t *freezerTable) repair() error {
}
// Update the item and byte counters and return
t.items = uint64(t.itemOffset) + uint64(offsetsSize/indexEntrySize-1) // last indexEntry points to the end of the data file
t.headBytes = uint32(contentSize)
t.headBytes = contentSize
t.headId = lastIndex.filenum
// Close opened files and preopen all files
@@ -387,14 +384,14 @@ func (t *freezerTable) truncate(items uint64) error {
t.releaseFilesAfter(expected.filenum, true)
// Set back the historic head
t.head = newHead
atomic.StoreUint32(&t.headId, expected.filenum)
t.headId = expected.filenum
}
if err := truncateFreezerFile(t.head, int64(expected.offset)); err != nil {
return err
}
// All data files truncated, set internal counters and return
t.headBytes = int64(expected.offset)
atomic.StoreUint64(&t.items, items)
atomic.StoreUint32(&t.headBytes, expected.offset)
// Retrieve the new size and update the total size counter
newSize, err := t.sizeNolock()
@@ -471,94 +468,6 @@ func (t *freezerTable) releaseFilesAfter(num uint32, remove bool) {
}
}
// Append injects a binary blob at the end of the freezer table. The item number
// is a precautionary parameter to ensure data correctness, but the table will
// reject already existing data.
//
// Note, this method will *not* flush any data to disk so be sure to explicitly
// fsync before irreversibly deleting data from the database.
func (t *freezerTable) Append(item uint64, blob []byte) error {
// Encode the blob before the lock portion
if !t.noCompression {
blob = snappy.Encode(nil, blob)
}
// Read lock prevents competition with truncate
retry, err := t.append(item, blob, false)
if err != nil {
return err
}
if retry {
// Read lock was insufficient, retry with a writelock
_, err = t.append(item, blob, true)
}
return err
}
// append injects a binary blob at the end of the freezer table.
// Normally, inserts do not require holding the write-lock, so it should be invoked with 'wlock' set to
// false.
// However, if the data will grown the current file out of bounds, then this
// method will return 'true, nil', indicating that the caller should retry, this time
// with 'wlock' set to true.
func (t *freezerTable) append(item uint64, encodedBlob []byte, wlock bool) (bool, error) {
if wlock {
t.lock.Lock()
defer t.lock.Unlock()
} else {
t.lock.RLock()
defer t.lock.RUnlock()
}
// Ensure the table is still accessible
if t.index == nil || t.head == nil {
return false, errClosed
}
// Ensure only the next item can be written, nothing else
if atomic.LoadUint64(&t.items) != item {
return false, fmt.Errorf("appending unexpected item: want %d, have %d", t.items, item)
}
bLen := uint32(len(encodedBlob))
if t.headBytes+bLen < bLen ||
t.headBytes+bLen > t.maxFileSize {
// Writing would overflow, so we need to open a new data file.
// If we don't already hold the writelock, abort and let the caller
// invoke this method a second time.
if !wlock {
return true, nil
}
nextID := atomic.LoadUint32(&t.headId) + 1
// We open the next file in truncated mode -- if this file already
// exists, we need to start over from scratch on it
newHead, err := t.openFile(nextID, openFreezerFileTruncated)
if err != nil {
return false, err
}
// Close old file, and reopen in RDONLY mode
t.releaseFile(t.headId)
t.openFile(t.headId, openFreezerFileForReadOnly)
// Swap out the current head
t.head = newHead
atomic.StoreUint32(&t.headBytes, 0)
atomic.StoreUint32(&t.headId, nextID)
}
if _, err := t.head.Write(encodedBlob); err != nil {
return false, err
}
newOffset := atomic.AddUint32(&t.headBytes, bLen)
idx := indexEntry{
filenum: atomic.LoadUint32(&t.headId),
offset: newOffset,
}
// Write indexEntry
t.index.Write(idx.marshallBinary())
t.writeMeter.Mark(int64(bLen + indexEntrySize))
t.sizeGauge.Inc(int64(bLen + indexEntrySize))
atomic.AddUint64(&t.items, 1)
return false, nil
}
// getIndices returns the index entries for the given from-item, covering 'count' items.
// N.B: The actual number of returned indices for N items will always be N+1 (unless an
// error is returned).
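
The removed code above used an optimistic locking pattern: appends normally run under the
read lock, and only when the head file must be rotated does the caller retry under the
write lock. Stripped of freezer details, the pattern looks like this (generic sketch,
hypothetical types and method names):

    func (t *table) insert(blob []byte) error {
        retry, err := t.tryAppend(blob, false) // shared (read) lock taken inside
        if err != nil {
            return err
        }
        if retry {
            _, err = t.tryAppend(blob, true) // exclusive (write) lock taken inside
        }
        return err
    }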
@@ -651,6 +560,7 @@ func (t *freezerTable) RetrieveItems(start, count, maxBytes uint64) ([][]byte, e
func (t *freezerTable) retrieveItems(start, count, maxBytes uint64) ([]byte, []int, error) {
t.lock.RLock()
defer t.lock.RUnlock()
// Ensure the table and the item is accessible
if t.index == nil || t.head == nil {
return nil, nil, errClosed
@@ -763,6 +673,32 @@ func (t *freezerTable) sizeNolock() (uint64, error) {
return total, nil
}
// advanceHead should be called when the current head file would outgrow the file limits,
// and a new file must be opened. The caller of this method must hold the write-lock
// before calling this method.
func (t *freezerTable) advanceHead() error {
t.lock.Lock()
defer t.lock.Unlock()
// We open the next file in truncated mode -- if this file already
// exists, we need to start over from scratch on it.
nextID := t.headId + 1
newHead, err := t.openFile(nextID, openFreezerFileTruncated)
if err != nil {
return err
}
// Close old file, and reopen in RDONLY mode.
t.releaseFile(t.headId)
t.openFile(t.headId, openFreezerFileForReadOnly)
// Swap out the current head.
t.head = newHead
t.headBytes = 0
t.headId = nextID
return nil
}
// Sync pushes any pending data from memory out to disk. This is an expensive
// operation, so use it with care.
func (t *freezerTable) Sync() error {
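
For context, a caller of advanceHead above would typically check the file limit before
writing; a hypothetical sketch, assuming the freezer-level write lock is already held and
'pending' is the batch of encoded data about to be flushed:

    // Roll over to a new head file before a write that would push the current
    // file past maxFileSize (illustrative only; names are hypothetical).
    if t.headBytes+int64(len(pending)) > int64(t.maxFileSize) {
        if err := t.advanceHead(); err != nil {
            return err
        }
    }
    if _, err := t.head.Write(pending); err != nil {
        return err
    }
    t.headBytes += int64(len(pending))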
@@ -775,10 +711,21 @@ func (t *freezerTable) Sync() error {
// DumpIndex is a debug print utility function, mainly for testing. It can also
// be used to analyse a live freezer table index.
func (t *freezerTable) DumpIndex(start, stop int64) {
t.dumpIndex(os.Stdout, start, stop)
}
func (t *freezerTable) dumpIndexString(start, stop int64) string {
var out bytes.Buffer
out.WriteString("\n")
t.dumpIndex(&out, start, stop)
return out.String()
}
func (t *freezerTable) dumpIndex(w io.Writer, start, stop int64) {
buf := make([]byte, indexEntrySize)
fmt.Printf("| number | fileno | offset |\n")
fmt.Printf("|--------|--------|--------|\n")
fmt.Fprintf(w, "| number | fileno | offset |\n")
fmt.Fprintf(w, "|--------|--------|--------|\n")
for i := uint64(start); ; i++ {
if _, err := t.index.ReadAt(buf, int64(i*indexEntrySize)); err != nil {
@@ -786,10 +733,10 @@ func (t *freezerTable) DumpIndex(start, stop int64) {
}
var entry indexEntry
entry.unmarshalBinary(buf)
fmt.Printf("| %03d | %03d | %03d | \n", i, entry.filenum, entry.offset)
fmt.Fprintf(w, "| %03d | %03d | %03d | \n", i, entry.filenum, entry.offset)
if stop > 0 && i >= uint64(stop) {
break
}
}
fmt.Printf("|--------------------------|\n")
fmt.Fprintf(w, "|--------------------------|\n")
}