les: historical data garbage collection (#19570)
This change introduces garbage collection for the light client. Historical chain data is deleted periodically. If you want to disable the GC, use the --light.nopruning flag.
This commit is contained in:
@ -17,6 +17,7 @@
|
||||
package light
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
@ -24,6 +25,7 @@ import (
|
||||
"math/big"
|
||||
"time"
|
||||
|
||||
"github.com/deckarep/golang-set"
|
||||
"github.com/ethereum/go-ethereum/common"
|
||||
"github.com/ethereum/go-ethereum/common/bitutil"
|
||||
"github.com/ethereum/go-ethereum/core"
|
||||
@ -128,23 +130,27 @@ func StoreChtRoot(db ethdb.Database, sectionIdx uint64, sectionHead, root common
|
||||
|
||||
// ChtIndexerBackend implements core.ChainIndexerBackend.
|
||||
type ChtIndexerBackend struct {
|
||||
disablePruning bool
|
||||
diskdb, trieTable ethdb.Database
|
||||
odr OdrBackend
|
||||
triedb *trie.Database
|
||||
trieset mapset.Set
|
||||
section, sectionSize uint64
|
||||
lastHash common.Hash
|
||||
trie *trie.Trie
|
||||
}
|
||||
|
||||
// NewChtIndexer creates a Cht chain indexer
|
||||
func NewChtIndexer(db ethdb.Database, odr OdrBackend, size, confirms uint64) *core.ChainIndexer {
|
||||
func NewChtIndexer(db ethdb.Database, odr OdrBackend, size, confirms uint64, disablePruning bool) *core.ChainIndexer {
|
||||
trieTable := rawdb.NewTable(db, ChtTablePrefix)
|
||||
backend := &ChtIndexerBackend{
|
||||
diskdb: db,
|
||||
odr: odr,
|
||||
trieTable: trieTable,
|
||||
triedb: trie.NewDatabaseWithCache(trieTable, 1), // Use a tiny cache only to keep memory down
|
||||
sectionSize: size,
|
||||
diskdb: db,
|
||||
odr: odr,
|
||||
trieTable: trieTable,
|
||||
triedb: trie.NewDatabaseWithCache(trieTable, 1), // Use a tiny cache only to keep memory down
|
||||
trieset: mapset.NewSet(),
|
||||
sectionSize: size,
|
||||
disablePruning: disablePruning,
|
||||
}
|
||||
return core.NewChainIndexer(db, rawdb.NewTable(db, "chtIndexV2-"), backend, size, confirms, time.Millisecond*100, "cht")
|
||||
}
|
||||
@ -189,7 +195,6 @@ func (c *ChtIndexerBackend) Reset(ctx context.Context, section uint64, lastSecti
|
||||
c.trie, err = trie.New(root, c.triedb)
|
||||
}
|
||||
}
|
||||
|
||||
c.section = section
|
||||
return err
|
||||
}
|
||||
@ -216,13 +221,83 @@ func (c *ChtIndexerBackend) Commit() error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.triedb.Commit(root, false)
|
||||
// Pruning historical trie nodes if necessary.
|
||||
if !c.disablePruning {
|
||||
// Flush the triedb and track the latest trie nodes.
|
||||
c.trieset.Clear()
|
||||
c.triedb.Commit(root, false, func(hash common.Hash) { c.trieset.Add(hash) })
|
||||
|
||||
it := c.trieTable.NewIterator(nil, nil)
|
||||
defer it.Release()
|
||||
|
||||
var (
|
||||
deleted int
|
||||
remaining int
|
||||
t = time.Now()
|
||||
)
|
||||
for it.Next() {
|
||||
trimmed := bytes.TrimPrefix(it.Key(), []byte(ChtTablePrefix))
|
||||
if !c.trieset.Contains(common.BytesToHash(trimmed)) {
|
||||
c.trieTable.Delete(trimmed)
|
||||
deleted += 1
|
||||
} else {
|
||||
remaining += 1
|
||||
}
|
||||
}
|
||||
log.Debug("Prune historical CHT trie nodes", "deleted", deleted, "remaining", remaining, "elapsed", common.PrettyDuration(time.Since(t)))
|
||||
} else {
|
||||
c.triedb.Commit(root, false, nil)
|
||||
}
|
||||
log.Info("Storing CHT", "section", c.section, "head", fmt.Sprintf("%064x", c.lastHash), "root", fmt.Sprintf("%064x", root))
|
||||
StoreChtRoot(c.diskdb, c.section, c.lastHash, root)
|
||||
return nil
|
||||
}
|
||||
|
||||
// PruneSections implements core.ChainIndexerBackend which deletes all
|
||||
// chain data(except hash<->number mappings) older than the specified
|
||||
// threshold.
|
||||
func (c *ChtIndexerBackend) Prune(threshold uint64) error {
|
||||
// Short circuit if the light pruning is disabled.
|
||||
if c.disablePruning {
|
||||
return nil
|
||||
}
|
||||
t := time.Now()
|
||||
// Always keep genesis header in database.
|
||||
start, end := uint64(1), (threshold+1)*c.sectionSize
|
||||
|
||||
var batch = c.diskdb.NewBatch()
|
||||
for {
|
||||
numbers, hashes := rawdb.ReadAllCanonicalHashes(c.diskdb, start, end, 10240)
|
||||
if len(numbers) == 0 {
|
||||
break
|
||||
}
|
||||
for i := 0; i < len(numbers); i++ {
|
||||
// Keep hash<->number mapping in database otherwise the hash based
|
||||
// API(e.g. GetReceipt, GetLogs) will be broken.
|
||||
//
|
||||
// Storage size wise, the size of a mapping is ~41bytes. For one
|
||||
// section is about 1.3MB which is acceptable.
|
||||
//
|
||||
// In order to totally get rid of this index, we need an additional
|
||||
// flag to specify how many historical data light client can serve.
|
||||
rawdb.DeleteCanonicalHash(batch, numbers[i])
|
||||
rawdb.DeleteBlockWithoutNumber(batch, hashes[i], numbers[i])
|
||||
}
|
||||
if batch.ValueSize() > ethdb.IdealBatchSize {
|
||||
if err := batch.Write(); err != nil {
|
||||
return err
|
||||
}
|
||||
batch.Reset()
|
||||
}
|
||||
start = numbers[len(numbers)-1] + 1
|
||||
}
|
||||
if err := batch.Write(); err != nil {
|
||||
return err
|
||||
}
|
||||
log.Debug("Prune history headers", "threshold", threshold, "elapsed", common.PrettyDuration(time.Since(t)))
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
bloomTriePrefix = []byte("bltRoot-") // bloomTriePrefix + bloomTrieNum (uint64 big endian) -> trie root hash
|
||||
BloomTrieTablePrefix = "blt-"
|
||||
@ -245,8 +320,10 @@ func StoreBloomTrieRoot(db ethdb.Database, sectionIdx uint64, sectionHead, root
|
||||
|
||||
// BloomTrieIndexerBackend implements core.ChainIndexerBackend
|
||||
type BloomTrieIndexerBackend struct {
|
||||
disablePruning bool
|
||||
diskdb, trieTable ethdb.Database
|
||||
triedb *trie.Database
|
||||
trieset mapset.Set
|
||||
odr OdrBackend
|
||||
section uint64
|
||||
parentSize uint64
|
||||
@ -257,15 +334,17 @@ type BloomTrieIndexerBackend struct {
|
||||
}
|
||||
|
||||
// NewBloomTrieIndexer creates a BloomTrie chain indexer
|
||||
func NewBloomTrieIndexer(db ethdb.Database, odr OdrBackend, parentSize, size uint64) *core.ChainIndexer {
|
||||
func NewBloomTrieIndexer(db ethdb.Database, odr OdrBackend, parentSize, size uint64, disablePruning bool) *core.ChainIndexer {
|
||||
trieTable := rawdb.NewTable(db, BloomTrieTablePrefix)
|
||||
backend := &BloomTrieIndexerBackend{
|
||||
diskdb: db,
|
||||
odr: odr,
|
||||
trieTable: trieTable,
|
||||
triedb: trie.NewDatabaseWithCache(trieTable, 1), // Use a tiny cache only to keep memory down
|
||||
parentSize: parentSize,
|
||||
size: size,
|
||||
diskdb: db,
|
||||
odr: odr,
|
||||
trieTable: trieTable,
|
||||
triedb: trie.NewDatabaseWithCache(trieTable, 1), // Use a tiny cache only to keep memory down
|
||||
trieset: mapset.NewSet(),
|
||||
parentSize: parentSize,
|
||||
size: size,
|
||||
disablePruning: disablePruning,
|
||||
}
|
||||
backend.bloomTrieRatio = size / parentSize
|
||||
backend.sectionHeads = make([]common.Hash, backend.bloomTrieRatio)
|
||||
@ -303,7 +382,6 @@ func (b *BloomTrieIndexerBackend) fetchMissingNodes(ctx context.Context, section
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
for i := uint(0); i < types.BloomBitLength; i++ {
|
||||
indexCh <- i
|
||||
}
|
||||
@ -380,10 +458,51 @@ func (b *BloomTrieIndexerBackend) Commit() error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
b.triedb.Commit(root, false)
|
||||
// Pruning historical trie nodes if necessary.
|
||||
if !b.disablePruning {
|
||||
// Flush the triedb and track the latest trie nodes.
|
||||
b.trieset.Clear()
|
||||
b.triedb.Commit(root, false, func(hash common.Hash) { b.trieset.Add(hash) })
|
||||
|
||||
it := b.trieTable.NewIterator(nil, nil)
|
||||
defer it.Release()
|
||||
|
||||
var (
|
||||
deleted int
|
||||
remaining int
|
||||
t = time.Now()
|
||||
)
|
||||
for it.Next() {
|
||||
trimmed := bytes.TrimPrefix(it.Key(), []byte(BloomTrieTablePrefix))
|
||||
if !b.trieset.Contains(common.BytesToHash(trimmed)) {
|
||||
b.trieTable.Delete(trimmed)
|
||||
deleted += 1
|
||||
} else {
|
||||
remaining += 1
|
||||
}
|
||||
}
|
||||
log.Debug("Prune historical bloom trie nodes", "deleted", deleted, "remaining", remaining, "elapsed", common.PrettyDuration(time.Since(t)))
|
||||
} else {
|
||||
b.triedb.Commit(root, false, nil)
|
||||
}
|
||||
sectionHead := b.sectionHeads[b.bloomTrieRatio-1]
|
||||
log.Info("Storing bloom trie", "section", b.section, "head", fmt.Sprintf("%064x", sectionHead), "root", fmt.Sprintf("%064x", root), "compression", float64(compSize)/float64(decompSize))
|
||||
StoreBloomTrieRoot(b.diskdb, b.section, sectionHead, root)
|
||||
log.Info("Storing bloom trie", "section", b.section, "head", fmt.Sprintf("%064x", sectionHead), "root", fmt.Sprintf("%064x", root), "compression", float64(compSize)/float64(decompSize))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Prune implements core.ChainIndexerBackend which deletes all
|
||||
// bloombits which older than the specified threshold.
|
||||
func (b *BloomTrieIndexerBackend) Prune(threshold uint64) error {
|
||||
// Short circuit if the light pruning is disabled.
|
||||
if b.disablePruning {
|
||||
return nil
|
||||
}
|
||||
start := time.Now()
|
||||
for i := uint(0); i < types.BloomBitLength; i++ {
|
||||
rawdb.DeleteBloombits(b.diskdb, i, 0, threshold*b.bloomTrieRatio+b.bloomTrieRatio)
|
||||
}
|
||||
log.Debug("Prune history bloombits", "threshold", threshold, "elapsed", common.PrettyDuration(time.Since(start)))
|
||||
return nil
|
||||
}
|
||||
|
Reference in New Issue
Block a user