les: historical data garbage collection (#19570)

This change introduces garbage collection for the light client. Historical
chain data is deleted periodically. If you want to disable the GC, use
the --light.nopruning flag.
This commit is contained in:
gary rong
2020-07-13 17:02:54 +08:00
committed by GitHub
parent b8dd0890b3
commit 6eef141aef
45 changed files with 841 additions and 213 deletions

View File

@ -17,6 +17,7 @@
package light
import (
"bytes"
"context"
"encoding/binary"
"errors"
@ -24,6 +25,7 @@ import (
"math/big"
"time"
"github.com/deckarep/golang-set"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/common/bitutil"
"github.com/ethereum/go-ethereum/core"
@ -128,23 +130,27 @@ func StoreChtRoot(db ethdb.Database, sectionIdx uint64, sectionHead, root common
// ChtIndexerBackend implements core.ChainIndexerBackend.
type ChtIndexerBackend struct {
	// disablePruning switches off both the trie node pruning done in
	// Commit and the historical chain data pruning done in Prune.
	disablePruning bool

	// diskdb is the database handed to the constructor; trieTable is the
	// ChtTablePrefix-prefixed table on top of it that backs triedb.
	diskdb    ethdb.Database
	trieTable ethdb.Database
	odr       OdrBackend
	triedb    *trie.Database

	// trieset collects the node hashes reported by the most recent triedb
	// commit. Commit deletes trieTable entries whose hash is not in it.
	trieset mapset.Set

	section     uint64
	sectionSize uint64
	lastHash    common.Hash
	trie        *trie.Trie
}
// NewChtIndexer creates a Cht chain indexer
func NewChtIndexer(db ethdb.Database, odr OdrBackend, size, confirms uint64) *core.ChainIndexer {
func NewChtIndexer(db ethdb.Database, odr OdrBackend, size, confirms uint64, disablePruning bool) *core.ChainIndexer {
trieTable := rawdb.NewTable(db, ChtTablePrefix)
backend := &ChtIndexerBackend{
diskdb: db,
odr: odr,
trieTable: trieTable,
triedb: trie.NewDatabaseWithCache(trieTable, 1), // Use a tiny cache only to keep memory down
sectionSize: size,
diskdb: db,
odr: odr,
trieTable: trieTable,
triedb: trie.NewDatabaseWithCache(trieTable, 1), // Use a tiny cache only to keep memory down
trieset: mapset.NewSet(),
sectionSize: size,
disablePruning: disablePruning,
}
return core.NewChainIndexer(db, rawdb.NewTable(db, "chtIndexV2-"), backend, size, confirms, time.Millisecond*100, "cht")
}
@ -189,7 +195,6 @@ func (c *ChtIndexerBackend) Reset(ctx context.Context, section uint64, lastSecti
c.trie, err = trie.New(root, c.triedb)
}
}
c.section = section
return err
}
@ -216,13 +221,83 @@ func (c *ChtIndexerBackend) Commit() error {
if err != nil {
return err
}
c.triedb.Commit(root, false)
// Pruning historical trie nodes if necessary.
if !c.disablePruning {
// Flush the triedb and track the latest trie nodes.
c.trieset.Clear()
c.triedb.Commit(root, false, func(hash common.Hash) { c.trieset.Add(hash) })
it := c.trieTable.NewIterator(nil, nil)
defer it.Release()
var (
deleted int
remaining int
t = time.Now()
)
for it.Next() {
trimmed := bytes.TrimPrefix(it.Key(), []byte(ChtTablePrefix))
if !c.trieset.Contains(common.BytesToHash(trimmed)) {
c.trieTable.Delete(trimmed)
deleted += 1
} else {
remaining += 1
}
}
log.Debug("Prune historical CHT trie nodes", "deleted", deleted, "remaining", remaining, "elapsed", common.PrettyDuration(time.Since(t)))
} else {
c.triedb.Commit(root, false, nil)
}
log.Info("Storing CHT", "section", c.section, "head", fmt.Sprintf("%064x", c.lastHash), "root", fmt.Sprintf("%064x", root))
StoreChtRoot(c.diskdb, c.section, c.lastHash, root)
return nil
}
// Prune implements core.ChainIndexerBackend. It deletes the chain data
// (except hash<->number mappings) that is older than the specified
// threshold. It is a no-op when pruning is disabled.
func (c *ChtIndexerBackend) Prune(threshold uint64) error {
	// Nothing to do if the light pruning is disabled.
	if c.disablePruning {
		return nil
	}
	var (
		began = time.Now()
		// Begin at block 1 so the genesis header always stays in the database.
		from  = uint64(1)
		until = (threshold + 1) * c.sectionSize
		batch = c.diskdb.NewBatch()
	)
	for {
		numbers, hashes := rawdb.ReadAllCanonicalHashes(c.diskdb, from, until, 10240)
		if len(numbers) == 0 {
			break
		}
		for i, number := range numbers {
			// The hash<->number mapping is left in the database, otherwise
			// the hash based APIs (e.g. GetReceipt, GetLogs) would break.
			//
			// Storage wise a single mapping is ~41 bytes, so one section
			// costs about 1.3MB, which is acceptable.
			//
			// Getting rid of this index entirely would need an additional
			// flag specifying how much historical data a light client can
			// serve.
			rawdb.DeleteCanonicalHash(batch, number)
			rawdb.DeleteBlockWithoutNumber(batch, hashes[i], number)
		}
		// Flush once the batch has grown past the ideal size.
		if batch.ValueSize() > ethdb.IdealBatchSize {
			if err := batch.Write(); err != nil {
				return err
			}
			batch.Reset()
		}
		// Continue right after the last number returned by this round.
		from = numbers[len(numbers)-1] + 1
	}
	// Write out whatever is still pending in the batch.
	if err := batch.Write(); err != nil {
		return err
	}
	log.Debug("Prune history headers", "threshold", threshold, "elapsed", common.PrettyDuration(time.Since(began)))
	return nil
}
var (
bloomTriePrefix = []byte("bltRoot-") // bloomTriePrefix + bloomTrieNum (uint64 big endian) -> trie root hash
BloomTrieTablePrefix = "blt-"
@ -245,8 +320,10 @@ func StoreBloomTrieRoot(db ethdb.Database, sectionIdx uint64, sectionHead, root
// BloomTrieIndexerBackend implements core.ChainIndexerBackend
type BloomTrieIndexerBackend struct {
disablePruning bool
diskdb, trieTable ethdb.Database
triedb *trie.Database
trieset mapset.Set
odr OdrBackend
section uint64
parentSize uint64
@ -257,15 +334,17 @@ type BloomTrieIndexerBackend struct {
}
// NewBloomTrieIndexer creates a BloomTrie chain indexer
func NewBloomTrieIndexer(db ethdb.Database, odr OdrBackend, parentSize, size uint64) *core.ChainIndexer {
func NewBloomTrieIndexer(db ethdb.Database, odr OdrBackend, parentSize, size uint64, disablePruning bool) *core.ChainIndexer {
trieTable := rawdb.NewTable(db, BloomTrieTablePrefix)
backend := &BloomTrieIndexerBackend{
diskdb: db,
odr: odr,
trieTable: trieTable,
triedb: trie.NewDatabaseWithCache(trieTable, 1), // Use a tiny cache only to keep memory down
parentSize: parentSize,
size: size,
diskdb: db,
odr: odr,
trieTable: trieTable,
triedb: trie.NewDatabaseWithCache(trieTable, 1), // Use a tiny cache only to keep memory down
trieset: mapset.NewSet(),
parentSize: parentSize,
size: size,
disablePruning: disablePruning,
}
backend.bloomTrieRatio = size / parentSize
backend.sectionHeads = make([]common.Hash, backend.bloomTrieRatio)
@ -303,7 +382,6 @@ func (b *BloomTrieIndexerBackend) fetchMissingNodes(ctx context.Context, section
}
}()
}
for i := uint(0); i < types.BloomBitLength; i++ {
indexCh <- i
}
@ -380,10 +458,51 @@ func (b *BloomTrieIndexerBackend) Commit() error {
if err != nil {
return err
}
b.triedb.Commit(root, false)
// Pruning historical trie nodes if necessary.
if !b.disablePruning {
// Flush the triedb and track the latest trie nodes.
b.trieset.Clear()
b.triedb.Commit(root, false, func(hash common.Hash) { b.trieset.Add(hash) })
it := b.trieTable.NewIterator(nil, nil)
defer it.Release()
var (
deleted int
remaining int
t = time.Now()
)
for it.Next() {
trimmed := bytes.TrimPrefix(it.Key(), []byte(BloomTrieTablePrefix))
if !b.trieset.Contains(common.BytesToHash(trimmed)) {
b.trieTable.Delete(trimmed)
deleted += 1
} else {
remaining += 1
}
}
log.Debug("Prune historical bloom trie nodes", "deleted", deleted, "remaining", remaining, "elapsed", common.PrettyDuration(time.Since(t)))
} else {
b.triedb.Commit(root, false, nil)
}
sectionHead := b.sectionHeads[b.bloomTrieRatio-1]
log.Info("Storing bloom trie", "section", b.section, "head", fmt.Sprintf("%064x", sectionHead), "root", fmt.Sprintf("%064x", root), "compression", float64(compSize)/float64(decompSize))
StoreBloomTrieRoot(b.diskdb, b.section, sectionHead, root)
log.Info("Storing bloom trie", "section", b.section, "head", fmt.Sprintf("%064x", sectionHead), "root", fmt.Sprintf("%064x", root), "compression", float64(compSize)/float64(decompSize))
return nil
}
// Prune implements core.ChainIndexerBackend. It deletes all bloombits
// that are older than the specified threshold. It is a no-op when pruning
// is disabled.
func (b *BloomTrieIndexerBackend) Prune(threshold uint64) error {
	// Nothing to do if the light pruning is disabled.
	if b.disablePruning {
		return nil
	}
	var (
		began = time.Now()
		// The upper bound is the same for every bit index, so compute it once.
		end = threshold*b.bloomTrieRatio + b.bloomTrieRatio
	)
	for bit := uint(0); bit < types.BloomBitLength; bit++ {
		rawdb.DeleteBloombits(b.diskdb, bit, 0, end)
	}
	log.Debug("Prune history bloombits", "threshold", threshold, "elapsed", common.PrettyDuration(time.Since(began)))
	return nil
}