les: historical data garbage collection (#19570)

This change introduces garbage collection for the light client. Historical
chain data is deleted periodically. If you want to disable the GC, use
the --light.nopruning flag.
This commit is contained in:
gary rong
2020-07-13 17:02:54 +08:00
committed by GitHub
parent b8dd0890b3
commit 6eef141aef
45 changed files with 841 additions and 213 deletions

View File

@ -162,8 +162,11 @@ func (b *LesApiBackend) GetLogs(ctx context.Context, hash common.Hash) ([][]*typ
return nil, nil
}
func (b *LesApiBackend) GetTd(hash common.Hash) *big.Int {
return b.eth.blockchain.GetTdByHash(hash)
func (b *LesApiBackend) GetTd(ctx context.Context, hash common.Hash) *big.Int {
if number := rawdb.ReadHeaderNumber(b.eth.chainDb, hash); number != nil {
return b.eth.blockchain.GetTdOdr(ctx, hash, *number)
}
return nil
}
func (b *LesApiBackend) GetEVM(ctx context.Context, msg core.Message, state *state.StateDB, header *types.Header) (*vm.EVM, func() error, error) {

View File

@ -62,6 +62,7 @@ type LightEthereum struct {
serverPool *serverPool
valueTracker *lpc.ValueTracker
dialCandidates enode.Iterator
pruner *pruner
bloomRequests chan chan *bloombits.Retrieval // Channel receiving bloom data retrieval requests
bloomIndexer *core.ChainIndexer // Bloom indexer operating during block imports
@ -121,8 +122,8 @@ func New(ctx *node.ServiceContext, config *eth.Config) (*LightEthereum, error) {
leth.relay = newLesTxRelay(peers, leth.retriever)
leth.odr = NewLesOdr(chainDb, light.DefaultClientIndexerConfig, leth.retriever)
leth.chtIndexer = light.NewChtIndexer(chainDb, leth.odr, params.CHTFrequency, params.HelperTrieConfirmations)
leth.bloomTrieIndexer = light.NewBloomTrieIndexer(chainDb, leth.odr, params.BloomBitsBlocksClient, params.BloomTrieFrequency)
leth.chtIndexer = light.NewChtIndexer(chainDb, leth.odr, params.CHTFrequency, params.HelperTrieConfirmations, config.LightNoPrune)
leth.bloomTrieIndexer = light.NewBloomTrieIndexer(chainDb, leth.odr, params.BloomBitsBlocksClient, params.BloomTrieFrequency, config.LightNoPrune)
leth.odr.SetIndexers(leth.chtIndexer, leth.bloomTrieIndexer, leth.bloomIndexer)
checkpoint := config.Checkpoint
@ -149,6 +150,9 @@ func New(ctx *node.ServiceContext, config *eth.Config) (*LightEthereum, error) {
leth.chtIndexer.Start(leth.blockchain)
leth.bloomIndexer.Start(leth.blockchain)
// Start a light chain pruner to delete useless historical data.
leth.pruner = newPruner(chainDb, leth.chtIndexer, leth.bloomTrieIndexer)
// Rewind the chain in case of an incompatible config upgrade.
if compat, ok := genesisErr.(*params.ConfigCompatError); ok {
log.Warn("Rewinding chain to upgrade configuration", "err", compat)
@ -302,6 +306,7 @@ func (s *LightEthereum) Stop() error {
s.handler.stop()
s.txPool.Stop()
s.engine.Close()
s.pruner.close()
s.eventMux.Stop()
s.chainDb.Close()
s.wg.Wait()

View File

@ -110,14 +110,16 @@ func (r *BlockRequest) Validate(db ethdb.Database, msg *Msg) error {
body := bodies[0]
// Retrieve our stored header and validate block content against it
header := rawdb.ReadHeader(db, r.Hash, r.Number)
if header == nil {
if r.Header == nil {
r.Header = rawdb.ReadHeader(db, r.Hash, r.Number)
}
if r.Header == nil {
return errHeaderUnavailable
}
if header.TxHash != types.DeriveSha(types.Transactions(body.Transactions)) {
if r.Header.TxHash != types.DeriveSha(types.Transactions(body.Transactions)) {
return errTxHashMismatch
}
if header.UncleHash != types.CalcUncleHash(body.Uncles) {
if r.Header.UncleHash != types.CalcUncleHash(body.Uncles) {
return errUncleHashMismatch
}
// Validations passed, encode and store RLP

View File

@ -183,7 +183,7 @@ func odrTxStatus(ctx context.Context, db ethdb.Database, config *params.ChainCon
// testOdr tests odr requests whose validation guaranteed by block headers.
func testOdr(t *testing.T, protocol int, expFail uint64, checkCached bool, fn odrTestFn) {
// Assemble the test environment
server, client, tearDown := newClientServerEnv(t, 4, protocol, nil, nil, 0, false, true)
server, client, tearDown := newClientServerEnv(t, 4, protocol, nil, nil, 0, false, true, true)
defer tearDown()
// Ensure the client has synced all necessary data.

98
les/pruner.go Normal file
View File

@ -0,0 +1,98 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package les
import (
"sync"
"time"
"github.com/ethereum/go-ethereum/common/math"
"github.com/ethereum/go-ethereum/core"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
)
// pruner is responsible for pruning historical light chain data.
type pruner struct {
db ethdb.Database
indexers []*core.ChainIndexer
closeCh chan struct{}
wg sync.WaitGroup
}
// newPruner returns a light chain pruner instance.
func newPruner(db ethdb.Database, indexers ...*core.ChainIndexer) *pruner {
pruner := &pruner{
db: db,
indexers: indexers,
closeCh: make(chan struct{}),
}
pruner.wg.Add(1)
go pruner.loop()
return pruner
}
// close notifies all background goroutines belonging to pruner to exit.
func (p *pruner) close() {
close(p.closeCh)
p.wg.Wait()
}
// loop periodically queries the status of chain indexers and prunes useless
// historical chain data. Notably, whenever Geth restarts, it will iterate
// all historical sections even they don't exist at all(below checkpoint) so
// that light client can prune cached chain data that was ODRed after pruning
// that section.
func (p *pruner) loop() {
defer p.wg.Done()
// cleanTicker is the ticker used to trigger a history clean 2 times a day.
var cleanTicker = time.NewTicker(12 * time.Hour)
// pruning finds the sections that have been processed by all indexers
// and deletes all historical chain data.
// Note, if some indexers don't support pruning(e.g. eth.BloomIndexer),
// pruning operations can be silently ignored.
pruning := func() {
min := uint64(math.MaxUint64)
for _, indexer := range p.indexers {
sections, _, _ := indexer.Sections()
if sections < min {
min = sections
}
}
// Always keep the latest section data in database.
if min < 2 || len(p.indexers) == 0 {
return
}
for _, indexer := range p.indexers {
if err := indexer.Prune(min - 2); err != nil {
log.Debug("Failed to prune historical data", "err", err)
return
}
}
p.db.Compact(nil, nil) // Compact entire database, ensure all removed data are deleted.
}
for {
pruning()
select {
case <-cleanTicker.C:
case <-p.closeCh:
return
}
}
}

197
les/pruner_test.go Normal file
View File

@ -0,0 +1,197 @@
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package les
import (
"bytes"
"context"
"encoding/binary"
"testing"
"time"
"github.com/ethereum/go-ethereum/core"
"github.com/ethereum/go-ethereum/light"
)
func TestLightPruner(t *testing.T) {
config := light.TestClientIndexerConfig
waitIndexers := func(cIndexer, bIndexer, btIndexer *core.ChainIndexer) {
for {
cs, _, _ := cIndexer.Sections()
bts, _, _ := btIndexer.Sections()
if cs >= 3 && bts >= 3 {
break
}
time.Sleep(10 * time.Millisecond)
}
}
server, client, tearDown := newClientServerEnv(t, int(3*config.ChtSize+config.ChtConfirms), 2, waitIndexers, nil, 0, false, true, false)
defer tearDown()
// checkDB iterates the chain with given prefix, resolves the block number
// with given callback and ensures this entry should exist or not.
checkDB := func(from, to uint64, prefix []byte, resolve func(key, value []byte) *uint64, exist bool) bool {
it := client.db.NewIterator(prefix, nil)
defer it.Release()
var next = from
for it.Next() {
number := resolve(it.Key(), it.Value())
if number == nil || *number < from {
continue
} else if *number > to {
return true
}
if exist {
if *number != next {
return false
}
next++
} else {
return false
}
}
return true
}
// checkPruned checks and ensures the stale chain data has been pruned.
checkPruned := func(from, to uint64) {
// Iterate canonical hash
if !checkDB(from, to, []byte("h"), func(key, value []byte) *uint64 {
if len(key) == 1+8+1 && bytes.Equal(key[9:10], []byte("n")) {
n := binary.BigEndian.Uint64(key[1:9])
return &n
}
return nil
}, false) {
t.Fatalf("canonical hash mappings are not properly pruned")
}
// Iterate header
if !checkDB(from, to, []byte("h"), func(key, value []byte) *uint64 {
if len(key) == 1+8+32 {
n := binary.BigEndian.Uint64(key[1:9])
return &n
}
return nil
}, false) {
t.Fatalf("headers are not properly pruned")
}
// Iterate body
if !checkDB(from, to, []byte("b"), func(key, value []byte) *uint64 {
if len(key) == 1+8+32 {
n := binary.BigEndian.Uint64(key[1:9])
return &n
}
return nil
}, false) {
t.Fatalf("block bodies are not properly pruned")
}
// Iterate receipts
if !checkDB(from, to, []byte("r"), func(key, value []byte) *uint64 {
if len(key) == 1+8+32 {
n := binary.BigEndian.Uint64(key[1:9])
return &n
}
return nil
}, false) {
t.Fatalf("receipts are not properly pruned")
}
// Iterate td
if !checkDB(from, to, []byte("h"), func(key, value []byte) *uint64 {
if len(key) == 1+8+32+1 && bytes.Equal(key[41:42], []byte("t")) {
n := binary.BigEndian.Uint64(key[1:9])
return &n
}
return nil
}, false) {
t.Fatalf("tds are not properly pruned")
}
}
// Start light pruner.
time.Sleep(1500 * time.Millisecond) // Ensure light client has finished the syncing and indexing
newPruner(client.db, client.chtIndexer, client.bloomTrieIndexer)
time.Sleep(1500 * time.Millisecond) // Ensure pruner have enough time to prune data.
checkPruned(1, config.ChtSize-1)
// Ensure all APIs still work after pruning.
var cases = []struct {
from, to uint64
methodName string
method func(uint64) bool
}{
{
1, 10, "GetHeaderByNumber",
func(n uint64) bool {
_, err := light.GetHeaderByNumber(context.Background(), client.handler.backend.odr, n)
return err == nil
},
},
{
11, 20, "GetCanonicalHash",
func(n uint64) bool {
_, err := light.GetCanonicalHash(context.Background(), client.handler.backend.odr, n)
return err == nil
},
},
{
21, 30, "GetTd",
func(n uint64) bool {
_, err := light.GetTd(context.Background(), client.handler.backend.odr, server.handler.blockchain.GetHeaderByNumber(n).Hash(), n)
return err == nil
},
},
{
31, 40, "GetBodyRLP",
func(n uint64) bool {
_, err := light.GetBodyRLP(context.Background(), client.handler.backend.odr, server.handler.blockchain.GetHeaderByNumber(n).Hash(), n)
return err == nil
},
},
{
41, 50, "GetBlock",
func(n uint64) bool {
_, err := light.GetBlock(context.Background(), client.handler.backend.odr, server.handler.blockchain.GetHeaderByNumber(n).Hash(), n)
return err == nil
},
},
{
51, 60, "GetBlockReceipts",
func(n uint64) bool {
_, err := light.GetBlockReceipts(context.Background(), client.handler.backend.odr, server.handler.blockchain.GetHeaderByNumber(n).Hash(), n)
return err == nil
},
},
}
for _, c := range cases {
for i := c.from; i <= c.to; i++ {
if !c.method(i) {
t.Fatalf("rpc method %s failed, number %d", c.methodName, i)
}
}
}
// Check GetBloombits
_, err := light.GetBloomBits(context.Background(), client.handler.backend.odr, 0, []uint64{0})
if err != nil {
t.Fatalf("Failed to retrieve bloombits of pruned section: %v", err)
}
// Ensure the ODR cached data can be cleaned by pruner.
newPruner(client.db, client.chtIndexer, client.bloomTrieIndexer)
time.Sleep(50 * time.Millisecond) // Ensure pruner have enough time to prune data.
checkPruned(1, config.ChtSize-1) // Ensure all cached data(by odr) is cleaned.
}

View File

@ -79,7 +79,7 @@ func tfCodeAccess(db ethdb.Database, bhash common.Hash, num uint64) light.OdrReq
func testAccess(t *testing.T, protocol int, fn accessTestFn) {
// Assemble the test environment
server, client, tearDown := newClientServerEnv(t, 4, protocol, nil, nil, 0, false, true)
server, client, tearDown := newClientServerEnv(t, 4, protocol, nil, nil, 0, false, true, true)
defer tearDown()
// Ensure the client has synced all necessary data.

View File

@ -77,8 +77,8 @@ func NewLesServer(e *eth.Ethereum, config *eth.Config) (*LesServer, error) {
iConfig: light.DefaultServerIndexerConfig,
chainDb: e.ChainDb(),
chainReader: e.BlockChain(),
chtIndexer: light.NewChtIndexer(e.ChainDb(), nil, params.CHTFrequency, params.HelperTrieProcessConfirmations),
bloomTrieIndexer: light.NewBloomTrieIndexer(e.ChainDb(), nil, params.BloomBitsBlocks, params.BloomTrieFrequency),
chtIndexer: light.NewChtIndexer(e.ChainDb(), nil, params.CHTFrequency, params.HelperTrieProcessConfirmations, true),
bloomTrieIndexer: light.NewBloomTrieIndexer(e.ChainDb(), nil, params.BloomBitsBlocks, params.BloomTrieFrequency, true),
closeCh: make(chan struct{}),
},
archiveMode: e.ArchiveMode(),

View File

@ -54,7 +54,7 @@ func testCheckpointSyncing(t *testing.T, protocol int, syncMode int) {
}
}
// Generate 512+4 blocks (totally 1 CHT sections)
server, client, tearDown := newClientServerEnv(t, int(config.ChtSize+config.ChtConfirms), protocol, waitIndexers, nil, 0, false, false)
server, client, tearDown := newClientServerEnv(t, int(config.ChtSize+config.ChtConfirms), protocol, waitIndexers, nil, 0, false, false, true)
defer tearDown()
expected := config.ChtSize + config.ChtConfirms
@ -144,7 +144,7 @@ func testMissOracleBackend(t *testing.T, hasCheckpoint bool) {
}
}
// Generate 512+4 blocks (totally 1 CHT sections)
server, client, tearDown := newClientServerEnv(t, int(config.ChtSize+config.ChtConfirms), 3, waitIndexers, nil, 0, false, false)
server, client, tearDown := newClientServerEnv(t, int(config.ChtSize+config.ChtConfirms), 3, waitIndexers, nil, 0, false, false, true)
defer tearDown()
expected := config.ChtSize + config.ChtConfirms

View File

@ -158,11 +158,11 @@ func prepare(n int, backend *backends.SimulatedBackend) {
}
// testIndexers creates a set of indexers with specified params for testing purpose.
func testIndexers(db ethdb.Database, odr light.OdrBackend, config *light.IndexerConfig) []*core.ChainIndexer {
func testIndexers(db ethdb.Database, odr light.OdrBackend, config *light.IndexerConfig, disablePruning bool) []*core.ChainIndexer {
var indexers [3]*core.ChainIndexer
indexers[0] = light.NewChtIndexer(db, odr, config.ChtSize, config.ChtConfirms)
indexers[0] = light.NewChtIndexer(db, odr, config.ChtSize, config.ChtConfirms, disablePruning)
indexers[1] = eth.NewBloomIndexer(db, config.BloomSize, config.BloomConfirms)
indexers[2] = light.NewBloomTrieIndexer(db, odr, config.BloomSize, config.BloomTrieSize)
indexers[2] = light.NewBloomTrieIndexer(db, odr, config.BloomSize, config.BloomTrieSize, disablePruning)
// make bloomTrieIndexer as a child indexer of bloom indexer.
indexers[1].AddChildIndexer(indexers[2])
return indexers[:]
@ -456,7 +456,7 @@ type testServer struct {
func newServerEnv(t *testing.T, blocks int, protocol int, callback indexerCallback, simClock bool, newPeer bool, testCost uint64) (*testServer, func()) {
db := rawdb.NewMemoryDatabase()
indexers := testIndexers(db, nil, light.TestServerIndexerConfig)
indexers := testIndexers(db, nil, light.TestServerIndexerConfig, true)
var clock mclock.Clock = &mclock.System{}
if simClock {
@ -499,7 +499,7 @@ func newServerEnv(t *testing.T, blocks int, protocol int, callback indexerCallba
return server, teardown
}
func newClientServerEnv(t *testing.T, blocks int, protocol int, callback indexerCallback, ulcServers []string, ulcFraction int, simClock bool, connect bool) (*testServer, *testClient, func()) {
func newClientServerEnv(t *testing.T, blocks int, protocol int, callback indexerCallback, ulcServers []string, ulcFraction int, simClock bool, connect bool, disablePruning bool) (*testServer, *testClient, func()) {
sdb, cdb := rawdb.NewMemoryDatabase(), rawdb.NewMemoryDatabase()
speers, cpeers := newServerPeerSet(), newClientPeerSet()
@ -511,8 +511,8 @@ func newClientServerEnv(t *testing.T, blocks int, protocol int, callback indexer
rm := newRetrieveManager(speers, dist, func() time.Duration { return time.Millisecond * 500 })
odr := NewLesOdr(cdb, light.TestClientIndexerConfig, rm)
sindexers := testIndexers(sdb, nil, light.TestServerIndexerConfig)
cIndexers := testIndexers(cdb, odr, light.TestClientIndexerConfig)
sindexers := testIndexers(sdb, nil, light.TestServerIndexerConfig, true)
cIndexers := testIndexers(cdb, odr, light.TestClientIndexerConfig, disablePruning)
scIndexer, sbIndexer, sbtIndexer := sindexers[0], sindexers[1], sindexers[2]
ccIndexer, cbIndexer, cbtIndexer := cIndexers[0], cIndexers[1], cIndexers[2]
@ -542,7 +542,7 @@ func newClientServerEnv(t *testing.T, blocks int, protocol int, callback indexer
}
select {
case <-done:
case <-time.After(3 * time.Second):
case <-time.After(10 * time.Second):
t.Fatal("test peer did not connect and sync within 3s")
}
}

View File

@ -138,6 +138,6 @@ func newTestServerPeer(t *testing.T, blocks int, protocol int) (*testServer, *en
// newTestLightPeer creates node with light sync mode
func newTestLightPeer(t *testing.T, protocol int, ulcServers []string, ulcFraction int) (*testClient, func()) {
_, c, teardown := newClientServerEnv(t, 0, protocol, nil, ulcServers, ulcFraction, false, false)
_, c, teardown := newClientServerEnv(t, 0, protocol, nil, ulcServers, ulcFraction, false, false, true)
return c, teardown
}