| 
									
										
											  
											
												all: bloom-filter based pruning mechanism (#21724)
* cmd, core, tests: initial state pruner
core: fix db inspector
cmd/geth: add verify-state
cmd/geth: add verification tool
core/rawdb: implement flatdb
cmd, core: fix rebase
core/state: use new contract code layout
core/state/pruner: avoid deleting genesis state
cmd/geth: add helper function
core, cmd: fix extract genesis
core: minor fixes
contracts: remove useless
core/state/snapshot: plugin stacktrie
core: polish
core/state/snapshot: iterate storage concurrently
core/state/snapshot: fix iteration
core: add comments
core/state/snapshot: polish code
core/state: polish
core/state/snapshot: rebase
core/rawdb: add comments
core/rawdb: fix tests
core/rawdb: improve tests
core/state/snapshot: fix concurrent iteration
core/state: run pruning during the recovery
core, trie: implement martin's idea
core, eth: delete flatdb and polish pruner
trie: fix import
core/state/pruner: add log
core/state/pruner: fix issues
core/state/pruner: don't read back
core/state/pruner: fix contract code write
core/state/pruner: check root node presence
cmd, core: polish log
core/state: use HEAD-127 as the target
core/state/snapshot: improve tests
cmd/geth: fix verification tool
cmd/geth: use HEAD as the verification default target
all: replace the bloomfilter with martin's fork
cmd, core: polish code
core, cmd: forcibly delete state root
core/state/pruner: add hash64
core/state/pruner: fix blacklist
core/state: remove blacklist
cmd, core: delete trie clean cache before pruning
cmd, core: fix lint
cmd, core: fix rebase
core/state: fix the special case for clique networks
core/state/snapshot: remove useless code
core/state/pruner: capping the snapshot after pruning
cmd, core, eth: fixes
core/rawdb: update db inspector
cmd/geth: polish code
core/state/pruner: fsync bloom filter
cmd, core: print warning log
core/state/pruner: adjust the parameters for bloom filter
cmd, core: create the bloom filter by size
core: polish
core/state/pruner: sanitize invalid bloomfilter size
cmd: address comments
cmd/geth: address comments
cmd/geth: address comment
core/state/pruner: address comments
core/state/pruner: rename homedir to datadir
cmd, core: address comments
core/state/pruner: address comment
core/state: address comments
core, cmd, tests: address comments
core: address comments
core/state/pruner: release the iterator after each commit
core/state/pruner: improve pruner
cmd, core: adjust bloom paramters
core/state/pruner: fix lint
core/state/pruner: fix tests
core: fix rebase
core/state/pruner: remove atomic rename
core/state/pruner: address comments
all: run go mod tidy
core/state/pruner: avoid false-positive for the middle state roots
core/state/pruner: add checks for middle roots
cmd/geth: replace crit with error
* core/state/pruner: fix lint
* core: drop legacy bloom filter
* core/state/snapshot: improve pruner
* core/state/snapshot: polish concurrent logs to report ETA vs. hashes
* core/state/pruner: add progress report for pruning and compaction too
* core: fix snapshot test API
* core/state: fix some pruning logs
* core/state/pruner: support recovering from bloom flush fail
Co-authored-by: Péter Szilágyi <peterke@gmail.com>
											
										 
											2021-02-08 19:16:30 +08:00
										 |  |  | // Copyright 2020 The go-ethereum Authors | 
					
						
							|  |  |  | // This file is part of the go-ethereum library. | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // The go-ethereum library is free software: you can redistribute it and/or modify | 
					
						
							|  |  |  | // it under the terms of the GNU Lesser General Public License as published by | 
					
						
							|  |  |  | // the Free Software Foundation, either version 3 of the License, or | 
					
						
							|  |  |  | // (at your option) any later version. | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // The go-ethereum library is distributed in the hope that it will be useful, | 
					
						
							|  |  |  | // but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 
					
						
							|  |  |  | // GNU Lesser General Public License for more details. | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // You should have received a copy of the GNU Lesser General Public License | 
					
						
							|  |  |  | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | package pruner | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							|  |  |  | 	"encoding/binary" | 
					
						
							|  |  |  | 	"errors" | 
					
						
							|  |  |  | 	"os" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	"github.com/ethereum/go-ethereum/common" | 
					
						
							|  |  |  | 	"github.com/ethereum/go-ethereum/core/rawdb" | 
					
						
							|  |  |  | 	"github.com/ethereum/go-ethereum/log" | 
					
						
							|  |  |  | 	bloomfilter "github.com/holiman/bloomfilter/v2" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // stateBloomHasher is a wrapper around a byte blob to satisfy the interface API | 
					
						
							|  |  |  | // requirements of the bloom library used. It's used to convert a trie hash or | 
					
						
							|  |  |  | // contract code hash into a 64 bit mini hash. | 
					
						
							|  |  |  | type stateBloomHasher []byte | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (f stateBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") } | 
					
						
							|  |  |  | func (f stateBloomHasher) Sum(b []byte) []byte               { panic("not implemented") } | 
					
						
							|  |  |  | func (f stateBloomHasher) Reset()                            { panic("not implemented") } | 
					
						
							|  |  |  | func (f stateBloomHasher) BlockSize() int                    { panic("not implemented") } | 
					
						
							|  |  |  | func (f stateBloomHasher) Size() int                         { return 8 } | 
					
						
							|  |  |  | func (f stateBloomHasher) Sum64() uint64                     { return binary.BigEndian.Uint64(f) } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // stateBloom is a bloom filter used during the state convesion(snapshot->state). | 
					
						
							|  |  |  | // The keys of all generated entries will be recorded here so that in the pruning | 
					
						
							|  |  |  | // stage the entries belong to the specific version can be avoided for deletion. | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // The false-positive is allowed here. The "false-positive" entries means they | 
					
						
							|  |  |  | // actually don't belong to the specific version but they are not deleted in the | 
					
						
							|  |  |  | // pruning. The downside of the false-positive allowance is we may leave some "dangling" | 
					
						
							|  |  |  | // nodes in the disk. But in practice the it's very unlike the dangling node is | 
					
						
							|  |  |  | // state root. So in theory this pruned state shouldn't be visited anymore. Another | 
					
						
							|  |  |  | // potential issue is for fast sync. If we do another fast sync upon the pruned | 
					
						
							|  |  |  | // database, it's problematic which will stop the expansion during the syncing. | 
					
						
							|  |  |  | // TODO address it @rjl493456442 @holiman @karalabe. | 
					
						
							|  |  |  | // | 
					
						
							|  |  |  | // After the entire state is generated, the bloom filter should be persisted into | 
					
						
							|  |  |  | // the disk. It indicates the whole generation procedure is finished. | 
					
						
							|  |  |  | type stateBloom struct { | 
					
						
							|  |  |  | 	bloom *bloomfilter.Filter | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // newStateBloomWithSize creates a brand new state bloom for state generation. | 
					
						
							|  |  |  | // The bloom filter will be created by the passing bloom filter size. According | 
					
						
							|  |  |  | // to the https://hur.st/bloomfilter/?n=600000000&p=&m=2048MB&k=4, the parameters | 
					
						
							|  |  |  | // are picked so that the false-positive rate for mainnet is low enough. | 
					
						
							|  |  |  | func newStateBloomWithSize(size uint64) (*stateBloom, error) { | 
					
						
							|  |  |  | 	bloom, err := bloomfilter.New(size*1024*1024*8, 4) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	log.Info("Initialized state bloom", "size", common.StorageSize(float64(bloom.M()/8))) | 
					
						
							|  |  |  | 	return &stateBloom{bloom: bloom}, nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // NewStateBloomFromDisk loads the state bloom from the given file. | 
					
						
							|  |  |  | // In this case the assumption is held the bloom filter is complete. | 
					
						
							|  |  |  | func NewStateBloomFromDisk(filename string) (*stateBloom, error) { | 
					
						
							|  |  |  | 	bloom, _, err := bloomfilter.ReadFile(filename) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return &stateBloom{bloom: bloom}, nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Commit flushes the bloom filter content into the disk and marks the bloom | 
					
						
							|  |  |  | // as complete. | 
					
						
							|  |  |  | func (bloom *stateBloom) Commit(filename, tempname string) error { | 
					
						
							|  |  |  | 	// Write the bloom out into a temporary file | 
					
						
							|  |  |  | 	_, err := bloom.bloom.WriteFile(tempname) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	// Ensure the file is synced to disk | 
					
						
							| 
									
										
										
										
											2021-08-10 10:38:49 +03:00
										 |  |  | 	f, err := os.OpenFile(tempname, os.O_RDWR, 0666) | 
					
						
							| 
									
										
											  
											
												all: bloom-filter based pruning mechanism (#21724)
* cmd, core, tests: initial state pruner
core: fix db inspector
cmd/geth: add verify-state
cmd/geth: add verification tool
core/rawdb: implement flatdb
cmd, core: fix rebase
core/state: use new contract code layout
core/state/pruner: avoid deleting genesis state
cmd/geth: add helper function
core, cmd: fix extract genesis
core: minor fixes
contracts: remove useless
core/state/snapshot: plugin stacktrie
core: polish
core/state/snapshot: iterate storage concurrently
core/state/snapshot: fix iteration
core: add comments
core/state/snapshot: polish code
core/state: polish
core/state/snapshot: rebase
core/rawdb: add comments
core/rawdb: fix tests
core/rawdb: improve tests
core/state/snapshot: fix concurrent iteration
core/state: run pruning during the recovery
core, trie: implement martin's idea
core, eth: delete flatdb and polish pruner
trie: fix import
core/state/pruner: add log
core/state/pruner: fix issues
core/state/pruner: don't read back
core/state/pruner: fix contract code write
core/state/pruner: check root node presence
cmd, core: polish log
core/state: use HEAD-127 as the target
core/state/snapshot: improve tests
cmd/geth: fix verification tool
cmd/geth: use HEAD as the verification default target
all: replace the bloomfilter with martin's fork
cmd, core: polish code
core, cmd: forcibly delete state root
core/state/pruner: add hash64
core/state/pruner: fix blacklist
core/state: remove blacklist
cmd, core: delete trie clean cache before pruning
cmd, core: fix lint
cmd, core: fix rebase
core/state: fix the special case for clique networks
core/state/snapshot: remove useless code
core/state/pruner: capping the snapshot after pruning
cmd, core, eth: fixes
core/rawdb: update db inspector
cmd/geth: polish code
core/state/pruner: fsync bloom filter
cmd, core: print warning log
core/state/pruner: adjust the parameters for bloom filter
cmd, core: create the bloom filter by size
core: polish
core/state/pruner: sanitize invalid bloomfilter size
cmd: address comments
cmd/geth: address comments
cmd/geth: address comment
core/state/pruner: address comments
core/state/pruner: rename homedir to datadir
cmd, core: address comments
core/state/pruner: address comment
core/state: address comments
core, cmd, tests: address comments
core: address comments
core/state/pruner: release the iterator after each commit
core/state/pruner: improve pruner
cmd, core: adjust bloom paramters
core/state/pruner: fix lint
core/state/pruner: fix tests
core: fix rebase
core/state/pruner: remove atomic rename
core/state/pruner: address comments
all: run go mod tidy
core/state/pruner: avoid false-positive for the middle state roots
core/state/pruner: add checks for middle roots
cmd/geth: replace crit with error
* core/state/pruner: fix lint
* core: drop legacy bloom filter
* core/state/snapshot: improve pruner
* core/state/snapshot: polish concurrent logs to report ETA vs. hashes
* core/state/pruner: add progress report for pruning and compaction too
* core: fix snapshot test API
* core/state: fix some pruning logs
* core/state/pruner: support recovering from bloom flush fail
Co-authored-by: Péter Szilágyi <peterke@gmail.com>
											
										 
											2021-02-08 19:16:30 +08:00
										 |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if err := f.Sync(); err != nil { | 
					
						
							|  |  |  | 		f.Close() | 
					
						
							|  |  |  | 		return err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	f.Close() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Move the teporary file into it's final location | 
					
						
							|  |  |  | 	return os.Rename(tempname, filename) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Put implements the KeyValueWriter interface. But here only the key is needed. | 
					
						
							|  |  |  | func (bloom *stateBloom) Put(key []byte, value []byte) error { | 
					
						
							|  |  |  | 	// If the key length is not 32bytes, ensure it's contract code | 
					
						
							|  |  |  | 	// entry with new scheme. | 
					
						
							|  |  |  | 	if len(key) != common.HashLength { | 
					
						
							|  |  |  | 		isCode, codeKey := rawdb.IsCodeKey(key) | 
					
						
							|  |  |  | 		if !isCode { | 
					
						
							|  |  |  | 			return errors.New("invalid entry") | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		bloom.bloom.Add(stateBloomHasher(codeKey)) | 
					
						
							|  |  |  | 		return nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	bloom.bloom.Add(stateBloomHasher(key)) | 
					
						
							|  |  |  | 	return nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Delete removes the key from the key-value data store. | 
					
						
							|  |  |  | func (bloom *stateBloom) Delete(key []byte) error { panic("not supported") } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Contain is the wrapper of the underlying contains function which | 
					
						
							|  |  |  | // reports whether the key is contained. | 
					
						
							|  |  |  | // - If it says yes, the key may be contained | 
					
						
							|  |  |  | // - If it says no, the key is definitely not contained. | 
					
						
							|  |  |  | func (bloom *stateBloom) Contain(key []byte) (bool, error) { | 
					
						
							|  |  |  | 	return bloom.bloom.Contains(stateBloomHasher(key)), nil | 
					
						
							|  |  |  | } |