trie: separate hashes and committer, collapse on commit

* trie: make db insert use size instead of full data
* core/state: minor optimization in state onleaf allocation
* trie: implement dedicated committer and hasher
* trie: use dedicated committer/hasher
* trie: linter nitpicks
* core/state, trie: avoid unnecessary storage trie load+commit
* trie: review feedback, mainly docs + minor changes
* trie: start deprecating old hasher
* trie: fix misspell+lint
* trie: deprecate hasher.go, make proof framework use new hasher
* trie: rename pure_committer/hasher to committer/hasher
* trie, core/state: fix review concerns
* trie: more review concerns
* trie: make commit collapse into hashnode, don't touch dirtyness
* trie: goimports fixes
* trie: remove panics
2020-02-03 16:28:30 +01:00
parent 4cc89a5a32
commit 5a9c96454e
9 changed files with 462 additions and 161 deletions
--- a/trie/hasher.go
+++ b/trie/hasher.go
@@ -1,4 +1,4 @@
-// Copyright 2016 The go-ethereum Authors
+// Copyright 2019 The go-ethereum Authors
 // This file is part of the go-ethereum library.
 //
 // The go-ethereum library is free software: you can redistribute it and/or modify
@@ -20,17 +20,10 @@ import (
 	"hash"
 	"sync"

-	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/rlp"
 	"golang.org/x/crypto/sha3"
 )

-type hasher struct {
-	tmp    sliceBuffer
-	sha    keccakState
-	onleaf LeafCallback
-}
-
 // keccakState wraps sha3.state. In addition to the usual hash methods, it also supports
 // Read to get a variable amount of data from the hash state. Read is faster than Sum
 // because it doesn't copy the internal state, but also modifies the internal state.
@@ -50,7 +43,14 @@ func (b *sliceBuffer) Reset() {
 	*b = (*b)[:0]
 }

-// hashers live in a global db.
+// hasher is a type used for the trie Hash operation. A hasher has some
+// internal preallocated temp space.
+type hasher struct {
+	sha keccakState
+	tmp sliceBuffer
+}
+
+// hasherPool holds reusable hashers.
 var hasherPool = sync.Pool{
 	New: func() interface{} {
 		return &hasher{
@@ -60,9 +60,8 @@ var hasherPool = sync.Pool{
 	},
 }

-func newHasher(onleaf LeafCallback) *hasher {
+func newHasher() *hasher {
 	h := hasherPool.Get().(*hasher)
-	h.onleaf = onleaf
 	return h
 }

@@ -72,144 +71,126 @@ func returnHasherToPool(h *hasher) {

 // hash collapses a node down into a hash node, also returning a copy of the
 // original node initialized with the computed hash to replace the original one.
-func (h *hasher) hash(n node, db *Database, force bool) (node, node, error) {
-	// If we're not storing the node, just hashing, use available cached data
-	if hash, dirty := n.cache(); hash != nil {
-		if db == nil {
-			return hash, n, nil
-		}
-		if !dirty {
-			switch n.(type) {
-			case *fullNode, *shortNode:
-				return hash, hash, nil
-			default:
-				return hash, n, nil
-			}
-		}
+func (h *hasher) hash(n node, force bool) (hashed node, cached node) {
+	// We're not storing the node, just hashing, use available cached data
+	if hash, _ := n.cache(); hash != nil {
+		return hash, n
 	}
 	// Trie not processed yet or needs storage, walk the children
-	collapsed, cached, err := h.hashChildren(n, db)
-	if err != nil {
-		return hashNode{}, n, err
-	}
-	hashed, err := h.store(collapsed, db, force)
-	if err != nil {
-		return hashNode{}, n, err
-	}
-	// Cache the hash of the node for later reuse and remove
-	// the dirty flag in commit mode. It's fine to assign these values directly
-	// without copying the node first because hashChildren copies it.
-	cachedHash, _ := hashed.(hashNode)
-	switch cn := cached.(type) {
+	switch n := n.(type) {
 	case *shortNode:
-		cn.flags.hash = cachedHash
-		if db != nil {
-			cn.flags.dirty = false
+		collapsed, cached := h.hashShortNodeChildren(n)
+		hashed := h.shortnodeToHash(collapsed, force)
+		// We need to retain the possibly _not_ hashed node, in case it was too
+		// small to be hashed
+		if hn, ok := hashed.(hashNode); ok {
+			cached.flags.hash = hn
+		} else {
+			cached.flags.hash = nil
 		}
+		return hashed, cached
 	case *fullNode:
-		cn.flags.hash = cachedHash
-		if db != nil {
-			cn.flags.dirty = false
+		collapsed, cached := h.hashFullNodeChildren(n)
+		hashed = h.fullnodeToHash(collapsed, force)
+		if hn, ok := hashed.(hashNode); ok {
+			cached.flags.hash = hn
+		} else {
+			cached.flags.hash = nil
 		}
-	}
-	return hashed, cached, nil
-}
-
-// hashChildren replaces the children of a node with their hashes if the encoded
-// size of the child is larger than a hash, returning the collapsed node as well
-// as a replacement for the original node with the child hashes cached in.
-func (h *hasher) hashChildren(original node, db *Database) (node, node, error) {
-	var err error
-
-	switch n := original.(type) {
-	case *shortNode:
-		// Hash the short node's child, caching the newly hashed subtree
-		collapsed, cached := n.copy(), n.copy()
-		collapsed.Key = hexToCompact(n.Key)
-		cached.Key = common.CopyBytes(n.Key)
-
-		if _, ok := n.Val.(valueNode); !ok {
-			collapsed.Val, cached.Val, err = h.hash(n.Val, db, false)
-			if err != nil {
-				return original, original, err
-			}
-		}
-		return collapsed, cached, nil
-
-	case *fullNode:
-		// Hash the full node's children, caching the newly hashed subtrees
-		collapsed, cached := n.copy(), n.copy()
-
-		for i := 0; i < 16; i++ {
-			if n.Children[i] != nil {
-				collapsed.Children[i], cached.Children[i], err = h.hash(n.Children[i], db, false)
-				if err != nil {
-					return original, original, err
-				}
-			}
-		}
-		cached.Children[16] = n.Children[16]
-		return collapsed, cached, nil
-
+		return hashed, cached
 	default:
 		// Value and hash nodes don't have children so they're left as were
-		return n, original, nil
+		return n, n
 	}
 }

-// store hashes the node n and if we have a storage layer specified, it writes
-// the key/value pair to it and tracks any node->child references as well as any
-// node->external trie references.
-func (h *hasher) store(n node, db *Database, force bool) (node, error) {
-	// Don't store hashes or empty nodes.
-	if _, isHash := n.(hashNode); n == nil || isHash {
-		return n, nil
+// hashShortNodeChildren collapses the short node. The returned collapsed node
+// holds a live reference to the Key, and must not be modified.
+// The cached node is a copy of n, with any hashed child replaced by its cached form.
+func (h *hasher) hashShortNodeChildren(n *shortNode) (collapsed, cached *shortNode) {
+	// Hash the short node's child, caching the newly hashed subtree
+	collapsed, cached = n.copy(), n.copy()
+	// Previously, we did copy this one. We don't seem to need to actually
+	// do that, since we don't overwrite/reuse keys
+	//cached.Key = common.CopyBytes(n.Key)
+	collapsed.Key = hexToCompact(n.Key)
+	// Unless the child is a valuenode or hashnode, hash it
+	switch n.Val.(type) {
+	case *fullNode, *shortNode:
+		collapsed.Val, cached.Val = h.hash(n.Val, false)
 	}
-	// Generate the RLP encoding of the node
+	return collapsed, cached
+}
+
+func (h *hasher) hashFullNodeChildren(n *fullNode) (collapsed *fullNode, cached *fullNode) {
+	// Hash the full node's children, caching the newly hashed subtrees
+	cached = n.copy()
+	collapsed = n.copy()
+	for i := 0; i < 16; i++ {
+		if child := n.Children[i]; child != nil {
+			collapsed.Children[i], cached.Children[i] = h.hash(child, false)
+		} else {
+			collapsed.Children[i] = nilValueNode
+		}
+	}
+	cached.Children[16] = n.Children[16]
+	return collapsed, cached
+}
+
+// shortnodeToHash creates a hashNode from a shortNode. The supplied shortnode
+// should have hex-type Key, which will be converted (without modification)
+// into compact form for RLP encoding.
+// If the rlp data is smaller than 32 bytes and force is false, the node itself is returned.
+func (h *hasher) shortnodeToHash(n *shortNode, force bool) node {
 	h.tmp.Reset()
 	if err := rlp.Encode(&h.tmp, n); err != nil {
 		panic("encode error: " + err.Error())
 	}
+
 	if len(h.tmp) < 32 && !force {
-		return n, nil // Nodes smaller than 32 bytes are stored inside their parent
+		return n // Nodes smaller than 32 bytes are stored inside their parent
 	}
-	// Larger nodes are replaced by their hash and stored in the database.
-	hash, _ := n.cache()
-	if hash == nil {
-		hash = h.makeHashNode(h.tmp)
-	}
-
-	if db != nil {
-		// We are pooling the trie nodes into an intermediate memory cache
-		hash := common.BytesToHash(hash)
-
-		db.lock.Lock()
-		db.insert(hash, h.tmp, n)
-		db.lock.Unlock()
-
-		// Track external references from account->storage trie
-		if h.onleaf != nil {
-			switch n := n.(type) {
-			case *shortNode:
-				if child, ok := n.Val.(valueNode); ok {
-					h.onleaf(child, hash)
-				}
-			case *fullNode:
-				for i := 0; i < 16; i++ {
-					if child, ok := n.Children[i].(valueNode); ok {
-						h.onleaf(child, hash)
-					}
-				}
-			}
-		}
-	}
-	return hash, nil
+	return h.hashData(h.tmp)
 }

-func (h *hasher) makeHashNode(data []byte) hashNode {
-	n := make(hashNode, h.sha.Size())
+// fullnodeToHash is used to create a hashNode from a fullNode, (which
+// may contain nil values)
+func (h *hasher) fullnodeToHash(n *fullNode, force bool) node {
+	h.tmp.Reset()
+	// Generate the RLP encoding of the node
+	if err := n.EncodeRLP(&h.tmp); err != nil {
+		panic("encode error: " + err.Error())
+	}
+
+	if len(h.tmp) < 32 && !force {
+		return n // Nodes smaller than 32 bytes are stored inside their parent
+	}
+	return h.hashData(h.tmp)
+}
+
+// hashData hashes the provided data
+func (h *hasher) hashData(data []byte) hashNode {
+	n := make(hashNode, 32)
 	h.sha.Reset()
 	h.sha.Write(data)
 	h.sha.Read(n)
 	return n
 }
+
+// proofHash is used to construct trie proofs, and returns the 'collapsed'
+// node (for later RLP encoding) as well as the hashed node -- unless the
+// node is smaller than 32 bytes, in which case it will be returned as is.
+// This method does not do anything on value- or hash-nodes.
+func (h *hasher) proofHash(original node) (collapsed, hashed node) {
+	switch n := original.(type) {
+	case *shortNode:
+		sn, _ := h.hashShortNodeChildren(n)
+		return sn, h.shortnodeToHash(sn, false)
+	case *fullNode:
+		fn, _ := h.hashFullNodeChildren(n)
+		return fn, h.fullnodeToHash(fn, false)
+	default:
+		// Value and hash nodes don't have children so they're left as were
+		return n, n
+	}
+}