trie, core/state: improve memory usage and performance (#3135)

* trie: store nodes as pointers

  This avoids memory copies when unwrapping node interface values.

  name      old time/op  new time/op  delta
  Get        388ns ± 8%   215ns ± 2%  -44.56%  (p=0.000 n=15+15)
  GetDB      363ns ± 3%   202ns ± 2%  -44.21%  (p=0.000 n=15+15)
  UpdateBE  1.57µs ± 2%  1.29µs ± 3%  -17.80%  (p=0.000 n=13+15)
  UpdateLE  1.92µs ± 2%  1.61µs ± 2%  -16.25%  (p=0.000 n=14+14)
  HashBE    2.16µs ± 6%  2.18µs ± 6%     ~     (p=0.436 n=15+15)
  HashLE    7.43µs ± 3%  7.21µs ± 3%   -2.96%  (p=0.000 n=15+13)

* trie: close temporary databases in GetDB benchmark

* trie: don't keep []byte from DB load around

  Nodes decoded from a DB load kept hashes and values as sub-slices of
  the DB value. This can be a problem because loading from leveldb often
  returns []byte with a cap that's larger than necessary, increasing
  memory usage.

* trie: unload old cached nodes

* trie, core/state: use cache unloading for account trie

* trie: use explicit private flags (fixes Go 1.5 reflection issue).

* trie: fixup cachegen overflow at request of nick

* core/state: rename journal size constant
2016-10-14 18:04:33 +02:00
parent c2ddfb343a
commit 40cdcf1183
15 changed files with 249 additions and 142 deletions
--- a/trie/hasher.go
+++ b/trie/hasher.go
@@ -27,8 +27,9 @@ import (
 )

 type hasher struct {
-	tmp *bytes.Buffer
-	sha hash.Hash
+	tmp                  *bytes.Buffer
+	sha                  hash.Hash
+	cachegen, cachelimit uint16
 }

 // hashers live in a global pool.
@@ -38,8 +39,10 @@ var hasherPool = sync.Pool{
 	},
 }

-func newHasher() *hasher {
-	return hasherPool.Get().(*hasher)
+func newHasher(cachegen, cachelimit uint16) *hasher {
+	h := hasherPool.Get().(*hasher)
+	h.cachegen, h.cachelimit = cachegen, cachelimit
+	return h
 }

 func returnHasherToPool(h *hasher) {
@@ -50,8 +53,18 @@ func returnHasherToPool(h *hasher) {
 // original node initialzied with the computed hash to replace the original one.
 func (h *hasher) hash(n node, db DatabaseWriter, force bool) (node, node, error) {
 	// If we're not storing the node, just hashing, use avaialble cached data
-	if hash, dirty := n.cache(); hash != nil && (db == nil || !dirty) {
-		return hash, n, nil
+	if hash, dirty := n.cache(); hash != nil {
+		if db == nil {
+			return hash, n, nil
+		}
+		if n.canUnload(h.cachegen, h.cachelimit) {
+			// Evict the node from cache. All of its subnodes will have a lower or equal
+			// cache generation number.
+			return hash, hash, nil
+		}
+		if !dirty {
+			return hash, n, nil
+		}
 	}
 	// Trie not processed yet or needs storage, walk the children
 	collapsed, cached, err := h.hashChildren(n, db)
@@ -62,19 +75,21 @@ func (h *hasher) hash(n node, db DatabaseWriter, force bool) (node, node, error)
 	if err != nil {
 		return hashNode{}, n, err
 	}
-	// Cache the hash and RLP blob of the ndoe for later reuse
+	// Cache the hash of the node for later reuse.
 	if hash, ok := hashed.(hashNode); ok && !force {
 		switch cached := cached.(type) {
-		case shortNode:
-			cached.hash = hash
+		case *shortNode:
+			cached = cached.copy()
+			cached.flags.hash = hash
 			if db != nil {
-				cached.dirty = false
+				cached.flags.dirty = false
 			}
 			return hashed, cached, nil
-		case fullNode:
-			cached.hash = hash
+		case *fullNode:
+			cached = cached.copy()
+			cached.flags.hash = hash
 			if db != nil {
-				cached.dirty = false
+				cached.flags.dirty = false
 			}
 			return hashed, cached, nil
 		}
@@ -89,40 +104,42 @@ func (h *hasher) hashChildren(original node, db DatabaseWriter) (node, node, err
 	var err error

 	switch n := original.(type) {
-	case shortNode:
+	case *shortNode:
 		// Hash the short node's child, caching the newly hashed subtree
-		cached := n
-		cached.Key = common.CopyBytes(cached.Key)
+		collapsed, cached := n.copy(), n.copy()
+		collapsed.Key = compactEncode(n.Key)
+		cached.Key = common.CopyBytes(n.Key)

-		n.Key = compactEncode(n.Key)
 		if _, ok := n.Val.(valueNode); !ok {
-			if n.Val, cached.Val, err = h.hash(n.Val, db, false); err != nil {
-				return n, original, err
+			collapsed.Val, cached.Val, err = h.hash(n.Val, db, false)
+			if err != nil {
+				return original, original, err
 			}
 		}
-		if n.Val == nil {
-			n.Val = valueNode(nil) // Ensure that nil children are encoded as empty strings.
+		if collapsed.Val == nil {
+			collapsed.Val = valueNode(nil) // Ensure that nil children are encoded as empty strings.
 		}
-		return n, cached, nil
+		return collapsed, cached, nil

-	case fullNode:
+	case *fullNode:
 		// Hash the full node's children, caching the newly hashed subtrees
-		cached := fullNode{dirty: n.dirty}
+		collapsed, cached := n.copy(), n.copy()

 		for i := 0; i < 16; i++ {
 			if n.Children[i] != nil {
-				if n.Children[i], cached.Children[i], err = h.hash(n.Children[i], db, false); err != nil {
-					return n, original, err
+				collapsed.Children[i], cached.Children[i], err = h.hash(n.Children[i], db, false)
+				if err != nil {
+					return original, original, err
 				}
 			} else {
-				n.Children[i] = valueNode(nil) // Ensure that nil children are encoded as empty strings.
+				collapsed.Children[i] = valueNode(nil) // Ensure that nil children are encoded as empty strings.
 			}
 		}
 		cached.Children[16] = n.Children[16]
-		if n.Children[16] == nil {
-			n.Children[16] = valueNode(nil)
+		if collapsed.Children[16] == nil {
+			collapsed.Children[16] = valueNode(nil)
 		}
-		return n, cached, nil
+		return collapsed, cached, nil

 	default:
 		// Value and hash nodes don't have children so they're left as were
@@ -140,6 +157,7 @@ func (h *hasher) store(n node, db DatabaseWriter, force bool) (node, error) {
 	if err := rlp.Encode(h.tmp, n); err != nil {
 		panic("encode error: " + err.Error())
 	}
+
 	if h.tmp.Len() < 32 && !force {
 		return n, nil // Nodes smaller than 32 bytes are stored inside their parent
 	}