core/vm: less allocations for various call variants (#21222)

* core/vm/runtime/tests: add more benchmarks

* core/vm: initial work on improving alloc count for calls to precompiles

name                                  old time/op    new time/op    delta
SimpleLoop/identity-precompile-10M-6     117ms ±75%      43ms ± 1%  -63.09%  (p=0.008 n=5+5)
SimpleLoop/loop-10M-6                   79.6ms ± 4%    70.5ms ± 1%  -11.42%  (p=0.008 n=5+5)

name                                  old alloc/op   new alloc/op   delta
SimpleLoop/identity-precompile-10M-6    24.4MB ± 0%     4.9MB ± 0%  -79.94%  (p=0.008 n=5+5)
SimpleLoop/loop-10M-6                   13.2kB ± 0%    13.2kB ± 0%     ~     (p=0.357 n=5+5)

name                                  old allocs/op  new allocs/op  delta
SimpleLoop/identity-precompile-10M-6      382k ± 0%      153k ± 0%  -59.99%  (p=0.000 n=5+4)
SimpleLoop/loop-10M-6                     40.0 ± 0%      40.0 ± 0%     ~     (all equal)

* core/vm: don't allocate big.int for touch

name                                  old time/op    new time/op    delta
SimpleLoop/identity-precompile-10M-6    43.3ms ± 1%    42.4ms ± 7%     ~     (p=0.151 n=5+5)
SimpleLoop/loop-10M-6                   70.5ms ± 1%    76.7ms ± 1%   +8.67%  (p=0.008 n=5+5)

name                                  old alloc/op   new alloc/op   delta
SimpleLoop/identity-precompile-10M-6    4.90MB ± 0%    2.46MB ± 0%  -49.83%  (p=0.008 n=5+5)
SimpleLoop/loop-10M-6                   13.2kB ± 0%    13.2kB ± 1%     ~     (p=0.571 n=5+5)

name                                  old allocs/op  new allocs/op  delta
SimpleLoop/identity-precompile-10M-6      153k ± 0%       76k ± 0%  -49.98%  (p=0.029 n=4+4)
SimpleLoop/loop-10M-6                     40.0 ± 0%      40.0 ± 0%     ~     (all equal)

* core/vm: reduce allocs in staticcall

name                                  old time/op    new time/op    delta
SimpleLoop/identity-precompile-10M-6    42.4ms ± 7%    37.5ms ± 6%  -11.68%  (p=0.008 n=5+5)
SimpleLoop/loop-10M-6                   76.7ms ± 1%    69.1ms ± 1%   -9.82%  (p=0.008 n=5+5)

name                                  old alloc/op   new alloc/op   delta
SimpleLoop/identity-precompile-10M-6    2.46MB ± 0%    0.02MB ± 0%  -99.35%  (p=0.008 n=5+5)
SimpleLoop/loop-10M-6                   13.2kB ± 1%    13.2kB ± 0%     ~     (p=0.143 n=5+5)

name                                  old allocs/op  new allocs/op  delta
SimpleLoop/identity-precompile-10M-6     76.4k ± 0%      0.1k ± 0%     ~     (p=0.079 n=4+5)
SimpleLoop/loop-10M-6                     40.0 ± 0%      40.0 ± 0%     ~     (all equal)

* trie: better use of hasher keccakState

* core/state/statedb: reduce allocations in getDeletedStateObject

* core/vm: reduce allocations in all call derivates

* core/vm: reduce allocations in call variants

- Make returnstack `uint32`
- Use a `sync.Pool` of `stack`s

* core/vm: fix tests

* core/vm: goimports

* core/vm: tracer fix + staticcall gas fix

* core/vm: add back snapshot to staticcall

* core/vm: review concerns + make returnstack pooled + enable returndata in traces

* core/vm: fix some test tracer method signatures

* core/vm: run gencodec, minor comment polish

Co-authored-by: Péter Szilágyi <peterke@gmail.com>
This commit is contained in:
Martin Holst Swende
2020-07-16 14:06:19 +02:00
committed by GitHub
parent 240d1851db
commit 295693759e
21 changed files with 447 additions and 188 deletions

View File

@ -321,34 +321,6 @@ func TestBlockhash(t *testing.T) {
}
}
// BenchmarkSimpleLoop test a pretty simple loop which loops
// 1M (1 048 575) times.
// Takes about 200 ms
func BenchmarkSimpleLoop(b *testing.B) {
// 0xfffff = 1048575 loops
code := []byte{
byte(vm.PUSH3), 0x0f, 0xff, 0xff,
byte(vm.JUMPDEST), // [ count ]
byte(vm.PUSH1), 1, // [count, 1]
byte(vm.SWAP1), // [1, count]
byte(vm.SUB), // [ count -1 ]
byte(vm.DUP1), // [ count -1 , count-1]
byte(vm.PUSH1), 4, // [count-1, count -1, label]
byte(vm.JUMPI), // [ 0 ]
byte(vm.STOP),
}
//tracer := vm.NewJSONLogger(nil, os.Stdout)
//Execute(code, nil, &Config{
// EVMConfig: vm.Config{
// Debug: true,
// Tracer: tracer,
// }})
for i := 0; i < b.N; i++ {
Execute(code, nil, nil)
}
}
type stepCounter struct {
inner *vm.JSONLogger
steps int
@ -358,7 +330,7 @@ func (s *stepCounter) CaptureStart(from common.Address, to common.Address, creat
return nil
}
func (s *stepCounter) CaptureState(env *vm.EVM, pc uint64, op vm.OpCode, gas, cost uint64, memory *vm.Memory, stack *vm.Stack, rStack *vm.ReturnStack, contract *vm.Contract, depth int, err error) error {
func (s *stepCounter) CaptureState(env *vm.EVM, pc uint64, op vm.OpCode, gas, cost uint64, memory *vm.Memory, stack *vm.Stack, rStack *vm.ReturnStack, rData []byte, contract *vm.Contract, depth int, err error) error {
s.steps++
// Enable this for more output
//s.inner.CaptureState(env, pc, op, gas, cost, memory, stack, rStack, contract, depth, err)
@ -593,3 +565,160 @@ func DisabledTestEipExampleCases(t *testing.T) {
"allowed, and causes an error", code)
}
}
// benchmarkNonModifyingCode benchmarks code, but if the code modifies the
// state, this should not be used, since it does not reset the state between runs.
func benchmarkNonModifyingCode(gas uint64, code []byte, name string, b *testing.B) {
cfg := new(Config)
setDefaults(cfg)
cfg.State, _ = state.New(common.Hash{}, state.NewDatabase(rawdb.NewMemoryDatabase()), nil)
cfg.GasLimit = gas
var (
destination = common.BytesToAddress([]byte("contract"))
vmenv = NewEnv(cfg)
sender = vm.AccountRef(cfg.Origin)
)
cfg.State.CreateAccount(destination)
eoa := common.HexToAddress("E0")
{
cfg.State.CreateAccount(eoa)
cfg.State.SetNonce(eoa, 100)
}
reverting := common.HexToAddress("EE")
{
cfg.State.CreateAccount(reverting)
cfg.State.SetCode(reverting, []byte{
byte(vm.PUSH1), 0x00,
byte(vm.PUSH1), 0x00,
byte(vm.REVERT),
})
}
//cfg.State.CreateAccount(cfg.Origin)
// set the receiver's (the executing contract) code for execution.
cfg.State.SetCode(destination, code)
vmenv.Call(sender, destination, nil, gas, cfg.Value)
b.Run(name, func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
vmenv.Call(sender, destination, nil, gas, cfg.Value)
}
})
}
// BenchmarkSimpleLoop test a pretty simple loop which loops until OOG
// 55 ms
func BenchmarkSimpleLoop(b *testing.B) {
staticCallIdentity := []byte{
byte(vm.JUMPDEST), // [ count ]
// push args for the call
byte(vm.PUSH1), 0, // out size
byte(vm.DUP1), // out offset
byte(vm.DUP1), // out insize
byte(vm.DUP1), // in offset
byte(vm.PUSH1), 0x4, // address of identity
byte(vm.GAS), // gas
byte(vm.STATICCALL),
byte(vm.POP), // pop return value
byte(vm.PUSH1), 0, // jumpdestination
byte(vm.JUMP),
}
callIdentity := []byte{
byte(vm.JUMPDEST), // [ count ]
// push args for the call
byte(vm.PUSH1), 0, // out size
byte(vm.DUP1), // out offset
byte(vm.DUP1), // out insize
byte(vm.DUP1), // in offset
byte(vm.DUP1), // value
byte(vm.PUSH1), 0x4, // address of identity
byte(vm.GAS), // gas
byte(vm.CALL),
byte(vm.POP), // pop return value
byte(vm.PUSH1), 0, // jumpdestination
byte(vm.JUMP),
}
callInexistant := []byte{
byte(vm.JUMPDEST), // [ count ]
// push args for the call
byte(vm.PUSH1), 0, // out size
byte(vm.DUP1), // out offset
byte(vm.DUP1), // out insize
byte(vm.DUP1), // in offset
byte(vm.DUP1), // value
byte(vm.PUSH1), 0xff, // address of existing contract
byte(vm.GAS), // gas
byte(vm.CALL),
byte(vm.POP), // pop return value
byte(vm.PUSH1), 0, // jumpdestination
byte(vm.JUMP),
}
callEOA := []byte{
byte(vm.JUMPDEST), // [ count ]
// push args for the call
byte(vm.PUSH1), 0, // out size
byte(vm.DUP1), // out offset
byte(vm.DUP1), // out insize
byte(vm.DUP1), // in offset
byte(vm.DUP1), // value
byte(vm.PUSH1), 0xE0, // address of EOA
byte(vm.GAS), // gas
byte(vm.CALL),
byte(vm.POP), // pop return value
byte(vm.PUSH1), 0, // jumpdestination
byte(vm.JUMP),
}
loopingCode := []byte{
byte(vm.JUMPDEST), // [ count ]
// push args for the call
byte(vm.PUSH1), 0, // out size
byte(vm.DUP1), // out offset
byte(vm.DUP1), // out insize
byte(vm.DUP1), // in offset
byte(vm.PUSH1), 0x4, // address of identity
byte(vm.GAS), // gas
byte(vm.POP), byte(vm.POP), byte(vm.POP), byte(vm.POP), byte(vm.POP), byte(vm.POP),
byte(vm.PUSH1), 0, // jumpdestination
byte(vm.JUMP),
}
calllRevertingContractWithInput := []byte{
byte(vm.JUMPDEST), //
// push args for the call
byte(vm.PUSH1), 0, // out size
byte(vm.DUP1), // out offset
byte(vm.PUSH1), 0x20, // in size
byte(vm.PUSH1), 0x00, // in offset
byte(vm.PUSH1), 0x00, // value
byte(vm.PUSH1), 0xEE, // address of reverting contract
byte(vm.GAS), // gas
byte(vm.CALL),
byte(vm.POP), // pop return value
byte(vm.PUSH1), 0, // jumpdestination
byte(vm.JUMP),
}
//tracer := vm.NewJSONLogger(nil, os.Stdout)
//Execute(loopingCode, nil, &Config{
// EVMConfig: vm.Config{
// Debug: true,
// Tracer: tracer,
// }})
// 100M gas
benchmarkNonModifyingCode(100000000, staticCallIdentity, "staticcall-identity-100M", b)
benchmarkNonModifyingCode(100000000, callIdentity, "call-identity-100M", b)
benchmarkNonModifyingCode(100000000, loopingCode, "loop-100M", b)
benchmarkNonModifyingCode(100000000, callInexistant, "call-nonexist-100M", b)
benchmarkNonModifyingCode(100000000, callEOA, "call-EOA-100M", b)
benchmarkNonModifyingCode(100000000, calllRevertingContractWithInput, "call-reverting-100M", b)
//benchmarkNonModifyingCode(10000000, staticCallIdentity, "staticcall-identity-10M", b)
//benchmarkNonModifyingCode(10000000, loopingCode, "loop-10M", b)
}