cmd/evm, core/asm: add EVM assembler (#3686)

The evm compile command implements a simple assembly language that compiles to
EVM bytecode.
This commit is contained in:
Jeffrey Wilcke
2017-03-01 01:11:24 +01:00
committed by Felix Lange
parent 7ff75ac2f2
commit 230cf2ec91
8 changed files with 852 additions and 133 deletions

281
core/asm/compiler.go Normal file
View File

@ -0,0 +1,281 @@
// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package asm
import (
"errors"
"fmt"
"math/big"
"os"
"strings"
"github.com/ethereum/go-ethereum/common/math"
"github.com/ethereum/go-ethereum/core/vm"
)
// Compiler contains information about the parsed source
// and holds the tokens for the program.
type Compiler struct {
tokens []token
binary []interface{}
labels map[string]int
pc, pos int
debug bool
}
// newCompiler returns a new allocated compiler.
func NewCompiler(debug bool) *Compiler {
return &Compiler{
labels: make(map[string]int),
debug: debug,
}
}
// Feed feeds tokens in to ch and are interpreted by
// the compiler.
//
// feed is the first pass in the compile stage as it
// collect the used labels in the program and keeps a
// program counter which is used to determine the locations
// of the jump dests. The labels can than be used in the
// second stage to push labels and determine the right
// position.
func (c *Compiler) Feed(ch <-chan token) {
for i := range ch {
switch i.typ {
case number:
num := math.MustParseBig256(i.text).Bytes()
if len(num) == 0 {
num = []byte{0}
}
c.pc += len(num)
case stringValue:
c.pc += len(i.text) - 2
case element:
c.pc++
case labelDef:
c.labels[i.text] = c.pc
c.pc++
case label:
c.pc += 5
}
c.tokens = append(c.tokens, i)
}
if c.debug {
fmt.Fprintln(os.Stderr, "found", len(c.labels), "labels")
}
}
// Compile compiles the current tokens and returns a
// binary string that can be interpreted by the EVM
// and an error if it failed.
//
// compile is the second stage in the compile phase
// which compiles the tokens to EVM instructions.
func (c *Compiler) Compile() (string, []error) {
var errors []error
// continue looping over the tokens until
// the stack has been exhausted.
for c.pos < len(c.tokens) {
if err := c.compileLine(); err != nil {
errors = append(errors, err)
}
}
// turn the binary to hex
var bin string
for _, v := range c.binary {
switch v := v.(type) {
case vm.OpCode:
bin += fmt.Sprintf("%x", []byte{byte(v)})
case []byte:
bin += fmt.Sprintf("%x", v)
}
}
return bin, errors
}
// next returns the next token and increments the
// posititon.
func (c *Compiler) next() token {
token := c.tokens[c.pos]
c.pos++
return token
}
// compile line compiles a single line instruction e.g.
// "push 1", "jump @labal".
func (c *Compiler) compileLine() error {
n := c.next()
if n.typ != lineStart {
return compileErr(n, n.typ.String(), lineStart.String())
}
lvalue := c.next()
switch lvalue.typ {
case eof:
return nil
case element:
if err := c.compileElement(lvalue); err != nil {
return err
}
case labelDef:
c.compileLabel()
case lineEnd:
return nil
default:
return compileErr(lvalue, lvalue.text, fmt.Sprintf("%v or %v", labelDef, element))
}
if n := c.next(); n.typ != lineEnd {
return compileErr(n, n.text, lineEnd.String())
}
return nil
}
// compileNumber compiles the number to bytes
func (c *Compiler) compileNumber(element token) (int, error) {
num := math.MustParseBig256(element.text).Bytes()
if len(num) == 0 {
num = []byte{0}
}
c.pushBin(num)
return len(num), nil
}
// compileElement compiles the element (push & label or both)
// to a binary representation and may error if incorrect statements
// where fed.
func (c *Compiler) compileElement(element token) error {
// check for a jump. jumps must be read and compiled
// from right to left.
if isJump(element.text) {
rvalue := c.next()
switch rvalue.typ {
case number:
// TODO figure out how to return the error properly
c.compileNumber(rvalue)
case stringValue:
// strings are quoted, remove them.
c.pushBin(rvalue.text[1 : len(rvalue.text)-2])
case label:
c.pushBin(vm.PUSH4)
pos := big.NewInt(int64(c.labels[rvalue.text])).Bytes()
pos = append(make([]byte, 4-len(pos)), pos...)
c.pushBin(pos)
default:
return compileErr(rvalue, rvalue.text, "number, string or label")
}
// push the operation
c.pushBin(toBinary(element.text))
return nil
} else if isPush(element.text) {
// handle pushes. pushes are read from left to right.
var value []byte
rvalue := c.next()
switch rvalue.typ {
case number:
value = math.MustParseBig256(rvalue.text).Bytes()
if len(value) == 0 {
value = []byte{0}
}
case stringValue:
value = []byte(rvalue.text[1 : len(rvalue.text)-1])
case label:
value = make([]byte, 4)
copy(value, big.NewInt(int64(c.labels[rvalue.text])).Bytes())
default:
return compileErr(rvalue, rvalue.text, "number, string or label")
}
if len(value) > 32 {
return fmt.Errorf("%d type error: unsupported string or number with size > 32", rvalue.lineno)
}
c.pushBin(vm.OpCode(int(vm.PUSH1) - 1 + len(value)))
c.pushBin(value)
} else {
c.pushBin(toBinary(element.text))
}
return nil
}
// compileLabel pushes a jumpdest to the binary slice.
func (c *Compiler) compileLabel() {
c.pushBin(vm.JUMPDEST)
}
// pushBin pushes the value v to the binary stack.
func (c *Compiler) pushBin(v interface{}) {
if c.debug {
fmt.Printf("%d: %v\n", len(c.binary), v)
}
c.binary = append(c.binary, v)
}
// isPush returns whether the string op is either any of
// push(N).
func isPush(op string) bool {
if op == "push" {
return true
}
return false
}
// isJump returns whether the string op is jump(i)
func isJump(op string) bool {
return op == "jumpi" || op == "jump"
}
// toBinary converts text to a vm.OpCode
func toBinary(text string) vm.OpCode {
if isPush(text) {
text = "push1"
}
return vm.StringToOp(strings.ToUpper(text))
}
type compileError struct {
got string
want string
lineno int
}
func (err compileError) Error() string {
return fmt.Sprintf("%d syntax error: unexpected %v, expected %v", err.lineno, err.got, err.want)
}
var (
errExpBol = errors.New("expected beginning of line")
errExpElementOrLabel = errors.New("expected beginning of line")
)
func compileErr(c token, got, want string) error {
return compileError{
got: got,
want: want,
lineno: c.lineno,
}
}

36
core/asm/lex_test.go Normal file
View File

@ -0,0 +1,36 @@
// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package asm
import "testing"
func lexAll(src string) []token {
ch := Lex("test.asm", []byte(src), false)
var tokens []token
for i := range ch {
tokens = append(tokens, i)
}
return tokens
}
func TestComment(t *testing.T) {
tokens := lexAll(";; this is a comment")
if len(tokens) != 2 { // {new line, EOF}
t.Error("expected no tokens")
}
}

291
core/asm/lexer.go Normal file
View File

@ -0,0 +1,291 @@
// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package asm
import (
"fmt"
"os"
"strings"
"unicode"
"unicode/utf8"
)
// stateFn is used through the lifetime of the
// lexer to parse the different values at the
// current state.
type stateFn func(*lexer) stateFn
// token is emitted when the lexer has discovered
// a new parsable token. These are delivered over
// the tokens channels of the lexer
type token struct {
typ tokenType
lineno int
text string
}
// tokenType are the different types the lexer
// is able to parse and return.
type tokenType int
const (
eof tokenType = iota // end of file
lineStart // emitted when a line starts
lineEnd // emitted when a line ends
invalidStatement // any invalid statement
element // any element during element parsing
label // label is emitted when a labal is found
labelDef // label definition is emitted when a new label is found
number // number is emitted when a number is found
stringValue // stringValue is emitted when a string has been found
Numbers = "1234567890" // characters representing any decimal number
HexadecimalNumbers = Numbers + "aAbBcCdDeEfF" // characters representing any hexadecimal
Alpha = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // characters representing alphanumeric
)
// String implements stringer
func (it tokenType) String() string {
if int(it) > len(stringtokenTypes) {
return "invalid"
}
return stringtokenTypes[it]
}
var stringtokenTypes = []string{
eof: "EOF",
invalidStatement: "invalid statement",
element: "element",
lineEnd: "end of line",
lineStart: "new line",
label: "label",
labelDef: "label definition",
number: "number",
stringValue: "string",
}
// lexer is the basic construct for parsing
// source code and turning them in to tokens.
// Tokens are interpreted by the compiler.
type lexer struct {
input string // input contains the source code of the program
tokens chan token // tokens is used to deliver tokens to the listener
state stateFn // the current state function
lineno int // current line number in the source file
start, pos, width int // positions for lexing and returning value
debug bool // flag for triggering debug output
}
// lex lexes the program by name with the given source. It returns a
// channel on which the tokens are delivered.
func Lex(name string, source []byte, debug bool) <-chan token {
ch := make(chan token)
l := &lexer{
input: string(source),
tokens: ch,
state: lexLine,
debug: debug,
}
go func() {
l.emit(lineStart)
for l.state != nil {
l.state = l.state(l)
}
l.emit(eof)
close(l.tokens)
}()
return ch
}
// next returns the next rune in the program's source.
func (l *lexer) next() (rune rune) {
if l.pos >= len(l.input) {
l.width = 0
return 0
}
rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
l.pos += l.width
return rune
}
// backup backsup the last parsed element (multi-character)
func (l *lexer) backup() {
l.pos -= l.width
}
// peek returns the next rune but does not advance the seeker
func (l *lexer) peek() rune {
r := l.next()
l.backup()
return r
}
// ignore advances the seeker and ignores the value
func (l *lexer) ignore() {
l.start = l.pos
}
// Accepts checks whether the given input matches the next rune
func (l *lexer) accept(valid string) bool {
if strings.IndexRune(valid, l.next()) >= 0 {
return true
}
l.backup()
return false
}
// acceptRun will continue to advance the seeker until valid
// can no longer be met.
func (l *lexer) acceptRun(valid string) {
for strings.IndexRune(valid, l.next()) >= 0 {
}
l.backup()
}
// acceptRunUntil is the inverse of acceptRun and will continue
// to advance the seeker until the rune has been found.
func (l *lexer) acceptRunUntil(until rune) bool {
// Continues running until a rune is found
for i := l.next(); strings.IndexRune(string(until), i) == -1; i = l.next() {
if i == 0 {
return false
}
}
return true
}
// blob returns the current value
func (l *lexer) blob() string {
return l.input[l.start:l.pos]
}
// Emits a new token on to token channel for processing
func (l *lexer) emit(t tokenType) {
token := token{t, l.lineno, l.blob()}
if l.debug {
fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text)
}
l.tokens <- token
l.start = l.pos
}
// lexLine is state function for lexing lines
func lexLine(l *lexer) stateFn {
for {
switch r := l.next(); {
case r == '\n':
l.emit(lineEnd)
l.ignore()
l.lineno++
l.emit(lineStart)
case r == ';' && l.peek() == ';':
return lexComment
case isSpace(r):
l.ignore()
case isAlphaNumeric(r) || r == '_':
return lexElement
case isNumber(r):
return lexNumber
case r == '@':
l.ignore()
return lexLabel
case r == '"':
return lexInsideString
default:
return nil
}
}
}
// lexComment parses the current position until the end
// of the line and discards the text.
func lexComment(l *lexer) stateFn {
l.acceptRunUntil('\n')
l.ignore()
return lexLine
}
// lexLabel parses the current label, emits and returns
// the lex text state function to advance the parsing
// process.
func lexLabel(l *lexer) stateFn {
l.acceptRun(Alpha + "_")
l.emit(label)
return lexLine
}
// lexInsideString lexes the inside of a string until
// until the state function finds the closing quote.
// It returns the lex text state function.
func lexInsideString(l *lexer) stateFn {
if l.acceptRunUntil('"') {
l.emit(stringValue)
}
return lexLine
}
func lexNumber(l *lexer) stateFn {
acceptance := Numbers
if l.accept("0") && l.accept("xX") {
acceptance = HexadecimalNumbers
}
l.acceptRun(acceptance)
l.emit(number)
return lexLine
}
func lexElement(l *lexer) stateFn {
l.acceptRun(Alpha + "_" + Numbers)
if l.peek() == ':' {
l.emit(labelDef)
l.accept(":")
l.ignore()
} else {
l.emit(element)
}
return lexLine
}
func isAlphaNumeric(t rune) bool {
return unicode.IsLetter(t)
}
func isSpace(t rune) bool {
return unicode.IsSpace(t)
}
func isNumber(t rune) bool {
return unicode.IsNumber(t)
}