292 lines
		
	
	
		
			7.0 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			292 lines
		
	
	
		
			7.0 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright 2017 The go-ethereum Authors
 | |
| // This file is part of the go-ethereum library.
 | |
| //
 | |
| // The go-ethereum library is free software: you can redistribute it and/or modify
 | |
| // it under the terms of the GNU Lesser General Public License as published by
 | |
| // the Free Software Foundation, either version 3 of the License, or
 | |
| // (at your option) any later version.
 | |
| //
 | |
| // The go-ethereum library is distributed in the hope that it will be useful,
 | |
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 | |
| // GNU Lesser General Public License for more details.
 | |
| //
 | |
| // You should have received a copy of the GNU Lesser General Public License
 | |
| // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 | |
| 
 | |
| package asm
 | |
| 
 | |
| import (
 | |
| 	"fmt"
 | |
| 	"os"
 | |
| 	"strings"
 | |
| 	"unicode"
 | |
| 	"unicode/utf8"
 | |
| )
 | |
| 
 | |
// stateFn represents a single state of the lexer. A state consumes
// input from the lexer it is handed and returns the state that should
// run next; Lex keeps invoking the returned state until it is nil.
type stateFn func(*lexer) stateFn
 | |
| 
 | |
// token is emitted when the lexer has discovered
// a new parsable token. Tokens are delivered over
// the tokens channel of the lexer.
type token struct {
	typ    tokenType // kind of token that was recognised
	lineno int       // lexer line counter at the time the token was emitted
	text   string    // slice of the input covered by the token
}
 | |
| 
 | |
// tokenType identifies the kind of token the lexer
// is able to recognise and emit.
type tokenType int

const (
	eof              tokenType = iota // end of file
	lineStart                         // emitted when a line starts
	lineEnd                           // emitted when a line ends
	invalidStatement                  // any invalid statement
	element                           // any element during element parsing
	label                             // label is emitted when a label is found
	labelDef                          // label definition is emitted when a new label is found
	number                            // number is emitted when a number is found
	stringValue                       // stringValue is emitted when a string has been found

	Numbers            = "1234567890"                                           // characters representing any decimal number
	HexadecimalNumbers = Numbers + "aAbBcCdDeEfF"                               // characters representing any hexadecimal
	Alpha              = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // characters representing any ASCII letter
)

// String implements fmt.Stringer. It returns the human readable name of
// the token type, or "invalid" for any value that has no entry in
// stringtokenTypes (negative values and values at or past the end of
// the table), so that it never indexes out of range.
func (it tokenType) String() string {
	if it < 0 || int(it) >= len(stringtokenTypes) {
		return "invalid"
	}
	return stringtokenTypes[it]
}

// stringtokenTypes maps every tokenType to its human readable name.
var stringtokenTypes = []string{
	eof:              "EOF",
	invalidStatement: "invalid statement",
	element:          "element",
	lineEnd:          "end of line",
	lineStart:        "new line",
	label:            "label",
	labelDef:         "label definition",
	number:           "number",
	stringValue:      "string",
}
 | |
| 
 | |
// lexer is the basic construct for parsing
// source code and turning it into tokens.
// Tokens are interpreted by the compiler.
type lexer struct {
	input string // input contains the source code of the program

	tokens chan token // tokens is used to deliver tokens to the listener
	state  stateFn    // the current state function

	lineno int // current line number in the source file; starts at zero and is bumped on every '\n'
	// start is the byte offset where the pending token begins, pos is the
	// byte offset of the next rune to read, and width is the byte width of
	// the rune most recently read by next (zero at end of input).
	start, pos, width int

	debug bool // flag for triggering debug output
}
 | |
| 
 | |
| // lex lexes the program by name with the given source. It returns a
 | |
| // channel on which the tokens are delivered.
 | |
| func Lex(source []byte, debug bool) <-chan token {
 | |
| 	ch := make(chan token)
 | |
| 	l := &lexer{
 | |
| 		input:  string(source),
 | |
| 		tokens: ch,
 | |
| 		state:  lexLine,
 | |
| 		debug:  debug,
 | |
| 	}
 | |
| 	go func() {
 | |
| 		l.emit(lineStart)
 | |
| 		for l.state != nil {
 | |
| 			l.state = l.state(l)
 | |
| 		}
 | |
| 		l.emit(eof)
 | |
| 		close(l.tokens)
 | |
| 	}()
 | |
| 
 | |
| 	return ch
 | |
| }
 | |
| 
 | |
| // next returns the next rune in the program's source.
 | |
| func (l *lexer) next() (rune rune) {
 | |
| 	if l.pos >= len(l.input) {
 | |
| 		l.width = 0
 | |
| 		return 0
 | |
| 	}
 | |
| 	rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
 | |
| 	l.pos += l.width
 | |
| 	return rune
 | |
| }
 | |
| 
 | |
| // backup backsup the last parsed element (multi-character)
 | |
| func (l *lexer) backup() {
 | |
| 	l.pos -= l.width
 | |
| }
 | |
| 
 | |
| // peek returns the next rune but does not advance the seeker
 | |
| func (l *lexer) peek() rune {
 | |
| 	r := l.next()
 | |
| 	l.backup()
 | |
| 	return r
 | |
| }
 | |
| 
 | |
// ignore discards the input consumed so far by moving the start of the
// pending token up to the current position. It does not itself read
// any further input.
func (l *lexer) ignore() {
	l.start = l.pos
}
 | |
| 
 | |
| // Accepts checks whether the given input matches the next rune
 | |
| func (l *lexer) accept(valid string) bool {
 | |
| 	if strings.ContainsRune(valid, l.next()) {
 | |
| 		return true
 | |
| 	}
 | |
| 
 | |
| 	l.backup()
 | |
| 
 | |
| 	return false
 | |
| }
 | |
| 
 | |
| // acceptRun will continue to advance the seeker until valid
 | |
| // can no longer be met.
 | |
| func (l *lexer) acceptRun(valid string) {
 | |
| 	for strings.ContainsRune(valid, l.next()) {
 | |
| 	}
 | |
| 	l.backup()
 | |
| }
 | |
| 
 | |
| // acceptRunUntil is the inverse of acceptRun and will continue
 | |
| // to advance the seeker until the rune has been found.
 | |
| func (l *lexer) acceptRunUntil(until rune) bool {
 | |
| 	// Continues running until a rune is found
 | |
| 	for i := l.next(); !strings.ContainsRune(string(until), i); i = l.next() {
 | |
| 		if i == 0 {
 | |
| 			return false
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return true
 | |
| }
 | |
| 
 | |
| // blob returns the current value
 | |
| func (l *lexer) blob() string {
 | |
| 	return l.input[l.start:l.pos]
 | |
| }
 | |
| 
 | |
| // Emits a new token on to token channel for processing
 | |
| func (l *lexer) emit(t tokenType) {
 | |
| 	token := token{t, l.lineno, l.blob()}
 | |
| 
 | |
| 	if l.debug {
 | |
| 		fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text)
 | |
| 	}
 | |
| 
 | |
| 	l.tokens <- token
 | |
| 	l.start = l.pos
 | |
| }
 | |
| 
 | |
| // lexLine is state function for lexing lines
 | |
| func lexLine(l *lexer) stateFn {
 | |
| 	for {
 | |
| 		switch r := l.next(); {
 | |
| 		case r == '\n':
 | |
| 			l.emit(lineEnd)
 | |
| 			l.ignore()
 | |
| 			l.lineno++
 | |
| 
 | |
| 			l.emit(lineStart)
 | |
| 		case r == ';' && l.peek() == ';':
 | |
| 			return lexComment
 | |
| 		case isSpace(r):
 | |
| 			l.ignore()
 | |
| 		case isLetter(r) || r == '_':
 | |
| 			return lexElement
 | |
| 		case isNumber(r):
 | |
| 			return lexNumber
 | |
| 		case r == '@':
 | |
| 			l.ignore()
 | |
| 			return lexLabel
 | |
| 		case r == '"':
 | |
| 			return lexInsideString
 | |
| 		default:
 | |
| 			return nil
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // lexComment parses the current position until the end
 | |
| // of the line and discards the text.
 | |
| func lexComment(l *lexer) stateFn {
 | |
| 	l.acceptRunUntil('\n')
 | |
| 	l.ignore()
 | |
| 
 | |
| 	return lexLine
 | |
| }
 | |
| 
 | |
| // lexLabel parses the current label, emits and returns
 | |
| // the lex text state function to advance the parsing
 | |
| // process.
 | |
| func lexLabel(l *lexer) stateFn {
 | |
| 	l.acceptRun(Alpha + "_" + Numbers)
 | |
| 
 | |
| 	l.emit(label)
 | |
| 
 | |
| 	return lexLine
 | |
| }
 | |
| 
 | |
| // lexInsideString lexes the inside of a string until
 | |
| // the state function finds the closing quote.
 | |
| // It returns the lex text state function.
 | |
| func lexInsideString(l *lexer) stateFn {
 | |
| 	if l.acceptRunUntil('"') {
 | |
| 		l.emit(stringValue)
 | |
| 	}
 | |
| 
 | |
| 	return lexLine
 | |
| }
 | |
| 
 | |
| func lexNumber(l *lexer) stateFn {
 | |
| 	acceptance := Numbers
 | |
| 	if l.accept("0") || l.accept("xX") {
 | |
| 		acceptance = HexadecimalNumbers
 | |
| 	}
 | |
| 	l.acceptRun(acceptance)
 | |
| 
 | |
| 	l.emit(number)
 | |
| 
 | |
| 	return lexLine
 | |
| }
 | |
| 
 | |
| func lexElement(l *lexer) stateFn {
 | |
| 	l.acceptRun(Alpha + "_" + Numbers)
 | |
| 
 | |
| 	if l.peek() == ':' {
 | |
| 		l.emit(labelDef)
 | |
| 
 | |
| 		l.accept(":")
 | |
| 		l.ignore()
 | |
| 	} else {
 | |
| 		l.emit(element)
 | |
| 	}
 | |
| 	return lexLine
 | |
| }
 | |
| 
 | |
// isLetter reports whether t is a Unicode letter (category L).
func isLetter(t rune) bool {
	return unicode.Is(unicode.Letter, t)
}
 | |
| 
 | |
// isSpace reports whether t is a white space character as defined by
// Unicode's White_Space property.
func isSpace(t rune) bool {
	return unicode.Is(unicode.White_Space, t)
}
 | |
| 
 | |
// isNumber reports whether t is a Unicode number (category N).
func isNumber(t rune) bool {
	return unicode.Is(unicode.Number, t)
}
 |