292 lines
		
	
	
		
			7.1 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
		
		
			
		
	
	
			292 lines
		
	
	
		
			7.1 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
|   | // Copyright 2017 The go-ethereum Authors | ||
|  | // This file is part of the go-ethereum library. | ||
|  | // | ||
|  | // The go-ethereum library is free software: you can redistribute it and/or modify | ||
|  | // it under the terms of the GNU Lesser General Public License as published by | ||
|  | // the Free Software Foundation, either version 3 of the License, or | ||
|  | // (at your option) any later version. | ||
|  | // | ||
|  | // The go-ethereum library is distributed in the hope that it will be useful, | ||
|  | // but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
|  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
|  | // GNU Lesser General Public License for more details. | ||
|  | // | ||
|  | // You should have received a copy of the GNU Lesser General Public License | ||
|  | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. | ||
|  | 
 | ||
|  | package asm | ||
|  | 
 | ||
|  | import ( | ||
|  | 	"fmt" | ||
|  | 	"os" | ||
|  | 	"strings" | ||
|  | 	"unicode" | ||
|  | 	"unicode/utf8" | ||
|  | ) | ||
|  | 
 | ||
// stateFn is a single step of the lexer's state machine. It is handed the
// lexer, consumes input from it, and returns the state function that should
// run next. Returning nil ends the run loop started by Lex.
type stateFn func(*lexer) stateFn
|  | 
 | ||
// token is emitted when the lexer has discovered a new parsable token.
// Tokens are delivered over the tokens channel of the lexer.
type token struct {
	typ    tokenType // kind of token
	lineno int       // value of the lexer's line counter when the token was emitted
	text   string    // part of the input covered by the token
}
|  | 
 | ||
// tokenType identifies the kind of a token that the lexer is able to
// recognise and emit.
type tokenType int
|  | 
 | ||
// Token types emitted by the lexer, followed by the character sets used to
// recognise numbers and identifiers.
const (
	eof              tokenType = iota // end of file
	lineStart                         // emitted when a line starts
	lineEnd                           // emitted when a line ends
	invalidStatement                  // any invalid statement
	element                           // any element during element parsing
	label                             // label is emitted when a label is found
	labelDef                          // label definition is emitted when a new label is found
	number                            // number is emitted when a number is found
	stringValue                       // stringValue is emitted when a string has been found

	Numbers            = "1234567890"                                           // characters representing any decimal number
	HexadecimalNumbers = Numbers + "aAbBcCdDeEfF"                               // characters representing any hexadecimal number
	Alpha              = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // ASCII letters only, upper and lower case (no digits)
)
|  | 
 | ||
|  | // String implements stringer | ||
|  | func (it tokenType) String() string { | ||
|  | 	if int(it) > len(stringtokenTypes) { | ||
|  | 		return "invalid" | ||
|  | 	} | ||
|  | 	return stringtokenTypes[it] | ||
|  | } | ||
|  | 
 | ||
|  | var stringtokenTypes = []string{ | ||
|  | 	eof:              "EOF", | ||
|  | 	invalidStatement: "invalid statement", | ||
|  | 	element:          "element", | ||
|  | 	lineEnd:          "end of line", | ||
|  | 	lineStart:        "new line", | ||
|  | 	label:            "label", | ||
|  | 	labelDef:         "label definition", | ||
|  | 	number:           "number", | ||
|  | 	stringValue:      "string", | ||
|  | } | ||
|  | 
 | ||
// lexer is the basic construct for parsing source code and turning it into
// tokens. Tokens are sent on the tokens channel as they are recognised.
type lexer struct {
	input string // input contains the source code of the program

	tokens chan token // tokens is used to deliver tokens to the listener
	state  stateFn    // the current state function

	lineno            int // current line number in the source file
	start, pos, width int // start of the pending token, read position, and byte width of the last rune read

	debug bool // when set, emit also prints every token to stderr
}
|  | 
 | ||
|  | // lex lexes the program by name with the given source. It returns a | ||
|  | // channel on which the tokens are delivered. | ||
|  | func Lex(name string, source []byte, debug bool) <-chan token { | ||
|  | 	ch := make(chan token) | ||
|  | 	l := &lexer{ | ||
|  | 		input:  string(source), | ||
|  | 		tokens: ch, | ||
|  | 		state:  lexLine, | ||
|  | 		debug:  debug, | ||
|  | 	} | ||
|  | 	go func() { | ||
|  | 		l.emit(lineStart) | ||
|  | 		for l.state != nil { | ||
|  | 			l.state = l.state(l) | ||
|  | 		} | ||
|  | 		l.emit(eof) | ||
|  | 		close(l.tokens) | ||
|  | 	}() | ||
|  | 
 | ||
|  | 	return ch | ||
|  | } | ||
|  | 
 | ||
|  | // next returns the next rune in the program's source. | ||
|  | func (l *lexer) next() (rune rune) { | ||
|  | 	if l.pos >= len(l.input) { | ||
|  | 		l.width = 0 | ||
|  | 		return 0 | ||
|  | 	} | ||
|  | 	rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) | ||
|  | 	l.pos += l.width | ||
|  | 	return rune | ||
|  | } | ||
|  | 
 | ||
// backup steps back over the rune returned by the most recent call to next.
// Only one read can be undone. After next has reported the end of the input
// the recorded width is 0 and backup leaves the position unchanged.
func (l *lexer) backup() {
	l.pos -= l.width
}
|  | 
 | ||
|  | // peek returns the next rune but does not advance the seeker | ||
|  | func (l *lexer) peek() rune { | ||
|  | 	r := l.next() | ||
|  | 	l.backup() | ||
|  | 	return r | ||
|  | } | ||
|  | 
 | ||
// ignore discards the text consumed so far by moving the start of the
// pending token up to the current read position.
func (l *lexer) ignore() {
	l.start = l.pos
}
|  | 
 | ||
|  | // Accepts checks whether the given input matches the next rune | ||
|  | func (l *lexer) accept(valid string) bool { | ||
|  | 	if strings.IndexRune(valid, l.next()) >= 0 { | ||
|  | 		return true | ||
|  | 	} | ||
|  | 
 | ||
|  | 	l.backup() | ||
|  | 
 | ||
|  | 	return false | ||
|  | } | ||
|  | 
 | ||
|  | // acceptRun will continue to advance the seeker until valid | ||
|  | // can no longer be met. | ||
|  | func (l *lexer) acceptRun(valid string) { | ||
|  | 	for strings.IndexRune(valid, l.next()) >= 0 { | ||
|  | 	} | ||
|  | 	l.backup() | ||
|  | } | ||
|  | 
 | ||
|  | // acceptRunUntil is the inverse of acceptRun and will continue | ||
|  | // to advance the seeker until the rune has been found. | ||
|  | func (l *lexer) acceptRunUntil(until rune) bool { | ||
|  | 	// Continues running until a rune is found | ||
|  | 	for i := l.next(); strings.IndexRune(string(until), i) == -1; i = l.next() { | ||
|  | 		if i == 0 { | ||
|  | 			return false | ||
|  | 		} | ||
|  | 	} | ||
|  | 
 | ||
|  | 	return true | ||
|  | } | ||
|  | 
 | ||
// blob returns the pending text: the part of the input between the start of
// the current token and the current read position.
func (l *lexer) blob() string {
	return l.input[l.start:l.pos]
}
|  | 
 | ||
|  | // Emits a new token on to token channel for processing | ||
|  | func (l *lexer) emit(t tokenType) { | ||
|  | 	token := token{t, l.lineno, l.blob()} | ||
|  | 
 | ||
|  | 	if l.debug { | ||
|  | 		fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text) | ||
|  | 	} | ||
|  | 
 | ||
|  | 	l.tokens <- token | ||
|  | 	l.start = l.pos | ||
|  | } | ||
|  | 
 | ||
|  | // lexLine is state function for lexing lines | ||
|  | func lexLine(l *lexer) stateFn { | ||
|  | 	for { | ||
|  | 		switch r := l.next(); { | ||
|  | 		case r == '\n': | ||
|  | 			l.emit(lineEnd) | ||
|  | 			l.ignore() | ||
|  | 			l.lineno++ | ||
|  | 
 | ||
|  | 			l.emit(lineStart) | ||
|  | 		case r == ';' && l.peek() == ';': | ||
|  | 			return lexComment | ||
|  | 		case isSpace(r): | ||
|  | 			l.ignore() | ||
|  | 		case isAlphaNumeric(r) || r == '_': | ||
|  | 			return lexElement | ||
|  | 		case isNumber(r): | ||
|  | 			return lexNumber | ||
|  | 		case r == '@': | ||
|  | 			l.ignore() | ||
|  | 			return lexLabel | ||
|  | 		case r == '"': | ||
|  | 			return lexInsideString | ||
|  | 		default: | ||
|  | 			return nil | ||
|  | 		} | ||
|  | 	} | ||
|  | } | ||
|  | 
 | ||
|  | // lexComment parses the current position until the end | ||
|  | // of the line and discards the text. | ||
|  | func lexComment(l *lexer) stateFn { | ||
|  | 	l.acceptRunUntil('\n') | ||
|  | 	l.ignore() | ||
|  | 
 | ||
|  | 	return lexLine | ||
|  | } | ||
|  | 
 | ||
|  | // lexLabel parses the current label, emits and returns | ||
|  | // the lex text state function to advance the parsing | ||
|  | // process. | ||
|  | func lexLabel(l *lexer) stateFn { | ||
|  | 	l.acceptRun(Alpha + "_") | ||
|  | 
 | ||
|  | 	l.emit(label) | ||
|  | 
 | ||
|  | 	return lexLine | ||
|  | } | ||
|  | 
 | ||
|  | // lexInsideString lexes the inside of a string until | ||
|  | // until the state function finds the closing quote. | ||
|  | // It returns the lex text state function. | ||
|  | func lexInsideString(l *lexer) stateFn { | ||
|  | 	if l.acceptRunUntil('"') { | ||
|  | 		l.emit(stringValue) | ||
|  | 	} | ||
|  | 
 | ||
|  | 	return lexLine | ||
|  | } | ||
|  | 
 | ||
|  | func lexNumber(l *lexer) stateFn { | ||
|  | 	acceptance := Numbers | ||
|  | 	if l.accept("0") && l.accept("xX") { | ||
|  | 		acceptance = HexadecimalNumbers | ||
|  | 	} | ||
|  | 	l.acceptRun(acceptance) | ||
|  | 
 | ||
|  | 	l.emit(number) | ||
|  | 
 | ||
|  | 	return lexLine | ||
|  | } | ||
|  | 
 | ||
|  | func lexElement(l *lexer) stateFn { | ||
|  | 	l.acceptRun(Alpha + "_" + Numbers) | ||
|  | 
 | ||
|  | 	if l.peek() == ':' { | ||
|  | 		l.emit(labelDef) | ||
|  | 
 | ||
|  | 		l.accept(":") | ||
|  | 		l.ignore() | ||
|  | 	} else { | ||
|  | 		l.emit(element) | ||
|  | 	} | ||
|  | 	return lexLine | ||
|  | } | ||
|  | 
 | ||
// isAlphaNumeric reports whether t is a letter according to unicode.IsLetter.
// Despite its name it does not match digits.
func isAlphaNumeric(t rune) bool {
	return unicode.IsLetter(t)
}
|  | 
 | ||
// isSpace reports whether t is white space according to unicode.IsSpace.
func isSpace(t rune) bool {
	return unicode.IsSpace(t)
}
|  | 
 | ||
// isNumber reports whether t is a number according to unicode.IsNumber.
// NOTE(review): that set is presumably wider than the ASCII digits listed in
// Numbers; confirm whether non-ASCII digits are meant to start a number.
func isNumber(t rune) bool {
	return unicode.IsNumber(t)
}