Created
December 2, 2014 19:18
-
-
Save wwkeyboard/d1871bf2b0fb3fb928d0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
/* | |
* TODOs: | |
* - Should pass the rune into the stateFn, to avoid all of the backups | |
*/ | |
import ( | |
"fmt" | |
"io/ioutil" | |
"strings" | |
"unicode" | |
"unicode/utf8" | |
) | |
// lexer holds the scanning state for one input string.
// Tokens are delivered to the consumer over the items channel.
type lexer struct {
	input string    // the string being scanned
	pos   Pos       // current byte offset in input
	start Pos       // start offset of the token currently being scanned
	width Pos       // byte width of the last rune read (lets backup step back one rune)
	items chan item // channel on which scanned tokens are emitted
	state stateFn   // current state function (set by run)
}
// item is a single token produced by the lexer.
type item struct {
	typ itemType // kind of token
	pos Pos      // starting byte offset of the token in the input
	val string   // raw text of the token
}
// Pos is a byte offset into the lexer's input string.
type Pos int

// itemType identifies the kind of a lexed token.
type itemType int

const (
	itemError itemType = iota // 0: lexing error
	itemText                  // 1: plain text
	itemEOF                   // 2: end of input
	itemSpace                 // 3: run of spaces/tabs
	itemIdentifier            // 4: identifier
	itemClass                 // 5: "class" keyword
	itemDef                   // 6: "def" keyword
	itemEnd                   // 7: "end" keyword
	itemNewline               // 8: run of newlines
	itemString                // 9: string literal (was missing, but emitted by lexSingleQuoteString)
)
// keywords maps reserved words to their token types.
// (This is a var rather than a const because Go constants are limited to
// basic types — a map literal can never be const.)
var keywords = map[string]itemType{
	"class": itemClass,
	"def":   itemDef,
	"end":   itemEnd,
}

// eof is the sentinel rune returned by next() when the input is exhausted.
const eof = -1

// lineComment introduces a comment that runs to the end of the line.
const lineComment = "//"

// stateFn is one state of the lexer's state machine; it returns the next
// state, or nil to stop the machine.
type stateFn func(*lexer) stateFn
func lex(input string) *lexer { | |
l := &lexer{ | |
input: input, | |
items: make(chan item, 10), | |
} | |
go l.run() | |
return l | |
} | |
func (l *lexer) run() { | |
fmt.Printf("running - -\n") | |
for l.state = lexText; l.state != nil; { | |
l.state = l.state(l) | |
} | |
} | |
// emit sends the text scanned since start (start..pos) as a token of
// type t, then advances start past it.
func (l *lexer) emit(t itemType) {
	l.items <- item{t, l.start, l.input[l.start:l.pos]}
	l.start = l.pos
}
// close shuts the items channel, signalling consumers that lexing is done.
func (l *lexer) close() {
	close(l.items)
}
// ignore discards the pending input (start..pos) without emitting a token.
func (l *lexer) ignore() {
	l.start = l.pos
}
// next consumes and returns the next rune of the input, or eof when the
// input is exhausted. width records the rune's byte size so that backup
// can step back over exactly one rune.
func (l *lexer) next() rune {
	if int(l.pos) >= len(l.input) {
		l.width = 0 // so a subsequent backup is a no-op at EOF
		return eof
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = Pos(w)
	l.pos += l.width
	return r
}
// peek returns the next rune without consuming it.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}
// backup steps back one rune. Safe to call only once per call to next:
// width is not reset, so a second backup would rewind by the same amount again.
func (l *lexer) backup() {
	l.pos -= l.width
}
////////////////////////////////////////
// type of rune conditions

// isAlphaNumeric reports whether r may appear in an identifier:
// letters, digits, '-' or '_'.
func isAlphaNumeric(r rune) bool {
	switch r {
	case '-', '_':
		return true
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r)
}
// isNewline reports whether r is a line feed.
func isNewline(r rune) bool {
	switch r {
	case '\n':
		return true
	default:
		return false
	}
}
// isSpace reports whether r is horizontal whitespace (space or tab).
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	default:
		return false
	}
}
// isStringDelim reports whether r opens/closes a string literal
// (single or double quote). Heredocs are deliberately not handled yet.
func isStringDelim(r rune) bool {
	switch r {
	case '\'', '"':
		return true
	}
	return false
}
//////////////////////////////////////// | |
// The main lexer, the center of this statemachine | |
func lexText(l *lexer) stateFn { | |
switch r := l.next(); { | |
case isSpace(r): | |
return lexSpace | |
case isAlphaNumeric(r): | |
l.backup() | |
return lexIdentifier | |
case strings.HasPrefix(l.input[l.pos:], lineComment): | |
return lexLineComment | |
case isNewline(r): | |
return lexNewline | |
case isStringDelim(r): | |
l.backup() | |
return lexString | |
} | |
// inform the channel we're done | |
l.emit(itemEOF) | |
l.close() | |
return nil | |
} | |
func lexLineComment(l *lexer) stateFn { | |
l.pos += Pos(len(lineComment)) | |
i := strings.Index(l.input[l.pos:], "\n") | |
// if -1 it's the last line of the program and we don't care | |
l.pos += Pos(i + len("\n")) | |
return lexText | |
} | |
func lexSpace(l *lexer) stateFn { | |
for isSpace(l.peek()) { | |
l.next() | |
} | |
l.emit(itemSpace) | |
return lexText | |
} | |
// takes a stateFn so we can eat the newline and stay in context | |
func lexNewline(l *lexer) stateFn { | |
for isNewline(l.peek()) { | |
l.next() | |
} | |
l.emit(itemNewline) | |
return lexText | |
} | |
func lexIdentifier(l *lexer) stateFn { | |
// yuck, this Loop is because of the nested for>switch | |
Loop: | |
for { | |
switch r := l.next(); { | |
case isAlphaNumeric(r): | |
// absorb | |
default: | |
l.backup() | |
word := l.input[l.start:l.pos] | |
switch { | |
case keywords[word] > 0: | |
l.emit(keywords[word]) | |
default: | |
l.emit(itemIdentifier) | |
} | |
break Loop | |
} | |
} | |
return lexText | |
} | |
func lexString(l *lexer) stateFn { | |
switch r := l.next(); { | |
case r == '\'': | |
return lexSingleQuoteString | |
// case r == '"': | |
// return lexDoubleQuoteString | |
} | |
} | |
func lexSingleQuoteString(l *lexer) stateFn { | |
for !isStringDelim(l.peek()) { | |
l.next() | |
} | |
l.emit(itemString) | |
return lexText | |
} | |
//////////////////////////////////////// | |
// usage | |
func main() { | |
file := loadFile("test.rb") | |
lexer := lex(file) | |
// go func() { | |
for item := range lexer.items { | |
fmt.Printf("%v, %v\n", item.val, item.typ) | |
} | |
// }() | |
fmt.Printf("fin") | |
// for _,token := range tokens { | |
// fmt.Printf("%T, \t %v\n", token, token) | |
// } | |
} | |
// loadFile reads filename and returns its contents as a string.
// On a read error it prints the error and returns the empty string
// (best effort, matching the original behavior).
func loadFile(filename string) string {
	data, err := ioutil.ReadFile(filename)
	if err != nil {
		fmt.Printf("Error %s", err)
	}
	return string(data)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment