Skip to content

Instantly share code, notes, and snippets.

@wwkeyboard
Created December 2, 2014 19:18
Show Gist options
  • Save wwkeyboard/d1871bf2b0fb3fb928d0 to your computer and use it in GitHub Desktop.
Save wwkeyboard/d1871bf2b0fb3fb928d0 to your computer and use it in GitHub Desktop.
package main
/*
* TODOs:
* - Should pass the rune into the stateFn, to avoid all of the backups
*/
import (
"fmt"
"io/ioutil"
"strings"
"unicode"
"unicode/utf8"
)
// lexer holds the scanning state for one input string.
type lexer struct {
	input string // the text being scanned

	// pos is the byte offset of the next unread rune, start is the byte
	// offset at which the pending (not yet emitted) item begins, and width
	// is the byte width of the rune most recently decoded by next (0 once
	// the input is exhausted).
	pos, start, width Pos

	items chan item // scanned items are sent on this channel
	state stateFn   // the state function run will invoke next
}
// item is one token produced by the lexer and delivered on lexer.items.
// emit builds it with a positional literal, so the field order matters.
type item struct {
typ itemType // kind of token
pos Pos // byte offset in the input where the token text begins
val string // token text, sliced out of the lexer's input
}
// Pos is a byte offset into (or a byte length within) the lexer's input string.
type Pos int
// itemType identifies the kind of token carried by an item.
type itemType int

// Token kinds. The numeric values are part of the program's output (main
// prints them), so new kinds must be appended at the end. itemError must
// stay at 0: lexIdentifier treats a zero lookup result from the keywords
// map as "not a keyword".
const (
	itemError      itemType = iota // 0
	itemText                       // 1
	itemEOF                        // 2: end of scanning
	itemSpace                      // 3: run of spaces and tabs
	itemIdentifier                 // 4: non-keyword word
	itemClass                      // 5: keyword "class"
	itemDef                        // 6: keyword "def"
	itemEnd                        // 7: keyword "end"
	itemNewline                    // 8: run of newlines
	itemString                     // 9: quoted string, emitted by lexSingleQuoteString
)
// keywords maps each reserved word to its item type. It is a var and not a
// const because Go constants cannot be maps.
var keywords = map[string]itemType{
"class": itemClass,
"def": itemDef,
"end": itemEnd,
}
const (
	// eof is the sentinel value next returns once the input is exhausted.
	eof = -1
	// lineComment is the marker that introduces a comment running to the
	// end of the line.
	lineComment = "//"
)
// stateFn is one state of the lexer. It scans from the lexer's current
// position and returns the state to run next; run stops when it gets nil.
type stateFn func(*lexer) stateFn
// lex builds a lexer for input, starts its state machine in a new goroutine,
// and returns the lexer so the caller can receive tokens from its items
// channel.
func lex(input string) *lexer {
	const itemBuffer = 10 // capacity of the items channel

	l := new(lexer)
	l.input = input
	l.items = make(chan item, itemBuffer)

	go l.run()
	return l
}
// run drives the state machine: it starts in lexText and keeps invoking the
// current state function, storing whatever state it returns, until a state
// function returns nil.
func (l *lexer) run() {
	fmt.Printf("running - -\n")
	l.state = lexText
	for l.state != nil {
		l.state = l.state(l)
	}
}
// emit sends the pending text (input[start:pos]) on the items channel as an
// item of type t, then moves start up to pos so the next item begins where
// this one ended.
func (l *lexer) emit(t itemType) {
	begin, end := l.start, l.pos
	l.items <- item{typ: t, pos: begin, val: l.input[begin:end]}
	l.start = end
}
// close closes the items channel, which ends any range loop receiving from
// it. Nothing may be emitted after this call.
func (l *lexer) close() {
close(l.items)
}
// ignore discards the input scanned since the last emitted item by moving
// start up to pos.
func (l *lexer) ignore() {
l.start = l.pos
}
// next decodes and consumes the rune at pos, recording its byte width so
// that backup can undo the read. Once the input is exhausted it returns eof,
// sets width to 0, and leaves pos unchanged.
func (l *lexer) next() rune {
	if at := int(l.pos); at < len(l.input) {
		r, size := utf8.DecodeRuneInString(l.input[at:])
		l.width = Pos(size)
		l.pos += l.width
		return r
	}
	l.width = 0
	return eof
}
// peek returns the rune at pos without consuming it. It is next followed by
// backup, so it overwrites width just as next does.
func (l *lexer) peek() rune {
	defer l.backup()
	return l.next()
}
// backup moves pos back by width, un-reading the rune returned by the most
// recent next. It does nothing after next has returned eof, because width is
// then 0. Only one rune can be undone: every next (and so every peek)
// overwrites width.
func (l *lexer) backup() {
l.pos -= l.width
}
////////////////////////////////////////
// rune classification predicates
// isAlphaNumeric reports whether r may appear in an identifier: a hyphen, an
// underscore, or any Unicode letter or digit.
func isAlphaNumeric(r rune) bool {
	switch r {
	case '-', '_':
		return true
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r)
}
// isNewline reports whether r is a line feed. Carriage returns do not count.
func isNewline(r rune) bool {
	const lineFeed = '\n'
	return r == lineFeed
}
// isSpace reports whether r is horizontal whitespace: a space or a tab.
// Newlines are classified separately by isNewline.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	default:
		return false
	}
}
// isStringDelim reports whether r opens or closes a quoted string: a single
// or a double quote. Heredocs are not handled yet.
func isStringDelim(r rune) bool {
	switch r {
	case '\'', '"':
		return true
	}
	return false
}
////////////////////////////////////////
// The main lexer state, the center of this state machine
// lexText is the top-level state. It looks at the next piece of input and
// hands off to the state that scans that kind of token. At end of input it
// emits itemEOF, and on a rune no state handles it emits itemError carrying
// the offending text. In both cases it closes the items channel and returns
// nil, which stops run.
func lexText(l *lexer) stateFn {
	// Look for a comment before consuming anything. Testing after next()
	// compares the marker against the input one rune too late, so a comment
	// at the start of the input or after a space was never recognised.
	if strings.HasPrefix(l.input[l.pos:], lineComment) {
		return lexLineComment
	}

	switch r := l.next(); {
	case isSpace(r):
		// The first space stays consumed; lexSpace absorbs the rest.
		return lexSpace
	case isAlphaNumeric(r):
		l.backup()
		return lexIdentifier
	case isNewline(r):
		return lexNewline
	case isStringDelim(r):
		// lexString re-reads the delimiter to pick the string flavour.
		l.backup()
		return lexString
	case r == eof:
		l.emit(itemEOF)
	default:
		// Report unexpected input as an error instead of as a normal EOF.
		l.emit(itemError)
	}

	// Tell the receiver no more items are coming.
	l.close()
	return nil
}
// lexLineComment skips a line comment: everything from the current position
// through the terminating newline, or through the end of the input when the
// comment sits on the last line. The skipped text is dropped, not emitted.
func lexLineComment(l *lexer) stateFn {
	if i := strings.Index(l.input[l.pos:], "\n"); i >= 0 {
		l.pos += Pos(i + len("\n"))
	} else {
		// No newline left: the comment runs to the end of the input.
		// Advancing by i+1 == 0 here would leave the comment body in place
		// to be scanned as ordinary tokens.
		l.pos = Pos(len(l.input))
	}
	// Drop the comment so it is not glued onto the next emitted item.
	l.ignore()
	return lexText
}
// lexSpace absorbs the rest of a run of spaces and tabs (lexText has already
// consumed the first one) and emits the whole run as a single itemSpace.
func lexSpace(l *lexer) stateFn {
	for isSpace(l.next()) {
		// absorb
	}
	// Un-read the rune that ended the run (a no-op at end of input).
	l.backup()
	l.emit(itemSpace)
	return lexText
}
// lexNewline absorbs the rest of a run of newlines (lexText has already
// consumed the first one) and emits the whole run as a single itemNewline.
func lexNewline(l *lexer) stateFn {
	for isNewline(l.next()) {
		// absorb
	}
	// Un-read the rune that ended the run (a no-op at end of input).
	l.backup()
	l.emit(itemNewline)
	return lexText
}
// lexIdentifier scans a run of identifier runes and emits it as the matching
// keyword item when the pending text is a keyword, or as itemIdentifier
// otherwise.
func lexIdentifier(l *lexer) stateFn {
	for isAlphaNumeric(l.next()) {
		// absorb
	}
	// Un-read the rune that ended the word (a no-op at end of input).
	l.backup()

	typ := itemIdentifier
	if kw := keywords[l.input[l.start:l.pos]]; kw > 0 {
		typ = kw
	}
	l.emit(typ)
	return lexText
}
// lexString consumes the opening string delimiter and picks the state that
// scans that flavour of string. Only single-quoted strings are implemented.
// Any other delimiter is emitted as itemError, after which the items channel
// is closed and scanning stops.
func lexString(l *lexer) stateFn {
	if r := l.next(); r == '\'' {
		return lexSingleQuoteString
	}
	// TODO: dispatch '"' to a lexDoubleQuoteString state once one exists.
	// The function used to fall off the end here, which does not compile.
	// Until double quotes are supported, end the scan the way lexText does.
	l.emit(itemError)
	l.close()
	return nil
}
// lexSingleQuoteString scans the body of a single-quoted string whose opening
// quote has already been consumed. On the closing quote it emits the pending
// text, both quotes included, as itemString. If the input ends first it emits
// the pending text as itemError. Either way it returns to lexText. Escape
// sequences are not interpreted.
func lexSingleQuoteString(l *lexer) stateFn {
	for {
		switch l.next() {
		case '\'':
			// Consume the closing quote as part of the token; otherwise
			// lexText would read it as the start of another string.
			l.emit(itemString)
			return lexText
		case eof:
			// Unterminated string. next stops advancing at end of input,
			// so looping on would never end.
			l.emit(itemError)
			return lexText
		}
	}
}
////////////////////////////////////////
// usage
// main lexes test.rb and prints each token's text and numeric type, one per
// line, followed by "fin" once the lexer has closed its items channel.
func main() {
	src := loadFile("test.rb")
	lx := lex(src)
	for tok := range lx.items {
		fmt.Printf("%v, %v\n", tok.val, tok.typ)
	}
	fmt.Printf("fin")
}
// loadFile returns the contents of filename as a string. A read error is
// printed to standard output and otherwise ignored: the function still
// returns whatever bytes were read, which is the empty string when the file
// could not be opened.
func loadFile(filename string) string {
	var (
		contents []byte
		err      error
	)
	if contents, err = ioutil.ReadFile(filename); err != nil {
		fmt.Printf("Error %s", err)
	}
	return string(contents)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment