Created
October 10, 2018 08:28
-
-
Save tzmfreedom/19f54dd94f8b0b3084348fd240611ce5 to your computer and use it in GitHub Desktop.
Sample lexer written in Go
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"regexp" | |
) | |
func main() { | |
l := &Lexer{input: "=+*-/===abc=a1"} | |
tokens := []Token{} | |
for { | |
token := l.NextToken() | |
if token.Type() == EOF_TOKEN { | |
break | |
} | |
tokens = append(tokens, token) | |
} | |
// pp.Println(tokens) | |
} | |
// Lexer walks over an input string and hands out its tokens one at a time.
type Lexer struct {
	// input is the text being tokenized.
	input string
	// currentPosition is the offset into input of the next unread character.
	currentPosition int
}
func (l *Lexer) readChar() string { | |
if len(l.input) <= l.currentPosition { | |
return "" | |
} | |
char := l.input[l.currentPosition] | |
l.currentPosition++ | |
return string(char) | |
} | |
func (l *Lexer) peekChar() string { | |
if len(l.input) <= l.currentPosition { | |
return "" | |
} | |
char := l.input[l.currentPosition] | |
return string(char) | |
} | |
func (l *Lexer) NextToken() Token { | |
char := l.readChar() | |
if char == "" { | |
return &EOF{} | |
} | |
switch char { | |
case "=": | |
next := l.peekChar() | |
if next == "=" { | |
l.readChar() | |
return &OpToken{"=="} | |
} | |
return &OpToken{char} | |
case "+", "-", "*", "/": | |
return &OpToken{char} | |
default: | |
if l.isIdentifierChar(char) { | |
value := l.getIdentifier(char) | |
return &Identifier{value} | |
} | |
} | |
return nil | |
} | |
func (l *Lexer) isIdentifierChar(char string) bool { | |
r := regexp.MustCompile(`[a-zA-Z]`) | |
return r.MatchString(char) | |
} | |
func (l *Lexer) getIdentifier(init string) string { | |
result := init | |
for { | |
char := l.peekChar() | |
r := regexp.MustCompile(`[a-zA-Z0-9]`) | |
if r.MatchString(char) { | |
result += l.readChar() | |
} else { | |
break | |
} | |
} | |
return result | |
} | |
// Token kinds returned by the Type method of the token types.
const (
	// OP_TOKEN is the kind of an operator token.
	OP_TOKEN = iota
	// EOF_TOKEN is the kind of the end-of-input token.
	EOF_TOKEN
	// IDENT_TOKEN is the kind of an identifier token.
	IDENT_TOKEN
)
// Token is the common interface of the values returned by Lexer.NextToken.
type Token interface {
	// Type returns the integer kind of the token.
	Type() int
}
// OpToken is an operator token.
type OpToken struct {
	// Value is the operator text.
	Value string
}
func (t *OpToken) Type() int { | |
return OP_TOKEN | |
} | |
// EOF is the token that marks the end of the input. It carries no data.
type EOF struct{}
func (t *EOF) Type() int { | |
return EOF_TOKEN | |
} | |
// Identifier is an identifier token.
type Identifier struct {
	// Value is the identifier text.
	Value string
}
func (t *Identifier) Type() int { | |
return IDENT_TOKEN | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment