Last active
April 18, 2020 16:23
-
-
Save bradford-hamilton/98b8c0ed9f1d50178ce4c735b42b8917 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Package parser TODO | |
package parser | |
import ( | |
"errors" | |
"fmt" | |
"strconv" | |
"strings" | |
"github.com/bradford-hamilton/dora/pkg/ast" | |
"github.com/bradford-hamilton/dora/pkg/lexer" | |
"github.com/bradford-hamilton/dora/pkg/token" | |
) | |
// Parser holds a Lexer, errors, the currentToken, and the peek peekToken (next token). | |
// Parser methods handle iterating through tokens and building and AST. | |
type Parser struct { | |
lexer *lexer.Lexer | |
errors []string | |
currentToken token.Token | |
peekToken token.Token | |
} | |
// New takes a Lexer, creates a Parser with that Lexer, sets the current and | |
// peek tokens, and returns the Parser. | |
func New(l *lexer.Lexer) *Parser { | |
p := &Parser{lexer: l} | |
// Read two tokens, so currentToken and peekToken are both set. | |
p.nextToken() | |
p.nextToken() | |
return p | |
} | |
// ParseProgram parses tokens and creates an AST. It returns the RootNode | |
// which holds a slice of Values (and in turn, the rest of the tree) | |
func (p *Parser) ParseProgram() (ast.RootNode, error) { | |
var rootNode ast.RootNode | |
if p.currentTokenTypeIs(token.LeftBracket) { | |
rootNode.Type = ast.ArrayRoot | |
} | |
val := p.parseValue() | |
if val == nil { | |
p.parseError(fmt.Sprintf( | |
"Error parsing JSON expected a value, got: %v:", | |
p.currentToken.Literal, | |
)) | |
return ast.RootNode{}, errors.New(p.Errors()) | |
} | |
rootNode.RootValue = &val | |
return rootNode, nil | |
} | |
// nextToken sets our current token to the peek token and the peek token to | |
// p.lexer.NextToken() which ends up scanning and returning the next token | |
func (p *Parser) nextToken() { | |
p.currentToken = p.peekToken | |
p.peekToken = p.lexer.NextToken() | |
} | |
func (p *Parser) currentTokenTypeIs(t token.Type) bool { | |
return p.currentToken.Type == t | |
} | |
// parseValue is our dynamic entrypoint to parsing JSON values. All scenarios for | |
// this parser fall under these 3 actions. | |
func (p *Parser) parseValue() ast.Value { | |
switch p.currentToken.Type { | |
case token.LeftBrace: | |
return p.parseJSONObject() | |
case token.LeftBracket: | |
return p.parseJSONArray() | |
default: | |
return p.parseJSONLiteral() | |
} | |
} | |
// parseJSONObject is called when an open left brace `{` token is found | |
func (p *Parser) parseJSONObject() ast.Value { | |
obj := ast.Object{Type: "Object"} | |
objState := ast.ObjStart | |
for !p.currentTokenTypeIs(token.EOF) { | |
switch objState { | |
case ast.ObjStart: | |
if p.currentTokenTypeIs(token.LeftBrace) { | |
objState = ast.ObjOpen | |
obj.Start = p.currentToken.Start | |
p.nextToken() | |
} else { | |
p.parseError(fmt.Sprintf( | |
"Error parsing JSON object Expected `{` token, got: %s", | |
p.currentToken.Literal, | |
)) | |
return nil | |
} | |
case ast.ObjOpen: | |
if p.currentTokenTypeIs(token.RightBrace) { | |
p.nextToken() | |
obj.End = p.currentToken.End | |
return obj | |
} | |
prop := p.parseProperty() | |
obj.Children = append(obj.Children, prop) | |
objState = ast.ObjProperty | |
case ast.ObjProperty: | |
if p.currentTokenTypeIs(token.RightBrace) { | |
p.nextToken() | |
obj.End = p.currentToken.Start | |
return obj | |
} else if p.currentTokenTypeIs(token.Comma) { | |
objState = ast.ObjComma | |
p.nextToken() | |
} else { | |
p.parseError(fmt.Sprintf( | |
"Error parsing property. Expected RightBrace or Comma token, got: %s", | |
p.currentToken.Literal, | |
)) | |
return nil | |
} | |
case ast.ObjComma: | |
prop := p.parseProperty() | |
if prop.Value != nil { | |
obj.Children = append(obj.Children, prop) | |
objState = ast.ObjProperty | |
} | |
} | |
} | |
obj.End = p.currentToken.Start | |
return obj | |
} | |
// parseJSONArray is called when an open left bracket `[` token is found | |
func (p *Parser) parseJSONArray() ast.Value { | |
array := ast.Array{Type: "Array"} | |
arrayState := ast.ArrayStart | |
for !p.currentTokenTypeIs(token.EOF) { | |
switch arrayState { | |
case ast.ArrayStart: | |
if p.currentTokenTypeIs(token.LeftBracket) { | |
array.Start = p.currentToken.Start | |
arrayState = ast.ArrayOpen | |
p.nextToken() | |
} | |
case ast.ArrayOpen: | |
if p.currentTokenTypeIs(token.RightBracket) { | |
array.End = p.currentToken.End | |
p.nextToken() | |
return array | |
} | |
val := p.parseValue() | |
array.Children = append(array.Children, val) | |
arrayState = ast.ArrayValue | |
if p.peekTokenTypeIs(token.RightBracket) { | |
p.nextToken() | |
} | |
case ast.ArrayValue: | |
if p.currentTokenTypeIs(token.RightBracket) { | |
array.End = p.currentToken.End | |
p.nextToken() | |
return array | |
} else if p.currentTokenTypeIs(token.Comma) { | |
arrayState = ast.ArrayComma | |
p.nextToken() | |
} else { | |
p.parseError(fmt.Sprintf( | |
"Error parsing property. Expected RightBrace or Comma token, got: %s", | |
p.currentToken.Literal, | |
)) | |
} | |
case ast.ArrayComma: | |
val := p.parseValue() | |
array.Children = append(array.Children, val) | |
arrayState = ast.ArrayValue | |
} | |
} | |
array.End = p.currentToken.Start | |
return array | |
} | |
// parseJSONLiteral switches on the current token's type, sets the Value on a return val and returns it. | |
func (p *Parser) parseJSONLiteral() ast.Literal { | |
val := ast.Literal{Type: "Literal"} | |
// Regardless of what the current token type is - after it's been assigned, we must consume the token | |
defer p.nextToken() | |
switch p.currentToken.Type { | |
case token.String: | |
val.Value = p.parseString() | |
return val | |
case token.Number: | |
v, _ := strconv.Atoi(p.currentToken.Literal) | |
val.Value = v | |
return val | |
case token.True: | |
val.Value = true | |
return val | |
case token.False: | |
val.Value = false | |
return val | |
default: | |
val.Value = "null" | |
return val | |
} | |
} | |
// parseProperty is used to parse an object property and in doing so handles setting the `key`:`value` pair. | |
func (p *Parser) parseProperty() ast.Property { | |
prop := ast.Property{Type: "Property"} | |
propertyState := ast.PropertyStart | |
for !p.currentTokenTypeIs(token.EOF) { | |
switch propertyState { | |
case ast.PropertyStart: | |
if p.currentTokenTypeIs(token.String) { | |
key := ast.Identifier{ | |
Type: "Identifier", | |
Value: p.parseString(), | |
} | |
prop.Key = key | |
propertyState = ast.PropertyKey | |
p.nextToken() | |
} else { | |
p.parseError(fmt.Sprintf( | |
"Error parsing property start. Expected String token, got: %s", | |
p.currentToken.Literal, | |
)) | |
} | |
case ast.PropertyKey: | |
if p.currentTokenTypeIs(token.Colon) { | |
propertyState = ast.PropertyColon | |
p.nextToken() | |
} else { | |
p.parseError(fmt.Sprintf( | |
"Error parsing property. Expected Colon token, got: %s", | |
p.currentToken.Literal, | |
)) | |
} | |
case ast.PropertyColon: | |
val := p.parseValue() | |
prop.Value = val | |
return prop | |
} | |
} | |
return prop | |
} | |
// TODO: all the tedius ecaping, etc still needs to be applied here | |
func (p *Parser) parseString() string { | |
return p.currentToken.Literal | |
} | |
// expectPeekType checks the next token type against the one passed in. If it matches, | |
// we call p.nextToken() to set us to the expected token and return true. If the expected | |
// type does not match, we add a peek error and return false. | |
func (p *Parser) expectPeekType(t token.Type) bool { | |
if p.peekTokenTypeIs(t) { | |
p.nextToken() | |
return true | |
} | |
p.peekError(t) | |
return false | |
} | |
func (p *Parser) peekTokenTypeIs(t token.Type) bool { | |
return p.peekToken.Type == t | |
} | |
// peekError is a small wrapper to add a peek error to our parser's errors field. | |
func (p *Parser) peekError(t token.Type) { | |
msg := fmt.Sprintf( | |
"Line: %d: Expected next token to be %s, got: %s instead", | |
p.currentToken.Line, | |
t, | |
p.peekToken.Type, | |
) | |
p.errors = append(p.errors, msg) | |
} | |
// parseError is very similar to `peekError`, except it simply takes a string message that | |
// gets appended to the parser's errors | |
func (p *Parser) parseError(msg string) { | |
p.errors = append(p.errors, msg) | |
} | |
// Errors is simply a helper function that returns the parser's errors | |
func (p *Parser) Errors() string { | |
return strings.Join(p.errors, ", ") | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment