Last active
January 1, 2019 23:56
-
-
Save zicklag/c2a6060452759ce13864e43135e856f3 to your computer and use it in GitHub Desktop.
A WIP Ink Parser for Haxe.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hxparse.Ruleset; | |
import hxparse.Position; | |
import hxparse.LexerTokenSource; | |
import hxparse.ParserBuilder; | |
import hxparse.RuleBuilder; | |
import hxparse.Lexer; | |
/**
	All token kinds the Ink lexer can produce.

	The constructors are declared in a fixed order; do not reorder them, since
	enum indices depend on the declaration order.
**/
enum LexerToken {
	// ---- Brackets ----

	/** `(` **/
	TParenOpen;
	/** `)` **/
	TParenClose;
	/** `[` **/
	TSquareOpen;
	/** `]` **/
	TSquareClose;
	/** `{` **/
	TCurlyOpen;
	/** `}` **/
	TCurlyClose;

	// ---- Keywords ----

	/** `VAR` **/
	TVar;
	/** `CONST` **/
	TConst;
	/** `and` **/
	TAnd;
	/** `or` **/
	TOr;
	/** `not` **/
	TNot;

	// ---- Symbols ----

	/** `:` **/
	TColon;
	/** `*` **/
	TStar;
	/** `+` **/
	TPlus;
	/** `-` **/
	TDash;
	/** `|` **/
	TPipe;
	/** `&` **/
	TAmpersand;
	/** `!` **/
	TBang;
	/** `~` **/
	TTilde;
	/** `.` **/
	TDot;
	/** `->` **/
	TArrow;
	/** `===` **/
	TTrippleEqual;
	/** `=` **/
	TEqual;
	/** `<>` **/
	TGlue;
	/** A `//` comment; `s` is the text after the slashes **/
	TLineComment(s:String);
	/** A block comment; `s` is the text between the delimiters **/
	TBlockComment(s:String);
	/** A line break **/
	TNewline;

	// ---- Basic containers ----

	/** A name that can be used as an identifier **/
	TIdentifier(s:String);
	/** A single character of text **/
	TChar(s:String);
	/** A run of text **/
	TString(s:String);
	/** A boolean literal **/
	TBool(b:Bool);
	/** A numeric literal **/
	TNumber(n:Float);
	/** A divert (rendered as `->`, same as `TArrow`) **/
	TDivert;

	// ---- Other ----

	/** End of input **/
	TEof;
}
/**
	Turns `LexerToken` values back into text.
**/
class LexerTokenPrinter {
	/**
		Returns the textual form of `token`.

		Payload-free tokens map to their fixed spelling, comment tokens are
		re-wrapped in their delimiters, and value-carrying tokens are rendered
		from their payload. `TArrow` and `TDivert` both render as `->`.
	**/
	public static function print(token:LexerToken) {
		var text = switch (token) {
			// Brackets
			case TParenOpen: "(";
			case TParenClose: ")";
			case TSquareOpen: "[";
			case TSquareClose: "]";
			case TCurlyOpen: "{";
			case TCurlyClose: "}";

			// Keywords
			case TVar: "VAR";
			case TConst: "CONST";
			case TAnd: "and";
			case TOr: "or";
			case TNot: "not";

			// Symbols
			case TColon: ":";
			case TStar: "*";
			case TPlus: "+";
			case TDash: "-";
			case TPipe: "|";
			case TAmpersand: "&";
			case TBang: "!";
			case TTilde: "~";
			case TDot: ".";
			case TArrow | TDivert: "->";
			case TTrippleEqual: "===";
			case TEqual: "=";
			case TGlue: "<>";
			case TNewline: "\n";

			// Comments get their delimiters back
			case TLineComment(s): '//$s';
			case TBlockComment(s): '/*$s*/';

			// Tokens whose payload already is their text
			case TIdentifier(s) | TChar(s) | TString(s): s;

			// Literal values
			case TBool(b): Std.string(b);
			case TNumber(n): Std.string(n);

			// Other
			case TEof: "EOF";
		};
		return text;
	}
}
/**
	Lexer for Ink source text.

	The class declares a set of reusable single-token rules (`@:ruleHelper`)
	and combines them into several rulesets. Which ruleset is active at a given
	point of the input is decided by the caller (see the `storyTokens`,
	`contentTokens`, ... fields); within a ruleset the rule order is
	significant, so entries must not be reordered casually.
**/
class InkLexer extends Lexer implements RuleBuilder {
	/**Collects the text of the block comment that is currently being lexed**/
	static var buf = new StringBuf();

	/**Regex fragment for optional spaces/tabs; appended to rules that swallow trailing whitespace**/
	static final whitespace = "[ \t]*";

	// Token rules
	@:ruleHelper static final identifier = "[a-zA-Z_\\-][a-zA-Z0-9_\\-]*" => TIdentifier(lexer.current);
	// A backslash followed by any character; only the escaped character is kept.
	@:ruleHelper static final escapedCharacter = "\\\\." => TChar(lexer.current.charAt(1));
	// `*` (not `+`) so that an empty `//` comment is still recognised as a comment.
	@:ruleHelper static final lineComment = "//[^\n]*" => TLineComment(lexer.current.substr(2));
	// The comment body is consumed by `blockCommentTokens`, which fills `buf`.
	@:ruleHelper static final blockComment = "/\\*" => {
		buf = new StringBuf();
		lexer.token(blockCommentTokens);
		TBlockComment(buf.toString());
	};
	@:ruleHelper static final newline = "\n" => TNewline;
	@:ruleHelper static final character = "." => TChar(lexer.current);
	@:ruleHelper static final eof = "" => TEof;

	// Keywords
	@:ruleHelper static final varToken = "VAR" + whitespace => TVar;
	@:ruleHelper static final constToken = "CONST" + whitespace => TConst;
	@:ruleHelper static final and = "and" + whitespace => TAnd;
	@:ruleHelper static final or = "or" + whitespace => TOr;
	@:ruleHelper static final not = "not" + whitespace => TNot;

	// Symbols
	@:ruleHelper static final star = "\\*" + whitespace => TStar;
	@:ruleHelper static final plus = "\\+" + whitespace => TPlus;
	@:ruleHelper static final dash = "-" + whitespace => TDash;
	@:ruleHelper static final tilde = "\\~" + whitespace => TTilde;
	// `|` is the regex alternation operator, so it is escaped to match it literally.
	@:ruleHelper static final pipe = "\\|" + whitespace => TPipe;
	@:ruleHelper static final ampersand = "&" + whitespace => TAmpersand;
	@:ruleHelper static final bang = "!" + whitespace => TBang;
	// Must produce `TColon`: the token source switches rulesets on that token.
	@:ruleHelper static final colon = ":" => TColon;
	@:ruleHelper static final dot = "\\." => TDot;
	@:ruleHelper static final arrow = "->" + whitespace => TArrow;
	@:ruleHelper static final trippleEqual = "===" + whitespace => TTrippleEqual;
	@:ruleHelper static final equal = "=" + whitespace => TEqual;
	@:ruleHelper static final glue = "<>" => TGlue;

	// Brackets
	@:ruleHelper static final curlyOpen = "{" => TCurlyOpen;
	@:ruleHelper static final curlyClose = "}" => TCurlyClose;
	// `[` / `]` delimit character classes in the regex syntax, so they are escaped.
	@:ruleHelper static final squareOpen = "\\[" => TSquareOpen;
	@:ruleHelper static final squareClose = "\\]" => TSquareClose;
	@:ruleHelper static final parenOpen = "\\(" => TParenOpen;
	@:ruleHelper static final parenClose = "\\)" => TParenClose;

	/**Tokens matched from top level story**/
	static public final storyTokens = @:rule [
		escapedCharacter,
		lineComment,
		blockComment,
		varToken,
		constToken,
		curlyOpen,
		curlyClose,
		star,
		plus,
		dash,
		tilde,
		arrow,
		glue,
		trippleEqual,
		equal,
		whitespace => lexer.token(storyTokens), // Skip whitespace
		newline,
		character,
		eof
		// Tokens yet to be placed
		// "\\|" => TPipe,
		// "\\&" => TAmpersand,
		// "\\!" => TBang,
	];

	/**Tokens matched inside of ink content**/
	static public final contentTokens = @:rule [
		escapedCharacter,
		blockComment,
		lineComment,
		curlyOpen,
		curlyClose,
		glue,
		arrow,
		newline,
		character,
		eof
	];

	/**Tokens matched inside of options**/
	static public final optionDefTokens = @:rule [
		escapedCharacter,
		blockComment,
		lineComment,
		parenOpen,
		parenClose,
		curlyOpen,
		curlyClose,
		squareOpen,
		squareClose,
		star,
		plus,
		newline,
		character,
		eof
	];

	/**Tokens matched inside of option content**/
	static public final optionContentTokens = @:rule [
		escapedCharacter,
		blockComment,
		lineComment,
		curlyOpen,
		curlyClose,
		squareOpen,
		squareClose,
		glue,
		arrow,
		newline,
		character,
		eof
	];

	/**Tokens matched inside of gathers**/
	static public final gatherDefTokens = @:rule [
		escapedCharacter,
		blockComment,
		lineComment,
		parenOpen,
		parenClose,
		curlyOpen,
		curlyClose,
		arrow,
		dash,
		newline,
		character,
		eof
	];

	/**Tokens matched right after an opening `{`, before the alternative's content**/
	static public final alternativeConditionTokens = @:rule [
		escapedCharacter,
		blockComment,
		lineComment,
		ampersand,
		bang,
		tilde,
		colon,
		and,
		or,
		not,
		dot,
		identifier,
		curlyClose,
		newline,
		character,
		eof
	];

	/**Tokens matched inside the content of an alternative**/
	static public final alternativeContentTokens = @:rule [
		escapedCharacter,
		blockComment,
		lineComment,
		curlyOpen,
		curlyClose,
		pipe,
		glue,
		arrow,
		newline,
		character,
		eof
	];

	/**Tokens matched while parsing variable definitions**/
	static public final varDefTokens = @:rule [
		identifier,
		whitespace + "=" + whitespace => TEqual,
		eof
	];

	/**Tokens matched while parsing variable value**/
	static public final varValTokens = @:rule [
		// Bool. The match may start with whitespace, so trim before comparing.
		whitespace + "(true|false)" => TBool(StringTools.trim(lexer.current) == "true"),
		// Number. The decimal point is escaped; a bare `.` would accept any character.
		whitespace + "-?(([1-9][0-9]*)|0)(\\.[0-9]+)?" => TNumber(Std.parseFloat(StringTools.trim(lexer.current))),
		// Other Content
		curlyOpen,
		curlyClose,
		arrow,
		newline,
		character,
		eof
	];

	/**Tokens matched where only a single identifier is expected**/
	static public final identifierTokens = @:rule [
		identifier,
		eof
	];

	/**A reference to a point in the story**/
	static public final referenceTokens = @:rule [
		identifier,
		newline,
		dot,
		eof
	];

	/**
		Tokens matched inside a block comment. Every rule except the
		terminator appends to `buf` and recurses, so a single call consumes
		the whole comment body.
	**/
	static public final blockCommentTokens = @:rule [
		"\\*/" => null, // End comment
		"\\*" => {buf.add("*"); lexer.token(blockCommentTokens);},
		"." => {buf.add(lexer.current); lexer.token(blockCommentTokens);}
	];
}
/**
	Token source that sits between `InkLexer` and the parser.

	It switches the active lexer ruleset depending on the tokens it has just
	produced, and merges runs of `TChar` tokens into a single `TString`.
**/
class InkTokenSource {
	var lexer:InkLexer;
	var tokenSource:LexerTokenSource<LexerToken>;

	/**Used if we have read a token and postponed it**/
	var pendingToken:LexerToken = null;

	/**
		Depth of alternative parsing.

		Kept per instance so that independent token sources do not influence
		each other's nesting count.
	**/
	var alternativeDepth = 0;

	/**The ruleset the underlying lexer token source currently uses**/
	public var ruleset(get, set):Ruleset<LexerToken>;
	function get_ruleset() { return this.tokenSource.ruleset; }
	function set_ruleset(value) { return this.tokenSource.ruleset = value; }

	/**If set, becomes the active ruleset right after the next token has been read**/
	public var nextRuleset:Ruleset<LexerToken> = null;

	/**
		Creates a token source reading from `lexer`, starting with the
		top-level story ruleset.
	**/
	public function new(lexer:InkLexer) {
		this.lexer = lexer;
		this.tokenSource = new LexerTokenSource(lexer, InkLexer.storyTokens);
	}

	/**
		Returns the next token.

		A postponed token (see `pendingToken`) is returned before anything new
		is read from the lexer. After the token is obtained the ruleset for the
		following tokens is selected, and if the token is a `TChar` all
		directly following `TChar`/`TIdentifier` tokens are folded into one
		`TString`.
	**/
	public function token():LexerToken {
		var token;
		// The next token is the pending token if it is set
		if (pendingToken != null) {
			token = pendingToken;
			pendingToken = null;
		} else {
			token = tokenSource.token();
		}

		// Apply `nextRuleset` if set
		if (nextRuleset != null) {
			ruleset = nextRuleset;
			nextRuleset = null;
		}

		// Change lexer ruleset based on parsed tokens
		switch ([ruleset, token]) {
			// Start parsing story tokens after newlines
			case [_, TNewline]: ruleset = InkLexer.storyTokens;

			// Parse variables and constants
			case [_, TVar | TConst]: ruleset = InkLexer.varDefTokens;
			case [InkLexer.varDefTokens, TEqual]: ruleset = InkLexer.varValTokens;

			// Parse alternatives
			case [_, TCurlyOpen]:
				alternativeDepth++;
				ruleset = InkLexer.alternativeConditionTokens;
			case [InkLexer.alternativeConditionTokens, TBang|TAmpersand|TTilde]:
				ruleset = InkLexer.alternativeContentTokens;
			case [InkLexer.alternativeConditionTokens, TColon]:
				ruleset = InkLexer.alternativeContentTokens;
			// A `}` can arrive while still in the condition part (e.g. `{name}`);
			// it has to close the alternative just like one in the content part.
			case [InkLexer.alternativeConditionTokens, TCurlyClose]:
				closeAlternative();
			case [InkLexer.alternativeContentTokens, TCurlyClose]:
				closeAlternative();

			// Start parsing content after tokens that indicate content
			case [InkLexer.storyTokens, TChar(_) | TGlue]:
				ruleset = InkLexer.contentTokens;

			// Start parsing options
			case [_, TStar | TPlus]: ruleset = InkLexer.optionDefTokens;
			case [InkLexer.optionDefTokens, TParenOpen]: ruleset = InkLexer.identifierTokens; nextRuleset = InkLexer.optionDefTokens;
			case [InkLexer.optionDefTokens, TSquareOpen | TChar(_)]: ruleset = InkLexer.optionContentTokens;

			// Start parsing gather
			case [_, TDash]: ruleset = InkLexer.gatherDefTokens;
			case [InkLexer.gatherDefTokens, TParenOpen]: ruleset = InkLexer.identifierTokens; nextRuleset = InkLexer.gatherDefTokens;
			case [InkLexer.gatherDefTokens, TCurlyOpen | TChar(_)]: ruleset = InkLexer.contentTokens;

			// Parse diverts, knots, and stitches
			case [_, TArrow]: ruleset = InkLexer.referenceTokens;
			case [_, TTrippleEqual]: ruleset = InkLexer.identifierTokens; nextRuleset = InkLexer.storyTokens;
			case [_, TEqual]: ruleset = InkLexer.identifierTokens; nextRuleset = InkLexer.storyTokens;

			default: {};
		}

		// Merge connected `TChar` tokens into `TString` tokens
		switch (token) {
			case TChar(val):
				var content = val;
				// Loop through connected tokens
				while (true) {
					var nextToken = tokenSource.token();
					switch (nextToken) {
						case TChar(nextVal) | TIdentifier(nextVal):
							content += nextVal;
						case _:
							// Not part of the text run: hand it out on the next call
							pendingToken = nextToken;
							return TString(content);
					}
				}
			case _:
				return token;
		}
	}

	/**
		Leaves one level of alternative nesting and selects the ruleset that
		applies afterwards: plain content once the outermost alternative is
		closed, the alternative condition ruleset otherwise.
	**/
	function closeAlternative():Void {
		alternativeDepth--;
		if (alternativeDepth == 0) {
			ruleset = InkLexer.contentTokens;
		} else {
			ruleset = InkLexer.alternativeConditionTokens;
		}
	}

	/**Returns the current position as reported by the underlying token source**/
	public function curPos():Position {
		return tokenSource.curPos();
	}
}
/**
	Parser for Ink source text, reading tokens from an `InkTokenSource`.
**/
class InkParser extends hxparse.Parser<InkTokenSource, LexerToken> implements ParserBuilder {
	/**
		Creates a parser for `input`.

		@param input the Ink source to parse
		@param sourceName name of the source, passed on to the lexer
	**/
	public function new(input:byte.ByteData, sourceName:String) {
		var lexer = new InkLexer(input, sourceName);
		var ts = new InkTokenSource(lexer);
		super(ts);
	}

	/**
		Debug helper: walks the whole token stream and traces it line by line.

		Diverts (`TArrow` followed by an identifier) are traced on their own;
		every other token is appended to the current line, which is traced
		when a newline is reached. Errors are traced and parsing continues,
		unless the same position fails twice in a row, in which case the loop
		stops instead of reporting the same error forever.
	**/
	public function testParse():Void {
		var line = "";
		// Start offset of the most recent error, used to detect a stalled stream
		var lastErrorPos = -1;
		while (true) {
			try {
				switch stream {
					case [TArrow, TIdentifier(name)]: trace('Divert: {$name}');
					case [TEof]: break;
					case [TNewline]: trace(line); line = "";
					case [token]: line += LexerTokenPrinter.print(token);
				}
			} catch (e:Dynamic) {
				trace(e);
				var errorPos = stream.curPos().pmin;
				// No progress since the previous error: retrying cannot succeed
				if (errorPos == lastErrorPos) break;
				lastErrorPos = errorPos;
			}
		}
		// Input that does not end with a newline still has pending text
		if (line != "") trace(line);
	}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment