Last active
May 5, 2016 20:02
-
-
Save kubo39/2e737d16cac317433fd7cdc633a8ae28 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| module parser; | |
| import lexer : Lexer, Token, TokenType; | |
/**
 * Exception thrown by Parser when the token stream does not follow
 * the list grammar.
 *
 * The constructor simply forwards every argument to Exception, so
 * `file` and `line` default to the location of the `new ParserError`
 * expression.
 */
class ParserError : Exception
{
    /**
     * Params:
     *   message = description of the syntax error
     *   file    = source file reported by the exception
     *   line    = source line reported by the exception
     *   next    = chained throwable, or null when there is none
     */
    this(string message,
         string file = __FILE__,
         size_t line = __LINE__,
         Throwable next = null)
    {
        super(message, file, line, next);
    }
}
/**
 * Recursive-descent recognizer for nested lists of names.
 *
 * Grammar:
 *   list:     '[' elements ']'
 *   elements: element (',' element)*
 *   element:  name | list
 *
 * Whitespace tokens produced by the lexer carry no grammatical meaning.
 * They are dropped while advancing, so `current` never holds a
 * Whitespace token and every rule below can compare token types
 * directly.
 */
class Parser
{
private:
    Lexer _input;
    Token current;   // one-token lookahead; never a Whitespace token

public:
    /**
     * Creates a parser reading from `input` and loads the first
     * significant (non-whitespace) token as lookahead.
     */
    this(Lexer input)
    {
        _input = input;
        advance();
    }

    /**
     * list: '[' elements ']'
     *
     * Throws: ParserError when either bracket is missing or an
     *         element is malformed.
     */
    void list()
    {
        match(TokenType.Lbracket);
        elements();
        match(TokenType.Rbracket);
    }

    /**
     * Replaces the lookahead with the next significant token,
     * discarding any whitespace in between.
     */
    void consume()
    {
        advance();
    }

    /**
     * elements: element (',' element)*
     */
    void elements()
    {
        element();
        while (current.type == TokenType.Comma)
        {
            match(TokenType.Comma);
            element();
        }
    }

    /**
     * element: name | list
     *
     * Throws: ParserError when the lookahead starts neither a name
     *         nor a list.
     */
    void element()
    {
        switch (current.type)
        {
        case TokenType.Name:
            match(TokenType.Name);
            break;
        case TokenType.Lbracket:
            list();
            break;
        default:
            throw new ParserError("expecting name or list: found " ~ current.text);
        }
    }

    /**
     * Consumes the lookahead if it has exactly the expected type.
     *
     * A Whitespace token used to be accepted here in place of any
     * expected type, which let input such as "[a, b " (no closing
     * bracket) pass. Whitespace is now skipped while advancing, so the
     * comparison is strict.
     *
     * Throws: ParserError when the lookahead has a different type.
     */
    void match(TokenType type)
    {
        if (current.type != type)
        {
            throw new ParserError("unexpected tokentype.");
        }
        consume();
    }

private:
    // Private (hence non-virtual) so the constructor can call it safely:
    // pulls tokens from the lexer until one that is not Whitespace.
    void advance()
    {
        do
        {
            current = _input.nextToken;
        }
        while (current.type == TokenType.Whitespace);
    }
}
// Exercises Parser.list on well-formed and malformed input.
unittest
{
    import std.exception : assertThrown;

    // Well-formed input must be accepted without throwing.
    auto lexer = new Lexer("[a, b ]");
    auto parser = new Parser(lexer);
    parser.list();

    // Nested lists are elements too.
    new Parser(new Lexer("[a, [b, c]]")).list();

    // The grammar requires at least one element, and one after each comma.
    assertThrown!ParserError(new Parser(new Lexer("[]")).list());
    assertThrown!ParserError(new Parser(new Lexer("[,]")).list());
    assertThrown!ParserError(new Parser(new Lexer("[a, ]")).list());
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| module lexer; | |
| import std.array : appender; | |
| import std.ascii : isAlpha; | |
/**
 * Kinds of token the lexer can produce.
 *
 * Member order is significant: it fixes the underlying integer values.
 */
enum TokenType
{
    Whitespace, /// a run of ' ', '\t', '\n' or '\r'
    EOF,        /// end of input
    Name,       /// a run of ASCII letters
    Comma,      /// ','
    Lbracket,   /// '['
    Rbracket    /// ']'
}
/**
 * A single lexical unit: its kind plus the text it was built from.
 */
struct Token
{
    /// Category of the token.
    TokenType type;

    /// Text associated with the token ("<EOF>" for the end-of-input token).
    string text;
}
/**
 * Exception thrown by Lexer when it meets a character that cannot
 * start any token.
 *
 * The constructor simply forwards every argument to Exception, so
 * `file` and `line` default to the location of the `new LexerError`
 * expression.
 */
class LexerError : Exception
{
    /**
     * Params:
     *   message = description of the lexical error
     *   file    = source file reported by the exception
     *   line    = source line reported by the exception
     *   next    = chained throwable, or null when there is none
     */
    this(string message,
         string file = __FILE__,
         size_t line = __LINE__,
         Throwable next = null)
    {
        super(message, file, line, next);
    }
}
/**
 * Hand-written scanner that splits an input string into Token values.
 *
 * Recognized tokens: runs of whitespace (' ', '\t', '\n', '\r'), runs of
 * ASCII letters (Name), and the single characters ',', '[' and ']'.
 * Once the input is exhausted every call to nextToken yields the EOF
 * token.
 */
class Lexer
{
private:
    size_t pos;      // index of the next unread character in _input
    string _input;

    // True for exactly the characters grouped into a Whitespace token.
    static bool isBlank(char c)
    {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
    }

public:
    /// Creates a lexer positioned at the start of `input`.
    this(in string input)
    {
        _input = input;
        pos = 0;
    }

    /// True when no unread characters remain.
    bool isEOF() @property
    {
        return !hasAtLeast(0);
    }

    /// True when the character `n` positions past the cursor exists.
    bool hasAtLeast(size_t n)
    {
        return pos + n < _input.length;
    }

    /// Moves the cursor one character forward.
    void consume()
    {
        ++pos;
    }

    /// Returns the character `offset` positions past the cursor.
    /// The caller must ensure that position is inside the input.
    char charAt(size_t offset)
    {
        return _input[pos + offset];
    }

    /// Returns the character under the cursor without consuming it.
    char nextChar()
    {
        return charAt(0);
    }

    /**
     * Scans and returns the next token.
     *
     * Returns: the EOF token (text "<EOF>") when the input is
     *          exhausted, otherwise the token starting at the cursor.
     * Throws:  LexerError when the character under the cursor cannot
     *          start any token.
     */
    Token nextToken()
    {
        if (isEOF) return Token(TokenType.EOF, "<EOF>");

        immutable start = pos;
        immutable char c = nextChar;
        switch (c)
        {
        case ' ', '\t', '\n', '\r':
            // Collapse the whole run of whitespace into one token.
            do
            {
                consume();
            }
            while (!isEOF && isBlank(nextChar));
            return Token(TokenType.Whitespace, _input[start .. pos]);

        case ',':
            consume();
            return Token(TokenType.Comma, ",");

        case '[':
            consume();
            return Token(TokenType.Lbracket, "[");

        case ']':
            consume();
            return Token(TokenType.Rbracket, "]");

        default:
            if (!c.isAlpha)
            {
                throw new LexerError("invalid character.");
            }
            // The isEOF guard matters: without it a name at the very end
            // of the input made nextChar index one past the last character.
            do
            {
                consume();
            }
            while (!isEOF && nextChar.isAlpha);
            return Token(TokenType.Name, _input[start .. pos]);
        }
        assert(false, "unexpected behavior.");
    }
}
// Checks the exact token stream produced for a small list.
unittest
{
    import std.exception : assertThrown;

    auto lexer = new Lexer("[a, b ]");
    auto expected = [
        Token(TokenType.Lbracket, "["),
        Token(TokenType.Name, "a"),
        Token(TokenType.Comma, ","),
        Token(TokenType.Whitespace, " "),
        Token(TokenType.Name, "b"),
        Token(TokenType.Whitespace, " "),
        Token(TokenType.Rbracket, "]"),
        Token(TokenType.EOF, "<EOF>"),
    ];
    foreach (want; expected)
    {
        assert(lexer.nextToken == want);
    }

    // Once the input is exhausted the lexer keeps answering EOF.
    assert(lexer.nextToken == Token(TokenType.EOF, "<EOF>"));

    // A character that starts no token is rejected.
    assertThrown!LexerError(new Lexer("?").nextToken);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment