Skip to content

Instantly share code, notes, and snippets.

@kubo39
Last active May 5, 2016 20:02
Show Gist options
  • Select an option

  • Save kubo39/2e737d16cac317433fd7cdc633a8ae28 to your computer and use it in GitHub Desktop.

Select an option

Save kubo39/2e737d16cac317433fd7cdc633a8ae28 to your computer and use it in GitHub Desktop.
module parser;
import lexer : Lexer, Token, TokenType;
/// Exception thrown by `Parser` when the token stream does not fit the grammar.
class ParserError : Exception
{
    /**
     * Params:
     *   message = human-readable description of the failure
     *   file    = source file recorded on the exception (defaults to the call site)
     *   line    = source line recorded on the exception (defaults to the call site)
     *   next    = optional chained throwable
     */
    this(string message,
         string file = __FILE__,
         size_t line = __LINE__,
         Throwable next = null)
    {
        super(message, file, line, next);
    }
}
/**
 * Recursive-descent recognizer for nested, comma-separated lists of names.
 *
 * Grammar:
 * ---
 * list     : '[' elements ']'
 * elements : element (',' element)*
 * element  : name | list
 * ---
 *
 * Whitespace tokens have no meaning in this grammar. They are discarded while
 * advancing, so `current` is never a Whitespace token and every rule only has
 * to deal with significant tokens.
 */
class Parser
{
private:
    Lexer _input;
    Token current; // single token of lookahead

public:
    /**
     * Binds the parser to `input` and loads the first significant token.
     */
    this(Lexer input)
    {
        _input = input;
        consume();
    }

    /**
     * list: '[' elements ']'
     *
     * Throws: ParserError if the input at the current position is not a
     *         well-formed list.
     */
    void list()
    {
        match(TokenType.Lbracket);
        elements();
        match(TokenType.Rbracket);
    }

    /**
     * Advances `current` to the next significant token.
     *
     * Whitespace is skipped here rather than in the grammar rules: treating a
     * blank as if it were the expected token would let input such as "[a "
     * (no closing bracket) pass as a complete list.
     */
    void consume()
    {
        do
        {
            current = _input.nextToken;
        }
        while (current.type == TokenType.Whitespace);
    }

    /**
     * elements: element (',' element)*
     */
    void elements()
    {
        element();
        while (current.type == TokenType.Comma)
        {
            match(TokenType.Comma);
            element();
        }
    }

    /**
     * element: name | list
     *
     * Throws: ParserError if the lookahead starts neither a name nor a list.
     */
    void element()
    {
        switch (current.type)
        {
        case TokenType.Name:
            match(TokenType.Name);
            break;
        case TokenType.Lbracket:
            list();
            break;
        default:
            throw new ParserError("expecting name or list: found " ~ current.text);
        }
    }

    /**
     * Consumes the lookahead if it has exactly the requested `type`.
     *
     * Throws: ParserError if the lookahead has any other type.
     */
    void match(TokenType type)
    {
        if (current.type != type)
        {
            throw new ParserError("unexpected tokentype.");
        }
        consume();
    }
}
unittest
{
    import std.exception : assertThrown;

    // Runs the `list` rule over `text`; throws ParserError on rejection.
    static void parse(string text)
    {
        auto parser = new Parser(new Lexer(text));
        parser.list();
    }

    // Flat list with irregular spacing between tokens.
    parse("[a, b ]");

    // Lists may nest as elements.
    parse("[a,[b,c]]");

    // A trailing comma must be followed by another element.
    assertThrown!ParserError(parse("[a,"));

    // The grammar requires at least one element.
    assertThrown!ParserError(parse("[]"));
}
module lexer;
import std.array : appender;
import std.ascii : isAlpha;
/// Kinds of token produced by `Lexer.nextToken`.
enum TokenType
{
    Whitespace, /// run of ' ', '\t', '\n' or '\r' characters
    EOF,        /// end of input reached
    Name,       /// run of ASCII letters
    Comma,      /// ','
    Lbracket,   /// '['
    Rbracket    /// ']'
}
/// A single lexical unit: its category plus the text it stands for.
struct Token
{
    /// Category of the token.
    TokenType type;

    /// Text of the token; the lexer uses the placeholder "<EOF>" for end of input.
    string text;
}
/// Exception thrown by `Lexer` when it meets a character it cannot tokenize.
class LexerError : Exception
{
    /**
     * Params:
     *   message = human-readable description of the failure
     *   file    = source file recorded on the exception (defaults to the call site)
     *   line    = source line recorded on the exception (defaults to the call site)
     *   next    = optional chained throwable
     */
    this(string message,
         string file = __FILE__,
         size_t line = __LINE__,
         Throwable next = null)
    {
        super(message, file, line, next);
    }
}
/**
 * Splits an input string into `Token`s on demand.
 *
 * Recognized tokens are runs of whitespace (' ', '\t', '\n', '\r'), runs of
 * ASCII letters (names), and the single characters ',', '[' and ']'. Once the
 * input is exhausted every call to `nextToken` yields an EOF token.
 */
class Lexer
{
private:
    size_t pos;    // index of the next unread character in _input
    string _input;

    // True for the characters that are grouped into a Whitespace token.
    static bool isBlank(char c)
    {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
    }

public:
    /// Creates a lexer positioned at the start of `input`.
    this(in string input)
    {
        _input = input;
        pos = 0;
    }

    /// True once every character of the input has been consumed.
    bool isEOF() @property
    {
        return !hasAtLeast(0);
    }

    /// True if the character `n` positions past the cursor lies inside the input.
    bool hasAtLeast(size_t n)
    {
        return pos + n < _input.length;
    }

    /// Moves the cursor forward by one character.
    void consume()
    {
        ++pos;
    }

    /// Character `offset` positions past the cursor; the caller must ensure it is in range.
    char charAt(size_t offset)
    {
        return _input[pos + offset];
    }

    /// Character under the cursor; the caller must ensure the lexer is not at EOF.
    char nextChar()
    {
        return charAt(0);
    }

    /**
     * Scans and returns the next token, advancing past it.
     *
     * Returns: the token at the cursor, or `Token(TokenType.EOF, "<EOF>")` when
     *          no input is left.
     * Throws: LexerError if the character under the cursor starts no known
     *         token; the cursor is left on that character.
     */
    Token nextToken()
    {
        if (isEOF)
        {
            return Token(TokenType.EOF, "<EOF>");
        }

        immutable start = pos;
        immutable char c = nextChar();

        if (isBlank(c))
        {
            do
            {
                consume();
            }
            while (!isEOF && isBlank(nextChar()));
            return Token(TokenType.Whitespace, _input[start .. pos]);
        }

        if (c.isAlpha)
        {
            // The EOF check must precede every peek: a name may be the very
            // last thing in the input, and peeking past it would index out of
            // bounds.
            do
            {
                consume();
            }
            while (!isEOF && isAlpha(nextChar()));
            return Token(TokenType.Name, _input[start .. pos]);
        }

        switch (c)
        {
        case ',':
            consume();
            return Token(TokenType.Comma, ",");
        case '[':
            consume();
            return Token(TokenType.Lbracket, "[");
        case ']':
            consume();
            return Token(TokenType.Rbracket, "]");
        default:
            throw new LexerError("invalid character.");
        }
    }
}
unittest
{
    // Check the exact token stream instead of printing it, so a regression
    // fails the test run rather than silently changing the output.
    auto lexer = new Lexer("[a, b ]");
    auto expected = [
        Token(TokenType.Lbracket, "["),
        Token(TokenType.Name, "a"),
        Token(TokenType.Comma, ","),
        Token(TokenType.Whitespace, " "),
        Token(TokenType.Name, "b"),
        Token(TokenType.Whitespace, " "),
        Token(TokenType.Rbracket, "]"),
        Token(TokenType.EOF, "<EOF>"),
    ];
    foreach (want; expected)
    {
        assert(lexer.nextToken == want);
    }

    // An exhausted lexer keeps reporting EOF.
    assert(lexer.nextToken.type == TokenType.EOF);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment