Created
July 10, 2018 04:32
-
-
Save Varriount/c0a129cad278849ecb86665696b2c7a1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pegs, strformat, strutils, unicode | |
# PEG grammar for a small shell-like command line. The first rule
# (`commandline`) is the start rule. Lower-case rules are turned into AST
# nodes and upper-case rules are tokens.
let grammer = peg"""
# Node Expressions
commandline   <- seperated_cmd / simple_cmd
seperated_cmd <- simple_cmd WS SEPERATOR WS simple_cmd
simple_cmd    <- ATOM ( WS ATOM )+ ( WS redirection )?
redirection   <- REDIRECTOR ( ATOM )

# Token expressions
ATOM          <- STRING_LIT / WORD_LIT
WORD_LIT      <- \w+
STRING_LIT    <- DQ_STRING_LIT / SQ_STRING_LIT
DQ_STRING_LIT <- DQUOTE (BSLASH . / [^"])* DQUOTE
SQ_STRING_LIT <- SQUOTE (BSLASH . / [^'])* SQUOTE
SEPERATOR     <- ( "||" / "&&" )
# Ordered choice commits to the first alternative that matches, so every
# operator has to be listed before any operator that is a prefix of it.
REDIRECTOR    <- ( "!>>" / "!>" / ">>" / ">" / "<" )
WS            <- \s+
DQUOTE        <- "\""
SQUOTE        <- "'"
BSLASH        <- "\\"
"""
type
  # Category of a terminal produced by the grammar.
  TokenKind = enum
    tkUnknown, tkString, tkWord

  # A terminal: its category, its text and its position fields.
  Token = object
    kind: TokenKind
    data: string
    line, column: int

  # Category of a node in the syntax tree.
  AstNodeKind = enum
    nkUnknown, nkTerm, nkCommand, nkSeperatedCmd, nkSimpleCmd, nkRedirection

  # Tree node: `nkTerm` nodes carry a single token, every other kind
  # carries a list of child nodes.
  AstNode = ref object
    case kind: AstNodeKind
    of nkTerm:
      term: Token
    else:
      children: seq[AstNode]
# Lookup tables from grammar rule names to the node / token kinds they
# produce. Rules that are absent from both tables produce nothing.
const
  nodeNameMap = {
    "seperated_cmd": nkSeperatedCmd,
    "simple_cmd": nkSimpleCmd,
    "redirection": nkRedirection
  }

  tokenNameMap = {"WORD_LIT": tkWord, "STRING_LIT": tkString}
proc lookupTokenKind(name: string): TokenKind =
  ## Returns the token kind registered for the grammar rule `name` in
  ## `tokenNameMap`, or `tkUnknown` when the rule has no entry.
  result = tkUnknown
  for entry in tokenNameMap:
    if entry[0] == name:
      result = entry[1]
      break
proc lookupNodeKind(name: string): AstNodeKind =
  ## Returns the node kind registered for the grammar rule `name` in
  ## `nodeNameMap`, or `nkUnknown` when the rule has no entry.
  result = nkUnknown
  for entry in nodeNameMap:
    if entry[0] == name:
      result = entry[1]
      break
proc newToken(
    kind = tkUnknown,
    data: string = "",
    line = 0,
    column = 0): Token =
  ## Builds a `Token` from the given fields.
  ##
  ## Every parameter is optional; calling `newToken()` yields an unknown
  ## token with empty text at position 0/0. The default for `data` is the
  ## empty string because `nil` is not a valid `string` value in current
  ## Nim.
  Token(kind: kind, data: data, line: line, column: column)
proc newNode(kind: AstNodeKind): AstNode =
  ## Allocates a node of the requested kind with its payload initialised:
  ## a default token for `nkTerm`, an empty child list for everything else.
  result = AstNode(kind: kind)
  if kind == nkTerm:
    result.term = newToken()
  else:
    result.children = @[]
template setLenOffset(value, offset) =
  ## Resizes `value` relative to its current length: a negative `offset`
  ## drops trailing elements, a positive one grows the container.
  ## `value` is expanded twice, so it should be a side-effect-free
  ## expression.
  setLen(value, value.len + offset)
proc print(n: AstNode, indent = " ") =
  ## Writes the tree rooted at `n` to stdout, one node per line.
  ##
  ## Term nodes are printed with their token text and position; every
  ## other node prints its kind followed by its children, each level
  ## indented by one more space than its parent.
  case n.kind
  of nkTerm:
    # The closing parenthesis balances the one opened after the kind name.
    echo &"{indent}{n.kind}(data: \"{n.term.data}\", line:{n.term.line}, column:{n.term.column})"
  else:
    echo &"{indent}{n.kind}:"
    for child in n.children:
      print(child, indent & " ")
# Root of the syntax tree; parsed nodes are attached beneath it.
let root = newNode(nkCommand)

# Stack of currently open nodes; the top entry receives new children.
var stack = @[root]
# Event-driven parser that builds the AST under `root` while matching
# `grammer`. Rules listed in `nodeNameMap` open a node on `enter` and close
# it on `leave`; rules listed in `tokenNameMap` append a term node to the
# node currently on top of `stack`. The handlers also trace every relevant
# event to stdout.
let parser = eventParser(grammer):
  pkNonTerminal:
    enter:
      let nodeType = lookupNodeKind(p.nt.name)
      if nodeType != nkUnknown:
        echo "enter: ", p.nt.name
        echo " start: ", start
        echo ""
        # Attach the node optimistically; `leave` detaches it again if the
        # rule turns out not to match.
        var node = newNode(nodeType)
        stack[^1].children.add(node)
        stack.add(node)
    leave:
      let nodeType = lookupNodeKind(p.nt.name)
      if nodeType != nkUnknown:
        echo "leave: ", p.nt.name
        echo " start: ", start
        echo " length: ", length
        echo ""
        # Close the node opened by the matching `enter`.
        setLenOffset(stack, -1)
        if length == -1:
          # The rule failed: drop the node that `enter` attached.
          setLenOffset(stack[^1].children, -1)
      let tokenType = lookupTokenKind(p.nt.name)
      if tokenType != tkUnknown:
        echo "leave: ", p.nt.name
        echo " start: ", start
        echo " length: ", length
        echo ""
        # Only successful matches become tokens. A failed attempt reports
        # length -1 and would otherwise leave an empty token in the tree
        # (e.g. the STRING_LIT attempt that precedes every WORD_LIT).
        if length != -1:
          add(
            stack[^1].children,
            AstNode(
              kind: nkTerm,
              term: newToken(
                kind = tokenType,
                data = s.substr(start, start + length - 1),
                line = 0,
                column = 0
              )
            )
          )
# Parse a sample command line, show the parser's result, then dump the
# tree that the event handlers built.
let parseResult = parser("hello world")
echo parseResult
root.print()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment