Skip to content

Instantly share code, notes, and snippets.

@Varriount
Created July 10, 2018 04:32
Show Gist options
  • Save Varriount/c0a129cad278849ecb86665696b2c7a1 to your computer and use it in GitHub Desktop.
Save Varriount/c0a129cad278849ecb86665696b2c7a1 to your computer and use it in GitHub Desktop.
import pegs, strformat, strutils, unicode
# PEG grammar for a small shell-like command line. It is split into node
# expressions (command structure) and token expressions (literals, operators
# and whitespace).
let grammer = peg"""
# Node Expressions
commandline <- seperated_cmd / simple_cmd
seperated_cmd <- simple_cmd WS SEPERATOR WS simple_cmd
simple_cmd <- ATOM ( WS ATOM )+ ( WS redirection )?
redirection <- REDIRECTOR ( ATOM )
# Token expressions
ATOM <- STRING_LIT / WORD_LIT
WORD_LIT <- \w+
STRING_LIT <- DQ_STRING_LIT / SQ_STRING_LIT
DQ_STRING_LIT <- DQUOTE (BSLASH . / [^"])* DQUOTE
SQ_STRING_LIT <- SQUOTE (BSLASH . / [^'])* SQUOTE
SEPERATOR <- ( "||" / "&&" )
# Ordered choice: a longer operator must be listed before its own prefix
REDIRECTOR <- ( ">>" / ">" / "!>>" / "!>" / "<" )
WS <- \s+
DQUOTE <- "\""
SQUOTE <- "'"
BSLASH <- "\\"
"""
type
  # Category of a token produced while parsing.
  TokenKind = enum
    tkUnknown, tkString, tkWord

  # A single token: its kind, its text and a source position.
  Token = object
    kind: TokenKind
    data: string
    line: int
    column: int

  # Category of a node in the syntax tree.
  AstNodeKind = enum
    nkUnknown, nkTerm, nkCommand,
    nkSeperatedCmd, nkSimpleCmd, nkRedirection

  # Syntax tree node: `nkTerm` nodes carry a token, every other kind
  # carries a list of child nodes.
  AstNode = ref object
    case kind: AstNodeKind
    of nkTerm:
      term: Token
    else:
      children: seq[AstNode]
# Lookup tables pairing grammar rule names with the node kind or token kind
# associated with them.
const nodeNameMap = [
  ("seperated_cmd", nkSeperatedCmd),
  ("simple_cmd", nkSimpleCmd),
  ("redirection", nkRedirection)
]

const tokenNameMap = [
  ("WORD_LIT", tkWord),
  ("STRING_LIT", tkString)
]
proc lookupTokenKind(name: string): TokenKind =
  ## Returns the token kind paired with the rule `name` in `tokenNameMap`,
  ## or `tkUnknown` when no entry has that name.
  result = tkUnknown
  for entry in tokenNameMap:
    if entry[0] == name:
      result = entry[1]
      break
proc lookupNodeKind(name: string): AstNodeKind =
  ## Returns the node kind paired with the rule `name` in `nodeNameMap`,
  ## or `nkUnknown` when no entry has that name.
  result = nkUnknown
  for entry in nodeNameMap:
    if entry[0] == name:
      result = entry[1]
      break
proc newToken(
    kind = tkUnknown,
    data: string = "",
    line = 0,
    column = 0): Token =
  ## Builds a `Token` from the given fields.
  ##
  ## Every parameter is optional; calling `newToken()` yields a `tkUnknown`
  ## token with empty `data` positioned at line 0, column 0.
  # The default for `data` is the empty string rather than `nil`: strings
  # are not nilable since Nim 0.19, so `nil` no longer compiles here.
  result = Token(
    kind: kind,
    data: data,
    line: line,
    column: column
  )
proc newNode(kind: AstNodeKind): AstNode =
  ## Allocates an `AstNode` of the requested `kind` with its payload set up:
  ## a default token for `nkTerm`, an empty child list for any other kind.
  result = AstNode(kind: kind)
  # The discriminator is fixed first; the matching branch field is then
  # filled in afterwards.
  if kind == nkTerm:
    result.term = newToken()
  else:
    result.children = @[]
template setLenOffset(value, offset) =
  ## Changes the length of `value` by `offset` elements relative to its
  ## current length; a negative `offset` shrinks it.
  setLen(value, value.len + offset)
proc print(n: AstNode, indent = " ") =
  ## Writes the tree rooted at `n` to standard output, one node per line.
  ##
  ## `indent` is the prefix printed before the node; each level of children
  ## is printed with one extra space appended to it.
  case n.kind
  of nkTerm:
    let tok = n.term
    # The closing parenthesis balances the one opened after the kind name.
    echo(fmt("{indent}{n.kind}(data: \"{tok.data}\", ") &
         fmt("line:{tok.line}, column:{tok.column})"))
  else:
    echo(fmt"{indent}{n.kind}:")
    for child in n.children:
      print(child, indent & " ")
# Parse state: `root` is the top of the syntax tree, `stack` holds the chain
# of nodes currently being built (innermost last).
let root = newNode(nkCommand)
var stack = @[root]
# Event-driven parser that builds the syntax tree while `grammer` is matched.
# Rules listed in `nodeNameMap` open a new node on `enter` and close it on
# `leave`; rules listed in `tokenNameMap` append an `nkTerm` leaf on `leave`.
let parser = eventParser(grammer):
  pkNonTerminal:
    enter:
      let nodeType = lookupNodeKind(p.nt.name)
      if nodeType != nkUnknown:
        echo "enter: ", p.nt.name
        echo " start: ", start
        echo ""
        # Attach the new node to the current parent, then make it current.
        let node = newNode(nodeType)
        stack[^1].children.add(node)
        stack.add(node)
    leave:
      let nodeType = lookupNodeKind(p.nt.name)
      if nodeType != nkUnknown:
        echo "leave: ", p.nt.name
        echo " start: ", start
        echo " length: ", length
        echo ""
        setLenOffset(stack, -1)
        # A length of -1 means the rule failed: drop the node that was
        # attached to the parent on `enter`.
        if length == -1:
          setLenOffset(stack[^1].children, -1)
      let tokenType = lookupTokenKind(p.nt.name)
      if tokenType != tkUnknown:
        echo "leave: ", p.nt.name
        echo " start: ", start
        echo " length: ", length
        echo ""
        # Only successful matches become tokens; a failed alternative
        # (length == -1) must not leave an empty leaf behind.
        if length != -1:
          add(
            stack[^1].children,
            AstNode(
              kind: nkTerm,
              term: newToken(
                kind = tokenType,
                data = s.substr(start, start+length-1),
                line = 0,
                column = 0
              )
            )
          )
# Run the parser on a sample command line, show its result, then dump the
# tree that was built.
let parseResult = parser("hello world")
echo parseResult
print(root)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment