Skip to content

Instantly share code, notes, and snippets.

@fowlmouth
Created October 8, 2012 11:12
Show Gist options
  • Save fowlmouth/3851985 to your computer and use it in GitHub Desktop.
Save fowlmouth/3851985 to your computer and use it in GitHub Desktop.
import strutils
when defined(parserPeg):
import pegs
elif defined(parserRegex):
import re
else:
{.error: "define either parserPeg or parserRegex".}
proc last*[A](some: seq[A]): A =
  ## Returns the final element of `some`.
  ##
  ## `some` must not be empty: `high(some)` is not a valid index for an
  ## empty sequence.
  result = some[high(some)]
type
  # Action run on the lexer after a rule has matched (see `PRule.call`).
  TLexHandler = proc(lex: PLexer)

  # Predicate that attempts a match against the lexer and reports success.
  TLexMatcher = proc(lex: PLexer): bool {.closure.}

  # A token; `literal` holds the matched source text.
  TToken = object
    literal: string

  # Lexer state. `current` returns `input[pos .. next-1]`; a successful
  # pattern match advances `next`, and the handlers move `pos` up to `next`.
  PLexer = ref object
    input: string
    pos: int
    next: int
    rules: seq[PRule]
    tokens: seq[TToken]

  # A compiled pattern: a PEG or a regular expression, chosen at compile
  # time by `-d:parserPeg` / `-d:parserRegex`.
  PParserExpr = ref object
    when defined(parserPeg):
      match: TPeg
    elif defined(parserRegex):
      match: TRegex
    when defined(debug):
      pattern: string # source text of the pattern, kept for debug output

  # A grammar rule: an optional matcher, an optional fallback sub-rule and
  # an optional handler.
  PRule = ref object
    match: TLexMatcher
    call: TLexHandler ## called when a pattern matches
    sub: PRule
# `debug_do` wraps statements that should only exist in `-d:debug` builds.
when not defined(debug):
  # Without -d:debug the wrapped statements are dropped entirely.
  template debug_do(body: stmt): stmt = discard
else:
  # With -d:debug the wrapped statements are emitted unchanged.
  template debug_do(body: stmt): stmt = body
# Forward declaration: `$` for PLexer calls `current` before its body is
# defined later in this file.
proc current*(lex: PLexer): string
proc `$`*(a: PLexer): string =
  ## Renders the lexer as ``[PLexer <pos>..<next> '<current text>']``.
  result = "[PLexer " & $a.pos & ".." & $a.next & " '" & a.current() & "']"
proc exp(expression: string): PParserExpr =
  ## Compiles `expression` into a pattern object: with `peg` when
  ## `parserPeg` is defined, with `re` otherwise.
  new(result)
  when not defined(parserPeg):
    result.match = re(expression)
  else:
    result.match = peg(expression)
  # Debug builds keep the pattern source so trace output can show it.
  when defined(debug):
    result.pattern = expression
proc parse*(x: PParserExpr; lex: PLexer): bool {.discardable.} =
  ## Tries to match the pattern `x` against `lex.input` starting at
  ## `lex.next`.
  ##
  ## Returns true when `matchLen` reports a length of zero or more, and in
  ## that case advances `lex.next` by the matched length. On failure the
  ## lexer is left untouched.
  let matched = matchLen(lex.input, x.match, lex.next)
  result = matched >= 0
  if result:
    lex.next += matched
    debug_do: echo("expr match ", x.pattern," ", lex)
  else:
    debug_do: echo "no match for ", x.pattern, " ", lex
proc parse*(rule: PRule; lex: Plexer): bool =
  ## Runs `rule` against the lexer.
  ##
  ## The rule's own matcher is tried first (if it has one); when that does
  ## not succeed and a sub-rule is present, the sub-rule is parsed instead.
  ## On success the rule's handler, if any, is invoked with the lexer.
  let hasMatcher = not isNil(rule.match)
  if hasMatcher:
    result = rule.match(lex)
  if not result:
    if not isNil(rule.sub):
      result = parse(rule.sub, lex)
  if not result:
    return
  if not isNil(rule.call):
    echo "calling handler :)"
    rule.call(lex)
proc newLex*(rules: varargs[PRule]): PLexer =
  ## Creates a lexer that owns a copy of `rules` (in the given order) and
  ## starts with an empty token list. No input is set yet; see `setInput`.
  new(result)
  result.rules = @[]
  result.tokens = @[]
  for i in 0 .. high(rules):
    add(result.rules, rules[i])
proc setInput*(lex: PLexer; input: string; cleartoks = true) =
  ## Points the lexer at a new `input` string and rewinds both cursors
  ## (`pos` and `next`) to the start. The collected tokens are discarded
  ## unless `cleartoks` is false.
  if cleartoks:
    lex.tokens = @[]
  lex.input = input
  lex.next = 0
  lex.pos = 0
proc token*(lex: PLexer): TToken =
  ## Tries the lexer's rules in order at the current position and returns
  ## the most recent token emitted by the first rule that matches.
  ##
  ## The default (zero-initialised) `TToken` is returned when
  ## * the input is exhausted (`lex.pos >= lex.input.len`),
  ## * no rule matches, or
  ## * the matching rule succeeded without appending a token.
  if lex.pos >= lex.input.len:
    return
  # Remember how many tokens existed before this call so that a rule which
  # matches without emitting anything (a zero-width `*` match, or one that
  # only skipped text) cannot hand back a token from an earlier call - or
  # index into an empty token list.
  let before = lex.tokens.len
  for rule in lex.rules:
    if rule.parse(lex):
      if lex.tokens.len > before:
        result = lex.tokens[lex.tokens.len - 1]
      break
template or_impl() {.dirty.} =
  ## Shared body of the binary `|` overloads.
  ##
  ## Deliberately unhygienic (`dirty`): it assigns to the `result` of the
  ## proc it is expanded in and reads that proc's parameters `a` and `b`.
  ## It is a statement template - the body is an assignment, so it has no
  ## expression return type - mirroring `and_impl`.
  ##
  ## The installed matcher tries `a` first and falls back to `b` only when
  ## `a` does not match (`or` short-circuits).
  result.match = proc(lex: PLexer): bool =
    result = (a.parse(lex) or b.parse(lex))
# Ordered choice: `a | b` builds a rule that succeeds when `a` parses or,
# failing that, when `b` parses. One overload per operand-type combination;
# the matcher itself is installed by the `or_impl` template.

proc `|`(a, b: PParserExpr): PRule =
  ## Choice between two patterns.
  new result
  or_impl()

proc `|`(a, b: PRule): PRule =
  ## Choice between two rules.
  new result
  or_impl()

proc `|`(a: PRule; b: PParserExpr): PRule =
  ## Choice between a rule and a pattern.
  new result
  or_impl()

proc `|`(a: PParserExpr; b: PRule): PRule =
  ## Choice between a pattern and a rule.
  new result
  or_impl()
template and_impl(){.immediate, dirty.}=
  ## Shared body of the binary `+` overloads. Expanded inside a proc that
  ## has parameters `a` and `b` and a `PRule` result; installs a matcher
  ## that requires `a` and then `b` to parse (`and` short-circuits, so `b`
  ## is only attempted after `a` succeeded).
  ##
  ## NOTE(review): nothing here rewinds `lex.next` when `a` matched but `b`
  ## did not, so a failed sequence can leave the cursor advanced.
  result.match = proc(lex: PLexer): bool =
    result = (a.parse(lex) and b.parse(lex))
# Sequence: `a + b` builds a rule that succeeds when `a` parses and then
# `b` parses. One overload per operand-type combination; the matcher itself
# is installed by the `and_impl` template.

proc `+`(a, b: PParserExpr): PRule =
  ## Pattern followed by pattern.
  new result
  and_impl()

proc `+`(a: PRule; b: PParserExpr): PRule =
  ## Rule followed by pattern.
  new result
  and_impl()

proc `+`(a: PParserExpr; b: PRule): PRule =
  ## Pattern followed by rule.
  new result
  and_impl()

proc `+`(a, b: PRule): PRule =
  ## Rule followed by rule.
  new result
  and_impl()
proc `+`(a: PRule): PRule =
  ## Match one or more times
  ##
  ## The returned rule succeeds when `a` parses at least once; after the
  ## first success it keeps re-applying `a` until it stops matching.
  new(result)
  result.match = proc(lex: PLexer): bool =
    echo "checking unary +"
    result = a.parse(lex)
    if result:
      echo "unary + Parsed!"
      # Consume every further repetition; the loop body has nothing to do.
      while a.parse(lex):
        discard
proc `*`(a: PRule): PRule =
  ## Match 0 or more times
  ##
  ## The returned rule always succeeds; it applies `a` repeatedly until `a`
  ## stops matching.
  new(result)
  result.match = proc(lex: PLexer): bool =
    while true:
      if not a.parse(lex):
        break
    result = true
# Forward declaration: the unary `+`/`*` overloads for PParserExpr and the
# `<-` operator use `singleExpr` before its body is defined.
proc singleExpr(a: PParserExpr): PRule
proc `+`(a: PParserExpr): PRule =
  ## One-or-more repetition of a bare pattern: wraps `a` in a rule with
  ## `singleExpr` and applies the unary `+` defined for rules.
  let wrapped = singleExpr(a)
  result = `+`(wrapped)
proc `*`(a: PParserExpr): PRule =
  ## Zero-or-more repetition of a bare pattern: wraps `a` in a rule with
  ## `singleExpr` and applies the unary `*` defined for rules.
  let wrapped = singleExpr(a)
  result = `*`(wrapped)
proc `<-`*(a: TLexHandler; b: PRule): PRule {.discardable.} =
  ## `handler <- rule`: builds a rule with no matcher of its own whose
  ## sub-rule is `b` and whose handler is `a`, so `a` is called with the
  ## lexer whenever `b` parses.
  new(result)
  result.sub = b
  result.call = a

proc `<-`*(a: TLexHandler; b: PParserExpr): PRule {.discardable.} =
  ## `handler <- pattern`: same as above, with the pattern first wrapped in
  ## a rule by `singleExpr`.
  result = a <- singleExpr(b)
proc singleExpr(a: PParserExpr): PRule =
  ## Lifts a bare pattern into a rule whose matcher simply parses `a`.
  ##
  ## The closure below captures `a`, which keeps the pattern alive for as
  ## long as the rule exists. The former manual `gc_ref a` was never paired
  ## with an unref, so it pinned every wrapped pattern forever; the other
  ## closures in this file capture their operands without it.
  new(result)
  result.match = proc(lex: PLexer): bool =
    result = a.parse(lex)
proc skip_current(lex: PLexer) =
  ## Handler that drops the text matched so far: logs the lexer state, then
  ## moves `pos` up to `next` without recording a token.
  echo "skip_current called! ", lex
  lex.pos = lex.next
proc skip(a: PParserExpr): PRule =
  ## if this parses, skip past it
  ##
  ## Builds a rule that matches the pattern `a` and uses `skip_current` as
  ## its handler.
  new(result)
  result.match = proc(lex: PLexer): bool =
    return a.parse(lex)
  result.call = skip_current

proc skip(a: PRule): PRule =
  ## Rule flavour of `skip`: matches the rule `a` and uses `skip_current`
  ## as its handler.
  new(result)
  result.match = proc(lex: PLexer): bool =
    result = a.parse(lex)
  result.call = skip_current
proc current*(lex: PLexer): string =
  ## The text matched so far but not yet consumed: the characters of
  ## `lex.input` from index `lex.pos` through index `lex.next - 1`.
  let
    first = lex.pos
    final = lex.next - 1
  result = lex.input[first .. final]
proc newRule*(callfunc: proc(lex: PLexer)): PRule =
  ## Creates a rule that only carries the handler `callfunc`; its matcher
  ## and sub-rule are left unset.
  result = PRule(call: callfunc)
proc newtok*(L: string): TToken =
  ## Builds a token whose literal text is `L`.
  result = TToken(literal: L)
proc add_current_tok*(L: PLexer): TToken {.discardable.} =
  ## Turns the lexer's current text (see `current`) into a token, appends
  ## it to `L.tokens`, moves `L.pos` up to `L.next`, and returns the token.
  let text = current(L)
  result.literal = text
  add(L.tokens, result)
  L.pos = L.next
proc op*(c: char): PParserExpr =
  ## Builds a pattern that matches the single character `c`.
  ##
  ## In PEG mode the character is wrapped in single quotes; in regex mode it
  ## is passed through `escapeRe`. Debug builds also store the pattern text.
  new(result)
  when defined(parserPeg):
    let src = "'$1'".format(c)
    result.match = peg(src)
  else:
    let src = escapeRe($c)
    result.match = re(src)
  when defined(debug):
    result.pattern = src
proc makeChain(lex: PLexer) =
  ## Handler: logs the current text and records it as a token.
  echo "makechain(", lex.current()
  discard add_current_tok(lex)
proc makeNum(lex: PLexer) =
  ## Handler for numbers: logs the current text and records it as a token.
  echo "makenum(", lex.current(), ")"
  discard add_current_tok(lex)
proc makeParen(lex: PLexer) =
  ## Handler for a parenthesised group: logs the current text and records
  ## it as a token.
  echo "makeparen:",lex.current()
  discard add_current_tok(lex)
proc new_param_open(lex: PLExer) =
  ## Handler for an opening parenthesis: logs the current text, records it
  ## as a token, then prints the lexer state after the token was taken.
  echo "new param open: ", lex.current()
  discard add_current_tok(lex)
  echo($lex)
proc make_message(lex: PLExer) =
  ## Handler for a message: logs the current text and records it as a token.
  echo "make_message: ", lex.current()
  discard add_current_tok(lex)
proc eat_whitespace(lex: PLExer)=
  ## Handler: logs the lexer state and records the current text as a token.
  ## NOTE(review): despite the name, the text is added to the token list
  ## rather than dropped - confirm that is intended.
  echo "eat_whitespace \"",lex,"\""
  discard add_current_tok(lex)
# Grammar building blocks. The `s*` values built with `exp`/`op` are
# compiled patterns; the ones built with the `|`, `+`, `*` and `<-`
# operators are rules. None of them is reassigned after construction.
let
  # -- terminals ----------------------------------------------------------
  sint = exp"-?[0-9][0-9_]*"
  sfloat = exp"-? [0-9_]+ \. [0-9]+ "
  sstr = exp""" " [^"]* " """
  sterm = exp" [\n\r\c;] "
  sname = exp" [A-Za-z_][A-Za-z0-9_]* "
  soperator = exp"[!@#$%^&*<>/\\:+-]+"
  sident = (sname | soperator)
  sws_char = exp"[ \t]"
  swhitespace = +sws_char
  #sexpr = newRule(makeChain)

  # -- rules with handlers ------------------------------------------------
  # A float is tried before an int.
  snum = make_num <- (sfloat | sint)
  ssymbol = (sident | snum)
  # "(" optional-whitespace identifier optional-whitespace ")"
  sparens = makeParen <- (
    (newParamOpen <- op('(')) +
    *swhitespace +
    sident +
    *swhitespace +
    op(')'))
  # One or more symbols, each followed by whitespace.
  smessage = make_message <- +(ssymbol + swhitespace)
import math, rdstdin, simplerepl
# Seeds the random number generator (presumably the one from `math`).
# NOTE(review): nothing in the visible code draws random numbers - confirm
# whether this call is still needed.
randomize()
proc `$`(t: TToken): string =
  ## Text of the token, or "(none)" for a token whose literal was never
  ## set (such as the default token returned by `token`).
  if t.literal.isNil:
    result = "(none)"
  else:
    result = t.literal
proc eat(a: PLexer) =
  ## Handler that discards the matched text: logs the lexer state, then
  ## moves `pos` up to `next` without recording a token.
  echo "om nom nom", a
  a.pos = a.next
proc catch(a: PLexer) =
  ## Handler that keeps the matched text: logs the lexer state, then
  ## records the current text as a token.
  echo "caught! ", a
  discard add_current_tok(a)
# Index into `lx_opts` of the rule the REPL is currently using; changed at
# runtime by the `:<n>` command.
var
  rule = 0

let
  # Selectable demo grammars as (description, rule) pairs.
  lx_opts = @{
    "*(num + whitespace)": *(snum + (eat <- +swschar)),
    "symbol + *(skip(op',') + ws + symbol)": (
      (catch <- ssymbol) +
      *(
        (eat <- op(',')) +
        (eat <- *swschar) +
        (catch <- ssymbol)
      ) ),
    "*(num | +ws)":( *(snum | (eat <- (+swschar))) )
  }
  # The lexer driven by the REPL, initialised with the default rule.
  lx = newLex(lx_opts[rule][1])
# Interactive driver. Each line read from the user is either
#   :<n>   select rule number <n> from `lx_opts`,
#   :?     list the available rules, or
#   other  text to tokenise with the currently selected rule.
repl_loop($rule &">> "):
  try:
    if line.len > 0 and line[0] == ':':
      line = line.substr(1)
      # Require the whole command to be digits. This avoids handing
      # something like "12abc" to parseInt and does not depend on the `re`
      # module, which is only imported when parserRegex is defined.
      if line.len > 0 and allCharsInSet(line, Digits):
        let i = parseInt(line)
        if i >= 0 and i < len(lx_opts):
          rule = i
          lx.rules = @[lx_opts[rule][1]]
          echo "updated rule"
        else:
          echo "invalid option. try :?"
      elif line.len > 0 and line[0] == '?':
        echo "rules:"
        for i in 0..len(lx_opts)-1:
          echo i, ": ", lx_opts[i][0]
    else:
      lx.set_input(line, true)
      # Pull tokens until the lexer reports that nothing more was produced.
      while true:
        let tk = lx.token()
        echo($tk)
        if $tk == "(none)":
          break
      for tok in items(lx.tokens):
        echo($tok)
  except:
    # Keep the REPL alive, but say what went wrong instead of silently
    # dropping the error.
    echo "error: ", getCurrentExceptionMsg()
import rdstdin
template repl_loop(prompt: string; body: stmt): stmt =
  ## Run code once per line.
  ## Injects `line`, a string with the user input in it
  ##
  ## `prompt` is passed to `readlineFromStdin` before every read; the loop
  ## ends when that call returns false.
  ##
  ## .. code-block:: nimrod
  ##   replLoop(">> "):
  ##     echo("Got input: ", line)
  block repl:
    var line {.inject.} = ""
    while true:
      if not readlineFromStdin(prompt, line):
        break
      body
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment