fowlmouth · October 8, 2012 11:12
diff --git a/parser.nim b/parser.nim
 import strutils

 when defined(parserPeg):
  import pegs
 elif defined(parserRegex):
  import re
 else:
  {.error: "define either parserPeg or parserRegex".}

 proc last*[A](some: seq[A]): A =
  ## Returns the final element of `some`.
  ## An empty sequence has no such element, so indexing it fails.
  result = some[high(some)]

 type
  # Callback signatures. Both receive the lexer that is being driven.
  TLexHandler = proc(lex: PLexer)
  TLexMatcher = proc(lex: PLexer): bool {.closure.}

  # One lexeme copied out of the input.
  TToken = object
    literal: string

  # A compiled pattern; the backend is fixed at compile time by
  # `-d:parserPeg` or `-d:parserRegex`.
  PParserExpr = ref object
    when defined(parserPeg):
      match: TPeg
    elif defined(parserRegex):
      match: TRegex
    when defined(debug):
      pattern: string  ## pattern source text, kept for debug output

  # A grammar node. `parse` consults `match` first, falls back to `sub`,
  # and runs `call` once either of them has succeeded.
  PRule = ref object
    match: TLexMatcher
    call: TLexHandler  ## called when a pattern matches
    sub: PRule

  # Lexer state: `pos` and `next` delimit the slice returned by `current`.
  PLexer = ref object
    input: string
    pos: int
    next: int
    rules: seq[PRule]
    tokens: seq[TToken]

 template debug_do(body: stmt): stmt =
  ## Expands to `body` in builds compiled with `-d:debug` and to an empty
  ## statement in every other build.
  when defined(debug):
    body
  else:
    discard

 # Forward declaration: `$` below needs `current`, which is defined later.
 proc current*(lex: PLexer): string

 proc `$`*(a: PLexer): string =
  ## Renders the lexer as ``[PLexer <pos>..<next> '<current lexeme>']``.
  result = "[PLexer " & $a.pos & ".." & $a.next & " '" & a.current() & "']"

 proc exp(expression: string): PParserExpr =
  ## Compiles `expression` with the backend chosen at build time (`peg` when
  ## `parserPeg` is defined, `re` otherwise) and wraps it in a new
  ## `PParserExpr`. Debug builds also keep the pattern text.
  new(result)
  when defined(debug):
    result.pattern = expression
  when defined(parserPeg):
    result.match = peg(expression)
  else:
    result.match = re(expression)

 proc parse*(x: PParserExpr; lex: PLexer): bool {.discardable.} =
  ## Tries to match `x` against `lex.input` starting at `lex.next`.
  ## A non-negative match length counts as success: `lex.next` is advanced
  ## by that length and true is returned. Otherwise the lexer is untouched.
  let matched = matchLen(lex.input, x.match, lex.next)
  result = matched >= 0
  if result:
    lex.next += matched
    debug_do: echo("expr match ", x.pattern," ", lex)
  else:
    debug_do: echo "no match for ", x.pattern, " ", lex

 proc parse*(rule: PRule; lex: Plexer): bool =
  ## Evaluates a grammar node: the node's own matcher is tried first, the
  ## wrapped `sub` rule only if that did not succeed. When either of them
  ## matched, the node's handler (if any) is called with the lexer.
  result = (not rule.match.isNil) and rule.match(lex)
  if not result and not rule.sub.isNil:
    result = rule.sub.parse(lex)
  if result and not rule.call.isNil:
    echo "calling handler :)"
    rule.call(lex)


 proc newLex*(rules: varargs[PRule]): PLexer =
  ## Builds a lexer that owns a copy of `rules` and starts with no tokens.
  ## No input is attached yet; see `setInput`.
  new(result)
  newSeq(result.rules, rules.len)
  for i in 0 .. high(rules):
    result.rules[i] = rules[i]
  result.tokens = @[]

 proc setInput*(lex: PLexer; input: string; cleartoks = true) =
  ## Attaches `input` to the lexer and rewinds both cursors to its start.
  ## Tokens collected so far are dropped unless `cleartoks` is false.
  if cleartoks:
    lex.tokens = @[]
  lex.next = 0
  lex.pos = 0
  lex.input = input

 proc token*(lex: PLexer): TToken =
  ## Runs the lexer's rules at the current position and returns the most
  ## recently produced token.
  ##
  ## The result is a default (unset) `TToken` when the input is exhausted,
  ## when no rule matches, or when the rules stop making progress. Only a
  ## token appended during this call is returned: a rule may succeed
  ## without emitting or consuming anything, and handing back the previous
  ## token in that case made callers that loop until "no token" spin
  ## forever, or fail on an empty token list.
  while lex.pos < lex.input.len:
    let
      startPos = lex.pos
      startCount = lex.tokens.len
    for rule in lex.rules:
      if rule.parse(lex):
        break  # first matching rule wins
    if lex.tokens.len > startCount:
      return lex.tokens.last
    if lex.pos <= startPos:
      break  # nothing consumed and nothing emitted: give up

 template or_impl(){.immediate, dirty.}=
  # Shared body of every `|` overload. Dirty on purpose, like `and_impl`:
  # it reads the caller's `a` and `b` and assigns to the caller's `result`.
  result.match = proc(lex: PLexer): bool =
    # Remember where the choice started so that a partly matched `a` cannot
    # leak consumed input or emitted tokens into the attempt at `b`.
    let
      startPos = lex.pos
      startNext = lex.next
      startCount = lex.tokens.len
    result = a.parse(lex)
    if not result:
      lex.pos = startPos
      lex.next = startNext
      if lex.tokens.len > startCount:
        lex.tokens.setLen(startCount)
      result = b.parse(lex)
 proc `|`(a, b: PParserExpr): PRule =
  ## Ordered choice: matches `a`, or else `b` from the same position.
  new(result)
  or_impl
 proc `|`(a, b: PRule): PRule =
  ## Ordered choice: matches `a`, or else `b` from the same position.
  new(result)
  or_impl
 proc `|`(a: PRule; b: PParserExpr): PRule =
  ## Ordered choice: matches `a`, or else `b` from the same position.
  new(result)
  or_impl
 proc `|`(a: PParserExpr; b: PRule): PRule =
  ## Ordered choice: matches `a`, or else `b` from the same position.
  new(result)
  or_impl

 template and_impl(){.immediate, dirty.}=
  # Shared body of the binary `+` overloads; uses the caller's `a`, `b`
  # and `result`. `b` is only attempted after `a` has matched.
  result.match = proc(lex: PLexer): bool =
    if a.parse(lex):
      result = b.parse(lex)
 proc `+`(a, b: PParserExpr): PRule =
  ## Sequence: `a` followed by `b`.
  new(result)
  and_impl
 proc `+`(a: PRule; b: PParserExpr): PRule =
  ## Sequence: `a` followed by `b`.
  new(result)
  and_impl
 proc `+`(a: PParserExpr; b: PRule): PRule =
  ## Sequence: `a` followed by `b`.
  new(result)
  and_impl
 proc `+`(a, b: PRule): PRule =
  ## Sequence: `a` followed by `b`.
  new(result)
  and_impl
  
 proc `+`(a: PRule): PRule =
  ## Match one or more times
  ##
  ## Repetition stops as soon as `a` fails or succeeds without moving
  ## `lex.next`; a zero-width match would otherwise repeat forever.
  new(result)
  result.match = proc(lex: PLexer): bool =
    echo "checking unary +"
    if a.parse(lex):
      echo "unary + Parsed!"
      result = true
      while true:
        let before = lex.next
        if not a.parse(lex) or lex.next <= before:
          break
 proc `*`(a: PRule): PRule =
  ## Match 0 or more times
  ##
  ## Always succeeds. Repetition stops as soon as `a` fails or succeeds
  ## without moving `lex.next`; a zero-width match would otherwise repeat
  ## forever.
  new(result)
  result.match = proc(lex: PLexer): bool =
    result = true
    while true:
      let before = lex.next
      if not a.parse(lex) or lex.next <= before:
        break

 # Forward declaration; the body follows the `<-` operators.
 proc singleExpr(a: PParserExpr): PRule

 proc `+`(a: PParserExpr): PRule =
  ## One or more repetitions of the pattern `a`.
  let wrapped = singleExpr(a)
  result = +wrapped

 proc `*`(a: PParserExpr): PRule =
  ## Zero or more repetitions of the pattern `a`.
  let wrapped = singleExpr(a)
  result = *wrapped

 proc `<-`*(a: TLexHandler; b: PRule): PRule {.discardable.} =
  ## Wraps `b` in a new rule that calls `a` whenever `b` matches.
  new(result)
  result.sub = b
  result.call = a

 proc `<-`*(a: TLexHandler; b: PParserExpr): PRule {.discardable.} =
  ## Variant for a bare pattern, which is first lifted to a rule with
  ## `singleExpr` and then wrapped like any other rule.
  result = `<-`(a, singleExpr(b))

 proc singleExpr(a: PParserExpr): PRule =
  ## Lifts a bare pattern to a rule whose matcher simply parses `a`.
  ##
  ## The closure below captures `a`, exactly as the closures built by
  ## `skip` and the `|`/`+` operators do, so no manual `gc_ref` is taken:
  ## the former call was never balanced by an unref.
  new(result)
  result.match = proc(lex: PLexer): bool =
    result = a.parse(lex)

 proc skip_current(lex: PLexer) =
  ## Handler that discards the current lexeme: logs the lexer, then moves
  ## `pos` up to `next` without storing a token.
  echo("skip_current called! ", lex)
  lex.pos = lex.next

 proc skip(a: PParserExpr): PRule =
  ## if this parses, skip past it
  new(result)
  result.match = proc(lex: PLexer): bool = return a.parse(lex)
  result.call = skip_current
 proc skip(a: PRule): PRule =
  ## Rule variant: when `a` parses, `skip_current` drops what it consumed.
  new(result)
  result.match = proc(lex: PLexer): bool =
    result = a.parse(lex)
  result.call = skip_current

 proc current*(lex: PLexer): string =
  ## The slice of the input from `pos` up to, but not including, `next`.
  result = lex.input[lex.pos .. pred(lex.next)]

 proc newRule*(callfunc: proc(lex: PLexer)): PRule =
  ## Creates a rule that only carries the handler `callfunc`; its matcher
  ## and sub-rule are left unset.
  result.new()
  result.call = callfunc

 proc newtok*(L: string): TToken =
  ## Wraps the text `L` in a token.
  var tok: TToken
  tok.literal = L
  result = tok

 proc add_current_tok*(L: PLexer): TToken {.discardable.} =
  ## Turns the current lexeme into a token, appends it to the lexer's token
  ## list and moves `pos` up to `next`. The new token is also returned.
  result = newtok(L.current())
  L.pos = L.next
  L.tokens.add(result)

 proc op*(c: char): PParserExpr =
  ## Builds a pattern that matches exactly the character `c`.
  ##
  ## The character is escaped for the active backend (`escapePeg` or
  ## `escapeRe`). The PEG branch used to emit `'<c>'` verbatim, which is
  ## not a valid pattern when `c` is a quote or a backslash. Debug builds
  ## keep the escaped pattern text.
  new(result)
  var pattern: string
  when defined(parserPeg):
    pattern = escapePeg($c)
    result.match = peg(pattern)
  else:
    pattern = escapeRe($c)
    result.match = re(pattern)
  when defined(debug):
    result.pattern = pattern

 # Rule handlers. Each one logs what it received and then stores the
 # current lexeme as a token.
 proc makeChain(lex: PLexer) =
  echo("makechain(", lex.current())
  discard add_current_tok(lex)
 proc makeNum(lex: PLexer) =
  echo("makenum(", lex.current(), ")")
  discard add_current_tok(lex)

 proc makeParen(lex: PLexer) =
  echo("makeparen:", lex.current())
  discard add_current_tok(lex)

 proc new_param_open(lex: PLExer) =
  ## Also prints the lexer state after the token has been stored.
  echo("new param open: ", lex.current())
  discard add_current_tok(lex)
  echo($lex)

 proc make_message(lex: PLExer) =
  echo("make_message: ", lex.current())
  discard add_current_tok(lex)

 proc eat_whitespace(lex: PLExer)=
  ## NOTE(review): despite the name this stores the lexeme as a token
  ## rather than dropping it - confirm that is intended.
  echo("eat_whitespace \"", lex, "\"")
  discard add_current_tok(lex)

 # Grammar building blocks. The pattern texts are raw string literals, so
 # they reach `exp` exactly as written.
 var 
  # optional minus sign, one digit, then digits or underscores
  sint = exp"-?[0-9][0-9_]*"
  # optional minus sign, digits/underscores, a dot, digits
  sfloat = exp"-? [0-9_]+ \. [0-9]+ "
  # double-quoted text without an embedded double quote
  sstr = exp""" " [^"]* " """
  # presumably a line break or ';' - the escapes are interpreted by the
  # pattern engine, not by Nim; verify what `\c;` means there
  sterm = exp" [\n\r\c;] "
  # letter or underscore, then letters, digits or underscores
  sname = exp" [A-Za-z_][A-Za-z0-9_]* "
  # one or more operator characters
  soperator = exp"[!@#$%^&*<>/\\:+-]+"
  # a name, or else an operator
  sident = (sname | soperator)
  # a single space or tab, and a run of one or more of them
  sws_char = exp"[ \t]"
  swhitespace = +sws_char
  #sexpr = newRule(makeChain)
  
  # float is tried before int; makeNum runs on a match
  snum = make_num <- (sfloat | sint)
  # an identifier, or else a number
  ssymbol = (sident | snum)
  # '(' ws* ident ws* ')': new_param_open runs after the opening
  # parenthesis, makeParen after the whole group
  sparens = makeParen <- (
    (newParamOpen <- op('(')) + 
    *swhitespace +
    sident + 
    *swhitespace +
    op(')'))
  # one or more symbols, each followed by whitespace
  smessage = make_message <- +(ssymbol + swhitespace)


 import math, rdstdin, simplerepl
 # Seeds the random number generator. NOTE(review): nothing visible in this
 # file draws random numbers afterwards - confirm the call is still needed.
 randomize()


 proc `$`(t: TToken): string =
  ## The token's text, or "(none)" for a token whose literal was never set.
  if t.literal.isNil:
    result = "(none)"
  else:
    result = t.literal

 proc eat(a: PLexer) =
  ## Handler that drops the current lexeme: logs the lexer, then moves
  ## `pos` up to `next` without storing a token.
  echo("om nom nom", a)
  a.pos = a.next

 proc catch(a: PLexer) =
  ## Handler that keeps the current lexeme: logs the lexer, then stores the
  ## lexeme as a token.
  echo("caught! ", a)
  discard add_current_tok(a)


 # State of the interactive demo:
 #   rule    - index of the active entry in lx_opts
 #   lx_opts - (description, grammar) pairs that can be selected
 #   lx      - lexer initialised with the grammar of the active entry
 var
  rule = 0
  lx_opts = @{
    "*(num + whitespace)": *(snum + (eat <- +swschar)),
    "symbol + *(skip(op',') + ws + symbol)": ( 
      (catch <- ssymbol) + 
      *(
        (eat <- op(',')) + 
        (eat <- *swschar) + 
        (catch <- ssymbol)
      )    ),
    "*(num | +ws)":(      *(snum | (eat <- (+swschar)))    )
  }
  lx = newLex(lx_opts[rule][1])

 repl_loop($rule &">> "):
  # One iteration per input line. A line starting with ':' is a command
  # (":<n>" selects grammar n, ":?" lists the grammars); any other line is
  # lexed with the active grammar and the resulting tokens are printed.
  try:
    if line.len > 0 and line[0] == ':':
      line = line.substr(1)
      # Plain character test instead of a regular expression: the `re`
      # module is only imported in parserRegex builds, and the whole
      # argument has to be numeric for parseInt to accept it.
      if line.len > 0 and allCharsInSet(line, Digits):
        let i = parseint(line)
        if i < len(lx_opts) and i >= 0:
          rule = i
          lx.rules = @[lx_opts[rule][1]]
          echo "updated rule"
        else:
          echo "invalid option. try :?"
      elif line.len > 0 and line[0] == '?':
        echo "rules:"
        for i in 0..len(lx_opts)-1:
          echo i, ": ", lx_opts[i][0]
    else:
      lx.set_input(line, true)
      while true:
        let before = lx.pos
        var tk = lx.token()
        echo($tk)
        # Also stop when the lexer did not advance; input the grammar
        # cannot consume would otherwise be retried forever.
        if $tk == "(none)" or lx.pos <= before:
          break
      for tok in items(lx.tokens):
        echo($tok)
  except:
    # Keep the REPL alive, but report the failure instead of hiding it.
    echo "error: ", getCurrentExceptionMsg()

diff --git a/simplerepl.nim b/simplerepl.nim
 import rdstdin

 template repl_loop(prompt: string; body: stmt): stmt =
  ## Reads lines from standard input and runs `body` once for each of them.
  ## The text just read is available to `body` as the injected variable
  ## `line`; the loop ends when `readlineFromStdin` returns false.
  ##
  ## .. code-block:: nimrod
  ##    replLoop(">> "):
  ##      echo("Got input: ", line)
  block repl:
    var line{.inject.} = ""
    while true:
      if not readlineFromStdin(prompt, line): break
      body
	import strutils

	when defined(parserPeg):
	import pegs
	elif defined(parserRegex):
	import re
	else:
	{.error: "define either parserPeg or parserRegex".}

	proc last*[A](some: seq[A]): A =
	  ## Returns the final element of `some`.
	  ## An empty sequence has no such element, so indexing it fails.
	  result = some[high(some)]

	type
	  # Callback signatures. Both receive the lexer that is being driven.
	  TLexHandler = proc(lex: PLexer)
	  TLexMatcher = proc(lex: PLexer): bool {.closure.}

	  # One lexeme copied out of the input.
	  TToken = object
	    literal: string

	  # A compiled pattern; the backend is fixed at compile time by
	  # `-d:parserPeg` or `-d:parserRegex`.
	  PParserExpr = ref object
	    when defined(parserPeg):
	      match: TPeg
	    elif defined(parserRegex):
	      match: TRegex
	    when defined(debug):
	      pattern: string  ## pattern source text, kept for debug output

	  # A grammar node. `parse` consults `match` first, falls back to `sub`,
	  # and runs `call` once either of them has succeeded.
	  PRule = ref object
	    match: TLexMatcher
	    call: TLexHandler  ## called when a pattern matches
	    sub: PRule

	  # Lexer state: `pos` and `next` delimit the slice returned by `current`.
	  PLexer = ref object
	    input: string
	    pos: int
	    next: int
	    rules: seq[PRule]
	    tokens: seq[TToken]

	template debug_do(body: stmt): stmt =
	  ## Expands to `body` in builds compiled with `-d:debug` and to an empty
	  ## statement in every other build.
	  when defined(debug):
	    body
	  else:
	    discard

	# Forward declaration: `$` below needs `current`, which is defined later.
	proc current*(lex: PLexer): string

	proc `$`*(a: PLexer): string =
	  ## Renders the lexer as ``[PLexer <pos>..<next> '<current lexeme>']``.
	  result = "[PLexer " & $a.pos & ".." & $a.next & " '" & a.current() & "']"

	proc exp(expression: string): PParserExpr =
	  ## Compiles `expression` with the backend chosen at build time (`peg` when
	  ## `parserPeg` is defined, `re` otherwise) and wraps it in a new
	  ## `PParserExpr`. Debug builds also keep the pattern text.
	  new(result)
	  when defined(debug):
	    result.pattern = expression
	  when defined(parserPeg):
	    result.match = peg(expression)
	  else:
	    result.match = re(expression)

	proc parse*(x: PParserExpr; lex: PLexer): bool {.discardable.} =
	  ## Tries to match `x` against `lex.input` starting at `lex.next`.
	  ## A non-negative match length counts as success: `lex.next` is advanced
	  ## by that length and true is returned. Otherwise the lexer is untouched.
	  let matched = matchLen(lex.input, x.match, lex.next)
	  result = matched >= 0
	  if result:
	    lex.next += matched
	    debug_do: echo("expr match ", x.pattern," ", lex)
	  else:
	    debug_do: echo "no match for ", x.pattern, " ", lex

	proc parse*(rule: PRule; lex: Plexer): bool =
	  ## Evaluates a grammar node: the node's own matcher is tried first, the
	  ## wrapped `sub` rule only if that did not succeed. When either of them
	  ## matched, the node's handler (if any) is called with the lexer.
	  result = (not rule.match.isNil) and rule.match(lex)
	  if not result and not rule.sub.isNil:
	    result = rule.sub.parse(lex)
	  if result and not rule.call.isNil:
	    echo "calling handler :)"
	    rule.call(lex)


	proc newLex*(rules: varargs[PRule]): PLexer =
	  ## Builds a lexer that owns a copy of `rules` and starts with no tokens.
	  ## No input is attached yet; see `setInput`.
	  new(result)
	  newSeq(result.rules, rules.len)
	  for i in 0 .. high(rules):
	    result.rules[i] = rules[i]
	  result.tokens = @[]

	proc setInput*(lex: PLexer; input: string; cleartoks = true) =
	  ## Attaches `input` to the lexer and rewinds both cursors to its start.
	  ## Tokens collected so far are dropped unless `cleartoks` is false.
	  if cleartoks:
	    lex.tokens = @[]
	  lex.next = 0
	  lex.pos = 0
	  lex.input = input

	proc token*(lex: PLexer): TToken =
	  ## Runs the lexer's rules at the current position and returns the most
	  ## recently produced token.
	  ##
	  ## The result is a default (unset) `TToken` when the input is exhausted,
	  ## when no rule matches, or when the rules stop making progress. Only a
	  ## token appended during this call is returned: a rule may succeed
	  ## without emitting or consuming anything, and handing back the previous
	  ## token in that case made callers that loop until "no token" spin
	  ## forever, or fail on an empty token list.
	  while lex.pos < lex.input.len:
	    let
	      startPos = lex.pos
	      startCount = lex.tokens.len
	    for rule in lex.rules:
	      if rule.parse(lex):
	        break  # first matching rule wins
	    if lex.tokens.len > startCount:
	      return lex.tokens.last
	    if lex.pos <= startPos:
	      break  # nothing consumed and nothing emitted: give up

	template or_impl(){.immediate, dirty.}=
	  # Shared body of every `|` overload. Dirty on purpose, like `and_impl`:
	  # it reads the caller's `a` and `b` and assigns to the caller's `result`.
	  result.match = proc(lex: PLexer): bool =
	    # Remember where the choice started so that a partly matched `a` cannot
	    # leak consumed input or emitted tokens into the attempt at `b`.
	    let
	      startPos = lex.pos
	      startNext = lex.next
	      startCount = lex.tokens.len
	    result = a.parse(lex)
	    if not result:
	      lex.pos = startPos
	      lex.next = startNext
	      if lex.tokens.len > startCount:
	        lex.tokens.setLen(startCount)
	      result = b.parse(lex)
	proc `|`(a, b: PParserExpr): PRule =
	  ## Ordered choice: matches `a`, or else `b` from the same position.
	  new(result)
	  or_impl
	proc `|`(a, b: PRule): PRule =
	  ## Ordered choice: matches `a`, or else `b` from the same position.
	  new(result)
	  or_impl
	proc `|`(a: PRule; b: PParserExpr): PRule =
	  ## Ordered choice: matches `a`, or else `b` from the same position.
	  new(result)
	  or_impl
	proc `|`(a: PParserExpr; b: PRule): PRule =
	  ## Ordered choice: matches `a`, or else `b` from the same position.
	  new(result)
	  or_impl

	template and_impl(){.immediate, dirty.}=
	  # Shared body of the binary `+` overloads; uses the caller's `a`, `b`
	  # and `result`. `b` is only attempted after `a` has matched.
	  result.match = proc(lex: PLexer): bool =
	    if a.parse(lex):
	      result = b.parse(lex)
	proc `+`(a, b: PParserExpr): PRule =
	  ## Sequence: `a` followed by `b`.
	  new(result)
	  and_impl
	proc `+`(a: PRule; b: PParserExpr): PRule =
	  ## Sequence: `a` followed by `b`.
	  new(result)
	  and_impl
	proc `+`(a: PParserExpr; b: PRule): PRule =
	  ## Sequence: `a` followed by `b`.
	  new(result)
	  and_impl
	proc `+`(a, b: PRule): PRule =
	  ## Sequence: `a` followed by `b`.
	  new(result)
	  and_impl

	proc `+`(a: PRule): PRule =
	  ## Match one or more times
	  ##
	  ## Repetition stops as soon as `a` fails or succeeds without moving
	  ## `lex.next`; a zero-width match would otherwise repeat forever.
	  new(result)
	  result.match = proc(lex: PLexer): bool =
	    echo "checking unary +"
	    if a.parse(lex):
	      echo "unary + Parsed!"
	      result = true
	      while true:
	        let before = lex.next
	        if not a.parse(lex) or lex.next <= before:
	          break
	proc `*`(a: PRule): PRule =
	  ## Match 0 or more times
	  ##
	  ## Always succeeds. Repetition stops as soon as `a` fails or succeeds
	  ## without moving `lex.next`; a zero-width match would otherwise repeat
	  ## forever.
	  new(result)
	  result.match = proc(lex: PLexer): bool =
	    result = true
	    while true:
	      let before = lex.next
	      if not a.parse(lex) or lex.next <= before:
	        break

	# Forward declaration; the body follows the `<-` operators.
	proc singleExpr(a: PParserExpr): PRule

	proc `+`(a: PParserExpr): PRule =
	  ## One or more repetitions of the pattern `a`.
	  let wrapped = singleExpr(a)
	  result = +wrapped

	proc `*`(a: PParserExpr): PRule =
	  ## Zero or more repetitions of the pattern `a`.
	  let wrapped = singleExpr(a)
	  result = *wrapped

	proc `<-`*(a: TLexHandler; b: PRule): PRule {.discardable.} =
	  ## Wraps `b` in a new rule that calls `a` whenever `b` matches.
	  new(result)
	  result.sub = b
	  result.call = a

	proc `<-`*(a: TLexHandler; b: PParserExpr): PRule {.discardable.} =
	  ## Variant for a bare pattern, which is first lifted to a rule with
	  ## `singleExpr` and then wrapped like any other rule.
	  result = `<-`(a, singleExpr(b))

	proc singleExpr(a: PParserExpr): PRule =
	  ## Lifts a bare pattern to a rule whose matcher simply parses `a`.
	  ##
	  ## The closure below captures `a`, exactly as the closures built by
	  ## `skip` and the `|`/`+` operators do, so no manual `gc_ref` is taken:
	  ## the former call was never balanced by an unref.
	  new(result)
	  result.match = proc(lex: PLexer): bool =
	    result = a.parse(lex)

	proc skip_current(lex: PLexer) =
	  ## Handler that discards the current lexeme: logs the lexer, then moves
	  ## `pos` up to `next` without storing a token.
	  echo("skip_current called! ", lex)
	  lex.pos = lex.next

	proc skip(a: PParserExpr): PRule =
	  ## if this parses, skip past it
	  new(result)
	  result.match = proc(lex: PLexer): bool = return a.parse(lex)
	  result.call = skip_current
	proc skip(a: PRule): PRule =
	  ## Rule variant: when `a` parses, `skip_current` drops what it consumed.
	  new(result)
	  result.match = proc(lex: PLexer): bool =
	    result = a.parse(lex)
	  result.call = skip_current

	proc current*(lex: PLexer): string =
	  ## The slice of the input from `pos` up to, but not including, `next`.
	  result = lex.input[lex.pos .. pred(lex.next)]

	proc newRule*(callfunc: proc(lex: PLexer)): PRule =
	  ## Creates a rule that only carries the handler `callfunc`; its matcher
	  ## and sub-rule are left unset.
	  result.new()
	  result.call = callfunc

	proc newtok*(L: string): TToken =
	  ## Wraps the text `L` in a token.
	  var tok: TToken
	  tok.literal = L
	  result = tok

	proc add_current_tok*(L: PLexer): TToken {.discardable.} =
	  ## Turns the current lexeme into a token, appends it to the lexer's token
	  ## list and moves `pos` up to `next`. The new token is also returned.
	  result = newtok(L.current())
	  L.pos = L.next
	  L.tokens.add(result)

	proc op*(c: char): PParserExpr =
	  ## Builds a pattern that matches exactly the character `c`.
	  ##
	  ## The character is escaped for the active backend (`escapePeg` or
	  ## `escapeRe`). The PEG branch used to emit `'<c>'` verbatim, which is
	  ## not a valid pattern when `c` is a quote or a backslash. Debug builds
	  ## keep the escaped pattern text.
	  new(result)
	  var pattern: string
	  when defined(parserPeg):
	    pattern = escapePeg($c)
	    result.match = peg(pattern)
	  else:
	    pattern = escapeRe($c)
	    result.match = re(pattern)
	  when defined(debug):
	    result.pattern = pattern

	# Rule handlers. Each one logs what it received and then stores the
	# current lexeme as a token.
	proc makeChain(lex: PLexer) =
	  echo("makechain(", lex.current())
	  discard add_current_tok(lex)
	proc makeNum(lex: PLexer) =
	  echo("makenum(", lex.current(), ")")
	  discard add_current_tok(lex)

	proc makeParen(lex: PLexer) =
	  echo("makeparen:", lex.current())
	  discard add_current_tok(lex)

	proc new_param_open(lex: PLExer) =
	  ## Also prints the lexer state after the token has been stored.
	  echo("new param open: ", lex.current())
	  discard add_current_tok(lex)
	  echo($lex)

	proc make_message(lex: PLExer) =
	  echo("make_message: ", lex.current())
	  discard add_current_tok(lex)

	proc eat_whitespace(lex: PLExer)=
	  ## NOTE(review): despite the name this stores the lexeme as a token
	  ## rather than dropping it - confirm that is intended.
	  echo("eat_whitespace \"", lex, "\"")
	  discard add_current_tok(lex)

	# Grammar building blocks. The pattern texts are raw string literals, so
	# they reach `exp` exactly as written.
	var
	  # optional minus sign, one digit, then digits or underscores
	  sint = exp"-?[0-9][0-9_]*"
	  # optional minus sign, digits/underscores, a dot, digits
	  sfloat = exp"-? [0-9_]+ \. [0-9]+ "
	  # double-quoted text without an embedded double quote
	  sstr = exp""" " [^"]* " """
	  # presumably a line break or ';' - the escapes are interpreted by the
	  # pattern engine, not by Nim; verify what `\c;` means there
	  sterm = exp" [\n\r\c;] "
	  # letter or underscore, then letters, digits or underscores
	  sname = exp" [A-Za-z_][A-Za-z0-9_]* "
	  # one or more operator characters
	  soperator = exp"[!@#$%^&*<>/\\:+-]+"
	  # a name, or else an operator
	  sident = (sname | soperator)
	  # a single space or tab, and a run of one or more of them
	  sws_char = exp"[ \t]"
	  swhitespace = +sws_char
	  #sexpr = newRule(makeChain)

	  # float is tried before int; makeNum runs on a match
	  snum = make_num <- (sfloat | sint)
	  # an identifier, or else a number
	  ssymbol = (sident | snum)
	  # '(' ws* ident ws* ')': new_param_open runs after the opening
	  # parenthesis, makeParen after the whole group
	  sparens = makeParen <- (
	    (newParamOpen <- op('(')) +
	    *swhitespace +
	    sident +
	    *swhitespace +
	    op(')'))
	  # one or more symbols, each followed by whitespace
	  smessage = make_message <- +(ssymbol + swhitespace)


	import math, rdstdin, simplerepl
	# Seeds the random number generator. NOTE(review): nothing visible in this
	# file draws random numbers afterwards - confirm the call is still needed.
	randomize()


	proc `$`(t: TToken): string =
	  ## The token's text, or "(none)" for a token whose literal was never set.
	  if t.literal.isNil:
	    result = "(none)"
	  else:
	    result = t.literal

	proc eat(a: PLexer) =
	  ## Handler that drops the current lexeme: logs the lexer, then moves
	  ## `pos` up to `next` without storing a token.
	  echo("om nom nom", a)
	  a.pos = a.next

	proc catch(a: PLexer) =
	  ## Handler that keeps the current lexeme: logs the lexer, then stores the
	  ## lexeme as a token.
	  echo("caught! ", a)
	  discard add_current_tok(a)


	# State of the interactive demo:
	#   rule    - index of the active entry in lx_opts
	#   lx_opts - (description, grammar) pairs that can be selected
	#   lx      - lexer initialised with the grammar of the active entry
	var
	  rule = 0
	  lx_opts = @{
	    "*(num + whitespace)": *(snum + (eat <- +swschar)),
	    "symbol + *(skip(op',') + ws + symbol)": (
	      (catch <- ssymbol) +
	      *(
	        (eat <- op(',')) +
	        (eat <- *swschar) +
	        (catch <- ssymbol)
	      )    ),
	    "*(num | +ws)":(      *(snum | (eat <- (+swschar)))    )
	  }
	  lx = newLex(lx_opts[rule][1])

	repl_loop($rule &">> "):
	  # One iteration per input line. A line starting with ':' is a command
	  # (":<n>" selects grammar n, ":?" lists the grammars); any other line is
	  # lexed with the active grammar and the resulting tokens are printed.
	  try:
	    if line.len > 0 and line[0] == ':':
	      line = line.substr(1)
	      # Plain character test instead of a regular expression: the `re`
	      # module is only imported in parserRegex builds, and the whole
	      # argument has to be numeric for parseInt to accept it.
	      if line.len > 0 and allCharsInSet(line, Digits):
	        let i = parseint(line)
	        if i < len(lx_opts) and i >= 0:
	          rule = i
	          lx.rules = @[lx_opts[rule][1]]
	          echo "updated rule"
	        else:
	          echo "invalid option. try :?"
	      elif line.len > 0 and line[0] == '?':
	        echo "rules:"
	        for i in 0..len(lx_opts)-1:
	          echo i, ": ", lx_opts[i][0]
	    else:
	      lx.set_input(line, true)
	      while true:
	        let before = lx.pos
	        var tk = lx.token()
	        echo($tk)
	        # Also stop when the lexer did not advance; input the grammar
	        # cannot consume would otherwise be retried forever.
	        if $tk == "(none)" or lx.pos <= before:
	          break
	      for tok in items(lx.tokens):
	        echo($tok)
	  except:
	    # Keep the REPL alive, but report the failure instead of hiding it.
	    echo "error: ", getCurrentExceptionMsg()
	import rdstdin

	template repl_loop(prompt: string; body: stmt): stmt =
	  ## Reads lines from standard input and runs `body` once for each of them.
	  ## The text just read is available to `body` as the injected variable
	  ## `line`; the loop ends when `readlineFromStdin` returns false.
	  ##
	  ## .. code-block:: nimrod
	  ##    replLoop(">> "):
	  ##      echo("Got input: ", line)
	  block repl:
	    var line{.inject.} = ""
	    while true:
	      if not readlineFromStdin(prompt, line): break
	      body