Skip to content

Instantly share code, notes, and snippets.

@sshine
Last active December 28, 2015 07:49
Show Gist options
  • Save sshine/7467301 to your computer and use it in GitHub Desktop.
Save sshine/7467301 to your computer and use it in GitHub Desktop.
{
(* A position in the input file, given as a (line, column) pair. *)
type position = int * int
(* Raised on a lexical error. Carries an error message together with the
   position (line, column) that the error is attributed to. *)
exception LexicalError of string * position (* (message, (line, column)) *)
(* Scanner state used to turn "character at offset N" into
   "character at line L, column C".

   currentLine   - number of the line currently being scanned; starts at 1.
   columnOffsets - character offsets at which the lines seen so far begin.
                   getPos walks this list from the current line downwards, so
                   the offset of the most recent line must come first.
                   Starts as [0]: line 1 begins at offset 0. *)
val currentLine = ref 1
val columnOffsets = ref [0]
(* Record that a new line starts right after the current lexeme.

   Intended to be called from the lexer action that matches a line break,
   i.e. while the current lexeme of lexbuf is the line-break character.
   Increments currentLine and pushes the offset of the first character of the
   new line onto columnOffsets.

   The pushed offset is the END of the line-break lexeme, not its start: the
   line begins after the break. This keeps columns 0-based on every line,
   matching line 1, whose recorded start offset is 0. *)
fun nextLine lexbuf =
  let val lineOffset = getLexemeEnd lexbuf
  in currentLine := !currentLine + 1
   ; columnOffsets := lineOffset :: !columnOffsets
  end
local
  (* Translate a character offset into a (line, column) pair.

     offsets holds the recorded line-start offsets, newest line first, and
     lineNum is the line number belonging to the head of that list. The list
     is walked towards older lines until a line start at or before offset is
     found; the column is the distance from that line start.

     Example: getLineColumn 120 7 [300, 250, 230, 170, 100, 20, 0] ~> (3, 20)
     meaning: if the lexer has recorded 7 lines that began at those offsets,
     then the character at offset 120 is on line 3, column 20.

     Raises LexicalError (with position (0,0)) when the list runs out before
     a matching line start is found. *)
  fun getLineColumn offset lineNum offsets =
    case offsets of
      [] =>
        raise LexicalError ("Unknown line number for offset " ^
                            Int.toString offset, (0,0))
    | start :: earlier =>
        if offset < start
        then getLineColumn offset (lineNum - 1) earlier
        else (lineNum, offset - start)
in
  (* Given a lexbuf, return the (line, column) at which its current lexeme
     starts, based on the line starts recorded so far. *)
  fun getPos lexbuf =
    let val offset = getLexemeStart lexbuf
    in getLineColumn offset (!currentLine) (!columnOffsets)
    end
end
(* Abort lexing by raising LexicalError with message s and the (line, column)
   position of the current lexeme of lexbuf. Never returns normally. *)
fun lexerError lexbuf s =
  let val pos = getPos lexbuf
  in raise LexicalError (s, pos)
  end
(* Helper for building a 'string -> token list' function from a lexer rule.

   action - the lexer entry point (e.g. getToken), applied repeatedly to a
            lexbuf created from s.
   s      - the complete input text.

   Returns the tokens in input order, up to but not including the
   end-of-file token.

   The line-tracking state is reset first, because the fresh lexbuf starts
   counting offsets from 0 again; stale line starts from an earlier run would
   otherwise yield wrong positions.

   The end-of-file token carries a position, so it must be matched as
   `Parser.EOF _`. A bare `EOF` here would be a variable pattern that matches
   every token and makes the result always []. *)
fun lex' (action : lexbuf -> Parser.token) (s : string) =
  let val _  = (currentLine := 1; columnOffsets := [0])
      val LB = Lexing.createLexerString s
      fun loop () =
        case action LB of
          Parser.EOF _ => []
        | tok          => tok :: loop ()
  in loop () end
}
(* Main lexer rule. Every token carries the (line, column) position of its
   first character, obtained from getPos.

   Line breaks produce no token: they only update the line bookkeeping via
   nextLine, after which scanning continues.

   Integer literals are reported through lexerError both when the digits do
   not parse and when the value does not fit in an int (Int.fromString
   raises Overflow in that case rather than returning NONE). *)
rule getToken = parse
    [`\n` `\012`]  { nextLine lexbuf; getToken lexbuf }
  | "true"         { Parser.TRUE (getPos lexbuf) }
  | "false"        { Parser.FALSE (getPos lexbuf) }
  | "if"           { Parser.IF (getPos lexbuf) }
  | "then"         { Parser.THEN (getPos lexbuf) }
  | "else"         { Parser.ELSE (getPos lexbuf) }
  | [`0`-`9`]+     { let val digits = getLexeme lexbuf
                     in (case Int.fromString digits of
                           SOME n => Parser.NUM (n, getPos lexbuf)
                         | NONE   => lexerError lexbuf
                                       ("Invalid number: " ^ digits))
                        handle Overflow =>
                          lexerError lexbuf ("Invalid number: " ^ digits)
                     end }
  | `+`            { Parser.PLUS (getPos lexbuf) }
  | eof            { Parser.EOF (getPos lexbuf) }
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment