Last active
December 28, 2015 07:49
-
-
Save sshine/7467301 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
(* A source position is a (line, column) pair. *)
type position = int * int

(* Raised for lexical errors; carries (message, (line, column)). *)
exception LexicalError of string * position

(* Scanner bookkeeping used to translate "character at offset N" into
   "character at line L, column C":
     - currentLine   : number of the line currently being scanned (starts at 1);
     - columnOffsets : character offsets recorded for the lines seen so far,
                       most recent line first (the first line is at offset 0). *)
val currentLine   : int ref      = ref 1
val columnOffsets : int list ref = ref [0]
(* Register the start of a new line.

   Must be called from the action of the newline rule, i.e. while the current
   lexeme of `lexbuf` is the line terminator itself.  Increments `currentLine`
   and pushes the offset of the first character *after* the terminator onto
   `columnOffsets`, the list that `getPos` reads.

   The end of the lexeme (rather than its start) is recorded so that columns
   are 0-based on every line, matching the initial entry `0` for line 1.
   Recording the start would make the first character of every later line
   report column 1 instead of column 0. *)
fun nextLine lexbuf =
  let val nextLineStart = getLexemeEnd lexbuf
  in  currentLine := !currentLine + 1
    ; columnOffsets := nextLineStart :: !columnOffsets
  end
local
  (* getLineColumn offset lineNum offsets

     `offsets` holds the recorded line offsets, newest line first, and
     `lineNum` is the line number belonging to the head of that list.
     Walks down the list until it reaches a line whose offset is not greater
     than `offset`, and returns (that line number, offset - that line offset).

     Example: getLineColumn 120 7 [300, 250, 230, 170, 100, 20, 0] ~> (3, 20)
     i.e. with 7 recorded lines beginning at those offsets, character 120 is
     on line 3, column 20.

     Raises LexicalError (with position (0,0)) if the list runs out before a
     matching line is found. *)
  fun getLineColumn offset lineNum offsets =
    case offsets of
        [] =>
          raise LexicalError ("Unknown line number for offset " ^
                              Int.toString offset, (0,0))
      | lineOffset :: earlier =>
          if offset < lineOffset
          then getLineColumn offset (lineNum - 1) earlier
          else (lineNum, offset - lineOffset)
in
  (* Return the (line, column) of the start of the current lexeme of `lexbuf`. *)
  fun getPos lexbuf =
    let val offset = getLexemeStart lexbuf
    in  getLineColumn offset (!currentLine) (!columnOffsets)
    end
end
(* Abort lexing: raise LexicalError carrying the message `s` together with the
   (line, column) position of the current lexeme of `lexbuf`. *)
fun lexerError lexbuf s =
  let val pos = getPos lexbuf
  in  raise LexicalError (s, pos)
  end
(* Helper for building a 'string -> token list' lex function.

   lex' action s  runs the lexer entry point `action` repeatedly over the
   string `s` and returns the tokens produced, in order, up to but not
   including the end-of-file token.

   - The line bookkeeping (`currentLine`, `columnOffsets`) is reset first,
     because a fresh lexbuf starts again at offset 0; without the reset,
     positions from a previous scan would leak into this one.
   - End of input is recognised with the pattern `Parser.EOF _`.  The EOF token
     carries a position, and a bare `EOF` pattern is just a variable that
     matches every token, which made the function always return []. *)
fun lex' (action : lexbuf -> Parser.token) (s : string) =
  let val _  = (currentLine := 1; columnOffsets := [0])
      val LB = Lexing.createLexerString s
      fun loop () =
        case action LB of
            Parser.EOF _ => []
          | tok          => tok :: loop ()
  in  loop ()
  end
} | |
(* Lexer entry point: returns the next Parser token, each tagged with the
   (line, column) position of its first character.

   - Line terminators update the line bookkeeping via `nextLine` and are
     otherwise skipped; blanks, tabs and carriage returns are skipped too, so
     that keywords and numbers can be separated.
   - A digit sequence too large for `int` makes Int.fromString raise Overflow;
     that is mapped to NONE so it is reported as a LexicalError with a
     position, like any other invalid number.
   - Any character not matched by an earlier rule is reported through
     `lexerError` instead of falling out of the lexer without a position. *)
rule getToken = parse
    [`\n` `\012`]     { nextLine lexbuf; getToken lexbuf }
  | [` ` `\t` `\r`]+  { getToken lexbuf }
  | "true"            { Parser.TRUE (getPos lexbuf) }
  | "false"           { Parser.FALSE (getPos lexbuf) }
  | "if"              { Parser.IF (getPos lexbuf) }
  | "then"            { Parser.THEN (getPos lexbuf) }
  | "else"            { Parser.ELSE (getPos lexbuf) }
  | [`0`-`9`]+        { let val lexeme = getLexeme lexbuf
                        in  case (Int.fromString lexeme
                                  handle Overflow => NONE) of
                                SOME n => Parser.NUM (n, getPos lexbuf)
                              | NONE   => lexerError lexbuf
                                            ("Invalid number: " ^ lexeme)
                        end }
  | `+`               { Parser.PLUS (getPos lexbuf) }
  | eof               { Parser.EOF (getPos lexbuf) }
  | _                 { lexerError lexbuf
                          ("Unexpected character: " ^ getLexeme lexbuf) }
  ;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment