Created
July 30, 2023 17:07
-
-
Save mndrix/67430097ff7d198bc151fe1dc8958c76 to your computer and use it in GitHub Desktop.
Lexical analysis with OCaml effect handlers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
type t = Lexing.lexbuf -> Parser.token | |
val of_file : string -> t * Lexing.lexbuf | |
(** [of_file path] returns a new lexer for extracting tokens from | |
a file at [path]. *) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
open Parser | |
type t = Lexing.lexbuf -> Parser.token | |
type _ Effect.t += Emit: Parser.token -> unit Effect.t | |
let emit token = Effect.perform (Emit token) | |
let fail fmt = Printf.ksprintf failwith fmt | |
let unexpected context c lexbuf = | |
let p = Lexing.lexeme_start_p lexbuf in | |
let file = p.pos_fname in | |
let line = p.pos_lnum in | |
let column = p.pos_cnum - p.pos_bol in | |
fail "%s: unexpected character: %C at %s:%d:%d" context c file line column | |
} | |
let ws = [ ' ' '\t' ] | |
let ident = ['a'-'z' 'A'-'Z' '_' '.']+ | |
rule statement = parse | |
| ws+ { statement lexbuf (* ignore whitespace *) } | |
| (ident as name) ws* '=' ws* { | |
emit (VARIABLE name); | |
value lexbuf | |
} | |
| eof { | |
emit EOF; | |
statement lexbuf | |
} | |
| _ as c { unexpected "Lexer.statement" c lexbuf } | |
and value = parse | |
| '\n' { | |
Lexing.new_line lexbuf; | |
emit EOL; | |
statement lexbuf | |
} | |
| "${" ( ident as name ) "}" { | |
emit (REF name); | |
value lexbuf | |
} | |
| [^ '$' '\n']+ as content { | |
emit (LITERAL content); | |
value lexbuf | |
} | |
| eof { | |
emit EOL; (* pretend the line ended explicitly *) | |
emit EOF; | |
statement lexbuf | |
} | |
| _ as c { unexpected "Lexer.value" c lexbuf } | |
{ | |
let lexer rule lexbuf = | |
let open Effect.Deep in | |
match !rule with | |
| Some k -> continue k () | |
| None -> | |
let effc (type a) (eff : a Effect.t) = | |
match eff with | |
| Emit token -> | |
let do_emit (k: (a, _) continuation) = | |
rule := Some k; | |
token | |
in | |
Some do_emit | |
| _ -> None | |
in | |
try_with statement lexbuf { effc } | |
let of_file path = | |
let ic = open_in_bin path in | |
Gc.finalise close_in ic; | |
let lexbuf = Lexing.from_channel ic in | |
Lexing.set_filename lexbuf path; | |
(lexer (ref None), lexbuf) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment