Skip to content

Instantly share code, notes, and snippets.

@hkoba
Last active April 23, 2018 02:59
Show Gist options
  • Save hkoba/5495600 to your computer and use it in GitHub Desktop.
Save hkoba/5495600 to your computer and use it in GitHub Desktop.
Toy S-expression parser in OCaml with recursive descent parsing. Only supports symbol, integer and list. (This is just a partial port of brilliant awklisp: https://github.com/darius/awklisp/blob/master/awklisp )
module CharClass =
struct
let is_space c = c = ' ' || c = '\t'
let strrange str s e =
String.sub str s (e-s)
let strseek str start fn =
let pos = ref start
and end_ = String.length str in
while !pos < end_ && (fn (str.[!pos])) do incr pos done;
!pos
let skip_ws str start = strseek str start (fun c -> is_space c)
let seek_ws str start = strseek str start (fun c -> not (is_space c))
let strtok str start =
if (String.length str) <= start then
None
else
let ts = skip_ws str start in
if (String.length str) <= ts then
None
else
Some (ts, seek_ws str ts)
exception Bad_ccdef of string
let ccdefs_of_string str =
let ccdef str start end_ =
let s = strrange str start end_
and len = end_ - start in
if len = 1 then
let n = int_of_char str.[start] in
(n, n, s)
else if str.[start] = '-' then
let n = int_of_string (strrange str (start+1) end_) in
(n, n, s)
else if len = 3 && str.[start+1] = '-' then
(int_of_char str.[start], int_of_char str.[start+2], s)
else
raise (Bad_ccdef s)
in
let rec parse str pos =
match (strtok str pos) with
None -> []
| Some (ts, te) -> (ccdef str ts te) :: (parse str te)
in
parse str 0
let ccdef_fst (x, _, _) = x
let ccdef_snd (_, x, _) = x
let ccdef_trd (_, _, x) = x
let foreach lst fn = List.iter fn lst
let cc_of_string str =
let cc = Array.create 256 false
and ccdefs = ccdefs_of_string str
in
foreach ccdefs (fun ccr ->
for i = (ccdef_fst ccr) to (ccdef_snd ccr) do
cc.(i) <- true
done
);
cc
let in_cc cc c = cc.(int_of_char c)
let cc_match_one cc str pos =
in_cc cc str.[!pos] && (incr pos; true)
let cc_match_many cc str pos =
let nmatch = ref 0 in
while (!pos < String.length str) && in_cc cc str.[!pos]
do incr pos; incr nmatch done;
!nmatch > 0
let char_match c str pos =
str.[!pos] = c && (incr pos; true)
end
module Sexparse =
struct
open CharClass
let cc_paren = cc_of_string "( ) '"
let cc_atom = cc_of_string "_ A-Z a-z 0-9 = ! @ $ % & * < > ? + \\ - * / :"
let cc_ws = cc_of_string "-9 -32"
let cc_digit = cc_of_string "0-9"
let ignore v = ()
let skip_ws str pos = ignore (cc_match_many cc_ws str pos)
type sexp = Symbol of string | Int of int | Lst of sexp list
exception Syntax_error of string
let read ?(start=0) str =
let rec _read str pos =
skip_ws str pos;
let start = !pos in
if (String.length str) <= !pos then
Lst []
else if char_match '(' str pos; then
Lst (_list str pos [])
else if char_match '\'' str pos; then
Lst ((Symbol "quote") :: [_read str pos])
else if cc_match_many cc_digit str pos then
Int (int_of_string (strrange str start !pos))
else if cc_match_many cc_atom str pos then
Symbol (strrange str start !pos)
else
raise (Syntax_error "foo")
and _list str pos res =
skip_ws str pos;
if char_match ')' str pos then
List.rev res
else
_list str pos (_read str pos :: res)
in
let pos = (ref start) in
let res = _read str pos in
res, !pos
end
;;
(*
print_endline (Std.dump (Sexparse.read ~start:19 "(foo '123 baz) bar baz"))
*)
(*
# open Sexparse;;
# read "(foo bar baz)";;
- : Sexparse.sexp * int =
(Lst [Symbol "foo"; Symbol "bar"; Symbol "baz"], 13)
# read "(foo 123 baz)";;
- : Sexparse.sexp * int = (Lst [Symbol "foo"; Int 123; Symbol "baz"], 13)
# read "(foo '123 baz)";;
- : Sexparse.sexp * int =
(Lst [Symbol "foo"; Lst [Symbol "quote"; Int 123]; Symbol "baz"], 14)
# read "(foo 123) bar 'baz";;
- : Sexparse.sexp * int = (Lst [Symbol "foo"; Int 123], 9)
# read ~start:9 "(foo 123) bar 'baz";;
- : Sexparse.sexp * int = (Symbol "bar", 13)
# read ~start:13 "(foo 123) bar 'baz";;
- : Sexparse.sexp * int = (Lst [Symbol "quote"; Symbol "baz"], 18)
# read ~start:18 "(foo 123) bar 'baz";;
- : Sexparse.sexp * int = (Lst [], 18)
#
*)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment