Skip to content

Instantly share code, notes, and snippets.

@hkoba
Created February 19, 2014 13:46
Show Gist options
  • Save hkoba/9092301 to your computer and use it in GitHub Desktop.
Save hkoba/9092301 to your computer and use it in GitHub Desktop.
Short S-expression parser in OCaml, take2. (This time with string support). Basically for my self training.
(* -*- coding: utf-8 -*- *)
open Core.Std
module Util = struct
(** Perl5's defined-or operator *)
let (//) opt def = match opt with Some x -> x | None -> def
let min x y = if x < y then x else y
let apply_if opt fn v =
match opt with
| None -> v
| Some bound -> fn bound v
let cap_if opt v =
apply_if opt min v
end
module Strutil = struct
include Util
(** [compare left right] is an extended version of {!String.compare}
for string regions(I hope).
[~loff], [~roff] is where substring starts.
[~llen], [~rlen] is length to compare.
*)
let compare ?(loff=0) ?llen left ?(roff=0) ?rlen right =
let cmp l r =
if l < r then -1
else if l > r then 1
else 0
in
let rec loop left loff llen right roff rlen i len =
if i >= len then
cmp llen rlen
else
let n = cmp left.[loff + i] right.[roff + i] in
if n <> 0 then
n
else
loop left loff llen right roff rlen
(i+1) len
in
let llen = cap_if llen (String.length left - loff)
and rlen = cap_if rlen (String.length right - roff) in
loop left loff llen right roff rlen
0 (min llen rlen)
end
module Strrange = struct
include Util
module SU = Strutil
type t = {base: string; mutable start: int; mutable finish: int}
let create ?(start=0) ?finish ?(empty=false) str =
{base = str; start = start;
finish =
if empty then start
else min (finish // String.length str) (String.length str)}
let of_string str = create str
let to_string ?(offset=0) {base; start; finish} =
let start = start + offset in
String.sub base ~pos:start ~len:(finish - start)
let copy ?start ?finish range =
{range with start = start // range.start; finish = finish // range.finish}
let deep_copy {base; start; finish} =
{base = String.copy base; start = start; finish = finish}
let length {start; finish; _} = finish - start
(*let is_empty {start; finish; _} = start >= finish*)
let get ?(offset=0) range =
range.base.[range.start + offset]
let compare ?len left right =
SU.compare
~loff:left.start ~llen:(cap_if len (length left)) left.base
~roff:right.start ~rlen:(cap_if len (length right)) right.base
end
module Strcursor = struct
module SU = Strutil
type t = {subject: string; mutable pos: int}
let create ?(pos=0) subject = {subject; pos}
let of_string ?(pos=0) subject = {subject; pos}
let to_subject_pos {subject; pos} = subject, pos
let pos sc = sc.pos
let to_string ?(start=0) sc =
String.sub sc.subject ~pos:start ~len:(sc.pos - start)
let to_strrange ?(start=0) {subject; pos} =
Strrange.create ~start ~finish:pos subject
let peek ?(off=0) cursor =
cursor.subject.[cursor.pos + off]
let length {subject; pos} =
String.length subject - pos
let can_peek ?(len=1) cursor =
length cursor - len >= 0
let empty cursor =
not (can_peek cursor)
let advance ?(len=1) cursor =
cursor.pos <- cursor.pos + len;
true (* XXX: maybe bad habit *)
let advance_while test cursor =
while can_peek cursor && test (peek cursor); do
ignore (advance cursor)
done;
pos cursor
let compare str cursor =
SU.compare str ~roff:cursor.pos cursor.subject
exception End_of_cursor of t
let get ?(skip=0) cursor =
if not (can_peek ~len:(skip+1) cursor) then
raise (End_of_cursor cursor)
else
(if skip > 0 then
ignore (advance ~len:skip cursor);
let ch = peek cursor in
ignore (advance cursor);
ch)
end
module Strtok = struct
include Strrange
module SC = Strcursor
let space c = c = ' ' || c = '\t' || c = '\n'
let non_space c = not (space c)
let next_token sc =
let start = SC.advance_while space sc in
if not (SC.can_peek sc) then
None
else
(ignore (SC.advance_while non_space sc);
Some (SC.to_strrange ~start:start sc))
let token_list_of_string str =
let rec loop sc lst =
(*Printf.printf "pos %d in subj %s\n" (SC.pos sc) sc.SC.subject;*)
match next_token sc with
| None -> List.rev lst
| Some tok -> loop sc (tok :: lst)
in
let sc = SC.of_string str in
loop sc []
end
module CharRangeList = struct
module ST = Strtok
module Spec = struct
type t = {first: int; last: int}
exception Bad_charrange of Strrange.t
let create first last =
{first = first; last = last}
let of_strrange sr =
let ord = int_of_char in
let len = ST.length sr in
if len = 1 then
let n = ord (ST.get sr) in
create n n
else if ST.get sr = '-' then
let n = int_of_string (ST.to_string ~offset:1 sr) in
create n n
else if len = 3 && (ST.get ~offset:1 sr) = '-' then
create (ord (ST.get sr)) (ord (ST.get ~offset:2 sr))
else
(* XXX: Is this good? *)
raise (Bad_charrange sr)
end
type t = Spec.t list
let of_string str =
List.map ~f:Spec.of_strrange (ST.token_list_of_string str)
end
module CharClass = struct
type t = bool array
module CRL = CharRangeList
let create ?(init=false) () =
Array.create ~len:256 init
(*Without ~len, I got:
Warning 6: labels were omitted in the application of this function.
*)
let invert cc =
Array.map ~f:(fun b -> not b) cc
let set_charrange cc ?(value=true) cr =
for i = cr.CRL.Spec.first to cr.CRL.Spec.last do
cc.(i) <- value
done;
cc
let of_string str =
let cc = create ()
and ccdefs = CRL.of_string str in
List.iter ccdefs ~f:(fun def -> ignore (set_charrange cc def));
cc
let contains cc ch = cc.(int_of_char ch)
end
module Strmatch = struct
module SC = Strcursor
module CC = CharClass
let char ch sc =
SC.peek sc = ch && SC.advance sc
let string string sc =
SC.compare string sc = 0
&& SC.advance ~len:(String.length string) sc
let can_advance_and_match cc sc =
SC.can_peek sc
&& CC.contains cc (SC.peek sc)
let one cc sc =
can_advance_and_match cc sc
&& SC.advance sc
let many cc sc =
let nmatch = ref 0 in
while can_advance_and_match cc sc do
ignore (SC.advance sc); incr nmatch
done;
!nmatch > 0
end
module Sexparse1 = struct
module SC = Strcursor
module SR = Strrange
module SM = Strmatch
module CC = CharClass
let cc_atom = CC.of_string "_ A-Z a-z 0-9 = ! @ $ % & * < > ? + - * / :"
let cc_ws = CC.of_string "-9 -32"
let cc_digit = CC.of_string "0-9"
let cc_plmin = CC.of_string "+ -"
let cc_in_string = CC.invert (CC.of_string "\" \\")
let skip_ws sc = ignore (SM.many cc_ws sc)
type sexp = Symbol of string | Int of int | List of sexp list
| String of string
exception Syntax_error of string * SC.t
let read ?(start=0) str =
let rec _read cursor =
skip_ws cursor;
let start = SC.pos cursor in
if SC.empty cursor then
List []
else if SM.char '(' cursor; then
List (_list cursor [])
else if SM.char '\'' cursor; then
List ((Symbol "quote") :: [_read cursor])
else if SM.char '\"' cursor; then
String (_string cursor)
else if ignore (SM.char '-' cursor); SM.many cc_digit cursor then
Int (int_of_string (SC.to_string ~start cursor))
else if SM.many cc_atom cursor then
Symbol (SC.to_string ~start cursor)
else
(* XXX: proper error reporting *)
raise (Syntax_error ("Invalid character", cursor))
and _list cursor res =
skip_ws cursor;
if SM.char ')' cursor then
List.rev res
else
_list cursor (_read cursor :: res)
and _string cursor =
let rec loop cursor lst =
let start = SC.pos cursor in
if SM.many cc_in_string cursor then
loop cursor (SC.to_string ~start cursor :: lst)
else if SM.char '"' cursor then
String.concat (List.rev lst)
else if SM.char '\\' cursor then
if SC.can_peek cursor then
loop cursor (String.of_char (SC.get cursor) :: lst)
else
raise (Syntax_error ("String ended shortly", cursor))
else
raise (Syntax_error ("Can't find string terminator", cursor))
in
loop cursor []
in
let cursor = (SC.of_string ~pos:start str) in
let res = _read cursor in
res, cursor
end
(* -*- coding: utf-8 -*- *)
open OUnit2
open Parse1
open Sexparse1
module SC = Strcursor
type spec = int * string * int * sexp
let () = run_test_tt_main
("suite">:::
(List.map
(fun (start, str, last, sexp) ->
str>::(fun _ ->
let res, sc = read ~start str in
assert_equal
(res, SC.pos sc)
(sexp, last)
)
)
[0, "(foo bar baz)", 13
, List [Symbol "foo"; Symbol "bar"; Symbol "baz"]
;0, "(foo 123 baz)", 13
, List [Symbol "foo"; Int 123; Symbol "baz"]
;0, "(foo '123 baz)", 14
, List [Symbol "foo"; List [Symbol "quote"; Int 123]; Symbol "baz"]
;0, "(foo 123) bar 'baz", 9
, List [Symbol "foo"; Int 123]
;9, "(foo 123) bar 'baz", 13
, Symbol "bar"
;13, "(foo 123) bar 'baz", 18
, List [Symbol "quote"; Symbol "baz"]
;18, "(foo 123) bar 'baz", 18
, List []
;0, "(number 1 -1 )", 14
, List [Symbol "number"; Int 1; Int (-1)]
;0, "(str \"foo \\x bar\\\"baz qux\" hoe)", 31
, List [Symbol "str"; String "foo x bar\"baz qux";
Symbol "hoe"]
])
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment