Last active
June 21, 2016 13:12
-
-
Save dinosaure/34dea72ab585b6f70e130d73b05d5949 to your computer and use it in GitHub Desktop.
parser combinator and ringbuffer in ocaml
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Buffer = Rb.Bytes | |
module Make (E : sig type err end) = | |
struct | |
type s = Complete | Incomplete | |
let pp fmt = function | |
| Complete -> Format.fprintf fmt "Complete" | |
| Incomplete -> Format.fprintf fmt "Incomplete" | |
type 'a state = | |
| Read of { buffer : Bytes.t; off : int; len : int; k : int -> s -> 'a state } | |
| Done of 'a | |
| Fail of string list * E.err | |
type 'a k = Buffer.t -> s -> 'a | |
type 'a fail = (string list -> E.err -> 'a state) k | |
type ('a, 'r) success = ('a -> 'r state) k | |
type 'a t = | |
{ f : 'r. ('r fail -> ('a, 'r) success -> 'r state) k } | |
let return v = { f = fun i s _ succ -> succ i s v } | |
let fail err = { f = fun i s fail _ -> fail i s [] err } | |
let (>>=) a f = { f = fun i s fail succ -> | |
let succ' i' s' v = (f v).f i' s' fail succ in | |
a.f i s fail succ' } | |
let (>>|) a f = { f = fun i s fail succ -> | |
let succ' i' s' v = succ i' s' (f v) in | |
a.f i s fail succ' } | |
let (<$>) f m = m >>| f | |
let lift f m = f <$> m | |
let (<|>) u v = | |
{ f = fun i s fail succ -> | |
Format.printf "<|>: %a %a\n%!" | |
Buffer.pp i pp s; | |
let saved, rs = Buffer.create 16, ref s in | |
let rec store = function | |
| Read { buffer; off; len; k; } -> | |
Read { buffer; off; len; k = fun n s -> | |
Buffer.write saved buffer off n; | |
rs := s; | |
Format.printf "<|>: (saved) %a\n%!" Buffer.pp saved; | |
store @@ k n s } | |
| Fail (marks, err) -> | |
Format.printf "<|>: %a %a\n%!" | |
Buffer.pp i pp s; | |
v.f saved !rs fail succ | |
| x -> x | |
in | |
store @@ u.f i s fail succ } | |
let ( *>) a b = | |
{ f = fun i s fail succ -> | |
let succ' i' s' x = | |
let succ'' i'' s'' _ = succ i'' s'' x in | |
b.f i' s' fail succ'' in | |
a.f i s fail succ' } | |
let (<?>) a mark = | |
{ f = fun i s fail succ -> | |
let fail' i' s' marks err = | |
fail i' s' (mark :: marks) err in | |
a.f i s fail' succ } | |
let run buffer a = | |
let fail' buf _ marks err = Fail (marks, err) in | |
let succeed' buf _ value = Done value in | |
a.f buffer Incomplete fail' succeed' | |
end | |
type error = .. | |
module Parser = Make (struct type err = error end) | |
let rec prompt i fail succ = | |
let continue n s = | |
Format.printf "read[%d]>\n%!" n; | |
Buffer.wadvance i n; | |
if n = 0 then | |
if s = Parser.Complete | |
then fail i Parser.Complete | |
else prompt i fail succ | |
else succ i s | |
in | |
let (buffer, off, len) = Buffer.write_space ~expect:1 i in | |
Parser.Read { buffer; off; len; k = continue; } | |
type error += End_of_flow | |
let expect = | |
{ Parser.f = fun i s fail succ -> | |
match s with | |
| Parser.Complete -> fail i s [] End_of_flow | |
| Parser.Incomplete -> | |
let succ' i' s' = succ i' s' () in | |
let fail' i' s' = fail i' s' [] End_of_flow in | |
prompt i fail' succ' } | |
let require n i s fail succ = | |
let rec continue = { Parser.f = fun i' s' fail' succ' -> | |
if n < Buffer.ravailable i' | |
then succ' i' s' () | |
else Parser.(expect >>= fun () -> continue).Parser.f i' s' fail' succ' } | |
in | |
Parser.(expect >>= fun () -> continue).Parser.f i s fail succ | |
let peek_chr = { Parser.f = fun i s fail succ -> | |
if Buffer.ravailable i > 0 | |
then succ i s (Some (Buffer.get i)) | |
else if s = Parser.Complete | |
then succ i s None | |
else | |
let succ' i' s' = | |
succ i' s' (Some (Buffer.get i')) in | |
let fail' i' s' = | |
succ i' s' None in | |
prompt i fail' succ' } | |
let peek_chr_exn = { Parser.f = fun i s fail succ -> | |
if Buffer.ravailable i > 0 | |
then succ i s (Buffer.get i) | |
else let succ' i' s' () = | |
succ i' s' (Buffer.get i') in | |
require 1 i s fail succ' } | |
let advance n = | |
{ Parser.f = fun i s fail succ -> Buffer.radvance i n; succ i s () } | |
let require n = | |
let sub n = | |
{ Parser.f = fun i s fail succ -> | |
let tmp = Bytes.create n in | |
Buffer.peek i tmp 0 n; | |
Format.printf "require: %a\n%!" Buffer.pp i; | |
Format.printf "require: peek %S\n%!" tmp; | |
succ i s tmp } | |
in | |
Parser.({ Parser.f = fun i s fail succ -> | |
if Buffer.ravailable i >= n | |
then succ i s () | |
else require n i s fail succ } | |
>>= fun () -> sub n) | |
type error += Satisfy | |
let satisfy f = | |
let open Parser in | |
peek_chr_exn >>= fun chr -> | |
if f chr | |
then advance 1 >>| fun () -> chr | |
else fail Satisfy | |
type error += String | |
let string f s = | |
let open Parser in | |
let len = String.length s in | |
require len >>= fun s' -> | |
Format.printf "string: %S\n%!" s'; | |
if f s = f s' | |
then advance len *> return s' | |
else fail String | |
let char chr = | |
let open Parser in | |
satisfy ((=) chr) <?> (String.make 1 chr) | |
let fix f = | |
let open Parser in | |
let rec u = lazy (f r) | |
and r = { f = fun i s fail succ -> | |
Lazy.(force u).f i s fail succ } | |
in r | |
let _ = | |
let tmp = Bytes.create 2 in | |
let rec loop = function | |
| Parser.Read { buffer; off; len; k; } -> | |
let max = min (Bytes.length tmp) len in | |
let n = input stdin tmp 0 max in | |
Bytes.blit tmp 0 buffer off n; | |
if n = 0 | |
then loop @@ k n Parser.Complete | |
else loop @@ k n Parser.Incomplete | |
| Parser.Done v -> v | |
| Parser.Fail (marks, err) -> assert false | |
in | |
loop @@ Parser.run (Buffer.create 16) | |
Parser.(string (fun x -> x) "foo" <|> string (fun x -> x) "bar") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module type A = | |
sig | |
type t | |
val create : int -> t | |
val blit : t -> int -> t -> int -> int -> unit | |
val get : t -> int -> char | |
val pp : Format.formatter -> t -> unit | |
end | |
module Make (A : A) = | |
struct | |
type t = | |
{ size : int | |
; buffer : A.t | |
; mutable rpos : int | |
; mutable wpos : int } | |
let pp fmt { rpos; wpos; buffer; _ } = | |
if rpos <= wpos | |
then Format.fprintf fmt "{ @[<hov>%d.@,%a@,.%d@] }" rpos A.pp buffer wpos | |
else Format.fprintf fmt "{ @[<hov>%d.@,%a@,.%d@] }" wpos A.pp buffer rpos | |
let create size = | |
{ size = size + 1 | |
; buffer = A.create size | |
; rpos = 0 | |
; wpos = 0 } | |
let ravailable t = | |
if t.wpos >= t.rpos then (t.wpos - t.rpos) | |
else t.size - (t.rpos - t.wpos) | |
let wavailable t = | |
if t.wpos >= t.rpos then t.size - (t.wpos - t.rpos) - 1 | |
else (t.rpos - t.wpos) - 1 | |
let radvance t n = | |
assert (n <= ravailable t); | |
if t.rpos + n < t.size then t.rpos <- t.rpos + n | |
else t.rpos <- t.rpos + n - t.size | |
let wadvance t n = | |
assert (n <= wavailable t); | |
if t.wpos + n < t.size then t.wpos <- t.wpos + n | |
else t.wpos <- t.wpos + n - t.size | |
let peek t buff off len = | |
assert (len <= wavailable t); | |
let pre = t.size - t.rpos in | |
let extra = len - pre in | |
if extra > 0 then begin | |
A.blit t.buffer t.rpos buff off pre; | |
A.blit t.buffer 0 buff (off + pre) extra; | |
end else | |
A.blit t.buffer t.rpos buff off len | |
let read t buff off len = | |
peek t buff off len; | |
radvance t len | |
let get t = | |
let tmp = A.create 1 in | |
peek t tmp 0 1; | |
A.get tmp 0 | |
let write t buff off len = | |
assert (len <= wavailable t); | |
let pre = t.size - t.wpos in | |
let extra = len - pre in | |
if extra > 0 then begin | |
A.blit buff off t.buffer t.wpos pre; | |
A.blit buff (off + pre) t.buffer 0 extra; | |
end else | |
A.blit buff off t.buffer t.wpos len; | |
wadvance t len | |
let read_space t = | |
if t.wpos = t.rpos then None | |
else let len0 = | |
if t.wpos >= t.rpos then t.wpos - t.rpos | |
else t.size - t.rpos | |
in | |
Some (t.buffer, t.rpos, len0) | |
let write_space t = | |
let len0 = | |
if t.wpos >= t.rpos | |
then t.size - t.wpos - 1 | |
else (t.rpos - t.wpos) - 1 | |
in | |
if len0 = 0 | |
then None | |
else Some (t.buffer, t.wpos, len0) | |
let transmit t f = | |
if t.wpos = t.rpos then 0 | |
else let len0 = | |
if t.wpos >= t.rpos then t.wpos - t.rpos | |
else t.size - t.rpos | |
in | |
let len = f t.buffer t.rpos len0 in | |
assert (len <= len0); | |
radvance t len; | |
len | |
end | |
module Ext (A : A) = | |
struct | |
module R = Make(A) | |
type t = | |
{ mutable rb : R.t } | |
let prepare buf len = | |
if R.wavailable buf.rb >= len then | |
buf.rb | |
else begin | |
let rb = R.create (R.ravailable buf.rb + len) in | |
while R.ravailable buf.rb <> 0 do | |
ignore (R.transmit buf.rb (fun buf off len -> R.write rb buf off len; len)) | |
done; | |
buf.rb <- rb; | |
rb | |
end | |
let compact buf = | |
let rb = R.create (buf.rb.R.size - 1) in | |
while R.ravailable buf.rb <> 0 do | |
ignore (R.transmit buf.rb (fun buf off len -> R.write rb buf off len; len)) | |
done; | |
buf.rb <- rb; | |
rb | |
let peek rb = R.peek rb.rb | |
let read rb = R.read rb.rb | |
let read_space rb = R.read_space rb.rb | |
let transmit rb = R.transmit rb.rb | |
let ravailable rb = R.ravailable rb.rb | |
let wavailable rb = R.wavailable rb.rb | |
let radvance rb = R.radvance rb.rb | |
let wadvance rb = R.wadvance rb.rb | |
let get rb = R.get rb.rb | |
let write rb buff off len = | |
let rb = prepare rb len in | |
R.write rb buff off len | |
let write_space ?(expect = 0) buf = | |
match R.write_space buf.rb with | |
| None when wavailable buf > expect -> | |
(* no continuous buffer, but enough space to write *) | |
let rb = compact buf in | |
(rb.R.buffer, rb.R.wpos, rb.R.size - rb.R.wpos - 1) | |
| None -> | |
(* no continuous buffer and not enough space to write *) | |
let rb = prepare buf expect in | |
(rb.R.buffer, rb.R.wpos, rb.R.size - rb.R.wpos - 1) | |
| Some (buff, off, len) when len > expect -> (buff, off, len) | |
| Some (buff, off, len) -> | |
(* continuous buffer but not enough space to write *) | |
let rb = prepare buf expect in | |
(rb.R.buffer, rb.R.wpos, rb.R.size - rb.R.wpos - 1) | |
let create len = | |
{ rb = R.create len } | |
let pp fmt rb = R.pp fmt rb.rb | |
end | |
module Bytes = Ext(struct include Bytes let pp fmt = Format.fprintf fmt "%S" end) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment