Skip to content

Instantly share code, notes, and snippets.

@SolarLiner
Last active February 9, 2020 21:16
Show Gist options
  • Save SolarLiner/f0bbd3694ae5069762831c05c50f6428 to your computer and use it in GitHub Desktop.
Save SolarLiner/f0bbd3694ae5069762831c05c50f6428 to your computer and use it in GitHub Desktop.
Homemade OCaml parser combinator

I made this learning OCaml, so this will not be the most idiomatic nor the best looking code you'll ever see. However it works and I'm stoked about it!

The filename is nacc.ml as as (cheeky) play on yacc and means "Not a(nother) compiler compiler".

exception ParseException of string
type 'a parse_result = ('a * string) option
type 'a parser = string -> 'a parse_result
let is_done = function
| Some(_, "") | None -> true
| Some(_, _) -> false
let option_of_parse = function
| Some(c, _) -> Some(c)
| None -> None
let pmap f = Option.map(fun (v,rest) -> (f v, rest))
let pand f = function
| Some(c, rest) -> f c rest
| None -> None
let pleak = function
| Some(c, _) -> c
| None -> raise (ParseException "Attempted to use failed parse")
let parse_char c = function
| "" -> None
| input -> match String.get input 0 with
| x when x = c -> let rest = String.(sub input 1 ((length input)-1)) in
Some(String.make 1 c, rest)
| _ -> None
let parse_or a b =
let parser input =
match a input with
| Some(_,_) as x -> x
| None -> b input
in
parser
let rec parse_any ps input =
match ps with
| [] -> None
| p::ps -> match p input with
| None -> parse_any ps input
| x -> x
let parse_all ps =
let rec do_parse ct ps (input:string) =
match ps with
| [] -> Some([], input)
| p::ps -> match p input with
| Some(c, rest) -> do_parse (ct @ [c]) ps rest
| None -> None
in
do_parse [] ps
let parse_many p =
let rec parser ct (input:string) =
match p input with
| Some(c, rest) -> parser (ct @ [c]) rest
| None -> match ct with
| [] -> None
| x -> Some(x, input)
in
parser []
let parse_concat_many p =
let rec parser ct input =
match p input with
| Some(c, rest) -> parser (ct^c) rest
| None -> match ct with
| "" -> None
| x -> Some(x, input)
in
parser ""
let parse_ignore p next input =
match p input with
| Some(_, rest) -> next rest
| None -> next input
let parse_skip p next input =
match p input with
| Some(_, rest) -> next rest
| None -> None
let parse_concat_seq ps =
let rec parser ps ct input =
match ps with
| p::ps ->
begin
match p input with
| Some(c, rest) -> parser ps (ct ^ c) rest
| None -> match ct with
| "" -> None
| x -> Some(x, input)
end
| [] ->
begin
match ct with
| "" -> None
| x -> Some(x, input)
end
in
parser ps ""
let rec parse_combine_seq ps input =
match ps with
| [] -> Some([], input)
| v::vs ->
match v input with
| None -> None
| Some(c, rest) ->
match parse_combine_seq vs rest with
| Some(cs, rest) -> Some(c::cs, rest)
| None -> None
let explode s =
let rec step s l i =
if i == String.length s then l
else step s (l @ [s.[i]]) (i+1)
in
step s [] 0
let parse_literal s =
let l = explode s in
let m = List.map parse_char l in
parse_concat_seq m
let parse_anychar_in s =
let rec parser ct n input =
if n == (String.length s) then
match ct with
| "" -> None
| _ -> Some(ct, input)
else match parse_char (s.[n]) input with
| Some(c, rest) -> Some(c, rest)
| None -> parser ct (n+1) input
in
parser "" 0
let parse_delim d p input =
match p input with
| None -> None
| Some(c, rest) -> match parse_skip d p rest with
| Some(d, rest) -> Some((c,d), rest)
| None -> None
(* Utils *)
let parse_wrap l r p input =
match l input with
| None -> None
| Some(_, rest) -> match p rest with
| None -> None
| Some(c, rest) -> match r rest with
| None -> None
| Some(_, rest) -> Some(c, rest)
let parse_digit = parse_anychar_in "0123456789"
let parse_nat input =
input |> parse_concat_many parse_digit |> pmap int_of_string
let parse_int input =
match parse_char '-' input with
| Some(_, rest) -> parse_nat rest |> pmap (fun i -> -i)
| None -> parse_nat input
let parse_float input =
let parse_float_pos input =
input
|> parse_or
(parse_concat_seq [parse_concat_many parse_digit; parse_char '.'; parse_concat_many parse_digit])
(parse_concat_many parse_digit)
|> pmap float_of_string
in
match parse_char '-' input with
| Some(_, rest) -> parse_float_pos rest |> pmap (fun i -> -. i)
| None -> parse_float_pos input
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"val parse_ignore : ('a -> ('b * 'a) option) -> ('a -> 'c) -> 'a -> 'c = <fun>\n",
"val parse_skip :\n",
" ('a -> ('b * 'c) option) -> ('c -> 'd option) -> 'a -> 'd option = <fun>\n",
"val parse_concat_seq :\n",
" ('a -> (string * 'a) option) list -> 'a -> (string * 'a) option = <fun>\n",
"val parse_combine_seq :\n",
" ('a -> ('b * 'a) option) list -> 'a -> ('b list * 'a) option = <fun>\n",
"val explode : string -> char list = <fun>\n",
"val parse_literal : string -> string -> (string * string) option = <fun>\n",
"val parse_anychar_in : string -> string -> (string * string) option = <fun>\n",
"val parse_delim :\n",
" ('a -> ('b * 'c) option) ->\n",
" ('c -> ('d * 'a) option) -> 'c -> (('d * 'd) * 'a) option = <fun>\n",
"val parse_wrap :\n",
" ('a -> ('b * 'c) option) ->\n",
" ('d -> ('e * 'f) option) ->\n",
" ('c -> ('g * 'd) option) -> 'a -> ('g * 'f) option = <fun>\n",
"val parse_digit : string -> (string * string) option = <fun>\n",
"val parse_nat : string -> (int * string) option = <fun>\n",
"val parse_int : string -> (int * string) option = <fun>\n",
"val parse_float : string -> (float * string) option = <fun>\n"
]
}
],
"source": [
"#use \"nacc.ml\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Usage example\n",
"The parser type is generic, which means it can hold your AST as well. You can use the the combinators to directly create your AST.\n",
"\n",
"See the example below:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"type expr = Plus of expr * expr | Mult of expr * expr | Value of float\n"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"val make_plus : expr * expr -> expr = <fun>\n"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"val make_mult : expr * expr -> expr = <fun>\n"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"val make_value : float -> expr = <fun>\n"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"val tuple2_of_list : 'a list -> 'a * 'a = <fun>\n"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(* AST *)\n",
"type expr =\n",
"| Plus of expr * expr\n",
"| Mult of expr * expr\n",
"| Value of float\n",
"\n",
"let make_plus (l,r) = Plus (l, r)\n",
"let make_mult (l,r) = Mult (l, r)\n",
"let make_value v = Value v\n",
"let tuple2_of_list l = (List.nth l 0, List.nth l 1)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"val parse_expr : string -> (expr * string) option = <fun>\n",
"val parse_term : string -> (expr * string) option = <fun>\n",
"val parse_factor : string -> (expr * string) option = <fun>\n",
"val parse_value : string -> (expr * string) option = <fun>\n",
"val parse_expr_parent : string -> (expr * string) option = <fun>\n",
"val term_expr : string -> (expr * string) option = <fun>\n",
"val term_factor : string -> (expr * string) option = <fun>\n"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(*\n",
"Grammar:\n",
"-- expr ::= term + expr | term\n",
"-- term ::= factor * term | factor\n",
"-- factor ::= (expr) | value\n",
"-- value ::= float\n",
"*)\n",
"let rec parse_expr input = input |> parse_or term_expr parse_term\n",
"and parse_term input = input |> parse_or term_factor parse_factor\n",
"and parse_factor input = input |> parse_or parse_expr_parent parse_value\n",
"and parse_value input = input |> parse_float |> pmap make_value\n",
"\n",
"and parse_expr_parent input =\n",
" parse_wrap (parse_char '(') (parse_char ')') parse_expr input\n",
"and term_expr input =\n",
" input\n",
" |> parse_combine_seq [parse_term; parse_skip (parse_char '+') parse_expr]\n",
" |> pmap tuple2_of_list |> pmap make_plus\n",
"and term_factor input =\n",
" input\n",
" |> parse_combine_seq [parse_factor; parse_skip (parse_char '*') parse_term]\n",
" |> pmap tuple2_of_list |> pmap make_mult"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"val pprint : expr -> string = <fun>\n"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"val eval : expr -> float = <fun>\n"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(* Pretty printing and evaluation *)\n",
"open Printf\n",
"\n",
"let rec pprint = function\n",
" | Plus (l, r) -> sprintf \"%s + %s\" (pprint l) (pprint r)\n",
" | Mult (l, r) -> sprintf \"%s * %s\" (pprint l) (pprint r)\n",
" | Value v -> string_of_float v\n",
" \n",
"let rec eval = function\n",
" | Value v -> v\n",
" | Plus (l, r) -> (eval l) +. (eval r)\n",
" | Mult (l, r) -> (eval l) *. (eval r)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"- : (string * string) option = Some (\"2. + 3. * 4.\", \"\")\n"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"parse_expr \"2+3*4\" |> pleak |> pprint"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"- : float = 14.\n"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"parse_expr \"2+3*4\" |> pleak |> eval"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "OCaml 4.08.0",
"language": "OCaml",
"name": "ocaml-jupyter"
},
"language_info": {
"codemirror_mode": "text/x-ocaml",
"file_extension": ".ml",
"mimetype": "text/x-ocaml",
"name": "OCaml",
"nbconverter_exporter": null,
"pygments_lexer": "OCaml",
"version": "4.08.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment