Skip to content

Instantly share code, notes, and snippets.

@wweic
Created August 1, 2013 03:05
Show Gist options
  • Save wweic/6128108 to your computer and use it in GitHub Desktop.
Save wweic/6128108 to your computer and use it in GitHub Desktop.
Parse regular expression pattern. using recursive descent.
(* Parse a subset of regular expression, using recursive descent *)
(* modified from https://code.google.com/p/sharable-stuff/ *)
type regexp =
| Empty_String
| Char of char
| Union of regexp * regexp
| Concat of regexp * regexp
| Star of regexp
exception IllegalExpression
let string_to_regexp s =
let index = ref 0 in
let length = String.length s in
let lookahead _ =
if !index < length then
Some s.[!index]
else
None
in
let consume c =
if Some c = (lookahead ()) then
index := !index + 1
else
raise IllegalExpression
and alphabet'contains = function
| Some c -> 'a' <= c & c <= 'z'
| None -> false
in
let starts'atom = function
| Some '(' -> true
| Some 'E' -> true
| co -> alphabet'contains co
in
let rec parseOR _ =
if starts'atom (lookahead ()) then
parseOR' (parseCONCAT ())
else
raise IllegalExpression
and parseOR' re =
if Some '|' = (lookahead ()) then
let re' = parseOR' (parseCONCAT (consume '|')) in
Union (re, re')
else
re
and parseCONCAT _ =
if starts'atom (lookahead ()) then
parseCONCAT' (parseSTAR ())
else
raise IllegalExpression
and parseCONCAT' re =
if starts'atom (lookahead ()) then
let re' = parseCONCAT' (parseSTAR ()) in
Concat (re, re')
else
re
and parseSTAR _ =
if starts'atom (lookahead ()) then
parseSTAR' (parseATOM ())
else
raise IllegalExpression
and parseSTAR' re =
if Some '*' = (lookahead ()) then begin
consume '*';
parseSTAR' (Star re)
end else
re
and parseATOM _ =
if Some '(' = (lookahead ()) then begin
let re =
consume '(';
parseOR ()
in
consume ')';
re
end else if Some 'E' = (lookahead ()) then begin
consume 'E';
Empty_String
end else if alphabet'contains (lookahead ()) then
let c = match (lookahead ()) with
| Some c -> c
| None -> raise IllegalExpression
in
let re = Char c in
consume c;
re
else
raise IllegalExpression
in
parseOR ();;
(* test *)
string_to_regexp "ab";;
string_to_regexp "a+";;
string_to_regexp "E";;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment