Created
October 16, 2012 06:04
-
-
Save brendanzab/3897455 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// based off the examples shown in this blog post: | |
// http://blog.rfw.name/2012/10/12/parser_combinators.html | |
enum ParseNode { | |
NonTerminal(&self/[ParseNode]), | |
Terminal(&self/str), | |
Empty, | |
} | |
enum ParseResult { | |
ParseOk(&self/ParseNode, &self/str), | |
NoParse, | |
} | |
enum Parser { | |
Concat (&self/[Parser]), | |
Alternate (&self/[Parser]), | |
OneOrMore (&self/Parser), | |
ZeroOrMore (&self/Parser), | |
Optional (&self/Parser), | |
CharRange (char, char), | |
Literal (&self/str), | |
} | |
/** | |
* Concat!(p1, p2 ... pn) => Concat(~[p1, p2 ... pn]) | |
*/ | |
macro_rules! Concat( | |
($($elem:expr),+) => ( | |
Concat([ $($elem),+ ]) | |
) | |
) | |
/** | |
* Alternate!(p1, p2 ... pn) => Alternate(~[p1, p2 ... pn]) | |
*/ | |
macro_rules! Alternate( | |
($($elem:expr),+) => ( | |
Alternate([ $($elem),+ ]) | |
) | |
) | |
impl Parser { | |
fn parse(src: &s/str) -> ParseResult { | |
match self { | |
Concat(arr) => concat(src, arr), | |
Alternate(arr) => alternate(src, arr), | |
OneOrMore(p) => one_or_more(src, p), | |
ZeroOrMore(p) => zero_or_more(src, p), | |
Optional(p) => optional(src, p), | |
CharRange(low, high) => char_range(src, low, high), | |
Literal(name) => literal(src, name) | |
} | |
} | |
} | |
/** | |
* Represents the concatenation of two parsers, i.e parse using the first | |
* parser, then the second, then return both results. | |
*/ | |
fn concat(src: &s/str, parsers: &[Parser]) -> ParseResult { | |
let mut nodes = ~[]; | |
let mut tail = src; | |
for parsers.each |p| { | |
match p.parse(tail) { | |
ParseOk(n, t) => { | |
nodes.push(*n); | |
tail = t; | |
} | |
NoParse => { | |
return NoParse; | |
} | |
} | |
} | |
return ParseOk(&NonTerminal(nodes), tail); | |
} | |
/** | |
* Represents the alternation of two parsers, i.e. parse using the first | |
* parser and, if that doesn't match, parse with the second parser. | |
*/ | |
fn alternate(src: &s/str, parsers: &[Parser]) -> ParseResult { | |
for parsers.each |p| { | |
match p.parse(src) { | |
ParseOk(node, tail) => { return ParseOk(node, copy tail); } | |
NoParse => { loop; } | |
} | |
} | |
return NoParse; | |
} | |
/** | |
* Represents the Kleene star operation, i.e. attempt to parse one or more | |
* results using the given parser. | |
*/ | |
fn one_or_more(src: &s/str, parser: &s/Parser) -> ParseResult { | |
let mut nodes = ~[]; | |
let mut tail = src; // <- copy? ugh | |
loop { | |
match parser.parse(tail) { | |
ParseOk(node, tl) => { | |
nodes.push(*node); | |
tail = tl; | |
} | |
NoParse => { | |
break; | |
} | |
} | |
} | |
match nodes.is_not_empty() { | |
true => ParseOk(&NonTerminal(nodes), tail), | |
false => NoParse | |
} | |
} | |
/** | |
* Represents an optional one_or_more parser. | |
*/ | |
fn zero_or_more(src: &s/str, parser: &s/Parser) -> ParseResult { | |
Optional(@OneOrMore(parser)).parse(src) | |
} | |
/** | |
* Represents a parser that doesn't fail if there is no match. | |
*/ | |
fn optional(src: &s/str, parser: &s/Parser) -> ParseResult { | |
match parser.parse(src) { | |
ParseOk(node, tail) => ParseOk(node, tail), | |
NoParse => ParseOk(&Empty, str::from_slice(src)) | |
} | |
} | |
/** | |
* Parse a single if it is in the given range | |
*/ | |
fn char_range(src: &s/str, low: char, high: char) -> ParseResult { | |
if src.is_not_empty() { | |
let c = src[0] as char; | |
if c >= low && c <= high { | |
return ParseOk(&Terminal(str::from_char(c)), str::view(src, 1, src.len())); | |
} | |
} | |
return NoParse; | |
} | |
/** | |
* Parse a literal. | |
*/ | |
fn literal(src: &s/str, name: &s/str) -> ParseResult { | |
match src.starts_with(name) { | |
true => ParseOk(&Terminal(name), src.slice(name.len(), src.len())), | |
false => NoParse | |
} | |
} | |
fn main() { | |
let match_ab = Concat!(Literal("a"), Literal("b")); | |
io::println(fmt!("match_ab.parse(\"ab\") -> %?", match_ab.parse("ab"))); | |
let match_a_or_b = Alternate!(Literal("a"), Literal("b")); | |
io::println(fmt!("match_a_or_b.parse(\"a\") -> %?", match_a_or_b.parse("a"))); | |
io::println(fmt!("match_a_or_b.parse(\"b\") -> %?", match_a_or_b.parse("b"))); | |
io::println(fmt!("match_a_or_b.parse(\"c\") -> %?", match_a_or_b.parse("c"))); | |
let match_a_plus = OneOrMore(&Literal("a")); | |
io::println(fmt!("match_ab.parse(\"aaab\") -> %?", match_a_plus.parse("aaab"))); | |
let digit = CharRange('0', '9'); | |
io::println(fmt!("digit.parse(\"3\") -> %?", digit.parse("3"))); | |
io::println(fmt!("digit.parse(\"9\") -> %?", digit.parse("9"))); | |
io::println(fmt!("digit.parse(\"a\") -> %?", digit.parse("a"))); | |
let number = OneOrMore(&digit); | |
io::println(fmt!("number.parse(\"1234\") -> %?", number.parse("1234"))); | |
io::println(fmt!("number.parse(\"abcd\") -> %?", number.parse("abcd"))); | |
io::println(fmt!("number.parse(\"123abc\") -> %?", number.parse("123abc"))); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment