Last active
May 18, 2019 15:56
-
-
Save vayn/676e11526a4d85c8498062c8542fca57 to your computer and use it in GitHub Desktop.
Parser combinator in Rust (https://git.io/xml-parser-combinator)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![allow(dead_code)] | |
#[derive(Clone, Debug, PartialEq, Eq)] | |
struct Element { | |
name: String, | |
attributes: Vec<(String, String)>, | |
children: Vec<Element>, | |
} | |
/// Time For A Trait | |
/// https://bodil.lol/parser-combinators/#time-for-a-trait | |
/// | |
/// You might have noticed by now that we keep repeating the shape of the parser | |
/// type signature: | |
/// | |
/// Fn(&str) -> Result<(&str, Output), &str> | |
/// | |
/// You may be getting as sick of reading it written out full like that as I'm | |
/// getting of writing it, so I think it's time to introduce a trait, to make | |
/// things a little more readable, and to let us add some extensibility to our | |
/// parsers. | |
/// | |
/// But first of all, let's make a type alias for that return type we keep | |
/// using: | |
type ParserResult<'a, Output> = Result<(&'a str, Output), &'a str>; | |
/// So that now, instead of typing that monstrosity out all the time, we can | |
/// just type `ParseResult<String>` or similar. We've added a lifetime there, | |
/// because the type declaration requires it, but a lot of the time the Rust | |
/// compiler should be able to infer it for you. As a rule, try leaving the | |
/// lifetime out and see if rustc gets upset, then just put it in if it does. | |
/// | |
/// The lifetime 'a, in this case, refers specifically to the lifetime of the | |
/// input. | |
/// | |
/// Now, for the trait. We need to put the lifetime in here as well, and when | |
/// you're using the trait the lifetime is usually always required. It's a bit | |
/// of extra typing, but it beats the previous version. | |
trait Parser<'a, Output> { | |
fn parse(&self, input: &'a str) -> ParserResult<'a, Output>; | |
fn map<F, NewOutput>(self, map_fn: F) -> BoxedParser<'a, NewOutput> | |
where | |
Self: Sized + 'a, | |
Output: 'a, | |
NewOutput: 'a, | |
F: Fn(Output) -> NewOutput + 'a, | |
{ | |
BoxedParser::new(map(self, map_fn)) | |
} | |
fn pred<F>(self, pred_fn: F) -> BoxedParser<'a, Output> | |
where | |
Self: Sized + 'a, | |
Output: 'a, | |
F: Fn(&Output) -> bool + 'a, | |
{ | |
BoxedParser::new(pred(self, pred_fn)) | |
} | |
/// Are You Going To Say The M Word Or Do I Have To? | |
/// https://bodil.lol/parser-combinators/#are-you-going-to-say-the-m-word-or-do-i-have-to | |
/// | |
/// Remember we talked about how the `map` pattern is called a "functor" on | |
/// Planet Haskell? | |
/// | |
/// The `and_then` pattern is another thing you see a lot in Rust, in | |
/// generally the same places as `map`. It's called `flat_map` on Iterator, | |
/// but it's the same pattern as the rest. | |
/// | |
/// The fancy word for it is "monad." If you've got a thing Thing<A>, and | |
/// you have an and_then function available that you can pass a function | |
/// from A to Thing<B> into, so that now you have a new Thing<B> instead, | |
/// that's a monad. | |
/// | |
/// The function might get called instantly, like when you have an | |
/// Option<A>, we already know if it's a Some(A) or a None, so we apply the | |
/// function directly if it's a Some(A), giving us a Some(B). | |
/// | |
/// It might also be called lazily. For instance, if you have a Future<A> | |
/// that is still waiting to resolve, instead of and_then immediately | |
/// calling the function to create a Future<B>, instead it creates a new | |
/// Future<B> which contains both the Future<A> and the function, and which | |
/// then waits for Future<A> to finish. When it does, it calls the function | |
/// with the result of the Future<A>, and Bob's your uncle1, you get your | |
/// Future<B> back. In other words, in the case of a Future you can think of | |
/// the function you pass to and_then as a callback function, because it | |
/// gets called with the result of the original future when it completes. | |
/// It's also a little more interesting than that, because it returns a new | |
/// Future, which may or may not have already been resolved, so it's a way | |
/// to chain futures together. | |
/// | |
/// As with functors, though, Rust's type system isn't currently capable of | |
/// expressing monads, so let's only note that this pattern is called a | |
/// monad, and that, rather disappointingly, it's got nothing at all to do | |
/// with burritos, contrary to what they say on the internets, and move on. | |
fn and_then<F, NextParser, NewOutput>(self, f: F) -> BoxedParser<'a, NewOutput> | |
where | |
Self: Sized + 'a, | |
Output: 'a, | |
NewOutput: 'a, | |
NextParser: Parser<'a, NewOutput> + 'a, | |
F: Fn(Output) -> NextParser + 'a, | |
{ | |
BoxedParser::new(and_then(self, f)) | |
} | |
} | |
/// To make this even easier, we can actually implement this trait for any | |
/// function that matches the signature of a parser: | |
impl<'a, F, Output> Parser<'a, Output> for F | |
where | |
F: Fn(&'a str) -> ParserResult<Output>, | |
{ | |
fn parse(&self, input: &'a str) -> ParserResult<'a, Output> { | |
self(input) | |
} | |
} | |
/// To Infinity And Beyond | |
/// https://bodil.lol/parser-combinators/#to-infinity-and-beyond | |
/// | |
/// enum List<A> { | |
/// Cons(A, List<A>), | |
/// Nil, | |
/// } | |
/// | |
/// To which rustc will, very sensibly, object that your recursive type List<A> | |
/// has an infinite size, because inside every List::<A>::Cons is another | |
/// List<A>, and that means it's List<A>s all the way down into infinity. As far | |
/// as rustc is concerned, we're asking for an infinite list, and we're asking it | |
/// to be able to allocate an infinite list. | |
/// | |
/// In many languages, an infinite list isn't a problem in principle for the type | |
/// system, and it's actually not for Rust either. The problem is that in Rust, | |
/// as mentioned, we need to be able to allocate it, or, rather, we need to be | |
/// able to determine the size of a type up front when we construct it, and when | |
/// the type is infinite, that means the size must be infinite too. | |
/// | |
/// The solution is to employ a bit of indirection. Instead of our List::Cons | |
/// being an element of A and another list of A, instead we make it an element of | |
/// A and a pointer to a list of A. We know the size of a pointer, and it's the | |
/// same no matter what it points to, and so our List::Cons now has a fixed and | |
/// predictable size no matter the size of the list. And the way to turn an owned | |
/// thing into a pointer to an owned thing on the heap, in Rust, is to Box it. | |
/// | |
/// enum List<A> { | |
/// Cons(A, Box<List<A>>), | |
/// Nil, | |
/// } | |
/// | |
/// Another interesting feature of Box is that the type inside it can be | |
/// abstract. This means that instead of our by now incredibly complicated | |
/// parser function types, we can let the type checker deal with a very succinct | |
/// Box<dyn Parser<'a, A>> instead. | |
/// | |
/// That sounds great. What's the downside? Well, we might be losing a cycle or | |
/// two to having to follow that pointer, and it could be that the compiler | |
/// loses some opportunities to optimise our parser. But recall Knuth's | |
/// admonition about premature optimisation: it's going to be fine. You can | |
/// afford those cycles. | |
/// | |
/// So let's proceed to implement Parser for a boxed parser function in addition | |
/// to the bare functions we've been using so far. | |
struct BoxedParser<'a, Output> { | |
parser: Box<dyn Parser<'a, Output> + 'a>, | |
} | |
impl<'a, Output> BoxedParser<'a, Output> { | |
fn new<P>(parser: P) -> Self | |
where | |
P: Parser<'a, Output> + 'a, | |
{ | |
BoxedParser { | |
parser: Box::new(parser), | |
} | |
} | |
} | |
impl<'a, Output> Parser<'a, Output> for BoxedParser<'a, Output> { | |
fn parse(&self, input: &'a str) -> ParserResult<'a, Output> { | |
self.parser.parse(input) | |
} | |
} | |
/* | |
fn match_literal(expected: &'static str) | |
-> impl Fn(&str) -> Result<(&str, ()), &str> | |
{ | |
move |input| match input.get(0..expected.len()) { | |
Some(next) if next == expected => { | |
Ok((&input[expected.len()..], ())) | |
} | |
_ => Err(input), | |
} | |
} | |
*/ | |
fn match_literal<'a>(expected: &'static str) -> impl Parser<'a, ()> { | |
move |input: &'a str| match input.get(0..expected.len()) { | |
Some(next) if next == expected => Ok((&input[expected.len()..], ())), | |
_ => Err(input), | |
} | |
} | |
#[test] | |
fn literal_parser() { | |
let parse_joe = match_literal("Hello Joe!"); | |
assert_eq!(Ok(("", ())), parse_joe.parse("Hello Joe!")); | |
assert_eq!( | |
Ok((" Hello Robert!", ())), | |
parse_joe.parse("Hello Joe! Hello Robert!") | |
); | |
assert_eq!(Err("Hello Mike!"), parse_joe.parse("Hello Mike!")) | |
} | |
// fn identifier(input: &str) -> Result<(&str, String), &str> { | |
fn identifier(input: &str) -> ParserResult<String> { | |
let mut matched = String::new(); | |
let mut chars = input.chars(); | |
match chars.next() { | |
Some(next) if next.is_alphabetic() => matched.push(next), | |
_ => return Err(input), | |
} | |
while let Some(next) = chars.next() { | |
if next.is_alphanumeric() || next == '-' { | |
matched.push(next); | |
} else { | |
break; | |
} | |
} | |
let next_index = matched.len(); | |
Ok((&input[next_index..], matched)) | |
} | |
#[test] | |
fn identifier_parser() { | |
assert_eq!( | |
Ok(("", "i-am-an-identifier".to_string())), | |
identifier("i-am-an-identifier") | |
); | |
assert_eq!( | |
Ok((" entirely an identifier", "not".to_string())), | |
identifier("not entirely an identifier") | |
); | |
assert_eq!( | |
Err("!not at all an identifier"), | |
identifier("!not at all an identifier") | |
); | |
} | |
/* | |
fn pair<P1, P2, R1, R2>(parser1: P1, parser2: P2) | |
-> impl Fn(&str) -> Result<(&str, (R1, R2)), &str> | |
where | |
P1: Fn(&str) -> Result<(&str, R1), &str>, | |
P2: Fn(&str) -> Result<(&str, R2), &str>, | |
{ | |
move |input| match parser1(input) { | |
Ok((next_input, result1)) => match parser2(next_input) { | |
Ok((final_input, result2)) => Ok((final_input, (result1, result2))), | |
Err(err) => Err(err), | |
}, | |
Err(err) => Err(err), | |
} | |
} | |
fn pair<'a, P1, P2, R1, R2>(parser1: P1, parser2: P2) | |
-> impl Parser<'a, (R1, R2)> | |
where | |
P1: Parser<'a, R1>, | |
P2: Parser<'a, R2>, | |
{ | |
move |input| match parser1.parse(input) { | |
Ok((next_input, result1)) => match parser2.parse(next_input) { | |
Ok((final_input, result2)) => Ok((final_input, (result1, result2))), | |
Err(err) => Err(err), | |
} | |
Err(err) => Err(err), | |
} | |
} | |
*/ | |
fn pair<'a, P1, P2, R1, R2>(parser1: P1, parser2: P2) -> impl Parser<'a, (R1, R2)> | |
where | |
P1: Parser<'a, R1>, | |
P2: Parser<'a, R2>, | |
{ | |
move |input| { | |
parser1.parse(input).and_then(|(next_input, result1)| { | |
parser2 | |
.parse(next_input) | |
.map(|(final_input, result2)| (final_input, (result1, result2))) | |
}) | |
} | |
} | |
/// It looks very neat, but there's a problem: parser2.map() consumes parser2 to | |
/// create the wrapped parser, and the function is a Fn, not a FnOnce, so it's | |
/// not allowed to consume parser2, just take a reference to it. Rust Problems, | |
/// in other words. | |
/// | |
/// fn pair<'a, P1, P2, R1, R2>(parser1: P1, parser2: P2) | |
/// -> impl Parser<'a, (R1, R2)> | |
/// where | |
/// P1: Parser<'a, R1> + 'a, | |
/// P2: Parser<'a, R2> + 'a, | |
/// R1: 'a + Clone, | |
/// R2: 'a, | |
/// { | |
/// parser1.and_then(move |result1| parser2.map(move |result2| { | |
/// (result1.clone(), result2) | |
/// })) | |
/// } | |
#[test] | |
fn pair_combinator() { | |
let tag_opener = pair(match_literal("<"), identifier); | |
assert_eq!( | |
Ok(("/>", ((), "my-first-element".to_string()))), | |
tag_opener.parse("<my-first-element/>") | |
); | |
assert_eq!(Err("oops"), tag_opener.parse("oops")); | |
assert_eq!(Err("!oops"), tag_opener.parse("<!oops")); | |
} | |
/// Enter The Functor | |
/// https://bodil.lol/parser-combinators/#enter-the-functor | |
/// | |
/// Actually, let's indulge ourselves and shorten this function a bit, because | |
/// this map thing turns out to be a common pattern that Result actually | |
/// implements too. | |
/// | |
/// This pattern is what's called a "functor" in Haskell and its mathematical | |
/// sibling, category theory. If you've got a thing with a type A in it, and you | |
/// have a map function available that you can pass a function from A to B into | |
/// to turn it into the same kind of thing but with the type B in it instead, | |
/// that's a functor. You see this a lot of places in Rust, such as in Option, | |
/// Result, Iterator and even Future, without it being explicitly named as such. | |
/// And there's a good reason for that: you can't really express a functor as a | |
/// generalised thing in Rust's type system, because it lacks higher kinded | |
/// types, but that's another story, so let's just note that these are functors, | |
/// and you just need to look for the map function to spot one. | |
/// | |
/// fn map<P, F, A, B>(parser: P, map_fn: F) | |
/// -> impl Fn(&str) -> Result<(&str, B), &str> | |
/// where | |
/// P: Fn(&str) -> Result<(&str, A), &str>, | |
/// F: Fn(A) -> B, | |
/// { | |
/// move |input| match parser(input) { | |
/// Ok((next_input, result)) => Ok((next_input, map_fn(result))), | |
/// Err(err) => Err(err), | |
/// } | |
/// } | |
/// | |
/// fn map<P, F, A, B>(parser: P, map_fn: F) | |
/// -> impl Fn(&str) -> Result<(&str, B), &str> | |
/// where | |
/// P: Fn(&str) -> Result<(&str, A), &str>, | |
/// F: Fn(A) -> B, | |
/// { | |
/// move |input| | |
/// parser(input) | |
/// .map(|(next_input, result)| (next_input, map_fn(result))) | |
/// } | |
fn map<'a, P, F, A, B>(parser: P, map_fn: F) -> impl Parser<'a, B> | |
where | |
P: Parser<'a, A>, | |
F: Fn(A) -> B, | |
{ | |
move |input| { | |
parser | |
.parse(input) | |
.map(|(next_input, result)| (next_input, map_fn(result))) | |
} | |
} | |
fn left<'a, P1, P2, R1, R2>(parser1: P1, parser2: P2) -> impl Parser<'a, R1> | |
where | |
P1: Parser<'a, R1> + 'a, | |
P2: Parser<'a, R2> + 'a, | |
R1: 'a, | |
R2: 'a, | |
{ | |
// map(pair(parser1, parser2), |(left, _right)| left) | |
pair(parser1, parser2).map(|(left, _right)| left) | |
} | |
fn right<'a, P1, P2, R1, R2>(parser1: P1, parser2: P2) -> impl Parser<'a, R2> | |
where | |
P1: Parser<'a, R1> + 'a, | |
P2: Parser<'a, R2> + 'a, | |
R1: 'a, | |
R2: 'a, | |
{ | |
// map(pair(parser1, parser2), |(_left, right)| right) | |
pair(parser1, parser2).map(|(_left, right)| right) | |
} | |
#[test] | |
fn right_combinator() { | |
let tag_opener = right(match_literal("<"), identifier); | |
assert_eq!( | |
Ok(("/>", "my-first-element".to_string())), | |
tag_opener.parse("<my-first-element/>") | |
); | |
assert_eq!(Err("oops"), tag_opener.parse("oops")); | |
assert_eq!(Err("!oops"), tag_opener.parse("<!oops")); | |
} | |
fn zero_or_more<'a, P, A>(parser: P) -> impl Parser<'a, Vec<A>> | |
where | |
P: Parser<'a, A>, | |
{ | |
move |mut input| { | |
let mut result = Vec::new(); | |
while let Ok((next_input, next_item)) = parser.parse(input) { | |
input = next_input; | |
result.push(next_item); | |
} | |
Ok((input, result)) | |
} | |
} | |
#[test] | |
fn zero_or_more_combinator() { | |
let parser = zero_or_more(match_literal("ha")); | |
assert_eq!(Ok(("", vec![(), (), ()])), parser.parse("hahaha")); | |
assert_eq!(Ok(("ahah", vec![])), parser.parse("ahah")); | |
assert_eq!(Ok(("", vec![])), parser.parse("")); | |
} | |
fn one_or_more<'a, P, A>(parser: P) -> impl Parser<'a, Vec<A>> | |
where | |
P: Parser<'a, A>, | |
{ | |
move |mut input| { | |
let mut result = Vec::new(); | |
if let Ok((next_input, first_item)) = parser.parse(input) { | |
input = next_input; | |
result.push(first_item); | |
} else { | |
return Err(input); | |
} | |
while let Ok((next_input, next_item)) = parser.parse(input) { | |
input = next_input; | |
result.push(next_item); | |
} | |
Ok((input, result)) | |
} | |
} | |
/// Ownership problem | |
/// https://bodil.lol/parser-combinators/#one-or-more | |
/// | |
/// Here, we run into Rust Problems, and I don't even mean the problem of not | |
/// having a cons method for Vec, but I know every Lisp programmer reading that | |
/// bit of code was thinking it. No, it's worse than that: it's ownership. | |
/// | |
/// We own that parser, so we can't go passing it as an argument twice, the | |
/// compiler will start shouting at you that you're trying to move an already | |
/// moved value. So can we make our combinators take references instead? No, it | |
/// turns out, not without running into another whole set of borrow checker | |
/// troubles - and we're not going to even go there right now. And because these | |
/// parsers are functions, they don't do anything so straightforward as to | |
/// implement Clone, which would have saved the day very tidily, so we're stuck | |
/// with a constraint that we can't repeat our parsers easily in combinators. | |
/// | |
/// fn one_or_more<'a, P, A>(parser: P) -> impl Parser<'a, Vec<A>> | |
/// where | |
/// P: Parser<'a, A>, | |
/// { | |
/// map(pair(parser, zero_or_more(parser)), |(head, mut tail)| { | |
/// tail.insert(0, head); | |
/// tail | |
/// }) | |
/// } | |
#[test] | |
fn one_or_more_combinator() { | |
let parser = one_or_more(match_literal("ha")); | |
assert_eq!(Ok(("", vec![(), (), ()])), parser.parse("hahaha")); | |
assert_eq!(Err("ahah"), parser.parse("ahah")); | |
assert_eq!(Err(""), parser.parse("")); | |
} | |
fn any_char(input: &str) -> ParserResult<char> { | |
match input.chars().next() { | |
Some(next) => Ok((&input[next.len_utf8()..], next)), | |
_ => Err(input), | |
} | |
} | |
fn pred<'a, P, A, F>(parser: P, predicate: F) -> impl Parser<'a, A> | |
where | |
P: Parser<'a, A>, | |
F: Fn(&A) -> bool, | |
{ | |
move |input| { | |
if let Ok((next_input, value)) = parser.parse(input) { | |
if predicate(&value) { | |
return Ok((next_input, value)); | |
} | |
} | |
Err(input) | |
} | |
} | |
#[test] | |
fn predicate_combinator() { | |
let parser = pred(any_char, |c| *c == 'o'); | |
assert_eq!(Ok(("mg", 'o')), parser.parse("omg")); | |
assert_eq!(Err("lol"), parser.parse("lol")); | |
} | |
fn whitespace_char<'a>() -> impl Parser<'a, char> { | |
// pred(any_char, |c| c.is_whitespace()) | |
any_char.pred(|c| c.is_whitespace()) | |
} | |
fn space1<'a>() -> impl Parser<'a, Vec<char>> { | |
one_or_more(whitespace_char()) | |
} | |
fn space0<'a>() -> impl Parser<'a, Vec<char>> { | |
zero_or_more(whitespace_char()) | |
} | |
fn quoted_string<'a>() -> impl Parser<'a, String> { | |
right( | |
match_literal("\""), | |
left( | |
zero_or_more(any_char.pred(|c| *c != '"')), | |
match_literal("\""), | |
), | |
) | |
.map(|chars| chars.into_iter().collect()) | |
} | |
#[test] | |
fn quoted_string_parser() { | |
assert_eq!( | |
Ok(("", "Hello Joe!".to_string())), | |
quoted_string().parse("\"Hello Joe!\"") | |
); | |
} | |
fn attribute_pair<'a>() -> impl Parser<'a, (String, String)> { | |
pair(identifier, right(match_literal("="), quoted_string())) | |
} | |
fn attributes<'a>() -> impl Parser<'a, Vec<(String, String)>> { | |
zero_or_more(right(space1(), attribute_pair())) | |
} | |
#[test] | |
fn attribute_parser() { | |
assert_eq!( | |
Ok(( | |
"", | |
vec![ | |
("one".to_string(), "1".to_string()), | |
("two".to_string(), "2".to_string()) | |
] | |
)), | |
attributes().parse(" one=\"1\" two=\"2\"") | |
); | |
} | |
fn element_start<'a>() -> impl Parser<'a, (String, Vec<(String, String)>)> { | |
right(match_literal("<"), pair(identifier, attributes())) | |
} | |
fn single_element<'a>() -> impl Parser<'a, Element> { | |
// map( | |
// left(element_start(), match_literal("/>")), | |
// |(name, attributes)| Element { | |
// name, | |
// attributes, | |
// children: vec![], | |
// }, | |
// ) | |
left(element_start(), match_literal("/>")).map(|(name, attributes)| Element { | |
name, | |
attributes, | |
children: vec![], | |
}) | |
} | |
#[test] | |
fn single_element_parser() { | |
assert_eq!( | |
Ok(( | |
"", | |
Element { | |
name: "div".to_string(), | |
attributes: vec![("class".to_string(), "float".to_string())], | |
children: vec![] | |
} | |
)), | |
single_element().parse("<div class=\"float\"/>") | |
); | |
} | |
fn open_element<'a>() -> impl Parser<'a, Element> { | |
left(element_start(), match_literal(">")).map(|(name, attributes)| Element { | |
name, | |
attributes, | |
children: vec![], | |
}) | |
} | |
fn either<'a, P1, P2, A>(parser1: P1, parser2: P2) -> impl Parser<'a, A> | |
where | |
P1: Parser<'a, A>, | |
P2: Parser<'a, A>, | |
{ | |
move |input| match parser1.parse(input) { | |
ok @ Ok(_) => ok, | |
Err(_) => parser2.parse(input), | |
} | |
} | |
fn element<'a>() -> impl Parser<'a, Element> { | |
whitespace_wrap(either(single_element(), parent_element())) | |
} | |
fn close_element<'a>(expected_name: String) -> impl Parser<'a, String> { | |
right(match_literal("</"), left(identifier, match_literal(">"))) | |
.pred(move |name| name == &expected_name) | |
} | |
/// Remember I said we'd get back to and_then later? Well, later is here. The | |
/// combinator we need is, in fact, and_then: we need something that takes a | |
/// parser, and a function that takes the result of a parser and returns a new | |
/// parser, which we'll then run. It's a bit like pair, except instead of just | |
/// collecting both results in a tuple, we thread them through a function. It's | |
/// also just how and_then works with Results and Options, except it's a bit | |
/// easier to follow because Results and Options don't really do anything, | |
/// they're just things that hold some data (or not, as the case may be). | |
/// | |
/// So let's try writing an implementation for it. | |
fn and_then<'a, P, F, A, B, NextP>(parser: P, f: F) -> impl Parser<'a, B> | |
where | |
P: Parser<'a, A>, | |
NextP: Parser<'a, B>, | |
F: Fn(A) -> NextP, | |
{ | |
move |input| match parser.parse(input) { | |
Ok((next_input, result)) => f(result).parse(next_input), | |
Err(err) => Err(err), | |
} | |
} | |
fn parent_element<'a>() -> impl Parser<'a, Element> { | |
open_element().and_then(|el| { | |
left(zero_or_more(element()), close_element(el.name.clone())).map(move |children| { | |
let mut el = el.clone(); | |
el.children = children; | |
el | |
}) | |
}) | |
} | |
fn whitespace_wrap<'a, P, A>(parser: P) -> impl Parser<'a, A> | |
where | |
P: Parser<'a, A> + 'a, | |
A: 'a, | |
{ | |
right(space0(), left(parser, space0())) | |
} | |
#[test] | |
fn xml_parser() { | |
let doc = r#" | |
<top label="Top"> | |
<semi-bottom label="Bottom"/> | |
<middle> | |
<bottom label="Another bottom"/> | |
</middle> | |
</top>"#; | |
let parsed_doc = Element { | |
name: "top".to_string(), | |
attributes: vec![("label".to_string(), "Top".to_string())], | |
children: vec![ | |
Element { | |
name: "semi-bottom".to_string(), | |
attributes: vec![("label".to_string(), "Bottom".to_string())], | |
children: vec![], | |
}, | |
Element { | |
name: "middle".to_string(), | |
attributes: vec![], | |
children: vec![Element { | |
name: "bottom".to_string(), | |
attributes: vec![("label".to_string(), "Another bottom".to_string())], | |
children: vec![], | |
}], | |
}, | |
], | |
}; | |
assert_eq!(Ok(("", parsed_doc)), element().parse(doc)); | |
} | |
#[test] | |
fn mismatched_closing_tag() { | |
let doc = r#" | |
<top> | |
<bottom/> | |
</middle>"#; | |
assert_eq!(Err("</middle>"), element().parse(doc)); | |
} |
Author
vayn
commented
May 18, 2019
•
- https://bodil.lol/parser-combinators/
- https://github.com/J-F-Liu/pom/blob/master/doc/article.md
- http://adit.io/posts/2013-04-17-functors,_applicatives,_and_monads_in_pictures.html
- http://jiyinyiyong.github.io/monads-in-pictures/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment