-
-
Save eignnx/3c8444b8e2f4d8ce10fcd97815f29d2e to your computer and use it in GitHub Desktop.
| /// A module for parsing whitespace. Takes into account comments too. | |
| /// | |
| /// # Module Outline | |
| /// - mod space | |
| /// - fn comment | |
| /// - mod allowed | |
| /// - fn here | |
| /// - fn after | |
| /// - fn before | |
| /// - fn around | |
| /// - mod required | |
| /// - fn here | |
| /// - fn after | |
| /// - fn before | |
| /// - fn around | |
| /// | |
| /// The module structure allows semantic whitespace handling while constructing parsers. | |
| /// | |
| /// # Example | |
| /// | |
| /// ```rust | |
| /// let lisp_sexp = delimited( | |
| /// char('('), | |
| /// space::allowed::around( | |
| /// separated_nonempty_list(space::required::here, some_parser), | |
| /// ), | |
| /// char(')'), | |
| /// ); | |
| /// | |
| /// let rust_fn_definition = preceded( | |
| /// space::required::after(tag("fn")), | |
| /// tuple(( | |
| /// space::allowed::after(ident), | |
| /// delimited(char('('), space::allowed::around(param_list), char(')')), | |
| /// preceded( | |
| /// space::allowed::around(tag("->")), | |
| /// delimited(char('{'), space::allowed::around(block_interior), char('}')), | |
| /// ), | |
| /// )), | |
| /// ); | |
| /// | |
| /// let source_file = terminated( | |
| /// space::allowed::around(separated_list( | |
| /// space::allowed::here, | |
| /// alt((impl_block, fn_definition, trait_definition, type_definition)), | |
| /// )), | |
| /// eof, | |
| /// ); | |
| /// ``` | |
| mod space { | |
| use nom::{ | |
| branch::alt, | |
| bytes::complete::{tag, take_till}, | |
| character::complete::{multispace0, multispace1}, | |
| combinator::recognize, | |
| error::ParseError, | |
| multi::many1, | |
| sequence::{delimited, preceded, terminated}, | |
| IResult, | |
| }; | |
| /// A comment starts with `//` and continues till the end of the line, or | |
| /// end of input, whichever comes first. Note: this parser explicitly does | |
| /// NOT consume the '\n' character at the end of lines. | |
| pub fn comment<'i, E>(i: &'i str) -> IResult<&'i str, &'i str, E> | |
| where | |
| E: ParseError<&'i str> | |
| { | |
| let (i, _) = tag("//")(i)?; | |
| let (i, content) = take_till(|ch| ch == '\n')(i)?; | |
| // Strip off the first space if it has one. | |
| if content.starts_with(' ') { | |
| Ok((i, &content[1..])) | |
| } else { | |
| Ok((i, content)) | |
| } | |
| } | |
| pub mod allowed { | |
| use super::*; | |
| /// Whitespace is allowed here, but not required. | |
| pub fn here<'i, E>(i: &'i str) -> IResult<&'i str, &'i str, E> | |
| where | |
| E: ParseError<&'i str> | |
| { | |
| alt((super::required::here, multispace0))(i) | |
| } | |
| /// Has potentially-empty whitespace before **and** after the captured parser. | |
| pub fn around<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E> | |
| where | |
| E: ParseError<&'i str>, | |
| P: Fn(&'i str) -> IResult<&'i str, T, E>, | |
| { | |
| move |i: &'i str| delimited(here, &parser, here)(i) | |
| } | |
| /// Has potentially-empty whitespace after the captured parser. | |
| pub fn after<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E> | |
| where | |
| E: ParseError<&'i str>, | |
| P: Fn(&'i str) -> IResult<&'i str, T, E>, | |
| { | |
| move |i: &'i str| terminated(&parser, here)(i) | |
| } | |
| /// Has potentially-empty whitespace before the captured parser. | |
| pub fn before<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E> | |
| where | |
| E: ParseError<&'i str>, | |
| P: Fn(&'i str) -> IResult<&'i str, T, E>, | |
| { | |
| move |i: &'i str| preceded(here, &parser)(i) | |
| } | |
| } | |
| pub mod required { | |
| use super::*; | |
| /// Whitespace is required here. | |
| pub fn here<'i, E>(i: &'i str) -> IResult<&'i str, &'i str, E> | |
| where | |
| E: ParseError<&'i str> | |
| { | |
| recognize(many1(alt((multispace1, comment))))(i) | |
| } | |
| /// Has potentially-empty whitespace before **and** after the captured parser. | |
| pub fn around<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E> | |
| where | |
| E: ParseError<&'i str>, | |
| P: Fn(&'i str) -> IResult<&'i str, T, E>, | |
| { | |
| move |i: &'i str| delimited(here, &parser, here)(i) | |
| } | |
| /// Has potentially-empty whitespace after the captured parser. | |
| pub fn after<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E> | |
| where | |
| E: ParseError<&'i str>, | |
| P: Fn(&'i str) -> IResult<&'i str, T, E>, | |
| { | |
| move |i: &'i str| terminated(&parser, here)(i) | |
| } | |
| /// Has potentially-empty whitespace before the captured parser. | |
| pub fn before<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E> | |
| where | |
| E: ParseError<&'i str>, | |
| P: Fn(&'i str) -> IResult<&'i str, T, E>, | |
| { | |
| move |i: &'i str| preceded(here, &parser)(i) | |
| } | |
| } | |
| } |
I don't have a very deep understanding of Rust and I have a question about the parameter types. In your implementation, `Fn(_) -> _` is required, but sometimes (e.g., with `nom::combinator::recognize`) we only have `FnMut(_) -> _`. How should we handle this kind of scenario?
Currently, I have a function like this, which cannot pass the compiler check.
pub fn parse_identifier(input: &str) -> IResult<&str, String> {
space::allowed::after(recognize(pair(
alt((alpha1, tag("_"))),
many0(alt((alphanumeric1, tag("_")))),
)))(input)
}
// expected a `Fn<(&str,)>` closure, found `impl FnMut<(&str,)>`
// the trait `Fn<(&str,)>` is not implemented for `impl FnMut<(&str,)>`

@Shuumatsu Wow, I didn't know anybody was using this code haha! Um, I'm not sure, but you could try changing all of the `Fn(_) -> _` types in this gist to `FnMut(_) -> _` types. That might solve the problem. I think in nom version 6 they basically made the same change in their codebase (allowing `FnMut` types as parsers), so that's what makes me think it might work here.
When I get a chance I'll try this change out in my codebase and, if it works, I'll update the gist. ✌️
Oops! Looks like all of the closures need to be `move` to capture `parser` by value, and then internally borrow `parser` from the closure. Updated.