Skip to content

Instantly share code, notes, and snippets.

@clinuxrulz
Last active February 10, 2022 02:30
Show Gist options
  • Save clinuxrulz/98f4f9026b5d8edde42909d8aee99e5a to your computer and use it in GitHub Desktop.
Save clinuxrulz/98f4f9026b5d8edde42909d8aee99e5a to your computer and use it in GitHub Desktop.
Iteratee parsing in Rust
#[derive(Debug, PartialEq, Eq)]
pub enum ChunkNextResult {
Empty,
Char(char),
Eof,
}
pub struct Chunk<'a> {
next: Box<dyn FnMut()->ChunkNextResult+'a>,
unnext_stack: Vec<char>,
}
impl<'a> Chunk<'a> {
fn from_next<NEXT>(next: NEXT) -> Chunk<'a>
where NEXT: FnMut()->ChunkNextResult+'a
{
Chunk { next: Box::new(next), unnext_stack: Vec::new() }
}
pub fn from_str(a: &'a str) -> Chunk<'a> {
let mut chars = a.chars();
Chunk::from_next(move || {
if let Some(x) = chars.next() {
ChunkNextResult::Char(x)
} else {
ChunkNextResult::Eof
}
})
}
pub fn next(&mut self) -> ChunkNextResult {
let x_op = self.unnext_stack.pop();
if let Some(x) = x_op {
ChunkNextResult::Char(x)
} else {
(self.next)()
}
}
pub fn unnext(&mut self, x: char) {
self.unnext_stack.push(x);
}
}
#[derive(Debug, PartialEq, Eq)]
pub enum ParserResult<A> {
More,
Done(A),
Fail,
}
#[derive(Debug, PartialEq, Eq)]
pub enum ParserResponse<A> {
None,
Result(ParserResult<A>),
}
pub enum ParserRequest<'a> {
Reset,
ProcessChunk(*mut Chunk<'a>),
}
pub struct Parser<'a,A> {
process: Box<dyn FnMut(ParserRequest<'a>)->ParserResponse<A>+'a>,
}
impl<'a> Parser<'a,()> {
pub fn match_eof() -> Parser<'a,()> {
Parser::from_process(|request| {
match request {
ParserRequest::ProcessChunk(chunk) => {
let chunk = unsafe { &mut *chunk };
match chunk.next() {
ChunkNextResult::Empty => {
return ParserResponse::Result(ParserResult::More);
},
ChunkNextResult::Char(x) => {
chunk.unnext(x);
return ParserResponse::Result(ParserResult::Fail);
},
ChunkNextResult::Eof => {
return ParserResponse::Result(ParserResult::Done(()));
}
}
},
ParserRequest::Reset => {
return ParserResponse::None
},
}
})
}
pub fn match_char(x: char) -> Parser<'a,()> {
Parser::from_process(move |request| {
match request {
ParserRequest::ProcessChunk(chunk) => {
let chunk = unsafe { &mut *chunk };
match chunk.next() {
ChunkNextResult::Empty => {
return ParserResponse::Result(ParserResult::More);
},
ChunkNextResult::Char(c) => {
if c == x {
return ParserResponse::Result(ParserResult::Done(()));
} else {
chunk.unnext(c);
return ParserResponse::Result(ParserResult::Fail);
}
},
ChunkNextResult::Eof => {
return ParserResponse::Result(ParserResult::Fail);
},
}
},
ParserRequest::Reset => {
return ParserResponse::None;
},
}
})
}
pub fn match_string(x: &'a str) -> Parser<'a,()> {
let mut index: usize = 0;
Parser::from_process(move |request| {
match request {
ParserRequest::ProcessChunk(chunk) => {
let chunk = unsafe { &mut *chunk };
match chunk.next() {
ChunkNextResult::Empty => {
return ParserResponse::Result(ParserResult::More);
},
ChunkNextResult::Char(c) => {
if x.chars().nth(index) == Some(c) {
if index == x.len()-1 {
index = 0;
return ParserResponse::Result(ParserResult::Done(()));
} else {
index += 1;
return ParserResponse::Result(ParserResult::More);
}
} else {
chunk.unnext(c);
return ParserResponse::Result(ParserResult::Fail);
}
},
ChunkNextResult::Eof => {
return ParserResponse::Result(ParserResult::Fail);
}
}
},
ParserRequest::Reset => {
index = 0;
return ParserResponse::None;
}
}
})
}
}
impl<'a,A:'a> Parser<'a,A> {
fn from_process<PROCESS>(step: PROCESS) -> Parser<'a,A>
where PROCESS: FnMut(ParserRequest<'a>)->ParserResponse<A>+'a
{
Parser { process: Box::new(step) }
}
pub fn run(&mut self, chunk: &mut Chunk<'a>) -> ParserResult<A> {
loop {
let r = self.step(chunk);
match r {
ParserResult::Done(_) => {
return r;
},
ParserResult::Fail => {
return r;
},
_ => {}
}
match chunk.next() {
ChunkNextResult::Empty => {
return ParserResult::More;
},
ChunkNextResult::Char(x) => {
chunk.unnext(x);
},
_ => {},
}
}
}
pub fn reset(&mut self) {
(self.process)(ParserRequest::Reset);
}
pub fn step(&mut self, chunk: &mut Chunk<'a>) -> ParserResult<A> {
let response = (self.process)(ParserRequest::ProcessChunk(chunk));
match response {
ParserResponse::Result(result) => {
return result;
}
_ => {
unreachable!("process should always return result when processing chunk");
}
}
}
pub fn map<B: 'a, FN: Fn(A)->B+'a>(mut self, f: FN) -> Parser<'a,B> {
Parser::from_process(move |request| {
match request {
ParserRequest::ProcessChunk(chunk) => {
let chunk = unsafe { &mut *chunk };
match self.step(chunk) {
ParserResult::More => ParserResponse::Result(ParserResult::More),
ParserResult::Done(a) => ParserResponse::Result(ParserResult::Done(f(a))),
ParserResult::Fail => ParserResponse::Result(ParserResult::Fail),
}
},
ParserRequest::Reset => {
self.reset();
ParserResponse::None
}
}
})
}
pub fn map_to<B: Clone+'a>(self, b: B) -> Parser<'a,B> {
self.map(move |_| b.clone())
}
pub fn match_first(mut elem_parsers: Vec<Parser<'a,A>>) -> Parser<'a,A> {
let mut elem_parser_indices: Vec<usize> = (0..elem_parsers.len()).collect();
Parser::from_process(move |request| {
match request {
ParserRequest::ProcessChunk(chunk) => {
let chunk = unsafe { &mut *chunk };
for idx in (0..elem_parser_indices.len()).rev() {
let mut done_op = None;
let mut remove_it = false;
{
let elem_parser = &mut elem_parsers[elem_parser_indices[idx]];
match elem_parser.step(chunk) {
ParserResult::More => {},
ParserResult::Done(x) => {
done_op = Some(x);
},
ParserResult::Fail => {
remove_it = true;
},
}
}
if let Some(done) = done_op {
elem_parser_indices.clear();
for i in 0..elem_parsers.len() {
elem_parser_indices.push(i);
}
for elem_parser in &mut elem_parsers {
elem_parser.reset();
}
return ParserResponse::Result(ParserResult::Done(done));
}
if remove_it {
elem_parser_indices.remove(idx);
}
}
if elem_parser_indices.len() == 0 {
return ParserResponse::Result(ParserResult::Fail);
} else {
return ParserResponse::Result(ParserResult::More);
}
},
ParserRequest::Reset => {
elem_parser_indices.clear();
for i in 0..elem_parsers.len() {
elem_parser_indices.push(i);
}
for elem_parser in &mut elem_parsers {
elem_parser.reset();
}
return ParserResponse::None;
},
}
})
}
pub fn optional(mut self) -> Parser<'a,Option<A>> {
Parser::from_process(move |request| {
match request {
ParserRequest::ProcessChunk(chunk) => {
let chunk = unsafe { &mut *chunk };
match self.step(chunk) {
ParserResult::More => {
return ParserResponse::Result(ParserResult::More);
},
ParserResult::Done(x) => {
return ParserResponse::Result(ParserResult::Done(Some(x)));
},
ParserResult::Fail => {
return ParserResponse::Result(ParserResult::Done(None));
},
}
},
ParserRequest::Reset => {
self.reset();
return ParserResponse::None;
}
}
})
}
pub fn tuple2<B:'a>(mut self, mut other: Parser<'a,B>) -> Parser<'a,(A,B)> {
let mut a_op = None;
Parser::from_process(move |request| {
match request {
ParserRequest::ProcessChunk(chunk) => {
let chunk = unsafe { &mut *chunk };
if a_op.is_none() {
match self.step(chunk) {
ParserResult::More => {
return ParserResponse::Result(ParserResult::More);
},
ParserResult::Done(a) => {
a_op = Some(a);
return ParserResponse::Result(ParserResult::More);
},
ParserResult::Fail => {
return ParserResponse::Result(ParserResult::Fail);
}
};
}
assert!(a_op.is_some());
match other.step(chunk) {
ParserResult::More => {
return ParserResponse::Result(ParserResult::More);
},
ParserResult::Done(b) => {
let mut a_op_2 = None;
std::mem::swap(&mut a_op_2, &mut a_op);
return ParserResponse::Result(ParserResult::Done((a_op_2.unwrap(), b)));
},
ParserResult::Fail => {
return ParserResponse::Result(ParserResult::Fail);
}
}
},
ParserRequest::Reset => {
a_op = None;
self.reset();
other.reset();
return ParserResponse::None;
},
}
})
}
pub fn tuple3<B:'a,C:'a>(self, parser_b: Parser<'a,B>, parser_c: Parser<'a,C>) -> Parser<'a,(A,B,C)> {
self.tuple2(parser_b).tuple2(parser_c).map(|((a,b),c)| (a,b,c))
}
pub fn tuple4<B:'a,C:'a,D:'a>(self, parser_b: Parser<'a,B>, parser_c: Parser<'a,C>, parser_d: Parser<'a,D>) -> Parser<'a,(A,B,C,D)> {
self.tuple3(parser_b, parser_c).tuple2(parser_d).map(|((a,b ,c),d)| (a,b,c,d))
}
pub fn tuple5<B:'a,C:'a,D:'a,E:'a>(self, parser_b: Parser<'a,B>, parser_c: Parser<'a,C>, parser_d: Parser<'a,D>, parser_e: Parser<'a,E>) -> Parser<'a,(A,B,C,D,E)> {
self.tuple3(parser_b, parser_c).tuple3(parser_d, parser_e).map(|((a,b ,c),d,e)| (a,b,c,d,e))
}
pub fn tuple6<B:'a,C:'a,D:'a,E:'a,F:'a>(self, parser_b: Parser<'a,B>, parser_c: Parser<'a,C>, parser_d: Parser<'a,D>, parser_e: Parser<'a,E>, parser_f: Parser<'a,F>) -> Parser<'a,(A,B,C,D,E,F)> {
self.tuple4(parser_b, parser_c, parser_d).tuple3(parser_e, parser_f).map(|((a,b ,c,d),e,f)| (a,b,c,d,e,f))
}
pub fn zero_or_more(mut self) -> Parser<'a,Vec<A>> {
let mut elements: Vec<A> = Vec::new();
Parser::from_process(move |request| {
match request {
ParserRequest::ProcessChunk(chunk) => {
let chunk = unsafe { &mut *chunk };
let x = self.step(chunk);
match x {
ParserResult::More => {
return ParserResponse::Result(ParserResult::More);
},
ParserResult::Done(x2) => {
elements.push(x2);
return ParserResponse::Result(ParserResult::More);
},
ParserResult::Fail => {
let mut result = Vec::new();
std::mem::swap(&mut result, &mut elements);
return ParserResponse::Result(ParserResult::Done(result));
}
}
},
ParserRequest::Reset => {
elements.clear();
self.reset();
return ParserResponse::None;
},
}
})
}
pub fn one_or_more(mut self) -> Parser<'a,Vec<A>> {
let mut elements: Vec<A> = Vec::new();
Parser::from_process(move |request| {
match request {
ParserRequest::ProcessChunk(chunk) => {
let chunk = unsafe { &mut *chunk };
let x = self.step(chunk);
match x {
ParserResult::More => {
return ParserResponse::Result(ParserResult::More);
},
ParserResult::Done(x2) => {
elements.push(x2);
return ParserResponse::Result(ParserResult::More);
},
ParserResult::Fail => {
let mut result = Vec::new();
std::mem::swap(&mut result, &mut elements);
if result.len() == 0 {
return ParserResponse::Result(ParserResult::Fail);
}
return ParserResponse::Result(ParserResult::Done(result));
}
}
},
ParserRequest::Reset => {
elements.clear();
self.reset();
return ParserResponse::None;
},
}
})
}
}
#[cfg(test)]
use super::parser::{Chunk, ChunkNextResult, Parser, ParserResult};
#[test]
fn parse_text_match_eof_matched() {
let mut parser: Parser<()> = Parser::match_eof();
let input = "";
let mut chunk = Chunk::from_str(&input);
let r = parser.step(&mut chunk);
assert_eq!(r, ParserResult::Done(()));
}
#[test]
fn parse_text_match_eof_unmatched() {
let mut parser: Parser<()> = Parser::match_eof();
let input = "a";
let mut chunk = Chunk::from_str(&input);
let r = parser.step(&mut chunk);
assert_eq!(r, ParserResult::Fail);
assert_eq!(chunk.next(), ChunkNextResult::Char('a'));
}
#[test]
fn parse_test_match_char_matched() {
let mut parser: Parser<char> = Parser::match_char('a').map_to('a');
let input = "a";
let mut chunk = Chunk::from_str(&input);
let r = parser.step(&mut chunk);
assert_eq!(r, ParserResult::Done('a'));
assert_eq!(chunk.next(), ChunkNextResult::Eof);
}
#[test]
fn parse_test_march_char_unmatched() {
let mut parser: Parser<()> = Parser::match_char('a');
let input = "b";
let mut chunk = Chunk::from_str(&input);
let r = parser.step(&mut chunk);
assert_eq!(r, ParserResult::Fail);
assert_eq!(chunk.next(), ChunkNextResult::Char('b'));
}
#[test]
fn parse_test_match_string_matched() {
let mut parser: Parser<()> = Parser::match_string("cat");
let input = "cat";
let mut chunk = Chunk::from_str(&input);
let r = parser.run(&mut chunk);
assert_eq!(r, ParserResult::Done(()));
}
#[test]
fn parse_test_match_string_unmatched() {
let mut parser: Parser<()> = Parser::match_string("cat");
let input = "dog";
let mut chunk = Chunk::from_str(&input);
let r = parser.run(&mut chunk);
assert_eq!(r, ParserResult::Fail);
}
#[test]
fn parse_test_match_first() {
#[derive(Clone, Debug, PartialEq, Eq)]
enum Animal {
Cat,
Dog,
Rabbit,
}
let mut parser: Parser<Animal> =
Parser::match_first(vec![
Parser::match_string("cat").map_to(Animal::Cat),
Parser::match_string("dog").map_to(Animal::Dog),
Parser::match_string("rabbit").map_to(Animal::Rabbit),
]);
let input = "dog";
let mut chunk = Chunk::from_str(&input);
let r = parser.run(&mut chunk);
assert_eq!(r, ParserResult::Done(Animal::Dog));
}
#[test]
fn parse_test_int() {
let mut parser: Parser<usize> = match_int();
let input = "42";
let mut chunk = Chunk::from_str(&input);
let r = parser.run(&mut chunk);
assert_eq!(r, ParserResult::Done(42));
}
#[test]
fn parse_test_optional_and_tuple() {
#[derive(Clone, Debug, PartialEq, Eq)]
enum Animal {
Cat,
Dog,
}
let mut parser =
Parser::match_string("cat").map_to(Animal::Cat).optional()
.tuple2(
Parser::match_string("dog").map_to(Animal::Dog)
);
{
let input = "catdog";
let mut chunk = Chunk::from_str(&input);
let r = parser.run(&mut chunk);
assert_eq!(r, ParserResult::Done((Some(Animal::Cat), Animal::Dog)));
}
{
let input = "dog";
let mut chunk = Chunk::from_str(&input);
let r = parser.run(&mut chunk);
assert_eq!(r, ParserResult::Done((None, Animal::Dog)));
}
}
#[cfg(test)]
fn match_digit<'a>() -> Parser<'a,u8> {
let digit_parsers: Vec<Parser<'a,u8>> = (0..10).into_iter().map(|x| Parser::match_char((('0' as u8) + x) as char).map_to(x as u8)).collect();
Parser::match_first(digit_parsers)
}
#[cfg(test)]
fn match_int<'a>() -> Parser<'a,usize> {
match_digit()
.one_or_more()
.map(|digits| {
let mut result: usize = 0;
let mut placement: usize = 1;
for i in (0..digits.len()).rev() {
result += (digits[i] as usize) * placement;
placement *= 10;
}
result
})
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment