Last active
February 10, 2022 02:30
-
-
Save clinuxrulz/98f4f9026b5d8edde42909d8aee99e5a to your computer and use it in GitHub Desktop.
Iteratee parsing in Rust
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#[derive(Debug, PartialEq, Eq)] | |
pub enum ChunkNextResult { | |
Empty, | |
Char(char), | |
Eof, | |
} | |
pub struct Chunk<'a> { | |
next: Box<dyn FnMut()->ChunkNextResult+'a>, | |
unnext_stack: Vec<char>, | |
} | |
impl<'a> Chunk<'a> { | |
fn from_next<NEXT>(next: NEXT) -> Chunk<'a> | |
where NEXT: FnMut()->ChunkNextResult+'a | |
{ | |
Chunk { next: Box::new(next), unnext_stack: Vec::new() } | |
} | |
pub fn from_str(a: &'a str) -> Chunk<'a> { | |
let mut chars = a.chars(); | |
Chunk::from_next(move || { | |
if let Some(x) = chars.next() { | |
ChunkNextResult::Char(x) | |
} else { | |
ChunkNextResult::Eof | |
} | |
}) | |
} | |
pub fn next(&mut self) -> ChunkNextResult { | |
let x_op = self.unnext_stack.pop(); | |
if let Some(x) = x_op { | |
ChunkNextResult::Char(x) | |
} else { | |
(self.next)() | |
} | |
} | |
pub fn unnext(&mut self, x: char) { | |
self.unnext_stack.push(x); | |
} | |
} | |
#[derive(Debug, PartialEq, Eq)] | |
pub enum ParserResult<A> { | |
More, | |
Done(A), | |
Fail, | |
} | |
#[derive(Debug, PartialEq, Eq)] | |
pub enum ParserResponse<A> { | |
None, | |
Result(ParserResult<A>), | |
} | |
pub enum ParserRequest<'a> { | |
Reset, | |
ProcessChunk(*mut Chunk<'a>), | |
} | |
pub struct Parser<'a,A> { | |
process: Box<dyn FnMut(ParserRequest<'a>)->ParserResponse<A>+'a>, | |
} | |
impl<'a> Parser<'a,()> { | |
pub fn match_eof() -> Parser<'a,()> { | |
Parser::from_process(|request| { | |
match request { | |
ParserRequest::ProcessChunk(chunk) => { | |
let chunk = unsafe { &mut *chunk }; | |
match chunk.next() { | |
ChunkNextResult::Empty => { | |
return ParserResponse::Result(ParserResult::More); | |
}, | |
ChunkNextResult::Char(x) => { | |
chunk.unnext(x); | |
return ParserResponse::Result(ParserResult::Fail); | |
}, | |
ChunkNextResult::Eof => { | |
return ParserResponse::Result(ParserResult::Done(())); | |
} | |
} | |
}, | |
ParserRequest::Reset => { | |
return ParserResponse::None | |
}, | |
} | |
}) | |
} | |
pub fn match_char(x: char) -> Parser<'a,()> { | |
Parser::from_process(move |request| { | |
match request { | |
ParserRequest::ProcessChunk(chunk) => { | |
let chunk = unsafe { &mut *chunk }; | |
match chunk.next() { | |
ChunkNextResult::Empty => { | |
return ParserResponse::Result(ParserResult::More); | |
}, | |
ChunkNextResult::Char(c) => { | |
if c == x { | |
return ParserResponse::Result(ParserResult::Done(())); | |
} else { | |
chunk.unnext(c); | |
return ParserResponse::Result(ParserResult::Fail); | |
} | |
}, | |
ChunkNextResult::Eof => { | |
return ParserResponse::Result(ParserResult::Fail); | |
}, | |
} | |
}, | |
ParserRequest::Reset => { | |
return ParserResponse::None; | |
}, | |
} | |
}) | |
} | |
pub fn match_string(x: &'a str) -> Parser<'a,()> { | |
let mut index: usize = 0; | |
Parser::from_process(move |request| { | |
match request { | |
ParserRequest::ProcessChunk(chunk) => { | |
let chunk = unsafe { &mut *chunk }; | |
match chunk.next() { | |
ChunkNextResult::Empty => { | |
return ParserResponse::Result(ParserResult::More); | |
}, | |
ChunkNextResult::Char(c) => { | |
if x.chars().nth(index) == Some(c) { | |
if index == x.len()-1 { | |
index = 0; | |
return ParserResponse::Result(ParserResult::Done(())); | |
} else { | |
index += 1; | |
return ParserResponse::Result(ParserResult::More); | |
} | |
} else { | |
chunk.unnext(c); | |
return ParserResponse::Result(ParserResult::Fail); | |
} | |
}, | |
ChunkNextResult::Eof => { | |
return ParserResponse::Result(ParserResult::Fail); | |
} | |
} | |
}, | |
ParserRequest::Reset => { | |
index = 0; | |
return ParserResponse::None; | |
} | |
} | |
}) | |
} | |
} | |
impl<'a,A:'a> Parser<'a,A> { | |
fn from_process<PROCESS>(step: PROCESS) -> Parser<'a,A> | |
where PROCESS: FnMut(ParserRequest<'a>)->ParserResponse<A>+'a | |
{ | |
Parser { process: Box::new(step) } | |
} | |
pub fn run(&mut self, chunk: &mut Chunk<'a>) -> ParserResult<A> { | |
loop { | |
let r = self.step(chunk); | |
match r { | |
ParserResult::Done(_) => { | |
return r; | |
}, | |
ParserResult::Fail => { | |
return r; | |
}, | |
_ => {} | |
} | |
match chunk.next() { | |
ChunkNextResult::Empty => { | |
return ParserResult::More; | |
}, | |
ChunkNextResult::Char(x) => { | |
chunk.unnext(x); | |
}, | |
_ => {}, | |
} | |
} | |
} | |
pub fn reset(&mut self) { | |
(self.process)(ParserRequest::Reset); | |
} | |
pub fn step(&mut self, chunk: &mut Chunk<'a>) -> ParserResult<A> { | |
let response = (self.process)(ParserRequest::ProcessChunk(chunk)); | |
match response { | |
ParserResponse::Result(result) => { | |
return result; | |
} | |
_ => { | |
unreachable!("process should always return result when processing chunk"); | |
} | |
} | |
} | |
pub fn map<B: 'a, FN: Fn(A)->B+'a>(mut self, f: FN) -> Parser<'a,B> { | |
Parser::from_process(move |request| { | |
match request { | |
ParserRequest::ProcessChunk(chunk) => { | |
let chunk = unsafe { &mut *chunk }; | |
match self.step(chunk) { | |
ParserResult::More => ParserResponse::Result(ParserResult::More), | |
ParserResult::Done(a) => ParserResponse::Result(ParserResult::Done(f(a))), | |
ParserResult::Fail => ParserResponse::Result(ParserResult::Fail), | |
} | |
}, | |
ParserRequest::Reset => { | |
self.reset(); | |
ParserResponse::None | |
} | |
} | |
}) | |
} | |
pub fn map_to<B: Clone+'a>(self, b: B) -> Parser<'a,B> { | |
self.map(move |_| b.clone()) | |
} | |
pub fn match_first(mut elem_parsers: Vec<Parser<'a,A>>) -> Parser<'a,A> { | |
let mut elem_parser_indices: Vec<usize> = (0..elem_parsers.len()).collect(); | |
Parser::from_process(move |request| { | |
match request { | |
ParserRequest::ProcessChunk(chunk) => { | |
let chunk = unsafe { &mut *chunk }; | |
for idx in (0..elem_parser_indices.len()).rev() { | |
let mut done_op = None; | |
let mut remove_it = false; | |
{ | |
let elem_parser = &mut elem_parsers[elem_parser_indices[idx]]; | |
match elem_parser.step(chunk) { | |
ParserResult::More => {}, | |
ParserResult::Done(x) => { | |
done_op = Some(x); | |
}, | |
ParserResult::Fail => { | |
remove_it = true; | |
}, | |
} | |
} | |
if let Some(done) = done_op { | |
elem_parser_indices.clear(); | |
for i in 0..elem_parsers.len() { | |
elem_parser_indices.push(i); | |
} | |
for elem_parser in &mut elem_parsers { | |
elem_parser.reset(); | |
} | |
return ParserResponse::Result(ParserResult::Done(done)); | |
} | |
if remove_it { | |
elem_parser_indices.remove(idx); | |
} | |
} | |
if elem_parser_indices.len() == 0 { | |
return ParserResponse::Result(ParserResult::Fail); | |
} else { | |
return ParserResponse::Result(ParserResult::More); | |
} | |
}, | |
ParserRequest::Reset => { | |
elem_parser_indices.clear(); | |
for i in 0..elem_parsers.len() { | |
elem_parser_indices.push(i); | |
} | |
for elem_parser in &mut elem_parsers { | |
elem_parser.reset(); | |
} | |
return ParserResponse::None; | |
}, | |
} | |
}) | |
} | |
pub fn optional(mut self) -> Parser<'a,Option<A>> { | |
Parser::from_process(move |request| { | |
match request { | |
ParserRequest::ProcessChunk(chunk) => { | |
let chunk = unsafe { &mut *chunk }; | |
match self.step(chunk) { | |
ParserResult::More => { | |
return ParserResponse::Result(ParserResult::More); | |
}, | |
ParserResult::Done(x) => { | |
return ParserResponse::Result(ParserResult::Done(Some(x))); | |
}, | |
ParserResult::Fail => { | |
return ParserResponse::Result(ParserResult::Done(None)); | |
}, | |
} | |
}, | |
ParserRequest::Reset => { | |
self.reset(); | |
return ParserResponse::None; | |
} | |
} | |
}) | |
} | |
pub fn tuple2<B:'a>(mut self, mut other: Parser<'a,B>) -> Parser<'a,(A,B)> { | |
let mut a_op = None; | |
Parser::from_process(move |request| { | |
match request { | |
ParserRequest::ProcessChunk(chunk) => { | |
let chunk = unsafe { &mut *chunk }; | |
if a_op.is_none() { | |
match self.step(chunk) { | |
ParserResult::More => { | |
return ParserResponse::Result(ParserResult::More); | |
}, | |
ParserResult::Done(a) => { | |
a_op = Some(a); | |
return ParserResponse::Result(ParserResult::More); | |
}, | |
ParserResult::Fail => { | |
return ParserResponse::Result(ParserResult::Fail); | |
} | |
}; | |
} | |
assert!(a_op.is_some()); | |
match other.step(chunk) { | |
ParserResult::More => { | |
return ParserResponse::Result(ParserResult::More); | |
}, | |
ParserResult::Done(b) => { | |
let mut a_op_2 = None; | |
std::mem::swap(&mut a_op_2, &mut a_op); | |
return ParserResponse::Result(ParserResult::Done((a_op_2.unwrap(), b))); | |
}, | |
ParserResult::Fail => { | |
return ParserResponse::Result(ParserResult::Fail); | |
} | |
} | |
}, | |
ParserRequest::Reset => { | |
a_op = None; | |
self.reset(); | |
other.reset(); | |
return ParserResponse::None; | |
}, | |
} | |
}) | |
} | |
pub fn tuple3<B:'a,C:'a>(self, parser_b: Parser<'a,B>, parser_c: Parser<'a,C>) -> Parser<'a,(A,B,C)> { | |
self.tuple2(parser_b).tuple2(parser_c).map(|((a,b),c)| (a,b,c)) | |
} | |
pub fn tuple4<B:'a,C:'a,D:'a>(self, parser_b: Parser<'a,B>, parser_c: Parser<'a,C>, parser_d: Parser<'a,D>) -> Parser<'a,(A,B,C,D)> { | |
self.tuple3(parser_b, parser_c).tuple2(parser_d).map(|((a,b ,c),d)| (a,b,c,d)) | |
} | |
pub fn tuple5<B:'a,C:'a,D:'a,E:'a>(self, parser_b: Parser<'a,B>, parser_c: Parser<'a,C>, parser_d: Parser<'a,D>, parser_e: Parser<'a,E>) -> Parser<'a,(A,B,C,D,E)> { | |
self.tuple3(parser_b, parser_c).tuple3(parser_d, parser_e).map(|((a,b ,c),d,e)| (a,b,c,d,e)) | |
} | |
pub fn tuple6<B:'a,C:'a,D:'a,E:'a,F:'a>(self, parser_b: Parser<'a,B>, parser_c: Parser<'a,C>, parser_d: Parser<'a,D>, parser_e: Parser<'a,E>, parser_f: Parser<'a,F>) -> Parser<'a,(A,B,C,D,E,F)> { | |
self.tuple4(parser_b, parser_c, parser_d).tuple3(parser_e, parser_f).map(|((a,b ,c,d),e,f)| (a,b,c,d,e,f)) | |
} | |
pub fn zero_or_more(mut self) -> Parser<'a,Vec<A>> { | |
let mut elements: Vec<A> = Vec::new(); | |
Parser::from_process(move |request| { | |
match request { | |
ParserRequest::ProcessChunk(chunk) => { | |
let chunk = unsafe { &mut *chunk }; | |
let x = self.step(chunk); | |
match x { | |
ParserResult::More => { | |
return ParserResponse::Result(ParserResult::More); | |
}, | |
ParserResult::Done(x2) => { | |
elements.push(x2); | |
return ParserResponse::Result(ParserResult::More); | |
}, | |
ParserResult::Fail => { | |
let mut result = Vec::new(); | |
std::mem::swap(&mut result, &mut elements); | |
return ParserResponse::Result(ParserResult::Done(result)); | |
} | |
} | |
}, | |
ParserRequest::Reset => { | |
elements.clear(); | |
self.reset(); | |
return ParserResponse::None; | |
}, | |
} | |
}) | |
} | |
pub fn one_or_more(mut self) -> Parser<'a,Vec<A>> { | |
let mut elements: Vec<A> = Vec::new(); | |
Parser::from_process(move |request| { | |
match request { | |
ParserRequest::ProcessChunk(chunk) => { | |
let chunk = unsafe { &mut *chunk }; | |
let x = self.step(chunk); | |
match x { | |
ParserResult::More => { | |
return ParserResponse::Result(ParserResult::More); | |
}, | |
ParserResult::Done(x2) => { | |
elements.push(x2); | |
return ParserResponse::Result(ParserResult::More); | |
}, | |
ParserResult::Fail => { | |
let mut result = Vec::new(); | |
std::mem::swap(&mut result, &mut elements); | |
if result.len() == 0 { | |
return ParserResponse::Result(ParserResult::Fail); | |
} | |
return ParserResponse::Result(ParserResult::Done(result)); | |
} | |
} | |
}, | |
ParserRequest::Reset => { | |
elements.clear(); | |
self.reset(); | |
return ParserResponse::None; | |
}, | |
} | |
}) | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#[cfg(test)] | |
use super::parser::{Chunk, ChunkNextResult, Parser, ParserResult}; | |
#[test] | |
fn parse_text_match_eof_matched() { | |
let mut parser: Parser<()> = Parser::match_eof(); | |
let input = ""; | |
let mut chunk = Chunk::from_str(&input); | |
let r = parser.step(&mut chunk); | |
assert_eq!(r, ParserResult::Done(())); | |
} | |
#[test] | |
fn parse_text_match_eof_unmatched() { | |
let mut parser: Parser<()> = Parser::match_eof(); | |
let input = "a"; | |
let mut chunk = Chunk::from_str(&input); | |
let r = parser.step(&mut chunk); | |
assert_eq!(r, ParserResult::Fail); | |
assert_eq!(chunk.next(), ChunkNextResult::Char('a')); | |
} | |
#[test] | |
fn parse_test_match_char_matched() { | |
let mut parser: Parser<char> = Parser::match_char('a').map_to('a'); | |
let input = "a"; | |
let mut chunk = Chunk::from_str(&input); | |
let r = parser.step(&mut chunk); | |
assert_eq!(r, ParserResult::Done('a')); | |
assert_eq!(chunk.next(), ChunkNextResult::Eof); | |
} | |
#[test] | |
fn parse_test_march_char_unmatched() { | |
let mut parser: Parser<()> = Parser::match_char('a'); | |
let input = "b"; | |
let mut chunk = Chunk::from_str(&input); | |
let r = parser.step(&mut chunk); | |
assert_eq!(r, ParserResult::Fail); | |
assert_eq!(chunk.next(), ChunkNextResult::Char('b')); | |
} | |
#[test] | |
fn parse_test_match_string_matched() { | |
let mut parser: Parser<()> = Parser::match_string("cat"); | |
let input = "cat"; | |
let mut chunk = Chunk::from_str(&input); | |
let r = parser.run(&mut chunk); | |
assert_eq!(r, ParserResult::Done(())); | |
} | |
#[test] | |
fn parse_test_match_string_unmatched() { | |
let mut parser: Parser<()> = Parser::match_string("cat"); | |
let input = "dog"; | |
let mut chunk = Chunk::from_str(&input); | |
let r = parser.run(&mut chunk); | |
assert_eq!(r, ParserResult::Fail); | |
} | |
#[test] | |
fn parse_test_match_first() { | |
#[derive(Clone, Debug, PartialEq, Eq)] | |
enum Animal { | |
Cat, | |
Dog, | |
Rabbit, | |
} | |
let mut parser: Parser<Animal> = | |
Parser::match_first(vec![ | |
Parser::match_string("cat").map_to(Animal::Cat), | |
Parser::match_string("dog").map_to(Animal::Dog), | |
Parser::match_string("rabbit").map_to(Animal::Rabbit), | |
]); | |
let input = "dog"; | |
let mut chunk = Chunk::from_str(&input); | |
let r = parser.run(&mut chunk); | |
assert_eq!(r, ParserResult::Done(Animal::Dog)); | |
} | |
#[test] | |
fn parse_test_int() { | |
let mut parser: Parser<usize> = match_int(); | |
let input = "42"; | |
let mut chunk = Chunk::from_str(&input); | |
let r = parser.run(&mut chunk); | |
assert_eq!(r, ParserResult::Done(42)); | |
} | |
#[test] | |
fn parse_test_optional_and_tuple() { | |
#[derive(Clone, Debug, PartialEq, Eq)] | |
enum Animal { | |
Cat, | |
Dog, | |
} | |
let mut parser = | |
Parser::match_string("cat").map_to(Animal::Cat).optional() | |
.tuple2( | |
Parser::match_string("dog").map_to(Animal::Dog) | |
); | |
{ | |
let input = "catdog"; | |
let mut chunk = Chunk::from_str(&input); | |
let r = parser.run(&mut chunk); | |
assert_eq!(r, ParserResult::Done((Some(Animal::Cat), Animal::Dog))); | |
} | |
{ | |
let input = "dog"; | |
let mut chunk = Chunk::from_str(&input); | |
let r = parser.run(&mut chunk); | |
assert_eq!(r, ParserResult::Done((None, Animal::Dog))); | |
} | |
} | |
#[cfg(test)] | |
fn match_digit<'a>() -> Parser<'a,u8> { | |
let digit_parsers: Vec<Parser<'a,u8>> = (0..10).into_iter().map(|x| Parser::match_char((('0' as u8) + x) as char).map_to(x as u8)).collect(); | |
Parser::match_first(digit_parsers) | |
} | |
#[cfg(test)] | |
fn match_int<'a>() -> Parser<'a,usize> { | |
match_digit() | |
.one_or_more() | |
.map(|digits| { | |
let mut result: usize = 0; | |
let mut placement: usize = 1; | |
for i in (0..digits.len()).rev() { | |
result += (digits[i] as usize) * placement; | |
placement *= 10; | |
} | |
result | |
}) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment