Created
June 15, 2023 10:49
-
-
Save reinhrst/4aa525c191bc1cf69e70a62f00b474fd to your computer and use it in GitHub Desktop.
Nom streaming test (blog post to follow soon)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[package]
name = "nomtest"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
nom = "7.1.3"
tempfile = "3.5.0"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::error::Error; | |
use std::fs::File; | |
use std::io::{Read, Seek, Write}; | |
use tempfile; | |
use nom::{bytes, error, Err, IResult}; | |
fn parse_until_null_byte(unparsed_data: &[u8]) -> IResult<&[u8], Vec<u8>> { | |
let (unparsed_data, parse_result) = | |
bytes::streaming::take_till::<_, _, error::Error<_>>(|b| b == 0x00)(unparsed_data)?; | |
let (unparsed_data, _) = bytes::streaming::take_while_m_n(0, 1, |b| b == 0x00)(unparsed_data)?; | |
Ok((unparsed_data, parse_result.to_vec())) | |
} | |
/// Since we want a file to parse, let's start by creating something | |
fn create_rewound_file() -> Result<File, Box<dyn Error>> { | |
let mut file = tempfile::tempfile()?; | |
for i in 1u8..0x10 { | |
let range: Vec<u8> = (0u8..=i).collect(); | |
file.write(&range)?; | |
if i % 2 == 0 { | |
file.write(&[0u8])?; | |
} | |
} | |
file.rewind()?; | |
return Ok(file); | |
} | |
/// Maximum number of bytes requested from the file per read.
const CHUNK_SIZE: usize = 8;

/// Iterator that yields the contents of a file as consecutive byte chunks.
///
/// Each item holds between 1 and [`CHUNK_SIZE`] bytes. Iteration stops at the
/// first read that returns zero bytes, which is treated as end of file.
struct FileIterator {
    /// File being read; reading continues from its current position.
    file: File,
}

impl Iterator for FileIterator {
    type Item = Vec<u8>;

    /// Reads the next chunk from the file.
    ///
    /// Returns `None` once a read yields zero bytes.
    ///
    /// # Panics
    ///
    /// Panics with "Cannot read file" on any read error other than
    /// `ErrorKind::Interrupted`, which is retried.
    fn next(&mut self) -> Option<Self::Item> {
        let mut buffer: Vec<u8> = vec![0u8; CHUNK_SIZE];
        let len = loop {
            match self.file.read(&mut buffer) {
                Ok(len) => break len,
                // An interrupted read is not a failure; the operation should
                // simply be attempted again.
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(e) => panic!("Cannot read file: {:?}", e),
            }
        };
        if len == 0 {
            // For now assuming EOF; production code might want to do
            // something else here.
            None
        } else {
            // Keep only the bytes that were actually read.
            buffer.truncate(len);
            Some(buffer)
        }
    }
}
/// Buffering state for parsing a stream of byte chunks.
///
/// Chunks are pulled from `input_iterator` on demand and appended to
/// `parsing_data`; `unparsed_data_offset` marks how far into that buffer
/// parsing has progressed.
struct NullDelimitedVectorParser {
    /// Source of raw chunks, polled whenever more bytes are needed.
    input_iterator: Box<dyn Iterator<Item = Vec<u8>>>,
    /// Buffer holding the bytes currently being parsed.
    parsing_data: Vec<u8>,
    /// Index into `parsing_data` at which the not-yet-parsed bytes begin.
    unparsed_data_offset: usize,
}

impl NullDelimitedVectorParser {
    /// Creates a parser with an empty buffer that reads from `input_iterator`.
    pub fn new(input_iterator: Box<dyn Iterator<Item = Vec<u8>>>) -> NullDelimitedVectorParser {
        Self {
            input_iterator,
            parsing_data: Vec::new(),
            unparsed_data_offset: 0,
        }
    }

    /// Returns the part of the buffer that has not been parsed yet.
    pub fn get_slice(&self) -> &[u8] {
        &self.parsing_data[self.unparsed_data_offset..]
    }

    /// Returns the index within the internal buffer at which `slice` starts.
    ///
    /// `slice` must be a tail of the internal buffer, i.e. it must end exactly
    /// where the buffer ends.
    ///
    /// # Panics
    ///
    /// Panics if `slice` does not end at the end of the buffer or is longer
    /// than the buffer.
    pub fn get_slice_offset(&self, slice: &[u8]) -> usize {
        let data_end = self.parsing_data.as_ptr() as usize + self.parsing_data.len();
        let slice_end = slice.as_ptr() as usize + slice.len();
        // Validate before subtracting so a foreign slice cannot cause an
        // arithmetic underflow instead of a clear assertion failure.
        assert_eq!(data_end, slice_end);
        assert!(slice.len() <= self.parsing_data.len());
        self.parsing_data.len() - slice.len()
    }

    /// Pulls the next chunk from the source and appends it to the unparsed
    /// remainder of the buffer; already-parsed bytes are discarded.
    ///
    /// # Errors
    ///
    /// Returns an `"EOF"` error when the source iterator is exhausted; the
    /// buffer is left untouched in that case.
    fn read_more_data_from_source(&mut self) -> Result<(), Box<dyn Error>> {
        let new_data = self.input_iterator.next().ok_or("EOF")?;
        // Drop the consumed prefix and append in place instead of building
        // (and then copying) a brand-new vector on every refill.
        self.parsing_data.drain(..self.unparsed_data_offset);
        self.parsing_data.extend_from_slice(&new_data);
        self.unparsed_data_offset = 0;
        Ok(())
    }
}
impl Iterator for NullDelimitedVectorParser { | |
type Item = Vec<u8>; | |
fn next(&mut self) -> Option<Self::Item> { | |
loop { | |
match parse_until_null_byte(self.get_slice()) { | |
Ok((new_unparsed_data, return_value)) => { | |
self.unparsed_data_offset = self.get_slice_offset(new_unparsed_data); | |
return Some(return_value.to_vec()); | |
} | |
Err(Err::Incomplete(_)) => { | |
println!("More data needed"); | |
match self.read_more_data_from_source() { | |
Ok(_) => continue, | |
Err(_) => { | |
if self.get_slice().len() == 0 { | |
println!("Done"); | |
} else { | |
println!("There are {} bytes remaining", self.get_slice().len()); | |
} | |
return None | |
} | |
} | |
} | |
Err(e) => { | |
panic!("Parse error: {}", e); | |
} | |
}; | |
} | |
} | |
} | |
fn main() -> Result<(), Box<dyn Error>> { | |
let file = create_rewound_file()?; | |
for bs in NullDelimitedVectorParser::new(Box::new(FileIterator { file })) { | |
println!("Found {:x?}", bs) | |
} | |
Ok(()) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment