Created
December 9, 2021 22:37
-
-
Save siritori/e4e2dc4ec2076f075f17f9e56560d8ed to your computer and use it in GitHub Desktop.
Binary parsing test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use bytes::Buf; | |
pub trait BinaryRead: Sized { | |
fn read<T: bytes::Buf>(reader: &mut BinaryReader<T>) -> Option<Self>; | |
} | |
pub struct BinaryReader<T: bytes::Buf> { | |
pub buf: T, | |
} | |
/// Decodes a stream of UTF-16 code units into a `String`.
///
/// `len_u16` is only a sizing hint: the output is pre-allocated with room for
/// `len_u16 * 2` bytes. Returns `None` as soon as the stream yields an
/// unpaired surrogate.
fn read_string_from_u16_iter(len_u16: usize, iter: impl Iterator<Item = u16>) -> Option<String> {
    let mut decoded = String::with_capacity(len_u16 * 2);
    std::char::decode_utf16(iter)
        .try_for_each(|unit| unit.map(|ch| decoded.push(ch)))
        .ok()?;
    Some(decoded)
}
impl<T: bytes::Buf> BinaryReader<T> { | |
pub fn new(buf: T) -> Self { | |
BinaryReader { buf } | |
} | |
fn check_remaining(&self, required_size: usize) -> Option<()> { | |
if self.buf.remaining() < required_size { | |
None | |
} else { | |
Some(()) | |
} | |
} | |
pub fn symbol(&mut self, symbol: &[u8]) -> Option<()> { | |
self.check_remaining(symbol.len())?; | |
if !self.buf.chunk().starts_with(symbol) { | |
None | |
} else { | |
self.buf.advance(symbol.len()); | |
Some(()) | |
} | |
} | |
pub fn sized_utf8_str(&mut self, len_u8: usize) -> Option<String> { | |
self.check_remaining(len_u8)?; | |
std::str::from_utf8(&self.buf.chunk()[..len_u8]).ok().map(&str::to_string) | |
} | |
pub fn null_terminated_utf8_str(&mut self) -> Option<String> { | |
let str_len = self.buf.chunk().iter().position(|&ch| ch == b'\0')?; | |
std::str::from_utf8(&self.buf.chunk()[..str_len]).ok().map(&str::to_string) | |
} | |
pub fn null_terminated_utf16_str(&mut self) -> Option<String> { | |
let size_all = self.buf.chunk().len() / 2; | |
let u16_iter: Vec<_> = self.buf.chunk()[..size_all * 2] | |
.chunks(2) | |
.map(|mut chunk| chunk.get_u16()) | |
.take_while(|&ch| ch != 0) | |
.collect(); | |
read_string_from_u16_iter(u16_iter.len(), u16_iter.into_iter()) | |
} | |
pub fn null_terminated_utf16le_str(&mut self) -> Option<String> { | |
let size_all = self.buf.chunk().len() / 2; | |
let u16_iter: Vec<_> = self.buf.chunk()[..size_all * 2] | |
.chunks(2) | |
.map(|mut chunk| chunk.get_u16_le()) | |
.take_while(|&ch| ch != 0) | |
.collect(); | |
read_string_from_u16_iter(u16_iter.len(), u16_iter.into_iter()) | |
} | |
pub fn sized_utf16_str(&mut self, len_u16: usize) -> Option<String> { | |
let u16_size = std::mem::size_of::<u16>(); | |
self.check_remaining(len_u16 * u16_size)?; | |
let u8_slice = &self.buf.chunk()[..len_u16 * u16_size]; | |
let u16_iter = u8_slice.chunks(u16_size).map(|mut chunk| { chunk.get_u16() }); | |
read_string_from_u16_iter(len_u16, u16_iter) | |
} | |
pub fn sized_utf16le_str(&mut self, len_u16: usize) -> Option<String> { | |
let u16_size = std::mem::size_of::<u16>(); | |
self.check_remaining(len_u16 * u16_size)?; | |
let u8_slice = &self.buf.chunk()[..len_u16 * u16_size]; | |
let u16_iter = u8_slice.chunks(u16_size).map(|mut chunk| { chunk.get_u16_le() }); | |
read_string_from_u16_iter(len_u16, u16_iter) | |
} | |
pub fn u8(&mut self) -> Option<u8> { | |
self.check_remaining(std::mem::size_of::<u8>())?; | |
Some(self.buf.get_u8()) | |
} | |
pub fn i8(&mut self) -> Option<i8> { | |
self.check_remaining(std::mem::size_of::<i8>())?; | |
Some(self.buf.get_i8()) | |
} | |
pub fn u16(&mut self) -> Option<u16> { | |
self.check_remaining(std::mem::size_of::<u16>())?; | |
Some(self.buf.get_u16()) | |
} | |
pub fn u16le(&mut self) -> Option<u16> { | |
self.check_remaining(std::mem::size_of::<u16>())?; | |
Some(self.buf.get_u16_le()) | |
} | |
pub fn i16(&mut self) -> Option<i16> { | |
self.check_remaining(std::mem::size_of::<i16>())?; | |
Some(self.buf.get_i16()) | |
} | |
pub fn i16le(&mut self) -> Option<i16> { | |
self.check_remaining(std::mem::size_of::<i16>())?; | |
Some(self.buf.get_i16_le()) | |
} | |
pub fn u32(&mut self) -> Option<u32> { | |
self.check_remaining(std::mem::size_of::<u32>())?; | |
Some(self.buf.get_u32()) | |
} | |
pub fn u32le(&mut self) -> Option<u32> { | |
self.check_remaining(std::mem::size_of::<u32>())?; | |
Some(self.buf.get_u32_le()) | |
} | |
pub fn read<U: BinaryRead>(&mut self) -> Option<U> { | |
U::read(self) | |
} | |
pub fn skip_read<U: BinaryRead + std::cmp::PartialEq>(&mut self, expected: U) -> Option<()> { | |
let v = U::read(self)?; | |
if v == expected { | |
Some(()) | |
} else { | |
None | |
} | |
} | |
} | |
/// The three file-format variants a MIDI header can declare.
///
/// The enum carries no data, so it derives `Clone`, `Copy`, `PartialEq` and
/// `Eq` in addition to `Debug`, letting callers copy and compare parsed
/// formats directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MidiFormat {
    /// Produced for a format word of `0`.
    Format0,
    /// Produced for a format word of `1`.
    Format1,
    /// Produced for a format word of `2`.
    Format2,
}
impl BinaryRead for MidiFormat { | |
fn read<T: bytes::Buf>(reader: &mut BinaryReader<T>) -> Option<Self> { | |
match reader.u16() { | |
Some(0x00_00) => Some(MidiFormat::Format0), | |
Some(0x00_01) => Some(MidiFormat::Format1), | |
Some(0x00_02) => Some(MidiFormat::Format2), | |
_ => return None, | |
} | |
} | |
} | |
#[derive(Debug)] | |
struct MidiHeader { | |
format: MidiFormat, | |
num_tracks: u16, | |
temporal_resolution: u16, | |
} | |
impl BinaryRead for MidiHeader { | |
fn read<T: bytes::Buf>(reader: &mut BinaryReader<T>) -> Option<Self> { | |
reader.symbol(b"MThd")?; | |
(reader.u32()? != 6).then(|| ())?; | |
Some(MidiHeader { | |
format: reader.read()?, | |
num_tracks: reader.u16()?, | |
temporal_resolution: reader.u16()?, | |
}) | |
} | |
} | |
fn main() { | |
let buf: Vec<u8> = vec![0x30, 0x42, 0x30, 0x44, 0x30, 0x46, 0x30, 0x48, 0x30, 0x4a, 0x00, 0xFF, 0xFF]; | |
let mut reader = BinaryReader::new(&buf[..]); | |
let s = reader.sized_utf16_str(5).unwrap_or("failure".to_string()); | |
println!("{}", &s); | |
let mut reader = BinaryReader::new(&buf[..]); | |
let s = reader.null_terminated_utf16_str().unwrap_or("failure".to_string()); | |
println!("{}", &s); | |
let buf: Vec<u8> = vec![0x42, 0x30, 0x44, 0x30, 0x46, 0x30, 0x48, 0x30, 0x4a, 0x30]; | |
let mut reader = BinaryReader::new(&buf[..]); | |
let s = reader.sized_utf16le_str(5).unwrap_or("failure".to_string()); | |
println!("{}", &s); | |
let buf: Vec<u8> = vec![230, 150, 135, 229, 173, 151, 229, 136, 151, 40, 85, 84, 70, 45, 49, 54, 41]; | |
let mut reader = BinaryReader::new(&buf[..]); | |
let s = reader.sized_utf8_str(17).unwrap_or("failure".to_string()); | |
println!("{}", &s); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment