Skip to content

Instantly share code, notes, and snippets.

@withzombies
Last active March 27, 2025 16:15
Show Gist options
  • Save withzombies/817e787ea7d47632d9d4fc2e5bd83e97 to your computer and use it in GitHub Desktop.
Save withzombies/817e787ea7d47632d9d4fc2e5bd83e97 to your computer and use it in GitHub Desktop.
A wrapper struct that turns a `Read`-only reader into a `Read + Seek` reader by lazily buffering the data it has read
#![allow(incomplete_features)]
#![feature(specialization)]
use std::cmp::min;
use std::io;
use std::io::{Read, Seek, SeekFrom};
/// Shorthand bound for anything that is both [`Read`] and [`Seek`].
///
/// The trait has no methods of its own; it only exists so the combined bound
/// can be named in a single `impl` header.
pub trait ReadSeek: Read + Seek {}

/// Every type that already implements `Read` and `Seek` is a `ReadSeek`.
impl<T> ReadSeek for T where T: Read + Seek {}
/// Wraps a forward-only [`Read`] source and remembers every byte pulled from
/// it, so that the data can be re-read and seeked over later.
///
/// Bytes are fetched from the wrapped reader lazily, only when a caller asks
/// for data that is not in the buffer yet. When the wrapped reader is itself
/// seekable, the specialised `Read`/`Seek` impls in this file forward to it
/// directly and the buffer stays unused.
#[derive(Debug)]
pub struct LazyBufferReader<R: Read> {
    /// The wrapped source; it is only ever read forwards.
    reader: R,
    /// Every byte obtained from `reader` so far, in stream order.
    buffer: Vec<u8>,
    /// Logical cursor into `buffer`. A seek may leave it past the end.
    position: usize,
}

impl<R: Read> LazyBufferReader<R> {
    /// Size of the first read request issued by [`Self::ensure_buffer`].
    const MIN_CHUNK: usize = 0x40;

    /// Upper bound for a single read request. Keeps the zero-filled scratch
    /// region small even when the caller asks for "everything".
    const MAX_CHUNK: usize = 1 << 20;

    /// Creates a reader with an empty buffer positioned at offset 0.
    ///
    /// Nothing is read from `reader` until data is actually requested.
    pub fn new(reader: R) -> LazyBufferReader<R> {
        LazyBufferReader {
            reader,
            buffer: Vec::new(),
            position: 0,
        }
    }

    /// Tries to make `buffer` hold at least `position + size` bytes.
    ///
    /// The wrapped reader is read repeatedly until the target length is
    /// reached or it reports end of stream, so short reads do not leave the
    /// buffer incomplete. Passing `usize::MAX` therefore means "buffer the
    /// rest of the stream". More than `size` bytes may be buffered because
    /// requests are issued in chunks.
    ///
    /// # Errors
    ///
    /// Returns the first error from the wrapped reader other than
    /// [`io::ErrorKind::Interrupted`], which is retried. Bytes buffered before
    /// the error are kept.
    fn ensure_buffer(&mut self, size: usize) -> io::Result<()> {
        // Saturate: `size` is `usize::MAX` when the caller wants everything,
        // and a plain `+` would overflow as soon as `position` is non-zero.
        let target = self.position.saturating_add(size);
        let mut chunk = Self::MIN_CHUNK;

        while self.buffer.len() < target {
            let filled = self.buffer.len();

            // Read straight into the tail of `buffer` instead of going through
            // a freshly allocated temporary on every iteration.
            self.buffer.resize(filled + chunk, 0);
            let outcome = self.reader.read(&mut self.buffer[filled..]);

            let received = match outcome {
                // Clamp so a misbehaving reader cannot claim more than it got.
                Ok(n) => min(n, chunk),
                Err(err) => {
                    // Drop the scratch region so only real data stays buffered.
                    self.buffer.truncate(filled);
                    if err.kind() == io::ErrorKind::Interrupted {
                        continue;
                    }
                    return Err(err);
                }
            };
            self.buffer.truncate(filled + received);

            if received == 0 {
                // End of stream: nothing more will ever arrive.
                break;
            }
            if received == chunk {
                // The source keeps up with us, so ask for more next time.
                chunk = min(chunk.saturating_mul(2), Self::MAX_CHUNK);
            }
        }
        Ok(())
    }
}
/// `Read` for sources that can already seek: no buffering is needed, so the
/// call goes straight to the wrapped reader.
impl<R> Read for LazyBufferReader<R>
where
    R: ReadSeek,
{
    /// Forwards to the wrapped reader and returns its result unchanged.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        Read::read(&mut self.reader, buf)
    }
}
/// `Seek` for sources that can already seek: the wrapped reader does the work.
impl<R> Seek for LazyBufferReader<R>
where
    R: ReadSeek,
{
    /// Forwards to the wrapped reader and returns its result unchanged.
    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
        Seek::seek(&mut self.reader, pos)
    }
}
/// Fallback `Read` for sources that are not known to be seekable: data is
/// served out of the internal buffer, which is topped up on demand.
impl<R> Read for LazyBufferReader<R>
where
    R: Read,
{
    /// Copies up to `buf.len()` buffered bytes, starting at the current
    /// position, into `buf` and advances the position by the number copied.
    ///
    /// Returns `Ok(0)` when the position lies beyond the buffered data or no
    /// bytes are available at it. Errors from filling the buffer are passed on.
    default fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // Ask for enough data to satisfy this request before looking at the buffer.
        self.ensure_buffer(buf.len())?;

        // `get` yields `None` only when the position is past the end of the
        // buffer; at exactly the end it yields an empty slice.
        let pending = match self.buffer.get(self.position..) {
            Some(rest) => rest,
            None => return Ok(0),
        };

        // Hand over as much as both sides can take.
        let count = pending.len().min(buf.len());
        let (dest, _) = buf.split_at_mut(count);
        dest.copy_from_slice(&pending[..count]);

        self.position += count;
        Ok(count)
    }
}
/// Fallback `Seek` for sources that are not known to be seekable: only the
/// logical position into the internal buffer is moved.
impl<R: Read> Seek for LazyBufferReader<R> {
    /// Moves the logical read position and returns the new offset from the
    /// start of the stream.
    ///
    /// * `SeekFrom::Start(n)` sets the position to `n`.
    /// * `SeekFrom::Current(d)` moves the position by `d` (may be negative).
    /// * `SeekFrom::End(d)` first asks `ensure_buffer` to buffer the rest of
    ///   the stream, then sets the position to `buffered_len + d`; as with the
    ///   standard [`Seek`] contract, `d` is normally zero or negative.
    ///
    /// Seeking past the buffered data is allowed.
    ///
    /// # Errors
    ///
    /// Returns [`io::ErrorKind::InvalidInput`] when the resulting position
    /// would be negative or does not fit in `usize`; the current position is
    /// left untouched in that case. Errors raised while buffering the stream
    /// for `SeekFrom::End` are passed on.
    default fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
        let requested: Option<u64> = match pos {
            io::SeekFrom::Start(offset) => Some(offset),
            io::SeekFrom::Current(delta) => (self.position as u64).checked_add_signed(delta),
            io::SeekFrom::End(delta) => {
                // The end is only known once everything has been buffered.
                // `usize::MAX - position` keeps `position + size` inside
                // `ensure_buffer` from overflowing.
                self.ensure_buffer(usize::MAX - self.position)?;
                (self.buffer.len() as u64).checked_add_signed(delta)
            }
        };

        // Reject negative/overflowing results and offsets that `usize` cannot
        // represent (possible on 32-bit targets) instead of silently wrapping.
        let new_position = requested
            .filter(|&offset| offset <= usize::MAX as u64)
            .map(|offset| offset as usize)
            .ok_or_else(|| {
                io::Error::new(
                    io::ErrorKind::InvalidInput,
                    "invalid seek to a negative or overflowing position",
                )
            })?;

        self.position = new_position;
        Ok(new_position as u64)
    }
}
// Unit tests for `LazyBufferReader`.
//
// Two families of tests mirror each other:
// * Tests that wrap `&mut Cursor<_>` hand the wrapper a seekable reader. They
//   assert `buffer.len() == 0`, i.e. they expect reads to be forwarded to the
//   wrapped reader without filling the internal buffer.
// * `*_noseek` tests wrap `&mut Box<dyn Read>`, which hides the cursor's
//   seekability. They assert that the internal buffer gets filled.
// The last two tests use mockall mocks to pin how often the wrapped reader is
// called.
#[cfg(test)]
mod tests {
use super::*;
use mockall::mock;
use mockall::predicate::eq;
// NOTE(review): `File` and `Path` are not referenced by any test visible in this module.
use std::fs::File;
use std::io::SeekFrom;
use std::path::Path;
// Seekable source, empty input: the first read reports EOF.
#[test]
fn test_empty_input() {
use std::io::Cursor;
let mut data = Cursor::new(b"");
let mut lazy_reader = LazyBufferReader::new(&mut data);
let mut buf = [0; 10];
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 0); // Should return EOF immediately
assert_eq!(lazy_reader.buffer.len(), 0);
}
// Seekable source, 13 bytes read through a 5-byte buffer: 5, 5, 3, then EOF.
#[test]
fn test_small_input() {
use std::io::Cursor;
let mut data = Cursor::new(b"Hello, world!");
let mut lazy_reader = LazyBufferReader::new(&mut data);
let mut buf = [0; 5];
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 5);
assert_eq!(&buf[..bytes_read], b"Hello");
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 5);
assert_eq!(&buf[..bytes_read], b", wor");
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 3);
assert_eq!(&buf[..bytes_read], b"ld!");
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 0); // EOF
assert_eq!(lazy_reader.buffer.len(), 0);
}
// Seekable source whose length equals the read buffer's length.
#[test]
fn test_exact_buffer_size_input() {
use std::io::Cursor;
let data = vec![b'a'; 1024]; // Input exactly 1024 bytes
let mut binding = Cursor::new(data.clone());
let mut lazy_reader = LazyBufferReader::new(&mut binding);
let mut buf = [0; 1024];
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 1024);
assert_eq!(&buf[..bytes_read], &data[..]);
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 0); // EOF
assert_eq!(lazy_reader.buffer.len(), 0);
}
// Seekable source drained in a loop; every byte must arrive exactly once.
#[test]
fn test_large_input() {
use std::io::Cursor;
let data = vec![b'b'; 5000]; // Input larger than the 2048-byte read buffer
let mut binding = Cursor::new(data.clone());
let mut lazy_reader = LazyBufferReader::new(&mut binding);
let mut buf = [0; 2048]; // Read in chunks smaller than input size
let mut total_bytes_read = 0;
while let Ok(bytes_read) = lazy_reader.read(&mut buf) {
if bytes_read == 0 {
break; // EOF
}
total_bytes_read += bytes_read;
for &byte in &buf[..bytes_read] {
assert_eq!(byte, b'b'); // Ensure all bytes are correct
}
}
assert_eq!(total_bytes_read, data.len());
assert_eq!(lazy_reader.buffer.len(), 0);
}
// Seekable source read one byte per call.
#[test]
fn test_small_chunk_reads() {
use std::io::Cursor;
let mut data = Cursor::new(b"abcdef");
let mut lazy_reader = LazyBufferReader::new(&mut data);
let mut buf = [0; 1]; // Read one byte at a time
for expected_byte in b"abcdef".iter() {
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 1);
assert_eq!(buf[0], *expected_byte);
}
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 0); // EOF
assert_eq!(lazy_reader.buffer.len(), 0);
}
// Seekable source: once EOF has been reported, further reads keep reporting it.
#[test]
fn test_eof_behavior() {
use std::io::Cursor;
let mut data = Cursor::new(b"End of stream.");
let mut lazy_reader = LazyBufferReader::new(&mut data);
let mut buf = [0; 16];
// Read all available data
while lazy_reader.read(&mut buf).unwrap() > 0 {}
assert_eq!(lazy_reader.buffer.len(), 0);
// Ensure subsequent reads return EOF (0)
for _ in 0..3 {
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 0); // EOF
}
}
// Seekable source, 9 bytes read through a 4-byte buffer: 4, 4, 1, then EOF.
#[test]
fn test_partial_reads() {
use std::io::Cursor;
let mut data = Cursor::new(b"123456789");
let mut lazy_reader = LazyBufferReader::new(&mut data);
let mut buf = [0; 4]; // Buffer smaller than available data
// First read: should read "1234"
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 4);
assert_eq!(&buf[..bytes_read], b"1234");
// Second read: should read "5678"
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 4);
assert_eq!(&buf[..bytes_read], b"5678");
// Third read: should read "9"
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 1);
assert_eq!(&buf[..bytes_read], b"9");
// Fourth read: should return EOF (0)
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 0);
assert_eq!(lazy_reader.buffer.len(), 0);
}
// Seekable source with a read buffer much larger than the input.
#[test]
fn test_giant_read_small_input() {
use std::io::Cursor;
let mut data = Cursor::new(b"123456789");
let mut lazy_reader = LazyBufferReader::new(&mut data);
let mut buf = vec![0; 4 * 1024 * 1024]; // 4 MiB buffer, far larger than the 9-byte input
// First read: should return all 9 bytes at once
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 9);
assert_eq!(&buf[..bytes_read], b"123456789");
// Second read: should return EOF (0)
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 0);
assert_eq!(lazy_reader.buffer.len(), 0);
}
// Read-only source (`Box<dyn Read>`), empty input: EOF and nothing buffered.
#[test]
fn test_empty_input_noseek() {
use std::io::Cursor;
let data = Cursor::new(b"");
let mut reader = Box::new(data) as Box<dyn Read>;
let mut lazy_reader = LazyBufferReader::new(&mut reader);
let mut buf = [0; 10];
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 0); // Should return EOF immediately
assert_eq!(lazy_reader.buffer.len(), 0); // The buffer size is zero because there's no input
}
// Read-only source, 13 bytes through a 5-byte buffer; the internal buffer must fill.
#[test]
fn test_small_input_noseek() {
use std::io::Cursor;
let data = Cursor::new(b"Hello, world!");
let mut reader = Box::new(data) as Box<dyn Read>;
let mut lazy_reader = LazyBufferReader::new(&mut reader);
let mut buf = [0; 5];
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 5);
assert_eq!(&buf[..bytes_read], b"Hello");
assert!(lazy_reader.buffer.len() > 0);
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 5);
assert_eq!(&buf[..bytes_read], b", wor");
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 3);
assert_eq!(&buf[..bytes_read], b"ld!");
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 0); // EOF
}
// Read-only source whose length equals the read buffer's length.
#[test]
fn test_exact_buffer_size_input_noseek() {
use std::io::Cursor;
let data = vec![b'a'; 1024]; // Input exactly 1024 bytes
let binding = Cursor::new(data.clone());
let mut reader = Box::new(binding) as Box<dyn Read>;
let mut lazy_reader = LazyBufferReader::new(&mut reader);
let mut buf = [0; 1024];
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 1024);
assert_eq!(&buf[..bytes_read], &data[..]);
assert!(lazy_reader.buffer.len() > 0);
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 0); // EOF
}
// Read-only source drained in a loop; every byte must arrive exactly once.
#[test]
fn test_large_input_noseek() {
use std::io::Cursor;
let data = vec![b'b'; 5000]; // Input larger than the 2048-byte read buffer
let binding = Cursor::new(data.clone());
let mut reader = Box::new(binding) as Box<dyn Read>;
let mut lazy_reader = LazyBufferReader::new(&mut reader);
let mut buf = [0; 2048]; // Read in chunks smaller than input size
let mut total_bytes_read = 0;
while let Ok(bytes_read) = lazy_reader.read(&mut buf) {
if bytes_read == 0 {
break; // EOF
}
total_bytes_read += bytes_read;
for &byte in &buf[..bytes_read] {
assert_eq!(byte, b'b'); // Ensure all bytes are correct
}
}
assert!(lazy_reader.buffer.len() > 0);
assert_eq!(total_bytes_read, data.len());
}
// Read-only source read one byte per call.
#[test]
fn test_small_chunk_reads_noseek() {
use std::io::Cursor;
let data = Cursor::new(b"abcdef");
let mut reader = Box::new(data) as Box<dyn Read>;
let mut lazy_reader = LazyBufferReader::new(&mut reader);
let mut buf = [0; 1]; // Read one byte at a time
for expected_byte in b"abcdef".iter() {
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 1);
assert_eq!(buf[0], *expected_byte);
}
assert!(lazy_reader.buffer.len() > 0);
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 0); // EOF
}
// Read-only source: once EOF has been reported, further reads keep reporting it.
#[test]
fn test_eof_behavior_noseek() {
use std::io::Cursor;
let data = Cursor::new(b"End of stream.");
let mut reader = Box::new(data) as Box<dyn Read>;
let mut lazy_reader = LazyBufferReader::new(&mut reader);
let mut buf = [0; 16];
// Read all available data
while lazy_reader.read(&mut buf).unwrap() > 0 {}
assert!(lazy_reader.buffer.len() > 0);
// Ensure subsequent reads return EOF (0)
for _ in 0..3 {
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 0); // EOF
}
}
// Read-only source, 9 bytes read through a 4-byte buffer: 4, 4, 1, then EOF.
#[test]
fn test_partial_reads_noseek() {
use std::io::Cursor;
let data = Cursor::new(b"123456789");
let mut reader = Box::new(data) as Box<dyn Read>;
let mut lazy_reader = LazyBufferReader::new(&mut reader);
let mut buf = [0; 4]; // Buffer smaller than available data
// First read: should read "1234"
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 4);
assert_eq!(&buf[..bytes_read], b"1234");
assert!(lazy_reader.buffer.len() > 0);
// Second read: should read "5678"
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 4);
assert_eq!(&buf[..bytes_read], b"5678");
// Third read: should read "9"
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 1);
assert_eq!(&buf[..bytes_read], b"9");
// Fourth read: should return EOF (0)
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 0);
}
// Read-only source with a read buffer much larger than the input.
#[test]
fn test_giant_read_small_input_noseek() {
use std::io::Cursor;
let data = Cursor::new(b"123456789");
let mut reader = Box::new(data) as Box<dyn Read>;
let mut lazy_reader = LazyBufferReader::new(&mut reader);
let mut buf = vec![0; 4 * 1024 * 1024]; // 4 MiB buffer, far larger than the 9-byte input
// First read: should return all 9 bytes at once
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 9);
assert_eq!(&buf[..bytes_read], b"123456789");
assert!(lazy_reader.buffer.len() > 0);
// Second read: should return EOF (0)
let bytes_read = lazy_reader.read(&mut buf).unwrap();
assert_eq!(bytes_read, 0);
}
// Mock for a `Read`-only struct
mock! {
pub ReadOnlyReader {}
impl Read for ReadOnlyReader {
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize>;
}
}
// With a `Read`-only mock, a 9-byte read must hit the wrapped reader exactly
// once, and the buffer handed to it must have room for at least 9 bytes.
#[test]
fn test_lazy_buffer_reader_with_nonseekable_reader() {
let mut mock_reader = MockReadOnlyReader::new();
// Mock behavior for read
mock_reader
.expect_read()
.times(1)
.withf(|buf| buf.len() >= 9)
.return_once(|buf| {
buf[..9].copy_from_slice(b"test data");
Ok(9) // Simulate reading 9 bytes
});
let mut reader = LazyBufferReader::new(mock_reader);
let mut buf = [0u8; 9];
let bytes = reader.read(&mut buf).unwrap();
assert_eq!(bytes, 9);
}
// Mock for a `Read` + `Seek` struct
mock! {
pub ReadSeekReader {}
impl Read for ReadSeekReader {
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize>;
}
impl Seek for ReadSeekReader {
fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64>;
}
}
// With a `Read + Seek` mock, both `read` and `seek` must each reach the
// wrapped reader exactly once, and `seek` must receive the caller's
// `SeekFrom::Start(0)` unchanged.
#[test]
fn test_lazy_buffer_reader_with_seekable_reader() {
let mut mock_reader = MockReadSeekReader::new();
// Mock behavior for read
mock_reader
.expect_read()
.times(1)
.withf(|buf| buf.len() >= 9)
.return_once(|buf| {
buf[..9].copy_from_slice(b"test data");
Ok(9) // Simulate reading 9 bytes
});
// Mock behavior for seek
mock_reader
.expect_seek()
.times(1)
.with(eq(SeekFrom::Start(0)))
.return_once(|_| Ok(0));
let mut reader = LazyBufferReader::new(mock_reader);
let mut buf = [0u8; 9];
let bytes = reader.read(&mut buf).unwrap();
assert_eq!(bytes, 9);
let pos = reader.seek(SeekFrom::Start(0)).unwrap();
assert_eq!(pos, 0)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment