Last active
March 27, 2025 16:15
-
-
Save withzombies/817e787ea7d47632d9d4fc2e5bd83e97 to your computer and use it in GitHub Desktop.
A wrapper type that turns a `Read`-only reader into a `Read + Seek` reader by lazily buffering the bytes it has read
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![allow(incomplete_features)] | |
#![feature(specialization)] | |
use std::cmp::min; | |
use std::io; | |
use std::io::{Read, Seek, SeekFrom}; | |
/// Marker trait for readers that are both [`Read`] and [`Seek`].
///
/// It declares no methods of its own; it only lets a single bound name
/// "readable and seekable" types.
pub trait ReadSeek
where
    Self: Read + Seek,
{
}

/// Blanket implementation: every `Read + Seek` type is automatically a `ReadSeek`.
impl<T> ReadSeek for T where T: Read + Seek {}
/// Wraps a reader and keeps every byte pulled from it in an in-memory buffer.
///
/// The buffer is filled lazily: bytes are only requested from the wrapped reader
/// when a caller asks for data beyond what has been buffered so far.
#[derive(Debug)]
pub struct LazyBufferReader<R: Read> {
    /// The wrapped source of bytes.
    reader: R,
    /// Every byte obtained from `reader` so far, in stream order.
    buffer: Vec<u8>,
    /// Logical cursor into `buffer`; nothing here prevents it from pointing past the end.
    position: usize,
}

impl<R: Read> LazyBufferReader<R> {
    /// Creates a reader with an empty buffer, positioned at offset 0.
    pub fn new(reader: R) -> Self {
        Self {
            reader,
            buffer: Vec::new(),
            position: 0,
        }
    }

    /// Returns the schedule of read sizes used to fill the buffer towards `limit` bytes.
    ///
    /// Sizes start at 64 bytes and double while they stay below `min(limit, 2 GiB)`.
    /// A last chunk, twice the size at which doubling stopped, is always appended
    /// (clamped to the 2 GiB cap), so the schedule is never empty.
    fn powers_of_two(limit: usize) -> Vec<usize> {
        /// Largest single read the schedule may request.
        const MAX_CHUNK: usize = 2 * 1024 * 1024 * 1024;

        let bound = min(limit, MAX_CHUNK);
        let mut powers = Vec::new();
        let mut current: usize = 0x40;
        while current < bound {
            powers.push(current);
            current = current.saturating_mul(2);
        }
        // Without the clamp the final entry would be 4 GiB for very large limits,
        // i.e. twice the cap the loop above respects.
        powers.push(min(current.saturating_mul(2), MAX_CHUNK));
        powers
    }

    /// Tries to make `position + size` bytes available in the internal buffer.
    ///
    /// At most one `read` call is issued per scheduled chunk, and filling stops as
    /// soon as the reader reports EOF (a read of 0 bytes). The buffer may therefore
    /// still be shorter than requested when this returns `Ok(())`.
    ///
    /// `size` may be `usize::MAX` to mean "as much as possible"; the target offset
    /// saturates instead of overflowing.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the wrapped reader. Bytes buffered before
    /// the failing read are kept.
    fn ensure_buffer(&mut self, size: usize) -> io::Result<()> {
        // Saturate: `position + usize::MAX` would overflow for any non-zero position.
        let target = self.position.saturating_add(size);
        if self.buffer.len() >= target {
            // Already satisfied; skip building the chunk schedule.
            return Ok(());
        }

        for chunk_size in Self::powers_of_two(target) {
            if self.buffer.len() >= target {
                break;
            }
            // Scratch space for this read; only the filled prefix is kept.
            let mut temp_buf = vec![0; chunk_size];
            let bytes_read = self.reader.read(&mut temp_buf)?;
            if bytes_read == 0 {
                // EOF: nothing more to fetch.
                break;
            }
            self.buffer.extend_from_slice(&temp_buf[..bytes_read]);
        }
        Ok(())
    }
}
impl<R: ReadSeek> Read for LazyBufferReader<R> { | |
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { | |
self.reader.read(buf) | |
} | |
} | |
impl<R: ReadSeek> Seek for LazyBufferReader<R> { | |
fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> { | |
self.reader.seek(pos) | |
} | |
} | |
impl<R: Read> Read for LazyBufferReader<R> { | |
default fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { | |
// Ensure the internal buffer is filled if necessary | |
self.ensure_buffer(buf.len())?; | |
// If there's no data left in the buffer after attempting to fill it, return EOF (0) | |
if self.position > self.buffer.len() { | |
return Ok(0); | |
} | |
// Determine how much data to copy from the internal buffer to the provided buffer | |
let available_data = &self.buffer[self.position..]; | |
let bytes_to_copy = available_data.len().min(buf.len()); | |
buf[..bytes_to_copy].copy_from_slice(&available_data[..bytes_to_copy]); | |
// Update the position in the internal buffer | |
self.position += bytes_to_copy; | |
Ok(bytes_to_copy) | |
} | |
} | |
impl<R: Read> Seek for LazyBufferReader<R> { | |
default fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> { | |
match pos { | |
io::SeekFrom::Start(offset) => { | |
self.position = offset as usize; | |
} | |
io::SeekFrom::Current(offset) => { | |
self.position = (self.position as i64 + offset) as usize; | |
} | |
io::SeekFrom::End(offset) => { | |
// Need to read the whole buffer to use SeekFromEnd | |
self.ensure_buffer(usize::MAX)?; | |
self.position = self.buffer.len() - offset as usize; | |
} | |
} | |
Ok(self.position as u64) | |
} | |
} | |
#[cfg(test)]
mod tests {
    //! Tests for `LazyBufferReader`.
    //!
    //! Each scenario is run twice: once over a seekable source (reads are passed
    //! through, so the internal buffer stays empty) and once over a source hidden
    //! behind `dyn Read` (reads go through the internal buffer).

    use super::*;
    use mockall::mock;
    use mockall::predicate::eq;
    use std::io::{Cursor, SeekFrom};

    /// Boxes `bytes` behind `dyn Read`, hiding the cursor's `Seek` impl so the
    /// buffering code path is exercised.
    fn non_seekable(bytes: &[u8]) -> Box<dyn Read> {
        Box::new(Cursor::new(bytes.to_vec()))
    }

    /// Performs one `read` into `buf` and asserts that exactly `expected` was
    /// delivered (an empty `expected` means EOF).
    #[track_caller]
    fn expect_read<R: Read>(reader: &mut R, buf: &mut [u8], expected: &[u8]) {
        let bytes_read = reader.read(buf).unwrap();
        assert_eq!(&buf[..bytes_read], expected);
    }

    // ---------------------------------------------------------------------
    // Seekable sources: reads are delegated, nothing is buffered.
    // ---------------------------------------------------------------------

    #[test]
    fn test_empty_input() {
        let mut data = Cursor::new(b"");
        let mut lazy_reader = LazyBufferReader::new(&mut data);
        let mut buf = [0u8; 10];

        // An empty source reports EOF immediately.
        expect_read(&mut lazy_reader, &mut buf, b"");
        assert!(lazy_reader.buffer.is_empty());
    }

    #[test]
    fn test_small_input() {
        let mut data = Cursor::new(b"Hello, world!");
        let mut lazy_reader = LazyBufferReader::new(&mut data);
        let mut buf = [0u8; 5];

        expect_read(&mut lazy_reader, &mut buf, b"Hello");
        expect_read(&mut lazy_reader, &mut buf, b", wor");
        expect_read(&mut lazy_reader, &mut buf, b"ld!");
        expect_read(&mut lazy_reader, &mut buf, b""); // EOF
        assert!(lazy_reader.buffer.is_empty());
    }

    #[test]
    fn test_exact_buffer_size_input() {
        let data = vec![b'a'; 1024]; // Input exactly as large as the read buffer
        let mut binding = Cursor::new(data.clone());
        let mut lazy_reader = LazyBufferReader::new(&mut binding);
        let mut buf = [0u8; 1024];

        expect_read(&mut lazy_reader, &mut buf, &data);
        expect_read(&mut lazy_reader, &mut buf, b""); // EOF
        assert!(lazy_reader.buffer.is_empty());
    }

    #[test]
    fn test_large_input() {
        let data = vec![b'b'; 5000]; // Input larger than the read buffer
        let mut binding = Cursor::new(data.clone());
        let mut lazy_reader = LazyBufferReader::new(&mut binding);
        let mut buf = [0u8; 2048];

        let mut total_bytes_read = 0;
        loop {
            // Unwrap so that a read error fails the test loudly.
            let bytes_read = lazy_reader.read(&mut buf).unwrap();
            if bytes_read == 0 {
                break; // EOF
            }
            total_bytes_read += bytes_read;
            assert!(buf[..bytes_read].iter().all(|&byte| byte == b'b'));
        }

        assert_eq!(total_bytes_read, data.len());
        assert!(lazy_reader.buffer.is_empty());
    }

    #[test]
    fn test_small_chunk_reads() {
        let mut data = Cursor::new(b"abcdef");
        let mut lazy_reader = LazyBufferReader::new(&mut data);
        let mut buf = [0u8; 1]; // Read one byte at a time

        for expected_byte in b"abcdef".iter() {
            expect_read(&mut lazy_reader, &mut buf, &[*expected_byte]);
        }
        expect_read(&mut lazy_reader, &mut buf, b""); // EOF
        assert!(lazy_reader.buffer.is_empty());
    }

    #[test]
    fn test_eof_behavior() {
        let mut data = Cursor::new(b"End of stream.");
        let mut lazy_reader = LazyBufferReader::new(&mut data);
        let mut buf = [0u8; 16];

        // Drain the source.
        while lazy_reader.read(&mut buf).unwrap() > 0 {}
        assert!(lazy_reader.buffer.is_empty());

        // EOF must be sticky across repeated reads.
        for _ in 0..3 {
            expect_read(&mut lazy_reader, &mut buf, b"");
        }
    }

    #[test]
    fn test_partial_reads() {
        let mut data = Cursor::new(b"123456789");
        let mut lazy_reader = LazyBufferReader::new(&mut data);
        let mut buf = [0u8; 4]; // Read buffer smaller than the available data

        expect_read(&mut lazy_reader, &mut buf, b"1234");
        expect_read(&mut lazy_reader, &mut buf, b"5678");
        expect_read(&mut lazy_reader, &mut buf, b"9");
        expect_read(&mut lazy_reader, &mut buf, b""); // EOF
        assert!(lazy_reader.buffer.is_empty());
    }

    #[test]
    fn test_giant_read_small_input() {
        let mut data = Cursor::new(b"123456789");
        let mut lazy_reader = LazyBufferReader::new(&mut data);
        let mut buf = vec![0u8; 4 * 1024 * 1024]; // Read buffer far larger than the data

        // The first read delivers all nine bytes; the second one hits EOF.
        expect_read(&mut lazy_reader, &mut buf, b"123456789");
        expect_read(&mut lazy_reader, &mut buf, b"");
        assert!(lazy_reader.buffer.is_empty());
    }

    // ---------------------------------------------------------------------
    // Non-seekable sources: reads are served from the internal buffer.
    // ---------------------------------------------------------------------

    #[test]
    fn test_empty_input_noseek() {
        let mut reader = non_seekable(b"");
        let mut lazy_reader = LazyBufferReader::new(&mut reader);
        let mut buf = [0u8; 10];

        // An empty source reports EOF immediately and leaves nothing buffered.
        expect_read(&mut lazy_reader, &mut buf, b"");
        assert!(lazy_reader.buffer.is_empty());
    }

    #[test]
    fn test_small_input_noseek() {
        let mut reader = non_seekable(b"Hello, world!");
        let mut lazy_reader = LazyBufferReader::new(&mut reader);
        let mut buf = [0u8; 5];

        expect_read(&mut lazy_reader, &mut buf, b"Hello");
        assert!(!lazy_reader.buffer.is_empty());
        expect_read(&mut lazy_reader, &mut buf, b", wor");
        expect_read(&mut lazy_reader, &mut buf, b"ld!");
        expect_read(&mut lazy_reader, &mut buf, b""); // EOF
    }

    #[test]
    fn test_exact_buffer_size_input_noseek() {
        let data = vec![b'a'; 1024]; // Input exactly as large as the read buffer
        let mut reader = non_seekable(&data);
        let mut lazy_reader = LazyBufferReader::new(&mut reader);
        let mut buf = [0u8; 1024];

        expect_read(&mut lazy_reader, &mut buf, &data);
        assert!(!lazy_reader.buffer.is_empty());
        expect_read(&mut lazy_reader, &mut buf, b""); // EOF
    }

    #[test]
    fn test_large_input_noseek() {
        let data = vec![b'b'; 5000]; // Input larger than the read buffer
        let mut reader = non_seekable(&data);
        let mut lazy_reader = LazyBufferReader::new(&mut reader);
        let mut buf = [0u8; 2048];

        let mut total_bytes_read = 0;
        loop {
            // Unwrap so that a read error fails the test loudly.
            let bytes_read = lazy_reader.read(&mut buf).unwrap();
            if bytes_read == 0 {
                break; // EOF
            }
            total_bytes_read += bytes_read;
            assert!(buf[..bytes_read].iter().all(|&byte| byte == b'b'));
        }

        assert!(!lazy_reader.buffer.is_empty());
        assert_eq!(total_bytes_read, data.len());
    }

    #[test]
    fn test_small_chunk_reads_noseek() {
        let mut reader = non_seekable(b"abcdef");
        let mut lazy_reader = LazyBufferReader::new(&mut reader);
        let mut buf = [0u8; 1]; // Read one byte at a time

        for expected_byte in b"abcdef".iter() {
            expect_read(&mut lazy_reader, &mut buf, &[*expected_byte]);
        }
        assert!(!lazy_reader.buffer.is_empty());
        expect_read(&mut lazy_reader, &mut buf, b""); // EOF
    }

    #[test]
    fn test_eof_behavior_noseek() {
        let mut reader = non_seekable(b"End of stream.");
        let mut lazy_reader = LazyBufferReader::new(&mut reader);
        let mut buf = [0u8; 16];

        // Drain the source.
        while lazy_reader.read(&mut buf).unwrap() > 0 {}
        assert!(!lazy_reader.buffer.is_empty());

        // EOF must be sticky across repeated reads.
        for _ in 0..3 {
            expect_read(&mut lazy_reader, &mut buf, b"");
        }
    }

    #[test]
    fn test_partial_reads_noseek() {
        let mut reader = non_seekable(b"123456789");
        let mut lazy_reader = LazyBufferReader::new(&mut reader);
        let mut buf = [0u8; 4]; // Read buffer smaller than the available data

        expect_read(&mut lazy_reader, &mut buf, b"1234");
        assert!(!lazy_reader.buffer.is_empty());
        expect_read(&mut lazy_reader, &mut buf, b"5678");
        expect_read(&mut lazy_reader, &mut buf, b"9");
        expect_read(&mut lazy_reader, &mut buf, b""); // EOF
    }

    #[test]
    fn test_giant_read_small_input_noseek() {
        let mut reader = non_seekable(b"123456789");
        let mut lazy_reader = LazyBufferReader::new(&mut reader);
        let mut buf = vec![0u8; 4 * 1024 * 1024]; // Read buffer far larger than the data

        // The first read delivers all nine bytes; the second one hits EOF.
        expect_read(&mut lazy_reader, &mut buf, b"123456789");
        assert!(!lazy_reader.buffer.is_empty());
        expect_read(&mut lazy_reader, &mut buf, b"");
    }

    #[test]
    fn test_seek_from_start_noseek() {
        let mut reader = non_seekable(b"Hello, world!");
        let mut lazy_reader = LazyBufferReader::new(&mut reader);
        let mut buf = [0u8; 5];

        // Jump forward before anything has been read, then rewind and re-read.
        assert_eq!(lazy_reader.seek(SeekFrom::Start(7)).unwrap(), 7);
        expect_read(&mut lazy_reader, &mut buf, b"world");
        assert_eq!(lazy_reader.seek(SeekFrom::Start(0)).unwrap(), 0);
        expect_read(&mut lazy_reader, &mut buf, b"Hello");
    }

    // ---------------------------------------------------------------------
    // Mock-based checks of how the wrapped reader is driven.
    // ---------------------------------------------------------------------

    // Mock for a `Read`-only struct
    mock! {
        pub ReadOnlyReader {}

        impl Read for ReadOnlyReader {
            fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize>;
        }
    }

    #[test]
    fn test_lazy_buffer_reader_with_nonseekable_reader() {
        let mut mock_reader = MockReadOnlyReader::new();

        // The wrapped reader must be asked exactly once, with room for all 9 bytes.
        mock_reader
            .expect_read()
            .times(1)
            .withf(|buf| buf.len() >= 9)
            .return_once(|buf| {
                buf[..9].copy_from_slice(b"test data");
                Ok(9) // Simulate reading 9 bytes
            });

        let mut reader = LazyBufferReader::new(mock_reader);
        let mut buf = [0u8; 9];
        let bytes = reader.read(&mut buf).unwrap();
        assert_eq!(bytes, 9);
    }

    // Mock for a `Read` + `Seek` struct
    mock! {
        pub ReadSeekReader {}

        impl Read for ReadSeekReader {
            fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize>;
        }

        impl Seek for ReadSeekReader {
            fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64>;
        }
    }

    #[test]
    fn test_lazy_buffer_reader_with_seekable_reader() {
        let mut mock_reader = MockReadSeekReader::new();

        // Expect a single read with room for all 9 bytes.
        mock_reader
            .expect_read()
            .times(1)
            .withf(|buf| buf.len() >= 9)
            .return_once(|buf| {
                buf[..9].copy_from_slice(b"test data");
                Ok(9) // Simulate reading 9 bytes
            });

        // Expect the seek request to reach the wrapped reader unchanged.
        mock_reader
            .expect_seek()
            .times(1)
            .with(eq(SeekFrom::Start(0)))
            .return_once(|_| Ok(0));

        let mut reader = LazyBufferReader::new(mock_reader);
        let mut buf = [0u8; 9];
        let bytes = reader.read(&mut buf).unwrap();
        assert_eq!(bytes, 9);

        let pos = reader.seek(SeekFrom::Start(0)).unwrap();
        assert_eq!(pos, 0);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment