Last active
September 25, 2016 14:05
-
-
Save jimfulton/0e61d1b5da0bccf8282a58a9a63aedfd to your computer and use it in GitHub Desktop.
Rust script for iterating over a ZODB file storage file using nested iterators.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Summarize a file-storage 1 formatted file, mainly as a way of
/// gauging Rust performance. This is more or less equivalent to the
/// following Python/ZODB script:
///
/// ```python
/// import ZODB.FileStorage
/// import sys
///
/// def main():
///     it = ZODB.FileStorage.FileIterator(sys.argv[1])
///     transactions = records = 0
///     for transaction in it:
///         transactions += 1
///         for record in transaction:
///             records += 1
///
///     print(transactions, records)
///
/// if __name__ == '__main__':
///     main()
/// ```
extern crate byteorder; | |
use byteorder::{BigEndian, ReadBytesExt}; | |
use std::cell::RefCell; | |
use std::fs::File; | |
use std::io; | |
use std::io::BufReader; | |
use std::io::prelude::*; | |
use std::rc::Rc; | |
/// Return early from the enclosing function with an `io::Error` of kind
/// `Other` carrying `$msg` unless `$cond` evaluates to `true`.
///
/// The enclosing function must return a `Result` whose error type is
/// `io::Error`, and the `io` module must be in scope at the call site.
macro_rules! io_assert {
    ($cond:expr, $msg:expr) => {
        if !$cond {
            return Err(io::Error::new(io::ErrorKind::Other, $msg));
        }
    };
}
/// Accepted four-byte file signature `FS21`.
static MAGIC2: [u8; 4] = *b"FS21";
/// Accepted four-byte file signature `FS30`.
static MAGIC3: [u8; 4] = *b"FS30";
/// Status byte (`c`) at which `Transaction::read` stops producing transactions.
const CHECKPOINT_STATUS: u8 = b'c';

/// Eight-byte transaction identifier, kept as raw bytes.
type Tid = [u8; 8];
/// Eight-byte object identifier, kept as raw bytes.
type Oid = [u8; 8];
/// Buffered file reader shared (single-threaded) between the file iterator
/// and the transactions it hands out.
type Reader = Rc<RefCell<BufReader<File>>>;
struct Transaction { | |
reader: Reader, | |
pos: u64, | |
offset: u64, | |
id: Tid, | |
length: u64, | |
status: u8, | |
user: Vec<u8>, | |
desc: Vec<u8>, | |
ext: Vec<u8>, | |
} | |
const TRANSACTIONHEADERSIZE: u64 = 23; | |
impl Transaction { | |
fn read(reader_: Reader, pos: u64) -> io::Result<Option<Transaction>> { | |
let mut reader = reader_.borrow_mut(); | |
let mut id = [0; 8]; | |
let read = try!(reader.read(&mut id)); | |
if read == 0 { | |
Ok(None) | |
} | |
else { | |
if read != 8 { | |
try!(reader.read_exact(&mut id[read..])); | |
} | |
let length = try!(reader.read_u64::<BigEndian>()); | |
let mut status = [0]; | |
try!(reader.read_exact(&mut status)); | |
if status[0] == CHECKPOINT_STATUS { | |
return Ok(None); | |
} | |
let luser = try!(reader.read_u16::<BigEndian>()); | |
let ldesc = try!(reader.read_u16::<BigEndian>()); | |
let lext = try!(reader.read_u16::<BigEndian>()); | |
let mut t = Transaction { | |
reader: reader_.clone(), pos: pos, | |
offset: TRANSACTIONHEADERSIZE + | |
luser as u64 + ldesc as u64 + lext as u64, | |
id: id, length: length, status: status[0], | |
user: vec![0u8; luser as usize], | |
desc: vec![0u8; ldesc as usize], | |
ext: vec![0u8; lext as usize], | |
}; | |
try!(reader.read_exact(&mut t.user)); | |
try!(reader.read_exact(&mut t.desc)); | |
try!(reader.read_exact(&mut t.ext)); | |
Ok(Some(t)) | |
} | |
} | |
fn check_length(&mut self) -> io::Result<()> { | |
let mut reader = self.reader.borrow_mut(); | |
io_assert!(try!(reader.read_u64::<BigEndian>()) == self.length, | |
"Redundant length mismatch"); | |
Ok(()) | |
} | |
} | |
impl std::iter::Iterator for Transaction { | |
type Item = io::Result<DataRecord>; | |
fn next(&mut self) -> Option<io::Result<DataRecord>> { | |
if self.offset == self.length { | |
match self.check_length() { | |
Ok(_) => None, | |
r => Some(Err(r.unwrap_err())), | |
} | |
} | |
else { | |
let mut reader = self.reader.borrow_mut(); | |
let r = DataRecord::read(&mut *reader, self.pos + self.offset); | |
if let Ok(ref d) = r { | |
self.offset += d.len(); | |
} | |
Some(r) | |
} | |
} | |
} | |
pub struct DataRecord { | |
pos: u64, | |
oid: Oid, | |
tid: Tid, | |
prev: u64, | |
tloc: u64, | |
lver: u16, | |
dlen: u64, | |
data: Vec<u8>, // deleted data uses empty, not None | |
} | |
const DATAHEADERSIZE: u64 = 42; | |
impl DataRecord { | |
fn read(mut reader: &mut BufReader<File>, pos: u64) | |
-> io::Result<DataRecord> { | |
let mut oid = [0; 8]; | |
let mut tid = [0; 8]; | |
try!(reader.read_exact(&mut oid)); | |
try!(reader.read_exact(&mut tid)); | |
let prev = try!(reader.read_u64::<BigEndian>()); | |
let tloc = try!(reader.read_u64::<BigEndian>()); | |
io_assert!(try!(reader.read_u16::<BigEndian>()) == 0, "lver non-zero"); | |
let mut dlen = try!(reader.read_u64::<BigEndian>()); | |
let data = if dlen == 0 { | |
dlen = 8; | |
let back_pointer = try!(reader.read_u64::<BigEndian>()); | |
if back_pointer > 0 { | |
// Get data from earlier record because Undo | |
try!(reader.seek(std::io::SeekFrom::Start(back_pointer))); | |
let data = try!( | |
DataRecord::read(&mut reader, back_pointer)).data; | |
try!(reader.seek(std::io::SeekFrom::Start( | |
pos + DATAHEADERSIZE + 8))); | |
data | |
} | |
else { | |
vec![0u8; 0] // Deleted (or undone created) | |
} | |
} | |
else { // Normal case | |
let mut data = vec![0u8; dlen as usize]; | |
try!(reader.read_exact(&mut data)); | |
data | |
}; | |
Ok(DataRecord { | |
pos: pos, oid: oid, tid: tid, prev: prev, tloc: tloc, lver: 0, | |
dlen: dlen, data: data, | |
}) | |
} | |
fn len(&self) -> u64 { DATAHEADERSIZE + self.dlen} | |
} | |
struct FileIterator { | |
reader: Reader, | |
pos: u64, | |
} | |
impl FileIterator { | |
fn open(name: &str) -> io::Result<FileIterator> { | |
let mut reader = BufReader::new(try!(File::open(name))); | |
try!(FileIterator::check_magic(&mut reader)); | |
Ok(FileIterator { reader: Rc::new(RefCell::new(reader)), pos: 4 }) | |
} | |
fn check_magic(reader: &mut io::Read) -> io::Result<()> { | |
let mut magic = [0u8; 4]; | |
try!(reader.read_exact(&mut magic)); | |
io_assert!(magic == MAGIC3 || magic == MAGIC2, "Bad file magic"); | |
Ok(()) | |
} | |
} | |
impl std::iter::Iterator for FileIterator { | |
type Item = io::Result<Transaction>; | |
fn next(&mut self) -> Option<io::Result<Transaction>> { | |
match Transaction::read(self.reader.clone(), self.pos) { | |
Ok(Some(t)) => { | |
self.pos += t.length + 8; | |
Some(Ok(t)) | |
}, | |
Err(e) => Some(Err(e)), | |
_ => None | |
} | |
} | |
} | |
/// Totals gathered while walking a storage file.
///
/// The counters are `u64` so that storages holding more than `i32::MAX`
/// records can be counted without overflow.
#[derive(Debug)]
struct DBStats {
    /// Number of transactions seen.
    transactions: u64,
    /// Number of data records seen across all transactions.
    records: u64,
}
fn main() { | |
let args: Vec<String> = std::env::args().collect(); | |
let fi = FileIterator::open(&args[1]).unwrap(); | |
let mut stats = DBStats {transactions: 0, records: 0}; | |
'outer: for tr in fi { | |
match tr { | |
Ok(transaction) => { | |
stats.transactions += 1; | |
for rr in transaction { | |
match rr { | |
Ok(_) => stats.records += 1, | |
Err(error) => { | |
print!("WTF? {}\n", error); | |
break 'outer; | |
} | |
} | |
} | |
}, | |
Err(error) => { | |
print!("WTF? {}\n", error); | |
break; | |
}, | |
} | |
} | |
print!("{:?}\n", stats); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment