Created
September 25, 2016 14:06
-
-
Save jimfulton/1ed5e0a655647514465af2f9517333ac to your computer and use it in GitHub Desktop.
Rust script for iterating over a ZODB file storage file using a single iterator yielding an enum type
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Summarize a file-storage 1 formatted file, mainly as a way of
/// gauging Rust performance.  This is more or less equivalent to the
/// following Python/ZODB script:
///
/// ```python
/// import ZODB.FileStorage
/// import sys
///
/// def main():
///     it = ZODB.FileStorage.FileIterator(sys.argv[1])
///     transactions = records = 0
///     for transaction in it:
///         transactions += 1
///         for record in transaction:
///             records += 1
///
///     print(transactions, records)
///
/// if __name__ == '__main__':
///     main()
/// ```
///
/// Except that rather than providing nested iterators, a single
/// iterator of both transactions and data records is provided.
extern crate byteorder; | |
use byteorder::{BigEndian, ReadBytesExt}; | |
use std::fs::File; | |
use std::io; | |
use std::io::BufReader; | |
use std::io::prelude::*; | |
/// Return early with an I/O error unless `$cond` holds.
///
/// Expands to `return Err(..)` carrying a `std::io::Error` of kind `Other`
/// built from `$msg`, so it can only be used inside a function whose return
/// type is a `Result` with `std::io::Error` as the error type.
///
/// The error types are spelled with absolute `::std::io::` paths, so the
/// expansion compiles whether or not the call site has imported `std::io`
/// under the name `io`.
macro_rules! io_assert {
    ($cond:expr, $msg:expr) => {
        if !($cond) {
            return Err(::std::io::Error::new(::std::io::ErrorKind::Other, $msg));
        }
    };
}
/// Four-byte file magic `FS21`; one of the two values accepted when a file is opened.
static MAGIC2: [u8; 4] = *b"FS21";
/// Four-byte file magic `FS30`; the other accepted value.
static MAGIC3: [u8; 4] = *b"FS30";
/// Status byte `c` (ASCII 99).  Presumably marks a checkpoint transaction;
/// nothing in this file refers to it yet.
const CHECKPOINT_STATUS: u8 = b'c';
/// Transaction identifier, kept as the 8 raw bytes read from the file.
type Tid = [u8; 8];
/// Object identifier, kept as the 8 raw bytes read from the file.
type Oid = [u8; 8];
struct TransactionRecord { | |
pos: u64, | |
id: Tid, | |
length: u64, | |
status: u8, | |
user: Vec<u8>, | |
desc: Vec<u8>, | |
ext: Vec<u8>, | |
} | |
const TRANSACTIONHEADERSIZE: u64 = 23; | |
impl TransactionRecord { | |
fn read(reader: &mut io::Read, pos: u64) | |
-> io::Result<Option<TransactionRecord>> { | |
let mut id = [0; 8]; | |
let read = try!(reader.read(&mut id)); | |
if read == 0 { | |
Ok(None) // EOF | |
} | |
else { | |
if read != 8 { // because BufReader | |
try!(reader.read_exact(&mut id[read..])); | |
} | |
let length = try!(reader.read_u64::<BigEndian>()); | |
let mut status = [0]; | |
try!(reader.read_exact(&mut status)); | |
let luser = try!(reader.read_u16::<BigEndian>()); | |
let ldesc = try!(reader.read_u16::<BigEndian>()); | |
let lext = try!(reader.read_u16::<BigEndian>()); | |
let mut r = TransactionRecord { | |
pos: pos, id: id, length: length, status: status[0], | |
user: vec![0u8; luser as usize], | |
desc: vec![0u8; ldesc as usize], | |
ext: vec![0u8; lext as usize], | |
}; | |
try!(reader.read_exact(&mut r.user)); | |
try!(reader.read_exact(&mut r.desc)); | |
try!(reader.read_exact(&mut r.ext)); | |
Ok(Some(r)) | |
} | |
} | |
} | |
pub struct DataRecord { | |
pos: u64, | |
oid: Oid, | |
tid: Tid, | |
prev: u64, | |
tloc: u64, | |
lver: u16, | |
data: Vec<u8>, | |
} | |
const DATAHEADERSIZE: u64 = 42; | |
impl DataRecord { | |
fn read(reader: &mut io::Read, pos: u64) -> io::Result<DataRecord> { | |
let mut oid = [0; 8]; | |
let mut tid = [0; 8]; | |
try!(reader.read_exact(&mut oid)); | |
try!(reader.read_exact(&mut tid)); | |
let prev = try!(reader.read_u64::<BigEndian>()); | |
let tloc = try!(reader.read_u64::<BigEndian>()); | |
io_assert!(try!(reader.read_u16::<BigEndian>()) == 0, "lver non-zero"); | |
let mut dlen = try!(reader.read_u64::<BigEndian>()); | |
if dlen == 0 { | |
dlen = 8; // backpointer | |
} | |
let mut r = DataRecord { | |
pos: pos, oid: oid, tid: tid, prev: prev, tloc: tloc, lver: 0, | |
data: vec![0u8; dlen as usize], | |
}; | |
try!(reader.read_exact(&mut r.data)); | |
Ok(r) | |
} | |
} | |
enum Record { | |
Transaction(TransactionRecord), | |
Data(DataRecord), | |
} | |
struct Iterator { | |
reader: BufReader<File>, | |
pos: u64, | |
tlen: u64, | |
tend: u64, | |
} | |
impl Iterator { | |
fn open(name: &str) -> io::Result<Iterator> { | |
let mut reader = BufReader::new(try!(File::open(name))); | |
try!(Iterator::check_magic(&mut reader)); | |
Ok(Iterator { reader: reader, pos: 4, tlen: 0, tend: 4 }) | |
} | |
fn check_magic(reader: &mut io::Read) -> io::Result<()> { | |
let mut magic = [0u8; 4]; | |
try!(reader.read_exact(&mut magic)); | |
io_assert!(magic == MAGIC3 || magic == MAGIC2, "Bad file magic"); | |
Ok(()) | |
} | |
fn check_tlen(&mut self) -> io::Result<()> { | |
io_assert!(try!(self.reader.read_u64::<BigEndian>()) == self.tlen, | |
"Redundant length mismatch"); | |
Ok(()) | |
} | |
} | |
impl std::iter::Iterator for Iterator { | |
type Item = io::Result<Record>; | |
fn next(&mut self) -> Option<io::Result<Record>> { | |
if self.pos == self.tend { | |
if self.tlen > 0 { | |
if let Err(e) = self.check_tlen() { | |
return Some(Err(e)) | |
} | |
self.pos += 8 | |
} | |
match TransactionRecord::read(&mut self.reader, self.pos) { | |
Ok(Some(t)) => { | |
self.tend = self.pos + t.length; | |
self.tlen = t.length; | |
self.pos += TRANSACTIONHEADERSIZE + | |
t.user.len() as u64 + | |
t.desc.len() as u64 + | |
t.ext.len() as u64; | |
Some(Ok(Record::Transaction(t))) | |
}, | |
Err(e) => Some(Err(e)), | |
_ => None, | |
} | |
} | |
else { | |
match DataRecord::read(&mut self.reader, self.pos) { | |
Ok(d) => { | |
self.pos += DATAHEADERSIZE + d.data.len() as u64; | |
Some(Ok(Record::Data(d))) | |
}, | |
Err(e) => Some(Err(e)), | |
} | |
} | |
} | |
} | |
/// Running totals gathered while scanning a storage file.
///
/// The counters are `u64` so that storages holding more than `i32::MAX`
/// transactions or records are counted without overflow.
#[derive(Debug)]
struct DBStats {
    /// Number of transaction headers seen.
    transactions: u64,
    /// Number of data records seen.
    records: u64,
}
fn main() { | |
let args: Vec<String> = std::env::args().collect(); | |
let mut fi = Iterator::open(&args[1]).unwrap(); | |
let mut stats = DBStats {transactions: 0, records: 0}; | |
for tr in Iterator::open(&args[1]).unwrap() { | |
match tr { | |
Ok(Record::Transaction(t)) => { | |
stats.transactions += 1; | |
}, | |
Ok(Record::Data(d)) => { | |
stats.records += 1; | |
}, | |
Err(error) => { | |
print!("WTF? {}\n", error); | |
break; | |
}, | |
} | |
} | |
print!("{:?}\n", stats); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment