Skip to content

Instantly share code, notes, and snippets.

@Measter
Last active May 19, 2020 15:39
Show Gist options
  • Save Measter/d31abe88b5e318ba98856bf6f047ef19 to your computer and use it in GitHub Desktop.
Save Measter/d31abe88b5e318ba98856bf6f047ef19 to your computer and use it in GitHub Desktop.
fqcnt_r1_4l
use std::{io::{self, Read, BufReader, BufRead}, fs::File};
use flate2::read::GzDecoder;
/// Streaming reader for FASTQ data in the strict four-line record layout
/// (header, sequence, `+` separator, quality).
///
/// All four lines of a record are read into one reusable buffer and `next`
/// hands out slices of that buffer, so no per-record strings are allocated.
struct Read4lfq<R: Read> {
    /// Buffered source the records are pulled from.
    rdr: BufReader<R>,
    /// Raw text of the current record; cleared and refilled by every call to `next`.
    line_buf: String,
}

impl<R: Read> Read4lfq<R> {
    /// Wraps `rdr` in a `BufReader` and starts with an empty record buffer.
    fn new(rdr: R) -> Self {
        Self {
            rdr: BufReader::new(rdr),
            line_buf: String::new(),
        }
    }

    /// Builds the `InvalidData` error reported for malformed records.
    fn malformed(msg: String) -> io::Error {
        io::Error::new(io::ErrorKind::InvalidData, msg)
    }

    /// Reads the next record and returns `(name, sequence, quality)`.
    ///
    /// `name` is the first whitespace-delimited token after the leading `@` of
    /// the header line. The returned slices borrow from the internal buffer and
    /// are only valid until the next call.
    ///
    /// Returns `Ok(None)` once the input is exhausted, or when the lines read
    /// for this record contain nothing but whitespace (e.g. trailing blank
    /// lines at the end of the file).
    ///
    /// # Errors
    ///
    /// * Any error from the underlying reader is passed through.
    /// * `ErrorKind::UnexpectedEof` if the input ends in the middle of a record.
    /// * `ErrorKind::InvalidData` if the header does not start with `@` or
    ///   carries no name, if the third line does not start with `+`, or if the
    ///   sequence and quality lines differ in length.
    fn next(&mut self) -> Result<Option<(&str, &str, &str)>, io::Error> {
        // Pre-read all four lines into the shared buffer so we can return
        // references into it instead of allocating a new string per line.
        self.line_buf.clear();
        if self.rdr.read_line(&mut self.line_buf)? == 0 {
            // Clean end of input: nothing left to read.
            return Ok(None);
        }
        for _ in 0..3 {
            if self.rdr.read_line(&mut self.line_buf)? == 0 {
                break;
            }
        }
        if self.line_buf.trim().is_empty() {
            return Ok(None);
        }

        // Split the buffer back into its four lines (`lines` also strips `\r\n`).
        let mut lines = self.line_buf.lines();
        let (header, seq, sep, qual) =
            match (lines.next(), lines.next(), lines.next(), lines.next()) {
                (Some(header), Some(seq), Some(sep), Some(qual)) => (header, seq, sep, qual),
                _ => {
                    return Err(io::Error::new(
                        io::ErrorKind::UnexpectedEof,
                        "truncated FASTQ record: expected 4 lines",
                    ));
                }
            };

        if !header.starts_with('@') {
            return Err(Self::malformed(format!("no fq header: {:?}", header)));
        }
        // The separator may be a bare `+` or `+` followed by a repeat of the read id.
        if !sep.starts_with('+') {
            return Err(Self::malformed(format!(
                "expected '+' separator line, found {:?}",
                sep
            )));
        }
        // `@` is a single byte, so slicing at 1 always lands on a char boundary.
        let name = header[1..]
            .split_whitespace()
            .next()
            .ok_or_else(|| Self::malformed(format!("fq header has no read name: {:?}", header)))?;
        if seq.len() != qual.len() {
            return Err(Self::malformed(format!(
                "diff len: {} {}",
                seq.len(),
                qual.len()
            )));
        }
        Ok(Some((name, seq, qual)))
    }
}
/// Counts the records, sequence bytes and quality bytes of the FASTQ file named
/// by the first command-line argument and prints the three totals on one line,
/// separated by tabs.
///
/// A file whose name ends in `gz` is read through `GzDecoder`; any other file is
/// read as-is. When no argument is given, a usage line is written to stderr and
/// the process exits with status 1.
///
/// # Errors
///
/// Returns the `io::Error` from opening the file or from `Read4lfq::next`.
fn main() -> Result<(), io::Error> {
    let filename = match std::env::args().nth(1) {
        Some(f) => f,
        None => {
            eprintln!("Usage: fqcnt <in.fq>");
            std::process::exit(1);
        }
    };

    let mut file = File::open(&filename)?;
    // Declared before `reader` so the decoder, when one is created, outlives the
    // `&mut dyn Read` borrow stored inside the reader.
    let mut zip;
    // NOTE(review): flate2's `GzDecoder` is documented as decoding a single gzip
    // member; multi-member inputs (e.g. bgzip output) may need `MultiGzDecoder`
    // -- confirm against the files this tool is expected to handle.
    let mut reader: Read4lfq<&mut dyn Read> = if filename.ends_with("gz") {
        zip = GzDecoder::new(file);
        Read4lfq::new(&mut zip)
    } else {
        Read4lfq::new(&mut file)
    };

    // Explicit 64-bit totals: an untyped `0` counter would default to `i32` and
    // overflow on inputs with more than ~2.1 billion records.
    let mut n: u64 = 0;
    let mut slen: u64 = 0;
    let mut qlen: u64 = 0;
    while let Some((_, seq, qual)) = reader.next()? {
        n += 1;
        // `usize` -> `u64` is a widening conversion on every supported target.
        slen += seq.len() as u64;
        qlen += qual.len() as u64;
    }
    println!("{}\t{}\t{}", n, slen, qlen);
    Ok(())
}
PS G:\ProgrammingProjects\Rust\biofast> hyperfine --warmup=3 ".\target\release\fqcnt.exe .\M_abscessus_HiSeq.fq" ".\target\release\fqcnt.exe .\M_abscessus_HiSeq.fq.gz"
Benchmark #1: .\target\release\fqcnt.exe .\M_abscessus_HiSeq.fq
Time (mean ± σ): 2.148 s ± 0.011 s [User: 0.0 ms, System: 3.4 ms]
Range (min … max): 2.134 s … 2.165 s
Benchmark #2: .\target\release\fqcnt.exe .\M_abscessus_HiSeq.fq.gz
Time (mean ± σ): 6.538 s ± 0.043 s [User: 2.8 ms, System: 2.3 ms]
Range (min … max): 6.485 s … 6.618 s
Summary
'.\target\release\fqcnt.exe .\M_abscessus_HiSeq.fq' ran
3.04x faster than '.\target\release\fqcnt.exe .\M_abscessus_HiSeq.fq.gz'
PS G:\ProgrammingProjects\Rust\biofast> hyperfine --warmup=3 "python35 fqcnt_py1_4l.py .\M_abscessus_HiSeq.fq" "python35 fqcnt_py1_4l.py .\M_abscessus_HiSeq.fq.gz"
Benchmark #1: python35 fqcnt_py1_4l.py .\M_abscessus_HiSeq.fq
Time (mean ± σ): 12.755 s ± 0.074 s [User: 0.0 ms, System: 7.8 ms]
Range (min … max): 12.652 s … 12.909 s
Benchmark #2: python35 fqcnt_py1_4l.py .\M_abscessus_HiSeq.fq.gz
Time (mean ± σ): 23.042 s ± 0.180 s [User: 1.4 ms, System: 6.7 ms]
Range (min … max): 22.658 s … 23.207 s
Summary
'python35 fqcnt_py1_4l.py .\M_abscessus_HiSeq.fq' ran
1.81x faster than 'python35 fqcnt_py1_4l.py .\M_abscessus_HiSeq.fq.gz'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment