Last active
May 19, 2020 15:39
-
-
Save Measter/d31abe88b5e318ba98856bf6f047ef19 to your computer and use it in GitHub Desktop.
fqcnt_r1_4l
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::{io::{self, Read, BufReader, BufRead}, fs::File}; | |
use flate2::read::GzDecoder; | |
/// Streaming reader for four-line FASTQ records.
///
/// Wraps any [`Read`] source in a [`BufReader`] and reuses one `String` buffer
/// for every record, so walking a file does not allocate a new string per line.
struct Read4lfq<R: Read> {
    /// Buffered source the records are read from.
    rdr: BufReader<R>,
    /// Scratch buffer holding the raw lines of the current record.
    line_buf: String,
}

/// Builds the `InvalidData` error used for every malformed-record condition.
fn malformed_record(msg: String) -> io::Error {
    io::Error::new(io::ErrorKind::InvalidData, msg)
}

impl<R: Read> Read4lfq<R> {
    /// Creates a reader over `rdr`, adding buffering on top of it.
    fn new(rdr: R) -> Self {
        Self {
            rdr: BufReader::new(rdr),
            line_buf: String::new(),
        }
    }

    /// Reads the next record and returns it as `(name, sequence, quality)`.
    ///
    /// The returned slices borrow from the internal buffer and are only valid
    /// until the next call. `name` is the first whitespace-delimited token
    /// after the leading `@`; it is empty when the header carries no identifier.
    ///
    /// Returns `Ok(None)` once the four lines read contain nothing but
    /// whitespace, which is how end of input is detected.
    ///
    /// # Errors
    ///
    /// Propagates any I/O error from the underlying reader and returns an
    /// [`io::ErrorKind::InvalidData`] error when the record is malformed:
    /// fewer than four lines, a third line that does not start with `+`,
    /// a header that does not start with `@`, or sequence and quality lines
    /// of different lengths.
    fn next(&mut self) -> Result<Option<(&str, &str, &str)>, io::Error> {
        // Pre-read all four lines into the buffer so we can return references
        // into it instead of allocating new strings for every line.
        self.line_buf.clear();
        for _ in 0..4 {
            self.rdr.read_line(&mut self.line_buf)?;
        }
        if self.line_buf.trim().is_empty() {
            return Ok(None);
        }

        // Split the buffer back into the four lines of the record.
        let mut lines = self.line_buf.lines();
        let (header, seq, separator, qual) =
            match (lines.next(), lines.next(), lines.next(), lines.next()) {
                (Some(header), Some(seq), Some(separator), Some(qual)) => {
                    (header, seq, separator, qual)
                }
                _ => {
                    return Err(malformed_record(
                        "unable to read enough lines".to_string(),
                    ))
                }
            };

        // The separator line may optionally repeat the record name after '+'.
        if !separator.starts_with('+') {
            return Err(malformed_record(format!(
                "no '+' separator line: {:?}",
                separator
            )));
        }

        let name = match header.strip_prefix('@') {
            // A bare "@" header yields an empty name rather than a panic.
            Some(rest) => rest.split_whitespace().next().unwrap_or(""),
            None => return Err(malformed_record(format!("no fq header: {:?}", header))),
        };

        if seq.len() != qual.len() {
            return Err(malformed_record(format!(
                "diff len: {} {}",
                seq.len(),
                qual.len()
            )));
        }

        Ok(Some((name, seq, qual)))
    }
}
fn main() -> Result<(), io::Error> { | |
let filename = std::env::args().skip(1).next(); | |
let filename = if let Some(f) = filename { | |
f | |
} else { | |
println!("Usage: fqcnt <in.fq>"); | |
std::process::exit(0); | |
}; | |
let mut file = File::open(&filename)?; | |
let mut zip; | |
let mut reader: Read4lfq<&mut dyn Read> = if filename.ends_with("gz") { | |
zip = GzDecoder::new(file); | |
Read4lfq::new(&mut zip) | |
} else { | |
Read4lfq::new(&mut file) | |
}; | |
let mut n = 0; | |
let mut slen = 0; | |
let mut qlen = 0; | |
while let Some((_, seq, qual)) = reader.next()? { | |
n += 1; | |
slen += seq.len(); | |
qlen += qual.len(); | |
} | |
println!("{}\t{}\t{}", n, slen, qlen); | |
Ok(()) | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
PS G:\ProgrammingProjects\Rust\biofast> hyperfine --warmup=3 ".\target\release\fqcnt.exe .\M_abscessus_HiSeq.fq" ".\target\release\fqcnt.exe .\M_abscessus_HiSeq.fq.gz" | |
Benchmark #1: .\target\release\fqcnt.exe .\M_abscessus_HiSeq.fq | |
Time (mean ± σ): 2.148 s ± 0.011 s [User: 0.0 ms, System: 3.4 ms] | |
Range (min … max): 2.134 s … 2.165 s | |
Benchmark #2: .\target\release\fqcnt.exe .\M_abscessus_HiSeq.fq.gz | |
Time (mean ± σ): 6.538 s ± 0.043 s [User: 2.8 ms, System: 2.3 ms] | |
Range (min … max): 6.485 s … 6.618 s | |
Summary | |
'.\target\release\fqcnt.exe .\M_abscessus_HiSeq.fq' ran | |
3.04x faster than '.\target\release\fqcnt.exe .\M_abscessus_HiSeq.fq.gz' | |
PS G:\ProgrammingProjects\Rust\biofast> hyperfine --warmup=3 "python35 fqcnt_py1_4l.py .\M_abscessus_HiSeq.fq" "python35 fqcnt_py1_4l.py .\M_abscessus_HiSeq.fq.gz" | |
Benchmark #1: python35 fqcnt_py1_4l.py .\M_abscessus_HiSeq.fq | |
Time (mean ± σ): 12.755 s ± 0.074 s [User: 0.0 ms, System: 7.8 ms] | |
Range (min … max): 12.652 s … 12.909 s | |
Benchmark #2: python35 fqcnt_py1_4l.py .\M_abscessus_HiSeq.fq.gz | |
Time (mean ± σ): 23.042 s ± 0.180 s [User: 1.4 ms, System: 6.7 ms] | |
Range (min … max): 22.658 s … 23.207 s | |
Summary | |
'python35 fqcnt_py1_4l.py .\M_abscessus_HiSeq.fq' ran | |
1.81x faster than 'python35 fqcnt_py1_4l.py .\M_abscessus_HiSeq.fq.gz' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment