Last active
August 29, 2015 14:03
-
-
Save alco/5d18c046094d0abb3f58 to your computer and use it in GitHub Desktop.
Transcribed from https://gist.github.com/samuell/5591367. Tested with Rust 0.11. The data file can be found at http://saml.rilspace.org/calculating-gc-content-in-python-and-d-how-to-get-10x-speedup-in-d#toc0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::io::fs::File; | |
enum ReadState { | |
Normal, | |
Newline, | |
Skipline, | |
} | |
fn main() { | |
let mut at = 0i; | |
let mut gc = 0i; | |
let path = Path::new("Homo_sapiens.GRCh37.67.dna_rm.chromosome.Y.fa"); | |
let vec = match File::open(&path).read_to_end() { | |
Ok(vec) => vec, | |
Err(why) => fail!("Error opening file: {}", why), | |
}; | |
let bytes = vec.slice_from(0); | |
let mut state = Newline; | |
for &byte in bytes.iter() { | |
let c = byte as char; | |
match (state, c) { | |
(Skipline, '\n') => { | |
state = Newline; | |
continue; | |
}, | |
(Skipline, _) => continue, | |
(Newline, '>') => { | |
state = Skipline; | |
continue; | |
}, | |
(Newline, _) => state = Normal, | |
_ => (), | |
} | |
match c { | |
'A' | 'T' => at += 1, | |
'G' | 'C' => gc += 1, | |
_ => (), | |
} | |
} | |
let gc_fraction = gc as f32 / (at+gc) as f32; | |
println!("{}", gc_fraction * 100.0); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::io::fs::File; | |
use std::io::BufferedReader; | |
fn main() { | |
let mut at = 0i; | |
let mut gc = 0i; | |
let path = Path::new("Homo_sapiens.GRCh37.67.dna_rm.chromosome.Y.fa"); | |
let mut file = BufferedReader::new(File::open(&path)); | |
for line in file.lines() { | |
let line = line.unwrap(); | |
let bytes = line.as_bytes(); | |
if bytes.len() == 0 || bytes[0] == '>' as u8 { | |
continue | |
} | |
for &c in bytes.iter() { | |
match c as char { | |
'A' | 'T' => at += 1, | |
'G' | 'C' => gc += 1, | |
_ => (), | |
} | |
} | |
} | |
let gc_fraction = gc as f32 / (at+gc) as f32; | |
println!("{}", gc_fraction * 100.0); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::io::fs::File; | |
use std::io::BufferedReader; | |
fn main() { | |
let mut countat = [0i, ..256]; | |
countat['A' as uint] = 1; | |
countat['T' as uint] = 1; | |
let mut countgc = [0i, ..256]; | |
countgc['G' as uint] = 1; | |
countgc['C' as uint] = 1; | |
let mut at = 0i; | |
let mut gc = 0i; | |
let path = Path::new("Homo_sapiens.GRCh37.67.dna_rm.chromosome.Y.fa"); | |
let mut file = BufferedReader::new(File::open(&path)); | |
for line in file.lines() { | |
let line = line.unwrap(); | |
let bytes = line.as_bytes(); | |
if bytes.len() == 0 || bytes[0] == '>' as u8 { | |
continue | |
} | |
for &c in bytes.iter() { | |
at += countat[c as uint]; | |
gc += countgc[c as uint]; | |
} | |
} | |
let gc_fraction = gc as f32 / (at+gc) as f32; | |
println!("{}", gc_fraction * 100.0); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# measured on a single machine | |
## Go | |
$ go version | |
go version go1.2.2 darwin/amd64 | |
$ time ./gc_rp | |
37.62173 | |
./gc_rp AVERAGE 0.22 total | |
## Rust | |
$ bin/rustc -v | |
rustc 0.11.0 (aa1163b92de7717eb7c5eba002b4012e0574a7fe 2014-06-27 12:50:16 -0700) | |
$ bin/rustc --opt-level=3 my/gc_rp.rs | |
$ time ./gc_rp | |
./gc_rp AVERAGE 0.59 total | |
$ bin/rustc --opt-level=3 my/gc_fsm.rs | |
$ time ./gc_fsm | |
./gc_fsm AVERAGE 0.29 total |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment