Created
September 6, 2014 01:01
-
-
Save samuell/dbea9f759dced60a7d0b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule ATGCCount do
  @moduledoc """
  Counts A/T and G/C bases in one line of sequence text.
  """

  @doc """
  Returns `{at, gc}` for `sequence`, where `at` is the number of `A` and `T`
  characters and `gc` is the number of `G` and `C` characters.

  Counting stops at the first `>` character, so a line that starts with `>`
  yields `{0, 0}`. Every other character (for example `N`, lowercase
  letters or the trailing newline) is skipped without ending the scan.
  """
  @spec count(String.t()) :: {non_neg_integer(), non_neg_integer()}
  def count(sequence), do: cnt(String.to_charlist(sequence), 0, 0)

  @doc """
  Walks the charlist `chars`, adding to the running `at` and `gc` counters,
  and returns the final `{at, gc}` tuple.

  The scan ends at the end of the list or at the first `>` character.
  """
  @spec cnt(charlist(), integer(), integer()) :: {integer(), integer()}
  # A '>' ends the scan, keeping whatever has been counted so far.
  def cnt([?> | _], at, gc), do: {at, gc}
  def cnt([], at, gc), do: {at, gc}
  def cnt([c | rest], at, gc) when c in [?A, ?T], do: cnt(rest, at + 1, gc)
  def cnt([c | rest], at, gc) when c in [?G, ?C], do: cnt(rest, at, gc + 1)
  # Any other character is skipped. There is deliberately no early exit while
  # the counters are still zero: a line such as "NNATGC" must still count
  # the bases that follow the leading unknown characters.
  def cnt([_ | rest], at, gc), do: cnt(rest, at, gc)
end
defmodule GCCount do
  @moduledoc """
  Computes A/T and G/C totals, and the resulting GC ratio, over the lines of
  a sequence file using `ATGCCount.count/1`.
  """

  # File read when no explicit filename is given.
  @default_file "chry.fa"

  @doc """
  Sums the per-line `{at, gc}` counts of `filename`, reading it line by line.

  Returns the `{at, gc}` totals, or `nil` when `filename` does not exist.
  """
  @spec process(Path.t()) :: {non_neg_integer(), non_neg_integer()} | nil
  def process(filename \\ @default_file) do
    if File.exists?(filename) do
      # With a modes list as the second argument the stream is read by line,
      # which is what the per-line counter expects.
      filename
      |> File.stream!([:read_ahead, :raw])
      |> Enum.reduce({0, 0}, fn line, {at_acc, gc_acc} ->
        {at, gc} = ATGCCount.count(line)
        {at_acc + at, gc_acc + gc}
      end)
    end
  end

  @doc """
  Returns the fraction `gc / (gc + at)` for `filename`.

  Returns `0` when nothing was counted or when the file does not exist.
  """
  @spec gc_ratio(Path.t()) :: number()
  def gc_ratio(filename \\ @default_file) do
    case process(filename) do
      # Missing file: process/1 produced no totals at all.
      nil -> 0
      # No bases counted: avoid dividing by zero.
      {0, 0} -> 0
      # Match in the same {at, gc} order that process/1 returns, so the
      # numerator really is the G/C total.
      {at, gc} -> gc / (gc + at)
    end
  end
end
All: For reference, the data file used here is available at: http://bit.ly/ychromo
Also see the discussion about these tests on the meetup page: http://www.meetup.com/stockholm-elixir/events/203279212/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@behe Cool! In my experiment on saml.rilspace.org/moar-languagez-gc-content-in-python-d-fpc-c-and-c I imposed the restriction of reading only one line at a time (except for the second test at the end). It would be very interesting to see what performance we get without that optimization!