Skip to content

Instantly share code, notes, and snippets.

@brentp
Created September 5, 2009 04:25
Show Gist options
  • Save brentp/181285 to your computer and use it in GitHub Desktop.
Save brentp/181285 to your computer and use it in GitHub Desktop.
from sys import stdin
from collections import defaultdict
def gen_freq(seq, frame, frequences):
    """Count every overlapping window of ``frame`` characters in ``seq``.

    ``frequences`` is cleared and then refilled in place, so a single mapping
    can be reused across calls.  It must supply a default of 0 for missing
    keys (for example ``collections.defaultdict(int)``), because counts are
    accumulated with ``+=``.

    Args:
        seq: Sequence string to scan.
        frame: Window length in characters.
        frequences: Mapping that receives ``window -> occurrence count``.

    Returns:
        A ``(window_count, frequences)`` tuple.  ``window_count`` is the
        number of windows examined; it is 0 (never negative) when ``frame``
        is longer than ``seq``.
    """
    # Clamp so an over-long frame reports zero windows, not a negative count.
    window_count = max(len(seq) + 1 - frame, 0)
    frequences.clear()
    try:
        starts = xrange(window_count)  # Python 2: lazy, avoids building a list
    except NameError:
        starts = range(window_count)  # Python 3: range is already lazy
    for start in starts:
        frequences[seq[start:start + frame]] += 1
    return window_count, frequences
def sort_seq(seq, length, frequences):
    """Print every ``length``-character fragment of ``seq`` with its share.

    One ``"<fragment> <percent>"`` line is printed per distinct fragment,
    with the percentage (relative to the number of windows scanned) shown to
    three decimals, followed by a blank line.  Fragments are ordered from
    most to least frequent; equal counts are ordered by fragment text in
    descending order.

    Args:
        seq: Sequence string to analyse.
        length: Fragment length passed to ``gen_freq``.
        frequences: Reusable counting mapping; ``gen_freq`` clears and
            refills it.
    """
    total, frequences = gen_freq(seq, length, frequences)
    # Sorting on (count, fragment) in reverse keeps the same tie-break as
    # sorting (count, fragment) tuples in reverse.
    ranked = sorted(
        frequences.items(),
        key=lambda item: (item[1], item[0]),
        reverse=True,
    )
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("\n".join(
        "%s %.3f" % (fragment, 100.0 * count / total)
        for fragment, count in ranked
    ))
    print("")
def find_seq(seq, s, frequences):
    """Print how many times the fragment ``s`` occurs in ``seq``.

    The output line is ``"<count>\\t<fragment>"``.  Occurrences are counted
    over overlapping windows of ``len(s)`` characters via ``gen_freq``.

    Args:
        seq: Sequence string to search.
        s: Fragment to look up.
        frequences: Reusable counting mapping; ``gen_freq`` clears and
            refills it.
    """
    _, counts = gen_freq(seq, len(s), frequences)
    # .get() reports 0 for an absent fragment without inserting a phantom
    # zero entry into the shared mapping.
    print("%d\t%s" % (counts.get(s, 0), s))
def main():
    """Read a sequence from standard input and print fragment statistics.

    Input lines are skipped until one starting with ``">TH"`` is found.  The
    lines after it are concatenated (upper-cased) until a line starting with
    ``">"`` or ``";"`` or the end of input.  The function then prints the
    frequency tables for fragment lengths 1 and 2, followed by the
    occurrence counts of a fixed list of fragments.
    """
    frequences = defaultdict(int)

    # Skip ahead to the header of the record to analyse.
    for line in stdin:
        if line.startswith(">TH"):
            break

    # Keep reading from the same stream: collect the record's body lines.
    pieces = []
    for line in stdin:
        if line.startswith((">", ";")):
            break
        # Strip only line terminators; slicing off the last character would
        # drop a real base from a final line that lacks a trailing newline
        # and would leave "\r" behind on CRLF input.
        pieces.append(line.rstrip("\r\n"))
    sequence = "".join(pieces).upper()

    for fragment_length in (1, 2):
        sort_seq(sequence, fragment_length, frequences)

    for fragment in "GGT GGTA GGTATT GGTATTTTAATT GGTATTTTAATTTATAGT".split():
        find_seq(sequence, fragment, frequences)
# Run the analysis only when executed as a script, so that importing this
# module does not start consuming standard input.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment