Skip to content

Instantly share code, notes, and snippets.

@Lysander
Created August 12, 2012 10:04
Show Gist options
  • Select an option

  • Save Lysander/3331015 to your computer and use it in GitHub Desktop.

Select an option

Save Lysander/3331015 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# coding: utf-8
import sys
import argparse
from contextlib import closing
import io
def count_triples_seahawk1986(sequences):
"""
Ansatz von Seahawk1986: http://forum.ubuntuusers.de/post/4689377/
(just ported to Python3 with `io`-Module)
"""
line1 = io.StringIO()
line2 = io.StringIO()
a, b = sequences
line1.write("".join(a))
line2.write("".join(b))
line1.seek(0)
line2.seek(0)
result1 = io.StringIO()
result2 = io.StringIO()
pos = 0
lenstr = len(line1.getvalue()) - 3
line1.seek(0)
line2.seek(0)
while pos <= lenstr:
trip1 = line1.read(3)
trip2 = line2.read(3)
if trip1.isalpha() and trip2.isalpha():
result1.write(trip1)
pos +=3
return int(len(result1.getvalue()) / 3)
def count_triples_seahawk1986_2(sequences):
"""
Ansatz von Seahawk1986: http://forum.ubuntuusers.de/post/4689377/
(just ported to Python3 with `io`-Module)
Umgestellt auf das Testen auf Gaps "-" analog zu `count_triples2`
"""
line1 = io.StringIO()
line2 = io.StringIO()
a, b = sequences
line1.write("".join(a))
line2.write("".join(b))
line1.seek(0)
line2.seek(0)
result1 = io.StringIO()
result2 = io.StringIO()
pos = 0
lenstr = len(line1.getvalue()) - 3
line1.seek(0)
line2.seek(0)
while pos <= lenstr:
trip1 = line1.read(3)
trip2 = line2.read(3)
if "-" not in trip1 and "-" not in trip2:
result1.write(trip1)
pos +=3
return int(len(result1.getvalue()) / 3)
def is_gap(a, b):
return a.isalpha() and b.isalpha()
def count_triples1(sequences):
return int(sum(1 for a, b in zip(*sequences) if is_gap(a, b)) / 3)
def count_triples2(sequences):
return int(sum(1 for pair in zip(*sequences) if "-" not in pair) / 3)
def load(f):
with closing(f):
for line in f:
line = line.strip()
if line:
yield line
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="search gap free dna blocks")
parser.add_argument("-algo", default="lys2",
choices=("lys1", "lys2", "sea1", "sea2"),
help="choose algorithm: %(choices)s "
"(default: %(default)s)")
parser.add_argument("dnafiles", nargs=2, type=argparse.FileType("r"),
default=sys.stdin)
args = parser.parse_args()
dnafiles = map(load, args.dnafiles)
print({
"lys1": count_triples1,
"lys2": count_triples2,
"sea1": count_triples_seahawk1986,
"sea2": count_triples_seahawk1986_2
}[args.algo](dnafiles))
#!/usr/bin/env python
import argparse
import sys
from count_triples import load, count_triples1, count_triples2, \
count_triples_seahawk1986, count_triples_seahawk1986_2
def main():
parser = argparse.ArgumentParser(description="search gap free dna blocks")
parser.add_argument("dnafiles", nargs=2, type=argparse.FileType("r"),
default=sys.stdin)
args = parser.parse_args()
dnafiles = map(load, args.dnafiles)
# we need data for multiple usage
sequences = ["".join(s) for s in dnafiles]
import timeit
for func in [count_triples1, count_triples2, count_triples_seahawk1986,
count_triples_seahawk1986_2]:
t = timeit.Timer("{0}({1})".format(func.__name__, sequences),
"from __main__ import {0}".format(func.__name__))
count = 1000
print(func.__name__, (t.timeit(count) / count))
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment