Created
August 12, 2012 10:04
-
-
Save Lysander/3331015 to your computer and use it in GitHub Desktop.
Solution for this thread: http://forum.ubuntuusers.de/topic/programmieraufgabe-wie-loesbar/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # coding: utf-8 | |
| import sys | |
| import argparse | |
| from contextlib import closing | |
| import io | |
| def count_triples_seahawk1986(sequences): | |
| """ | |
| Ansatz von Seahawk1986: http://forum.ubuntuusers.de/post/4689377/ | |
| (just ported to Python3 with `io`-Module) | |
| """ | |
| line1 = io.StringIO() | |
| line2 = io.StringIO() | |
| a, b = sequences | |
| line1.write("".join(a)) | |
| line2.write("".join(b)) | |
| line1.seek(0) | |
| line2.seek(0) | |
| result1 = io.StringIO() | |
| result2 = io.StringIO() | |
| pos = 0 | |
| lenstr = len(line1.getvalue()) - 3 | |
| line1.seek(0) | |
| line2.seek(0) | |
| while pos <= lenstr: | |
| trip1 = line1.read(3) | |
| trip2 = line2.read(3) | |
| if trip1.isalpha() and trip2.isalpha(): | |
| result1.write(trip1) | |
| pos +=3 | |
| return int(len(result1.getvalue()) / 3) | |
| def count_triples_seahawk1986_2(sequences): | |
| """ | |
| Ansatz von Seahawk1986: http://forum.ubuntuusers.de/post/4689377/ | |
| (just ported to Python3 with `io`-Module) | |
| Umgestellt auf das Testen auf Gaps "-" analog zu `count_triples2` | |
| """ | |
| line1 = io.StringIO() | |
| line2 = io.StringIO() | |
| a, b = sequences | |
| line1.write("".join(a)) | |
| line2.write("".join(b)) | |
| line1.seek(0) | |
| line2.seek(0) | |
| result1 = io.StringIO() | |
| result2 = io.StringIO() | |
| pos = 0 | |
| lenstr = len(line1.getvalue()) - 3 | |
| line1.seek(0) | |
| line2.seek(0) | |
| while pos <= lenstr: | |
| trip1 = line1.read(3) | |
| trip2 = line2.read(3) | |
| if "-" not in trip1 and "-" not in trip2: | |
| result1.write(trip1) | |
| pos +=3 | |
| return int(len(result1.getvalue()) / 3) | |
| def is_gap(a, b): | |
| return a.isalpha() and b.isalpha() | |
| def count_triples1(sequences): | |
| return int(sum(1 for a, b in zip(*sequences) if is_gap(a, b)) / 3) | |
| def count_triples2(sequences): | |
| return int(sum(1 for pair in zip(*sequences) if "-" not in pair) / 3) | |
| def load(f): | |
| with closing(f): | |
| for line in f: | |
| line = line.strip() | |
| if line: | |
| yield line | |
| if __name__ == "__main__": | |
| parser = argparse.ArgumentParser(description="search gap free dna blocks") | |
| parser.add_argument("-algo", default="lys2", | |
| choices=("lys1", "lys2", "sea1", "sea2"), | |
| help="choose algorithm: %(choices)s " | |
| "(default: %(default)s)") | |
| parser.add_argument("dnafiles", nargs=2, type=argparse.FileType("r"), | |
| default=sys.stdin) | |
| args = parser.parse_args() | |
| dnafiles = map(load, args.dnafiles) | |
| print({ | |
| "lys1": count_triples1, | |
| "lys2": count_triples2, | |
| "sea1": count_triples_seahawk1986, | |
| "sea2": count_triples_seahawk1986_2 | |
| }[args.algo](dnafiles)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import argparse | |
| import sys | |
| from count_triples import load, count_triples1, count_triples2, \ | |
| count_triples_seahawk1986, count_triples_seahawk1986_2 | |
| def main(): | |
| parser = argparse.ArgumentParser(description="search gap free dna blocks") | |
| parser.add_argument("dnafiles", nargs=2, type=argparse.FileType("r"), | |
| default=sys.stdin) | |
| args = parser.parse_args() | |
| dnafiles = map(load, args.dnafiles) | |
| # we need data for multiple usage | |
| sequences = ["".join(s) for s in dnafiles] | |
| import timeit | |
| for func in [count_triples1, count_triples2, count_triples_seahawk1986, | |
| count_triples_seahawk1986_2]: | |
| t = timeit.Timer("{0}({1})".format(func.__name__, sequences), | |
| "from __main__ import {0}".format(func.__name__)) | |
| count = 1000 | |
| print(func.__name__, (t.timeit(count) / count)) | |
| if __name__ == "__main__": | |
| main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment