Skip to content

Instantly share code, notes, and snippets.

@f6v
Created June 7, 2020 21:06
Show Gist options
  • Save f6v/71dabc50f397b21e2f34de5f1d19aaf3 to your computer and use it in GitHub Desktop.
Save f6v/71dabc50f397b21e2f34de5f1d19aaf3 to your computer and use it in GitHub Desktop.
import itertools
from Bio import SeqIO
def overlap(read_a, read_b, min_length):
start = 0
while True:
start = read_a.find(read_b[:min_length], start)
if start == -1:
return 0
if read_b.startswith(read_a[start:]):
return len(read_a) - start
start += 1
def pick_maximal_overlap(reads, min_overlap):
read_a, read_b = None, None
best_olen = 0
for a, b in itertools.permutations(reads, 2):
olen = overlap(a, b, min_overlap)
if olen > best_olen:
read_a, read_b = a, b
best_olen = olen
return read_a, read_b, best_olen
def shortestSuperstring(file_name):
sequences = SeqIO.parse(file_name, 'fasta')
reads = list(map(lambda seq_rec: str(seq_rec.seq), sequences))
min_overlap = len(reads[0]) // 2 + 1
read_a, read_b, overlap_len = pick_maximal_overlap(reads, min_overlap)
while overlap_len > 0:
reads.remove(read_a)
reads.remove(read_b)
reads.append(read_a + read_b[overlap_len:])
read_a, read_b, overlap_len = pick_maximal_overlap(reads, min_overlap)
return ''.join(reads)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment