Last active
May 12, 2016 05:39
-
-
Save tiptopcoder/21163bc1b21c9ae5847e3995c4fb8617 to your computer and use it in GitHub Desktop.
Locating Restriction Sites Algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Locating Restriction Sites | |
# ========================== | |
# | |
# A DNA string is a reverse palindrome if it is equal to its reverse complement. | |
# For instance, GCATGC is a reverse palindrome because its reverse complement is GCATGC. | |
# | |
# Given: A DNA string of length at most 1 kbp. | |
# | |
# Return: The position and length of every reverse palindrome in the string having length between 4 and 12. | |
# | |
# ========================== | |
# | |
# Sample Dataset | |
# -------------- | |
# TCAATGCATGCGGGTCTATATGCAT | |
# | |
# Sample Output | |
# ------------- | |
# 4 6 | |
# 5 4 | |
# 6 6 | |
# 7 4 | |
# 17 4 | |
# 18 4 | |
# 20 6 | |
# 21 4 | |
# ========================== | |
from Bio import SeqIO | |
def reverse_complement(s): | |
complements = {'A':'T', 'T':'A', 'G':'C', 'C':'G'} | |
return ''.join([complements[c] for c in reversed(s)]) | |
def reverse_palindromes(s): | |
results = [] | |
l = len(s) | |
for i in range(l): #looping over string s | |
for j in range(4, 12): #palindrome length from 4 to 12 | |
if i + j > l: #prevent string s out of range | |
continue | |
s1 = s[i:i+j] | |
s2 = reverse_complement(s1) | |
if s1 == s2: | |
results.append((i + 1, j)) | |
return results | |
handle = open("fasta.fasta", "rU") | |
for record in SeqIO.parse(handle,"fasta"): | |
DNA = str(record.seq) | |
results = reverse_palindromes(DNA) | |
print "\n".join([' '.join(map(str, r)) for r in results]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment