Skip to content

Instantly share code, notes, and snippets.

@chrisjurich
Created January 8, 2021 09:40
Show Gist options
  • Save chrisjurich/c77a55c5b7bf7380b71fb0ef3a7f7ef2 to your computer and use it in GitHub Desktop.
Save chrisjurich/c77a55c5b7bf7380b71fb0ef3a7f7ef2 to your computer and use it in GitHub Desktop.
from enum import Enum
from abc import ABC, abstractmethod
from multipledispatch import dispatch
class MotifType(Enum):
SINGLESTRAND = 0
HELIX = 1
HAIRPIN = 2
JUNCTION = 3
TYPE_MAPPER = {
MotifType.SINGLESTRAND : "SingleStrand",
MotifType.HELIX : "Helix",
MotifType.HAIRPIN : "Hairpin",
MotifType.JUNCTION : "Junction",
}
def connectivity_list(structure):
connections, pairs = [-1]*len(structure), []
for index, db in enumerate(structure):
if db == '(':
pairs.append(index)
elif db == ')':
complement = pairs.pop()
connections[complement] = index
connections[index] = complement
assert len(pairs) == 0
return connections
class Motif(ABC):
def __init__(self, **kwargs):
self.type_ = None
self.parent_ = None
self.sequence_ = str()
self.children_ = []
self.strands_ = []
self.depth = int()
self.structure = str()
self.token = str()
if 'sequence' in kwargs:
self.sequence_ = kwargs['sequence']
if 'strands_' in kwargs:
self.strands_ = kwargs['strands']
for variable, value in kwargs.items():
setattr(self, variable, value)
if len(self.sequence_) == 0:
return
strand_seqs = []
for strand in self.strands:
strand_seqs.append(''.join([self.sequence_[index] for index in strand]))
self.sequence_ = '&'.join(strand_seqs)
def link_children_(self, depth):
if self.type() == MotifType.SINGLESTRAND:
depth = 0
self.depth = depth
for child in self.children():
child.parent(self)
for child in self.children():
child.link_children_(depth+1)
def str(self):
depth = '\t'*self.depth
if len(self.children_ ) == 0:
return f"{depth}{self.token} {self.structure} {self.sequence_}"
else:
children_ = '\n'.join([""] + [child.str() for child in self.children_])
return f"{depth}{self.token} {self.structure} {self.sequence_}{children_}"
def __str__(self):
return self.str()
def __repr__(self):
return self.str()
# def __eq__(self, other):
def type(self):
return self.type_
def children(self):
return self.children_
def parent(self, other=None):
if other is not None:
self.parent_ = other
else:
return self.parent_
@abstractmethod
def buffer(self):
pass
def has_children(self):
return len(self.children_) > 0
class SingleStrand(Motif):
def __init__(self, **kwargs):
super().__init__( **kwargs)
self.type_ = MotifType.SINGLESTRAND
self.structure = '.'*len(self.sequence_)
self.token = f"SingleStrand{len(self.sequence_)}"
def buffer(self):
return -1
class Helix(Motif):
def __init__(self, **kwargs):
super().__init__( **kwargs)
self.type_ = MotifType.HELIX
self.size_ = (len(self.sequence_) - 1) // 2
self.structure = f'{"("*self.size_}&{")"*self.size_}'
self.token = f"Helix{self.size_}"
self.pairs_ = []
for index in range(self.size_):
self.pairs_.append(
self.sequence_[index] + self.sequence_[-index-1]
)
def buffer(self):
return self.size_
def pairs(self):
return self.pairs_
class Hairpin(Motif):
def __init__(self, **kwargs):
super().__init__( **kwargs)
self.type_ = MotifType.HAIRPIN
size = len(self.sequence_) - 2
self.structure = '(' + '.'*size + ')'
self.token = f"Hairpin{size}"
def buffer(self):
return self.parent().buffer()
class Junction(Motif):
def __init__(self, **kwargs):
super().__init__( **kwargs)
self.type_ = MotifType.JUNCTION
self.gaps_ = [len(strand) -2 for strands in self.strands_]
self.structure = ['.']*len(self.sequence_)
for index, nt in enumerate(self.sequence_):
if nt == '&':
self.structure[index-1] = '('
self.structure[index] = '&'
self.structure[index+1] = ')'
self.structure[0] = '('
self.structure[-1] = ')'
self.structure = "".join(self.structure)
self.token = "Junction_" + '|'.join([str(len(strand)-2) for strand in self.strands_])
def buffer(self):
buffers = [self.parent.buffer()]
for child in self.children:
buffers.append(child.buffer())
return buffers
def gaps(self):
return self.gaps_
def helix_length_(connections, start):
complement = connections[start]
length = 0
while start + length == connections[connections[start+length]]:
length += 1
return length
def get_junction_or_helix_(sequence, connections, start):
strands = []
offset = 1
it = start
while True:
next = [it]
it += 1
while connections[it] == -1:
next.append(it)
it += 1
next.append(it)
strands.append(next)
it = connections[it]
if it == start:
break
if len(strands) > 1:
# JUNCTION
junction = Junction(strands=strands, sequence=sequence)
for strand in strands[:-1]:
junction.children_.append(get_motifs_(sequence, connections, strand[-1]))
return junction
else:
hp = Hairpin(sequence=sequence, strands=strands)
return hp
def get_helix_(sequence, connections, start):
# figure out how big this current helix i
helix_len = helix_length_(connections, start)
lhs, rhs = [], []
for index in range(start, start + helix_len):
lhs.append(index)
rhs.append(connections[index])
rhs.reverse()
helix = Helix(sequence=sequence, strands = [lhs, rhs] )
# figure out what is at the other end of the junction
helix.children_.append(get_junction_or_helix_(sequence, connections, start+helix_len-1))
# basically need to check if already inside of a junction or need to go out to a new singlestrand
# the singlestrand is only added if the singlestrand ends out the structure or the next helix
# is not part of a junction
it = rhs[-1] + 1
while it < len(connections) and connections[it] == -1:
it += 1
if it == len(connections) or connections[it] > rhs[-1]:
helix.children_.append(get_motifs_(sequence, connections, rhs[-1] + 1))
return helix
def get_singlestrand_(sequence, connections, start):
offset = 0
while start + offset < len(connections) and connections[start+offset] == -1:
offset += 1
strand = list(range(start,start+offset))
singlestrand = SingleStrand(sequence=sequence, strands=[strand])
# checking here if there is an outer nt
if start + offset < len(connections):
singlestrand.children_.append(get_motifs_(sequence, connections, start+offset))
return singlestrand
def get_motifs_(sequence, connections, start):
if connections[start] < 0:
return get_singlestrand_(sequence, connections, start)
return get_helix_(sequence, connections, start)
def parse_to_motifs(structure, sequence):
connections = connectivity_list(structure)
root = get_motifs_(sequence, connections, 0)
root.link_children_(0)
return root
if __name__ == "__main__":
ss="...(((.((((....)))).)))..(((.((((....)))).)))..(((.((((....)))).)))..(((.((((....)))).)))..(((((((....)))))))....................."
seq="GGAACCGAGACUUCGGUCUGGGUAAACCGUGACUUCGGUCAGGGUAAACCGCGACUUCGGUCGGGGUAAACCGGGACUUCGGUCCGGGUAAACUGUUUUUCGAAACAGUAAAAGAAACAACAACAACAAC"
m = parse_to_motifs(ss, seq)
print(m)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment