Skip to content

Instantly share code, notes, and snippets.

@hiraksarkar
Last active November 26, 2018 08:47
Show Gist options
  • Save hiraksarkar/b28c32524652521adc1ac9d1256e8029 to your computer and use it in GitHub Desktop.
Save hiraksarkar/b28c32524652521adc1ac9d1256e8029 to your computer and use it in GitHub Desktop.
Native dust in python (N.B. This is not accelerated sdust)
from collections import deque
import itertools
# Make dictionary of triplets
i = 0
triplet_index = {}
inverse_triplet = {}
for x in list(itertools.product(['A','T','G','C'], repeat=3)):
triplet_index[''.join(x)] = i
inverse_triplet[i] = ''.join(x)
i += 1
class Interval(object):
def __init__(self, s=0, e=0):
"""
type val: int
type val: int
"""
self.start = s
self.end = e
class IntervalArray(object):
def __init__(self):
"""
"""
self.__intervals = []
def addInterval(self, val):
"""
:type val: int
:rtype: void
"""
def upper_bound(nums, target):
left, right = 0, len(nums) - 1
while left <= right:
mid = int(left + (right - left) / 2)
if nums[mid].start > target:
right = mid - 1
else:
left = mid + 1
return left
i = upper_bound(self.__intervals, val.start)
start, end = val.start, val.end
if i != 0 and self.__intervals[i-1].end + 1 >= val.end:
i -= 1
while i != len(self.__intervals) and \
end + 1 >= self.__intervals[i].start:
start = min(start, self.__intervals[i].start)
end = max(end, self.__intervals[i].end);
del self.__intervals[i]
self.__intervals.insert(i, Interval(start, end))
def getIntervals(self):
return self.__intervals
def best_prefix(w, istart, ifinish):
"""
type val: deque
type val: int
type val: int
rtype val: tuple(int, int)
"""
cw = np.zeros(64)
rw = 0
finish = istart - 1
max_score = 0
for i in range(istart, ifinish + 1):
t = w[i]
rw = rw + cw[t]
cw[t] += 1
if i > istart and rw/(i-istart) > max_score:
finish = i
max_score = rw/(i - istart)
return (finish + 2, max_score)
def dust(q, W, T):
"""
type val: str
type val: int
type val: int
rtype val: list(object)
"""
res = IntervalArray()
w = deque()
for i in range(2, len(q) + W - 3):
wstart = max(i-W+1, 0)
wfinish = min(i, len(q)-1)
# print (wstart, wfinish)
if i < len(q):
t = q[i-2:i+1]
w.append(triplet_index[t])
if wstart > 0:
w.popleft()
limit, max_score = best_prefix(w, 0, len(w)-1)
start, finish = 0, limit
if max_score > T:
for s in range(1, limit-2):
f, new_score = best_prefix(w, s, limit-2)
if new_score > max_score:
max_score = new_score
start, finish = s, f
interval = Interval(start + wstart, finish + wstart)
res.addInterval(interval)
return res
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment