# Source: GitHub Gist 1328/11152589 (by @1328, created April 21, 2014 18:54).
# The GitHub page text that was captured around the code ("Skip to content",
# "Show Gist options", "Save 1328/11152589 to your computer and use it in
# GitHub Desktop", ...) is not part of the program.
import numpy as np
from PIL import Image
import collections
# RGB triple for each residue letter; the order matches `signifiers` below.
colors = [
    (238, 203, 173),  # A
    (49, 79, 79),     # C
    (100, 149, 237),  # D
    (127, 255, 0),    # E
    (255, 255, 0),    # F
    (178, 34, 34),    # G
    (255, 140, 0),    # H
    (147, 112, 219),  # I
    (199, 21, 133),   # K
    (255, 0, 0),      # L
    (139, 69, 19),    # M
    (34, 139, 34),    # N
    (0, 100, 0),      # P
    (0, 0, 128),      # Q
    (255, 250, 205),  # R
    (220, 220, 220),  # S
    (255, 228, 225),  # T
    (49, 79, 79),     # V  NOTE(review): identical to the colour for C - confirm
    (0, 191, 255),    # W
    (107, 142, 35),   # Y
]

# Letters that may appear in a sequence (presumably the 20 standard
# one-letter amino-acid codes - confirm against the input data).
signifiers = list('ACDEFGHIKLMNPQRSTVWY')

# Letter -> RGB lookup; a space (used as padding) is drawn white.
mask = dict(zip(signifiers, colors))
mask[' '] = (255, 255, 255)

# Row-count limit that read_genes uses to stop reading early.
Y_LIMIT = 50
def read_genes(fn, limit=None, max_len=500):
    """Read sequences from a FASTA-style text file.

    A line starting with '>' is treated as a record header and ends the
    record before it; every other line is stripped and concatenated onto the
    current record.

    Args:
        fn: Path of the file to read.
        limit: Maximum number of sequences to return.  Defaults to the
            module-level ``Y_LIMIT``.
        max_len: Each sequence is truncated to this many characters.

    Returns:
        A list of sequence strings in file order, at most ``limit`` long.
    """
    if limit is None:
        limit = Y_LIMIT
    result = []
    chunks = []  # pieces of the record currently being assembled
    with open(fn, mode='r') as fh:
        for line in fh:
            if len(result) >= limit:
                break
            if line.startswith('>'):
                # A header closes the previous record, if there was one.
                if chunks:
                    result.append(''.join(chunks)[:max_len])
                    chunks = []
            else:
                text = line.strip()
                if text:
                    chunks.append(text)
    # The final record has no header after it, so flush it explicitly;
    # otherwise the last sequence in the file would be lost.
    if chunks and len(result) < limit:
        result.append(''.join(chunks)[:max_len])
    return result
def build_image_array(data, palette=None):
    """Turn rows of sequence text into an RGB pixel array.

    Each character becomes one pixel whose colour is looked up in
    ``palette``.  A character missing from the palette is reported on stdout
    and drawn with the colour of ``' '``.  Rows shorter than the longest row
    are left at zero (black) on the right.

    Args:
        data: Sequence of strings, one per image row.
        palette: Mapping of character -> (r, g, b).  Defaults to the
            module-level ``mask``.

    Returns:
        A ``numpy`` array of shape ``(len(data), longest_row, 3)`` and dtype
        ``uint8`` (the 8-bit layout an RGB image buffer expects).
    """
    if palette is None:
        palette = mask
    y_max = len(data)
    # default=0 keeps an empty input from raising inside max().
    x_max = max((len(r) for r in data), default=0)
    # uint8, not the float64 default: the array is consumed as raw RGB bytes.
    result = np.zeros((y_max, x_max, 3), dtype=np.uint8)
    for y_idx, line in enumerate(data):
        for x_idx, c in enumerate(line):
            try:
                color = palette[c]
            except KeyError:
                print('mask missing key for {}'.format(c))
                print('on line: {}'.format(line))
                color = palette[' ']
            result[y_idx, x_idx] = color
    return result
def find(line, what):
    """Return the index of the first occurrence of `what` in `line`.

    Returns None when `what` does not occur.  Note that a match at the very
    start returns 0, which is falsy but is not the same as None.
    """
    if what in line:
        return line.index(what)
    return None
def slicie(some_string):
    """Yield every substring of `some_string` that is at least 2 long.

    Substrings are produced longest first and, within one length, from left
    to right.

    Yields:
        ``(start, substring)`` tuples, where ``start`` is the index of the
        substring inside `some_string`.
    """
    total = len(some_string)
    for length in range(total, 1, -1):
        # +1 so the last window of each length (and the full string itself,
        # when length == total) is included.
        for start_at in range(total - length + 1):
            yield start_at, some_string[start_at:start_at + length]
def offset(a, b):
    """Return how many positions `b` should shift right to line up with `a`.

    Candidate substrings ("slivers") of `b` come from ``slicie`` and are
    looked up in `a` with ``find``.  Only strictly positive shifts are
    recorded.  The longest matching sliver wins, and among slivers of equal
    length the smallest shift wins.  If a sliver matches without needing a
    positive shift before any candidate has been recorded, the result is 0.

    Args:
        a: Reference string.
        b: String to be shifted.

    Returns:
        The number of positions to shift `b` right, or 0 for "leave it".
    """
    found = ''
    found_offset = 0
    for start, sliver in slicie(b):
        # slicie yields longest slivers first, so once they are shorter than
        # the best match nothing later can replace it.
        if len(sliver) < len(found):
            break
        test = find(a, sliver)
        # Compare against None explicitly: a match at index 0 of `a` is a
        # real match and must not be mistaken for "not found".
        if test is None:
            continue
        shift = test - start
        longer = len(sliver) > len(found)
        same_len_smaller_shift = (len(sliver) == len(found)
                                  and found_offset > shift)
        if shift > 0 and (longer or same_len_smaller_shift):
            found = sliver
            found_offset = shift
        elif len(found) < len(sliver):
            # A sliver longer than the best match so far lines up without a
            # positive shift: keep the current offset.
            return found_offset
    return found_offset
def align(data):
    """Left-pad rows with spaces so similar sequences line up.

    Each row from the second one onwards is compared with the (possibly
    already padded) row above it via ``offset``.  When a non-zero shift comes
    back, the move is reported on stdout and that many spaces are prepended.

    Args:
        data: List of sequence strings; it is modified in place.

    Returns:
        The same list object, for convenience.
    """
    # Start at 1 so the second row is also aligned against the first.
    for row in range(1, len(data)):
        move = offset(data[row - 1], data[row])
        if move:
            print('moving row {} -> {} spaces'.format(
                row, move))
            data[row] = ' ' * move + data[row]
    return data
def main(fn='some.fasta'):
    """Read sequences from `fn`, align them and display them as an image.

    Args:
        fn: Path of the FASTA-style input file.
    """
    data = read_genes(fn)
    if not data:
        # Nothing to draw; an empty array cannot be turned into an image.
        print('no sequences found in {}'.format(fn))
        return
    data = align(data)
    # The image buffer must be 8-bit per channel; casting here is a no-op
    # when the array is already uint8.
    img_array = np.asarray(build_image_array(data), dtype=np.uint8)
    # A (rows, cols, 3) uint8 array is interpreted as RGB without an
    # explicit mode argument.
    img = Image.fromarray(img_array)
    img.show()
    # To write a file instead of opening a viewer: img.save('test.png', 'PNG')


# Run only when executed as a script, so importing the module has no
# side effects.
if __name__ == '__main__':
    main()
# (GitHub page footer, not part of the program: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment")