Last active
April 1, 2017 04:55
-
-
Save pjvandehaar/7d1882b2120d3a2031c73b6b8fa3c7cd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
def get_blks(): | |
import gzip | |
# <http://hgdownload.cse.ucsc.edu/goldenPath/hg38/liftOver/hg38ToHg19.over.chain.gz> | |
with gzip.open('hg38ToHg19.over.chain.gz', 'rt') as f: | |
for line in f: | |
line = line.rstrip('\n').split() | |
if line and line[0] == 'chain': | |
blk = dict(chr1=line[2], std1=line[4], stt1=int(line[5]), len1=int(line[6]), | |
chr2=line[7], std2=line[9], stt2=int(line[10]),len2=int(line[11]), | |
pieces=[]) | |
elif len(line)==3: | |
blk['pieces'].append(dict(bth=int(line[0]), gap1=int(line[1]), gap2=int(line[2]))) | |
elif len(line)==1: | |
blk['pieces'].append(dict(bth=int(line[0]))) | |
elif not line: | |
yield blk | |
else: raise | |
yield blk | |
def wth(dct, dct2): | |
d = dct.copy() | |
d.update(dct2) | |
return d | |
assert wth(dict(a=1,b=2), dict(b=102,c=103)) == dict(a=1,b=102,c=103) | |
def justkeys(dct, keys): | |
return {k:v for k,v in dct.items() if k in keys} | |
assert justkeys(dict(a=1,b=2,c=3,d=4), 'ac') == dict(a=1,c=3) | |
def wthout(dct, keys): | |
return {k:v for k,v in dct.items() if k not in keys} | |
assert wthout(dict(a=1,b=2,c=3,d=4), 'ac') == dict(b=2,d=4) | |
import itertools | |
def take(itr, n): | |
return list(itertools.islice(itr, 0, n)) | |
def get_alns(): | |
import gzip | |
for blk in get_blks(): | |
aln = justkeys(blk, 'chr1 chr2 std1 std2 stt1 stt2'.split()) | |
for piece in blk['pieces']: | |
yield wth(aln, {'end1': aln['stt1'] + piece['bth']}) | |
for i in '12': | |
try: aln[f'stt{i}'] += piece['bth'] + piece[f'gap{i}'] | |
except KeyError: pass | |
def lookup(chrom, pos): | |
for aln in get_alns(): | |
if aln['chr1'] == chrom and aln['stt1'] <= pos < aln['end1']: | |
yield (aln['chr2'], aln['stt2'] + pos - aln['stt1']) | |
# took these from dbsnp website | |
test_str = '''(38) 1:169_549_811 = (37) 1:169_519_049 | |
(38) 1:25_257_119 = (37) 1:25_583_610 | |
(38) 6:396_321 = (37) 6:396_321 | |
(38) 16:3_243_257 = (37) 16:3_293_257 | |
(38) 16:3_243_403 = (37) 16:3_293_403 | |
(38) 16:3_243_257 = (37) 16:3_293_257''' | |
tests = [] | |
for t in test_str.split('\n'): | |
t = t.split() | |
xs = [t[i].split(':') for i in [1,4]] | |
tests.append([(f'chr{x[0]}', int(x[1])) for x in xs]) | |
for test in tests: | |
lkup = list(lookup(*test[0])) | |
print(f'{test[0][0]}:{test[0][1]:,} -> {test[1][0]}:{test[1][1]:,}') | |
assert len(lkup) == 1, lkup | |
assert lkup[0] == test[1] | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment