Created
December 5, 2018 14:56
-
-
Save gnuton/6fa12d87a9ee98fdcb74016f1b09fdd0 to your computer and use it in GitHub Desktop.
Reading HLA .dat files in Python (not complete).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
"""Read allele records from an HLA ``.dat`` flat file (work in progress).

Each line starts with a two-character line code followed by its value.
An ``ID`` line opens a new record; ``SQ`` lines and the sequence lines
that follow them are attached to the most recently opened record.
Only the ``ID`` and ``SQ`` parts are parsed so far.
"""
from itertools import islice

filename = "hla.dat"
OP_BEGIN = "ID"
OP_WIDTH = 2
LAST = -1

# Sequence lines carry a blank line code (OP_WIDTH spaces).
SEQ_OP = " " * OP_WIDTH
# Number of trailing characters dropped from every sequence line.
# NOTE(review): presumably the right-aligned running base count of the
# EMBL-style layout -- confirm against the file format specification.
SEQ_COUNTER_WIDTH = 10

# for debugging: only the first MAX_N_LINE_TO_READ lines are parsed
MAX_N_LINE_TO_READ = 100


def parse_hla_lines(lines, max_lines=None):
    """Parse HLA flat-file lines into a list of allele dicts.

    Args:
        lines: iterable of text lines (an open file works).
        max_lines: optional non-negative cap on how many lines are
            consumed; ``None`` reads everything.

    Returns:
        A list with one dict per ``ID`` line, in file order.  Each dict
        has the keys ``"FT"`` (always an empty list for now), ``"SQ"``
        (a two-item list: the ``SQ`` header text and the concatenated
        sequence with blanks removed) and ``"ID"`` (the ``ID`` line
        value).
    """
    if max_lines is not None:
        lines = islice(lines, max_lines)

    allels = []
    for line in lines:
        op = line[:OP_WIDTH]
        value = line[OP_WIDTH:].strip()

        if op == OP_BEGIN:
            # An ID line opens a new record and names it.
            allels.append({"FT": [], "SQ": ["", ""]})
            allels[LAST][op] = value
            continue

        if not allels:
            # Nothing to attach to before the first ID line; skip
            # instead of failing on an empty list.
            continue

        # All other operations work on the last allel in the list.
        # TODO: "FT" lines are not collected yet.
        if op == "SQ":
            allels[LAST][op][0] = value
        elif op == SEQ_OP:
            dna_seq = value[:-SEQ_COUNTER_WIDTH].strip().replace(" ", "")
            allels[LAST]["SQ"][1] += dna_seq
    return allels


if __name__ == "__main__":
    with open(filename, "r") as f:
        allels = parse_hla_lines(f, MAX_N_LINE_TO_READ)

    # prints out all parsed allels
    for a in allels:
        print(a)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment