Created
March 16, 2017 22:59
-
-
Save ricarkol/863d57f51dde068eb3c97d90e0a932f3 to your computer and use it in GitHub Desktop.
Messing with 23andme raw exported data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
# Parse an exported raw genome data file from 23andme.com, which is basically
# one SNP per line. Each SNP is looked up in a small dictionary of
# "interesting" genes, like the ones related to celiac disease,
# lactose intolerance, or obesity. The first and only arg is the name of the
# file (uncompressed).
# Lookup table of "interesting" SNPs, keyed by rsid. Each entry holds:
#   'geno': mapping of genotype string (looked up exactly as it appears in
#           the input file) -> human-readable interpretation
#   'url':  SNPedia page for that SNP, printed next to every match
db = {
    'rs2187668': {
        'geno': {
            'AA': 'Autoimmune disorder risk (lupus, celiac disease) due to 2 HLA-DRB1*0301 alleles',
            'AG': 'Somewhat increased autoimmune disorder (lupus, celiac disease) risk; 1 HLA-DRB1*0301 allele',
            'GG': 'average',
        },
        'url': 'https://www.snpedia.com/index.php/Rs2187668',
    },
    'rs3184504': {
        'geno': {
            'CC': 'normal',
            'CT': 'increased risk for celiac disease',
            'TT': 'increased risk for celiac disease',
        },
        'url': 'https://www.snpedia.com/index.php/Rs3184504',
    },
    'rs6822844': {
        'geno': {
            'GG': 'increased risk for celiac disease. Common on affy axiom data',
            'GT': 'n/a',
            'TT': 'n/a',
        },
        'url': 'https://www.snpedia.com/index.php/Rs6822844',
    },
    'rs4988235': {
        'geno': {
            'CC': 'likely to be lactose intolerant as an adult',
            'CT': 'likely to be able to digest milk as an adult',
            'TT': 'can digest milk',
        },
        'url': 'https://www.snpedia.com/index.php/Rs4988235',
    },
    'rs182549': {
        'geno': {
            'CC': 'possibly lactose intolerant',
            'CT': 'Can digest milk.',
            'TT': 'Can digest milk.',
        },
        'url': 'https://www.snpedia.com/index.php/Rs182549',
    },
    'rs1799971': {
        'geno': {
            'AA': 'normal',
            'AG': '2.5 stronger cravings for alcohol. if alcoholic, naltrexone treatment 2x more successful',
            'GG': 'more pain',
        },
        'url': 'https://www.snpedia.com/index.php/Rs1799971',
    },
    'rs9939609': {
        'geno': {
            'AA': 'obesity risk and 1.6x risk for Type-2 diabetes',
            'AT': 'risk for Type-2 diabetes; obesity risk',
            'TT': 'lower risk of obesity and Type-2 diabetes',
        },
        # Previously pointed at Rs1799971 (copy-paste from the entry above).
        'url': 'https://www.snpedia.com/index.php/Rs9939609',
    },
}
# Command-line driver: scan the raw-data file named by the first argument and
# print one report line for every SNP whose rsid is a key of `db`.
#
# Written to run unchanged on Python 2 and Python 3: every print call passes a
# single pre-formatted string, and dict lookups use `.get` instead of the
# Python-2-only `has_key`.
if len(sys.argv) < 2:
    # Exit with a usage message instead of an IndexError traceback.
    sys.exit('usage: %s <23andme-raw-data-file>' % sys.argv[0])
infile = sys.argv[1]

print('#%-9s %-10s %-10s' % ('marker', 'chromosome', 'genotype'))
with open(infile) as inf:
    for line in inf:
        # Lines beginning with '#' are treated as comments and skipped.
        if line.startswith('#'):
            continue
        fields = line.split()
        if not fields:
            # Blank line: nothing to report.
            continue
        if len(fields) != 4:
            # Each data line is unpacked into exactly four whitespace-separated
            # fields; report anything else rather than crashing on unpack.
            sys.stderr.write('skipping malformed line: %r\n' % line)
            continue
        rsid, chromosome, _position, genotype = fields
        entry = db.get(rsid)
        if entry is None:
            # Not one of the SNPs we have an interpretation for.
            continue
        # Known SNP but unlisted genotype is reported as '??'.
        meaning = entry['geno'].get(genotype, '??')
        print('%-10s %-10s %-10s %s (%s)' % (rsid, chromosome, genotype,
                                             meaning, entry['url']))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment