Last active
September 2, 2021 13:48
-
-
Save casibbald/3b6bad5b2d938d047bad to your computer and use it in GitHub Desktop.
Synthesise genes for a potential child from two 23andme profiles. This is a work in progress and my still have issues.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import os | |
import zipfile | |
from collections import Counter | |
# gene_set = set(['rs2015343', 'rs12203592', 'rs4778136', 'rs9782955', | |
# 'rs4778138', 'rs3739070', 'rs11074314', 'rs11631797', | |
# 'rs12914687', 'rs1667394', 'rs3794604', 'rs3947367', | |
# 'rs4778241', 'rs7495174', 'rs7170869', | |
# 'rs12593929', 'rs4900109', 'rs2240203', 'rs7174027', | |
# 'rs8028689', 'rs916977', 'rs3940272', 'rs3935591', | |
# 'rs2703969', 'rs1533995', 'rs12906280', 'rs12896399', | |
# 'rs1800414', 'rs10235789', 'rs12913832', 'rs746861', | |
# 'rs7170852', 'rs16891982', 'rs7494942', 'rs11634406', | |
# 'rs7183877', 'rs989869', 'rs2238289', 'rs1129038', | |
# 'rs895828', 'rs1800407', 'rs1800404', 'rs9834312', | |
# 'rs2179922', 'rs1867138', 'rs1867137', 'rs939882', | |
# 'rs1042725', 'rs363050', 'rs174575', 'rs324640', | |
# 'rs363039', 'rs363043', 'rs353016', ]) | |
_23andme_header = """ | |
# This data file generated by 23andMe at: Thu Dec 17 08:08:10 2015 | |
# | |
# This file contains raw genotype data, including data that is not used in 23andMe reports. | |
# This data has undergone a general quality review however only a subset of markers have been | |
# individually validated for accuracy. As such, this data is suitable only for research, | |
# educational, and informational use and not for medical or other use. | |
# | |
# Below is a text version of your data. Fields are TAB-separated | |
# Each line corresponds to a single SNP. For each SNP, we provide its identifier | |
# (an rsid or an internal id), its location on the reference human genome, and the | |
# genotype call oriented with respect to the plus strand on the human reference sequence. | |
# We are using reference human assembly build 37 (also known as Annotation Release 104). | |
# Note that it is possible that data downloaded at different times may be different due to ongoing | |
# improvements in our ability to call genotypes. More information about these changes can be found at: | |
# https://www.23andme.com/you/download/revisions/ | |
# | |
# More information on reference human assembly build 37 (aka Annotation Release 104): | |
# http://www.ncbi.nlm.nih.gov/mapview/map_search.cgi?taxid=9606 | |
# | |
# rsid chromosome position genotype | |
""" | |
descriptors = ["eye_colour", "melanin_intensity", "hair_colour",] | |
gene_discriptors = {"rs9782955": {"eye_colour": "Blocks some melanin. Often gives light colored eyes", | |
"melanin_intensity": "Blocks some melanin. Often gives light colored eyes." | |
}, | |
"rs12203592": {"hair_colour": "associated with hair color, eye color, and tanning response to sunlight"}, | |
"rs4778138": {"eye_colour":"", | |
"melanin_intensity": "freckles more likely",}, | |
"rs7495174": {"eye_colour": "AA - blue/gray eyes more likely associated with blue or green eye color in Caucasians"}, | |
"rs1667394": {"eye_colour": "AA - increases susceptibility to Blond rather than brown hair 4.94 times for carriers of the A allele "}, | |
"rs3794604": {"melanin_intensity": "CC - Blocks some melanin. Often gives light colored eyes."}, | |
"rs3947367": {"hair_colour": "associated with red hair color"}, | |
"rs4778241": {"eye_colour": "AC - usually brown eye color"}, | |
"rs7174027": {"melanin_intensity": "Blocks some melanin. Often gives light colored eyes."}, | |
"rs12896399": {"hair_colour": "GG - Light hair color"}, | |
"rs1800414": {"melanin_intensity": "Blocks some melanine"}, | |
"rs12913832": {"eye_colour": "GG - blue eye color, 99% of the time; AG - brown eye color; AA - brown eye color, 80% of the time"}, | |
"rs16891982": {"hair_colour": "more likely to have black hair, Light skin; "}, | |
"rs7183877": {"eye_colour": "CC - blue eye color if part of blue eye color haplotype; AA/AC - usually brown eye color"}, | |
"rs989869": {"eye_colour": "CT - Contrasting sphincter around pupil."}, | |
} | |
def write_gene_rslist(output_file): | |
with open(output_file, mode='w') as f: | |
while True: | |
item = yield | |
#print item | |
f.write("{0} \n".format(item)) | |
def get_rslist_from_zip(zip_file): | |
new_gene_set = set([]) | |
with zipfile.ZipFile(os.path.expanduser(zip_file)) as z: | |
with z.open(z.namelist()[0]) as f: | |
for line in f: | |
if not line.startswith("#"): | |
line_list = line.split() | |
new_gene_set.add(line_list[0]) | |
return new_gene_set | |
def get_text_from_zip(zip_file, gene_set): | |
gene_dict = {} | |
with zipfile.ZipFile(os.path.expanduser(zip_file)) as z: | |
with z.open(z.namelist()[0]) as f: | |
for line in f: | |
if not line.startswith("#"): | |
line_list = line.split() | |
if line_list[0] in gene_set: | |
gene_dict[line_list[0]] = { | |
"chromosome" : line_list[1], | |
"position" : line_list[2], | |
"genotype" : line_list[3], | |
} | |
return gene_dict | |
def get_recessive_phenotypes(x_genotype, y_genotype): | |
if x_genotype == '--': | |
return y_genotype | |
if y_genotype == '--': | |
return x_genotype | |
alleles = [] | |
for allele in x_genotype: | |
alleles.append(allele) | |
for allele in y_genotype: | |
alleles.append(allele) | |
count = Counter(alleles) | |
if count[count.keys()[0]] == count[count.keys()[1]]: | |
progressive_allele = "".join([count.keys()[0], count.keys()[1]]) | |
return progressive_allele | |
else: | |
recessive_allele = max(count, key=count.get) | |
return recessive_allele * 2 | |
def get_gene_matches(x, y, gene_set, output_file): | |
recv = write_gene_rslist(output_file) | |
recv.next() | |
recv.send(_23andme_header) | |
#print "{:<15} {:>10} {:<10} {:>10} {:>10} {:>10}".format("rsid", "chromosome", "position", "x_genotype", "y_genotype", "Progeny") | |
for rs in gene_set: | |
try: | |
if x[rs] and y[rs]: | |
if x[rs]["genotype"] == y[rs]["genotype"]: | |
print "{:<15} {:^10} {:<10} {:>10} {:>10} {:>10}".format(rs, x[rs]["chromosome"], x[rs]["position"], x[rs]["genotype"], y[rs]["genotype"], "".join([x[rs]["genotype"][:1], y[rs]["genotype"][1:]])) | |
recv.send("{:<15} {:^10} {:<10} {:>10}".format(rs, x[rs]["chromosome"], x[rs]["position"], "".join([x[rs]["genotype"][:1], y[rs]["genotype"][1:]]))) | |
else: | |
print "{:<15} {:^10} {:<10} {:>10} {:>10} {:>10}".format(rs, x[rs]["chromosome"], x[rs]["position"], x[rs]["genotype"], y[rs]["genotype"], get_recessive_phenotypes(x[rs]["genotype"], y[rs]["genotype"])) | |
recv.send("{:<15} {:^10} {:<10} {:>10}".format(rs, x[rs]["chromosome"], x[rs]["position"], get_recessive_phenotypes(x[rs]["genotype"], y[rs]["genotype"]))) | |
except: | |
#print "{:<15} {:^10} {:<10} {:<10}".format(rs, x[rs]["chromosome"], x[rs]["position"], x[rs]["genotype"]) | |
#print "{:<15} {:^10} {:<10} {:<10}".format(rs, y[rs]["chromosome"], y[rs]["position"], y[rs]["genotype"]) | |
pass | |
parser = argparse.ArgumentParser() | |
subparsers = parser.add_subparsers(dest='subparser_name', help='commands') | |
#synthesis command | |
gene_parser = subparsers.add_parser('synthesis', help='specify operation') | |
gene_parser.add_argument('-x', '--female', required=True, help='female 23andme zip file, do not extract') | |
gene_parser.add_argument('-y', '--male', required=True, help='male 23andme zip file, do not extract') | |
gene_parser.add_argument('-o', '--output', required=True, help='outputfile') | |
def runner(): | |
if parser.parse_args().subparser_name == 'synthesis': | |
gene_set_x = get_rslist_from_zip(parser.parse_args().female) | |
gene_set_y = get_rslist_from_zip(parser.parse_args().male) | |
gene_set = gene_set_x | gene_set_y | |
get_gene_matches(get_text_from_zip(parser.parse_args().female, gene_set), | |
get_text_from_zip(parser.parse_args().male, gene_set), | |
gene_set, | |
parser.parse_args().output) | |
if __name__ == "__main__": | |
runner() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment