Last active
August 16, 2020 03:32
-
-
Save dtlanghoff/b603030865bac5d9d623bbb308aa6dbb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import itertools | |
import pprint | |
from math import prod | |
import numpy as np | |
from scipy.optimize import least_squares | |
phenotype_distribution = np.array([0.39, 0.49, 0.08, 0.04]) | |
allele_IA, allele_IB = least_squares((lambda x: | |
(lambda i, IA, IB: np.array([i**2, 2*i*IA + IA**2, 2*i*IB + IB**2, 2*IA*IB]))(1-(x[0]+x[1]), x[0], x[1]) - phenotype_distribution), | |
np.ones(2)/3).x | |
allele_i = 1.-(allele_IA+allele_IB) | |
genotypes = ('OO', 'AO', 'AA', 'BO', 'BB', 'AB') | |
def prior_genotype(genotype): | |
return {'OO': allele_i*allele_i, | |
'AO': 2*allele_i*allele_IA, | |
'AA': allele_IA**2, | |
'BO': 2*allele_i*allele_IB, | |
'BB': allele_IB**2, | |
'AB': 2*allele_IA*allele_IB}[genotype] | |
# sannsynet for at ein person har ein fenotype gjeve genotypen | |
def likelihood_phenotype_genotype(phenotype, genotype): | |
return float(genotype in {'O': ('OO',), 'A': ('AO', 'AA'), 'B': ('BO', 'BB'), 'AB': ('AB',)}[phenotype]) | |
# sannsynet for at barnet har ein genotype gjeve foreldrane sine genotypar | |
def likelihood_inheritance(child, father, mother): | |
return (father.count(child[0])*mother.count(child[1]) + father.count(child[1])*mother.count(child[0])) / (4 << (child[0] == child[1])) | |
if __name__ == '__main__': | |
people = ['Ola', 'Kari', 'Emma', 'Jakob'] | |
# barn: (far, mor) | |
family_relations = {'Emma': ('Ola', 'Kari'), | |
'Jakob': ('Ola', 'Kari')} | |
# person: fenotype | |
known_phenotypes = {'Emma': 'AB'} | |
# dei personane som ikkje er barn av nokon, og som dermed har blodtype som ikkje avheng av nokon andre | |
independent = set(people) - family_relations.keys() | |
# mengda av moglege genotypar for kvar person | |
domains = [genotypes] * len(people) | |
# constraint propagation | |
# utelukk genotypar som personen ikkje kan ha på grunn av fenotypen | |
for person, phenotype in known_phenotypes.items(): | |
pid = people.index(person) | |
domains[pid] = tuple(genotype for genotype in domains[pid] | |
if likelihood_phenotype_genotype(phenotype, genotype) > 0.0) | |
n = None | |
while n != (n := prod(len(domain) for domain in domains)): | |
print('domain size:', n) | |
for child, parents in family_relations.items(): | |
cid = people.index(child) | |
fid, mid = (people.index(parent) for parent in parents) | |
# utelukk genotypar som barnet ikkje kan ha på grunn av foreldra | |
domains[cid] = tuple(c_genotype for c_genotype in domains[cid] | |
if any(likelihood_inheritance(c_genotype, f_genotype, m_genotype) > 0.0 | |
for f_genotype in domains[fid] for m_genotype in domains[mid])) | |
# utelukk genotypar som faren ikkje kan ha på grunn av barnet og mora | |
domains[fid] = tuple(f_genotype for f_genotype in domains[fid] | |
if any(likelihood_inheritance(c_genotype, f_genotype, m_genotype) > 0.0 | |
for c_genotype in domains[cid] for m_genotype in domains[mid])) | |
# utelukk genotypar som mora ikkje kan ha på grunn av barnet og faren | |
domains[mid] = tuple(m_genotype for m_genotype in domains[mid] | |
if any(likelihood_inheritance(c_genotype, f_genotype, m_genotype) > 0.0 | |
for c_genotype in domains[cid] for f_genotype in domains[fid])) | |
assert n > 0, 'Inconsistent information given' | |
print() | |
models = [(prod(prior_genotype(model[people.index(person)]) for person in independent) * | |
prod(likelihood_inheritance(*(model[people.index(i)] for i in (child, father, mother))) | |
for child, (father, mother) in family_relations.items()), | |
model) | |
for model in itertools.product(*domains)] | |
denominator = sum(numerator for numerator, model in models) | |
models = sorted(((numerator/denominator, model) for numerator, model in models if numerator > 0.0), | |
reverse=True) | |
for person in people: | |
print('ABO genotype distribution for', person) | |
pprint.pprint({genotype: sum(posterior for posterior, model in models | |
if model[people.index(person)] == genotype) | |
for genotype in genotypes}) | |
print() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment