Skip to content

Instantly share code, notes, and snippets.

@dtlanghoff
Last active August 16, 2020 03:32
Show Gist options
  • Save dtlanghoff/b603030865bac5d9d623bbb308aa6dbb to your computer and use it in GitHub Desktop.
Save dtlanghoff/b603030865bac5d9d623bbb308aa6dbb to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import itertools
import pprint
from math import prod
import numpy as np
from scipy.optimize import least_squares
phenotype_distribution = np.array([0.39, 0.49, 0.08, 0.04])
allele_IA, allele_IB = least_squares((lambda x:
(lambda i, IA, IB: np.array([i**2, 2*i*IA + IA**2, 2*i*IB + IB**2, 2*IA*IB]))(1-(x[0]+x[1]), x[0], x[1]) - phenotype_distribution),
np.ones(2)/3).x
allele_i = 1.-(allele_IA+allele_IB)
genotypes = ('OO', 'AO', 'AA', 'BO', 'BB', 'AB')
def prior_genotype(genotype):
return {'OO': allele_i*allele_i,
'AO': 2*allele_i*allele_IA,
'AA': allele_IA**2,
'BO': 2*allele_i*allele_IB,
'BB': allele_IB**2,
'AB': 2*allele_IA*allele_IB}[genotype]
# sannsynet for at ein person har ein fenotype gjeve genotypen
def likelihood_phenotype_genotype(phenotype, genotype):
return float(genotype in {'O': ('OO',), 'A': ('AO', 'AA'), 'B': ('BO', 'BB'), 'AB': ('AB',)}[phenotype])
# sannsynet for at barnet har ein genotype gjeve foreldrane sine genotypar
def likelihood_inheritance(child, father, mother):
return (father.count(child[0])*mother.count(child[1]) + father.count(child[1])*mother.count(child[0])) / (4 << (child[0] == child[1]))
if __name__ == '__main__':
people = ['Ola', 'Kari', 'Emma', 'Jakob']
# barn: (far, mor)
family_relations = {'Emma': ('Ola', 'Kari'),
'Jakob': ('Ola', 'Kari')}
# person: fenotype
known_phenotypes = {'Emma': 'AB'}
# dei personane som ikkje er barn av nokon, og som dermed har blodtype som ikkje avheng av nokon andre
independent = set(people) - family_relations.keys()
# mengda av moglege genotypar for kvar person
domains = [genotypes] * len(people)
# constraint propagation
# utelukk genotypar som personen ikkje kan ha på grunn av fenotypen
for person, phenotype in known_phenotypes.items():
pid = people.index(person)
domains[pid] = tuple(genotype for genotype in domains[pid]
if likelihood_phenotype_genotype(phenotype, genotype) > 0.0)
n = None
while n != (n := prod(len(domain) for domain in domains)):
print('domain size:', n)
for child, parents in family_relations.items():
cid = people.index(child)
fid, mid = (people.index(parent) for parent in parents)
# utelukk genotypar som barnet ikkje kan ha på grunn av foreldra
domains[cid] = tuple(c_genotype for c_genotype in domains[cid]
if any(likelihood_inheritance(c_genotype, f_genotype, m_genotype) > 0.0
for f_genotype in domains[fid] for m_genotype in domains[mid]))
# utelukk genotypar som faren ikkje kan ha på grunn av barnet og mora
domains[fid] = tuple(f_genotype for f_genotype in domains[fid]
if any(likelihood_inheritance(c_genotype, f_genotype, m_genotype) > 0.0
for c_genotype in domains[cid] for m_genotype in domains[mid]))
# utelukk genotypar som mora ikkje kan ha på grunn av barnet og faren
domains[mid] = tuple(m_genotype for m_genotype in domains[mid]
if any(likelihood_inheritance(c_genotype, f_genotype, m_genotype) > 0.0
for c_genotype in domains[cid] for f_genotype in domains[fid]))
assert n > 0, 'Inconsistent information given'
print()
models = [(prod(prior_genotype(model[people.index(person)]) for person in independent) *
prod(likelihood_inheritance(*(model[people.index(i)] for i in (child, father, mother)))
for child, (father, mother) in family_relations.items()),
model)
for model in itertools.product(*domains)]
denominator = sum(numerator for numerator, model in models)
models = sorted(((numerator/denominator, model) for numerator, model in models if numerator > 0.0),
reverse=True)
for person in people:
print('ABO genotype distribution for', person)
pprint.pprint({genotype: sum(posterior for posterior, model in models
if model[people.index(person)] == genotype)
for genotype in genotypes})
print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment