Bayes rule example
import random
from scipy import special
import matplotlib.pyplot as plt
import numpy as np

def l1_normalize(finite_stream):
    lst = list(finite_stream)
    return np.array(lst) / sum(lst)

def update_priors(hs, priors, likelihood_function, sample_x):
    """
    Return i \\mapsto P(H_i|x)
    where P(H|x) := P(x|H) * P(H) / P(x)
    where P(x|H) ... likelihood for fixed x given any H
          P(H)   ... priors for H
          P(x)   ... marginal probability of x, i.e. E_H[P(x|H)],
                     implicitly obtained via normalization
                     (summing P(H|x) over H gives 1)
    """
    likelihoods = [likelihood_function(sample_x, h) for h in hs]  # For all hs
    numerator = np.multiply(likelihoods, priors)  # For all hs
    posterior = l1_normalize(numerator)  # Normalization supplies the 1/P(x) factor
    return posterior
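
# A minimal, hypothetical sanity check of update_priors (the coin, its two candidate
# heads-rates and the uniform prior below are illustrative assumptions, not part of
# the examples further down): two hypotheses about a coin, one observed HEADS,
# updated by hand and via update_priors.
def _sanity_check_update_priors():
    def coin_likelihood(sample, h_rate):
        # P(sample | rate): chance of HEADS resp. TAILS given the proposed heads-rate
        return h_rate if sample == "HEADS" else 1.0 - h_rate
    hs = [0.5, 0.9]  # Hypotheses: fair coin vs. heads-biased coin
    priors = [0.5, 0.5]  # Uniform prior credence over the two hypotheses
    posterior = update_priors(hs, priors, coin_likelihood, "HEADS")
    # By hand: P(rate=0.9 | HEADS) = (0.9 * 0.5) / (0.5 * 0.5 + 0.9 * 0.5) = 9 / 14
    assert np.allclose(posterior, [5 / 14, 9 / 14])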

def _likelihood_function__3b1b_example(test, h):
    # For the error terminology in the comments below, the null hypothesis is HEALTHY.
    # Chances for when the test (sample) claims SICK
    TRUE_POSITIVE = 9 / 10
    FALSE_POSITIVE = 89 / 990  # Type I error w.r.t. the null hypothesis
    # See the video around 3:15, https://youtu.be/lG4VkPoG3ko?si=ieU7dBYSKBsJ5ulq
    # Chances for when the test (sample) claims HEALTHY
    TRUE_NEGATIVE = 1.0 - FALSE_POSITIVE  # Confidence (1 - Type I error rate)
    FALSE_NEGATIVE = 1.0 - TRUE_POSITIVE  # Type II error
    test_claims_positive: bool = test == "SICK"
    test_is_correct: bool = test == h
    if test_claims_positive:
        return TRUE_POSITIVE if test_is_correct else FALSE_POSITIVE
    else:
        return TRUE_NEGATIVE if test_is_correct else FALSE_NEGATIVE
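
# Worked instance of the constants above (a comment-only derivation using this
# file's numbers), which is where the reference value SICK_SICK_REF checked below
# comes from:
#     P(SICK | test=SICK) = P(test=SICK | SICK) * P(SICK) / P(test=SICK)
#                         = (9/10 * 1/100) / (9/10 * 1/100 + 89/990 * 99/100)
#                         = (9/1000) / (9/1000 + 89/1000)
#                         = 9 / (9 + 89) ≈ 0.0918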

def _is_validated_3b1b_example_data(posteriors_dict) -> bool:
    EPS = 10**-16
    SICK_SICK_REF = 9 / (9 + 89)
    return abs(SICK_SICK_REF - posteriors_dict["SICK"]["SICK"]) < EPS

def _print_make_3b1b_example(priors_for_h_dict, posteriors_dict):
    healthy, sick = list(priors_for_h_dict.keys())
    print(f"""{80 * "-"}\n
The prior chances for any given person are
{healthy}: {round(priors_for_h_dict[healthy], 5)} %
resp.
{sick}: {round(priors_for_h_dict[sick], 5)} %""")
    for sampled_h, posterior in posteriors_dict.items():
        print(f"""After testing, with the test claiming they are {sampled_h}, our credence is
{healthy}: {round(posterior[healthy], 5)}
resp.
{sick}: {round(posterior[sick], 5)}""")

def run_3b1b_example():
    priors_for_h_dict = dict(HEALTHY=99, SICK=1)
    hs = list(priors_for_h_dict.keys())
    priors_for_h = l1_normalize(priors_for_h_dict.values())
    posteriors_dict = {
        # Computation and formatting to dict, for each possible test outcome
        sampled_h: dict(zip(hs, update_priors(hs, priors_for_h,
                                              _likelihood_function__3b1b_example, sampled_h)))
        for sampled_h in hs
    }
    _print_make_3b1b_example(priors_for_h_dict, posteriors_dict)
    assert _is_validated_3b1b_example_data(posteriors_dict)

def _likelihood_function__sample_k_out_of_n(sample__k_out_of_n, h__proposed_rate):
    # Binomial likelihood P(k out of n | rate). (Needs some theory to be derived.)
    k, n = sample__k_out_of_n
    r = h__proposed_rate
    assert 0 <= r <= 1, f"{r} is not a valid rate"
    chance = r**k * (1 - r)**(n - k)  # Chance of first picking k times, then failing to pick the remaining n-k times
    num_permutations = special.binom(n, k)  # Number of indistinguishable orderings of picking k out of n in a sequence
    return num_permutations * chance
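
# A small, hypothetical cross-check of the hand-rolled likelihood above against
# scipy's binomial pmf (scipy.stats.binom.pmf); the particular k, n and rate are
# arbitrary illustrative choices.
def _sanity_check_binomial_likelihood():
    from scipy import stats
    k_out_of_n = (4, 13)  # Observed 4 cases in a sample of 13
    rate = 0.3  # Proposed underlying ratio
    ours = _likelihood_function__sample_k_out_of_n(k_out_of_n, rate)
    reference = stats.binom.pmf(k_out_of_n[0], k_out_of_n[1], rate)
    assert np.isclose(ours, reference)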

def _print_book_narrative(hs__ratio_vals, priors_for_h, SAMPLE_MAX_N, sampled_k, posteriors_for_h):
    def rounds(xs):
        return ", ".join(str(round(100 * x, 2)) for x in xs)
    print(f"""{80 * "-"}\n
Some fraction of the population has a medical condition.
There are {len(hs__ratio_vals)} studies suggesting the ratio in % is, respectively,
{rounds(hs__ratio_vals)}
We give those a credence in %, respectively, of
{rounds(priors_for_h)}
We do our own experiment, drawing a sample of {SAMPLE_MAX_N} and finding {sampled_k} cases,
i.e. {rounds([sampled_k / SAMPLE_MAX_N])} %.
With this, our posterior credence in % for the studies becomes
{rounds(posteriors_for_h)}
""")

def _make_book_example_plot(hs__ratio_vals, priors_for_h, sampled_ratio, posteriors_for_h):
    plt.figure(figsize=(6, 4))
    plt.plot(hs__ratio_vals, label='ratio_vals', marker='.', linestyle='-', color='k', linewidth=1)
    plt.plot(priors_for_h, label='priors', marker='o', linestyle='--', color='darkblue', linewidth=3)
    plt.plot(posteriors_for_h, label='posterior', marker='o', linestyle='--', color='blue', linewidth=3)
    plt.axhline(y=sampled_ratio, color='gray', linestyle='--', label=f'sample={round(sampled_ratio, 3)}', alpha=0.5)
    for idx, (pri, post) in enumerate(zip(priors_for_h, posteriors_for_h)):
        OFFSET: float = -1 / 2
        x_and_x_next = [0, 1] + np.ones(2) * (idx + OFFSET)  # Unit-wide band centered at idx
        color: str = "green" if post > pri else "red"  # Green where credence rose, red where it fell
        plt.fill_between(x_and_x_next, pri, post, color=color, alpha=0.3)
    plt.ylim(0, 1)
    plt.xlabel('Index of data value (ratio in this case)')
    plt.title("Bayes' rule update step")
    plt.legend()
    plt.show()

def run_book_example():
    MAX_NUM_STUDIES = 15
    SAMPLE_MAX_N = 13
    MU, SIGMA = 0.5, 0.25  # For making h values for this script

    def is_valid_ratio(h):
        return 0 <= h <= 1

    hs__ratio_vals_unsorted = filter(
        is_valid_ratio,
        np.random.normal(MU, SIGMA, MAX_NUM_STUDIES)
    )  # Could choose any distribution here
    hs__ratio_vals = sorted(hs__ratio_vals_unsorted)
    priors_for_h = l1_normalize(np.random.rand(len(hs__ratio_vals)))
    sampled_k = random.choice(range(SAMPLE_MAX_N + 1))  # Could choose any distribution here
    k_out_of_n = (sampled_k, SAMPLE_MAX_N)  # Sampled data point x
    posteriors_for_h = update_priors(hs__ratio_vals, priors_for_h,
                                     _likelihood_function__sample_k_out_of_n, k_out_of_n)
    _print_book_narrative(hs__ratio_vals, priors_for_h,
                          SAMPLE_MAX_N, sampled_k, posteriors_for_h)
    _make_book_example_plot(hs__ratio_vals, priors_for_h,
                            sampled_k / SAMPLE_MAX_N, posteriors_for_h)
if __name__=="__main__":
run_3b1b_example()
run_book_example()