Last active
November 24, 2024 22:03
-
-
Save Nikolaj-K/9d0e1a92be4354acbe59174cab96e8c1 to your computer and use it in GitHub Desktop.
Bayes rules exmaple
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
from scipy import special | |
import matplotlib.pyplot as plt | |
import numpy as np | |
def l1_normalize(finite_stream): | |
lst = list(finite_stream) | |
return np.array(lst) / sum(lst) | |
def update_priors(hs, priors, likelihood_function, sample_x): | |
""" | |
Return i \\mapsto P(H_i|x) | |
where P(H|x) := P(x|H) * P(H) / P(X) | |
where P(x|H) ... likelihood for fixed x given any H | |
P(H) ... Priors for H | |
P(x) ... Marginal probability for x, or E_H[P(x|H)], | |
implicitly obtained via normalization | |
(Summing P(H|x) over H gives 1) | |
""" | |
likelihoods = [likelihood_function(sample_x, h) for h in hs] # For all hs | |
numerator = np.multiply(likelihoods, priors) # For all hs | |
posterior = l1_normalize(numerator) # Return posterior | |
return posterior | |
def _likelihood_function__3b1b_example(test, h): | |
# For fixing error language in the comments: Null hypothesis = HEALTHY | |
# Chances for when test (sample) claims SICK | |
TRUE_POSITIVE = 9 / 10 | |
FALSE_POSITIVE = 89 / 990 # Type I error w.r.t. Null hypothesis | |
# See video around 3min15, https://youtu.be/lG4VkPoG3ko?si=ieU7dBYSKBsJ5ulq | |
# Chances for when test (sample) claims HEALTHY | |
TRUE_NEGATIVE = 1.0 - FALSE_POSITIVE # Confidence | |
FALSE_NEGATIVE = 1.0 - TRUE_POSITIVE # Type II error | |
test_claims_positive: bool = test=="SICK" | |
test_is_correct: bool = test==h | |
if test_claims_positive: | |
return TRUE_POSITIVE if test_is_correct else FALSE_POSITIVE | |
else: | |
return TRUE_NEGATIVE if test_is_correct else FALSE_NEGATIVE | |
def _is_validated_3b1b_example_data(posteriors_dict) -> bool: | |
EPS = 10**-16 | |
SICK_SICK_REF = 9 / (9 + 89) | |
return abs(SICK_SICK_REF - posteriors_dict["SICK"]["SICK"]) < EPS | |
def _print_make_3b1b_example(priors_for_h_dict, posteriors_dict): | |
healthy, sick = list(priors_for_h_dict.keys()) | |
print(f"""{80 * "-"}\n | |
The prior chance any given person has the sickness is | |
{healthy}: {round(priors_for_h_dict[healthy], 5)} % | |
resp. | |
{sick}: {round(priors_for_h_dict[sick], 5)} %""") | |
for sampled_h, posterior in posteriors_dict.items(): | |
print(f"""After testing and the test saying they are {sampled_h}, our credence is | |
{healthy}: {round(posterior[healthy], 5)} | |
resp. | |
{sick}: {round(posterior[sick], 5)}""") | |
def run_3b1b_example(): | |
priors_for_h_dict = dict(HEALTHY=99, SICK=1) | |
hs = list(priors_for_h_dict.keys()) | |
priors_for_h = l1_normalize(priors_for_h_dict.values()) | |
posteriors_dict = {sampled_h: | |
# Computation and formatting to dict | |
dict(zip(hs, update_priors(hs, priors_for_h, | |
_likelihood_function__3b1b_example, sampled_h))) | |
for sampled_h in hs} | |
_print_make_3b1b_example(priors_for_h_dict, posteriors_dict) | |
assert _is_validated_3b1b_example_data(posteriors_dict) | |
def _likelihood_function__sample_k_out_of_n(sample__k_out_of_n, h__proposed_rate): | |
# (Needs some theory to be derived.) | |
k, n = sample__k_out_of_n | |
r = h__proposed_rate | |
assert 0 <= r <= 1, f"{r} is not a valid rate" | |
chance = r**k * (1 - r)**(n-k) # Chance of first picking k times, and then failing to pick n-k | |
num_permutations = special.binom(n, k) # Num indistinguishable variations of picking k from n in a sequnece | |
return num_permutations * chance | |
def _print_book_narrative(hs__ratio_vals, priors_for_h, SAMPLE_MAX_N, sampled_k, posteriors_for_h): | |
def rounds(xs): | |
return ", ".join(str(round(100 * x, 2)) for x in xs) | |
print(f"""{80 * "-"}\n | |
Some fraction of the population has a medical condition. | |
There are {len(hs__ratio_vals)} studies suggesting the ratio in % is, respectively, | |
{rounds(hs__ratio_vals)} | |
We give those a credence in %, respectively, of | |
{rounds(priors_for_h)} | |
We do our own experiment, drawing a sample of {SAMPLE_MAX_N} and finding {sampled_k} cases, | |
i.e. {rounds([sampled_k / SAMPLE_MAX_N])} %. | |
With this, our posterior credence in % for the studies becomes | |
{rounds(posteriors_for_h)} | |
""") | |
def _make_book_example_plot(hs__ratio_vals, priors_for_h, sampled_ratio, posteriors_for_h): | |
plt.figure(figsize=(6, 4)) | |
plt.plot(hs__ratio_vals, label='ratio_vals', marker='.', linestyle='-', color='k', linewidth=1) | |
plt.plot(priors_for_h, label='priors', marker='o', linestyle='--', color='darkblue', linewidth=3) | |
plt.plot(posteriors_for_h, label='posterior', marker='o', linestyle='--', color='blue', linewidth=3) | |
plt.axhline(y=sampled_ratio, color='gray', linestyle='--', label=f'sample={round(sampled_ratio, 3)}', alpha=0.5) | |
for idx, (pri, post) in enumerate(zip(priors_for_h, posteriors_for_h)): | |
OFFSET: float = -1/2 | |
x_and_x_next = [0, 1] + np.ones(2) * (idx + OFFSET) | |
color: str = "green" if post > pri else "red" | |
plt.fill_between(x_and_x_next, pri, post, color=color, alpha=0.3) | |
plt.ylim(0, 1) | |
plt.xlabel('Index of data value (ratio in this case)') | |
plt.title('Bayesian Rule update step') | |
plt.legend() | |
plt.show() | |
def run_book_example(): | |
MAX_NUM_STUDIES = 15 | |
SAMPLE_MAX_N = 13 | |
MU, SIGMA = 0.5, 0.25 # For making h values for this script | |
def is_valid_ratio(h): | |
return 0 <= h <= 1 | |
hs__ratio_vals_unsorted = filter( | |
is_valid_ratio, | |
np.random.normal(MU, SIGMA, MAX_NUM_STUDIES) | |
) # Could choose any distribution here | |
hs__ratio_vals = sorted(hs__ratio_vals_unsorted) | |
priors_for_h = l1_normalize(np.random.rand(len(hs__ratio_vals))) | |
sampled_k = random.choice(range(SAMPLE_MAX_N + 1)) # Could choose any distribution here | |
k_out_of_n = (sampled_k, SAMPLE_MAX_N) # Sampled h | |
posteriors_for_h = update_priors(hs__ratio_vals, priors_for_h, | |
_likelihood_function__sample_k_out_of_n, k_out_of_n) | |
_print_book_narrative(hs__ratio_vals, priors_for_h, | |
SAMPLE_MAX_N, sampled_k, posteriors_for_h) | |
_make_book_example_plot(hs__ratio_vals, priors_for_h, | |
sampled_k / SAMPLE_MAX_N, posteriors_for_h) | |
if __name__=="__main__": | |
run_3b1b_example() | |
run_book_example() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment