Bayes rule example
import random
from scipy import special
import matplotlib.pyplot as plt
import numpy as np

def l1_normalize(finite_stream):
    lst = list(finite_stream)
    return np.array(lst) / sum(lst)

def update_priors(hs, priors, likelihood_function, sample_x):
    """
    Return i \\mapsto P(H_i|x)
    where P(H|x) := P(x|H) * P(H) / P(x)
    where P(x|H) ... likelihood for fixed x given any H
          P(H)   ... priors for H
          P(x)   ... marginal probability of x, i.e. E_H[P(x|H)],
                     implicitly obtained via normalization
                     (summing P(H|x) over H gives 1)
    """
    likelihoods = [likelihood_function(sample_x, h) for h in hs]  # For all hs
    numerator = np.multiply(likelihoods, priors)  # For all hs
    posterior = l1_normalize(numerator)  # Normalization supplies the 1/P(x) factor
    return posterior
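
# A minimal, hypothetical sanity check of update_priors (the coin, its two candidate
# heads-rates and the uniform prior below are illustrative assumptions, not part of
# the examples further down): two hypotheses about a coin, one observed HEADS,
# updated by hand and via update_priors.
def _sanity_check_update_priors():
    def coin_likelihood(sample, h_rate):
        # P(sample | rate): chance of HEADS resp. TAILS given the proposed heads-rate
        return h_rate if sample == "HEADS" else 1.0 - h_rate
    hs = [0.5, 0.9]  # Hypotheses: fair coin vs. heads-biased coin
    priors = [0.5, 0.5]  # Uniform prior credence over the two hypotheses
    posterior = update_priors(hs, priors, coin_likelihood, "HEADS")
    # By hand: P(rate=0.9 | HEADS) = (0.9 * 0.5) / (0.5 * 0.5 + 0.9 * 0.5) = 9 / 14
    assert np.allclose(posterior, [5 / 14, 9 / 14])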

def _likelihood_function__3b1b_example(test, h):
    # For the error terminology in the comments below, the null hypothesis is HEALTHY.
    # Chances for when the test (sample) claims SICK
    TRUE_POSITIVE = 9 / 10
    FALSE_POSITIVE = 89 / 990  # Type I error w.r.t. the null hypothesis
    # See the video around 3:15, https://youtu.be/lG4VkPoG3ko?si=ieU7dBYSKBsJ5ulq
    # Chances for when the test (sample) claims HEALTHY
    TRUE_NEGATIVE = 1.0 - FALSE_POSITIVE  # Confidence (1 - Type I error rate)
    FALSE_NEGATIVE = 1.0 - TRUE_POSITIVE  # Type II error
    test_claims_positive: bool = test == "SICK"
    test_is_correct: bool = test == h
    if test_claims_positive:
        return TRUE_POSITIVE if test_is_correct else FALSE_POSITIVE
    else:
        return TRUE_NEGATIVE if test_is_correct else FALSE_NEGATIVE
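
# Worked instance of the constants above (a comment-only derivation using this
# file's numbers), which is where the reference value SICK_SICK_REF checked below
# comes from:
#     P(SICK | test=SICK) = P(test=SICK | SICK) * P(SICK) / P(test=SICK)
#                         = (9/10 * 1/100) / (9/10 * 1/100 + 89/990 * 99/100)
#                         = (9/1000) / (9/1000 + 89/1000)
#                         = 9 / (9 + 89) ≈ 0.0918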

def _is_validated_3b1b_example_data(posteriors_dict) -> bool:
    EPS = 10**-16
    SICK_SICK_REF = 9 / (9 + 89)
    return abs(SICK_SICK_REF - posteriors_dict["SICK"]["SICK"]) < EPS

def _print_make_3b1b_example(priors_for_h_dict, posteriors_dict):
    healthy, sick = list(priors_for_h_dict.keys())
    print(f"""{80 * "-"}\n
The prior chances for any given person are
{healthy}: {round(priors_for_h_dict[healthy], 5)} %
resp.
{sick}: {round(priors_for_h_dict[sick], 5)} %""")
    for sampled_h, posterior in posteriors_dict.items():
        print(f"""After testing, with the test claiming they are {sampled_h}, our credence is
{healthy}: {round(posterior[healthy], 5)}
resp.
{sick}: {round(posterior[sick], 5)}""")

def run_3b1b_example():
    priors_for_h_dict = dict(HEALTHY=99, SICK=1)
    hs = list(priors_for_h_dict.keys())
    priors_for_h = l1_normalize(priors_for_h_dict.values())
    posteriors_dict = {
        # Computation and formatting to dict, for each possible test outcome
        sampled_h: dict(zip(hs, update_priors(hs, priors_for_h,
                                              _likelihood_function__3b1b_example, sampled_h)))
        for sampled_h in hs
    }
    _print_make_3b1b_example(priors_for_h_dict, posteriors_dict)
    assert _is_validated_3b1b_example_data(posteriors_dict)

def _likelihood_function__sample_k_out_of_n(sample__k_out_of_n, h__proposed_rate):
    # Binomial likelihood P(k out of n | rate). (Needs some theory to be derived.)
    k, n = sample__k_out_of_n
    r = h__proposed_rate
    assert 0 <= r <= 1, f"{r} is not a valid rate"
    chance = r**k * (1 - r)**(n - k)  # Chance of first picking k times, then failing to pick the remaining n-k times
    num_permutations = special.binom(n, k)  # Number of indistinguishable orderings of picking k out of n in a sequence
    return num_permutations * chance
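
# A small, hypothetical cross-check of the hand-rolled likelihood above against
# scipy's binomial pmf (scipy.stats.binom.pmf); the particular k, n and rate are
# arbitrary illustrative choices.
def _sanity_check_binomial_likelihood():
    from scipy import stats
    k_out_of_n = (4, 13)  # Observed 4 cases in a sample of 13
    rate = 0.3  # Proposed underlying ratio
    ours = _likelihood_function__sample_k_out_of_n(k_out_of_n, rate)
    reference = stats.binom.pmf(k_out_of_n[0], k_out_of_n[1], rate)
    assert np.isclose(ours, reference)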

def _print_book_narrative(hs__ratio_vals, priors_for_h, SAMPLE_MAX_N, sampled_k, posteriors_for_h):
    def rounds(xs):
        return ", ".join(str(round(100 * x, 2)) for x in xs)
    print(f"""{80 * "-"}\n
Some fraction of the population has a medical condition.
There are {len(hs__ratio_vals)} studies suggesting the ratio in % is, respectively,
{rounds(hs__ratio_vals)}
We give those a credence in %, respectively, of
{rounds(priors_for_h)}
We do our own experiment, drawing a sample of {SAMPLE_MAX_N} and finding {sampled_k} cases,
i.e. {rounds([sampled_k / SAMPLE_MAX_N])} %.
With this, our posterior credence in % for the studies becomes
{rounds(posteriors_for_h)}
""")

def _make_book_example_plot(hs__ratio_vals, priors_for_h, sampled_ratio, posteriors_for_h):
    plt.figure(figsize=(6, 4))
    plt.plot(hs__ratio_vals, label='ratio_vals', marker='.', linestyle='-', color='k', linewidth=1)
    plt.plot(priors_for_h, label='priors', marker='o', linestyle='--', color='darkblue', linewidth=3)
    plt.plot(posteriors_for_h, label='posterior', marker='o', linestyle='--', color='blue', linewidth=3)
    plt.axhline(y=sampled_ratio, color='gray', linestyle='--', label=f'sample={round(sampled_ratio, 3)}', alpha=0.5)
    for idx, (pri, post) in enumerate(zip(priors_for_h, posteriors_for_h)):
        OFFSET: float = -1 / 2
        x_and_x_next = [0, 1] + np.ones(2) * (idx + OFFSET)  # Unit-wide band centered at idx
        color: str = "green" if post > pri else "red"  # Green where credence rose, red where it fell
        plt.fill_between(x_and_x_next, pri, post, color=color, alpha=0.3)
    plt.ylim(0, 1)
    plt.xlabel('Index of data value (ratio in this case)')
    plt.title("Bayes' rule update step")
    plt.legend()
    plt.show()

def run_book_example():
    MAX_NUM_STUDIES = 15
    SAMPLE_MAX_N = 13
    MU, SIGMA = 0.5, 0.25  # For making h values for this script

    def is_valid_ratio(h):
        return 0 <= h <= 1

    hs__ratio_vals_unsorted = filter(
        is_valid_ratio,
        np.random.normal(MU, SIGMA, MAX_NUM_STUDIES)
    )  # Could choose any distribution here
    hs__ratio_vals = sorted(hs__ratio_vals_unsorted)
    priors_for_h = l1_normalize(np.random.rand(len(hs__ratio_vals)))
    sampled_k = random.choice(range(SAMPLE_MAX_N + 1))  # Could choose any distribution here
    k_out_of_n = (sampled_k, SAMPLE_MAX_N)  # Sampled data point x
    posteriors_for_h = update_priors(hs__ratio_vals, priors_for_h,
                                     _likelihood_function__sample_k_out_of_n, k_out_of_n)
    _print_book_narrative(hs__ratio_vals, priors_for_h,
                          SAMPLE_MAX_N, sampled_k, posteriors_for_h)
    _make_book_example_plot(hs__ratio_vals, priors_for_h,
                            sampled_k / SAMPLE_MAX_N, posteriors_for_h)
if __name__=="__main__":
run_3b1b_example()
run_book_example()