Last active
July 14, 2025 19:43
-
-
Save ortsed/a94b18b683c621d77877d779a81432b2 to your computer and use it in GitHub Desktop.
Bayesian Likelihood For Pandas
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| def bayesian_liklihood(df, target_col, target_val, prior_col): | |
| """ | |
| Gets the liklihood of a target value given a prior value using Bayesian formula | |
| on a Pandas dataframe | |
| df: Pandas dataframe | |
| target_col: column being predicted | |
| target_val: value in the target_col being predicted | |
| prior_col: name of the column whose values are priors used to predict | |
| """ | |
| # get the target subset of the dataframe | |
| target_df = df[df[target_col] == target_val] | |
| # get probability of the target value out of whole dataset | |
| prob_target = len(target_df)/len(df) | |
| # get probability of each prior value given target | |
| # using the target subset | |
| prob_prior_given_target = target_df[prior_col].value_counts(normalize=True).reset_index() | |
| # get the probability of each prior value | |
| # from the complete dataset | |
| prob_prior = df[prior_col].value_counts(normalize=True).reset_index() | |
| # merge the prior probabilities and the | |
| # prior-given-target probabilities into same dataframe | |
| merged = prob_prior.merge(prob_prior_given_target, on=prior_col, how="left", suffixes=("_prior", "_given_target")).fillna(0) | |
| # rename columns for simplicity | |
| merged.columns = ["prior", "prior_prob", "prior_prob_given_target"] | |
| # Apply Bayes Theorem | |
| merged["target_prob_given_prior"] = merged["prior_prob_given_target"] * prob_target/merged["prior_prob"] | |
| # Set infinite value to zero | |
| # if prior_prob is zero than target given prior should be zero too | |
| merged["target_prob_given_prior"] = merged["target_prob_given_prior"].apply(lambda x: 0 if x == np.inf else x) | |
| # return the output dataframe | |
| return merged.sort_values("target_prob_given_prior", ascending=False) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment