ortsed · July 14, 2025 19:43
diff --git a/bayesian_likelihood.py b/bayesian_likelihood.py
 import numpy as np

 def bayesian_liklihood(df, target_col, target_val, prior_col):
 	"""
 	 Gets the liklihood of a target value given a prior value using Bayesian formula
 	 on a Pandas dataframe

 	 df: Pandas dataframe
 	 target_col: column being predicted
 	 target_val: value in the target_col being predicted

 	 prior_col: name of the column whose values are priors used to predict

 	"""
 	# get the target subset of the dataframe
 	target_df = df[df[target_col] == target_val]

 	# get probability of the target value out of whole dataset
 	prob_target = len(target_df)/len(df)

 	# get probability of each prior value given target
 	# using the target subset
 	prob_prior_given_target = target_df[prior_col].value_counts(normalize=True).reset_index()

 	# get the probability of each prior value
 	# from the complete dataset
 	prob_prior = df[prior_col].value_counts(normalize=True).reset_index()


 	# merge the prior probabilities and the
 	# prior-given-target probabilities into same dataframe
 	merged = prob_prior.merge(prob_prior_given_target, on=prior_col, how="left", suffixes=("_prior", "_given_target")).fillna(0)

 	# rename columns for simplicity
 	merged.columns = ["prior", "prior_prob", "prior_prob_given_target"]

 	# Apply Bayes Theorem
 	merged["target_prob_given_prior"] = merged["prior_prob_given_target"] * prob_target/merged["prior_prob"]
 	
 	# Set infinite value to zero
 	# if prior_prob is zero than target given prior should be zero too
 	merged["target_prob_given_prior"] = merged["target_prob_given_prior"].apply(lambda x: 0 if x == np.inf else x)
 	
 	# return the output dataframe
 	return merged.sort_values("target_prob_given_prior", ascending=False)
	import numpy as np

	def bayesian_liklihood(df, target_col, target_val, prior_col):
		"""
		Estimate P(target_col == target_val | prior_col == v) for every value v
		of prior_col, using Bayes' theorem on a Pandas dataframe:

		    P(target | prior) = P(prior | target) * P(target) / P(prior)

		(The misspelled function name is kept so existing callers keep working.)

		df: Pandas dataframe
		target_col: name of the column being predicted
		target_val: value in target_col being predicted
		prior_col: name of the column whose values are priors used to predict

		Rows whose prior_col value is missing (NaN/None) are ignored in all three
		probabilities, so the terms of the formula share one denominator.

		Returns a dataframe with one row per distinct prior value and the columns
		"prior", "prior_prob", "prior_prob_given_target" and
		"target_prob_given_prior", sorted by "target_prob_given_prior" descending.
		A dataframe with no usable rows yields an empty result with those columns.
		"""
		# value_counts() drops missing priors, so P(target) has to be measured on
		# the same rows; otherwise Bayes' rule is applied with mismatched
		# denominators and the result is skewed whenever prior_col contains NaN.
		known = df[df[prior_col].notna()]

		# get the target subset of the usable rows
		target_rows = known[known[target_col] == target_val]

		# probability of the target value; defined as 0 when there is no data
		# instead of dividing by zero
		n_known = len(known)
		prob_target = len(target_rows) / n_known if n_known else 0.0

		# probability of each prior value over the usable rows.
		# The columns are named explicitly because the default names produced by
		# value_counts().reset_index() differ between pandas versions.
		prior_probs = (
			known[prior_col]
			.value_counts(normalize=True)
			.rename_axis("prior")
			.reset_index(name="prior_prob")
		)

		# probability of each prior value given the target
		given_target = (
			target_rows[prior_col]
			.value_counts(normalize=True)
			.rename_axis("prior")
			.reset_index(name="prior_prob_given_target")
		)

		# left merge keeps every prior value; priors never seen together with the
		# target get a conditional probability of 0
		merged = prior_probs.merge(given_target, on="prior", how="left")
		merged["prior_prob_given_target"] = merged["prior_prob_given_target"].fillna(0)

		# Apply Bayes Theorem
		posterior = merged["prior_prob_given_target"] * prob_target / merged["prior_prob"]

		# A prior_prob of zero (possible e.g. for unobserved categories of a
		# categorical column) makes the division 0/0; report 0 for those rows.
		merged["target_prob_given_prior"] = posterior.where(merged["prior_prob"] > 0, 0.0)

		# return the output dataframe
		return merged.sort_values("target_prob_given_prior", ascending=False)
No results found