meraldo-aliz · September 14, 2023 04:51
diff --git a/bayes.py b/bayes.py
 from pymc3.math import log, exp, where
 import pymc3 as pm
 import numpy as np


 # Fit on the "calibration" portion of the dataset only.
 N = rfm_cal_holdout.shape[0]  # number of customers
 # x: repeat purchase frequency, t_x: recency, T: time since first purchase
 x, t_x, T = (
     rfm_cal_holdout[column].values
     for column in ('frequency_cal', 'recency_cal', 'T_cal')
 )

 # Modeling step: BG-NBD model with one purchase rate and one dropout
 # probability per customer, tied together by shared hyper-priors.
 bgnbd_model = pm.Model()
 with bgnbd_model:

    # Priors for r and alpha, the two Gamma parameters
    r = pm.TruncatedNormal('r', mu=8, sigma=7, lower=0, upper=40)
    alpha = pm.TruncatedNormal('alpha', mu=0.5, sigma=5, lower=0, upper=10)

    # Priors for a and b, the two Beta parameters
    a = pm.TruncatedNormal('a', mu=1, sigma=5, lower=0, upper=10)
    b = pm.TruncatedNormal('b', mu=1, sigma=5, lower=0, upper=10)

    # lambda_ (purchase rate) is modeled by Gamma, which is a child distribution of r and alpha.
    # NOTE(review): the start values come from NumPy's global RNG, which is not
    # seeded in this block, so they differ from run to run.
    lambda_ = pm.Gamma('lambda', alpha=r, beta=alpha, shape=N, testval=np.random.rand(N))

    # p (dropout probability) is modeled by Beta, which is a child distribution of a and b
    p = pm.Beta('p', alpha=a, beta=b, shape=N, testval=np.random.rand(N))

    def logp(x, t_x, T):
        """
        Per-customer BG-NBD log-likelihood given lambda_ and p.

        The arguments carry the same names as the keys of the `observed`
        dict handed to pm.DensityDist below:
            x   -- repeat purchase frequency
            t_x -- recency
            T   -- time since first purchase

        Returns log(exp(A1) + [x > 0] * exp(A2)), evaluated in log space.
        """
        # log of (1-p)^x * lambda^x * exp(-lambda*T)
        A1 = x*log(1-p) + x*log(lambda_) - lambda_*T
        # log of p * (1-p)^(x-1) * lambda^x * exp(-lambda*t_x)
        A2 = (log(p) + (x-1)*log(1-p) + x*log(lambda_) - lambda_*t_x)
        # Exponentiating A1/A2 directly underflows to 0 when they are very
        # negative, which turns the log-likelihood into -inf. logaddexp
        # computes the same quantity without leaving log space. The A2 term
        # only contributes for customers with at least one repeat purchase.
        return where(x > 0, pm.math.logaddexp(A1, A2), A1)

    # Custom distribution for BG-NBD likelihood function
    loglikelihood = pm.DensityDist("loglikelihood", logp, observed={'x': x, 't_x': t_x, 'T': T})

 # Sampling step
 SEED = 8
 SAMPLE_KWARGS = dict(
    chains=1,
    draws=4000,
    tune=1000,
    target_accept=0.7,
    random_seed=[SEED],
 )
 with bgnbd_model:
    trace = pm.sample(**SAMPLE_KWARGS)

 # Keep only the draws from index 3000 onward. The earlier ones are treated
 # as burn-in: they may have been collected before convergence and so are
 # not representative of our posteriors.
 trace_trunc = trace[3000:]
	from pymc3.math import log, exp, where
	import pymc3 as pm
	import numpy as np


	# Fit on the "calibration" portion of the dataset only.
	N = rfm_cal_holdout.shape[0]  # number of customers
	# x: repeat purchase frequency, t_x: recency, T: time since first purchase
	x, t_x, T = (
	    rfm_cal_holdout[column].values
	    for column in ('frequency_cal', 'recency_cal', 'T_cal')
	)

	# Modeling step: BG-NBD model with one purchase rate and one dropout
	# probability per customer, tied together by shared hyper-priors.
	bgnbd_model = pm.Model()
	with bgnbd_model:

	    # Priors for r and alpha, the two Gamma parameters
	    r = pm.TruncatedNormal('r', mu=8, sigma=7, lower=0, upper=40)
	    alpha = pm.TruncatedNormal('alpha', mu=0.5, sigma=5, lower=0, upper=10)

	    # Priors for a and b, the two Beta parameters
	    a = pm.TruncatedNormal('a', mu=1, sigma=5, lower=0, upper=10)
	    b = pm.TruncatedNormal('b', mu=1, sigma=5, lower=0, upper=10)

	    # lambda_ (purchase rate) is modeled by Gamma, which is a child distribution of r and alpha.
	    # NOTE(review): the start values come from NumPy's global RNG, which is not
	    # seeded in this block, so they differ from run to run.
	    lambda_ = pm.Gamma('lambda', alpha=r, beta=alpha, shape=N, testval=np.random.rand(N))

	    # p (dropout probability) is modeled by Beta, which is a child distribution of a and b
	    p = pm.Beta('p', alpha=a, beta=b, shape=N, testval=np.random.rand(N))

	    def logp(x, t_x, T):
	        """
	        Per-customer BG-NBD log-likelihood given lambda_ and p.

	        The arguments carry the same names as the keys of the `observed`
	        dict handed to pm.DensityDist below:
	            x   -- repeat purchase frequency
	            t_x -- recency
	            T   -- time since first purchase

	        Returns log(exp(A1) + [x > 0] * exp(A2)), evaluated in log space.
	        """
	        # log of (1-p)^x * lambda^x * exp(-lambda*T)
	        A1 = x*log(1-p) + x*log(lambda_) - lambda_*T
	        # log of p * (1-p)^(x-1) * lambda^x * exp(-lambda*t_x)
	        A2 = (log(p) + (x-1)*log(1-p) + x*log(lambda_) - lambda_*t_x)
	        # Exponentiating A1/A2 directly underflows to 0 when they are very
	        # negative, which turns the log-likelihood into -inf. logaddexp
	        # computes the same quantity without leaving log space. The A2 term
	        # only contributes for customers with at least one repeat purchase.
	        return where(x > 0, pm.math.logaddexp(A1, A2), A1)

	    # Custom distribution for BG-NBD likelihood function
	    loglikelihood = pm.DensityDist("loglikelihood", logp, observed={'x': x, 't_x': t_x, 'T': T})

	# Sampling step
	SEED = 8
	SAMPLE_KWARGS = {
	    'chains': 1,
	    'draws': 4000,
	    'tune': 1000,
	    'target_accept': 0.7,
	    'random_seed': [
	        SEED,
	    ],
	}
	# The sampler call must be nested under the model context manager.
	with bgnbd_model:
	    trace = pm.sample(**SAMPLE_KWARGS)

	# It's a good practice to burn (discard) early samples:
	# these are likely to be obtained before convergence, so
	# they aren't representative of our posteriors.
	# Only the draws from index 3000 onward are kept.
	trace_trunc = trace[3000:]