usptact · January 25, 2017 02:23
diff --git a/ordinal_pymc3_DBDA23.2.2.py b/ordinal_pymc3_DBDA23.2.2.py
 import seaborn as sns
 import pymc3 as pm
 import numpy as np
 from scipy.stats import norm
 import pandas as pd

 import theano.tensor as T
 from theano.compile.ops import as_op

 # Simulate ordinal data: draw latent normal values, then bin them at the true cutpoints.
 n = 500
 mu_true, sigma_true = 1, 2.5
 theta_true = [1.5, 2.5, 3.5, 4.5, 5.5, 6.5]
 true_underlying = np.random.normal(loc=mu_true, scale=sigma_true, size=n)
 # np.digitize gives, for each draw, the index of the bucket it falls in.
 true_bucketed = np.digitize(true_underlying, bins=theta_true)

 # There is one more outcome level than there are cutpoints.
 n_y_levels = 1 + len(theta_true)
 # Centre of each cutpoint's prior: halfway between consecutive levels (1.5, 2.5, ...).
 thresh_mus = [k + 0.5 for k in range(1, n_y_levels)]
 # The first and last cutpoints stay fixed while the interior ones may 'move':
 # overwrite the interior entries with the sentinel -999, then mask that sentinel.
 thresh_mus_observed = np.array(thresh_mus)
 thresh_mus_observed[1:-1] = -999
 thresh_mus_observed = np.ma.masked_values(thresh_mus_observed, value=-999)

 @as_op(itypes=[T.dvector, T.dscalar, T.dscalar], otypes=[T.dvector])
 def bucket_probabilities(theta, mu, sigma):
     """Return the probability of each ordinal outcome under a latent normal.

     The latent variable is Normal(mu, sigma); `theta` holds the cutpoints that
     split the real line into ``len(theta) + 1`` buckets.

     Parameters
     ----------
     theta : 1-D float array of cutpoints (presumably increasing; not enforced).
     mu : location of the latent normal distribution.
     sigma : scale (standard deviation) of the latent normal distribution.

     Returns
     -------
     1-D float array of length ``len(theta) + 1``. Entry 0 is the mass below the
     first cutpoint, the last entry is the mass above the last cutpoint, and
     entry i is the mass between cutpoints i-1 and i.

     Works for any number of cutpoints rather than exactly six, and no longer
     depends on the module-level ``n_y_levels``.
     """
     cdf_at_cuts = norm.cdf(np.asarray(theta, dtype=float), loc=mu, scale=sigma)
     # Pad with the CDF limits 0 and 1 so that consecutive differences also
     # yield the two open-ended outer buckets.
     cdf_edges = np.concatenate(([0.0], np.ravel(cdf_at_cuts), [1.0]))
     # If two cutpoints are out of order the difference is negative; clamp it
     # to zero so no bucket gets a negative probability (the result may then
     # no longer sum to one).
     return np.maximum(np.diff(cdf_edges), 0.0)

 with pm.Model() as ordinal_model:
     # Cutpoints: one normal prior per threshold, centred on thresh_mus.
     # `observed` is the masked array built earlier, so only the unmasked
     # (first and last) thresholds are supplied as fixed values.
     n_thresholds = len(thresh_mus)
     theta = pm.Normal(
         'theta',
         mu=thresh_mus,
         tau=np.full(n_thresholds, .5**2),
         shape=n_thresholds,
         observed=thresh_mus_observed,
         testval=thresh_mus[1:-1],
     )

     # Location of the latent normal: wide prior centred mid-scale.
     mu = pm.Normal(
         'mu',
         mu=n_y_levels/2.0,
         tau=1.0/(n_y_levels**2),
         testval=1.0,
     )

     # Scale of the latent normal: flat prior over a broad positive range.
     sigma = pm.Uniform(
         'sigma',
         lower=n_y_levels/1000.0,
         upper=n_y_levels*10.0,
         testval=2.5,
     )

     # Per-level outcome probabilities implied by the cutpoints and latent normal.
     pr = bucket_probabilities(theta, mu, sigma)

     # Likelihood: each observed level is a categorical draw with probabilities pr.
     obs = pm.Categorical('obs', pr, observed=true_bucketed)

     # Metropolis is required: bucket_probabilities is an opaque Python op, so
     # Theano cannot compute its gradient.
     step = pm.Metropolis([theta, mu, sigma, pr, obs])
     trace = pm.sample(8000, step=step)
	import seaborn as sns
	import pymc3 as pm
	import numpy as np
	from scipy.stats import norm
	import pandas as pd

	import theano.tensor as T
	from theano.compile.ops import as_op

	# Simulate ordinal data: draw latent normal values, then bin them at the true cutpoints.
	n = 500
	mu_true, sigma_true = 1, 2.5
	theta_true = [1.5, 2.5, 3.5, 4.5, 5.5, 6.5]
	true_underlying = np.random.normal(loc=mu_true, scale=sigma_true, size=n)
	# np.digitize gives, for each draw, the index of the bucket it falls in.
	true_bucketed = np.digitize(true_underlying, bins=theta_true)

	# There is one more outcome level than there are cutpoints.
	n_y_levels = 1 + len(theta_true)
	# Centre of each cutpoint's prior: halfway between consecutive levels (1.5, 2.5, ...).
	thresh_mus = [k + 0.5 for k in range(1, n_y_levels)]
	# The first and last cutpoints stay fixed while the interior ones may 'move':
	# overwrite the interior entries with the sentinel -999, then mask that sentinel.
	thresh_mus_observed = np.array(thresh_mus)
	thresh_mus_observed[1:-1] = -999
	thresh_mus_observed = np.ma.masked_values(thresh_mus_observed, value=-999)

	@as_op(itypes=[T.dvector, T.dscalar, T.dscalar], otypes=[T.dvector])
	def bucket_probabilities(theta, mu, sigma):
		"""Return the probability of each ordinal outcome under a latent normal.

		The latent variable is Normal(mu, sigma); `theta` holds the cutpoints that
		split the real line into ``len(theta) + 1`` buckets.

		Parameters
		----------
		theta : 1-D float array of cutpoints (presumably increasing; not enforced).
		mu : location of the latent normal distribution.
		sigma : scale (standard deviation) of the latent normal distribution.

		Returns
		-------
		1-D float array of length ``len(theta) + 1``. Entry 0 is the mass below the
		first cutpoint, the last entry is the mass above the last cutpoint, and
		entry i is the mass between cutpoints i-1 and i.

		Works for any number of cutpoints rather than exactly six, and no longer
		depends on the module-level ``n_y_levels``.
		"""
		cdf_at_cuts = norm.cdf(np.asarray(theta, dtype=float), loc=mu, scale=sigma)
		# Pad with the CDF limits 0 and 1 so that consecutive differences also
		# yield the two open-ended outer buckets.
		cdf_edges = np.concatenate(([0.0], np.ravel(cdf_at_cuts), [1.0]))
		# If two cutpoints are out of order the difference is negative; clamp it
		# to zero so no bucket gets a negative probability (the result may then
		# no longer sum to one).
		return np.maximum(np.diff(cdf_edges), 0.0)

	with pm.Model() as ordinal_model:
		# Cutpoints: one normal prior per threshold, centred on thresh_mus.
		# `observed` is the masked array built earlier, so only the unmasked
		# (first and last) thresholds are supplied as fixed values.
		theta = pm.Normal('theta',
			mu=thresh_mus,
			tau=np.repeat(.5**2, len(thresh_mus)),
			shape=len(thresh_mus),
			observed=thresh_mus_observed,
			testval=thresh_mus[1:-1])
		# Location of the latent normal: wide prior centred mid-scale.
		mu = pm.Normal('mu',
			mu=n_y_levels/2.0,
			tau=1.0/(n_y_levels**2),
			testval=1.0)
		# Scale of the latent normal: flat prior over a broad positive range.
		sigma = pm.Uniform('sigma',
			n_y_levels/1000.0,
			n_y_levels*10.0,
			testval=2.5)

		# Per-level outcome probabilities implied by the cutpoints and latent normal.
		pr = bucket_probabilities(theta, mu, sigma)

		# Likelihood: each observed level is a categorical draw with probabilities pr.
		obs = pm.Categorical('obs', pr, observed=true_bucketed)

		# must specify Metropolis because Theano can't compute gradient of bucket_probabilities
		step = pm.Metropolis([theta, mu, sigma, pr, obs])
		trace = pm.sample(8000, step=step)