Forked from DanielWeitzenfeld/ordinal_pymc3_DBDA23.2.2.py
Created
January 25, 2017 02:23
-
-
Save usptact/3a45d61fe778b646ba7ed8a5f8e89dee to your computer and use it in GitHub Desktop.
functional implementation of ordinal predicted variable
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import seaborn as sns | |
import pymc3 as pm | |
import numpy as np | |
from scipy.stats import norm | |
import pandas as pd | |
import theano.tensor as T | |
from theano.compile.ops import as_op | |
# Generate True data | |
n = 500 | |
theta_true = [1.5, 2.5, 3.5, 4.5, 5.5, 6.5] | |
mu_true = 1 | |
sigma_true = 2.5 | |
true_underlying = np.random.normal(mu_true, sigma_true, size=n) | |
true_bucketed = np.digitize(true_underlying, theta_true) | |
# we want to fix the top and bottom thresholds, but let the others 'move'. Use a masked array for simplicity. | |
n_y_levels = len(theta_true) + 1 | |
thresh_mus = [k + .5 for k in range(1, n_y_levels)] | |
thresh_mus_observed = np.array(thresh_mus) | |
thresh_mus_observed[1:-1] = -999 | |
thresh_mus_observed = np.ma.masked_values(thresh_mus_observed, value=-999) | |
@as_op(itypes=[T.dvector, T.dscalar, T.dscalar], otypes=[T.dvector]) | |
def bucket_probabilities(theta, mu, sigma): | |
"""Given cutpoints and parameters of normal distribution, calculate probabilities for each ordinal outcome.""" | |
out = np.empty(n_y_levels) | |
n = norm(loc=mu, scale=sigma) | |
out[0] = n.cdf(theta[0]) | |
out[1] = np.max([0, n.cdf(theta[1]) - n.cdf(theta[0])]) | |
out[2] = np.max([0, n.cdf(theta[2]) - n.cdf(theta[1])]) | |
out[3] = np.max([0, n.cdf(theta[3]) - n.cdf(theta[2])]) | |
out[4] = np.max([0, n.cdf(theta[4]) - n.cdf(theta[3])]) | |
out[5] = np.max([0, n.cdf(theta[5]) - n.cdf(theta[4])]) | |
out[6] = 1 - n.cdf(theta[5]) | |
return out | |
with pm.Model() as ordinal_model: | |
theta = pm.Normal('theta', | |
mu=thresh_mus, | |
tau=np.repeat(.5**2, len(thresh_mus)), | |
shape=len(thresh_mus), | |
observed=thresh_mus_observed, | |
testval=thresh_mus[1:-1]) | |
mu = pm.Normal('mu', | |
mu=n_y_levels/2.0, | |
tau=1.0/(n_y_levels**2), | |
testval=1.0) | |
sigma = pm.Uniform('sigma', | |
n_y_levels/1000.0, | |
n_y_levels*10.0, | |
testval=2.5) | |
pr = bucket_probabilities(theta, mu, sigma) | |
obs = pm.Categorical('obs', pr, observed=true_bucketed) | |
# must specify Metropolis because Theano can't compute gradient of bucket_probabilities | |
step = pm.Metropolis([theta, mu, sigma, pr, obs]) | |
trace = pm.sample(8000, step=step) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment