Last active
May 26, 2017 14:34
-
-
Save sammosummo/3abf2639ee8e42bd26dd to your computer and use it in GitHub Desktop.
Bayesian hierarchical generalised linear signal detection models for yes-no experiments using PyMC3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Bayesian hierarchical generalised linear signal detection models for | |
yes-no experiments. | |
""" | |
import numpy as np | |
import pandas as pd | |
import patsy | |
import pymc3 as pm | |
import theano | |
import theano.tensor as T | |
from scipy.stats import norm | |
from scipy.signal import detrend | |
def _est_mle(f, h, m, r): | |
"""Calculates maximum-likelihood estimates of sensitivity and bias. | |
Args: | |
f: False alarms. | |
h: Hits. | |
m: Misses. | |
r: Correct rejections. | |
Returns: | |
[(d1, c1) ...] | |
""" | |
out = [] | |
for _f, _h, _m, _r in zip(f, h, m, r): | |
n0, n1 = float(_f + _r), float(_h + _m) | |
if _f == 0: | |
_f += 0.5 | |
if _f == n0: | |
_f -= 0.5 | |
if _h == 0: | |
_h += 0.5 | |
if _h == n1: | |
_h -= 0.5 | |
fhat = _f / float(n0) | |
hhat = _h / float(n1) | |
d = norm.ppf(hhat) - norm.ppf(fhat) | |
c = -0.5 * (norm.ppf(hhat) + norm.ppf(fhat)) | |
out.append((d, c)) | |
return out | |
def _find_starting_values(data): | |
"""Uses traditional MLE to find sensible starting values for each lower- | |
level node and appends these values to the data frame. | |
""" | |
fhmr = data[['F', 'H', 'M', 'R']].values.T | |
data['mle_d'], data['mle_c'] = zip(*_est_mle(*fhmr)) | |
return data | |
def _create_dmatrix(formula, data): | |
"""Design matrix containing predictor values created using a patsy | |
formula; e.g., `d ~ age`. | |
""" | |
parameter, formula = formula.replace(' ', '').split('~') | |
predictors = patsy.dmatrix(formula, data) | |
names = predictors.design_info.column_names | |
names = [parameter + '_' + n for n in names] | |
return predictors, names | |
def _create_coefficient(name): | |
"""Top-level stochastic node representing a coefficient for a predictor. | |
""" | |
coefficient = pm.Normal( | |
name=name, | |
mu=0, | |
sd=10 | |
) | |
return coefficient | |
def _create_normal_family(predictors, coefficients, name, startvals): | |
"""A normal family comprises a deterministic reg node, representing the | |
predicted values, sd stochastic node, and a lower-level stochastic node. | |
""" | |
predicted = pm.Deterministic( | |
name + '_reg', | |
theano.dot(np.asarray(predictors), T.stack(*coefficients)) | |
) | |
sd = pm.HalfNormal( | |
name=name + '_sd', | |
sd=10, | |
shape=(1,), | |
testval=detrend(np.asarray(startvals)).std() | |
) | |
reg = pm.Normal( | |
name=name, | |
mu=predicted, | |
sd=sd, | |
shape=(predictors.shape[0],), | |
testval=np.asarray(startvals) | |
) | |
return predicted, sd, reg | |
def _create_f_and_h_nodes(d, c, f_name, h_name): | |
"""Deterministic nodes representing false-alarm and hit probabilities. | |
""" | |
f = pm.Deterministic( | |
f_name, | |
(1 + T.erf((-d/2 - c)/T.sqrt(2))) / 2 | |
) | |
h = pm.Deterministic( | |
h_name, | |
(1 + T.erf((d/2 - c)/T.sqrt(2))) / 2 | |
) | |
return f, h | |
def _create_binomial_likelihoods(f, h, f_lik_name, h_lik_name, data): | |
"""False-alarm and hit binomial likelihood nodes. | |
""" | |
f_lik = pm.Binomial( | |
name=f_lik_name, | |
n=np.asarray(data.N), | |
p=f, | |
observed=np.asarray(data.F) | |
) | |
h_lik = pm.Binomial( | |
name=h_lik_name, | |
n=np.asarray(data.S), | |
p=h, | |
observed=np.asarray(data.H) | |
) | |
return f_lik, h_lik | |
def yesno(data, d_formula='d~', c_formula='c~'): | |
"""Creates a model for yes-no experiments. | |
Args: | |
formulae: list-like of patsy design strings. | |
data: pandas data frame containing at least the following columns: `N`, | |
`S`, `F`, `H` and any predictors mentioned in either design str. | |
""" | |
data = _find_starting_values(data) | |
d_predictors, d_names = _create_dmatrix(d_formula, data) | |
d_coefficients = [_create_coefficient(c) for c in d_names] | |
d_predicted, d_sd, d = _create_normal_family( | |
d_predictors, d_coefficients, 'd', data['mle_d'] | |
) | |
c_predictors, c_names = _create_dmatrix(c_formula, data) | |
c_coefficients = [_create_coefficient(c) for c in c_names] | |
c_predicted, c_sd, c = _create_normal_family( | |
c_predictors, c_coefficients, 'c', data['mle_c'] | |
) | |
f, h = _create_f_and_h_nodes(d, c, 'f', 'h') | |
f_lik, h_lik = _create_binomial_likelihoods(f, h, 'f_lik', 'h_lik', data) | |
return locals() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment