Created
September 13, 2016 18:41
-
-
Save sammosummo/7e3ba00ea66063616a1417d447479a4e to your computer and use it in GitHub Desktop.
Bayesian model of working-memory capacity.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Model of working-memory capacity for change-detection tasks. | |
This script implements a "slots-based" model of working memory capacity based | |
on the Cowan formula, modified to allow lapses and formalized in a Bayesian | |
framework. | |
Data are organised into a pandas data frame in which each row represents a | |
subject, and columns contain covariates or observations. "Covariates" can be | |
categorical or continuous. "Observations" are counts of trials and are named | |
`{x}_{y}` where `{x}` is a single uppercase letter indicating the count type | |
(either `S`, `D`, `F`, or `H`) and `{y}` is a positive integer indicating the | |
set size. Covariates should not have names that could be confused with | |
observations. | |
The code is designed to work on pure "between-subjects" designs; i.e., a | |
subject is assumed to have one set of parameter values for all trials in the | |
experiment. It is not optimal for "within-subjects" designs. | |
""" | |
from inspect import signature | |
import numpy as np | |
import pymc3 as pm | |
import theano.tensor as tt | |
from theano import dot | |
from patsy import dmatrix | |
def create_family(fn, dm, lh={}, sh={}, robust=False): | |
"""Returns a hierarchical family of random variables. | |
Families comprise a collection of children (e.g., subject-level) variables, | |
and parent variables that control the shape of the prior distribution from | |
which the children are drawn. | |
Args: | |
fn (str): Descriptive name for the family; e.g., `'kappa'`. | |
dm (patsy.DesignMatrix): The design matrix for the location parameters | |
of the prior distribution on the children variables. | |
lh (Optional [dict]): Additional details used in the creation of the | |
location parameters. Defaults to `{'prior'=pymc3.Normal, 'mu': 0, | |
'sd': 100}`. | |
sh (Optional [dict]): Additional details used in the creation of the | |
scale parameters. Defaults to `{'prior'=pymc3.HalfCauchy, | |
'beta': 30}`. | |
robust (Optional [bool]): If False, subject-level parameters are | |
normally distributed. If True, they have a non-central student | |
t distribution, and an additional nu parameter is added to the | |
model. | |
Returns: | |
list: Container of children (subject-level) variables. | |
""" | |
cns = ['%s__%s' % (fn, n) for n in ldm.design_info.column_names] | |
clocs = [] | |
for cn in cns: | |
lp = {'prior': pm.Normal, 'mu': 0, 'sd': 100} | |
lp.update(lh) | |
f = lp.pop('prior') | |
lp_ = lp.copy() | |
p = signature(f.__init__).parameters | |
[lp_.pop(k) for k in lp.keys() if k not in p] | |
lp_['name'] = 'loc_%s' % cn | |
clocs.append(f(**lp_)) | |
loc = dot(np.asarray(ldm), tt.stack(*clocs)) | |
sp = {'prior': pm.HalfCauchy, 'beta': 30} | |
sp.update(sh) | |
f = sp.pop('prior') | |
sp_ = sp.copy() | |
p = signature(f.__init__).parameters | |
[sp_.pop(k) for k in sp.keys() if k not in p] | |
sp_['name'] = 'scale_%s' % fn | |
sc = f(**sp_) | |
# children priors | |
if robust is False: | |
children = pm.Normal( | |
name=fn, mu=loc, sd=sc, shape=(ldm.shape[0],) | |
) | |
else: | |
nu = pm.Exponential(name='nu_minus_one_%s' % fn, lam=1/29.) + 1 | |
children = pm.StudentT( | |
name=fn, mu=loc, sd=sc, nu=nu, shape=(ldm.shape[0],) | |
) | |
return children | |
def make_model(data, formulae, robust=False): | |
"""Constructs the model. | |
Args: | |
data (pandas.DataFrame): Data formatted in the appropriate way. | |
formulae (dict): Dictionary of formulae. A constant is added for any | |
missing. | |
robust (Optional[bool]): Robust estimation. Default is False. | |
Returns: | |
None: Model is placed in context, ready for sampling. | |
""" | |
for name in ('kappa', 'gamma', 'zeta'): | |
if name not in formulae: | |
formulae[name] = '1' | |
dm = {k: dmatrix(formulae[k], data) for k in formulae} | |
kappa = create_family('kappa', dm['kappa'], robust=robust) | |
gamma = create_family('gamma', dm['gamma'], robust=robust) | |
zeta = create_family('zeta', dm['zeta'], robust=robust) | |
k = pm.Deterministic('k', tt.max((kappa, np.zeros(len(data))), axis=0)) | |
g = pm.Deterministic('g', pm.invlogit(gamma)) | |
z = pm.Deterministic('z', pm.invlogit(zeta)) | |
for l in [int(c.replace('S_', '')) for c in data.columns if 'S_' in c]: | |
r = tt.min((k / l, np.ones(len(data))), axis=0) | |
f = (1 - z) * g + z * (1 - r) * g | |
h = (1 - z) * g + z * r + z * (1 - r) * g | |
pm.Binomial( | |
name='F_%i' % l, p=f, n=data['S_%i' % l].values, | |
observed=data['F_%i' % l].values | |
) | |
pm.Binomial( | |
name='H_%i' % l, p=h, n=data['D_%i' % l].values, | |
observed=data['H_%i' % l].values | |
) | |
def sample_model(model_name, data, formulae, n): | |
"""Draw from posterior. | |
""" | |
with pm.Model() as model: | |
make_model(data, formulae, True, False, False) | |
backend = pm.backends.Text(model_name) | |
pm.sample(n, trace=backend) | |
if __name__ == '__main__': | |
pass | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment