Last active
October 18, 2018 18:09
-
-
Save JoKeyser/e4a263a8b9a1a6e8019ad0b3dde577bb to your computer and use it in GitHub Desktop.
Re-creation of the anovaBF example from R's BayesFactor package, using Python and rpy2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
""" | |
This re-creates the anovaBF example from BFManual() in Python with rpy2, | |
and converts the BayesFactor in a pandas DataFrame. | |
See in R: library(BayesFactor), BFManual(). | |
COPYRIGHT INFORMATION | |
Copyright 2018 Johannes Keyser <[email protected]> | |
Sensorimotorlab, www.sensorimotorlab.nl | |
Donders Institute for Brain, Cognition and Behaviour | |
Stichting Katholieke Universiteit, Nijmegen, The Netherlands | |
This script is free software: you can redistribute it and/or modify it | |
under the terms of the GNU General Public License as published by | |
the Free Software Foundation, either version 3 of the license, or | |
(at your option) any later version, see https://www.gnu.org/licenses/. | |
""" | |
import pandas as pd | |
from rpy2.robjects.packages import importr, data as rdata | |
from rpy2.robjects import pandas2ri, Formula as rformula, vectors as rvectors | |
pandas2ri.activate() | |
BF = importr('BayesFactor') | |
DATASETS = importr('datasets', data=True) | |
# The ToothGrowth data set contains 3 columns: | |
# len: the dependent variable, each of which is the length of a | |
# guinea pig's tooth after treatment with Vitamin C; | |
# supp: the supplement type (orange juice or ascorbic acid); | |
# dose: the amount of Vitamin C administered. | |
TOOTH_GROWTH = rdata(DATASETS).fetch('ToothGrowth')['ToothGrowth'] | |
# Treat dose as a factor | |
dose_idx = list(TOOTH_GROWTH.colnames).index('dose') | |
dose_levels = rvectors.StrVector(('low', 'medium', 'high')) | |
dose_factor = rvectors.FactorVector(TOOTH_GROWTH.rx2('dose'), | |
labels=dose_levels) | |
TOOTH_GROWTH[dose_idx] = dose_factor | |
bf = BF.anovaBF(rformula('len ~ supp*dose'), data=TOOTH_GROWTH) | |
print(bf) | |
def bf2df(bfobj, index=0): | |
""" | |
Convert entries (relative to grand mean model) into pandas DataFrame. | |
""" | |
bfvec = BF.as_vector_BFBayesFactor(bfobj) | |
bfdct = {key:val for key, val in bfvec.items()} | |
bfdf = pd.DataFrame(bfdct, index=[index]) | |
return bfdf | |
bf_df = bf2df(bf) | |
print("As pandas DataFrame (losing the precision information!):") | |
print(bf_df) | |
# Suppose we were interested in comparing the two main-effects model and | |
# the full model to the dose-only model. We could use division: | |
print("\nModel comparisons:") | |
COMPARISONS = (('supp + dose', 'dose'), | |
('supp + dose + supp:dose', 'dose')) | |
for (num, denum) in COMPARISONS: | |
print('"%s" / "%s" = %g' % (num, denum, bf_df[num] / bf_df[denum])) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment