Last active
October 11, 2017 23:00
-
-
Save jaradc/126e1cbe81c8e18f05d15e22a5d15dd5 to your computer and use it in GitHub Desktop.
Test if two binomial distributions are statistically different from each other
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import statsmodels.api as sm | |
import numpy as np | |
import rpy2.robjects.packages as rpackages | |
import rpy2.robjects as robjects | |
rstats = rpackages.importr('stats') | |
s1 = 1556 | |
n1 = 2455 | |
s2 = 1671 | |
n2 = 2730 | |
# manual calculation | |
p1 = s1 / n1 | |
p2 = s2 / n2 | |
p = (n1*p1 + n2*p2) / (n1 + n2) | |
z = (p1 - p2) / (p*(1-p)*((1/n1)+(1/n2)))**0.5 | |
# using R in Python with rpy2 | |
rmatrix = robjects.r.matrix(robjects.IntVector([1556, 2455-1556, 1671,2730-1671]), nrow=2) | |
fisher_test = rstats.fisher_test(rmatrix, alternative="two.sided") | |
zscore, pval = sm.stats.proportions_ztest([s1, s2], [n1, n2], alternative='two-sided') | |
print('Manual calculation of z: {:.6f}'.format(z)) | |
print('Z-score from statsmodels: {:.6f}'.format(zscore)) | |
print('R pvalue from fisher.test: {:.6f}'.format(fisher_test[0][0])) | |
print('Statsmodels pvalue: {:.6f}'.format(pval)) | |
#Manual calculation of z: 1.610825 | |
#Z-score from statsmodels: 1.610825 | |
#R pvalue from fisher.test: 0.108268 | |
#Statsmodels pvalue: 0.107218 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment