Created
January 25, 2018 23:02
-
-
Save tyarkoni/ee1825fc4e9dbf11a634f16fb4fabd3c to your computer and use it in GitHub Desktop.
Illustrating the effects of p-hacking on observed effect sizes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from scipy import stats | |
import matplotlib.pyplot as plt | |
%matplotlib inline | |
def run_study(step_size=50, max_n=200, num_tests=10, alpha=0.05): | |
''' Run a single study in increments of N until we either (a) achieve | |
significance, or (b) hit a maximum sample size. To model p-hacking, we | |
conduct num_tests independent tests after each increment of sampling. ''' | |
X = np.zeros((0, num_tests)) | |
n = 0 | |
while n < max_n: | |
X = np.r_[X, np.random.normal(size=(step_size, num_tests))] | |
t, p = stats.ttest_1samp(X, 0) | |
min_ind = np.argmin(p) | |
min_p = p[min_ind] | |
n += step_size | |
if min_p < 0.05: | |
break | |
# As a null distribution, don't take the smallest p value | |
# if there's no sig effect | |
min_ind = 0 | |
return min_p, t[min_ind], n, X[:, min_ind] | |
def plot_effect_sizes(n_reps=100, min_n=10, **kwargs): | |
''' Sample n_reps studies per the procedure in run_study(), and | |
plot the standardized effect size of the result, coloring by | |
significance. ''' | |
studies = [run_study(**kwargs) for i in range(n_reps)] | |
# Count sig vs. non-sig | |
p = np.array([s[0] for s in studies]) | |
print("prop. significant studies: %.2f" % (p < 0.05).mean()) | |
for p, t, n, X in studies: | |
# Recode all significant effects in same direction | |
if p < .05 and X.mean() < 0: | |
X *= -1 | |
color = 'blue' if p < .05 else 'gray' | |
d = [X[:i].mean()/X[:i].std() for i in range(min_n, n)] | |
plt.plot(np.arange(min_n, n), d, color=color, lw=2, alpha=0.2) | |
plt.hlines(0, min_n, 200, linestyles='--', lw=0.5) | |
plt.gcf().set_size_inches(12, 6) | |
plot_effect_sizes() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment