Last active
October 17, 2019 14:39
-
-
Save tleonardi/d80ec68a8240088a45894bc0f89c066e to your computer and use it in GitHub Desktop.
Hou's method for the approximation for the distribution of the weighted combination of non-independent or independent probabilities.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy.stats import chi2 | |
import numpy as np | |
def combine_pvalues_hou(pvalues, weights, cor_mat): | |
""" Hou's method for the approximation for the distribution of the weighted | |
combination of non-independent or independent probabilities. | |
If any pvalue is nan, returns nan. | |
https://doi.org/10.1016/j.spl.2004.11.028 | |
pvalues: list of pvalues to be combined | |
weights: the weights of the pvalues | |
cor_mat: a matrix containing the correlation coefficients between pvalues | |
Test: when weights are equal and cor=0, hou is the same as Fisher | |
print(combine_pvalues([0.1,0.02,0.1,0.02,0.3], method='fisher')[1]) | |
print(hou([0.1,0.02,0.1,0.02,0.3], [1,1,1,1,1], np.zeros((5,5)))) | |
""" | |
if(len(pvalues) != len(weights)): | |
raise Exception("Can't combine pvalues if pvalues and weights are not the same length.") | |
if( cor_mat.shape[0] != cor_mat.shape[1] or cor_mat.shape[0] != len(pvalues)): | |
raise Exception("The correlation matrix needs to be squared, with each dimension equal to the length of the pvalues vector.") | |
if all(p==1 for p in pvalues): | |
return 1 | |
if any((p==0 or np.isinf(p) or p>1) for p in pvalues): | |
raise Exception("At least one p-value is invalid") | |
# Covariance estimation as in Kost and McDermott (eq:8) | |
# https://doi.org/10.1016/S0167-7152(02)00310-3 | |
cov = lambda r: (3.263*r)+(0.710*r**2)+(0.027*r**3) | |
k=len(pvalues) | |
cov_sum=np.float64(0) | |
sw_sum=np.float64(0) | |
w_sum=np.float64(0) | |
tau=np.float64(0) | |
for i in range(k): | |
for j in range(i+1,k): | |
cov_sum += weights[i]*weights[j]*cov(cor_mat[i][j]) | |
sw_sum += weights[i]**2 | |
w_sum += weights[i] | |
# Calculate the weighted Fisher's combination statistic | |
tau += weights[i] * (-2*np.log(pvalues[i])) | |
# Correction factor | |
c = (2*sw_sum+cov_sum) / (2*w_sum) | |
# Degrees of freedom | |
f = (4*w_sum**2) / (2*sw_sum+cov_sum) | |
# chi2.sf is the same as 1-chi2.cdf but is more accurate | |
combined_p_value = chi2.sf(tau/c,f) | |
# Return a very small number if pvalue = 0 | |
if combined_p_value == 0: | |
combined_p_value = np.finfo(np.float).tiny | |
return combined_p_value |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment