Last active
May 19, 2023 14:57
-
-
Save jeanbaptisteb/374fde9b34baa8b4f98bebc647bebee4 to your computer and use it in GitHub Desktop.
Python implementation of the פ (Fei) effect size for goodness-of-fit tests. Beta version. Confidence intervals may differ slightly from the R implementation.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Python implementation of the פ (Fei) effect size for goodness-of-fit tests. | |
This is a beta version. Confidence intervals may differ from the original R implementation (see the "Returns" section below). | |
Reference: Ben-Shachar, M.S.; Patil, I.; Thériault, R.; Wiernik, B.M.; Lüdecke, D. Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data That Use the Chi-Squared Statistic. Mathematics 2023, 11, 1982. https://doi.org/10.3390/math11091982 | |
For the original R implementation of Fei, see the "effectsize" library https://easystats.github.io/effectsize/reference/phi.html. | |
""" | |
from scipy.stats import chisquare, ncx2 | |
from scipy.optimize import least_squares | |
from collections import namedtuple | |
import numpy as np | |
import math | |
def fei(obs, exp, ci=0.95, alternative="greater"): | |
""" | |
Parameters | |
---------- | |
obs : list or numpy array | |
Observed counts. Must be a one dimensional table of counts. | |
Example: obs=[10,50,120,20] | |
exp : list or numpy array | |
Expected counts or probabilities. Must be a one dimensional table of counts or probabilities. | |
Examples: | |
uniform counts: exp=[50, 50, 50, 50] | |
uniform probabilities: exp=[0.25, 0.25, 0.25, 0.25] | |
non-uniform counts: exp=[50,10,90,50] | |
non-uniform probabilities: exp=[0.25, 0.05, 0.45, 0.25] | |
ci: float | |
Confidence interval level. | |
alternative: string | |
Alternative hypothesis. Controls the type of confidence interval returned. | |
Possible values: | |
- "greater" (default) | |
- "two.sided" | |
- "less" | |
If "greater" is selected, the upper confidence interval limit will always be 1. | |
If "less" is selected, the lower confidence interval limit will always be 0. | |
Returns | |
------- | |
statistic : float | |
Returns the fei coefficient, as described by Ben-Shachar et al. (2023). | |
interval: tuple of two floats | |
Returns the confidence interval. | |
Note that if "less" set as the alternative, the lower interval limit will always be 0, while if it's set | |
to "greater", the upper interval limit will always be 1. | |
The confidence interval is calculated by using the non-centrality parameter method (aka the "pivot method"), | |
similarly to the R implementation of Fei in the library effectsize and to what is described in Ben-Shachar et al. (2023). | |
However, the "pivot" calculation uses SciPy's least squares optimization function, which may result in | |
different confidence intervals than the R implementation of Fei in a few cases. | |
For details about SciPy's least squares optimization function, see https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.least_squares.html | |
Reference | |
------- | |
Ben-Shachar, M.S.; Patil, I.; Thériault, R.; Wiernik, B.M.; Lüdecke, D. Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data That Use the Chi-Squared Statistic. Mathematics 2023, 11, 1982. https://doi.org/10.3390/math11091982 | |
""" | |
result_tuple = namedtuple('Fei', ['statistic', 'interval']) | |
# ============================================================================= | |
# Fei statistic calculations | |
# ============================================================================= | |
obs = np.array(obs) | |
exp = np.array(exp) | |
if len(obs.shape) > 1 or len(exp.shape) >1: | |
raise ValueError(f"Observed counts and expected counts/probabilities must be both one-dimensional tables. You passed a {len(obs.shape)}-dimensional table for observed counts, and a {len(exp.shape)}-dimensional table for expected counts/probabilities.") | |
N=obs.sum() | |
if exp.sum() <= 1: | |
exp = exp*N | |
exp_probas = exp/exp.sum() | |
chi, p = chisquare(f_obs=obs, f_exp=exp) | |
min_p = exp_probas.min() | |
fei = np.sqrt(chi/(N*((1/min_p )-1))) | |
# ============================================================================= | |
# confidence intervals calculations | |
# ============================================================================= | |
def ncp_fun(nc, chi, dof, q): | |
#internal function to calculate confidence intervals according to the "pivot method" | |
p1 = ncx2.cdf(chi,df=dof, nc=nc) | |
res = p1 - q | |
return res | |
alpha = 1-ci | |
dof = len(obs)-1 | |
lower_ci = 0 | |
upper_ci = 1 | |
if alternative in ["two-sided", "two.sided"]: | |
upper_ncp = least_squares(ncp_fun, | |
chi, | |
args=(chi, dof, alpha/2), | |
bounds = (chi, N * ((1/min_p)-1)), | |
).x[0] | |
upper_ci = np.sqrt(upper_ncp/ (N * ((1/min_p)-1))) | |
if chi == 0 or \ | |
math.isclose(ncx2.ppf(1-alpha, df=dof, nc=0, loc=0, scale=1), chi, | |
abs_tol=1e-3): | |
lower_ncp = 0 | |
else: | |
lower_ncp = least_squares(ncp_fun, | |
chi, | |
args=(chi, dof, 1-alpha/2), | |
bounds=(-0.0,chi), | |
).x[0] | |
lower_ci = np.sqrt(lower_ncp/ (N * ((1/min_p)-1))) | |
elif alternative == "less": | |
upper_ncp = least_squares(ncp_fun, | |
chi, | |
args=(chi, dof, alpha), | |
bounds=(chi,N * ((1/min_p)-1)), | |
).x[0] | |
upper_ci= np.sqrt(upper_ncp / (N * ((1/min_p)-1))) | |
elif alternative == "greater": | |
if chi == 0 or \ | |
math.isclose(ncx2.ppf(1-alpha, df=dof, nc=0, loc=0, scale=1), chi, | |
abs_tol=1e-3): | |
upper_ncp = 0 | |
else: | |
upper_ncp= least_squares(ncp_fun, | |
chi, | |
args=(chi, dof, 1-alpha), | |
bounds=(0,chi), | |
).x[0] | |
lower_ci= np.sqrt(upper_ncp / (N * ((1/min_p)-1))) | |
result = result_tuple(fei, (lower_ci, upper_ci)) | |
return result |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ============================================================================= | |
# Examples from https://easystats.github.io/effectsize/articles/xtabs.html#goodness-of-fit | |
# ============================================================================= | |
O = [89, 37, 130, 28, 2] # observed group sizes | |
E = [.40, .20, .20, .15, .05] # expected group freq | |
fei(O, E) # == 0.14933 | |
# Observed perfectly matches Expected | |
O1 = np.array(E) * 286 | |
fei(O1, E) # == 0 | |
# Observed deviates maximally from Expected: | |
O2 = [0,0,0,0,286] | |
fei(O2,E) # ~= 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment