Last active
December 29, 2024 07:04
-
-
Save rspeare/77061e6e317896be29c6de9a85db301d to your computer and use it in GitHub Desktop.
P values for sklearn logistic regression
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn import linear_model | |
import numpy as np | |
import scipy.stats as stat | |
class LogisticReg:
    """
    Wrapper class for binary logistic regression.

    The usual sklearn estimator is kept in ``self.model``.  After ``fit`` the
    following Wald statistics are available, one entry per coefficient in
    ``self.model.coef_[0]`` (same order):

        self.z_scores         coefficient / standard error
        self.p_values         two-tailed p-values (list of floats)
        self.sigma_estimates  estimated standard errors

    as well as the negative Hessian of the log-likelihood (Fisher information)

        self.F_ij

    When the wrapped model fits an intercept, ``F_ij`` is built from the
    design matrix augmented with a constant column, so the intercept occupies
    its last row/column.  Leaving the intercept out of the information matrix
    would understate the coefficient standard errors.

    NOTE: the formulas are those of the unpenalised maximum-likelihood
    estimator.  sklearn's LogisticRegression applies an L2 penalty by default,
    so weaken or disable the penalty (e.g. a very large ``C``) if the p-values
    are meant to be interpreted in the classical sense.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to ``LogisticRegression``."""
        self.model = linear_model.LogisticRegression(*args, **kwargs)

    def fit(self, X, y):
        """
        Fit the wrapped model, then compute Wald statistics.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Dense numeric design matrix.
        y : array-like of shape (n_samples,)
            Binary target.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If the fitted model is not binary (``decision_function`` is not
            one-dimensional).
        numpy.linalg.LinAlgError
            If the Fisher information matrix is singular.
        """
        self.model.fit(X, y)

        scores = np.asarray(self.model.decision_function(X), dtype=float)
        if scores.ndim != 1:
            raise ValueError(
                "LogisticReg only supports binary problems; "
                "decision_function returned an array of shape "
                "%s" % (scores.shape,)
            )

        design = np.asarray(X, dtype=float)
        if getattr(self.model, "fit_intercept", False):
            # The intercept is a fitted parameter too, so it needs its own
            # column in the information matrix.
            design = np.column_stack([design, np.ones(design.shape[0])])

        # Per-sample weight p * (1 - p) = 1 / (2 * (1 + cosh(s))).  Written
        # with exp(-|s|) so that large |s| decays to 0 instead of overflowing.
        decay = np.exp(-np.abs(scores))
        weights = decay / (1.0 + decay) ** 2

        # Fisher information matrix: design^T * diag(weights) * design.
        F_ij = np.dot((design * weights[:, np.newaxis]).T, design)
        # Inverse information matrix (Cramer-Rao bound on the covariance).
        Cramer_Rao = np.linalg.inv(F_ij)

        coefs = np.asarray(self.model.coef_)[0]
        # Report only the feature coefficients; the intercept, if present,
        # is the trailing diagonal entry and is dropped here.
        sigma_estimates = np.sqrt(np.diagonal(Cramer_Rao))[: coefs.shape[0]]
        # z-score for each model coefficient.
        z_scores = coefs / sigma_estimates
        # Two-tailed test for the p-values.
        p_values = list(2.0 * stat.norm.sf(np.abs(z_scores)))

        self.z_scores = z_scores
        self.p_values = p_values
        self.sigma_estimates = sigma_estimates
        self.F_ij = F_ij
        return self
Hi @biohousten, your code was very helpful, but I am getting an error claiming 'dep_acute' is not defined. Can you help me understand where this variable comes from? Cheers
It boggles my mind that it is so freaking complicated to get a p-value in sklearn. My God, what is the freaking point of doing logistic regression if you can't assess whether your p-values are significant or not? Why is this not a basic function argument?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@biohouston Thanks a whole lot, this was super helpful. Also, I think this is calling a special implementation of multinomial? I ran linear_model.LogisticRegression(multi_class='multinomial', max_iter=500, solver='lbfgs') and only got n series of coefs/p-values (n = number of y classes), but your awesome code seems to generate C(n, 2) series? :)