Skip to content

Instantly share code, notes, and snippets.

View erykml's full-sized avatar

Eryk Lewinson erykml

View GitHub Profile
from sklearn.linear_model import LinearRegression
# Build and fit the scikit-learn linear model in one step; fit() returns the
# estimator itself, so lin_reg is the fitted LinearRegression instance.
lin_reg = LinearRegression().fit(X, y)

# Report the fitted parameters, then the score evaluated on the very same
# data the model was fitted on (an in-sample figure, not a hold-out one).
print(f'Coefficients: {lin_reg.coef_}')
print(f'Intercept: {lin_reg.intercept_}')
print(f'R^2 score: {lin_reg.score(X, y)}')
%%R -i X -i y
# Fit a linear model in R on the X and y objects passed in via -i:
# the formula `y ~ .` regresses y on every other column of cbind(X, y).
lin_reg <- lm(y ~ ., data = cbind(X, y))
# Print the summary of the fitted model.
summary(lin_reg)
import statsmodels.api as sm
# Add a constant column to X so the fitted model has an intercept term.
X_constant = sm.add_constant(X)
# Fit ordinary least squares of y on the augmented design matrix.
lin_reg = sm.OLS(y,X_constant).fit()
# Build the results summary; as the last expression of a notebook cell it is
# what gets displayed.
lin_reg.summary()
# Notebook plotting setup: draw matplotlib figures inline and request the
# 'retina' figure format from the inline backend.
%matplotlib inline
%config InlineBackend.figure_format ='retina'
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.stats.api as sms
# Apply seaborn's 'darkgrid' style to subsequent plots.
sns.set_style('darkgrid')
# Set the default figure size; matplotlib's rcParams is reached through the
# `mpl` handle that seaborn exposes.
sns.mpl.rcParams['figure.figsize'] = (15.0, 9.0)
def linearity_test(model, y):
'''
%%R -i y
# Plot 1: residuals against fitted values, with a loess smoother and a dashed
# horizontal reference line at zero.
p1 <- ggplot(lin_reg, aes(.fitted, .resid)) + geom_point()
p1 <- p1 + stat_smooth(method="loess") + geom_hline(yintercept=0, col="red", linetype="dashed")
p1 <- p1 + xlab("Predicted") + ylab("Residuals")
p1 <- p1 + ggtitle("Residuals vs. Predicted Values") + theme_bw()
# Plot 2: observed against fitted values.
# NOTE(review): observed values are read from X$medv although y is the object
# passed into this cell -- confirm that X really carries a medv column.
df_plt <- data.frame("fitted" = fitted(lin_reg), "observed" = X$medv)
p2 <- ggplot(df_plt, aes(x=fitted, y=observed)) + geom_point()
# The reference line must be the identity y = x (intercept 0, slope 1), on
# which perfect predictions fall. Passing only intercept = 1 would leave the
# slope at its default of 1 and draw y = x + 1 instead.
p2 <- p2 + stat_smooth(method="loess") + geom_abline(intercept = 0, slope = 1, col="red", linetype="dashed")
from statsmodels.stats.outliers_influence import variance_inflation_factor
# Compute the variance inflation factor for every column of X_constant,
# one column index at a time.
vif = [variance_inflation_factor(X_constant.values, i) for i in range(X_constant.shape[1])]
# Skip the first value (presumably the constant column added to X -- confirm)
# so the rest line up with X's columns, then transpose into a single 'vif' row.
pd.DataFrame({'vif': vif[1:]}, index=X.columns).T
%%R
# lmtest provides bptest() and gqtest() used below.
library(lmtest)
# Draw the diagnostic plots of the fitted model in one grid.
par(mfrow=c(2,2)) # set 2 rows and 2 column plot layout
plot(lin_reg)
# Breusch-Pagan test
# (studentize = TRUE requests the studentized version of the test statistic)
print(bptest(lin_reg, data = X, studentize = TRUE))
# Goldfeld-Quandt
# (called with its default arguments)
print(gqtest(lin_reg))
# Notebook plotting setup: draw matplotlib figures inline and request the
# 'retina' figure format from the inline backend.
%matplotlib inline
%config InlineBackend.figure_format ='retina'
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.stats.api as sms
# Apply seaborn's 'darkgrid' style to subsequent plots.
sns.set_style('darkgrid')
# Set the default figure size; matplotlib's rcParams is reached through the
# `mpl` handle that seaborn exposes.
sns.mpl.rcParams['figure.figsize'] = (15.0, 9.0)
def homoscedasticity_test(model):
'''
import statsmodels.tsa.api as smt
# Plot the autocorrelation of the model residuals for lags up to 40;
# alpha=0.05 sets the level used for the confidence intervals drawn around it.
acf = smt.graphics.plot_acf(lin_reg.resid, lags=40 , alpha=0.05)
# NOTE(review): the object returned above is shown explicitly here; under an
# inline notebook backend the figure may already be rendered -- confirm that
# this call is still needed.
acf.show()
%%R
library(ggplot2)
# lmtest provides dwtest() used below.
library(lmtest)
# Plot the autocorrelation function of the model residuals.
acf(lin_reg$residuals)
# Durbin-Watson test on the fitted model (checks the residuals for
# autocorrelation).
dwtest(lin_reg)