Skip to content

Instantly share code, notes, and snippets.

@negedng
Last active March 6, 2024 09:26
Show Gist options
  • Save negedng/126d53fd74d17f1621796787b963c342 to your computer and use it in GitHub Desktop.
Save negedng/126d53fd74d17f1621796787b963c342 to your computer and use it in GitHub Desktop.
NumPy and scikit-learn have different default values for DDOF.
import numpy as np
from sklearn import linear_model
# The data
X = np.array([10, 11, 15, 20, 30, 50, 60, 61, 70])
Y = np.array([3, 4, 3, 5, 10, 10, 12, 11, 13])

# Calculating mean
mean_X = np.mean(X)
mean_Y = np.mean(Y)

# Using the formula to calculate b0 and b1:
#   b1 = cov(X, Y) / var(X)
#   b0 = mean(Y) - b1 * mean(X)
# n accumulates the sum of cross-deviations (numerator of the covariance),
# d accumulates the sum of squared deviations of X (numerator of the variance).
n = 0.0
d = 0.0
for x_i, y_i in zip(X, Y):
    n = n + ((x_i - mean_X) * (y_i - mean_Y))
    d = d + ((x_i - mean_X) ** 2)

n0 = n / len(X)  # For Population covariance (divide by N)
n1 = n / (len(X) - 1)  # For Sampling covariance (divide by N - 1)

# The variance is always normalised by N here (population variance), so the
# "Sampling" slope below deliberately mixes an N - 1 covariance with an N
# variance and ends up scaled by N / (N - 1) relative to the population slope.
d = d / len(X)

b1_0 = n0 / d  # For Population covariance
b1_1 = n1 / d  # For Sampling covariance
b0_0 = mean_Y - (b1_0 * mean_X)  # Population
b0_1 = mean_Y - (b1_1 * mean_X)  # Sampling
print("Population: ", b0_0, b1_0)
print("Sampling: ", b0_1, b1_1)
print("Now let's see in NumPy: ")

# np.cov returns the 2x2 covariance matrix of (X, Y); entry [0, 1] is cov(X, Y).
# The degrees-of-freedom correction is spelled out on every call so the
# mismatch between the two library defaults is visible:
#   np.cov normalises by N - 1 (ddof=1) when ddof is not given,
#   np.var normalises by N     (ddof=0) when ddof is not given.
cov_XY_0 = np.cov(X, Y, ddof=0)[0, 1]  # Population
cov_XY_1 = np.cov(X, Y, ddof=1)[0, 1]  # Sampling
var_X = np.var(X, ddof=0)

# Slopes: covariance over the (population) variance of X.
b1_0, b1_1 = cov_XY_0 / var_X, cov_XY_1 / var_X

# Intercepts: the fitted line passes through (mean_X, mean_Y).
b0_0, b0_1 = mean_Y - (b1_0 * mean_X), mean_Y - (b1_1 * mean_X)

print("Population: ", b0_0, b1_0)
print("Sampling: ", b0_1, b1_1)
print("Now it's Sklearn!")

# LinearRegression.fit takes a 2-D feature matrix, so the 1-D X is turned into
# a single-column array of shape (len(X), 1); fit returns the fitted estimator.
reg_lin = linear_model.LinearRegression().fit(X.reshape(-1, 1), Y)

print("Population: ", reg_lin.intercept_, reg_lin.coef_[0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment