# Scraped from a GitHub Gist web page — page chrome preserved as comments:
# "Skip to content"
# "Instantly share code, notes, and snippets."
def loss(h, y):
    """Half mean-squared-error cost: (1 / 2n) * sum((h - y)^2).

    h -- predicted values (numpy array)
    y -- target values (numpy array, same length as h)
    """
    residual = h - y
    n_samples = len(y)
    # The 1/2 factor is conventional: it cancels when differentiating for gradient descent.
    return (residual ** 2).sum() / (2.0 * n_samples)
# --- Single-feature training data: living area -> sale price ---
# NOTE(review): `df_train` is defined outside this excerpt — presumably the
# Ames/Kaggle house-prices training DataFrame; confirm against the full notebook.
x = df_train['GrLivArea']
y = df_train['SalePrice']
# Standardize the feature (zero mean, unit variance) so gradient descent
# converges with a single learning rate.
x = (x - x.mean()) / x.std()
# Prepend a column of ones so the first weight acts as the intercept term.
x = np.c_[np.ones(x.shape[0]), x]
class LinearRegression:
    """Linear model trained by batch gradient descent.

    NOTE(review): this class is visibly truncated in this excerpt — the
    `fit` method called elsewhere (clf.fit(x, y, n_iter=..., lr=...)) is
    not shown, and `self._W` (the weight vector) is never initialized here.
    """

    def predict(self, X):
        # X is expected to carry a leading bias column of ones (see the
        # np.c_ preprocessing above); assumes self._W has been set by fit.
        return np.dot(X, self._W)

    def _gradient_descent_step(self, X, targets, lr):
        # NOTE(review): method appears cut off by the scrape — it computes
        # the prediction error but never uses `lr` or updates self._W.
        # Confirm the original gist for the missing weight-update line(s).
        predictions = self.predict(X)
        error = predictions - targets
class TestLinearRegression(unittest.TestCase):
    """Pin the coefficients learned on the standardized GrLivArea feature."""

    def test_find_coefficients(self):
        # Relies on the module-level `x` (standardized, with bias column)
        # and `y` prepared above. The expected vector is [intercept, slope];
        # presumably captured from a prior converged run — verify if the
        # dataset or hyperparameters change.
        clf = LinearRegression()
        clf.fit(x, y, n_iter=2000, lr=0.01)
        np.testing.assert_array_almost_equal(clf._W, np.array([180921.19555322, 56294.90199925]))
# Refit on the single feature for interactive use (duplicates the unit-test body).
clf = LinearRegression()
clf.fit(x, y, n_iter=2000, lr=0.01)

# --- Multi-feature design matrix: quality, living area, garage size ---
x = df_train[['OverallQual', 'GrLivArea', 'GarageCars']]
x = (x - x.mean()) / x.std()
x = np.c_[np.ones(x.shape[0]), x]
# Separate raw (unscaled, no bias column) copy — presumably for the
# tree-based model below, which needs no feature scaling.
X = df_train[['OverallQual', 'GrLivArea', 'GarageCars']]
y = df_train['SalePrice']
from sklearn.metrics import mean_squared_error
from math import sqrt
def rmse(h, y):
    """Root-mean-squared error between predictions `h` and targets `y`.

    Computed directly with the standard library rather than delegating to
    sklearn.metrics.mean_squared_error — the value is identical for the
    1-D sequences used in this script, and it removes a heavyweight
    dependency from a one-line calculation.

    h -- iterable of predicted values
    y -- sequence of target values (same length as h)
    Returns a plain float (math.sqrt), matching the original's return type.
    """
    n = len(y)
    return sqrt(sum((p - t) ** 2 for p, t in zip(h, y)) / n)
from sklearn.ensemble import RandomForestRegressor
# Baseline tree model: a single depth-2 tree with no bootstrap sampling,
# i.e. effectively one deterministic decision tree rather than a true forest.
reg = RandomForestRegressor(
    n_estimators=1,
    max_depth=2,
    bootstrap=False,
    random_state=RANDOM_SEED  # NOTE(review): RANDOM_SEED defined outside this excerpt
)
reg.fit(X, y)
preds = reg.predict(X)
# In-sample R^2 (scored on the training data, so it measures fit, not
# generalization). NOTE(review): the `metrics` import (sklearn.metrics,
# presumably) is not visible in this excerpt — confirm in the full file.
metrics.r2_score(y, preds)