This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class DecisionTreeRegressor:
    """Regression tree exposing a small ``fit`` / ``predict`` interface.

    The tree itself is built by ``Node``; this class only keeps the root node
    in ``self.dtree`` and forwards prediction requests to it.
    """

    def fit(self, X, y, min_leaf=5):
        """Build the tree from training data and return ``self``.

        Args:
            X: Feature table, handed to ``Node`` unchanged.
                NOTE(review): other ``Node`` code in this file reads
                ``x.values``, so a pandas DataFrame is presumably expected.
            y: Target values; ``len(y)`` fixes the number of training rows.
            min_leaf: Forwarded to ``Node`` as its ``min_leaf`` argument.

        Returns:
            This regressor, so that calls can be chained.
        """
        # The root node starts out owning every row index 0..len(y)-1.
        self.dtree = Node(X, y, np.arange(len(y)), min_leaf)
        return self

    def predict(self, X):
        """Return the fitted tree's prediction for every row of ``X``.

        Args:
            X: Either an object exposing a ``.values`` array (for example a
                pandas DataFrame) or any array-like of rows.

        Returns:
            Whatever ``self.dtree.predict`` returns for the row array.
        """
        # Use ``.values`` when present, otherwise convert the input directly.
        rows = np.asarray(getattr(X, "values", X))
        return self.dtree.predict(rows)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Node:
    """One node of a regression tree, covering the rows listed in ``idxs``."""

    def __init__(self, x, y, idxs, min_leaf=5):
        """Record the data this node covers and compute its prediction.

        Args:
            x: Feature table; only ``x.shape[1]`` is read here.
            y: Target values; must support indexing with ``idxs``.
            idxs: Indices of the rows that belong to this node.
            min_leaf: Stored as ``self.min_leaf`` for later use when splitting.
        """
        self.x = x
        self.y = y
        self.idxs = idxs
        self.min_leaf = min_leaf
        self.row_count = len(idxs)
        self.col_count = x.shape[1]
        # The node's prediction is the mean target over its own rows.
        self.val = np.mean(y[idxs])
        # No split has been scored yet. ``is_leaf`` and ``find_better_split``
        # elsewhere in this file read ``self.score``, so it must exist.
        self.score = float("inf")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Node:
    """A regression-tree node responsible for the training rows in ``idxs``."""

    def __init__(self, x, y, idxs, min_leaf=5):
        """Keep references to the data and derive this node's summary values.

        Args:
            x: Feature table; its column count is taken from ``x.shape[1]``.
            y: Target values, indexable by ``idxs``.
            idxs: Row indices assigned to this node.
            min_leaf: Kept on the instance as ``self.min_leaf``.
        """
        self.x = x
        self.y = y
        self.idxs = idxs
        self.min_leaf = min_leaf
        self.row_count = len(idxs)
        self.col_count = x.shape[1]
        # Predicted value for this node: mean of the targets it covers.
        self.val = np.mean(y[idxs])
        # Start with "no split found"; code elsewhere in this file compares
        # against ``self.score`` and would fail if it were never set.
        self.score = float("inf")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def find_varsplit(self):
    """Search every column for a split and, if one exists, create children.

    NOTE(review): this takes ``self`` and builds ``Node`` children, so it is
    presumably meant to sit inside the ``Node`` class body; the class header
    is not part of this snippet.

    Calls ``self.find_better_split`` once per column index. If the node is
    still a leaf afterwards, nothing else happens. Otherwise the node's rows
    are partitioned on ``self.split`` and stored as ``self.lhs`` (values
    ``<=`` the threshold) and ``self.rhs`` (values ``>`` the threshold).
    """
    for col in range(self.col_count):
        self.find_better_split(col)
    if self.is_leaf:
        return

    column = self.split_col
    # Two independent comparisons are kept on purpose: a value that satisfies
    # neither of them is assigned to neither child, as in the original.
    left_pos = np.nonzero(column <= self.split)[0]
    right_pos = np.nonzero(column > self.split)[0]
    # Positions are relative to this node's rows; map them back through
    # ``self.idxs`` so the children index the full data set.
    self.lhs = Node(self.x, self.y, self.idxs[left_pos], self.min_leaf)
    self.rhs = Node(self.x, self.y, self.idxs[right_pos], self.min_leaf)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@property
def split_col(self):
    """Values of the chosen split column, restricted to this node's rows.

    NOTE(review): decorated as a property and takes ``self``, so this is
    presumably meant to sit inside the ``Node`` class body.

    Reads ``self.x.values`` and selects rows ``self.idxs`` in column
    ``self.var_idx``.
    """
    return self.x.values[self.idxs, self.var_idx]
@property
def is_leaf(self):
    """Whether this node has no recorded split.

    NOTE(review): decorated as a property and takes ``self``, so this is
    presumably meant to sit inside the ``Node`` class body.

    True exactly when ``self.score`` equals positive infinity.
    """
    return self.score == float('inf')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def find_better_split(self, var_idx):
    """Try every row value of one column as a threshold; keep the best one.

    NOTE(review): takes ``self``, so this is presumably meant to sit inside
    the ``Node`` class body.

    Args:
        var_idx: Index of the column in ``self.x.values`` to evaluate.

    For each of this node's rows, the row's value in the column is used as a
    threshold. Candidates that would leave fewer than ``self.min_leaf`` rows
    on either side are skipped. The remaining candidates are scored with
    ``self.find_score``; a score strictly lower than ``self.score`` replaces
    the currently recorded split.
    """
    column = self.x.values[self.idxs, var_idx]
    for r in range(self.row_count):
        threshold = column[r]
        lhs = column <= threshold
        rhs = column > threshold
        # Reject splits that would make either side too small.
        if rhs.sum() < self.min_leaf or lhs.sum() < self.min_leaf:
            continue
        curr_score = self.find_score(lhs, rhs)
        if curr_score < self.score:
            # NOTE(review): the original snippet was cut off right after this
            # ``if``. The three assignments below are reconstructed from the
            # attributes the rest of the file reads (``var_idx``, ``split``,
            # ``score``) -- confirm against the upstream source.
            self.var_idx = var_idx
            self.score = curr_score
            self.split = threshold
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def predict(self, x):
    """Predict a value for every row in ``x``.

    NOTE(review): takes ``self``, so this is presumably meant to sit inside
    the ``Node`` class body.

    Args:
        x: Iterable of rows; each row is passed to ``self.predict_row``.

    Returns:
        A NumPy array holding one prediction per row, in input order.
    """
    return np.array([self.predict_row(row) for row in x])
def predict_row(self, xi):
    """Return the prediction for a single row by walking down the tree.

    NOTE(review): takes ``self``, so this is presumably meant to sit inside
    the ``Node`` class body.

    Args:
        xi: One row, indexable by column position.

    Returns:
        The ``val`` of the leaf that the row ends up in.
    """
    # Iterative descent: same path as the recursive form, without adding one
    # Python call frame per tree level.
    node = self
    while not node.is_leaf:
        node = node.lhs if xi[node.var_idx] <= node.split else node.rhs
    return node.val
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit the tree on (X, y), then predict on the same X it was trained on.
regressor = DecisionTreeRegressor().fit(X, y)
preds = regressor.predict(X)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Predict on the test frame using three selected feature columns.
X_test = df_test[['OverallQual', 'GrLivArea', 'GarageCars']]
pred_test = regressor.predict(X_test)
# Pair each test Id with its predicted SalePrice and write the result to CSV
# without the DataFrame index column.
submission = pd.DataFrame({'Id': df_test.Id, 'SalePrice': pred_test})
submission.to_csv('submission.csv', index=False)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
!pip install tensorflow-gpu==2.0.0-alpha0 |