Last active
September 7, 2020 14:40
-
-
Save kashif/202941997f9ac64aed8852053fc62397 to your computer and use it in GitHub Desktop.
Batch GBRT
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.datasets import load_boston | |
from sklearn.linear_model import (LinearRegression, Ridge, | |
Lasso, RandomizedLasso) | |
from sklearn.feature_selection import RFE, f_regression | |
from sklearn.preprocessing import MinMaxScaler | |
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor | |
import numpy as np | |
#from minepy import MINE | |
from sklearn.metrics import mean_squared_error | |
#np.random.seed(0) | |
# Synthetic data for the Friedman #1 regression problem: three training
# batches plus one held-out test set.  Only features 0-4 are informative;
# features 10-13 are made into noisy copies of features 0-3 below so the
# feature set contains strongly correlated variables.
size = 1000

X1 = np.random.uniform(0, 1, (size, 14))
X2 = np.random.uniform(0, 1, (size, 14))
X3 = np.random.uniform(0, 1, (size, 14))
Xtrain = [X1, X2, X3]
X_test = np.random.uniform(0, 1, (size, 14))

### Friedman 1st regression problem
Ytrue1 = 10 * np.sin(np.pi*X1[:,0]*X1[:,1]) + 20*(X1[:,2] - .5)**2 + 10*X1[:,3] + 5*X1[:,4]
# NOTE: the original added a single scalar noise draw (np.random.normal(0, k))
# to every sample; passing `size` gives each observation its own noise, with
# a larger noise scale for each successive batch.
Y1 = Ytrue1 + np.random.normal(0, 1, size)
Ytrue2 = 10 * np.sin(np.pi*X2[:,0]*X2[:,1]) + 20*(X2[:,2] - .5)**2 + 10*X2[:,3] + 5*X2[:,4]
Y2 = Ytrue2 + np.random.normal(0, 2, size)
Ytrue3 = 10 * np.sin(np.pi*X3[:,0]*X3[:,1]) + 20*(X3[:,2] - .5)**2 + 10*X3[:,3] + 5*X3[:,4]
Y3 = Ytrue3 + np.random.normal(0, 3, size)
Ytrain = [Y1, Y2, Y3]

# Noise-free targets for the held-out test set (used for MSE evaluation).
Ytrue_test = 10 * np.sin(np.pi*X_test[:,0]*X_test[:,1]) + 20*(X_test[:,2] - .5)**2 + 10*X_test[:,3] + 5*X_test[:,4]

### Add 4 additional correlated variables: columns 10-13 become noisy
### copies of columns 0-3.  (Safe to do after computing Y: the targets
### only depend on columns 0-4.)
X1[:,10:] = X1[:,:4] + np.random.normal(0, .025, (size, 4))
X2[:,10:] = X2[:,:4] + np.random.normal(0, .025, (size, 4))
X3[:,10:] = X3[:,:4] + np.random.normal(0, .025, (size, 4))

names = ["x%s" % i for i in range(1, 15)]
ranks = {}
def rank_to_dict(ranks, names, order=1):
    """Min-max scale ``order * ranks`` to [0, 1] and map names to scores.

    Parameters
    ----------
    ranks : 1-d array-like of raw importance scores.
    names : sequence of feature names, same length as ``ranks``.
    order : 1 keeps the ordering; -1 inverts it (so that smaller raw
        scores map to larger scaled ranks).

    Returns
    -------
    dict mapping each name to its scaled score rounded to 2 decimals.
    """
    # Plain numpy min-max scaling; the original round-tripped a (n, 1)
    # column through sklearn's MinMaxScaler for the same computation.
    scores = order * np.asarray(ranks, dtype=float)
    span = scores.max() - scores.min()
    # Guard the all-equal case (zero range): divide by 1, yielding all
    # zeros, which matches MinMaxScaler's handle_zeros_in_scale behaviour.
    scaled = (scores - scores.min()) / (span if span else 1.0)
    return dict(zip(names, [round(s, 2) for s in scaled]))
# Incrementally train a GBRT over the three batches.  warm_start=True keeps
# the already-fitted trees between fit() calls, but sklearn only trains new
# stages when n_estimators has grown since the last fit -- with a constant
# n_estimators the 2nd and 3rd fit() calls add zero trees, so the ensemble
# must be grown before each refit.
gbrt = GradientBoostingRegressor(warm_start=True)
for ii in range(len(Ytrain)):
    X = Xtrain[ii]
    Y = Ytrain[ii]
    if ii > 0:
        # Grow the ensemble by another round of boosting stages;
        # otherwise this fit() is a silent no-op under warm_start.
        gbrt.set_params(n_estimators=gbrt.n_estimators + 100)
    gbrt.fit(X, Y)
    ranks["GBRT"] = rank_to_dict(gbrt.feature_importances_, names)
    y_pred = gbrt.predict(X_test)
    # Held-out MSE against the noise-free targets after each batch.
    # (print() call form runs on both Python 2 and 3.)
    print(mean_squared_error(Ytrue_test, y_pred))

# Average each feature's score across all ranking methods (only "GBRT"
# here, but the structure supports adding more methods to `ranks`).
r = {}
for name in names:
    r[name] = round(np.mean([ranks[method][name]
                             for method in ranks.keys()]), 2)

# Print a tab-separated table: one column per method, one row per feature.
methods = sorted(ranks.keys())
print("\t%s" % "\t".join(methods))
for name in names:
    print("%s\t%s" % (name, "\t".join(map(str,
          [ranks[method][name] for method in methods]))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment