Last active
September 7, 2020 14:39
-
-
Save kashif/b329650b2ae492d8ef07010797b04c90 to your computer and use it in GitHub Desktop.
Batch SGD ElasticNet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.datasets import load_boston | |
from sklearn.linear_model import (LinearRegression, Ridge, SGDRegressor, | |
Lasso, ElasticNetCV) | |
from sklearn.preprocessing import MinMaxScaler | |
import numpy as np | |
#from minepy import MINE | |
from sklearn.metrics import mean_squared_error | |
#np.random.seed(0) | |
# ---------------------------------------------------------------------------
# Synthetic data for the Friedman #1 regression problem:
#     y = 10*sin(pi*x1*x2) + 20*(x3 - 0.5)^2 + 10*x4 + 5*x5 + N(0, 1)
# Three training batches (X1..X3) plus one held-out test set, all drawn
# i.i.d. uniform on [0, 1]; only the first 5 of the 14 features are
# informative.
# ---------------------------------------------------------------------------
size = 1000

X1 = np.random.uniform(0, 1, (size, 14))
X2 = np.random.uniform(0, 1, (size, 14))
X3 = np.random.uniform(0, 1, (size, 14))
Xtrain = [X1, X2, X3]
X_test = np.random.uniform(0, 1, (size, 14))


def _friedman1(X):
    """Noise-free Friedman #1 target for an (n, >=5) design matrix."""
    return (10 * np.sin(np.pi * X[:, 0] * X[:, 1])
            + 20 * (X[:, 2] - .5) ** 2
            + 10 * X[:, 3]
            + 5 * X[:, 4])


### Friedman 1st regression problem
Ytrue1 = _friedman1(X1)
# BUG FIX: np.random.normal(0, 1) (no size) is a single scalar shared by
# every sample; per-sample i.i.d. Gaussian noise needs the size argument.
Y1 = Ytrue1 + np.random.normal(0, 1, size)

Ytrue2 = _friedman1(X2)
Y2 = Ytrue2 + np.random.normal(0, 1, size)

Ytrue3 = _friedman1(X3)
Y3 = Ytrue3 + np.random.normal(0, 1, size)

Ytrain = [Y1, Y2, Y3]
Ytrue_test = _friedman1(X_test)

### Overwrite 4 noise columns (10-13) with copies of X1-X4 plus small jitter,
### so the design matrices contain strongly correlated features.  Done AFTER
### the targets are computed, so the targets still depend on the originals.
X1[:, 10:] = X1[:, :4] + np.random.normal(0, .025, (size, 4))
X2[:, 10:] = X2[:, :4] + np.random.normal(0, .025, (size, 4))
X3[:, 10:] = X3[:, :4] + np.random.normal(0, .025, (size, 4))

names = ["x%s" % i for i in range(1, 15)]
ranks = {}
def rank_to_dict(ranks, names, order=1):
    """Min-max scale ``order * ranks`` onto [0, 1] and map name -> score.

    Scores are rounded to 2 decimal places.  Pass ``order=-1`` to flip the
    sign so that "smaller is better" measures rank highest.
    """
    column = order * np.array([ranks]).T           # shape (n_features, 1)
    scaled = MinMaxScaler().fit_transform(column).T[0]
    rounded = [round(score, 2) for score in scaled]
    return dict(zip(names, rounded))
# Select alpha / l1_ratio by cross-validation on one batch, then reuse the
# chosen hyper-parameters for incremental (partial_fit) SGD training.
clf_ElasticNetCV = ElasticNetCV(
    alphas=[0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 100],
    l1_ratio=[.1, .5, .7, .9, .95, .99, 1])
clf_ElasticNetCV.fit(X3, Y3)
print('Best alpha:', clf_ElasticNetCV.alpha_)
print('Best l1_ratio:', clf_ElasticNetCV.l1_ratio_)

#sgdEN = SGDRegressor(warm_start=True, penalty='elasticnet')
sgdEN = SGDRegressor(warm_start=True, penalty='elasticnet',
                     alpha=clf_ElasticNetCV.alpha_,
                     l1_ratio=clf_ElasticNetCV.l1_ratio_)

# Batch training: one partial_fit call per (X, Y) training batch.
for X, Y in zip(Xtrain, Ytrain):
    sgdEN.partial_fit(X, Y)

ranks["SGDEN"] = rank_to_dict(sgdEN.coef_, names)

y_pred = sgdEN.predict(X_test)
# BUG FIX: this was a Python 2 print *statement* while the prints above use
# the print() function; the mixed syntax fails to parse under Python 3.
print(mean_squared_error(Ytrue_test, y_pred))
# Mean score per feature across all ranking methods (kept for inspection;
# not printed in the table below).
r = {}
for name in names:
    r[name] = round(np.mean([ranks[method][name] for method in ranks]), 2)

# BUG FIX: these were Python 2 print statements, inconsistent with the
# print() calls earlier in the file and a SyntaxError under Python 3.
methods = sorted(ranks.keys())
print("\t%s" % "\t".join(methods))
for name in names:
    print("%s\t%s" % (
        name, "\t".join(str(ranks[method][name]) for method in methods)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment