Skip to content

Instantly share code, notes, and snippets.

@kashif
Last active September 7, 2020 14:39
Show Gist options
  • Save kashif/b329650b2ae492d8ef07010797b04c90 to your computer and use it in GitHub Desktop.
Batch SGD ElasticNet
from sklearn.datasets import load_boston
from sklearn.linear_model import (LinearRegression, Ridge, SGDRegressor,
Lasso, ElasticNetCV)
from sklearn.preprocessing import MinMaxScaler
import numpy as np
#from minepy import MINE
from sklearn.metrics import mean_squared_error
#np.random.seed(0)
### Synthetic data for the Friedman #1 regression benchmark: three training
### batches plus one held-out test set, each 1000 samples x 14 features drawn
### uniformly from [0, 1].
size = 1000
X1 = np.random.uniform(0, 1, (size, 14))
X2 = np.random.uniform(0, 1, (size, 14))
X3 = np.random.uniform(0, 1, (size, 14))
Xtrain = [X1, X2, X3]
X_test = np.random.uniform(0, 1, (size, 14))

def _friedman1(X):
    """Noise-free Friedman #1 target; only the first five features matter."""
    return (10 * np.sin(np.pi * X[:, 0] * X[:, 1])
            + 20 * (X[:, 2] - .5) ** 2
            + 10 * X[:, 3] + 5 * X[:, 4])

### Friedman 1st regression problem
Ytrue1 = _friedman1(X1)
# BUG FIX: np.random.normal(0, 1) returns a single scalar, so every sample
# previously got the *same* offset; draw one noise value per sample instead,
# as the Friedman #1 benchmark specifies (i.i.d. N(0, 1) noise).
Y1 = Ytrue1 + np.random.normal(0, 1, size)
Ytrue2 = _friedman1(X2)
Y2 = Ytrue2 + np.random.normal(0, 1, size)
Ytrue3 = _friedman1(X3)
Y3 = Ytrue3 + np.random.normal(0, 1, size)
Ytrain = [Y1, Y2, Y3]
Ytrue_test = _friedman1(X_test)
### Overwrite the last 4 columns with noisy copies of the first 4, so the
### inputs contain strongly correlated (redundant) features.  Note this runs
### *after* the targets were computed, so the targets depend only on the
### original feature values.
for _X in (X1, X2, X3):
    _X[:, 10:] = _X[:, :4] + np.random.normal(0, .025, (size, 4))
names = ["x%s" % i for i in range(1, 15)]
ranks = {}
def rank_to_dict(ranks, names, order=1):
    """Min-max scale ``order * ranks`` to [0, 1] and map names to scores.

    Parameters
    ----------
    ranks : 1-D array-like
        Raw importance scores (e.g. model coefficients).
    names : sequence of str
        Feature names, same length as ``ranks``.
    order : int, optional
        +1 keeps the ordering, -1 inverts it before scaling.

    Returns
    -------
    dict
        ``{name: score}`` with scores min-max scaled to [0, 1] and rounded
        to 2 decimal places.
    """
    vals = order * np.asarray(ranks, dtype=float)
    span = vals.max() - vals.min()
    # Same convention as sklearn's MinMaxScaler: a zero range maps all
    # values to 0 instead of dividing by zero.
    scaled = (vals - vals.min()) / span if span else np.zeros_like(vals)
    return dict(zip(names, (round(float(v), 2) for v in scaled)))
# Select the ElasticNet regularisation strength (alpha) and L1/L2 mix
# (l1_ratio) by cross-validation on the third training batch.
alpha_grid = [0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 100]
l1_ratio_grid = [.1, .5, .7, .9, .95, .99, 1]
clf_ElasticNetCV = ElasticNetCV(alphas=alpha_grid, l1_ratio=l1_ratio_grid)
clf_ElasticNetCV.fit(X3, Y3)
print ('Best alpha:', clf_ElasticNetCV.alpha_)
print ('Best l1_ratio:', clf_ElasticNetCV.l1_ratio_)
# Mini-batch training: reuse the CV-selected hyperparameters and feed the
# three batches through partial_fit; warm_start keeps the coefficients
# between calls so each batch refines the previous fit.
sgdEN = SGDRegressor(warm_start=True, penalty='elasticnet',
                     alpha=clf_ElasticNetCV.alpha_,
                     l1_ratio=clf_ElasticNetCV.l1_ratio_)
for batch_X, batch_Y in zip(Xtrain, Ytrain):
    sgdEN.partial_fit(batch_X, batch_Y)
ranks["SGDEN"] = rank_to_dict(sgdEN.coef_, names)
# Held-out error measured against the noise-free test targets.
y_pred = sgdEN.predict(X_test)
# BUG FIX: was a Python 2 print statement (`print mean_squared_error(...)`),
# a syntax error under Python 3 and inconsistent with the prints above.
print(mean_squared_error(Ytrue_test, y_pred))
# Average each feature's score across all ranking methods (only "SGDEN" is
# populated in this script, so the mean equals that method's score).
r = {}
for name in names:
    r[name] = round(np.mean([ranks[method][name] for method in ranks]), 2)

# Print a tab-separated table: one column per method, one row per feature.
# BUG FIX: the two prints below were Python 2 print statements (syntax
# errors under Python 3); converted to print() calls, same output text.
methods = sorted(ranks.keys())
print("\t%s" % "\t".join(methods))
for name in names:
    print("%s\t%s" % (name, "\t".join(map(str,
        [ranks[method][name] for method in methods]))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment