Skip to content

Instantly share code, notes, and snippets.

@xhluca
Last active June 27, 2019 21:05
Show Gist options
  • Save xhluca/334c24933e6f4913f4d779b784e71043 to your computer and use it in GitHub Desktop.
Save xhluca/334c24933e6f4913f4d779b784e71043 to your computer and use it in GitHub Desktop.
A faster implementation of scikit-learn's Ridge, leveraging TensorFlow: https://www.kaggle.com/xhlulu/accelerate-sklearn-regression-using-tensorflow
import numpy as np
import tensorflow as tf
class TFRidge:
    """Ridge regression solved with TensorFlow's regularized least squares.

    A private ``tf.Graph`` is built once at construction time using the
    TF 1.x graph API (``tf.placeholder`` / ``tf.Session``); every call to
    ``fit`` or ``predict`` opens a short-lived session on that graph.

    Parameters
    ----------
    alpha : float, default=1.0
        L2 penalty strength, forwarded as ``l2_regularizer`` to
        ``tf.linalg.lstsq``.
    fit_intercept : bool, default=True
        When true, an intercept is estimated. The data is mean-centered
        before solving so that the penalty applies to the coefficients only
        and never to the intercept.
    normalize : bool, default=False
        Accepted for signature compatibility only. Normalization is not
        implemented; a ``UserWarning`` is emitted when a truthy value is
        passed and the data is used as-is.
    solver : str, default='cholesky'
        Only ``'cholesky'`` is accepted.

    Attributes
    ----------
    weights : numpy.ndarray
        Set by ``fit``. Shape ``(n_features, n_targets)``, or
        ``(n_features + 1, n_targets)`` when ``fit_intercept`` is true, in
        which case the last row holds the intercept.

    Raises
    ------
    ValueError
        If ``solver`` is anything other than ``'cholesky'``.
    """

    def __init__(self, alpha=1.0, fit_intercept=True, normalize=False, solver='cholesky'):
        if solver != 'cholesky':
            raise ValueError('The only accepted solver is currently "cholesky"')
        if normalize:
            # The flag used to be swallowed silently; tell the caller that it
            # has no effect instead of letting them assume scaled inputs.
            import warnings

            warnings.warn(
                "TFRidge does not implement `normalize`; the data is used as-is.",
                UserWarning,
                stacklevel=2,
            )

        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.normalize = normalize

        self.graph = tf.Graph()
        self.build_graph()

    def build_graph(self):
        """Create the placeholders and the solve / predict ops on ``self.graph``."""
        with self.graph.as_default():
            self.tf_y = tf.placeholder(tf.float64, shape=(None, None))
            self.tf_input = tf.placeholder(tf.float64, shape=(None, None))

            if self.fit_intercept:
                # Solve on centered data so the L2 penalty touches only the
                # coefficients. Appending a ones column and solving directly
                # would shrink the intercept as well whenever alpha > 0.
                x_mean = tf.reduce_mean(self.tf_input, axis=0, keepdims=True)
                y_mean = tf.reduce_mean(self.tf_y, axis=0, keepdims=True)
                coef = tf.linalg.lstsq(
                    self.tf_input - x_mean,
                    self.tf_y - y_mean,
                    l2_regularizer=self.alpha,
                    fast=True,
                )
                intercept = y_mean - tf.matmul(x_mean, coef)
                # Intercept goes in the last row, matching the ones column
                # that is appended as the last feature at prediction time.
                self.tf_weights = tf.concat([coef, intercept], axis=0)

                tf_bias = tf.ones((tf.shape(self.tf_input)[0], 1), dtype=tf.float64)
                tf_x = tf.concat([self.tf_input, tf_bias], axis=1)
            else:
                tf_x = self.tf_input
                self.tf_weights = tf.linalg.lstsq(
                    tf_x,
                    self.tf_y,
                    l2_regularizer=self.alpha,
                    fast=True,
                )

            # Prediction takes the fitted weights through a placeholder so the
            # same graph serves both fitting and inference.
            self.tf_trained_weights = tf.placeholder(tf.float64, shape=(None, None))
            self.tf_preds = tf.matmul(tf_x, self.tf_trained_weights)

    def fit(self, X, y):
        """Solve for the weights and store them in ``self.weights``.

        Parameters
        ----------
        X : array-like, 2-D
            Training inputs, one row per sample.
        y : array-like, 1-D or 2-D
            Targets. A 1-D ``y`` is treated as a single target column.

        Returns
        -------
        TFRidge
            ``self``, so calls can be chained.
        """
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y, dtype=np.float64)
        if y.ndim == 1:
            y = y[:, np.newaxis]

        with tf.Session(graph=self.graph) as sess:
            self.weights = sess.run(
                self.tf_weights,
                feed_dict={self.tf_input: X, self.tf_y: y},
            )
        return self

    def predict(self, X):
        """Return predictions for ``X`` using the weights found by ``fit``.

        Parameters
        ----------
        X : array-like, 2-D
            Inputs with the same number of columns as the training data.

        Returns
        -------
        numpy.ndarray
            2-D array with one row per sample and one column per target
            (a single column when ``fit`` received a 1-D ``y``).

        Raises
        ------
        AttributeError
            If ``fit`` has not been called yet.
        """
        weights = getattr(self, "weights", None)
        if weights is None:
            # Fail before opening a session, with a message that names the cause.
            raise AttributeError(
                "This TFRidge instance is not fitted yet; call fit() before predict()."
            )

        with tf.Session(graph=self.graph) as sess:
            return sess.run(
                self.tf_preds,
                feed_dict={
                    self.tf_input: np.asarray(X, dtype=np.float64),
                    self.tf_trained_weights: weights,
                },
            )
def main():
    """Compare scikit-learn's Ridge with TFRidge on a synthetic regression task.

    Generates a dense regression problem, holds out 20% of it, fits both
    estimators with ``alpha=0`` and prints the test mean squared error of each.
    """
    # Imported here because scikit-learn is only needed for this demo.
    from sklearn.datasets import make_regression
    from sklearn.linear_model import Ridge
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split

    X, y = make_regression(
        n_samples=100000,
        n_features=1000,
        n_informative=1000,
        random_state=2019,
        bias=5,
        noise=10,
    )
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=2019
    )
    print("Dataset created.")

    # Using Scikit-Learn
    sk_model = Ridge(alpha=0., solver='cholesky', fit_intercept=True)
    sk_model.fit(X_train, y_train)
    y_pred_sklearn = sk_model.predict(X_test)
    print("Scikit-Learn MSE:", mean_squared_error(y_test, y_pred_sklearn))

    # Using Tensorflow Ridge
    tf_model = TFRidge(alpha=0., solver='cholesky', fit_intercept=True)
    tf_model.fit(X_train, y_train)
    y_pred = tf_model.predict(X_test)
    print("Tensorflow MSE:", mean_squared_error(y_test, y_pred))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment