Last active
June 27, 2019 21:05
-
-
Save xhluca/334c24933e6f4913f4d779b784e71043 to your computer and use it in GitHub Desktop.
A faster implementation of scikit-learn's Ridge, leveraging TensorFlow: https://www.kaggle.com/xhlulu/accelerate-sklearn-regression-using-tensorflow
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import warnings

import numpy as np
import tensorflow as tf
class TFRidge:
    """Closed-form ridge regression evaluated with TensorFlow.

    The estimator builds a small static graph once (in ``__init__``) and
    opens a short-lived session for every ``fit`` / ``predict`` call.

    Parameters
    ----------
    alpha : float
        L2 penalty passed to ``tf.linalg.lstsq`` as ``l2_regularizer``.
    fit_intercept : bool
        When true, an intercept is estimated and stored as the last row of
        ``weights``. The intercept itself is not penalised.
    normalize : bool
        Accepted for signature compatibility only; feature normalisation is
        not implemented and a ``UserWarning`` is emitted when it is true.
    solver : str
        Must be ``'cholesky'``.

    Attributes
    ----------
    weights : numpy.ndarray
        Set by ``fit``. Shape ``(n_features + 1, n_targets)`` when
        ``fit_intercept`` is true (coefficients followed by the intercept
        row), otherwise ``(n_features, n_targets)``.

    Raises
    ------
    ValueError
        If ``solver`` is anything other than ``'cholesky'``.
    """

    def __init__(self, alpha=1.0, fit_intercept=True, normalize=False, solver='cholesky'):
        if solver != 'cholesky':
            raise ValueError('The only accepted solver is currently "cholesky"')
        if normalize:
            # Previously this flag was silently dropped; make that visible
            # without breaking callers that already pass it.
            warnings.warn(
                'TFRidge does not implement `normalize`; '
                'features are used as given.',
                UserWarning,
                stacklevel=2,
            )

        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.normalize = normalize

        self.graph = tf.Graph()
        self.build_graph()

    @staticmethod
    def _tf1():
        """Return the namespace that provides ``placeholder`` and ``Session``.

        ``tf.compat.v1`` is used when it exists; otherwise the top-level
        ``tf`` module is returned.
        """
        return getattr(getattr(tf, 'compat', tf), 'v1', tf)

    def build_graph(self):
        """Create the placeholders and the solve / predict ops in ``self.graph``."""
        tf1 = self._tf1()

        with self.graph.as_default():
            self.tf_y = tf1.placeholder(tf.float64, shape=(None, None))
            self.tf_input = tf1.placeholder(tf.float64, shape=(None, None))

            if self.fit_intercept:
                # Solve on centred data so that only the coefficients are
                # penalised, then recover the intercept from the means.
                x_mean = tf.reduce_mean(self.tf_input, axis=0, keepdims=True)
                y_mean = tf.reduce_mean(self.tf_y, axis=0, keepdims=True)
                coef = tf.linalg.lstsq(
                    self.tf_input - x_mean,
                    self.tf_y - y_mean,
                    l2_regularizer=self.alpha,
                    fast=True,
                )
                intercept = y_mean - tf.matmul(x_mean, coef)
                # Keep the stacked layout: coefficient rows, then intercept.
                self.tf_weights = tf.concat([coef, intercept], axis=0)

                # A trailing column of ones lets prediction be one matmul
                # against the stacked weights.
                n_rows = tf.shape(self.tf_input)[0]
                tf_bias = tf.ones((n_rows, 1), dtype=tf.float64)
                tf_x = tf.concat([self.tf_input, tf_bias], axis=1)
            else:
                tf_x = self.tf_input
                self.tf_weights = tf.linalg.lstsq(
                    tf_x,
                    self.tf_y,
                    l2_regularizer=self.alpha,
                    fast=True,
                )

            self.tf_trained_weights = tf1.placeholder(tf.float64, shape=(None, None))
            self.tf_preds = tf.matmul(tf_x, self.tf_trained_weights)

    def fit(self, X, y):
        """Solve for the weights and store them in ``self.weights``.

        Parameters
        ----------
        X : array-like, 2-D
            Training features, converted to a float64 array.
        y : array-like, 1-D or 2-D
            Targets; a 1-D input is treated as a single target column.

        Returns
        -------
        TFRidge
            ``self``, so calls can be chained.
        """
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y, dtype=np.float64)
        if y.ndim == 1:
            y = y[:, np.newaxis]

        with self._tf1().Session(graph=self.graph) as sess:
            self.weights = sess.run(
                self.tf_weights,
                feed_dict={self.tf_input: X, self.tf_y: y},
            )

        return self

    def predict(self, X):
        """Return predictions for ``X`` as a 2-D array, one column per target.

        Raises
        ------
        AttributeError
            If ``fit`` has not been called yet.
        """
        if not hasattr(self, 'weights'):
            raise AttributeError(
                'This TFRidge instance is not fitted yet; '
                'call fit() before predict().'
            )

        with self._tf1().Session(graph=self.graph) as sess:
            y_pred = sess.run(
                self.tf_preds,
                feed_dict={
                    self.tf_input: X,
                    self.tf_trained_weights: self.weights,
                },
            )

        return y_pred
def main():
    """Fit scikit-learn's Ridge and TFRidge on the same synthetic data.

    Generates a regression problem, splits it 80/20, fits both models with
    ``alpha=0`` and prints the test-set mean squared error of each.
    """
    # Imported here so that importing this module does not require sklearn.
    from sklearn.datasets import make_regression
    from sklearn.linear_model import Ridge
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split

    X, y = make_regression(
        n_samples=100000,
        n_features=1000,
        n_informative=1000,
        random_state=2019,
        bias=5,
        noise=10,
    )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=2019
    )

    print("Dataset created.")

    # Using Scikit-Learn
    model = Ridge(alpha=0., solver='cholesky', fit_intercept=True)
    model.fit(X_train, y_train)
    y_pred_sklearn = model.predict(X_test)
    print("Scikit-Learn MSE:", mean_squared_error(y_test, y_pred_sklearn))

    # Using Tensorflow Ridge
    model = TFRidge(alpha=0., solver='cholesky', fit_intercept=True)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print("Tensorflow MSE:", mean_squared_error(y_test, y_pred))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment