Skip to content

Instantly share code, notes, and snippets.

@yakovenkodenis
Created October 15, 2016 17:02
Show Gist options
  • Save yakovenkodenis/7c6ca690a200ffab88ebf79bda27dbe6 to your computer and use it in GitHub Desktop.
Grid Search for Locally Weighted Linear Regression
import numpy as np
import pandas as pd
from statistics import mean
from sklearn.grid_search import ParameterGrid
from sklearn.preprocessing import StandardScaler
def getPredict(theta, x):
    """Linear-model prediction: the inner product of the feature row(s) x with theta."""
    return x @ theta
def getError(h, y):
    """Element-wise squared error between a prediction h and the target y."""
    diff = h - y
    return diff * diff
def getWeightedLRThetaParams(alpha, n_iterations, x_query, tau, x, y):
    """Fit theta for locally weighted linear regression at a single query point.

    Each training row x[i] receives a Gaussian weight from its squared distance
    to x_query (bandwidth tau); theta is then obtained by n_iterations steps of
    batch gradient descent (learning rate alpha) on the weighted squared error.
    """
    theta = np.zeros(x_query.shape)
    # Gaussian kernel weights: exp(-||x_i - x_query||^2 / (2 tau^2)), one per row.
    sq_dists = np.array([d @ d for d in x - x_query])
    w = np.exp(-sq_dists / (2 * tau ** 2))
    for _ in range(n_iterations):
        # Gradient of sum_i w_i * (x_i . theta - y_i)^2 with respect to theta.
        residuals = x @ theta - y
        theta = theta - alpha * ((w * residuals) @ x * 2)
    return theta
def normalize_dataset(X_train, X_test):
    """Standardize features: fit a StandardScaler on the training data only,
    then apply that same transform to both train and test sets."""
    std = StandardScaler()
    return std.fit_transform(X_train), std.transform(X_test)
def create_train_test(data, train_percentage):
    """Split a DataFrame into train/test, standardize features, add an intercept.

    The last column is treated as the target; all preceding columns are
    features. The split is positional: the first ceil(len * train_percentage)
    rows become the training set.

    Args:
        data: pandas DataFrame, last column is the target.
        train_percentage: fraction of rows (0..1) used for training.

    Returns:
        (X_train, X_test, y_train, y_test) where the X matrices are
        standardized and carry a leading column of ones (intercept term)
        and the y vectors are 1-D numpy arrays.
    """
    # BUG FIX: DataFrame.ix was removed in pandas 1.0; use positional .iloc.
    # Also slice the DataFrame directly instead of np.vsplit on a DataFrame.
    split_at = int(np.ceil(len(data) * train_percentage))
    data_train, data_test = data.iloc[:split_at], data.iloc[split_at:]
    X_train, y_train = data_train.iloc[:, :-1], data_train.iloc[:, -1].values
    X_test, y_test = data_test.iloc[:, :-1], data_test.iloc[:, -1].values
    X_train_norm, X_test_norm = normalize_dataset(X_train, X_test)
    # Prepend the intercept column of ones.
    X_train_norm = np.insert(X_train_norm, 0, np.ones(len(X_train_norm)), axis=1)
    X_test_norm = np.insert(X_test_norm, 0, np.ones(len(X_test_norm)), axis=1)
    return X_train_norm, X_test_norm, y_train.ravel(), y_test.ravel()
def perform_grid_search_for_single_instance(X, y, x_query, y_query, list_of_params_sets):
    """Evaluate every hyper-parameter set on one query point.

    Fits an LWR model per parameter set and returns the tuple
    (theta, prediction, squared_error, params_set) with the smallest error.
    Ties keep the earliest parameter set, matching min() semantics.
    """
    best = None
    for params_set in list_of_params_sets:
        learned_theta = getWeightedLRThetaParams(
            params_set['alpha'], params_set['n_iterations'], x_query, params_set['tau'], X, y
        )
        prediction = getPredict(learned_theta, x_query)
        error = getError(prediction, y_query)
        candidate = (learned_theta, prediction, error, params_set)
        if best is None or error < best[2]:
            best = candidate
    return best
def perform_grid_search(X_train, y_train, X_test, y_test):
    """Grid-search LWR hyper-parameters across every test instance.

    For each test example, finds the (theta, params) pair that minimizes the
    squared error, then averages the per-example winners.

    Returns:
        (tuned_mean_theta, tuned_mean_params): the component-wise mean theta
        and a dict mapping 'tau'/'alpha'/'n_iterations' to their mean values
        over the per-example best parameter sets.
    """
    # Local import: sklearn.grid_search (and its ParameterGrid) was removed
    # from scikit-learn; the Cartesian product is trivial to build directly.
    from itertools import product

    params_grid = {
        'tau': np.linspace(0.1, 1.0, num=5),
        'alpha': np.linspace(0.001, 0.1, num=10),
        'n_iterations': np.arange(10, 1000, step=100)
    }
    names = list(params_grid)
    list_of_params_sets = [
        dict(zip(names, values))
        for values in product(*(params_grid[name] for name in names))
    ]

    best_thetas, best_params = [], []
    for i in range(len(y_test)):
        winner = perform_grid_search_for_single_instance(
            X_train, y_train, X_test[i], y_test[i], list_of_params_sets
        )
        best_thetas.append(winner[0])
        best_params.append(winner[-1])

    tuned_mean_theta = np.array([mean(components) for components in zip(*best_thetas)])
    # BUG FIX: the original zipped params_grid.keys() against each dict's
    # .values(); ParameterGrid yields dicts with alphabetically sorted keys,
    # so averaged values were matched to the wrong parameter names. Average
    # per key name instead, which is order-independent.
    tuned_mean_params = {name: mean(d[name] for d in best_params) for name in params_grid}
    return tuned_mean_theta, tuned_mean_params
def test():
    """Smoke test: load prices.csv, tune LWR hyper-parameters by grid search,
    and print the averaged model plus a few scaled predictions."""
    data = pd.read_csv("prices.csv").astype(np.float32)
    X_train_intercept, X_test_intercept, y_train, y_test = create_train_test(data, 0.8)
    theta, params = perform_grid_search(
        X_train_intercept, y_train, X_test_intercept, y_test
    )
    print('Best theta: ', theta)
    print('Best params: ', params)
    # Predictions for the first three test rows, rescaled by 1000.
    for row in X_test_intercept[:3]:
        print(getPredict(theta, row) * 1000)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment