Skip to content

Instantly share code, notes, and snippets.

@nkt1546789
Last active August 25, 2021 22:36
Show Gist options
  • Save nkt1546789/2fd31a08a0f94cdd766259d65cb862dd to your computer and use it in GitHub Desktop.
Save nkt1546789/2fd31a08a0f94cdd766259d65cb862dd to your computer and use it in GitHub Desktop.
An example of regression using kernel density estimation (KDE)
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
random_state = 1
n_samples = 200
n_features = 1
n_proxy_samples = 300
offset = 0.1
n_test_samples = 200
X_train, y_train = datasets.make_regression(n_samples=n_samples, n_features=n_features, random_state=1, noise=25.0)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
y_train = StandardScaler().fit_transform(np.c_[y_train]).ravel()
pxy = GridSearchCV(KernelDensity(), param_grid={"bandwidth": np.logspace(-2, 1, 6)}).fit(np.c_[X_train, y_train]).best_estimator_
px = GridSearchCV(KernelDensity(), param_grid={"bandwidth": np.logspace(-2, 1, 6)}).fit(X_train).best_estimator_
X_test = np.c_[np.linspace(X_train.min() - offset, X_train.max() + offset, n_test_samples)]
X_test = scaler.transform(X_test)
ypred = []
sigmas = []
for x in X_test:
y_tmp = stats.norm.rvs(size=n_proxy_samples, random_state=random_state)
logpx = px.score_samples([x])
logpxy = pxy.score_samples(np.c_[np.kron(np.ones((len(y_tmp), 1)), x), y_tmp])
logny = np.log(stats.norm.pdf(y_tmp))
weight = np.exp(logpxy - logpx - logny)
mu = np.mean(y_tmp * weight)
sigma = np.mean((y_tmp - mu)**2 * weight)
ypred.append(mu)
sigmas.append(sigma)
ypred = np.array(ypred)
sigmas = np.array(sigmas)
plt.figure()
plt.scatter(X_train[:, 0], y_train)
plt.plot(X_test[:, 0], ypred, "r-", linewidth=2.0)
#plt.fill_between(X_test[:, 0], ypred - 0.95 * sigmas, ypred + 0.95 * sigmas)
plt.tight_layout()
plt.show()
@nkt1546789
Copy link
Author

figure_1

@thistleknot
Copy link

how does one do this for multiple regression?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment