Last active
August 25, 2021 22:36
-
-
Save nkt1546789/2fd31a08a0f94cdd766259d65cb862dd to your computer and use it in GitHub Desktop.
An example of regression using kernel density estimation (KDE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
from scipy import stats | |
from sklearn.model_selection import GridSearchCV | |
from sklearn.neighbors import KernelDensity | |
from sklearn import datasets | |
from sklearn.preprocessing import StandardScaler | |
random_state = 1 | |
n_samples = 200 | |
n_features = 1 | |
n_proxy_samples = 300 | |
offset = 0.1 | |
n_test_samples = 200 | |
X_train, y_train = datasets.make_regression(n_samples=n_samples, n_features=n_features, random_state=1, noise=25.0) | |
scaler = StandardScaler() | |
X_train = scaler.fit_transform(X_train) | |
y_train = StandardScaler().fit_transform(np.c_[y_train]).ravel() | |
pxy = GridSearchCV(KernelDensity(), param_grid={"bandwidth": np.logspace(-2, 1, 6)}).fit(np.c_[X_train, y_train]).best_estimator_ | |
px = GridSearchCV(KernelDensity(), param_grid={"bandwidth": np.logspace(-2, 1, 6)}).fit(X_train).best_estimator_ | |
X_test = np.c_[np.linspace(X_train.min() - offset, X_train.max() + offset, n_test_samples)] | |
X_test = scaler.transform(X_test) | |
ypred = [] | |
sigmas = [] | |
for x in X_test: | |
y_tmp = stats.norm.rvs(size=n_proxy_samples, random_state=random_state) | |
logpx = px.score_samples([x]) | |
logpxy = pxy.score_samples(np.c_[np.kron(np.ones((len(y_tmp), 1)), x), y_tmp]) | |
logny = np.log(stats.norm.pdf(y_tmp)) | |
weight = np.exp(logpxy - logpx - logny) | |
mu = np.mean(y_tmp * weight) | |
sigma = np.mean((y_tmp - mu)**2 * weight) | |
ypred.append(mu) | |
sigmas.append(sigma) | |
ypred = np.array(ypred) | |
sigmas = np.array(sigmas) | |
plt.figure() | |
plt.scatter(X_train[:, 0], y_train) | |
plt.plot(X_test[:, 0], ypred, "r-", linewidth=2.0) | |
#plt.fill_between(X_test[:, 0], ypred - 0.95 * sigmas, ypred + 0.95 * sigmas) | |
plt.tight_layout() | |
plt.show() |
Author
nkt1546789
commented
Jun 12, 2017
how does one do this for multiple regression?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment