Last active
          August 25, 2021 22:36 
        
      - 
      
- 
        Save nkt1546789/2fd31a08a0f94cdd766259d65cb862dd to your computer and use it in GitHub Desktop. 
    An example of regression using kernel density estimation (KDE)
  
        
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import numpy as np | |
| import matplotlib.pyplot as plt | |
| from scipy import stats | |
| from sklearn.model_selection import GridSearchCV | |
| from sklearn.neighbors import KernelDensity | |
| from sklearn import datasets | |
| from sklearn.preprocessing import StandardScaler | |
| random_state = 1 | |
| n_samples = 200 | |
| n_features = 1 | |
| n_proxy_samples = 300 | |
| offset = 0.1 | |
| n_test_samples = 200 | |
| X_train, y_train = datasets.make_regression(n_samples=n_samples, n_features=n_features, random_state=1, noise=25.0) | |
| scaler = StandardScaler() | |
| X_train = scaler.fit_transform(X_train) | |
| y_train = StandardScaler().fit_transform(np.c_[y_train]).ravel() | |
| pxy = GridSearchCV(KernelDensity(), param_grid={"bandwidth": np.logspace(-2, 1, 6)}).fit(np.c_[X_train, y_train]).best_estimator_ | |
| px = GridSearchCV(KernelDensity(), param_grid={"bandwidth": np.logspace(-2, 1, 6)}).fit(X_train).best_estimator_ | |
| X_test = np.c_[np.linspace(X_train.min() - offset, X_train.max() + offset, n_test_samples)] | |
| X_test = scaler.transform(X_test) | |
| ypred = [] | |
| sigmas = [] | |
| for x in X_test: | |
| y_tmp = stats.norm.rvs(size=n_proxy_samples, random_state=random_state) | |
| logpx = px.score_samples([x]) | |
| logpxy = pxy.score_samples(np.c_[np.kron(np.ones((len(y_tmp), 1)), x), y_tmp]) | |
| logny = np.log(stats.norm.pdf(y_tmp)) | |
| weight = np.exp(logpxy - logpx - logny) | |
| mu = np.mean(y_tmp * weight) | |
| sigma = np.mean((y_tmp - mu)**2 * weight) | |
| ypred.append(mu) | |
| sigmas.append(sigma) | |
| ypred = np.array(ypred) | |
| sigmas = np.array(sigmas) | |
| plt.figure() | |
| plt.scatter(X_train[:, 0], y_train) | |
| plt.plot(X_test[:, 0], ypred, "r-", linewidth=2.0) | |
| #plt.fill_between(X_test[:, 0], ypred - 0.95 * sigmas, ypred + 0.95 * sigmas) | |
| plt.tight_layout() | |
| plt.show() | 
      
      
  Author
  
  
        
      
            nkt1546789
  
      
      
      commented 
        Jun 12, 2017 
      
    
  

how does one do this for multiple regression?
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment