Created
          July 5, 2020 04:30 
        
      - 
      
- 
        Save seahrh/f81dbefa0e00f8a2082691d339b65506 to your computer and use it in GitHub Desktop. 
    Simple linear regression example (sklearn, L1 regularization)
  
        
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import os | |
| import random | |
| import numpy as np | |
| import pandas as pd | |
| from sklearn.linear_model import Lasso | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.model_selection import cross_val_score, GridSearchCV | |
| from sklearn.preprocessing import RobustScaler | |
| from sklearn.decomposition import PCA | |
| from sklearn import metrics | |
# Fit an L1-regularised (Lasso) linear regression on train.csv using a small
# cross-validated grid search, then write predictions for test.csv to
# prediction.csv.

folds = 3  # number of cross-validation folds handed to GridSearchCV
target = 'target'  # name given to the last column of train.csv
names = [f'p{i + 1}' for i in range(500)]  # feature column names p1..p500

# header=None: both files are read as having no header row, so the column
# names are supplied explicitly.
train = pd.read_csv('train.csv', header=None, names=names + [target])
test = pd.read_csv('test.csv', header=None, names=names)
train.info()
test.info()

# Standardization with RobustScaler (fit on train, applied to test) was tried
# but the model got worse, so the raw feature values are used.
y_train = train[target]
x_train = train[names]
x_test = test[names]

# Dimensionality reduction with PCA(n_components=20) was also tried but the
# model got worse, so all 500 features are kept.

# L1 regularization
model = Lasso()
pipe = Pipeline([('model', model)])
param_grid = {
    'model__alpha': [1.0],
    'model__max_iter': [1000, 2000],
}

# Scored with negated mean absolute error, so best_score_ is <= 0 and values
# closer to zero are better.
cv = GridSearchCV(
    pipe,
    cv=folds,
    param_grid=param_grid,
    scoring='neg_mean_absolute_error',
)
cv.fit(x_train, y_train)
print(f'best_params_={cv.best_params_!r}\nbest_score_={cv.best_score_!r}')

# GridSearchCV refits the best parameter combination by default (see the
# scikit-learn docs); predict() uses that refitted estimator.
preds = cv.predict(x_test)
submission = pd.DataFrame({target: preds})
print(submission.head())

# header=False (the documented boolean form) writes the predictions without a
# header row; index=False omits the row index.
submission.to_csv('prediction.csv', index=False, header=False)
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment