This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import pandas as pd | |
import numpy as np | |
import fbprophet as fbpro | |
import sklearn.metrics as skm | |
import math | |
import datetime as dt | |
class ProphetModeller(object): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Project/Contest: Recruit Restaurant Visitor Forecasting (https://www.kaggle.com/c/recruit-restaurant-visitor-forecasting) | |
# | |
# Summary: this is a basic pre-processing and feature-engineering script that transforms the original input data from the Customer
# into ready-for-modelling training and testing sets
# | |
# inspirations: | |
# - https://www.kaggle.com/the1owl/surprise-me/ | |
import numpy as np | |
import pandas as pd |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sparsity_ratio(X):
    """Return the fraction of entries in the 2-D matrix X that are zero.

    Args:
        X: array-like exposing a ``shape`` with at least two entries and
            accepted by ``np.count_nonzero``.

    Returns:
        A float: 1.0 when every entry is zero, 0.0 when none is.

    Note:
        An empty matrix (zero rows or zero columns) is not handled and
        results in a division by zero.
    """
    total_cells = float(X.shape[0] * X.shape[1])
    return 1.0 - np.count_nonzero(X) / total_cells
# Report the share of zero-valued entries in the input matrix X.
print("input sparsity ratio:", sparsity_ratio(X))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# GBM prediction | |
import numpy as np | |
import pandas as pd | |
from sklearn import * | |
import datetime as dt | |
def RMSLE(y, pred):
    """Return the square root of the mean squared error between y and pred.

    No logarithm is applied inside this function, so the result is a root
    mean squared *logarithmic* error only when the caller passes values that
    are already log-transformed -- TODO confirm at the call sites.

    Args:
        y: ground-truth target values.
        pred: predicted values, aligned with ``y``.

    Returns:
        The root of ``metrics.mean_squared_error(y, pred)``.
    """
    # `metrics` is expected to come from the file's `from sklearn import *`.
    return metrics.mean_squared_error(y, pred) ** 0.5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# GBM prediction | |
import numpy as np | |
import pandas as pd | |
from sklearn import * | |
import datetime as dt | |
def RMSLE(y, pred):
    """Return the square root of the mean squared error between y and pred.

    No logarithm is applied inside this function, so the result is a root
    mean squared *logarithmic* error only when the caller passes values that
    are already log-transformed -- TODO confirm at the call sites.

    Args:
        y: ground-truth target values.
        pred: predicted values, aligned with ``y``.

    Returns:
        The root of ``metrics.mean_squared_error(y, pred)``.
    """
    # `metrics` is expected to come from the file's `from sklearn import *`.
    return metrics.mean_squared_error(y, pred) ** 0.5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import pdpipe as pdp | |
# ... data reading code goes here | |
# set up a transformation pipeline | |
pipeline_1 = pdp.ApplyByCols( | |
['lat', 'lon', 'lat_inspection_location', 'lon_inspection_location'], | |
lambda col: pd.to_numeric(col) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_selection import RFE | |
# Define dictionary to store our rankings | |
ranks = {} | |
# Create our function which min-max scales the feature rankings and returns them as a dictionary keyed by feature name
def ranking(ranks, names, order=1):
    """Min-max scale raw feature scores and map them to their feature names.

    Args:
        ranks: one-dimensional sequence of numeric scores, one per feature.
        names: feature names, in the same order as ``ranks``.
        order: multiplier applied to the scores before scaling; a negative
            value such as -1 reverses which scores end up highest.

    Returns:
        A dict mapping each name to its scaled score rounded to two
        decimals. Pairing stops at the shorter of ``names`` and ``ranks``.
    """
    # NOTE(review): MinMaxScaler is not imported in the visible snippet --
    # presumably sklearn.preprocessing.MinMaxScaler; confirm it is in scope.
    scaler = MinMaxScaler()
    # Present the scores as a single column for fit_transform, then flatten
    # the result back into one row of scaled values.
    score_column = order * np.array([ranks]).T
    scaled = scaler.fit_transform(score_column).T[0]
    return {name: round(score, 2) for name, score in zip(names, scaled)}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_selection import SelectFromModel | |
# Select at most 200 features, using `modeller` as the estimator that
# SelectFromModel draws its feature importances from.
embeded_rf_selector = SelectFromModel(modeller, max_features=200)
embeded_rf_selector.fit(X, y)
# Mask over the columns of X marking the features that were kept.
embeded_rf_support = embeded_rf_selector.get_support()
# Names of the kept columns; X must support .loc indexing and expose
# .columns (presumably a pandas DataFrame -- confirm against the caller).
embeded_rf_feature = X.loc[:, embeded_rf_support].columns.tolist()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.inspection import permutation_importance | |
# Here's how you use permutation importance | |
def get_permutation_importance(X, y, model) -> pd.DataFrame: | |
result = permutation_importance(model, X, y, n_repeats=1, | |
random_state=0) | |
# permutational importance results | |
result_df = pd.DataFrame(colnames, columns=['Feature']) | |
result_df['permutation_importance'] = result.get('importances') |