This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def feature_matrix_from_entityset(es_dict, feature_defs): | |
"""Run deep feature synthesis from an entityset and feature definitions. | |
Saves feature matrix based on partition number.""" | |
# Extract the entityset | |
es = es_dict['es'] | |
# Calculate the feature matrix and save | |
feature_matrix = ft.calculate_feature_matrix(feature_definitions, | |
entityset=es, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def total_previous_month(numeric, datetime, time): | |
"""Return total of `numeric` column in the month prior to `time`.""" | |
df = pd.DataFrame({'value': numeric, 'time': datetime}) | |
previous_month = time.month - 1 | |
# Handle January | |
if previous_month == 0: | |
previous_month = 12 | |
previous_year = time.year - 1 | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
# Hyperparameter grid | |
param_grid = { | |
'boosting_type': ['gbdt', 'goss', 'dart'], | |
'num_leaves': list(range(20, 150)), | |
'learning_rate': list(np.logspace(np.log10(0.005), np.log10(0.5), base = 10, num = 1000)), | |
'subsample_for_bin': list(range(20000, 300000, 20000)), | |
'min_child_samples': list(range(20, 500, 5)), | |
'reg_alpha': list(np.linspace(0, 1)), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import lightgbm as lgb | |
def objective(hyperparameters, iteration): | |
"""Objective function for grid and random search. Returns | |
the cross validation score from a set of hyperparameters.""" | |
# Number of estimators will be found using early stopping | |
if 'n_estimators' in hyperparameters.keys(): | |
del hyperparameters['n_estimators'] | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from hyperopt import fmin, tpe

# Upper bound on the number of objective evaluations the search may perform
MAX_EVALS = 500

# Optimize: minimise `objective` over `space` with the TPE suggestion
# algorithm, recording the evaluations in `bayes_trials`.
# NOTE(review): `objective`, `space` and `bayes_trials` are not defined in
# this snippet -- they must already exist in the enclosing scope.
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            max_evals=MAX_EVALS, trials=bayes_trials)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ast

# Read in results and sort with best (lowest loss) on top
results = pd.read_csv('gbm_trials.csv')
results.sort_values('loss', ascending=True, inplace=True)

# After the sort, the first row is the trial with the smallest loss
best_row = results.iloc[0]

# Extract the ideal number of estimators and hyperparameters.
# Columns are selected by name: the trials file is written with the header
# loss, params, iteration, estimators, train_time, so positional index 4
# would pick up 'train_time' rather than 'estimators'.
best_bayes_estimators = int(best_row['estimators'])

# The params column comes back from the csv as text; turn it back into a
# dict so it can be unpacked with ** when the model is re-created.
# NOTE(review): literal_eval only accepts plain Python literals -- confirm
# the stored params contain no numpy scalar reprs.
best_bayes_params = ast.literal_eval(best_row['params'])
# Re-create the best model and train on the training data | |
best_bayes_model = lgb.LGBMClassifier(**best_bayes_params, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Write to the csv file ('a' means append).
# The context manager closes the file even if writerow raises, and
# newline='' lets the csv module control line endings itself (avoids blank
# rows between records on platforms that translate '\n').
with open(out_file, 'a', newline='') as of_connection:
    writer = csv.writer(of_connection)
    writer.writerow([loss, params, iteration, n_estimators, run_time])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv

# File to save first results
out_file = 'gbm_trials.csv'

# Create (or truncate) the file and write the headers to it.
# The context manager guarantees the handle is closed even if the write
# fails; newline='' lets the csv module control line endings itself.
with open(out_file, 'w', newline='') as of_connection:
    writer = csv.writer(of_connection)
    writer.writerow(['loss', 'params', 'iteration', 'estimators', 'train_time'])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sample from the full space
example = sample(space)

# The sampled 'boosting_type' entry is itself a dict holding the boosting
# type and, possibly, a subsample value.  Read the subsample first, using the
# dictionary get method with a default of 1.0 for when it is absent.
subsample = example['boosting_type'].get('subsample', 1.0)

# Assign top-level keys: replace the nested dict with the plain boosting
# type string and store subsample alongside it.
example['boosting_type'] = example['boosting_type']['boosting_type']
example['subsample'] = subsample
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# boosting type domain
# Each choice is a small dict pairing a boosting type with its subsample
# setting.  hyperopt requires every hyperparameter label in a search space
# to be unique, so the two uniform distributions carry distinct labels
# ('gbdt_subsample' / 'dart_subsample'); the dict key seen by the objective
# is still 'subsample' in every case.  goss uses a fixed subsample of 1.0.
boosting_type = {'boosting_type': hp.choice('boosting_type',
                                            [{'boosting_type': 'gbdt',
                                              'subsample': hp.uniform('gbdt_subsample', 0.5, 1)},
                                             {'boosting_type': 'dart',
                                              'subsample': hp.uniform('dart_subsample', 0.5, 1)},
                                             {'boosting_type': 'goss',
                                              'subsample': 1.0}])}