Last active
July 11, 2021 07:03
-
-
Save keisuke-umezawa/4571551203eb02c1653c6cb36c571a4f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import time | |
import lightgbm as lgb | |
import numpy as np | |
import sklearn.datasets | |
import sklearn.metrics | |
from sklearn.model_selection import train_test_split | |
from statistics import mean, stdev | |
import optuna | |
class Profile:
    """Context manager measuring the wall-clock duration of its ``with`` block.

    Usage::

        with Profile() as prof:
            do_work()
        elapsed = prof.get()
    """

    def __enter__(self):
        # perf_counter is monotonic with the best available resolution, so the
        # measurement cannot be skewed by system clock adjustments (time.time
        # can jump backwards/forwards under NTP corrections).
        self.start = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.end = time.perf_counter()

    def get(self):
        """Return the elapsed seconds between __enter__ and __exit__."""
        return self.end - self.start
# FYI: Objective functions can take additional arguments
# (https://optuna.readthedocs.io/en/stable/faq.html#objective-func-additional-args).
def objective(trial, interval):
    """Optuna objective: train a LightGBM binary classifier and score it.

    Trains on a random 75/25 split of the breast-cancer dataset with
    trial-suggested hyper-parameters and returns validation accuracy.

    Args:
        trial: Optuna trial providing hyper-parameter suggestions.
        interval: pruning report interval; 0 disables the pruning callback.
    """
    features, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
    x_train, x_valid, y_train, y_valid = train_test_split(
        features, labels, test_size=0.25
    )
    train_set = lgb.Dataset(x_train, label=y_train)
    valid_set = lgb.Dataset(x_valid, label=y_valid)

    params = {
        "objective": "binary",
        "metric": "auc",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "num_iterations": 1000,
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }

    if interval == 0:
        # Baseline: no pruning callback at all.
        booster = lgb.train(
            params, train_set, valid_sets=[valid_set], verbose_eval=False
        )
    else:
        # Report intermediate AUC to Optuna every `interval` iterations so
        # unpromising trials can be pruned early.
        callback = optuna.integration.LightGBMPruningCallback(
            trial, "auc", report_interval=interval
        )
        booster = lgb.train(
            params,
            train_set,
            valid_sets=[valid_set],
            verbose_eval=False,
            callbacks=[callback],
        )

    predicted_labels = np.rint(booster.predict(x_valid))
    return sklearn.metrics.accuracy_score(y_valid, predicted_labels)
if __name__ == "__main__":
    # Benchmark how the pruning report interval affects total optimization time.
    parser = argparse.ArgumentParser()
    parser.add_argument("--n-trials", type=int, nargs="+", default=[100, 500])
    parser.add_argument(
        "--n-intervals", type=int, nargs="+", default=[0, 1, 2, 4, 8, 16]
    )
    args = parser.parse_args()

    repeats = 10  # each (trials, interval) cell is averaged over this many runs
    optuna.logging.set_verbosity(optuna.logging.CRITICAL)

    print("| #trials | #interval | time(sec) | time/trial(sec) |")
    print("| ------- | --------- | --------- | --------------- |")
    for n_trial in args.n_trials:
        for n_interval in args.n_intervals:
            durations = []
            for _ in range(repeats):
                if n_interval == 0:
                    # interval 0 means "no pruning": plain study, no pruner.
                    study = optuna.create_study(direction="maximize")
                else:
                    study = optuna.create_study(
                        pruner=optuna.pruners.MedianPruner(
                            n_warmup_steps=16, interval_steps=n_interval
                        ),
                        direction="maximize",
                    )
                with Profile() as prof:
                    study.optimize(
                        lambda t: objective(t, n_interval),
                        n_trials=n_trial,
                        gc_after_trial=False,
                    )
                durations.append(prof.get())
            m = mean(durations)
            s = stdev(durations)
            print(f"| {n_trial} | {n_interval} | {m:.2f}+-{s:.2f} | {m / n_trial:.3f} |")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import time | |
import lightgbm as lgb | |
import numpy as np | |
import sklearn.datasets | |
import sklearn.metrics | |
from sklearn.model_selection import train_test_split | |
from statistics import mean, stdev | |
import optuna | |
class Profile:
    """Context manager measuring the wall-clock duration of its ``with`` block.

    Usage::

        with Profile() as prof:
            do_work()
        elapsed = prof.get()
    """

    def __enter__(self):
        # perf_counter is monotonic with the best available resolution, so the
        # measurement cannot be skewed by system clock adjustments (time.time
        # can jump backwards/forwards under NTP corrections).
        self.start = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.end = time.perf_counter()

    def get(self):
        """Return the elapsed seconds between __enter__ and __exit__."""
        return self.end - self.start
# FYI: Objective functions can take additional arguments
# (https://optuna.readthedocs.io/en/stable/faq.html#objective-func-additional-args).
def objective(trial, interval):
    """Optuna objective: train a LightGBM binary classifier and score it.

    Trains on a random 75/25 split of the breast-cancer dataset with
    trial-suggested hyper-parameters and returns validation accuracy.

    Args:
        trial: Optuna trial providing hyper-parameter suggestions.
        interval: 0 disables pruning; any other value enables the pruning
            callback (with its default report interval).
    """
    features, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
    x_train, x_valid, y_train, y_valid = train_test_split(
        features, labels, test_size=0.25
    )
    train_set = lgb.Dataset(x_train, label=y_train)
    valid_set = lgb.Dataset(x_valid, label=y_valid)

    params = {
        "objective": "binary",
        "metric": "auc",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "num_iterations": 100,
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }

    if interval == 0:
        # Baseline: no pruning callback at all.
        booster = lgb.train(
            params, train_set, valid_sets=[valid_set], verbose_eval=False
        )
    else:
        # Report intermediate AUC to Optuna so unpromising trials can be
        # pruned early; the callback's default report interval is used here.
        callback = optuna.integration.LightGBMPruningCallback(trial, "auc")
        booster = lgb.train(
            params,
            train_set,
            valid_sets=[valid_set],
            verbose_eval=False,
            callbacks=[callback],
        )

    predicted_labels = np.rint(booster.predict(x_valid))
    return sklearn.metrics.accuracy_score(y_valid, predicted_labels)
if __name__ == "__main__":
    # Benchmark how the pruning report interval affects total optimization time.
    parser = argparse.ArgumentParser()
    parser.add_argument("--n-trials", type=int, nargs="+", default=[10, ])
    parser.add_argument(
        "--n-intervals", type=int, nargs="+", default=[0, 1, 2, 4, 8, 16, 32]
    )
    args = parser.parse_args()
    n_trials = args.n_trials
    n_intervals = args.n_intervals
    times = 10  # each (trials, interval) cell is averaged over this many runs
    optuna.logging.set_verbosity(optuna.logging.CRITICAL)
    print("| #trials | #interval | time(sec) | time/trial(sec) |")
    print("| ------- | --------- | --------- | --------------- |")
    for n_trial in n_trials:
        for n_interval in n_intervals:
            r = []
            for _ in range(times):
                if n_interval == 0:
                    # BUG FIX: this branch previously passed
                    # storage="sqlite:///db.sqlite3" while the pruning branch
                    # used the default in-memory storage. The SQLite round
                    # trips inflated only the baseline's timings, making the
                    # interval comparison unfair. Both branches now use the
                    # same in-memory storage.
                    study = optuna.create_study(direction="maximize")
                else:
                    study = optuna.create_study(
                        pruner=optuna.pruners.MedianPruner(
                            n_warmup_steps=16, interval_steps=n_interval
                        ),
                        direction="maximize",
                    )
                with Profile() as prof:
                    study.optimize(
                        lambda t: objective(t, n_interval),
                        n_trials=n_trial,
                        gc_after_trial=False,
                    )
                r.append(prof.get())
            m = mean(r)
            s = stdev(r)
            print(f"| {n_trial} | {n_interval} | {m:.2f}+-{s:.2f} | {m / n_trial:.3f} |")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment