Last active
September 21, 2020 18:52
-
-
Save kstoneriv3/61c4155b4edda96b140e0025e4b7b70d to your computer and use it in GitHub Desktop.
benchmark code for `MutualInformationImportanceEvaluator`
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import lightgbm as lgb | |
import numpy as np | |
import pandas as pd | |
import sklearn.datasets | |
from sklearn.datasets import fetch_openml | |
import sklearn.metrics | |
from sklearn.model_selection import train_test_split | |
import optuna | |
import utils | |
def objective(trial):
    """Train LightGBM on the OpenML "adult" dataset and score it.

    The regularisation, tree-shape and sub-sampling hyperparameters are
    drawn from ``trial``; everything else is fixed.

    Args:
        trial: Optuna trial used to suggest hyperparameters.

    Returns:
        Accuracy of the rounded predictions on a 25% hold-out split.
    """
    adult = fetch_openml(name="adult")
    features = adult["data"]
    # factorize() returns (integer codes, uniques); only the codes are needed.
    labels, _ = pd.Series(adult["target"]).factorize()

    # NOTE(review): no random_state is passed, so every trial is scored on a
    # different random split.
    train_x, test_x, train_y, test_y = train_test_split(
        features, labels, test_size=0.25
    )
    train_set = lgb.Dataset(train_x, label=train_y)

    # Fixed settings first, then the tuned ones in their original order.
    params = {
        "objective": "binary",
        "metric": "binary_logloss",
        "verbosity": -1,
        "boosting_type": "gbdt",
    }
    params["lambda_l1"] = trial.suggest_loguniform("lambda_l1", 1e-8, 1)
    params["lambda_l2"] = trial.suggest_loguniform("lambda_l2", 1e-8, 1)
    params["num_leaves"] = trial.suggest_int("num_leaves", 2, 256)
    params["feature_fraction"] = trial.suggest_uniform("feature_fraction", 0.4, 1.0)
    params["bagging_fraction"] = trial.suggest_uniform("bagging_fraction", 0.4, 1.0)
    params["bagging_freq"] = trial.suggest_int("bagging_freq", 1, 8)
    params["min_child_samples"] = trial.suggest_int("min_child_samples", 5, 100)

    booster = lgb.train(params, train_set)
    # Round the predicted scores to the nearest integer label.
    predicted_labels = np.rint(booster.predict(test_x))
    return sklearn.metrics.accuracy_score(test_y, predicted_labels)
if __name__ == "__main__":
    import os

    # Re-attach to the stored study if it already exists in the SQLite file.
    study = optuna.create_study(
        sampler=optuna.samplers.RandomSampler(),
        study_name="lightgbm-hpi-1",
        storage="sqlite:///lightgbm.db",
        load_if_exists=True,
        direction="maximize",
    )
    # NOTE(review): optimization is disabled here, so only trials already
    # stored in the database are plotted.
    # study.optimize(objective, n_trials=100)

    # Strip only the extension. `__file__.split(".")[0]` cut at the first dot
    # anywhere in the path, so "./script.py" produced "_hpi.png".
    figure_path = "{}_hpi.png".format(os.path.splitext(__file__)[0])
    utils.save_benchmark(study, filename=figure_path, title="LightGBM")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
import torch.optim as optim | |
import torch.utils.data | |
from torchvision import datasets | |
from torchvision import transforms | |
import optuna | |
import utils | |
DEVICE = torch.device("cpu")  # device the model and every batch are moved to
CLASSES = 10  # output width of the final linear layer
DIR = os.getcwd()  # root directory passed to the MNIST dataset
EPOCHS = 20  # number of passes over the training set per trial


def define_model(trial):
    """Build a two-hidden-layer MLP whose architecture is drawn from ``trial``.

    For each hidden layer the trial suggests, in this order, the number of
    units (4-128), the activation ("relu" or "tanh") and the dropout
    probability (0.2-0.5). The network takes flattened 28x28 inputs and ends
    with a ``CLASSES``-wide linear layer followed by ``LogSoftmax``.

    Args:
        trial: Optuna trial used to suggest the architecture.

    Returns:
        An ``nn.Sequential`` holding the assembled layers.

    Raises:
        AssertionError: If the trial returns an activation name other than
            "relu" or "tanh".
    """
    activation_classes = {"relu": nn.ReLU, "tanh": nn.Tanh}

    layers = []
    in_features = 28 * 28
    for i in range(2):
        out_features = trial.suggest_int("n_units_l{}".format(i), 4, 128)
        activation = trial.suggest_categorical(
            "activation_l{}".format(i), choices=["relu", "tanh"]
        )
        p = trial.suggest_uniform("dropout_l{}".format(i), 0.2, 0.5)

        # Raise explicitly: a bare `assert False` is removed under
        # `python -O`, which would silently build a layer with no activation.
        if activation not in activation_classes:
            raise AssertionError("unexpected activation: {!r}".format(activation))

        layers.append(nn.Linear(in_features, out_features))
        layers.append(activation_classes[activation]())
        layers.append(nn.Dropout(p))

        in_features = out_features

    layers.append(nn.Linear(in_features, CLASSES))
    layers.append(nn.LogSoftmax(dim=1))

    return nn.Sequential(*layers)
def get_mnist(trial):
    """Create shuffled MNIST train and test loaders for one trial.

    The batch size is a tuned hyperparameter and is shared by both loaders.

    Args:
        trial: Optuna trial used to suggest ``batch_size``.

    Returns:
        A ``(train_loader, test_loader)`` tuple of ``DataLoader`` objects.
    """
    batch_size = trial.suggest_categorical("batch_size", choices=[8, 16, 32, 64, 128])

    # Only the training split requests a download into DIR.
    train_set = datasets.MNIST(
        DIR, train=True, download=True, transform=transforms.ToTensor()
    )
    test_set = datasets.MNIST(DIR, train=False, transform=transforms.ToTensor())

    loader_options = {"batch_size": batch_size, "shuffle": True}
    train_loader = torch.utils.data.DataLoader(train_set, **loader_options)
    test_loader = torch.utils.data.DataLoader(test_set, **loader_options)
    return train_loader, test_loader
def objective(trial):
    """Train the trial's MLP on MNIST and return its test accuracy.

    The trial chooses the architecture (via ``define_model``), the optimizer
    class, the learning rate and the batch size (via ``get_mnist``). After
    each of the ``EPOCHS`` epochs the mean per-sample training loss is
    printed and reported to the trial.

    Args:
        trial: Optuna trial used to suggest hyperparameters and to receive
            the intermediate loss values.

    Returns:
        Fraction of test samples classified correctly.
    """
    model = define_model(trial).to(DEVICE)

    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_loguniform("lr", 1e-5, 1e-1)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    train_loader, test_loader = get_mnist(trial)

    model.train()
    for epoch in range(EPOCHS):
        epoch_loss = 0.0
        for data, target in train_loader:
            data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            # nll_loss returns the batch *mean*. Weight it by the batch size
            # so that dividing by the dataset size below gives a per-sample
            # mean that is comparable across trials with different batch sizes.
            epoch_loss += loss.item() * data.size(0)
            loss.backward()
            optimizer.step()

        epoch_loss /= len(train_loader.dataset)
        print("Epoch {}: {}".format(epoch, epoch_loss))
        trial.report(epoch_loss, step=epoch)

    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)
            output = model(data)
            # The predicted class is the index of the largest log-probability.
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    return correct / len(test_loader.dataset)
if __name__ == "__main__":
    # Re-attach to the stored study if it already exists in the SQLite file.
    study = optuna.create_study(
        sampler=optuna.samplers.RandomSampler(),
        study_name="pytorch-hpi",
        storage="sqlite:///pytorch.db",
        load_if_exists=True,
        direction="maximize",
    )
    study.optimize(objective, n_trials=100)

    # Strip only the extension. `__file__.split(".")[0]` cut at the first dot
    # anywhere in the path, so "./script.py" produced "_hpi.png".
    figure_path = "{}_hpi.png".format(os.path.splitext(__file__)[0])
    utils.save_benchmark(study, filename=figure_path, title="PyTorch")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import lightgbm as lgb | |
import numpy as np | |
from numpy.linalg import norm | |
import pandas as pd | |
import sklearn.datasets | |
from sklearn.datasets import fetch_openml | |
import sklearn.metrics | |
from sklearn.model_selection import train_test_split | |
import optuna | |
import utils | |
D = 3  # number of relevant parameters; the same number of irrelevant ones is added


def objective(trial):
    """Toy objective with ``D`` relevant and ``D`` irrelevant parameters.

    Parameters ``x0 .. x{D-1}`` determine the value; ``x{D} .. x{2D-1}``
    are sampled from the same range but never used.

    Args:
        trial: Optuna trial used to suggest every parameter in [-3, 3].

    Returns:
        ``-2 * exp(-||x - 1||) - exp(-||x + 1||)`` evaluated on the relevant
        parameters, where ``1`` is the all-ones vector.
    """
    # Relevant parameters.
    point = np.array(
        [trial.suggest_uniform("x{}".format(i), -3, 3) for i in range(D)],
        dtype=float,
    )
    dist_to_plus_one = norm(point - np.ones(D))
    dist_to_minus_one = norm(point - (-np.ones(D)))
    value = -2 * np.exp(-dist_to_plus_one) - np.exp(-dist_to_minus_one)

    # Irrelevant parameters: sampled only so that they appear in the study.
    for i in range(D, 2 * D):
        trial.suggest_uniform("x{}".format(i), -3, 3)

    return value
if __name__ == "__main__":
    import os

    # NOTE(review): this study is stored in "lightgbm.db", the same file the
    # LightGBM benchmark uses - confirm that this is intended.
    study = optuna.create_study(
        sampler=optuna.samplers.RandomSampler(),
        study_name="toy-hpi",
        storage="sqlite:///lightgbm.db",
        load_if_exists=True,
        direction="maximize",
    )
    # NOTE(review): optimization is disabled here, so only trials already
    # stored in the database are plotted.
    # study.optimize(objective, n_trials=100)

    # Strip only the extension. `__file__.split(".")[0]` cut at the first dot
    # anywhere in the path, so "./script.py" produced "_hpi.png".
    figure_path = "{}_hpi.png".format(os.path.splitext(__file__)[0])
    # Title typo fixed: "importanct" -> "important".
    figure_title = (
        "Toy Example (x0-x2 are equally important and x3-x5 are irrelevant.)"
    )
    utils.save_benchmark(study, filename=figure_path, title=figure_title)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from numpy.linalg import norm | |
import optuna | |
from optuna.importance import MutualInformationImportanceEvaluator | |
from optuna.importance import FanovaImportanceEvaluator | |
from optuna.importance import MeanDecreaseImpurityImportanceEvaluator | |
from optuna.samplers import RandomSampler | |
D = 3  # number of relevant parameters; the same number of irrelevant ones is added


def objective1(trial):
    """Evaluate a toy function in which only the first ``D`` parameters matter.

    All ``2 * D`` parameters ``x0 .. x{2D-1}`` are suggested uniformly from
    [-3, 3]; the last ``D`` are discarded.

    Args:
        trial: Optuna trial used to suggest the parameters.

    Returns:
        ``-2 * exp(-||x - z1||) - exp(-||x - z2||)`` where ``x`` holds the
        first ``D`` parameters, ``z1`` is all ones and ``z2`` is all minus
        ones.
    """
    # Sample in the original order: relevant x0..x{D-1}, then irrelevant ones.
    names = ["x{}".format(d) for d in range(2 * D)]
    sampled = [trial.suggest_uniform(name, -3, 3) for name in names]

    x = np.asarray(sampled[:D], dtype=float)
    centres_and_weights = ((np.ones(D), -2), (-np.ones(D), -1))
    first, second = (
        weight * np.exp(-norm(x - centre)) for centre, weight in centres_and_weights
    )
    return first + second
def main():
    """Run the toy study and print importance estimates from two evaluators.

    Optimises ``objective1`` for 100 random trials, prints the mutual
    information estimate with its "stddiv" value for every parameter, prints
    the fANOVA importances, and finally shows the importance plot built with
    the fANOVA evaluator.
    """
    study = optuna.create_study(sampler=RandomSampler())
    study.optimize(objective1, n_trials=100)

    mi_evaluator = MutualInformationImportanceEvaluator()
    mi_estimates = mi_evaluator.evaluate(study)
    mi_stddivs = mi_evaluator.evaluate_stddiv(study)

    print()
    print("Mutual Information Estimates")
    # NOTE(review): estimates and stddivs are paired by position, which
    # assumes both mappings iterate in the same parameter order - confirm.
    for name, estimate, stddiv in zip(
        mi_estimates.keys(), mi_estimates.values(), mi_stddivs.values()
    ):
        print("{}: {} ±{}".format(name, estimate, stddiv))
    print()

    fanova_evaluator = FanovaImportanceEvaluator()
    fanova_importances = fanova_evaluator.evaluate(study)
    # evaluator = MeanDecreaseImpurityImportanceEvaluator()
    print("FANOVA Importance Estimates")
    for name in fanova_importances:
        print("{}: {}".format(name, fanova_importances[name]))
    print()

    # The plot uses the fANOVA evaluator, the last one created above.
    figure = optuna.visualization.plot_param_importances(
        study, evaluator=fanova_evaluator
    )
    figure.show()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment