Forked from glennq/adam_lbfgs_compare.py (created December 27, 2017)
Benchmarks for learning rate updating schemes in MLP
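# NOTE: these scripts appear to target a pre-0.18 scikit-learn development
# branch: MLPClassifier/MLPRegressor take an `algorithm=` parameter (renamed to
# `solver=` in the 0.18 release), and train_test_split is still imported from
# sklearn.cross_validation (moved to sklearn.model_selection, removed in 0.20).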
import numpy as np
from time import time
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
h = .02 # step size in the mesh
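# Three optimisation schemes are compared on each toy dataset: Adam, Adam with
# early stopping, and L-BFGS, all with two hidden layers of 100 units.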
ESTIMATORS = {
'Adam': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='adam', learning_rate_init=0.001, verbose=1,
tol=1e-4, random_state=1),
'Adam_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='adam', learning_rate_init=0.001, verbose=1,
tol=1e-4, random_state=1, early_stopping=True),
'l-bfgs': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='l-bfgs', learning_rate_init=0.01, verbose=1,
tol=1e-4, random_state=1, early_stopping=False),
}
names = ESTIMATORS.keys()
classifiers = ESTIMATORS.values()
def make_datasets(n_samples=100):
X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
random_state=1, n_clusters_per_class=1,
n_samples=n_samples)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
linearly_separable = (X, y)
datasets = [make_moons(noise=0.3, random_state=0, n_samples=n_samples),
make_circles(noise=0.2, factor=0.5, random_state=1,
n_samples=n_samples),
linearly_separable]
return datasets
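# Each dataset gets one column of subplots: the raw points in the first row,
# then one row per classifier showing its decision surface, with the test
# accuracy printed at the bottom right and training time (seconds) at the
# bottom left of each panel.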
figure = plt.figure(figsize=(27, 9))
i = 0
# iterate over datasets
sample_sizes = range(100, 1000, 400)
datasets = []
for n_samples in sample_sizes:
datasets += make_datasets(n_samples)
for j, ds in enumerate(datasets):
# preprocess dataset, split into training and test part
X, y = ds
X = StandardScaler().fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
np.arange(y_min, y_max, h))
# just plot the dataset first
cm = plt.cm.RdBu
cm_bright = ListedColormap(['#FF0000', '#0000FF'])
ax = plt.subplot(len(classifiers) + 1, len(datasets),
i % (len(classifiers) + 1) * len(datasets) + j + 1)
# Plot the training points
ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
# and testing points
ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
alpha=0.6)
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
ax.set_xticks(())
ax.set_yticks(())
ax.set_title(str(len(y)), fontsize=10)
i += 1
# iterate over classifiers
cnt = 0
for name, clf in zip(names, classifiers):
cnt += 1
ax = plt.subplot(len(classifiers) + 1, len(datasets),
i % (len(classifiers) + 1) * len(datasets) + j + 1)
time_start = time()
clf.fit(X_train, y_train)
train_time = time() - time_start
score = clf.score(X_test, y_test)
# Plot the decision boundary. For that, we will assign a color to
# each point in the mesh [x_min, x_max] x [y_min, y_max].
if hasattr(clf, "decision_function"):
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
else:
Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
# Put the result into a color plot
Z = Z.reshape(xx.shape)
ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)
# Plot also the training points
ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
# and testing points
ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
alpha=0.6)
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
ax.set_xticks(())
ax.set_yticks(())
ax.set_title(name, fontsize=10)
ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),
size=15, horizontalalignment='right')
ax.text(xx.min() + .3, yy.min() + .3,
('%.3f' % train_time).lstrip('0'),
size=15, horizontalalignment='left')
i += 1
figure.subplots_adjust(left=.02, right=.98)
plt.show()
"""
Benchmarking Adam and L-BFGS on the Boston housing dataset
Regression performance:
===========================
Regressor train-time test-time test-score
----------------------------------------------------------------------------
adam 0.3896s 0.0003s 0.8606
l-bfgs 0.5861s 0.0003s 0.8689
adam-early 0.6177s 0.0004s 0.8750
"""
from __future__ import print_function
import numpy as np
from time import time
import argparse
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.neural_network import MLPRegressor
# import some data to play with
def load_data():
dataset = datasets.load_boston()
X = dataset.data # use all 13 features (scaling is done per split below)
y = dataset.target
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2,
random_state=1)
scaler = StandardScaler()
train_X = scaler.fit_transform(train_X)
test_X = scaler.transform(test_X)
return train_X, test_X, train_y, test_y
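# Three regressors with (100, 100) hidden layers are benchmarked:
# Adam, Adam with early stopping, and L-BFGS.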
ESTIMATORS = {'adam': MLPRegressor(random_state=1,
hidden_layer_sizes=(100, 100)),
'adam-early': MLPRegressor(random_state=1, early_stopping=True,
hidden_layer_sizes=(100, 100)),
'l-bfgs': MLPRegressor(algorithm='l-bfgs', random_state=1,
hidden_layer_sizes=(100, 100))}
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--estimators', nargs="+",
choices=ESTIMATORS.keys() + ['all'], type=str,
default=['adam', 'adam-early', 'l-bfgs'],
help="list of classifiers to benchmark.")
parser.add_argument('--n-jobs', nargs="?", default=1, type=int,
help="Number of concurrently running workers for "
"models that support parallelism.")
parser.add_argument('--random-seed', nargs="?", default=0, type=int,
help="Common seed used by random number generator.")
args = vars(parser.parse_args())
print(__doc__)
X_train, X_test, y_train, y_test = load_data()
print("")
print("Dataset statistics:")
print("===================")
print("%s %d" % ("number of features:".ljust(25), X_train.shape[1]))
print("%s %d" % ("number of classes:".ljust(25), np.unique(y_train).size))
print("%s %s" % ("data type:".ljust(25), X_train.dtype))
print("%s %d (size=%dMB)" % ("number of train samples:".ljust(25),
X_train.shape[0], int(X_train.nbytes / 1e6)))
print("%s %d (size=%dMB)" % ("number of test samples:".ljust(25),
X_test.shape[0], int(X_test.nbytes / 1e6)))
print()
print("Training Estimators")
print("====================")
error, train_time, test_time = {}, {}, {}
if 'all' in args['estimators']:
args['estimators'] = ESTIMATORS.keys()
for name in sorted(args["estimators"]):
print("Training %s ... " % name, end="")
estimator = ESTIMATORS[name]
estimator_params = estimator.get_params()
estimator.set_params(**{p: args["random_seed"]
for p in estimator_params
if p.endswith("random_state")})
if "n_jobs" in estimator_params:
estimator.set_params(n_jobs=args["n_jobs"])
time_start = time()
estimator.fit(X_train, y_train)
train_time[name] = time() - time_start
time_start = time()
y_pred = estimator.predict(X_test)
test_time[name] = time() - time_start
error[name] = estimator.score(X_test, y_test)
print("done")
print()
print("Regression performance:")
print("===========================")
print("{0: <23} {1: >10} {2: >11} {3: >12}"
"".format("Regressor ", "train-time", "test-time",
"test-score"))
print("-" * 76)
for name in sorted(args["estimators"], key=error.get):
print("{0: <24} {1: >10.4f}s {2: >10.4f}s {3: >12.4f}"
"".format(name, train_time[name], test_time[name], error[name]))
print()
"""
Benchmarking Adam and L-BFGS on the diabetes dataset
Regression performance:
===========================
Regressor train-time test-time test-score
----------------------------------------------------------------------------
adam-early 0.3612s 0.0002s 0.2961
adam 0.4856s 0.0003s 0.3538
l-bfgs 0.4855s 0.0003s 0.4170
"""
from __future__ import print_function
import numpy as np
from time import time
import argparse
from sklearn import datasets
from sklearn.cross_validation import train_test_split
from sklearn.neural_network import MLPRegressor
# import some data to play with
def load_data():
dataset = datasets.load_diabetes()
X = dataset.data # use all ten diabetes features
y = dataset.target
return train_test_split(X, y, test_size=0.2, random_state=1)
ESTIMATORS = {'adam': MLPRegressor(random_state=1,
hidden_layer_sizes=(100, 100)),
'adam-early': MLPRegressor(random_state=1, early_stopping=True,
hidden_layer_sizes=(100, 100)),
'l-bfgs': MLPRegressor(algorithm='l-bfgs', random_state=1,
hidden_layer_sizes=(100, 100))}
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--estimators', nargs="+",
choices=ESTIMATORS.keys() + ['all'], type=str,
default=['adam', 'adam-early', 'l-bfgs'],
help="list of classifiers to benchmark.")
parser.add_argument('--n-jobs', nargs="?", default=1, type=int,
help="Number of concurrently running workers for "
"models that support parallelism.")
parser.add_argument('--random-seed', nargs="?", default=0, type=int,
help="Common seed used by random number generator.")
args = vars(parser.parse_args())
print(__doc__)
X_train, X_test, y_train, y_test = load_data()
print("")
print("Dataset statistics:")
print("===================")
print("%s %d" % ("number of features:".ljust(25), X_train.shape[1]))
print("%s %d" % ("number of classes:".ljust(25), np.unique(y_train).size))
print("%s %s" % ("data type:".ljust(25), X_train.dtype))
print("%s %d (size=%dMB)" % ("number of train samples:".ljust(25),
X_train.shape[0], int(X_train.nbytes / 1e6)))
print("%s %d (size=%dMB)" % ("number of test samples:".ljust(25),
X_test.shape[0], int(X_test.nbytes / 1e6)))
print()
print("Training Estimators")
print("====================")
error, train_time, test_time = {}, {}, {}
if 'all' in args['estimators']:
args['estimators'] = ESTIMATORS.keys()
for name in sorted(args["estimators"]):
print("Training %s ... " % name, end="")
estimator = ESTIMATORS[name]
estimator_params = estimator.get_params()
estimator.set_params(**{p: args["random_seed"]
for p in estimator_params
if p.endswith("random_state")})
if "n_jobs" in estimator_params:
estimator.set_params(n_jobs=args["n_jobs"])
time_start = time()
estimator.fit(X_train, y_train)
train_time[name] = time() - time_start
time_start = time()
y_pred = estimator.predict(X_test)
test_time[name] = time() - time_start
error[name] = estimator.score(X_test, y_test)
print("done")
print()
print("Regression performance:")
print("===========================")
print("{0: <23} {1: >10} {2: >11} {3: >12}"
"".format("Regressor ", "train-time", "test-time",
"test-score"))
print("-" * 76)
for name in sorted(args["estimators"], key=error.get):
print("{0: <24} {1: >10.4f}s {2: >10.4f}s {3: >12.4f}"
"".format(name, train_time[name], test_time[name], error[name]))
print()
"""
Benchmarking Adam and L-BFGS MLP classifiers on the digits dataset
Classification performance:
===========================
Classifier train-time test-time error-rate
----------------------------------------------------------------------------
adam 1.1049s 0.0010s 0.0167
l-bfgs 0.0910s 0.0008s 0.0306
adam-early 0.1354s 0.0009s 0.0528
"""
from __future__ import print_function
import numpy as np
from time import time
import argparse
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import zero_one_loss
from sklearn.cross_validation import train_test_split
from sklearn.neural_network import MLPClassifier
def load_data():
dataset = datasets.load_digits()
X = dataset.data # use all 64 pixel features (scaling is done per split below)
y = dataset.target
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2,
random_state=1)
scaler = StandardScaler()
train_X = scaler.fit_transform(train_X)
test_X = scaler.transform(test_X)
return train_X, test_X, train_y, test_y
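# The classifiers below use scikit-learn's default MLP settings (a single
# hidden layer of 100 units); only the solver and early stopping differ.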
ESTIMATORS = {'adam': MLPClassifier(random_state=1),
'adam-early': MLPClassifier(random_state=1, early_stopping=True),
'l-bfgs': MLPClassifier(algorithm='l-bfgs', random_state=1)}
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--classifiers', nargs="+",
choices=ESTIMATORS.keys() + ['all'], type=str,
default=['adam', 'adam-early', 'l-bfgs'],
help="list of classifiers to benchmark.")
parser.add_argument('--n-jobs', nargs="?", default=1, type=int,
help="Number of concurrently running workers for "
"models that support parallelism.")
parser.add_argument('--random-seed', nargs="?", default=0, type=int,
help="Common seed used by random number generator.")
args = vars(parser.parse_args())
print(__doc__)
X_train, X_test, y_train, y_test = load_data()
print("")
print("Dataset statistics:")
print("===================")
print("%s %d" % ("number of features:".ljust(25), X_train.shape[1]))
print("%s %d" % ("number of classes:".ljust(25), np.unique(y_train).size))
print("%s %s" % ("data type:".ljust(25), X_train.dtype))
print("%s %d (size=%dMB)" % ("number of train samples:".ljust(25),
X_train.shape[0], int(X_train.nbytes / 1e6)))
print("%s %d (size=%dMB)" % ("number of test samples:".ljust(25),
X_test.shape[0], int(X_test.nbytes / 1e6)))
print()
print("Training Classifiers")
print("====================")
error, train_time, test_time, loss_curve, val_curve = {}, {}, {}, {}, {}
if 'all' in args['classifiers']:
args['classifiers'] = ESTIMATORS.keys()
for name in sorted(args["classifiers"]):
print("Training %s ... " % name, end="")
estimator = ESTIMATORS[name]
estimator_params = estimator.get_params()
estimator.set_params(**{p: args["random_seed"]
for p in estimator_params
if p.endswith("random_state")})
if "n_jobs" in estimator_params:
estimator.set_params(n_jobs=args["n_jobs"])
time_start = time()
estimator.fit(X_train, y_train)
train_time[name] = time() - time_start
time_start = time()
y_pred = estimator.predict(X_test)
test_time[name] = time() - time_start
error[name] = zero_one_loss(y_test, y_pred)
print("done")
print()
print("Classification performance:")
print("===========================")
print("{0: <23} {1: >10} {2: >11} {3: >12}"
"".format("Classifier ", "train-time", "test-time", "error-rate"))
print("-" * 76)
for name in sorted(args["classifiers"], key=error.get):
print("{0: <24} {1: >10.4f}s {2: >10.4f}s {3: >12.4f}"
"".format(name, train_time[name], test_time[name], error[name]))
print()
"""
Benchmarking Adam and L-BFGS MLP classifiers on the iris dataset
Classification performance:
===========================
Classifier train-time test-time error-rate
----------------------------------------------------------------------------
adam 0.1003s 0.0002s 0.0333
l-bfgs 0.0344s 0.0001s 0.0333
adam-early 0.0083s 0.0001s 0.5333
"""
from __future__ import print_function
import numpy as np
from time import time
import argparse
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import zero_one_loss
from sklearn.cross_validation import train_test_split
from sklearn.neural_network import MLPClassifier
def load_data():
dataset = datasets.load_iris()
X = dataset.data # use all four iris features (scaling is done per split below)
y = dataset.target
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2,
random_state=1)
scaler = StandardScaler()
train_X = scaler.fit_transform(train_X)
test_X = scaler.transform(test_X)
return train_X, test_X, train_y, test_y
ESTIMATORS = {'adam': MLPClassifier(random_state=1),
'adam-early': MLPClassifier(random_state=1, early_stopping=True),
'l-bfgs': MLPClassifier(algorithm='l-bfgs', random_state=1)}
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--classifiers', nargs="+",
choices=ESTIMATORS.keys() + ['all'], type=str,
default=['adam', 'adam-early', 'l-bfgs'],
help="list of classifiers to benchmark.")
parser.add_argument('--n-jobs', nargs="?", default=1, type=int,
help="Number of concurrently running workers for "
"models that support parallelism.")
parser.add_argument('--random-seed', nargs="?", default=0, type=int,
help="Common seed used by random number generator.")
args = vars(parser.parse_args())
print(__doc__)
X_train, X_test, y_train, y_test = load_data()
print("")
print("Dataset statistics:")
print("===================")
print("%s %d" % ("number of features:".ljust(25), X_train.shape[1]))
print("%s %d" % ("number of classes:".ljust(25), np.unique(y_train).size))
print("%s %s" % ("data type:".ljust(25), X_train.dtype))
print("%s %d (size=%dMB)" % ("number of train samples:".ljust(25),
X_train.shape[0], int(X_train.nbytes / 1e6)))
print("%s %d (size=%dMB)" % ("number of test samples:".ljust(25),
X_test.shape[0], int(X_test.nbytes / 1e6)))
print()
print("Training Classifiers")
print("====================")
error, train_time, test_time, loss_curve, val_curve = {}, {}, {}, {}, {}
if 'all' in args['classifiers']:
args['classifiers'] = ESTIMATORS.keys()
for name in sorted(args["classifiers"]):
print("Training %s ... " % name, end="")
estimator = ESTIMATORS[name]
estimator_params = estimator.get_params()
estimator.set_params(**{p: args["random_seed"]
for p in estimator_params
if p.endswith("random_state")})
if "n_jobs" in estimator_params:
estimator.set_params(n_jobs=args["n_jobs"])
time_start = time()
estimator.fit(X_train, y_train)
train_time[name] = time() - time_start
time_start = time()
y_pred = estimator.predict(X_test)
test_time[name] = time() - time_start
error[name] = zero_one_loss(y_test, y_pred)
print("done")
print()
print("Classification performance:")
print("===========================")
print("{0: <23} {1: >10} {2: >11} {3: >12}"
"".format("Classifier ", "train-time", "test-time", "error-rate"))
print("-" * 76)
for name in sorted(args["classifiers"], key=error.get):
print("{0: <24} {1: >10.4f}s {2: >10.4f}s {3: >12.4f}"
"".format(name, train_time[name], test_time[name], error[name]))
print()
"""
Benchmarking MLP performance on the 20 Newsgroups dataset
Classification performance:
===========================
Classifier train-time test-time Accuracy
-------------------------------------------------------------------
MLP_SGD_constant_no_momentum_early 61.9779s 0.1686s 0.0333
MLP_SGD_invscaling_nesterov 143.6211s 0.1713s 0.0506
MLP_SGD_invscaling_nesterov_early 212.4194s 0.1691s 0.0769
MLP_SGD_constant_no_momentum 6882.5231s 0.1800s 0.5842
MLP_SGD_constant_nesterov_early 1871.2481s 0.1793s 0.7302
MLP_SGD_adaptive_nesterov_early 2395.0412s 0.1822s 0.7382
MLP_SGD_constant_nesterov 5725.5833s 0.1799s 0.7649
MLP_SGD_adaptive_nesterov 6263.0604s 0.1733s 0.7678
MLP_SGD_constant_momentum 4174.8977s 0.1666s 0.7678
MLP_Adam 1395.2267s 0.1822s 0.8314
MLP_Adam_early 528.2558s 0.1718s 0.8330
With learning_rate_init=0.1 for the SGD variants:
Classification performance:
===========================
Classifier train-time test-time Accuracy
-------------------------------------------------------------------
MLP_SGD_invscaling_nesterov_early 96.1587s 0.1757s 0.1032
MLP_SGD_invscaling_nesterov 11964.9086s 0.2026s 0.1374
MLP_SGD_constant_no_momentum_early 326.6792s 0.2224s 0.1620
MLP_SGD_constant_nesterov_early 407.4799s 0.2217s 0.6190
MLP_SGD_adaptive_nesterov_early 2152.5739s 0.2308s 0.7338
MLP_SGD_constant_momentum 1169.3004s 0.2609s 0.7617
MLP_SGD_constant_nesterov 1634.3541s 0.2659s 0.7681
MLP_SGD_constant_no_momentum 5292.0535s 0.2432s 0.7747
MLP_SGD_adaptive_nesterov 2166.2128s 0.2393s 0.7781
MLP_Adam 1740.3218s 0.2305s 0.8314
MLP_Adam_early 636.6075s 0.1822s 0.8330
"""
from __future__ import print_function, division
from time import time
import cPickle as pickle
import argparse
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_20newsgroups_vectorized
from sklearn.metrics import accuracy_score
from sklearn.utils.validation import check_array
from sklearn.neural_network import MLPClassifier
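# make_plots draws one panel with every non-early-stopping loss curve, then one
# panel per early-stopping variant comparing its loss curve, the corresponding
# non-early curve, and its validation-score curve.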
def make_plots(loss, val_loss):
non_early = [name for name in loss if not name.endswith('_early')]
early = [name for name in loss if name.endswith('_early')]
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
make_sub_plot({name: loss[name] for name in non_early}, axes.ravel()[0])
for name, ax in zip(early, axes.ravel()[1:]):
make_sub_plot({name[:-6]: loss[name[:-6]], name: loss[name],
name + '_val': val_loss[name]}, ax)
plt.subplots_adjust(hspace=0.45)
plt.subplots_adjust(top=0.8)
plt.show()
def make_sub_plot(loss, ax):
plot_args = [{'c': 'red', 'linestyle': '-'},
{'c': 'green', 'linestyle': '-'},
{'c': 'blue', 'linestyle': '-'},
{'c': 'red', 'linestyle': '--'},
{'c': 'green', 'linestyle': '--'},
{'c': 'blue', 'linestyle': '--'}]
for label, loss_curve, args in zip(loss.keys(), loss.values(), plot_args):
ax.plot(loss_curve, label=label, **args)
if len(loss) > 3:
ax.legend(ax.get_lines(), labels=loss.keys(), loc='center right',
bbox_to_anchor=(0.95, 1.30), fontsize=11)
else:
ax.legend(ax.get_lines(), labels=loss.keys(), loc='center right',
bbox_to_anchor=(1.05, 1.20), fontsize=11)
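# The grid below covers SGD with constant, inverse-scaling and adaptive
# learning-rate schedules, with or without (Nesterov) momentum and early
# stopping, plus Adam; every model uses two hidden layers of 100 units.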
ESTIMATORS = {
'MLP_SGD_constant_no_momentum': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0, verbose=1,
tol=1e-4, random_state=1, nesterovs_momentum=False),
'MLP_SGD_constant_no_momentum_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0, verbose=1,
tol=1e-4, random_state=1, nesterovs_momentum=False,
early_stopping=True),
'MLP_SGD_constant_momentum': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9, verbose=1,
tol=1e-4, random_state=1, nesterovs_momentum=False),
'MLP_SGD_constant_nesterov': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1),
'MLP_SGD_constant_nesterov_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
early_stopping=True),
'MLP_SGD_invscaling_nesterov': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
learning_rate='invscaling'),
'MLP_SGD_invscaling_nesterov_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
learning_rate='invscaling', early_stopping=True),
'MLP_SGD_adaptive_nesterov': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
learning_rate='adaptive'),
'MLP_SGD_adaptive_nesterov_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
learning_rate='adaptive', early_stopping=True),
'MLP_Adam': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='adam', learning_rate_init=0.001, verbose=1,
tol=1e-4, random_state=1),
'MLP_Adam_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='adam', learning_rate_init=0.001, verbose=1,
tol=1e-4, random_state=1, early_stopping=True),
}
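# The second results table in the module docstring was produced with
# learning_rate_init=0.1 for the SGD estimators. A minimal sketch (not part of
# the original run) of how that variant could be reproduced:
#
#     for est_name, est in ESTIMATORS.items():
#         if est_name.startswith('MLP_SGD'):
#             est.set_params(learning_rate_init=0.1)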
###############################################################################
# Data
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-e', '--estimators', nargs="+", required=True,
choices=ESTIMATORS.keys() + ['all'])
args = vars(parser.parse_args())
data_train = fetch_20newsgroups_vectorized(subset="train")
data_test = fetch_20newsgroups_vectorized(subset="test")
X_train = check_array(data_train.data, dtype=np.float32,
accept_sparse="csc")
X_test = check_array(data_test.data, dtype=np.float32, accept_sparse="csr")
y_train = data_train.target
y_test = data_test.target
print("20 newsgroups")
print("=============")
print("X_train.shape = {0}".format(X_train.shape))
print("X_train.format = {0}".format(X_train.format))
print("X_train.dtype = {0}".format(X_train.dtype))
print("X_train density = {0}"
"".format(X_train.nnz / np.product(X_train.shape)))
print("y_train {0}".format(y_train.shape))
print("X_test {0}".format(X_test.shape))
print("X_test.format = {0}".format(X_test.format))
print("X_test.dtype = {0}".format(X_test.dtype))
print("y_test {0}".format(y_test.shape))
print()
print("Classifier Training")
print("===================")
accuracy, train_time, test_time, loss_curve, val_curve = {}, {}, {}, {}, {}
if 'all' in args['estimators']:
args['estimators'] = ESTIMATORS.keys()
for name in sorted(args["estimators"]):
clf = ESTIMATORS[name]
try:
clf.set_params(random_state=0)
except (TypeError, ValueError):
pass
print("Training %s ... " % name, end="")
t0 = time()
clf.fit(X_train, y_train)
train_time[name] = time() - t0
t0 = time()
y_pred = clf.predict(X_test)
test_time[name] = time() - t0
accuracy[name] = accuracy_score(y_test, y_pred)
loss_curve[name] = clf.loss_curve_
val_curve[name] = getattr(clf, 'validation_scores_', [])
print("done")
print()
print("Classification performance:")
print("===========================")
print()
print("%s %s %s %s" % ("Classifier ", "train-time", "test-time",
"Accuracy"))
print("-" * 67)
for name in sorted(accuracy, key=accuracy.get):
print("%s %s %s %s" % (name.ljust(36),
("%.4fs" % train_time[name]).center(10),
("%.4fs" % test_time[name]).center(10),
("%.4f" % accuracy[name]).center(10)))
print()
with open('loss_history_20news.pkl', 'wb') as f:
pickle.dump(loss_curve, f)
with open('val_loss_history_20news.pkl', 'wb') as f:
pickle.dump(val_curve, f)
make_plots(loss_curve, val_curve)
"""
Benchmarking MLP performance on the MNIST dataset
Classification performance:
===========================
Classifier train-time test-time error-rate
----------------------------------------------------------------------------
MLP_SGD_constant_momentum 105.07s 0.10s 0.0205
MLP_SGD_adaptive_nesterov 166.20s 0.10s 0.0213
MLP_SGD_constant_nesterov 123.11s 0.11s 0.0219
MLP_Adam 49.43s 0.26s 0.0224
MLP_SGD_constant_no_momentum 532.17s 0.11s 0.0231
MLP_Adam_early 19.61s 0.12s 0.0241
MLP_SGD_adaptive_nesterov_early 57.51s 0.11s 0.0251
MLP_SGD_constant_nesterov_early 29.66s 0.11s 0.0283
MLP_SGD_constant_no_momentum_early 95.10s 0.11s 0.0388
MLP_SGD_invscaling_nesterov 46.28s 0.14s 0.0785
MLP_SGD_invscaling_nesterov_early 17.27s 0.12s 0.0817
With learning_rate_init=0.1 for the SGD variants:
Classification performance:
===========================
Classifier train-time test-time error-rate
----------------------------------------------------------------------------
MLP_SGD_constant_momentum 37.69s 0.13s 0.0170
MLP_SGD_constant_nesterov 48.36s 0.13s 0.0171
MLP_SGD_adaptive_nesterov 91.48s 0.13s 0.0171
MLP_SGD_adaptive_nesterov_early 57.50s 0.12s 0.0197
MLP_SGD_constant_no_momentum 112.04s 0.13s 0.0204
MLP_SGD_constant_nesterov_early 19.50s 0.14s 0.0213
MLP_Adam 55.58s 0.14s 0.0224
MLP_SGD_constant_no_momentum_early 39.07s 0.13s 0.0229
MLP_Adam_early 22.73s 0.13s 0.0241
MLP_SGD_invscaling_nesterov 107.90s 0.15s 0.0304
MLP_SGD_invscaling_nesterov_early 44.23s 0.16s 0.0345
"""
from __future__ import print_function
import os
import cPickle as pickle
from time import time
import argparse
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_mldata
from sklearn.datasets import get_data_home
from sklearn.externals.joblib import Memory
from sklearn.metrics import zero_one_loss
from sklearn.utils import check_array
from sklearn.neural_network import MLPClassifier
# Memoize the data extraction and memory map the resulting
# train / test splits in readonly mode
memory = Memory(os.path.join(get_data_home(), 'mnist_benchmark_data'),
mmap_mode='r')
@memory.cache
def load_data(dtype=np.float32, order='F'):
"""Load the data, then cache and memmap the train/test split"""
######################################################################
# Load dataset
print("Loading dataset...")
data = fetch_mldata('MNIST original')
X = check_array(data['data'], dtype=dtype, order=order)
y = data["target"]
# Normalize features
X = X / 255
# Create train-test split (as [Joachims, 2006])
print("Creating train-test split...")
n_train = 60000
X_train = X[:n_train]
y_train = y[:n_train]
X_test = X[n_train:]
y_test = y[n_train:]
return X_train, X_test, y_train, y_test
def make_plots(loss, val_loss):
non_early = [name for name in loss if not name.endswith('_early')]
early = [name for name in loss if name.endswith('_early')]
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
# not including MLP_SGD_constant_no_momentum because the number of
# iterations is too large
make_sub_plot({name: loss[name] for name in non_early
if name != 'MLP_SGD_constant_no_momentum'}, axes.ravel()[0])
for name, ax in zip(early, axes.ravel()[1:]):
make_sub_plot({name[:-6]: loss[name[:-6]], name: loss[name],
name + '_val': val_loss[name]}, ax)
plt.subplots_adjust(hspace=0.45)
plt.subplots_adjust(top=0.8)
plt.show()
def make_sub_plot(loss, ax):
plot_args = [{'c': 'red', 'linestyle': '-'},
{'c': 'green', 'linestyle': '-'},
{'c': 'blue', 'linestyle': '-'},
{'c': 'red', 'linestyle': '--'},
{'c': 'green', 'linestyle': '--'},
{'c': 'blue', 'linestyle': '--'}]
for label, loss_curve, args in zip(loss.keys(), loss.values(), plot_args):
ax.plot(loss_curve, label=label, **args)
if len(loss) > 3:
ax.legend(ax.get_lines(), labels=loss.keys(), loc='center right',
bbox_to_anchor=(0.95, 1.30), fontsize=11)
else:
ax.legend(ax.get_lines(), labels=loss.keys(), loc='center right',
bbox_to_anchor=(1.05, 1.20), fontsize=11)
ESTIMATORS = {
'MLP_SGD_constant_no_momentum': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0, verbose=1,
tol=1e-4, random_state=1, nesterovs_momentum=False),
'MLP_SGD_constant_no_momentum_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0, verbose=1,
tol=1e-4, random_state=1, nesterovs_momentum=False,
early_stopping=True),
'MLP_SGD_constant_momentum': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9, verbose=1,
tol=1e-4, random_state=1, nesterovs_momentum=False),
'MLP_SGD_constant_nesterov': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1),
'MLP_SGD_constant_nesterov_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
early_stopping=True),
'MLP_SGD_invscaling_nesterov': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
learning_rate='invscaling'),
'MLP_SGD_invscaling_nesterov_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
learning_rate='invscaling', early_stopping=True),
'MLP_SGD_adaptive_nesterov': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
learning_rate='adaptive'),
'MLP_SGD_adaptive_nesterov_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
learning_rate='adaptive', early_stopping=True),
'MLP_Adam': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='adam', learning_rate_init=0.001, verbose=1,
tol=1e-4, random_state=1),
'MLP_Adam_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='adam', learning_rate_init=0.001, verbose=1,
tol=1e-4, random_state=1, early_stopping=True),
}
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--classifiers', nargs="+",
choices=ESTIMATORS.keys() + ['all'], type=str,
default=['MLP_SGD_constant_no_momentum',
'MLP_SGD_constant_momentum',
'MLP_SGD_constant_nesterov',
'MLP_Adam', 'MLP_Adam_early',
'MLP_SGD_constant_nesterov_early'],
help="list of classifiers to benchmark.")
parser.add_argument('--n-jobs', nargs="?", default=1, type=int,
help="Number of concurrently running workers for "
"models that support parallelism.")
parser.add_argument('--order', nargs="?", default="C", type=str,
choices=["F", "C"],
help="Allow to choose between fortran and C ordered "
"data")
parser.add_argument('--random-seed', nargs="?", default=0, type=int,
help="Common seed used by random number generator.")
args = vars(parser.parse_args())
print(__doc__)
X_train, X_test, y_train, y_test = load_data(order=args["order"])
print("")
print("Dataset statistics:")
print("===================")
print("%s %d" % ("number of features:".ljust(25), X_train.shape[1]))
print("%s %d" % ("number of classes:".ljust(25), np.unique(y_train).size))
print("%s %s" % ("data type:".ljust(25), X_train.dtype))
print("%s %d (size=%dMB)" % ("number of train samples:".ljust(25),
X_train.shape[0], int(X_train.nbytes / 1e6)))
print("%s %d (size=%dMB)" % ("number of test samples:".ljust(25),
X_test.shape[0], int(X_test.nbytes / 1e6)))
print()
print("Training Classifiers")
print("====================")
error, train_time, test_time, loss_curve, val_curve = {}, {}, {}, {}, {}
if 'all' in args['classifiers']:
args['classifiers'] = ESTIMATORS.keys()
for name in sorted(args["classifiers"]):
print("Training %s ... " % name, end="")
estimator = ESTIMATORS[name]
estimator_params = estimator.get_params()
estimator.set_params(**{p: args["random_seed"]
for p in estimator_params
if p.endswith("random_state")})
if "n_jobs" in estimator_params:
estimator.set_params(n_jobs=args["n_jobs"])
time_start = time()
estimator.fit(X_train, y_train)
train_time[name] = time() - time_start
time_start = time()
y_pred = estimator.predict(X_test)
test_time[name] = time() - time_start
error[name] = zero_one_loss(y_test, y_pred)
loss_curve[name] = estimator.loss_curve_
val_curve[name] = getattr(estimator, 'validation_scores_', [])
print("done")
print()
print("Classification performance:")
print("===========================")
print("{0: <39} {1: >10} {2: >11} {3: >12}"
"".format("Classifier ", "train-time", "test-time", "error-rate"))
print("-" * 76)
for name in sorted(args["classifiers"], key=error.get):
print("{0: <40} {1: >10.2f}s {2: >10.2f}s {3: >12.4f}"
"".format(name, train_time[name], test_time[name], error[name]))
print()
with open('loss_history_mnist.pkl', 'wb') as f:
pickle.dump(loss_curve, f)
with open('val_loss_history_mnist.pkl', 'wb') as f:
pickle.dump(val_curve, f)
make_plots(loss_curve, val_curve)
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
h = .02 # step size in the mesh
ESTIMATORS = {
'SGD_constant_no_momentum': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0, verbose=1,
tol=1e-4, random_state=1, nesterovs_momentum=False),
'SGD_constant_no_momentum_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0, verbose=1,
tol=1e-4, random_state=1, nesterovs_momentum=False,
early_stopping=True),
'SGD_constant_momentum': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9, verbose=1,
tol=1e-4, random_state=1, nesterovs_momentum=False),
'SGD_constant_nesterov': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1),
'SGD_constant_nesterov_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
early_stopping=True),
'SGD_invscaling_nesterov': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
learning_rate='invscaling'),
'SGD_invscaling_nesterov_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
learning_rate='invscaling', early_stopping=True),
'SGD_adaptive_nesterov': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
learning_rate='adaptive'),
'SGD_adaptive_nesterov_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
learning_rate='adaptive', early_stopping=True),
'Adam': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='adam', learning_rate_init=0.001, verbose=1,
tol=1e-4, random_state=1),
'Adam_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='adam', learning_rate_init=0.001, verbose=1,
tol=1e-4, random_state=1, early_stopping=True),
}
names = ESTIMATORS.keys()
classifiers = ESTIMATORS.values()
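# This script follows the layout of scikit-learn's classifier-comparison
# example: each SGD variant and Adam is fit on the moons, circles and linearly
# separable toy datasets and its decision surface is plotted with the test
# accuracy in the corner of each panel.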
X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
random_state=1, n_clusters_per_class=1)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
linearly_separable = (X, y)
datasets = [make_moons(noise=0.3, random_state=0),
make_circles(noise=0.2, factor=0.5, random_state=1),
linearly_separable
]
figure = plt.figure(figsize=(27, 9))
i = 1
# iterate over datasets
for ds in datasets:
# preprocess dataset, split into training and test part
X, y = ds
X = StandardScaler().fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
np.arange(y_min, y_max, h))
# just plot the dataset first
cm = plt.cm.RdBu
cm_bright = ListedColormap(['#FF0000', '#0000FF'])
ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
# Plot the training points
ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
# and testing points
ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
ax.set_xticks(())
ax.set_yticks(())
i += 1
# iterate over classifiers
cnt = 0
for name, clf in zip(names, classifiers):
cnt += 1
ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max] x [y_min, y_max].
if hasattr(clf, "decision_function"):
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
else:
Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
# Put the result into a color plot
Z = Z.reshape(xx.shape)
ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)
# Plot also the training points
ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
# and testing points
ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
alpha=0.6)
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
ax.set_xticks(())
ax.set_yticks(())
if cnt % 2 == 0:
ax.set_title(name, fontsize=10, y=1.08)
else:
ax.set_title(name, fontsize=10)
ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),
size=15, horizontalalignment='right')
i += 1
figure.subplots_adjust(left=.02, right=.98)
plt.show()
"""
Benchmarking MLP learning-rate schemes on the RCV1 dataset
Classification performance:
===========================
Classifier train-time test-time Hamming Loss
-------------------------------------------------------------------
MLP_SGD_constant_no_momentum 5535.1194s 13.0709s 0.0136
MLP_SGD_adaptive_nesterov_early 1295.6843s 11.7958s 0.0139
MLP_SGD_constant_nesterov_early 860.3760s 11.6086s 0.0139
MLP_Adam_early 570.8964s 11.9987s 0.0141
MLP_SGD_adaptive_nesterov 5878.9259s 12.5683s 0.0144
MLP_Adam 2589.0170s 14.1955s 0.0145
MLP_SGD_constant_momentum 2408.1225s 11.9756s 0.0145
MLP_SGD_constant_nesterov 4265.0237s 11.8534s 0.0145
MLP_SGD_invscaling_nesterov 751.3171s 11.2348s 0.0315
MLP_SGD_constant_no_momentum_early 50.1751s 11.1671s 0.0320
MLP_SGD_invscaling_nesterov_early 70.1446s 11.3409s 0.0320
"""
from __future__ import print_function, division
from time import time
import cPickle as pickle
import argparse
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_rcv1
from sklearn.metrics import hamming_loss
from sklearn.utils.validation import check_array
from sklearn.neural_network import MLPClassifier
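# RCV1 is a large sparse multilabel corpus, so the classifiers are scored with
# Hamming loss (the fraction of wrongly assigned labels) instead of accuracy.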
def make_plots(loss, val_loss):
non_early = [name for name in loss if not name.endswith('_early')]
early = [name for name in loss if name.endswith('_early')]
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
make_sub_plot({name: loss[name] for name in non_early}, axes.ravel()[0])
for name, ax in zip(early, axes.ravel()[1:]):
make_sub_plot({name[:-6]: loss[name[:-6]], name: loss[name],
name + '_val': val_loss[name]}, ax)
plt.subplots_adjust(hspace=0.45)
plt.subplots_adjust(top=0.8)
plt.show()
def make_sub_plot(loss, ax):
plot_args = [{'c': 'red', 'linestyle': '-'},
{'c': 'green', 'linestyle': '-'},
{'c': 'blue', 'linestyle': '-'},
{'c': 'red', 'linestyle': '--'},
{'c': 'green', 'linestyle': '--'},
{'c': 'blue', 'linestyle': '--'}]
for label, loss_curve, args in zip(loss.keys(), loss.values(), plot_args):
ax.plot(loss_curve, label=label, **args)
if len(loss) > 3:
ax.legend(ax.get_lines(), labels=loss.keys(), loc='center right',
bbox_to_anchor=(0.95, 1.30), fontsize=11)
else:
ax.legend(ax.get_lines(), labels=loss.keys(), loc='center right',
bbox_to_anchor=(1.05, 1.20), fontsize=11)
ESTIMATORS = {
'MLP_SGD_constant_no_momentum': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0, verbose=1,
tol=1e-4, random_state=1, nesterovs_momentum=False),
'MLP_SGD_constant_no_momentum_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0, verbose=1,
tol=1e-4, random_state=1, nesterovs_momentum=False,
early_stopping=True),
'MLP_SGD_constant_momentum': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9, verbose=1,
tol=1e-4, random_state=1, nesterovs_momentum=False),
'MLP_SGD_constant_nesterov': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1),
'MLP_SGD_constant_nesterov_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
early_stopping=True),
'MLP_SGD_invscaling_nesterov': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
learning_rate='invscaling'),
'MLP_SGD_invscaling_nesterov_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
learning_rate='invscaling', early_stopping=True),
'MLP_SGD_adaptive_nesterov': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
learning_rate='adaptive'),
'MLP_SGD_adaptive_nesterov_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='sgd', learning_rate_init=0.01, momentum=0.9,
nesterovs_momentum=True, verbose=1, tol=1e-4, random_state=1,
learning_rate='adaptive', early_stopping=True),
'MLP_Adam': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='adam', learning_rate_init=0.001, verbose=1,
tol=1e-4, random_state=1),
'MLP_Adam_early': MLPClassifier(
hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
algorithm='adam', learning_rate_init=0.001, verbose=1,
tol=1e-4, random_state=1, early_stopping=True),
}
###############################################################################
# Data
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-e', '--estimators', nargs="+", required=True,
choices=ESTIMATORS.keys() + ['all'])
args = vars(parser.parse_args())
data_train = fetch_rcv1(subset="train", shuffle=True, random_state=1)
data_test = fetch_rcv1(subset="test", shuffle=True, random_state=1)
X_train = check_array(data_train.data, dtype=np.float32,
accept_sparse="csr")
X_test = check_array(data_test.data, dtype=np.float32, accept_sparse="csr")
y_train = data_train.target
y_test = data_test.target
print("rcv1")
print("=============")
print("X_train.shape = {0}".format(X_train.shape))
print("X_train.format = {0}".format(X_train.format))
print("X_train.dtype = {0}".format(X_train.dtype))
print("X_train density = {0}"
"".format(X_train.nnz / np.product(X_train.shape)))
print("y_train {0}".format(y_train.shape))
print("X_test {0}".format(X_test.shape))
print("X_test.format = {0}".format(X_test.format))
print("X_test.dtype = {0}".format(X_test.dtype))
print("y_test {0}".format(y_test.shape))
print()
print("Classifier Training")
print("===================")
if 'all' in args['estimators']:
args['estimators'] = ESTIMATORS.keys()
hmg_loss, train_time, test_time, loss_curve, val_curve = {}, {}, {}, {}, {}
for name in sorted(args["estimators"]):
clf = ESTIMATORS[name]
try:
clf.set_params(random_state=0)
except (TypeError, ValueError):
pass
print("Training %s ... " % name, end="")
t0 = time()
clf.fit(X_train, y_train)
train_time[name] = time() - t0
t0 = time()
y_pred = clf.predict(X_test)
test_time[name] = time() - t0
hmg_loss[name] = hamming_loss(y_test, y_pred)
loss_curve[name] = clf.loss_curve_
val_curve[name] = getattr(clf, 'validation_scores_', [])
print("done")
print()
print("Classification performance:")
print("===========================")
print()
print("%s %s %s %s" % ("Classifier ", "train-time", "test-time",
"Hamming Loss"))
print("-" * 67)
for name in sorted(hmg_loss, key=hmg_loss.get):
print("%s %s %s %s" % (name.ljust(36),
("%.4fs" % train_time[name]).center(10),
("%.4fs" % test_time[name]).center(10),
("%.4f" % hmg_loss[name]).center(10)))
print()
with open('loss_history_rcv1.pkl', 'wb') as f:
pickle.dump(loss_curve, f)
with open('val_loss_history_rcv1.pkl', 'wb') as f:
pickle.dump(val_curve, f)
make_plots(loss_curve, val_curve)
"""
Timing safe_sparse_dot against np.dot and a plain wrapper on dense inputs
(mean and std of the elapsed time over 300 repeats):
              mean             std
Sparse:  0.0586632259687  0.00355379776739
np.dot:  0.0561765789986  0.00206648457631
dummy:   0.0553110162417  0.00247213620297
"""
from __future__ import print_function
import sys
from scipy.sparse import issparse
from sklearn.utils.extmath import safe_sparse_dot
import numpy as np
import time
def dummy_dot(a, b):
if issparse(a) or issparse(b):
raise ValueError
else:
return np.dot(a, b)
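# compare() times ten dense (1000, 10000) x (10000,) products three ways:
# through safe_sparse_dot (which dispatches on sparsity), through np.dot
# directly, and through dummy_dot above, isolating the dispatch overhead.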
def compare():
tests = [(np.random.rand(1000, 10000), np.random.rand(10000)) for _ in range(10)]
start = time.time()
for a, b in tests:
safe_sparse_dot(a, b)
elapsed_sparse = time.time() - start
start = time.time()
for a, b in tests:
np.dot(a, b)
elapsed_npdot = time.time() - start
start = time.time()
for a, b in tests:
dummy_dot(a, b)
elapsed_dummy = time.time() - start
return elapsed_sparse, elapsed_npdot, elapsed_dummy
def main():
times = []
n = 300
for i in range(n):
times.append(compare())
sys.stdout.write('\rFinished {} out of {}'.format(i+1, n))
sys.stdout.flush()
times_sparse, times_npdot, times_dummy = map(np.array, zip(*times))
avg_sparse, std_sparse = times_sparse.mean(), times_sparse.std()
avg_npdot, std_npdot = times_npdot.mean(), times_npdot.std()
avg_dummy, std_dummy = times_dummy.mean(), times_dummy.std()
print()
print(" mean std")
print("Sparse:", avg_sparse, std_sparse)
print("np.dot:", avg_npdot, std_npdot)
print("dummy: ", avg_dummy, std_dummy)
if __name__ == '__main__':
main()