Forked from understar/plot_rbm_logistic_classification.py
Last active
July 29, 2022 05:45
-
-
Save bitsnaps/13d75e7000c9573ad405ecae608e8807 to your computer and use it in GitHub Desktop.
scikit-learn RBM feature extraction and logistic classification
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Update: (07.29.2022): Fix: train_test_split import
==============================================================
Restricted Boltzmann Machine features for digit classification
==============================================================
For greyscale image data where pixel values can be interpreted as degrees of
blackness on a white background, like handwritten digit recognition, the
Bernoulli Restricted Boltzmann machine model (:class:`BernoulliRBM
<sklearn.neural_network.BernoulliRBM>`) can perform effective non-linear
feature extraction.
In order to learn good latent representations from a small dataset, we
artificially generate more labeled data by perturbing the training data with
linear shifts of 1 pixel in each direction.
This example shows how to build a classification pipeline with a BernoulliRBM
feature extractor and a :class:`LogisticRegression
<sklearn.linear_model.LogisticRegression>` classifier. The hyperparameters
of the entire model (learning rate, hidden layer size, regularization)
were optimized by grid search, but the search is not reproduced here because
of runtime constraints.
Logistic regression on raw pixel values is presented for comparison. The
example shows that the features extracted by the BernoulliRBM help improve the
classification accuracy.
""" | |
from __future__ import print_function | |
print(__doc__) | |
# Authors: Yann N. Dauphin, Vlad Niculae, Gabriel Synnaeve | |
# License: BSD | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from scipy.ndimage import convolve | |
from sklearn import linear_model, datasets, metrics | |
from sklearn.model_selection import train_test_split | |
from sklearn.neural_network import BernoulliRBM | |
from sklearn.pipeline import Pipeline | |
############################################################################### | |
# Setting up | |
def nudge_dataset(X, Y, image_shape=(8, 8)):
    """
    Augment a set of flattened images with 1-pixel shifted copies.

    Every row of ``X`` is reshaped to ``image_shape``, shifted by one pixel
    up, left, right and down (pixels shifted in from outside the image are
    zero), and flattened again. The result holds the original rows followed
    by one full shifted copy of ``X`` per direction, so it is 5 times bigger
    than the input.

    Parameters
    ----------
    X : 2-D array, one flattened image per row; each row must contain
        ``image_shape[0] * image_shape[1]`` values.
    Y : 1-D array of labels, one per row of ``X``.
    image_shape : (rows, cols) of a single image. Defaults to the 8x8
        layout used by the digits dataset.

    Returns
    -------
    (X, Y) : the augmented samples and the labels repeated to match, in the
        same block order as the samples.
    """
    # Each 3x3 kernel has a single 1 off-centre; convolving with it moves the
    # whole image by one pixel (in order: up, left, right, down).
    direction_vectors = [
        [[0, 1, 0],
         [0, 0, 0],
         [0, 0, 0]],
        [[0, 0, 0],
         [1, 0, 0],
         [0, 0, 0]],
        [[0, 0, 0],
         [0, 0, 1],
         [0, 0, 0]],
        [[0, 0, 0],
         [0, 0, 0],
         [0, 1, 0]]]

    def shift(x, w):
        # mode='constant' pads with zeros, so content leaving the frame is
        # dropped rather than wrapped around or reflected.
        return convolve(x.reshape(image_shape), mode='constant',
                        weights=w).ravel()

    X = np.concatenate([X] +
                       [np.apply_along_axis(shift, 1, X, vector)
                        for vector in direction_vectors])
    # One label block for the originals plus one per shifted copy; deriving
    # the count from the kernel list keeps X and Y aligned if kernels change.
    Y = np.concatenate([Y] * (len(direction_vectors) + 1), axis=0)
    return X, Y
# Load Data
digits = datasets.load_digits()
X = np.asarray(digits.data, 'float32')
# Augment the training pool with 1-pixel shifted copies of every image.
X, Y = nudge_dataset(X, digits.target)
# Per-feature 0-1 scaling. The denominator must be the feature's range
# (max - min); dividing by max alone is only correct when every feature's
# minimum is 0. The small constant avoids 0/0 for features that never vary.
feature_min = np.min(X, 0)
feature_range = np.max(X, 0) - feature_min
X = (X - feature_min) / (feature_range + 0.0001)
# Hold out 20% of the samples for evaluation; fixed seed for repeatability.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0)
# Models we will use.
# The hyper-parameters below were set by cross-validation using a
# GridSearchCV; the search is not repeated here to save time.
logistic = linear_model.LogisticRegression(
    C=6000.0, random_state=0, solver='lbfgs')
# More components tend to give better prediction performance, but larger
# fitting time.
rbm = BernoulliRBM(
    n_components=100, learning_rate=0.06, n_iter=20,
    random_state=0, verbose=True)
classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])

###############################################################################
# Training

# RBM feature extraction followed by logistic regression
classifier.fit(X_train, Y_train)

# Baseline: logistic regression fitted directly on the same training data
logistic_classifier = linear_model.LogisticRegression(
    C=100.0, random_state=0, solver='lbfgs')
logistic_classifier.fit(X_train, Y_train)
###############################################################################
# Evaluation
print()
# Print a classification report on the held-out split for each model:
# the RBM pipeline first, then the raw-pixel baseline.
for heading, model in (
        ("Logistic regression using RBM features:\n%s\n", classifier),
        ("Logistic regression using raw pixel features:\n%s\n",
         logistic_classifier)):
    report = metrics.classification_report(Y_test, model.predict(X_test))
    print(heading % report)
###############################################################################
# Plotting
# Show every learned RBM component as an 8x8 image on a square grid.
# The grid size and the title are derived from the number of components
# actually learned, so the figure stays valid if rbm.n_components is changed
# (a fixed 10x10 grid fails as soon as there are more than 100 components).
components = rbm.components_
n_shown = len(components)
grid_side = int(np.ceil(np.sqrt(n_shown)))
plt.figure(figsize=(4.2, 4))
for i, comp in enumerate(components):
    plt.subplot(grid_side, grid_side, i + 1)
    plt.imshow(comp.reshape((8, 8)), cmap=plt.cm.gray_r,
               interpolation='nearest')
    # Hide the axis ticks; only the component image matters.
    plt.xticks(())
    plt.yticks(())
plt.suptitle('%d components extracted by RBM' % n_shown, fontsize=16)
plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment