import numpy as np
from tqdm import trange
from sklearn.base import BaseEstimator


class NeuralNet(BaseEstimator):
    """
    Neural network for classification.

    Parameters
    ----------
    learning_rate : float
        Learning rate for gradient descent.
    hidden_dims : list of int
        Number of units in each hidden layer, e.g. [30] gives one hidden layer
        with 30 units; [50, 50] gives two hidden layers with 50 units each.
    n_iters : int
        Number of iterations to run the algorithm, a.k.a. epochs.
    activation : str, 'relu', 'leaky_relu' or 'tanh'
        Activation function applied after each fully connected layer.
    reg : float
        L2 regularization strength for the weights.
    initialize : str, 'xavier' or 'normal'
        Weight initialization method.
    seed : int
        Seed for the randomly initialized weights.
    """

    def __init__(self, learning_rate, hidden_dims, n_iters,
                 activation, reg, initialize, seed):
        self.reg = reg
        self.seed = seed
        self.n_iters = n_iters
        self.initialize = initialize
        self.activation = activation
        self.hidden_dims = hidden_dims
        self.learning_rate = learning_rate
    def fit(self, X, y):
        """
        Parameters
        ----------
        X : 2d numpy array, shape = [n_samples, n_features]
            The training input samples.
        y : 1d numpy array, shape = [n_samples]
            The target values, a.k.a. class labels in classification.
        """
        N, n_features = X.shape
        n_classes = np.unique(y).shape[0]

        # initialize random weights, these are the parameters we need to learn
        self.biases = []
        self.weights = []
        dims = [n_features] + self.hidden_dims + [n_classes]
        rstate = np.random.RandomState(self.seed)
        for d in range(len(dims) - 1):
            if self.initialize == 'xavier':
                # fan-in based scaling: standard deviation sqrt(2 / fan_in)
                weight = rstate.normal(0, np.sqrt(2.0 / dims[d]),
                                       size = (dims[d], dims[d + 1]))
            elif self.initialize == 'normal':
                weight = rstate.randn(dims[d], dims[d + 1])

            bias = np.zeros((1, dims[d + 1]))
            self.weights.append(weight)
            self.biases.append(bias)
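        # Note on initialization: the 'xavier' branch above uses the fan-in
        # based scale std = sqrt(2 / fan_in), which is the scaling usually
        # recommended for relu-family activations (often called He
        # initialization); Glorot/Xavier initialization proper would use
        # std = sqrt(2 / (fan_in + fan_out)). A minimal sketch of that
        # alternative, assuming the same `dims` list:
        #
        #     weight = rstate.normal(0, np.sqrt(2.0 / (dims[d] + dims[d + 1])),
        #                            size = (dims[d], dims[d + 1]))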
        # alternate between the forward and backpropagation steps to train
        # the neural network, storing the loss and accuracy history; note
        # that each iteration is a full-batch gradient descent step
        self.losses = []
        self.accuracies = []
        for _ in trange(self.n_iters):
            proba, caches = self._forward_pass(X)
            self._backward_pass(proba, caches, y)
            loss = softmax_loss(proba, y, self.weights, self.reg)
            self.losses.append(loss)
            y_pred = np.argmax(proba, axis = 1)
            accuracy = np.sum(y_pred == y) / N
            self.accuracies.append(accuracy * 100)

        return self
    def _forward_pass(self, X):
        """
        Feed forward: given the input data, output the softmax probability
        and a list of caches that contains the information needed for
        backpropagation.
        """
        f, f_cache = feed_forward(X, self.weights[0], self.biases[0])
        caches = [f_cache]
        for weight, bias in zip(self.weights[1:], self.biases[1:]):
            activation_forward = ACTIVATION[self.activation]['forward']
            a, a_cache = activation_forward(f)
            f, f_cache = feed_forward(a, weight, bias)
            caches.append(a_cache)
            caches.append(f_cache)

        proba = softmax_forward(f)
        return proba, caches
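    # For reference, the caches list built above interleaves linear and
    # activation caches. With two hidden layers (e.g. hidden_dims = [50, 50])
    # the layout is:
    #
    #     caches = [linear1, activation1, linear2, activation2, linear3]
    #
    # _backward_pass pops these in reverse order, which is why it consumes
    # one linear cache first and then alternates activation/linear pairs.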
    def _backward_pass(self, proba, caches, y):
        """Backpropagation that computes the gradients and updates the weights."""
        dout = softmax_backward(proba, y)
        cache = caches.pop()
        dx, dw, db = feed_backward(dout, cache)
        dbiases = [db]
        dweights = [dw]
        for _ in range(len(caches) // 2):
            cache = caches.pop()
            activation_backward = ACTIVATION[self.activation]['backward']
            da = activation_backward(dx, cache)
            cache = caches.pop()
            dx, dw, db = feed_backward(da, cache)
            dbiases.append(db)
            dweights.append(dw)

        # add the gradient of the L2 regularization term 0.5 * reg * ||w||^2;
        # dweights is ordered from the last layer to the first, hence the
        # reversed weights
        dweights = [dw + self.reg * w
                    for dw, w in zip(dweights, reversed(self.weights))]

        # update the weights using standard gradient descent, note that the
        # first element of dweights corresponds to the last element of weights
        w_len = len(self.weights) - 1
        for j in range(len(self.weights)):
            self.weights[w_len - j] -= self.learning_rate * dweights[j]
            self.biases[w_len - j] -= self.learning_rate * dbiases[j]

        return self
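    # To make the update order above concrete: with hidden_dims = [50, 50]
    # there are three weight matrices, and the gradients come out of the
    # backward loop in reverse order, so the pairing is
    #
    #     dweights[0] -> self.weights[2]   (output layer)
    #     dweights[1] -> self.weights[1]
    #     dweights[2] -> self.weights[0]   (first layer)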
    def predict(self, X):
        proba = self.predict_proba(X)
        y_pred = np.argmax(proba, axis = 1)
        return y_pred

    def predict_proba(self, X):
        proba, _ = self._forward_pass(X)
        return proba

def softmax_loss(proba, y, weights, reg):
    """Cross entropy loss, averaged over the number of samples."""
    N = y.shape[0]
    # add an epsilon value to prevent taking the log of 0
    log_proba = -np.log(proba[range(N), y] + 1e-9)
    data_loss = np.sum(log_proba) / N

    # L2 regularization on the weights
    weights_sum = np.sum([np.sum(w ** 2) for w in weights])
    reg_loss = 0.5 * reg * weights_sum
    loss = data_loss + reg_loss
    return loss
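# Written out, the loss computed above is
#
#     L = -(1 / N) * sum_i log(p[i, y_i]) + 0.5 * reg * sum_l ||W_l||^2
#
# where p[i, y_i] is the predicted probability of sample i's true class.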
def softmax_forward(x):
    """
    Compute the softmax of matrix x in a numerically stable way,
    by subtracting the max of each row from that row.
    """
    shift_x = x - np.amax(x, axis = 1, keepdims = True)
    exp_x = np.exp(shift_x)
    proba = exp_x / np.sum(exp_x, axis = 1, keepdims = True)
    return proba
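# A quick illustration of why the shift matters (hypothetical values):
# np.exp(1000) overflows to inf, but softmax is shift-invariant, so
# softmax_forward(np.array([[1000.0, 1001.0]])) is computed from the shifted
# logits [-1, 0] and returns roughly [[0.269, 0.731]].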
def softmax_backward(proba, y):
    N = y.shape[0]
    dx = proba.copy()
    dx[range(N), y] -= 1
    dx /= N
    return dx
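# The formula above follows from differentiating the averaged cross entropy
# with respect to the logits: for each sample the gradient is the predicted
# probability vector minus the one-hot encoding of the true label, i.e.
#
#     dL/dlogits = (proba - one_hot(y)) / N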
def feed_forward(x, w, b):
    f = x.dot(w) + b
    f_cache = x, w
    return f, f_cache


def feed_backward(dout, cache):
    x, w = cache
    # the gradients can be computed by matrix multiplication with dout;
    # just be careful with the dimensions of the outputs, e.g. the gradient
    # on the weights dw must have the same shape as the w matrix
    dx = dout.dot(w.T)
    dw = x.T.dot(dout)
    db = np.sum(dout, axis = 0)
    return dx, dw, db
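# Shape bookkeeping for feed_backward: if x is (N, D), w is (D, H) and b is
# (1, H), then the upstream gradient dout is (N, H); dx (N, D) and dw (D, H)
# match the shapes of x and w, while db has shape (H,) and broadcasts against
# the (1, H) bias during the update.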
def relu_forward(x):
    a = np.maximum(0, x)
    a_cache = x
    return a, a_cache


def relu_backward(dout, cache):
    dx = np.where(cache > 0, dout, 0)
    return dx
def leaky_relu_forward(x):
    a = np.maximum(0.01 * x, x)
    a_cache = x
    return a, a_cache


def leaky_relu_backward(dout, cache):
    # pass the gradient through where the input was positive,
    # scale it by 0.01 elsewhere
    dx = np.where(cache > 0, dout, 0.01 * dout)
    return dx
def tanh_forward(x):
    a = np.tanh(x)
    a_cache = x
    return a, a_cache


def tanh_backward(dout, cache):
    # the derivative of tanh(x) is 1 - tanh(x)^2, applied to the cached input
    dx = dout * (1 - np.tanh(cache) ** 2)
    return dx
# lookup table mapping each supported activation name
# to its forward and backward functions
ACTIVATION = {}
ACTIVATION['relu'] = {
    'forward': relu_forward,
    'backward': relu_backward
}
ACTIVATION['tanh'] = {
    'forward': tanh_forward,
    'backward': tanh_backward
}
ACTIVATION['leaky_relu'] = {
    'forward': leaky_relu_forward,
    'backward': leaky_relu_backward
}
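# Adding another activation only requires registering a forward/backward pair
# in the table above. A minimal sketch for a hypothetical sigmoid entry
# (sigmoid_forward / sigmoid_backward are not defined in this file):
#
#     ACTIVATION['sigmoid'] = {
#         'forward': sigmoid_forward,
#         'backward': sigmoid_backward
#     }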
__all__ = ['NeuralNet']

if __name__ == '__main__':
    import matplotlib.pyplot as plt
    from keras.datasets.mnist import load_data
    from sklearn.metrics import accuracy_score
    def plot_info(estimator, X, y, figname = 'history.png'):
        """
        Select a random sample from the dataset, visualize the image along
        with its predicted label and the confidence of that prediction
        (i.e. the predicted probability); also visualize the stored loss and
        accuracy history up to the current iteration. The utility function
        saves the visualization to disk; pass figname = None to turn this
        behavior off.
        """
        fig, ax = plt.subplots(1, 3, figsize = (12, 3))

        # evaluate the overall accuracy
        y_pred = estimator.predict(X)
        accuracy = accuracy_score(y, y_pred)
        title = 'Overall accuracy %0.2f' % accuracy

        # reshape the randomly chosen image to a square
        i = np.random.choice(X.shape[0])
        size = int(np.sqrt(X.shape[1]))
        img = X[i].reshape(size, size)
        ax[0].imshow(img, cmap = 'gray')

        # prediction for the randomly chosen image
        proba = estimator.predict_proba(X[i:i + 1])
        y_pred = np.argmax(proba)
        title += "\nPrediction: %d confidence=%0.2f" % (y_pred, proba[0][y_pred])
        ax[0].set_title(title)
        ax[0].set_xticks([])
        ax[0].set_yticks([])

        ax[1].plot(estimator.losses, color = 'blue')
        ax[1].set_title('Loss')
        ax[1].set_yscale('log')

        # aim for 90% accuracy
        ax[2].plot(estimator.accuracies, color = 'blue')
        ax[2].axhline(90, color = 'red', linestyle = ':')
        ax[2].set_title('Accuracy: %0.2f%%' % estimator.accuracies[-1])

        # add a little height to the figure so no text gets chopped off
        size = fig.get_size_inches()
        fig.set_size_inches(size[0], size[1] + 1)
        if figname is not None:
            fig.savefig(figname)

        plt.show()
    # load the mnist dataset and normalize the pixel values to [0, 1]
    (X_train, y_train), (X_test, y_test) = load_data()
    X_train = X_train.reshape((X_train.shape[0], -1)) / 255.0
    X_test = X_test.reshape((X_test.shape[0], -1)) / 255.0

    # train the neural network model
    nn_params = {
        'reg': 0.01,
        'seed': 1234,
        'n_iters': 350,
        'hidden_dims': [512, 512],
        'learning_rate': 0.01,
        'activation': 'leaky_relu',
        'initialize': 'xavier'
    }
    nn = NeuralNet(**nn_params)
    nn.fit(X_train, y_train)
    plot_info(nn, X_test, y_test)