FNN.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
"class TwoLayerNet(object):\n", | |
"\t\"\"\"\n", | |
"\tA two-layer fully-connected neural network. The net has an input dimension of\n", | |
"\tN, a hidden layer dimension of H, and performs classification over C classes.\n", | |
"\tWe train the network with a softmax loss function and L2 regularization on the\n", | |
"\tweight matrices. The network uses a ReLU nonlinearity after the first fully\n", | |
"\tconnected layer.\n", | |
"\tIn other words, the network has the following architecture:\n", | |
"\tinput - fully connected layer - ReLU - fully connected layer - softmax\n", | |
"\tThe outputs of the second fully-connected layer are the scores for each class.\n", | |
"\t\"\"\"\n", | |
"\n", | |
"\tdef __init__(self, input_size, hidden_size, output_size, std):\n", | |
"\t\t\"\"\"\n", | |
"\t\tInitialize the model. Weights are initialized following Xavier intialization and\n", | |
"\t\tbiases are initialized to zero. Weights and biases are stored in the\n", | |
"\t\tvariable self.params, which is a dictionary with the following keys:\n", | |
"\t\tW1: First layer weights; has shape (D, H)\n", | |
"\t\tb1: First layer biases; has shape (H,)\n", | |
"\t\tW2: Second layer weights; has shape (H, C)\n", | |
"\t\tb2: Second layer biases; has shape (C,)\n", | |
"\t\tInputs:\n", | |
"\t\t- input_size: The dimension D of the input data.\n", | |
"\t\t- hidden_size: The number of neurons H in the hidden layer.\n", | |
"\t\t- output_size: The number of classes C.\n", | |
"\t\t\"\"\"\n", | |
"\t\tself.params = {}\n", | |
"\t\tself.params['W1'] = ((2 / input_size) ** 0.5) * np.random.randn(input_size, hidden_size)\n", | |
"\t\tself.params['b1'] = np.zeros(hidden_size)\n", | |
"\t\tself.params['W2'] = ((2 / hidden_size) ** 0.5) * np.random.randn(hidden_size, output_size)\n", | |
"\t\tself.params['b2'] = np.zeros(output_size)\n", | |
"\n", | |
"\tdef loss(self, X, y=None, reg=0.0):\n", | |
"\t\t\"\"\"\n", | |
"\t\tCompute the loss and gradients for a two layer fully connected neural\n", | |
"\t\tnetwork.\n", | |
"\t\tInputs:\n", | |
"\t\t- X: Input data of shape (N, D). Each X[i] is a training sample.\n", | |
"\t\t- y: Vector of training labels. y[i] is the label for X[i], and each y[i] is\n", | |
"\t\t an integer in the range 0 <= y[i] < C. This parameter is optional; if it\n", | |
"\t\t is not passed then we only return scores, and if it is passed then we\n", | |
"\t\t instead return the loss and gradients.\n", | |
"\t\t- reg: Regularization strength.\n", | |
"\t\tReturns:\n", | |
"\t\t- loss: Loss (data loss and regularization loss) for this batch of training\n", | |
"\t\t samples.\n", | |
"\t\t- grads: Dictionary mapping parameter names to gradients of those parameters\n", | |
"\t\t with respect to the loss function; has the same keys as self.params.\n", | |
"\t\t\"\"\"\n", | |
"\t\t# Unpack variables from the params dictionary\n", | |
"\t\tW1, b1 = self.params['W1'], self.params['b1']\n", | |
"\t\tW2, b2 = self.params['W2'], self.params['b2']\n", | |
"\t\tN, D = X.shape\n", | |
"\n", | |
"\t\t# Compute the forward pass\n", | |
"\t\tl1 = X.dot(W1) + b1\n", | |
"\t\tl1[l1 < 0] = 0\n", | |
"\t\tl2 = l1.dot(W2) + b2\n", | |
"\t\texp_scores = np.exp(l2)\n", | |
"\t\tprobs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)\n", | |
"\t\tscores = l2\n", | |
"\n", | |
"\t\t# Compute the loss\n", | |
"\n", | |
"\t\tW1_r = 0.5 * reg * np.sum(W1 * W1)\n", | |
"\t\tW2_r = 0.5 * reg * np.sum(W2 * W2)\n", | |
"\n", | |
"\t\tloss = -np.sum(np.log(probs[range(y.shape[0]), y])) / N + W1_r + W2_r\n", | |
"\n", | |
"\n", | |
"\t\t# Backward pass: compute gradients\n", | |
"\t\tgrads = {}\n", | |
"\t\t\n", | |
"\t\tprobs[range(X.shape[0]),y] -= 1\n", | |
"\t\tdW2 = np.dot(l1.T, probs)\n", | |
"\t\tdW2 /= X.shape[0]\n", | |
"\t\tdW2 += reg * W2\n", | |
"\t\tgrads['W2'] = dW2\n", | |
"\t\tgrads['b2'] = np.sum(probs, axis=0, keepdims=True) / X.shape[0]\n", | |
"\t\t\n", | |
"\t\tdelta = probs.dot(W2.T)\n", | |
"\t\tdelta = delta * (l1 > 0)\n", | |
"\t\tgrads['W1'] = np.dot(X.T, delta)/ X.shape[0] + reg * W1\n", | |
"\t\tgrads['b1'] = np.sum(delta, axis=0, keepdims=True) / X.shape[0]\n", | |
"\n", | |
"\t\treturn loss, grads\n", | |
"\n", | |
"\tdef train(self, X, y, X_val, y_val,\n", | |
" learning_rate=1e-3, learning_rate_decay=0.95,\n", | |
" reg=5e-6, num_iters=100,\n", | |
" batch_size=24, verbose=False):\n", | |
"\t\t\"\"\"\n", | |
"\t\tTrain this neural network using stochastic gradient descent.\n", | |
"\t\tInputs:\n", | |
"\t\t- X: A numpy array of shape (N, D) giving training data.\n", | |
"\t\t- y: A numpy array f shape (N,) giving training labels; y[i] = c means that\n", | |
"\t\t X[i] has label c, where 0 <= c < C.\n", | |
"\t\t- X_val: A numpy array of shape (N_val, D) giving validation data.\n", | |
"\t\t- y_val: A numpy array of shape (N_val,) giving validation labels.\n", | |
"\t\t- learning_rate: Scalar giving learning rate for optimization.\n", | |
"\t\t- learning_rate_decay: Scalar giving factor used to decay the learning rate\n", | |
"\t\t after each epoch.\n", | |
"\t\t- reg: Scalar giving regularization strength.\n", | |
"\t\t- num_iters: Number of steps to take when optimizing.\n", | |
"\t\t- batch_size: Number of training examples to use per step.\n", | |
"\t\t- verbose: boolean; if true print progress during optimization.\n", | |
"\t\t\"\"\"\n", | |
"\t\tnum_train = X.shape[0]\n", | |
"\t\titerations_per_epoch = max(num_train / batch_size, 1)\n", | |
"\n", | |
"\t\t# Use SGD to optimize the parameters in self.model\n", | |
"\t\tloss_history = []\n", | |
"\t\ttrain_acc_history = []\n", | |
"\t\tval_acc_history = []\n", | |
"\n", | |
"\t\tfor it in range(num_iters):\n", | |
"\t\t\tindexes = np.random.choice(X.shape[0], batch_size, replace=True)\n", | |
"\t\t\tX_batch = X[indexes]\n", | |
"\t\t\ty_batch = y[indexes]\n", | |
"\t\t\t# Compute loss and gradients using the current minibatch\n", | |
"\t\t\tloss, grads = self.loss(X_batch, y=y_batch, reg=reg)\n", | |
"\t\t\tloss_history.append(loss)\n", | |
"\n", | |
"\n", | |
"\t\t\tself.params['W1'] -= learning_rate * grads['W1']\n", | |
"\t\t\tself.params['b1'] -= learning_rate * grads['b1'][0]\n", | |
"\t\t\tself.params['W2'] -= learning_rate * grads['W2']\n", | |
"\t\t\tself.params['b2'] -= learning_rate * grads['b2'][0]\n", | |
"\n", | |
"\t\t\tif verbose and it % 100 == 0:\n", | |
"\t\t\t\tprint('iteration %d / %d: loss %f' % (it, num_iters, loss))\n", | |
"\n", | |
"\t\t\t# Every epoch, check train and val accuracy and decay learning rate.\n", | |
"\t\t\tif it % iterations_per_epoch == 0:\n", | |
"\t\t\t\t# Check accuracy\n", | |
"\t\t\t\ttrain_acc = (self.predict(X_batch) == y_batch).mean()\n", | |
"\t\t\t\tval_acc = (self.predict(X_val) == y_val).mean()\n", | |
"\t\t\t\ttrain_acc_history.append(train_acc)\n", | |
"\t\t\t\tval_acc_history.append(val_acc)\n", | |
"\n", | |
"\t\t\t\t# Decay learning rate\n", | |
"\t\t\t\tlearning_rate *= learning_rate_decay\n", | |
"\n", | |
"\t\treturn {\n", | |
"\t\t 'loss_history': loss_history,\n", | |
"\t\t 'train_acc_history': train_acc_history,\n", | |
"\t\t 'val_acc_history': val_acc_history,\n", | |
"\t\t}\n", | |
"\n", | |
"\tdef predict(self, X):\n", | |
"\t\t\"\"\"\n", | |
"\t\tUse the trained weights of this two-layer network to predict labels for\n", | |
"\t\tdata points. For each data point we predict scores for each of the C\n", | |
"\t\tclasses, and assign each data point to the class with the highest score.\n", | |
"\t\tInputs:\n", | |
"\t\t- X: A numpy array of shape (N, D) giving N D-dimensional data points to\n", | |
"\t\t classify.\n", | |
"\t\tReturns:\n", | |
"\t\t- y_pred: A numpy array of shape (N,) giving predicted labels for each of\n", | |
"\t\t the elements of X. For all i, y_pred[i] = c means that X[i] is predicted\n", | |
"\t\t to have class c, where 0 <= c < C.\n", | |
"\t\t\"\"\"\n", | |
"\t\tl1 = X.dot(self.params['W1']) + self.params['b1']\n", | |
"\t\tl1[l1 < 0] = 0\n", | |
"\t\tl2 = l1.dot(self.params['W2']) + self.params['b2']\n", | |
"\t\texp_scores = np.exp(l2)\n", | |
"\t\tprobs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)\n", | |
"\t\ty_pred = np.argmax(probs, axis=1)\n", | |
"\n", | |
"\t\treturn y_pred\n", | |
"\n", | |
"\tdef predict_single(self, X):\n", | |
"\t\t\"\"\"\n", | |
"\t\tUse the trained weights of this two-layer network to predict labels for\n", | |
"\t\tdata point. We predict scores for each of the C\n", | |
"\t\tclasses, and assign еру data point to the class with the highest score.\n", | |
"\t\tInputs:\n", | |
"\t\t- X: A numpy array of shape (N, D) giving N D-dimensional data points to\n", | |
"\t\t classify.\n", | |
"\t\tReturns:\n", | |
"\t\t- y_pred: A numpy array of shape (1,) giving predicted labels for X.\n", | |
"\t\t\"\"\"\n", | |
"\t\tl1 = X.dot(self.params['W1']) + self.params['b1']\n", | |
"\t\tl1[l1 < 0] = 0\n", | |
"\t\tl2 = l1.dot(self.params['W2']) + self.params['b2']\n", | |
"\t\texp_scores = np.exp(l2)\n", | |
"\t\ty_pred = np.argmax(exp_scores)\n", | |
"\t\t\n", | |
"\t\treturn y_pred" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [Root]", | |
"language": "python", | |
"name": "Python [Root]" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
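
For reference, a minimal usage sketch of the cell above. The toy data, layer sizes, and hyperparameters here are illustrative assumptions, not part of the notebook; std is passed only because the constructor requires it.

import numpy as np

np.random.seed(0)

# Toy problem: 5-dimensional inputs, 3 classes (illustrative numbers)
D, H, C = 5, 10, 3
X_train = np.random.randn(200, D)
y_train = np.random.randint(0, C, size=200)
X_val = np.random.randn(50, D)
y_val = np.random.randint(0, C, size=50)

net = TwoLayerNet(D, H, C, std=1e-2)  # std is accepted but unused by this implementation
stats = net.train(X_train, y_train, X_val, y_val,
                  learning_rate=1e-2, reg=1e-4,
                  num_iters=500, batch_size=32, verbose=True)

print('final training loss:', stats['loss_history'][-1])
print('validation accuracy:', (net.predict(X_val) == y_val).mean())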
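
A common way to sanity-check the analytic gradients returned by loss is a centered finite-difference check. This is a sketch under the same toy setup as above; grad_check, eps, num_checks, and the sampled coordinates are assumptions for illustration, not part of the notebook.

def grad_check(net, X, y, reg=1e-4, eps=1e-5, num_checks=5):
    """Compare analytic gradients from net.loss with centered finite differences."""
    _, grads = net.loss(X, y=y, reg=reg)
    for name, grad in grads.items():
        param = net.params[name]
        for _ in range(num_checks):
            # Pick a random coordinate of this parameter tensor
            ix = tuple(np.random.randint(dim) for dim in param.shape)
            old = param[ix]
            param[ix] = old + eps
            loss_plus, _ = net.loss(X, y=y, reg=reg)
            param[ix] = old - eps
            loss_minus, _ = net.loss(X, y=y, reg=reg)
            param[ix] = old  # restore the original value
            numeric = (loss_plus - loss_minus) / (2 * eps)
            rel_err = abs(numeric - grad[ix]) / max(1e-8, abs(numeric) + abs(grad[ix]))
            print('%s[%s]: analytic %.6f, numeric %.6f, rel. error %.2e'
                  % (name, ix, grad[ix], numeric, rel_err))

grad_check(net, X_train[:20], y_train[:20])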