A feedforward neural network in vanilla Python
""" | |
A compact feedforward AI. Script explained here: | |
https://youtu.be/z2aq21lMw40 | |
In this video we're implementing a feed forward neutral network that is able to discern | |
handwritten digits in vanilla Python. | |
References: | |
http://static.latexstudio.net/article/2018/0912/neuralnetworksanddeeplearning.pdf # deep learning book | |
https://en.wikipedia.org/wiki/MNIST_database # mnist data set | |
https://en.wikipedia.org/wiki/Stochastic_del_descent # SGD | |
https://en.wikipedia.org/wiki/Feedforward_neural_network # NN | |
http://neuralnetworksanddeeplearning.com/images/tikz12.png # image of the network architecture used in the video | |
https://www.youtube.com/results?search_query=3blue1brown+deep+learning+chapter | |
https://youtu.be/X1mo7Uwvzn8?t=145 # on sigmoid curves (Nikolaj-K) | |
Notions present in the book but not touched upon in this script: | |
- improved initialization of network weights | |
- improved cost function | |
- regularization | |
- convolution | |
- max pooling | |
- other activation functions than sigmoid | |
- rectified linear units | |
- softmax | |
- external library speedup | |
""" | |
import gzip
import numpy as np
import pickle
import time

class IOPair:
    """
    Interface with four methods that subclasses ought to implement:
    _get_input, _output_ground_truth, _to_output_format and _show.
    E.g. if the data type is an image together with a label, _get_input might
    return the pixels, and correct might validate correct classifications by the network.
    """
    def get_input(self):
        return self._get_input()

    def output_ground_truth(self):
        return self._output_ground_truth()

    def show(self, activation_prediction) -> None:
        self._show(activation_prediction)

    def correct(self, activation_prediction) -> bool:
        gt = self._output_ground_truth()
        ap_formatted = self._to_output_format(activation_prediction)
        return ap_formatted == gt
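
# A minimal sketch of a hypothetical IOPair subclass, just to illustrate the
# interface (the names below are made up; the real subclass is Image further down):
#
#     class ToyPair(IOPair):
#         def _get_input(self):  # column vector of two activations
#             return np.array([[0.0], [1.0]])
#         def _output_ground_truth(self):  # one-hot over {0, 1}
#             return [[0], [1]]
#         def _to_output_format(self, activation_prediction):
#             hot = int(np.argmax(activation_prediction))
#             return [[int(hot == d)] for d in range(2)]
#         def _show(self, activation_prediction):
#             print(activation_prediction)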

class Math:
    def sigmoid(z):  # Smooth step.
        return (1 + np.exp(-z)) ** -1

    def dsigmoid(z):  # Derivative of a smooth step.
        s = Math.sigmoid(z)
        return s * (1 - s)

    def zeros(np_arrays):
        return [np.zeros(x.shape) for x in np_arrays]
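
# Quick sanity check for the two functions above (illustrative only):
# Math.sigmoid(0.0) returns 0.5 and Math.dsigmoid(0.0) returns 0.25,
# since sigma'(z) = sigma(z) * (1 - sigma(z)) peaks at z = 0.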

class NeuralNetwork:
    """
    Feedforward neural network.
    """
    def __init__(self, input_layer_size, hidden_layer_sizes, output_layer_size):
        """
        Initialize a network architecture:
        1. input layer
        2. hidden layers according to this function's parameters
        3. output layer
        Weights are connections. Compute them by zipping over the dimensions.
        """
        self.__output_layer_size = output_layer_size
        in_layers_sizes = [input_layer_size] + hidden_layer_sizes
        out_layers_sizes = hidden_layer_sizes + [output_layer_size]
        self.__weights = [np.random.randn(*sizes) for sizes in zip(out_layers_sizes, in_layers_sizes)]
        self.__biases = [np.random.randn(size, 1) for size in out_layers_sizes]  # sampling from a normal distribution
        # The elements are pairs of sizes of connected layers (number of outgoing resp. ingoing neurons).
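        # E.g. with the defaults used in run_main below (784 input pixels,
        # hidden layers [10, 20, 10], 10 outputs), self.__weights holds matrices
        # of shapes (10, 784), (20, 10), (10, 20), (10, 10) and self.__biases
        # holds column vectors of shapes (10, 1), (20, 1), (10, 1), (10, 1).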

    def __feed_forward(self, data):
        """
        Propagate the activations, from data all through to the prediction.
        """
        a = data  # Activation
        as_ = [a]  # Layer by layer
        zs = []  # List to store all the z vectors, layer by layer
        for w, b in zip(self.__weights, self.__biases):
            z = np.dot(w, a) + b
            a = Math.sigmoid(z)
            zs.append(z)
            as_.append(a)  # Derivable from data and zs
        return zs, as_
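
    # With the default sizes above, __feed_forward returns zs holding vectors
    # of shapes (10, 1), (20, 1), (10, 1), (10, 1), and as_ holding the same
    # shapes plus the (784, 1) input vector at the front.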

    def predicts_correctly(self, in_out_pair, show=False):
        """
        Wrap the feed forward call and return whether the prediction matches the ground truth.
        """
        _zs, _as = self.__feed_forward(in_out_pair.get_input())
        activation_prediction = _as[-1]
        if show:
            in_out_pair.show(activation_prediction)
        return in_out_pair.correct(activation_prediction)

    def run_sgd(self, epochs, batch_size, eta, training_data):
        """
        Run the stochastic gradient descent algorithm:
        In epochs (iterations), put the training data set in random batches,
        and update the weights and biases for the purpose of improving the
        delta between predicted and the ground truth classifications.
        See formulas in any deep learning book!
        Note: Makes use of the normalized mini batch gradient.
        """
        data = list(training_data)  # local copy
        np.random.shuffle(data)
        for epoch in range(epochs):
            for idx in range(0, len(data), batch_size):
                batch = data[idx : idx + batch_size]
                del_w, del_b = self.__mean_gradient(batch)  # Call subroutine
                self.__weights = [w - eta * dw for w, dw in zip(self.__weights, del_w)]  # Gradient step
                self.__biases = [b - eta * db for b, db in zip(self.__biases, del_b)]
            self.__log_score(data)  # Optional
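
    # Illustrative call, mirroring the defaults in Config below:
    #     net.run_sgd(epochs=4, batch_size=10, eta=3.0, training_data=training_data)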

    def __gradient(self, in_out_pair):
        """
        Details:
        The cost C is a sum over the neurons j of the last layer,
            C = 1/2 * sum_j (a_j - output_j)^2,
        so its derivative w.r.t. a_j is the linear difference a_j - output_j.
        For every layer l,
            z_l = w_l * a_{l-1} + b_l    and    a_l = \sigma(z_l),
        where a_{l-1} is the "activation" from the previous layer.
        We are ultimately interested in the derivatives w.r.t. the z_l.
        For further details on why dw and db are what they are, see chap. 2 here:
        http://neuralnetworksanddeeplearning.com/chap2.html
        """
        zs, as_ = self.__feed_forward(in_out_pair.get_input())
        # Note: as_[-1] holds the activation of the final output layer.
        l = -1  # -1 is the index of the last layer
        activation_prediction = as_[l]
        cost_derivative = activation_prediction - in_out_pair.output_ground_truth()  # d cost / d a at the final layer
        # Backward pass
        dw = Math.zeros(self.__weights)
        db = Math.zeros(self.__biases)
        dsig = Math.dsigmoid(zs[l])
        delta = cost_derivative * dsig  # d cost / d z at the last layer
        # Here: Last activation minus the ground truth
        dw[l] = np.dot(delta, as_[l - 1].transpose())
        db[l] = delta
        for l in range(-2, -len(self.__biases) - 1, -1):  # remaining layers, last to first
            dsig = Math.dsigmoid(zs[l])
            delta = np.dot(self.__weights[l + 1].transpose(), delta) * dsig
            dw[l] = np.dot(delta, as_[l - 1].transpose())
            db[l] = delta
        return dw, db
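
    # In formulas (chap. 2 of the book linked above), with y the ground truth:
    #     delta_L = (a_L - y) * sigma'(z_L)                  # last layer
    #     delta_l = (w_{l+1}^T . delta_{l+1}) * sigma'(z_l)  # backwards through the layers
    #     dC/dw_l = delta_l . a_{l-1}^T,    dC/db_l = delta_l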

    def __mean_gradient(self, in_out_pairs):
        """
        Compute the gradient for a batch of input/output pairs (e.g. images)
        by averaging the per-pair gradients.
        """
        del_w = Math.zeros(self.__weights)
        del_b = Math.zeros(self.__biases)
        for in_out_pair in in_out_pairs:
            dw, db = self.__gradient(in_out_pair)
            del_w = [acc + d for acc, d in zip(del_w, dw)]
            del_b = [acc + d for acc, d in zip(del_b, db)]
        n = len(in_out_pairs)
        return [d / n for d in del_w], [d / n for d in del_b]
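
    # Averaging (rather than summing) keeps the effective step size eta
    # independent of BATCH_SIZE; this is the "normalized mini batch gradient"
    # mentioned in run_sgd's docstring.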

    def __log_score(self, test_data):
        score = sum(map(self.predicts_correctly, test_data))  # count the successes
        total = len(test_data)
        percentage = round(score * 100.0 / total, 2)
        print(f"[log_score] Test data score: {score}/{total}. ({percentage}%)")

class Config:
    # Script
    # Git clone this (or download just the .gz file): https://github.com/MichalDanielDobrzanski/DeepLearningPython
    DATA_DIRPATH = "/Users/amoogle/Documents/Git/neural_network/DeepLearningPython"
    DATA_FILENAME = "mnist.pkl.gz"
    DATA_FILEPATH = f"{DATA_DIRPATH}/{DATA_FILENAME}"

    # Learning and Network
    EPOCHS = 4
    BATCH_SIZE = 10
    ETA = 3.0
    HIDDEN_LAYER_SIZES = [10, 20, 10]  # list of ints

    # Misc for logging and evaluation
    SOME_INDEX = 98

class Image(IOPair):
    IMAGE_WIDTH = 28
    NUM_PIXELS = IMAGE_WIDTH ** 2
    NUMBER_BASE = 10

    def __init__(self, image_and_gt_pair):
        """
        Class holding an array of grayscale values (= the network's input layer
        format) as well as the ground truth digit displayed on that image.
        """
        self.__pixels = image_and_gt_pair[0]
        self.__gt_label = image_and_gt_pair[1]  # Classification/Label

    def __to_prediction_digit(self, activation_prediction):
        """
        activation_prediction is an array of floats while the return value is an index into that list.
        E.g. [[9.89989735e-03] [1.08126136e-02] [6.17035645e-04] [1.14060381e-02] [5.98185829e-03]
              [7.31337324e-03] [1.61992979e-05] [9.24512441e-01] [9.88453143e-05] [8.47158033e-02]]
        \mapsto 7
        as '9.24512441e-01' is the biggest value.
        """
        return np.argmax(activation_prediction)

    def __digit_to_output_format(self, d_in):
        """
        E.g.
        7 \mapsto [[0], [0], [0], [0], [0], [0], [0], [1], [0], [0]]
        """
        return [[int(d_in == d)] for d in range(self.NUMBER_BASE)]

    def _get_input(self):
        return self.__pixels

    def _output_ground_truth(self):
        return self.__digit_to_output_format(self.__gt_label)

    def _to_output_format(self, activation_prediction):
        pd = self.__to_prediction_digit(activation_prediction)
        return self.__digit_to_output_format(pd)

    def _show(self, activation_prediction) -> None:
        """
        Print the grayscale image as an ascii image,
        and compare the prediction to the ground truth value.
        """
        GRAYSCALE_ASCII = " .',_-+*$#"
        HORIZONTAL_LINE: str = 2 * self.IMAGE_WIDTH * "_"
        # Draw image
        print(HORIZONTAL_LINE)  # top of bounding box
        for idx, pixel in enumerate(self.__pixels):
            end_of_row = (idx + 1) % self.IMAGE_WIDTH == 0
            char = GRAYSCALE_ASCII[min(int(10 * pixel), 9)]  # clamp, in case pixel == 1.0
            print(char, end=" |\n" if end_of_row else " ")
        print(HORIZONTAL_LINE + "|")  # bottom of bounding box
        print(f"[show] Ground truth value is {self.__gt_label}.")
        if self.correct(activation_prediction):
            print("[show] And this was predicted correctly. :)")
        else:
            predicted_digit = self.__to_prediction_digit(activation_prediction)
            print(f"[show] And this was wrongly predicted as {predicted_digit}. :(")

def load_MNIST_data__and_cast():
    """
    Load the pairs of grayscale image array and ground truth for training and test data. The file is found here:
    https://github.com/MichalDanielDobrzanski/DeepLearningPython35/blob/master/mnist.pkl.gz
    It holds 3 data sets, but we won't use the validation one.
    :return: Images.
    """
    print(f"[load_MNIST_data] Trying to load data from <{Config.DATA_FILEPATH}>")
    with gzip.open(Config.DATA_FILEPATH, 'rb') as in_file:
        training_data_gz, _validation_data_gz, test_data_gz = pickle.load(in_file, encoding="latin1")  # Load the "raw" data sets
    print(f"[load_MNIST_data] Data loading successful. len(training_data)={len(training_data_gz[0])}. len(test_data)={len(test_data_gz[0])}")

    def reshape_and_zip(all_images__and__all_gt_labels):
        all_images = all_images__and__all_gt_labels[0]
        assert all(len(img) == Image.NUM_PIXELS for img in all_images)
        all_gt_labels = all_images__and__all_gt_labels[1]
        assert all(num in range(10) for num in all_gt_labels)
        assert len(all_images) == len(all_gt_labels)
        SHAPE = (Image.NUM_PIXELS, 1)
        all_images_ = (np.reshape(img, SHAPE) for img in all_images)
        return zip(all_images_, all_gt_labels)

    def cast_all_to_image(all_images__and__all_gt_labels):
        return [Image(image_and_gt_pair) for image_and_gt_pair in reshape_and_zip(all_images__and__all_gt_labels)]

    return cast_all_to_image(training_data_gz), cast_all_to_image(test_data_gz)
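
# For the standard mnist.pkl.gz split this yields 50000 training Images and
# 10000 test Images (the 10000 validation pairs are dropped above).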

def run_main():
    """
    Unpack data, train and do some examples.
    """
    # Set up network
    INPUT_LAYER_SIZE = Image.NUM_PIXELS
    OUTPUT_LAYER_SIZE = Image.NUMBER_BASE
    net = NeuralNetwork(
        INPUT_LAYER_SIZE,
        Config.HIDDEN_LAYER_SIZES,
        OUTPUT_LAYER_SIZE
    )

    # Train
    training_data, test_data = load_MNIST_data__and_cast()
    print(f"\n[run_main] Starting the training with {Config.EPOCHS} epochs...")
    net.run_sgd(
        Config.EPOCHS,
        Config.BATCH_SIZE,
        Config.ETA,
        training_data
    )

    # Evaluate
    print("[run_main] " + 40 * "*" + " Let's predict some example_imgs.")
    for some_other_index in [Config.SOME_INDEX * n for n in range(1, 8)]:
        img = test_data[some_other_index]
        _is_correct: bool = net.predicts_correctly(img, show=True)
        time.sleep(1)  # Just for clarity

if __name__ == '__main__':
    run_main()
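
# To run: point Config.DATA_FILEPATH at a local copy of mnist.pkl.gz and
# execute this file with Python 3.6+; each epoch logs a score over the
# shuffled training data via __log_score.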