neural_network_stochastic_gradient_descent.py
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

all_data = pd.read_csv("https://tinyurl.com/y2qmhfsr")
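# Assumption inferred from the rest of this script: the CSV holds background
# color values (0-255) in its first three columns and a 0/1 label in its last
# column, with 1 interpreted below as "use a dark font" and 0 as "use a light font".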
# The learning rate controls how big a step we take toward a solution.
# Make it too small and training will take too long to converge.
# Make it too big and it will likely overshoot and miss the solution.
L = 0.05
# Extract the input columns, scale down by 255
all_inputs = (all_data.iloc[:, 0:3].values / 255.0)
all_outputs = all_data.iloc[:, -1].values

# Split train and test data sets
X_train, X_test, Y_train, Y_test = train_test_split(all_inputs, all_outputs, test_size=1 / 3)
n = X_train.shape[0]
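# n is the number of training records; it is used below to draw random
# sample indices during stochastic gradient descent.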
# Build neural network with weights and biases
# with random initialization
w_1 = np.random.rand(3, 3)
w_2 = np.random.rand(1, 3)
b_1 = np.random.rand(3, 1)
b_2 = np.random.rand(1, 1)
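# Architecture: 3 inputs -> 3 hidden nodes (ReLU) -> 1 output node (logistic).
# w_1 is (3, 3) and b_1 is (3, 1) for the hidden layer;
# w_2 is (1, 3) and b_2 is (1, 1) for the output layer.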
# Activation functions
relu = lambda x: np.maximum(x, 0)
logistic = lambda x: 1 / (1 + np.exp(-x))
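# Quick worked examples: relu(-2) = 0 and relu(3) = 3, while logistic(0) = 0.5
# and logistic squashes any real number into the (0, 1) range.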
# Runs inputs through the neural network to get predicted outputs
def forward_prop(X):
    Z1 = w_1 @ X + b_1
    A1 = relu(Z1)
    Z2 = w_2 @ A1 + b_2
    A2 = logistic(Z2)
    return Z1, A1, Z2, A2
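# Shapes: for an input X of shape (3, m), Z1 and A1 are the (3, m) hidden-layer
# values and Z2 and A2 are the (1, m) output-layer values, so A2 holds one
# predicted probability per column of X.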
# Derivatives of activation functions
d_relu = lambda x: x > 0
d_logistic = lambda x: np.exp(-x) / (1 + np.exp(-x)) ** 2
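# Note: d_relu returns a boolean array that behaves as 0/1 in arithmetic, and
# d_logistic(x) is equivalent to logistic(x) * (1 - logistic(x)).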
# Returns slopes for weights and biases
# using the chain rule
def backward_prop(Z1, A1, Z2, A2, X, Y):
    dC_dA2 = 2 * A2 - 2 * Y
    dA2_dZ2 = d_logistic(Z2)
    dZ2_dA1 = w_2
    dZ2_dW2 = A1
    dZ2_dB2 = 1
    dA1_dZ1 = d_relu(Z1)
    dZ1_dW1 = X
    dZ1_dB1 = 1

    dC_dW2 = dC_dA2 @ dA2_dZ2 @ dZ2_dW2.T
    dC_dB2 = dC_dA2 @ dA2_dZ2 * dZ2_dB2
    dC_dA1 = dC_dA2 @ dA2_dZ2 @ dZ2_dA1
    dC_dW1 = dC_dA1 @ dA1_dZ1 @ dZ1_dW1.T
    dC_dB1 = dC_dA1 @ dA1_dZ1 * dZ1_dB1

    return dC_dW1, dC_dB1, dC_dW2, dC_dB2
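# Each returned gradient is a chain-rule product of local derivatives, e.g.
# dC/dW2 = dC/dA2 * dA2/dZ2 * dZ2/dW2, where C = (A2 - Y)^2 is the squared-error
# cost for a single training sample (hence dC_dA2 = 2 * A2 - 2 * Y above).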
# Execute stochastic gradient descent
for i in range(100_000):
    # randomly select one of the training records
    idx = np.random.choice(n, 1, replace=False)
    X_sample = X_train[idx].transpose()
    Y_sample = Y_train[idx]

    # run the randomly selected training record through the neural network
    Z1, A1, Z2, A2 = forward_prop(X_sample)

    # distribute error through backpropagation
    # and return slopes for weights and biases
    dW1, dB1, dW2, dB2 = backward_prop(Z1, A1, Z2, A2, X_sample, Y_sample)

    # update weights and biases
    w_1 -= L * dW1
    b_1 -= L * dB1
    w_2 -= L * dW2
    b_2 -= L * dB2
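# Because each update is computed from a single randomly chosen sample, this is
# stochastic gradient descent: individual updates are noisy, but over many
# iterations they descend toward a minimum of the cost.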
# Calculate accuracy
test_predictions = forward_prop(X_test.transpose())[3]  # grab only A2
test_comparisons = np.equal((test_predictions >= .5).flatten().astype(int), Y_test)
accuracy = sum(test_comparisons.astype(int) / X_test.shape[0])
print("ACCURACY: ", accuracy)
# Interact and test with new colors
def predict_probability(r, g, b):
    X = np.array([[r, g, b]]).transpose() / 255
    Z1, A1, Z2, A2 = forward_prop(X)
    return A2

def predict_font_shade(r, g, b):
    output_values = predict_probability(r, g, b)
    if output_values > .5:
        return "DARK"
    else:
        return "LIGHT"
while True:
    col_input = input("Predict light or dark font. Input values R,G,B: ")
    (r, g, b) = col_input.split(",")
    print(predict_font_shade(int(r), int(g), int(b)))