@thomasnield
Last active October 30, 2021 19:48
neural_network_stochastic_gradient_descent.py
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
all_data = pd.read_csv("https://tinyurl.com/y2qmhfsr")
# The learning rate controls how big each update step is.
# Make it too small and training will take too long to converge.
# Make it too big and it will likely overshoot and miss the solution.
L = 0.05
# Extract the input columns, scale down by 255
all_inputs = (all_data.iloc[:, 0:3].values / 255.0)
all_outputs = all_data.iloc[:, -1].values
# Split train and test data sets
X_train, X_test, Y_train, Y_test = train_test_split(all_inputs, all_outputs, test_size=1 / 3)
n = X_train.shape[0]
# Build neural network with weights and biases
# with random initialization
w_1 = np.random.rand(3, 3)
w_2 = np.random.rand(1, 3)
b_1 = np.random.rand(3, 1)
b_2 = np.random.rand(1, 1)
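# These shapes define a 3-3-1 architecture: w_1 (3x3) maps the three RGB inputs
# to three hidden nodes, w_2 (1x3) maps the hidden layer to a single output node,
# and b_1, b_2 are the matching bias vectors.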
# Activation functions
relu = lambda x: np.maximum(x, 0)
logistic = lambda x: 1 / (1 + np.exp(-x))
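# ReLU is applied to the hidden layer and the logistic (sigmoid) function to the
# output layer, which squashes the final value into a 0-1 probability.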
# Runs inputs through the neural network to get predicted outputs
def forward_prop(X):
    Z1 = w_1 @ X + b_1
    A1 = relu(Z1)
    Z2 = w_2 @ A1 + b_2
    A2 = logistic(Z2)
    return Z1, A1, Z2, A2
# Derivatives of Activation functions
d_relu = lambda x: x > 0
d_logistic = lambda x: np.exp(-x) / (1 + np.exp(-x)) ** 2
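# Note: d_relu returns a boolean array, which NumPy treats as 0s and 1s in arithmetic,
# and d_logistic(x) is algebraically the same as logistic(x) * (1 - logistic(x)).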
# returns slopes for weights and biases
# using chain rule
def backward_prop(Z1, A1, Z2, A2, X, Y):
    dC_dA2 = 2 * A2 - 2 * Y
    dA2_dZ2 = d_logistic(Z2)
    dZ2_dA1 = w_2
    dZ2_dW2 = A1
    dZ2_dB2 = 1
    dA1_dZ1 = d_relu(Z1)
    dZ1_dW1 = X
    dZ1_dB1 = 1
    dC_dW2 = dC_dA2 @ dA2_dZ2 @ dZ2_dW2.T
    dC_dB2 = dC_dA2 @ dA2_dZ2 * dZ2_dB2
    dC_dA1 = dC_dA2 @ dA2_dZ2 @ dZ2_dA1
    dC_dW1 = dC_dA1 @ dA1_dZ1 @ dZ1_dW1.T
    dC_dB1 = dC_dA1 @ dA1_dZ1 * dZ1_dB1
    return dC_dW1, dC_dB1, dC_dW2, dC_dB2
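# Each gradient above is a chain rule product, e.g. for the output weights:
#   dC/dW2 = dC/dA2 * dA2/dZ2 * dZ2/dW2
# where C = (A2 - Y)^2 is the squared-error cost for the sampled record,
# which is why dC_dA2 = 2 * A2 - 2 * Y.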
# Execute gradient descent
for i in range(100_000):
    # randomly select one of the training records
    idx = np.random.choice(n, 1, replace=False)
    X_sample = X_train[idx].transpose()
    Y_sample = Y_train[idx]
    # run the randomly selected training record through the neural network
    Z1, A1, Z2, A2 = forward_prop(X_sample)
    # distribute error through backpropagation
    # and return slopes for weights and biases
    dW1, dB1, dW2, dB2 = backward_prop(Z1, A1, Z2, A2, X_sample, Y_sample)
    # update weights and biases
    w_1 -= L * dW1
    b_1 -= L * dB1
    w_2 -= L * dW2
    b_2 -= L * dB2
# Calculate accuracy
test_predictions = forward_prop(X_test.transpose())[3] # grab only A2
test_comparisons = np.equal((test_predictions >= .5).flatten().astype(int), Y_test)
accuracy = test_comparisons.astype(int).sum() / X_test.shape[0]
print("ACCURACY: ", accuracy)
# Interact and test with new colors
def predict_probability(r, g, b):
    X = np.array([[r, g, b]]).transpose() / 255
    Z1, A1, Z2, A2 = forward_prop(X)
    return A2
def predict_font_shade(r, g, b):
    output_values = predict_probability(r, g, b)
    if output_values > .5:
        return "DARK"
    else:
        return "LIGHT"
while True:
    col_input = input("Predict light or dark font. Input values R,G,B: ")
    (r, g, b) = col_input.split(",")
    print(predict_font_shade(int(r), int(g), int(b)))
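# Illustrative session (example only; the exact prediction depends on the random
# weight initialization and the training run):
#   Predict light or dark font. Input values R,G,B: 0,0,0
#   LIGHT
#   Predict light or dark font. Input values R,G,B: 255,255,255
#   DARK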