import numpy as np
# --- Utility functions ---
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    # Assumes x has already had sigmoid applied
    return x * (1 - x)

def mse_loss(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)
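
# Quick sanity check (an added sketch, not part of the original gist):
# the sigmoid's slope peaks at 0.25 where its output is 0.5, i.e. at x = 0.
assert np.isclose(sigmoid_derivative(sigmoid(0.0)), 0.25)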
# --- Dataset: XOR problem ---
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
y = np.array([
    [0],
    [1],
    [1],
    [0],
])
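
# XOR is not linearly separable, so a single-layer perceptron cannot fit it;
# the hidden layer below is what makes this dataset learnable.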
# --- Network architecture ---
np.random.seed(42)
input_size = 2
hidden_size = 2
output_size = 1
learning_rate = 0.1
# Weight initialization
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros((1, output_size))
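
# Shapes: W1 is (input_size, hidden_size) = (2, 2), W2 is
# (hidden_size, output_size) = (2, 1). The biases start at zero; the
# normally distributed weights break the symmetry between hidden units.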
# --- Training loop ---
epochs = 100000
for epoch in range(epochs):
    # --- Forward pass ---
    z1 = np.dot(X, W1) + b1   # (4, 2) hidden pre-activations
    a1 = sigmoid(z1)          # (4, 2) hidden activations
    z2 = np.dot(a1, W2) + b2  # (4, 1) output pre-activation
    a2 = sigmoid(z2)          # (4, 1) network output
    loss = mse_loss(y, a2)
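
    # The backward pass below is the chain rule applied layer by layer.
    # For the output layer:
    #   dL/da2 = 2 * (a2 - y) / N      (gradient of the MSE, N = y.size)
    #   da2/dz2 = a2 * (1 - a2)        (sigmoid derivative on activations)
    #   dL/dW2  = a1.T @ (dL/da2 * da2/dz2)
    # The hidden layer repeats the pattern with the error pushed back
    # through W2.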
    # --- Backward pass ---
    d_loss_a2 = 2 * (a2 - y) / y.size
    d_a2_z2 = sigmoid_derivative(a2)
    d_z2_W2 = a1
    d_z2 = d_loss_a2 * d_a2_z2
    d_W2 = np.dot(d_z2_W2.T, d_z2)
    d_b2 = np.sum(d_z2, axis=0, keepdims=True)  # bias broadcasts over the batch, so its gradient sums over it
    d_a1 = np.dot(d_z2, W2.T)
    d_z1 = d_a1 * sigmoid_derivative(a1)
    d_W1 = np.dot(X.T, d_z1)
    d_b1 = np.sum(d_z1, axis=0, keepdims=True)
    # --- Update weights ---
    W2 -= learning_rate * d_W2
    b2 -= learning_rate * d_b2
    W1 -= learning_rate * d_W1
    b1 -= learning_rate * d_b1
    # --- Logging ---
    if epoch % 1000 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")
# --- Final predictions ---
print("\nFinal predictions:")
print(a2)
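
# Added convenience (not in the original gist): threshold the sigmoid
# outputs at 0.5 to read them as hard 0/1 XOR labels.
print("\nThresholded predictions:")
print((a2 > 0.5).astype(int))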