import numpy as np
# --- Utility functions ---
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    # Assumes x has already had sigmoid applied
    return x * (1 - x)

def mse_loss(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)
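
# Quick sanity check (an added sketch, not part of the original gist):
# the sigmoid's slope peaks at 0.25 where its output is 0.5, i.e. at x = 0.
assert np.isclose(sigmoid_derivative(sigmoid(0.0)), 0.25)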
# --- Dataset: XOR problem ---
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
y = np.array([
    [0],
    [1],
    [1],
    [0],
])
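
# XOR is not linearly separable, so a single-layer perceptron cannot fit it;
# the hidden layer below is what makes this dataset learnable.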
# --- Network architecture ---
np.random.seed(42)
input_size = 2
hidden_size = 2
output_size = 1
learning_rate = 0.1
# Weight initialization
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros((1, output_size))
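
# Shapes: W1 is (input_size, hidden_size) = (2, 2), W2 is
# (hidden_size, output_size) = (2, 1). The biases start at zero; the
# normally distributed weights break the symmetry between hidden units.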
# --- Training loop ---
epochs = 100000
for epoch in range(epochs):
    # --- Forward pass ---
    z1 = np.dot(X, W1) + b1   # (4, 2) hidden pre-activations
    a1 = sigmoid(z1)          # (4, 2) hidden activations
    z2 = np.dot(a1, W2) + b2  # (4, 1) output pre-activation
    a2 = sigmoid(z2)          # (4, 1) network output
    loss = mse_loss(y, a2)
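
    # The backward pass below is the chain rule applied layer by layer.
    # For the output layer:
    #   dL/da2 = 2 * (a2 - y) / N      (gradient of the MSE, N = y.size)
    #   da2/dz2 = a2 * (1 - a2)        (sigmoid derivative on activations)
    #   dL/dW2  = a1.T @ (dL/da2 * da2/dz2)
    # The hidden layer repeats the pattern with the error pushed back
    # through W2.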
    # --- Backward pass ---
    d_loss_a2 = 2 * (a2 - y) / y.size
    d_a2_z2 = sigmoid_derivative(a2)
    d_z2_W2 = a1
    d_z2 = d_loss_a2 * d_a2_z2
    d_W2 = np.dot(d_z2_W2.T, d_z2)
    d_b2 = np.sum(d_z2, axis=0, keepdims=True)  # bias broadcasts over the batch, so its gradient sums over it
    d_a1 = np.dot(d_z2, W2.T)
    d_z1 = d_a1 * sigmoid_derivative(a1)
    d_W1 = np.dot(X.T, d_z1)
    d_b1 = np.sum(d_z1, axis=0, keepdims=True)
    # --- Update weights ---
    W2 -= learning_rate * d_W2
    b2 -= learning_rate * d_b2
    W1 -= learning_rate * d_W1
    b1 -= learning_rate * d_b1
    # --- Logging ---
    if epoch % 1000 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")
# --- Final predictions ---
print("\nFinal predictions:")
print(a2)
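
# Added convenience (not in the original gist): threshold the sigmoid
# outputs at 0.5 to read them as hard 0/1 XOR labels.
print("\nThresholded predictions:")
print((a2 > 0.5).astype(int))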