import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
np.random.seed(42)
# Load the data
dataset_size = 10_000
diamonds = sns.load_dataset("diamonds")

# Extract the feature (carat) and the target (price)
xy = diamonds[["carat", "price"]].values
np.random.shuffle(xy)  # Shuffle once before subsampling
xy = xy[:dataset_size]

# Standardize both columns so the gradients stay well-scaled;
# the mean/std statistics are reused later to denormalize predictions
mean = xy.mean(axis=0)
std = xy.std(axis=0)
xy = (xy - mean) / std

# Split the data into train and test sets
train_size = int(0.8 * dataset_size)
train_xy, test_xy = xy[:train_size], xy[train_size:]
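
# Quick sanity check of the preprocessing (an added sketch, not part of the
# original gist): after standardization each column should have mean ~0 and
# std ~1, and the 80/20 split should give 8,000 train and 2,000 test rows.
assert train_xy.shape == (8_000, 2) and test_xy.shape == (2_000, 2)
print("Column means (~0):", xy.mean(axis=0))
print("Column stds  (~1):", xy.std(axis=0))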
def model(m, x, b):
    """Simple linear model"""
    return m * x + b


def loss(y_true, y_pred):
    """Mean squared error"""
    return np.mean((y_true - y_pred) ** 2)
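
# Tiny illustrative check of the two helpers (an added sketch, not part of the
# original gist): with m=2 and b=0.5, inputs [1, 3] map to [2.5, 6.5], and the
# MSE against targets [2, 6] is mean([0.25, 0.25]) = 0.25.
assert np.allclose(model(2.0, np.array([1.0, 3.0]), 0.5), [2.5, 6.5])
assert np.isclose(loss(np.array([2.0, 6.0]), np.array([2.5, 6.5])), 0.25)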
def stochastic_gradient_descent_with_momentum(
    x,
    y,
    epochs=100,
    learning_rate=0.01,
    batch_size=32,
    stopping_threshold=1e-6,
    momentum=0.9,
):
    """
    SGD with momentum, support for mini-batches, and gradient clipping.
    """
    # Initialize the model parameters randomly
    m = np.random.randn()
    b = np.random.randn()

    # Initialize velocity terms for the momentum updates
    v_m = 0
    v_b = 0

    n = len(x)
    previous_loss = np.inf

    for _ in range(epochs):
        # Reshuffle the data at the start of each epoch
        indices = np.random.permutation(n)
        x = x[indices]
        y = y[indices]

        for j in range(0, n, batch_size):
            x_batch = x[j : j + batch_size]
            y_batch = y[j : j + batch_size]

            # Compute the MSE gradients w.r.t. m and b; since we minimize with
            # "m -= v_m" below, the gradients must carry the leading minus sign
            y_pred = model(m, x_batch, b)
            m_gradient = -2 * np.mean(x_batch * (y_batch - y_pred))
            b_gradient = -2 * np.mean(y_batch - y_pred)

            # Gradient clipping to keep individual updates bounded
            clip_value = 1.0
            m_gradient = np.clip(m_gradient, -clip_value, clip_value)
            b_gradient = np.clip(b_gradient, -clip_value, clip_value)

            # Update velocity terms: an exponentially decayed sum of past gradients
            v_m = momentum * v_m + learning_rate * m_gradient
            v_b = momentum * v_b + learning_rate * b_gradient

            # Update the model parameters using the velocities
            m -= v_m
            b -= v_b

        # Compute the full-dataset loss and check for convergence
        y_pred = model(m, x, b)
        current_loss = loss(y, y_pred)
        if abs(previous_loss - current_loss) < stopping_threshold:
            break
        previous_loss = current_loss

    return m, b
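
# Illustration of the momentum update in isolation (an added sketch, not part
# of the original gist): on the toy quadratic f(w) = w**2 with gradient 2*w,
# the velocity accumulates an exponentially decayed sum of past gradients, so
# the iterate glides toward the minimum at w = 0 instead of stepping greedily.
w, v = 5.0, 0.0
for _ in range(200):
    v = 0.9 * v + 0.01 * (2 * w)  # same form as the v_m / v_b updates above
    w -= v
print(f"Toy quadratic minimum is w = 0; momentum SGD reaches w = {w:.5f}")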
# Find optimal values for m and b with SGD and momentum
m, b = stochastic_gradient_descent_with_momentum(
    train_xy[:, 0],
    train_xy[:, 1],
    learning_rate=0.1,
    epochs=10000,
    batch_size=5012,
    momentum=0.9,
)
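
# Sanity check of the optimized parameters (an added sketch, not part of the
# original gist): for plain linear regression the closed-form least-squares
# fit is available via np.polyfit, so SGD should land close to it.
m_ls, b_ls = np.polyfit(train_xy[:, 0], train_xy[:, 1], deg=1)
print(f"SGD fit:           m = {m:.4f}, b = {b:.4f}")
print(f"Least-squares fit: m = {m_ls:.4f}, b = {b_ls:.4f}")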
# Make predictions on the test set
y_preds = model(m, test_xy[:, 0], b)

# Compute and print the loss on the normalized scale
mean_squared_error = loss(test_xy[:, 1], y_preds)
print(f"Normalized RMSE: {mean_squared_error**0.5}")

# Denormalize the predictions and compute the RMSE in original price units
y_preds_denormalized = y_preds * std[1] + mean[1]
y_true_denormalized = test_xy[:, 1] * std[1] + mean[1]
actual_mse = np.mean((y_true_denormalized - y_preds_denormalized) ** 2)
print(f"Actual RMSE: {actual_mse**0.5}")
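
# Optional visualization (an added sketch, not part of the original gist):
# matplotlib is imported above but otherwise unused, so plot the test points
# against the fitted line on the normalized scale.
plt.scatter(test_xy[:, 0], test_xy[:, 1], s=5, alpha=0.3, label="Test data")
xs = np.linspace(test_xy[:, 0].min(), test_xy[:, 0].max(), 100)
plt.plot(xs, model(m, xs, b), color="red", label="Fitted line")
plt.xlabel("Carat (normalized)")
plt.ylabel("Price (normalized)")
plt.legend()
plt.show()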