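"""Compare learning-rate schedules on Fashion MNIST with Keras and Neptune.

The script trains the same small dense network with a constant learning rate,
polynomial (linear), time-based, exponential, and step-based decay schedules,
plus the Adam optimizer, and logs metrics and plots to Neptune. It assumes a
`.env` file (or environment variables) providing NEPTUNE_PROJECT_NAME and
NEPTUNE_API_TOKEN.
"""
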
# 1. Import statements and environment setup
import os
import random
import warnings
from typing import Callable, Tuple

import matplotlib.pyplot as plt
import neptune
import numpy as np
import tensorflow as tf
from dotenv import load_dotenv
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense
from neptune.integrations.tensorflow_keras import NeptuneCallback

# Load environment variables
load_dotenv()

# Ignore warnings
warnings.filterwarnings("ignore")


# 2. Configuration
class Config:
    SEED = 42
    EPOCHS = 100
    BATCH_SIZE = 512
    INITIAL_LEARNING_RATE = 0.1
    NEPTUNE_PROJECT_NAME = os.getenv("NEPTUNE_PROJECT_NAME")
    NEPTUNE_API_TOKEN = os.getenv("NEPTUNE_API_TOKEN")


# 3. Utility functions
def reset_random_seeds(seed: int = Config.SEED) -> None:
    # Seed Python, NumPy, and TensorFlow so runs are reproducible.
    os.environ["PYTHONHASHSEED"] = str(seed)
    tf.random.set_seed(seed)
    np.random.seed(seed)
    random.seed(seed)


def init_neptune_run(custom_id: str = None, tags: list = None) -> neptune.Run:
    return neptune.init_run(
        project=Config.NEPTUNE_PROJECT_NAME,
        api_token=Config.NEPTUNE_API_TOKEN,
        tags=tags,
        custom_run_id=custom_id,
    )


def get_lr_metric(optimizer: keras.optimizers.Optimizer) -> Callable:
    # Expose the optimizer's current learning rate as a Keras metric so it is
    # recorded in the training history (and in Neptune) under the name "lr".
    def lr(y_true, y_pred):
        return (
            optimizer.learning_rate(optimizer.iterations)
            if callable(optimizer.learning_rate)
            else optimizer.learning_rate
        )

    return lr


def plot_lr(history: keras.callbacks.History) -> plt.Figure:
    learning_rate = history.history["lr"]
    epochs = range(1, len(learning_rate) + 1)

    fig, ax = plt.subplots()
    ax.plot(epochs, learning_rate)
    ax.set_title("Learning rate")
    ax.set_xlabel("Epochs")
    ax.set_ylabel("Learning rate")
    return fig


def plot_performance(
    history: keras.callbacks.History, current_lr_scheduler: str
) -> plt.Figure:
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))

    ax1.plot(history.history["loss"])
    ax1.plot(history.history["val_loss"])
    ax1.legend(["Train Loss", "Validation Loss"])
    ax1.set_title(f"Loss Curves ({current_lr_scheduler})")
    ax1.set_xlabel("Epoch")
    ax1.set_ylabel("Loss")

    ax2.plot(history.history["accuracy"])
    ax2.plot(history.history["val_accuracy"])
    ax2.legend(["Train Accuracy", "Validation Accuracy"])
    ax2.set_title(f"Accuracy Curves ({current_lr_scheduler})")
    ax2.set_xlabel("Epoch")
    ax2.set_ylabel("Accuracy")
    return fig


def plot_neptune(
    history: keras.callbacks.History, decay_title: str, npt_run: neptune.Run
) -> None:
    # Upload the learning-rate and performance plots to the Neptune run, then
    # display and close the figures locally.
    lr_fig = plot_lr(history)
    npt_run[f"Learning Rate Change ({decay_title})"].upload(
        neptune.types.File.as_image(lr_fig)
    )
    plt.figure(lr_fig.number)
    plt.show()
    plt.close(lr_fig)

    perf_fig = plot_performance(history, decay_title)
    npt_run[f"Training Performance Curves ({decay_title})"].upload(
        neptune.types.File.as_image(perf_fig)
    )
    plt.figure(perf_fig.number)
    plt.show()
    plt.close(perf_fig)


# 4. Data loading and preprocessing
def load_and_preprocess_data() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    fashion_mnist = keras.datasets.fashion_mnist
    (x_train_full, y_train_full), (x_test, y_test) = fashion_mnist.load_data()

    # Sample a random 20,000-image subset of the 60,000 training images and
    # scale pixel values to [0, 1].
    train_idx = random.sample(range(60000), 20000)
    x_train, y_train = x_train_full[train_idx] / 255.0, y_train_full[train_idx]
    x_test = x_test / 255.0
    return x_train, y_train, x_test, y_test


# 5. Model definition
def create_model() -> Sequential:
    model = Sequential(
        [
            Flatten(input_shape=(28, 28)),
            Dense(512, activation="relu"),
            Dense(200, activation="relu"),
            Dense(10, activation="softmax"),
        ]
    )
    return model


# 6. Learning rate schedulers
class LRPolynomialDecay:
    """Polynomial decay: lr = initial_lr * (1 - epoch / epochs) ** power.

    With power=1.0 this is a linear decay from the initial rate down to zero.
    """

    def __init__(
        self,
        epochs: int = Config.EPOCHS,
        initial_learning_rate: float = Config.INITIAL_LEARNING_RATE,
        power: float = 1.0,
    ):
        self.epochs = epochs
        self.initial_learning_rate = initial_learning_rate
        self.power = power

    def __call__(self, epoch):
        decay = (1 - (epoch / float(self.epochs))) ** self.power
        return float(self.initial_learning_rate * decay)


def lr_time_based_decay(epoch, lr):
    # Time-based decay: divide the previous epoch's rate by (1 + decay * epoch).
    decay = Config.INITIAL_LEARNING_RATE / Config.EPOCHS
    return lr / (1 + decay * epoch)


def lr_exp_decay(epoch):
    # Exponential decay: lr = initial_lr * exp(-k * epoch).
    k = 0.1
    return Config.INITIAL_LEARNING_RATE * np.exp(-k * epoch)


def lr_step_based_decay(epoch):
    # Step-based decay: halve the learning rate every 20 epochs.
    drop_rate = 0.5
    epochs_drop = 20.0
    decay_factor = np.power(drop_rate, np.floor(epoch / epochs_drop))
    return Config.INITIAL_LEARNING_RATE * decay_factor
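

# The schedulers above map an epoch index to a learning rate. They are wrapped
# in keras.callbacks.LearningRateScheduler inside train_model, so the learning
# rate is updated once per epoch rather than per batch.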


# 7. Training functions
def train_model(
    model: Sequential,
    x_train: np.ndarray,
    y_train: np.ndarray,
    optimizer: keras.optimizers.Optimizer,
    lr_scheduler: Callable = None,
    run: neptune.Run = None,
) -> keras.callbacks.History:
    lr_metric = get_lr_metric(optimizer)
    model.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy", lr_metric],
    )

    callbacks = []
    if run:
        callbacks.append(NeptuneCallback(run=run, base_namespace="metrics"))
    if lr_scheduler:
        callbacks.append(keras.callbacks.LearningRateScheduler(lr_scheduler, verbose=0))

    history = model.fit(
        x_train,
        y_train,
        epochs=Config.EPOCHS,
        batch_size=Config.BATCH_SIZE,
        validation_split=0.2,
        callbacks=callbacks,
        verbose=0,
    )

    # Print final train/validation metrics
    print("Final training accuracy:", history.history["accuracy"][-1])
    print("Final validation accuracy:", history.history["val_accuracy"][-1])
    print("Final training loss:", history.history["loss"][-1])
    print("Final validation loss:", history.history["val_loss"][-1])
    return history


# 8. Experiment runners
def run_constant_lr_experiment(x_train: np.ndarray, y_train: np.ndarray) -> None:
    run = init_neptune_run(tags=["constant", "baseline"])
    model = create_model()
    optimizer = keras.optimizers.SGD(learning_rate=Config.INITIAL_LEARNING_RATE)
    history = train_model(model, x_train, y_train, optimizer, run=run)
    plot_neptune(history, "Constant", run)
    run.stop()


def run_polynomial_decay_experiment(x_train: np.ndarray, y_train: np.ndarray) -> None:
    run = init_neptune_run(tags=["PolynomialDecay"])
    model = create_model()
    optimizer = keras.optimizers.SGD(learning_rate=Config.INITIAL_LEARNING_RATE)
    lr_scheduler = LRPolynomialDecay(power=1.0)
    history = train_model(model, x_train, y_train, optimizer, lr_scheduler, run)
    plot_neptune(history, "Linear Decay", run)
    run.stop()


def run_time_based_decay_experiment(x_train: np.ndarray, y_train: np.ndarray) -> None:
    run = init_neptune_run(tags=["TimeBasedDecay"])
    model = create_model()
    optimizer = keras.optimizers.SGD(learning_rate=Config.INITIAL_LEARNING_RATE)
    history = train_model(model, x_train, y_train, optimizer, lr_time_based_decay, run)
    plot_neptune(history, "Time-Based Decay", run)
    run.stop()


def run_exp_decay_experiment(x_train: np.ndarray, y_train: np.ndarray) -> None:
    run = init_neptune_run(tags=["ExponentialDecay"])
    model = create_model()
    optimizer = keras.optimizers.SGD(learning_rate=Config.INITIAL_LEARNING_RATE)
    history = train_model(model, x_train, y_train, optimizer, lr_exp_decay, run)
    plot_neptune(history, "Exponential Decay", run)
    run.stop()


def run_step_based_decay_experiment(x_train: np.ndarray, y_train: np.ndarray) -> None:
    run = init_neptune_run(tags=["StepBasedDecay"])
    model = create_model()
    optimizer = keras.optimizers.SGD(learning_rate=Config.INITIAL_LEARNING_RATE)
    history = train_model(model, x_train, y_train, optimizer, lr_step_based_decay, run)
    plot_neptune(history, "Step-Based Decay", run)
    run.stop()


def run_adam_experiment(x_train: np.ndarray, y_train: np.ndarray) -> None:
    run = init_neptune_run(tags=["Adam"])
    model = create_model()
    optimizer = keras.optimizers.Adam(learning_rate=0.01)
    history = train_model(model, x_train, y_train, optimizer, run=run)
    plot_neptune(history, "Adam Optimizer", run)
    run.stop()


# 9. Main execution
if __name__ == "__main__":
    reset_random_seeds()
    x_train, y_train, x_test, y_test = load_and_preprocess_data()

    experiments = [
        run_constant_lr_experiment,
        run_polynomial_decay_experiment,
        run_time_based_decay_experiment,
        run_exp_decay_experiment,
        run_step_based_decay_experiment,
        run_adam_experiment,
    ]

    for experiment in experiments:
        experiment(x_train, y_train)
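
# Note: x_test and y_test are loaded and scaled but never evaluated above; each
# experiment relies on the 20% validation split inside train_model. A hold-out
# evaluation could be added with, e.g., model.evaluate(x_test, y_test) if the
# trained model were returned from the experiment runners.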