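"""Compare learning-rate schedules on Fashion MNIST with Keras and Neptune.

The script trains the same small dense network with a constant learning rate,
polynomial (linear), time-based, exponential, and step-based decay schedules,
plus the Adam optimizer, and logs metrics and plots to Neptune. It assumes a
`.env` file (or environment variables) providing NEPTUNE_PROJECT_NAME and
NEPTUNE_API_TOKEN.
"""
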
# 1. Import statements and environment setup
import os
import random
import warnings
from typing import Callable, Tuple

import matplotlib.pyplot as plt
import neptune
import numpy as np
import tensorflow as tf
from dotenv import load_dotenv
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense
from neptune.integrations.tensorflow_keras import NeptuneCallback

# Load environment variables
load_dotenv()

# Ignore warnings
warnings.filterwarnings("ignore")


# 2. Configuration
class Config:
    SEED = 42
    EPOCHS = 100
    BATCH_SIZE = 512
    INITIAL_LEARNING_RATE = 0.1
    NEPTUNE_PROJECT_NAME = os.getenv("NEPTUNE_PROJECT_NAME")
    NEPTUNE_API_TOKEN = os.getenv("NEPTUNE_API_TOKEN")


# 3. Utility functions
def reset_random_seeds(seed: int = Config.SEED) -> None:
    # Seed Python, NumPy, and TensorFlow so runs are reproducible.
    os.environ["PYTHONHASHSEED"] = str(seed)
    tf.random.set_seed(seed)
    np.random.seed(seed)
    random.seed(seed)


def init_neptune_run(custom_id: str = None, tags: list = None) -> neptune.Run:
    return neptune.init_run(
        project=Config.NEPTUNE_PROJECT_NAME,
        api_token=Config.NEPTUNE_API_TOKEN,
        tags=tags,
        custom_run_id=custom_id,
    )


def get_lr_metric(optimizer: keras.optimizers.Optimizer) -> Callable:
    # Expose the optimizer's current learning rate as a Keras metric so it is
    # recorded in the training history (and in Neptune) under the name "lr".
    def lr(y_true, y_pred):
        return (
            optimizer.learning_rate(optimizer.iterations)
            if callable(optimizer.learning_rate)
            else optimizer.learning_rate
        )

    return lr


def plot_lr(history: keras.callbacks.History) -> plt.Figure:
    learning_rate = history.history["lr"]
    epochs = range(1, len(learning_rate) + 1)

    fig, ax = plt.subplots()
    ax.plot(epochs, learning_rate)
    ax.set_title("Learning rate")
    ax.set_xlabel("Epochs")
    ax.set_ylabel("Learning rate")
    return fig


def plot_performance(
    history: keras.callbacks.History, current_lr_scheduler: str
) -> plt.Figure:
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))

    ax1.plot(history.history["loss"])
    ax1.plot(history.history["val_loss"])
    ax1.legend(["Train Loss", "Validation Loss"])
    ax1.set_title(f"Loss Curves ({current_lr_scheduler})")
    ax1.set_xlabel("Epoch")
    ax1.set_ylabel("Loss")

    ax2.plot(history.history["accuracy"])
    ax2.plot(history.history["val_accuracy"])
    ax2.legend(["Train Accuracy", "Validation Accuracy"])
    ax2.set_title(f"Accuracy Curves ({current_lr_scheduler})")
    ax2.set_xlabel("Epoch")
    ax2.set_ylabel("Accuracy")
    return fig


def plot_neptune(
    history: keras.callbacks.History, decay_title: str, npt_run: neptune.Run
) -> None:
    # Upload the learning-rate and performance plots to the Neptune run, then
    # display and close the figures locally.
    lr_fig = plot_lr(history)
    npt_run[f"Learning Rate Change ({decay_title})"].upload(
        neptune.types.File.as_image(lr_fig)
    )
    plt.figure(lr_fig.number)
    plt.show()
    plt.close(lr_fig)

    perf_fig = plot_performance(history, decay_title)
    npt_run[f"Training Performance Curves ({decay_title})"].upload(
        neptune.types.File.as_image(perf_fig)
    )
    plt.figure(perf_fig.number)
    plt.show()
    plt.close(perf_fig)


# 4. Data loading and preprocessing
def load_and_preprocess_data() -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    fashion_mnist = keras.datasets.fashion_mnist
    (x_train_full, y_train_full), (x_test, y_test) = fashion_mnist.load_data()

    # Sample a random 20,000-image subset of the 60,000 training images and
    # scale pixel values to [0, 1].
    train_idx = random.sample(range(60000), 20000)
    x_train, y_train = x_train_full[train_idx] / 255.0, y_train_full[train_idx]
    x_test = x_test / 255.0
    return x_train, y_train, x_test, y_test


# 5. Model definition
def create_model() -> Sequential:
    model = Sequential(
        [
            Flatten(input_shape=(28, 28)),
            Dense(512, activation="relu"),
            Dense(200, activation="relu"),
            Dense(10, activation="softmax"),
        ]
    )
    return model


# 6. Learning rate schedulers
class LRPolynomialDecay:
    """Polynomial decay: lr = initial_lr * (1 - epoch / epochs) ** power.

    With power=1.0 this is a linear decay from the initial rate down to zero.
    """

    def __init__(
        self,
        epochs: int = Config.EPOCHS,
        initial_learning_rate: float = Config.INITIAL_LEARNING_RATE,
        power: float = 1.0,
    ):
        self.epochs = epochs
        self.initial_learning_rate = initial_learning_rate
        self.power = power

    def __call__(self, epoch):
        decay = (1 - (epoch / float(self.epochs))) ** self.power
        return float(self.initial_learning_rate * decay)


def lr_time_based_decay(epoch, lr):
    # Time-based decay: divide the previous epoch's rate by (1 + decay * epoch).
    decay = Config.INITIAL_LEARNING_RATE / Config.EPOCHS
    return lr / (1 + decay * epoch)


def lr_exp_decay(epoch):
    # Exponential decay: lr = initial_lr * exp(-k * epoch).
    k = 0.1
    return Config.INITIAL_LEARNING_RATE * np.exp(-k * epoch)


def lr_step_based_decay(epoch):
    # Step-based decay: halve the learning rate every 20 epochs.
    drop_rate = 0.5
    epochs_drop = 20.0
    decay_factor = np.power(drop_rate, np.floor(epoch / epochs_drop))
    return Config.INITIAL_LEARNING_RATE * decay_factor
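

# The schedulers above map an epoch index to a learning rate. They are wrapped
# in keras.callbacks.LearningRateScheduler inside train_model, so the learning
# rate is updated once per epoch rather than per batch.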


# 7. Training functions
def train_model(
    model: Sequential,
    x_train: np.ndarray,
    y_train: np.ndarray,
    optimizer: keras.optimizers.Optimizer,
    lr_scheduler: Callable = None,
    run: neptune.Run = None,
) -> keras.callbacks.History:
    lr_metric = get_lr_metric(optimizer)
    model.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy", lr_metric],
    )

    callbacks = []
    if run:
        callbacks.append(NeptuneCallback(run=run, base_namespace="metrics"))
    if lr_scheduler:
        callbacks.append(keras.callbacks.LearningRateScheduler(lr_scheduler, verbose=0))

    history = model.fit(
        x_train,
        y_train,
        epochs=Config.EPOCHS,
        batch_size=Config.BATCH_SIZE,
        validation_split=0.2,
        callbacks=callbacks,
        verbose=0,
    )

    # Print final train/validation metrics
    print("Final training accuracy:", history.history["accuracy"][-1])
    print("Final validation accuracy:", history.history["val_accuracy"][-1])
    print("Final training loss:", history.history["loss"][-1])
    print("Final validation loss:", history.history["val_loss"][-1])
    return history


# 8. Experiment runners
def run_constant_lr_experiment(x_train: np.ndarray, y_train: np.ndarray) -> None:
    run = init_neptune_run(tags=["constant", "baseline"])
    model = create_model()
    optimizer = keras.optimizers.SGD(learning_rate=Config.INITIAL_LEARNING_RATE)
    history = train_model(model, x_train, y_train, optimizer, run=run)
    plot_neptune(history, "Constant", run)
    run.stop()


def run_polynomial_decay_experiment(x_train: np.ndarray, y_train: np.ndarray) -> None:
    run = init_neptune_run(tags=["PolynomialDecay"])
    model = create_model()
    optimizer = keras.optimizers.SGD(learning_rate=Config.INITIAL_LEARNING_RATE)
    lr_scheduler = LRPolynomialDecay(power=1.0)
    history = train_model(model, x_train, y_train, optimizer, lr_scheduler, run)
    plot_neptune(history, "Linear Decay", run)
    run.stop()


def run_time_based_decay_experiment(x_train: np.ndarray, y_train: np.ndarray) -> None:
    run = init_neptune_run(tags=["TimeBasedDecay"])
    model = create_model()
    optimizer = keras.optimizers.SGD(learning_rate=Config.INITIAL_LEARNING_RATE)
    history = train_model(model, x_train, y_train, optimizer, lr_time_based_decay, run)
    plot_neptune(history, "Time-Based Decay", run)
    run.stop()


def run_exp_decay_experiment(x_train: np.ndarray, y_train: np.ndarray) -> None:
    run = init_neptune_run(tags=["ExponentialDecay"])
    model = create_model()
    optimizer = keras.optimizers.SGD(learning_rate=Config.INITIAL_LEARNING_RATE)
    history = train_model(model, x_train, y_train, optimizer, lr_exp_decay, run)
    plot_neptune(history, "Exponential Decay", run)
    run.stop()


def run_step_based_decay_experiment(x_train: np.ndarray, y_train: np.ndarray) -> None:
    run = init_neptune_run(tags=["StepBasedDecay"])
    model = create_model()
    optimizer = keras.optimizers.SGD(learning_rate=Config.INITIAL_LEARNING_RATE)
    history = train_model(model, x_train, y_train, optimizer, lr_step_based_decay, run)
    plot_neptune(history, "Step-Based Decay", run)
    run.stop()


def run_adam_experiment(x_train: np.ndarray, y_train: np.ndarray) -> None:
    run = init_neptune_run(tags=["Adam"])
    model = create_model()
    optimizer = keras.optimizers.Adam(learning_rate=0.01)
    history = train_model(model, x_train, y_train, optimizer, run=run)
    plot_neptune(history, "Adam Optimizer", run)
    run.stop()


# 9. Main execution
if __name__ == "__main__":
    reset_random_seeds()
    x_train, y_train, x_test, y_test = load_and_preprocess_data()

    experiments = [
        run_constant_lr_experiment,
        run_polynomial_decay_experiment,
        run_time_based_decay_experiment,
        run_exp_decay_experiment,
        run_step_based_decay_experiment,
        run_adam_experiment,
    ]

    for experiment in experiments:
        experiment(x_train, y_train)
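
# Note: x_test and y_test are loaded and scaled but never evaluated above; each
# experiment relies on the 20% validation split inside train_model. A hold-out
# evaluation could be added with, e.g., model.evaluate(x_test, y_test) if the
# trained model were returned from the experiment runners.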