@Advaitgaur004
Last active August 11, 2025 08:53
Optimizer - Test (in main.c) - CTensor
#include "cten.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <time.h>
void* _cten_malloc(size_t size);
enum MemoryPoolIds {
PoolId_Default = 0,
PoolId_Model = 1,
PoolId_Optimizer = 2,
};
typedef struct Model {
Tensor weight_1, weight_2;
Tensor bias_1, bias_2;
} Model;
Tensor Model_forward(Model* model, Tensor x) {
x = nn_linear(x, model->weight_1, model->bias_1);
x = nn_relu(x);
x = nn_linear(x, model->weight_2, model->bias_2);
return x;
}
void test_adam_optimizer() {
printf("--- Testing Adam Optimizer ---\n");
const float target_w1 = 14.6f;
const float target_w2 = -8.7f;
const int iterations = 300;
const float learning_rate = 0.2f;
TensorShape w_shape = {1, 2, 0, 0};
Tensor w = Tensor_new(w_shape, true);
optim_adam* optimizer = optim_adam_new(1, &w, learning_rate, 0.9f, 0.999f, 1e-8f);
for (int i = 1; i <= iterations; ++i) {
optim_adam_zerograd(optimizer);
float w1 = w.data->flex[0];
float w2 = w.data->flex[1];
// MSE loss (currently active). To test MAE instead, comment out this block and uncomment the MAE block below.
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
float grad1 = 2 * (w1 - target_w1);
float grad2 = 2 * (w2 - target_w2);
// MAE loss (currently commented out). To test it, uncomment this block and comment out the MSE block above.
// float loss = fabsf(w1 - target_w1) + fabsf(w2 - target_w2);
// float grad1 = (w1 > target_w1) ? 1.0f : -1.0f;
// if (w1 == target_w1) grad1 = 0.0f;
// float grad2 = (w2 > target_w2) ? 1.0f : -1.0f;
// if (w2 == target_w2) grad2 = 0.0f;
if (i % 100 == 0 || i == 1) {
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
Tensor_print(w);
}
if (w.node->grad.data == NULL) {
w.node->grad = Tensor_zeros(w.shape, false);
}
w.node->grad.data->flex[0] = grad1;
w.node->grad.data->flex[1] = grad2;
optim_adam_step(optimizer);
}
printf("--------------------------------\n");
printf("Adam Test Complete:\n");
printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2);
printf("Final values: ");
Tensor_print(w);
printf("--------------------------------\n\n");
}
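/*
 * For reference, a minimal sketch of the textbook Adam update this test
 * exercises. The helper below is illustrative only: its names are local to
 * this sketch, it is not part of the cten API, and cten's internal
 * implementation may differ in details such as how bias correction is applied.
 */
static inline void adam_step_reference(float* w, float* m, float* v, float g,
                                       float lr, float beta1, float beta2,
                                       float eps, int t) {
    *m = beta1 * (*m) + (1.0f - beta1) * g;             // first moment (mean of gradients)
    *v = beta2 * (*v) + (1.0f - beta2) * g * g;         // second moment (uncentered variance)
    float m_hat = *m / (1.0f - powf(beta1, (float)t));  // bias-corrected moments
    float v_hat = *v / (1.0f - powf(beta2, (float)t));
    *w -= lr * m_hat / (sqrtf(v_hat) + eps);            // parameter update
}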
void test_rmsprop_optimizer() {
printf("--- Testing RMSProp Optimizer ---\n");
const float target_w1 = 14.6f;
const float target_w2 = -8.7f;
const int iterations = 300;
const float learning_rate = 0.3f;
TensorShape w_shape = {1, 2, 0, 0};
Tensor w = Tensor_new(w_shape, true);
optim_rmsprop* optimizer = optim_rmsprop_new(1, &w, learning_rate, 0.9f, 1e-8f);
for (int i = 1; i <= iterations; ++i) {
optim_rmsprop_zerograd(optimizer);
float w1 = w.data->flex[0];
float w2 = w.data->flex[1];
// MSE loss (currently active). To test MAE instead, comment out this block and uncomment the MAE block below.
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
float grad1 = 2 * (w1 - target_w1);
float grad2 = 2 * (w2 - target_w2);
// MAE loss (currently commented out). To test it, uncomment this block and comment out the MSE block above.
// float loss = fabsf(w1 - target_w1) + fabsf(w2 - target_w2);
// float grad1 = (w1 > target_w1) ? 1.0f : -1.0f;
// if (w1 == target_w1) grad1 = 0.0f;
// float grad2 = (w2 > target_w2) ? 1.0f : -1.0f;
// if (w2 == target_w2) grad2 = 0.0f;
if (i % 100 == 0 || i == 1) {
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
Tensor_print(w);
}
if (w.node->grad.data == NULL) {
w.node->grad = Tensor_zeros(w.shape, false);
}
w.node->grad.data->flex[0] = grad1;
w.node->grad.data->flex[1] = grad2;
optim_rmsprop_step(optimizer);
}
printf("--------------------------------\n");
printf("RMSProp Test Complete:\n");
printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2);
printf("Final values: ");
Tensor_print(w);
printf("--------------------------------\n\n");
}
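/*
 * For reference, a minimal sketch of the standard RMSProp update this test
 * exercises. Illustrative only; the names are local to this sketch and not
 * part of the cten API.
 */
static inline void rmsprop_step_reference(float* w, float* v, float g,
                                          float lr, float beta, float eps) {
    *v = beta * (*v) + (1.0f - beta) * g * g;  // running average of squared gradients
    *w -= lr * g / (sqrtf(*v) + eps);          // per-parameter scaled step
}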
void test_adagrad_optimizer() {
printf("--- Testing AdaGrad Optimizer ---\n");
const float target_w1 = 14.6f;
const float target_w2 = -8.7f;
const int iterations = 300;
const float learning_rate = 0.8f;
TensorShape w_shape = {1, 2, 0, 0};
Tensor w = Tensor_new(w_shape, true);
optim_adagrad* optimizer = optim_adagrad_new(1, &w, learning_rate, 1e-8f);
for (int i = 1; i <= iterations; ++i) {
optim_adagrad_zerograd(optimizer);
float w1 = w.data->flex[0];
float w2 = w.data->flex[1];
// MSE loss (currently active). To test MAE instead, comment out this block and uncomment the MAE block below.
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
float grad1 = 2 * (w1 - target_w1);
float grad2 = 2 * (w2 - target_w2);
// MAE loss (currently commented out). To test it, uncomment this block and comment out the MSE block above.
// float loss = fabsf(w1 - target_w1) + fabsf(w2 - target_w2);
// float grad1 = (w1 > target_w1) ? 1.0f : -1.0f;
// if (w1 == target_w1) grad1 = 0.0f;
// float grad2 = (w2 > target_w2) ? 1.0f : -1.0f;
// if (w2 == target_w2) grad2 = 0.0f;
if (i % 100 == 0 || i == 1) {
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
Tensor_print(w);
}
if (w.node->grad.data == NULL) {
w.node->grad = Tensor_zeros(w.shape, false);
}
w.node->grad.data->flex[0] = grad1;
w.node->grad.data->flex[1] = grad2;
optim_adagrad_step(optimizer);
}
printf("--------------------------------\n");
printf("AdaGrad Test Complete:\n");
printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2);
printf("Final values: ");
Tensor_print(w);
printf("--------------------------------\n\n");
}
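/*
 * For reference, a minimal sketch of the standard AdaGrad update this test
 * exercises. Illustrative only; the names are local to this sketch and not
 * part of the cten API.
 */
static inline void adagrad_step_reference(float* w, float* G, float g,
                                          float lr, float eps) {
    *G += g * g;                       // accumulated sum of squared gradients
    *w -= lr * g / (sqrtf(*G) + eps);  // step size shrinks as the accumulator grows
}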
int main() {
cten_initilize();
cten_begin_malloc(PoolId_Default);
printf("Optimizer Tests\n");
test_adam_optimizer();
test_rmsprop_optimizer();
test_adagrad_optimizer();
cten_end_malloc();
cten_finalize();
return 0;
}
#include "cten.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <time.h>
void* _cten_malloc(size_t size);
enum MemoryPoolIds {
PoolId_Default = 0,
PoolId_Model = 1,
PoolId_Optimizer = 2,
};
typedef struct Model {
Tensor weight_1, weight_2;
Tensor bias_1, bias_2;
} Model;
Tensor Model_forward(Model* model, Tensor x) {
x = nn_linear(x, model->weight_1, model->bias_1);
x = nn_relu(x);
x = nn_linear(x, model->weight_2, model->bias_2);
return x;
}
/**
* @brief Contains tests for optim_sgd_new assertions.
*/
void test_sgd_assertions(Tensor* params) {
printf("--- Testing SGD Assertions ---\n");
// Uncomment one line at a time to test.
// The program is expected to abort with a descriptive error message.
// TEST 1: Trigger "n_params cannot be negative"
// optim_sgd_new(-1, params);
// TEST 2: Trigger "params array cannot be NULL when n_params is greater than 0"
// optim_sgd_new(2, NULL);
// SUCCESS CASE (should not fail)
// optim_sgd* sgd = optim_sgd_new(2, params);
// printf("SGD valid creation successful.\n");
printf("SGD: All assertion tests are commented out.\n\n");
}
/**
* @brief Contains tests for optim_adagrad_new assertions.
*/
void test_adagrad_assertions(Tensor* params) {
printf("--- Testing AdaGrad Assertions ---\n");
// TEST 1: Trigger "n_params cannot be negative"
// optim_adagrad_new(-5, params, 0.01f, 1e-8f);
// TEST 2: Trigger "params array cannot be NULL"
// optim_adagrad_new(2, NULL, 0.01f, 1e-8f);
// TEST 3: Trigger "learning rate must be non-negative"
// optim_adagrad_new(2, params, -0.01f, 1e-8f);
// TEST 4: Trigger "epsilon must be non-negative"
// optim_adagrad_new(2, params, 0.01f, -1e-8f);
// SUCCESS CASE (should not fail)
// optim_adagrad* adagrad = optim_adagrad_new(2, params, 0.01f, 1e-8f);
// printf("AdaGrad valid creation successful.\n");
printf("AdaGrad: All assertion tests are commented out.\n\n");
}
/**
* @brief Contains tests for optim_rmsprop_new assertions.
*/
void test_rmsprop_assertions(Tensor* params) {
printf("--- Testing RMSProp Assertions ---\n");
// TEST 1: Trigger "n_params cannot be negative"
// optim_rmsprop_new(-1, params, 0.01f, 0.9f, 1e-8f);
// TEST 2: Trigger "params array cannot be NULL"
// optim_rmsprop_new(2, NULL, 0.01f, 0.9f, 1e-8f);
// TEST 3: Trigger "learning rate must be non-negative"
// optim_rmsprop_new(2, params, -0.01f, 0.9f, 1e-8f);
// TEST 4: Trigger "beta (decay rate) must be in [0, 1)" (testing upper bound)
// optim_rmsprop_new(2, params, 0.01f, 1.0f, 1e-8f);
// TEST 5: Trigger "beta (decay rate) must be in [0, 1)" (testing lower bound)
// optim_rmsprop_new(2, params, 0.01f, -0.1f, 1e-8f);
// TEST 6: Trigger "epsilon must be non-negative"
// optim_rmsprop_new(2, params, 0.01f, 0.9f, -1e-8f);
// SUCCESS CASE (should not fail)
// optim_rmsprop* rmsprop = optim_rmsprop_new(2, params, 0.01f, 0.9f, 1e-8f);
// printf("RMSProp valid creation successful.\n");
printf("RMSProp: All assertion tests are commented out.\n\n");
}
/**
* @brief Contains tests for optim_adam_new assertions.
*/
void test_adam_assertions(Tensor* params) {
printf("--- Testing Adam Assertions ---\n");
// TEST 1: Trigger "n_params cannot be negative"
// optim_adam_new(-2, params, 0.001f, 0.9f, 0.999f, 1e-8f);
// TEST 2: Trigger "params array cannot be NULL"
// optim_adam_new(2, NULL, 0.001f, 0.9f, 0.999f, 1e-8f);
// TEST 3: Trigger "learning rate must be non-negative"
// optim_adam_new(2, params, -0.001f, 0.9f, 0.999f, 1e-8f);
// TEST 4: Trigger "beta1 must be in [0, 1)"
// optim_adam_new(2, params, 0.001f, 1.0f, 0.999f, 1e-8f);
// TEST 5: Trigger "beta2 must be in [0, 1)"
// optim_adam_new(2, params, 0.001f, 0.9f, -0.1f, 1e-8f);
// TEST 6: Trigger "epsilon must be non-negative"
// optim_adam_new(2, params, 0.001f, 0.9f, 0.999f, -1e-8f);
// SUCCESS CASE (should not fail)
// optim_adam* adam = optim_adam_new(2, params, 0.001f, 0.9f, 0.999f, 1e-8f);
// printf("Adam valid creation successful.\n");
printf("Adam: All assertion tests are commented out.\n\n");
}
int main() {
cten_initilize();
cten_begin_malloc(PoolId_Default);
TensorShape shape = {1, 5, 0, 0}; // A simple shape for our fake weights/biases
Tensor params[2];
params[0] = Tensor_new(shape, true);
params[1] = Tensor_new(shape, true);
const int n_params = 2;
printf("Starting optimizer assertion tests...\n");
printf("Uncomment a single test line in the code to see its assertion fail.\n\n");
test_sgd_assertions(params);
test_adagrad_assertions(params);
test_rmsprop_assertions(params);
test_adam_assertions(params);
printf("All test suites complete. If you saw no crashes, it means all failing tests are commented out.\n");
cten_end_malloc();
cten_finalize();
return 0;
}
#include "cten.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <time.h>
void* _cten_malloc(size_t size);
enum MemoryPoolIds {
PoolId_Default = 0,
PoolId_Model = 1,
PoolId_Optimizer = 2,
};
typedef struct Model {
Tensor weight_1, weight_2;
Tensor bias_1, bias_2;
} Model;
Tensor Model_forward(Model* model, Tensor x) {
x = nn_linear(x, model->weight_1, model->bias_1);
x = nn_relu(x);
x = nn_linear(x, model->weight_2, model->bias_2);
return x;
}
void test_sgd_basic() {
printf("--- Test 1: Basic SGD (Momentum = 0.0) ---\n");
const float target_w1 = 94.7f;
const float target_w2 = -78.0f;
const int iterations = 50;
const float learning_rate = 0.05f;
TensorShape w_shape = {1, 2, 0, 0};
Tensor w = Tensor_new(w_shape, true);
// Initialize weights to 0
w.data->flex[0] = 0.0f;
w.data->flex[1] = 0.0f;
// Create and configure the optimizer
optim_sgd* optimizer = optim_sgd_new(1, &w);
optim_sgd_config(optimizer, learning_rate, 0.0f);
for (int i = 1; i <= iterations; ++i) {
optim_sgd_zerograd(optimizer);
float w1 = w.data->flex[0];
float w2 = w.data->flex[1];
// Simple MSE loss and gradient
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
float grad1 = 2 * (w1 - target_w1);
float grad2 = 2 * (w2 - target_w2);
if (i % 10 == 0 || i == 1) {
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
Tensor_print(w);
}
// Manually set the gradient (allocating it first if it has not been created yet)
if (w.node->grad.data == NULL) {
w.node->grad = Tensor_zeros(w.shape, false);
}
w.node->grad.data->flex[0] = grad1;
w.node->grad.data->flex[1] = grad2;
optim_sgd_step(optimizer);
}
printf("------------------------------------------\n");
printf("Basic SGD Test Complete.\n");
printf("Target values: (% .4f, % .4f)\n", target_w1, target_w2);
printf("Final values: ");
Tensor_print(w);
printf("------------------------------------------\n\n");
}
void test_sgd_with_momentum() {
printf("--- Test 2: SGD with Momentum (Momentum = 0.6) ---\n");
const float target_w1 = 94.7f;
const float target_w2 = -78.0f;
const int iterations = 50;
const float learning_rate = 0.05f;
TensorShape w_shape = {1, 2, 0, 0};
Tensor w = Tensor_new(w_shape, true);
w.data->flex[0] = 0.0f;
w.data->flex[1] = 0.0f;
// Create and configure the optimizer with momentum
optim_sgd* optimizer = optim_sgd_new(1, &w);
optim_sgd_config(optimizer, learning_rate, 0.6f);
for (int i = 1; i <= iterations; ++i) {
optim_sgd_zerograd(optimizer);
float w1 = w.data->flex[0];
float w2 = w.data->flex[1];
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
float grad1 = 2 * (w1 - target_w1);
float grad2 = 2 * (w2 - target_w2);
if (i % 10 == 0 || i == 1) {
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
Tensor_print(w);
}
if (w.node->grad.data == NULL) {
w.node->grad = Tensor_zeros(w.shape, false);
}
w.node->grad.data->flex[0] = grad1;
w.node->grad.data->flex[1] = grad2;
optim_sgd_step(optimizer);
}
printf("------------------------------------------\n");
printf("SGD with Momentum Test Complete.\n");
printf("Target values: (% .4f, % .4f)\n", target_w1, target_w2);
printf("Final values: ");
Tensor_print(w);
printf("------------------------------------------\n\n");
}
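/*
 * For reference, a minimal sketch of SGD with a momentum buffer, following the
 * common convention where the buffer accumulates raw gradients and the learning
 * rate is applied afterwards. Illustrative only; cten may instead fold the
 * learning rate into the buffer.
 */
static inline void sgd_momentum_step_reference(float* w, float* buf, float g,
                                               float lr, float momentum) {
    *buf = momentum * (*buf) + g;  // velocity; with momentum == 0 this is plain SGD
    *w -= lr * (*buf);             // parameter update
}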
void test_sgd_edge_cases() {
printf("--- Test 3: SGD Edge Case Validation ---\n");
TensorShape shape = {1, 1, 0, 0};
// --- EDGE CASE 1: Re-configuring an optimizer ---
printf("1. Testing re-configuration: [EXPECTED: SUCCESS]\n");
Tensor w1 = Tensor_new(shape, true);
optim_sgd* optimizer = optim_sgd_new(1, &w1);
optim_sgd_config(optimizer, 0.1f, 0.0f);
optim_sgd_config(optimizer, 0.2f, 0.9f); // Re-configure with momentum
printf(" SUCCESS: Optimizer re-configured without errors.\n\n");
// --- EDGE CASE 2: Invalid momentum value ---
printf("2. Testing invalid momentum input: [EXPECTED: ASSERTION FAILURE]\n");
printf(" To test, uncomment the following line in the code.\n");
// optim_sgd_config(optimizer, 0.01f, -0.5f); // This should abort the program.
printf(" Test skipped.\n\n");
// --- EDGE CASE 3: Stepping with no gradient ---
printf("3. Testing step with no gradient available: [EXPECTED: SUCCESS]\n");
optim_sgd_zerograd(optimizer); // Ensure grad is NULL
optim_sgd_step(optimizer); // Should not crash, just do nothing.
printf(" SUCCESS: Step was skipped safely when grad was NULL.\n\n");
printf("------------------------------------------\n");
printf("Edge Case Validation Complete.\n");
printf("------------------------------------------\n");
}
int main() {
cten_initilize();
cten_begin_malloc(PoolId_Default);
printf("======================================\n");
printf(" Testing SGD Optimizer Implementation\n");
printf("======================================\n\n");
// Run functional tests
test_sgd_basic();
test_sgd_with_momentum();
test_sgd_edge_cases();
cten_end_malloc();
cten_finalize();
return 0;
}
#include "cten.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <time.h>
void* _cten_malloc(size_t size);
enum MemoryPoolIds {
PoolId_Default = 0,
PoolId_Model = 1,
PoolId_Optimizer = 2,
};
typedef struct Model {
Tensor weight_1, weight_2;
Tensor bias_1, bias_2;
} Model;
Tensor Model_forward(Model* model, Tensor x) {
x = nn_linear(x, model->weight_1, model->bias_1);
x = nn_relu(x);
x = nn_linear(x, model->weight_2, model->bias_2);
return x;
}
void test_adam_optimizer_with_enhancements(float weight_decay) {
printf("--- Testing Adam Optimizer---\n");
const float target_w1 = 14.6f;
const float target_w2 = -8.7f;
const int iterations = 200;
const float learning_rate = 0.2f;
TensorShape w_shape = {1, 2, 0, 0};
Tensor w = Tensor_new(w_shape, true);
// Updated Adam constructor with weight decay
optim_adam* optimizer = optim_adam_new(1, &w, learning_rate, 0.9f, 0.999f, 1e-8f, weight_decay);
printf("Hyperparameters: LR=%.3f, Weight_Decay=%.4f\n",
learning_rate,
weight_decay);
for(int i = 1; i <= iterations; ++i) {
optim_adam_zerograd(optimizer);
float w1 = w.data->flex[0];
float w2 = w.data->flex[1];
// MSE Loss
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
float grad1 = 2 * (w1 - target_w1);
float grad2 = 2 * (w2 - target_w2);
if(i % 100 == 0 || i == 1) {
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
Tensor_print(w);
}
if(w.node->grad.data == NULL) { w.node->grad = Tensor_zeros(w.shape, false); }
w.node->grad.data->flex[0] = grad1;
w.node->grad.data->flex[1] = grad2;
optim_adam_step(optimizer);
}
printf("--------------------------------\n");
printf("Adam Test Complete:\n");
printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2);
printf("Final values: ");
Tensor_print(w);
printf("--------------------------------\n\n");
}
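/*
 * Note on weight decay: with the coupled (L2-regularization) convention, the
 * decay term is simply folded into the gradient before the usual update. The
 * helper below only illustrates that convention; whether cten applies coupled
 * or decoupled (AdamW-style) decay is an implementation detail of the library.
 */
static inline float apply_coupled_weight_decay(float g, float w, float weight_decay) {
    return g + weight_decay * w;  // g := dL/dw + weight_decay * w
}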
void test_rmsprop_optimizer_with_enhancements(float weight_decay) {
printf("--- Testing RMSProp Optimizer ---\n");
const float target_w1 = 14.6f;
const float target_w2 = -8.7f;
const int iterations = 200;
const float learning_rate = 0.3f;
TensorShape w_shape = {1, 2, 0, 0};
Tensor w = Tensor_new(w_shape, true);
// Updated RMSProp constructor with weight decay
optim_rmsprop* optimizer = optim_rmsprop_new(1, &w, learning_rate, 0.9f, 1e-8f, weight_decay);
printf("Hyperparameters: LR=%.3f, Weight_Decay=%.4f\n",
learning_rate,
weight_decay);
for(int i = 1; i <= iterations; ++i) {
optim_rmsprop_zerograd(optimizer);
float w1 = w.data->flex[0];
float w2 = w.data->flex[1];
// MSE Loss
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
float grad1 = 2 * (w1 - target_w1);
float grad2 = 2 * (w2 - target_w2);
if(i % 100 == 0 || i == 1) {
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
Tensor_print(w);
}
if(w.node->grad.data == NULL) { w.node->grad = Tensor_zeros(w.shape, false); }
w.node->grad.data->flex[0] = grad1;
w.node->grad.data->flex[1] = grad2;
optim_rmsprop_step(optimizer);
}
printf("--------------------------------\n");
printf("RMSProp Test Complete:\n");
printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2);
printf("Final values: ");
Tensor_print(w);
printf("--------------------------------\n\n");
}
void test_adagrad_optimizer_with_enhancements(float weight_decay) {
printf("--- Testing AdaGrad Optimizer ---\n");
const float target_w1 = 14.6f;
const float target_w2 = -8.7f;
const int iterations = 200;
const float learning_rate = 0.8f;
TensorShape w_shape = {1, 2, 0, 0};
Tensor w = Tensor_new(w_shape, true);
// Updated AdaGrad constructor with weight decay
optim_adagrad* optimizer = optim_adagrad_new(1, &w, learning_rate, 1e-8f, weight_decay);
printf("Hyperparameters: LR=%.3f, Weight_Decay=%.4f\n",
learning_rate,
weight_decay);
for(int i = 1; i <= iterations; ++i) {
optim_adagrad_zerograd(optimizer);
float w1 = w.data->flex[0];
float w2 = w.data->flex[1];
// MSE Loss
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
float grad1 = 2 * (w1 - target_w1);
float grad2 = 2 * (w2 - target_w2);
if(i % 100 == 0 || i == 1) {
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
Tensor_print(w);
}
if(w.node->grad.data == NULL) { w.node->grad = Tensor_zeros(w.shape, false); }
w.node->grad.data->flex[0] = grad1;
w.node->grad.data->flex[1] = grad2;
optim_adagrad_step(optimizer);
}
printf("--------------------------------\n");
printf("AdaGrad Test Complete:\n");
printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2);
printf("Final values: ");
Tensor_print(w);
printf("--------------------------------\n\n");
}
void test_sgd_optimizer_with_enhancements(float weight_decay) {
printf("--- Testing SGD Optimizer ---\n");
const float target_w1 = 14.6f;
const float target_w2 = -8.7f;
const int iterations = 200;
const float learning_rate = 0.01f;
const float momentum = 0.9f;
TensorShape w_shape = {1, 2, 0, 0};
Tensor w = Tensor_new(w_shape, true);
// SGD constructor with weight decay
optim_sgd* optimizer = optim_sgd_new(1, &w, weight_decay);
optim_sgd_config(optimizer, learning_rate, momentum);
printf("Hyperparameters: LR=%.3f, Momentum=%.1f, Weight_Decay=%.4f\n",
learning_rate,
momentum,
weight_decay);
for(int i = 1; i <= iterations; ++i) {
optim_sgd_zerograd(optimizer);
float w1 = w.data->flex[0];
float w2 = w.data->flex[1];
// MSE Loss
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
float grad1 = 2 * (w1 - target_w1);
float grad2 = 2 * (w2 - target_w2);
if(i % 100 == 0 || i == 1) {
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
Tensor_print(w);
}
if(w.node->grad.data == NULL) { w.node->grad = Tensor_zeros(w.shape, false); }
w.node->grad.data->flex[0] = grad1;
w.node->grad.data->flex[1] = grad2;
optim_sgd_step(optimizer);
}
printf("--------------------------------\n");
printf("SGD Test Complete:\n");
printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2);
printf("Final values: ");
Tensor_print(w);
printf("--------------------------------\n\n");
}
int main() {
cten_initilize();
cten_begin_malloc(PoolId_Default);
printf("Optimizer Tests\n");
printf("==============================================================\n\n");
// Test all optimizers without weight decay
test_adam_optimizer_with_enhancements(0.0f);
test_rmsprop_optimizer_with_enhancements(0.0f);
test_adagrad_optimizer_with_enhancements(0.0f);
test_sgd_optimizer_with_enhancements(0.0f);
// Test all optimizers with weight decay
test_adam_optimizer_with_enhancements(0.001f);
test_rmsprop_optimizer_with_enhancements(0.001f);
test_adagrad_optimizer_with_enhancements(0.001f);
test_sgd_optimizer_with_enhancements(0.001f);
cten_end_malloc();
cten_finalize();
return 0;
}
#include "cten.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <time.h>
void* _cten_malloc(size_t size);
enum MemoryPoolIds {
PoolId_Default = 0,
PoolId_Model = 1,
PoolId_Optimizer = 2,
};
typedef struct Model {
Tensor weight_1, weight_2;
Tensor bias_1, bias_2;
} Model;
Tensor Model_forward(Model* model, Tensor x) {
x = nn_linear(x, model->weight_1, model->bias_1);
x = nn_relu(x);
x = nn_linear(x, model->weight_2, model->bias_2);
return x;
}
// A small tolerance for comparing floating-point numbers.
#define FLOAT_TOLERANCE 1e-6
// Helper function to check if two floats are approximately equal.
bool check_floats_equal(float a, float b) {
return fabsf(a - b) < FLOAT_TOLERANCE;
}
// Helper function to set up a tensor with a gradient for testing.
void setup_test_tensor(Tensor* t, float grad1, float grad2) {
*t = Tensor_new((TensorShape){1, 2, 0, 0}, true);
// Ensure grad tensor is allocated
if (t->node->grad.data == NULL) {
t->node->grad = Tensor_zeros(t->shape, false);
}
t->node->grad.data->flex[0] = grad1;
t->node->grad.data->flex[1] = grad2;
}
// --- Test for cten_clip_grad_norm ---
void test_clip_grad_norm() {
printf("--- Testing cten_clip_grad_norm ---\n");
Tensor t;
// Case 1: Norm is greater than max_norm, so clipping should occur.
setup_test_tensor(&t, 3.0f, 4.0f); // Initial norm is sqrt(9 + 16) = 5.0
float max_norm = 1.0f;
cten_clip_grad_norm(&t, 1, max_norm);
float expected_g1 = 3.0f * (max_norm / 5.0f); // 0.6
float expected_g2 = 4.0f * (max_norm / 5.0f); // 0.8
if (check_floats_equal(t.node->grad.data->flex[0], expected_g1) &&
check_floats_equal(t.node->grad.data->flex[1], expected_g2)) {
printf("PASS: Gradients correctly scaled down.\n");
} else {
printf("FAIL: Gradients not scaled correctly. Got [%f, %f], expected [%f, %f]\n",
t.node->grad.data->flex[0], t.node->grad.data->flex[1], expected_g1, expected_g2);
}
// Case 2: Norm is less than max_norm, so no clipping should occur.
setup_test_tensor(&t, 0.5f, 0.5f); // Initial norm is sqrt(0.25 + 0.25) approx 0.707
max_norm = 2.0f;
cten_clip_grad_norm(&t, 1, max_norm);
if (check_floats_equal(t.node->grad.data->flex[0], 0.5f) &&
check_floats_equal(t.node->grad.data->flex[1], 0.5f)) {
printf("PASS: Gradients correctly left unchanged.\n");
} else {
printf("FAIL: Gradients were changed unexpectedly.\n");
}
printf("\n");
}
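/*
 * The expected values above follow the usual clip-by-norm rule: take the L2
 * norm of the gradients and, only if it exceeds max_norm, rescale them by
 * max_norm / norm. A minimal single-array sketch (cten may compute a global
 * norm across all parameters instead):
 */
static void clip_grad_norm_reference(float* grads, int n, float max_norm) {
    float sum_sq = 0.0f;
    for (int i = 0; i < n; i++) sum_sq += grads[i] * grads[i];
    float norm = sqrtf(sum_sq);
    if (norm > max_norm && norm > 0.0f) {
        float scale = max_norm / norm;  // e.g. 1.0 / 5.0 = 0.2 for the [3, 4] case above
        for (int i = 0; i < n; i++) grads[i] *= scale;
    }
}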
// --- Test for cten_clip_grad_value ---
void test_clip_grad_value() {
printf("--- Testing cten_clip_grad_value ---\n");
Tensor t;
// Case 1: Both positive and negative gradients are clipped.
setup_test_tensor(&t, 5.0f, -5.0f);
float max_value = 2.0f;
cten_clip_grad_value(&t, 1, max_value);
if (check_floats_equal(t.node->grad.data->flex[0], 2.0f) &&
check_floats_equal(t.node->grad.data->flex[1], -2.0f)) {
printf("PASS: Gradients correctly clipped to [-%f, %f].\n", max_value, max_value);
} else {
printf("FAIL: Gradients not clipped correctly. Got [%f, %f], expected [%f, %f]\n",
t.node->grad.data->flex[0], t.node->grad.data->flex[1], 2.0f, -2.0f);
}
// Case 2: Gradients are within the bounds, no clipping should occur.
setup_test_tensor(&t, 1.0f, -1.0f);
max_value = 2.0f;
cten_clip_grad_value(&t, 1, max_value);
if (check_floats_equal(t.node->grad.data->flex[0], 1.0f) &&
check_floats_equal(t.node->grad.data->flex[1], -1.0f)) {
printf("PASS: Gradients correctly left unchanged.\n");
} else {
printf("FAIL: Gradients were changed unexpectedly.\n");
}
printf("\n");
}
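/*
 * The expected behaviour above is a plain clamp of each gradient to
 * [-max_value, max_value]. Sketch only; not the cten implementation.
 */
static inline float clip_grad_value_reference(float g, float max_value) {
    return fminf(fmaxf(g, -max_value), max_value);
}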
// --- Test for cten_clip_grad_value_range ---
void test_clip_grad_value_range() {
printf("--- Testing cten_clip_grad_value_range ---\n");
Tensor t;
// Test with an asymmetric range.
setup_test_tensor(&t, 10.0f, -10.0f);
float min_val = -1.0f;
float max_val = 2.0f;
cten_clip_grad_value_range(&t, 1, min_val, max_val);
if (check_floats_equal(t.node->grad.data->flex[0], max_val) &&
check_floats_equal(t.node->grad.data->flex[1], min_val)) {
printf("PASS: Gradients correctly clipped to range [%f, %f].\n", min_val, max_val);
} else {
printf("FAIL: Gradients not clipped correctly. Got [%f, %f], expected [%f, %f]\n",
t.node->grad.data->flex[0], t.node->grad.data->flex[1], max_val, min_val);
}
printf("\n");
}
// --- Test for cten_clip_grad_positive & cten_clip_grad_negative ---
// These are essentially special cases of value_range, but we test them to be sure.
void test_clip_one_sided() {
printf("--- Testing cten_clip_grad_positive & cten_clip_grad_negative ---\n");
Tensor t_pos, t_neg;
// Setup: One gradient is positive, one is negative.
setup_test_tensor(&t_pos, 5.0f, -2.0f);
setup_test_tensor(&t_neg, 5.0f, -2.0f);
// Test positive clipping
float max_val = 1.5f;
cten_clip_grad_positive(&t_pos, 1, max_val);
if (check_floats_equal(t_pos.node->grad.data->flex[0], max_val) &&
check_floats_equal(t_pos.node->grad.data->flex[1], -2.0f)) { // Negative value should be untouched
printf("PASS: cten_clip_grad_positive works correctly.\n");
} else {
printf("FAIL: cten_clip_grad_positive failed. Got [%f, %f], expected [%f, %f]\n",
t_pos.node->grad.data->flex[0], t_pos.node->grad.data->flex[1], max_val, -2.0f);
}
// Test negative clipping
float min_val = -0.5f;
cten_clip_grad_negative(&t_neg, 1, min_val);
if (check_floats_equal(t_neg.node->grad.data->flex[0], 5.0f) && // Positive value should be untouched
check_floats_equal(t_neg.node->grad.data->flex[1], min_val)) {
printf("PASS: cten_clip_grad_negative works correctly.\n");
} else {
printf("FAIL: cten_clip_grad_negative failed. Got [%f, %f], expected [%f, %f]\n",
t_neg.node->grad.data->flex[0], t_neg.node->grad.data->flex[1], 5.0f, min_val);
}
printf("\n");
}
// --- Test Edge Cases ---
void test_edge_cases() {
printf("--- Testing Edge Cases ---\n");
// Test with NULL parameters. These should not crash.
cten_clip_grad_norm(NULL, 1, 1.0f);
cten_clip_grad_norm(NULL, 0, 1.0f);
cten_clip_grad_value(NULL, 1, 1.0f);
cten_clip_grad_value_range(NULL, 1, -1.0f, 1.0f);
cten_clip_grad_positive(NULL, 1, 1.0f);
cten_clip_grad_negative(NULL, 1, -1.0f);
printf("PASS: All functions handled NULL and zero-sized inputs without crashing.\n");
// Test with a tensor that has no gradient.
Tensor t = Tensor_new((TensorShape){1, 1, 0, 0}, true); // Grad is NULL by default
cten_clip_grad_norm(&t, 1, 1.0f);
printf("PASS: Handled tensor with NULL gradient without crashing.\n");
// Test norm clipping with max_norm <= 0. Should do nothing.
setup_test_tensor(&t, 10.0f, 10.0f);
cten_clip_grad_norm(&t, 1, 0.0f);
cten_clip_grad_norm(&t, 1, -5.0f);
if (check_floats_equal(t.node->grad.data->flex[0], 10.0f)) {
printf("PASS: cten_clip_grad_norm correctly does nothing for max_norm <= 0.\n");
} else {
printf("FAIL: cten_clip_grad_norm modified gradients for max_norm <= 0.\n");
}
printf("\n");
}
int main() {
// Initialize the library and its default memory pool before running the tests.
cten_initilize();
cten_begin_malloc(PoolId_Default);
printf("================================================\n");
printf(" RUNNING GRADIENT CLIPPING TEST SUITE \n");
printf("================================================\n\n");
test_clip_grad_norm();
test_clip_grad_value();
test_clip_grad_value_range();
test_clip_one_sided();
test_edge_cases();
printf("================================================\n");
printf(" TEST SUITE EXECUTION COMPLETE \n");
printf("================================================\n");
cten_end_malloc();
cten_finalize();
return 0;
}