Optimizer - Test (in main.c) - CTensor
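The five files below are standalone main.c test harnesses for the CTensor (cten) library: convergence tests for the Adam, RMSProp, and AdaGrad optimizers; assertion tests for the optimizer constructors; basic and momentum SGD tests plus edge cases; weight-decay variants of all four optimizers; and a gradient-clipping test suite. Each file includes cten.h, defines its own main(), and drives the optimizers with hand-computed gradients instead of a full backward pass.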
#include "cten.h" | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <math.h> | |
#include <string.h> | |
#include <time.h> | |
void* _cten_malloc(size_t size); | |
enum MemoryPoolIds { | |
PoolId_Default = 0, | |
PoolId_Model = 1, | |
PoolId_Optimizer = 2, | |
}; | |
typedef struct Model { | |
Tensor weight_1, weight_2; | |
Tensor bias_1, bias_2; | |
} Model; | |
Tensor Model_forward(Model* model, Tensor x) { | |
x = nn_linear(x, model->weight_1, model->bias_1); | |
x = nn_relu(x); | |
x = nn_linear(x, model->weight_2, model->bias_2); | |
return x; | |
} | |
void test_adam_optimizer() {
    printf("--- Testing Adam Optimizer ---\n");
    const float target_w1 = 14.6f;
    const float target_w2 = -8.7f;
    const int iterations = 300;
    const float learning_rate = 0.2f;
    TensorShape w_shape = {1, 2, 0, 0};
    Tensor w = Tensor_new(w_shape, true);
    optim_adam* optimizer = optim_adam_new(1, &w, learning_rate, 0.9f, 0.999f, 1e-8f);
    for (int i = 1; i <= iterations; ++i) {
        optim_adam_zerograd(optimizer);
        float w1 = w.data->flex[0];
        float w2 = w.data->flex[1];
        // MSE loss (active). To test MAE instead, comment this block out and uncomment the MAE block below.
        float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
        float grad1 = 2 * (w1 - target_w1);
        float grad2 = 2 * (w2 - target_w2);
        // MAE loss (alternative). Uncomment this block and comment out the MSE block above.
        // float loss = fabsf(w1 - target_w1) + fabsf(w2 - target_w2);
        // float grad1 = (w1 > target_w1) ? 1.0f : -1.0f;
        // if (w1 == target_w1) grad1 = 0.0f;
        // float grad2 = (w2 > target_w2) ? 1.0f : -1.0f;
        // if (w2 == target_w2) grad2 = 0.0f;
        if (i % 100 == 0 || i == 1) {
            printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
            Tensor_print(w);
        }
        if (w.node->grad.data == NULL) {
            w.node->grad = Tensor_zeros(w.shape, false);
        }
        w.node->grad.data->flex[0] = grad1;
        w.node->grad.data->flex[1] = grad2;
        optim_adam_step(optimizer);
    }
    printf("--------------------------------\n");
    printf("Adam Test Complete:\n");
    printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2);
    printf("Final values: ");
    Tensor_print(w);
    printf("--------------------------------\n\n");
}

void test_rmsprop_optimizer() {
    printf("--- Testing RMSProp Optimizer ---\n");
    const float target_w1 = 14.6f;
    const float target_w2 = -8.7f;
    const int iterations = 300;
    const float learning_rate = 0.3f;
    TensorShape w_shape = {1, 2, 0, 0};
    Tensor w = Tensor_new(w_shape, true);
    optim_rmsprop* optimizer = optim_rmsprop_new(1, &w, learning_rate, 0.9f, 1e-8f);
    for (int i = 1; i <= iterations; ++i) {
        optim_rmsprop_zerograd(optimizer);
        float w1 = w.data->flex[0];
        float w2 = w.data->flex[1];
        // MSE loss (active). To test MAE instead, comment this block out and uncomment the MAE block below.
        float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
        float grad1 = 2 * (w1 - target_w1);
        float grad2 = 2 * (w2 - target_w2);
        // MAE loss (alternative). Uncomment this block and comment out the MSE block above.
        // float loss = fabsf(w1 - target_w1) + fabsf(w2 - target_w2);
        // float grad1 = (w1 > target_w1) ? 1.0f : -1.0f;
        // if (w1 == target_w1) grad1 = 0.0f;
        // float grad2 = (w2 > target_w2) ? 1.0f : -1.0f;
        // if (w2 == target_w2) grad2 = 0.0f;
        if (i % 100 == 0 || i == 1) {
            printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
            Tensor_print(w);
        }
        if (w.node->grad.data == NULL) {
            w.node->grad = Tensor_zeros(w.shape, false);
        }
        w.node->grad.data->flex[0] = grad1;
        w.node->grad.data->flex[1] = grad2;
        optim_rmsprop_step(optimizer);
    }
    printf("--------------------------------\n");
    printf("RMSProp Test Complete:\n");
    printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2);
    printf("Final values: ");
    Tensor_print(w);
    printf("--------------------------------\n\n");
}

void test_adagrad_optimizer() {
    printf("--- Testing AdaGrad Optimizer ---\n");
    const float target_w1 = 14.6f;
    const float target_w2 = -8.7f;
    const int iterations = 300;
    const float learning_rate = 0.8f;
    TensorShape w_shape = {1, 2, 0, 0};
    Tensor w = Tensor_new(w_shape, true);
    optim_adagrad* optimizer = optim_adagrad_new(1, &w, learning_rate, 1e-8f);
    for (int i = 1; i <= iterations; ++i) {
        optim_adagrad_zerograd(optimizer);
        float w1 = w.data->flex[0];
        float w2 = w.data->flex[1];
        // MSE loss (active). To test MAE instead, comment this block out and uncomment the MAE block below.
        float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
        float grad1 = 2 * (w1 - target_w1);
        float grad2 = 2 * (w2 - target_w2);
        // MAE loss (alternative). Uncomment this block and comment out the MSE block above.
        // float loss = fabsf(w1 - target_w1) + fabsf(w2 - target_w2);
        // float grad1 = (w1 > target_w1) ? 1.0f : -1.0f;
        // if (w1 == target_w1) grad1 = 0.0f;
        // float grad2 = (w2 > target_w2) ? 1.0f : -1.0f;
        // if (w2 == target_w2) grad2 = 0.0f;
        if (i % 100 == 0 || i == 1) {
            printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
            Tensor_print(w);
        }
        if (w.node->grad.data == NULL) {
            w.node->grad = Tensor_zeros(w.shape, false);
        }
        w.node->grad.data->flex[0] = grad1;
        w.node->grad.data->flex[1] = grad2;
        optim_adagrad_step(optimizer);
    }
    printf("--------------------------------\n");
    printf("AdaGrad Test Complete:\n");
    printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2);
    printf("Final values: ");
    Tensor_print(w);
    printf("--------------------------------\n\n");
}
int main() {
    cten_initilize();
    cten_begin_malloc(PoolId_Default);
    printf("Optimizer Tests\n");
    test_adam_optimizer();
    test_rmsprop_optimizer();
    test_adagrad_optimizer();
    cten_end_malloc();
    cten_finalize();
    return 0;
}
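All three tests above feed the optimizer a hand-computed gradient instead of running a backward pass. For targets (t1, t2) the MSE loss and its gradient are

    L(w) = (w1 - t1)^2 + (w2 - t2)^2,
    dL/dw1 = 2 (w1 - t1),    dL/dw2 = 2 (w2 - t2),

which is exactly what grad1 and grad2 compute each iteration (the commented-out MAE block uses the sign of the error instead). For reference, the Adam step being exercised is the standard textbook update; details such as bias correction are up to the cten implementation:

    m_t = beta1 * m_(t-1) + (1 - beta1) * g_t
    v_t = beta2 * v_(t-1) + (1 - beta2) * g_t^2
    w_t = w_(t-1) - lr * m_hat_t / (sqrt(v_hat_t) + eps),  where m_hat_t = m_t / (1 - beta1^t) and v_hat_t = v_t / (1 - beta2^t)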
#include "cten.h" | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <math.h> | |
#include <string.h> | |
#include <time.h> | |
void* _cten_malloc(size_t size); | |
enum MemoryPoolIds { | |
PoolId_Default = 0, | |
PoolId_Model = 1, | |
PoolId_Optimizer = 2, | |
}; | |
typedef struct Model { | |
Tensor weight_1, weight_2; | |
Tensor bias_1, bias_2; | |
} Model; | |
Tensor Model_forward(Model* model, Tensor x) { | |
x = nn_linear(x, model->weight_1, model->bias_1); | |
x = nn_relu(x); | |
x = nn_linear(x, model->weight_2, model->bias_2); | |
return x; | |
} | |
/** | |
* @brief Contains tests for optim_sgd_new assertions. | |
*/ | |
void test_sgd_assertions(Tensor* params) { | |
printf("--- Testing SGD Assertions ---\n"); | |
// Uncomment one line at a time to test. | |
// The program is expected to abort with a descriptive error message. | |
// TEST 1: Trigger "n_params cannot be negative" | |
// optim_sgd_new(-1, params); | |
// TEST 2: Trigger "params array cannot be NULL when n_params is greater than 0" | |
// optim_sgd_new(2, NULL); | |
// SUCCESS CASE (should not fail) | |
// optim_sgd* sgd = optim_sgd_new(2, params); | |
// printf("SGD valid creation successful.\n"); | |
printf("SGD: All assertion tests are commented out.\n\n"); | |
} | |
/** | |
* @brief Contains tests for optim_adagrad_new assertions. | |
*/ | |
void test_adagrad_assertions(Tensor* params) { | |
printf("--- Testing AdaGrad Assertions ---\n"); | |
// TEST 1: Trigger "n_params cannot be negative" | |
// optim_adagrad_new(-5, params, 0.01f, 1e-8f); | |
// TEST 2: Trigger "params array cannot be NULL" | |
// optim_adagrad_new(2, NULL, 0.01f, 1e-8f); | |
// TEST 3: Trigger "learning rate must be non-negative" | |
// optim_adagrad_new(2, params, -0.01f, 1e-8f); | |
// TEST 4: Trigger "epsilon must be non-negative" | |
// optim_adagrad_new(2, params, 0.01f, -1e-8f); | |
// SUCCESS CASE (should not fail) | |
// optim_adagrad* adagrad = optim_adagrad_new(2, params, 0.01f, 1e-8f); | |
// printf("AdaGrad valid creation successful.\n"); | |
printf("AdaGrad: All assertion tests are commented out.\n\n"); | |
} | |
/** | |
* @brief Contains tests for optim_rmsprop_new assertions. | |
*/ | |
void test_rmsprop_assertions(Tensor* params) { | |
printf("--- Testing RMSProp Assertions ---\n"); | |
// TEST 1: Trigger "n_params cannot be negative" | |
// optim_rmsprop_new(-1, params, 0.01f, 0.9f, 1e-8f); | |
// TEST 2: Trigger "params array cannot be NULL" | |
// optim_rmsprop_new(2, NULL, 0.01f, 0.9f, 1e-8f); | |
// TEST 3: Trigger "learning rate must be non-negative" | |
// optim_rmsprop_new(2, params, -0.01f, 0.9f, 1e-8f); | |
// TEST 4: Trigger "beta (decay rate) must be in [0, 1)" (testing upper bound) | |
// optim_rmsprop_new(2, params, 0.01f, 1.0f, 1e-8f); | |
// TEST 5: Trigger "beta (decay rate) must be in [0, 1)" (testing lower bound) | |
// optim_rmsprop_new(2, params, 0.01f, -0.1f, 1e-8f); | |
// TEST 6: Trigger "epsilon must be non-negative" | |
// optim_rmsprop_new(2, params, 0.01f, 0.9f, -1e-8f); | |
// SUCCESS CASE (should not fail) | |
// optim_rmsprop* rmsprop = optim_rmsprop_new(2, params, 0.01f, 0.9f, 1e-8f); | |
// printf("RMSProp valid creation successful.\n"); | |
printf("RMSProp: All assertion tests are commented out.\n\n"); | |
} | |
/** | |
* @brief Contains tests for optim_adam_new assertions. | |
*/ | |
void test_adam_assertions(Tensor* params) { | |
printf("--- Testing Adam Assertions ---\n"); | |
// TEST 1: Trigger "n_params cannot be negative" | |
// optim_adam_new(-2, params, 0.001f, 0.9f, 0.999f, 1e-8f); | |
// TEST 2: Trigger "params array cannot be NULL" | |
// optim_adam_new(2, NULL, 0.001f, 0.9f, 0.999f, 1e-8f); | |
// TEST 3: Trigger "learning rate must be non-negative" | |
// optim_adam_new(2, params, -0.001f, 0.9f, 0.999f, 1e-8f); | |
// TEST 4: Trigger "beta1 must be in [0, 1)" | |
// optim_adam_new(2, params, 0.001f, 1.0f, 0.999f, 1e-8f); | |
// TEST 5: Trigger "beta2 must be in [0, 1)" | |
// optim_adam_new(2, params, 0.001f, 0.9f, -0.1f, 1e-8f); | |
// TEST 6: Trigger "epsilon must be non-negative" | |
// optim_adam_new(2, params, 0.001f, 0.9f, 0.999f, -1e-8f); | |
// SUCCESS CASE (should not fail) | |
// optim_adam* adam = optim_adam_new(2, params, 0.001f, 0.9f, 0.999f, 1e-8f); | |
// printf("Adam valid creation successful.\n"); | |
printf("Adam: All assertion tests are commented out.\n\n"); | |
} | |
int main() { | |
cten_initilize(); | |
cten_begin_malloc(PoolId_Default); | |
TensorShape shape = {1, 5, 0, 0}; // A simple shape for our fake weights/biases | |
Tensor params[2]; | |
params[0] = Tensor_new(shape, true); | |
params[1] = Tensor_new(shape, true); | |
const int n_params = 2; | |
printf("Starting optimizer assertion tests...\n"); | |
printf("Uncomment a single test line in the code to see its assertion fail.\n\n"); | |
test_sgd_assertions(params); | |
test_adagrad_assertions(params); | |
test_rmsprop_assertions(params); | |
test_adam_assertions(params); | |
printf("All test suites complete. If you saw no crashes, it means all failing tests are commented out.\n"); | |
cten_end_malloc(); | |
cten_finalize(); | |
return 0; | |
} |
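To actually see one of these assertions fire, enable exactly one commented-out call and rebuild; a minimal example (the wording of the abort message comes from the assertions inside the cten constructors and may differ):

    // Inside test_adam_assertions(), uncomment only TEST 2:
    optim_adam_new(2, NULL, 0.001f, 0.9f, 0.999f, 1e-8f);
    // Expected: the program aborts with the "params array cannot be NULL" message.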
#include "cten.h" | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <math.h> | |
#include <string.h> | |
#include <time.h> | |
void* _cten_malloc(size_t size); | |
enum MemoryPoolIds { | |
PoolId_Default = 0, | |
PoolId_Model = 1, | |
PoolId_Optimizer = 2, | |
}; | |
typedef struct Model { | |
Tensor weight_1, weight_2; | |
Tensor bias_1, bias_2; | |
} Model; | |
Tensor Model_forward(Model* model, Tensor x) { | |
x = nn_linear(x, model->weight_1, model->bias_1); | |
x = nn_relu(x); | |
x = nn_linear(x, model->weight_2, model->bias_2); | |
return x; | |
} | |
void test_sgd_basic() {
    printf("--- Test 1: Basic SGD (Momentum = 0.0) ---\n");
    const float target_w1 = 94.7f;
    const float target_w2 = -78.0f;
    const int iterations = 50;
    const float learning_rate = 0.05f;
    TensorShape w_shape = {1, 2, 0, 0};
    Tensor w = Tensor_new(w_shape, true);
    // Initialize weights to 0
    w.data->flex[0] = 0.0f;
    w.data->flex[1] = 0.0f;
    // Create and configure the optimizer
    optim_sgd* optimizer = optim_sgd_new(1, &w);
    optim_sgd_config(optimizer, learning_rate, 0.0f);
    for (int i = 1; i <= iterations; ++i) {
        optim_sgd_zerograd(optimizer);
        float w1 = w.data->flex[0];
        float w2 = w.data->flex[1];
        // Simple MSE loss and gradient
        float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
        float grad1 = 2 * (w1 - target_w1);
        float grad2 = 2 * (w2 - target_w2);
        if (i % 10 == 0 || i == 1) {
            printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
            Tensor_print(w);
        }
        // Manually set the gradient (allocate it first if it does not exist yet,
        // mirroring the guard used in the other optimizer tests)
        if (w.node->grad.data == NULL) {
            w.node->grad = Tensor_zeros(w.shape, false);
        }
        w.node->grad.data->flex[0] = grad1;
        w.node->grad.data->flex[1] = grad2;
        optim_sgd_step(optimizer);
    }
    printf("------------------------------------------\n");
    printf("Basic SGD Test Complete.\n");
    printf("Target values: (% .4f, % .4f)\n", target_w1, target_w2);
    printf("Final values: ");
    Tensor_print(w);
    printf("------------------------------------------\n\n");
}

void test_sgd_with_momentum() {
    printf("--- Test 2: SGD with Momentum (Momentum = 0.6) ---\n");
    const float target_w1 = 94.7f;
    const float target_w2 = -78.0f;
    const int iterations = 50;
    const float learning_rate = 0.05f;
    TensorShape w_shape = {1, 2, 0, 0};
    Tensor w = Tensor_new(w_shape, true);
    w.data->flex[0] = 0.0f;
    w.data->flex[1] = 0.0f;
    // Create and configure the optimizer with momentum
    optim_sgd* optimizer = optim_sgd_new(1, &w);
    optim_sgd_config(optimizer, learning_rate, 0.6f);
    for (int i = 1; i <= iterations; ++i) {
        optim_sgd_zerograd(optimizer);
        float w1 = w.data->flex[0];
        float w2 = w.data->flex[1];
        float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
        float grad1 = 2 * (w1 - target_w1);
        float grad2 = 2 * (w2 - target_w2);
        if (i % 10 == 0 || i == 1) {
            printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
            Tensor_print(w);
        }
        // Allocate the grad tensor on first use, then set it manually
        if (w.node->grad.data == NULL) {
            w.node->grad = Tensor_zeros(w.shape, false);
        }
        w.node->grad.data->flex[0] = grad1;
        w.node->grad.data->flex[1] = grad2;
        optim_sgd_step(optimizer);
    }
    printf("------------------------------------------\n");
    printf("SGD with Momentum Test Complete.\n");
    printf("Target values: (% .4f, % .4f)\n", target_w1, target_w2);
    printf("Final values: ");
    Tensor_print(w);
    printf("------------------------------------------\n\n");
}
void test_sgd_edge_cases() {
    printf("--- Test 3: SGD Edge Case Validation ---\n");
    TensorShape shape = {1, 1, 0, 0};
    // --- EDGE CASE 1: Re-configuring an optimizer ---
    printf("1. Testing re-configuration: [EXPECTED: SUCCESS]\n");
    Tensor w1 = Tensor_new(shape, true);
    optim_sgd* optimizer = optim_sgd_new(1, &w1);
    optim_sgd_config(optimizer, 0.1f, 0.0f);
    optim_sgd_config(optimizer, 0.2f, 0.9f);  // Re-configure with momentum
    printf(" SUCCESS: Optimizer re-configured without errors.\n\n");
    // --- EDGE CASE 2: Invalid momentum value ---
    printf("2. Testing invalid momentum input: [EXPECTED: ASSERTION FAILURE]\n");
    printf(" To test, uncomment the following line in the code.\n");
    // optim_sgd_config(optimizer, 0.01f, -0.5f);  // This should abort the program.
    printf(" Test skipped.\n\n");
    // --- EDGE CASE 3: Stepping with no gradient ---
    printf("3. Testing step with no gradient available: [EXPECTED: SUCCESS]\n");
    optim_sgd_zerograd(optimizer);  // Ensure grad is NULL
    optim_sgd_step(optimizer);      // Should not crash, just do nothing.
    printf(" SUCCESS: Step was skipped safely when grad was NULL.\n\n");
    printf("------------------------------------------\n");
    printf("Edge Case Validation Complete.\n");
    printf("------------------------------------------\n");
}

int main() {
    cten_initilize();
    cten_begin_malloc(PoolId_Default);
    printf("======================================\n");
    printf(" Testing SGD Optimizer Implementation\n");
    printf("======================================\n\n");
    // Run functional tests
    test_sgd_basic();
    test_sgd_with_momentum();
    test_sgd_edge_cases();
    cten_end_malloc();
    cten_finalize();
    return 0;
}
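For reference, Test 2 exercises the classic momentum update; one common formulation (the exact convention inside optim_sgd_step is not shown in this file) is

    v_t = mu * v_(t-1) + g_t,
    w_t = w_(t-1) - lr * v_t,

with mu = 0.6 and lr = 0.05 here. Setting mu = 0.0 reduces this to the plain SGD step of Test 1, w_t = w_(t-1) - lr * g_t.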
#include "cten.h" | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <math.h> | |
#include <string.h> | |
#include <time.h> | |
void* _cten_malloc(size_t size); | |
enum MemoryPoolIds { | |
PoolId_Default = 0, | |
PoolId_Model = 1, | |
PoolId_Optimizer = 2, | |
}; | |
typedef struct Model { | |
Tensor weight_1, weight_2; | |
Tensor bias_1, bias_2; | |
} Model; | |
Tensor Model_forward(Model* model, Tensor x) { | |
x = nn_linear(x, model->weight_1, model->bias_1); | |
x = nn_relu(x); | |
x = nn_linear(x, model->weight_2, model->bias_2); | |
return x; | |
} | |
void test_adam_optimizer_with_enhancements(float weight_decay) { | |
printf("--- Testing Adam Optimizer---\n"); | |
const float target_w1 = 14.6f; | |
const float target_w2 = -8.7f; | |
const int iterations = 200; | |
const float learning_rate = 0.2f; | |
TensorShape w_shape = {1, 2, 0, 0}; | |
Tensor w = Tensor_new(w_shape, true); | |
// Updated Adam constructor with weight decay | |
optim_adam* optimizer = optim_adam_new(1, &w, learning_rate, 0.9f, 0.999f, 1e-8f, weight_decay); | |
printf("Hyperparameters: LR=%.3f, Weight_Decay=%.4f\n", | |
learning_rate, | |
weight_decay); | |
for(int i = 1; i <= iterations; ++i) { | |
optim_adam_zerograd(optimizer); | |
float w1 = w.data->flex[0]; | |
float w2 = w.data->flex[1]; | |
// MSE Loss | |
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2); | |
float grad1 = 2 * (w1 - target_w1); | |
float grad2 = 2 * (w2 - target_w2); | |
if(i % 100 == 0 || i == 1) { | |
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss); | |
Tensor_print(w); | |
} | |
if(w.node->grad.data == NULL) { w.node->grad = Tensor_zeros(w.shape, false); } | |
w.node->grad.data->flex[0] = grad1; | |
w.node->grad.data->flex[1] = grad2; | |
optim_adam_step(optimizer); | |
} | |
printf("--------------------------------\n"); | |
printf("Adam Test Complete:\n"); | |
printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2); | |
printf("Final values: "); | |
Tensor_print(w); | |
printf("--------------------------------\n\n"); | |
} | |
void test_rmsprop_optimizer_with_enhancements(float weight_decay) { | |
printf("--- Testing RMSProp Optimizer ---\n"); | |
const float target_w1 = 14.6f; | |
const float target_w2 = -8.7f; | |
const int iterations = 200; | |
const float learning_rate = 0.3f; | |
TensorShape w_shape = {1, 2, 0, 0}; | |
Tensor w = Tensor_new(w_shape, true); | |
// Updated RMSProp constructor with weight decay | |
optim_rmsprop* optimizer = optim_rmsprop_new(1, &w, learning_rate, 0.9f, 1e-8f, weight_decay); | |
printf("Hyperparameters: LR=%.3f, Weight_Decay=%.4f\n", | |
learning_rate, | |
weight_decay); | |
for(int i = 1; i <= iterations; ++i) { | |
optim_rmsprop_zerograd(optimizer); | |
float w1 = w.data->flex[0]; | |
float w2 = w.data->flex[1]; | |
// MSE Loss | |
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2); | |
float grad1 = 2 * (w1 - target_w1); | |
float grad2 = 2 * (w2 - target_w2); | |
if(i % 100 == 0 || i == 1) { | |
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss); | |
Tensor_print(w); | |
} | |
if(w.node->grad.data == NULL) { w.node->grad = Tensor_zeros(w.shape, false); } | |
w.node->grad.data->flex[0] = grad1; | |
w.node->grad.data->flex[1] = grad2; | |
optim_rmsprop_step(optimizer); | |
} | |
printf("--------------------------------\n"); | |
printf("RMSProp Test Complete:\n"); | |
printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2); | |
printf("Final values: "); | |
Tensor_print(w); | |
printf("--------------------------------\n\n"); | |
} | |
void test_adagrad_optimizer_with_enhancements(float weight_decay) { | |
printf("--- Testing AdaGrad Optimizer ---\n"); | |
const float target_w1 = 14.6f; | |
const float target_w2 = -8.7f; | |
const int iterations = 200; | |
const float learning_rate = 0.8f; | |
TensorShape w_shape = {1, 2, 0, 0}; | |
Tensor w = Tensor_new(w_shape, true); | |
// Updated AdaGrad constructor with weight decay | |
optim_adagrad* optimizer = optim_adagrad_new(1, &w, learning_rate, 1e-8f, weight_decay); | |
printf("Hyperparameters: LR=%.3f, Weight_Decay=%.4f\n", | |
learning_rate, | |
weight_decay); | |
for(int i = 1; i <= iterations; ++i) { | |
optim_adagrad_zerograd(optimizer); | |
float w1 = w.data->flex[0]; | |
float w2 = w.data->flex[1]; | |
// MSE Loss | |
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2); | |
float grad1 = 2 * (w1 - target_w1); | |
float grad2 = 2 * (w2 - target_w2); | |
if(i % 100 == 0 || i == 1) { | |
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss); | |
Tensor_print(w); | |
} | |
if(w.node->grad.data == NULL) { w.node->grad = Tensor_zeros(w.shape, false); } | |
w.node->grad.data->flex[0] = grad1; | |
w.node->grad.data->flex[1] = grad2; | |
optim_adagrad_step(optimizer); | |
} | |
printf("--------------------------------\n"); | |
printf("AdaGrad Test Complete:\n"); | |
printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2); | |
printf("Final values: "); | |
Tensor_print(w); | |
printf("--------------------------------\n\n"); | |
} | |
void test_sgd_optimizer_with_enhancements(float weight_decay) { | |
printf("--- Testing SGD Optimizer ---\n"); | |
const float target_w1 = 14.6f; | |
const float target_w2 = -8.7f; | |
const int iterations = 200; | |
const float learning_rate = 0.01f; | |
const float momentum = 0.9f; | |
TensorShape w_shape = {1, 2, 0, 0}; | |
Tensor w = Tensor_new(w_shape, true); | |
// SGD constructor with weight decay | |
optim_sgd* optimizer = optim_sgd_new(1, &w, weight_decay); | |
optim_sgd_config(optimizer, learning_rate, momentum); | |
printf("Hyperparameters: LR=%.3f, Momentum=%.1f, Weight_Decay=%.4f\n", | |
learning_rate, | |
momentum, | |
weight_decay); | |
for(int i = 1; i <= iterations; ++i) { | |
optim_sgd_zerograd(optimizer); | |
float w1 = w.data->flex[0]; | |
float w2 = w.data->flex[1]; | |
// MSE Loss | |
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2); | |
float grad1 = 2 * (w1 - target_w1); | |
float grad2 = 2 * (w2 - target_w2); | |
if(i % 100 == 0 || i == 1) { | |
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss); | |
Tensor_print(w); | |
} | |
if(w.node->grad.data == NULL) { w.node->grad = Tensor_zeros(w.shape, false); } | |
w.node->grad.data->flex[0] = grad1; | |
w.node->grad.data->flex[1] = grad2; | |
optim_sgd_step(optimizer); | |
} | |
printf("--------------------------------\n"); | |
printf("SGD Test Complete:\n"); | |
printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2); | |
printf("Final values: "); | |
Tensor_print(w); | |
printf("--------------------------------\n\n"); | |
} | |
int main() { | |
cten_initilize(); | |
cten_begin_malloc(PoolId_Default); | |
printf("Optimizer Tests\n"); | |
printf("==============================================================\n\n"); | |
// Test all optimizers without weight decay | |
test_adam_optimizer_with_enhancements(0.0f); | |
test_rmsprop_optimizer_with_enhancements(0.0f); | |
test_adagrad_optimizer_with_enhancements(0.0f); | |
test_sgd_optimizer_with_enhancements(0.0f); | |
// Test all optimizers with weight decay | |
test_adam_optimizer_with_enhancements(0.001f); | |
test_rmsprop_optimizer_with_enhancements(0.001f); | |
test_adagrad_optimizer_with_enhancements(0.001f); | |
test_sgd_optimizer_with_enhancements(0.001f); | |
cten_end_malloc(); | |
cten_finalize(); | |
return 0; | |
} |
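A note on the extra weight_decay argument used above: the runs only compare 0.0f against 0.001f, and the exact coupling is defined by the cten implementation, but the conventional L2 form simply adds a decay term to the gradient before the optimizer-specific update,

    g_t <- g_t + lambda * w_(t-1),    lambda = weight_decay,

whereas a decoupled (AdamW-style) variant instead subtracts lr * lambda * w_(t-1) directly in the parameter update.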
#include "cten.h" | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <math.h> | |
#include <string.h> | |
#include <time.h> | |
void* _cten_malloc(size_t size); | |
enum MemoryPoolIds { | |
PoolId_Default = 0, | |
PoolId_Model = 1, | |
PoolId_Optimizer = 2, | |
}; | |
typedef struct Model { | |
Tensor weight_1, weight_2; | |
Tensor bias_1, bias_2; | |
} Model; | |
Tensor Model_forward(Model* model, Tensor x) { | |
x = nn_linear(x, model->weight_1, model->bias_1); | |
x = nn_relu(x); | |
x = nn_linear(x, model->weight_2, model->bias_2); | |
return x; | |
} | |
// A small tolerance for comparing floating-point numbers. | |
#define FLOAT_TOLERANCE 1e-6 | |
// Helper function to check if two floats are approximately equal. | |
bool check_floats_equal(float a, float b) { | |
return fabsf(a - b) < FLOAT_TOLERANCE; | |
} | |
// Helper function to set up a tensor with a gradient for testing. | |
void setup_test_tensor(Tensor* t, float grad1, float grad2) { | |
*t = Tensor_new((TensorShape){1, 2, 0, 0}, true); | |
// Ensure grad tensor is allocated | |
if (t->node->grad.data == NULL) { | |
t->node->grad = Tensor_zeros(t->shape, false); | |
} | |
t->node->grad.data->flex[0] = grad1; | |
t->node->grad.data->flex[1] = grad2; | |
} | |
// --- Test for cten_clip_grad_norm --- | |
void test_clip_grad_norm() { | |
printf("--- Testing cten_clip_grad_norm ---\n"); | |
Tensor t; | |
// Case 1: Norm is greater than max_norm, so clipping should occur. | |
setup_test_tensor(&t, 3.0f, 4.0f); // Initial norm is sqrt(9 + 16) = 5.0 | |
float max_norm = 1.0f; | |
cten_clip_grad_norm(&t, 1, max_norm); | |
float expected_g1 = 3.0f * (max_norm / 5.0f); // 0.6 | |
float expected_g2 = 4.0f * (max_norm / 5.0f); // 0.8 | |
if (check_floats_equal(t.node->grad.data->flex[0], expected_g1) && | |
check_floats_equal(t.node->grad.data->flex[1], expected_g2)) { | |
printf("PASS: Gradients correctly scaled down.\n"); | |
} else { | |
printf("FAIL: Gradients not scaled correctly. Got [%f, %f], expected [%f, %f]\n", | |
t.node->grad.data->flex[0], t.node->grad.data->flex[1], expected_g1, expected_g2); | |
} | |
// Case 2: Norm is less than max_norm, so no clipping should occur. | |
setup_test_tensor(&t, 0.5f, 0.5f); // Initial norm is sqrt(0.25 + 0.25) approx 0.707 | |
max_norm = 2.0f; | |
cten_clip_grad_norm(&t, 1, max_norm); | |
if (check_floats_equal(t.node->grad.data->flex[0], 0.5f) && | |
check_floats_equal(t.node->grad.data->flex[1], 0.5f)) { | |
printf("PASS: Gradients correctly left unchanged.\n"); | |
} else { | |
printf("FAIL: Gradients were changed unexpectedly.\n"); | |
} | |
printf("\n"); | |
} | |
// --- Test for cten_clip_grad_value --- | |
void test_clip_grad_value() { | |
printf("--- Testing cten_clip_grad_value ---\n"); | |
Tensor t; | |
// Case 1: Both positive and negative gradients are clipped. | |
setup_test_tensor(&t, 5.0f, -5.0f); | |
float max_value = 2.0f; | |
cten_clip_grad_value(&t, 1, max_value); | |
if (check_floats_equal(t.node->grad.data->flex[0], 2.0f) && | |
check_floats_equal(t.node->grad.data->flex[1], -2.0f)) { | |
printf("PASS: Gradients correctly clipped to [-%f, %f].\n", max_value, max_value); | |
} else { | |
printf("FAIL: Gradients not clipped correctly. Got [%f, %f], expected [%f, %f]\n", | |
t.node->grad.data->flex[0], t.node->grad.data->flex[1], 2.0f, -2.0f); | |
} | |
// Case 2: Gradients are within the bounds, no clipping should occur. | |
setup_test_tensor(&t, 1.0f, -1.0f); | |
max_value = 2.0f; | |
cten_clip_grad_value(&t, 1, max_value); | |
if (check_floats_equal(t.node->grad.data->flex[0], 1.0f) && | |
check_floats_equal(t.node->grad.data->flex[1], -1.0f)) { | |
printf("PASS: Gradients correctly left unchanged.\n"); | |
} else { | |
printf("FAIL: Gradients were changed unexpectedly.\n"); | |
} | |
printf("\n"); | |
} | |
// --- Test for cten_clip_grad_value_range --- | |
void test_clip_grad_value_range() { | |
printf("--- Testing cten_clip_grad_value_range ---\n"); | |
Tensor t; | |
// Test with an asymmetric range. | |
setup_test_tensor(&t, 10.0f, -10.0f); | |
float min_val = -1.0f; | |
float max_val = 2.0f; | |
cten_clip_grad_value_range(&t, 1, min_val, max_val); | |
if (check_floats_equal(t.node->grad.data->flex[0], max_val) && | |
check_floats_equal(t.node->grad.data->flex[1], min_val)) { | |
printf("PASS: Gradients correctly clipped to range [%f, %f].\n", min_val, max_val); | |
} else { | |
printf("FAIL: Gradients not clipped correctly. Got [%f, %f], expected [%f, %f]\n", | |
t.node->grad.data->flex[0], t.node->grad.data->flex[1], max_val, min_val); | |
} | |
printf("\n"); | |
} | |
// --- Test for cten_clip_grad_positive & cten_clip_grad_negative --- | |
// These are essentially special cases of value_range, but we test them to be sure. | |
void test_clip_one_sided() { | |
printf("--- Testing cten_clip_grad_positive & cten_clip_grad_negative ---\n"); | |
Tensor t_pos, t_neg; | |
// Setup: One gradient is positive, one is negative. | |
setup_test_tensor(&t_pos, 5.0f, -2.0f); | |
setup_test_tensor(&t_neg, 5.0f, -2.0f); | |
// Test positive clipping | |
float max_val = 1.5f; | |
cten_clip_grad_positive(&t_pos, 1, max_val); | |
if (check_floats_equal(t_pos.node->grad.data->flex[0], max_val) && | |
check_floats_equal(t_pos.node->grad.data->flex[1], -2.0f)) { // Negative value should be untouched | |
printf("PASS: cten_clip_grad_positive works correctly.\n"); | |
} else { | |
printf("FAIL: cten_clip_grad_positive failed. Got [%f, %f], expected [%f, %f]\n", | |
t_pos.node->grad.data->flex[0], t_pos.node->grad.data->flex[1], max_val, -2.0f); | |
} | |
// Test negative clipping | |
float min_val = -0.5f; | |
cten_clip_grad_negative(&t_neg, 1, min_val); | |
if (check_floats_equal(t_neg.node->grad.data->flex[0], 5.0f) && // Positive value should be untouched | |
check_floats_equal(t_neg.node->grad.data->flex[1], min_val)) { | |
printf("PASS: cten_clip_grad_negative works correctly.\n"); | |
} else { | |
printf("FAIL: cten_clip_grad_negative failed. Got [%f, %f], expected [%f, %f]\n", | |
t_neg.node->grad.data->flex[0], t_neg.node->grad.data->flex[1], 5.0f, min_val); | |
} | |
printf("\n"); | |
} | |
// --- Test Edge Cases --- | |
void test_edge_cases() { | |
printf("--- Testing Edge Cases ---\n"); | |
// Test with NULL parameters. These should not crash. | |
cten_clip_grad_norm(NULL, 1, 1.0f); | |
cten_clip_grad_norm(NULL, 0, 1.0f); | |
cten_clip_grad_value(NULL, 1, 1.0f); | |
cten_clip_grad_value_range(NULL, 1, -1.0f, 1.0f); | |
cten_clip_grad_positive(NULL, 1, 1.0f); | |
cten_clip_grad_negative(NULL, 1, -1.0f); | |
printf("PASS: All functions handled NULL and zero-sized inputs without crashing.\n"); | |
// Test with a tensor that has no gradient. | |
Tensor t = Tensor_new((TensorShape){1, 1, 0, 0}, true); // Grad is NULL by default | |
cten_clip_grad_norm(&t, 1, 1.0f); | |
printf("PASS: Handled tensor with NULL gradient without crashing.\n"); | |
// Test norm clipping with max_norm <= 0. Should do nothing. | |
setup_test_tensor(&t, 10.0f, 10.0f); | |
cten_clip_grad_norm(&t, 1, 0.0f); | |
cten_clip_grad_norm(&t, 1, -5.0f); | |
if (check_floats_equal(t.node->grad.data->flex[0], 10.0f)) { | |
printf("PASS: cten_clip_grad_norm correctly does nothing for max_norm <= 0.\n"); | |
} else { | |
printf("FAIL: cten_clip_grad_norm modified gradients for max_norm <= 0.\n"); | |
} | |
printf("\n"); | |
} | |
int main() { | |
// You must have a function like this to initialize your library's memory, etc. | |
// Replace with your actual library initialization function. | |
cten_initilize(); | |
cten_begin_malloc(0); | |
printf("================================================\n"); | |
printf(" RUNNING GRADIENT CLIPPING TEST SUITE \n"); | |
printf("================================================\n\n"); | |
test_clip_grad_norm(); | |
test_clip_grad_value(); | |
test_clip_grad_value_range(); | |
test_clip_one_sided(); | |
test_edge_cases(); | |
printf("================================================\n"); | |
printf(" TEST SUITE EXECUTION COMPLETE \n"); | |
printf("================================================\n"); | |
cten_end_malloc(); | |
cten_finalize(); | |
return 0; | |
} |
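The expected values in these tests follow the usual clipping rules. Norm clipping rescales the whole gradient only when its L2 norm exceeds max_norm c:

    g <- g * c / ||g||_2   if ||g||_2 > c,   otherwise g is unchanged,

so g = (3, 4) with ||g||_2 = 5 and c = 1 becomes (0.6, 0.8). Value clipping is elementwise, g_i <- min(max(g_i, -c), c), and the range, positive, and negative variants only change which bound is applied to each element.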