@Advaitgaur004
Last active August 11, 2025 08:53
Optimizer - Test (in main.c) - CTensor
#include "cten.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <time.h>
void* _cten_malloc(size_t size);
enum MemoryPoolIds {
PoolId_Default = 0,
PoolId_Model = 1,
PoolId_Optimizer = 2,
};
typedef struct Model {
Tensor weight_1, weight_2;
Tensor bias_1, bias_2;
} Model;
Tensor Model_forward(Model* model, Tensor x) {
x = nn_linear(x, model->weight_1, model->bias_1);
x = nn_relu(x);
x = nn_linear(x, model->weight_2, model->bias_2);
return x;
}
void test_adam_optimizer() {
printf("--- Testing Adam Optimizer ---\n");
const float target_w1 = 14.6f;
const float target_w2 = -8.7f;
const int iterations = 300;
const float learning_rate = 0.2f;
TensorShape w_shape = {1, 2, 0, 0};
Tensor w = Tensor_new(w_shape, true);
optim_adam* optimizer = optim_adam_new(1, &w, learning_rate, 0.9f, 0.999f, 1e-8f);
for (int i = 1; i <= iterations; ++i) {
optim_adam_zerograd(optimizer);
float w1 = w.data->flex[0];
float w2 = w.data->flex[1];
// MSE loss (currently active). To test MAE instead, comment out this block and uncomment the MAE block below.
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
float grad1 = 2 * (w1 - target_w1);
float grad2 = 2 * (w2 - target_w2);
// MAE loss (currently commented out). To test it, uncomment this block and comment out the MSE block above.
// float loss = fabsf(w1 - target_w1) + fabsf(w2 - target_w2);
// float grad1 = (w1 > target_w1) ? 1.0f : -1.0f;
// if (w1 == target_w1) grad1 = 0.0f;
// float grad2 = (w2 > target_w2) ? 1.0f : -1.0f;
// if (w2 == target_w2) grad2 = 0.0f;
if (i % 100 == 0 || i == 1) {
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
Tensor_print(w);
}
if (w.node->grad.data == NULL) {
w.node->grad = Tensor_zeros(w.shape, false);
}
w.node->grad.data->flex[0] = grad1;
w.node->grad.data->flex[1] = grad2;
optim_adam_step(optimizer);
}
printf("--------------------------------\n");
printf("Adam Test Complete:\n");
printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2);
printf("Final values: ");
Tensor_print(w);
printf("--------------------------------\n\n");
}
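/*
 * For reference, a minimal sketch of the textbook Adam update this test
 * exercises. The helper below is illustrative only: its names are local to
 * this sketch, it is not part of the cten API, and cten's internal
 * implementation may differ in details such as how bias correction is applied.
 */
static inline void adam_step_reference(float* w, float* m, float* v, float g,
                                       float lr, float beta1, float beta2,
                                       float eps, int t) {
    *m = beta1 * (*m) + (1.0f - beta1) * g;             // first moment (mean of gradients)
    *v = beta2 * (*v) + (1.0f - beta2) * g * g;         // second moment (uncentered variance)
    float m_hat = *m / (1.0f - powf(beta1, (float)t));  // bias-corrected moments
    float v_hat = *v / (1.0f - powf(beta2, (float)t));
    *w -= lr * m_hat / (sqrtf(v_hat) + eps);            // parameter update
}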
void test_rmsprop_optimizer() {
printf("--- Testing RMSProp Optimizer ---\n");
const float target_w1 = 14.6f;
const float target_w2 = -8.7f;
const int iterations = 300;
const float learning_rate = 0.3f;
TensorShape w_shape = {1, 2, 0, 0};
Tensor w = Tensor_new(w_shape, true);
optim_rmsprop* optimizer = optim_rmsprop_new(1, &w, learning_rate, 0.9f, 1e-8f);
for (int i = 1; i <= iterations; ++i) {
optim_rmsprop_zerograd(optimizer);
float w1 = w.data->flex[0];
float w2 = w.data->flex[1];
// MSE loss (currently active). To test MAE instead, comment out this block and uncomment the MAE block below.
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
float grad1 = 2 * (w1 - target_w1);
float grad2 = 2 * (w2 - target_w2);
// MAE loss (currently commented out). To test it, uncomment this block and comment out the MSE block above.
// float loss = fabsf(w1 - target_w1) + fabsf(w2 - target_w2);
// float grad1 = (w1 > target_w1) ? 1.0f : -1.0f;
// if (w1 == target_w1) grad1 = 0.0f;
// float grad2 = (w2 > target_w2) ? 1.0f : -1.0f;
// if (w2 == target_w2) grad2 = 0.0f;
if (i % 100 == 0 || i == 1) {
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
Tensor_print(w);
}
if (w.node->grad.data == NULL) {
w.node->grad = Tensor_zeros(w.shape, false);
}
w.node->grad.data->flex[0] = grad1;
w.node->grad.data->flex[1] = grad2;
optim_rmsprop_step(optimizer);
}
printf("--------------------------------\n");
printf("RMSProp Test Complete:\n");
printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2);
printf("Final values: ");
Tensor_print(w);
printf("--------------------------------\n\n");
}
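/*
 * For reference, a minimal sketch of the standard RMSProp update this test
 * exercises. Illustrative only; the names are local to this sketch and not
 * part of the cten API.
 */
static inline void rmsprop_step_reference(float* w, float* v, float g,
                                          float lr, float beta, float eps) {
    *v = beta * (*v) + (1.0f - beta) * g * g;  // running average of squared gradients
    *w -= lr * g / (sqrtf(*v) + eps);          // per-parameter scaled step
}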
void test_adagrad_optimizer() {
printf("--- Testing AdaGrad Optimizer ---\n");
const float target_w1 = 14.6f;
const float target_w2 = -8.7f;
const int iterations = 300;
const float learning_rate = 0.8f;
TensorShape w_shape = {1, 2, 0, 0};
Tensor w = Tensor_new(w_shape, true);
optim_adagrad* optimizer = optim_adagrad_new(1, &w, learning_rate, 1e-8f);
for (int i = 1; i <= iterations; ++i) {
optim_adagrad_zerograd(optimizer);
float w1 = w.data->flex[0];
float w2 = w.data->flex[1];
// MSE loss (currently active). To test MAE instead, comment out this block and uncomment the MAE block below.
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
float grad1 = 2 * (w1 - target_w1);
float grad2 = 2 * (w2 - target_w2);
// MAE loss (currently commented out). To test it, uncomment this block and comment out the MSE block above.
// float loss = fabsf(w1 - target_w1) + fabsf(w2 - target_w2);
// float grad1 = (w1 > target_w1) ? 1.0f : -1.0f;
// if (w1 == target_w1) grad1 = 0.0f;
// float grad2 = (w2 > target_w2) ? 1.0f : -1.0f;
// if (w2 == target_w2) grad2 = 0.0f;
if (i % 100 == 0 || i == 1) {
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
Tensor_print(w);
}
if (w.node->grad.data == NULL) {
w.node->grad = Tensor_zeros(w.shape, false);
}
w.node->grad.data->flex[0] = grad1;
w.node->grad.data->flex[1] = grad2;
optim_adagrad_step(optimizer);
}
printf("--------------------------------\n");
printf("AdaGrad Test Complete:\n");
printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2);
printf("Final values: ");
Tensor_print(w);
printf("--------------------------------\n\n");
}
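/*
 * For reference, a minimal sketch of the standard AdaGrad update this test
 * exercises. Illustrative only; the names are local to this sketch and not
 * part of the cten API.
 */
static inline void adagrad_step_reference(float* w, float* G, float g,
                                          float lr, float eps) {
    *G += g * g;                       // accumulated sum of squared gradients
    *w -= lr * g / (sqrtf(*G) + eps);  // step size shrinks as the accumulator grows
}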
int main() {
cten_initilize();
cten_begin_malloc(PoolId_Default);
printf("Optimizer Tests\n");
test_adam_optimizer();
test_rmsprop_optimizer();
test_adagrad_optimizer();
cten_end_malloc();
cten_finalize();
return 0;
}
#include "cten.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <time.h>
void* _cten_malloc(size_t size);
enum MemoryPoolIds {
PoolId_Default = 0,
PoolId_Model = 1,
PoolId_Optimizer = 2,
};
typedef struct Model {
Tensor weight_1, weight_2;
Tensor bias_1, bias_2;
} Model;
Tensor Model_forward(Model* model, Tensor x) {
x = nn_linear(x, model->weight_1, model->bias_1);
x = nn_relu(x);
x = nn_linear(x, model->weight_2, model->bias_2);
return x;
}
/**
* @brief Contains tests for optim_sgd_new assertions.
*/
void test_sgd_assertions(Tensor* params) {
printf("--- Testing SGD Assertions ---\n");
// Uncomment one line at a time to test.
// The program is expected to abort with a descriptive error message.
// TEST 1: Trigger "n_params cannot be negative"
// optim_sgd_new(-1, params);
// TEST 2: Trigger "params array cannot be NULL when n_params is greater than 0"
// optim_sgd_new(2, NULL);
// SUCCESS CASE (should not fail)
// optim_sgd* sgd = optim_sgd_new(2, params);
// printf("SGD valid creation successful.\n");
printf("SGD: All assertion tests are commented out.\n\n");
}
/**
* @brief Contains tests for optim_adagrad_new assertions.
*/
void test_adagrad_assertions(Tensor* params) {
printf("--- Testing AdaGrad Assertions ---\n");
// TEST 1: Trigger "n_params cannot be negative"
// optim_adagrad_new(-5, params, 0.01f, 1e-8f);
// TEST 2: Trigger "params array cannot be NULL"
// optim_adagrad_new(2, NULL, 0.01f, 1e-8f);
// TEST 3: Trigger "learning rate must be non-negative"
// optim_adagrad_new(2, params, -0.01f, 1e-8f);
// TEST 4: Trigger "epsilon must be non-negative"
// optim_adagrad_new(2, params, 0.01f, -1e-8f);
// SUCCESS CASE (should not fail)
// optim_adagrad* adagrad = optim_adagrad_new(2, params, 0.01f, 1e-8f);
// printf("AdaGrad valid creation successful.\n");
printf("AdaGrad: All assertion tests are commented out.\n\n");
}
/**
* @brief Contains tests for optim_rmsprop_new assertions.
*/
void test_rmsprop_assertions(Tensor* params) {
printf("--- Testing RMSProp Assertions ---\n");
// TEST 1: Trigger "n_params cannot be negative"
// optim_rmsprop_new(-1, params, 0.01f, 0.9f, 1e-8f);
// TEST 2: Trigger "params array cannot be NULL"
// optim_rmsprop_new(2, NULL, 0.01f, 0.9f, 1e-8f);
// TEST 3: Trigger "learning rate must be non-negative"
// optim_rmsprop_new(2, params, -0.01f, 0.9f, 1e-8f);
// TEST 4: Trigger "beta (decay rate) must be in [0, 1)" (testing upper bound)
// optim_rmsprop_new(2, params, 0.01f, 1.0f, 1e-8f);
// TEST 5: Trigger "beta (decay rate) must be in [0, 1)" (testing lower bound)
// optim_rmsprop_new(2, params, 0.01f, -0.1f, 1e-8f);
// TEST 6: Trigger "epsilon must be non-negative"
// optim_rmsprop_new(2, params, 0.01f, 0.9f, -1e-8f);
// SUCCESS CASE (should not fail)
// optim_rmsprop* rmsprop = optim_rmsprop_new(2, params, 0.01f, 0.9f, 1e-8f);
// printf("RMSProp valid creation successful.\n");
printf("RMSProp: All assertion tests are commented out.\n\n");
}
/**
* @brief Contains tests for optim_adam_new assertions.
*/
void test_adam_assertions(Tensor* params) {
printf("--- Testing Adam Assertions ---\n");
// TEST 1: Trigger "n_params cannot be negative"
// optim_adam_new(-2, params, 0.001f, 0.9f, 0.999f, 1e-8f);
// TEST 2: Trigger "params array cannot be NULL"
// optim_adam_new(2, NULL, 0.001f, 0.9f, 0.999f, 1e-8f);
// TEST 3: Trigger "learning rate must be non-negative"
// optim_adam_new(2, params, -0.001f, 0.9f, 0.999f, 1e-8f);
// TEST 4: Trigger "beta1 must be in [0, 1)"
// optim_adam_new(2, params, 0.001f, 1.0f, 0.999f, 1e-8f);
// TEST 5: Trigger "beta2 must be in [0, 1)"
// optim_adam_new(2, params, 0.001f, 0.9f, -0.1f, 1e-8f);
// TEST 6: Trigger "epsilon must be non-negative"
// optim_adam_new(2, params, 0.001f, 0.9f, 0.999f, -1e-8f);
// SUCCESS CASE (should not fail)
// optim_adam* adam = optim_adam_new(2, params, 0.001f, 0.9f, 0.999f, 1e-8f);
// printf("Adam valid creation successful.\n");
printf("Adam: All assertion tests are commented out.\n\n");
}
int main() {
cten_initilize();
cten_begin_malloc(PoolId_Default);
TensorShape shape = {1, 5, 0, 0}; // A simple shape for our fake weights/biases
Tensor params[2];
params[0] = Tensor_new(shape, true);
params[1] = Tensor_new(shape, true);
const int n_params = 2;
printf("Starting optimizer assertion tests...\n");
printf("Uncomment a single test line in the code to see its assertion fail.\n\n");
test_sgd_assertions(params);
test_adagrad_assertions(params);
test_rmsprop_assertions(params);
test_adam_assertions(params);
printf("All test suites complete. If you saw no crashes, it means all failing tests are commented out.\n");
cten_end_malloc();
cten_finalize();
return 0;
}
#include "cten.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <time.h>
void* _cten_malloc(size_t size);
enum MemoryPoolIds {
PoolId_Default = 0,
PoolId_Model = 1,
PoolId_Optimizer = 2,
};
typedef struct Model {
Tensor weight_1, weight_2;
Tensor bias_1, bias_2;
} Model;
Tensor Model_forward(Model* model, Tensor x) {
x = nn_linear(x, model->weight_1, model->bias_1);
x = nn_relu(x);
x = nn_linear(x, model->weight_2, model->bias_2);
return x;
}
void test_sgd_basic() {
printf("--- Test 1: Basic SGD (Momentum = 0.0) ---\n");
const float target_w1 = 94.7f;
const float target_w2 = -78.0f;
const int iterations = 50;
const float learning_rate = 0.05f;
TensorShape w_shape = {1, 2, 0, 0};
Tensor w = Tensor_new(w_shape, true);
// Initialize weights to 0
w.data->flex[0] = 0.0f;
w.data->flex[1] = 0.0f;
// Create and configure the optimizer
optim_sgd* optimizer = optim_sgd_new(1, &w);
optim_sgd_config(optimizer, learning_rate, 0.0f);
for (int i = 1; i <= iterations; ++i) {
optim_sgd_zerograd(optimizer);
float w1 = w.data->flex[0];
float w2 = w.data->flex[1];
// Simple MSE loss and gradient
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
float grad1 = 2 * (w1 - target_w1);
float grad2 = 2 * (w2 - target_w2);
if (i % 10 == 0 || i == 1) {
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
Tensor_print(w);
}
// Manually set the gradient (allocating it first if it has not been created yet)
if (w.node->grad.data == NULL) {
w.node->grad = Tensor_zeros(w.shape, false);
}
w.node->grad.data->flex[0] = grad1;
w.node->grad.data->flex[1] = grad2;
optim_sgd_step(optimizer);
}
printf("------------------------------------------\n");
printf("Basic SGD Test Complete.\n");
printf("Target values: (% .4f, % .4f)\n", target_w1, target_w2);
printf("Final values: ");
Tensor_print(w);
printf("------------------------------------------\n\n");
}
void test_sgd_with_momentum() {
printf("--- Test 2: SGD with Momentum (Momentum = 0.6) ---\n");
const float target_w1 = 94.7f;
const float target_w2 = -78.0f;
const int iterations = 50;
const float learning_rate = 0.05f;
TensorShape w_shape = {1, 2, 0, 0};
Tensor w = Tensor_new(w_shape, true);
w.data->flex[0] = 0.0f;
w.data->flex[1] = 0.0f;
// Create and configure the optimizer with momentum
optim_sgd* optimizer = optim_sgd_new(1, &w);
optim_sgd_config(optimizer, learning_rate, 0.6f);
for (int i = 1; i <= iterations; ++i) {
optim_sgd_zerograd(optimizer);
float w1 = w.data->flex[0];
float w2 = w.data->flex[1];
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
float grad1 = 2 * (w1 - target_w1);
float grad2 = 2 * (w2 - target_w2);
if (i % 10 == 0 || i == 1) {
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
Tensor_print(w);
}
if (w.node->grad.data == NULL) {
w.node->grad = Tensor_zeros(w.shape, false);
}
w.node->grad.data->flex[0] = grad1;
w.node->grad.data->flex[1] = grad2;
optim_sgd_step(optimizer);
}
printf("------------------------------------------\n");
printf("SGD with Momentum Test Complete.\n");
printf("Target values: (% .4f, % .4f)\n", target_w1, target_w2);
printf("Final values: ");
Tensor_print(w);
printf("------------------------------------------\n\n");
}
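/*
 * For reference, a minimal sketch of SGD with a momentum buffer, following the
 * common convention where the buffer accumulates raw gradients and the learning
 * rate is applied afterwards. Illustrative only; cten may instead fold the
 * learning rate into the buffer.
 */
static inline void sgd_momentum_step_reference(float* w, float* buf, float g,
                                               float lr, float momentum) {
    *buf = momentum * (*buf) + g;  // velocity; with momentum == 0 this is plain SGD
    *w -= lr * (*buf);             // parameter update
}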
void test_sgd_edge_cases() {
printf("--- Test 3: SGD Edge Case Validation ---\n");
TensorShape shape = {1, 1, 0, 0};
// --- EDGE CASE 1: Re-configuring an optimizer ---
printf("1. Testing re-configuration: [EXPECTED: SUCCESS]\n");
Tensor w1 = Tensor_new(shape, true);
optim_sgd* optimizer = optim_sgd_new(1, &w1);
optim_sgd_config(optimizer, 0.1f, 0.0f);
optim_sgd_config(optimizer, 0.2f, 0.9f); // Re-configure with momentum
printf(" SUCCESS: Optimizer re-configured without errors.\n\n");
// --- EDGE CASE 2: Invalid momentum value ---
printf("2. Testing invalid momentum input: [EXPECTED: ASSERTION FAILURE]\n");
printf(" To test, uncomment the following line in the code.\n");
// optim_sgd_config(optimizer, 0.01f, -0.5f); // This should abort the program.
printf(" Test skipped.\n\n");
// --- EDGE CASE 3: Stepping with no gradient ---
printf("3. Testing step with no gradient available: [EXPECTED: SUCCESS]\n");
optim_sgd_zerograd(optimizer); // Ensure grad is NULL
optim_sgd_step(optimizer); // Should not crash, just do nothing.
printf(" SUCCESS: Step was skipped safely when grad was NULL.\n\n");
printf("------------------------------------------\n");
printf("Edge Case Validation Complete.\n");
printf("------------------------------------------\n");
}
int main() {
cten_initilize();
cten_begin_malloc(PoolId_Default);
printf("======================================\n");
printf(" Testing SGD Optimizer Implementation\n");
printf("======================================\n\n");
// Run functional tests
test_sgd_basic();
test_sgd_with_momentum();
test_sgd_edge_cases();
cten_end_malloc();
cten_finalize();
return 0;
}
#include "cten.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <time.h>
void* _cten_malloc(size_t size);
enum MemoryPoolIds {
PoolId_Default = 0,
PoolId_Model = 1,
PoolId_Optimizer = 2,
};
typedef struct Model {
Tensor weight_1, weight_2;
Tensor bias_1, bias_2;
} Model;
Tensor Model_forward(Model* model, Tensor x) {
x = nn_linear(x, model->weight_1, model->bias_1);
x = nn_relu(x);
x = nn_linear(x, model->weight_2, model->bias_2);
return x;
}
void test_adam_optimizer_with_enhancements(float weight_decay) {
printf("--- Testing Adam Optimizer---\n");
const float target_w1 = 14.6f;
const float target_w2 = -8.7f;
const int iterations = 200;
const float learning_rate = 0.2f;
TensorShape w_shape = {1, 2, 0, 0};
Tensor w = Tensor_new(w_shape, true);
// Updated Adam constructor with weight decay
optim_adam* optimizer = optim_adam_new(1, &w, learning_rate, 0.9f, 0.999f, 1e-8f, weight_decay);
printf("Hyperparameters: LR=%.3f, Weight_Decay=%.4f\n",
learning_rate,
weight_decay);
for(int i = 1; i <= iterations; ++i) {
optim_adam_zerograd(optimizer);
float w1 = w.data->flex[0];
float w2 = w.data->flex[1];
// MSE Loss
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
float grad1 = 2 * (w1 - target_w1);
float grad2 = 2 * (w2 - target_w2);
if(i % 100 == 0 || i == 1) {
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
Tensor_print(w);
}
if(w.node->grad.data == NULL) { w.node->grad = Tensor_zeros(w.shape, false); }
w.node->grad.data->flex[0] = grad1;
w.node->grad.data->flex[1] = grad2;
optim_adam_step(optimizer);
}
printf("--------------------------------\n");
printf("Adam Test Complete:\n");
printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2);
printf("Final values: ");
Tensor_print(w);
printf("--------------------------------\n\n");
}
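/*
 * Note on weight decay: with the coupled (L2-regularization) convention, the
 * decay term is simply folded into the gradient before the usual update. The
 * helper below only illustrates that convention; whether cten applies coupled
 * or decoupled (AdamW-style) decay is an implementation detail of the library.
 */
static inline float apply_coupled_weight_decay(float g, float w, float weight_decay) {
    return g + weight_decay * w;  // g := dL/dw + weight_decay * w
}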
void test_rmsprop_optimizer_with_enhancements(float weight_decay) {
printf("--- Testing RMSProp Optimizer ---\n");
const float target_w1 = 14.6f;
const float target_w2 = -8.7f;
const int iterations = 200;
const float learning_rate = 0.3f;
TensorShape w_shape = {1, 2, 0, 0};
Tensor w = Tensor_new(w_shape, true);
// Updated RMSProp constructor with weight decay
optim_rmsprop* optimizer = optim_rmsprop_new(1, &w, learning_rate, 0.9f, 1e-8f, weight_decay);
printf("Hyperparameters: LR=%.3f, Weight_Decay=%.4f\n",
learning_rate,
weight_decay);
for(int i = 1; i <= iterations; ++i) {
optim_rmsprop_zerograd(optimizer);
float w1 = w.data->flex[0];
float w2 = w.data->flex[1];
// MSE Loss
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
float grad1 = 2 * (w1 - target_w1);
float grad2 = 2 * (w2 - target_w2);
if(i % 100 == 0 || i == 1) {
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
Tensor_print(w);
}
if(w.node->grad.data == NULL) { w.node->grad = Tensor_zeros(w.shape, false); }
w.node->grad.data->flex[0] = grad1;
w.node->grad.data->flex[1] = grad2;
optim_rmsprop_step(optimizer);
}
printf("--------------------------------\n");
printf("RMSProp Test Complete:\n");
printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2);
printf("Final values: ");
Tensor_print(w);
printf("--------------------------------\n\n");
}
void test_adagrad_optimizer_with_enhancements(float weight_decay) {
printf("--- Testing AdaGrad Optimizer ---\n");
const float target_w1 = 14.6f;
const float target_w2 = -8.7f;
const int iterations = 200;
const float learning_rate = 0.8f;
TensorShape w_shape = {1, 2, 0, 0};
Tensor w = Tensor_new(w_shape, true);
// Updated AdaGrad constructor with weight decay
optim_adagrad* optimizer = optim_adagrad_new(1, &w, learning_rate, 1e-8f, weight_decay);
printf("Hyperparameters: LR=%.3f, Weight_Decay=%.4f\n",
learning_rate,
weight_decay);
for(int i = 1; i <= iterations; ++i) {
optim_adagrad_zerograd(optimizer);
float w1 = w.data->flex[0];
float w2 = w.data->flex[1];
// MSE Loss
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
float grad1 = 2 * (w1 - target_w1);
float grad2 = 2 * (w2 - target_w2);
if(i % 100 == 0 || i == 1) {
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
Tensor_print(w);
}
if(w.node->grad.data == NULL) { w.node->grad = Tensor_zeros(w.shape, false); }
w.node->grad.data->flex[0] = grad1;
w.node->grad.data->flex[1] = grad2;
optim_adagrad_step(optimizer);
}
printf("--------------------------------\n");
printf("AdaGrad Test Complete:\n");
printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2);
printf("Final values: ");
Tensor_print(w);
printf("--------------------------------\n\n");
}
void test_sgd_optimizer_with_enhancements(float weight_decay) {
printf("--- Testing SGD Optimizer ---\n");
const float target_w1 = 14.6f;
const float target_w2 = -8.7f;
const int iterations = 200;
const float learning_rate = 0.01f;
const float momentum = 0.9f;
TensorShape w_shape = {1, 2, 0, 0};
Tensor w = Tensor_new(w_shape, true);
// SGD constructor with weight decay
optim_sgd* optimizer = optim_sgd_new(1, &w, weight_decay);
optim_sgd_config(optimizer, learning_rate, momentum);
printf("Hyperparameters: LR=%.3f, Momentum=%.1f, Weight_Decay=%.4f\n",
learning_rate,
momentum,
weight_decay);
for(int i = 1; i <= iterations; ++i) {
optim_sgd_zerograd(optimizer);
float w1 = w.data->flex[0];
float w2 = w.data->flex[1];
// MSE Loss
float loss = (w1 - target_w1) * (w1 - target_w1) + (w2 - target_w2) * (w2 - target_w2);
float grad1 = 2 * (w1 - target_w1);
float grad2 = 2 * (w2 - target_w2);
if(i % 100 == 0 || i == 1) {
printf("Iter: %-3d | Loss: %-8.4f | ", i, loss);
Tensor_print(w);
}
if(w.node->grad.data == NULL) { w.node->grad = Tensor_zeros(w.shape, false); }
w.node->grad.data->flex[0] = grad1;
w.node->grad.data->flex[1] = grad2;
optim_sgd_step(optimizer);
}
printf("--------------------------------\n");
printf("SGD Test Complete:\n");
printf("Target values: (%.4f, %.4f)\n", target_w1, target_w2);
printf("Final values: ");
Tensor_print(w);
printf("--------------------------------\n\n");
}
int main() {
cten_initilize();
cten_begin_malloc(PoolId_Default);
printf("Optimizer Tests\n");
printf("==============================================================\n\n");
// Test all optimizers without weight decay
test_adam_optimizer_with_enhancements(0.0f);
test_rmsprop_optimizer_with_enhancements(0.0f);
test_adagrad_optimizer_with_enhancements(0.0f);
test_sgd_optimizer_with_enhancements(0.0f);
// Test all optimizers with weight decay
test_adam_optimizer_with_enhancements(0.001f);
test_rmsprop_optimizer_with_enhancements(0.001f);
test_adagrad_optimizer_with_enhancements(0.001f);
test_sgd_optimizer_with_enhancements(0.001f);
cten_end_malloc();
cten_finalize();
return 0;
}
#include "cten.h"
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <time.h>
void* _cten_malloc(size_t size);
enum MemoryPoolIds {
PoolId_Default = 0,
PoolId_Model = 1,
PoolId_Optimizer = 2,
};
typedef struct Model {
Tensor weight_1, weight_2;
Tensor bias_1, bias_2;
} Model;
Tensor Model_forward(Model* model, Tensor x) {
x = nn_linear(x, model->weight_1, model->bias_1);
x = nn_relu(x);
x = nn_linear(x, model->weight_2, model->bias_2);
return x;
}
// A small tolerance for comparing floating-point numbers.
#define FLOAT_TOLERANCE 1e-6
// Helper function to check if two floats are approximately equal.
bool check_floats_equal(float a, float b) {
return fabsf(a - b) < FLOAT_TOLERANCE;
}
// Helper function to set up a tensor with a gradient for testing.
void setup_test_tensor(Tensor* t, float grad1, float grad2) {
*t = Tensor_new((TensorShape){1, 2, 0, 0}, true);
// Ensure grad tensor is allocated
if (t->node->grad.data == NULL) {
t->node->grad = Tensor_zeros(t->shape, false);
}
t->node->grad.data->flex[0] = grad1;
t->node->grad.data->flex[1] = grad2;
}
// --- Test for cten_clip_grad_norm ---
void test_clip_grad_norm() {
printf("--- Testing cten_clip_grad_norm ---\n");
Tensor t;
// Case 1: Norm is greater than max_norm, so clipping should occur.
setup_test_tensor(&t, 3.0f, 4.0f); // Initial norm is sqrt(9 + 16) = 5.0
float max_norm = 1.0f;
cten_clip_grad_norm(&t, 1, max_norm);
float expected_g1 = 3.0f * (max_norm / 5.0f); // 0.6
float expected_g2 = 4.0f * (max_norm / 5.0f); // 0.8
if (check_floats_equal(t.node->grad.data->flex[0], expected_g1) &&
check_floats_equal(t.node->grad.data->flex[1], expected_g2)) {
printf("PASS: Gradients correctly scaled down.\n");
} else {
printf("FAIL: Gradients not scaled correctly. Got [%f, %f], expected [%f, %f]\n",
t.node->grad.data->flex[0], t.node->grad.data->flex[1], expected_g1, expected_g2);
}
// Case 2: Norm is less than max_norm, so no clipping should occur.
setup_test_tensor(&t, 0.5f, 0.5f); // Initial norm is sqrt(0.25 + 0.25) approx 0.707
max_norm = 2.0f;
cten_clip_grad_norm(&t, 1, max_norm);
if (check_floats_equal(t.node->grad.data->flex[0], 0.5f) &&
check_floats_equal(t.node->grad.data->flex[1], 0.5f)) {
printf("PASS: Gradients correctly left unchanged.\n");
} else {
printf("FAIL: Gradients were changed unexpectedly.\n");
}
printf("\n");
}
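/*
 * The expected values above follow the usual clip-by-norm rule: take the L2
 * norm of the gradients and, only if it exceeds max_norm, rescale them by
 * max_norm / norm. A minimal single-array sketch (cten may compute a global
 * norm across all parameters instead):
 */
static void clip_grad_norm_reference(float* grads, int n, float max_norm) {
    float sum_sq = 0.0f;
    for (int i = 0; i < n; i++) sum_sq += grads[i] * grads[i];
    float norm = sqrtf(sum_sq);
    if (norm > max_norm && norm > 0.0f) {
        float scale = max_norm / norm;  // e.g. 1.0 / 5.0 = 0.2 for the [3, 4] case above
        for (int i = 0; i < n; i++) grads[i] *= scale;
    }
}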
// --- Test for cten_clip_grad_value ---
void test_clip_grad_value() {
printf("--- Testing cten_clip_grad_value ---\n");
Tensor t;
// Case 1: Both positive and negative gradients are clipped.
setup_test_tensor(&t, 5.0f, -5.0f);
float max_value = 2.0f;
cten_clip_grad_value(&t, 1, max_value);
if (check_floats_equal(t.node->grad.data->flex[0], 2.0f) &&
check_floats_equal(t.node->grad.data->flex[1], -2.0f)) {
printf("PASS: Gradients correctly clipped to [-%f, %f].\n", max_value, max_value);
} else {
printf("FAIL: Gradients not clipped correctly. Got [%f, %f], expected [%f, %f]\n",
t.node->grad.data->flex[0], t.node->grad.data->flex[1], 2.0f, -2.0f);
}
// Case 2: Gradients are within the bounds, no clipping should occur.
setup_test_tensor(&t, 1.0f, -1.0f);
max_value = 2.0f;
cten_clip_grad_value(&t, 1, max_value);
if (check_floats_equal(t.node->grad.data->flex[0], 1.0f) &&
check_floats_equal(t.node->grad.data->flex[1], -1.0f)) {
printf("PASS: Gradients correctly left unchanged.\n");
} else {
printf("FAIL: Gradients were changed unexpectedly.\n");
}
printf("\n");
}
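/*
 * The expected behaviour above is a plain clamp of each gradient to
 * [-max_value, max_value]. Sketch only; not the cten implementation.
 */
static inline float clip_grad_value_reference(float g, float max_value) {
    return fminf(fmaxf(g, -max_value), max_value);
}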
// --- Test for cten_clip_grad_value_range ---
void test_clip_grad_value_range() {
printf("--- Testing cten_clip_grad_value_range ---\n");
Tensor t;
// Test with an asymmetric range.
setup_test_tensor(&t, 10.0f, -10.0f);
float min_val = -1.0f;
float max_val = 2.0f;
cten_clip_grad_value_range(&t, 1, min_val, max_val);
if (check_floats_equal(t.node->grad.data->flex[0], max_val) &&
check_floats_equal(t.node->grad.data->flex[1], min_val)) {
printf("PASS: Gradients correctly clipped to range [%f, %f].\n", min_val, max_val);
} else {
printf("FAIL: Gradients not clipped correctly. Got [%f, %f], expected [%f, %f]\n",
t.node->grad.data->flex[0], t.node->grad.data->flex[1], max_val, min_val);
}
printf("\n");
}
// --- Test for cten_clip_grad_positive & cten_clip_grad_negative ---
// These are essentially special cases of value_range, but we test them to be sure.
void test_clip_one_sided() {
printf("--- Testing cten_clip_grad_positive & cten_clip_grad_negative ---\n");
Tensor t_pos, t_neg;
// Setup: One gradient is positive, one is negative.
setup_test_tensor(&t_pos, 5.0f, -2.0f);
setup_test_tensor(&t_neg, 5.0f, -2.0f);
// Test positive clipping
float max_val = 1.5f;
cten_clip_grad_positive(&t_pos, 1, max_val);
if (check_floats_equal(t_pos.node->grad.data->flex[0], max_val) &&
check_floats_equal(t_pos.node->grad.data->flex[1], -2.0f)) { // Negative value should be untouched
printf("PASS: cten_clip_grad_positive works correctly.\n");
} else {
printf("FAIL: cten_clip_grad_positive failed. Got [%f, %f], expected [%f, %f]\n",
t_pos.node->grad.data->flex[0], t_pos.node->grad.data->flex[1], max_val, -2.0f);
}
// Test negative clipping
float min_val = -0.5f;
cten_clip_grad_negative(&t_neg, 1, min_val);
if (check_floats_equal(t_neg.node->grad.data->flex[0], 5.0f) && // Positive value should be untouched
check_floats_equal(t_neg.node->grad.data->flex[1], min_val)) {
printf("PASS: cten_clip_grad_negative works correctly.\n");
} else {
printf("FAIL: cten_clip_grad_negative failed. Got [%f, %f], expected [%f, %f]\n",
t_neg.node->grad.data->flex[0], t_neg.node->grad.data->flex[1], 5.0f, min_val);
}
printf("\n");
}
// --- Test Edge Cases ---
void test_edge_cases() {
printf("--- Testing Edge Cases ---\n");
// Test with NULL parameters. These should not crash.
cten_clip_grad_norm(NULL, 1, 1.0f);
cten_clip_grad_norm(NULL, 0, 1.0f);
cten_clip_grad_value(NULL, 1, 1.0f);
cten_clip_grad_value_range(NULL, 1, -1.0f, 1.0f);
cten_clip_grad_positive(NULL, 1, 1.0f);
cten_clip_grad_negative(NULL, 1, -1.0f);
printf("PASS: All functions handled NULL and zero-sized inputs without crashing.\n");
// Test with a tensor that has no gradient.
Tensor t = Tensor_new((TensorShape){1, 1, 0, 0}, true); // Grad is NULL by default
cten_clip_grad_norm(&t, 1, 1.0f);
printf("PASS: Handled tensor with NULL gradient without crashing.\n");
// Test norm clipping with max_norm <= 0. Should do nothing.
setup_test_tensor(&t, 10.0f, 10.0f);
cten_clip_grad_norm(&t, 1, 0.0f);
cten_clip_grad_norm(&t, 1, -5.0f);
if (check_floats_equal(t.node->grad.data->flex[0], 10.0f)) {
printf("PASS: cten_clip_grad_norm correctly does nothing for max_norm <= 0.\n");
} else {
printf("FAIL: cten_clip_grad_norm modified gradients for max_norm <= 0.\n");
}
printf("\n");
}
int main() {
// Initialize the library and its default memory pool before running the tests.
cten_initilize();
cten_begin_malloc(PoolId_Default);
printf("================================================\n");
printf(" RUNNING GRADIENT CLIPPING TEST SUITE \n");
printf("================================================\n\n");
test_clip_grad_norm();
test_clip_grad_value();
test_clip_grad_value_range();
test_clip_one_sided();
test_edge_cases();
printf("================================================\n");
printf(" TEST SUITE EXECUTION COMPLETE \n");
printf("================================================\n");
cten_end_malloc();
cten_finalize();
return 0;
}