Last active: April 16, 2024 19:52
Devin-coded version of @karpathy's train_gpt.py ported to C, per his challenge: https://x.com/swyx/status/1777496494448488541. This is where Devin stopped about 6 hours in; it is not complete, and I can prompt it to keep going.
#include <stdio.h> | |
#include <stdlib.h> | |
#include <math.h> | |
#include <assert.h> | |
#ifndef M_PI | |
#define M_PI 3.14159265358979323846 | |
#endif | |
// Constants for model dimensions, learning rate, etc. | |
#define VOCAB_SIZE 50257 // Example size, to be adjusted based on actual model | |
#define BLOCK_SIZE 1024 // Example size, to be adjusted based on actual model | |
#define N_LAYER 12 // Example size, to be adjusted based on actual model | |
#define N_HEAD 12 // Example size, to be adjusted based on actual model | |
#define N_EMBD 768 // Example size, to be adjusted based on actual model | |
#define LEARNING_RATE 0.001 | |
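// (These example sizes match the GPT-2 "small" / 124M configuration: 50257-token vocab,
// 1024-token context, 12 layers, 12 heads, 768-dimensional embeddings.)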
// Data structures for model configuration and layers | |
typedef struct { | |
int vocab_size; | |
int block_size; | |
int n_layer; | |
int n_head; | |
int n_embd; | |
} GPTConfig; | |
typedef struct { | |
// Embedding layers, attention blocks, MLP blocks, etc. | |
float **embedding_weights; // Example for embedding weights | |
// Other components to be added | |
float ***queries; | |
float ***keys; | |
float ***values; | |
// Weights for queries, keys, and values | |
float **query_weights; | |
float **key_weights; | |
float **value_weights; | |
GPTConfig config; // Added config here | |
float **token_embeddings; // Embeddings for tokens | |
float **position_embeddings; // Embeddings for positions | |
float *ln_gamma; // Layer normalization gamma parameter | |
float *ln_beta; // Layer normalization beta parameter | |
float *mlp_weights_1; // Weights for the first MLP layer | |
float *mlp_weights_2; // Weights for the second MLP layer | |
} GPTModel; | |
// Function prototypes | |
void initialize_model(GPTModel *model, GPTConfig config); | |
void forward_pass(GPTModel *model, int *input_indices, float *output); | |
void backward_pass(GPTModel *model, float *grad_output, float *grad_input); | |
void update_weights(GPTModel *model); | |
void matrix_multiply(float *A, float *B, float *C, int n, int m, int k); | |
float gelu_activation(float x); | |
void test_matrix_multiply(); | |
void test_gelu_activation(); | |
void initialize_attention_matrices(GPTModel *model, GPTConfig config); | |
void compute_queries_keys_values(float *input, GPTModel *model, float ***queries, float ***keys, float ***values); | |
void test_initialize_attention_matrices(); | |
void test_compute_queries_keys_values(); | |
void free_attention_matrices(GPTModel *model, GPTConfig config); | |
void initialize_embeddings(GPTModel *model, GPTConfig config); | |
void free_model(GPTModel *model, GPTConfig config); // Prototype for new function to free model memory | |
void test_embeddings(); // Prototype for new unit test function | |
void layer_normalize(float **inputs, float *gamma, float *beta, int n, int m, float epsilon); // New function prototype | |
void test_layer_normalize(); // New unit test function prototype | |
void softmax(float *input, float *output, int length); // New function prototype for softmax | |
void dot_product_attention(float *queries, float *keys, float *values, float *output, int n_head, int block_size, int n_embd); // New function prototype for dot-product attention | |
void mlp_block(float *input, float *output, int block_size, int n_embd, float *mlp_weights_1, float *mlp_weights_2); // New function prototype for MLP block | |
// Function to flatten 3D attention matrices into 1D arrays | |
void flatten_attention_matrices(float ***matrices, float *flat_array, int n_head, int block_size, int n_embd_per_head) { | |
for (int h = 0; h < n_head; ++h) { | |
for (int i = 0; i < block_size; ++i) { | |
for (int j = 0; j < n_embd_per_head; ++j) { | |
flat_array[h * block_size * n_embd_per_head + i * n_embd_per_head + j] = matrices[h][i][j]; | |
} | |
} | |
} | |
} | |
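/*
 * Layout produced above: flat_array[h * block_size * n_embd_per_head + i * n_embd_per_head + j]
 * holds feature j of position i for head h, i.e. the heads are stored one after another,
 * each as a row-major (block_size x n_embd_per_head) matrix.
 */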
// Matrix multiplication function | |
void matrix_multiply(float *A, float *B, float *C, int n, int m, int k) { | |
// Assertions to ensure indices are within bounds | |
assert(A != NULL && B != NULL && C != NULL); | |
assert(n > 0 && m > 0 && k > 0); | |
// Initialize C to zero | |
for (int i = 0; i < n * m; ++i) { | |
C[i] = 0; | |
} | |
// Perform matrix multiplication: C (n x m) = A (n x k) * B (k x m)
for (int i = 0; i < n; ++i) {
for (int j = 0; j < k; ++j) { // j runs over the shared inner dimension
for (int p = 0; p < m; ++p) { // p runs over the columns of B and C
C[i * m + p] += A[i * k + j] * B[j * m + p];
} | |
} | |
} | |
} | |
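/*
 * Usage sketch (illustrative, mirroring test_matrix_multiply below): with this convention,
 * A is n x k, B is k x m, and C is n x m, all stored row-major.
 *   float A[6] = {1, 2, 3, 4, 5, 6};        // 2 x 3
 *   float B[6] = {7, 8, 9, 10, 11, 12};     // 3 x 2
 *   float C[4];
 *   matrix_multiply(A, B, C, 2, 2, 3);      // C == {58, 64, 139, 154}
 */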
// GELU activation function | |
float gelu_activation(float x) { | |
return 0.5 * x * (1.0 + tanh(sqrt(2.0 / M_PI) * (x + 0.044715 * pow(x, 3)))); | |
} | |
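/*
 * The tanh-based GELU approximation used above:
 *   GELU(x) ~= 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
 * Worked example: GELU(0.5) ~= 0.25 * (1 + tanh(0.7979 * 0.5056)) ~= 0.3457, the value
 * checked in test_gelu_activation below.
 */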
// Softmax function | |
void softmax(float *input, float *output, int length) { | |
float max = input[0]; | |
for (int i = 1; i < length; ++i) { | |
if (input[i] > max) { | |
max = input[i]; | |
} | |
} | |
float sum = 0.0; | |
for (int i = 0; i < length; ++i) { | |
output[i] = exp(input[i] - max); | |
sum += output[i]; | |
} | |
for (int i = 0; i < length; ++i) { | |
output[i] /= sum; | |
} | |
} | |
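/*
 * Subtracting the row maximum before exponentiating keeps exp() from overflowing without
 * changing the result, since softmax(x) == softmax(x - c) for any constant c.
 * Worked example: softmax([1, 2, 3]) ~= [0.090, 0.245, 0.665].
 */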
// Corrected dot_product_attention function | |
void dot_product_attention(float *queries, float *keys, float *values, float *output, int n_head, int block_size, int n_embd) { | |
// Temporary storage for the attention scores | |
float *attention_scores = (float*)malloc(block_size * block_size * sizeof(float)); | |
for (int h = 0; h < n_head; ++h) { | |
// Compute the dot product between queries and keys for each head | |
matrix_multiply(queries + h * block_size * (n_embd / n_head), keys + h * block_size * (n_embd / n_head), attention_scores, block_size, block_size, n_embd / n_head); | |
// Apply softmax to the attention scores | |
for (int i = 0; i < block_size; ++i) { | |
softmax(attention_scores + i * block_size, attention_scores + i * block_size, block_size); | |
} | |
// Multiply by values to get the final attention output for this head | |
matrix_multiply(attention_scores, values + h * block_size * (n_embd / n_head), output + h * block_size * (n_embd / n_head), block_size, n_embd / n_head, block_size); | |
} | |
// Free the temporary storage for attention scores | |
free(attention_scores); | |
} | |
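/*
 * Note: standard scaled dot-product attention computes softmax(Q * K^T / sqrt(d_k)) * V,
 * whereas the function above multiplies Q by K without a transpose and omits the
 * 1/sqrt(d_k) scaling. A minimal single-head sketch of the scaled score computation
 * (hypothetical helper, not part of the original file):
 */
static void scaled_attention_scores_sketch(const float *Q, const float *K, float *scores,
                                           int block_size, int d_k) {
    // scores[i * block_size + j] = dot(row i of Q, row j of K) / sqrt(d_k), row-major storage
    for (int i = 0; i < block_size; ++i) {
        for (int j = 0; j < block_size; ++j) {
            float dot = 0.0f;
            for (int t = 0; t < d_k; ++t) {
                dot += Q[i * d_k + t] * K[j * d_k + t];
            }
            scores[i * block_size + j] = dot / sqrtf((float)d_k);
        }
    }
    // A causal mask and a row-wise softmax would follow before multiplying by V.
}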
// Corrected mlp_block function | |
void mlp_block(float *input, float *output, int block_size, int n_embd, float *mlp_weights_1, float *mlp_weights_2) { | |
// Define the intermediate size for the MLP | |
int intermediate_size = n_embd * 4; // This can be a different size | |
float *intermediate_output = (float*)malloc(block_size * intermediate_size * sizeof(float)); | |
// First linear layer | |
matrix_multiply(input, mlp_weights_1, intermediate_output, block_size, intermediate_size, n_embd); | |
// Apply GELU activation | |
for (int i = 0; i < block_size * intermediate_size; ++i) { | |
intermediate_output[i] = gelu_activation(intermediate_output[i]); | |
} | |
// Second linear layer to project back to n_embd dimensions | |
matrix_multiply(intermediate_output, mlp_weights_2, output, block_size, n_embd, intermediate_size); | |
// Free the intermediate output | |
free(intermediate_output); | |
} | |
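/*
 * Dimension summary for the block above (all row-major): input is block_size x n_embd,
 * mlp_weights_1 is n_embd x (4 * n_embd), mlp_weights_2 is (4 * n_embd) x n_embd, so the
 * output is projected back to block_size x n_embd.
 */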
// Changes in initialize_model function to initialize new members | |
void initialize_model(GPTModel *model, GPTConfig config) { | |
// Example of allocating memory for the embedding layer and initializing weights | |
// Assuming embedding weights are a 2D array with dimensions [vocab_size, n_embd] | |
model->embedding_weights = (float**)malloc(config.vocab_size * sizeof(float*)); | |
for (int i = 0; i < config.vocab_size; ++i) { | |
model->embedding_weights[i] = (float*)malloc(config.n_embd * sizeof(float)); | |
for (int j = 0; j < config.n_embd; ++j) { | |
// Initialize weights with random values, for example using a simple normal distribution | |
model->embedding_weights[i][j] = (float)rand() / (float)RAND_MAX; | |
} | |
} | |
// Allocate and initialize weights for queries, keys, and values | |
model->query_weights = (float**)malloc(config.n_head * sizeof(float*)); | |
model->key_weights = (float**)malloc(config.n_head * sizeof(float*)); | |
model->value_weights = (float**)malloc(config.n_head * sizeof(float*)); | |
for (int h = 0; h < config.n_head; ++h) { | |
model->query_weights[h] = (float*)malloc((config.n_embd / config.n_head) * config.n_embd * sizeof(float)); | |
model->key_weights[h] = (float*)malloc((config.n_embd / config.n_head) * config.n_embd * sizeof(float)); | |
model->value_weights[h] = (float*)malloc((config.n_embd / config.n_head) * config.n_embd * sizeof(float)); | |
for (int i = 0; i < (config.n_embd / config.n_head) * config.n_embd; ++i) { | |
model->query_weights[h][i] = (float)rand() / (float)RAND_MAX; | |
model->key_weights[h][i] = (float)rand() / (float)RAND_MAX; | |
model->value_weights[h][i] = (float)rand() / (float)RAND_MAX; | |
} | |
} | |
// Initialize ln_gamma and ln_beta | |
model->ln_gamma = (float*)malloc(config.n_embd * sizeof(float)); | |
model->ln_beta = (float*)malloc(config.n_embd * sizeof(float)); | |
for (int i = 0; i < config.n_embd; ++i) { | |
model->ln_gamma[i] = 1.0; // Typically initialized to ones | |
model->ln_beta[i] = 0.0; // Typically initialized to zeros | |
} | |
// Initialize MLP weights | |
int intermediate_size = config.n_embd * 4; // This can be a different size | |
model->mlp_weights_1 = (float*)malloc(config.n_embd * intermediate_size * sizeof(float)); | |
model->mlp_weights_2 = (float*)malloc(intermediate_size * config.n_embd * sizeof(float)); | |
// Random initialization of MLP weights (example) | |
for (int i = 0; i < config.n_embd * intermediate_size; ++i) { | |
model->mlp_weights_1[i] = (float)rand() / (float)RAND_MAX; | |
model->mlp_weights_2[i] = (float)rand() / (float)RAND_MAX; | |
} | |
// Allocate and initialize token embeddings | |
model->token_embeddings = (float**)malloc(config.vocab_size * sizeof(float*)); | |
for (int i = 0; i < config.vocab_size; ++i) { | |
model->token_embeddings[i] = (float*)malloc(config.n_embd * sizeof(float)); | |
for (int j = 0; j < config.n_embd; ++j) { | |
model->token_embeddings[i][j] = (float)rand() / (float)RAND_MAX; | |
} | |
} | |
// Allocate and initialize position embeddings | |
model->position_embeddings = (float**)malloc(config.block_size * sizeof(float*)); | |
for (int i = 0; i < config.block_size; ++i) { | |
model->position_embeddings[i] = (float*)malloc(config.n_embd * sizeof(float)); | |
for (int j = 0; j < config.n_embd; ++j) { | |
model->position_embeddings[i][j] = (float)rand() / (float)RAND_MAX; | |
} | |
} | |
// Note: This is a simplified example. In practice, you would need to implement a proper random initialization | |
// (e.g., Xavier initialization) and also consider biases, layer normalization parameters, etc. | |
} | |
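/*
 * The note above mentions Xavier initialization; a minimal sketch of a Xavier/Glorot
 * uniform initializer (hypothetical helper, not part of the original file) could be:
 */
static float xavier_uniform_sketch(int fan_in, int fan_out) {
    // Uniform sample in [-limit, limit] with limit = sqrt(6 / (fan_in + fan_out))
    float limit = sqrtf(6.0f / (float)(fan_in + fan_out));
    return ((float)rand() / (float)RAND_MAX) * 2.0f * limit - limit;
}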
// Initialize attention matrices for queries, keys, and values | |
void initialize_attention_matrices(GPTModel *model, GPTConfig config) { | |
// Assuming queries, keys, and values are 3D arrays with dimensions [n_head, block_size, n_embd/n_head] | |
// Allocate memory for queries, keys, and values | |
model->queries = (float***)malloc(config.n_head * sizeof(float**)); | |
model->keys = (float***)malloc(config.n_head * sizeof(float**)); | |
model->values = (float***)malloc(config.n_head * sizeof(float**)); | |
for (int h = 0; h < config.n_head; ++h) { | |
model->queries[h] = (float**)malloc(config.block_size * sizeof(float*)); | |
model->keys[h] = (float**)malloc(config.block_size * sizeof(float*)); | |
model->values[h] = (float**)malloc(config.block_size * sizeof(float*)); | |
for (int s = 0; s < config.block_size; ++s) { | |
model->queries[h][s] = (float*)calloc(config.block_size * (config.n_embd / config.n_head), sizeof(float)); | |
model->keys[h][s] = (float*)calloc(config.block_size * (config.n_embd / config.n_head), sizeof(float)); | |
model->values[h][s] = (float*)calloc(config.block_size * (config.n_embd / config.n_head), sizeof(float)); | |
} | |
} | |
} | |
// Compute queries, keys, and values for the self-attention mechanism | |
void compute_queries_keys_values(float *input, GPTModel *model, float ***queries, float ***keys, float ***values) { | |
printf("Entering compute_queries_keys_values\n"); | |
printf("Model config - n_head: %d, block_size: %d, n_embd: %d\n", model->config.n_head, model->config.block_size, model->config.n_embd); | |
// Use the model's weights to compute queries, keys, and values from the input | |
// This will involve matrix multiplication and addition operations | |
for (int h = 0; h < model->config.n_head; ++h) { | |
printf("Matrix dimensions for queries (head %d): (%d, %d) * (%d, %d)\n", h, model->config.block_size, model->config.n_embd / model->config.n_head, model->config.n_embd, model->config.n_embd / model->config.n_head); | |
printf("Computing queries for head %d\n", h); | |
matrix_multiply(model->query_weights[h], input, (*queries)[h], model->config.block_size, model->config.n_embd / model->config.n_head, model->config.n_embd); | |
printf("Matrix dimensions for keys (head %d): (%d, %d) * (%d, %d)\n", h, model->config.block_size, model->config.n_embd / model->config.n_head, model->config.n_embd, model->config.n_embd / model->config.n_head); | |
printf("Computing keys for head %d\n", h); | |
matrix_multiply(model->key_weights[h], input, (*keys)[h], model->config.block_size, model->config.n_embd / model->config.n_head, model->config.n_embd); | |
printf("Matrix dimensions for values (head %d): (%d, %d) * (%d, %d)\n", h, model->config.block_size, model->config.n_embd / model->config.n_head, model->config.n_embd, model->config.n_embd / model->config.n_head); | |
printf("Computing values for head %d\n", h); | |
matrix_multiply(model->value_weights[h], input, (*values)[h], model->config.block_size, model->config.n_embd / model->config.n_head, model->config.n_embd); | |
} | |
printf("Exiting compute_queries_keys_values\n"); | |
} | |
// Unit test for matrix multiplication | |
void test_matrix_multiply() { | |
// Create test matrices A, B, and C | |
float A[2][3] = {{1, 2, 3}, {4, 5, 6}}; | |
float B[3][2] = {{7, 8}, {9, 10}, {11, 12}}; | |
float C[2][2] = {0}; | |
// Expected result of multiplication | |
float expected[2][2] = {{58, 64}, {139, 154}}; | |
// Perform matrix multiplication | |
matrix_multiply(&A[0][0], &B[0][0], &C[0][0], 2, 2, 3); | |
// Assert each element of the result matrix C is as expected | |
for (int i = 0; i < 2; ++i) { | |
for (int j = 0; j < 2; ++j) { | |
assert(fabs(C[i][j] - expected[i][j]) < 1e-5); | |
} | |
} | |
} | |
// Unit test for GELU activation | |
void test_gelu_activation() { | |
// Test input and expected output | |
float input = 0.5; | |
float expected_output = 0.3457; // Approximate expected value | |
printf("GELU activation input: %f\n", input); | |
printf("Expected output: %f\n", expected_output); | |
float output = gelu_activation(input); | |
printf("Actual output: %f\n", output); | |
printf("Difference: %f\n", fabs(output - expected_output)); | |
// Assert the output is as expected | |
assert(fabs(output - expected_output) < 1e-4); | |
} | |
// Unit test for initializing attention matrices | |
void test_initialize_attention_matrices() { | |
GPTConfig config = {VOCAB_SIZE, BLOCK_SIZE, N_LAYER, N_HEAD, N_EMBD}; | |
GPTModel model; | |
initialize_model(&model, config); // Assuming this also initializes attention matrices | |
initialize_attention_matrices(&model, config); | |
// Check if memory allocation was successful and dimensions are correct | |
assert(model.queries != NULL); | |
assert(model.keys != NULL); | |
assert(model.values != NULL); | |
for (int h = 0; h < config.n_head; ++h) { | |
assert(model.queries[h] != NULL); | |
assert(model.keys[h] != NULL); | |
assert(model.values[h] != NULL); | |
for (int s = 0; s < config.block_size; ++s) { | |
assert(model.queries[h][s] != NULL); | |
assert(model.keys[h][s] != NULL); | |
assert(model.values[h][s] != NULL); | |
} | |
} | |
// Clean up | |
free_attention_matrices(&model, config); | |
} | |
// Unit test for computing queries, keys, and values | |
void test_compute_queries_keys_values() { | |
GPTConfig config = {VOCAB_SIZE, BLOCK_SIZE, N_LAYER, N_HEAD, N_EMBD}; | |
GPTModel model; | |
model.config = config; // Set the model configuration | |
initialize_model(&model, config); // Assuming this also initializes attention matrices | |
initialize_attention_matrices(&model, config); | |
// Ensure that the weights are not NULL | |
assert(model.query_weights != NULL); | |
assert(model.key_weights != NULL); | |
assert(model.value_weights != NULL); | |
for (int h = 0; h < config.n_head; ++h) { | |
assert(model.query_weights[h] != NULL); | |
assert(model.key_weights[h] != NULL); | |
assert(model.value_weights[h] != NULL); | |
} | |
// Create mock input and model weights for testing | |
float *input = (float*)malloc(config.block_size * config.n_embd * sizeof(float)); | |
// Initialize input with some values | |
for (int i = 0; i < config.block_size * config.n_embd; ++i) { | |
input[i] = i; | |
} | |
// Assuming model weights are initialized in initialize_model | |
compute_queries_keys_values(input, &model, model.queries, model.keys, model.values); | |
// Check if queries, keys, and values are computed correctly | |
// This would involve checking the results of the matrix multiplication operations | |
// ... | |
// Clean up | |
free(input); | |
free_attention_matrices(&model, config); | |
} | |
// Function to free attention matrices | |
void free_attention_matrices(GPTModel *model, GPTConfig config) { | |
for (int h = 0; h < config.n_head; ++h) { | |
for (int s = 0; s < config.block_size; ++s) { | |
free(model->queries[h][s]); | |
free(model->keys[h][s]); | |
free(model->values[h][s]); | |
} | |
free(model->queries[h]); | |
free(model->keys[h]); | |
free(model->values[h]); | |
} | |
free(model->queries); | |
free(model->keys); | |
free(model->values); | |
} | |
// New function to initialize embeddings | |
void initialize_embeddings(GPTModel *model, GPTConfig config) { | |
// Allocate memory for token embeddings | |
model->token_embeddings = (float**)malloc(config.vocab_size * sizeof(float*)); | |
for (int i = 0; i < config.vocab_size; ++i) { | |
model->token_embeddings[i] = (float*)malloc(config.n_embd * sizeof(float)); | |
// Initialize weights with random values | |
for (int j = 0; j < config.n_embd; ++j) { | |
model->token_embeddings[i][j] = (float)rand() / (float)RAND_MAX; | |
} | |
} | |
// Allocate memory for position embeddings | |
model->position_embeddings = (float**)malloc(config.block_size * sizeof(float*)); | |
for (int i = 0; i < config.block_size; ++i) { | |
model->position_embeddings[i] = (float*)malloc(config.n_embd * sizeof(float)); | |
// Initialize weights with random values | |
for (int j = 0; j < config.n_embd; ++j) { | |
model->position_embeddings[i][j] = (float)rand() / (float)RAND_MAX; | |
} | |
} | |
} | |
// Modify forward_pass function to apply embeddings and call flatten_attention_matrices | |
void forward_pass(GPTModel *model, int *input_indices, float *output) { | |
// Allocate memory for the output array if not already allocated | |
if (output == NULL) { | |
output = (float*)malloc(model->config.block_size * model->config.n_embd * sizeof(float)); | |
assert(output != NULL); // Ensure memory allocation was successful | |
} | |
// Apply token and position embeddings to input indices | |
for (int i = 0; i < model->config.block_size; ++i) { | |
int index = input_indices[i]; | |
assert(index >= 0 && index < model->config.vocab_size); | |
assert(model->token_embeddings != NULL); | |
assert(model->position_embeddings != NULL); | |
for (int j = 0; j < model->config.n_embd; ++j) {
assert(model->token_embeddings[index] != NULL);
assert(model->position_embeddings[i] != NULL);
output[i * model->config.n_embd + j] = model->token_embeddings[index][j] + model->position_embeddings[i][j]; | |
} | |
} | |
// Flatten the 3D arrays into 1D arrays for dot_product_attention | |
float *queries_flat = (float*)malloc(model->config.n_head * model->config.block_size * (model->config.n_embd / model->config.n_head) * sizeof(float)); | |
float *keys_flat = (float*)malloc(model->config.n_head * model->config.block_size * (model->config.n_embd / model->config.n_head) * sizeof(float)); | |
float *values_flat = (float*)malloc(model->config.n_head * model->config.block_size * (model->config.n_embd / model->config.n_head) * sizeof(float)); | |
float *self_attention_output_flat = (float*)malloc(model->config.n_head * model->config.block_size * (model->config.n_embd / model->config.n_head) * sizeof(float)); | |
// Call self-attention mechanism | |
compute_queries_keys_values(output, model, model->queries, model->keys, model->values); | |
flatten_attention_matrices(model->queries, queries_flat, model->config.n_head, model->config.block_size, model->config.n_embd / model->config.n_head); | |
flatten_attention_matrices(model->keys, keys_flat, model->config.n_head, model->config.block_size, model->config.n_embd / model->config.n_head); | |
flatten_attention_matrices(model->values, values_flat, model->config.n_head, model->config.block_size, model->config.n_embd / model->config.n_head); | |
dot_product_attention(queries_flat, keys_flat, values_flat, self_attention_output_flat, model->config.n_head, model->config.block_size, model->config.n_embd); | |
// Flatten the 3D self_attention_output into a 1D array for mlp_block | |
float *mlp_output_flat = (float*)malloc(model->config.block_size * model->config.n_embd * sizeof(float)); | |
// Call MLP block | |
mlp_block(self_attention_output_flat, mlp_output_flat, model->config.block_size, model->config.n_embd, model->mlp_weights_1, model->mlp_weights_2); | |
// Create a temporary 2D array for layer normalization | |
float **mlp_output_2d = (float**)malloc(model->config.block_size * sizeof(float*)); | |
for (int i = 0; i < model->config.block_size; ++i) { | |
mlp_output_2d[i] = &mlp_output_flat[i * model->config.n_embd]; | |
} | |
// Apply final layer normalization to the output of the MLP block | |
layer_normalize(mlp_output_2d, model->ln_gamma, model->ln_beta, model->config.block_size, model->config.n_embd, 1e-5); | |
// Free the temporary 2D array | |
free(mlp_output_2d); | |
// Copy the final output to the output variable | |
for (int i = 0; i < model->config.block_size; ++i) { | |
for (int j = 0; j < model->config.n_embd; ++j) { | |
output[i * model->config.n_embd + j] = mlp_output_flat[i * model->config.n_embd + j]; | |
} | |
} | |
// Free intermediate variables | |
free(queries_flat); | |
free(keys_flat); | |
free(values_flat); | |
free(self_attention_output_flat); | |
free(mlp_output_flat); | |
} | |
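/*
 * Summary of the forward pass above: token + position embeddings -> per-head Q/K/V
 * projections -> dot-product attention -> 4x-expanded MLP with GELU -> final layer
 * normalization, written back into `output`. Note there is no residual connection,
 * causal mask, or stacking of N_LAYER blocks yet; this is a single simplified block.
 */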
// Unit test for token and position embeddings | |
void test_embeddings() { | |
GPTConfig config = {VOCAB_SIZE, BLOCK_SIZE, N_LAYER, N_HEAD, N_EMBD}; | |
GPTModel model; | |
initialize_model(&model, config); // Initialize the model with embeddings | |
// Create mock input indices (for simplicity, use indices 0 to block_size-1) | |
int input_indices[BLOCK_SIZE]; | |
for (int i = 0; i < BLOCK_SIZE; ++i) { | |
input_indices[i] = i; | |
} | |
// Allocate memory for the output of the forward pass | |
float *output = (float*)malloc(BLOCK_SIZE * N_EMBD * sizeof(float)); | |
// Apply embeddings using the forward pass | |
forward_pass(&model, input_indices, output); | |
// Check if the output contains the correct values | |
for (int i = 0; i < BLOCK_SIZE; ++i) { | |
for (int j = 0; j < N_EMBD; ++j) { | |
float expected_value = model.token_embeddings[input_indices[i]][j] + model.position_embeddings[i][j]; | |
assert(fabs(output[i * N_EMBD + j] - expected_value) < 1e-5); | |
} | |
} | |
// Clean up | |
free(output); | |
free_model(&model, config); // This function will need to be implemented to free all allocated memory in the model | |
} | |
// Function to free the model | |
void free_model(GPTModel *model, GPTConfig config) { | |
// Free token and position embeddings | |
for (int i = 0; i < config.vocab_size; ++i) { | |
free(model->token_embeddings[i]); | |
} | |
free(model->token_embeddings); | |
for (int i = 0; i < config.block_size; ++i) { | |
free(model->position_embeddings[i]); | |
} | |
free(model->position_embeddings); | |
// Free queries, keys, and values | |
free_attention_matrices(model, config); | |
// Free weights for queries, keys, and values | |
for (int h = 0; h < config.n_head; ++h) { | |
free(model->query_weights[h]); | |
free(model->key_weights[h]); | |
free(model->value_weights[h]); | |
} | |
free(model->query_weights); | |
free(model->key_weights); | |
free(model->value_weights); | |
// Free layer normalization parameters | |
free(model->ln_gamma); | |
free(model->ln_beta); | |
// Free MLP weights | |
free(model->mlp_weights_1); | |
free(model->mlp_weights_2); | |
// Free any other dynamically allocated memory within the model | |
// ... | |
} | |
// Layer normalization function | |
void layer_normalize(float **inputs, float *gamma, float *beta, int n, int m, float epsilon) { | |
for (int i = 0; i < n; ++i) { | |
float sum = 0.0; | |
for (int j = 0; j < m; ++j) { | |
sum += inputs[i][j]; | |
} | |
float mean = sum / m; | |
float variance_sum = 0.0; | |
for (int j = 0; j < m; ++j) { | |
variance_sum += (inputs[i][j] - mean) * (inputs[i][j] - mean); | |
} | |
float variance = variance_sum / m; | |
for (int j = 0; j < m; ++j) { | |
inputs[i][j] = (inputs[i][j] - mean) / sqrt(variance + epsilon); | |
inputs[i][j] = inputs[i][j] * gamma[j] + beta[j]; | |
} | |
} | |
} | |
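/*
 * Per-row layer normalization as implemented above:
 *   y_j = gamma_j * (x_j - mean(x)) / sqrt(var(x) + epsilon) + beta_j
 * where the mean and (biased) variance are taken over the m features of each row.
 */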
// Unit test for layer normalization | |
void test_layer_normalize() { | |
int n = 2; // Number of input vectors | |
int m = 3; // Number of features | |
float epsilon = 1e-5; | |
float **inputs = (float**)malloc(n * sizeof(float*)); | |
float *gamma = (float*)malloc(m * sizeof(float)); | |
float *beta = (float*)malloc(m * sizeof(float)); | |
// Initialize inputs with arbitrary values, gamma with ones, beta with zeros
for (int i = 0; i < n; ++i) {
inputs[i] = (float*)malloc(m * sizeof(float));
for (int j = 0; j < m; ++j) {
inputs[i][j] = (float)(i * m + j);
}
}
for (int j = 0; j < m; ++j) {
gamma[j] = 1.0f;
beta[j] = 0.0f;
}
// Call layer_normalize
layer_normalize(inputs, gamma, beta, n, m, epsilon);
// With gamma = 1 and beta = 0, each normalized row should have approximately zero mean
for (int i = 0; i < n; ++i) {
float row_sum = 0.0f;
for (int j = 0; j < m; ++j) row_sum += inputs[i][j];
assert(fabs(row_sum / m) < 1e-4);
}
// Clean up | |
free(gamma); | |
free(beta); | |
for (int i = 0; i < n; ++i) { | |
free(inputs[i]); | |
} | |
free(inputs); | |
} | |
// Add the new unit test to the main function | |
int main(int argc, char *argv[]) { | |
// Run unit tests | |
test_matrix_multiply(); | |
test_gelu_activation(); | |
test_initialize_attention_matrices(); | |
test_compute_queries_keys_values(); | |
test_embeddings(); // New unit test for embeddings | |
test_layer_normalize(); // New unit test for layer normalization | |
// Rest of the main function... | |
// ... | |
return 0; | |
} |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <math.h> | |
#include <assert.h> | |
#ifndef M_PI | |
#define M_PI 3.14159265358979323846 | |
#endif | |
// Constants for model dimensions, learning rate, etc. | |
#define VOCAB_SIZE 50257 // Example size, to be adjusted based on actual model | |
#define BLOCK_SIZE 1024 // Example size, to be adjusted based on actual model | |
#define N_LAYER 12 // Example size, to be adjusted based on actual model | |
#define N_HEAD 12 // Example size, to be adjusted based on actual model | |
#define N_EMBD 768 // Example size, to be adjusted based on actual model | |
#define LEARNING_RATE 0.001 | |
// Data structures for model configuration and layers | |
typedef struct { | |
int vocab_size; | |
int block_size; | |
int n_layer; | |
int n_head; | |
int n_embd; | |
} GPTConfig; | |
typedef struct { | |
// Embedding layers, attention blocks, MLP blocks, etc. | |
float **embedding_weights; // Example for embedding weights | |
// Other components to be added | |
float ***queries; | |
float ***keys; | |
float ***values; | |
// Weights for queries, keys, and values | |
float **query_weights; | |
float **key_weights; | |
float **value_weights; | |
GPTConfig config; // Added config here | |
float **token_embeddings; // Embeddings for tokens | |
float **position_embeddings; // Embeddings for positions | |
float *ln_gamma; // Layer normalization gamma parameter | |
float *ln_beta; // Layer normalization beta parameter | |
float *mlp_weights_1; // Weights for the first MLP layer | |
float *mlp_weights_2; // Weights for the second MLP layer | |
} GPTModel; | |
// Function prototypes | |
void initialize_model(GPTModel *model, GPTConfig config); | |
void forward_pass(GPTModel *model, int *input_indices, float **output); | |
void backward_pass(GPTModel *model, float *grad_output, float *grad_input); | |
void update_weights(GPTModel *model); | |
void matrix_multiply(float *A, float *B, float *C, int n, int m, int k); | |
float gelu_activation(float x); | |
void test_matrix_multiply(); | |
void test_gelu_activation(); | |
void initialize_attention_matrices(GPTModel *model, GPTConfig config); | |
void compute_queries_keys_values(float *input, GPTModel *model, float ***queries, float ***keys, float ***values); | |
void test_initialize_attention_matrices(); | |
void test_compute_queries_keys_values(); | |
void free_attention_matrices(GPTModel *model, GPTConfig config); | |
void initialize_embeddings(GPTModel *model, GPTConfig config); | |
void free_model(GPTModel *model, GPTConfig config); // Prototype for new function to free model memory | |
void test_embeddings(); // Prototype for new unit test function | |
void layer_normalize(float **inputs, float *gamma, float *beta, int n, int m, float epsilon); // New function prototype | |
void test_layer_normalize(); // New unit test function prototype | |
void softmax(float *input, float *output, int length); // New function prototype for softmax | |
void dot_product_attention(float *queries, float *keys, float *values, float *output, int n_head, int block_size, int n_embd); // New function prototype for dot-product attention | |
void mlp_block(float *input, float *output, int block_size, int n_embd, float *mlp_weights_1, float *mlp_weights_2); // New function prototype for MLP block | |
// Function to flatten 3D attention matrices into 1D arrays | |
void flatten_attention_matrices(float ***matrices, float *flat_array, int n_head, int block_size, int n_embd_per_head) { | |
for (int h = 0; h < n_head; ++h) { | |
for (int i = 0; i < block_size; ++i) { | |
for (int j = 0; j < n_embd_per_head; ++j) { | |
flat_array[h * block_size * n_embd_per_head + i * n_embd_per_head + j] = matrices[h][i][j]; | |
} | |
} | |
} | |
} | |
// Matrix multiplication function | |
void matrix_multiply(float *A, float *B, float *C, int n, int m, int k) { | |
// Ensure that the pointers are not NULL and dimensions are greater than zero | |
if (A == NULL || B == NULL || C == NULL) { | |
fprintf(stderr, "Null pointer provided to matrix_multiply function\n"); | |
exit(EXIT_FAILURE); | |
} | |
if (n <= 0 || m <= 0 || k <= 0) { | |
fprintf(stderr, "Invalid dimensions provided to matrix_multiply function\n"); | |
exit(EXIT_FAILURE); | |
} | |
// Diagnostic print statements | |
printf("Matrix A address: %p, Matrix B address: %p, Matrix C address: %p\n", (void*)A, (void*)B, (void*)C); | |
printf("Matrix dimensions - n: %d, m: %d, k: %d\n", n, m, k); | |
// Initialize C to zero | |
for (int i = 0; i < n * m; ++i) { | |
C[i] = 0; | |
} | |
// Perform matrix multiplication | |
for (int i = 0; i < n; ++i) { | |
for (int j = 0; j < k; ++j) { | |
for (int p = 0; p < m; ++p) { | |
C[i * m + p] += A[i * k + j] * B[j * m + p]; | |
} | |
} | |
} | |
} | |
// GELU activation function | |
float gelu_activation(float x) { | |
return 0.5 * x * (1.0 + tanh(sqrt(2.0 / M_PI) * (x + 0.044715 * pow(x, 3)))); | |
} | |
// Softmax function | |
void softmax(float *input, float *output, int length) { | |
float max = input[0]; | |
for (int i = 1; i < length; ++i) { | |
if (input[i] > max) { | |
max = input[i]; | |
} | |
} | |
float sum = 0.0; | |
for (int i = 0; i < length; ++i) { | |
output[i] = exp(input[i] - max); | |
sum += output[i]; | |
} | |
for (int i = 0; i < length; ++i) { | |
output[i] /= sum; | |
} | |
} | |
// Corrected dot_product_attention function | |
void dot_product_attention(float *queries, float *keys, float *values, float *output, int n_head, int block_size, int n_embd) { | |
// Temporary storage for the attention scores | |
float *attention_scores = (float*)malloc(block_size * block_size * sizeof(float)); | |
for (int h = 0; h < n_head; ++h) { | |
// Compute the dot product between queries and keys for each head | |
matrix_multiply(queries + h * block_size * (n_embd / n_head), keys + h * block_size * (n_embd / n_head), attention_scores, block_size, block_size, n_embd / n_head); | |
// Apply softmax to the attention scores | |
for (int i = 0; i < block_size; ++i) { | |
softmax(attention_scores + i * block_size, attention_scores + i * block_size, block_size); | |
} | |
// Multiply by values to get the final attention output for this head | |
matrix_multiply(attention_scores, values + h * block_size * (n_embd / n_head), output + h * block_size * (n_embd / n_head), block_size, n_embd / n_head, block_size); | |
} | |
// Free the temporary storage for attention scores | |
free(attention_scores); | |
} | |
// Corrected mlp_block function | |
void mlp_block(float *input, float *output, int block_size, int n_embd, float *mlp_weights_1, float *mlp_weights_2) { | |
// Define the intermediate size for the MLP | |
int intermediate_size = n_embd * 4; // This can be a different size | |
float *intermediate_output = (float*)malloc(block_size * intermediate_size * sizeof(float)); | |
// First linear layer | |
matrix_multiply(input, mlp_weights_1, intermediate_output, block_size, intermediate_size, n_embd); | |
// Apply GELU activation | |
for (int i = 0; i < block_size * intermediate_size; ++i) { | |
intermediate_output[i] = gelu_activation(intermediate_output[i]); | |
} | |
// Second linear layer to project back to n_embd dimensions | |
matrix_multiply(intermediate_output, mlp_weights_2, output, block_size, n_embd, intermediate_size); | |
// Free the intermediate output | |
free(intermediate_output); | |
} | |
// Check for successful allocation and handle errors | |
#define CHECK_ALLOCATION(ptr) if ((ptr) == NULL) { \ | |
fprintf(stderr, "Memory allocation failed\n"); \ | |
free_model(model, config); \ | |
exit(EXIT_FAILURE); \ | |
} | |
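/*
 * Note: this macro assumes variables named `model` and `config` are in scope at the call
 * site (as in initialize_model and initialize_embeddings below), since it calls
 * free_model(model, config) before exiting.
 */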
// Changes in initialize_model function to initialize new members | |
void initialize_model(GPTModel *model, GPTConfig config) { | |
// Example of allocating memory for the embedding layer and initializing weights | |
// Assuming embedding weights are a 2D array with dimensions [vocab_size, n_embd] | |
model->embedding_weights = (float**)malloc(config.vocab_size * sizeof(float*)); | |
CHECK_ALLOCATION(model->embedding_weights); | |
for (int i = 0; i < config.vocab_size; ++i) { | |
model->embedding_weights[i] = (float*)malloc(config.n_embd * sizeof(float)); | |
CHECK_ALLOCATION(model->embedding_weights[i]); | |
for (int j = 0; j < config.n_embd; ++j) { | |
// Initialize weights with random values, for example using a simple normal distribution | |
model->embedding_weights[i][j] = (float)rand() / (float)RAND_MAX; | |
} | |
} | |
// Allocate and initialize weights for queries, keys, and values | |
model->query_weights = (float**)malloc(config.n_head * sizeof(float*)); | |
CHECK_ALLOCATION(model->query_weights); | |
model->key_weights = (float**)malloc(config.n_head * sizeof(float*)); | |
CHECK_ALLOCATION(model->key_weights); | |
model->value_weights = (float**)malloc(config.n_head * sizeof(float*)); | |
CHECK_ALLOCATION(model->value_weights); | |
for (int h = 0; h < config.n_head; ++h) { | |
model->query_weights[h] = (float*)malloc(config.n_embd * (config.n_embd / config.n_head) * sizeof(float)); | |
CHECK_ALLOCATION(model->query_weights[h]); | |
model->key_weights[h] = (float*)malloc(config.n_embd * (config.n_embd / config.n_head) * sizeof(float)); | |
CHECK_ALLOCATION(model->key_weights[h]); | |
model->value_weights[h] = (float*)malloc(config.n_embd * (config.n_embd / config.n_head) * sizeof(float)); | |
CHECK_ALLOCATION(model->value_weights[h]); | |
for (int i = 0; i < config.n_embd * (config.n_embd / config.n_head); ++i) { | |
model->query_weights[h][i] = (float)rand() / (float)RAND_MAX; | |
model->key_weights[h][i] = (float)rand() / (float)RAND_MAX; | |
model->value_weights[h][i] = (float)rand() / (float)RAND_MAX; | |
} | |
} | |
// Initialize ln_gamma and ln_beta | |
model->ln_gamma = (float*)malloc(config.n_embd * sizeof(float)); | |
CHECK_ALLOCATION(model->ln_gamma); | |
model->ln_beta = (float*)malloc(config.n_embd * sizeof(float)); | |
CHECK_ALLOCATION(model->ln_beta); | |
for (int i = 0; i < config.n_embd; ++i) { | |
model->ln_gamma[i] = 1.0; // Typically initialized to ones | |
model->ln_beta[i] = 0.0; // Typically initialized to zeros | |
} | |
// Initialize MLP weights | |
int intermediate_size = config.n_embd * 4; // This can be a different size | |
model->mlp_weights_1 = (float*)malloc(config.n_embd * intermediate_size * sizeof(float)); | |
CHECK_ALLOCATION(model->mlp_weights_1); | |
model->mlp_weights_2 = (float*)malloc(intermediate_size * config.n_embd * sizeof(float)); | |
CHECK_ALLOCATION(model->mlp_weights_2); | |
// Random initialization of MLP weights (example) | |
for (int i = 0; i < config.n_embd * intermediate_size; ++i) { | |
model->mlp_weights_1[i] = (float)rand() / (float)RAND_MAX; | |
model->mlp_weights_2[i] = (float)rand() / (float)RAND_MAX; | |
} | |
// Allocate and initialize token embeddings | |
model->token_embeddings = (float**)malloc(config.vocab_size * sizeof(float*)); | |
CHECK_ALLOCATION(model->token_embeddings); | |
for (int i = 0; i < config.vocab_size; ++i) { | |
model->token_embeddings[i] = (float*)malloc(config.n_embd * sizeof(float)); | |
CHECK_ALLOCATION(model->token_embeddings[i]); | |
for (int j = 0; j < config.n_embd; ++j) { | |
model->token_embeddings[i][j] = (float)rand() / (float)RAND_MAX; | |
} | |
} | |
// Allocate and initialize position embeddings | |
model->position_embeddings = (float**)malloc(config.block_size * sizeof(float*)); | |
for (int i = 0; i < config.block_size; ++i) { | |
model->position_embeddings[i] = (float*)malloc(config.n_embd * sizeof(float)); | |
for (int j = 0; j < config.n_embd; ++j) { | |
model->position_embeddings[i][j] = (float)rand() / (float)RAND_MAX; | |
} | |
} | |
// Note: This is a simplified example. In practice, you would need to implement a proper random initialization | |
// (e.g., Xavier initialization) and also consider biases, layer normalization parameters, etc. | |
} | |
// Initialize attention matrices for queries, keys, and values | |
void initialize_attention_matrices(GPTModel *model, GPTConfig config) { | |
// Allocate memory for queries, keys, and values | |
model->queries = (float***)malloc(config.n_head * sizeof(float**)); | |
model->keys = (float***)malloc(config.n_head * sizeof(float**)); | |
model->values = (float***)malloc(config.n_head * sizeof(float**)); | |
if (!model->queries || !model->keys || !model->values) { | |
fprintf(stderr, "Allocation failed for attention matrices\n"); | |
if (model->queries) free(model->queries); | |
if (model->keys) free(model->keys); | |
if (model->values) free(model->values); | |
exit(EXIT_FAILURE); | |
} | |
for (int h = 0; h < config.n_head; ++h) { | |
model->queries[h] = (float**)malloc(config.block_size * sizeof(float*)); | |
model->keys[h] = (float**)malloc(config.block_size * sizeof(float*)); | |
model->values[h] = (float**)malloc(config.block_size * sizeof(float*)); | |
if (!model->queries[h] || !model->keys[h] || !model->values[h]) { | |
fprintf(stderr, "Allocation failed for attention matrix heads\n"); | |
// Free any allocated memory | |
for (int i = 0; i < h; ++i) { | |
free(model->queries[i]); | |
free(model->keys[i]); | |
free(model->values[i]); | |
} | |
free(model->queries); | |
free(model->keys); | |
free(model->values); | |
exit(EXIT_FAILURE); | |
} | |
for (int i = 0; i < config.block_size; ++i) { | |
model->queries[h][i] = (float*)calloc(config.n_embd / config.n_head, sizeof(float)); | |
model->keys[h][i] = (float*)calloc(config.n_embd / config.n_head, sizeof(float)); | |
model->values[h][i] = (float*)calloc(config.n_embd / config.n_head, sizeof(float)); | |
if (!model->queries[h][i] || !model->keys[h][i] || !model->values[h][i]) { | |
fprintf(stderr, "Allocation failed for attention matrix blocks\n"); | |
// Free any allocated memory | |
for (int j = 0; j <= h; ++j) { | |
for (int k = 0; k < (j < h ? config.block_size : i); ++k) { | |
if (model->queries[j][k]) free(model->queries[j][k]); | |
if (model->keys[j][k]) free(model->keys[j][k]); | |
if (model->values[j][k]) free(model->values[j][k]); | |
} | |
if (model->queries[j]) free(model->queries[j]); | |
if (model->keys[j]) free(model->keys[j]); | |
if (model->values[j]) free(model->values[j]); | |
} | |
free(model->queries); | |
free(model->keys); | |
free(model->values); | |
exit(EXIT_FAILURE); | |
} | |
} | |
// (keys[h][i] is already allocated above together with queries[h][i] and values[h][i],
// so no second allocation pass is needed here.)
} | |
} | |
// Compute queries, keys, and values for the self-attention mechanism | |
void compute_queries_keys_values(float *input, GPTModel *model, float ***queries, float ***keys, float ***values) { | |
printf("Entering compute_queries_keys_values\n"); | |
printf("Model config - n_head: %d, block_size: %d, n_embd: %d\n", model->config.n_head, model->config.block_size, model->config.n_embd); | |
// Compute queries, keys, and values for each head | |
for (int h = 0; h < model->config.n_head; ++h) { | |
// Sanity checks: the head dimension must divide n_embd evenly and the per-head weights must exist
assert(model->config.n_embd % model->config.n_head == 0);
assert(model->query_weights[h] != NULL && model->key_weights[h] != NULL && model->value_weights[h] != NULL);
matrix_multiply(model->query_weights[h], input, (*queries)[h], model->config.block_size, model->config.n_embd / model->config.n_head, model->config.n_embd);
matrix_multiply(model->key_weights[h], input, (*keys)[h], model->config.block_size, model->config.n_embd / model->config.n_head, model->config.n_embd);
matrix_multiply(model->value_weights[h], input, (*values)[h], model->config.block_size, model->config.n_embd / model->config.n_head, model->config.n_embd);
} | |
printf("Exiting compute_queries_keys_values - n_head: %d, block_size: %d, n_embd: %d\n", model->config.n_head, model->config.block_size, model->config.n_embd); | |
} | |
// Unit test for matrix multiplication | |
void test_matrix_multiply() { | |
// Create test matrices A, B, and C | |
float A[2][3] = {{1, 2, 3}, {4, 5, 6}}; | |
float B[3][2] = {{7, 8}, {9, 10}, {11, 12}}; | |
float C[2][2] = {0}; | |
// Expected result of multiplication | |
float expected[2][2] = {{58, 64}, {139, 154}}; | |
// Perform matrix multiplication | |
matrix_multiply(&A[0][0], &B[0][0], &C[0][0], 2, 2, 3); | |
// Assert each element of the result matrix C is as expected | |
for (int i = 0; i < 2; ++i) { | |
for (int j = 0; j < 2; ++j) { | |
assert(fabs(C[i][j] - expected[i][j]) < 1e-5); | |
} | |
} | |
} | |
// Unit test for GELU activation | |
void test_gelu_activation() { | |
// Test input and expected output | |
float input = 0.5; | |
float expected_output = 0.3457; // Approximate expected value | |
printf("GELU activation input: %f\n", input); | |
printf("Expected output: %f\n", expected_output); | |
float output = gelu_activation(input); | |
printf("Actual output: %f\n", output); | |
printf("Difference: %f\n", fabs(output - expected_output)); | |
// Assert the output is as expected | |
assert(fabs(output - expected_output) < 1e-4); | |
} | |
// Unit test for initializing attention matrices | |
void test_initialize_attention_matrices() { | |
GPTConfig config = {VOCAB_SIZE, BLOCK_SIZE, N_LAYER, N_HEAD, N_EMBD}; | |
GPTModel model; | |
initialize_model(&model, config); // Assuming this also initializes attention matrices | |
initialize_attention_matrices(&model, config); | |
// Check if memory allocation was successful and dimensions are correct | |
assert(model.queries != NULL); | |
assert(model.keys != NULL); | |
assert(model.values != NULL); | |
for (int h = 0; h < config.n_head; ++h) { | |
assert(model.queries[h] != NULL); | |
assert(model.keys[h] != NULL); | |
assert(model.values[h] != NULL); | |
for (int s = 0; s < config.block_size; ++s) { | |
assert(model.queries[h][s] != NULL); | |
assert(model.keys[h][s] != NULL); | |
assert(model.values[h][s] != NULL); | |
} | |
} | |
// Clean up | |
free_attention_matrices(&model, config); | |
} | |
// Unit test for computing queries, keys, and values | |
void test_compute_queries_keys_values() { | |
GPTConfig config = {VOCAB_SIZE, BLOCK_SIZE, N_LAYER, N_HEAD, N_EMBD}; | |
GPTModel model; | |
model.config = config; // Set the model configuration | |
initialize_model(&model, config); // Assuming this also initializes attention matrices | |
initialize_attention_matrices(&model, config); | |
// Ensure that the weights are not NULL | |
assert(model.query_weights != NULL); | |
assert(model.key_weights != NULL); | |
assert(model.value_weights != NULL); | |
for (int h = 0; h < config.n_head; ++h) { | |
assert(model.query_weights[h] != NULL); | |
assert(model.key_weights[h] != NULL); | |
assert(model.value_weights[h] != NULL); | |
} | |
// Create mock input and model weights for testing | |
float *input = (float*)malloc(config.block_size * config.n_embd * sizeof(float)); | |
// Initialize input with some values | |
for (int i = 0; i < config.block_size * config.n_embd; ++i) { | |
input[i] = i; | |
} | |
// Assuming model weights are initialized in initialize_model | |
compute_queries_keys_values(input, &model, model.queries, model.keys, model.values); | |
// Check if queries, keys, and values are computed correctly | |
// This would involve checking the results of the matrix multiplication operations | |
// ... | |
// Clean up | |
free(input); | |
free_attention_matrices(&model, config); | |
} | |
// Function to free attention matrices | |
void free_attention_matrices(GPTModel *model, GPTConfig config) { | |
if (model->queries != NULL) { | |
for (int h = 0; h < config.n_head; ++h) { | |
if (model->queries[h] != NULL) { | |
for (int s = 0; s < config.block_size; ++s) { | |
if (model->queries[h][s] != NULL) { | |
free(model->queries[h][s]); | |
model->queries[h][s] = NULL; | |
} | |
} | |
free(model->queries[h]); | |
model->queries[h] = NULL; | |
} | |
} | |
free(model->queries); | |
model->queries = NULL; | |
} | |
if (model->keys != NULL) { | |
for (int h = 0; h < config.n_head; ++h) { | |
if (model->keys[h] != NULL) { | |
for (int s = 0; s < config.block_size; ++s) { | |
if (model->keys[h][s] != NULL) { | |
free(model->keys[h][s]); | |
model->keys[h][s] = NULL; | |
} | |
} | |
free(model->keys[h]); | |
model->keys[h] = NULL; | |
} | |
} | |
free(model->keys); | |
model->keys = NULL; | |
} | |
if (model->values != NULL) { | |
for (int h = 0; h < config.n_head; ++h) { | |
if (model->values[h] != NULL) { | |
for (int s = 0; s < config.block_size; ++s) { | |
if (model->values[h][s] != NULL) { | |
free(model->values[h][s]); | |
model->values[h][s] = NULL; | |
} | |
} | |
free(model->values[h]); | |
model->values[h] = NULL; | |
} | |
} | |
free(model->values); | |
model->values = NULL; | |
} | |
} | |
// New function to initialize embeddings | |
void initialize_embeddings(GPTModel *model, GPTConfig config) { | |
// Allocate memory for token embeddings | |
model->token_embeddings = (float**)malloc(config.vocab_size * sizeof(float*)); | |
CHECK_ALLOCATION(model->token_embeddings); | |
for (int i = 0; i < config.vocab_size; ++i) { | |
model->token_embeddings[i] = (float*)malloc(config.n_embd * sizeof(float)); | |
CHECK_ALLOCATION(model->token_embeddings[i]); | |
// Initialize weights with random values | |
for (int j = 0; j < config.n_embd; ++j) { | |
model->token_embeddings[i][j] = (float)rand() / (float)RAND_MAX; | |
} | |
} | |
// Allocate memory for position embeddings | |
model->position_embeddings = (float**)malloc(config.block_size * sizeof(float*)); | |
CHECK_ALLOCATION(model->position_embeddings); | |
for (int i = 0; i < config.block_size; ++i) { | |
model->position_embeddings[i] = (float*)malloc(config.n_embd * sizeof(float)); | |
CHECK_ALLOCATION(model->position_embeddings[i]); | |
// Initialize weights with random values | |
for (int j = 0; j < config.n_embd; ++j) { | |
model->position_embeddings[i][j] = (float)rand() / (float)RAND_MAX; | |
} | |
} | |
} | |
// Modify forward_pass function to apply embeddings and call flatten_attention_matrices | |
void forward_pass(GPTModel *model, int *input_indices, float **output) { | |
printf("Entering forward_pass - n_head: %d, block_size: %d, n_embd: %d\n", model->config.n_head, model->config.block_size, model->config.n_embd); | |
// Allocate memory for the output array if not already allocated | |
if (*output == NULL) { | |
*output = (float*)malloc(model->config.block_size * model->config.n_embd * sizeof(float)); | |
assert(*output != NULL); // Ensure memory allocation was successful | |
} | |
printf("After embeddings - block_size: %d, n_embd: %d\n", model->config.block_size, model->config.n_embd); | |
// Apply token and position embeddings to input indices | |
for (int i = 0; i < model->config.block_size; ++i) { | |
int index = input_indices[i]; | |
assert(index >= 0 && index < model->config.vocab_size); | |
assert(model->token_embeddings != NULL); | |
assert(model->position_embeddings != NULL); | |
for (int j = 0; j < model->config.n_embd; ++j) { | |
assert(model->token_embeddings[index] != NULL); | |
assert(model->position_embeddings[i] != NULL); | |
assert(i < model->config.block_size); // Assert that i is within the expected range | |
assert(j < model->config.n_embd); // Assert that j is within the expected range | |
(*output)[i * model->config.n_embd + j] = model->token_embeddings[index][j] + model->position_embeddings[i][j]; | |
} | |
} | |
printf("Before compute_queries_keys_values - n_head: %d, block_size: %d, n_embd: %d\n", model->config.n_head, model->config.block_size, model->config.n_embd); | |
compute_queries_keys_values(*output, model, model->queries, model->keys, model->values); | |
printf("After compute_queries_keys_values - n_head: %d, block_size: %d, n_embd: %d\n", model->config.n_head, model->config.block_size, model->config.n_embd); | |
float *queries_flat = (float*)malloc(model->config.n_head * model->config.block_size * (model->config.n_embd / model->config.n_head) * sizeof(float)); | |
assert(queries_flat != NULL); // Ensure memory allocation was successful | |
float *keys_flat = (float*)malloc(model->config.n_head * model->config.block_size * (model->config.n_embd / model->config.n_head) * sizeof(float)); | |
assert(keys_flat != NULL); // Ensure memory allocation was successful | |
float *values_flat = (float*)malloc(model->config.n_head * model->config.block_size * (model->config.n_embd / model->config.n_head) * sizeof(float)); | |
assert(values_flat != NULL); // Ensure memory allocation was successful | |
float *self_attention_output_flat = (float*)malloc(model->config.block_size * model->config.n_embd * sizeof(float)); | |
assert(self_attention_output_flat != NULL); // Ensure memory allocation was successful | |
// Flatten the per-head Q/K/V matrices into the contiguous buffers expected by dot_product_attention
flatten_attention_matrices(model->queries, queries_flat, model->config.n_head, model->config.block_size, model->config.n_embd / model->config.n_head);
flatten_attention_matrices(model->keys, keys_flat, model->config.n_head, model->config.block_size, model->config.n_embd / model->config.n_head);
flatten_attention_matrices(model->values, values_flat, model->config.n_head, model->config.block_size, model->config.n_embd / model->config.n_head);
printf("Before dot_product_attention - n_head: %d, block_size: %d, n_embd: %d\n", model->config.n_head, model->config.block_size, model->config.n_embd);
dot_product_attention(queries_flat, keys_flat, values_flat, self_attention_output_flat, model->config.n_head, model->config.block_size, model->config.n_embd); | |
printf("After dot_product_attention - n_head: %d, block_size: %d, n_embd: %d\n", model->config.n_head, model->config.block_size, model->config.n_embd); | |
float *mlp_output_flat = (float*)malloc(model->config.block_size * model->config.n_embd * sizeof(float)); | |
assert(mlp_output_flat != NULL); // Ensure memory allocation was successful | |
printf("Before mlp_block - n_head: %d, block_size: %d, n_embd: %d\n", model->config.n_head, model->config.block_size, model->config.n_embd); | |
mlp_block(self_attention_output_flat, mlp_output_flat, model->config.block_size, model->config.n_embd, model->mlp_weights_1, model->mlp_weights_2); | |
printf("After mlp_block - n_head: %d, block_size: %d, n_embd: %d\n", model->config.n_head, model->config.block_size, model->config.n_embd); | |
float **mlp_output_2d = (float**)malloc(model->config.block_size * sizeof(float*)); | |
for (int i = 0; i < model->config.block_size; ++i) { | |
mlp_output_2d[i] = &mlp_output_flat[i * model->config.n_embd]; | |
} | |
printf("Before layer_normalize - n_head: %d, block_size: %d, n_embd: %d\n", model->config.n_head, model->config.block_size, model->config.n_embd); | |
layer_normalize(mlp_output_2d, model->ln_gamma, model->ln_beta, model->config.block_size, model->config.n_embd, 1e-5); | |
printf("After layer_normalize - n_head: %d, block_size: %d, n_embd: %d\n", model->config.n_head, model->config.block_size, model->config.n_embd); | |
free(mlp_output_2d); | |
for (int i = 0; i < model->config.block_size; ++i) { | |
for (int j = 0; j < model->config.n_embd; ++j) { | |
(*output)[i * model->config.n_embd + j] = mlp_output_flat[i * model->config.n_embd + j]; | |
} | |
} | |
free(queries_flat); | |
free(keys_flat); | |
free(values_flat); | |
free(self_attention_output_flat); | |
free(mlp_output_flat); | |
printf("At end of forward_pass (before returning) - n_head: %d, block_size: %d, n_embd: %d\n", model->config.n_head, model->config.block_size, model->config.n_embd); | |
} | |
// Unit test for token and position embeddings | |
void test_embeddings() { | |
GPTConfig config = {VOCAB_SIZE, BLOCK_SIZE, N_LAYER, N_HEAD, N_EMBD}; | |
GPTModel model; | |
initialize_model(&model, config); // Initialize the model with embeddings | |
// Create mock input indices (for simplicity, use indices 0 to block_size-1) | |
int input_indices[BLOCK_SIZE]; | |
for (int i = 0; i < BLOCK_SIZE; ++i) { | |
input_indices[i] = i; | |
} | |
// Allocate memory for the output of the forward pass | |
float *output = NULL; | |
// Apply embeddings using the forward pass | |
forward_pass(&model, input_indices, &output); | |
// Check if the output contains the correct values | |
for (int i = 0; i < BLOCK_SIZE; ++i) { | |
for (int j = 0; j < N_EMBD; ++j) { | |
float expected_value = model.token_embeddings[input_indices[i]][j] + model.position_embeddings[i][j]; | |
assert(fabs(output[i * N_EMBD + j] - expected_value) < 1e-5); | |
} | |
} | |
// Clean up | |
free(output); | |
free_model(&model, config); // This function will need to be implemented to free all allocated memory in the model | |
} | |
// Function to free the model | |
void free_model(GPTModel *model, GPTConfig config) { | |
printf("Entering free_model - n_head: %d, block_size: %d, n_embd: %d\n", config.n_head, config.block_size, config.n_embd); | |
// Free token and position embeddings | |
if (model->token_embeddings != NULL) { | |
for (int i = 0; i < config.vocab_size; ++i) { | |
free(model->token_embeddings[i]); | |
} | |
free(model->token_embeddings); | |
model->token_embeddings = NULL; | |
} | |
if (model->position_embeddings != NULL) { | |
for (int i = 0; i < config.block_size; ++i) { | |
free(model->position_embeddings[i]); | |
} | |
free(model->position_embeddings); | |
model->position_embeddings = NULL; | |
} | |
// Free embedding weights | |
if (model->embedding_weights != NULL) { | |
for (int i = 0; i < config.vocab_size; ++i) { | |
free(model->embedding_weights[i]); | |
} | |
free(model->embedding_weights); | |
model->embedding_weights = NULL; | |
} | |
// Free layer normalization parameters | |
if (model->ln_gamma != NULL) { | |
free(model->ln_gamma); | |
model->ln_gamma = NULL; | |
} | |
if (model->ln_beta != NULL) { | |
free(model->ln_beta); | |
model->ln_beta = NULL; | |
} | |
// Free MLP weights | |
if (model->mlp_weights_1 != NULL) { | |
free(model->mlp_weights_1); | |
model->mlp_weights_1 = NULL; | |
} | |
if (model->mlp_weights_2 != NULL) { | |
free(model->mlp_weights_2); | |
model->mlp_weights_2 = NULL; | |
} | |
// Free attention matrices if they have not been freed already | |
if (model->queries != NULL || model->keys != NULL || model->values != NULL) { | |
free_attention_matrices(model, config); | |
} | |
// Reset the model configuration to a known state | |
model->config.vocab_size = 0; | |
model->config.block_size = 0; | |
model->config.n_layer = 0; | |
model->config.n_head = 0; | |
model->config.n_embd = 0; | |
printf("Exiting free_model - n_head: %d, block_size: %d, n_embd: %d\n", config.n_head, config.block_size, config.n_embd); | |
} | |
// Layer normalization function | |
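// Normalizes each row to zero mean and unit variance ((x - mean) / sqrt(variance + epsilon)), then applies the learned per-feature scale gamma and shift beta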
void layer_normalize(float **inputs, float *gamma, float *beta, int n, int m, float epsilon) { | |
for (int i = 0; i < n; ++i) { | |
float sum = 0.0; | |
for (int j = 0; j < m; ++j) { | |
sum += inputs[i][j]; | |
} | |
float mean = sum / m; | |
float variance_sum = 0.0; | |
for (int j = 0; j < m; ++j) { | |
variance_sum += (inputs[i][j] - mean) * (inputs[i][j] - mean); | |
} | |
float variance = variance_sum / m; | |
for (int j = 0; j < m; ++j) { | |
inputs[i][j] = (inputs[i][j] - mean) / sqrt(variance + epsilon); | |
inputs[i][j] = inputs[i][j] * gamma[j] + beta[j]; | |
} | |
} | |
} | |
// Unit test for layer normalization | |
void test_layer_normalize() {
int n = 2; // Number of input vectors
int m = 3; // Number of features
float epsilon = 1e-5;
float **inputs = (float**)malloc(n * sizeof(float*));
float *gamma = (float*)malloc(m * sizeof(float));
float *beta = (float*)malloc(m * sizeof(float));
assert(inputs != NULL && gamma != NULL && beta != NULL);
// Initialize inputs with simple deterministic values, gamma to ones, beta to zeros
for (int i = 0; i < n; ++i) {
inputs[i] = (float*)malloc(m * sizeof(float));
assert(inputs[i] != NULL);
for (int j = 0; j < m; ++j) inputs[i][j] = (float)(i * m + j);
}
for (int j = 0; j < m; ++j) { gamma[j] = 1.0f; beta[j] = 0.0f; }
// Call layer_normalize
layer_normalize(inputs, gamma, beta, n, m, epsilon);
// Check if the output is normalized correctly: each row should have ~zero mean and ~unit variance
for (int i = 0; i < n; ++i) {
float mean = 0.0f, variance = 0.0f;
for (int j = 0; j < m; ++j) mean += inputs[i][j];
mean /= m;
for (int j = 0; j < m; ++j) variance += (inputs[i][j] - mean) * (inputs[i][j] - mean);
variance /= m;
assert(fabs(mean) < 1e-4);
assert(fabs(variance - 1.0) < 1e-2);
}
// Clean up
for (int i = 0; i < n; ++i) {
free(inputs[i]);
}
free(inputs);
free(gamma);
free(beta);
}
// Add the new unit test to the main function | |
int main(int argc, char *argv[]) { | |
GPTConfig config = {VOCAB_SIZE, BLOCK_SIZE, N_LAYER, N_HEAD, N_EMBD}; | |
GPTModel model = {0}; // Zero-initialize the model structure | |
// Initialize the model with the configuration | |
initialize_model(&model, config); | |
// Run unit tests with fresh model instances | |
test_matrix_multiply(); | |
free_model(&model, config); | |
initialize_model(&model, config); | |
test_gelu_activation(); | |
free_model(&model, config); | |
initialize_model(&model, config); | |
test_initialize_attention_matrices(); | |
free_model(&model, config); | |
initialize_model(&model, config); | |
test_compute_queries_keys_values(); | |
free_model(&model, config); | |
initialize_model(&model, config); | |
test_embeddings(); // New unit test for embeddings | |
free_model(&model, config); | |
initialize_model(&model, config); | |
test_layer_normalize(); // New unit test for layer normalization | |
free_model(&model, config); | |
// Rest of the main function... | |
// ... | |
return 0; | |
} |
// I realized I wasn't uploading train_gpt2.py in previous days, which was extremely unfair to Devin - this is
// where I started to be more hands-on rather than hands-off, explicitly telling it to read and log train_gpt2.py first before
// attempting the gpt2.c implementation
#include <stdio.h> | |
#include <stdlib.h> | |
#include <math.h> | |
#include <assert.h> | |
#include <string.h> | |
#ifndef M_PI | |
#define M_PI 3.14159265358979323846 | |
#endif | |
// Constants for model dimensions, learning rate, etc. | |
#define VOCAB_SIZE 50257 // Example size, to be adjusted based on actual model | |
#define BLOCK_SIZE 1024 // Example size, to be adjusted based on actual model | |
#define N_LAYER 12 // Example size, to be adjusted based on actual model | |
#define N_HEAD 12 // Example size, to be adjusted based on actual model | |
#define N_EMBD 768 // Example size, to be adjusted based on actual model | |
#define LEARNING_RATE 0.001 | |
// Data structures for model configuration and layers | |
typedef struct { | |
int vocab_size; | |
int block_size; | |
int n_layer; | |
int n_head; | |
int n_embd; | |
} GPTConfig; | |
typedef struct { | |
// Embedding layers, attention blocks, MLP blocks, etc. | |
float **embedding_weights; // Example for embedding weights | |
// Other components to be added | |
float ***queries; | |
float ***keys; | |
float ***values; | |
// Weights for queries, keys, and values | |
float **query_weights; | |
float **key_weights; | |
float **value_weights; | |
GPTConfig config; // Added config here | |
float **token_embeddings; // Embeddings for tokens | |
float **position_embeddings; // Embeddings for positions | |
float *ln_gamma; // Layer normalization gamma parameter | |
float *ln_beta; // Layer normalization beta parameter | |
float *mlp_weights_1; // Weights for the first MLP layer | |
float *mlp_weights_2; // Weights for the second MLP layer | |
} GPTModel; | |
// Function prototypes | |
void initialize_model(GPTModel *model, GPTConfig config); | |
void forward_pass(GPTModel *model, int *input_indices, float **output); | |
void backward_pass(GPTModel *model, float *grad_output, float *grad_input); | |
void update_weights(GPTModel *model); | |
void matrix_multiply(float *A, float *B, float *C, int n, int m, int k); | |
float gelu_activation(float x); | |
void test_matrix_multiply(); | |
void test_gelu_activation(); | |
void initialize_attention_matrices(GPTModel *model, GPTConfig config); | |
void compute_queries_keys_values(float *input, GPTModel *model, float ***queries, float ***keys, float ***values); | |
void test_initialize_attention_matrices(); | |
void test_compute_queries_keys_values(); | |
void free_attention_matrices(GPTModel *model, GPTConfig config); | |
void initialize_embeddings(GPTModel *model, GPTConfig config); | |
void free_model(GPTModel *model, GPTConfig config); // Prototype for new function to free model memory | |
void test_embeddings(); // Prototype for new unit test function | |
void layer_normalize(float **inputs, float *gamma, float *beta, int n, int m, float epsilon); // New function prototype | |
void test_layer_normalize(); // New unit test function prototype | |
void softmax(float *input, float *output, int length); // New function prototype for softmax | |
void dot_product_attention(float *queries, float *keys, float *values, float *output, int n_head, int block_size, int n_embd); // New function prototype for dot-product attention | |
void mlp_block(float *input, float *output, int block_size, int n_embd, float *mlp_weights_1, float *mlp_weights_2); // New function prototype for MLP block | |
// Function to flatten 3D attention matrices into 1D arrays | |
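// The output layout is head-major: flat_array[h * block_size * n_embd_per_head + i * n_embd_per_head + j] holds feature j of position i for head h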
void flatten_attention_matrices(float ***matrices, float *flat_array, int n_head, int block_size, int n_embd_per_head) { | |
for (int h = 0; h < n_head; ++h) { | |
for (int i = 0; i < block_size; ++i) { | |
for (int j = 0; j < n_embd_per_head; ++j) { | |
flat_array[h * block_size * n_embd_per_head + i * n_embd_per_head + j] = matrices[h][i][j]; | |
} | |
} | |
} | |
} | |
// Matrix multiplication function with boundary checks | |
void matrix_multiply(float *A, float *B, float *C, int n, int m, int k) { | |
// Ensure that the pointers are not NULL and dimensions are greater than zero | |
if (A == NULL || B == NULL || C == NULL) { | |
fprintf(stderr, "Null pointer provided to matrix_multiply function\n"); | |
exit(EXIT_FAILURE); | |
} | |
if (n <= 0 || m <= 0 || k <= 0) { | |
fprintf(stderr, "Invalid dimensions provided to matrix_multiply function\n"); | |
exit(EXIT_FAILURE); | |
} | |
printf("Matrix multiplication dimensions: A[%d][%d], B[%d][%d], C[%d][%d]\n", n, m, m, k, n, k); | |
// Perform matrix multiplication | |
// The outer loop iterates over the rows of matrix A and the result matrix C | |
for (int i = 0; i < n; ++i) { | |
// The inner loop iterates over the columns of matrix B and the result matrix C | |
for (int j = 0; j < k; ++j) { | |
float sum = 0; | |
// The innermost loop performs the dot product of the i-th row of matrix A and the j-th column of matrix B | |
for (int p = 0; p < m; ++p) { | |
sum += A[i * m + p] * B[p * k + j]; | |
} | |
// Log the indices and sum before writing to matrix C | |
printf("Attempting to write to C at index [%d][%d] (linear index %d), total allocated size: %d\n", i, j, i * k + j, n * k); | |
C[i * k + j] = sum; | |
} | |
} | |
} | |
// GELU activation function | |
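// Uses the tanh approximation: 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))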
float gelu_activation(float x) { | |
return 0.5 * x * (1.0 + tanh(sqrt(2.0 / M_PI) * (x + 0.044715 * pow(x, 3)))); | |
} | |
// Softmax function | |
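// Subtracts the row maximum before exponentiating so exp() cannot overflow (numerically stable softmax)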
void softmax(float *input, float *output, int length) { | |
float max = input[0]; | |
for (int i = 1; i < length; ++i) { | |
if (input[i] > max) { | |
max = input[i]; | |
} | |
} | |
float sum = 0.0; | |
for (int i = 0; i < length; ++i) { | |
output[i] = exp(input[i] - max); | |
sum += output[i]; | |
} | |
for (int i = 0; i < length; ++i) { | |
output[i] /= sum; | |
} | |
} | |
// Corrected dot_product_attention function (scaled dot-product attention per head)
void dot_product_attention(float *queries, float *keys, float *values, float *output, int n_head, int block_size, int n_embd) {
printf("Entering dot_product_attention\n");
int d = n_embd / n_head; // Per-head feature dimension
float scale = 1.0f / sqrtf((float)d); // Scale the scores by 1/sqrt(d_k)
// Temporary storage for the attention scores
float *attention_scores = (float*)malloc(block_size * block_size * sizeof(float));
assert(attention_scores != NULL); // Ensure memory allocation was successful
for (int h = 0; h < n_head; ++h) {
float *Q = queries + h * block_size * d;
float *K = keys + h * block_size * d;
float *V = values + h * block_size * d;
float *O = output + h * block_size * d;
// Compute the scaled dot product between queries and keys for this head (Q * K^T);
// matrix_multiply cannot transpose its second argument, so do the dot products explicitly.
// Note: no causal mask is applied here.
for (int i = 0; i < block_size; ++i) {
for (int j = 0; j < block_size; ++j) {
float sum = 0.0f;
for (int p = 0; p < d; ++p) sum += Q[i * d + p] * K[j * d + p];
attention_scores[i * block_size + j] = sum * scale;
}
}
// Apply softmax to each row of the attention scores
for (int i = 0; i < block_size; ++i) {
softmax(attention_scores + i * block_size, attention_scores + i * block_size, block_size);
}
// Multiply by values: O (block_size x d) = scores (block_size x block_size) * V (block_size x d)
matrix_multiply(attention_scores, V, O, block_size, block_size, d);
}
// Free the temporary storage for attention scores
free(attention_scores);
printf("Exiting dot_product_attention\n");
}
// Corrected mlp_block function
void mlp_block(float *input, float *output, int block_size, int n_embd, float *mlp_weights_1, float *mlp_weights_2) {
// Define the intermediate size for the MLP
int intermediate_size = n_embd * 4; // This can be a different size
float *intermediate_output = (float*)malloc(block_size * intermediate_size * sizeof(float));
assert(intermediate_output != NULL); // Ensure memory allocation was successful
// First linear layer: input (block_size x n_embd) * mlp_weights_1 (n_embd x intermediate_size)
matrix_multiply(input, mlp_weights_1, intermediate_output, block_size, n_embd, intermediate_size);
// Apply GELU activation
for (int i = 0; i < block_size * intermediate_size; ++i) {
intermediate_output[i] = gelu_activation(intermediate_output[i]);
}
// Second linear layer to project back to n_embd dimensions: (block_size x intermediate_size) * (intermediate_size x n_embd)
matrix_multiply(intermediate_output, mlp_weights_2, output, block_size, intermediate_size, n_embd);
// Free the intermediate output
free(intermediate_output);
}
// Check for successful allocation and handle errors | |
#define CHECK_ALLOCATION(ptr) if ((ptr) == NULL) { \ | |
fprintf(stderr, "Memory allocation failed\n"); \ | |
free_model(model, config); \ | |
exit(EXIT_FAILURE); \ | |
} | |
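// Note: this macro assumes variables named model (GPTModel*) and config (GPTConfig) are in scope at the call site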
// Add print statements to the initialize_model function | |
void initialize_model(GPTModel *model, GPTConfig config) { | |
printf("Entering initialize_model\n"); | |
// Example of allocating memory for the embedding layer and initializing weights | |
// Assuming embedding weights are a 2D array with dimensions [vocab_size, n_embd] | |
model->embedding_weights = (float**)malloc(config.vocab_size * sizeof(float*)); | |
CHECK_ALLOCATION(model->embedding_weights); | |
for (int i = 0; i < config.vocab_size; ++i) { | |
model->embedding_weights[i] = (float*)malloc(config.n_embd * sizeof(float)); | |
CHECK_ALLOCATION(model->embedding_weights[i]); | |
for (int j = 0; j < config.n_embd; ++j) { | |
// Initialize weights with random values, for example using a simple normal distribution | |
model->embedding_weights[i][j] = (float)rand() / (float)RAND_MAX; | |
} | |
} | |
// Allocate and initialize weights for queries, keys, and values | |
model->query_weights = (float**)malloc(config.n_head * sizeof(float*)); | |
CHECK_ALLOCATION(model->query_weights); | |
model->key_weights = (float**)malloc(config.n_head * sizeof(float*)); | |
CHECK_ALLOCATION(model->key_weights); | |
model->value_weights = (float**)malloc(config.n_head * sizeof(float*)); | |
CHECK_ALLOCATION(model->value_weights); | |
for (int h = 0; h < config.n_head; ++h) { | |
model->query_weights[h] = (float*)malloc(config.n_embd * (config.n_embd / config.n_head) * sizeof(float)); | |
CHECK_ALLOCATION(model->query_weights[h]); | |
model->key_weights[h] = (float*)malloc(config.n_embd * (config.n_embd / config.n_head) * sizeof(float)); | |
CHECK_ALLOCATION(model->key_weights[h]); | |
model->value_weights[h] = (float*)malloc(config.n_embd * (config.n_embd / config.n_head) * sizeof(float)); | |
CHECK_ALLOCATION(model->value_weights[h]); | |
for (int i = 0; i < config.n_embd; ++i) { | |
for (int j = 0; j < config.n_embd / config.n_head; ++j) { | |
model->query_weights[h][i * (config.n_embd / config.n_head) + j] = ((float)rand() / (float)RAND_MAX - 0.5) * sqrt(2.0 / (config.n_embd + config.n_embd / config.n_head)); | |
model->key_weights[h][i * (config.n_embd / config.n_head) + j] = ((float)rand() / (float)RAND_MAX - 0.5) * sqrt(2.0 / (config.n_embd + config.n_embd / config.n_head)); | |
model->value_weights[h][i * (config.n_embd / config.n_head) + j] = ((float)rand() / (float)RAND_MAX - 0.5) * sqrt(2.0 / (config.n_embd + config.n_embd / config.n_head)); | |
} | |
} | |
} | |
// Initialize ln_gamma and ln_beta | |
model->ln_gamma = (float*)malloc(config.n_embd * sizeof(float)); | |
CHECK_ALLOCATION(model->ln_gamma); | |
model->ln_beta = (float*)malloc(config.n_embd * sizeof(float)); | |
CHECK_ALLOCATION(model->ln_beta); | |
for (int i = 0; i < config.n_embd; ++i) { | |
model->ln_gamma[i] = 1.0; // Typically initialized to ones | |
model->ln_beta[i] = 0.0; // Typically initialized to zeros | |
} | |
// Initialize MLP weights | |
int intermediate_size = config.n_embd * 4; // This can be a different size | |
model->mlp_weights_1 = (float*)malloc(config.n_embd * intermediate_size * sizeof(float)); | |
CHECK_ALLOCATION(model->mlp_weights_1); | |
model->mlp_weights_2 = (float*)malloc(intermediate_size * config.n_embd * sizeof(float)); | |
CHECK_ALLOCATION(model->mlp_weights_2); | |
// Random initialization of MLP weights (example) | |
for (int i = 0; i < config.n_embd * intermediate_size; ++i) { | |
model->mlp_weights_1[i] = (float)rand() / (float)RAND_MAX; | |
model->mlp_weights_2[i] = (float)rand() / (float)RAND_MAX; | |
} | |
// Allocate and initialize token embeddings | |
model->token_embeddings = (float**)malloc(config.vocab_size * sizeof(float*)); | |
CHECK_ALLOCATION(model->token_embeddings); | |
for (int i = 0; i < config.vocab_size; ++i) { | |
model->token_embeddings[i] = (float*)malloc(config.n_embd * sizeof(float)); | |
CHECK_ALLOCATION(model->token_embeddings[i]); | |
for (int j = 0; j < config.n_embd; ++j) { | |
model->token_embeddings[i][j] = (float)rand() / (float)RAND_MAX; | |
} | |
} | |
// Allocate and initialize position embeddings | |
model->position_embeddings = (float**)malloc(config.block_size * sizeof(float*)); | |
for (int i = 0; i < config.block_size; ++i) { | |
model->position_embeddings[i] = (float*)malloc(config.n_embd * sizeof(float)); | |
for (int j = 0; j < config.n_embd; ++j) { | |
model->position_embeddings[i][j] = (float)rand() / (float)RAND_MAX; | |
} | |
} | |
// Note: This is a simplified example. In practice, you would need to implement a proper random initialization | |
// (e.g., Xavier initialization) and also consider biases, layer normalization parameters, etc. | |
printf("Exiting initialize_model\n"); | |
} | |
// Initialize attention matrices for queries, keys, and values | |
void initialize_attention_matrices(GPTModel *model, GPTConfig config) { | |
int n_head = config.n_head; | |
int block_size = config.block_size; | |
int k = config.n_embd / n_head; // Corrected number of columns in the result matrix | |
printf("Initializing attention matrices...\n"); | |
fflush(stdout); | |
printf("n_head: %d, block_size: %d, k: %d\n", n_head, block_size, k); | |
fflush(stdout); | |
printf("Debug: block_size=%d, n_head=%d, model=%p, model->queries=%p\n", block_size, n_head, (void*)model, (void*)model->queries); | |
fflush(stdout); | |
// Allocate memory for the array of pointers for queries, keys, and values | |
model->queries = (float***)malloc(n_head * sizeof(float**)); | |
if (model->queries == NULL) { | |
fprintf(stderr, "Failed to allocate memory for queries\n"); | |
exit(EXIT_FAILURE); | |
} | |
printf("Allocated memory for queries array of pointers: %p, size: %lu\n", (void*)model->queries, n_head * sizeof(float**)); | |
model->keys = (float***)malloc(n_head * sizeof(float**)); | |
if (model->keys == NULL) { | |
fprintf(stderr, "Failed to allocate memory for keys\n"); | |
exit(EXIT_FAILURE); | |
} | |
printf("Allocated memory for keys array of pointers: %p, size: %lu\n", (void*)model->keys, n_head * sizeof(float**)); | |
model->values = (float***)malloc(n_head * sizeof(float**)); | |
if (model->values == NULL) { | |
fprintf(stderr, "Failed to allocate memory for values\n"); | |
exit(EXIT_FAILURE); | |
} | |
printf("Allocated memory for values array of pointers: %p, size: %lu\n", (void*)model->values, n_head * sizeof(float**)); | |
// Allocate 2D arrays for each head | |
for (int i = 0; i < n_head; ++i) { | |
model->queries[i] = (float**)calloc(block_size, sizeof(float*)); | |
if (model->queries[i] == NULL) { | |
fprintf(stderr, "Failed to allocate memory for queries for head %d\n", i); | |
free_attention_matrices(model, config); | |
exit(EXIT_FAILURE); | |
} | |
printf("Allocated memory for queries 2D array for head %d: %p, size: %lu\n", i, (void*)model->queries[i], block_size * sizeof(float*)); | |
model->keys[i] = (float**)calloc(block_size, sizeof(float*)); | |
if (model->keys[i] == NULL) { | |
fprintf(stderr, "Failed to allocate memory for keys for head %d\n", i); | |
free_attention_matrices(model, config); | |
exit(EXIT_FAILURE); | |
} | |
printf("Allocated memory for keys 2D array for head %d: %p, size: %lu\n", i, (void*)model->keys[i], block_size * sizeof(float*)); | |
model->values[i] = (float**)calloc(block_size, sizeof(float*)); | |
if (model->values[i] == NULL) { | |
fprintf(stderr, "Failed to allocate memory for values for head %d\n", i); | |
free_attention_matrices(model, config); | |
exit(EXIT_FAILURE); | |
} | |
printf("Allocated memory for values 2D array for head %d: %p, size: %lu\n", i, (void*)model->values[i], block_size * sizeof(float*)); | |
} | |
} | |
// Compute queries, keys, and values for each head | |
void compute_queries_keys_values(float *input, GPTModel *model, float ***queries, float ***keys, float ***values) { | |
printf("Entering compute_queries_keys_values\n"); | |
// Verify that the input matrix has the correct dimensions | |
assert(input != NULL); | |
// Assertions to verify the dimensions of the matrices before multiplication | |
for (int h = 0; h < model->config.n_head; ++h) { | |
assert(model->query_weights[h] != NULL); | |
assert(model->key_weights[h] != NULL); | |
assert(model->value_weights[h] != NULL); | |
assert(queries[h] != NULL); | |
assert(keys[h] != NULL); | |
assert(values[h] != NULL); | |
} | |
// Allocate memory for each row of queries, keys, and values for each head | |
int k = model->config.n_embd / model->config.n_head; // Corrected number of columns in the result matrix | |
for (int h = 0; h < model->config.n_head; ++h) { | |
for (int s = 0; s < model->config.block_size; ++s) { | |
queries[h][s] = (float*)calloc(k, sizeof(float)); | |
keys[h][s] = (float*)calloc(k, sizeof(float)); | |
values[h][s] = (float*)calloc(k, sizeof(float)); | |
if (!queries[h][s] || !keys[h][s] || !values[h][s]) { | |
fprintf(stderr, "Allocation failed for attention matrix rows for head %d, row %d\n", h, s); | |
// Handle allocation failure: free any allocated memory and exit | |
free_attention_matrices(model, model->config); | |
exit(EXIT_FAILURE); | |
} | |
} | |
} | |
// Compute queries, keys, and values for each head
int matrix_multiply_count = 0; // Counter to track the number of matrix_multiply calls
for (int h = 0; h < model->config.n_head; ++h) {
printf("Computing queries/keys/values for head %d\n", h);
for (int s = 0; s < model->config.block_size; ++s) {
// Project row s of the input (1 x n_embd) into a 1 x k vector for this head
float *input_row = input + s * model->config.n_embd;
matrix_multiply(input_row, model->query_weights[h], queries[h][s], 1, model->config.n_embd, k);
matrix_multiply(input_row, model->key_weights[h], keys[h][s], 1, model->config.n_embd, k);
matrix_multiply(input_row, model->value_weights[h], values[h][s], 1, model->config.n_embd, k);
matrix_multiply_count += 3;
}
}
printf("Exiting compute_queries_keys_values with %d calls to matrix_multiply\n", matrix_multiply_count);
} | |
// Unit test for matrix multiplication | |
void test_matrix_multiply() { | |
// Create test matrices A, B, and C | |
float A[2][3] = {{1, 2, 3}, {4, 5, 6}}; | |
float B[3][2] = {{7, 8}, {9, 10}, {11, 12}}; | |
float C[2][2] = {0}; | |
// Expected result of multiplication | |
float expected[2][2] = {{58, 64}, {139, 154}}; | |
// Perform matrix multiplication: A is 2x3 and B is 3x2, so n=2, m=3, k=2
matrix_multiply(&A[0][0], &B[0][0], &C[0][0], 2, 3, 2);
// Assert each element of the result matrix C is as expected | |
for (int i = 0; i < 2; ++i) { | |
for (int j = 0; j < 2; ++j) { | |
assert(fabs(C[i][j] - expected[i][j]) < 1e-5); | |
} | |
} | |
} | |
// Unit test for GELU activation | |
void test_gelu_activation() { | |
// Test input and expected output | |
float input = 0.5; | |
float expected_output = 0.3457; // Approximate expected value | |
float output = gelu_activation(input); | |
// Assert the output is as expected | |
assert(fabs(output - expected_output) < 1e-4); | |
} | |
// Unit test for initializing attention matrices | |
void test_initialize_attention_matrices() { | |
GPTConfig config = {VOCAB_SIZE, BLOCK_SIZE, N_LAYER, N_HEAD, N_EMBD}; | |
GPTModel model = {0}; // Zero-initialize so pointer members start out NULL
initialize_model(&model, config); // Assuming this also initializes attention matrices | |
initialize_attention_matrices(&model, config); | |
// Check if memory allocation was successful and dimensions are correct | |
assert(model.queries != NULL); | |
assert(model.keys != NULL); | |
assert(model.values != NULL); | |
for (int h = 0; h < config.n_head; ++h) { | |
assert(model.queries[h] != NULL); | |
assert(model.keys[h] != NULL); | |
assert(model.values[h] != NULL); | |
for (int s = 0; s < config.block_size; ++s) { | |
assert(model.queries[h][s] != NULL); | |
assert(model.keys[h][s] != NULL); | |
assert(model.values[h][s] != NULL); | |
} | |
} | |
// Clean up | |
free_attention_matrices(&model, config); | |
} | |
// Unit test for computing queries, keys, and values | |
void test_compute_queries_keys_values() { | |
GPTConfig config = {VOCAB_SIZE, BLOCK_SIZE, N_LAYER, N_HEAD, N_EMBD}; | |
GPTModel model = {0}; // Zero-initialize the model structure to ensure all fields are set to a valid state | |
model.config = config; // Set the model configuration | |
initialize_model(&model, config); // Assuming this also initializes attention matrices | |
initialize_attention_matrices(&model, config); | |
// Ensure that the weights are not NULL | |
assert(model.query_weights != NULL); | |
assert(model.key_weights != NULL); | |
assert(model.value_weights != NULL); | |
for (int h = 0; h < config.n_head; ++h) { | |
assert(model.query_weights[h] != NULL); | |
assert(model.key_weights[h] != NULL); | |
assert(model.value_weights[h] != NULL); | |
} | |
// Create mock input and model weights for testing | |
float *input = (float*)calloc(config.block_size * config.n_embd, sizeof(float)); // Use calloc to ensure the input is initialized to zero | |
// Check if input allocation was successful | |
assert(input != NULL); | |
printf("Debug: Input allocation successful.\n"); | |
// Initialize input with some values | |
for (int i = 0; i < config.block_size * config.n_embd; ++i) { | |
input[i] = i; | |
} | |
// Print the first few elements of the input array for verification | |
printf("Debug: First elements of input array after initialization:\n"); | |
for (int i = 0; i < 5; ++i) { | |
printf("input[%d] = %f\n", i, input[i]); | |
} | |
fflush(stdout); | |
compute_queries_keys_values(input, &model, model.queries, model.keys, model.values); | |
// Check if queries, keys, and values are computed correctly | |
// This would involve checking the results of the matrix multiplication operations | |
// ... | |
// Clean up | |
free(input); | |
free_attention_matrices(&model, config); | |
} | |
// Function to free attention matrices | |
void free_attention_matrices(GPTModel *model, GPTConfig config) { | |
if (model->queries != NULL) { | |
for (int h = 0; h < config.n_head; ++h) { | |
if (model->queries[h] != NULL) { | |
for (int s = 0; s < config.block_size; ++s) { | |
if (model->queries[h][s] != NULL) { | |
free(model->queries[h][s]); | |
model->queries[h][s] = NULL; | |
} | |
} | |
free(model->queries[h]); | |
model->queries[h] = NULL; | |
} | |
} | |
free(model->queries); | |
model->queries = NULL; | |
} | |
if (model->keys != NULL) { | |
for (int h = 0; h < config.n_head; ++h) { | |
if (model->keys[h] != NULL) { | |
for (int s = 0; s < config.block_size; ++s) { | |
if (model->keys[h][s] != NULL) { | |
free(model->keys[h][s]); | |
model->keys[h][s] = NULL; | |
} | |
} | |
free(model->keys[h]); | |
model->keys[h] = NULL; | |
} | |
} | |
free(model->keys); | |
model->keys = NULL; | |
} | |
if (model->values != NULL) { | |
for (int h = 0; h < config.n_head; ++h) { | |
if (model->values[h] != NULL) { | |
for (int s = 0; s < config.block_size; ++s) { | |
if (model->values[h][s] != NULL) { | |
free(model->values[h][s]); | |
model->values[h][s] = NULL; | |
} | |
} | |
free(model->values[h]); | |
model->values[h] = NULL; | |
} | |
} | |
free(model->values); | |
model->values = NULL; | |
} | |
} | |
// New function to initialize embeddings | |
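// Note: initialize_model above already allocates token and position embeddings, so calling both on the same model would leak the earlier allocation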
void initialize_embeddings(GPTModel *model, GPTConfig config) { | |
// Allocate memory for token embeddings | |
model->token_embeddings = (float**)malloc(config.vocab_size * sizeof(float*)); | |
CHECK_ALLOCATION(model->token_embeddings); | |
for (int i = 0; i < config.vocab_size; ++i) { | |
model->token_embeddings[i] = (float*)malloc(config.n_embd * sizeof(float)); | |
CHECK_ALLOCATION(model->token_embeddings[i]); | |
// Initialize weights with random values | |
for (int j = 0; j < config.n_embd; ++j) { | |
model->token_embeddings[i][j] = (float)rand() / (float)RAND_MAX; | |
} | |
} | |
// Allocate memory for position embeddings | |
model->position_embeddings = (float**)malloc(config.block_size * sizeof(float*)); | |
CHECK_ALLOCATION(model->position_embeddings); | |
for (int i = 0; i < config.block_size; ++i) { | |
model->position_embeddings[i] = (float*)malloc(config.n_embd * sizeof(float)); | |
CHECK_ALLOCATION(model->position_embeddings[i]); | |
// Initialize weights with random values | |
for (int j = 0; j < config.n_embd; ++j) { | |
model->position_embeddings[i][j] = (float)rand() / (float)RAND_MAX; | |
} | |
} | |
} | |
// Modify forward_pass function to apply embeddings and call flatten_attention_matrices | |
void forward_pass(GPTModel *model, int *input_indices, float **output) { | |
// Allocate memory for the output array if not already allocated | |
if (*output == NULL) { | |
*output = (float*)malloc(model->config.block_size * model->config.n_embd * sizeof(float)); | |
assert(*output != NULL); // Ensure memory allocation was successful | |
} | |
// Apply token and position embeddings to input indices | |
for (int i = 0; i < model->config.block_size; ++i) { | |
int index = input_indices[i]; | |
assert(index >= 0 && index < model->config.vocab_size); | |
assert(model->token_embeddings != NULL); | |
assert(model->position_embeddings != NULL); | |
for (int j = 0; j < model->config.n_embd; ++j) { | |
assert(model->token_embeddings[index] != NULL); | |
assert(model->position_embeddings[i] != NULL); | |
assert(i < model->config.block_size); // Assert that i is within the expected range | |
assert(j < model->config.n_embd); // Assert that j is within the expected range | |
(*output)[i * model->config.n_embd + j] = model->token_embeddings[index][j] + model->position_embeddings[i][j]; | |
} | |
} | |
compute_queries_keys_values(*output, model, model->queries, model->keys, model->values); | |
float *queries_flat = (float*)malloc(model->config.n_head * model->config.block_size * (model->config.n_embd / model->config.n_head) * sizeof(float)); | |
assert(queries_flat != NULL); // Ensure memory allocation was successful | |
float *keys_flat = (float*)malloc(model->config.n_head * model->config.block_size * (model->config.n_embd / model->config.n_head) * sizeof(float)); | |
assert(keys_flat != NULL); // Ensure memory allocation was successful | |
float *values_flat = (float*)malloc(model->config.n_head * model->config.block_size * (model->config.n_embd / model->config.n_head) * sizeof(float)); | |
assert(values_flat != NULL); // Ensure memory allocation was successful | |
float *self_attention_output_flat = (float*)malloc(model->config.block_size * model->config.n_embd * sizeof(float)); | |
assert(self_attention_output_flat != NULL); // Ensure memory allocation was successful | |
// Flatten the per-head query/key/value matrices into contiguous arrays before running attention
int n_embd_per_head = model->config.n_embd / model->config.n_head;
flatten_attention_matrices(model->queries, queries_flat, model->config.n_head, model->config.block_size, n_embd_per_head);
flatten_attention_matrices(model->keys, keys_flat, model->config.n_head, model->config.block_size, n_embd_per_head);
flatten_attention_matrices(model->values, values_flat, model->config.n_head, model->config.block_size, n_embd_per_head);
dot_product_attention(queries_flat, keys_flat, values_flat, self_attention_output_flat, model->config.n_head, model->config.block_size, model->config.n_embd);
float *mlp_output_flat = (float*)malloc(model->config.block_size * model->config.n_embd * sizeof(float)); | |
assert(mlp_output_flat != NULL); // Ensure memory allocation was successful | |
mlp_block(self_attention_output_flat, mlp_output_flat, model->config.block_size, model->config.n_embd, model->mlp_weights_1, model->mlp_weights_2); | |
float **mlp_output_2d = (float**)malloc(model->config.block_size * sizeof(float*)); | |
for (int i = 0; i < model->config.block_size; ++i) { | |
mlp_output_2d[i] = &mlp_output_flat[i * model->config.n_embd]; | |
} | |
layer_normalize(mlp_output_2d, model->ln_gamma, model->ln_beta, model->config.block_size, model->config.n_embd, 1e-5); | |
free(mlp_output_2d); | |
for (int i = 0; i < model->config.block_size; ++i) { | |
for (int j = 0; j < model->config.n_embd; ++j) { | |
(*output)[i * model->config.n_embd + j] = mlp_output_flat[i * model->config.n_embd + j]; | |
} | |
} | |
free(queries_flat); | |
free(keys_flat); | |
free(values_flat); | |
free(self_attention_output_flat); | |
free(mlp_output_flat); | |
} | |
// Unit test for token and position embeddings | |
void test_embeddings() { | |
GPTConfig config = {VOCAB_SIZE, BLOCK_SIZE, N_LAYER, N_HEAD, N_EMBD}; | |
GPTModel model = {0}; // Zero-initialize so pointer members start out NULL
model.config = config; // forward_pass reads dimensions from model.config
initialize_model(&model, config); // Initialize the model with embeddings
initialize_attention_matrices(&model, config); // forward_pass also needs the attention matrices
// Create mock input indices (for simplicity, use indices 0 to block_size-1) | |
int input_indices[BLOCK_SIZE]; | |
for (int i = 0; i < BLOCK_SIZE; ++i) { | |
input_indices[i] = i; | |
} | |
// Allocate memory for the output of the forward pass | |
float *output = NULL; | |
// Apply embeddings using the forward pass | |
forward_pass(&model, input_indices, &output); | |
// Check if the output contains the correct values | |
for (int i = 0; i < BLOCK_SIZE; ++i) { | |
for (int j = 0; j < N_EMBD; ++j) { | |
float expected_value = model.token_embeddings[input_indices[i]][j] + model.position_embeddings[i][j]; | |
assert(fabs(output[i * N_EMBD + j] - expected_value) < 1e-5); | |
} | |
} | |
// Clean up | |
free(output); | |
free_model(&model, config); // This function will need to be implemented to free all allocated memory in the model | |
} | |
// Function to free the model | |
void free_model(GPTModel *model, GPTConfig config) { | |
// Free token and position embeddings | |
if (model->token_embeddings != NULL) { | |
for (int i = 0; i < config.vocab_size; ++i) { | |
free(model->token_embeddings[i]); | |
} | |
free(model->token_embeddings); | |
model->token_embeddings = NULL; | |
} | |
if (model->position_embeddings != NULL) { | |
for (int i = 0; i < config.block_size; ++i) { | |
free(model->position_embeddings[i]); | |
} | |
free(model->position_embeddings); | |
model->position_embeddings = NULL; | |
} | |
// Free embedding weights | |
if (model->embedding_weights != NULL) { | |
for (int i = 0; i < config.vocab_size; ++i) { | |
free(model->embedding_weights[i]); | |
} | |
free(model->embedding_weights); | |
model->embedding_weights = NULL; | |
} | |
// Free layer normalization parameters | |
if (model->ln_gamma != NULL) { | |
free(model->ln_gamma); | |
model->ln_gamma = NULL; | |
} | |
if (model->ln_beta != NULL) { | |
free(model->ln_beta); | |
model->ln_beta = NULL; | |
} | |
// Free MLP weights | |
if (model->mlp_weights_1 != NULL) { | |
free(model->mlp_weights_1); | |
model->mlp_weights_1 = NULL; | |
} | |
if (model->mlp_weights_2 != NULL) { | |
free(model->mlp_weights_2); | |
model->mlp_weights_2 = NULL; | |
} | |
// Free queries, keys, and values (guarded, in case they were never allocated)
if (model->queries != NULL || model->keys != NULL || model->values != NULL) {
free_attention_matrices(model, config);
}
// Free query, key, and value weights | |
if (model->query_weights != NULL) { | |
for (int i = 0; i < config.n_head; ++i) { | |
free(model->query_weights[i]); | |
} | |
free(model->query_weights); | |
model->query_weights = NULL; | |
} | |
if (model->key_weights != NULL) { | |
for (int i = 0; i < config.n_head; ++i) { | |
free(model->key_weights[i]); | |
} | |
free(model->key_weights); | |
model->key_weights = NULL; | |
} | |
if (model->value_weights != NULL) { | |
for (int i = 0; i < config.n_head; ++i) { | |
free(model->value_weights[i]); | |
} | |
free(model->value_weights); | |
model->value_weights = NULL; | |
} | |
// Reset the model configuration to a known state | |
model->config.vocab_size = 0; | |
model->config.block_size = 0; | |
model->config.n_layer = 0; | |
model->config.n_head = 0; | |
model->config.n_embd = 0; | |
} | |
// Layer normalization function | |
void layer_normalize(float **inputs, float *gamma, float *beta, int n, int m, float epsilon) { | |
for (int i = 0; i < n; ++i) { | |
float sum = 0.0; | |
for (int j = 0; j < m; ++j) { | |
sum += inputs[i][j]; | |
} | |
float mean = sum / m; | |
float variance_sum = 0.0; | |
for (int j = 0; j < m; ++j) { | |
variance_sum += (inputs[i][j] - mean) * (inputs[i][j] - mean); | |
} | |
float variance = variance_sum / m; | |
for (int j = 0; j < m; ++j) { | |
inputs[i][j] = (inputs[i][j] - mean) / sqrt(variance + epsilon); | |
inputs[i][j] = inputs[i][j] * gamma[j] + beta[j]; | |
} | |
} | |
} | |
// Unit test for layer normalization | |
void test_layer_normalize() {
int n = 2; // Number of input vectors
int m = 3; // Number of features
float epsilon = 1e-5;
float **inputs = (float**)malloc(n * sizeof(float*));
float *gamma = (float*)malloc(m * sizeof(float));
float *beta = (float*)malloc(m * sizeof(float));
assert(inputs != NULL && gamma != NULL && beta != NULL);
// Initialize inputs with simple deterministic values, gamma to ones, beta to zeros
for (int i = 0; i < n; ++i) {
inputs[i] = (float*)malloc(m * sizeof(float));
assert(inputs[i] != NULL);
for (int j = 0; j < m; ++j) inputs[i][j] = (float)(i * m + j);
}
for (int j = 0; j < m; ++j) { gamma[j] = 1.0f; beta[j] = 0.0f; }
// Call layer_normalize
layer_normalize(inputs, gamma, beta, n, m, epsilon);
// Check if the output is normalized correctly: each row should have ~zero mean and ~unit variance
for (int i = 0; i < n; ++i) {
float mean = 0.0f, variance = 0.0f;
for (int j = 0; j < m; ++j) mean += inputs[i][j];
mean /= m;
for (int j = 0; j < m; ++j) variance += (inputs[i][j] - mean) * (inputs[i][j] - mean);
variance /= m;
assert(fabs(mean) < 1e-4);
assert(fabs(variance - 1.0) < 1e-2);
}
// Clean up
for (int i = 0; i < n; ++i) {
free(inputs[i]);
}
free(inputs);
free(gamma);
free(beta);
}
// Add the new unit test to the main function | |
int main(int argc, char *argv[]) { | |
printf("Starting main function\n"); | |
GPTConfig config = {VOCAB_SIZE, BLOCK_SIZE, N_LAYER, N_HEAD, N_EMBD}; | |
GPTModel model = {0}; // Zero-initialize the model structure | |
printf("Calling initialize_model\n"); | |
initialize_model(&model, config); | |
printf("initialize_model completed\n"); | |
// Commenting out all other tests to isolate test_compute_queries_keys_values | |
// test_matrix_multiply(); | |
// free_model(&model, config); | |
// initialize_model(&model, config); | |
// test_gelu_activation(); | |
// free_model(&model, config); | |
// initialize_model(&model, config); | |
// test_initialize_attention_matrices(); | |
// free_model(&model, config); | |
// initialize_model(&model, config); | |
// test_embeddings(); // New unit test for embeddings | |
// free_model(&model, config); | |
// initialize_model(&model, config); | |
// test_layer_normalize(); // New unit test for layer normalization | |
// free_model(&model, config); | |
printf("Calling test_compute_queries_keys_values\n"); | |
test_compute_queries_keys_values(); | |
printf("test_compute_queries_keys_values completed\n"); | |
free_model(&model, config); | |
// Rest of the main function... | |
// ... | |
return 0; | |
} |
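// To build and run this sketch locally (assuming the file is saved as gpt2.c), something like:
//   gcc -O2 -o gpt2 gpt2.c -lm
//   ./gpt2
// should work; -lm links the math library needed for tanh/sqrt/exp/pow from <math.h>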