Created
May 1, 2025 13:13
-
-
Save shauray8/7efa299bfd456b1d11daced2739fa5af to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <chrono>
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <vector>
// Returns a timestamp in seconds, suitable for measuring elapsed time.
//
// The value is read from std::chrono::steady_clock, which never goes backwards,
// so the difference between two calls is a valid interval even if the system
// wall clock is adjusted in between. Only differences between two return
// values are meaningful; the absolute value is relative to the clock's own,
// unspecified epoch.
double get_time() {
    const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();
    return std::chrono::duration<double>(since_epoch).count();
}
// Returns element (i, j) of an n x n matrix stored as a flat vector.
//
// Parameters:
//   mat       - flat storage of the matrix.
//   i, j      - row and column of the requested element.
//   n         - matrix dimension (row length for row-major storage, column
//               length for column-major storage).
//   row_major - true:  the element lives at index i * n + j;
//               false: the element lives at index j * n + i.
//
// No bounds checking is performed; the computed index must be inside `mat`.
float get_element(const std::vector<float>& mat, int i, int j, int n, bool row_major) {
    // Do the index arithmetic in size_t: an int product overflows (undefined
    // behavior) as soon as n * n exceeds INT_MAX.
    const std::size_t row = static_cast<std::size_t>(i);
    const std::size_t col = static_cast<std::size_t>(j);
    const std::size_t dim = static_cast<std::size_t>(n);
    const std::size_t index = row_major ? (row * dim + col) : (col * dim + row);
    return mat[index];
}
// Multiplies two square n x n matrices: C = A * B.
//
// A and C are stored row-major. B is stored row-major when B_row_major is true
// and column-major otherwise, so the same logical product can be computed with
// two different memory access patterns over B.
//
// Parameters:
//   A, B         - input matrices; each must hold at least n * n elements.
//   C            - output matrix; must hold at least n * n elements. Entries
//                  [0, n * n) are overwritten.
//   n            - matrix dimension. Values <= 0 make the call a no-op.
//   B_row_major  - storage layout of B (see above).
//   print_access - when true, prints the flat index and address of every
//                  element of B that is read while computing C[0,0].
//
// Throws std::invalid_argument if any matrix holds fewer than n * n elements.
void matmul(const std::vector<float>& A, const std::vector<float>& B,
            std::vector<float>& C, int n, bool B_row_major, bool print_access = false) {
    if (n <= 0) {
        return;
    }

    // All index arithmetic is done in size_t so that n * n cannot overflow int.
    const std::size_t dim = static_cast<std::size_t>(n);
    const std::size_t total = dim * dim;
    if (A.size() < total || B.size() < total || C.size() < total) {
        throw std::invalid_argument("matmul: every matrix must hold at least n * n elements");
    }

    // Moving from B[k, j] to B[k + 1, j] advances n elements in row-major
    // storage but only one element in column-major storage. The layout is
    // fixed for the whole call, so it is resolved once here instead of being
    // branched on for every element.
    const std::size_t b_stride = B_row_major ? dim : 1;

    for (std::size_t i = 0; i < dim; ++i) {
        const std::size_t a_row = i * dim;
        for (std::size_t j = 0; j < dim; ++j) {
            // Flat index of B[0, j] in the chosen layout.
            const std::size_t b_base = B_row_major ? j : j * dim;
            // Only the first output element is traced to keep the log short.
            const bool trace = print_access && i == 0 && j == 0;

            float sum = 0.0f;
            for (std::size_t k = 0; k < dim; ++k) {
                // The same index is used for the trace and for the actual
                // read, so the printed pattern is the one really executed.
                const std::size_t idx_B = b_base + k * b_stride;
                if (trace) {
                    std::cout << "Accessing B[" << k << "," << j << "] at index " << idx_B
                              << ", address " << static_cast<const void*>(&B[idx_B]) << '\n';
                }
                sum += A[a_row + k] * B[idx_B];
            }
            C[a_row + j] = sum;
        }
    }
}
int main() { | |
// Demonstration of access patterns with small n | |
int n_demo = 4; | |
std::vector<float> A_demo(n_demo * n_demo, 1.0f); | |
std::vector<float> B_row_demo(n_demo * n_demo, 1.0f); | |
std::vector<float> B_col_demo(n_demo * n_demo); | |
std::vector<float> C_demo(n_demo * n_demo, 0.0f); | |
// Fill B_col_demo for column-major layout | |
for (int i = 0; i < n_demo; i++) { | |
for (int j = 0; j < n_demo; j++) { | |
B_col_demo[j * n_demo + i] = B_row_demo[i * n_demo + j]; | |
} | |
} | |
std::cout << "Memory Access Pattern for B (Row-Major):\n"; | |
matmul(A_demo, B_row_demo, C_demo, n_demo, true, true); | |
std::cout << "\nMemory Access Pattern for B (Column-Major):\n"; | |
matmul(A_demo, B_col_demo, C_demo, n_demo, false, true); | |
// Original timing code with n = 1024 | |
int n = 1024; | |
std::vector<float> A(n * n, 1.0f); | |
std::vector<float> B_row(n * n, 1.0f); | |
std::vector<float> B_col(n * n); | |
std::vector<float> C(n * n, 0.0f); | |
// Fill B_col for column-major layout | |
for (int i = 0; i < n; i++) { | |
for (int j = 0; j < n; j++) { | |
B_col[j * n + i] = B_row[i * n + j]; | |
} | |
} | |
int num_runs = 10; | |
double time_row = 0.0; | |
for (int run = 0; run < num_runs; run++) { | |
double start = get_time(); | |
matmul(A, B_row, C, n, true); // No printing | |
double end = get_time(); | |
time_row += (end - start); | |
} | |
time_row /= num_runs; | |
double time_col = 0.0; | |
for (int run = 0; run < num_runs; run++) { | |
double start = get_time(); | |
matmul(A, B_col, C, n, false); // No printing | |
double end = get_time(); | |
time_col += (end - start); | |
} | |
time_col /= num_runs; | |
double ops = 2.0 * n * n * n; | |
double tflops_row = ops / (time_row * 1e12); | |
double tflops_col = ops / (time_col * 1e12); | |
std::cout << "\nPerformance Results:\n"; | |
std::cout << "TFLOPS with B row-major: " << tflops_row << " TFLOPS\n"; | |
std::cout << "TFLOPS with B column-major: " << tflops_col << " TFLOPS\n"; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment