Skip to content

Instantly share code, notes, and snippets.

@shauray8
Created May 1, 2025 13:13
Show Gist options
  • Save shauray8/7efa299bfd456b1d11daced2739fa5af to your computer and use it in GitHub Desktop.
Save shauray8/7efa299bfd456b1d11daced2739fa5af to your computer and use it in GitHub Desktop.
#include <chrono>
#include <cstddef>
#include <iostream>
#include <vector>
// Returns a timestamp in seconds, intended for measuring elapsed intervals.
//
// The value is the time since std::chrono::steady_clock's epoch, so a single
// reading has no calendar meaning; subtract two readings to get a duration.
// steady_clock is used instead of high_resolution_clock because it is
// guaranteed to be monotonic: a system clock adjustment in the middle of a
// benchmark cannot shrink or negate the measured interval.
double get_time() {
    using clock = std::chrono::steady_clock;
    const std::chrono::duration<double> since_epoch(clock::now().time_since_epoch());
    return since_epoch.count();
}
// Reads logical element (i, j) of an n x n matrix stored in a flat vector.
//
// mat        flat storage of the matrix
// i, j       row and column of the element to read
// n          matrix dimension (row length for row-major storage, column
//            length for column-major storage)
// row_major  true  -> element (i, j) lives at mat[i * n + j]
//            false -> element (i, j) lives at mat[j * n + i]
//
// No bounds checking is performed: the caller must pass non-negative indices
// that fall inside mat.
float get_element(const std::vector<float>& mat, int i, int j, int n, bool row_major) {
    // Do the index arithmetic in std::size_t: the product of two ints
    // overflows (undefined behaviour) once n * n exceeds INT_MAX.
    const auto row = static_cast<std::size_t>(i);
    const auto col = static_cast<std::size_t>(j);
    const auto dim = static_cast<std::size_t>(n);
    return row_major ? mat[row * dim + col] : mat[col * dim + row];
}
// Multiplies two n x n matrices: C = A * B.
//
// A            left operand, read as row-major
// B            right operand; read as row-major when B_row_major is true,
//              as column-major otherwise
// C            output, written as row-major; every one of its first n * n
//              entries is overwritten
// n            matrix dimension; n <= 0 makes the call a no-op
// B_row_major  storage layout of B (see above)
// print_access when true, the flat index and address of each B element read
//              while computing C[0,0] are written to std::cout
//
// No bounds checking is performed: A, B and C must each hold at least n * n
// elements.
void matmul(const std::vector<float>& A, const std::vector<float>& B,
            std::vector<float>& C, int n, bool B_row_major, bool print_access = false) {
    if (n <= 0) {
        return;  // Nothing to compute; also keeps the size_t conversion below safe.
    }

    // All index arithmetic is done in std::size_t so that n * n cannot
    // overflow int for large matrices.
    const auto dim = static_cast<std::size_t>(n);

    // Logical element B(k, j) lives at k * b_k_stride + j * b_j_stride.
    // Deriving the strides once keeps the layout decision out of the inner
    // loop and guarantees the trace below reports the indices actually read.
    const std::size_t b_k_stride = B_row_major ? dim : std::size_t{1};
    const std::size_t b_j_stride = B_row_major ? std::size_t{1} : dim;

    if (print_access) {
        // Trace the B accesses made for output element C[0,0] (so j == 0).
        // Done ahead of the main loops so the hot loop carries no print check.
        for (std::size_t k = 0; k < dim; ++k) {
            const std::size_t idx_B = k * b_k_stride;
            std::cout << "Accessing B[" << k << "," << 0 << "] at index " << idx_B
                      << ", address " << static_cast<const void*>(&B[idx_B]) << '\n';
        }
    }

    for (std::size_t i = 0; i < dim; ++i) {
        for (std::size_t j = 0; j < dim; ++j) {
            float sum = 0.0f;
            for (std::size_t k = 0; k < dim; ++k) {
                sum += A[i * dim + k] * B[k * b_k_stride + j * b_j_stride];
            }
            C[i * dim + j] = sum;
        }
    }
}
// Program entry point.
//
// Part 1 prints the B access trace produced by matmul for a 4 x 4 problem,
// once with B stored row-major and once with B stored column-major.
// Part 2 times matmul on 1024 x 1024 matrices for both layouts, averages the
// wall time over 10 runs each, and reports throughput computed as
// 2 * n^3 / (seconds * 1e12).
int main() {
    // Writes the row-major dim x dim matrix `src` into `dst` in column-major order.
    const auto to_column_major = [](const std::vector<float>& src,
                                    std::vector<float>& dst, int dim) {
        for (int row = 0; row < dim; ++row) {
            for (int col = 0; col < dim; ++col) {
                dst[col * dim + row] = src[row * dim + col];
            }
        }
    };

    // ---- Part 1: access-pattern demonstration on a small matrix ----
    const int n_demo = 4;
    std::vector<float> A_demo(n_demo * n_demo, 1.0f);
    std::vector<float> B_row_demo(n_demo * n_demo, 1.0f);
    std::vector<float> B_col_demo(n_demo * n_demo);
    std::vector<float> C_demo(n_demo * n_demo, 0.0f);
    to_column_major(B_row_demo, B_col_demo, n_demo);

    std::cout << "Memory Access Pattern for B (Row-Major):\n";
    matmul(A_demo, B_row_demo, C_demo, n_demo, true, true);
    std::cout << "\nMemory Access Pattern for B (Column-Major):\n";
    matmul(A_demo, B_col_demo, C_demo, n_demo, false, true);

    // ---- Part 2: timing comparison on a large matrix ----
    const int n = 1024;
    std::vector<float> A(n * n, 1.0f);
    std::vector<float> B_row(n * n, 1.0f);
    std::vector<float> B_col(n * n);
    std::vector<float> C(n * n, 0.0f);
    to_column_major(B_row, B_col, n);

    const int num_runs = 10;

    // Mean wall time in seconds of one matmul call (tracing disabled) over
    // num_runs back-to-back runs with the given B and layout flag.
    const auto mean_seconds = [&](const std::vector<float>& B, bool B_row_major) {
        double total = 0.0;
        for (int run = 0; run < num_runs; ++run) {
            const double start = get_time();
            matmul(A, B, C, n, B_row_major);
            const double end = get_time();
            total += (end - start);
        }
        return total / num_runs;
    };

    const double time_row = mean_seconds(B_row, true);
    const double time_col = mean_seconds(B_col, false);

    // One multiply and one add per inner-loop iteration: 2 * n^3 operations.
    const double ops = 2.0 * n * n * n;
    const double tflops_row = ops / (time_row * 1e12);
    const double tflops_col = ops / (time_col * 1e12);

    std::cout << "\nPerformance Results:\n";
    std::cout << "TFLOPS with B row-major: " << tflops_row << " TFLOPS\n";
    std::cout << "TFLOPS with B column-major: " << tflops_col << " TFLOPS\n";
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment