Skip to content

Instantly share code, notes, and snippets.

@swvitaliy
Last active September 23, 2023 20:05
Show Gist options
  • Save swvitaliy/33fe0696d6f7b08e307916d898d046b4 to your computer and use it in GitHub Desktop.
Save swvitaliy/33fe0696d6f7b08e307916d898d046b4 to your computer and use it in GitHub Desktop.
#include <algorithm>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <string>
#include <vector>
/* https://www.youtube.com/watch?v=lYS7aqGtXPU */
using std::cout;
using std::rand;
using std::srand;
using std::string;
using std::time;
using std::vector;
// Side length of the square matrices handled in this file.
constexpr int N = 1024;

// Maps a (row, column) pair to its offset in a row-major N x N array:
// whole rows come first, then the position inside the row.
inline int to_index(int i, int j) {
  const int row_start = i * N;
  return row_start + j;
}
/*
 * Multiplies two row-major N x N matrices with the textbook i-j-k loop order.
 *
 * a, b: input matrices; each must hold at least N * N elements.
 * c:    output matrix (at least N * N elements); every cell is overwritten
 *       with the dot product of row i of a and column j of b.
 *
 * a and b are taken by const reference so the call does not deep-copy two
 * N * N vectors. They are read in place, so c must not be the same object as
 * a or b.
 *
 * The innermost loop reads b at indices k * N + j, i.e. with a stride of N
 * floats between consecutive accesses.
 */
void naive(const vector<float> &a, const vector<float> &b, vector<float> &c) {
  for (int i = 0; i < N; i++)
    for (int j = 0; j < N; j++) {
      // Accumulate in a local and store once instead of re-reading and
      // re-writing c[i, j] on every k step.
      float sum = .0f;
      for (int k = 0; k < N; k++)
        sum += a[to_index(i, k)] * b[to_index(k, j)];
      c[to_index(i, j)] = sum;
    }
}
/*
 * Multiplies two row-major N x N matrices using the i-k-j loop order.
 *
 * a, b: input matrices; each must hold at least N * N elements.
 * c:    output matrix (at least N * N elements); it is zeroed first and then
 *       accumulated into, so any previous contents are discarded.
 *
 * a and b are taken by const reference so the call does not deep-copy two
 * N * N vectors. They are read in place, so c must not be the same object as
 * a or b.
 *
 * With j innermost, both c (i * N + j) and b (k * N + j) are walked through
 * consecutive indices in the inner loop.
 */
void better(const vector<float> &a, const vector<float> &b, vector<float> &c) {
  std::fill(c.begin(), c.end(), .0f);
  for (int i = 0; i < N; i++)
    for (int k = 0; k < N; k++) {
      // a[i, k] does not depend on j; load it once per inner loop.
      const float a_ik = a[to_index(i, k)];
      for (int j = 0; j < N; j++)
        c[to_index(i, j)] += a_ik * b[to_index(k, j)];
    }
}
// Overwrites every element of `a`, front to back, with the next value
// returned by rand() converted to float. An empty vector is left untouched.
void generate(vector<float> &a) {
  const vector<float>::size_type count = a.size();
  for (vector<float>::size_type pos = 0; pos < count; ++pos)
    a[pos] = static_cast<float>(rand());
}
int main(int argc, char **argv) {
const int N2 = N * N;
srand(time(nullptr));
vector<float> a(N2), b(N2), c(N2);
cout << "generate\n";
generate(a);
generate(b);
cout << "start exec\n";
char mode = 'n';
if (argc >= 2 && string(argv[1]) == "b")
mode = 'b';
if (mode == 'n')
naive(a, b, c);
else
better(a, b, c);
return 0;
}
# None of the targets below produce a file with their own name, so mark them
# phony; otherwise a stray file called "build", "naive" or "better" would make
# the target look up to date.
.PHONY: build naive better

# Compile a.cpp into ./a with optimizations enabled.
build:
	g++ -std=c++14 -Wall -O2 -oa a.cpp

# Count cache misses for the i-j-k multiplication (mode "n").
naive:
	sudo perf stat -B -e cache-misses ./a n

# Count cache misses for the i-k-j multiplication (mode "b").
better:
	sudo perf stat -B -e cache-misses ./a b
tmp|$ make naive
sudo perf stat -B -e cache-misses ./a n
generate
start exec
Performance counter stats for './a n':
37,222,639 cache-misses
3.401735000 seconds time elapsed
3.381160000 seconds user
0.019983000 seconds sys
tmp|$
tmp|$
tmp|$ make better
sudo perf stat -B -e cache-misses ./a b
generate
start exec
Performance counter stats for './a b':
7,845,267 cache-misses
0.636575810 seconds time elapsed
0.616525000 seconds user
0.020017000 seconds sys
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment