Skip to content

Instantly share code, notes, and snippets.

@JackyYin
Last active October 24, 2021 07:43
Show Gist options
  • Save JackyYin/e977910804c50f7e5275bad0b465b2f9 to your computer and use it in GitHub Desktop.
Save JackyYin/e977910804c50f7e5275bad0b465b2f9 to your computer and use it in GitHub Desktop.
Test for L1 cache misses on a Raspberry Pi 4.
#include <unistd.h>
#include <iostream>
#include <chrono>
#include <vector>
// Small benchmark payload: one int and one float, nothing else.
struct x {
    int a;
    float b;
};
// Short alias used by the benchmark driver.
using sx = x;
// Larger benchmark payload: the same two leading fields as `x`, followed by
// an array of eight doubles that pads every element out.
struct y {
    int a;
    float b;
    // With a 4-byte int, a 4-byte float and 8-byte doubles:
    // 8 + 8 * 8 = 72 bytes per element.
    double c[8];
};
// Short alias, mirroring `sx`.
using sy = y;
/**
 * Times one sequential pass that reads the `a` and `b` members of `vlen`
 * elements stored by value inside a std::vector<T>.
 *
 * Despite the name, the elements are not on the call stack: std::vector keeps
 * them in a single contiguous buffer. What this measures is "contiguous,
 * by-value storage", as the counterpart to test_heap's one-allocation-per-
 * element layout.
 *
 * @tparam T    Aggregate that can be list-initialised from `{ 0, 0.0 }` and
 *              exposes members `a` (read as int) and `b` (read as float).
 * @param vlen  Number of elements to create and read; values <= 0 produce an
 *              empty run.
 *
 * Prints "stack total : <microseconds>" to std::cout.
 */
template <typename T>
void test_stack(long vlen)
{
    using size_type = typename std::vector<T>::size_type;

    std::vector<T> v;
    if (vlen > 0) {
        // One up-front allocation instead of repeated grow-and-copy cycles.
        v.reserve(static_cast<size_type>(vlen));
    }
    // `long` index: an `int` counter would overflow for vlen > INT_MAX.
    for (long i = 0; i < vlen; ++i) {
        v.push_back({ 0, 0.0 });
    }

    // Volatile sinks: every store is an observable side effect, so the
    // compiler cannot discard the loads (or the whole loop) being timed.
    volatile int itmp = 0;
    volatile float ftmp = 0.0f;

    const size_type count = v.size();
    const auto ts = std::chrono::steady_clock::now();
    for (size_type i = 0; i < count; ++i) {
        itmp = v[i].a;
        ftmp = v[i].b;
    }
    const auto tf = std::chrono::steady_clock::now();

    // std::endl on purpose: flush so the figure is visible even if a later
    // benchmark stage aborts.
    std::cout << "stack total : " << std::chrono::duration_cast<std::chrono::microseconds>(tf - ts).count() << std::endl;
}
/**
 * Times one sequential pass that reads the `a` and `b` members of `vlen`
 * objects, each of which lives in its own dynamic allocation and is reached
 * through a pointer stored in a std::vector<T*>.
 *
 * Every object is initialised to `{ 0, 0.0 }` before the timed loop, so the
 * loop never reads indeterminate memory, and every object is released again
 * before the function returns.
 *
 * @tparam T    Aggregate that can be list-initialised from `{ 0, 0.0 }` and
 *              exposes members `a` (read as int) and `b` (read as float).
 * @param vlen  Number of objects to allocate and read; values <= 0 produce an
 *              empty run.
 *
 * Prints "heap total : <microseconds>" to std::cout.
 */
template <typename T>
void test_heap(long vlen)
{
    using size_type = typename std::vector<T*>::size_type;

    // Raw pointers are kept deliberately: the timed loop should pay for a
    // plain pointer dereference and nothing else. Ownership is handled by the
    // explicit release loop at the end of the function.
    std::vector<T*> v;
    if (vlen > 0) {
        // Reserving first means push_back never reallocates, so it cannot
        // throw after `new` has already succeeded.
        v.reserve(static_cast<size_type>(vlen));
    }
    // `long` index: an `int` counter would overflow for vlen > INT_MAX.
    for (long i = 0; i < vlen; ++i) {
        v.push_back(new T{ 0, 0.0 });
    }

    // Volatile sinks: every store is an observable side effect, so the
    // compiler cannot discard the loads (or the whole loop) being timed.
    volatile int itmp = 0;
    volatile float ftmp = 0.0f;

    const size_type count = v.size();
    const auto ts = std::chrono::steady_clock::now();
    for (size_type i = 0; i < count; ++i) {
        itmp = v[i]->a;
        ftmp = v[i]->b;
    }
    const auto tf = std::chrono::steady_clock::now();

    // std::endl on purpose: flush so the figure is visible immediately.
    std::cout << "heap total : " << std::chrono::duration_cast<std::chrono::microseconds>(tf - ts).count() << std::endl;

    // Release every object; this happens outside the timed region.
    for (T* obj : v) {
        delete obj;
    }
}
/**
 * Benchmark driver for one element type.
 *
 * Reports the cache parameters it works with (L1 and L2 sizes are fixed
 * constants in this function; only the L1 data-cache line size is queried via
 * sysconf), then runs test_stack and test_heap over a number of elements
 * whose by-value footprint is twice the assumed L2 size.
 *
 * @tparam T  Element type handed on to test_stack / test_heap.
 */
template <typename T>
void test()
{
    const long lineBytes = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
    const long l1Bytes = 32 * 1024;
    const long l2Bytes = 1024 * 1024;

    std::cout << "L1 cache size: " << l1Bytes << std::endl
              << "L1 cache line size: " << lineBytes << std::endl
              << "L2 cache size: " << l2Bytes << std::endl;

    // Twice as many elements as fit into the assumed L2 size.
    const long elemCount = (l2Bytes / sizeof(T)) * 2;

    test_stack<T>(elemCount);
    test_heap<T>(elemCount);
}
// Entry point: runs the benchmark pair for the small `sx` element type.
int main()
{
    test<sx>();
    return 0;
}
@JackyYin
Copy link
Author

test output:

stack total : 2980
heap total : 4724

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment