Skip to content

Instantly share code, notes, and snippets.

@matthieubulte
Last active February 12, 2025 14:35
Show Gist options
  • Save matthieubulte/16a9e47c4cc0f5c233710613e53364cf to your computer and use it in GitHub Desktop.
Save matthieubulte/16a9e47c4cc0f5c233710613e53364cf to your computer and use it in GitHub Desktop.
cache_experiment.py
class CEnv:
def __init__(self):
self.executable = None
def compile(self, code: str) -> str:
with tempfile.NamedTemporaryFile(mode="w", suffix=".c", delete=False) as temp_c:
temp_c.write(code)
c_file = temp_c.name
try:
executable = c_file[:-2]
subprocess.run(
["gcc", c_file, "-O3", "-o", executable],
capture_output=True,
text=True,
check=True,
)
self.executable = executable
return executable
except subprocess.CalledProcessError as e:
raise Exception(f"Compilation Error: {e.stderr}")
finally:
os.unlink(c_file)
def run(self, code: str) -> tuple[str, str]:
try:
executable = self.compile(code)
result = subprocess.run([executable], capture_output=True, text=True)
return result.stdout, result.stderr
finally:
if self.executable:
try:
os.unlink(self.executable)
except OSError:
pass
def size_to_str(n):
if n < 1024:
return f"{n} B"
elif n < 1024 * 1024:
return f"{n / 1024} KB"
else:
return f"{n / 1024 / 1024} MB"
def bench(c_env, n, M):
stdout, stderr = c_env.run(
f"""
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
void cache_test(int32_t* a, int32_t* idx) {{
int32_t sum = 0;
for (int32_t i = 0; i < {n}; i++) {{
sum += a[idx[i]];
}}
if (sum == -12345) printf("DONT OPT ME", sum);
}}
int main(int argc, char **argv) {{
static int32_t a[{n}];
static int32_t idx[{n}];
struct timespec start, end;
double duration_ns;
for (int j = 0; j < {M}; j++) {{
for (int32_t i = 0; i < {n}; i++) {{
a[i] = i;
idx[i] = i;
}}
for (int32_t i = {n}-1; i > 0; i--) {{
int32_t j = rand() % (i + 1);
int32_t temp = idx[i];
idx[i] = idx[j];
idx[j] = temp;
}}
clock_gettime(CLOCK_MONOTONIC, &start);
cache_test(a, idx);
clock_gettime(CLOCK_MONOTONIC, &end);
duration_ns = (end.tv_sec - start.tv_sec) * 1e9 + (end.tv_nsec - start.tv_nsec);
printf("%.0f ", duration_ns);
}}
return 0;
}}
"""
)
return np.array(stdout.strip().split(" ")).astype(float)
M = 100
log_ns = np.arange(12, 24)
ns = 2**log_ns
working_sets = 8 * ns # 2 arrays of n elements of 4 bytes each
times = np.zeros((len(log_ns), M))
c_env = CEnv()
for i, logn in enumerate(log_ns):
n = 2**logn
print(f"2**{logn} = {size_to_str(n)}")
times[i] = bench(c_env, n, M)
plt.loglog(base=2)
plt.grid()
plt.plot(working_sets, np.median(times, axis=1) / ns, "-o")
plt.axvline(2**22, linestyle="--", label=f"L2 Cache")
plt.axvline(2**16, linestyle="--", label=f"L1 Cache")
plt.fill_between(
working_sets,
np.quantile(times, 0.025, axis=1) / ns,
np.quantile(times, 0.975, axis=1) / ns,
alpha=0.1,
)
plt.xticks(working_sets, [size_to_str(ws) for ws in working_sets], rotation=45)
plt.legend()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment