Last active
July 12, 2024 12:37
-
-
Save azat/622fa1f9a5d8e7d546ee9d294501961d to your computer and use it in GitHub Desktop.
How many syscalls per second can you make on Linux?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ lscpu -J | jq '.lscpu.[] | select(.field == "Model name:").data' -r | |
AMD Ryzen Threadripper PRO 5975WX 32-Cores | |
$ musl-clang -static -O3 -o test-syscalls test-syscalls.cpp | |
$ ldd test-syscalls | |
not a dynamic executable | |
$ ./test-syscalls | |
gettid: 48735.848us, 20518777.061 cps, 48.736 ns per call | |
clock_gettime (VDSO): 20660.516us, 48401501.686 cps, 20.661 ns per call | |
nothing_indirect_call: 0.020us, 50000000000000.000 cps, 0.000 ns per call | |
nothing: 0.020us, 50000000000000.000 cps, 0.000 ns per call | |
nothing_no_optimize: 220.223us, 4540851772.976 cps, 0.220 ns per call |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ lscpu -J | jq '.lscpu.[] | select(.field == "Model name:").data' -r | |
AMD Ryzen Threadripper PRO 5975WX 32-Cores | |
$ musl-clang -O3 -o test-syscalls test-syscalls.cpp | |
$ file test-syscalls | |
test-syscalls: ELF 64-bit LSB pie executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib/ld-musl-x86_64.so.1, BuildID[sha1]=f0af0be6a9a0a426059fc81a786541891f711335, not stripped | |
$ ./test-syscalls | |
gettid: 45955.213us, 21760316.942 cps, 45.955 ns per call | |
clock_gettime (VDSO): 20291.234us, 49282364.986 cps, 20.291 ns per call | |
nothing_indirect_call: 0.020us, 50000000000000.000 cps, 0.000 ns per call | |
nothing: 0.020us, 50000000000000.000 cps, 0.000 ns per call | |
nothing_no_optimize: 225.633us, 4431975819.140 cps, 0.226 ns per call |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ lscpu -J | jq '.lscpu.[] | select(.field == "Model name:").data' -r | |
AMD Ryzen Threadripper PRO 5975WX 32-Cores | |
$ clang -static -O3 -o test-syscalls test-syscalls.cpp | |
$ ldd test-syscalls | |
not a dynamic executable | |
$ ./test-syscalls | |
gettid: 44307.483us, 22569551.062 cps, 44.307 ns per call | |
clock_gettime (VDSO): 19663.977us, 50854412.614 cps, 19.664 ns per call | |
nothing_indirect_call: 0.020us, 50000000000000.000 cps, 0.000 ns per call | |
nothing: 0.020us, 50000000000000.000 cps, 0.000 ns per call | |
nothing_no_optimize: 220.233us, 4540645588.990 cps, 0.220 ns per call |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <ctime> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <time.h> | |
#include <syscall.h> | |
#include <unistd.h> | |
/// Returns the current CLOCK_MONOTONIC_RAW reading folded into a single
/// nanosecond count (seconds * 1e9 + nanoseconds).
///
/// @return nanoseconds on success; 0 if clock_gettime() fails, because the
///         timespec is value-initialized and left untouched in that case.
uint64_t now_ns()
{
    constexpr uint64_t ns_per_sec = 1'000'000'000ULL;

    /// Value-initialized so that a failed call never leads to reading
    /// indeterminate memory below.
    struct timespec ts{};
    clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
    return static_cast<uint64_t>(ts.tv_sec) * ns_per_sec + static_cast<uint64_t>(ts.tv_nsec);
}
/// Issues a raw SYS_gettid syscall and returns its result converted to
/// uint64_t.
uint64_t getThreadId()
{
    const auto tid = syscall(SYS_gettid);
    return static_cast<uint64_t>(tid);
}
/// Calls clock_gettime() once for the requested clock and discards both the
/// timestamp and the return code.
///
/// @param clock  clock id forwarded unchanged to clock_gettime()
void getClockGetTime(clockid_t clock)
{
    struct timespec discarded;
    static_cast<void>(clock_gettime(clock, &discarded));
}
/// Deliberately empty free function; main() hands it to run_bench() as the
/// "nothing_indirect_call" case.
void nothing_indirect() {}
/// Stateless function object whose call operator has an empty body; main()
/// uses it for the "nothing" benchmark case.
struct Nothing
{
    void operator()() const
    {
    }
};
/// Stateless function object that does no useful work but whose call still
/// contains a statement the compiler is asked to keep: an empty
/// `asm volatile`. main() uses it for the "nothing_no_optimize" case.
struct NothingNoOptimize
{
    void operator()() const
    {
        /// Idea from old version of folly
        ///
        /// The operand is write-only ("=r") so the uninitialized variable is
        /// never read by the asm statement; `volatile` is what keeps the
        /// statement in place.
        int sink;
        asm volatile ("" : "=r" (sink));
    }
};
template <class F> | |
uint64_t run_bench(const char * name, const F & f, size_t iterations = 1'000'000) | |
{ | |
uint64_t start = now_ns(); | |
for (size_t i = 0; i < iterations; ++i) | |
f(); | |
uint64_t end = now_ns(); | |
uint64_t elapsed = end - start; | |
printf("%s: %.3fus, %.3f cps, %.3f ns per call\n", | |
name, | |
elapsed/1e3, | |
iterations/(elapsed/1e9), | |
(double)elapsed/iterations); | |
return elapsed; | |
} | |
int main() | |
{ | |
run_bench("gettid", getThreadId); | |
/// clock_gettime() in VDSO (so it is 2x faster then regular syscall) | |
run_bench("clock_gettime(CLOCK_MONOTONIC) (VDSO)", [&]() { getClockGetTime(CLOCK_MONOTONIC); }); | |
run_bench("clock_gettime(CLOCK_MONOTONIC_COARSE) (VDSO)", [&]() { getClockGetTime(CLOCK_MONOTONIC_COARSE); }); | |
run_bench("clock_gettime(CLOCK_MONOTONIC_RAW) (VDSO)", [&]() { getClockGetTime(CLOCK_MONOTONIC_RAW); }); | |
run_bench("nothing_indirect_call", nothing_indirect); | |
run_bench("nothing", Nothing()); | |
run_bench("nothing_no_optimize", NothingNoOptimize()); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ lscpu -J | jq '.lscpu.[] | select(.field == "Model name:").data' -r | |
AMD Ryzen Threadripper PRO 5975WX 32-Cores | |
$ clang++ test-syscalls.cpp -o test-syscalls -O3 -g3 | |
$ ./test-syscalls | |
gettid: 45611.8us, 2.19241e+07cps, 45.6118ns per call | |
clock_gettime: 21192.4us, 4.71867e+07cps, 21.1924ns per call | |
nothing_indirect_call: 1165.02us, 8.58358e+08cps, 1.16501ns per call | |
nothing: 0.02us, 5e+13cps, 2e-05ns per call | |
nothing_no_optimize: 233.818us, 4.27683e+09cps, 0.233818ns per call |
Author
azat
commented
Jul 12, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment