Last active
February 1, 2025 00:12
-
-
Save Boostibot/70e56eabe7628569efa0d969c2e70041 to your computer and use it in GitHub Desktop.
Get RDTSC frequency on linux x86
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Combination of the following:
//  original gist: https://gist.github.com/pmttavara/6f06fc5c7679c07375483b06bb77430c
//  discussion here: https://hero.handmade.network/forums/code-discussion/t/7485-queryperformancefrequency_returning_10mhz_bug/2#23567
//  dump_vdso_data.c: https://gist.github.com/mildsunrise/c63505931534bd3c0e143c0db8cad3f3
//
// Original license:
//  SPDX-FileCopyrightText: © 2022 Phillip Trudeau-Tavara <[email protected]>
//  SPDX-License-Identifier: 0BSD
// https://linux.die.net/man/2/perf_event_open
// https://stackoverflow.com/a/57835630
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#include <linux/perf_event.h>
#include <x86intrin.h>
// Converts a kernel-style (mult, shift) clock scaling pair into ticks per second.
// Both the vdso data and the perf mmap page use the convention
//     nanoseconds = (ticks * mult) >> shift
// Setting nanoseconds to one second and solving for ticks gives
//     ticks = (1000000000 << shift) / mult
// Returns 0 when the pair is unusable (mult == 0, or a shift of 64 or more).
static inline uint64_t rdtsc_freq_from_mult_shift(uint32_t mult, uint32_t shift) {
    if (mult == 0 || shift >= 64) {
        return 0;
    }
    // 1e9 < 2^30 and shift < 64, so the shifted value always fits in 128 bits.
    return (uint64_t) (((__uint128_t) 1000000000ull << shift) / mult);
}

// Scans /proc/self/maps for the "[vvar]" mapping and returns its start address,
// or NULL when the file cannot be opened or no such mapping is listed.
// The file is read line by line, so the result does not depend on how many
// mappings the process has.
static inline uint8_t *rdtsc_freq_find_vvar(void) {
    FILE *maps = fopen("/proc/self/maps", "r");
    if (maps == NULL) {
        fprintf(stderr, "get_rdtsc_freq(): could not access own maps\n");
        return NULL;
    }

    uint8_t *vvar_addr = NULL;
    char line[512];
    bool at_line_start = true;
    while (fgets(line, sizeof line, maps) != NULL) {
        const bool starts_line = at_line_start;
        // A chunk without '\n' means the line was longer than the buffer; the
        // next chunk is then the tail of that line, not a new mapping entry.
        at_line_start = strchr(line, '\n') != NULL;

        if (starts_line && strstr(line, "[vvar]") != NULL) {
            // Each entry begins with "<start>-<end>" in hex; strtoull stops at '-'.
            vvar_addr = (uint8_t *) (uintptr_t) strtoull(line, NULL, 16);
            break;
        }
    }
    fclose(maps);

    if (vvar_addr == NULL) {
        fprintf(stderr, "get_rdtsc_freq(): could not find [vvar] mapping\n");
    }
    return vvar_addr;
}

// Fast path: derive the frequency from the mult/shift pair stored in the vvar area.
// Returns 0 when the mapping is missing or the values found there are unusable.
// NOTE(review): the +128 offset and the struct layout below are kernel-internal
// details taken from the referenced dump_vdso_data.c, not a stable ABI - confirm
// them against the kernel you are targeting.
static inline uint64_t rdtsc_freq_from_vdso(void) {
    // just the start of the vdso_data structure (since that's all we need)
    typedef struct vdso_data_start_t {
        uint32_t seq;
        int32_t clock_mode;
        uint64_t cycle_last;
        uint64_t mask;
        uint32_t mult;
        uint32_t shift;
    } vdso_data_start_t;

    uint8_t *vvar_addr = rdtsc_freq_find_vvar();
    if (vvar_addr == NULL) {
        return 0;
    }

    // + 128 because that is where the vdso_data starts
    // (this one does not get its mult and shift adjusted continuously)
    const vdso_data_start_t *vdso_data = (const vdso_data_start_t *) (vvar_addr + 128);
    const uint32_t mult = vdso_data->mult;
    const uint32_t shift = vdso_data->shift;
    return rdtsc_freq_from_mult_shift(mult, shift);
}

// Second path: open a (disabled) perf event and read time_mult/time_shift from its
// kernel-mapped metadata page. This may require higher privileges and might fail.
// Returns 0 on any failure or when the kernel does not advertise cap_user_time.
static inline uint64_t rdtsc_freq_from_perf(void) {
    enum { PERF_METADATA_PAGE_SIZE = 4096 };

    struct perf_event_attr pe = {0};
    pe.type = PERF_TYPE_HARDWARE;
    pe.size = sizeof(struct perf_event_attr);
    pe.config = PERF_COUNT_HW_INSTRUCTIONS;
    pe.disabled = 1;
    pe.exclude_kernel = 1;
    pe.exclude_hv = 1;

    // Arguments after the attr: pid = 0, cpu = -1, group_fd = -1, flags = 0.
    // SYS_perf_event_open resolves to the right number for the build target
    // (it is 298 on x86_64 only).
    long fd = syscall(SYS_perf_event_open, &pe, 0, -1, -1, 0UL);
    if (fd == -1) {
        return 0;
    }

    uint64_t freq = 0;
    void *page = mmap(NULL, PERF_METADATA_PAGE_SIZE, PROT_READ, MAP_SHARED, (int) fd, 0);
    // mmap reports failure with MAP_FAILED, not NULL.
    if (page != MAP_FAILED) {
        const struct perf_event_mmap_page *pc = (const struct perf_event_mmap_page *) page;
        if (pc->cap_user_time == 1) {
            // docs say nanoseconds = (tsc * time_mult) >> time_shift
            freq = rdtsc_freq_from_mult_shift(pc->time_mult, pc->time_shift);
        }
        munmap(page, PERF_METADATA_PAGE_SIZE);
    }
    close((int) fd);
    return freq;
}

// Slow path: count RDTSC ticks across a ~10 ms sleep timed with CLOCK_MONOTONIC_RAW
// and extrapolate to one second. Returns 0 if the clock cannot be read or no time
// elapsed between the two readings.
static inline uint64_t rdtsc_freq_from_sleep(void) {
    struct timespec t;

    // Get time before sleep
    if (clock_gettime(CLOCK_MONOTONIC_RAW, &t) != 0) {
        return 0;
    }
    const uint64_t nsc_begin = (uint64_t) t.tv_sec * 1000000000ull + (uint64_t) t.tv_nsec;
    const uint64_t tsc_begin = __rdtsc();

    usleep(10000); // 10ms gives ~4.5 digits of precision - the longer you sleep, the more precise you get

    // Get time after sleep
    if (clock_gettime(CLOCK_MONOTONIC_RAW, &t) != 0) {
        return 0;
    }
    const uint64_t nsc_end = (uint64_t) t.tv_sec * 1000000000ull + (uint64_t) t.tv_nsec;
    const uint64_t tsc_end = __rdtsc();

    const uint64_t nsc_elapsed = nsc_end - nsc_begin;
    if (nsc_elapsed == 0) {
        return 0;
    }

    // Extrapolate the RDTSC ticks elapsed in 1 second. The product is formed in
    // 128 bits so it cannot wrap, however long the sleep actually lasted.
    return (uint64_t) (((__uint128_t) (tsc_end - tsc_begin) * 1000000000ull) / nsc_elapsed);
}

// Returns the RDTSC tick rate in ticks per second.
//
// Strategies are tried in order until one yields a non-zero value:
//   1. mult/shift read from the [vvar] mapping,
//   2. time_mult/time_shift read from a perf event's mmap page,
//   3. timing a 10 ms sleep against CLOCK_MONOTONIC_RAW,
//   4. a hard-coded 1000000000 as last resort.
// Each fallback is announced on stderr. The result is cached in a function-local
// static, so only the first call does any work; access to that cache is not
// synchronized.
static inline uint64_t get_rdtsc_freq(void) {
    // Cache the answer so that multiple calls never take the slow path more than once
    static uint64_t tsc_freq = 0;
    if (tsc_freq) {
        return tsc_freq;
    }

    uint64_t freq = rdtsc_freq_from_vdso();

    if (!freq) {
        fprintf(stderr, "get_rdtsc_freq(): get_vvar_address() failed attempting to get perf event info\n");
        freq = rdtsc_freq_from_perf();
    }

    if (!freq) {
        fprintf(stderr, "get_rdtsc_freq(): all other modes failed timing sleep\n");
        freq = rdtsc_freq_from_sleep();
    }

    // Failure case
    if (!freq) {
        freq = 1000000000;
    }

    tsc_freq = freq;
    return tsc_freq;
}
#include <stdio.h> | |
#include <math.h> | |
// Demo driver: brackets a 10 ms sleep with both RDTSC and CLOCK_MONOTONIC_RAW,
// converts the tick count with get_rdtsc_freq(), and prints how far the two
// measurements are apart along with the detected frequency.
int main() {
    printf("Timing 10 ms...\n");

    struct timespec ts;

    // Opening pair of samples: wall clock first, then the tick counter.
    clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
    uint64_t wall_begin_ns = (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
    uint64_t tsc_begin = __rdtsc();

    usleep(10000);

    // Closing pair of samples, taken in the same order as the opening pair.
    clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
    uint64_t wall_end_ns = (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
    uint64_t tsc_end = __rdtsc();

    uint64_t freq = get_rdtsc_freq();

    uint64_t tsc_ticks = tsc_end - tsc_begin;
    uint64_t wall_ns = wall_end_ns - wall_begin_ns;

    double tsc_ms = tsc_ticks * 1e3 / freq;
    double wall_ms = wall_ns * 1e-6;
    double error_ns = fabs((tsc_ticks * 1e9 / freq) - wall_ns);

    printf("...took %f ms (rdtsc)\n", tsc_ms);
    printf("...took %f ms (clock_gettime)\n", wall_ms);
    printf("=> error:%lf ns (keep in mind the two started at different times)\n",
           error_ns);
    printf("RDTSC frequency is %lu (%.4f GHz).\n", (unsigned long)freq, freq * 1e-9);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment