Skip to content

Instantly share code, notes, and snippets.

@Boostibot
Last active February 1, 2025 00:12
Show Gist options
  • Save Boostibot/70e56eabe7628569efa0d969c2e70041 to your computer and use it in GitHub Desktop.
Save Boostibot/70e56eabe7628569efa0d969c2e70041 to your computer and use it in GitHub Desktop.
Get RDTSC frequency on linux x86
//Combination of the following:
// original gist: https://gist.github.com/pmttavara/6f06fc5c7679c07375483b06bb77430c
// discussion here: https://hero.handmade.network/forums/code-discussion/t/7485-queryperformancefrequency_returning_10mhz_bug/2#23567
// dump_vdso_data.c: https://gist.github.com/mildsunrise/c63505931534bd3c0e143c0db8cad3f3
//
// Original license:
// SPDX-FileCopyrightText: © 2022 Phillip Trudeau-Tavara <[email protected]>
// SPDX-License-Identifier: 0BSD
// https://linux.die.net/man/2/perf_event_open
// https://stackoverflow.com/a/57835630
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <time.h>
#include <unistd.h>
#include <x86intrin.h>
// Finds the start address of this process's [vvar] mapping by scanning
// /proc/self/maps one line at a time (so the size of the map does not matter).
// Returns NULL, after logging to stderr, when the file cannot be opened or no
// [vvar] line exists.
static inline uint8_t *rdtsc_freq_find_vvar(void) {
    FILE *maps = fopen("/proc/self/maps", "r");
    if (!maps) {
        fprintf(stderr, "get_rdtsc_freq(): could not access own maps\n");
        return NULL;
    }

    uint8_t *vvar_addr = NULL;
    char line[512];
    // fgets() splits over-long lines into several fragments; only a fragment
    // that begins a real line starts with the mapping's address range.
    bool at_line_start = true;
    while (fgets(line, sizeof line, maps)) {
        const bool starts_line = at_line_start;
        const size_t len = strlen(line);
        at_line_start = len > 0 && line[len - 1] == '\n';

        if (starts_line && strstr(line, "[vvar]")) {
            // The line begins with "<start>-<end>" in hex; strtoull stops at '-'.
            vvar_addr = (uint8_t *)(uintptr_t)strtoull(line, NULL, 16);
            break;
        }
    }
    fclose(maps);

    if (vvar_addr == NULL) {
        fprintf(stderr, "get_rdtsc_freq(): could not find [vvar] mapping\n");
    }
    return vvar_addr;
}

// Fast path 1: derive the TSC frequency from the mult/shift pair stored in the
// kernel's vdso data page. Returns 0 when the mapping is not found or the
// values read are unusable.
static inline uint64_t rdtsc_freq_from_vvar(void) {
    // Just the start of the vdso_data structure (that is all we need).
    typedef struct vdso_data_start_t {
        uint32_t seq;
        int32_t  clock_mode;
        uint64_t cycle_last;
        uint64_t mask;
        uint32_t mult;
        uint32_t shift;
    } vdso_data_start_t;

    // Offset of vdso_data inside the [vvar] mapping, per the original author's
    // note (this copy does not get its mult and shift adjusted continuously).
    // NOTE(review): this layout is a kernel implementation detail - confirm it
    // for the kernel versions you target.
    enum { VDSO_DATA_OFFSET = 128 };

    const uint8_t *vvar_addr = rdtsc_freq_find_vvar();
    if (!vvar_addr) {
        return 0;
    }

    // Copy out instead of dereferencing a casted pointer (no aliasing issues).
    vdso_data_start_t data;
    memcpy(&data, vvar_addr + VDSO_DATA_OFFSET, sizeof data);

    if (data.shift >= 64 || data.mult == 0) {
        return 0;
    }
    // ns = (tsc * mult) >> shift  =>  tsc per second = (1e9 << shift) / mult
    return (uint64_t)(((__uint128_t)1000000000ull << data.shift) / data.mult);
}

// Fast path 2: open a perf event and read time_mult/time_shift from its
// kernel-mapped metadata page (this may require higher privileges and might
// fail). Returns 0 on any failure.
static inline uint64_t rdtsc_freq_from_perf(void) {
    enum { PERF_PAGE_SIZE = 4096 };

    struct perf_event_attr pe = {0};
    pe.type = PERF_TYPE_HARDWARE;
    pe.size = sizeof(struct perf_event_attr);
    pe.config = PERF_COUNT_HW_INSTRUCTIONS;
    pe.disabled = 1;
    pe.exclude_kernel = 1;
    pe.exclude_hv = 1;

    // perf_event_open(attr, pid = 0, cpu = -1, group_fd = -1, flags = 0).
    // SYS_perf_event_open gives the right number on every architecture
    // instead of the x86_64-only literal 298.
    const long fd = syscall(SYS_perf_event_open, &pe, 0, -1, -1, 0UL);
    if (fd < 0) {
        return 0;
    }

    uint64_t freq = 0;
    void *page = mmap(NULL, PERF_PAGE_SIZE, PROT_READ, MAP_SHARED, (int)fd, 0);
    // mmap() reports failure with MAP_FAILED, not NULL.
    if (page != MAP_FAILED) {
        const struct perf_event_mmap_page *pc = (const struct perf_event_mmap_page *)page;
        if (pc->cap_user_time == 1 && pc->time_mult != 0 && pc->time_shift < 64) {
            // docs say nanoseconds = (tsc * time_mult) >> time_shift
            // set nanoseconds = 1000000000 = 1 second in nanoseconds, solve for tsc
            // => tsc = (1000000000 << time_shift) / time_mult
            freq = (uint64_t)(((__uint128_t)1000000000ull << pc->time_shift) / pc->time_mult);
        }
        munmap(page, PERF_PAGE_SIZE);
    }
    close((int)fd);
    return freq;
}

// Slow path: measure how many TSC ticks elapse during a ~10 ms sleep timed
// with CLOCK_MONOTONIC_RAW. Returns 0 if the clock cannot be read or no time
// appears to have elapsed.
static inline uint64_t rdtsc_freq_from_sleep(void) {
    struct timespec t;

    // Get time before sleep
    if (clock_gettime(CLOCK_MONOTONIC_RAW, &t) != 0) {
        return 0;
    }
    const uint64_t nsc_begin = (uint64_t)t.tv_sec * 1000000000ull + (uint64_t)t.tv_nsec;
    const uint64_t tsc_begin = __rdtsc();

    usleep(10000); // 10ms gives ~4.5 digits of precision - the longer you sleep, the more precise you get

    // Get time after sleep
    if (clock_gettime(CLOCK_MONOTONIC_RAW, &t) != 0) {
        return 0;
    }
    const uint64_t nsc_end = (uint64_t)t.tv_sec * 1000000000ull + (uint64_t)t.tv_nsec;
    const uint64_t tsc_end = __rdtsc();

    const uint64_t nsc_elapsed = nsc_end - nsc_begin;
    if (nsc_elapsed == 0) {
        return 0; // avoid dividing by zero
    }
    // Extrapolate the ticks elapsed in 1 second; widen so the product cannot wrap.
    return (uint64_t)((__uint128_t)(tsc_end - tsc_begin) * 1000000000ull / nsc_elapsed);
}

// Returns the RDTSC frequency in ticks per second.
//
// Strategies are tried in order until one yields a non-zero value:
//   1. mult/shift from the vdso data page ([vvar] mapping),
//   2. time_mult/time_shift from a perf_event mmap page,
//   3. timing a 10 ms sleep against CLOCK_MONOTONIC_RAW,
//   4. a fixed fallback of 1000000000.
// The result is cached in a function-local static, so later calls return
// immediately. Diagnostics for each failed strategy go to stderr.
static inline uint64_t get_rdtsc_freq(void) {
    // Cache the answer so that multiple calls never take the slow path more than once
    static uint64_t tsc_freq = 0;
    if (tsc_freq) {
        return tsc_freq;
    }

    uint64_t freq = rdtsc_freq_from_vvar();

    if (!freq) {
        fprintf(stderr, "get_rdtsc_freq(): get_vvar_address() failed attempting to get perf event info\n");
        freq = rdtsc_freq_from_perf();
    }

    if (!freq) {
        fprintf(stderr, "get_rdtsc_freq(): all other modes failed timing sleep\n");
        freq = rdtsc_freq_from_sleep();
    }

    // Failure case
    if (!freq) {
        freq = 1000000000;
    }

    tsc_freq = freq;
    return tsc_freq;
}
#include <stdio.h>
#include <math.h>
// Demo driver: times a 10 ms sleep with both RDTSC and CLOCK_MONOTONIC_RAW,
// converts the tick count to time using get_rdtsc_freq(), and prints both
// measurements, their absolute difference, and the detected frequency.
int main() {
    printf("Timing 10 ms...\n");

    struct timespec ts;

    // Sample the wall clock first, then the TSC, on both sides of the sleep.
    clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
    const uint64_t wall_start_ns = (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
    const uint64_t tsc_start = __rdtsc();

    usleep(10000);

    clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
    const uint64_t wall_stop_ns = (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
    const uint64_t tsc_stop = __rdtsc();

    // The frequency is queried only after both samples were taken.
    const uint64_t freq = get_rdtsc_freq();

    const uint64_t tsc_ticks = tsc_stop - tsc_start;
    const uint64_t wall_ns = wall_stop_ns - wall_start_ns;

    printf("...took %f ms (rdtsc)\n", tsc_ticks * 1e3 / freq);
    printf("...took %f ms (clock_gettime)\n", wall_ns * 1e-6);
    printf("=> error:%lf ns (keep in mind the two started at different times)\n",
           fabs((tsc_ticks * 1e9 / freq) - wall_ns));
    printf("RDTSC frequency is %lu (%.4f GHz).\n", (unsigned long)freq, freq * 1e-9);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment