
@Boostibot
Last active January 31, 2025 18:38
custom implementation of CLOCK_REALTIME to show what's actually going on
#include <stdatomic.h>
#include <stdint.h>
#include <stdlib.h> //for strtoull
#include <time.h>
#include <stdio.h>
#include <string.h>
typedef struct vdso_timestamp_t {
    uint64_t sec;
    uint64_t nsec;
} vdso_timestamp_t;
#define CS_HRES_COARSE 0
#define CS_RAW 1
#define CS_BASES (CS_RAW + 1)
typedef struct vdso_data_t {
    uint32_t seq;
    int32_t clock_mode;
    uint64_t cycle_last;
    uint64_t mask;
    uint32_t mult;
    uint32_t shift;
    union {
        vdso_timestamp_t basetime[12];
        //struct timens_offset offset[12];
    };
    int32_t tz_minuteswest;
    int32_t tz_dsttime;
    uint32_t hrtimer_res;
    uint32_t __unused;
} vdso_data_t;
typedef struct Clock_Info {
    vdso_timestamp_t* offset_ptr;
    vdso_data_t* data;
    vdso_data_t* coarse;
    uint64_t nominal_freq;
    uint64_t mult;
    uint64_t init_state;
} Clock_Info;
#define CLOCK_REALTIME 0
#define CLOCK_MONOTONIC 1
#define CLOCK_PROCESS_CPUTIME_ID 2
#define CLOCK_THREAD_CPUTIME_ID 3
#define CLOCK_MONOTONIC_RAW 4
#define CLOCK_REALTIME_COARSE 5
#define CLOCK_MONOTONIC_COARSE 6
#define CLOCK_BOOTTIME 7
#define CLOCK_REALTIME_ALARM 8
#define CLOCK_BOOTTIME_ALARM 9
#define CLOCK_SGI_CYCLE 10
#define CLOCK_TAI 11
#include <x86intrin.h>
Clock_Info global_clock_info = {0};
void prepare_clock_info()
{
    //Find where the vvar page is mapped within this process.
    // This is a special page that is periodically updated by the kernel
    // with the precise time from some other hardware components which I haven't looked into.
    //The raw TSC time is not fully accurate and drifts by around 1000ns every second (on my machine).
    //Because of this most things prefer to talk in terms of ns. If we get a timestamp in ns
    // we are sure that we are accurate to at least the frequency of the kernel updating the precise time
    // (and then some because we use the TSC to "interpolate"). Then when we get some other timepoint in ns
    // and calculate the difference we know that the error is at most 2x the error of a single time point.
    // With the raw TSC we have no such guarantee.
    // see: https://gist.github.com/mildsunrise/c63505931534bd3c0e143c0db8cad3f3
    uint8_t* vvar_addr = NULL;
    {
        // quickly parse /proc/self/maps to find the [vvar] mapping
        char mmaps [4096*4] = {0};
        FILE* mmapsfile = fopen("/proc/self/maps", "r");
        if (!mmapsfile)
            fprintf(stderr, "prepare_clock_info(): could not access own maps\n");
        else
        {
            size_t nread = fread(mmaps, 1, sizeof(mmaps)-1, mmapsfile);
            fclose(mmapsfile);
            if(nread > 0)
            {
                for (char* line = mmaps; line != NULL;) {
                    char* next_line = strchr(line, '\n');
                    if (next_line != NULL)
                        *(next_line++) = 0;
                    if (strstr(line, "[vvar]")) {
                        vvar_addr = (uint8_t*) (void*) strtoull(line, NULL, 16);
                        break;
                    }
                    line = next_line;
                }
            }
            if(vvar_addr == NULL)
                fprintf(stderr, "prepare_clock_info(): could not find [vvar] mapping\n");
        }
    }
    if(vvar_addr) {
        vdso_data_t *vdso_data = (vdso_data_t *)(vvar_addr + 128);
        //get the vdso data and calculate the frequency of the TSC - we use the CS_RAW vdso_data
        // because the mult in CS_HRES_COARSE gets periodically updated by the kernel - we don't want that.
        //The kernel calculates ns from cycles using: ns = (cycles*mult) >> shift
        // so we just set ns = 1000000000 (1 second) and factor out cycles.
        uint64_t nominal_freq = (uint64_t) (((__uint128_t)1000000000ull << vdso_data[CS_RAW].shift) / vdso_data[CS_RAW].mult);
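        //Worked example with hypothetical numbers (not taken from any real machine):
        // with shift = 24 and mult = 5592405 this gives
        // (1000000000 << 24)/5592405 ~= 3000000178, i.e. a roughly 3 GHz TSC; after
        // the rounding step below it becomes exactly 3000000000.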
        //round to something reasonable to get rid of the inaccuracy from the mult-shift -> freq conversion
        uint64_t rounding = 2000;
        nominal_freq = (nominal_freq + rounding/2)/rounding*rounding;
        //calculate a Windows-style mult constant for a 64 bit shift.
        //We will calculate ns from cycles using: ns = (cycles*mult) >> 64
        // which is fast because cpus can directly get the high part of a 64 bit multiplication.
        //We essentially treat mult as a 64.64 bit fixed point number.
        uint64_t mult_shift_64 = 0;
        {
            //see: https://elixir.bootlin.com/barebox/v2023.12.0/source/common/clock.c#L138
            uint64_t time_period = 1000000000;
            __uint128_t temp = ((__uint128_t)time_period << 64) + nominal_freq/2;
            mult_shift_64 = (uint64_t) (temp / nominal_freq);
        }
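        //Continuing the hypothetical 3 GHz example from above: mult_shift_64 comes out as
        // ((1000000000 << 64) + 1500000000)/3000000000 = 6148914691236517205 ~= 2^64/3,
        // so later (cycles * mult_shift_64) >> 64 ~= cycles/3 - exactly what converting
        // cycles at 3 GHz to ns should do.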
        global_clock_info.data = vdso_data;
        global_clock_info.offset_ptr = &vdso_data[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE];
        global_clock_info.coarse = &vdso_data[CS_HRES_COARSE];
        global_clock_info.mult = mult_shift_64;
        global_clock_info.nominal_freq = nominal_freq;
        global_clock_info.init_state = 1;
    }
}
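//Note: vdso_data_t also has a clock_mode field which this code never checks. The
// kernel's own vDSO reader consults it to make sure the TSC path is actually usable
// (i.e. that the current clocksource really is the TSC); checking it here as well
// would be a sensible hardening step.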
uint64_t custom_clock_realtime_ns()
{
    //despite this loop looking scary the raw __rdtsc() call takes around
    // 90% of the cpu time here. You can try benchmarking this function against
    // clock_gettime(CLOCK_REALTIME, &t) and raw __rdtsc() calls.
    //see: https://elixir.bootlin.com/linux/v5.9/source/lib/vdso/gettimeofday.c#L107
    //init and handle errors
    if(global_clock_info.init_state == 0)
    {
        prepare_clock_info();
        if(global_clock_info.init_state == 0)
            return 0;
    }
    //do the time reading
    //The kernel periodically goes around and updates all the relevant clock data:
    // namely it sets the actual real time as measured by some external hardware
    // and records the TSC at that instant.
    //We can then read this real time and use it, only adjusting it by the elapsed
    // time between the current TSC and the TSC recorded by the kernel.
    //Also whenever the kernel updates this data it increments the seq variable.
    //We can use that to see if something has changed before we are done reading everything.
    // If it has we simply try again.
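    //Note: the kernel's own reader (see the gettimeofday.c link above) additionally
    // treats an odd seq as "update in progress" and spins until it turns even; this
    // simplified version only retries when seq changed between the two reads.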
    uint32_t seq = global_clock_info.coarse->seq;
    for(;;) {
        atomic_thread_fence(memory_order_acquire);
        uint64_t now = __rdtsc();
        uint64_t offset_s = global_clock_info.offset_ptr->sec;
        uint64_t offset_ns = global_clock_info.offset_ptr->nsec;
        uint64_t last_clock = global_clock_info.coarse->cycle_last;
        atomic_thread_fence(memory_order_release);
        uint32_t new_seq = global_clock_info.coarse->seq;
        //if something has changed go again.
        // If not do the actual computation.
        if(new_seq == seq)
        {
            uint64_t diff = 0;
            if(now > last_clock)
                diff = now - last_clock;
            uint64_t diff_ns = (uint64_t) (((__uint128_t)diff * global_clock_info.mult) >> 64);
            uint64_t realtime_offset_ns = offset_s*1000000000ull + offset_ns;
            return realtime_offset_ns + diff_ns;
        }
        seq = new_seq;
    }
}
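//A minimal benchmark sketch (an addition, not part of the original gist) in the
// spirit of the comment at the top of custom_clock_realtime_ns(): it times N
// back-to-back calls of a uint64_t-returning clock function. The iteration count
// and the sink-based defense against dead-code elimination are assumptions.
uint64_t bench_clock_ns_per_call(uint64_t (*clock_fn)(void), int iters)
{
    struct timespec t0, t1;
    uint64_t sink = 0;
    clock_gettime(CLOCK_MONOTONIC, &t0);
    for(int i = 0; i < iters; i++)
        sink += clock_fn();
    clock_gettime(CLOCK_MONOTONIC, &t1);
    //keep sink observable so the compiler cannot delete the loop
    if(sink == 1) fprintf(stderr, "unlikely\n");
    int64_t elapsed = (int64_t)(t1.tv_sec - t0.tv_sec)*1000000000ll + (t1.tv_nsec - t0.tv_nsec);
    return (uint64_t)elapsed/(uint64_t)iters;
}
//usage (hypothetical): printf("%llu ns/call\n", (unsigned long long) bench_clock_ns_per_call(custom_clock_realtime_ns, 1000000));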
uint64_t custom_clock_realtime_ns_with_kernel_adjustements()
{
    //This function is just like the previous one except we take advantage of the
    // kernel's TSC frequency scaling. As stated above the TSC drifts a bit and becomes
    // inaccurate after some time. We fix that by periodically taking the real time
    // into account.
    //However the kernel also tries to guess the "real" frequency of the TSC, by
    // simply comparing the current value of the TSC and the current time and determining
    // what the frequency should be so that the two match up. For the kernel side see:
    // https://github.com/torvalds/linux/blob/d3d90cc2891c9cf4ecba7b85c0af716ab755c7e5/kernel/time/timekeeping.c#L1929
    //Luckily all we need to do is use the updated values which are in the CS_HRES_COARSE clock source.
    if(global_clock_info.init_state == 0)
    {
        prepare_clock_info();
        if(global_clock_info.init_state == 0)
            return 0;
    }
    uint32_t seq = global_clock_info.coarse->seq;
    for(;;) {
        atomic_thread_fence(memory_order_acquire);
        uint64_t now = __rdtsc();
        uint64_t offset_s = global_clock_info.offset_ptr->sec;
        uint64_t offset_ns = global_clock_info.offset_ptr->nsec;
        uint64_t mult = global_clock_info.data[CS_HRES_COARSE].mult;
        uint64_t shift = global_clock_info.data[CS_HRES_COARSE].shift;
        uint64_t last_clock = global_clock_info.coarse->cycle_last;
        atomic_thread_fence(memory_order_release);
        uint32_t new_seq = global_clock_info.coarse->seq;
        if(new_seq == seq)
        {
            uint64_t diff = 0;
            if(now > last_clock)
                diff = now - last_clock;
            uint64_t diff_ns = (diff*mult) >> shift;
            uint64_t realtime_offset_ns = offset_s*1000000000ull + offset_ns;
            return realtime_offset_ns + diff_ns;
        }
        seq = new_seq;
    }
}
#include <unistd.h>
int main()
{
    for(;;) {
        uint64_t realtime_custom = 0;
        uint64_t realtime_os = 0;
        struct timespec t;
        clock_gettime(CLOCK_REALTIME, &t);
        realtime_custom = custom_clock_realtime_ns();
        realtime_os = (uint64_t)t.tv_sec*1000000000ull + t.tv_nsec;
        printf("os: %llu\n", (unsigned long long) realtime_os);
        printf("custom: %llu\n", (unsigned long long) realtime_custom);
        printf("diff: %lli\n", (long long) (realtime_os - realtime_custom));
        clock_gettime(CLOCK_REALTIME, &t);
        realtime_custom = custom_clock_realtime_ns_with_kernel_adjustements();
        realtime_os = (uint64_t)t.tv_sec*1000000000ull + t.tv_nsec;
        printf("os: %llu\n", (unsigned long long) realtime_os);
        printf("custom_adj: %llu\n", (unsigned long long) realtime_custom);
        printf("diff: %lli\n", (long long) (realtime_os - realtime_custom));
        usleep(500000);
    }
}
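//(Assumed build/run line, not stated in the gist: cc -O2 thisfile.c -o clock && ./clock
// on x86-64 Linux. The [vvar] layout is kernel-version dependent - see the comment below
// about CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT shifting the field offsets.)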
@mmozeiko
mmozeiko commented Jan 31, 2025

That does not handle the CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT build option - it shifts the mult/shift field offsets, which is what happens on my newer Arch kernel machine.
