Last active
January 31, 2025 18:38
-
-
Save Boostibot/3058caba7e5009bc7dfd180195cf6fcd to your computer and use it in GitHub Desktop.
Custom implementation of CLOCK_REALTIME to show what's actually going on
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdatomic.h> | |
#include <stdint.h> | |
#include <time.h> | |
#include <stdio.h> | |
#include <string.h> | |
//Mirror of the kernel's vdso_timestamp (seconds + nanoseconds pair) as it
// appears in the vdso data page. Layout must match the running kernel.
typedef struct vdso_timestamp_t {
    uint64_t sec;
    uint64_t nsec;
} vdso_timestamp_t;

//Clock-source indices into the vdso data array:
// CS_HRES_COARSE holds the kernel-adjusted (mult-corrected) values,
// CS_RAW holds the raw TSC ones.
#define CS_HRES_COARSE 0
#define CS_RAW 1
#define CS_BASES (CS_RAW + 1)
//Mirror of the kernel's struct vdso_data (see include/vdso/datapage.h in the
// kernel tree). The field layout must match the running kernel *exactly* -
// any mismatch across kernel versions/configs silently breaks the readers
// below (see the note about CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT shifting
// the mult/shift offsets on newer kernels).
typedef struct vdso_data_t {
    uint32_t seq;          //seqlock counter: odd while the kernel is mid-update
    int32_t clock_mode;
    uint64_t cycle_last;   //TSC value sampled at the last kernel timekeeping update
    uint64_t mask;
    uint32_t mult;         //ns = (cycles * mult) >> shift
    uint32_t shift;
    union {
        vdso_timestamp_t basetime[12]; //one base timestamp per clock id (CLOCK_REALTIME, ...)
        //struct timens_offset offset[12];
    };
    int32_t tz_minuteswest;
    int32_t tz_dsttime;
    uint32_t hrtimer_res;
    uint32_t __unused;
} vdso_data_t;
//Process-local cache of pointers into the vdso page plus precomputed constants,
// filled in once by prepare_clock_info().
typedef struct Clock_Info {
    vdso_timestamp_t* offset_ptr; //points at basetime[CLOCK_REALTIME_COARSE] inside the vdso page
    vdso_data_t* data;            //the vdso data array (CS_HRES_COARSE and CS_RAW entries)
    vdso_data_t* coarse;          //shortcut to &data[CS_HRES_COARSE]
    uint64_t nominal_freq;        //rounded TSC frequency in Hz
    uint64_t mult;                //64.64 fixed-point ns-per-cycle multiplier (ns = (cycles*mult) >> 64)
    uint64_t init_state;          //0 = not initialized, 1 = ready
} Clock_Info;
//Linux clock ids (same numeric values as the kernel uapi), used to index basetime[].
#define CLOCK_REALTIME 0
#define CLOCK_MONOTONIC 1
#define CLOCK_PROCESS_CPUTIME_ID 2
#define CLOCK_THREAD_CPUTIME_ID 3
#define CLOCK_MONOTONIC_RAW 4
#define CLOCK_REALTIME_COARSE 5
#define CLOCK_MONOTONIC_COARSE 6
#define CLOCK_BOOTTIME 7
#define CLOCK_REALTIME_ALARM 8
#define CLOCK_BOOTTIME_ALARM 9
#define CLOCK_SGI_CYCLE 10
#define CLOCK_TAI 11

#include <x86intrin.h> //for __rdtsc()

//Lazily initialized global clock state; see prepare_clock_info().
Clock_Info global_clock_info = {0};
void prepare_clock_info() | |
{ | |
//Find where the vvar is mapped to within this process. | |
// This is a special page thats is periodically updated by the kernel | |
// with the precise time from some other hardware components which I havent looked into. | |
//The raw TSC time is not fully accurate and drifts by like 1000ns every second (on my machine). | |
//Because of this most things prefer to talk in terms of ns. If we get a timestep in ns | |
// we are sure that we are accurate to at least the frequency of the kernel updating the precise time | |
// (and then some because we use TSC to "interpolate"). Then when we get some other timepoint in ns | |
// and calculate the difference we know that the error is at max 2x the error of a single time point. | |
// With TSC raw we have no such gurantee. | |
// see: https://gist.github.com/mildsunrise/c63505931534bd3c0e143c0db8cad3f3 | |
uint8_t* vvar_addr = NULL; | |
{ | |
// quickly parse /proc/self/maps to find [vvar] mapping | |
char mmaps [4096*4] = {0}; | |
FILE* mmapsfile = fopen("/proc/self/maps", "r"); | |
if (!mmapsfile) | |
fprintf(stderr, "get_rdtsc_freq(): could not access own maps\n"); | |
else | |
{ | |
size_t nread = fread(mmaps, 1, sizeof(mmaps)-1, mmapsfile); | |
fclose(mmapsfile); | |
if(nread > 0) | |
{ | |
for (char* line = mmaps; line != NULL;) { | |
char* next_line = strchr(line, '\n'); | |
if (next_line != NULL) | |
*(next_line++) = 0; | |
if (strstr(line, "[vvar]")) { | |
vvar_addr = (uint8_t*) (void*) strtoull(line, NULL, 16); | |
break; | |
} | |
line = next_line; | |
} | |
} | |
if(vvar_addr == NULL) | |
fprintf(stderr, "get_rdtsc_freq(): could not find [vvar] mapping\n"); | |
} | |
} | |
if(vvar_addr) { | |
vdso_data_t *vdso_data = (vdso_data_t *)(vvar_addr + 128); | |
//get the vdso data and calculate frequency of TSC - we use the CS_RAW vdso_data | |
// because the mult in CS_HRES_COARSE gets periodically updated by the kernel - we dont want that. | |
//The kernel calucates ns from cycles using: ns = (cycles*mult) >> shift | |
// so we just set ns = 1000000000 (1 second) and factor out cycles. | |
uint64_t nominal_freq = (uint64_t) (((__uint128_t)1000000000ull << vdso_data[CS_RAW].shift) / vdso_data[CS_RAW].mult); | |
//round to something reasonable to get rid of the inaccuracy from the mult-shift -> freq conversion | |
uint64_t rounding = 2000; | |
nominal_freq = (nominal_freq + rounding/2)/rounding*rounding; | |
//calculate windows style mult constant for 64 bit shift. | |
//We will calculate ns from cycles using: ns = (cycles*mult) >> 64 | |
// which is fast because cpus can directly get the high part of 64 bit multiplcation. | |
//We essentially treat mult as 64.64 bit fixed point. | |
uint64_t mult_shift_64 = 0; | |
{ | |
//see: https://elixir.bootlin.com/barebox/v2023.12.0/source/common/clock.c#L138 | |
uint64_t time_period = 1000000000; | |
__uint128_t temp = ((__uint128_t)time_period << 64) + nominal_freq/2; | |
mult_shift_64 = (uint64_t) (temp / nominal_freq); | |
} | |
global_clock_info.data = vdso_data; | |
global_clock_info.offset_ptr = &vdso_data[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE]; | |
global_clock_info.coarse = &vdso_data[CS_HRES_COARSE]; | |
global_clock_info.mult = mult_shift_64; | |
global_clock_info.nominal_freq = nominal_freq; | |
global_clock_info.init_state = 1; | |
} | |
} | |
uint64_t custom_clock_realtime_ns() | |
{ | |
//despite this loop looking scary the raw __rdtsc() call takes like | |
// 90% of the cpu time here. You can try benchmarking this function against | |
// clock_gettime(CLOCK_REALTIME, &t) and raw __rdtsc() calls. | |
//see: https://elixir.bootlin.com/linux/v5.9/source/lib/vdso/gettimeofday.c#L107 | |
//init and handle errors | |
if(global_clock_info.init_state == 0) | |
{ | |
prepare_clock_info(); | |
if(global_clock_info.init_state == 0) | |
return 0; | |
} | |
//do the time reading | |
//The kernel periodically goes around and updates all the relevant clock data. | |
// namely it sets the actual real time as measure by some external hardware | |
// and sets that instnatnt TSC. | |
//We can then read this real time and use it, only adjusting it by the ellapsed | |
// time between current TSC and the TSC set by the kernel. | |
//Also whenever the kernel updates this data it increments the seq variable. | |
//We can use that to see if something has changed before we are done reading everythign. | |
// If it has we simply try again. | |
uint32_t seq = global_clock_info.coarse->seq; | |
for(;;) { | |
atomic_thread_fence(memory_order_acquire); | |
uint64_t now = __rdtsc(); | |
uint64_t offset_s = global_clock_info.offset_ptr->sec; | |
uint64_t offset_ns = global_clock_info.offset_ptr->nsec; | |
uint64_t last_clock = global_clock_info.coarse->cycle_last; | |
atomic_thread_fence(memory_order_release); | |
uint32_t new_seq = global_clock_info.coarse->seq; | |
//if something has changed go again. | |
// If not do the actual computation | |
if(new_seq == seq) | |
{ | |
uint64_t diff = 0; | |
if(now > last_clock) | |
diff = now - last_clock; | |
uint64_t diff_ns = (uint64_t) (((__uint128_t)diff * global_clock_info.mult) >> 64); | |
uint64_t realtime_offset_ns = offset_s*1000000000ull + offset_ns; | |
return realtime_offset_ns + diff_ns; | |
} | |
seq = new_seq; | |
} | |
} | |
uint64_t custom_clock_realtime_ns_with_kernel_adjustements() | |
{ | |
//This function is just like the prevous one except we take advantage of the | |
// kernel TSC frequency scaling. As stated above the TSC drifts a bit and becomes | |
// innacurate after some time. We fix it by periodically taking into count the | |
// real time. | |
//However the kernel also tries to guess the "real" frequency of the TSC, by | |
// simply comparing the current value of TSC and current time and determining | |
// what the frequency should be so that the two match up. For the kernel side see: | |
// https://github.com/torvalds/linux/blob/d3d90cc2891c9cf4ecba7b85c0af716ab755c7e5/kernel/time/timekeeping.c#L1929 | |
//Luckily all we need to do is use the update values which are in CS_HRES_COARSE clock source. | |
if(global_clock_info.init_state == 0) | |
{ | |
prepare_clock_info(); | |
if(global_clock_info.init_state == 0) | |
return 0; | |
} | |
uint32_t seq = global_clock_info.coarse->seq; | |
for(;;) { | |
atomic_thread_fence(memory_order_acquire); | |
uint64_t now = __rdtsc(); | |
uint64_t offset_s = global_clock_info.offset_ptr->sec; | |
uint64_t offset_ns = global_clock_info.offset_ptr->nsec; | |
uint64_t mult = global_clock_info.data[CS_HRES_COARSE].mult; | |
uint64_t shift = global_clock_info.data[CS_HRES_COARSE].shift; | |
uint64_t last_clock = global_clock_info.coarse->cycle_last; | |
atomic_thread_fence(memory_order_release); | |
uint32_t new_seq = global_clock_info.coarse->seq; | |
if(new_seq == seq) | |
{ | |
uint64_t diff = 0; | |
if(now > last_clock) | |
diff = now - last_clock; | |
uint64_t diff_ns = (diff*mult) >> shift; | |
uint64_t realtime_offset_ns = offset_s*1000000000ull + offset_ns; | |
return realtime_offset_ns + diff_ns; | |
} | |
seq = new_seq; | |
} | |
} | |
#include <unistd.h> | |
int main() | |
{ | |
for(;;) { | |
uint64_t realtime_custom = 0; | |
uint64_t realtime_os = 0; | |
struct timespec t; | |
clock_gettime(CLOCK_REALTIME, &t); | |
realtime_custom = custom_clock_realtime_ns(); | |
realtime_os = (uint64_t)t.tv_sec*1000000000ull + t.tv_nsec; | |
printf("os: %llu\n", (unsigned long long) realtime_os); | |
printf("custom: %llu\n", (unsigned long long) realtime_custom); | |
printf("diff: %lli\n", (long long) (realtime_os - realtime_custom)); | |
clock_gettime(CLOCK_REALTIME, &t); | |
realtime_custom = custom_clock_realtime_ns_with_kernel_adjustements(); | |
realtime_os = (uint64_t)t.tv_sec*1000000000ull + t.tv_nsec; | |
printf("os: %llu\n", (unsigned long long) realtime_os); | |
printf("custom_adj: %llu\n", (unsigned long long) realtime_custom); | |
printf("diff: %lli\n", (long long) (realtime_os - realtime_custom)); | |
usleep(500000); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This does not handle the CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT build option — that option changes the mult/shift offsets in the vdso data, which is what happens on my newer Arch kernel machine.