Last active
March 13, 2023 03:18
-
-
Save savanovich/f07eda9dba9300eb9ccf to your computer and use it in GitHub Desktop.
Using RDTSC instruction that returns CPU TSC (Time Stamp Counter)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
https://en.wikipedia.org/wiki/Time_Stamp_Counter | |
https://ru.wikipedia.org/wiki/Rdtsc | |
*/ | |
#include <stdio.h> | |
typedef unsigned long long uint64; | |
/*
 * Print eleven consecutive raw Time Stamp Counter readings, one per line.
 * RDTSC delivers the low 32 bits in EAX and the high 32 bits in EDX; the
 * two halves are merged into one 64-bit value before printing.
 */
int main() {
    unsigned int low_half, high_half;
    unsigned long long ticks;
    int sample = 0;

    while (sample <= 10) {
        __asm__ __volatile__("rdtsc" : "=a" (low_half), "=d" (high_half));
        /* Widen before shifting so the high half is not shifted out. */
        ticks = (((unsigned long long)high_half) << 32) | ((unsigned long long)low_half);
        printf("%llu \n", ticks);
        sample++;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// http://developers.redhat.com/blog/2016/03/11/practical-micro-benchmarking-with-ltrace-and-sched/ | |
/* One drawback of the RDTSC instruction is that the CPU is allowed to reorder
it relative to other instructions, which causes noise in our results. Fortunately,
Intel has provided an RDTSCP instruction that is more deterministic: it waits for
earlier instructions to execute before reading the counter. We'll pair these with
a CPUID instruction, which is serializing and so acts as a barrier (CPUID before
RDTSC at the start, RDTSCP before CPUID at the end), resulting in this: */
/*
 * Read the Time Stamp Counter at the START of a measured region.
 *
 * CPUID is executed first so that earlier instructions have completed
 * before RDTSC samples the counter.
 *
 * Returns the 64-bit TSC value (EDX:EAX) as an int64_t.
 */
static __inline__ int64_t rdtsc_s(void)
{
    unsigned lo, hi;
    /* CPUID reads EAX as its leaf selector; pin it to leaf 0 so the
       instruction (and its latency) is the same on every call. */
    unsigned leaf = 0, b, c, d;

    /* CPUID overwrites EAX..EDX; declare them as outputs so the compiler
       never keeps live values there.  "memory" stops the compiler from
       moving loads/stores across the timestamp. */
    __asm__ __volatile__("cpuid"
                         : "+a" (leaf), "=b" (b), "=c" (c), "=d" (d)
                         :
                         : "memory");
    __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi) : : "memory");

    (void)leaf; (void)b; (void)c; (void)d;

    /* Widen to exactly 64 bits before shifting: 'unsigned long' is only
       32 bits on some ABIs, where a shift by 32 is undefined. */
    return (int64_t)((((uint64_t)hi) << 32) | (uint64_t)lo);
}
/*
 * Read the Time Stamp Counter at the END of a measured region.
 *
 * RDTSCP samples the counter only after preceding instructions have
 * executed; the following CPUID keeps later instructions from starting
 * before the sample is taken.
 *
 * Returns the 64-bit TSC value (EDX:EAX) as an int64_t.
 */
static __inline__ int64_t rdtsc_e(void)
{
    unsigned lo, hi, aux;
    /* CPUID reads EAX as its leaf selector; pin it to leaf 0. */
    unsigned leaf = 0, b, c, d;

    /* RDTSCP also writes IA32_TSC_AUX into ECX, so ECX must be listed as
       an output or the compiler may assume it is preserved. */
    __asm__ __volatile__("rdtscp"
                         : "=a" (lo), "=d" (hi), "=c" (aux)
                         :
                         : "memory");
    /* CPUID overwrites EAX..EDX; declare them all as outputs. */
    __asm__ __volatile__("cpuid"
                         : "+a" (leaf), "=b" (b), "=c" (c), "=d" (d)
                         :
                         : "memory");

    (void)aux; (void)leaf; (void)b; (void)c; (void)d;

    /* Widen to exactly 64 bits before shifting: 'unsigned long' is only
       32 bits on some ABIs, where a shift by 32 is undefined. */
    return (int64_t)((((uint64_t)hi) << 32) | (uint64_t)lo);
}
. . . | |
/* Usage sketch: take one timestamp immediately before and one immediately
   after the code under test; the difference between the two readings is
   stored as the tick count for that single call. */
clocks_before = rdtsc_s (); | |
p = malloc (i); /* Test goes here */ | |
clocks_after = rdtsc_e (); | |
clocks_per_malloc = clocks_after - clocks_before; | |
// let the OS use CPU #0 | |
// boot options: | |
// linux . . . isolcpus=1,2,3,4,5,6,7 | |
// check: | |
// taskset -p $$ | |
// Interrupt affinity: | |
// cd /proc/irq | |
// for i in */smp_affinity; do echo 1 > $i; done | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks a lot for this. U just saved my life