Last active
March 5, 2025 21:00
-
-
Save mmozeiko/98bb947fb5a9d5b8a695adf503308a58 to your computer and use it in GitHub Desktop.
armv8 timer & cycle counter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#if defined(__linux__) | |
# define _GNU_SOURCE | |
# include <sched.h> | |
# include <unistd.h> | |
# include <sys/syscall.h> | |
# include <linux/perf_event.h> | |
#elif defined(_WIN32) | |
# include <intrin.h> | |
# include <windows.h> | |
#else | |
# error not supported | |
#endif | |
#include <stdint.h> | |
#include <stdbool.h> | |
// | |
// fixed frequency counter, always available | |
// | |
// Returns the current value of CNTVCT_EL0, the counter-timer virtual count
// register. Use armv8_cntfrq() to obtain the matching frequency register.
static inline uint64_t armv8_cntvct(void)
{
#if defined(__linux__)
    uint64_t ticks;
    __asm__ __volatile__("mrs %0, cntvct_el0" : "=r"(ticks));
    return ticks;
#elif defined(_WIN32)
    // the system register encoding comes from "Accessing CNTVCT_EL0" in
    // https://developer.arm.com/documentation/ddi0601/latest/AArch64-Registers/CNTVCT-EL0--Counter-timer-Virtual-Count-Register
    return (uint64_t)_ReadStatusReg(ARM64_SYSREG(3, 3, 14, 0, 2));
#endif
}
// Returns the value of CNTFRQ_EL0, the counter-timer frequency register,
// i.e. the rate that goes with the ticks reported by armv8_cntvct().
static inline uint64_t armv8_cntfrq(void)
{
#if defined(__linux__)
    uint64_t frequency;
    __asm__ __volatile__("mrs %0, cntfrq_el0" : "=r"(frequency));
    return frequency;
#elif defined(_WIN32)
    // the system register encoding comes from "Accessing CNTFRQ_EL0" in
    // https://developer.arm.com/documentation/ddi0601/latest/AArch64-Registers/CNTFRQ-EL0--Counter-timer-Frequency-Register
    return (uint64_t)_ReadStatusReg(ARM64_SYSREG(3, 3, 14, 0, 0));
#endif
}
// | |
// performance monitor cycle counter, pinned to one core | |
// on Linux requires extra setup - read comments below | |
// | |
#if defined(__linux__)
// perf event descriptor stored by armv8_tsc_init() on success.
// Starts at -1 rather than the implicit 0, because 0 is a valid descriptor
// (stdin): calling armv8_tsc_done() without a successful init must not close it.
static int armv8_perf_fd = -1;
// affinity mask the thread had before armv8_tsc_init() pinned it to one core
static cpu_set_t armv8_perf_mask;
#elif defined(_WIN32)
// affinity mask the thread had before armv8_tsc_init() pinned it to one core
static DWORD_PTR armv8_thread_mask;
#endif
static inline bool armv8_tsc_init(void) | |
{ | |
#if defined(__linux__) | |
int core = sched_getcpu(); | |
sched_getaffinity(0, sizeof(cpu_set_t), &armv8_perf_mask); | |
cpu_set_t set; | |
CPU_ZERO(&set); | |
CPU_SET(core, &set); | |
sched_setaffinity(0, sizeof(cpu_set_t), &set); | |
struct perf_event_attr attr = | |
{ | |
.size = sizeof(attr), | |
.type = PERF_TYPE_HARDWARE, | |
.config = PERF_COUNT_HW_CPU_CYCLES, | |
.config1 = 1 | 2, // 1=64-bit counters, 2=allow user access | |
.pinned = 1, | |
}; | |
int fd = syscall(__NR_perf_event_open, &attr, 0, core, -1, 0); | |
if (fd < 0) | |
{ | |
// perf not enabled in kernel, or perf requires root | |
// to allow non-root access, run the following: | |
// echo 1 | sudo tee /proc/sys/kernel/perf_event_paranoid | |
return false; | |
} | |
uint64_t value; | |
__asm__ __volatile__("mrs %0, pmuserenr_el0" : "=r"(value)); | |
if (!(value & 4)) | |
{ | |
// PMU not allowed for user-space access, to allow run this: | |
// echo 1 | sudo tee /proc/sys/kernel/perf_user_access | |
close(fd); | |
return false; | |
} | |
armv8_perf_fd = fd; | |
return true; | |
#elif defined(_WIN32) | |
armv8_thread_mask = SetThreadAffinityMask(GetCurrentThread(), 1ULL << GetCurrentProcessorNumber()); | |
// no setup needed to access cycle counter from user-space | |
// https://learn.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-170#cycle-counter | |
return true; | |
#else | |
return false; | |
#endif | |
} | |
static inline void armv8_tsc_done(void) | |
{ | |
#if defined(__linux__) | |
close(armv8_perf_fd); | |
sched_setaffinity(0, sizeof(cpu_set_t), &armv8_perf_mask); | |
#elif defined(_WIN32) | |
SetThreadAffinityMask(GetCurrentThread(), armv8_thread_mask); | |
#endif | |
} | |
// Returns the current value of PMCCNTR_EL0, the performance monitors cycle
// count register.
// Call this only after armv8_tsc_init() has returned true, otherwise SIGILL will be raised.
static inline uint64_t armv8_pmccntr(void)
{
#if defined(__linux__)
    uint64_t cycles;
    __asm__ __volatile__("mrs %0, pmccntr_el0" : "=r"(cycles));
    return cycles;
#elif defined(_WIN32)
    // the system register encoding comes from "Accessing PMCCNTR_EL0" in
    // https://developer.arm.com/documentation/ddi0601/latest/AArch64-Registers/PMCCNTR-EL0--Performance-Monitors-Cycle-Count-Register
    return (uint64_t)_ReadStatusReg(ARM64_SYSREG(3, 3, 9, 13, 0));
#endif
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "armv8_tsc.h" | |
#include <stdio.h> | |
// Workload to be measured: one million iterations whose body is a single
// no-op that the compiler is not allowed to remove.
static void loop()
{
    int remaining = 1000000;
    while (remaining-- > 0)
    {
#if defined(_MSC_VER)
        __nop();
#else
        // empty volatile asm statement keeps the iteration from being optimized away
        __asm__ __volatile__("");
#endif
    }
}
// Demo: measures loop() once with the fixed-frequency counter and, when the
// cycle counter can be set up, once more in CPU cycles.
int main()
{
    // fixed-frequency counter measurement
    uint64_t start = armv8_cntvct();
    loop();
    uint64_t stop = armv8_cntvct();
    uint64_t freq = armv8_cntfrq();

    uint64_t elapsed = stop - start;
    printf("cntvct : %zu ticks @ %zu MHz = %.2f msec\n",
           (size_t)elapsed,
           (size_t)(freq / 1000000),
           elapsed * 1000.0 / freq);

    // the cycle counter needs setup that may be refused
    if (!armv8_tsc_init())
    {
        printf("perf not available! not enough privileges?\n");
        return 0;
    }

    uint64_t before = armv8_pmccntr();
    loop();
    uint64_t after = armv8_pmccntr();
    printf("pmccntr: %zu cycles\n", (size_t)(after - before));

    armv8_tsc_done();
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment