Single-file spall.h to capture profiling info for a C program compiled with clang
//
// NOTE(nick): modified version of spall.h to make it a single-file header library
//
// Usage:
#if 0
// 1) Include this in your C source:
#include "spall.h"

// 2) Call the setup functions:
int main() {
    spall_init_profile("profile.json");
    spall_init_thread(0, 10 * 1024 * 1024, 1000);
    // run your code...
    spall_exit_thread();
    spall_exit_profile();
    return 0;
}

// 3) In any new thread you spawn, call:
void mythread() {
    spall_init_thread((uint32_t)(uint64_t)pthread_self(), 10 * 1024 * 1024, 1000);
    // run your code...
    spall_exit_thread();
}
#endif
//
// 4) Compile with (libraries go after the sources so the linker resolves them correctly):
// > clang -finstrument-functions -rdynamic -O3 sample_program.c -o instrument_test -ldl -lpthread
//
// 5) View the output here: https://gravitymoth.com/spall/spall.html
// (or in any viewer that understands Google's JSON trace format)
//
// @See: https://github.com/colrdavidson/spall-web/blob/master/spall.h
//
// SPDX-FileCopyrightText: © 2023 Phillip Trudeau-Tavara <[email protected]>
// SPDX-License-Identifier: MIT
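//
// NOTE: with -finstrument-functions every function in your translation units gets
// enter/exit hooks, which can be costly for tiny hot functions. A sketch of opting a
// single function out (the function name is hypothetical; the attribute is the standard
// clang/gcc one that SPALL_NOINSTRUMENT below wraps):
#if 0
__attribute__((no_instrument_function))
static int add_fast_path(int a, int b) {
    return a + b; // runs without emitting begin/end events
}
#endif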
/*
TODO: Optional Helper APIs:
- Compression API: would require a mutexed lockable context (yuck...)
- Either using a ZIP library, a name cache + TIDPID cache, or both (but ZIP is likely more than enough!!!)
- begin()/end() writes compressed chunks to a caller-determined destination
- The destination can be the buffered-writing API or a custom user destination
- Ultimately need to take a lock with some granularity... can that be the caller's responsibility?
- Counter Event: should allow tracking arbitrary named values with a single event, for memory and frame profiling
- Ring-buffer API
spall_ring_init
spall_ring_emit_begin
spall_ring_emit_end
spall_ring_flush
*/
#ifndef SPALL_H
#define SPALL_H
#if !defined(_MSC_VER) || defined(__clang__)
#define SPALL_NOINSTRUMENT __attribute__((no_instrument_function))
#define SPALL_FORCEINLINE __attribute__((always_inline))
#else
#define _CRT_SECURE_NO_WARNINGS
#define SPALL_NOINSTRUMENT // Can't noinstrument on MSVC!
#define SPALL_FORCEINLINE __forceinline
#endif
// NOTE: _GNU_SOURCE must be in effect before the first libc header is included so that
// <dlfcn.h> declares dladdr()/Dl_info on glibc (used by the instrumentation code below).
#if defined(__linux__) && !defined(_GNU_SOURCE)
#define _GNU_SOURCE
#endif
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#define SPALL_FN static inline SPALL_NOINSTRUMENT
#define SPALL_MIN(a, b) (((a) < (b)) ? (a) : (b))
#pragma pack(push, 1)
typedef struct SpallHeader {
    uint64_t magic_header; // = 0x0BADF00D
    uint64_t version;      // = 1
    double   timestamp_unit;
    uint64_t must_be_0;
} SpallHeader;

enum {
    SpallEventType_Invalid             = 0,
    SpallEventType_Custom_Data         = 1, // Basic readers can skip this.
    SpallEventType_StreamOver          = 2,
    SpallEventType_Begin               = 3,
    SpallEventType_End                 = 4,
    SpallEventType_Instant             = 5,
    SpallEventType_Overwrite_Timestamp = 6, // Retroactively change timestamp units - useful for incrementally improving RDTSC frequency.
    SpallEventType_Pad_Skip            = 7,
};

typedef struct SpallBeginEvent {
    uint8_t  type; // = SpallEventType_Begin
    uint8_t  category;
    uint32_t pid;
    uint32_t tid;
    double   when;
    uint8_t  name_length;
    uint8_t  args_length;
} SpallBeginEvent;

typedef struct SpallBeginEventMax {
    SpallBeginEvent event;
    char name_bytes[255];
    char args_bytes[255];
} SpallBeginEventMax;

typedef struct SpallEndEvent {
    uint8_t  type; // = SpallEventType_End
    uint32_t pid;
    uint32_t tid;
    double   when;
} SpallEndEvent;

typedef struct SpallPadSkipEvent {
    uint8_t  type; // = SpallEventType_Pad_Skip
    uint32_t size;
} SpallPadSkipEvent;
#pragma pack(pop)
typedef struct SpallProfile SpallProfile;
// Important!: If you define your own callbacks, mark them SPALL_NOINSTRUMENT!
typedef bool (*SpallWriteCallback)(SpallProfile *self, const void *data, size_t length);
typedef bool (*SpallFlushCallback)(SpallProfile *self);
typedef void (*SpallCloseCallback)(SpallProfile *self);
struct SpallProfile {
    double timestamp_unit;
    bool   is_json;
    SpallWriteCallback write;
    SpallFlushCallback flush;
    SpallCloseCallback close;
    void *data;
};

// Important!: If you are writing Begin/End events, then do NOT write
// events for the same PID + TID pair on different buffers!!!
typedef struct SpallBuffer {
    void  *data;
    size_t length;

    // Internal data - don't assign this
    size_t head;
    SpallProfile *ctx;
} SpallBuffer;
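//
// A minimal sketch of wiring up custom callbacks with spall_init_callbacks() (defined
// below). The names my_sink_write/my_sink_flush/my_sink_close/my_file are hypothetical;
// the important part is that every callback is marked SPALL_NOINSTRUMENT so the
// callbacks themselves don't generate trace events.
#if 0
SPALL_NOINSTRUMENT static bool my_sink_write(SpallProfile *self, const void *data, size_t length) {
    return fwrite(data, 1, length, (FILE *)self->data) == length; // self->data is the userdata pointer
}
SPALL_NOINSTRUMENT static bool my_sink_flush(SpallProfile *self) {
    return fflush((FILE *)self->data) == 0;
}
SPALL_NOINSTRUMENT static void my_sink_close(SpallProfile *self) {
    fclose((FILE *)self->data);
}
// SpallProfile ctx = spall_init_callbacks(timestamp_unit, my_sink_write, my_sink_flush,
//                                         my_sink_close, (void *)my_file, /*is_json*/ false);
#endif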
#ifdef __cplusplus
extern "C" {
#endif
#if defined(SPALL_BUFFER_PROFILING) && !defined(SPALL_BUFFER_PROFILING_GET_TIME)
#error "You must #define SPALL_BUFFER_PROFILING_GET_TIME() to profile buffer flushes."
#endif
SPALL_FN SPALL_FORCEINLINE void spall__buffer_profile(SpallProfile *ctx, SpallBuffer *wb, double spall_time_begin, double spall_time_end, const char *name, int name_len);
#ifdef SPALL_BUFFER_PROFILING
#define SPALL_BUFFER_PROFILE_BEGIN() double spall_time_begin = (SPALL_BUFFER_PROFILING_GET_TIME())
// Don't call this with anything other than a string literal
#define SPALL_BUFFER_PROFILE_END(name) spall__buffer_profile(ctx, wb, spall_time_begin, (SPALL_BUFFER_PROFILING_GET_TIME()), "" name "", sizeof("" name "") - 1)
#else
#define SPALL_BUFFER_PROFILE_BEGIN()
#define SPALL_BUFFER_PROFILE_END(name)
#endif
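// To profile the buffer flushes themselves, define the two macros below before including
// this header. A sketch, assuming a hypothetical my_get_ticks() that returns timestamps in
// the same units you pass as `when` to the emit calls:
#if 0
#define SPALL_BUFFER_PROFILING
#define SPALL_BUFFER_PROFILING_GET_TIME() ((double)my_get_ticks())
#include "spall.h"
#endif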
SPALL_FN SPALL_FORCEINLINE bool spall__file_write(SpallProfile *ctx, const void *p, size_t n) {
if (!ctx->data) return false;
#ifdef SPALL_DEBUG
if (feof((FILE *)ctx->data)) return false;
if (ferror((FILE *)ctx->data)) return false;
#endif
if (fwrite(p, n, 1, (FILE *)ctx->data) != 1) return false;
return true;
}
SPALL_FN bool spall__file_flush(SpallProfile *ctx) {
if (!ctx->data) return false;
if (fflush((FILE *)ctx->data)) return false;
return true;
}
SPALL_FN void spall__file_close(SpallProfile *ctx) {
if (!ctx->data) return;
if (ctx->is_json) {
#ifdef SPALL_DEBUG
if (!feof((FILE *)ctx->data) && !ferror((FILE *)ctx->data))
#endif
{
fseek((FILE *)ctx->data, -2, SEEK_CUR); // seek back to overwrite trailing comma
fwrite("\n]}\n", sizeof("\n]}\n") - 1, 1, (FILE *)ctx->data);
}
}
fflush((FILE *)ctx->data);
fclose((FILE *)ctx->data);
ctx->data = NULL;
}
SPALL_FN SPALL_FORCEINLINE bool spall__buffer_flush(SpallProfile *ctx, SpallBuffer *wb) {
// precon: wb
// precon: wb->data
// precon: wb->head <= wb->length
// precon: !ctx || ctx->write
#ifdef SPALL_DEBUG
if (wb->ctx != ctx) return false; // Buffer must be bound to this context (or to NULL)
#endif
if (wb->head && ctx) {
SPALL_BUFFER_PROFILE_BEGIN();
if (!ctx->write) return false;
if (ctx->write == spall__file_write) {
if (!spall__file_write(ctx, wb->data, wb->head)) return false;
} else {
if (!ctx->write(ctx, wb->data, wb->head)) return false;
}
SPALL_BUFFER_PROFILE_END("Buffer Flush");
}
wb->head = 0;
return true;
}
SPALL_FN SPALL_FORCEINLINE bool spall__buffer_write(SpallProfile *ctx, SpallBuffer *wb, void *p, size_t n) {
// precon: !wb || wb->head < wb->length
// precon: !ctx || ctx->write
if (!wb) return ctx->write && ctx->write(ctx, p, n);
#ifdef SPALL_DEBUG
if (wb->ctx != ctx) return false; // Buffer must be bound to this context (or to NULL)
#endif
if (wb->head + n > wb->length && !spall__buffer_flush(ctx, wb)) return false;
if (n > wb->length) {
SPALL_BUFFER_PROFILE_BEGIN();
if (!ctx->write || !ctx->write(ctx, p, n)) return false;
SPALL_BUFFER_PROFILE_END("Unbuffered Write");
return true;
}
memcpy((char *)wb->data + wb->head, p, n);
wb->head += n;
return true;
}
SPALL_FN bool spall_buffer_flush(SpallProfile *ctx, SpallBuffer *wb) {
#ifdef SPALL_DEBUG
if (!wb) return false;
if (!wb->data) return false;
#endif
if (!spall__buffer_flush(ctx, wb)) return false;
return true;
}
SPALL_FN bool spall_buffer_init(SpallProfile *ctx, SpallBuffer *wb) {
if (!spall_buffer_flush(NULL, wb)) return false;
wb->ctx = ctx;
return true;
}
SPALL_FN bool spall_buffer_quit(SpallProfile *ctx, SpallBuffer *wb) {
if (!spall_buffer_flush(ctx, wb)) return false;
wb->ctx = NULL;
return true;
}
SPALL_FN bool spall_buffer_abort(SpallBuffer *wb) {
if (!wb) return false;
wb->ctx = NULL;
if (!spall__buffer_flush(NULL, wb)) return false;
return true;
}
SPALL_FN size_t spall_build_header(void *buffer, size_t rem_size, double timestamp_unit) {
size_t header_size = sizeof(SpallHeader);
if (header_size > rem_size) {
return 0;
}
SpallHeader *header = (SpallHeader *)buffer;
header->magic_header = 0x0BADF00D;
header->version = 1;
header->timestamp_unit = timestamp_unit;
header->must_be_0 = 0;
return header_size;
}
SPALL_FN SPALL_FORCEINLINE size_t spall_build_begin(void *buffer, size_t rem_size, const char *name, signed long name_len, const char *args, signed long args_len, double when, uint32_t tid, uint32_t pid) {
SpallBeginEventMax *ev = (SpallBeginEventMax *)buffer;
uint8_t trunc_name_len = (uint8_t)SPALL_MIN(name_len, 255); // will be interpreted as truncated in the app (?)
uint8_t trunc_args_len = (uint8_t)SPALL_MIN(args_len, 255); // will be interpreted as truncated in the app (?)
size_t ev_size = sizeof(SpallBeginEvent) + trunc_name_len + trunc_args_len;
if (ev_size > rem_size) {
return 0;
}
ev->event.type = SpallEventType_Begin;
ev->event.category = 0;
ev->event.pid = pid;
ev->event.tid = tid;
ev->event.when = when;
ev->event.name_length = trunc_name_len;
ev->event.args_length = trunc_args_len;
memcpy(ev->name_bytes, name, trunc_name_len);
memcpy(ev->name_bytes + trunc_name_len, args, trunc_args_len);
return ev_size;
}
SPALL_FN SPALL_FORCEINLINE size_t spall_build_end(void *buffer, size_t rem_size, double when, uint32_t tid, uint32_t pid) {
size_t ev_size = sizeof(SpallEndEvent);
if (ev_size > rem_size) {
return 0;
}
SpallEndEvent *ev = (SpallEndEvent *)buffer;
ev->type = SpallEventType_End;
ev->pid = pid;
ev->tid = tid;
ev->when = when;
return ev_size;
}
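// The spall_build_* helpers above only serialize events into caller-owned memory; nothing
// is written anywhere. A sketch of driving them by hand with a hypothetical scratch array
// and timestamps from your own clock (each helper returns 0 if the event doesn't fit,
// which is ignored here for brevity):
#if 0
char scratch[4096];
size_t used = 0;
used += spall_build_header(scratch, sizeof(scratch), /*timestamp_unit*/ 1.0);
used += spall_build_begin(scratch + used, sizeof(scratch) - used,
                          "my_region", 9, "", 0, /*when*/ 100.0, /*tid*/ 0, /*pid*/ 0);
used += spall_build_end(scratch + used, sizeof(scratch) - used, /*when*/ 250.0, 0, 0);
// ...hand scratch[0..used) to whatever transport you like (file, socket, ring buffer).
#endif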
SPALL_FN void spall_quit(SpallProfile *ctx) {
if (!ctx) return;
if (ctx->close) ctx->close(ctx);
memset(ctx, 0, sizeof(*ctx));
}
SPALL_FN SpallProfile spall_init_callbacks(double timestamp_unit,
SpallWriteCallback write,
SpallFlushCallback flush,
SpallCloseCallback close,
void *userdata,
bool is_json) {
SpallProfile ctx;
memset(&ctx, 0, sizeof(ctx));
if (timestamp_unit < 0) return ctx;
ctx.timestamp_unit = timestamp_unit;
ctx.is_json = is_json;
ctx.data = userdata;
ctx.write = write;
ctx.flush = flush;
ctx.close = close;
if (ctx.is_json) {
if (!ctx.write(&ctx, "{\"traceEvents\":[\n", sizeof("{\"traceEvents\":[\n") - 1)) { spall_quit(&ctx); return ctx; }
} else {
SpallHeader header;
size_t len = spall_build_header(&header, sizeof(header), timestamp_unit);
if (!ctx.write(&ctx, &header, len)) { spall_quit(&ctx); return ctx; }
}
return ctx;
}
SPALL_FN SpallProfile spall_init_file_ex(const char *filename, double timestamp_unit, bool is_json) {
SpallProfile ctx;
memset(&ctx, 0, sizeof(ctx));
if (!filename) return ctx;
ctx.data = fopen(filename, "wb"); // TODO: handle utf8 and long paths on windows
if (ctx.data) { // basically freopen() but we don't want to force users to lug along another macro define
fclose((FILE *)ctx.data);
ctx.data = fopen(filename, "ab");
}
if (!ctx.data) { spall_quit(&ctx); return ctx; }
ctx = spall_init_callbacks(timestamp_unit, spall__file_write, spall__file_flush, spall__file_close, ctx.data, is_json);
return ctx;
}
SPALL_FN SpallProfile spall_init_file (const char* filename, double timestamp_unit) { return spall_init_file_ex(filename, timestamp_unit, false); }
SPALL_FN SpallProfile spall_init_file_json(const char* filename, double timestamp_unit) { return spall_init_file_ex(filename, timestamp_unit, true); }
SPALL_FN bool spall_flush(SpallProfile *ctx) {
#ifdef SPALL_DEBUG
if (!ctx) return false;
#endif
if (!ctx->flush || !ctx->flush(ctx)) return false;
return true;
}
SPALL_FN SPALL_FORCEINLINE bool spall_buffer_begin_args(SpallProfile *ctx, SpallBuffer *wb, const char *name, signed long name_len, const char *args, signed long args_len, double when, uint32_t tid, uint32_t pid) {
#ifdef SPALL_DEBUG
if (!ctx) return false;
if (!name) return false;
if (name_len <= 0) return false;
if (!wb) return false;
#endif
if (ctx->is_json) {
char buf[1024];
int buf_len = snprintf(buf, sizeof(buf),
"{\"ph\":\"B\",\"ts\":%f,\"pid\":%u,\"tid\":%u,\"name\":\"%.*s\",\"args\":\"%.*s\"},\n",
when * ctx->timestamp_unit, pid, tid, (int)(uint8_t)name_len, name, (int)(uint8_t)args_len, args);
if (buf_len <= 0) return false;
if (buf_len >= sizeof(buf)) return false;
if (!spall__buffer_write(ctx, wb, buf, buf_len)) return false;
} else {
if ((wb->head + sizeof(SpallBeginEventMax)) > wb->length) {
if (!spall__buffer_flush(ctx, wb)) {
return false;
}
}
wb->head += spall_build_begin((char *)wb->data + wb->head, wb->length - wb->head, name, name_len, args, args_len, when, tid, pid);
}
return true;
}
SPALL_FN SPALL_FORCEINLINE bool spall_buffer_begin_ex(SpallProfile *ctx, SpallBuffer *wb, const char *name, signed long name_len, double when, uint32_t tid, uint32_t pid) {
return spall_buffer_begin_args(ctx, wb, name, name_len, "", 0, when, tid, pid);
}
SPALL_FN bool spall_buffer_begin(SpallProfile *ctx, SpallBuffer *wb, const char *name, signed long name_len, double when) {
return spall_buffer_begin_args(ctx, wb, name, name_len, "", 0, when, 0, 0);
}
SPALL_FN SPALL_FORCEINLINE bool spall_buffer_end_ex(SpallProfile *ctx, SpallBuffer *wb, double when, uint32_t tid, uint32_t pid) {
#ifdef SPALL_DEBUG
if (!ctx) return false;
if (!wb) return false;
#endif
if (ctx->is_json) {
char buf[512];
int buf_len = snprintf(buf, sizeof(buf),
"{\"ph\":\"E\",\"ts\":%f,\"pid\":%u,\"tid\":%u},\n",
when * ctx->timestamp_unit, pid, tid);
if (buf_len <= 0) return false;
if (buf_len >= sizeof(buf)) return false;
if (!spall__buffer_write(ctx, wb, buf, buf_len)) return false;
} else {
if ((wb->head + sizeof(SpallEndEvent)) > wb->length) {
if (!spall__buffer_flush(ctx, wb)) {
return false;
}
}
wb->head += spall_build_end((char *)wb->data + wb->head, wb->length - wb->head, when, tid, pid);
}
return true;
}
SPALL_FN bool spall_buffer_end(SpallProfile *ctx, SpallBuffer *wb, double when) { return spall_buffer_end_ex(ctx, wb, when, 0, 0); }
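// Besides the automatic -finstrument-functions hooks further down, you can bracket regions
// by hand. A sketch, assuming `ctx`/`buffer` were set up with spall_init_file() and
// spall_buffer_init(), and now_ticks() is a hypothetical timer that matches the
// timestamp_unit the profile was initialized with:
#if 0
spall_buffer_begin(&ctx, &buffer, "load_assets", sizeof("load_assets") - 1, (double)now_ticks());
// ... work being measured ...
spall_buffer_end(&ctx, &buffer, (double)now_ticks());
#endif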
SPALL_FN SPALL_FORCEINLINE void spall__buffer_profile(SpallProfile *ctx, SpallBuffer *wb, double spall_time_begin, double spall_time_end, const char *name, int name_len) {
// precon: ctx
// precon: ctx->write
char temp_buffer_data[2048];
SpallBuffer temp_buffer = { temp_buffer_data, sizeof(temp_buffer_data) };
if (!spall_buffer_begin_ex(ctx, &temp_buffer, name, name_len, spall_time_begin, (uint32_t)(uintptr_t)wb->data, 4222222222)) return;
if (!spall_buffer_end_ex(ctx, &temp_buffer, spall_time_end, (uint32_t)(uintptr_t)wb->data, 4222222222)) return;
if (ctx->write) ctx->write(ctx, temp_buffer_data, temp_buffer.head);
}
#ifdef __cplusplus
}
#endif
#endif // SPALL_H
#ifndef INSTRUMENT_H
#define INSTRUMENT_H
#include <stdint.h>
SPALL_FN void spall_init_profile(char *filename);
SPALL_FN void spall_exit_profile(void);
SPALL_FN void spall_init_thread(uint32_t tid, size_t buffer_size, int64_t symbol_cache_size);
SPALL_FN void spall_exit_thread(void);
#endif // INSTRUMENT_H
#ifndef SPALL_NOIMPL
#ifndef INSTRUMENT_C
#define INSTRUMENT_C
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdint.h>
#include <dlfcn.h>
#include <time.h>
#include <pthread.h>
#include <unistd.h>
typedef struct {
char *str;
int len;
} Name;
typedef struct {
void *addr;
Name name;
} SymEntry;
typedef struct {
SymEntry *arr;
uint64_t len;
uint64_t cap;
} SymArr;
typedef struct {
int64_t *arr;
uint64_t len;
} HashArr;
typedef struct {
SymArr entries;
HashArr hashes;
} AddrHash;
static SpallProfile spall_ctx;
static _Thread_local SpallBuffer spall_buffer;
static _Thread_local AddrHash addr_map;
static _Thread_local uint32_t tid;
static _Thread_local bool spall_thread_running = false;
// we're not checking overflow here... Don't do stupid things with input sizes
SPALL_FN uint64_t next_pow2(uint64_t x) {
    return 1ull << (64 - __builtin_clzll(x - 1)); // 1ull so the shift happens in 64 bits
}
// This is not thread-safe... Use one per thread!
SPALL_FN AddrHash ah_init(int64_t size) {
AddrHash ah;
ah.entries.cap = size;
ah.entries.arr = calloc(size, sizeof(SymEntry));
ah.entries.len = 0;
ah.hashes.len = next_pow2(size);
ah.hashes.arr = malloc(sizeof(int64_t) * ah.hashes.len);
for (int64_t i = 0; i < ah.hashes.len; i++) {
ah.hashes.arr[i] = -1;
}
return ah;
}
SPALL_FN void ah_free(AddrHash *ah) {
free(ah->entries.arr);
free(ah->hashes.arr);
memset(ah, 0, sizeof(AddrHash));
}
// fibhash addresses
SPALL_FN int ah_hash(void *addr) {
return (int)(((uint32_t)(uintptr_t)addr) * 2654435769);
}
// Replace me with your platform's addr->name resolver if needed
SPALL_FN bool get_addr_name(void *addr, Name *name_ret) {
Dl_info info;
if (dladdr(addr, &info) != 0 && info.dli_sname != NULL) {
char *str = (char *)info.dli_sname;
*name_ret = (Name){.str = str, .len = strlen(str)};
return true;
}
return false;
}
SPALL_FN bool ah_get(AddrHash *ah, void *addr, Name *name_ret) {
int addr_hash = ah_hash(addr);
uint64_t hv = ((uint64_t)addr_hash) & (ah->hashes.len - 1);
for (uint64_t i = 0; i < ah->hashes.len; i++) {
uint64_t idx = (hv + i) & (ah->hashes.len - 1);
int64_t e_idx = ah->hashes.arr[idx];
if (e_idx == -1) {
Name name;
if (!get_addr_name(addr, &name)) {
// Failed to get a name for the address!
return false;
}
SymEntry entry = {.addr = addr, .name = name};
ah->hashes.arr[idx] = ah->entries.len;
ah->entries.arr[ah->entries.len] = entry;
ah->entries.len += 1;
*name_ret = name;
return true;
}
if ((uint64_t)ah->entries.arr[e_idx].addr == (uint64_t)addr) {
*name_ret = ah->entries.arr[e_idx].name;
return true;
}
}
// The symbol map is full, make the symbol map bigger!
return false;
}
#ifdef __linux__
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <sys/mman.h>
SPALL_FN uint64_t mul_u64_u32_shr(uint64_t cyc, uint32_t mult, uint32_t shift) {
__uint128_t x = cyc;
x *= mult;
x >>= shift;
return x;
}
SPALL_FN long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
int cpu, int group_fd, unsigned long flags) {
return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
}
SPALL_FN uint64_t __rdtsc(void)
{
#if defined(__x86_64__)
uint32_t hi, lo;
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
return ((uint64_t)lo) | (((uint64_t)hi)<<32);
#elif defined(__aarch64__)
uint64_t x;
__asm__ volatile("mrs \t%0, cntvct_el0" : "=r"(x));
return x;
#else
#error "[ReadCPUTimer] Unsupported OS/compiler!"
#endif
return 0;
}
SPALL_FN double ReadOSTimer()
{
double result = 0.0;
struct timespec ts;
if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) == 0) {
result = (double)ts.tv_sec + (double)ts.tv_nsec / 1e9;
}
return result;
}
SPALL_FN uint64_t ProfilerEstimateClocksPerSecond(double estimate_time_seconds)
{
uint64_t start = __rdtsc();
double now = ReadOSTimer();
double then = now + estimate_time_seconds;
while (now < then)
{
now = ReadOSTimer();
}
uint64_t end = __rdtsc();
uint64_t clocks_per_second = (uint64_t)((end - start) / estimate_time_seconds);
return clocks_per_second;
}
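// The multiplier below converts raw tick deltas into microsecond-scale trace timestamps:
// timestamp_unit = 1e6 / clocks_per_second. For example, at an estimated 3 GHz that's
// 1e6 / 3e9 ≈ 0.000333, so a delta of 3,000,000 ticks shows up as ~1000 us in the trace.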
SPALL_FN double get_rdtsc_multiplier() {
// NOTE(nick): hack to measure clocks per second (instead of reading this information directly from the OS)
uint64_t clocks_per_second = ProfilerEstimateClocksPerSecond(0.1);
return 1000000.0 / (double)clocks_per_second;
#if 0
struct perf_event_attr pe = {
.type = PERF_TYPE_HARDWARE,
.size = sizeof(struct perf_event_attr),
.config = PERF_COUNT_HW_INSTRUCTIONS,
.disabled = 1,
.exclude_kernel = 1,
.exclude_hv = 1
};
int fd = perf_event_open(&pe, 0, -1, -1, 0);
if (fd == -1) {
perror("perf_event_open failed");
return 1;
}
void *addr = mmap(NULL, 4*1024, PROT_READ, MAP_SHARED, fd, 0);
if (!addr) {
perror("mmap failed");
return 1;
}
struct perf_event_mmap_page *pc = addr;
if (pc->cap_user_time != 1) {
fprintf(stderr, "Perf system doesn't support user time\n");
return 1;
}
double nanos = (double)mul_u64_u32_shr(1000000, pc->time_mult, pc->time_shift);
return nanos / 1000000000;
#endif
}
#elif __APPLE__
#include <sys/types.h>
#include <sys/sysctl.h>
#include <assert.h>
SPALL_FN uint64_t __rdtsc(void)
{
#if defined(__aarch64__)
uint64_t x;
__asm__ volatile("mrs \t%0, cntvct_el0" : "=r"(x));
return x;
#else
#error "[ReadCPUTimer] Unsupported OS/compiler!"
#endif
return 0;
}
SPALL_FN double get_rdtsc_multiplier() {
uint64_t freq = 0;
size_t size = sizeof(freq);
int ret;
ret = sysctlbyname("machdep.tsc.frequency", &freq, &size, NULL, 0);
if (ret == 0 && freq > 0) {
return 1000000.0 / (double)freq;
}
ret = sysctlbyname("hw.cpufrequency", &freq, &size, NULL, 0);
if (ret == 0 && freq > 0) {
return 1000000.0 / (double)freq;
}
ret = sysctlbyname("hw.tbfrequency", &freq, &size, NULL, 0);
if (ret == 0 && freq > 0) {
return 1000000.0 / (double)freq;
}
fprintf(stderr, "Failed to get CPU frequency\n");
return 1.0;
// uint64_t cps = ProfilerEstimateClocksPerSecond(0.1);
// return 1000000.0 / (cps);
}
#endif
SPALL_FN void spall_init_thread(uint32_t _tid, size_t buffer_size, int64_t symbol_cache_size) {
uint8_t *buffer = (uint8_t *)malloc(buffer_size);
spall_buffer = (SpallBuffer){ .data = buffer, .length = buffer_size };
// removing initial page-fault bubbles to make the data a little more accurate, at the cost of thread spin-up time
memset(buffer, 1, buffer_size);
spall_buffer_init(&spall_ctx, &spall_buffer);
tid = _tid;
addr_map = ah_init(symbol_cache_size);
spall_thread_running = true;
}
SPALL_FN void spall_exit_thread() {
spall_thread_running = false;
ah_free(&addr_map);
spall_buffer_quit(&spall_ctx, &spall_buffer);
free(spall_buffer.data);
}
SPALL_FN void spall_init_profile(char *filename) {
spall_ctx = spall_init_file_json(filename, get_rdtsc_multiplier());
}
SPALL_FN void spall_exit_profile(void) {
spall_quit(&spall_ctx);
}
char not_found[] = "(unknown name)";
SPALL_NOINSTRUMENT void __cyg_profile_func_enter(void *fn, void *caller) {
if (!spall_thread_running) {
return;
}
Name name;
if (!ah_get(&addr_map, fn, &name)) {
name = (Name){.str = not_found, .len = sizeof(not_found) - 1};
}
spall_buffer_begin_ex(&spall_ctx, &spall_buffer, name.str, name.len, __rdtsc(), tid, 0);
}
SPALL_NOINSTRUMENT void __cyg_profile_func_exit(void *fn, void *caller) {
if (!spall_thread_running) {
return;
}
spall_buffer_end_ex(&spall_ctx, &spall_buffer, __rdtsc(), tid, 0);
}
#endif // INSTRUMENT_C
#endif // SPALL_NOIMPL