Last active
April 2, 2025 22:04
-
-
Save nickav/fe5fde07038d3fb97206565402457e07 to your computer and use it in GitHub Desktop.
Single-file spall.h to capture profiling info for a C program in clang
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// NOTE(nick): modified version of spall.h to make it a single-file header library | |
// | |
// Usage: | |
#if 0 | |
// 1) Include this in your C source: | |
#include "spall.h" | |
// 2) Call the setup functions: | |
int main() { | |
spall_init_profile("profile.json"); | |
spall_init_thread(0, 10*1024*1024, 1000); | |
// run your code... | |
spall_exit_thread(); | |
spall_exit_profile(); | |
return 0; | |
} | |
// 3) In any new thread you spawn, call: | |
void mythread() { | |
spall_init_thread((uint32_t)(uint64_t)pthread_self(), 10 * 1024 * 1024, 1000); | |
// run your code... | |
spall_exit_thread(); | |
} | |
#endif | |
// | |
// 4) Compile with: | |
// > clang -finstrument-functions -rdynamic -O3 sample_program.c -o instrument_test -ldl -lpthread
// | |
// 5) View the output here: https://gravitymoth.com/spall/spall.html | |
// (or in any viewer that understands Google's JSON trace format) | |
// | |
// @See: https://github.com/colrdavidson/spall-web/blob/master/spall.h | |
// | |
// SPDX-FileCopyrightText: © 2023 Phillip Trudeau-Tavara <[email protected]> | |
// SPDX-License-Identifier: MIT | |
/* | |
TODO: Optional Helper APIs: | |
- Compression API: would require a mutexed lockable context (yuck...) | |
- Either using a ZIP library, a name cache + TIDPID cache, or both (but ZIP is likely more than enough!!!) | |
- begin()/end() writes compressed chunks to a caller-determined destination | |
- The destination can be the buffered-writing API or a custom user destination | |
- Ultimately need to take a lock with some granularity... can that be the caller's responsibility? | |
- Counter Event: should allow tracking arbitrary named values with a single event, for memory and frame profiling | |
- Ring-buffer API | |
spall_ring_init | |
spall_ring_emit_begin | |
spall_ring_emit_end | |
spall_ring_flush | |
*/ | |
#ifndef SPALL_H | |
#define SPALL_H | |
#if !defined(_MSC_VER) || defined(__clang__) | |
#define SPALL_NOINSTRUMENT __attribute__((no_instrument_function)) | |
#define SPALL_FORCEINLINE __attribute__((always_inline)) | |
#else | |
#define _CRT_SECURE_NO_WARNINGS | |
#define SPALL_NOINSTRUMENT // Can't noinstrument on MSVC! | |
#define SPALL_FORCEINLINE __forceinline | |
#endif | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <string.h> | |
#include <stdbool.h> | |
#define SPALL_FN static inline SPALL_NOINSTRUMENT | |
#define SPALL_MIN(a, b) (((a) < (b)) ? (a) : (b)) | |
// Binary trace record layouts. Everything between pack(push, 1) and pack(pop)
// is byte-packed, so the structs below contain no padding.
#pragma pack(push, 1)

// Header record built by spall_build_header().
typedef struct SpallHeader {
    uint64_t magic_header;   // = 0x0BADF00D
    uint64_t version;        // = 1
    double   timestamp_unit;
    uint64_t must_be_0;
} SpallHeader;

// Tag values carried in the `type` byte of each event record.
enum {
    SpallEventType_Invalid             = 0,
    SpallEventType_Custom_Data         = 1, // Basic readers can skip this.
    SpallEventType_StreamOver          = 2,
    SpallEventType_Begin               = 3,
    SpallEventType_End                 = 4,
    SpallEventType_Instant             = 5,
    SpallEventType_Overwrite_Timestamp = 6, // Retroactively change timestamp units - useful for incrementally improving RDTSC frequency.
    SpallEventType_Pad_Skip            = 7,
};

// Fixed-size prefix of a Begin event; name and args bytes follow it directly.
typedef struct SpallBeginEvent {
    uint8_t  type;        // = SpallEventType_Begin
    uint8_t  category;
    uint32_t pid;
    uint32_t tid;
    double   when;
    uint8_t  name_length;
    uint8_t  args_length;
} SpallBeginEvent;

// Largest possible Begin event: prefix plus 255 name bytes and 255 args bytes.
typedef struct SpallBeginEventMax {
    SpallBeginEvent event;
    char name_bytes[255];
    char args_bytes[255];
} SpallBeginEventMax;

// End event record.
typedef struct SpallEndEvent {
    uint8_t  type;   // = SpallEventType_End
    uint32_t pid;
    uint32_t tid;
    double   when;
} SpallEndEvent;

// Padding/skip record.
typedef struct SpallPadSkipEvent {
    uint8_t  type;   // = SpallEventType_Pad_Skip
    uint32_t size;
} SpallPadSkipEvent;

#pragma pack(pop)
typedef struct SpallProfile SpallProfile;

// Output callbacks used by a profile.
// Important!: If you define your own callbacks, mark them SPALL_NOINSTRUMENT!
typedef bool (*SpallWriteCallback)(SpallProfile *self, const void *data, size_t length);
typedef bool (*SpallFlushCallback)(SpallProfile *self);
typedef void (*SpallCloseCallback)(SpallProfile *self);

// One trace output: its timestamp scale, output format and I/O callbacks.
struct SpallProfile {
    double             timestamp_unit; // multiplied into `when` by the JSON emitters
    bool               is_json;        // true: JSON text events, false: binary records
    SpallWriteCallback write;
    SpallFlushCallback flush;
    SpallCloseCallback close;
    void              *data;           // callback userdata (a FILE * for the file backend)
};
// Important!: If you are writing Begin/End events, then do NOT write | |
// events for the same PID + TID pair on different buffers!!! | |
typedef struct SpallBuffer { | |
void *data; | |
size_t length; | |
// Internal data - don't assign this | |
size_t head; | |
SpallProfile *ctx; | |
} SpallBuffer; | |
#ifdef __cplusplus | |
extern "C" { | |
#endif | |
#if defined(SPALL_BUFFER_PROFILING) && !defined(SPALL_BUFFER_PROFILING_GET_TIME) | |
#error "You must #define SPALL_BUFFER_PROFILING_GET_TIME() to profile buffer flushes." | |
#endif | |
SPALL_FN SPALL_FORCEINLINE void spall__buffer_profile(SpallProfile *ctx, SpallBuffer *wb, double spall_time_begin, double spall_time_end, const char *name, int name_len); | |
#ifdef SPALL_BUFFER_PROFILING | |
#define SPALL_BUFFER_PROFILE_BEGIN() double spall_time_begin = (SPALL_BUFFER_PROFILING_GET_TIME()) | |
// Don't call this with anything other than a string literal | |
#define SPALL_BUFFER_PROFILE_END(name) spall__buffer_profile(ctx, wb, spall_time_begin, (SPALL_BUFFER_PROFILING_GET_TIME()), "" name "", sizeof("" name "") - 1) | |
#else | |
#define SPALL_BUFFER_PROFILE_BEGIN() | |
#define SPALL_BUFFER_PROFILE_END(name) | |
#endif | |
// Write callback for the FILE*-backed profile: writes `n` bytes from `p` as a
// single fwrite item. Returns false when no stream is attached or the item was
// not written (note that fwrite reports 0 items for n == 0, so an empty write
// is reported as failure).
SPALL_FN SPALL_FORCEINLINE bool spall__file_write(SpallProfile *ctx, const void *p, size_t n) {
    FILE *out = (FILE *)ctx->data;
    if (out == NULL) return false;
#ifdef SPALL_DEBUG
    if (feof(out)) return false;
    if (ferror(out)) return false;
#endif
    return fwrite(p, n, 1, out) == 1;
}
// Flush callback for the FILE*-backed profile.
// Returns true only when a stream is attached and fflush() succeeded.
SPALL_FN bool spall__file_flush(SpallProfile *ctx) {
    FILE *out = (FILE *)ctx->data;
    return out != NULL && fflush(out) == 0;
}
// Close callback for the FILE*-backed profile: terminates the JSON array when
// the profile is JSON, then flushes and closes the stream and clears ctx->data.
SPALL_FN void spall__file_close(SpallProfile *ctx) {
    FILE *out = (FILE *)ctx->data;
    if (out == NULL) return;

    bool write_json_tail = ctx->is_json;
#ifdef SPALL_DEBUG
    if (feof(out) || ferror(out)) write_json_tail = false;
#endif
    if (write_json_tail) {
        // Intent: step back over the ",\n" left by the last event, then close the array.
        // NOTE(review): spall_init_file_ex opens the stream with "ab"; in append mode
        // writes go to end-of-file regardless of fseek, so this seek probably has no
        // effect there and the trailing comma stays in the file - confirm.
        fseek(out, -2, SEEK_CUR);
        fwrite("\n]}\n", sizeof("\n]}\n") - 1, 1, out);
    }

    fflush(out);
    fclose(out);
    ctx->data = NULL;
}
// Hands the staged bytes of `wb` to ctx->write and empties the buffer.
// With ctx == NULL the staged bytes are simply discarded.
// Returns false (leaving the buffer untouched) when the write fails.
//
// precon: wb
// precon: wb->data
// precon: wb->head <= wb->length
// precon: !ctx || ctx->write
SPALL_FN SPALL_FORCEINLINE bool spall__buffer_flush(SpallProfile *ctx, SpallBuffer *wb) {
#ifdef SPALL_DEBUG
    if (wb->ctx != ctx) return false; // Buffer must be bound to this context (or to NULL)
#endif

    if (wb->head && ctx) {
        SPALL_BUFFER_PROFILE_BEGIN();
        if (!ctx->write) return false;

        // Call the file writer by name when it is the installed callback,
        // otherwise go through the function pointer.
        bool wrote = (ctx->write == spall__file_write)
            ? spall__file_write(ctx, wb->data, wb->head)
            : ctx->write(ctx, wb->data, wb->head);
        if (!wrote) return false;
        SPALL_BUFFER_PROFILE_END("Buffer Flush");
    }

    wb->head = 0;
    return true;
}
// Appends `n` bytes from `p` to `wb`, flushing first when they do not fit.
//
// - wb == NULL: the bytes go straight to ctx->write (unbuffered).
// - n larger than the whole buffer: the buffer is flushed and the bytes are
//   written directly through ctx->write.
// - ctx == NULL is tolerated: direct writes fail (return false) instead of
//   dereferencing NULL, and buffered bytes are staged as usual.
//
// Returns true when the bytes were staged or written, false otherwise.
//
// precon: !wb || wb->head < wb->length
// precon: !ctx || ctx->write
SPALL_FN SPALL_FORCEINLINE bool spall__buffer_write(SpallProfile *ctx, SpallBuffer *wb, void *p, size_t n) {
    if (!wb) return ctx && ctx->write && ctx->write(ctx, p, n);

#ifdef SPALL_DEBUG
    if (wb->ctx != ctx) return false; // Buffer must be bound to this context (or to NULL)
#endif

    // Compare against the remaining room instead of computing head + n, which could wrap.
    bool needs_flush = (wb->head > wb->length) || (n > wb->length - wb->head);
    if (needs_flush && !spall__buffer_flush(ctx, wb)) return false;

    if (n > wb->length) {
        // Too large to ever fit in the buffer: bypass it.
        SPALL_BUFFER_PROFILE_BEGIN();
        if (!ctx || !ctx->write || !ctx->write(ctx, p, n)) return false;
        SPALL_BUFFER_PROFILE_END("Unbuffered Write");
        return true;
    }

    memcpy((char *)wb->data + wb->head, p, n);
    wb->head += n;
    return true;
}
// Public flush entry point: validates `wb` in SPALL_DEBUG builds, then
// forwards to spall__buffer_flush() and returns its result.
SPALL_FN bool spall_buffer_flush(SpallProfile *ctx, SpallBuffer *wb) {
#ifdef SPALL_DEBUG
    if (!wb || !wb->data) return false;
#endif
    return spall__buffer_flush(ctx, wb);
}
// Resets `wb` (flushing against a NULL context, which discards staged bytes)
// and binds it to `ctx`. Returns false, leaving the binding unchanged, when
// the reset fails.
SPALL_FN bool spall_buffer_init(SpallProfile *ctx, SpallBuffer *wb) {
    bool reset_ok = spall_buffer_flush(NULL, wb);
    if (reset_ok) {
        wb->ctx = ctx;
    }
    return reset_ok;
}
// Flushes `wb` to `ctx` and unbinds it. Returns false, keeping the binding,
// when the flush fails.
SPALL_FN bool spall_buffer_quit(SpallProfile *ctx, SpallBuffer *wb) {
    bool flushed = spall_buffer_flush(ctx, wb);
    if (flushed) {
        wb->ctx = NULL;
    }
    return flushed;
}
// Unbinds `wb` and drops whatever it has staged (flush against a NULL context).
// Returns false for a NULL buffer or when the internal flush reports failure.
SPALL_FN bool spall_buffer_abort(SpallBuffer *wb) {
    if (wb == NULL) return false;
    wb->ctx = NULL;
    return spall__buffer_flush(NULL, wb);
}
// Serializes a SpallHeader into `buffer`.
// Returns the number of bytes written, or 0 when `rem_size` is too small.
SPALL_FN size_t spall_build_header(void *buffer, size_t rem_size, double timestamp_unit) {
    if (sizeof(SpallHeader) > rem_size) {
        return 0;
    }

    // Fill a local copy and copy it out; SpallHeader is packed, so the bytes
    // match a field-by-field store through the destination pointer.
    SpallHeader hdr;
    hdr.magic_header   = 0x0BADF00D;
    hdr.version        = 1;
    hdr.timestamp_unit = timestamp_unit;
    hdr.must_be_0      = 0;
    memcpy(buffer, &hdr, sizeof(hdr));

    return sizeof(SpallHeader);
}
SPALL_FN SPALL_FORCEINLINE size_t spall_build_begin(void *buffer, size_t rem_size, const char *name, signed long name_len, const char *args, signed long args_len, double when, uint32_t tid, uint32_t pid) { | |
SpallBeginEventMax *ev = (SpallBeginEventMax *)buffer; | |
uint8_t trunc_name_len = (uint8_t)SPALL_MIN(name_len, 255); // will be interpreted as truncated in the app (?) | |
uint8_t trunc_args_len = (uint8_t)SPALL_MIN(args_len, 255); // will be interpreted as truncated in the app (?) | |
size_t ev_size = sizeof(SpallBeginEvent) + trunc_name_len + trunc_args_len; | |
if (ev_size > rem_size) { | |
return 0; | |
} | |
ev->event.type = SpallEventType_Begin; | |
ev->event.category = 0; | |
ev->event.pid = pid; | |
ev->event.tid = tid; | |
ev->event.when = when; | |
ev->event.name_length = trunc_name_len; | |
ev->event.args_length = trunc_args_len; | |
memcpy(ev->name_bytes, name, trunc_name_len); | |
memcpy(ev->name_bytes + trunc_name_len, args, trunc_args_len); | |
return ev_size; | |
} | |
SPALL_FN SPALL_FORCEINLINE size_t spall_build_end(void *buffer, size_t rem_size, double when, uint32_t tid, uint32_t pid) { | |
size_t ev_size = sizeof(SpallEndEvent); | |
if (ev_size > rem_size) { | |
return 0; | |
} | |
SpallEndEvent *ev = (SpallEndEvent *)buffer; | |
ev->type = SpallEventType_End; | |
ev->pid = pid; | |
ev->tid = tid; | |
ev->when = when; | |
return ev_size; | |
} | |
// Shuts a profile down: invokes its close callback (if any) and zeroes the
// whole SpallProfile. A NULL `ctx` is ignored.
SPALL_FN void spall_quit(SpallProfile *ctx) {
    if (ctx == NULL) return;

    SpallCloseCallback on_close = ctx->close;
    if (on_close != NULL) {
        on_close(ctx);
    }
    memset(ctx, 0, sizeof(*ctx));
}
// Creates a profile on top of caller-supplied callbacks and emits the stream
// preamble: the JSON array opener when `is_json`, otherwise a binary SpallHeader.
//
// Returns the initialized profile. On failure an all-zero profile is returned:
//  - timestamp_unit < 0: nothing is called (the close callback is NOT invoked);
//  - write == NULL or the preamble write fails: spall_quit() runs first, so the
//    close callback (if any) is invoked on the partially built profile.
SPALL_FN SpallProfile spall_init_callbacks(double timestamp_unit,
                                           SpallWriteCallback write,
                                           SpallFlushCallback flush,
                                           SpallCloseCallback close,
                                           void *userdata,
                                           bool is_json) {
    SpallProfile ctx;
    memset(&ctx, 0, sizeof(ctx));
    if (timestamp_unit < 0) return ctx;

    ctx.timestamp_unit = timestamp_unit;
    ctx.is_json = is_json;
    ctx.data = userdata;
    ctx.write = write;
    ctx.flush = flush;
    ctx.close = close;

    // The preamble must be written, so a profile without a writer cannot be set up.
    if (!ctx.write) { spall_quit(&ctx); return ctx; }

    if (ctx.is_json) {
        if (!ctx.write(&ctx, "{\"traceEvents\":[\n", sizeof("{\"traceEvents\":[\n") - 1)) { spall_quit(&ctx); return ctx; }
    } else {
        SpallHeader header;
        size_t len = spall_build_header(&header, sizeof(header), timestamp_unit);
        if (!ctx.write(&ctx, &header, len)) { spall_quit(&ctx); return ctx; }
    }

    return ctx;
}
// Opens `filename` for tracing and returns a FILE*-backed profile
// (JSON when `is_json`, binary otherwise).
//
// Returns an all-zero profile when `filename` is NULL, `timestamp_unit` is
// negative, or the file cannot be opened. Arguments are validated before the
// file is touched, so a rejected call neither truncates the file nor leaves a
// stream open.
SPALL_FN SpallProfile spall_init_file_ex(const char *filename, double timestamp_unit, bool is_json) {
    SpallProfile ctx;
    memset(&ctx, 0, sizeof(ctx));
    if (!filename) return ctx;
    // spall_init_callbacks() rejects a negative unit without calling close;
    // check here first so the stream opened below cannot leak.
    if (timestamp_unit < 0) return ctx;

    // Truncate/create with "wb", then reopen in append mode.
    FILE *file = fopen(filename, "wb"); // TODO: handle utf8 and long paths on windows
    if (file) { // basically freopen() but we don't want to force users to lug along another macro define
        fclose(file);
        file = fopen(filename, "ab");
    }
    if (!file) return ctx;

    // On a failed preamble write spall_init_callbacks() closes the stream itself.
    ctx = spall_init_callbacks(timestamp_unit, spall__file_write, spall__file_flush, spall__file_close, file, is_json);
    return ctx;
}
// Convenience wrapper: opens `filename` as a binary (non-JSON) trace.
SPALL_FN SpallProfile spall_init_file(const char* filename, double timestamp_unit) {
    const bool as_json = false;
    return spall_init_file_ex(filename, timestamp_unit, as_json);
}
// Convenience wrapper: opens `filename` as a JSON trace.
SPALL_FN SpallProfile spall_init_file_json(const char* filename, double timestamp_unit) {
    const bool as_json = true;
    return spall_init_file_ex(filename, timestamp_unit, as_json);
}
// Invokes the profile's flush callback.
// Returns false when no flush callback is installed or the callback fails.
// `ctx` is only NULL-checked in SPALL_DEBUG builds.
SPALL_FN bool spall_flush(SpallProfile *ctx) {
#ifdef SPALL_DEBUG
    if (ctx == NULL) return false;
#endif
    return ctx->flush != NULL && ctx->flush(ctx);
}
// Emits a Begin event (with optional args string) into `wb`.
//
// JSON profiles get a formatted text line; binary profiles get a packed
// SpallBeginEvent. In both formats name/args are limited to 255 bytes: longer
// strings are truncated and negative lengths are treated as empty.
// NOTE: name/args are written into the JSON line verbatim (no escaping).
//
// Returns true when the event was staged/written; false when formatting fails,
// a flush/write fails, or the event cannot fit in the buffer at all.
// ctx, wb and name are only validated in SPALL_DEBUG builds.
SPALL_FN SPALL_FORCEINLINE bool spall_buffer_begin_args(SpallProfile *ctx, SpallBuffer *wb, const char *name, signed long name_len, const char *args, signed long args_len, double when, uint32_t tid, uint32_t pid) {
#ifdef SPALL_DEBUG
    if (!ctx) return false;
    if (!name) return false;
    if (name_len <= 0) return false;
    if (!wb) return false;
#endif

    if (ctx->is_json) {
        // Clamp (rather than wrap modulo 256) so a 256+ byte name is truncated
        // to 255 bytes, matching what the binary path does.
        int json_name_len = (int)(name_len < 0 ? 0 : SPALL_MIN(name_len, 255));
        int json_args_len = (int)(args_len < 0 ? 0 : SPALL_MIN(args_len, 255));

        char buf[1024];
        int buf_len = snprintf(buf, sizeof(buf),
                               "{\"ph\":\"B\",\"ts\":%f,\"pid\":%u,\"tid\":%u,\"name\":\"%.*s\",\"args\":\"%.*s\"},\n",
                               when * ctx->timestamp_unit, pid, tid,
                               json_name_len, name ? name : "",
                               json_args_len, args ? args : "");
        if (buf_len <= 0) return false;
        if ((size_t)buf_len >= sizeof(buf)) return false; // output was truncated
        if (!spall__buffer_write(ctx, wb, buf, (size_t)buf_len)) return false;
    } else {
        // Make room for the worst-case event size before building in place.
        if ((wb->head + sizeof(SpallBeginEventMax)) > wb->length) {
            if (!spall__buffer_flush(ctx, wb)) {
                return false;
            }
        }
        size_t ev_size = spall_build_begin((char *)wb->data + wb->head, wb->length - wb->head, name, name_len, args, args_len, when, tid, pid);
        if (ev_size == 0) return false; // event did not fit: report instead of dropping silently
        wb->head += ev_size;
    }

    return true;
}
// Begin event without an args string, for an explicit tid/pid.
SPALL_FN SPALL_FORCEINLINE bool spall_buffer_begin_ex(SpallProfile *ctx, SpallBuffer *wb, const char *name, signed long name_len, double when, uint32_t tid, uint32_t pid) {
    return spall_buffer_begin_args(ctx, wb,
                                   name, name_len,
                                   "", 0,          // empty args
                                   when, tid, pid);
}
// Begin event without an args string, using tid 0 and pid 0.
SPALL_FN bool spall_buffer_begin(SpallProfile *ctx, SpallBuffer *wb, const char *name, signed long name_len, double when) {
    return spall_buffer_begin_args(ctx, wb,
                                   name, name_len,
                                   "", 0,          // empty args
                                   when,
                                   0, 0);          // tid, pid
}
// Emits an End event into `wb` for the given tid/pid.
//
// JSON profiles get a formatted text line; binary profiles get a packed
// SpallEndEvent.
//
// Returns true when the event was staged/written; false when formatting fails,
// a flush/write fails, or the event cannot fit in the buffer at all.
// ctx and wb are only validated in SPALL_DEBUG builds.
SPALL_FN SPALL_FORCEINLINE bool spall_buffer_end_ex(SpallProfile *ctx, SpallBuffer *wb, double when, uint32_t tid, uint32_t pid) {
#ifdef SPALL_DEBUG
    if (!ctx) return false;
    if (!wb) return false;
#endif

    if (ctx->is_json) {
        char buf[512];
        int buf_len = snprintf(buf, sizeof(buf),
                               "{\"ph\":\"E\",\"ts\":%f,\"pid\":%u,\"tid\":%u},\n",
                               when * ctx->timestamp_unit, pid, tid);
        if (buf_len <= 0) return false;
        if ((size_t)buf_len >= sizeof(buf)) return false; // output was truncated
        if (!spall__buffer_write(ctx, wb, buf, (size_t)buf_len)) return false;
    } else {
        if ((wb->head + sizeof(SpallEndEvent)) > wb->length) {
            if (!spall__buffer_flush(ctx, wb)) {
                return false;
            }
        }
        size_t ev_size = spall_build_end((char *)wb->data + wb->head, wb->length - wb->head, when, tid, pid);
        if (ev_size == 0) return false; // event did not fit: report instead of dropping silently
        wb->head += ev_size;
    }

    return true;
}
// End event using tid 0 and pid 0.
SPALL_FN bool spall_buffer_end(SpallProfile *ctx, SpallBuffer *wb, double when) {
    const uint32_t default_tid = 0;
    const uint32_t default_pid = 0;
    return spall_buffer_end_ex(ctx, wb, when, default_tid, default_pid);
}
// Records how long a buffer flush / unbuffered write took as a Begin/End pair
// and writes it straight to ctx->write, bypassing `wb`.
//
// The pair is tagged with pid 4222222222 and a tid derived from the address of
// wb->data. Failures are silently ignored.
//
// precon: ctx
// precon: ctx->write
SPALL_FN SPALL_FORCEINLINE void spall__buffer_profile(SpallProfile *ctx, SpallBuffer *wb, double spall_time_begin, double spall_time_end, const char *name, int name_len) {
    const uint32_t profile_pid = 4222222222u;
    const uint32_t profile_tid = (uint32_t)(uintptr_t)wb->data;

    char temp_buffer_data[2048];
    // Bind the scratch buffer to ctx: SPALL_DEBUG builds reject writes to a
    // buffer whose ctx differs from the profile being written to.
    SpallBuffer temp_buffer = { temp_buffer_data, sizeof(temp_buffer_data), 0, ctx };

    if (!spall_buffer_begin_ex(ctx, &temp_buffer, name, name_len, spall_time_begin, profile_tid, profile_pid)) return;
    if (!spall_buffer_end_ex(ctx, &temp_buffer, spall_time_end, profile_tid, profile_pid)) return;
    if (ctx->write) ctx->write(ctx, temp_buffer_data, temp_buffer.head);
}
#ifdef __cplusplus | |
} | |
#endif | |
#endif // SPALL_H | |
#ifndef INSTRUMENT_H | |
#define INSTRUMENT_H | |
#include <stdint.h> | |
SPALL_FN void spall_init_profile(char *filename); | |
SPALL_FN void spall_exit_profile(void); | |
SPALL_FN void spall_init_thread(uint32_t tid, size_t buffer_size, int64_t symbol_cache_size); | |
SPALL_FN void spall_exit_thread(void); | |
#endif // INSTRUMENT_H | |
#ifndef SPALL_NOIMPL | |
#ifndef INSTRUMENT_C | |
#define INSTRUMENT_C | |
#define _GNU_SOURCE | |
#include <stdlib.h> | |
#include <stdint.h> | |
#include <dlfcn.h> | |
#include <time.h> | |
#include <pthread.h> | |
#include <unistd.h> | |
// A symbol name: pointer plus byte length (not counting any terminator).
typedef struct {
    char *str;
    int   len;
} Name;

// One cached address -> name resolution.
typedef struct {
    void *addr;
    Name  name;
} SymEntry;

// Array of cached entries.
typedef struct {
    SymEntry *arr;
    uint64_t  len;  // entries in use
    uint64_t  cap;  // entries allocated
} SymArr;

// Hash slots; each slot holds an index into SymArr.arr, or -1 when empty.
typedef struct {
    int64_t  *arr;
    uint64_t  len;  // slot count
} HashArr;

// Address -> name cache: entry storage plus the hash index over it.
typedef struct {
    SymArr  entries;
    HashArr hashes;
} AddrHash;
static SpallProfile spall_ctx; | |
static _Thread_local SpallBuffer spall_buffer; | |
static _Thread_local AddrHash addr_map; | |
static _Thread_local uint32_t tid; | |
static _Thread_local bool spall_thread_running = false; | |
// Rounds `x` up to the nearest power of two (a power of two maps to itself).
// Returns 1 for x <= 1, and 0 when the result would not fit in 64 bits
// (x > 2^63).
SPALL_FN uint64_t next_pow2(uint64_t x) {
    // x - 1 == 0 would hit __builtin_clzll(0), whose result is undefined.
    if (x <= 1) return 1;
    // Above 2^63 the shift count would be 64, which is undefined.
    if (x > ((uint64_t)1 << 63)) return 0;
    // Shift a 64-bit one: a plain `1` is an int and cannot be shifted past bit 30.
    return (uint64_t)1 << (64 - __builtin_clzll(x - 1));
}
// Creates an address -> name cache with room for `size` entries and a hash
// index of next_pow2(size) slots, all marked empty (-1).
//
// Returns an all-zero AddrHash when `size` is not positive or an allocation
// fails; lookups on such a map simply fail. Release with ah_free().
//
// This is not thread-safe... Use one per thread!
SPALL_FN AddrHash ah_init(int64_t size) {
    AddrHash ah;
    memset(&ah, 0, sizeof(ah));
    if (size <= 0) return ah;

    uint64_t slot_count = next_pow2((uint64_t)size);
    if (slot_count == 0 || slot_count > SIZE_MAX / sizeof(int64_t)) return ah;

    SymEntry *entries = calloc((size_t)size, sizeof(SymEntry));
    int64_t *slots = malloc(sizeof(int64_t) * (size_t)slot_count);
    if (!entries || !slots) {
        free(entries);
        free(slots);
        return ah;
    }

    for (uint64_t i = 0; i < slot_count; i++) {
        slots[i] = -1; // -1 marks an empty slot
    }

    ah.entries.arr = entries;
    ah.entries.cap = (uint64_t)size;
    ah.entries.len = 0;
    ah.hashes.arr = slots;
    ah.hashes.len = slot_count;
    return ah;
}
// Releases both arrays owned by `ah` and zeroes the struct.
SPALL_FN void ah_free(AddrHash *ah) {
    free(ah->hashes.arr);
    free(ah->entries.arr);
    memset(ah, 0, sizeof(*ah));
}
// fibhash addresses: multiplies the low 32 bits of `addr` by 2654435769
// modulo 2^32 and returns the product reinterpreted as an int.
SPALL_FN int ah_hash(void *addr) {
    // The `u` suffix keeps the multiply in 32-bit unsigned arithmetic (wraps,
    // well defined). Unsuffixed, the constant is a signed long and the product
    // can overflow it.
    uint32_t low_bits = (uint32_t)(uintptr_t)addr;
    return (int)(low_bits * 2654435769u);
}
// Resolves `addr` to a symbol name via dladdr().
// On success stores the name (pointer into dladdr's result, plus its strlen)
// in *name_ret and returns true; returns false when dladdr fails or reports
// no symbol name.
// Replace me with your platform's addr->name resolver if needed
SPALL_FN bool get_addr_name(void *addr, Name *name_ret) {
    Dl_info sym;
    if (dladdr(addr, &sym) == 0) return false;
    if (sym.dli_sname == NULL) return false;

    name_ret->str = (char *)sym.dli_sname;
    name_ret->len = (int)strlen(sym.dli_sname);
    return true;
}
// Looks up the symbol name for `addr`, resolving and caching it on first use.
//
// The hash index is probed linearly starting at ah_hash(addr). An empty slot
// means the address is not cached yet: it is resolved with get_addr_name()
// and inserted. Returns true with *name_ret set on a hit or a successful
// insert; returns false when the name cannot be resolved or the cache is full
// (entry array at capacity, or every slot probed without a match).
SPALL_FN bool ah_get(AddrHash *ah, void *addr, Name *name_ret) {
    uint64_t mask = ah->hashes.len - 1; // slot count is a power of two
    uint64_t home = ((uint64_t)ah_hash(addr)) & mask;

    for (uint64_t probe = 0; probe < ah->hashes.len; probe++) {
        uint64_t slot = (home + probe) & mask;
        int64_t entry_idx = ah->hashes.arr[slot];

        if (entry_idx == -1) {
            // The slot count can exceed the entry capacity (it is rounded up
            // to a power of two), so a free slot does not imply a free entry.
            if (ah->entries.len >= ah->entries.cap) {
                // The symbol map is full, make the symbol map bigger!
                return false;
            }

            Name name;
            if (!get_addr_name(addr, &name)) {
                // Failed to get a name for the address!
                return false;
            }

            SymEntry entry = {.addr = addr, .name = name};
            ah->hashes.arr[slot] = (int64_t)ah->entries.len;
            ah->entries.arr[ah->entries.len] = entry;
            ah->entries.len += 1;

            *name_ret = name;
            return true;
        }

        if (ah->entries.arr[entry_idx].addr == addr) {
            *name_ret = ah->entries.arr[entry_idx].name;
            return true;
        }
    }

    // The symbol map is full, make the symbol map bigger!
    return false;
}
#ifdef __linux__ | |
#include <linux/perf_event.h> | |
#include <asm/unistd.h> | |
#include <sys/mman.h> | |
// Computes (cyc * mult) >> shift using a 128-bit intermediate, then narrows
// the result to 64 bits.
SPALL_FN uint64_t mul_u64_u32_shr(uint64_t cyc, uint32_t mult, uint32_t shift) {
    __uint128_t wide = (__uint128_t)cyc * mult;
    return (uint64_t)(wide >> shift);
}
SPALL_FN long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, | |
int cpu, int group_fd, unsigned long flags) { | |
return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); | |
} | |
// Reads the CPU's free-running counter: `rdtsc` on x86_64, the `cntvct_el0`
// register on aarch64. Any other architecture is a compile error.
SPALL_FN uint64_t __rdtsc(void)
{
#if defined(__x86_64__)
    uint32_t eax, edx;
    __asm__ __volatile__ ("rdtsc" : "=a"(eax), "=d"(edx));
    // rdtsc returns the low half in EAX and the high half in EDX.
    return (((uint64_t)edx) << 32) | ((uint64_t)eax);
#elif defined(__aarch64__)
    uint64_t ticks;
    __asm__ volatile("mrs \t%0, cntvct_el0" : "=r"(ticks));
    return ticks;
#else
#error "[ReadCPUTimer] Unsupported OS/compiler!"
#endif
    return 0;
}
SPALL_FN double ReadOSTimer() | |
{ | |
double result = 0.0; | |
struct timespec ts; | |
if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) == 0) { | |
result = (double)ts.tv_sec + (double)ts.tv_nsec / 1e9; | |
} | |
return result; | |
} | |
// Estimates how many __rdtsc() ticks elapse per second by busy-waiting on the
// OS timer for roughly `estimate_time_seconds` and dividing the tick delta by
// the OS time that actually elapsed.
//
// Returns 0 when the interval is not positive or the OS timer is unavailable
// (ReadOSTimer() reports failure as 0.0); callers must treat 0 as "unknown".
SPALL_FN uint64_t ProfilerEstimateClocksPerSecond(double estimate_time_seconds)
{
    if (!(estimate_time_seconds > 0.0)) return 0; // also rejects NaN

    double os_begin = ReadOSTimer();
    if (os_begin <= 0.0) return 0; // timer failed: the wait below could never finish
    uint64_t tsc_begin = __rdtsc();

    double deadline = os_begin + estimate_time_seconds;
    double os_now = os_begin;
    while (os_now < deadline)
    {
        os_now = ReadOSTimer();
        if (os_now <= 0.0) return 0; // timer failed mid-measurement
    }
    uint64_t tsc_end = __rdtsc();

    // Divide by the measured interval: the loop always overshoots the requested one.
    double elapsed_seconds = os_now - os_begin;
    return (uint64_t)((double)(tsc_end - tsc_begin) / elapsed_seconds);
}
// Returns the factor that converts __rdtsc() ticks to microseconds
// (1000000 / ticks-per-second), using a ~0.1 s calibration run.
// Falls back to 1.0 (and reports on stderr) when calibration yields 0.
SPALL_FN double get_rdtsc_multiplier() {
    // NOTE(nick): hack to measure clocks per second (instead of reading this information directly from the OS)
    uint64_t clocks_per_second = ProfilerEstimateClocksPerSecond(0.1);
    if (clocks_per_second == 0) {
        // Avoid dividing by zero and handing an infinite multiplier to the profile.
        fprintf(stderr, "Failed to get CPU frequency\n");
        return 1.0;
    }
    return 1000000.0 / (double)clocks_per_second;

    // Disabled alternative that reads the conversion from the perf_event mmap page.
#if 0
    struct perf_event_attr pe = {
        .type = PERF_TYPE_HARDWARE,
        .size = sizeof(struct perf_event_attr),
        .config = PERF_COUNT_HW_INSTRUCTIONS,
        .disabled = 1,
        .exclude_kernel = 1,
        .exclude_hv = 1
    };
    int fd = perf_event_open(&pe, 0, -1, -1, 0);
    if (fd == -1) {
        perror("perf_event_open failed");
        return 1;
    }
    void *addr = mmap(NULL, 4*1024, PROT_READ, MAP_SHARED, fd, 0);
    if (!addr) {
        perror("mmap failed");
        return 1;
    }
    struct perf_event_mmap_page *pc = addr;
    if (pc->cap_user_time != 1) {
        fprintf(stderr, "Perf system doesn't support user time\n");
        return 1;
    }
    double nanos = (double)mul_u64_u32_shr(1000000, pc->time_mult, pc->time_shift);
    return nanos / 1000000000;
#endif
}
#elif __APPLE__ | |
#include <sys/types.h> | |
#include <sys/sysctl.h> | |
#include <assert.h> | |
// Reads the CPU's free-running counter: `rdtsc` on x86_64 (Intel Macs), the
// `cntvct_el0` register on aarch64. Any other architecture is a compile error.
SPALL_FN uint64_t __rdtsc(void)
{
#if defined(__x86_64__)
    // rdtsc returns the low half in EAX and the high half in EDX.
    uint32_t hi, lo;
    __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
    return ((uint64_t)lo) | (((uint64_t)hi)<<32);
#elif defined(__aarch64__)
    uint64_t x;
    __asm__ volatile("mrs \t%0, cntvct_el0" : "=r"(x));
    return x;
#else
#error "[ReadCPUTimer] Unsupported OS/compiler!"
#endif
    return 0;
}
// Returns the factor that converts __rdtsc() ticks to microseconds
// (1000000 / frequency). The frequency comes from the first sysctl below that
// succeeds with a non-zero value; when none does, an error is printed to
// stderr and 1.0 is returned.
SPALL_FN double get_rdtsc_multiplier() {
    static const char *const frequency_sysctls[] = {
        "machdep.tsc.frequency",
        "hw.cpufrequency",
        "hw.tbfrequency",
    };
    const size_t sysctl_count = sizeof(frequency_sysctls) / sizeof(frequency_sysctls[0]);

    uint64_t freq = 0;
    size_t size = sizeof(freq);
    for (size_t i = 0; i < sysctl_count; i++) {
        int ret = sysctlbyname(frequency_sysctls[i], &freq, &size, NULL, 0);
        if (ret == 0 && freq > 0) {
            return 1000000.0 / (double)freq;
        }
    }

    fprintf(stderr, "Failed to get CPU frequency\n");
    return 1.0;
}
#endif | |
SPALL_FN void spall_init_thread(uint32_t _tid, size_t buffer_size, int64_t symbol_cache_size) { | |
uint8_t *buffer = (uint8_t *)malloc(buffer_size); | |
spall_buffer = (SpallBuffer){ .data = buffer, .length = buffer_size }; | |
// removing initial page-fault bubbles to make the data a little more accurate, at the cost of thread spin-up time | |
memset(buffer, 1, buffer_size); | |
spall_buffer_init(&spall_ctx, &spall_buffer); | |
tid = _tid; | |
addr_map = ah_init(symbol_cache_size); | |
spall_thread_running = true; | |
} | |
SPALL_FN void spall_exit_thread() { | |
spall_thread_running = false; | |
ah_free(&addr_map); | |
spall_buffer_quit(&spall_ctx, &spall_buffer); | |
free(spall_buffer.data); | |
} | |
SPALL_FN void spall_init_profile(char *filename) { | |
spall_ctx = spall_init_file_json(filename, get_rdtsc_multiplier()); | |
} | |
SPALL_FN void spall_exit_profile(void) { | |
spall_quit(&spall_ctx); | |
} | |
char not_found[] = "(unknown name)"; | |
SPALL_NOINSTRUMENT void __cyg_profile_func_enter(void *fn, void *caller) { | |
if (!spall_thread_running) { | |
return; | |
} | |
Name name; | |
if (!ah_get(&addr_map, fn, &name)) { | |
name = (Name){.str = not_found, .len = sizeof(not_found) - 1}; | |
} | |
spall_buffer_begin_ex(&spall_ctx, &spall_buffer, name.str, name.len, __rdtsc(), tid, 0); | |
} | |
SPALL_NOINSTRUMENT void __cyg_profile_func_exit(void *fn, void *caller) { | |
if (!spall_thread_running) { | |
return; | |
} | |
spall_buffer_end_ex(&spall_ctx, &spall_buffer, __rdtsc(), tid, 0); | |
} | |
#endif // INSTRUMENT_C | |
#endif // SPALL_NOIMPL |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment