Last active
October 18, 2022 09:01
-
-
Save pkhuong/1ce34e33c6df4b9be3bc9beb22415a47 to your computer and use it in GitHub Desktop.
minimal BTS tracing wrapper for linux perf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define RUN_ME /* | |
exec cc -O2 -W -Wall -std=c99 -shared $0 -o "$(basename $0 .c).so" -fPIC | |
*/ | |
/* | |
* Copyright 2019 Paul Khuong | |
* SPDX-License-Identifier: BSD-2-Clause | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions | |
* are met: | |
* | |
* 1. Redistributions of source code must retain the above copyright | |
* notice, this list of conditions and the following disclaimer. | |
* | |
* 2. Redistributions in binary form must reproduce the above | |
* copyright notice, this list of conditions and the following | |
* disclaimer in the documentation and/or other materials provided | |
* with the distribution. | |
* | |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | |
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | |
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, | |
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
* OF THE POSSIBILITY OF SUCH DAMAGE. | |
*/ | |
#define _GNU_SOURCE | |
#include <errno.h> | |
#include <linux/perf_event.h> | |
#include <pthread.h> | |
#include <stddef.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <sys/ioctl.h> | |
#include <sys/mman.h> | |
#include <sys/syscall.h> | |
#include <sys/types.h> | |
#include <time.h> | |
#include <unistd.h> | |
/* | |
* Public interface. | |
*/ | |
/*
 * One record of the BTS auxiliary buffer: three consecutive 64-bit
 * words.
 */
struct bts_aux_record {
	/* `from` and `to` are instruction addresses. */
	uint64_t from;
	uint64_t to;
	/* 0x10 = predicted, in theory, maybe. */
	uint64_t flags;
};
/* | |
* This function must be called with the value in | |
* `/sys/bus/event_source/devices/intel_bts/type` before calling | |
* `bts_setup`. | |
*/ | |
void bts_init(int detected_bts_perf_type); | |
/* | |
* Cleans up any BTS state for the current thread. | |
*/ | |
void bts_teardown(void); | |
/*
 * Overwrites or creates the BTS state for the current thread, with
 * an auxiliary (tracing) buffer of `aux_size` bytes. `aux_size`
 * must be a power of two and must be at least one page.
 *
 * Returns 0 on success, negative on failure.
 */
int bts_setup(size_t aux_size); | |
/* | |
* Enables branch tracing for the calling thread, which must have | |
* a BTS state (i.e., only call `bts_start` after `bts_setup`). | |
* | |
* Returns 0 on success, negative on failure. | |
*/ | |
int bts_start(void); | |
/*
 * Stops branch tracing for the current thread, and returns a
 * temporary (thread-local) buffer of the BTS records since
 * the last call to `bts_start`. Returns NULL on error, and also
 * when no record was produced.
 *
 * The first argument is overwritten with the number of valid
 * records in the return value, or a negative count on error.
 *
 * When `(*OUT_num_elements + 2) * sizeof(struct bts_aux_record)`
 * exceeds the `aux_size` passed to `bts_setup`, tracing may have
 * exhausted the buffer space and stopped early. This trace
 * truncation does not affect the execution of the traced program.
 */
const struct bts_aux_record *bts_stop(ssize_t *OUT_num_elements); | |
/* | |
* Actual implementation follows. | |
*/ | |
/*
 * Thread-local BTS state: the perf event's file descriptor, the two
 * mappings made from it, and a scratch buffer for the results.
 */
struct bts_recording_state {
	/* File descriptor of the BTS perf event; -1 when there is none. */
	int bts_fd;
	/* Mapping of the perf control page. */
	struct perf_event_mmap_page *perf;
	/* Mapping of the auxiliary (tracing) ring buffer. */
	void *aux_buffer;
	/*
	 * Heap buffer with the same size as aux_buffer; it receives
	 * the aux records in linear order.
	 */
	struct bts_aux_record *linear_result;
};
#ifndef PAGE_SIZE | |
#define PAGE_SIZE 4096ULL | |
#endif | |
/* Map in the perf control page, and no data ring buffer page. */ | |
static const size_t perf_page_size = PAGE_SIZE; | |
static int bts_perf_type = -1; | |
static __thread struct bts_recording_state thread_state = { | |
.bts_fd = -1, | |
}; | |
/* Returns the thread id of the calling thread, via the gettid system call. */
static pid_t bts_gettid(void)
{
	const long tid = syscall(__NR_gettid);

	return tid;
}
/*
 * Raw wrapper around the perf_event_open system call.  Every integer
 * argument is cast to uintptr_t before it is handed to the variadic
 * `syscall`.
 *
 * Returns whatever `syscall` returns.
 */
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
    int group_fd, unsigned long flags)
{
	const uintptr_t pid_arg = (uintptr_t)pid;
	const uintptr_t cpu_arg = (uintptr_t)cpu;
	const uintptr_t group_arg = (uintptr_t)group_fd;
	const uintptr_t flags_arg = (uintptr_t)flags;

	return syscall(__NR_perf_event_open, hw_event, pid_arg, cpu_arg,
	    group_arg, flags_arg);
}
/* Opens a BTS perf fd for `tid`. */ | |
static int bts_open_fd(pid_t tid) | |
{ | |
struct perf_event_attr pe = { | |
.size = sizeof(pe), | |
.exclude_kernel = 1, | |
.disabled = 1, | |
.exclude_hv = 1, | |
.type = bts_perf_type, | |
}; | |
return perf_event_open(&pe, tid, /*cpu=*/-1, | |
/*group_fd=*/-1, PERF_FLAG_FD_CLOEXEC); | |
} | |
void bts_init(int detected_bts_perf_type) | |
{ | |
bts_perf_type = detected_bts_perf_type; | |
return; | |
} | |
void bts_teardown(void) | |
{ | |
free(thread_state.linear_result); | |
thread_state.linear_result = NULL; | |
if (thread_state.aux_buffer != NULL) { | |
munmap(thread_state.aux_buffer, thread_state.perf->aux_size); | |
} | |
thread_state.aux_buffer = NULL; | |
if (thread_state.perf != NULL) { | |
munmap(thread_state.perf, perf_page_size); | |
} | |
thread_state.perf = NULL; | |
if (thread_state.bts_fd >= 0) { | |
close(thread_state.bts_fd); | |
} | |
thread_state.bts_fd = -1; | |
return; | |
} | |
int bts_setup(size_t aux_size) | |
{ | |
int r; | |
/* Clean up any current state. */ | |
bts_teardown(); | |
/* Get a BTS perf fd for this thread. */ | |
{ | |
int fd = bts_open_fd(bts_gettid()); | |
if (fd < 0) { | |
perror("perf_open_fd"); | |
r = fd; | |
goto fail; | |
} | |
thread_state.bts_fd = fd; | |
} | |
/* Map in the perf mmap control block. */ | |
{ | |
void *mapped = mmap(NULL, perf_page_size, PROT_READ | PROT_WRITE, | |
MAP_SHARED, thread_state.bts_fd, 0); | |
if (mapped == MAP_FAILED) { | |
perror("perf"); | |
r = -1; | |
goto fail; | |
} | |
thread_state.perf = mapped; | |
/* Populate the aux metadata fields so we can mmap the aux buffer in. */ | |
thread_state.perf->aux_offset = | |
thread_state.perf->data_offset + thread_state.perf->data_size; | |
thread_state.perf->aux_size = aux_size; | |
r = mprotect(mapped, perf_page_size, PROT_READ); | |
if (r < 0) { | |
goto fail; | |
} | |
} | |
/* Map in the auxiliary data ring buffer. */ | |
{ | |
void *mapped = mmap(NULL, thread_state.perf->aux_size, | |
PROT_READ | PROT_WRITE, MAP_SHARED, | |
thread_state.bts_fd, | |
thread_state.perf->aux_offset); | |
if (mapped == MAP_FAILED) { | |
perror("aux_buffer"); | |
r = -1; | |
goto fail; | |
} | |
thread_state.aux_buffer = mapped; | |
} | |
/* Alocate a buffer large enough to hold a linearised copy of the aux data. */ | |
thread_state.linear_result = malloc(thread_state.perf->aux_size); | |
if (thread_state.linear_result == NULL) { | |
perror("malloc"); | |
r = -1; | |
goto fail; | |
} | |
return 0; | |
fail: | |
{ | |
int err = errno; | |
bts_teardown(); | |
errno = err; | |
} | |
return r; | |
} | |
int bts_start(void) | |
{ | |
int r; | |
/* | |
* Make sure BTS tracing is disabled before messing with the | |
* ring buffer. | |
*/ | |
r = ioctl(thread_state.bts_fd, PERF_EVENT_IOC_DISABLE, 0); | |
if (r < 0) { | |
perror("perf disable"); | |
return r; | |
} | |
/* | |
* The perf mmap block is usually read-only to let the kernel | |
* silently overwrite entries in the zero-sized data ring | |
* buffer. | |
* | |
* We need write access to advance the aux read pointer, | |
* `aux_tail`. | |
*/ | |
r = mprotect(thread_state.perf, perf_page_size, PROT_READ | PROT_WRITE); | |
if (r < 0) { | |
perror("mprotect READ | WRITE"); | |
return r; | |
} | |
/* Consume all auxiliary data produced so far. */ | |
thread_state.perf->aux_tail = thread_state.perf->aux_head; | |
/* | |
* I don't think this is necessary (there should be no data | |
* record), but it doesn't hurt to switch the data ring buffer | |
* to silent overwrite mode. | |
*/ | |
r = mprotect(thread_state.perf, perf_page_size, PROT_READ); | |
if (r < 0) { | |
perror("mprotect READ"); | |
return r; | |
} | |
/* This also seems redundant, but honggfuzz does it... */ | |
r = ioctl(thread_state.bts_fd, PERF_EVENT_IOC_RESET, 0); | |
if (r < 0) { | |
perror("perf reset"); | |
return r; | |
} | |
/* | |
* Enable BTS tracing as late as possible to minimise noise. | |
*/ | |
return ioctl(thread_state.bts_fd, PERF_EVENT_IOC_ENABLE, 0); | |
} | |
const struct bts_aux_record *bts_stop(ssize_t *OUT_num_elements) | |
{ | |
const size_t element_size = sizeof(uint64_t) * 3; | |
int r; | |
/* Stop BTS tracing ASAP to minimise noise. */ | |
r = ioctl(thread_state.bts_fd, PERF_EVENT_IOC_DISABLE, 0); | |
*OUT_num_elements = 0; | |
if (r < 0) { | |
perror("perf disable"); | |
*OUT_num_elements = r; | |
return NULL; | |
} | |
const uint64_t aux_head = thread_state.perf->aux_head; | |
const uint64_t aux_tail = thread_state.perf->aux_tail; | |
const uint64_t aux_size = thread_state.perf->aux_size; | |
/* | |
* aux_size is a power of two, so | |
* (x & aux_mask) == (x % aux_size). | |
*/ | |
const uint64_t aux_mask = aux_size - 1; | |
if (aux_head - aux_tail > aux_size) { | |
fprintf(stderr, | |
"Auxiliary data overflow despite non-overwrite mode (!?)\n"); | |
*OUT_num_elements = -1; | |
return NULL; | |
} | |
if (aux_head == aux_tail) { | |
*OUT_num_elements = 0; | |
return NULL; | |
} | |
/* | |
* Copy complete BTS records from the ring buffer in order. | |
*/ | |
const char *src = thread_state.aux_buffer; | |
struct bts_aux_record *dst = thread_state.linear_result; | |
uint64_t offset = aux_tail; | |
while (offset < aux_head) { | |
/* | |
* Convert the logical monotonic offset to a byte | |
* index by taking mod aux_size. | |
*/ | |
size_t index = offset & aux_mask; | |
/* If there's room for a record, copy it */ | |
if (index <= aux_size - element_size) { | |
memcpy(dst++, src + index, element_size); | |
offset += element_size; | |
} else { | |
/* ... otherwise, skip over the padding. */ | |
offset += aux_size - index; | |
} | |
} | |
/* Consumption should match the production logic exactly. */ | |
if (offset != aux_head) { | |
fprintf(stderr, "Unexpected aux packet size.\n"); | |
*OUT_num_elements = -1; | |
return NULL; | |
} | |
*OUT_num_elements = dst - thread_state.linear_result; | |
return thread_state.linear_result; | |
} | |
#ifdef TEST | |
#include <assert.h> | |
#include <inttypes.h> | |
int main (int argc, char**argv) | |
{ | |
if (argc > 1) { | |
bts_init(atoi(argv[1])); | |
} else { | |
/* | |
* The value in | |
* `/sys/bus/event_source/devices/intel_bts/type` on | |
* my machine. | |
*/ | |
bts_init(8); | |
} | |
int r = bts_setup(256 * PAGE_SIZE); | |
assert(r == 0); | |
ssize_t num_entries; | |
const struct bts_aux_record *entries; | |
for (size_t j = 0; j < 10; j++) { | |
r = bts_start(); | |
assert(r == 0); | |
for (size_t i = 0; i < 1000; i++) { | |
asm volatile("" : "+r"(i)); | |
} | |
entries = bts_stop(&num_entries); | |
printf("num_entries: %zd\n", num_entries); | |
assert(entries != NULL); | |
printf("%"PRIx64" %"PRIx64" %"PRIx64"\n", | |
entries[10].from, | |
entries[10].to, | |
entries[10].flags); | |
printf("%"PRIx64" %"PRIx64" %"PRIx64"\n", | |
entries[num_entries - 10].from, | |
entries[num_entries - 10].to, | |
entries[num_entries - 10].flags); | |
} | |
{ | |
r = bts_start(); | |
assert(r == 0); | |
for (size_t i = 0; i < 10; i++) { | |
asm volatile("" : "+r"(i)); | |
} | |
entries = bts_stop(&num_entries); | |
printf("num_entries: %zd\n", num_entries); | |
printf("%"PRIx64" %"PRIx64" %"PRIx64"\n", | |
entries[num_entries - 1].from, | |
entries[num_entries - 1].to, | |
entries[num_entries - 1].flags); | |
} | |
bts_teardown(); | |
return 0; | |
} | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment