Skip to content

Instantly share code, notes, and snippets.

@pablogsal
Last active November 14, 2024 19:03
Show Gist options
  • Save pablogsal/0c5304a0c8928c1d1cc9e6bb8a0e765a to your computer and use it in GitHub Desktop.
Save pablogsal/0c5304a0c8928c1d1cc9e6bb8a0e765a to your computer and use it in GitHub Desktop.
#include <elf.h>
#include <execinfo.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <unwind.h>
// DW_CFA instruction opcodes (DWARF call-frame instructions).
// The first three entries are the opcodes whose value lives in the top two
// bits of the instruction byte; this file ORs a small operand into the low
// six bits (e.g. `DW_CFA_offset | 16`, `DW_CFA_advance_loc | 1`).
// Every other entry is a complete opcode byte; where this file uses them,
// the operands follow as separate bytes.
enum dwarf_cfa_ops {
// Opcodes combined with an inline operand in the low six bits.
DW_CFA_advance_loc = 0x40,
DW_CFA_offset = 0x80,
DW_CFA_restore = 0xc0,
// Opcodes that occupy the whole byte.
DW_CFA_nop = 0x00,
DW_CFA_set_loc = 0x01,
DW_CFA_advance_loc1 = 0x02,
DW_CFA_advance_loc2 = 0x03,
DW_CFA_advance_loc4 = 0x04,
DW_CFA_offset_extended = 0x05,
DW_CFA_restore_extended = 0x06,
DW_CFA_undefined = 0x07,
DW_CFA_same_value = 0x08,
DW_CFA_register = 0x09,
DW_CFA_remember_state = 0x0a,
DW_CFA_restore_state = 0x0b,
DW_CFA_def_cfa = 0x0c,
DW_CFA_def_cfa_register = 0x0d,
DW_CFA_def_cfa_offset = 0x0e,
DW_CFA_def_cfa_expression = 0x0f,
DW_CFA_expression = 0x10,
DW_CFA_offset_extended_sf = 0x11,
DW_CFA_def_cfa_sf = 0x12,
DW_CFA_def_cfa_offset_sf = 0x13,
DW_CFA_val_offset = 0x14,
DW_CFA_val_offset_sf = 0x15,
DW_CFA_val_expression = 0x16
};
/*
 * Encode `val` as unsigned LEB128 at `*buf`.
 *
 * Seven payload bits are emitted per byte, least-significant group first;
 * every byte except the last has its top bit set. `*buf` is advanced past
 * the encoded bytes and the number of bytes written is returned. The caller
 * must provide enough room (at most 10 bytes for a 64-bit value).
 */
static size_t write_uleb128(uint8_t **buf, uint64_t val) {
    uint8_t *const first = *buf;
    uint8_t *out = first;

    for (;;) {
        const uint8_t low7 = (uint8_t)(val & 0x7f);
        val >>= 7;
        if (val == 0) {
            /* Last group: no continuation bit. */
            *out++ = low7;
            break;
        }
        *out++ = (uint8_t)(low7 | 0x80);
    }

    *buf = out;
    return (size_t)(out - first);
}
/*
 * Encode `val` as signed LEB128 at `*buf`.
 *
 * Seven bits are emitted per byte, least-significant group first. Encoding
 * stops once the remaining value is pure sign extension of the group just
 * written (0 with bit 6 clear, or -1 with bit 6 set). `*buf` is advanced
 * past the encoded bytes and the number of bytes written is returned.
 */
static size_t write_sleb128(uint8_t **buf, int64_t val) {
    uint8_t *const first = *buf;
    uint8_t *out = first;
    bool finished = false;

    while (!finished) {
        const uint8_t low7 = (uint8_t)(val & 0x7f);
        const bool sign_bit_set = (low7 & 0x40) != 0;

        val >>= 7;
        /* Done when what is left is exactly the sign extension of this group. */
        finished = sign_bit_set ? (val == -1) : (val == 0);
        *out++ = finished ? low7 : (uint8_t)(low7 | 0x80);
    }

    *buf = out;
    return (size_t)(out - first);
}
/*
 * Build a minimal .eh_frame image: one CIE followed by one FDE that covers
 * [code_start, code_start + code_size).
 *
 * Layout produced:
 *   CIE: version 1, augmentation "zR", code alignment 1, data alignment -8,
 *        return-address column 16, FDE pointer encoding 0x00 (absolute).
 *   FDE: absolute 8-byte PC begin and 8-byte range, no augmentation data,
 *        then a fixed prologue/epilogue description.
 * The register numbers used (7, 6, 16) are DWARF numbers; they match the
 * x86-64 numbering for rsp, rbp and the return address.
 *
 * Parameters:
 *   code_start - address of the first byte of the described code
 *   code_size  - length of the described code in bytes
 *   size_out   - if non-NULL, receives the number of CIE + FDE bytes
 *                (the terminator described below is NOT counted)
 *
 * Returns a heap buffer the caller must free(), or NULL if allocation fails.
 * The FDE starts at `buffer + 4 + <CIE length field>`.
 *
 * The buffer is zero-filled, so the FDE is always followed by a 4-byte zero
 * length word. libgcc's frame registration walks entries until it meets such
 * a terminator, so handing it an unterminated buffer reads past the data.
 */
static uint8_t* generate_test_frames(uintptr_t code_start, size_t code_size, size_t* size_out) {
    enum { FRAME_BUF_SIZE = 1024, LENGTH_FIELD_SIZE = 4 };

    static const uint8_t cie_instructions[] = {
        DW_CFA_def_cfa, 7, 8,    // def_cfa r7 ofs 8
        DW_CFA_offset | 16, 1,   // offset r16 at cfa-8
        DW_CFA_nop, DW_CFA_nop
    };
    static const uint8_t fde_instructions[] = {
        DW_CFA_advance_loc | 1,        // advance 1
        DW_CFA_def_cfa_offset, 16,     // def_cfa_offset 16
        DW_CFA_offset | 6, 2,          // offset r6 at cfa-16 (2 = -16/-8)
        DW_CFA_advance_loc | 3,        // advance 3
        DW_CFA_def_cfa_register, 6,    // def_cfa_register r6
        DW_CFA_advance_loc | 15,       // advance 15
        DW_CFA_def_cfa, 7, 8,          // def_cfa r7 ofs 8
        DW_CFA_nop, DW_CFA_nop, DW_CFA_nop
    };

    // calloc rather than malloc: everything after the FDE must read as zero.
    uint8_t *start = calloc(1, FRAME_BUF_SIZE);
    if (start == NULL) {
        return NULL;
    }
    uint8_t *cur = start;
    uint32_t word;

    // ---- CIE ----
    cur += LENGTH_FIELD_SIZE;            // length is patched in once known
    word = 0;                            // CIE ID (0 marks a CIE in .eh_frame)
    memcpy(cur, &word, sizeof word);
    cur += sizeof word;
    *cur++ = 1;                          // version
    *cur++ = 'z';                        // augmentation string "zR"
    *cur++ = 'R';
    *cur++ = '\0';
    write_uleb128(&cur, 1);              // code alignment factor
    write_sleb128(&cur, -8);             // data alignment factor
    write_uleb128(&cur, 16);             // return-address register
    write_uleb128(&cur, 1);              // augmentation data length
    *cur++ = 0x00;                       // 'R' data: FDE pointers are absolute
    memcpy(cur, cie_instructions, sizeof cie_instructions);
    cur += sizeof cie_instructions;
    word = (uint32_t)(cur - start - LENGTH_FIELD_SIZE);
    memcpy(start, &word, sizeof word);   // CIE length, excluding the field itself

    // ---- FDE ----
    uint8_t *fde_start = cur;
    cur += LENGTH_FIELD_SIZE;            // length is patched in once known
    // CIE pointer: distance from this field back to the start of the CIE.
    word = (uint32_t)(cur - start);
    memcpy(cur, &word, sizeof word);
    cur += sizeof word;
    // PC begin and range, both stored as 8-byte values.
    uint64_t pc_begin = (uint64_t)code_start;
    memcpy(cur, &pc_begin, sizeof pc_begin);
    cur += sizeof pc_begin;
    uint64_t pc_range = (uint64_t)code_size;
    memcpy(cur, &pc_range, sizeof pc_range);
    cur += sizeof pc_range;
    write_uleb128(&cur, 0);              // no FDE augmentation data
    memcpy(cur, fde_instructions, sizeof fde_instructions);
    cur += sizeof fde_instructions;
    word = (uint32_t)(cur - fde_start - LENGTH_FIELD_SIZE);
    memcpy(fde_start, &word, sizeof word);

    // Explicit zero-length terminator (already zero from calloc; kept so the
    // requirement is visible). It is deliberately left out of *size_out.
    memset(cur, 0, LENGTH_FIELD_SIZE);

    if (size_out != NULL) {
        *size_out = (size_t)(cur - start);
    }
    return start;
}
extern void __register_frame(const void*);
extern void __deregister_frame(const void*);
/*
 * Print the symbolic description of a single code address to stdout.
 * The description comes from backtrace_symbols(); nothing is printed when
 * that call returns NULL.
 */
void print_symbol(void *addr) {
    void *frames[1];
    frames[0] = addr;

    char **names = backtrace_symbols(frames, 1);
    if (names == NULL) {
        return;
    }

    printf("Symbol for address %p: %s\n", addr, names[0]);
    // backtrace_symbols() returns one malloc'd block; a single free releases it.
    free(names);
}
// Example backtrace callback for _Unwind_Backtrace that records each frame's PC.
//
// `arg` must point to a `void **` cursor that itself points into a
// caller-owned array of `void *`. For every frame with a non-zero PC the
// callback stores the PC at the cursor and advances the cursor by one slot,
// so after the walk `cursor - array` is the number of frames recorded.
//
// The previous version treated `arg` as both the output slot and the cursor
// location, so each stored PC was immediately overwritten by the cursor.
//
// NOTE: there is no capacity check (the callback signature has no room for
// one); the caller must supply an array large enough for the deepest stack.
static _Unwind_Reason_Code trace_func(struct _Unwind_Context* context, void* arg) {
    void *pc = (void *)_Unwind_GetIP(context);
    if (pc) {
        void ***cursor = (void ***)arg;
        void **slot = *cursor;
        *slot = pc;
        *cursor = slot + 1;
    }
    return _URC_NO_REASON;
}
// Backtrace callback used for testing: for each frame visited by
// _Unwind_Backtrace, print the frame's instruction pointer and then its
// symbolic description. `arg` is unused. Always asks the unwinder to continue.
static _Unwind_Reason_Code trace_fn(struct _Unwind_Context *ctx, void *arg) {
    (void)arg;

    const uintptr_t frame_ip = _Unwind_GetIP(ctx);

    printf("PC: %lx\n", frame_ip);
    print_symbol((void *)frame_ip);

    return _URC_NO_REASON;
}
// Helper that the JIT-copied code calls: prints a greeting, walks the current
// call stack with _Unwind_Backtrace (printing every frame through trace_fn),
// and returns the fixed value 42.
int print_hello(void) {
    enum { HELLO_RESULT = 42 };

    fputs("Hello from nested function!\n", stdout);
    fputs("\nBacktrace:\n", stdout);

    _Unwind_Backtrace(trace_fn, NULL);

    fputs("\n", stdout);
    return HELLO_RESULT;
}
typedef int (*trampoline)(void);
typedef int (*call_func_t)(trampoline);
// Our source function that we'll copy.
// main() memcpy()s the machine code starting at this function's address into
// an executable mapping and calls the copy, passing it a function to invoke.
// The copy simply calls `function` and returns its result.
// noinline keeps a standalone body for main() to copy.
// NOTE(review): the hand-written FDE instructions in generate_test_frames
// presumably describe this function's compiled prologue/epilogue, so changing
// this body or the optimisation level may invalidate them — confirm.
__attribute__((noinline))
int call_func(trampoline function) {
return function();
}
/*
 * Write a minimal 64-bit little-endian x86-64 relocatable ELF file that
 * contains the given bytes as its .eh_frame section.
 *
 * File layout, in order: ELF header, four section headers (null, .eh_frame,
 * .shstrtab, trailing null), the section-name string table, the .eh_frame
 * payload.
 *
 * Parameters:
 *   filename      - path of the file to create or truncate
 *   eh_frame_data - bytes to store in .eh_frame (may be NULL only if size is 0)
 *   eh_frame_size - number of bytes in eh_frame_data
 *
 * The function returns nothing; failures are reported on stderr. A failed
 * write can leave a truncated file behind.
 */
void write_elf_file(const char* filename, uint8_t* eh_frame_data, size_t eh_frame_size) {
    enum { SECTION_COUNT = 4, EH_FRAME_INDEX = 1, SHSTRTAB_INDEX = 2 };
    // Name offsets below index into this table: 1 -> ".eh_frame", 11 -> ".shstrtab".
    static const char strtab[] = "\0.eh_frame\0.shstrtab";

    if (filename == NULL || (eh_frame_data == NULL && eh_frame_size != 0)) {
        fprintf(stderr, "write_elf_file: invalid arguments\n");
        return;
    }

    const size_t shstrtab_offset = sizeof(Elf64_Ehdr) + (SECTION_COUNT * sizeof(Elf64_Shdr));
    const size_t eh_frame_offset = shstrtab_offset + sizeof(strtab);

    Elf64_Ehdr ehdr = {
        .e_ident = {
            ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
            ELFCLASS64, ELFDATA2LSB, EV_CURRENT,
            0, 0, 0, 0, 0, 0, 0, 0, 0
        },
        .e_type = ET_REL,
        .e_machine = EM_X86_64,
        .e_version = EV_CURRENT,
        .e_shoff = sizeof(Elf64_Ehdr),   // section headers follow the ELF header
        .e_ehsize = sizeof(Elf64_Ehdr),
        .e_shentsize = sizeof(Elf64_Shdr),
        .e_shnum = SECTION_COUNT,
        .e_shstrndx = SHSTRTAB_INDEX
    };

    // Entries not listed (index 0 and the last one) stay zero-filled, i.e. null sections.
    Elf64_Shdr shdrs[SECTION_COUNT] = {
        [EH_FRAME_INDEX] = {
            .sh_name = 1,                // offset of ".eh_frame" in strtab
            .sh_type = SHT_PROGBITS,
            .sh_flags = SHF_ALLOC,
            .sh_offset = eh_frame_offset,
            .sh_size = eh_frame_size,
            .sh_addralign = 8,
            .sh_link = 0,
            .sh_info = 0
        },
        [SHSTRTAB_INDEX] = {
            .sh_name = 11,               // offset of ".shstrtab" in strtab
            .sh_type = SHT_STRTAB,
            .sh_offset = shstrtab_offset,
            .sh_size = sizeof(strtab),
            .sh_addralign = 1,
            .sh_link = 0,
            .sh_info = 0
        }
    };

    FILE *f = fopen(filename, "wb");
    if (f == NULL) {
        perror(filename);
        return;
    }

    int ok = fwrite(&ehdr, sizeof(ehdr), 1, f) == 1
          && fwrite(shdrs, sizeof(shdrs), 1, f) == 1
          && fwrite(strtab, sizeof(strtab), 1, f) == 1;
    // fwrite with a zero item size returns 0, so skip it for an empty payload.
    if (ok && eh_frame_size != 0) {
        ok = fwrite(eh_frame_data, eh_frame_size, 1, f) == 1;
    }
    if (!ok) {
        perror("write_elf_file: write failed");
    }
    // fclose flushes buffered data, so its result matters for output files.
    if (fclose(f) != 0 && ok) {
        perror("write_elf_file: close failed");
    }
}
/*
 * Demo driver:
 *   1. copy the machine code of call_func into a fresh anonymous mapping,
 *   2. make the mapping executable,
 *   3. generate unwind information for the copy and register it,
 *   4. call the copy so that print_hello can unwind through it,
 *   5. deregister the unwind information and save it to "output.o".
 *
 * Returns 0 on success and 1 if any setup step fails.
 */
int main(void) {
    // mmap/mprotect work at page granularity.
    const long page = sysconf(_SC_PAGESIZE);
    if (page <= 0) {
        perror("sysconf failed");
        return 1;
    }
    const size_t alloc_size = (size_t)page;

    // Number of bytes copied from call_func. This is a fixed guess, not the
    // function's real size; it is assumed to cover the whole function.
    const size_t func_size = 64;
    void *func_addr = (void*)call_func;

    // mmap already returns page-aligned memory, so no extra alignment is needed.
    void *jit_memory = mmap(NULL, alloc_size,
                            PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (jit_memory == MAP_FAILED) {
        perror("mmap failed");
        return 1;
    }

    // Copy the function while the page is still writable, then flip it to
    // read+execute.
    memcpy(jit_memory, func_addr, func_size);
    if (mprotect(jit_memory, alloc_size, PROT_READ | PROT_EXEC) != 0) {
        perror("mprotect failed");
        munmap(jit_memory, alloc_size);
        return 1;
    }
    // Make sure the instruction stream sees the bytes just written.
    __builtin___clear_cache((char*)jit_memory, (char*)jit_memory + func_size);

    // Describe the copied code to the unwinder.
    size_t size = 0;
    uint8_t *generated = generate_test_frames((uintptr_t)jit_memory, func_size, &size);
    if (!generated) {
        perror("Failed to create unwind info");
        munmap(jit_memory, alloc_size);
        return 1;
    }

    // The FDE follows the CIE: skip the 4-byte length field plus the CIE body.
    uint32_t cie_length;
    memcpy(&cie_length, generated, sizeof cie_length);
    void *fde_ptr = generated + sizeof cie_length + cie_length;
    __register_frame(fde_ptr);

    // Cast and call the copied function.
    call_func_t jit_func = (call_func_t)jit_memory;
    printf("JIT function returned: %d\n", jit_func(&print_hello));

    // The unwinder must forget the frame data before it is freed.
    __deregister_frame(fde_ptr);

    // Save the same frame data that was just used, instead of generating and
    // leaking a second copy.
    write_elf_file("output.o", generated, size);

    free(generated);
    munmap(jit_memory, alloc_size);
    return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <stdint.h>
#include <unwind.h>
#include <elf.h>
#include <fcntl.h>
/*
$ cat trampoline.c
typedef int (*trampoline)(void);
typedef int (*call_func_t)(trampoline);
__attribute__((noinline))
int call_func(trampoline function) {
return function();
}
$ gcc -O0 -c trampoline.c
*/
extern void __register_frame(const void*);
extern void __deregister_frame(const void*);
// DWARF constants for CFI (call-frame instruction opcodes).
// The values 0x40/0x80/0xc0 occupy the top two bits of an opcode byte; the
// others are complete opcode bytes.
// NOTE(review): none of these macros is referenced by the code visible in
// this file; they appear to be kept for reference.
#define DW_CFA_advance_loc 0x40
#define DW_CFA_offset 0x80
#define DW_CFA_restore 0xc0
#define DW_CFA_nop 0x00
#define DW_CFA_def_cfa 0x0c
#define DW_CFA_def_cfa_register 0x0d
#define DW_CFA_def_cfa_offset 0x0e
// AArch64 registers (numbers as used in the unwind data)
#define AARCH64_FP 29 // x29 - Frame Pointer
#define AARCH64_LR 30 // x30 - Link Register
#define AARCH64_SP 31 // sp - Stack Pointer
// DWARF CIE (Common Information Entry) structure matching GCC's output.
// The struct is packed so it can be overlaid directly on the first bytes of
// a raw .eh_frame section (load_eh_frame casts the section data to this type).
// The trailing comments give the byte values the author expected to find in
// trampoline.o.
// NOTE(review): nothing in this file checks those values, and the three
// single-byte alignment/register fields only fit entries whose encoded values
// take one byte each — confirm against the real object file.
typedef struct {
uint32_t length; // 0x10 - size of the entry, not counting this field
uint32_t cie_id; // 0
uint8_t version; // 1
char augmentation[3]; // "zR\0"
uint8_t code_align; // 4
int8_t data_align; // 0x78 (-8 for 64-bit)
uint8_t return_reg; // 0x1e (x30)
uint8_t aug_len; // 1
uint8_t aug_data; // 0x1b - create_unwind_info replaces this with 0x1c
uint8_t cfa_insn[3]; // {0x0c, 0x1f, 0x00}
} __attribute__((packed)) CIE;
// DWARF FDE (Frame Description Entry) structure matching GCC's output.
// Packed so it can be overlaid on the bytes that follow the CIE in a raw
// .eh_frame section (load_eh_frame reads it at offset sizeof(CIE)).
// This layout has 4-byte initial_loc/range fields; create_unwind_info does
// not reuse it for output and writes 8-byte versions of those fields instead.
// NOTE(review): the struct is 32 bytes, while the expected length value 0x20
// would describe 36 bytes including the length field, so the real entry
// presumably carries more instruction/padding bytes than insns[] holds — confirm.
typedef struct {
uint32_t length; // 0x20
uint32_t cie_pointer; // 0x18
uint32_t initial_loc; // PC-relative
uint32_t range; // 0x1c
uint8_t aug_len; // 0
uint8_t insns[15]; // Instructions and padding
} __attribute__((packed)) FDE;
/*
 * Hex-dump `size` bytes from `data` to stdout under the heading `desc`.
 *
 * Output is arranged in rows of 16 bytes. Each row starts with the offset of
 * its first byte, an extra space follows every fourth byte, and a newline
 * ends every complete row. One final newline is always printed.
 */
static void dump_memory(const char* desc, const uint8_t* data, size_t size) {
    enum { ROW_BYTES = 16, GROUP_BYTES = 4 };

    printf("\n%s (%zu bytes):\n", desc, size);

    for (size_t row = 0; row < size; row += ROW_BYTES) {
        const size_t remaining = size - row;
        const size_t row_end = remaining < ROW_BYTES ? size : row + ROW_BYTES;

        printf(" %04zx ", row);
        for (size_t pos = row; pos < row_end; pos++) {
            printf("%02x ", data[pos]);
            if ((pos + 1) % GROUP_BYTES == 0) {
                printf(" ");
            }
        }
        // Only complete rows get their own line break; a partial last row is
        // closed by the final newline below.
        if (remaining >= ROW_BYTES) {
            printf("\n");
        }
    }

    printf("\n");
}
#include <execinfo.h>
#include <stdio.h>
/*
 * Look up a single code address with backtrace_symbols() and print the
 * resulting description to stdout. Prints nothing if the lookup returns NULL.
 */
void print_symbol(void *addr) {
    void *lookup[1];
    lookup[0] = addr;

    char **described = backtrace_symbols(lookup, 1);
    if (described == NULL) {
        return;
    }

    printf("Symbol for address %p: %s\n", addr, described[0]);
    // The strings live inside the single block returned by backtrace_symbols().
    free(described);
}
/* Read exactly `len` bytes starting at file offset `off`.
 * Returns 0 on success, -1 on seek error, read error or premature EOF. */
static int eh_read_exact_at(int fd, off_t off, void *buf, size_t len) {
    if (lseek(fd, off, SEEK_SET) == (off_t)-1) {
        return -1;
    }
    uint8_t *dst = buf;
    while (len > 0) {
        ssize_t got = read(fd, dst, len);
        if (got <= 0) {
            return -1;
        }
        dst += got;
        len -= (size_t)got;
    }
    return 0;
}

/* Print the CIE and FDE found at the start of `frame_data`, field by field
 * and as raw bytes. The caller guarantees at least sizeof(CIE) + sizeof(FDE)
 * readable bytes. */
static void eh_print_layout(const uint8_t *frame_data) {
    printf("\nParsing .eh_frame structures:\n");

    const CIE* cie = (const CIE*)frame_data;
    printf("\nCIE at offset 0x00:\n");
    printf(" length: 0x%08x (4 bytes)\n", cie->length);
    printf(" cie_id: 0x%08x (4 bytes)\n", cie->cie_id);
    printf(" version: 0x%02x (1 byte)\n", cie->version);
    printf(" augmentation: '%c%c%c' (3 bytes)\n",
           cie->augmentation[0], cie->augmentation[1], cie->augmentation[2]);
    printf(" code_align: 0x%02x (1 byte)\n", cie->code_align);
    printf(" data_align: 0x%02x (1 byte)\n", cie->data_align);
    printf(" return_reg: 0x%02x (1 byte)\n", cie->return_reg);
    printf(" aug_len: 0x%02x (1 byte)\n", cie->aug_len);
    printf(" aug_data: 0x%02x (1 byte)\n", cie->aug_data);
    printf(" cfa_insn: 0x%02x 0x%02x 0x%02x (3 bytes)\n",
           cie->cfa_insn[0], cie->cfa_insn[1], cie->cfa_insn[2]);
    dump_memory("Raw CIE bytes", frame_data, sizeof(CIE));

    const FDE* fde = (const FDE*)(frame_data + sizeof(CIE));
    printf("\nFDE at offset 0x%zx:\n", sizeof(CIE));
    printf(" length: 0x%08x (4 bytes)\n", fde->length);
    printf(" cie_pointer: 0x%08x (4 bytes)\n", fde->cie_pointer);
    printf(" initial_loc: 0x%08x (4 bytes)\n", fde->initial_loc);
    printf(" range: 0x%08x (4 bytes)\n", fde->range);
    printf(" aug_len: 0x%02x (1 byte)\n", fde->aug_len);
    printf(" insns: ");
    for (size_t i = 0; i < sizeof(fde->insns); i++) {
        printf("%02x ", fde->insns[i]);
        if ((i + 1) % 4 == 0) printf(" ");
    }
    printf(" (%zu bytes)\n", sizeof(fde->insns));
    dump_memory("Raw FDE bytes", (const uint8_t*)fde, sizeof(FDE));
}

/*
 * Load the raw contents of the .eh_frame section from a 64-bit ELF file.
 *
 * Parameters:
 *   filename - path of the ELF file to read
 *   size_out - receives the section size in bytes on success
 *
 * Returns a heap buffer holding the section bytes (the caller frees it), or
 * NULL if the file cannot be opened or read, is not a well-formed 64-bit ELF
 * file, or has no non-empty .eh_frame section. *size_out is left untouched
 * on failure.
 *
 * As a side effect the leading CIE and FDE are printed to stdout, but only
 * when the section is large enough to hold both structures; smaller sections
 * are returned without the dump instead of being read out of bounds.
 */
static uint8_t* load_eh_frame(const char* filename, size_t* size_out) {
    uint8_t *result = NULL;
    Elf64_Shdr *shdrs = NULL;
    char *strtab = NULL;
    uint8_t *frame_data = NULL;
    const Elf64_Shdr *eh_frame = NULL;
    size_t strtab_size = 0;
    size_t frame_size = 0;
    Elf64_Ehdr ehdr;

    int fd = open(filename, O_RDONLY);
    if (fd < 0) {
        perror("open failed");
        return NULL;
    }

    // ELF header: must be a 64-bit ELF with section headers of the size this
    // code indexes with, and a valid section-name table index.
    if (eh_read_exact_at(fd, 0, &ehdr, sizeof(ehdr)) != 0) {
        goto out;
    }
    if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
        ehdr.e_ident[EI_CLASS] != ELFCLASS64) {
        goto out;
    }
    if (ehdr.e_shentsize != sizeof(Elf64_Shdr) || ehdr.e_shnum == 0 ||
        ehdr.e_shstrndx >= ehdr.e_shnum) {
        goto out;
    }

    // Section headers.
    shdrs = malloc((size_t)ehdr.e_shnum * sizeof *shdrs);
    if (shdrs == NULL ||
        eh_read_exact_at(fd, (off_t)ehdr.e_shoff, shdrs,
                         (size_t)ehdr.e_shnum * sizeof *shdrs) != 0) {
        goto out;
    }

    // Section-name string table. It must end in NUL so that strcmp on any
    // in-range name offset stays inside the buffer.
    strtab_size = shdrs[ehdr.e_shstrndx].sh_size;
    if (strtab_size == 0) {
        goto out;
    }
    strtab = malloc(strtab_size);
    if (strtab == NULL ||
        eh_read_exact_at(fd, (off_t)shdrs[ehdr.e_shstrndx].sh_offset,
                         strtab, strtab_size) != 0 ||
        strtab[strtab_size - 1] != '\0') {
        goto out;
    }

    // Find .eh_frame section.
    for (size_t i = 0; i < ehdr.e_shnum; i++) {
        if (shdrs[i].sh_name >= strtab_size) {
            continue;   // corrupt name offset: cannot be the section we want
        }
        if (strcmp(strtab + shdrs[i].sh_name, ".eh_frame") == 0) {
            eh_frame = &shdrs[i];
            break;
        }
    }
    if (eh_frame == NULL || eh_frame->sh_size == 0) {
        goto out;
    }

    // Section contents.
    frame_size = eh_frame->sh_size;
    frame_data = malloc(frame_size);
    if (frame_data == NULL) {
        goto out;
    }
    if (eh_read_exact_at(fd, (off_t)eh_frame->sh_offset, frame_data, frame_size) != 0) {
        perror("read eh_frame failed");
        goto out;
    }

    if (frame_size >= sizeof(CIE) + sizeof(FDE)) {
        eh_print_layout(frame_data);
    } else {
        fprintf(stderr, ".eh_frame is only %zu bytes; skipping CIE/FDE dump\n", frame_size);
    }

    *size_out = frame_size;
    result = frame_data;
    frame_data = NULL;   // ownership moves to the caller

out:
    free(frame_data);
    free(strtab);
    free(shdrs);
    close(fd);
    return result;
}
// static uint8_t* create_unwind_info(void* code_start, size_t code_size) {
// size_t eh_size;
// uint8_t* template_data = load_eh_frame("trampoline.o", &eh_size);
// if (!template_data) {
// fprintf(stderr, "Failed to load eh_frame from object file\n");
// return NULL;
// }
// // Allocate and copy the template
// uint8_t* frame_addr = malloc(eh_size+4); // 4 is for padding
// printf("Allocated frame_addr: %p\n", frame_addr);
// memcpy(frame_addr, template_data, eh_size);
// // Validate structures match
// CIE* cie = (CIE*)frame_addr;
// FDE* fde = (FDE*)(frame_addr + sizeof(CIE));
// printf("Loaded CIE:\n");
// printf(" length: 0x%x\n", cie->length);
// printf(" augmentation: %c%c\\0\n", cie->augmentation[0], cie->augmentation[1]);
// printf(" data_align: 0x%x\n", cie->data_align);
// // Patch the PC-relative offset
// fde->initial_loc = (int32_t)((intptr_t)code_start - (intptr_t)(frame_addr + sizeof(CIE)));
// fde->cie_pointer = (uint32_t) ((uintptr_t)&(fde->cie_pointer) - (uintptr_t)cie);
// printf("Initial location: %p\n", fde->initial_loc);
// printf("code start: 0x%lx\n", (intptr_t)code_start);
// free(template_data);
// printf("Returning frame_addr: %p\n", frame_addr + sizeof(CIE));
// return frame_addr + sizeof(CIE);
/* } */
//
static uint8_t* create_unwind_info(void* code_start, size_t code_size) {
size_t eh_size;
uint8_t* template_data = load_eh_frame("trampoline.o", &eh_size);
if (!template_data) {
fprintf(stderr, "Failed to load eh_frame from object file\n");
return NULL;
}
// Allocate buffer
uint8_t* frame_addr = malloc(eh_size + 2 * 4);
printf("Allocated frame_addr: %p\n", frame_addr);
// Get source structures for reference
CIE* src_cie = (CIE*)template_data;
FDE* src_fde = (FDE*)(template_data + sizeof(CIE));
// Write CIE fields one by one
uint8_t* current = frame_addr;
// CIE fields
memcpy(current, &src_cie->length, sizeof(uint32_t));
current += sizeof(uint32_t);
memcpy(current, &src_cie->cie_id, sizeof(uint32_t));
current += sizeof(uint32_t);
*current++ = src_cie->version;
memcpy(current, src_cie->augmentation, 3);
current += 3;
*current++ = src_cie->code_align;
*current++ = src_cie->data_align;
*current++ = src_cie->return_reg;
*current++ = src_cie->aug_len;
src_cie->aug_data = 0x1c; //
*current++ = src_cie->aug_data;
memcpy(current, src_cie->cfa_insn, 3);
current += 3;
// Start writing FDE fields
uint32_t fde_length = src_fde->length;
memcpy(current, &fde_length, sizeof(uint32_t));
current += sizeof(uint32_t);
uint32_t cie_pointer = (uint32_t)((uintptr_t)current - (uintptr_t)frame_addr);
memcpy(current, &cie_pointer, sizeof(uint32_t));
current += sizeof(uint32_t);
// Write initial_loc as 8 bytes
uint64_t initial_loc = (uint64_t)((intptr_t)code_start - (intptr_t)(frame_addr + sizeof(CIE)));
memcpy(current, &initial_loc, sizeof(uint64_t));
current += sizeof(uint64_t);
// Write range as 8 bytes
uint64_t range = (uint64_t)code_size;
memcpy(current, &range, sizeof(uint64_t));
current += sizeof(uint64_t);
// Write remaining FDE fields
*current++ = 0;
memcpy(current, src_fde->insns, sizeof(src_fde->insns));
printf("Initial location: %p\n", (void*)initial_loc);
printf("code start: 0x%lx\n", (intptr_t)code_start);
free(template_data);
printf("Returning frame_addr: %p\n", frame_addr + sizeof(CIE));
return frame_addr + sizeof(CIE);
}
// Example backtrace callback for _Unwind_Backtrace that collects frame PCs.
//
// `arg` must be the address of a `void **` cursor pointing into an array of
// `void *` owned by the caller. Each frame with a non-zero PC is written to
// the slot under the cursor, and the cursor then moves to the next slot.
//
// Previously `arg` was used as the slot and as the place to store the
// advanced cursor, which meant every recorded PC was overwritten at once.
//
// NOTE: the callback cannot check capacity; the array must be big enough for
// the whole stack being walked.
static _Unwind_Reason_Code trace_func(struct _Unwind_Context* context, void* arg) {
    void *pc = (void *)_Unwind_GetIP(context);
    if (pc) {
        void ***cursor_ref = (void ***)arg;
        void **next_slot = *cursor_ref;
        *next_slot = pc;
        *cursor_ref = next_slot + 1;
    }
    return _URC_NO_REASON;
}
// Backtrace callback used for testing. Called once per frame by
// _Unwind_Backtrace: prints the frame's instruction pointer, then its symbol
// description, and tells the unwinder to keep going. `arg` is ignored.
static _Unwind_Reason_Code trace_fn(struct _Unwind_Context *ctx, void *arg) {
    (void)arg;

    const uintptr_t frame_pc = _Unwind_GetIP(ctx);

    printf("PC: %lx\n", frame_pc);
    print_symbol((void *)frame_pc);

    return _URC_NO_REASON;
}
// Helper invoked from the JIT-copied code. It prints a greeting, prints a
// backtrace of the current stack (one trace_fn call per frame), and returns
// the constant 42.
int print_hello(void) {
    const int answer = 42;

    fputs("Hello from nested function!\n", stdout);
    fputs("\nBacktrace:\n", stdout);

    _Unwind_Backtrace(trace_fn, NULL);

    fputs("\n", stdout);
    return answer;
}
typedef int (*trampoline)(void);
typedef int (*call_func_t)(trampoline);
// Our source function that we'll copy.
// main() memcpy()s the machine code starting at this function's address into
// an executable mapping and calls the copy with a function to invoke; the
// copy calls `function` and returns its result.
// noinline keeps a standalone body for main() to copy.
// NOTE(review): the comment near the top of this file shows this same
// function being built as trampoline.c with `gcc -O0 -c`; the unwind template
// loaded from trampoline.o presumably only matches this body when both are
// compiled the same way — confirm.
__attribute__((noinline))
int call_func(trampoline function) {
return function();
}
int main() {
// Get page size for alignment
size_t page_size = sysconf(_SC_PAGESIZE);
// First, let's examine our source function
void* func_addr = (void*)call_func;
size_t func_size = 64; // Increased size since we're calling another function
// Allocate two pages: one for code, one for frame info
size_t alloc_size = page_size * 2;
// Map memory with specific alignment and flags
void* mem = mmap(NULL, alloc_size,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mem == MAP_FAILED) {
perror("mmap failed");
return 1;
}
// Align to page boundary
void* jit_memory = (void*)(((uintptr_t)mem + page_size - 1) & ~(page_size - 1));
void* frame_memory = (void*)((char*)jit_memory + page_size);
// Copy the function
memcpy(jit_memory, func_addr, func_size);
// Make JIT memory executable
if (mprotect(jit_memory, page_size, PROT_READ | PROT_EXEC) != 0) {
perror("mprotect failed");
munmap(mem, alloc_size);
return 1;
}
// Create unwind info for AArch64
uint8_t* frame_data = create_unwind_info(jit_memory, func_size);
if (!frame_data) {
perror("Failed to create unwind info");
munmap(mem, alloc_size);
return 1;
}
// Copy frame data to frame memory
// memcpy(frame_memory, frame_data, page_size); // Assuming frame data fits in a page
// free(frame_data); // Free the temporary buffer
// Register the frame
__register_frame(frame_data);
// Ensure instruction cache coherency for AArch64
__builtin___clear_cache(jit_memory, (char*)jit_memory + func_size);
// Cast and call the copied function
call_func_t jit_func = (call_func_t)jit_memory;
printf("JIT function returned: %d\n", jit_func(&print_hello));
// Cleanup
// __deregister_frame(frame_memory);
munmap(mem, alloc_size);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment