Last active
January 31, 2022 14:07
-
-
Save numinit/721d3ea5dc63b19660600a2bbc4f33d7 to your computer and use it in GitHub Desktop.
Dumps in-memory text segments by single-stepping with ptrace. Usage: dtext <program> [args...]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Dumps in-memory text segments by single-stepping with ptrace.
 * Usage: dtext <program> [args...]
 */
#define _GNU_SOURCE | |
#include <stdio.h> | |
#include <unistd.h> | |
#include <stdbool.h> | |
#include <stdlib.h> | |
#include <errno.h> | |
#include <stdint.h> | |
#include <stdarg.h> | |
#include <string.h> | |
#include <signal.h> | |
#include <stddef.h> | |
#include <inttypes.h> | |
#include <elf.h> | |
#include <sys/ptrace.h> | |
#include <sys/types.h> | |
#include <sys/wait.h> | |
#include <sys/user.h> | |
#include <sys/syscall.h> | |
/* Page geometry assumed for the traced process (4 KiB pages). PAGEMASK is
 * derived from PAGESIZE so the two can never disagree; AND-ing an address
 * with it clears the offset-within-page bits. */
#define PAGESIZE 0x1000ULL
#define PAGEMASK (~(PAGESIZE - 1ULL))
/* Sentinel address used by the helpers in this file to signal failure. */
#define INVALID_PAGE 0ULL
/* three ring buffers for the Elven-kings under sky.clear.rice.edu */
/* The four ELF identification bytes (0x7f 'E' 'L' 'F') as a little-endian
 * 32-bit word. */
#define ELF_MAGIC 0x464c457fUL
/* A register-sized value in the tracee: an address or a word read via ptrace. */
typedef uint64_t reg_t;

/* Address range of a region that has already been dumped. Both bounds are
 * inclusive: the tracer tests start <= pc && pc <= end. */
typedef struct segment {
    reg_t start;
    reg_t end;
} segment_t;
/* Writes an informational line to stderr in the form "[I] <message>\n".
 * fmt and the variadic arguments follow printf conventions. */
static void info(const char *fmt, ...) {
    va_list args;

    fputs("[I] ", stderr);
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
    fputs("\n", stderr);
}
/* Writes an error line to stderr in the form
 * "[E] <message>: <errno> (<strerror(errno)>)\n".
 *
 * fmt and the variadic arguments follow printf conventions. The errno value
 * reported is the one in effect when error() was called: it is captured
 * before any stdio call (which may itself modify errno) and restored before
 * returning, so callers can still inspect it afterwards. */
static void error(const char *fmt, ...) {
    const int saved_errno = errno;
    va_list va;

    va_start(va, fmt);
    fprintf(stderr, "[E] ");
    vfprintf(stderr, fmt, va);
    fprintf(stderr, ": %d (%s)\n", saved_errno, strerror(saved_errno));
    va_end(va);

    errno = saved_errno;
}
/* Blocks until the child `pid` changes state.
 *
 * Returns normally only when the child is still alive (e.g. stopped under
 * ptrace). If the child has terminated, this function does not return: it
 * logs the reason and exits the tracer.
 *   - normal exit:       exits with the child's exit code;
 *   - killed by signal:  reports the signal number and exits with
 *                        EXIT_SUCCESS, so the tool's own teardown (detach
 *                        with SIGKILL, then wait) still counts as a clean run;
 *   - waitpid failure:   reports the error and exits with EXIT_FAILURE. */
static void wait_for(pid_t pid) {
    int status = 0;
    pid_t waited;

    // Retry if a signal handler interrupted the wait.
    do {
        waited = waitpid(pid, &status, 0);
    } while (waited == -1 && errno == EINTR);

    if (waited == -1) {
        // status is meaningless here; do not try to decode it.
        error("waitpid");
        exit(EXIT_FAILURE);
    }

    if (WIFEXITED(status)) {
        int code = WEXITSTATUS(status);
        info("child exited with code %d, our work here is done", code);
        exit(code);
    }

    if (WIFSIGNALED(status)) {
        // WEXITSTATUS is only defined for WIFEXITED; use WTERMSIG instead.
        info("child killed by signal %d, our work here is done", WTERMSIG(status));
        exit(EXIT_SUCCESS);
    }
}
/* Reads one word from the tracee's memory at `addr`.
 *
 * On success stores the word in *out and returns 0. Returns -1 (leaving
 * *out untouched and errno set by ptrace) when neither request could read
 * the address. */
static int peek(pid_t pid, reg_t addr, reg_t *out) {
    long word;

    // A successful peek may legitimately return -1, so errno is the only
    // reliable failure indicator and must be cleared before each attempt.
    errno = 0;

    // PTRACE_PEEKTEXT and PTRACE_PEEKDATA allegedly do the same thing?
    // In any case, try peeking at text, and then peek at data if that
    // fails.
    word = ptrace(PTRACE_PEEKTEXT, pid, (void *)addr, NULL);
    if (word == -1 && errno != 0) {
        errno = 0;
        word = ptrace(PTRACE_PEEKDATA, pid, (void *)addr, NULL);
        if (word == -1 && errno != 0) {
            // Still? Okay, we've probably reached a boundary.
            return -1;
        }
    }

    *out = (reg_t)word;
    return 0;
}
static reg_t get_pc(pid_t pid) { | |
struct user_regs_struct regs; | |
ptrace(PTRACE_GETREGS, pid, NULL, ®s); | |
return (reg_t)regs.rip; | |
} | |
static reg_t get_text_base(pid_t pid) { | |
reg_t rip = get_pc(pid); | |
// Walk backward in 1-page increments until we find the ELF header. | |
reg_t ptr = rip & PAGEMASK; // align to a page boundary | |
while (true) { | |
reg_t data; | |
if (peek(pid, ptr, &data) != 0) { | |
error("ptrace"); | |
ptr = INVALID_PAGE; | |
break; | |
} else if ((data & 0xffffffffUL) == ELF_MAGIC) { | |
// ELF header | |
break; | |
} else { | |
// back up a page | |
ptr -= PAGESIZE; | |
} | |
} | |
return ptr; | |
} | |
/* Copies up to `size` bytes of tracee memory starting at `base` into `f`,
 * one word at a time. `f` may be NULL, in which case memory is only probed.
 *
 * Reading stops early at the first address that cannot be peeked.
 *
 * Returns the address of the last byte of the page containing the final
 * byte read (i.e. an inclusive, page-rounded end address). If nothing could
 * be read, the end of the page containing `base` is returned. Returns
 * INVALID_PAGE if writing to `f` failed. */
static reg_t dump_data(pid_t pid, reg_t base, reg_t size, FILE *f) {
    reg_t ptr = base;
    reg_t dumped = 0;

    while (dumped < size) {
        reg_t data;

        if (peek(pid, ptr, &data) != 0) {
            break;
        }
        if (f && fwrite(&data, sizeof(data), 1, f) != 1) {
            error("fwrite");
            // Return the sentinel directly: pushing it through the rounding
            // below would turn it into a valid-looking address.
            return INVALID_PAGE;
        }
        ptr += sizeof(data);
        dumped += sizeof(data);
    }

    // ptr is one past the last byte read. Round from the last byte itself so
    // a read that ends exactly on a page boundary does not claim the
    // following, unread page.
    reg_t last = (dumped > 0) ? ptr - 1 : ptr;
    return (last & PAGEMASK) + PAGESIZE - 1;
}
static int dump_segment(const char *bn, reg_t vaddr, reg_t size, | |
segment_t **segments_ptr, size_t *num_segments_ptr, | |
pid_t pid) { | |
segment_t *segments = *segments_ptr; | |
size_t num_segments = *num_segments_ptr; | |
char filename[128]; | |
snprintf(filename, sizeof(filename), "%s-%016zx.elf", bn, vaddr); | |
FILE *f = fopen(filename, "wb"); | |
reg_t final; | |
if (f == NULL) { | |
error("fopen"); | |
return -1; | |
} else { | |
info("dumping segment @ 0x%016zx to %s...", vaddr, filename); | |
if ((final = dump_data(pid, vaddr, size, f)) == INVALID_PAGE) { | |
info("failed to dump data"); | |
fclose(f); | |
return -1; | |
} else { | |
info("success!"); | |
fclose(f); | |
} | |
} | |
// Add a segment | |
info("adding segment 0x%016zx..0x%016zx", vaddr, final); | |
segments = realloc(segments, sizeof(segment_t) * (++num_segments)); | |
if (segments == NULL) { | |
error("realloc"); | |
return -1; | |
} else { | |
segments[num_segments - 1].start = vaddr; | |
segments[num_segments - 1].end = final; | |
*segments_ptr = segments; | |
*num_segments_ptr = num_segments; | |
return 0; | |
} | |
} | |
/* Walks the ELF header found at `base` in the tracee and dumps every region
 * described by its program headers and then its section headers.
 *
 * bn                basename used as the output filename prefix
 * base              address of the ELF header in the tracee
 * segments_ptr,     in/out list of known segments, extended by each
 * num_segments_ptr  successful dump_segment() call
 * pid               the traced process
 *
 * Entries whose address is 0 or whose size is 0 are skipped. Returns 0 on
 * success; returns non-zero as soon as any header field cannot be read or
 * any region fails to dump.
 *
 * NOTE(review): the header tables are read at base + e_phoff / base +
 * e_shoff, i.e. file offsets are treated as offsets into the mapped image,
 * and p_vaddr / sh_addr are used as absolute addresses. Confirm this holds
 * for the binaries being traced (section headers in particular may not be
 * mapped). */
static int dump_segments(const char *bn, reg_t base, segment_t **segments_ptr,
                         size_t *num_segments_ptr, pid_t pid) {
    // Parse the ELF header to get all the segments.
    reg_t header = 0, e_phoff = 0, e_shoff = 0;
    // e_phentsize, e_phnum, e_shentsize and e_shnum are four consecutive
    // 16-bit fields in Elf64_Ehdr, so a single 8-byte peek fetches them all.
    union {
        uint64_t reg;
        struct {
            uint16_t e_phentsize;
            uint16_t e_phnum;
            uint16_t e_shentsize;
            uint16_t e_shnum;
        } __attribute__ ((packed)) u16;
    } sizes = {.reg = 0};
    int ret = 0;

    if ((ret = peek(pid, base, &header)) != 0) {
        info("couldn't peek at segment base");
        return ret;
    } else if ((header & 0xffffffffUL) != ELF_MAGIC) {
        info("segment was not an ELF binary");
        return -1;
    } else if ((ret = peek(pid, base + offsetof(Elf64_Ehdr, e_phoff), &e_phoff)) != 0) {
        info("couldn't peek at program header offset");
        return ret;
    } else if ((ret = peek(pid, base + offsetof(Elf64_Ehdr, e_shoff), &e_shoff)) != 0) {
        info("couldn't peek at section header offset");
        return ret;
    } else if ((ret = peek(pid, base + offsetof(Elf64_Ehdr, e_phentsize), &sizes.reg)) != 0) {
        info("couldn't peek at sizes offset");
        return ret;
    }

    info("got %" PRIu16 " program %s", sizes.u16.e_phnum,
         sizes.u16.e_phnum == 1 ? "segment" : "segments");
    info("got %" PRIu16 " standard %s", sizes.u16.e_shnum,
         sizes.u16.e_shnum == 1 ? "segment" : "segments");

    // Program headers.
    reg_t p_offset = e_phoff;
    for (uint16_t i = 0; i < sizes.u16.e_phnum; i++) {
        reg_t program_vaddr = 0, program_size = 0;
        if ((ret = peek(pid, base + p_offset + offsetof(Elf64_Phdr, p_vaddr), &program_vaddr)) != 0) {
            info("failed to peek at program segment p_vaddr");
            return ret;
        } else if ((ret = peek(pid, base + p_offset + offsetof(Elf64_Phdr, p_memsz), &program_size)) != 0) {
            info("failed to peek at program segment p_memsz");
            return ret;
        }
        if (program_vaddr != 0 && program_size > 0) {
            if ((ret = dump_segment(bn, program_vaddr, program_size,
                                    segments_ptr, num_segments_ptr, pid)) != 0) {
                info("failed to dump program segment %" PRIu16, i);
                return ret;
            }
        } else {
            info("program segment %" PRIu16 " had invalid address, skipping", i);
        }
        p_offset += sizes.u16.e_phentsize;
    }

    // Section headers.
    reg_t s_offset = e_shoff;
    for (uint16_t i = 0; i < sizes.u16.e_shnum; i++) {
        reg_t section_vaddr = 0, section_size = 0;
        if ((ret = peek(pid, base + s_offset + offsetof(Elf64_Shdr, sh_addr), &section_vaddr)) != 0) {
            info("failed to peek at section segment sh_addr");
            return ret;
        } else if ((ret = peek(pid, base + s_offset + offsetof(Elf64_Shdr, sh_size), &section_size)) != 0) {
            info("failed to peek at section segment sh_size");
            return ret;
        }
        if (section_vaddr != 0 && section_size > 0) {
            if ((ret = dump_segment(bn, section_vaddr, section_size,
                                    segments_ptr, num_segments_ptr, pid)) != 0) {
                info("failed to dump section segment %" PRIu16, i);
                return ret;
            }
        } else {
            info("section segment %" PRIu16 " had invalid address, skipping", i);
        }
        s_offset += sizes.u16.e_shentsize;
    }
    return 0;
}
/* Run flag for the tracing loops; cleared by the SIGINT handler. It is
 * written from a signal handler, so it must be volatile sig_atomic_t rather
 * than a plain bool. */
static volatile sig_atomic_t go = 1;
static void dump(const char *bn, pid_t pid) { | |
char filename[32]; | |
segment_t *segments = NULL; | |
size_t num_segments = 0; | |
// Wait for the process to become ready | |
wait_for(pid); | |
while (go) { | |
info("----- got a new ELF binary"); | |
// Get the text base for this segment | |
reg_t base = get_text_base(pid), final; | |
if (base == INVALID_PAGE) { | |
info("couldn't find ELF header"); | |
break; | |
} else { | |
info("new text base is at %016zx", base); | |
} | |
// Okay, we have the ELF text base. Figure out which segments are | |
// currently loaded. | |
if (dump_segments(bn, base, &segments, &num_segments, pid) != 0) { | |
error("dumping segments failed"); | |
break; | |
} | |
// Wait for the PC to exit all known segments | |
size_t step = 0; | |
bool new_segment = false; | |
while (!new_segment && go) { | |
reg_t rip; | |
ptrace(PTRACE_SINGLESTEP, pid, 0, NULL); | |
wait_for(pid); | |
rip = get_pc(pid); | |
new_segment = true; | |
for (size_t i = 0; i < num_segments; i++) { | |
if (rip >= segments[i].start && rip <= segments[i].end) { | |
// Already seen it. | |
new_segment = false; | |
break; | |
} | |
} | |
if (step % 10000 == 0) { | |
info("step %zu: ip=0x%016zx", step, rip); | |
} | |
step++; | |
} | |
} | |
free(segments); | |
ptrace(PTRACE_DETACH, pid, 0, SIGKILL); | |
wait_for(pid); | |
} | |
static void sigint_handler(int sig) { | |
info("received signal %d", sig); | |
go = false; | |
} | |
int main(int argc, char * const argv[]) { | |
pid_t pid; | |
if ((pid = fork()) == 0) { | |
// Hello! Please trace me, regardless of what anyone else says. | |
ptrace(PTRACE_TRACEME, 0, NULL, NULL); | |
execvp(argv[1], &argv[1]); | |
error("execvp failed!"); | |
return -1; | |
} else { | |
const char *bn = basename(argv[1]); | |
info("starting %s as pid %d", bn, pid); | |
// Install a SIGINT handler | |
signal(SIGINT, sigint_handler); | |
// Dump the child process | |
dump(bn, pid); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment