Skip to content

Instantly share code, notes, and snippets.

@wh1te4ever
Last active May 31, 2024 16:12
Show Gist options
  • Save wh1te4ever/f8b3cedf4d555b00527de57f848ca766 to your computer and use it in GitHub Desktop.
Save wh1te4ever/f8b3cedf4d555b00527de57f848ca766 to your computer and use it in GitHub Desktop.
find_unexported_symbols.c
// seohyun-gyu@MacBook-Pro-2 find_xpc_rich_error_create % gcc -o find_xpc_rich_error_create find_xpc_rich_error_create.c
// seohyun-gyu@MacBook-Pro-2 find_xpc_rich_error_create % ./find_xpc_rich_error_create
// Found image_name: /usr/lib/system/libxpc.dylib, libxpc_hdr: 0x7ff80647a000
// _xpc_rich_error_create address: 0x7ff80649362c
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <dlfcn.h>
#include <pthread.h>
#include <sys/mman.h>
#include <mach/mach.h>
#include <mach-o/dyld.h>
#include <mach-o/dyld_images.h>
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
// ASSERT is defined to expand to nothing: every ASSERT(...) in this file is
// compiled out, so a statement such as `if (cond) ASSERT(cond);` has no effect.
#define ASSERT(x)
// Load-command id of the segment commands this file walks (the 64-bit variant).
#define LC_SEGMENT_ARCH_DEPENDENT LC_SEGMENT_64
// Short aliases for the 64-bit Mach-O structures used throughout the file.
typedef struct segment_command_64 segment_command_t;
typedef struct nlist_64 nlist_t;
typedef struct mach_header_64 mach_header_t;
// Parsed view of one 64-bit Mach-O image, filled in by macho_ctx_init().
// All pointers point into the image itself; nothing here is heap-allocated.
typedef struct macho_ctx
{
// Copied from the macho_ctx_init() argument. When true, symbol-table values are
// rebased by `slide` in macho_ctx_symbol_resolve_options().
bool is_runtime_mode;
// The Mach-O header handed to macho_ctx_init().
mach_header_t *header;
// Header address minus the __TEXT segment's vmaddr.
uintptr_t slide;
// Base that is added to LINKEDIT file offsets (symoff, stroff, export trie
// offset, ...) to obtain pointers.
uintptr_t linkedit_base;
// Every LC_SEGMENT_64 command, in load-command order, and how many were stored.
segment_command_t *segments[64];
int segments_count;
// Segment commands matched by name: __TEXT, __DATA, __TEXT_EXEC, __DATA_CONST,
// __LINKEDIT. A pointer stays NULL when the image has no such segment.
segment_command_t *text_seg;
segment_command_t *data_seg;
segment_command_t *text_exec_seg;
segment_command_t *data_const_seg;
segment_command_t *linkedit_seg;
// Load commands located during init: LC_SYMTAB, LC_DYSYMTAB,
// LC_DYLD_INFO / LC_DYLD_INFO_ONLY, LC_DYLD_EXPORTS_TRIE, LC_DYLD_CHAINED_FIXUPS.
struct symtab_command *symtab_cmd;
struct dysymtab_command *dysymtab_cmd;
struct dyld_info_command *dyld_info_cmd;
struct linkedit_data_command *exports_trie_cmd;
struct linkedit_data_command *chained_fixups_cmd;
// linkedit_base + symtab_cmd->symoff: the nlist_64 symbol array.
nlist_t *symtab;
// linkedit_base + symtab_cmd->stroff: the string pool that n_strx indexes.
char *strtab;
// linkedit_base + dysymtab_cmd->indirectsymoff: the indirect symbol table.
uint32_t *indirect_symtab;
} macho_ctx_t;
// Bit flags selecting which lookup strategies macho_ctx_symbol_resolve_options()
// tries; the values can be OR-ed together.
typedef enum
{
// Scan the LC_SYMTAB symbol table (macho_ctx_iterate_symbol_table).
RESOLVE_SYMBOL_TYPE_SYMBOL_TABLE = 1 << 0,
// Walk the export trie (macho_ctx_iterate_exported_symbol).
RESOLVE_SYMBOL_TYPE_EXPORTED = 1 << 1,
// Try the symbol table first, then the export trie.
RESOLVE_SYMBOL_TYPE_ALL = RESOLVE_SYMBOL_TYPE_SYMBOL_TABLE | RESOLVE_SYMBOL_TYPE_EXPORTED
} resolve_symbol_type_t;
/*
 * Parse the load commands of a 64-bit Mach-O image and fill in *ctx.
 *
 * ctx             - context to initialise; always zeroed first.
 * header          - start of the Mach-O image (loaded image or mapped file).
 * is_runtime_mode - true when `header` is an image loaded by dyld (segments
 *                   live at vmaddr + slide); false when it is a plain file
 *                   mapping, where LINKEDIT data is addressed by file offset.
 *
 * The function never dereferences a missing piece: if the header is NULL, is
 * not MH_MAGIC_64, or lacks __TEXT/__LINKEDIT, the address-related fields
 * (slide, linkedit_base, symtab, strtab, indirect_symtab) are left as zero/NULL.
 * symtab/strtab are only set when LC_SYMTAB exists, indirect_symtab only when
 * LC_DYSYMTAB exists.
 */
void macho_ctx_init(macho_ctx_t *ctx, mach_header_t *header, bool is_runtime_mode)
{
    if (ctx == NULL)
    {
        return;
    }
    memset(ctx, 0, sizeof(*ctx));
    ctx->is_runtime_mode = is_runtime_mode;
    ctx->header = header;
    if (header == NULL || header->magic != MH_MAGIC_64)
    {
        // Every structure below is the 64-bit variant; anything else would be misparsed.
        return;
    }

    const int max_segments = (int)(sizeof(ctx->segments) / sizeof(ctx->segments[0]));
    const uintptr_t cmds_end = (uintptr_t)header + sizeof(mach_header_t) + header->sizeofcmds;
    uintptr_t cursor = (uintptr_t)header + sizeof(mach_header_t);

    for (uint32_t i = 0; i < header->ncmds; i++)
    {
        // Stop on a truncated or malformed command instead of walking off the
        // load-command area (a zero cmdsize would otherwise never advance).
        if (cmds_end - cursor < sizeof(struct load_command))
        {
            break;
        }
        const struct load_command *lc = (const struct load_command *)cursor;
        if (lc->cmdsize < sizeof(struct load_command) || lc->cmdsize > cmds_end - cursor)
        {
            break;
        }

        switch (lc->cmd)
        {
        case LC_SEGMENT_ARCH_DEPENDENT:
        {
            segment_command_t *seg = (segment_command_t *)cursor;
            // Segments are recorded in load-command order (presumably so they can
            // be looked up by the segment index used by the bind/rebase
            // *_SET_SEGMENT_AND_OFFSET_ULEB opcodes). Extra segments beyond the
            // array capacity are not recorded rather than overflowing the array.
            if (ctx->segments_count < max_segments)
            {
                ctx->segments[ctx->segments_count++] = seg;
            }
            if (strcmp(seg->segname, "__LINKEDIT") == 0)
            {
                ctx->linkedit_seg = seg;
            }
            else if (strcmp(seg->segname, "__DATA") == 0)
            {
                ctx->data_seg = seg;
            }
            else if (strcmp(seg->segname, "__DATA_CONST") == 0)
            {
                ctx->data_const_seg = seg;
            }
            else if (strcmp(seg->segname, "__TEXT") == 0)
            {
                ctx->text_seg = seg;
            }
            else if (strcmp(seg->segname, "__TEXT_EXEC") == 0)
            {
                ctx->text_exec_seg = seg;
            }
            break;
        }
        case LC_SYMTAB:
            ctx->symtab_cmd = (struct symtab_command *)cursor;
            break;
        case LC_DYSYMTAB:
            ctx->dysymtab_cmd = (struct dysymtab_command *)cursor;
            break;
        case LC_DYLD_INFO:
        case LC_DYLD_INFO_ONLY:
            ctx->dyld_info_cmd = (struct dyld_info_command *)cursor;
            break;
        case LC_DYLD_EXPORTS_TRIE:
            ctx->exports_trie_cmd = (struct linkedit_data_command *)cursor;
            break;
        case LC_DYLD_CHAINED_FIXUPS:
            ctx->chained_fixups_cmd = (struct linkedit_data_command *)cursor;
            break;
        default:
            break;
        }

        cursor += lc->cmdsize;
    }

    if (ctx->text_seg == NULL || ctx->linkedit_seg == NULL)
    {
        // Without both segments no address can be computed; leave the rest zeroed.
        return;
    }

    ctx->slide = (uintptr_t)header - (uintptr_t)ctx->text_seg->vmaddr;
    if (is_runtime_mode)
    {
        // Loaded image: LINKEDIT lives at vmaddr + slide, and table offsets are
        // file offsets relative to the segment's fileoff.
        ctx->linkedit_base = ctx->slide + (uintptr_t)ctx->linkedit_seg->vmaddr - (uintptr_t)ctx->linkedit_seg->fileoff;
    }
    else
    {
        // Plain file mapping: file offset N is simply header + N. (Using `slide`
        // here would be wrong whenever __TEXT's vmaddr is non-zero.)
        ctx->linkedit_base = (uintptr_t)header;
    }

    if (ctx->symtab_cmd != NULL)
    {
        ctx->symtab = (nlist_t *)(ctx->linkedit_base + ctx->symtab_cmd->symoff);
        ctx->strtab = (char *)(ctx->linkedit_base + ctx->symtab_cmd->stroff);
    }
    if (ctx->dysymtab_cmd != NULL)
    {
        ctx->indirect_symtab = (uint32_t *)(ctx->linkedit_base + ctx->dysymtab_cmd->indirectsymoff);
    }
}
/*
 * Look a symbol up in the LC_SYMTAB symbol table.
 *
 * ctx                 - context prepared by macho_ctx_init().
 * symbol_name_pattern - exact name to find; it matches a table entry either
 *                       verbatim or with the entry's leading '_' removed.
 *
 * Returns the entry's raw n_value (not rebased by the slide), or 0 when the
 * symbol is not found or the context has no usable symbol table. Entries whose
 * n_value is 0 are ignored, as are entries whose string index lies outside the
 * string table.
 */
uintptr_t macho_ctx_iterate_symbol_table(macho_ctx_t *ctx, const char *symbol_name_pattern)
{
    if (ctx == NULL || symbol_name_pattern == NULL)
    {
        return 0;
    }
    if (ctx->symtab_cmd == NULL || ctx->symtab == NULL || ctx->strtab == NULL)
    {
        return 0;
    }

    const nlist_t *entries = ctx->symtab;
    const char *names = ctx->strtab;
    const uint32_t entry_count = ctx->symtab_cmd->nsyms;
    const uint32_t names_size = ctx->symtab_cmd->strsize;

    for (uint32_t i = 0; i < entry_count; i++)
    {
        const nlist_t *entry = &entries[i];
        if (entry->n_value == 0)
        {
            continue;
        }
        const uint32_t name_index = entry->n_un.n_strx;
        if (name_index >= names_size)
        {
            // Corrupt entry: the name would be read from outside the string table.
            continue;
        }
        const char *name = names + name_index;
        if (strcmp(symbol_name_pattern, name) == 0)
        {
            return entry->n_value;
        }
        // Also accept the pattern written without the leading underscore.
        if (name[0] == '_' && strcmp(symbol_name_pattern, name + 1) == 0)
        {
            return entry->n_value;
        }
    }
    return 0;
}
/*
 * Decode one unsigned LEB128 value.
 *
 * pp  - in/out cursor; on return it points just past the last byte consumed.
 * end - one past the last readable byte. Decoding stops at `end` even if the
 *       final byte read still had its continuation bit set (truncated input);
 *       the bits gathered so far are returned. A NULL `end` disables the bound
 *       check.
 *
 * Returns the decoded value. Payload bits beyond bit 63 are discarded, but the
 * over-long encoding is still consumed so the cursor ends up after it.
 */
uint64_t read_uleb128(const uint8_t **pp, const uint8_t *end)
{
    const uint8_t *p = *pp;
    uint64_t result = 0;
    int bit = 0;
    uint8_t byte = 0;

    do
    {
        if (end != NULL && p >= end)
        {
            // Truncated value: never read past the buffer.
            break;
        }
        byte = *p++;
        if (bit <= 63)
        {
            result |= ((uint64_t)(byte & 0x7f)) << bit;
            bit += 7;
        }
    } while (byte & 0x80);

    *pp = p;
    return result;
}
/*
 * Walk an export trie looking for `symbol`.
 *
 * start  - first byte of the trie.
 * end    - one past the last byte of the trie.
 * symbol - NUL-terminated name to look up.
 *
 * Returns a pointer to the terminal payload of the matching node (the byte
 * right after the node's terminal-size field), or NULL when the symbol is not
 * present or the trie is malformed: data running past `end`, a child offset
 * that is zero or out of range, a cycle, or a path deeper than the visited-node
 * table. Every read is bounded by `end`.
 */
uint8_t *tail_walk(const uint8_t *start, const uint8_t *end, const char *symbol)
{
    enum { MAX_VISITED_NODES = 128 };
    uint32_t visitedNodeOffsets[MAX_VISITED_NODES];
    int visitedNodeOffsetCount = 0;

    if (start == NULL || end == NULL || symbol == NULL)
    {
        return NULL;
    }

    // The root (offset 0) counts as visited so a child pointing back at it is a cycle.
    visitedNodeOffsets[visitedNodeOffsetCount++] = 0;
    const uint8_t *p = start;
    while (p < end)
    {
        uint64_t terminalSize = *p++;
        if (terminalSize > 127)
        {
            // except for re-export-with-rename, all terminal sizes fit in one byte
            --p;
            terminalSize = read_uleb128(&p, end);
        }
        if ((*symbol == '\0') && (terminalSize != 0))
        {
            // Whole name consumed and this node carries export info: found.
            return (uint8_t *)p;
        }

        // The child-count byte sits right after the terminal payload and must
        // itself be inside the trie. Sizes are compared instead of pointers so a
        // huge terminalSize cannot overflow the pointer arithmetic.
        if (p >= end || terminalSize >= (uint64_t)(end - p))
        {
            return NULL;
        }
        const uint8_t *children = p + terminalSize;
        uint8_t childrenRemaining = *children++;
        p = children;

        uint64_t nodeOffset = 0;
        for (; childrenRemaining > 0; --childrenRemaining)
        {
            const char *ss = symbol;
            bool wrongEdge = false;
            // Scan the whole edge label to reach the next edge. Once a mismatch
            // is seen, stop reading the symbol so we never run past its NUL.
            while (p < end && *p != '\0')
            {
                if (!wrongEdge)
                {
                    if ((char)*p != *ss)
                    {
                        wrongEdge = true;
                    }
                    ++ss;
                }
                ++p;
            }
            if (p >= end)
            {
                // Edge label is not terminated inside the trie.
                return NULL;
            }
            ++p; // skip over zero terminator

            if (wrongEdge)
            {
                // Skip this child's uleb128 node offset: continuation bytes first...
                while (p < end && (*p & 0x80) != 0)
                {
                    ++p;
                }
                if (p >= end)
                {
                    return NULL;
                }
                ++p; // ...then the last byte of the uleb128
            }
            else
            {
                // The symbol so far matches this edge, so descend into the child.
                if (p >= end)
                {
                    return NULL;
                }
                nodeOffset = read_uleb128(&p, end);
                if ((nodeOffset == 0) || (nodeOffset > (uint64_t)(end - start)))
                {
                    return NULL;
                }
                symbol = ss;
                break;
            }
        }

        if (nodeOffset == 0)
        {
            // No edge matched: the symbol is not in the trie.
            break;
        }
        for (int i = 0; i < visitedNodeOffsetCount; ++i)
        {
            if (visitedNodeOffsets[i] == nodeOffset)
            {
                // Cycle in the trie.
                return NULL;
            }
        }
        if (visitedNodeOffsetCount >= MAX_VISITED_NODES)
        {
            // Path deeper than the visited table can record; give up instead of
            // writing past the array.
            return NULL;
        }
        visitedNodeOffsets[visitedNodeOffsetCount++] = (uint32_t)nodeOffset;
        p = &start[nodeOffset];
    }
    return NULL;
}
/*
 * Look a symbol up in the image's export trie.
 *
 * ctx         - context prepared by macho_ctx_init().
 * symbol_name - name to search for, passed unchanged to tail_walk().
 * out_flags   - optional; receives the export flags once the node is found.
 *
 * Returns 0 when the context lacks __TEXT/__LINKEDIT, has no export trie, or
 * the symbol is absent. For an ordinary export the value stored in the trie is
 * returned. For a re-export the return value is the address of the imported
 * name string inside the trie cast to an integer, or 0 if that name is empty.
 */
uint64_t macho_ctx_iterate_exported_symbol(macho_ctx_t *ctx, const char *symbol_name, uint64_t *out_flags)
{
    if (ctx->text_seg == NULL || ctx->linkedit_seg == NULL)
    {
        return 0;
    }

    // The trie location comes from LC_DYLD_INFO(_ONLY) when that command exists,
    // otherwise from LC_DYLD_EXPORTS_TRIE.
    uint32_t trie_offset;
    uint32_t trie_size;
    if (ctx->dyld_info_cmd != NULL)
    {
        trie_offset = ctx->dyld_info_cmd->export_off;
        trie_size = ctx->dyld_info_cmd->export_size;
    }
    else if (ctx->exports_trie_cmd != NULL)
    {
        trie_offset = ctx->exports_trie_cmd->dataoff;
        trie_size = ctx->exports_trie_cmd->datasize;
    }
    else
    {
        return 0;
    }

    const uint8_t *trie_begin = (const uint8_t *)(ctx->linkedit_base + trie_offset);
    if (trie_begin == NULL)
    {
        return 0;
    }
    const uint8_t *trie_end = trie_begin + trie_size;

    const uint8_t *cursor = tail_walk(trie_begin, trie_end, symbol_name);
    if (cursor == NULL)
    {
        return 0;
    }

    // The terminal payload starts with the flags.
    const uint64_t flags = read_uleb128(&cursor, trie_end);
    if (out_flags != NULL)
    {
        *out_flags = flags;
    }

    if ((flags & EXPORT_SYMBOL_FLAGS_REEXPORT) == 0)
    {
        // Ordinary export: the next uleb128 is the value.
        return read_uleb128(&cursor, trie_end);
    }

    // Re-export: skip the ordinal; the imported name string follows it.
    (void)read_uleb128(&cursor, trie_end);
    const char *imported_name = (const char *)cursor;
    if (imported_name[0] == '\0')
    {
        return 0;
    }
    // trick: hand the name back through the integer return value
    return (uint64_t)imported_name;
}
/*
 * Resolve a symbol to an address using the strategies selected by `type`.
 *
 * ctx                 - context prepared by macho_ctx_init().
 * symbol_name_pattern - symbol name to resolve.
 * type                - bit mask of RESOLVE_SYMBOL_TYPE_* values; the symbol
 *                       table is tried before the export trie.
 *
 * Returns the resolved address, or 0 when nothing matched.
 *  - Symbol-table hits are rebased by the slide in runtime mode only.
 *  - Regular and thread-local exports are offsets and get the header address
 *    added; absolute exports are returned as stored.
 *  - Re-exports are reported as unresolved (0): for them the trie lookup yields
 *    a pointer to the imported name rather than an offset in this image, so
 *    turning it into an "address" would produce garbage.
 */
uint64_t macho_ctx_symbol_resolve_options(macho_ctx_t *ctx, const char *symbol_name_pattern,
                                          resolve_symbol_type_t type)
{
    if (type & RESOLVE_SYMBOL_TYPE_SYMBOL_TABLE)
    {
        uint64_t value = macho_ctx_iterate_symbol_table(ctx, symbol_name_pattern);
        if (value != 0)
        {
            return value + (ctx->is_runtime_mode ? ctx->slide : 0);
        }
    }

    if (type & RESOLVE_SYMBOL_TYPE_EXPORTED)
    {
        // binary exported table (uleb128-encoded trie)
        uint64_t flags = 0;
        uint64_t value = macho_ctx_iterate_exported_symbol(ctx, symbol_name_pattern, &flags);
        if (value != 0)
        {
            if (flags & EXPORT_SYMBOL_FLAGS_REEXPORT)
            {
                // The symbol lives in another image; it cannot be resolved here.
                return 0;
            }
            switch (flags & EXPORT_SYMBOL_FLAGS_KIND_MASK)
            {
            case EXPORT_SYMBOL_FLAGS_KIND_REGULAR:
            case EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL:
                value += (uint64_t)ctx->header;
                break;
            case EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE:
            default:
                break;
            }
            return value;
        }
    }
    return 0;
}
/*
 * Resolve a symbol in a loaded (runtime) image given only its header.
 *
 * header              - Mach-O header of an image loaded in this process.
 * symbol_name_pattern - symbol name to resolve.
 * type                - bit mask of RESOLVE_SYMBOL_TYPE_* values.
 *
 * Builds a temporary runtime-mode context on the stack and delegates to
 * macho_ctx_symbol_resolve_options(). Returns the address, or 0 when the
 * symbol is not found or either pointer argument is NULL.
 */
uintptr_t macho_symbol_resolve_options(mach_header_t *header, const char *symbol_name_pattern,
                                       resolve_symbol_type_t type)
{
    if (header == NULL || symbol_name_pattern == NULL)
    {
        // e.g. the caller failed to locate the image; there is nothing to parse.
        return 0;
    }
    macho_ctx_t ctx;
    macho_ctx_init(&ctx, header, true);
    return (uintptr_t)macho_ctx_symbol_resolve_options(&ctx, symbol_name_pattern, type);
}
/*
 * Convenience wrapper: resolve a symbol in a loaded image using every lookup
 * strategy (RESOLVE_SYMBOL_TYPE_ALL). Returns whatever
 * macho_symbol_resolve_options() returns.
 */
uint64_t macho_symbol_resolve(mach_header_t *header, const char *symbol_name_pattern)
{
    const resolve_symbol_type_t all_strategies = RESOLVE_SYMBOL_TYPE_ALL;
    uintptr_t resolved = macho_symbol_resolve_options(header, symbol_name_pattern, all_strategies);
    return resolved;
}
/*
 * Find the Mach-O header of an image currently loaded in this process.
 *
 * image_name - path to compare (exact, case-sensitive) against the names
 *              reported by _dyld_get_image_name().
 *
 * Returns the header address of the first matching image as an integer, or 0
 * when image_name is NULL or no loaded image matches. A line describing the
 * match is printed to stdout.
 */
uint64_t find_image_header(const char *image_name)
{
    if (image_name == NULL)
    {
        return 0;
    }

    const uint32_t count = _dyld_image_count();
    for (uint32_t i = 0; i < count; i++)
    {
        // The image list can change between the count and this call, in which
        // case dyld returns NULL for the index.
        const char *loaded_name = _dyld_get_image_name(i);
        if (loaded_name == NULL || strcmp(loaded_name, image_name) != 0)
        {
            continue;
        }
        const struct mach_header *hdr = _dyld_get_image_header(i);
        if (hdr == NULL)
        {
            continue;
        }
        printf("Found image_name: %s, libxpc_hdr: %p\n", loaded_name, (const void *)hdr);
        return (uint64_t)(uintptr_t)hdr;
    }
    return 0;
}
/*
 * Demo: locate libxpc in this process and print the address of a symbol
 * looked up through its symbol table / export trie.
 * Exits with 0 on success and 1 when the image or the symbol cannot be found.
 */
int main(void)
{
    const char *image_name = "/usr/lib/system/libxpc.dylib";
    const char *symbol_name = "_xpc_rich_error_create";

    // dlopen is only used to make sure the image is loaded; the handle is kept
    // open on purpose. A failure is not fatal by itself because the image may
    // already be mapped, so only warn here.
    if (dlopen(image_name, RTLD_NOW) == NULL)
    {
        const char *reason = dlerror();
        fprintf(stderr, "warning: dlopen(%s) failed: %s\n", image_name, reason ? reason : "unknown error");
    }

    uint64_t libxpc_hdr = find_image_header(image_name);
    if (libxpc_hdr == 0)
    {
        fprintf(stderr, "image not loaded: %s\n", image_name);
        return 1;
    }

    uint64_t address = macho_symbol_resolve((struct mach_header_64 *)(uintptr_t)libxpc_hdr, symbol_name);
    if (address == 0)
    {
        fprintf(stderr, "symbol not found: %s\n", symbol_name);
        return 1;
    }

    printf("%s address: 0x%" PRIx64 "\n", symbol_name, address);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment