Skip to content

Instantly share code, notes, and snippets.

@DerekSelander
Created November 6, 2018 21:08
Show Gist options
  • Save DerekSelander/1a4dc89e060e3593f676328d91fd5cea to your computer and use it in GitHub Desktop.
Save DerekSelander/1a4dc89e060e3593f676328d91fd5cea to your computer and use it in GitHub Desktop.
A cleaned up and commented file of the dynamically generated code from the LLDB search.py command
//
// LLDB_command_search.mm
// Code Challenge
//
// Created by Derek Selander on 11/6/18.
// Copyright © 2018 Selander. All rights reserved.
//
#import <UIKit/UIKit.h>
#import <objc/runtime.h>
#import <malloc/malloc.h>
#import <mach/mach.h>
#import <mach-o/getsect.h>
/**
This is a cleaner, more commented example of the dynamic code generated by the "search" LLDB
command found here: https://github.com/DerekSelander/LLDB/blob/master/lldb_commands/search.py
This command will enumerate all pointers found in the "heap" and see if they match
a particular type of Objective-C or Swift class
The background for this can be broken down into 3 different steps.
1) malloc and zones
The "heap" is actually broken down to different "zones". These zones are responsible
for returning different memory sizes depending on the amount of memory requested.
There are several APIs in <malloc/malloc.h> that let you grab all the zones.
Check out malloc_get_all_zones, malloc_default_zone
The malloc_default_zone will get you the zone that is primarily of interest, which is the default
zone for all your Objc/Swift classes
A zone's structure is given by the struct malloc_zone_t found in <malloc/malloc.h>
Inside this structure is the enumerator callback method which can enumerate all active
pointers in a zone
2) Cautiously massage the pointers
The pointers given by the enumerator could point to anything so we need to see
if they are Objective-C or Swift classes. If not, skip that pointer and go to the
next ptr in the zone
The code iterates through all pointers in a zone and checks the following:
* Is the pointer's allocation at least as big as an instance of the queried ObjC/Swift class?
* Is the pointer a potential tagged object? i.e. @"a" or @0
* Does the pointer's "isa" value point to a valid class known already?
* Does the instance of the class respond to the "-description" method?
* Does the instance size of the pointer's class (determined from the "isa") match the pointer's allocation size?
* Is the class a subclass of the query?
* [optional] Does the class originate from the module provided via "moduleName"?
3) Arrange data for output
You don't see it that much in this code, but the search command must take the data
returned by this generated code and format it in a certain way. For example,
swift code likes to hide the pointer of the object's description, so there's the
search -b option, which dumps the raw pointer data and doesn't print out the object's
description. Check out the search python script to see what is happening, especially
the logic with lldb:SBValues
*/
/**
 Enumerates the in-use allocations of every malloc zone in the current process and
 prints (to stdout) the -description of each allocation that looks like an instance
 of the class named `instanceName`, or of one of its subclasses.

 @param instanceName Name of the Objective-C/Swift class to search for, resolved with
                     NSClassFromString. If no such class exists the function returns
                     without printing anything.
 @param moduleName   Optional (may be nil). When given, and when the framework's
                     __DATA,__objc_data section can be located, only instances whose
                     class lives inside that section are reported. If the section can't
                     be found a warning is logged and no module filtering is applied.

 At most 200 instances are reported.
 */
void searchInstances(NSString *instanceName, NSString *moduleName) {
    // We pass an instance of this struct into the c++ closure which enumerates all heap pointers
    typedef struct _DSSearchContext {
        Class query;                     // The query of the class of interest
        CFMutableSetRef classesSet;      // All the ObjC classes known at runtime
        CFMutableSetRef results;         // All the returned ObjC references found that match query
        CFIndex maxResults;              // Stop collecting once this many results have been found
        uintptr_t moduleStart;           // search -m: start of the module's __DATA,__objc_data
        uintptr_t moduleEnd;             // search -m: one past the end of that section (0 == no filter)
        ///////////////////////////////////////////////////////////////////////////////
        uintptr_t *pointerRef;           // Used for search -r, (finding references to ptr)
        int *offsets;                    // Used for search -r, (finding references to ptr)
        CFMutableArrayRef ptrRefResults; // Used for search -r, (finding references to ptr)
    } DSSearchContext;

    // The closure can't capture anything, so the limit travels inside the context
    const CFIndex maxresults = 200;

    // Nothing can be a subclass of a class that doesn't exist, so don't walk the heap at all
    Class query = NSClassFromString(instanceName);
    if (query == Nil) {
        return;
    }

    // See <malloc/malloc.h>, enumerates all zones, but likely the only interesting stuff is in the main zone
    vm_address_t *zones = NULL;
    unsigned int zoneCount = 0;
    kern_return_t error = (kern_return_t)malloc_get_all_zones(mach_task_self(), 0, &zones, &zoneCount);
    if (error != KERN_SUCCESS || zones == NULL) {
        return;
    }

    // Snapshot every class registered with the runtime. The first call only reports how
    // many classes there are, the second one fills the buffer
    int classCount = (int)objc_getClassList(NULL, 0);
    if (classCount <= 0) {
        return;
    }
    Class *classes = (__unsafe_unretained Class *)malloc(sizeof(Class) * classCount);
    if (classes == NULL) {
        return;
    }
    int registeredCount = (int)objc_getClassList(classes, classCount);
    if (registeredCount < classCount) {
        // Fewer classes than first reported, only the first registeredCount slots were written
        classCount = registeredCount;
    }

    // c++ closures are not a fan of ARC code and we don't want to be malloc'ing
    // anything inside the heap enumerator c++ closure, so everything is prepared up front.
    // NULL callbacks: the set stores raw pointers and doesn't retain/release them
    CFMutableSetRef set = CFSetCreateMutable(kCFAllocatorDefault, classCount, NULL);
    for (int i = 0; i < classCount; i++) {
        Class cls = classes[i];
        CFSetAddValue(set, (__bridge const void *)(cls));
    }
    // The buffer was only needed to populate the set
    free(classes);

    DSSearchContext *_ds_context = (DSSearchContext *)calloc(1, sizeof(DSSearchContext));
    if (_ds_context == NULL) {
        CFRelease(set);
        return;
    }

    // Setup callback context
    _ds_context->query = query;
    _ds_context->classesSet = set;
    _ds_context->maxResults = maxresults;
    _ds_context->results = CFSetCreateMutable(kCFAllocatorDefault, maxresults, NULL);
    _ds_context->ptrRefResults = CFArrayCreateMutable(kCFAllocatorDefault, maxresults, NULL);
    _ds_context->offsets = (int *)calloc(maxresults, sizeof(int));

    // check for a valid module (search -m). The section bounds don't change while
    // enumerating, so resolve them once here instead of once per matching object
    const char *moduleCString = [moduleName UTF8String];
    if (moduleCString) {
        unsigned long size = 0;
        uintptr_t startAddress = (uintptr_t)::getsectdatafromFramework(moduleCString, "__DATA", "__objc_data", &size);
        if (startAddress && size) {
            _ds_context->moduleStart = startAddress;
            _ds_context->moduleEnd = startAddress + size;
        } else {
            // Section not found: warn and fall back to no module filtering
            malloc_printf("No framework called %s\n", moduleCString);
        }
    }

    // see malloc/malloc.h the full struct has many more options, but only need functionality
    // at offset 0x0 (the "enumerator" callback)
    typedef struct malloc_introspection_t {
        kern_return_t (*enumerator)(task_t task, void *, unsigned type_mask, vm_address_t zone_address, memory_reader_t reader, vm_range_recorder_t recorder);
    } malloc_introspection_t;

    // LLDB has had trouble @import'ing this struct over different clang iterations
    // So we are importing it ourselves....
    //
    // We don't need a lot of these items, but we want the "introspect" enumerator
    // This means we need to pad the struct the appropriate amount to get to the correct
    // offset
    typedef struct malloc_zone_t {
        void *reserved1[9];
        const char *zone_name;
        void *reserved2[2];
        struct malloc_introspection_t *introspect;
        unsigned version;
        void *reserved3[3];
    } malloc_zone_t;

    for (unsigned i = 0; i < zoneCount; i++) {
        malloc_zone_t *zone = (malloc_zone_t *)zones[i];
        if (zone == NULL || zone->introspect == NULL) {
            continue;
        }

        // for each zone, enumerate using our enumerator c++ closure callback
        // Opted for using a c++ closure since it can be called inline
        //
        // For example, we could have just used a c callback func, but with LLDB
        // I'd have to generate a "top level" LLDB function and make sure it is valid
        zone->introspect->enumerator(0, _ds_context, MALLOC_PTR_IN_USE_RANGE_TYPE, zones[i], 0,
        [] (task_t task, void *baton, unsigned type, vm_range_t *ranges, unsigned rangeCount) -> void {
            // task: Low level mach/XNU stuff, not applicable here since we're inspecting ourselves, not a different process/task
            // baton: is the reference to whatever you've passed in (i.e. _ds_context instance)
            // type: the type of pointer, for this example we passed in MALLOC_PTR_IN_USE_RANGE_TYPE, active malloc'd ptrs
            // ranges: An array which contains the start and size of all the pointers in the zone
            // rangeCount: The count of the ranges array
            DSSearchContext *context = (DSSearchContext *)baton;
            CFMutableSetRef classesSet = context->classesSet;
            CFMutableSetRef results = context->results;

            // The query is the Objc/Swift classname provided as param1 in the searchInstances function
            Class query = context->query;
            size_t querySize = (size_t)class_getInstanceSize(query);

            for (unsigned j = 0; j < rangeCount; j++) {
                // The amount of results has reached the allocated amount, and since
                // we can't allocate more in this loop while enumerating ptrs in the zone
                // we will just exit out of this loop
                if (CFSetGetCount(results) >= context->maxResults) {
                    break;
                }

                // This is the address of the object, we don't know what it is yet, so treat it with caution...
                vm_address_t potentialObject = ranges[j].address;
                vm_size_t allocationSize = ranges[j].size;

                // test 1, is the ptr's malloc size smaller than an instance of the ObjC's class size
                if (allocationSize < querySize) {
                    continue;
                }

                // test 2, ignore tagged pointer stuff
                // These tagged pointers don't have an "isa" that means we can't deref them
                // Also they are in a non-read region of memory!
                // The mask includes the top bit (0x8000000000000000), so no separate test is needed for it
                // taken from https://github.com/opensource-apple/objc4
                if ((0xFFFF800000000000 & potentialObject) != 0) {
                    continue;
                }

                // This will deref the pointer, so hopefully it's valid at offset +0x0
                Class potentialClass = object_getClass((__bridge id)((void *)potentialObject));

                // test 3, does the potential isa match with one of the classes in the classesSet?
                if (!CFSetContainsValue(classesSet, (__bridge const void *)(potentialClass))) {
                    continue;
                }

                // test 4, does the ptr's size match a classes size?
                if ((size_t)malloc_good_size((size_t)class_getInstanceSize(potentialClass)) != allocationSize) {
                    continue;
                }

                // If instances don't respond to description, get out of there,
                // There's a lot of private classes that don't inherit from NSObject
                // that are just plain weird. Ask the runtime instead of messaging the
                // object: such classes may not implement -respondsToSelector: at all
                if (!class_respondsToSelector(potentialClass, @selector(description))) {
                    continue;
                }

                // Looks like this is an instance of a valid ObjC/Swift class, check if it's the
                // query or a subclass of it by walking the superclass chain (no messaging needed)
                bool isKindOfQuery = false;
                for (Class cls = potentialClass; cls != Nil; cls = class_getSuperclass(cls)) {
                    if (cls == query) {
                        isKindOfQuery = true;
                        break;
                    }
                }
                if (!isKindOfQuery) {
                    continue;
                }

                // search -m, the class must live inside [moduleStart, moduleEnd)
                if (context->moduleEnd != 0) {
                    uintptr_t classAddress = (uintptr_t)potentialClass;
                    if (classAddress < context->moduleStart || classAddress >= context->moduleEnd) {
                        continue;
                    }
                }

                CFSetAddValue(results, (const void *)potentialObject);
            }
        }); // c++ malloc ptr enumerator closure
    } // for loop to iterate all zones

    /////////////////////////////////////////////////////////////////////
    // Needed in LLDB to have a known struct to iterate all the instances
    typedef struct LLDBHeapObjects {
        const void **values;
        uint32_t count;
        int *offsets;
    } LLDBHeapObjects;

    LLDBHeapObjects lldbheap;
    lldbheap.values = NULL;
    lldbheap.count = 0;
    lldbheap.offsets = NULL; // only meaningful for search -r, unused in this sample

    CFIndex resultCount = CFSetGetCount(_ds_context->results);
    if (resultCount > 0) {
        lldbheap.values = (const void **)calloc(resultCount, sizeof(void *));
        if (lldbheap.values != NULL) {
            CFSetGetValues(_ds_context->results, lldbheap.values);
            lldbheap.count = (uint32_t)resultCount;
        }
    }

    // The lldbheap would be returned in LLDB, and then post processing of this struct
    // would iterate through all the values and print out the instances
    /////////////////////////////////////////////////////////////////////
    // Instead for this code sample, the references will be printed out
    for (uint32_t i = 0; i < lldbheap.count; i++) {
        id object = (__bridge id)lldbheap.values[i];
        printf("%s\n", [[object description] UTF8String]);
    }

    // Everything created above is owned by this function, so hand it all back
    free(lldbheap.values);
    if (_ds_context->results) {
        CFRelease(_ds_context->results);
    }
    if (_ds_context->ptrRefResults) {
        CFRelease(_ds_context->ptrRefResults);
    }
    CFRelease(_ds_context->classesSet);
    free(_ds_context->offsets);
    free(_ds_context);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment