Skip to content

Instantly share code, notes, and snippets.

@aconz2
Created August 7, 2024 20:14
Show Gist options
  • Save aconz2/aef366a7b198b8ac151df147fec323c7 to your computer and use it in GitHub Desktop.
Save aconz2/aef366a7b198b8ac151df147fec323c7 to your computer and use it in GitHub Desktop.

I got nerd sniped by this article, wondering how you would find the responsible file isle_opt.rs if you couldn't reproduce the crash and run with tracing. I didn't get a satisfying result, but I wanted to save some of the stuff on scripting lldb because I didn't find many examples to get started with. The code is left in a raw state with no cleanup.

Mainly my goal was to find a string containing isle_opt.rs from the stack, or reachable in n steps from the stack (see spider_strings). I wasn't using any debuginfo, so I just treated every 8 bytes as a potential pointer. I found isle_x64.rs from the stack in this way but could only find isle_opt.rs by searching over all memory regions. Reverse searching from there seems extra hard because you don't know where the containing object starts.

I even cheated by looking at the source and the strace stack trace, which showed MacroExpander::full_expand_fragment in common between the core backtrace and the open call that opens isle_opt.rs, so it should be there somewhere.

git clone https://github.com/bytecodealliance/wasmtime/
cd wasmtime
git checkout 87817f38a128caa76eaa6a3c3c8ceac81a329a3e
RUST_MIN_STACK=1048576 cargo build # segfaults
coredumpctl -1 dump > core
or
SYSTEMD_DEBUGGER=lldb coredumpctl -1 debug
RUST_MIN_STACK=1048576 strace --stack-traces -f --trace='open,openat' cargo build
strings core | grep '/tmp' | grep '\.rs'
# this is too slow!
memory find -s isle 0x0000000000000000 0x0000ffffffffffff
lldb --batch -c core -s x.lldb
from spider_strings
[508] 00007fc3ae3e5220->00007fc3ab21ef00->00007fc3ab912280->00007fc3a41f9500 '/tmp/wasmtime/target/debug/build/cranelift-codegen-dc974f1f3f10409a/out/isle_x64.rs'
from search_string
00007fc3a40bc408 in region [5] 00007fc3a0400000 - 00007fc3a4c00000 b'\x08\x00\x00\x00\x00\x00\x00\x00/tmp/wasmtime/target/debug/build/cranelift-codegen-dc974f1f3f10409a/out/isle_opt.rs \x00\xff\xff\xff\x00t\xee\xb6'
00007fc3a44f8380 in region [5] 00007fc3a0400000 - 00007fc3a4c00000 b'tor_eq\x00\x00/tmp/wasmtime/target/debug/build/cranelift-codegen-dc974f1f3f10409a/out/isle_opt.rs\x00\x00\x00\x00\x00Expe'
command script import x.py
command script add -f x.print_strings_in_frames print_strings_in_frames
command script add -f x.search_string search_string
command script add -f x.search_pointer search_pointer
command script add -f x.spider_strings spider_strings
thread select 1
search_string
import lldb
import struct
import re
def get_addr(s):
    """Parse a ``0x``-prefixed hexadecimal string into an integer.

    ``s`` is either a string or an object exposing ``GetValue()``; in the
    latter case the result of that call is what gets parsed.

    Returns ``None`` when there is no value or it does not start with
    ``0x``.
    """
    text = s if isinstance(s, str) else s.GetValue()
    if text is None or not text.startswith('0x'):
        return None
    return int(text[2:], 16)
def decode_or_none(b):
    """Return ``b`` decoded as ASCII, or ``None`` if it does not look like text.

    The result is ``None`` when ``b`` is empty, when its first byte is below
    32 (a control character), when it is not pure ASCII, or when any other
    error occurs while inspecting it. Only the first byte is checked for
    control characters.
    """
    try:
        if b[0] < 32:
            return None
        return b.decode('ascii')
    except Exception:
        # Best-effort probe over arbitrary memory: any failure means "no".
        return None
def is_str(b):
    """Interpret ``b`` as a NUL-terminated ASCII string.

    The buffer is cut at the first NUL byte (if any) and handed to
    ``decode_or_none``; the return value is whatever that helper returns
    (the decoded text, or ``None``).

    ``bytes.find`` signals "not found" by returning -1 rather than raising,
    so the missing-terminator case is checked explicitly. Slicing with -1
    would silently drop the final byte of an unterminated buffer.
    """
    terminator = b.find(b'\0')
    if terminator != -1:
        b = b[:terminator]
    return decode_or_none(b)
def read_mem(process, addr, n):
    """Read ``n`` bytes at ``addr`` from ``process``.

    Returns what ``process.ReadMemory`` produced when the accompanying
    ``SBError`` reports success, otherwise ``None``.
    """
    err = lldb.SBError()
    data = process.ReadMemory(addr, n, err)
    return data if err.Success() else None
def memory_regions(process):
    """Yield an ``SBMemoryRegionInfo`` for every memory region of ``process``.

    Regions are produced in the order of ``process.GetMemoryRegions()``.
    A fresh info object is filled in for each region, so callers may safely
    collect the results (e.g. ``list(memory_regions(p))``) without every
    element aliasing the last region.
    """
    region_list = process.GetMemoryRegions()
    for index in range(region_list.GetSize()):
        region = lldb.SBMemoryRegionInfo()
        region_list.GetMemoryRegionAtIndex(index, region)
        yield region
def search_string(debugger, command, result, internal_dict):
    """LLDB command: search every writable memory region for a byte string.

    The text to search for is taken from ``command`` (the arguments typed
    after the command name); when no argument is given it defaults to
    ``isle_opt``. For each hit, prints the absolute address, the index and
    bounds of the containing region, and the bytes from up to 80 before the
    hit to 20 after its start.

    ``result`` and ``internal_dict`` are part of the LLDB command signature
    and are not used.
    """
    needle = (command or '').strip().encode() or b'isle_opt'
    # Escape so user-supplied text is matched literally, not as a regex.
    pattern = re.compile(re.escape(needle))

    process = debugger.GetSelectedTarget().GetProcess()
    for i, region in enumerate(memory_regions(process)):
        if not region.IsWritable():
            continue
        base = region.GetRegionBase()
        end = region.GetRegionEnd()
        data = read_mem(process, base, end - base)
        if data is None:
            continue
        for match in pattern.finditer(data):
            offset = match.start()
            print('{:016x} in region [{}] {:016x} - {:016x} {}'.format(base + offset, i, base, end, data[max(0, offset - 80):offset+20]))
def search_pointer(debugger, command, result, internal_dict):
    """LLDB command: show which memory region holds each given pointer.

    Pointers are whitespace-separated hexadecimal addresses taken from
    ``command``; with no arguments the single address ``00007fc3a40bc408``
    is used. For each pointer the containing region is printed, followed by
    the region containing the stack pointer of every frame of the selected
    thread except the last one.

    ``result`` and ``internal_dict`` are part of the LLDB command signature
    and are not used.
    """
    process = debugger.GetSelectedTarget().GetProcess()
    thread = process.GetSelectedThread()
    pointers = (command or '').split() or ['00007fc3a40bc408']
    # The region list does not depend on the pointer, so fetch it once.
    regions = process.GetMemoryRegions()
    for pointer in pointers:
        addr = int(pointer, 16)
        region = lldb.SBMemoryRegionInfo()
        regions.GetMemoryRegionContainingAddress(addr, region)
        print('yo region', region)
        for i, frame in enumerate(thread.frames[0:-1]):
            regions.GetMemoryRegionContainingAddress(frame.sp, region)
            print('frame {} region {}'.format(i, region))
    # NOTE: an abandoned experiment here packed the address with
    # struct.pack('<Q', addr) and searched every region's bytes for it
    # (re.finditer(re.escape(addrb), data)) to find who points at `addr`.
def trace_strings(process, visited, addr, sizes, history=()):
    """Follow candidate pointers from ``addr`` looking for path-like strings.

    ``sizes`` gives the number of bytes to read at each hop. At every hop
    but the last, the memory read is treated as an array of little-endian
    64-bit values and each value is followed as a pointer. At the last hop
    the memory is interpreted with ``is_str``; strings starting with ``/``
    are yielded.

    Yields ``(chain, text)`` tuples, where ``chain`` is ``history`` plus
    every address followed to reach the string (the string's own address
    last). ``visited`` is a set of string addresses already reported; it is
    consulted and updated so each string address is yielded at most once.
    """
    if len(sizes) == 1:
        if addr in visited:
            return
        mem = read_mem(process, addr, sizes[0])
        if not mem:
            return
        s = is_str(mem)
        if s and s[0] == '/':
            visited.add(addr)
            yield history + (addr,), s
        return

    mem = read_mem(process, addr, sizes[0])
    if not mem:
        return
    # iter_unpack requires a whole number of 8-byte words, so round the
    # length *down*; a short read must not raise struct.error.
    usable = len(mem) - len(mem) % 8
    chain = history + (addr,)
    next_sizes = sizes[1:]
    for next_addr, in struct.iter_unpack('<Q', mem[:usable]):
        yield from trace_strings(process, visited, next_addr, next_sizes, chain)
def print_addr_chain(addrs):
    """Format addresses as 16-digit zero-padded hex joined by ``->``.

    Despite the name, nothing is printed; the formatted string is returned.
    """
    return '->'.join(format(address, '016x') for address in addrs)
def spider_strings(debugger, command, result, internal_dict):
    """LLDB command: print path-like strings reachable from stack frames.

    For each examined frame, the frame's stack memory (from its stack
    pointer up to the parent frame's stack pointer) is handed to
    ``trace_strings``, which follows candidate pointers through four hops
    of 128*8 bytes and reads a final 128 bytes as the string. Strings
    shorter than 3 characters are ignored. The frame itself is printed once,
    before its first reported string.

    When the selected thread's ``idx`` is 1 only frame 508 is examined;
    otherwise every frame except the last is.

    ``command``, ``result`` and ``internal_dict`` are part of the LLDB
    command signature and are not used.
    """
    process = debugger.GetSelectedTarget().GetProcess()
    thread = process.GetSelectedThread()

    if thread.idx == 1:
        frames = thread.frames[508:509]
    else:
        frames = thread.frames[:-1]

    # Bytes to read at each hop after the stack frame itself.
    hop_sizes = (128*8, 128*8, 128*8, 128*8, 128)

    for frame in frames:
        frame_size = frame.get_parent_frame().sp - frame.sp
        if not frame_size or frame_size % 8:
            continue

        header_shown = False
        seen = set()  # string addresses already reported for this frame
        for addrs, s in trace_strings(process, seen, frame.sp, (frame_size,) + hop_sizes):
            if len(s) < 3:
                continue
            if not header_shown:
                print(frame)
                header_shown = True
            print(' [{}] {} {!r}'.format(frame.idx, print_addr_chain(addrs), s))
    # NOTE: an earlier, hand-unrolled two-hop version of this walk (stack
    # value -> 256-byte struct -> 192-byte string) was replaced by
    # trace_strings.
def print_strings_in_frames(debugger, command, result, internal_dict):
    """LLDB command: print strings directly pointed to from each stack frame.

    Every frame of the selected thread except the last is printed. Its
    stack memory (from its stack pointer up to the parent frame's stack
    pointer) is then read as little-endian 64-bit values, each value is
    treated as a pointer, and the 128 bytes it points to are printed when
    ``is_str`` accepts them.

    ``command``, ``result`` and ``internal_dict`` are part of the LLDB
    command signature and are not used.
    """
    process = debugger.GetSelectedTarget().GetProcess()
    thread = process.GetSelectedThread()
    for frame in thread.frames[0:-1]:
        print(frame)
        frame_size = frame.get_parent_frame().sp - frame.sp
        # A non-positive size means there is no stack span to read.
        if frame_size <= 0:
            continue
        mem = read_mem(process, frame.sp, frame_size)
        if not mem:
            continue
        # iter_unpack needs a whole number of 8-byte words; drop any tail
        # instead of raising struct.error on an odd-sized frame.
        usable = len(mem) - len(mem) % 8
        for q, in struct.iter_unpack('<Q', mem[:usable]):
            mem2 = read_mem(process, q, 128)
            if not mem2:
                continue
            s = is_str(mem2)
            if s:
                print(' {:016x} {!r}'.format(q, s))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment