Last active
June 12, 2017 16:18
-
-
Save zb3/af8eac04f17ab4623df049e7e4c49d18 to your computer and use it in GitHub Desktop.
Script to display function calls and string constant references in MIPS executables (lui + addiu/ori) disassembled by objdump. Requires objdump and readelf.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import re
import subprocess
import sys
from collections import defaultdict
#
# A similar tool exists: https://sourceware.org/ml/binutils/2010-07/msg00172.html
# but we aim to support string literals... by "interpreting" selected instructions.
#

# Path of the executable to annotate (first command-line argument).
# Fail with a usage message instead of a bare IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit('usage: ' + sys.argv[0] + ' <mips-executable>')
exe = sys.argv[1]

# Prefix prepended to the binutils program names ("readelf", "objdump").
# The CROSS_COMPILE environment variable overrides the built-in default.
BASE = os.environ.get(
    'CROSS_COMPILE',
    '/home/zb3/workrouter/opt/toolchains/uclibc-crosstools-gcc-4.4.2-1/usr/bin/mips-linux-uclibc-',
)

# readelf output for the section headers (-S) and the architecture-specific
# information (-A); it is decoded as iso-8859-1 so that no byte can fail to
# decode. The `entry` and `rodata` patterns are searched in this text.
raw = subprocess.check_output([BASE+'readelf', '-S', '-A', exe]).decode('iso-8859-1')
# Shared fragments of the objdump line patterns below.
# Every instruction line starts with "<address>: <encoding>" before the mnemonic.
_INSN = r'^\s*[0-9a-f]+:*\s*[0-9a-f]+\s*'
# A register operand: everything up to the next comma or whitespace. This
# accepts two-character names (t9, a0, ...) as well as longer ones such as
# "zero", which a fixed two-character match would reject.
_REG = r'[^\s,]+'
# An immediate operand: optional 0x prefix, then hex digits and/or '-'.
_IMM = r'(0x)?[0-9a-f-]+'

# One "(gp)"-relative entry of the readelf output: key is the "<offset>(gp)"
# access expression, value the hex number following it; type and name are only
# present when the line also carries FUNC/OBJECT symbol information.
entry = re.compile(r'^\s*[0-9a-f]+\s*(?P<key>[0-9-]*[(]gp[)])\s*(?P<value>[0-9a-f]+)(\s*[0-9a-f]+\s*[0-9a-f]+\s*(?P<type>FUNC|OBJECT)\s*[^\s]*\s*(?P<name>.*))?$', re.MULTILINE)

# The .rodata line of the section header table: address, file offset and size.
# The section index may be space padded ("[ 9]") or have three digits ("[100]").
rodata = re.compile(r'^\s*\[\s*[0-9]+\]\s*\.rodata\s*PROGBITS\s*(?P<addr>[0-9a-f]+)\s*(?P<off>[0-9a-f]+)\s*(?P<size>[0-9a-f]+)', re.MULTILINE)

# Instruction patterns; each one must match a complete disassembly line.
call = re.compile(_INSN + r'jalr\s*t9$')
load_global = re.compile(_INSN + r'lw\s*(?P<reg>' + _REG + r'),(?P<key>[0-9-]+[(]gp[)])$')
load_immediate = re.compile(_INSN + r'(?P<instr>li|lui)\s*(?P<reg>' + _REG + r'),(?P<val>' + _IMM + r')$')
move = re.compile(_INSN + r'move\s*(?P<reg1>' + _REG + r'),(?P<reg2>' + _REG + r')$')
addiu = re.compile(_INSN + r'addiu\s*(?P<dreg>' + _REG + r'),(?P<reg>' + _REG + r'),(?P<val>' + _IMM + r')$')
ori = re.compile(_INSN + r'ori\s*(?P<dreg>' + _REG + r'),(?P<reg>' + _REG + r'),(?P<val>' + _IMM + r')$')
# Locate .rodata in the readelf section table.
#   rd_start - address of the first byte of the section
#   rd_end   - address one past its last byte
#   rd_delta - value to add to an address to obtain the offset in the file
rd_info = rodata.search(raw)
if rd_info is None:
    # Without a .rodata line there is nothing to read strings from. Use an
    # empty range so that string annotation is skipped while the remaining
    # annotations still work, instead of crashing on a None match.
    print('warning: no .rodata section found, string literals are not annotated', file=sys.stderr)
    rd_start = rd_end = rd_delta = 0
else:
    rd_start = int(rd_info.group('addr'), 16)
    rd_end = rd_start + int(rd_info.group('size'), 16)
    rd_delta = int(rd_info.group('off'), 16) - rd_start

# Raw bytes of the executable; string literals are read straight from here.
with open(exe, 'rb') as f:
    mem = f.read()
# "<offset>(gp)" access expression -> value listed for that entry by readelf
# (presumably the initial contents of the corresponding GOT slot).
immediates = {}
# value -> (symbol name, True if the symbol is a FUNC, False for OBJECT)
symbols = {}

for got in entry.finditer(raw):
    key = got.group('key')
    name = got.group('name')
    value = int(got.group('value'), 16)

    immediates[key] = value
    # Only entries that carry symbol information have a name.
    if name:
        symbols[value] = (name, got.group('type') == 'FUNC')
# Disassemble the executable with objdump. The text is kept as a list of
# lines so that individual lines can be annotated in place afterwards.
objdump_output = subprocess.check_output([BASE+'objdump', '-d', exe])
disas = objdump_output.decode('iso-8859-1').split('\n')
print('got disassembly...')
def _string_at(addr):
    """Return the NUL-terminated string that starts at *addr*, or None.

    *addr* is only treated as a string when it lies inside the .rodata range
    and is either the first byte of the section or directly preceded by a NUL
    byte (presumably to skip pointers into the middle of other data). Bytes
    are decoded as iso-8859-1, i.e. one character per byte.
    """
    if not (rd_start <= addr < rd_end - 1):
        return None

    off = addr + rd_delta
    if not (0 <= off < len(mem)):
        # The section header points outside the file that was read.
        return None
    if addr != rd_start and mem[off - 1] != 0:
        return None

    end = mem.find(b'\0', off)
    if end < 0:
        # No terminator left in the file: stop at the end of the data rather
        # than indexing past it.
        end = len(mem)
    return mem[off:end].decode('iso-8859-1')


# Very small register model: register name -> last value we could compute.
# Registers that were never written read as 0 (defaultdict).
vm = defaultdict(int)
vm['zero'] = 0

for ln, line in enumerate(disas):
    # Indirect call through t9: name the callee when t9 holds a known symbol.
    m = call.match(line)
    if m and vm['t9'] in symbols:
        disas[ln] = line + ' // ' + symbols[vm['t9']][0] + '()'

    creg = None  # register written by this instruction, if it is modelled

    # Load through a "(gp)" entry: treat it as loading the entry's value.
    m = load_global.match(line)
    if m and m.group('key') in immediates:
        creg = m.group('reg')
        val = vm[creg] = immediates[m.group('key')]

        if val in symbols:
            prefix = 'func: ' if symbols[val][1] else 'obj: '
            disas[ln] = line + ' // ' + prefix + symbols[val][0]
        else:
            disas[ln] = line + ' // ' + hex(val)

    # Register-to-register copy.
    m = move.match(line)
    if m:
        creg = m.group('reg1')
        vm[creg] = vm[m.group('reg2')]

    # Load immediate (li) and load upper immediate (lui).
    m = load_immediate.match(line)
    if m:
        val = int(m.group('val'), 0)
        if m.group('instr') == 'lui':
            val = val << 16  # lui places the immediate in the upper half
        creg = m.group('reg')
        vm[creg] = val

    # Add immediate; the result wraps around at 32 bits.
    m = addiu.match(line)
    if m:
        creg = m.group('dreg')
        vm[creg] = (vm[m.group('reg')] + int(m.group('val'), 0)) & 0xffffffff

    # Bitwise OR with an immediate.
    m = ori.match(line)
    if m:
        creg = m.group('dreg')
        vm[creg] = (vm[m.group('reg')] | int(m.group('val'), 0)) & 0xffffffff

    # If the register just written now points at the start of a string in
    # .rodata, annotate the line with it. This replaces any annotation added
    # above for the same line. An empty string is still annotated.
    if creg:
        literal = _string_at(vm[creg])
        if literal is not None:
            disas[ln] = line + ' // STRING ' + literal
# Write the annotated listing next to the input as "<exe>.asm".
# All text in it was decoded as iso-8859-1 (one character per byte), so
# encoding it the same way can never fail and reproduces the original bytes,
# whatever the locale's default encoding is.
disas = '\n'.join(disas)
with open(exe+'.asm', 'w', encoding='iso-8859-1') as f:
    f.write(disas)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment