Last active
July 6, 2019 16:15
-
-
Save integeruser/5509d0d0e533db0e4c2e488fe8b9f46c to your computer and use it in GitHub Desktop.
Enhance disassembly of the function surrounding the pc of the selected frame
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import collections | |
import random | |
import re | |
import shutil | |
import gdb | |
# ANSI SGR escape sequences used to colour terminal output.  Codes 31-37 select
# a foreground colour and code 0 resets all attributes.
# NOTE: the key order matters -- find_paths() cycles through the keys by
# position (including 'reset') to pick a colour for each jump.
colors = {
    'red': '\u001b[31m',
    'green': '\u001b[32m',
    'yellow': '\u001b[33m',
    'blue': '\u001b[34m',
    'magenta': '\u001b[35m',
    'cyan': '\u001b[36m',
    'white': '\u001b[37m',
    'reset': '\u001b[0m'
}
# Terminal width in character cells, sampled once when the script is loaded;
# (80, 20) is the fallback passed to shutil for when the size cannot be queried.
columns = shutil.get_terminal_size((80, 20)).columns
def colorize(text, color):
    """Wrap *text* in the escape sequence for *color*, followed by a reset.

    *color* must be a key of the module-level ``colors`` table; an unknown
    name raises ``KeyError``.
    """
    start = colors[color]
    body = str(text)
    stop = colors['reset']
    return ''.join((start, body, stop))
def decolorize(text):
    """Return *text* with the colour sequences emitted by colorize() removed.

    Only the ``ESC[3?m`` foreground codes and the ``ESC[0m`` reset are
    stripped; any other escape sequence is left in place.
    """
    ansi_color = re.compile(r'(\u001b\[3.m)|(\u001b\[0m)')
    return ansi_color.sub('', text)
################################################################################################### | |
def parse_disassembly(disassembly):
    """Parse the lines of a GDB ``disassemble`` dump.

    Returns an ``OrderedDict`` keyed by instruction address, in the order the
    instructions appear, whose values are the
    ``(curr, addr, off, instr, info)`` tuples produced by
    parse_instruction().  Lines that are not instructions (header, footer,
    blanks) are skipped.
    """
    instructions = collections.OrderedDict()
    for raw_line in disassembly:
        try:
            parsed = parse_instruction(raw_line)
        except AttributeError:
            # parse_instruction() signals "no match" this way.
            continue
        curr, addr, off, instr, info = parsed
        instructions[addr] = (curr, addr, off, instr, info)
    return instructions
def parse_instruction(line):
    """Split one line of GDB disassembly into its components.

    Returns ``(curr, addr, off, instr, info)``:

    * ``curr``  -- True when the line carries the ``=>`` current-pc marker
    * ``addr``  -- instruction address as an int
    * ``off``   -- decimal offset from the ``<+N>`` field as an int
    * ``instr`` -- instruction text, stripped
    * ``info``  -- trailing ``# ...`` comment (stripped), or None

    Raises ``AttributeError`` when *line* is not an instruction line;
    parse_disassembly() relies on that to skip such lines.
    """
    match = re.search(
        r'(?P<curr>=>)?[ +](?P<addr>0[xX][0-9a-fA-F]+) <\+(?P<off>\d+)>:\s+(?P<instr>.+?(?=\s+(?P<info>#\s+.+?$)|$))',
        line)
    # ``match`` is None for non-instruction lines, so this attribute access is
    # what raises the AttributeError callers expect.
    fields = match.groupdict()

    is_current = fields['curr'] is not None
    address = int(fields['addr'], 16)
    offset = int(fields['off'])
    text = fields['instr'].strip()
    comment = fields['info'].strip() if fields['info'] else None
    return is_current, address, offset, text, comment
################################################################################################### | |
def find_jumps(instructions):
    """Map the address of every direct jump to the address it targets.

    *instructions* is the mapping built by parse_disassembly(); each value is
    a ``(curr, addr, off, instr, info)`` tuple.  An instruction counts as a
    jump when its mnemonic is ``j*`` (``jmp``, ``je``, ``jg``, ...) or one of
    the ``loop`` family and its operand is a literal hexadecimal address.
    Indirect jumps such as ``jmp QWORD PTR [...]`` are ignored because their
    target is not known statically.

    Returns a dict ``{from_addr: to_addr}``.
    """
    # The mnemonic is matched as a whole word.  A character class such as
    # ``[jmp|je|...]`` would only test the single character before the
    # whitespace, missing jg/jl/js/... and mis-detecting other instructions.
    jump_re = re.compile(
        r'\b(?:j[a-z]+|loopn?[ez]?)\s+(?P<to_addr>0[xX][0-9a-fA-F]+)')

    jumps = {}
    for _, from_addr, _, instr, _ in instructions.values():
        match = jump_re.search(instr)
        if match:
            jumps[from_addr] = int(match.group('to_addr'), 16)
    return jumps
def _draw_corner(paths, addr, lane, corner, color):
    """Put *corner* in column *lane* of the row for *addr* and extend it right.

    Does nothing when *addr* has no row, i.e. the jump endpoint lies outside
    the disassembled function.
    """
    row = paths.get(addr)
    if row is None:
        return
    row[lane] = (corner, color)
    # Horizontal connector from the corner towards the instruction text.
    for column in range(lane + 1, len(row)):
        if row[column][0] == corner:
            break
        row[column] = ('━', color)


def find_paths(instructions, jumps):
    """Build the box-drawing gutter that visualises every jump.

    Each jump gets its own column ("lane"); the longest jumps get the
    left-most lanes so shorter ones nest inside them.  Lane colours are taken
    from the module-level ``colors`` table, cycling through its keys by
    position.

    Returns a dict mapping every instruction address to a list of
    ``(char, color_name)`` cells, one per jump.  ``char`` is ``' '``, ``'┏'``
    (upper endpoint), ``'┃'`` (between the endpoints), ``'┗'`` (lower
    endpoint) or ``'━'`` (connector from an endpoint to the instruction).

    A jump endpoint that is not one of *instructions* (for example a tail
    call into another function) is tolerated: no corner is drawn for it, but
    the vertical line still covers the instructions between the endpoints.
    """
    spans = []
    for from_addr, to_addr in jumps.items():
        start_addr = min(from_addr, to_addr)
        end_addr = max(from_addr, to_addr)
        spans.append((start_addr, end_addr, end_addr - start_addr))
    # Longest first; the sort is stable so ties keep their discovery order.
    spans.sort(key=lambda span: span[2], reverse=True)

    lanes = len(jumps)
    palette = list(colors.keys())
    paths = {addr: [(' ', 'reset') for _ in range(lanes)] for addr in instructions}

    for lane, (start_addr, end_addr, _) in enumerate(spans):
        color = palette[lane % len(palette)]

        _draw_corner(paths, start_addr, lane, '┏', color)
        # Only visit real instructions instead of every integer address in
        # the range, which may be huge for a jump that leaves the function.
        for addr in instructions:
            if start_addr < addr < end_addr:
                paths[addr][lane] = ('┃', color)
        _draw_corner(paths, end_addr, lane, '┗', color)
    return paths
def find_basic_blocks(instructions, jumps):
    """Return the set of addresses at which a new basic block starts.

    For every jump in *jumps* (``{from_addr: to_addr}``) two addresses are
    recorded: the instruction that follows the jump (fall-through) and the
    jump target.  *instructions* is the address-ordered mapping built by
    parse_disassembly(); only its keys are used.

    A jump that is the last instruction has no fall-through successor, so
    only its target is recorded.
    """
    addresses = list(instructions)
    # One pass to index the addresses instead of a list.index() per jump.
    position = {addr: index for index, addr in enumerate(addresses)}

    basic_blocks = set()
    for from_addr, to_addr in jumps.items():
        index = position.get(from_addr)
        if index is not None and index + 1 < len(addresses):
            basic_blocks.add(addresses[index + 1])
        basic_blocks.add(to_addr)
    return basic_blocks
################################################################################################### | |
def _simplify_memory_operand(pseudo, info):
    """Drop ``*WORD PTR`` size prefixes and resolve rip-relative operands.

    When *info* (the ``# ...`` comment GDB appends) contains a hexadecimal
    address, every ``rip+0x...`` / ``rip-0x...`` in *pseudo* is replaced by
    that address.
    """
    pseudo = re.sub(r'\b[A-Z]*WORD PTR \[(.+?)\]', r'[\1]', pseudo)
    if info:
        resolved = re.search(r'0[xX][0-9a-fA-F]+', info)
        # The comment may hold only a symbol; leave the operand untouched then.
        if resolved:
            pseudo = re.sub(r'rip[+-]0[xX][0-9a-fA-F]+', resolved.group(0), pseudo)
    return pseudo


def to_pseudo(instruction, info):
    """Translate an Intel-syntax instruction into a short pseudo-code form.

    Handled instructions: ``call`` (rendered as ``call <name-or-target>`` in
    yellow), ``mov`` and ``lea`` (``dst := src``), and ``add`` / ``sub`` /
    ``and`` / ``xor`` (``dst += op`` and so on).  Mnemonics are matched as
    whole words, so e.g. ``fadd`` is not mistaken for ``add``.

    *instruction* is the stripped instruction text and *info* is the trailing
    GDB comment (or None), both as returned by parse_instruction().

    Returns the pseudo-code string, or None when the instruction is not one
    of the handled kinds.
    """
    call_match = re.search(
        r'\bcall\s+(?P<addr>.+?)(?:\s+<(?P<name>.+)>)?$', instruction)
    if call_match:
        # An unmatched optional group yields None rather than raising, so
        # fall back to the raw target explicitly.
        target = call_match.group('name') or call_match.group('addr')
        return colorize('call ' + target, 'yellow')

    mov_match = re.search(r'\bmov\s+(?P<dst>.+),(?P<src>.+)', instruction)
    if mov_match:
        pseudo = mov_match.group('dst') + ' := ' + mov_match.group('src')
        return _simplify_memory_operand(pseudo, info)

    lea_match = re.search(r'\blea\s+(?P<dst>.+),\[(?P<src>.+)\]', instruction)
    if lea_match:
        pseudo = lea_match.group('dst') + ' := ' + lea_match.group('src')
        return _simplify_memory_operand(pseudo, info)

    compound_ops = (('add', '+='), ('sub', '-='), ('and', '&='), ('xor', '^='))
    for mnemonic, operator in compound_ops:
        op_match = re.search(
            r'\b' + mnemonic + r'\s+(?P<dst>.+),(?P<op>.+)', instruction)
        if op_match:
            return op_match.group('dst') + ' ' + operator + ' ' + op_match.group('op')

    return None
################################################################################################### | |
def colorize_registers(code):
    """Colour the general-purpose register names that appear in *code*.

    Recognised names are ``ax``/``bx``/``cx``/``dx`` with an optional ``e`` or
    ``r`` prefix, and ``di``/``si`` with a mandatory ``e`` or ``r`` prefix.
    Every width of the same register shares one colour.

    Names are matched as whole words in a single pass, so fragments of other
    identifiers (``esi`` inside ``resize``, ``ax`` inside ``max``) are left
    alone.
    """
    # Keyed by the last two letters, which identify the register family.
    palette = {
        'ax': 'white',
        'bx': 'green',
        'cx': 'cyan',
        'dx': 'magenta',
        'di': 'yellow',
        'si': 'blue',
    }

    def paint(match):
        register = match.group(0)
        return colorize(register, palette[register[-2:]])

    return re.sub(r'\b(?:[re]?[abcd]x|[re][ds]i)\b', paint, code)
################################################################################################### | |
class EnhanceCommand(gdb.Command):
    'Enhance disassembly of the function surrounding the pc of the selected frame.'

    # NOTE: the string above is the class docstring and is kept as a single
    # line on purpose -- GDB presumably shows it as the help text of the
    # command, so rewording it would change user-visible output.

    def __init__(self):
        # Register the command with GDB under the name 'enhance'.
        super(EnhanceCommand, self).__init__('enhance', gdb.COMMAND_SUPPORT, gdb.COMPLETE_NONE)

    def invoke(self, argument, from_tty):
        # Entry point called when the user runs `enhance [argument]`.
        # `argument` is forwarded verbatim to GDB's own `disassemble` command;
        # `from_tty` is unused.
        try:
            disassembly = gdb.execute('disassemble %s' % argument, to_string=True).split('\n')
        except Exception as e:
            # Best effort: report why GDB could not disassemble and give up.
            print(e)
            return

        instructions = parse_disassembly(disassembly)
        jumps = find_jumps(instructions)
        paths = find_paths(instructions, jumps)
        basic_blocks = find_basic_blocks(instructions, jumps)

        print(disassembly[0])  # 'Dump of assembler code for function *:'
        for curr, addr, _, instr, info in instructions.values():
            addr_str = hex(addr)
            lanes = paths[addr]

            if addr in basic_blocks:
                # Horizontal rule above the first instruction of a basic
                # block; jump lines crossing it are kept visible.
                prefix = '─' * (2 + 1 + len(addr_str) + 1)
                middle = ''.join(
                    colorize('┃', color) if c in ('┃', '┗') else '─'
                    for c, color in lanes)
                suffix = '─' * (columns - len(prefix) - len(jumps))
                print(prefix + middle + suffix)

            path = ''.join(colorize(c, color) for c, color in lanes)
            pseudo = to_pseudo(instr, info)
            code = colorize_registers(pseudo if pseudo else instr)
            # The raw instruction is echoed on the right only when the main
            # column shows pseudo-code instead of it.
            raw = instr if pseudo else ''

            line = '{curr} {addr} {path} {code: <{width}} '.format(
                curr='=>' if curr else '  ',
                addr=addr_str,
                path=path,
                code=code,
                # Escape sequences take no screen space: widen the field by
                # their length so the visible text is still padded to 35.
                width=35 + len(code) - len(decolorize(code)))

            room = columns - len(decolorize(line))
            if len(raw) > room:
                # Clamp at zero: a negative slice bound would keep most of
                # the text instead of dropping it.
                raw = raw[:max(0, room - 1)] + '…'
            print(line + '\033[92m' + raw + '\033[0m')

        # The dump ends with a newline, so the footer is the second-to-last
        # element; skip it if the output was unexpectedly short.
        if len(disassembly) >= 2:
            print(disassembly[-2])  # 'End of assembler dump.'


EnhanceCommand()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment