Created
September 24, 2018 21:29
-
-
Save alexander-hanel/7ee68959b80fb9d23acc10a8e583a3cc to your computer and use it in GitHub Desktop.
A simple recursive-traversal disassembler using Capstone and pefile. It only follows code execution paths.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import re | |
import pefile | |
import string | |
import struct | |
from capstool import CapsTool | |
from capstone import * | |
from capstone.x86 import * | |
# Mnemonic tables consulted by disassemble() to classify each instruction.

# Conditional jumps, loop instructions and calls.
BCC = [
    "je", "jne", "js", "jns", "jp", "jnp", "jo", "jno",
    "jl", "jle", "jg", "jge", "jb", "jbe", "ja", "jae",
    "jcxz", "jecxz", "jrcxz",
    "loop", "loopne", "loope",
    "call", "lcall",
]

# Instructions after which the current path is not followed any further.
END = [
    "ret", "retn", "retf", "iret", "int3",
]

# Unconditional jumps.
BNC = [
    "jmp", "jmpf", "ljmp",
]
def get_pe_data(_data):
    """Parse a PE image and locate its entry point.

    Args:
        _data: Raw bytes of the PE file.

    Returns:
        A ``(status, entry_point, bit)`` tuple.  On success ``status`` is
        True, ``entry_point`` is the file offset of the entry point and
        ``bit`` is 32 or 64.  When the data cannot be parsed, or the machine
        type is neither 0x14c nor 0x8664, ``(False, None, None)`` is returned.
    """
    try:
        pe = pefile.PE(data=_data)
        # AddressOfEntryPoint is already an RVA, so it can be translated to
        # a file offset directly (no ImageBase arithmetic is needed).
        entry_point = pe.get_offset_from_rva(
            pe.OPTIONAL_HEADER.AddressOfEntryPoint)
    except Exception as e:
        # Best effort: report the problem and signal failure to the caller.
        print(e)
        return False, None, None

    machine = pe.FILE_HEADER.Machine
    if machine == 0x14c:  # IMAGE_FILE_MACHINE_I386
        return True, entry_point, 32
    if machine == 0x8664:  # IMAGE_FILE_MACHINE_AMD64
        return True, entry_point, 64
    # Any other machine type is not handled.
    return False, None, None
def to_signed_32(n):
    """Interpret the low 32 bits of *n* as a two's-complement signed value."""
    low = n & 0xFFFFFFFF
    if low >= 0x80000000:
        # Sign bit set: wrap into the negative range.
        return low - (1 << 32)
    return low
def to_signed_64(n):
    """Interpret the low 64 bits of *n* as a two's-complement signed value."""
    low = n & 0xFFFFFFFFFFFFFFFF
    if low >= 0x8000000000000000:
        # Sign bit set: wrap into the negative range.
        return low - (1 << 64)
    return low
def get_op_dist(bit, addr):
    """Return operand 0 of the instruction at *addr* as a signed integer.

    The operand is read through the module-level ``cs`` object.

    Args:
        bit: 32 or 64; selects the width used for the sign conversion.
        addr: Address handed to ``cs.get_operand_value``.

    Returns:
        ``(True, value)`` with the operand converted to a signed integer of
        the requested width, or ``(False, None)`` when the operand is not an
        integer (e.g. a register) or ``bit`` is neither 32 nor 64.
    """
    import numbers  # local import keeps the file's import block untouched

    opp = cs.get_operand_value(addr, 0)
    # Reject registers and other non-integer operands.  numbers.Integral
    # also covers Python 2 ``long``, which a plain ``int`` check refuses.
    if not isinstance(opp, numbers.Integral):
        return False, None
    # Convert to a signed value of the requested width.
    if bit == 32:
        return True, to_signed_32(opp)
    if bit == 64:
        return True, to_signed_64(opp)
    # Unsupported width: report failure instead of hitting an unbound local.
    return False, None
def get_false_key(addr_bcc):
    """Find a key of *addr_bcc* whose value is exactly ``False``.

    Returns:
        ``(True, key)`` for the first such key in iteration order, or
        ``(False, None)`` when no value is ``False``.
    """
    pending = (target for target, state in addr_bcc.items() if state is False)
    for target in pending:
        return True, target
    return False, None
# disassemble(addr, cs, debug=False)
#   Walks instructions starting at `addr` through the `cs` object and returns
#   the tuple (visited, strings):
#     visited  - list of addresses, in the order they were first reached
#     strings  - dict: address of a BCC instruction -> printable bytes that
#                cs.get_many_bytes(addr + 5, op_dist - 6) returned for it
#   addr_bcc is the internal work list: target address -> False while the
#   target is queued, True once the walk has arrived at it.
#   With `debug` set, every step prints the address, mnemonic and addr_bcc.
# NOTE(review): the nesting of this block has been lost (every line sits at
#   column 0 and carries a trailing "| |"); the indentation has to be restored
#   from the original before the code can run, and the attachment of the
#   else/continue lines in the BNC and BCC branches cannot be told from here.
# NOTE(review): `bit` in the get_op_dist() calls is not a parameter of this
#   function; it is read from module scope.
def disassemble(addr, cs, debug=False): | |
visited = [] | |
addr_bcc = {} | |
strings = {} | |
while True: | |
instr = cs.get_mnem(addr) | |
if debug: | |
print hex(addr), instr , addr_bcc # , [hex(x) for x in visited] | |
# No mnemonic at addr, or the dword at addr is zero: switch to a queued
# target if one exists, otherwise stop.
if instr is None or cs.dword(addr) == 0x0: | |
status, t_addr = get_false_key(addr_bcc) | |
if status: | |
addr = t_addr | |
continue | |
else: | |
break | |
# addr is a known branch target: a queued entry (False) is flipped to True.
if addr in addr_bcc: | |
if addr_bcc[addr] is False: | |
addr_bcc[addr] = True | |
else: | |
status, t_addr = get_false_key(addr_bcc) | |
if status: | |
addr = t_addr | |
continue | |
else: | |
break | |
# Record the first visit of this address.
if addr not in visited: | |
visited.append(addr) | |
# Unconditional jump: operand 0 (converted to signed) is added to addr to
# obtain the target.
if instr in BNC: | |
status, op_dist = get_op_dist(bit, addr) | |
if status: | |
addr = addr + op_dist | |
if addr in visited: | |
if addr in addr_bcc: | |
if addr_bcc[addr] is False: | |
addr_bcc[addr] = True | |
else: | |
addr_bcc[addr] = False | |
status, t_addr = get_false_key(addr_bcc) | |
if status: | |
addr = t_addr | |
continue | |
continue | |
# Conditional jump / loop / call. Nothing is done when the 16-bit value at
# addr equals 0x15ff (presumably the FF 15 indirect-call encoding - confirm).
elif instr in BCC: | |
if cs.word(addr) != 0x15ff: | |
status, op_dist = get_op_dist(bit, addr) | |
if status: | |
cal_addr = addr + op_dist | |
# Queue the target when it is neither queued already nor visited.
if cal_addr not in addr_bcc: | |
if cal_addr not in visited: | |
addr_bcc[cal_addr] = False | |
# A zero byte right before the target triggers the string check: bytes that
# are all in string.printable are stored in `strings` under this address
# (presumably data embedded between the instruction and its target - confirm).
if cs.byte(cal_addr - 1) == 0x00: | |
temp_data = cs.get_many_bytes(addr + 5, op_dist - 6) | |
if temp_data: | |
if all(c in string.printable for c in temp_data): | |
strings[addr] = temp_data | |
status, t_addr = get_false_key(addr_bcc) | |
if status: | |
addr = t_addr | |
continue | |
# Path ends here: resume from a queued target, or finish when none is left.
elif instr in END: | |
status, t_addr = get_false_key(addr_bcc) | |
if status: | |
addr = t_addr | |
continue | |
else: | |
break | |
# Advance to the next instruction.
addr = cs.next_head(addr) | |
return visited, strings | |
# Script body: disassemble the PE file named on the command line, print every
# visited address with its disassembly, then print the collected strings.
if len(sys.argv) < 2:
    sys.exit("usage: %s <pe_file>" % sys.argv[0])

with open(sys.argv[1], "rb") as infile:
    data = infile.read()

status, addr, bit = get_pe_data(data)
if not status:
    # get_pe_data() returns (False, None, None) on failure; stop here rather
    # than hand None values to CapsTool and disassemble().
    sys.exit("unable to handle %s as a 32-bit or 64-bit x86 PE file"
             % sys.argv[1])

# `cs` and `bit` stay at module level: get_op_dist() and disassemble() read
# them as globals.
# The first two bytes of the image are replaced with NULs before it is handed
# to CapsTool (NOTE(review): the reason is not stated in this file).
cs = CapsTool(b"\x00\x00" + data[2:], bit)
yy, ss = disassemble(addr, cs)
for x in yy:
    print("%s %s" % (hex(x), cs.get_disasm(x)))
print(ss)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment