Created
September 24, 2018 21:18
-
-
Save alexander-hanel/a266251ad9bf569c2985dcd625b5913a to your computer and use it in GitHub Desktop.
python recursive traversal disassembly using capstone and pefile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import re | |
import pefile | |
import string | |
import struct | |
from capstool import CapsTool | |
from capstone import * | |
from capstone.x86 import * | |
# Mnemonics whose operand target is queued for later while the walk also
# carries on at the following instruction (conditional jumps, loops, calls).
BCC = [
    "je", "jne", "js", "jns", "jp", "jnp", "jo", "jno",
    "jl", "jle", "jg", "jge", "jb", "jbe", "ja", "jae",
    "jcxz", "jecxz", "jrcxz",
    "loop", "loopne", "loope",
    "call", "lcall",
]

# Mnemonics that terminate the current path.
END = [
    "ret",
    "retn",
    "retf",
    "iret",
    "int3",
]

# Unconditional jumps: the walk moves to the target instead of falling through.
BNC = [
    "jmp",
    "jmpf",
    "ljmp",
]
def get_pe_data(_data):
    """Locate the entry point of a PE image and report its bitness.

    Args:
        _data: raw bytes of the PE file.

    Returns:
        A ``(status, entry_point, bit)`` tuple. On success ``status`` is True,
        ``entry_point`` is the value pefile's ``get_offset_from_rva`` returns
        for ``AddressOfEntryPoint`` and ``bit`` is 32 or 64. On any failure
        (parse error or a machine type other than 0x14c / 0x8664) the tuple
        is ``(False, None, None)``.
    """
    try:
        pe = pefile.PE(data=_data)
        # AddressOfEntryPoint is already an RVA, so it is translated directly.
        entry_rva = pe.OPTIONAL_HEADER.AddressOfEntryPoint
        entry_point = pe.get_offset_from_rva(entry_rva)
    except Exception as e:
        # Deliberately broad: any parsing problem is reported and turned into
        # the failure tuple rather than aborting the caller.
        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3.
        print(e)
        return False, None, None

    # COFF machine types: 0x14c is x86 (32-bit), 0x8664 is x86-64.
    machine = pe.FILE_HEADER.Machine
    if machine == 0x14c:
        return True, entry_point, 32
    if machine == 0x8664:
        return True, entry_point, 64
    return False, None, None
def to_signed_32(n):
    """Reinterpret the low 32 bits of *n* as a two's-complement signed value."""
    low = n & 0xffffffff
    if low & 0x80000000:
        # Sign bit set: wrap into the negative range.
        return low - 0x100000000
    return low
def to_signed_64(n):
    """Reinterpret the low 64 bits of *n* as a two's-complement signed value."""
    low = n & 0xffffffffffffffff
    if low & 0x8000000000000000:
        # Sign bit set: wrap into the negative range.
        return low - 0x10000000000000000
    return low
def get_op_dist(bit, addr, tool=None):
    """Fetch operand 0 of the instruction at *addr* as a signed integer.

    Args:
        bit: operand width used for sign conversion; 32 or 64.
        addr: address of the instruction to inspect.
        tool: optional object providing ``get_operand_value(addr, index)``.
            When omitted, the module-level ``cs`` is used, which is what
            two-argument callers have always relied on.

    Returns:
        ``(True, value)`` when the operand is an integer and *bit* is 32 or
        64; ``(False, None)`` when the operand is not an integer (for
        example a register) or *bit* is unsupported.
    """
    import numbers

    disasm = cs if tool is None else tool
    opp = disasm.get_operand_value(addr, 0)
    # numbers.Integral also matches Python 2's `long`, which a plain
    # isinstance(opp, int) check would reject for large operand values.
    if not isinstance(opp, numbers.Integral):
        return False, None
    # Convert to a signed integer of the requested width.
    if bit == 32:
        return True, to_signed_32(opp)
    if bit == 64:
        return True, to_signed_64(opp)
    # Unsupported width: report failure rather than hitting an unbound local.
    return False, None
def get_false_key(addr_bcc):
    """Find the first key of *addr_bcc* whose value is the object ``False``.

    Returns:
        ``(True, key)`` for the first such key in iteration order, otherwise
        ``(False, None)``. The comparison is by identity, so falsy values
        such as ``0`` or ``None`` do not count.
    """
    for candidate, state in addr_bcc.items():
        if state is False:
            return True, candidate
    return False, None
def disassemble(addr, cs, debug=False):
    """Walk the code reachable from *addr*, following branch targets.

    NOTE(review): this function was re-indented from a listing whose leading
    whitespace had been lost. The nesting below is the reading under which
    the loop terminates; confirm it against the upstream copy.

    Args:
        addr: address at which to start.
        cs: disassembler helper; this function calls its ``get_mnem``,
            ``dword``, ``word``, ``byte``, ``get_many_bytes`` and
            ``next_head`` methods.
        debug: when true, print the current address, mnemonic and branch
            table on every iteration.

    Returns:
        ``(visited, strings)``: ``visited`` is the list of instruction
        addresses in first-visit order; ``strings`` maps the address of a
        branch instruction to printable data found between that instruction
        and its target.

    Operand decoding goes through ``get_op_dist(bit, addr)``, which uses the
    module-level ``bit`` and, for a two-argument call, the module-level
    ``cs`` -- not necessarily the ``cs`` passed to this function.
    """
    visited = []
    seen = set()    # mirrors `visited` so membership tests are O(1)
    addr_bcc = {}   # branch target -> False (pending) / True (already started)
    strings = {}

    while True:
        instr = cs.get_mnem(addr)
        if debug:
            # Single formatted argument prints identically on Python 2 and 3.
            print("%s %s %s" % (hex(addr), instr, addr_bcc))

        if instr is None or cs.dword(addr) == 0x0:
            # Nothing decodable (or four zero bytes) here: give up on this
            # path. Mark the address as handled first, otherwise a pending
            # target that lands here would be returned by get_false_key()
            # on every iteration and the loop would never end.
            if addr in addr_bcc:
                addr_bcc[addr] = True
            status, t_addr = get_false_key(addr_bcc)
            if status:
                addr = t_addr
                continue
            break

        if addr in addr_bcc:
            if addr_bcc[addr] is False:
                # Starting a pending target now.
                addr_bcc[addr] = True
            else:
                # Ran into a target that was already started: switch to the
                # next pending one, or finish.
                status, t_addr = get_false_key(addr_bcc)
                if status:
                    addr = t_addr
                    continue
                break

        if addr not in seen:
            seen.add(addr)
            visited.append(addr)

        if instr in BNC:
            status, op_dist = get_op_dist(bit, addr)
            if status:
                # Follow the unconditional jump.
                addr = addr + op_dist
                if addr in seen:
                    if addr in addr_bcc:
                        if addr_bcc[addr] is False:
                            addr_bcc[addr] = True
                    else:
                        addr_bcc[addr] = False
                    # Destination already walked: prefer a pending target.
                    status, t_addr = get_false_key(addr_bcc)
                    if status:
                        addr = t_addr
                continue
        elif instr in BCC:
            # 0x15ff is presumably the byte pair FF 15, i.e. an indirect
            # `call [mem]` with no immediate target -- TODO confirm.
            if cs.word(addr) != 0x15ff:
                status, op_dist = get_op_dist(bit, addr)
                if status:
                    cal_addr = addr + op_dist
                    if cal_addr not in addr_bcc and cal_addr not in seen:
                        addr_bcc[cal_addr] = False
                    # A zero byte right before the target suggests inline
                    # NUL-terminated data that the branch skips over.
                    if cs.byte(cal_addr - 1) == 0x00:
                        # addr + 5 assumes a 5-byte branch instruction.
                        temp_data = cs.get_many_bytes(addr + 5, op_dist - 6)
                        # bytearray() yields ints for both Python 2 str and
                        # Python 3 bytes, so the check works on either.
                        # Assumes get_many_bytes returns a byte string.
                        if temp_data and all(
                                chr(c) in string.printable
                                for c in bytearray(temp_data)):
                            strings[addr] = temp_data
                            # Do not decode the data as code; resume at a
                            # pending target when one exists.
                            status, t_addr = get_false_key(addr_bcc)
                            if status:
                                addr = t_addr
                                continue
        elif instr in END:
            status, t_addr = get_false_key(addr_bcc)
            if status:
                addr = t_addr
                continue
            break

        addr = cs.next_head(addr)

    return visited, strings
# Script entry point: disassemble the PE file named on the command line and
# print every visited instruction followed by the recovered strings.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: %s <pe-file>" % sys.argv[0])

    with open(sys.argv[1], "rb") as infile:
        data = infile.read()

    # `bit` and `cs` are assigned at module level on purpose: get_op_dist()
    # and disassemble() read them as globals.
    status, addr, bit = get_pe_data(data)
    if not status:
        # Without this check CapsTool would be built with bit=None.
        sys.exit("unsupported or malformed PE file: %s" % sys.argv[1])

    # The first two bytes of the file are replaced with zero bytes. A bytes
    # literal keeps the concatenation valid on Python 3; on Python 2 it is
    # the same as a plain str.
    cs = CapsTool(b"\x00\x00" + data[2:], bit)
    yy, ss = disassemble(addr, cs)
    for x in yy:
        print("%s %s" % (hex(x), cs.get_disasm(x)))
    print(ss)
Wow, thank you very much for all the insight, Alex; I really appreciate it. I will review everything you said and also take a look at what you have provided. If you like disassembly/assembly, there is also a framework available called Capstone. I highly recommend it.
Sent from Outlook Mobile<https://aka.ms/blhgte>
…________________________________
From: Alexander Hanel <[email protected]>
Sent: Monday, April 20, 2020 9:48:52 AM
To: alexander-hanel <[email protected]>
Cc: ContegoCode <[email protected]>; Mention <[email protected]>
Subject: Re: alexander-hanel/rtd.py
@alexander-hanel commented on this gist.
________________________________
@ContegoCode<https://github.com/ContegoCode>
This code was written for Python 2.7. I have tested it but if you change "\x00\x00" from a string to a byte string b"\x00\x00" it should work. I think the "\x00\x00" were inserted because I was testing with a portable executable file that I was having issues with or I wanted to test on raw data. I'm not 100% sure what I was thinking or how good this code is... I probably need to review it. If you are looking for more details on recursive descent disassembly, I have heard good things about No Starch's Binary Analysis book. I have it on my bookshelf but haven't given it a full read, but it does have some code: https://nostarch.com/binaryanalysis
Cheers,
Alex
—
You are receiving this because you were mentioned.
Reply to this email directly, view it on GitHub<https://gist.github.com/a266251ad9bf569c2985dcd625b5913a#gistcomment-3261875>, or unsubscribe<https://github.com/notifications/unsubscribe-auth/AKWKYRCL3JJLYB5EWD6UDALRNRVGJANCNFSM4MLJQ23A>.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@ContegoCode
This code was written for Python 2.7. I have tested it but if you change "\x00\x00" from a string to a byte string b"\x00\x00" it should work. I think the "\x00\x00" were inserted because I was testing with a portable executable file that I was having issues with or I wanted to test on raw data. I'm not 100% sure what I was thinking or how good this code is... I probably need to review it. If you are looking for more details on recursive descent disassembly, I have heard good things about No Starch's Binary Analysis book. I have it on my bookshelf but haven't given it a full read, but it does have some code: https://nostarch.com/binaryanalysis
Cheers,
Alex