Last active
March 15, 2020 14:56
-
-
Save inaz2/b7048b366a5a5b075ee2 to your computer and use it in GitHub Desktop.
Filter script that rewrites x86 instructions (objdump Intel-syntax disassembly) as C-like expressions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import re | |
# Shared module state; flush() rebinds both names to fresh lists.
lines, locs = [], []  # buffered output lines / captured jump-target strings


def add_loc(s):
    """Build a ``re.sub`` replacement callback for a jump instruction.

    ``s`` is a %-format template containing a single ``%s``.  The returned
    callback appends the match's first capture group to the module-level
    ``locs`` list and returns ``s`` with that group interpolated.
    """
    def _record_and_format(match):
        target = match.group(1)
        locs.append(target)
        return s % target

    return _record_and_format
def flush():
    """Print the buffered lines with jump labels, then reset the buffers.

    Every entry of the module-level ``lines`` list is printed in order.  When
    the text before a line's first ``:`` (after stripping surrounding
    whitespace) is one of the strings collected in ``locs``, a
    ``loc_<addr>:`` label line is printed immediately before it.  Afterwards
    ``lines`` and ``locs`` are rebound to new empty lists.
    """
    global lines, locs
    # Build the lookup once instead of scanning the list for every line.
    targets = set(locs)
    for line in lines:
        addr = line.strip().split(':')[0]
        if addr in targets:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("loc_%s:" % addr)
        print(line)
    lines = []
    locs = []
def _jump(template):
    """Return a ``re.sub`` callback that formats *template* via ``add_loc``.

    ``add_loc`` is looked up only when a jump actually matches, so no closure
    is built for lines that contain no jump.
    """
    def _emit(match):
        return add_loc(template)(match)

    return _emit


# Ordered rewrite rules, applied top to bottom to every line.  Order matters:
# the more specific form of an operand or instruction precedes the general one.
_RULES = tuple(
    (re.compile(pattern), replacement)
    for pattern, replacement in (
        # Memory operands.
        (r'DWORD PTR \[ebp\+0x(\w+)\]', r'prm_\1'),
        (r'DWORD PTR \[esp\+0x(\w+)\]', r'arg_\1'),
        (r'DWORD PTR \[esp\]', r'arg_0'),
        (r'DWORD PTR \[(\w+)\]', r'*\1'),
        (r'DWORD PTR \[([^\]]+)\]', r'*(\1)'),
        (r'DWORD PTR ds:(\w+)', r'*(\1)'),
        (r'BYTE PTR \[(\w+)\]', r'*(char *)\1'),
        (r'BYTE PTR \[([^\]]+)\]', r'*(char *)(\1)'),
        (r'BYTE PTR ds:(\w+)', r'*(char *)(\1)'),
        # Data movement and stack.
        (r'mov\s+([^,]+),([^,]+)', r'\1 = \2;'),
        (r'lea\s+([^,]+),\[([^\]]+)\]', r'\1 = \2;'),
        (r'push\s+(.+)', r'push(\1);'),
        (r'pop\s+(.+)', r'pop(\1);'),
        # Arithmetic and logic.
        (r'add\s+([^,]+),([^,]+)', r'\1 += \2;'),
        (r'sub\s+([^,]+),([^,]+)', r'\1 -= \2;'),
        (r'and\s+([^,]+),([^,]+)', r'\1 &= \2;'),
        # xor of an operand with itself is rendered as an assignment of zero.
        (r'xor\s+([^,]+),\1', r'\1 = 0;'),
        (r'xor\s+([^,]+),([^,]+)', r'\1 ^= \2;'),
        # NOTE(review): '<<<=' and '>>>=' are not C operators; presumably they
        # set shl/shr apart from sal/sar.  Output kept exactly as it was.
        (r'sal\s+([^,]+),([^,]+)', r'\1 <<= \2;'),
        (r'shl\s+([^,]+),([^,]+)', r'\1 <<<= \2;'),
        (r'sar\s+([^,]+),([^,]+)', r'\1 >>= \2;'),
        (r'shr\s+([^,]+),([^,]+)', r'\1 >>>= \2;'),
        # Comparisons store their result in a pseudo variable named 'flag'.
        (r'test\s+([^,]+),([^,]+)', r'flag = \1 & \2;'),
        (r'cmp\s+([^,]+),([^,]+)', r'flag = \1 - \2;'),
        # Only a numeric target is a direct jump.  A register operand such as
        # 'jmp eax' must reach the generic rule below instead of turning into
        # a bogus 'loc_eax' label.
        (r'jmp\s+((?:0x)?[0-9a-fA-F]+)\b.*', _jump("goto loc_%s;")),
        (r'jmp\s+(.+)', r'goto \1;'),
        (r'je\s+(\w+).*', _jump("if (flag == 0) goto loc_%s;")),
        (r'jne\s+(\w+).*', _jump("if (flag != 0) goto loc_%s;")),
        (r'j[ga]\s+(\w+).*', _jump("if (flag > 0) goto loc_%s;")),
        (r'j[ga]e\s+(\w+).*', _jump("if (flag >= 0) goto loc_%s;")),
        (r'j[lb]\s+(\w+).*', _jump("if (flag < 0) goto loc_%s;")),
        (r'j[lb]e\s+(\w+).*', _jump("if (flag <= 0) goto loc_%s;")),
        # Calls: plain symbol, symbol+offset, lowercase-word operand, anything else.
        (r'call\s+\w+ <([^>+-]+)>', r'\1();'),
        (r'call\s+(\w+) <[^>]+>', r'sub_\1();'),
        (r'call\s+([a-z]+).*', r'(*\1)();'),
        (r'call\s+(.+)', r'addr = \1; (*addr)();'),
        # Function epilogue.
        (r'leave$', r'esp = ebp; pop(ebp);'),
        (r'ret$', r'return;'),
    )
)


def translate(line):
    """Return *line* with each recognised x86 instruction rewritten as C-like text.

    Every rule in ``_RULES`` is applied in order with ``re.sub``.  Text that no
    rule matches (addresses, opcode bytes, headers) passes through unchanged.
    Direct jumps are formatted through ``add_loc``, which also records the
    target so ``flush`` can print a label for it.
    """
    for pattern, replacement in _RULES:
        line = pattern.sub(replacement, line)
    return line


def main():
    """Filter disassembly from stdin, printing one blank-line-delimited chunk at a time."""
    for raw in sys.stdin:
        line = raw.rstrip()
        if not line:
            # A blank line ends the current chunk: print it and reset state.
            flush()
            continue
        if re.search(r'nop$', line):
            # Lines ending in 'nop' are dropped from the output.
            continue
        lines.append(translate(line))
    # Print whatever is still buffered after the last input line.
    flush()


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ cat hello.c | |
#include <stdio.h> | |
int main() | |
{ | |
puts("Hello, world!"); | |
return 0; | |
} | |
$ gcc hello.c | |
$ file a.out | |
a.out: ELF 32-bit LSB executable, Intel 80386, version 1 (SYSV), dynamically linked (uses shared libs), for GNU/Linux 2.6.18, not stripped | |
$ objdump -M intel -d a.out | python decom.py | |
a.out: file format elf32-i386 | |
Disassembly of section .init: | |
08048290 <_init>: | |
8048290: 55 push(ebp); | |
8048291: 89 e5 ebp = esp; | |
8048293: 53 push(ebx); | |
8048294: 83 ec 04 esp -= 0x4; | |
8048297: e8 00 00 00 00 sub_804829c(); | |
804829c: 5b pop(ebx); | |
804829d: 81 c3 88 13 00 00 ebx += 0x1388; | |
80482a3: 8b 93 fc ff ff ff edx = *(ebx-0x4); | |
80482a9: 85 d2 flag = edx & edx; | |
80482ab: 74 05 if (flag == 0) goto loc_80482b2; | |
80482ad: e8 1e 00 00 00 __gmon_start__@plt(); | |
loc_80482b2: | |
80482b2: e8 d9 00 00 00 frame_dummy(); | |
80482b7: e8 84 01 00 00 __do_global_ctors_aux(); | |
80482bc: 58 pop(eax); | |
80482bd: 5b pop(ebx); | |
80482be: c9 esp = ebp; pop(ebp); | |
80482bf: c3 return; | |
Disassembly of section .plt: | |
080482c0 <__gmon_start__@plt-0x10>: | |
80482c0: ff 35 28 96 04 08 push(*(0x8049628)); | |
80482c6: ff 25 2c 96 04 08 goto *(0x804962c); | |
80482cc: 00 00 *(char *)eax += al; | |
... |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment