Skip to content

Instantly share code, notes, and snippets.

@reanimat0r
Forked from mak/hdoc.py
Created October 28, 2018 21:09
Show Gist options
  • Save reanimat0r/917e22aa07888749f59c29b53377e768 to your computer and use it in GitHub Desktop.
Save reanimat0r/917e22aa07888749f59c29b53377e768 to your computer and use it in GitHub Desktop.
Extract payload from H-docs
#!/usr/bin/env python2
import os
import re
import sys
import math
import pefile
import struct
import hashlib
import argparse
from oletools import olevba
from mlib.bits import chunks
# Command-line interface. ``args`` stays None until the script entry point
# parses the command line; write_payload() checks it before reading options.
apr = argparse.ArgumentParser(description='Extract payload from H-docs')
apr.add_argument('file', type=str, nargs='?', help='File path')
# The two flags used to be concatenated into the single option string
# '-u--url' (missing comma), so '--url' was rejected and '-u' was ambiguous
# with '-use_dde'. dest is pinned to 'u__url' because that is the attribute
# name the accidental option string produced and the entry point reads.
apr.add_argument('-u', '--url', dest='u__url',
                 help='Fetch maldoc first', action='store_true')
apr.add_argument('-d', '--dir', help='Output dir', default='/tmp')
apr.add_argument('-o', '--out',
                 help="prefix for filenames used to store decoded chunks",
                 default='')
apr.add_argument('-use_dde', '--use_dde',
                 help='This doc is using DDE vuln not stored shellcode',
                 default=False, action='store_true')
args = None
# Applied case-insensitively to VBA source by get_code_vars(): matches an
# in-place update of the form `arr(idx) = arr(idx) +/- value`. The last group
# (the value) is resolved with get_var() and becomes the "add" constant.
ARP = r'(?P<arr_name>[a-z]+)\((?P<idx_name>[a-z]+)\)\s*=\s*(?P=arr_name)\((?P=idx_name)\)\s*(\+|-)\s*([0-9a-z]+)'
# Applied to VBA source by get_shellcode(): matches `= Name.Member` optionally
# followed by "(" or "."; the groups are unpacked as (module, obj, trailing char).
MRP = r'=\s*([a-zA-Z0-9]+)\.([a-zA-Z0-9]+)(\(|\.)?'
# Byte patterns searched in the decoded shellcode by get_payload_info(), in
# list order. The captured groups of the first pattern that matches are
# joined into the marker that the entry point then locates in the document.
MARKER_0 = r'\x81.(....)\x74.\xFF\xC1'
MARKER_1 = r'\x81.(....)\x75.\x81.\x04(....)\x74.\xFF\xC1'
MARKER_2 = r'\x81.(....)\x75.\x8d.\x04\x81.(....)'
MARKER_3 = r'\x81.(....)\x89..\x75.\xb9(.{2,4})\x66...\x04\x74'
MARKER_4 = r'\x81.(....)\x75.\xba(....)\x66...\x04\x74'
MARKERS = [MARKER_0,MARKER_1,MARKER_2,MARKER_3,MARKER_4]
# Byte patterns searched in the decoded shellcode by get_payload_info() with
# re.S. Captured groups, in the order get_payload_info() unpacks them:
#   SH_LOOP_0: add, xor            (single bytes)
#   SH_LOOP_1: add, xor0, xor1     (single bytes)
#   SH_LOOP_2: xor0, xor1, size    (three 4-byte values)
#   SH_LOOP_3: size, xor0, xor1    (three 4-byte values)
SH_LOOP_0 = r'\x0F\xB6\x01\x48\xFF\xC1\x04(.)\x34(.)'
SH_LOOP_1 = r'\x8a.\x04(.)\xf6.\x01\x75.\x34(.)\xeb\x02\x34(.)'
# Earlier form of SH_LOOP_2, kept for reference:
#SH_LOOP_2 = r'\xf6.\x01\x75\x07\x35(....)\xeb\x05\x35(....)\x89..\x83.\x04\x81.(..\x00\x00)'
SH_LOOP_2 = r'\x75\x07\x35(....)\xeb\x05\x35(....)\xff..\x89..\x83.\x04\x81.(...\x00)'
SH_LOOP_3 = r'\xbe(....)\x56.*\x75\x08\x81.(....)\xeb\x06\x81.(....)'
SH_LOOPS = [SH_LOOP_0,SH_LOOP_1,SH_LOOP_2,SH_LOOP_3]
def dexor(d, a, x, m):
    """Undo the per-character encoding of d and base64-decode the result.

    Each character code has a subtracted, is XORed with x and, when m is
    truthy, gets the parity of its position (i % 2) added; the value is then
    reduced to one byte. The rebuilt string is decoded as base64.
    """
    decoded = []
    for i, c in enumerate(d):
        value = (ord(c) - a) ^ x
        if m:
            value += i % 2
        decoded.append(chr(value & 0xff))
    return ''.join(decoded).decode('base64')


# Earlier variant of dexor2, kept for reference:
# dexor2 = lambda d,a,x0,x1,r:''.join(chr(((ord(c)+a)^r(x1 if i&1 else x0,i))&0xff) for i,c in enumerate(d)).decode('base64')


def get_strings(d):
    """Return every run of three or more printable ASCII characters in d."""
    return re.findall('[ -~]{3,}', d)
def to_int(d, s):
    """Decode the s-byte chunk d into an unsigned integer.

    d -- the raw chunk; its length must equal s.
    s -- chunk width in bytes: 1, 2 or 4.

    Multi-byte chunks are decoded as little-endian explicitly rather than
    with the host's native byte order, so the result does not depend on the
    machine running the script.

    Raises ValueError for any other width.
    """
    if s == 1:
        return ord(d)
    if s == 2:
        return struct.unpack('<H', d)[0]
    if s == 4:
        return struct.unpack('<I', d)[0]
    raise ValueError('unsupported chunk size: %r' % (s,))
def to_str(x, s):
    """Encode the integer x as an s-byte string (the inverse of to_int).

    x -- the value to encode; only its low s bytes are kept.
    s -- output width in bytes: 1, 2 or 4.

    The value is masked to the output width for every size. Previously only
    the 1-byte case was masked, so an add/xor result that overflowed 16 or
    32 bits raised struct.error instead of wrapping around. Multi-byte
    values are written as little-endian explicitly.

    Raises ValueError for any other width.
    """
    if s == 1:
        return chr(x & 0xff)
    if s == 2:
        return struct.pack('<H', x & 0xffff)
    if s == 4:
        return struct.pack('<I', x & 0xffffffff)
    raise ValueError('unsupported chunk size: %r' % (s,))
def dexor2(d, a, x0, x1, s):
    """Decode d in s-byte chunks and return the first printable string found.

    Every chunk is read as an integer, has a added and is XORed with x0
    (even chunk index) or x1 (odd chunk index), then written back as s
    bytes. The first run of printable characters in the joined output is
    returned.
    """
    pieces = []
    for idx, chunk in enumerate(chunks(d, s)):
        key = x1 if idx & 1 else x0
        pieces.append(to_str((to_int(chunk, s) + a) ^ key, s))
    return get_strings(''.join(pieces))[0]


# VBA constant names that get_var() resolves directly to a number.
vba_cnts = dict(vbKeyShift=16)
def write_payload(t, payload, force=False):
    """Save payload under the output directory and return the path written.

    t       -- suffix appended to the file name (e.g. 'sh.bin', 'h.exe').
    payload -- raw bytes to store.
    force   -- write even when no output prefix was requested.

    Nothing is written (and None is returned) unless an output prefix was
    given on the command line or force is set. The file is named
    "[<prefix>.]<sha256 of payload>.<t>" inside args.dir. With force=True
    the module-level args must already have been parsed.
    """
    if not ((args and args.out) or force):
        return None
    name = '%s.%s' % (hashlib.sha256(payload).hexdigest(), t)
    if args.out:
        name = '%s.%s' % (args.out, name)
    fn = os.path.join(args.dir, name)
    print('[+] saving %s as %s' % (t, fn))
    # Payloads are raw bytes (documents, shellcode, executables): text mode
    # would translate line endings on some platforms and corrupt them.
    with open(fn, 'wb') as f:
        f.write(payload)
    return fn
def get_var(txt, var):
    """Resolve the VBA token var to an integer.

    txt -- VBA source text searched for an assignment to var.
    var -- a numeric literal, a name listed in vba_cnts, or a variable name.

    Literals and known constants are converted directly. Otherwise the first
    line of the form "<var> = <expression>" in txt is evaluated; the
    expression may only consist of the characters allowed by the pattern
    below (digits, + - / ( ), whitespace and the letters that spell
    vbKeyShift, Log and Sqr).

    Raises IndexError when no such assignment exists; errors raised while
    evaluating the expression propagate unchanged.
    """
    try:
        return int(vba_cnts.get(var, var))
    except (TypeError, ValueError):
        pass  # not a literal or known constant: look for its definition

    txt = txt.replace('RGB(0, 1, 0)', '256')
    # The name is escaped so that regex metacharacters picked up from the
    # macro text cannot break or alter the pattern.
    pattern = (r'\n\s*%s\s*=\s*([0-9-+\svbKeyShiftLogSqr()/ ]+)'
               % re.escape(str(var)))
    found = re.findall(pattern, txt)
    if not found:
        raise IndexError('no assignment found for VBA variable %r' % (var,))
    vdef = found[0].strip()
    # SECURITY: vdef is taken from an untrusted document and is eval()'d.
    # The character class above restricts what can reach this point and
    # builtins are disabled, but treat this as a risk and run the tool in a
    # sandbox.
    names = {'Log': math.log, 'Sqr': math.sqrt, 'vbKeyShift': 16}
    return int(eval(vdef, {'__builtins__': {}}, names))
def get_code_vars(txt):
    """Extract the (xor, add, mod) decoding parameters from VBA source txt.

    xor -- value of the last token on the first line containing 'Xor',
           resolved with get_var(); 0 when the text has no 'Xor'.
    add -- value of the operand in the first "arr(i) = arr(i) +/- value"
           statement (ARP), resolved with get_var(); 0 when it cannot be
           determined (the error is printed, not raised).
    mod -- True when the text contains ' Mod ', otherwise 0.
    """
    xor = add = mod = 0
    xor_pos = txt.find('Xor')
    if xor_pos != -1:
        # Clamp the window start: a negative start would index from the end
        # of the text and miss the 'Xor' line whenever it occurs within the
        # first 200 characters.
        start = max(0, xor_pos - 200)
        window = txt[start:xor_pos + 20].splitlines()
        xor_line = next(line for line in window if 'Xor' in line)
        xor = get_var(txt, xor_line.split()[-1])
    if ' Mod ' in txt:
        mod = True
    try:
        hit = re.findall(ARP, txt, flags=re.I)[0]
        add = get_var(txt, hit[-1])
    except Exception as e:
        # Best effort: a missing or unparsable add constant leaves add at 0.
        print(repr(e))
    return xor, add, mod
def get_shellcode(filepath):
    """Locate and decode the encoded shellcode stored in a macro document.

    filepath -- path of the document handed to olevba.

    Every macro is scanned for "= Name.Member" references (MRP). A reference
    followed by '' or '.' is looked up among the OLE streams whose second
    path component equals Name; the first NUL-separated chunk longer than
    500 bytes is taken as the encoded data. A reference followed by '(' that
    names another macro module supplies the xor/add/mod constants through
    get_code_vars(). When no constants were found they are taken from the
    first macro that contains such a reference (falling back to
    'ThisDocument').

    Returns (shellcode, data_stream, add, xor). Raises Exception when the
    data cannot be found or decoded.
    """
    vba = olevba.VBA_Parser(filepath)
    ole = vba.ole_file
    macros = {}
    for _, _, name, code in vba.extract_macros():
        name, _ = os.path.splitext(name)
        macros[name] = code

    # Default to 'ThisDocument' when present instead of failing with
    # KeyError; the first macro holding a reference takes precedence.
    main = macros.get('ThisDocument', '')
    for code in macros.values():
        if re.findall(MRP, code):
            main = code
            break

    data = None
    data_stream = None
    xor = add = mod = 0
    for code in macros.values():
        for module, _obj, t in re.findall(MRP, code):
            if data and add:
                break
            if t in ('', '.'):
                for n in ole.listdir(streams=True, storages=False):
                    if len(n) > 2 and n[1] == module:
                        blobs = [c for c in
                                 ole.openstream(n).read().split("\x00")
                                 if len(c) > 500]
                        # Only replace data when this stream really holds a
                        # candidate, so data found earlier is not overwritten
                        # by an empty result.
                        if blobs:
                            data = blobs[0]
                            data_stream = '/'.join(n)
                            break
            elif t == '(' and module in macros:
                xor, add, mod = get_code_vars(macros[module])

    if not add and not xor and not mod:
        xor, add, mod = get_code_vars(main)
    if not data:
        raise Exception('Cant get shellcode! (no encoded stream found)')

    shellcode = ''
    # The number of junk bytes in front of the encoded data is not known:
    # try skipping 5 down to 1 bytes until the result decodes.
    for i in range(5, 0, -1):
        try:
            shellcode = dexor(data[i:], add, xor, mod)
            break
        except Exception as e:
            print(repr(e))
    if not shellcode:
        raise Exception('Cant get shellcode!')
    return shellcode, data_stream, add, xor
def get_payload_info(shellcode):
    """Read the payload marker and decoding constants out of the shellcode.

    shellcode -- decoded shellcode, searched with MARKERS and SH_LOOPS.

    Returns (marker, add, xor0, xor1, cs, s):
      marker -- captured bytes of the first MARKERS pattern that matches
                (for patterns after the first, all groups joined and
                stripped of NUL bytes).
      add, xor0, xor1 -- constants captured by the SH_LOOPS patterns; a value
                that no pattern provides stays 0.
      cs     -- chunk size: 4 when SH_LOOP_2 or SH_LOOP_3 matched, else 1.
      s      -- value captured by SH_LOOP_2/SH_LOOP_3, 0 otherwise.
    Every SH_LOOPS pattern is tried; a later match overrides an earlier one.

    Raises Exception when no marker pattern matches.
    """
    marker = None
    for i, rgx in enumerate(MARKERS):
        # re.S: the input is binary, so "." must also match the 0x0a byte
        # (the SH_LOOPS search below already does this).
        m = re.findall(rgx, shellcode, flags=re.S)
        if not m:
            continue
        if i == 0:
            marker = m[0]
        else:
            marker = ''.join(m[0]).strip("\x00")
        break
    if marker is None:
        raise Exception('Cant find payload marker!')

    add = xor0 = xor1 = s = 0
    cs = 1
    for i, rgx in enumerate(SH_LOOPS):
        hit = re.findall(rgx, shellcode, flags=re.S)
        if not hit:
            continue
        if i == 0:
            add, xor = hit[0]
            add = ord(add)
            # Was ord(xor0): that called ord() on the integer 0 and raised
            # TypeError instead of using the captured xor byte.
            xor0 = ord(xor)
        elif i == 1:
            add, xor0, xor1 = hit[0]
            add = ord(add)
            xor0 = ord(xor0)
            xor1 = ord(xor1)
        elif i == 2:
            xor0, xor1, s = struct.unpack('<III', ''.join(hit[0]))
            cs = 4
        elif i == 3:
            s, xor0, xor1 = struct.unpack('<III', ''.join(hit[0]))
            cs = 4
    return marker, add, xor0, xor1, cs, s
def extract_dde(file):
    """Return the field-instruction text of an OOXML Word document.

    file -- path (or file object) of the .docx archive.

    The text of all w:instrText nodes is concatenated per w:p paragraph;
    paragraphs without any such text are skipped and the rest are joined
    with blank lines.
    """
    ## stolen from jsoo - https://ghostbin.com/paste/x6hqa
    import zipfile
    from lxml.etree import XML

    WORD_NAMESPACE = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
    PARA = WORD_NAMESPACE + 'p'
    TEXT = WORD_NAMESPACE + 'instrText'

    # The with-block closes the archive even when reading or parsing fails.
    with zipfile.ZipFile(file) as archive:
        xml = XML(archive.read('word/document.xml'))

    paragraphs = []
    # iter() replaces the deprecated getiterator().
    for paragraph in xml.iter(PARA):
        texts = ''.join(node.text
                        for node in paragraph.iter(TEXT)
                        if node.text)
        if texts:
            paragraphs.append(texts)
    return '\n\n'.join(paragraphs)
if __name__ == '__main__':
    # Script entry point: optionally download the document, then either dump
    # its DDE field text or decode the shellcode and the payload it hides.
    args = apr.parse_args()
    if not args.file:
        apr.error('a file path (or a URL when fetching) is required')

    if args.u__url:
        import requests
        print('[*] getting maldoc from %s' % args.file)
        UA = 'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko'
        EMAIL = '[email protected]'
        # Try the bare URL first, then with the address appended in plain
        # and in base64 form.
        for suf in ['', EMAIL, EMAIL.encode('base64').strip()]:
            r = requests.get(args.file + suf,
                             headers={'User-Agent': UA},
                             proxies={'http': 'socks5://ru.proxy.cert.pl:9050'}
                             )
            # OLE2 compound file signature.
            if r.ok and r.content.startswith("\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1"):
                break
            # Zip archive containing a document.xml member.
            if r.ok and r.content.startswith("\x50\x4b\x03\x04") and '/document.xml' in r.content:
                args.use_dde = True
                break
        else:
            print('[!] no response looked like a document, keeping the last one')
        args.file = write_payload('h.doc', r.content, force=True)

    if args.use_dde:
        print(extract_dde(args.file))
        # NOTE(review): exit status 1 is kept from the original even though
        # this path succeeded - confirm whether callers rely on it.
        sys.exit(1)

    shellcode, data_stream, add, xor = get_shellcode(args.file)
    print('[+] payload in: %s' % data_stream)
    print('[+] add: %d xor: %d' % (add, xor))
    write_payload('sh.bin', shellcode)

    marker, add, xor0, xor1, cs, size = get_payload_info(shellcode)
    print('[+] Marker %s' % marker)
    print('[+] sh add: %d xor0: %d xor1: %d' % (add, xor0, xor1))

    # The document is binary: read it in binary mode so no byte is
    # translated or cut short by text-mode handling.
    with open(args.file, 'rb') as f:
        data = f.read()

    idx = data.find(marker)
    if idx < 0:
        sys.exit('[-] marker not found in %s' % args.file)
    # The encoded payload starts 4 bytes after the marker.
    idx += len(marker) + 4
    if not size:
        # Size unknown: take the first long NUL-delimited chunk.
        candidates = [c for c in data[idx:].split("\x00") if len(c) > 300]
        if not candidates:
            sys.exit('[-] no payload data found after the marker')
        data = candidates[0]
    else:
        data = data[idx:idx + size]

    if xor1:
        binary = dexor2(data, add, xor0, xor1, cs)
        decoded = None
        # Drop 1..9 trailing characters until the text is valid base64.
        for i in range(1, 10):
            try:
                decoded = binary[:-i].decode('base64')
            except Exception:
                continue
            break
        if decoded is None:
            sys.exit('[-] cant base64-decode the payload')
        binary = decoded
    else:
        binary = dexor(data, add, xor0, False)
    write_payload('h.exe', binary, force=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment