-
-
Save davehull/69fc8aa87b52d5dc5041b46ee492628e to your computer and use it in GitHub Desktop.
Dump some PE file features from memory images.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2 | |
''' | |
Dump some PE file features from memory images. | |
author: Willi Ballenthin | |
email: [email protected] | |
website: https://gist.github.com/williballenthin/cbc102d561e2eb647f7aec3c3753ba55 | |
''' | |
import os | |
import sys | |
import hashlib | |
import logging | |
import datetime | |
import contextlib | |
# from pypi:: | |
# | |
# pip install pytz argparse | |
import pytz | |
import argparse | |
# from vivisect:: | |
# | |
# pip install https://github.com/williballenthin/vivisect/zipball/master | |
import PE | |
logger = logging.getLogger(__name__) | |
@contextlib.contextmanager
def restoring_offset(f):
    '''
    Remember where `f` currently points and put the file pointer back
    there once the managed block finishes, whether it exits normally
    or by raising.

    Example::

        with open('test.bin', 'rb') as f:
            assert f.tell() == 0x0
            with restoring_offset(f):
                f.seek(0x200)
                assert f.tell() == 0x200
            assert f.tell() == 0x0
    '''
    saved_position = f.tell()
    try:
        yield
    finally:
        # absolute seek back to the offset recorded on entry.
        f.seek(saved_position, os.SEEK_SET)
class FileView(object):
    '''
    Given an open file object, provide read access to a subsection of the file
    as if it were its own file object. This is a bit like `losetup(8)`, except a
    file-like object in Python.

    All offsets accepted and returned by this class are relative to `start`.

    Example::

        with open('logical-process-memory.bin', 'rb') as f:
            g = FileView(f, 0x401000)
            assert g.read(0x2) == b'MZ'
    '''
    def __init__(self, f, start=0, length=None):
        '''
        Args:
          f: seekable, readable file object that backs the view.
          start (int): absolute offset in `f` at which the view begins.
          length (int or None): size of the view in bytes; when None, the
            view extends to the end of `f`.
        '''
        super(FileView, self).__init__()
        self.f = f
        self.start = start
        # position the underlying file at the beginning of the view.
        self.f.seek(self.start)
        if length is None:
            # measure the remainder of the file without disturbing the offset.
            with restoring_offset(f):
                f.seek(0, os.SEEK_END)
                self.length = f.tell() - self.start
        else:
            self.length = length

    def tell(self):
        '''
        Return the current offset, relative to the start of the view.
        '''
        return self.f.tell() - self.start

    def seek(self, offset, whence=os.SEEK_SET):
        '''
        Move the file pointer within the view.

        Args:
          offset (int): displacement relative to the position named by `whence`.
          whence (int): `os.SEEK_SET`, `os.SEEK_CUR`, or `os.SEEK_END`.

        Returns:
          int: the new offset, relative to the start of the view.

        Raises:
          IOError: if `whence` is unknown or the target lies outside the view.
        '''
        if whence == os.SEEK_SET:
            final_offset = self.start + offset
        elif whence == os.SEEK_CUR:
            final_offset = self.f.tell() + offset
        elif whence == os.SEEK_END:
            # like regular file objects, the offset is *added* to the end
            # position, so callers pass zero or a negative value here.
            final_offset = self.start + self.length + offset
        else:
            raise IOError('unknown seek whence')

        logger.debug('seek offset: 0x%x whence: 0x%x final offset: 0x%x',
                     offset, whence, final_offset)

        if final_offset < self.start:
            raise IOError('cant read offset %d (underrun)' % (final_offset - self.start))
        if final_offset > self.start + self.length:
            raise IOError('cant read offset %d (overrun)' % (final_offset - self.start))

        self.f.seek(final_offset)
        return final_offset - self.start

    def read(self, length=None):
        '''
        Read up to `length` bytes, never reading past the end of the view.

        Args:
          length (int or None): maximum number of bytes to read; None or a
            negative value reads everything that remains in the view.

        Returns:
          the data read from the underlying file (empty at the end of the view).
        '''
        # clamp at zero so a pointer sitting past the view cannot turn into a
        # negative size, which the underlying file would treat as "read all".
        max_length = max(self.length - self.tell(), 0)
        if length is None or length < 0 or length > max_length:
            length = max_length
        logger.debug('read length: 0x%x', length)
        return self.f.read(length)
def md5(buf):
    '''
    Return the MD5 digest of the byte string `buf` as a hex string.
    '''
    return hashlib.md5(buf).hexdigest()
def get_imphash(pe):
    '''
    Compute the import hash ("imphash") of a parsed PE file.

    Each import is rendered as `library.function` in lowercase, with a
    trailing `.ocx`, `.sys`, or `.dll` removed from the library name. The
    entries are joined with commas, in the order `getImports()` yields them,
    and the result is MD5-hashed.

    Args:
      pe: object whose `getImports()` yields `(offset, libname, funcname)`
        tuples.

    Returns:
      str: hex MD5 digest of the normalized import list.
    '''
    exts = ('ocx', 'sys', 'dll')
    impstrs = []
    for (_, libname, funcname) in pe.getImports():
        # lowercase *before* inspecting the extension, so that names such as
        # `KERNEL32.DLL` are stripped exactly like `kernel32.dll`.
        libname = libname.lower()
        parts = libname.rsplit('.', 1)
        if len(parts) > 1 and parts[1] in exts:
            libname = parts[0]
        impstrs.append('%s.%s' % (libname, funcname.lower()))
    return hashlib.md5(','.join(impstrs).encode()).hexdigest()
def guess_is_memory_image(f):
    '''
    guess if the provided file is a PE from memory or on disk.

    it works by exploiting the differing alignment between file
    sectors (0x200, PE file alignment) and memory pages (0x1000,
    PE section alignment). on disk, the first section's content
    typically begins at offset 0x400, while in memory, it usually
    begins at 0x1000. so if the 0x200 bytes at offset 0x400 are all
    zero, the file is assumed to be a memory image.

    the file offset of `f` is restored before returning.

    Args:
      f: seekable file-like object opened in binary mode.

    Returns:
      bool: True if the file looks like a PE image from memory.

    Example::

        with open('kernel32.dll', 'rb') as f:
            assert guess_is_memory_image(f) == False
        with open('0x401000.bin', 'rb') as f:
            assert guess_is_memory_image(f) == True
    '''
    with restoring_offset(f):
        f.seek(0x400)
        # compare against a bytes literal: data read from a binary file is
        # `bytes` on Python 3 and never equals a `str`, which would make this
        # check always fail there. `b'...'` is a plain str on Python 2.
        return f.read(0x200) == b'\x00' * 0x200
def output_normal_mode(pe, args):
    '''
    Print a human-readable, multi-line report about `pe` to stdout.

    The report lists the header timestamp (UTC) and checksum, the export
    name and exports (when the PE has an export name), the imports (unless
    `args.no_imports` is set), the sections, and the import hash.

    Args:
      pe: parsed PE object.
      args: parsed command line arguments; only `no_imports` is consulted.
    '''
    nt_headers = pe.IMAGE_NT_HEADERS
    compiled_at = datetime.datetime.fromtimestamp(nt_headers.FileHeader.TimeDateStamp, pytz.utc)
    print('timestamp: ' + compiled_at.isoformat())
    print('checksum: ' + hex(nt_headers.OptionalHeader.CheckSum))

    if pe.getExportName():
        print('export name: ' + pe.getExportName())
        print('exports:')
        for (_, ordinal, exported) in pe.getExports():
            print(' %d) %s' % (ordinal, exported))

    show_imports = not args.no_imports
    if show_imports:
        print('imports:')
        for (_, library, imported) in pe.getImports():
            print(' - %s.%s' % (library, imported))

    print('sections:')
    for sec in pe.getSections():
        print(' - ' + sec.Name)
        print(' virtual address: %s\tsize: %s' % (hex(sec.VirtualAddress), hex(sec.VirtualSize)))
        print(' raw address: %s\tsize: %s' % (hex(sec.PointerToRawData), hex(sec.SizeOfRawData)))

    print('imphash: ' + get_imphash(pe))
def output_bulk_mode(pe, args):
    '''
    Print a single pipe-separated record describing `pe` to stdout.

    Fields, in order: input filename, offset, export name (empty when the
    PE has none), header timestamp (UTC, ISO 8601), checksum, import hash.

    Args:
      pe: parsed PE object.
      args: parsed command line arguments; `input` and `offset` are echoed.
    '''
    nt_headers = pe.IMAGE_NT_HEADERS
    record = {
        'filename': args.input,
        'offset': args.offset,
        'export_name': pe.getExportName() or '',
    }
    compiled_at = datetime.datetime.fromtimestamp(nt_headers.FileHeader.TimeDateStamp, pytz.utc)
    record['timestamp'] = compiled_at.isoformat()
    record['checksum'] = hex(nt_headers.OptionalHeader.CheckSum)
    record['imphash'] = get_imphash(pe)
    print('{filename}|{offset}|{export_name}|{timestamp}|{checksum}|{imphash}'.format(**record))
def number(s):
    '''
    Parse an integer written in decimal or in `0x`-prefixed hexadecimal.

    The hex prefix is matched case-insensitively and may follow a sign;
    surrounding whitespace is ignored.

    Args:
      s (str): text to parse, e.g. `'4198400'`, `'0x401000'`, `'0X401000'`.

    Returns:
      int: the parsed value.

    Raises:
      ValueError: if `s` is not a valid decimal or hexadecimal integer.
    '''
    text = s.strip()
    # look past an optional sign so that `-0x10` is recognized as hex too.
    unsigned = text.lstrip('+-')
    if unsigned.lower().startswith('0x'):
        return int(text, 0x10)
    return int(text)
def main(argv=None):
    '''
    Command line entry point: parse a PE image found at some offset of the
    input file and print its features.

    Args:
      argv (list of str or None): command line arguments, without the
        program name. Defaults to `sys.argv[1:]`.
    '''
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Dump some PE file features features from memory images.")
    parser.add_argument("input", type=str,
                        help="Path to input file")
    parser.add_argument("offset", type=number,
                        help="Offset from which to parse the PE image.")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Enable debug logging")
    parser.add_argument("-q", "--quiet", action="store_true",
                        help="Disable all output but errors")
    parser.add_argument("--no-imports", dest='no_imports', action="store_true",
                        help="Don't show imports")
    parser.add_argument("--bulk-mode", dest='bulk_mode', action="store_true",
                        help="Output in bulk mode (|SV)")
    # pass `argv` explicitly; otherwise the parameter is silently ignored
    # and programmatic callers cannot supply their own arguments.
    args = parser.parse_args(argv)

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    elif args.quiet:
        logging.basicConfig(level=logging.ERROR)
    else:
        logging.basicConfig(level=logging.INFO)

    logger.debug('offset: 0x%x', args.offset)

    with open(args.input, 'rb') as f:
        fv = FileView(f, args.offset)
        pe = PE.PE(fv, inmem=guess_is_memory_image(fv))

        with restoring_offset(fv):
            # the parser above may have moved the file pointer, so rewind to
            # the start of the view before checking the DOS magic.
            fv.seek(0)
            # bytes literal: a binary read never equals a str on Python 3.
            if fv.read(0x2) != b'MZ':
                logger.warning('missing PE header!')

        # keep the file open while printing: the output helpers query `pe`,
        # which is backed by `fv`.
        if args.bulk_mode:
            output_bulk_mode(pe, args)
        else:
            output_normal_mode(pe, args)
if __name__ == "__main__":
    # hand main()'s return value to the interpreter as the exit status.
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment