Created
June 1, 2012 07:37
-
-
Save marpie/2849980 to your computer and use it in GitHub Desktop.
PE file format parsing *incomplete*
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
""" peEvade

PE file format parsing (*incomplete*): reads the import table of a PE image
and prints a fasm template that declares the same imports.

Author: marpie ([email protected])

Last Update: 20120531
Created: 20120530

"""
# Imports | |
import os.path
import struct
import sys
# Version Information
__version__ = "0.0.1"
__program__ = "peEvade v" + __version__
__author__ = "marpie"
__email__ = "[email protected]"
__license__ = "BSD License"
__copyright__ = "Copyright 2011, a12d404.net"
__status__ = "Prototype"  # ("Prototype", "Development", "Testing", "Production")

#SCRIPT_PATH = os.path.dirname( os.path.realpath( __file__ ) )

# Width in bytes of one import thunk entry. The value 4 means the import
# walker steps through 32-bit thunks only.
POINTER_SIZE = 4

# High bit of a thunk value: when set, the import is by ordinal, not by name.
IMAGE_ORDINAL_FLAG_64 = 0x8000000000000000
IMAGE_ORDINAL_FLAG_32 = 0x80000000

# Header magic values.
IMAGE_DOS_SIGNATURE = 0x5A4D  # MZ
IMAGE_NT_SIGNATURE = 0x00004550  # PE00

# Indices into IMAGE_OPTIONAL_HEADER.DataDirectory.
IMAGE_DIRECTORY_ENTRY_EXPORT = 0
IMAGE_DIRECTORY_ENTRY_IMPORT = 1
IMAGE_DIRECTORY_ENTRY_RESOURCE = 2
IMAGE_DIRECTORY_ENTRY_EXCEPTION = 3
IMAGE_DIRECTORY_ENTRY_SECURITY = 4
IMAGE_DIRECTORY_ENTRY_BASERELOC = 5
IMAGE_DIRECTORY_ENTRY_DEBUG = 6
IMAGE_DIRECTORY_ENTRY_COPYRIGHT = 7
IMAGE_DIRECTORY_ENTRY_ARCHITECTURE = 7  # newer winnt.h name for the same slot
IMAGE_DIRECTORY_ENTRY_GLOBALPTR = 8
IMAGE_DIRECTORY_ENTRY_TLS = 9
IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG = 10
IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT = 11
IMAGE_DIRECTORY_ENTRY_IAT = 12
IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT = 13
IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR = 14

# Number of data-directory slots defined by the PE format.
IMAGE_NUMBEROF_DIRECTORY_ENTRIES = 16
############################################################################# | |
# Helper functions | |
def getString(data):
    """Return the text in *data* up to (not including) the first NUL byte.

    data -- byte string (or text string) that starts with the wanted string;
            may be None or empty.

    Returns a native ``str``. If *data* contains no NUL terminator the whole
    input is returned instead of running off the end of the buffer (this
    happens for 8-character section names, which fill their field completely).
    None and empty input yield "".
    """
    if not data:
        return ""
    if isinstance(data, (bytes, bytearray)):
        end = data.find(b"\x00")
        raw = bytes(data if end < 0 else data[:end])
        if str is bytes:
            # Python 2: the native string type already is the byte string.
            return raw
        # latin-1 maps every byte to exactly one character, so nothing is lost.
        return raw.decode("latin-1")
    end = data.find("\x00")
    return data if end < 0 else data[:end]
############################################################################# | |
# Windows DEFs | |
class IMAGE_DOS_HEADER(object):
    """Parsed MS-DOS stub header found at offset 0 of a PE image.

    raw_data -- byte string starting at the DOS header; at least 64 bytes.

    Raises struct.error when raw_data is too short.
    """

    # e_magic .. e_ovno: fourteen consecutive little-endian 16-bit words.
    _LEADING_WORDS = '<HHHHHHHHHHHHHH'

    def __init__(self, raw_data):
        (self.e_magic,
         self.e_cblp,
         self.e_cp,
         self.e_crlc,
         self.e_cparhdr,
         self.e_minalloc,
         self.e_maxalloc,
         self.e_ss,
         self.e_sp,
         self.e_csum,
         self.e_ip,
         self.e_cs,
         self.e_lfarlc,
         self.e_ovno) = struct.unpack_from(self._LEADING_WORDS, raw_data, 0)

        # Skip the 8 reserved bytes (e_res) that follow e_ovno.
        oem_pos = struct.calcsize(self._LEADING_WORDS) + 8
        self.e_oemid, self.e_oeminfo = struct.unpack_from('<HH', raw_data, oem_pos)

        # Skip the 20 reserved bytes (e_res2) that follow e_oeminfo.
        lfanew_pos = oem_pos + struct.calcsize('<HH') + 20
        self.e_lfanew = struct.unpack_from('<L', raw_data, lfanew_pos)[0]

        # The header ends right after e_lfanew, i.e. at byte 64.
        self.__size = lfanew_pos + struct.calcsize('<L')

    def size(self):
        """Return the size of the DOS header in bytes (64)."""
        return self.__size

    def valid(self):
        """Return True when the header starts with the 'MZ' signature."""
        return self.e_magic == IMAGE_DOS_SIGNATURE
class IMAGE_FILE_HEADER(object):
    """Parsed COFF file header (the 20 bytes unpacked by HDR_DEF).

    raw_data -- byte string starting at the file header.

    Raises struct.error when raw_data is shorter than the header.
    """

    HDR_DEF = r'<HHLLLHH'

    def __init__(self, raw_data):
        self.__size = struct.calcsize(self.HDR_DEF)
        (self.Machine,
         self.NumberOfSections,
         self.TimeDateStamp,
         self.PointerToSymbolTable,
         self.NumberOfSymbols,
         self.SizeOfOptionalHeader,
         self.Characteristics) = struct.unpack_from(self.HDR_DEF, raw_data, 0)

    def size(self):
        """Return the size of the file header in bytes."""
        return self.__size
class IMAGE_DATA_DIRECTORY(object):
    """One data-directory entry: an (RVA, size) pair of 32-bit values.

    raw_data -- byte string starting at the entry.

    Raises struct.error when raw_data is shorter than SIZE bytes.
    """

    SIZE = 8

    def __init__(self, raw_data):
        self.VirtualAddress, self.Size = struct.unpack_from('<LL', raw_data, 0)

    def size(self):
        """Return the size of one entry in bytes."""
        return self.SIZE
class IMAGE_OPTIONAL_HEADER(object):
    """Parsed optional header in the 32-bit (PE32) layout, with directories.

    raw_data -- byte string starting at the optional header.

    Only the layout described by HDR_DEF is understood; BaseOfData and a
    32-bit ImageBase are read unconditionally.

    Raises struct.error when raw_data is too short for the fixed part or for
    one of the announced data-directory entries.
    """

    HDR_DEF = r'<HBBLLLLLLLLLHHHHHHLLLLHHLLLLLL'

    # The PE format defines 16 directory slots. A larger NumberOfRvaAndSizes
    # is clamped so a corrupt count cannot drive the parser past the header.
    MAX_DIRECTORIES = 16

    def __init__(self, raw_data):
        self.__size = struct.calcsize(self.HDR_DEF)
        (self.Magic,
         self.MajorLinkerVersion,
         self.MinorLinkerVersion,
         self.SizeOfCode,
         self.SizeOfInitializedData,
         self.SizeOfUninitializedData,
         self.AddressOfEntryPoint,
         self.BaseOfCode,
         self.BaseOfData,
         self.ImageBase,
         self.SectionAlignment,
         self.FileAlignment,
         self.MajorOperatingSystemVersion,
         self.MinorOperatingSystemVersion,
         self.MajorImageVersion,
         self.MinorImageVersion,
         self.MajorSubsystemVersion,
         self.MinorSubsystemVersion,
         self.Win32VersionValue,
         self.SizeOfImage,
         self.SizeOfHeaders,
         self.CheckSum,
         self.Subsystem,
         self.DllCharacteristics,
         self.SizeOfStackReserve,
         self.SizeOfStackCommit,
         self.SizeOfHeapReserve,
         self.SizeOfHeapCommit,
         self.LoaderFlags,
         self.NumberOfRvaAndSizes) = struct.unpack_from(self.HDR_DEF, raw_data, 0)

        # The directory entries follow the fixed part back to back; size()
        # grows by one entry for each directory that is read.
        self.DataDirectory = []
        for _ in range(min(self.NumberOfRvaAndSizes, self.MAX_DIRECTORIES)):
            entry = IMAGE_DATA_DIRECTORY(raw_data[self.__size:])
            self.DataDirectory.append(entry)
            self.__size += entry.size()

    def size(self):
        """Return the header size in bytes, including parsed directories."""
        return self.__size

    def getDataDirectory(self, directory_idx):
        """Return the directory entry at directory_idx, or None if absent."""
        if not 0 <= directory_idx < len(self.DataDirectory):
            return None
        return self.DataDirectory[directory_idx]
class IMAGE_NT_HEADERS(object):
    """Parsed NT headers: 4-byte signature, file header, optional header.

    raw_data -- byte string starting at the PE signature.

    Raises struct.error when raw_data is too short for any of the parts.
    """

    SIGNATURE_SIZE = 4

    def __init__(self, raw_data):
        self.Signature = struct.unpack_from('<L', raw_data, 0)[0]
        self.FileHeader = IMAGE_FILE_HEADER(raw_data[self.SIGNATURE_SIZE:])
        # The optional header starts directly after the file header.
        optional_pos = self.SIGNATURE_SIZE + self.FileHeader.size()
        self.OptionalHeader = IMAGE_OPTIONAL_HEADER(raw_data[optional_pos:])
        self.__size = optional_pos + self.OptionalHeader.size()

    def size(self):
        """Return the combined size of signature and both headers in bytes."""
        return self.__size

    def valid(self):
        """Return True for a 'PE\\0\\0' signature and a consistent header size.

        The parsed optional header (including its data directories) must be
        exactly as large as FileHeader.SizeOfOptionalHeader announces.
        """
        if self.Signature != IMAGE_NT_SIGNATURE:
            return False
        return self.OptionalHeader.size() == self.FileHeader.SizeOfOptionalHeader
class IMAGE_IMPORT_DESCRIPTOR(object):
    """One import-directory entry (five 32-bit values).

    raw_data -- byte string starting at the descriptor.

    OriginalFirstThunk is an alias of Characteristics; both name the first
    field of the structure.

    Raises struct.error when raw_data is shorter than the descriptor.
    """

    HDR_DEF = r'<LLLLL'

    def __init__(self, raw_data):
        self.__size = struct.calcsize(self.HDR_DEF)
        (self.Characteristics,
         self.TimeDateStamp,
         self.ForwarderChain,
         self.Name,
         self.FirstThunk) = struct.unpack_from(self.HDR_DEF, raw_data, 0)
        self.OriginalFirstThunk = self.Characteristics

    def size(self):
        """Return the size of one descriptor in bytes."""
        return self.__size
class IMAGE_IMPORT_BY_NAME(object):
    """Hint/name pair that a by-name import thunk points to.

    raw_data -- byte string starting at the 16-bit hint, followed by the
                NUL-terminated function name; may be None when the RVA could
                not be mapped into the file.

    Hint is an int (None when raw_data is missing or shorter than the hint).
    Name is the function name ("" when raw_data is missing or too short).
    """

    def __init__(self, raw_data):
        if not raw_data or len(raw_data) < 2:
            # Nothing to parse; an empty Name tells callers to stop.
            self.Hint = None
            self.Name = ""
            return
        # unpack() returns a 1-tuple; keep only the integer value.
        self.Hint = struct.unpack('<H', raw_data[:2])[0]
        self.Name = getString(raw_data[2:])
class IMAGE_THUNK_DATA(object):
    """One 32-bit import thunk.

    raw_data -- byte string starting at the thunk.

    The structure is a union, so ForwarderString, Function, Ordinal and
    AddressOfData all hold the same 32-bit value.

    Raises struct.error when raw_data is shorter than 4 bytes.
    """

    def __init__(self, raw_data):
        self.__size = 4
        value = struct.unpack('<L', raw_data[:self.__size])[0]
        self.ForwarderString = value
        self.Function = value
        self.Ordinal = value
        self.AddressOfData = value

    def size(self):
        """Return the size of one thunk in bytes."""
        return self.__size
class IMAGE_SECTION_HEADER(object):
    """One section-table entry: 8-byte name followed by nine numeric fields.

    raw_data -- byte string starting at the section header.
    rawIndex -- value stored unchanged as self.rawIndex (callers pass the
                file offset of this header).

    VirtualSize is an alias of PhysicalAddress; both name the first numeric
    field of the structure.

    Raises struct.error when raw_data is shorter than the header.
    """

    NAME_SIZE = 8
    HDR_DEF = '<LLLLLLHHL'

    def __init__(self, raw_data, rawIndex):
        # A name that fills all 8 bytes has no terminator; append one so the
        # string scan always stops inside the name field.
        self.Name = getString(raw_data[:self.NAME_SIZE] + b"\x00")
        self.rawIndex = rawIndex
        self.__size = struct.calcsize(self.HDR_DEF) + self.NAME_SIZE
        (self.PhysicalAddress,
         self.VirtualAddress,
         self.SizeOfRawData,
         self.PointerToRawData,
         self.PointerToRelocations,
         self.PointerToLinenumbers,
         self.NumberOfRelocations,
         self.NumberOfLinenumbers,
         self.Characteristics) = struct.unpack(
             self.HDR_DEF, raw_data[self.NAME_SIZE:self.__size])
        self.VirtualSize = self.PhysicalAddress

    def size(self):
        """Return the size of one section header in bytes."""
        return self.__size
############################################################################# | |
# PE file format parsing classes | |
class Lib(object):
    """ Lib represents one PE-Import-Dictionary-Entry.

    pe               -- PE instance used to translate RVAs to file data.
    importDescriptor -- IMAGE_IMPORT_DESCRIPTOR of the library.

    Attributes:
    name      -- library name ("" when the name RVA cannot be resolved).
    functions -- imported symbols in thunk order; a str for an import by
                 name, an int for an import by ordinal.
    """

    def __init__(self, pe, importDescriptor):
        self.__pe = pe
        self.__importDescriptor = importDescriptor
        name_data = self.__pe.imageByRva(importDescriptor.Name)
        # An unmapped name RVA leaves the name empty, which valid() reports.
        self.name = getString(name_data) if name_data else ""
        self.functions = list(self.__parseFunctions())

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        lines = ["Library: " + self.name + "\n"]
        for func in self.functions:
            if isinstance(func, str):
                lines.append("\t" + func + "\n")
            else:
                lines.append("\tOrdinal: " + str(func) + "\n")
        return "".join(lines)

    def valid(self):
        """Return True when the descriptor names a library that was readable."""
        return (self.__importDescriptor.Name != 0) and (self.name != "")

    def __parseFunctions(self):
        """Yield each imported symbol: name (str) or ordinal (int).

        Walks the thunk array at OriginalFirstThunk, or at FirstThunk when
        OriginalFirstThunk is 0. The walk stops at a zero thunk, at an RVA
        that cannot be mapped into the file, or at an empty name (or zero
        ordinal).
        """
        ptrThunkRef = (self.__importDescriptor.OriginalFirstThunk
                       or self.__importDescriptor.FirstThunk)
        while True:
            thunk_data = self.__pe.imageByRva(ptrThunkRef)
            if not thunk_data or len(thunk_data) < POINTER_SIZE:
                break
            thunkRef = IMAGE_THUNK_DATA(thunk_data)
            if thunkRef.AddressOfData == 0:
                break
            if thunkRef.Ordinal & IMAGE_ORDINAL_FLAG_32:
                # Import by ordinal: the low 16 bits carry the ordinal number.
                importName = thunkRef.Ordinal & 0xffff
            else:
                name_data = self.__pe.imageByRva(thunkRef.AddressOfData)
                if not name_data:
                    break
                importName = IMAGE_IMPORT_BY_NAME(name_data).Name
            if not importName:
                break
            ptrThunkRef += POINTER_SIZE
            yield importName
class PE(object):
    """
    PE implements all functions needed to parse the PE file format.
    The implementation is *incomplete* and only written to parse
    the imports to generate the fasm template file.

    raw_data -- complete contents of the PE file as a byte string.

    Raises TypeError when the headers cannot be parsed or are not valid.
    """

    def __init__(self, raw_data):
        self.pe_image = raw_data
        # Lazily filled caches; created first so every method can rely on them.
        self.__sections = None
        self.__imports = None
        try:
            self.dos_hdr = IMAGE_DOS_HEADER(self.pe_image)
            self.pe_hdr = IMAGE_NT_HEADERS(self.pe_image[self.dos_hdr.e_lfanew:])
        except Exception:
            # Any parse failure is reported as TypeError; unlike a bare
            # except this lets KeyboardInterrupt/SystemExit pass through.
            raise TypeError("not a parsable PE image")
        if not (self.dos_hdr.valid() and self.pe_hdr.valid()):
            raise TypeError("invalid DOS or NT headers")

    def __iterateDescriptors(self, directory):
        """Yield a Lib for each import descriptor of the import directory.

        Stops at the terminating descriptor (Name == 0), at a library that
        is not valid, or when the descriptor table runs past the file end.
        """
        adr = self.rva2ptr(directory.VirtualAddress)
        if adr is None:
            # A None offset would make pe_image[None:] slice the whole file
            # and parse the DOS header as import descriptors.
            return
        while True:
            try:
                importDescriptor = IMAGE_IMPORT_DESCRIPTOR(self.pe_image[adr:])
            except struct.error:
                break
            if importDescriptor.Name == 0:
                break
            lib = Lib(self, importDescriptor)
            if not lib.valid():
                break
            yield lib
            adr += importDescriptor.size()

    def parseSections(self):
        """ parseSections tries to parse all sections of the PE file.

        Returns the cached list of IMAGE_SECTION_HEADER objects. A section
        table that is cut off by the end of the file yields the headers
        read up to that point.
        """
        if self.__sections is not None:
            return self.__sections
        sections = []
        ptr = self.dos_hdr.e_lfanew + self.pe_hdr.size()
        for _ in range(self.pe_hdr.FileHeader.NumberOfSections):
            try:
                section = IMAGE_SECTION_HEADER(self.pe_image[ptr:], ptr)
            except struct.error:
                break
            sections.append(section)
            ptr += section.size()
        self.__sections = sections
        return self.__sections

    def dumpSection(self, sectionName, fileName):
        """Write the raw bytes of the section named sectionName to fileName.

        Returns True on success and None when no such section exists. If
        several sections share the name, the last one is written.
        """
        section = None
        for iSection in self.parseSections():
            if iSection.Name == sectionName:
                section = iSection
        if not section:
            return None
        start = section.PointerToRawData
        # Binary mode: text mode would translate newline bytes on Windows
        # and rejects byte strings on Python 3.
        with open(fileName, 'wb') as f:
            f.write(self.pe_image[start:start + section.SizeOfRawData])
        return True

    def parseImports(self):
        """ parseImports builds a list of all static imports.

        Returns the cached list of Lib objects; the list is empty when the
        image has no import directory or no sections.
        """
        if self.__imports is not None:
            return self.__imports
        self.__imports = []
        directory = self.pe_hdr.OptionalHeader.getDataDirectory(IMAGE_DIRECTORY_ENTRY_IMPORT)
        if directory is None or directory.VirtualAddress == 0:
            return self.__imports
        if not self.parseSections():
            return self.__imports
        self.__imports.extend(self.__iterateDescriptors(directory))
        return self.__imports

    def getCurrentSectionHeader(self, rva):
        """
        getCurrentSectionHeader returns the section that rva
        belongs to or None.
        """
        for section in self.parseSections():
            # NOTE(review): VirtualSize is 0 in some images; SizeOfRawData is
            # used as the extent in that case so the section stays reachable.
            extent = section.VirtualSize or section.SizeOfRawData
            if section.VirtualAddress <= rva < section.VirtualAddress + extent:
                return section
        return None

    def imageByRva(self, rva):
        """
        imageByRva returns the PE-Image beginning at the file
        position that rva belongs to, or None when rva is not
        backed by file data.
        """
        ptr = self.rva2ptr(rva)
        # Offset 0 is rejected as well: it is the DOS header, so it can only
        # result from a section without raw data.
        if not ptr:
            return None
        return self.pe_image[ptr:]

    def rva2ptr(self, rva):
        """
        rva2ptr returns the position in the PE-Image (on disk)
        that rva belongs to, or None when no section contains rva.
        """
        sectionHeader = self.getCurrentSectionHeader(rva)
        if not sectionHeader:
            return None
        return rva - sectionHeader.VirtualAddress + sectionHeader.PointerToRawData
############################################################################# | |
# fasm template | |
# Skeleton of the generated fasm source. FasmTemplate substitutes the
# %%fake_imports%% and %%imports%% placeholders with text built from the
# parsed import table.
# NOTE(review): _doneMsg carries the same text as _beginMsg ('Start'); this
# looks like a copy/paste slip but is kept as found - confirm intent.
FASM_TEMPLATE = r"""format PE GUI 4.0 at 0x00200000
entry start
include 'win32a.inc'
section '.code' code readable executable
code_start:
file "code_section.bin"
proc start
invoke MessageBoxA,0,_beginMsg,_caption,MB_ICONINFORMATION+MB_OK
jmp code_start
invoke MessageBoxA,0,_doneMsg,_caption,MB_ICONINFORMATION+MB_OK
.exit:
invoke ExitProcess, 0
.unreachable:
jmp .exit
%%fake_imports%%
endp
section '.data' data readable
_beginMsg db 'Start',0
_doneMsg db 'Start',0
_caption db 'peEvade TEMPLATE',0
section '.idata' import data readable
%%imports%%
"""
class FasmSection(object):
    """A fasm ``section`` directive: a name plus a list of attribute words.

    name       -- section name, emitted inside single quotes.
    attributes -- iterable of attribute strings, emitted space-separated.
    """

    def __init__(self, name, attributes):
        self.name = name
        self.attributes = attributes

    def __str__(self):
        return "section '" + self.name + "' " + ' '.join(self.attributes)
class FasmTemplate(object):
    """Collects imported libraries and renders FASM_TEMPLATE with them.

    str(template) returns FASM_TEMPLATE with its %%fake_imports%% and
    %%imports%% placeholders replaced.
    """

    def __init__(self):
        self.__libs = {}
        self.__sections = []

    def addLib(self, lib):
        """Register lib (needs .name and .functions) for the import table.

        The key is the lower-cased file name without extension, so adding a
        library with the same base name again replaces the earlier one.
        """
        lib_name = os.path.splitext(os.path.basename(lib.name))[0].lower()
        self.__libs[lib_name] = lib

    def addSection(self, name, attributes):
        """Record an additional section (stored; not rendered by __str__)."""
        self.__sections.append(FasmSection(name, attributes))

    def __repr__(self):
        return str(self)

    def __str__(self):
        fake_imports, imports = self.__prepareImports()
        template = FASM_TEMPLATE.replace("%%fake_imports%%", fake_imports)
        template = template.replace("%%imports%%", imports)
        return template

    def __prepareImports(self):
        """Return (fake_imports, imports) text for the template placeholders.

        imports holds the ``library`` statement plus one ``import`` statement
        per library; fake_imports holds one ``invoke`` line per imported
        symbol. Imports by ordinal (ints in lib.functions) get a generated
        label ``<lib>_ordinal_<n>`` and the bare ordinal number, because
        they have no name to quote.
        """
        libs = list(self.__libs.items())
        library_entries = ["%s,'%s'" % (lib_name, lib.name) for lib_name, lib in libs]
        parts = ["\nlibrary ", ",\\\n ".join(library_entries), "\n\n"]
        invokes = []
        for lib_name, lib in libs:
            parts.append("import " + lib_name)
            for func in lib.functions:
                if isinstance(func, str):
                    label = func
                    target = "'" + func + "'"
                else:
                    label = "%s_ordinal_%d" % (lib_name, func)
                    target = str(func)
                parts.append(",\\\n " + label + "," + target)
                invokes.append("\n invoke " + label)
            parts.append("\n\n")
        return ("".join(invokes), "".join(parts),)
############################################################################# | |
# Main | |
def main(argv):
    """Parse the PE file named by argv[1] and print its fasm template.

    argv -- command-line argument list; argv[1] is the path of the PE file.

    Side effects: writes the ".code" section (if the image has one) to
    "code_section.bin" in the current directory and prints the template to
    stdout.

    Returns True on success, False when the path argument is missing or the
    file is not a parsable PE image.
    """
    if len(argv) < 2:
        sys.stderr.write("usage: %s <pe-file>\n" % (argv[0] if argv else "peEvade"))
        return False
    with open(argv[1], 'rb') as f:
        pe_image = f.read()
    try:
        pe = PE(pe_image)
    except TypeError:
        # PE() signals every header problem with TypeError.
        sys.stderr.write("%s: not a valid PE file\n" % argv[1])
        return False
    template = FasmTemplate()
    # parseImports() may yield nothing for an image without imports.
    for lib in pe.parseImports() or []:
        template.addLib(lib)
    pe.dumpSection(".code", "code_section.bin")
    print(template)
    return True

#############################################################################

if __name__ == "__main__":
    #print( __doc__ )
    sys.exit(not main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment