Created
November 23, 2019 14:10
-
-
Save runapp/2fba96f3702daee3c99e2d35a9d485ff to your computer and use it in GitHub Desktop.
A naive Python PE resource parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
import struct | |
unpack = struct.unpack | |
# Predefined resource type names, written as NAME=ID pairs (one per line).
# NOTE(review): these look like the Windows RT_* resource type constants -
# confirm against the platform headers before extending the list.
ts = """CURSOR=1
BITMAP=2
ICON=3
MENU=4
DIALOG=5
STRING=6
FONTDIR=7
FONT=8
ACCELERATOR=9
RCDATA=10
MESSAGETABLE=11
VERSION=16
DLGINCLUDE=17
PLUGPLAY=19
VXD=20
ANICURSOR=21
ANIICON=22
HTML=23
MANIFEST=24""".split()

# Maps a numeric resource ID to a display name of the form '#NAME'.
RESID_NAME_MAP = {}
for t in ts:
    t = t.split('=')
    RESID_NAME_MAP[int(t[1])] = '#'+t[0]
    # CURSOR and ICON each get a companion '#GROUP_*' entry whose ID is the
    # base ID plus 11 (1 -> 12, 3 -> 14).
    if t[0] in ('CURSOR', 'ICON'):
        RESID_NAME_MAP[int(t[1])+11] = '#GROUP_'+t[0]
del ts
class PE_Header:
    """The PE signature, the 20-byte file header and the raw optional header.

    The stream must be positioned at the 4-byte ``PE\\0\\0`` signature. After
    construction the stream is positioned just past the optional header.

    Raises:
        ValueError: if the signature does not match, or if fewer than
            ``SizeOfOptionalHeader`` bytes could be read for the optional
            header.
    """

    def __init__(self, stream):
        if stream.read(4) != b'PE\0\0':
            raise ValueError("PE Signature invalid")
        # Fixed 20-byte header: 2x uint16, 3x uint32, 2x uint16 (little-endian).
        (self.Machine, self.NumberOfSections, self.TimeDateStamp, self.PointerToSymbolTable,
         self.NumberOfSymbols, self.SizeOfOptionalHeader, self.Characteristics) = \
            unpack('<HHIIIHH', stream.read(20))
        # The optional header is kept as raw bytes; it is not decoded here.
        self.raw_OptionalHeader = stream.read(self.SizeOfOptionalHeader)
        # A plain file object returns a short read at EOF instead of raising;
        # anything read after a short optional header would be misaligned.
        if len(self.raw_OptionalHeader) != self.SizeOfOptionalHeader:
            raise ValueError("PE optional header truncated")
class PE_Section:
    """One 40-byte section-table entry read from the current stream position.

    ``Name`` is the 8-byte name field with trailing NUL padding removed; the
    remaining 32 bytes are unpacked into the numeric attributes below.
    """

    def __init__(self, stream):
        # The PE format defines the name field as NUL-padded UTF-8. Undecodable
        # bytes are replaced rather than raising, so an oddly named section
        # does not abort parsing of the whole file.
        self.Name = stream.read(8).rstrip(b'\0').decode('utf-8', errors='replace')
        (self.VirtualSize, self.VirtualAddress, self.SizeOfRawData,
         self.PointerToRawData, self.PointerToRelocations, self.PointerToLinenumbers,
         self.NumberOfRelocations, self.NumberOfLinenumbers, self.Characteristics) = \
            unpack('<IIIIIIHHI', stream.read(32))
class PEFileParser:
    """Opens a PE file, reads its headers and section table, and parses the
    resource tree of the ``.rsrc`` section when one is present.

    The instance doubles as the "stream" handed to the header, section and
    resource classes: it offers checked reads plus a small stack of saved file
    positions used while following offsets inside the resource section.

    Attributes set by the constructor:
        PE_Offset: file offset of the PE signature (read from offset 0x3c).
        PE_Header: the parsed PE_Header.
        Sections: list of PE_Section, in file order.
        Resource: root IMAGE_RESOURCE_DIRECTORY, or None without ``.rsrc``.
        rsrc_RBase / rsrc_VBase: raw file offset and virtual address of the
            ``.rsrc`` section, or None when the file has no such section.

    The file stays open after construction; call ``close()`` or use the
    parser as a context manager to release it.
    """

    def __init__(self, filename):
        self.f = open(filename, 'rb')
        self.rsrc_RBase = None
        self.rsrc_VBase = None
        self.pos_stack = []
        self.Resource = None
        try:
            self._parse()
        except BaseException:
            # Do not leak the file handle when the file is not a usable PE.
            self.f.close()
            raise

    def _parse(self):
        """Read the PE header, the section table and the resource tree."""
        # The 4 bytes at 0x3c hold the file offset of the PE signature.
        self.f.seek(0x3c)
        self.PE_Offset = unpack('<I', self.read(4))[0]
        self.f.seek(self.PE_Offset)
        self.PE_Header = PE_Header(self)
        # PE_Header consumed the optional header, so the section table is next.
        self.Sections = []
        for i in range(self.PE_Header.NumberOfSections):
            self.Sections.append(PE_Section(self))
        for s in self.Sections:
            if s.Name == '.rsrc':
                self.rsrc_RBase = s.PointerToRawData
                self.rsrc_VBase = s.VirtualAddress
                print(hex(s.PointerToRawData))
                self.f.seek(s.PointerToRawData)
                self.Resource = IMAGE_RESOURCE_DIRECTORY(self)
                break

    def close(self):
        """Close the underlying file."""
        self.f.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def read(self, size):
        """Read exactly ``size`` bytes; raise IOError on a short read."""
        start = self.f.tell()
        r = self.f.read(size)
        if len(r) != size:
            # Report where the read started, in decimal and hex.
            raise IOError("Error reading {len:d}({len:X}) bytes at {pos:d}({pos:X})".format(
                len=size, pos=start))
        return r

    def Rseek(self, Roffset):
        """Seek to an offset relative to the start of the .rsrc raw data."""
        self.f.seek(self.rsrc_RBase+Roffset)

    def VOffset_to_Offset(self, VOffset):
        """Convert a virtual address into an offset relative to .rsrc."""
        return VOffset-self.rsrc_VBase

    def push_pos(self):
        """Save the current file position on the position stack."""
        self.pos_stack.append(self.f.tell())

    def pop_pos(self):
        """Seek back to the most recently saved file position."""
        self.f.seek(self.pos_stack.pop())

    def abs_pos(self):
        """Return the current absolute file position."""
        return self.f.tell()
class IMAGE_RESOURCE_DIRECTORY_ENTRY:
    """One 8-byte resource directory entry plus whatever it points to.

    The entry consists of two little-endian uint32 values, ``name`` and
    ``offset``. The top bit of each selects how the low 31 bits are read:

    * name: bit set -> offset (relative to .rsrc) of a length-prefixed UTF-16
      string; bit clear -> the value itself is a numeric ID.
    * offset: bit set -> offset of a nested IMAGE_RESOURCE_DIRECTORY, stored
      in ``Payload``; bit clear -> offset of a 16-byte data entry whose bytes
      are loaded into ``Data``.

    Args:
        stream: object providing read/abs_pos/push_pos/pop_pos/Rseek/
            VOffset_to_Offset (a PEFileParser).
        check_name_type: when true, verify the name kind against
            ``expected_ID_name``.
        expected_ID_name: True if the entry must be ID-named, False if it must
            be string-named (only used with ``check_name_type``).
        prefix: indentation depth for the progress messages.

    Raises:
        TypeError: if ``check_name_type`` is set and the name kind differs
            from ``expected_ID_name``.

    The stream position is left just past the 8-byte entry.
    """

    def __init__(self, stream: 'PEFileParser', check_name_type=False, expected_ID_name=False, prefix=0):
        print("{}Parsing Entry at {:X}".format(' '*prefix, stream.abs_pos()))
        self.Offset = stream.abs_pos()
        (name, offset) = unpack('<II', stream.read(8))

        # Parse Name
        self.NameIsID = not bool(name & 0x80000000)
        if check_name_type and (self.NameIsID ^ expected_ID_name):
            raise TypeError("expected type: {}, actual type: {} (T=ID,F=Str) actual={}".format(
                expected_ID_name, self.NameIsID, name))
        if self.NameIsID:
            self.Name = name
        else:
            self.NameOffset = name & 0x7fffffff
            stream.push_pos()
            try:
                stream.Rseek(self.NameOffset)
                # uint16 count of UTF-16 code units, followed by the text.
                namelen = unpack('<H', stream.read(2))[0]
                # Decode explicitly as little-endian: the generic 'utf16' codec
                # would swallow a leading BOM and otherwise falls back to the
                # host byte order.
                self.Name = stream.read(2*namelen).decode('utf-16-le')
            finally:
                # Always rebalance the position stack, even on a failed read.
                stream.pop_pos()

        # Parse Offset(Data)
        self.PayloadIsData = not bool(offset & 0x80000000)
        self.PayloadOffset = offset & 0x7fffffff
        stream.push_pos()
        try:
            stream.Rseek(self.PayloadOffset)
            if self.PayloadIsData:
                self.DataOffset, self.DataSize, self.CodePage, self.IMAGE_RESOURCE_DATA_ENTRY_Reserved = unpack(
                    '<IIII', stream.read(16))
                # DataOffset is a virtual address, unlike the section-relative
                # offsets used everywhere else in the tree.
                stream.Rseek(stream.VOffset_to_Offset(self.DataOffset))
                self.Data = stream.read(self.DataSize)
            else:
                self.Payload = IMAGE_RESOURCE_DIRECTORY(stream, prefix=prefix+1)
        finally:
            stream.pop_pos()

    def __str__(self):
        """Return a one-line summary: the name plus a preview of the data."""
        if self.NameIsID:
            # The ID -> name table is applied to every numeric name, whatever
            # level of the tree the entry sits on.
            n = RESID_NAME_MAP.get(self.Name, '0x{:X}'.format(self.Name))
        else:
            n = self.Name
        if self.PayloadIsData:
            preview = self.Data[:20]  # show at most the first 20 bytes
            d = 'data={} ds={:X} len={:X}@{:X}'.format(
                preview, len(preview), self.DataSize, self.DataOffset)
        else:
            d = '(sub-node)'
        return 'Entry name={} {}'.format(n, d)
class IMAGE_RESOURCE_DIRECTORY:
    """A resource directory: a 16-byte header followed by its entries.

    The header gives the number of string-named entries and the number of
    ID-named entries. The named entries are read first, then the ID entries;
    each is parsed (recursively) as an IMAGE_RESOURCE_DIRECTORY_ENTRY with its
    name kind checked against the group it was read from.

    Args:
        stream: object providing read/abs_pos and the helpers the entry class
            needs (a PEFileParser), positioned at the directory header.
        prefix: indentation depth for the progress messages.
    """

    def __init__(self, stream: 'PEFileParser', prefix=0):
        print("{}Parsing Directory at {:X}".format(' '*prefix, stream.abs_pos()))
        self.Offset = stream.abs_pos()
        # Header: 2x uint32 followed by 4x uint16, little-endian.
        (self.Characteristics, self.TimeDateStamp,
         self.MajorVersion, self.MinorVersion,
         self.NumberOfNamedEntries, self.NumberOfIdEntries) = unpack('<IIHHHH', stream.read(16))

        self.NamedEntries = []
        for i in range(self.NumberOfNamedEntries):
            print("{}Parsing Named Entry #{}".format(' '*prefix, i))
            # check_name_type=True, expected_ID_name=False: must be string-named
            self.NamedEntries.append(IMAGE_RESOURCE_DIRECTORY_ENTRY(stream, True, False, prefix=prefix+1))

        self.IdEntries = []
        for i in range(self.NumberOfIdEntries):
            print("{}Parsing Id Entry #{}".format(' '*prefix, i))
            # check_name_type=True, expected_ID_name=True: must be ID-named
            self.IdEntries.append(IMAGE_RESOURCE_DIRECTORY_ENTRY(stream, True, True, prefix=prefix+1))

    def __str__(self):
        """Return the header fields formatted on a single line."""
        return ('Characteristics: {c:08X} TimeDateStamp: {t:08X} Version: {majv:04X} {minv:04X} ' +
                'NumberOfNamedEntries: {nn:5d} NumberOfIdEntries: {ni:5d}').format(
            c=self.Characteristics, t=self.TimeDateStamp, majv=self.MajorVersion, minv=self.MinorVersion,
            nn=self.NumberOfNamedEntries, ni=self.NumberOfIdEntries
        )
# Parse a sample executable at import time; the tree dump at the end of the
# file reads this module-level parser instance.
a = PEFileParser('c:\\windows\\notepad.exe')
def print_allocs(root: 'IMAGE_RESOURCE_DIRECTORY', depth=0):
    """Print the byte span of every structure reachable from ``root``.

    One ``start-end kind depth`` line is printed for each directory header
    (16 bytes), entry (8 bytes), name string, data header (16 bytes) and data
    blob; sub-directories are walked recursively with ``depth`` incremented.

    The spans are printed exactly as stored on the parsed objects and are not
    in one coordinate system: directory/entry spans use ``Offset``, name and
    data-header spans use ``NameOffset``/``PayloadOffset``, and data spans use
    ``DataOffset``.

    Args:
        root: directory to walk; ``None`` prints nothing.
        depth: nesting level printed at the end of each line.
    """
    if root is None:
        return
    print('{}-{} directory {}'.format(root.Offset, root.Offset+16, depth))
    for i in itertools.chain(root.NamedEntries, root.IdEntries):
        print('{}-{} entry {}'.format(i.Offset, i.Offset+8, depth))
        if not i.NameIsID:
            # 2-byte length prefix plus the UTF-16 text. Measure the encoded
            # form: a character outside the BMP occupies two code units.
            name_size = 2 + len(i.Name.encode('utf-16-le'))
            print('{}-{} namestr {}'.format(i.NameOffset, i.NameOffset+name_size, depth))
        if i.PayloadIsData:
            print('{}-{} dataheader {}'.format(i.PayloadOffset, i.PayloadOffset+16, depth))
            print('{}-{} data {}'.format(i.DataOffset, i.DataOffset+i.DataSize, depth))
        else:
            print_allocs(i.Payload, depth=depth+1)
# print_allocs(a.Resource)
def print_tree(root: 'IMAGE_RESOURCE_DIRECTORY', depth=0):
    """Print one line per entry under ``root``, indented by nesting depth.

    Each line shows ``str(entry)`` followed by the entry's ``Offset`` in hex;
    named entries are listed before ID entries, and sub-directories are
    walked recursively.

    Args:
        root: directory to print; ``None`` (a file without a resource
            section) prints nothing.
        depth: number of leading spaces for this level.
    """
    if root is None:
        return
    for i in itertools.chain(root.NamedEntries, root.IdEntries):
        print('{}{} {:X}'.format(' '*depth, str(i), i.Offset))
        if not i.PayloadIsData:
            print_tree(i.Payload, depth=depth+1)
# Dump the parsed resource tree. Resource stays None when the file has no
# .rsrc section, in which case there is nothing to print.
if a.Resource is not None:
    print_tree(a.Resource)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment