Skip to content

Instantly share code, notes, and snippets.

@runapp
Created November 23, 2019 14:10
Show Gist options
  • Save runapp/2fba96f3702daee3c99e2d35a9d485ff to your computer and use it in GitHub Desktop.
Save runapp/2fba96f3702daee3c99e2d35a9d485ff to your computer and use it in GitHub Desktop.
A naive Python PE resource parser
import itertools
import struct
# Short alias for struct.unpack, used by the parser classes in this file.
unpack = struct.unpack

# Predefined resource type ids as (name, id) pairs.  The names are the
# Win32 RT_* constants without their "RT_" prefix.
_RESOURCE_TYPES = (
    ('CURSOR', 1),
    ('BITMAP', 2),
    ('ICON', 3),
    ('MENU', 4),
    ('DIALOG', 5),
    ('STRING', 6),
    ('FONTDIR', 7),
    ('FONT', 8),
    ('ACCELERATOR', 9),
    ('RCDATA', 10),
    ('MESSAGETABLE', 11),
    ('VERSION', 16),
    ('DLGINCLUDE', 17),
    ('PLUGPLAY', 19),
    ('VXD', 20),
    ('ANICURSOR', 21),
    ('ANIICON', 22),
    ('HTML', 23),
    ('MANIFEST', 24),
)

# CURSOR and ICON each have a companion "group" type whose id is the plain
# type's id plus this offset (1 + 11 = 12 and 3 + 11 = 14).
_GROUP_ID_OFFSET = 11
_GROUPED_TYPES = frozenset(('CURSOR', 'ICON'))


def _build_resid_name_map():
    """Return a dict mapping each known resource type id to its '#NAME' label.

    Built inside a function so that no loop temporaries are left behind in
    the module namespace.
    """
    names = {}
    for type_name, type_id in _RESOURCE_TYPES:
        names[type_id] = '#' + type_name
        if type_name in _GROUPED_TYPES:
            names[type_id + _GROUP_ID_OFFSET] = '#GROUP_' + type_name
    return names


# Maps a numeric resource type id to a printable label such as '#ICON'.
RESID_NAME_MAP = _build_resid_name_map()
class PE_Header:
    """PE signature and COFF file header read from a positioned stream.

    The stream must be positioned at the 4-byte ``PE\\0\\0`` signature and
    only needs a ``read(size)`` method.  After construction the stream is
    positioned just past the optional header.

    Attributes set: ``Machine``, ``NumberOfSections``, ``TimeDateStamp``,
    ``PointerToSymbolTable``, ``NumberOfSymbols``, ``SizeOfOptionalHeader``,
    ``Characteristics`` and ``raw_OptionalHeader`` (the optional header as
    undecoded bytes).

    Raises:
        ValueError: if the signature does not match or the stream ends
            before the complete header has been read.
    """

    _SIGNATURE = b'PE\0\0'
    _COFF_FORMAT = '<HHIIIHH'
    _COFF_SIZE = struct.calcsize(_COFF_FORMAT)  # 20 bytes

    def __init__(self, stream):
        if stream.read(len(self._SIGNATURE)) != self._SIGNATURE:
            raise ValueError("PE Signature invalid")

        coff = stream.read(self._COFF_SIZE)
        # A plain file object returns short data at EOF instead of raising,
        # so check the length explicitly and fail with a readable message.
        if len(coff) != self._COFF_SIZE:
            raise ValueError("PE header truncated")
        (self.Machine, self.NumberOfSections, self.TimeDateStamp,
         self.PointerToSymbolTable, self.NumberOfSymbols,
         self.SizeOfOptionalHeader, self.Characteristics) = \
            struct.unpack(self._COFF_FORMAT, coff)

        # The optional header is kept raw; its size comes from the COFF header.
        self.raw_OptionalHeader = stream.read(self.SizeOfOptionalHeader)
        if len(self.raw_OptionalHeader) != self.SizeOfOptionalHeader:
            raise ValueError("PE optional header truncated")
class PE_Section:
    """One 40-byte section table entry read from a positioned stream.

    The stream only needs a ``read(size)`` method and must be positioned at
    the start of the entry; it is left positioned just past it.

    Attributes set: ``Name`` (str, trailing NUL padding removed),
    ``VirtualSize``, ``VirtualAddress``, ``SizeOfRawData``,
    ``PointerToRawData``, ``PointerToRelocations``, ``PointerToLinenumbers``,
    ``NumberOfRelocations``, ``NumberOfLinenumbers`` and ``Characteristics``.

    Raises:
        ValueError: if the stream ends before the whole entry has been read.
    """

    _NAME_SIZE = 8
    _FIELDS_FORMAT = '<IIIIIIHHI'
    _FIELDS_SIZE = struct.calcsize(_FIELDS_FORMAT)  # 32 bytes

    def __init__(self, stream):
        raw_name = stream.read(self._NAME_SIZE)
        fields = stream.read(self._FIELDS_SIZE)
        if len(raw_name) != self._NAME_SIZE or len(fields) != self._FIELDS_SIZE:
            raise ValueError("PE section header truncated")

        # The name field is NUL-padded UTF-8.  Plain ASCII names decode to the
        # same text as before; undecodable bytes are replaced rather than
        # aborting the whole parse over an unusual section name.
        self.Name = raw_name.rstrip(b'\0').decode('utf-8', errors='replace')
        (self.VirtualSize, self.VirtualAddress, self.SizeOfRawData,
         self.PointerToRawData, self.PointerToRelocations,
         self.PointerToLinenumbers, self.NumberOfRelocations,
         self.NumberOfLinenumbers, self.Characteristics) = \
            struct.unpack(self._FIELDS_FORMAT, fields)
class PEFileParser:
    """Open a PE file and eagerly parse its headers and ``.rsrc`` tree.

    The instance doubles as the stream handed to ``PE_Header``,
    ``PE_Section`` and the resource directory classes: it offers a
    length-checked ``read`` plus seek helpers relative to the resource
    section.

    Attributes:
        f: the underlying binary file object (stays open until ``close``).
        PE_Offset: file offset of the PE signature, read from offset 0x3c.
        PE_Header: the parsed ``PE_Header``.
        Sections: list of ``PE_Section`` objects in file order.
        Resource: root ``IMAGE_RESOURCE_DIRECTORY``, or ``None`` when the
            file has no section named ``.rsrc``.
        rsrc_RBase / rsrc_VBase: raw file offset and virtual address of the
            ``.rsrc`` section; both ``None`` when there is no such section.
        pos_stack: saved file positions for ``push_pos`` / ``pop_pos``.

    The parser can be used as a context manager to close the file
    deterministically.
    """

    def __init__(self, filename):
        self.f = open(filename, 'rb')
        self.rsrc_RBase = None
        self.rsrc_VBase = None
        self.pos_stack = []
        try:
            self._parse()
        except BaseException:
            # Do not leak the handle when the file turns out to be malformed.
            self.f.close()
            raise

    def _parse(self):
        """Read the PE header, the section table and the resource tree."""
        # Offset 0x3c holds the file offset of the PE signature.
        self.f.seek(0x3c)
        self.PE_Offset = struct.unpack('<I', self.read(4))[0]
        self.f.seek(self.PE_Offset)
        self.PE_Header = PE_Header(self)
        # The section table follows the optional header directly.
        self.Sections = [PE_Section(self)
                         for _ in range(self.PE_Header.NumberOfSections)]
        self.Resource = None
        for s in self.Sections:
            if s.Name == '.rsrc':
                self.rsrc_RBase = s.PointerToRawData
                self.rsrc_VBase = s.VirtualAddress
                # Trace output kept from the original implementation.
                print(hex(s.PointerToRawData))
                self.f.seek(s.PointerToRawData)
                self.Resource = IMAGE_RESOURCE_DIRECTORY(self)
                break

    def close(self):
        """Close the underlying file; safe to call more than once."""
        self.f.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def read(self, size):
        """Read exactly ``size`` bytes; raise ``IOError`` on a short read."""
        start = self.f.tell()
        r = self.f.read(size)
        if len(r) != size:
            # Report where the read started, in decimal and in hex.
            raise IOError("Error reading {len:d}({len:X}) bytes at {pos:d}({pos:X})".format(
                len=size, pos=start))
        return r

    def Rseek(self, Roffset):
        """Seek to ``Roffset`` bytes past the start of the ``.rsrc`` raw data."""
        self.f.seek(self.rsrc_RBase+Roffset)

    def VOffset_to_Offset(self, VOffset):
        """Convert a virtual address into an offset relative to ``.rsrc``."""
        return VOffset-self.rsrc_VBase

    def push_pos(self):
        """Save the current file position on the position stack."""
        self.pos_stack.append(self.f.tell())

    def pop_pos(self):
        """Seek back to the most recently saved position and discard it."""
        self.f.seek(self.pos_stack.pop())

    def abs_pos(self):
        """Return the current absolute file position."""
        return self.f.tell()
class IMAGE_RESOURCE_DIRECTORY_ENTRY:
    """One 8-byte resource directory entry, parsed together with its payload.

    The entry is read at the stream's current position; the stream is left
    positioned just past the 8 bytes, so consecutive entries can be read one
    after another.  The name string and the payload are read by seeking away
    and restoring the position afterwards.

    Args:
        stream: object providing ``read``, ``abs_pos``, ``push_pos``,
            ``pop_pos``, ``Rseek`` and ``VOffset_to_Offset`` (a
            ``PEFileParser``).
        check_name_type: when true, verify the name kind against
            ``expected_ID_name``.
        expected_ID_name: ``True`` if a numeric id is expected, ``False`` if
            a string name is expected.
        prefix: nesting depth, used to indent the trace output and passed on
            (plus one) to sub-directories.

    Attributes:
        Offset: absolute file position of the entry.
        NameIsID: ``True`` when the high bit of the name field is clear.
        Name: the numeric id, or the decoded name string.
        NameOffset: offset of the name string relative to the resource
            section (only set for string names).
        PayloadIsData: ``True`` when the high bit of the offset field is
            clear, i.e. the entry points at a data entry.
        PayloadOffset: offset of the payload relative to the resource section.
        DataOffset, DataSize, CodePage, IMAGE_RESOURCE_DATA_ENTRY_Reserved,
        Data: data-entry fields and the raw bytes (only for data payloads).
        Payload: the nested ``IMAGE_RESOURCE_DIRECTORY`` (only for
            directory payloads).

    Raises:
        TypeError: if ``check_name_type`` is set and the name kind differs
            from ``expected_ID_name``.
    """

    _HIGH_BIT = 0x80000000
    _LOW_BITS = 0x7fffffff

    def __init__(self, stream: "PEFileParser", check_name_type=False, expected_ID_name=False, prefix=0):
        print("{}Parsing Entry at {:X}".format(' '*prefix, stream.abs_pos()))
        self.Offset = stream.abs_pos()
        (name, offset) = struct.unpack('<II', stream.read(8))

        # Parse Name
        self.NameIsID = not (name & self._HIGH_BIT)
        if check_name_type and self.NameIsID != bool(expected_ID_name):
            raise TypeError("expected type: {}, actual type: {} (T=ID,F=Str) actual={}".format(
                expected_ID_name, self.NameIsID, name))
        if self.NameIsID:
            self.Name = name
        else:
            self.NameOffset = name & self._LOW_BITS
            stream.push_pos()
            try:
                stream.Rseek(self.NameOffset)
                # The string is a 16-bit character count followed by that
                # many UTF-16 code units.
                namelen = struct.unpack('<H', stream.read(2))[0]
                # Decode explicitly as little-endian: the plain 'utf16' codec
                # uses the host byte order and would treat leading BOM-like
                # bytes as a byte-order mark.
                self.Name = stream.read(2*namelen).decode('utf-16-le')
            finally:
                stream.pop_pos()

        # Parse Offset(Data)
        self.PayloadIsData = not (offset & self._HIGH_BIT)
        self.PayloadOffset = offset & self._LOW_BITS
        stream.push_pos()
        try:
            stream.Rseek(self.PayloadOffset)
            if self.PayloadIsData:
                (self.DataOffset, self.DataSize, self.CodePage,
                 self.IMAGE_RESOURCE_DATA_ENTRY_Reserved) = struct.unpack(
                    '<IIII', stream.read(16))
                # DataOffset is a virtual address; make it section-relative
                # before seeking to the resource bytes.
                stream.Rseek(stream.VOffset_to_Offset(self.DataOffset))
                self.Data = stream.read(self.DataSize)
            else:
                self.Payload = IMAGE_RESOURCE_DIRECTORY(stream, prefix=prefix+1)
        finally:
            stream.pop_pos()

    def __str__(self):
        """Return a one-line summary: name plus a data preview or '(sub-node)'."""
        if self.NameIsID:
            n = RESID_NAME_MAP.get(self.Name, '0x{:X}'.format(self.Name))
        else:
            n = self.Name
        if self.PayloadIsData:
            # Show at most the first 20 bytes of the resource data.
            preview = self.Data[:20]
            d = 'data={} ds={:X} len={:X}@{:X}'.format(
                preview, len(preview), self.DataSize, self.DataOffset)
        else:
            d = '(sub-node)'
        return 'Entry name={} {}'.format(n, d)
class IMAGE_RESOURCE_DIRECTORY:
    """A resource directory table: a 16-byte header followed by its entries.

    The header is read at the stream's current position.  The entries that
    follow it are parsed immediately, named entries first and id entries
    second, each as an ``IMAGE_RESOURCE_DIRECTORY_ENTRY`` with name-type
    checking enabled.  ``prefix`` is the nesting depth used to indent the
    trace output; entries receive ``prefix + 1``.
    """

    def __init__(self, stream: PEFileParser, prefix=0):
        pad = ' '*prefix
        print("{}Parsing Directory at {:X}".format(pad, stream.abs_pos()))
        self.Offset = stream.abs_pos()

        header = unpack('<IIHHHH', stream.read(16))
        (self.Characteristics, self.TimeDateStamp,
         self.MajorVersion, self.MinorVersion,
         self.NumberOfNamedEntries, self.NumberOfIdEntries) = header

        # String-named entries are stored before the numeric-id entries.
        self.NamedEntries = []
        self._read_entries(stream, self.NamedEntries, self.NumberOfNamedEntries,
                           "{}Parsing Named Entry #{}", False, prefix)
        self.IdEntries = []
        self._read_entries(stream, self.IdEntries, self.NumberOfIdEntries,
                           "{}Parsing Id Entry #{}", True, prefix)

    @staticmethod
    def _read_entries(stream, target, count, trace_template, expect_id, prefix):
        """Append ``count`` consecutive entries from ``stream`` to ``target``."""
        pad = ' '*prefix
        for index in range(count):
            print(trace_template.format(pad, index))
            entry = IMAGE_RESOURCE_DIRECTORY_ENTRY(
                stream, True, expect_id, prefix=prefix+1)
            target.append(entry)

    def __str__(self):
        """Return the header fields formatted on a single line."""
        template = ('Characteristics: {c:08X} TimeDateStamp: {t:08X} Version: {majv:04X} {minv:04X} ' +
                    'NumberOfNamedEntries: {nn:5d} NumberOfIdEntries: {ni:5d}')
        fields = {
            'c': self.Characteristics,
            't': self.TimeDateStamp,
            'majv': self.MajorVersion,
            'minv': self.MinorVersion,
            'nn': self.NumberOfNamedEntries,
            'ni': self.NumberOfIdEntries,
        }
        return template.format(**fields)
# Parse a hard-coded sample executable as soon as the module is loaded; the
# resulting parser object ``a`` is used by the print_tree call at the end of
# the file.
a = PEFileParser('c:\\windows\\notepad.exe')
def print_allocs(root: "IMAGE_RESOURCE_DIRECTORY", depth=0):
    """Print the byte range occupied by every structure in a resource tree.

    One ``start-end kind depth`` line is printed for each directory header
    (16 bytes), directory entry (8 bytes), name string (2-byte length plus
    the UTF-16 text), data entry header (16 bytes) and data blob, recursing
    into sub-directories with ``depth + 1``.

    NOTE: the numbers are printed exactly as stored on the parsed objects and
    therefore come from different address spaces: directory and entry ranges
    are absolute file positions, name string and data header ranges are
    relative to the start of the resource section, and data ranges use the
    virtual address taken from the data entry.

    Args:
        root: the directory to start from.
        depth: nesting level printed at the end of each line.
    """
    print('{}-{} directory {}'.format(root.Offset, root.Offset+16, depth))
    for i in itertools.chain(root.NamedEntries, root.IdEntries):
        print('{}-{} entry {}'.format(i.Offset, i.Offset+8, depth))
        if not i.NameIsID:
            # Measure the encoded text: characters outside the BMP occupy two
            # UTF-16 code units, so len(i.Name)*2 would undercount them.
            name_bytes = len(i.Name.encode('utf-16-le'))
            print('{}-{} namestr {}'.format(i.NameOffset, i.NameOffset+2+name_bytes, depth))
        if i.PayloadIsData:
            print('{}-{} dataheader {}'.format(i.PayloadOffset, i.PayloadOffset+16, depth))
            print('{}-{} data {}'.format(i.DataOffset, i.DataOffset+i.DataSize, depth))
        else:
            print_allocs(i.Payload, depth=depth+1)
# print_allocs(a.Resource)  # print_allocs expects the root directory, not the parser object
def print_tree(root: IMAGE_RESOURCE_DIRECTORY, depth=0):
    """Print every entry below ``root``, one line each, indented by depth.

    Each line is the entry's string form followed by its offset in hex.
    Named entries are listed before id entries, and an entry that holds a
    sub-directory is followed directly by that sub-directory's entries at
    ``depth + 1``.
    """
    indent = ' '*depth
    entries = itertools.chain(root.NamedEntries, root.IdEntries)
    for entry in entries:
        line = '{}{} {:X}'.format(indent, str(entry), entry.Offset)
        print(line)
        if entry.PayloadIsData:
            # Leaf entry: nothing nested below it.
            continue
        print_tree(entry.Payload, depth=depth+1)
# Dump the resource tree of the sample file parsed above.  ``a.Resource`` is
# ``None`` when the parser found no ``.rsrc`` section; report that instead of
# failing with an AttributeError inside print_tree.
if a.Resource is not None:
    print_tree(a.Resource)
else:
    print('No .rsrc section found')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment