Last active
September 12, 2024 06:51
-
-
Save joevt/da0500cd574f00042f0db61f9af5512f to your computer and use it in GitHub Desktop.
Process DWARF and stabs debug information
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
import sys | |
import tempfile | |
import subprocess | |
import re | |
import uuid | |
import pprint | |
#import lldb | |
debug = 0
dbgindent = 0


def dbgprint(thestring):
    """Print a debug line indented by the current brace-nesting depth.

    A line starting with '}' is printed one level shallower (the depth is
    decreased before printing); a line starting with '{' increases the depth
    for the lines that follow it.  Each level is four spaces wide.

    The depth is kept in the module-level ``dbgindent``.  Once it reaches 100
    a further '{' resets it to 48 instead of growing without bound.

    An empty string is printed as a blank (indented) line; the prefix test
    uses a slice so it cannot raise IndexError.
    """
    global dbgindent
    lead = thestring[:1]
    if lead == '}':
        if dbgindent > 0:
            dbgindent = dbgindent - 1
    print("%*s%s" % (dbgindent * 4, "", thestring))
    if lead == '{':
        if dbgindent < 100:
            dbgindent = dbgindent + 1
        else:
            dbgindent = 48
#========================================================================================= | |
class Hopper(dict):
    """Accumulates type descriptions, keyed by UUID, for export to Hopper.

    Each ``Add*`` method creates a ``Hopper.Type`` record for a DIE, links the
    two together (``type.DIE`` / ``DIE.type``), assigns the DIE a UUID and
    appends the record to ``Types``.

    NOTE: ``Types`` and ``UUIDs`` are class attributes, so they are shared by
    every ``Hopper`` instance.
    """

    BaseTypes = [
        # Base types that exist in Hopper:
        {"uuid": "054086d7b17b4685971643925db72c00", "name": "void",               "size": None, "preferred": False, "encoding": None},
        {"uuid": "054086d7b17b4685971643925db72c01", "name": "int8_t",             "size": 1,    "preferred": True,  "encoding": "ATE_signed"},
        {"uuid": "054086d7b17b4685971643925db72c02", "name": "uint8_t",            "size": 1,    "preferred": True,  "encoding": "ATE_unsigned"},
        {"uuid": "054086d7b17b4685971643925db72c03", "name": "int16_t",            "size": 2,    "preferred": True,  "encoding": "ATE_signed"},
        {"uuid": "054086d7b17b4685971643925db72c04", "name": "uint16_t",           "size": 2,    "preferred": True,  "encoding": "ATE_unsigned"},
        {"uuid": "054086d7b17b4685971643925db72c05", "name": "int32_t",            "size": 4,    "preferred": True,  "encoding": "ATE_signed"},
        {"uuid": "054086d7b17b4685971643925db72c06", "name": "uint32_t",           "size": 4,    "preferred": True,  "encoding": "ATE_unsigned"},
        {"uuid": "054086d7b17b4685971643925db72c07", "name": "int64_t",            "size": 8,    "preferred": True,  "encoding": "ATE_signed"},
        {"uuid": "054086d7b17b4685971643925db72c08", "name": "uint64_t",           "size": 8,    "preferred": True,  "encoding": "ATE_unsigned"},
        {"uuid": "054086d7b17b4685971643925db72c09", "name": "float",              "size": 4,    "preferred": True,  "encoding": "ATE_float"},
        {"uuid": "054086d7b17b4685971643925db72c0a", "name": "double",             "size": 8,    "preferred": True,  "encoding": "ATE_float"},
        {"uuid": "054086d7b17b4685971643925db72c0b", "name": "int",                "size": None, "preferred": False, "encoding": "ATE_signed"},
        {"uuid": "054086d7b17b4685971643925db72c0c", "name": "unsigned int",       "size": None, "preferred": False, "encoding": "ATE_unsigned"},
        {"uuid": "054086d7b17b4685971643925db72c0d", "name": "long",               "size": 8,    "preferred": False, "encoding": "ATE_signed"},
        {"uuid": "054086d7b17b4685971643925db72c0e", "name": "unsigned long",      "size": 8,    "preferred": False, "encoding": "ATE_unsigned"},
        {"uuid": "054086d7b17b4685971643925db72c0f", "name": "long long",          "size": 8,    "preferred": False, "encoding": "ATE_signed"},
        {"uuid": "054086d7b17b4685971643925db72c10", "name": "unsigned long long", "size": 8,    "preferred": False, "encoding": "ATE_unsigned"},
        {"uuid": "054086d7b17b4685971643925db72c11", "name": "char",               "size": 1,    "preferred": True,  "encoding": "ATE_signed_char"},
        {"uuid": "054086d7b17b4685971643925db72c12", "name": "short",              "size": 2,    "preferred": False, "encoding": "ATE_signed"},
        {"uuid": "054086d7b17b4685971643925db72c13", "name": "unsigned char",      "size": 1,    "preferred": True,  "encoding": "ATE_unsigned_char"},
        {"uuid": "054086d7b17b4685971643925db72c14", "name": "unsigned short",     "size": 2,    "preferred": False, "encoding": "ATE_unsigned"},
        {"uuid": "054086d7b17b4685971643925db72c15", "name": "bool",               "size": 1,    "preferred": True,  "encoding": "ATE_boolean"},
        # Base types that don't exist in Hopper with substitutes that exist in Hopper:
        {"uuid": "054086d7b17b4685971643925db72c04", "name": "char16_t",           "size": 2,    "preferred": True,  "encoding": "ATE_UTF"},
        {"uuid": "054086d7b17b4685971643925db72c06", "name": "char32_t",           "size": 4,    "preferred": True,  "encoding": "ATE_UTF"},
        # Base types that don't exist in Hopper:
        {"uuid": "054086d7b17b4685971643925db72e00", "name": "long double",        "size": 16,   "preferred": True,  "encoding": "ATE_float"},
    ]

    class Type(dict):
        """One type record; fields are stored as instance attributes."""
        # Type : 16 byte type uuid, 4 byte len + name, 2 byte type
        TYPE_pointer = 0x0011  # type uuid
        TYPE_struct = 0x0012  # 4 byte numfields * { 4 byte len + name, type uuid, byte format, 4 byte len + comment } null
        TYPE_union = 0x0013  # 4 byte numUnions * { 4 byte len + name, type uuid, byte format, 4 byte null }
        TYPE_array = 0x0014  # 4 byte count, type uuid
        TYPE_typedef = 0x0015  # 4 byte len + name, type uuid
        TYPE_function = 0x001b  # flag1 (ff), return type uuid, 2 byte numParams * {4 byte len + name, type uuid, byte format? }, ff=variadic, extra1 (6 null bytes), ff=no return, extra2 (0700=user input, or 0100=header import)
        TYPE_enumeration = 0x001c  # extra1 (00=user enum, 04=built-in enums), 4 byte numEnums * { 4 byte len + name, 8 byte signed value }
        # NOTE(review): the FORMAT_* constants are placed in Type because they
        # directly follow the TYPE_* constants; indentation was lost in the
        # copy this was restored from - confirm they do not belong on Hopper.
        FORMAT_DEFAULT = 0
        FORMAT_HEXADECIMAL = 1
        FORMAT_DECIMAL = 2
        FORMAT_OCTAL = 3
        FORMAT_CHARACTER = 4
        FORMAT_STACKVARIABLE = 5
        FORMAT_OFFSET = 6
        FORMAT_ADDRESS = 7
        FORMAT_FLOAT = 8
        FORMAT_BINARY = 9
        FORMAT_STRUCTURED = 10
        FORMAT_ENUM = 11
        FORMAT_ADDRESS_DIFF = 12
        FORMAT_NEGATE = 0x20
        FORMAT_LEADINGZEROES = 0x40
        FORMAT_SIGNED = 0x80

    Types = []  # every Type record created by AddType, in creation order
    UUIDs = {}  # uuid -> DIE, used to detect collisions

    def NewUUID(self, DIE):
        """Assign ``DIE.uuid`` (a uuid5) and register it in ``UUIDs``.

        The uuid5 name is the compile unit's path plus the DIE address; the
        compile directory is prepended unless it already occurs in AT_name.
        Prints an error if the DIE already has a uuid or if the new uuid
        collides with a registered one.
        """
        if hasattr(DIE, 'uuid'):
            print ("Error: uuid already created :%s:" % DIE.GetAddress())
            return
        unit = DIE.compile_unit
        if unit.AT_comp_dir in unit.AT_name:
            path = unit.AT_name
        else:
            path = unit.AT_comp_dir + unit.AT_name
        DIE.uuid = uuid.uuid5(uuid.NAMESPACE_URL, path + ("%s" % DIE.GetAddress()))
        if DIE.uuid in self.UUIDs:
            print ("Error: uuid collision %s" % DIE.GetAddress())
        else:
            self.UUIDs[DIE.uuid] = DIE

    def AddType(self, name, typetype, DIE):
        """Create, register and return a Type record of kind ``typetype`` for ``DIE``."""
        newtype = self.Type()
        newtype.type = typetype
        newtype.DIE = DIE
        newtype.name = name
        newtype.DIE.type = newtype
        self.NewUUID(newtype.DIE)
        self.Types.append(newtype)
        return newtype

    def AddFunctionPointer(self, DIE, name, attype):
        """Add a function type for ``DIE`` described by the subroutine DIE ``attype``.

        The return type and parameters are read from ``attype`` and its
        children.  An artificial parameter without a name is called "this".
        """
        # Find artifical
        # ^0x\w+: +TAG_formal_parameter.*\n( +AT_.*\n)* +AT_artificial.*\n( +AT_.*\n)*
        newtype = self.AddType(name, self.Type.TYPE_function, DIE)
        if attype.HasType():
            newtype.returntype = attype.GetType()
        else:
            newtype.returntype = None
        newtype.variadic = False
        newtype.params = []
        # A subroutine without parameters has no ``children`` attribute at all.
        for child in getattr(attype, "children", []):
            if child.tag == "TAG_formal_parameter":
                if child.HasType():
                    if hasattr(child, "AT_name"):
                        paramname = child.AT_name
                    elif hasattr(child, "AT_artificial"):
                        paramname = "this"
                    else:
                        paramname = None
                    newtype.params.append({"name":paramname, "attype":child.GetType()})
                else:
                    print("Error: unknown parameter type %s" % child.GetAddress())
            elif child.tag == "TAG_unspecified_parameters":
                newtype.variadic = True
            else:
                print("Error: unknown parameter type %s" % child.GetAddress())

    def AddPointerToMember(self, DIE, name, attype):
        """Add a pointer-to-member as a struct of two synthetic pointer types.

        Two synthetic DIEs are created and registered in ``DIE.dSYM.DIELookup``:
        one for the function pointer and one pointing at the containing type.
        """
        # Find TAG_ptr_to_member_type
        # ^0x\w+: +TAG_ptr_to_member_type.*\n( +AT_.*\n)*
        newtype = self.AddType(name, self.Type.TYPE_struct, DIE)
        # NOTE(review): the synthetic addresses below are strings; GetAddress()
        # on a DIE may expect a numeric address - verify before relying on this.
        DIEf = DIEDict()
        DIEf.dSYM = DIE.dSYM
        DIEf.address = ("%s_1" % DIE.GetAddress())
        DIEf.tag = "TAG_pointer_type"
        DIEf.attype = DIE.attype
        DIEf.compile_unit = DIE.compile_unit
        DIEf.dSYM.DIELookup[DIEf.address] = DIEf
        DIEc = DIEDict()
        DIEc.dSYM = DIE.dSYM
        DIEc.address = ("%s_2" % DIE.GetAddress())
        DIEc.tag = "TAG_pointer_type"
        DIEc.attype = DIE.GetContainingType()
        DIEc.compile_unit = DIE.compile_unit
        DIEc.dSYM.DIELookup[DIEc.address] = DIEc
        # These are methods; the bare names used previously raised NameError.
        self.AddFunctionPointer(DIEf, None, attype)
        self.AddPointer(DIEc, None, DIEc.attype)
        newtype.fields = [{"name":None, "attype":DIEf}, {"name":None, "attype":DIEc}]

    def AddPointer(self, DIE, name, attype):
        """Add a pointer type for ``DIE`` whose target is ``attype``."""
        newtype = self.AddType(name, self.Type.TYPE_pointer, DIE)
        newtype.attype = attype

    def AddStruct(self, DIE, name, child):
        """Add a struct type with an (initially empty) field list. ``child`` is unused."""
        # Find multiple inheritance
        # 0x\w+: +TAG_inheritance.*\n( +AT_.*\n)*\n0x(\w+): +TAG_inheritance.*
        # Find bit fields
        # 0x\w+: +TAG_.*\n( +AT_.*\n)* +AT_\w*bit_offset.*\n( +AT_.*\n)*
        newtype = self.AddType(name, self.Type.TYPE_struct, DIE)
        newtype.fields = []

    def AddClass(self, DIE, name, child):
        """Add a class as a struct type with an empty field list. ``child`` is unused."""
        newtype = self.AddType(name, self.Type.TYPE_struct, DIE)
        newtype.fields = []

    def AddTypedef(self, DIE, name, attype, atuuid):
        """Add a typedef; ``attype`` is None for a base type, then ``atuuid`` names the target."""
        # attype is None for base type, use uuid instead
        newtype = self.AddType(name, self.Type.TYPE_typedef, DIE)
        newtype.attype = attype
        # Store the argument, not the ``uuid`` module.
        newtype.atuuid = atuuid

    def AddBaseType(self, DIE):
        """Map a base-type DIE onto a Hopper base type.

        An exact (name, size, encoding) match reuses the Hopper uuid directly.
        Otherwise the first preferred base type with the same size and encoding
        becomes the target of a typedef named after the DIE.  Prints an error
        when neither exists.
        """
        for basetype in self.BaseTypes:
            if basetype['name'] == DIE.AT_name and basetype['size'] == DIE.AT_byte_size and basetype['encoding'] == DIE.AT_encoding:
                DIE.uuid = basetype['uuid']
                DIE.baseHopperType = True
                return
        for basetype in self.BaseTypes:
            if basetype['preferred'] == True and basetype['size'] == DIE.AT_byte_size and basetype['encoding'] == DIE.AT_encoding:
                self.AddTypedef(DIE, DIE.AT_name, None, basetype['uuid'])
                return
        print("Error: cannot find a base type %s" % DIE.GetAddress())

    def AddArray(self, DIE, name, attype):
        """Add array type(s) for ``DIE``, one per TAG_subrange_type child.

        Dimensions are processed from the last child to the first so that each
        outer dimension's element type is the inner dimension just created.
        The first (outermost) dimension is attached to ``DIE`` itself and
        carries ``name``; the others are anonymous and attached to their
        subrange child.
        """
        # Find multiple sub ranges examples:
        # ^0x\w+:([ ]+)TAG_subrange_type.*\n( +AT_.*\n)*\n0x\w+:\1TAG
        curType = attype
        children = getattr(DIE, "children", [])
        # Stop value is -1 so index 0 (the named, outermost dimension) is reached.
        for i in range(len(children) - 1, -1, -1):
            child = children[i]
            if child.tag == "TAG_subrange_type":
                if hasattr(child, "AT_count"):
                    if i == 0:
                        newtype = self.AddType(name, self.Type.TYPE_array, DIE)
                    else:
                        newtype = self.AddType(None, self.Type.TYPE_array, child)
                    newtype.attype = curType
                    # The count lives on the subrange child, not on the array DIE.
                    newtype.count = child.AT_count
                else:
                    print ("Error getting count %s" % child.GetAddress())
            else:
                print ("Error getting count %s" % child.GetAddress())
            curType = child

    def AddEnumeration(self, DIE, name):
        """Add an enumeration type listing every TAG_enumerator child of ``DIE``."""
        # Find multiple enumerations examples:
        # ^0x\w+:([ ]+)TAG_enumerator.*\n( +AT_.*\n)*\n0x\w+:\1TAG
        newtype = self.AddType(name, self.Type.TYPE_enumeration, DIE)
        newtype.size = DIE.AT_byte_size
        newtype.enumerations = []
        for child in getattr(DIE, "children", []):
            if child.tag == "TAG_enumerator":
                newtype.enumerations.append({"name":child.AT_name, "value":child.AT_const_value})
            else:
                print ("Error getting enumeration %s" % child.GetAddress())

    def DumpHex(self):
        """Placeholder: not implemented yet (see the to-do notes below)."""
        # create types for unknown base types such as "long double"
        # go through all Types and delete duplicates
        # go through all pointers, if pointer to hopper base type then replace pointer uuid with base type uuid
        # pointer with no type void *
        return
#========================================================================================= | |
class lldb(dict):
    """Stand-in for the lldb module: only the type-class constants used here."""
    eTypeClassClass, eTypeClassUnion, eTypeClassStruct = -1, -2, -3
#========================================================================================= | |
# Splits a type name into (everything before, trailing "[...]" dimensions);
# group 2 is empty when the name has no trailing array suffix.
arrtypeRE = re.compile(
    r'^(.*?)((?:\[[\]\[\d]*)?)$'
)
#========================================================================================= | |
class DIEDict(dict):
    """One debugging information entry (DIE).

    Parsed attributes (``tag``, ``address``, ``AT_name``, ``AT_type``,
    ``AT_byte_size``, ...) are stored as instance attributes and their
    presence is tested with ``hasattr``.  ``dSYM`` refers to the owning reader
    (it provides ``DIELookup`` and ``sourceType``), ``compile_unit`` to the
    enclosing compile unit and ``parent`` / ``children`` to the DIE tree.
    """

    def GetOffsetInBytes(self):
        """Return the member's byte offset, 0 on a non-integer location, or None if unknown."""
        if hasattr(self, "AT_data_member_location"):
            if (self.AT_data_member_location.__class__.__name__ != "int"):
                print("Error in tag %s %s (AT_data_member_location) containing value (%s)" % (self.GetAddress(), self.tag, self.AT_data_member_location))
                return 0
            return self.AT_data_member_location
        if hasattr(self, "AT_data_bit_offset"):
            return self.AT_data_bit_offset >> 3
        return None

    def GetOffsetInBits(self):
        """Return the bit offset: AT_bit_offset, else the low 3 bits of AT_data_bit_offset, else 0."""
        if hasattr(self, "AT_bit_offset"):
            return self.AT_bit_offset
        if hasattr(self, "AT_data_bit_offset"):
            return self.AT_data_bit_offset & 7
        return 0

    def GetClass(self):
        """Return the lldb type-class constant for class/union/struct tags, else None."""
        ctype = None
        if self.tag == "TAG_class_type":
            ctype = lldb.eTypeClassClass
        elif self.tag == "TAG_union_type":
            ctype = lldb.eTypeClassUnion
        elif self.tag == "TAG_structure_type":
            ctype = lldb.eTypeClassStruct
        return ctype

    def GetNumberOfDirectBaseClasses(self):
        """Return the length of ``DirectBaseClasses`` (0 when absent)."""
        if not hasattr(self, "DirectBaseClasses"):
            return 0
        return len(self.DirectBaseClasses)

    def GetDirectBaseClassAtIndex(self, i):
        """Return the i-th direct base class entry."""
        return self.DirectBaseClasses[i]

    def GetNumberOfVirtualBaseClasses(self):
        """Return the length of ``VirtualBaseClasses`` (0 when absent)."""
        if not hasattr(self, "VirtualBaseClasses"):
            return 0
        return len(self.VirtualBaseClasses)

    def GetVirtualBaseClassAtIndex(self, i):
        """Return the i-th virtual base class entry."""
        return self.VirtualBaseClasses[i]

    def GetNumberOfFields(self):
        """Return the length of ``Fields`` (0 when absent)."""
        if not hasattr(self, "Fields"):
            return 0
        return len(self.Fields)

    def GetFieldAtIndex(self, i):
        """Return the i-th field entry."""
        return self.Fields[i]

    def HasType(self):
        """Return True when this DIE has an AT_type attribute."""
        return hasattr(self, "AT_type")

    def GetType(self):
        """Return the DIE referenced by AT_type, or None (with an error printed)."""
        if self.HasType():
            return self.dSYM.DIELookup[self.AT_type]
        print("Error getting type %s" % self.GetAddress())
        return None

    def GetBaseType(self):
        """Return the referenced type with typedefs followed, or None (with an error printed)."""
        if self.HasType():
            result = self.GetType()
            if result.tag == "TAG_typedef":
                return result.GetBaseType()
            return result
        print("Error getting base type %s" % self.GetAddress())
        return None

    def GetContainingType(self):
        """Return the DIE referenced by AT_containing_type, or None (with an error printed)."""
        if hasattr(self, "AT_containing_type"):
            return self.dSYM.DIELookup[self.AT_containing_type]
        print("Error getting conter type %s" % self.GetAddress())
        return None

    def GetNameForType(self, forType):
        """Build a display name for this DIE.

        When ``forType`` is false and the DIE has AT_name, that name is
        returned.  Otherwise the name is synthesised from the tag (anonymous
        aggregates, qualifiers, pointers, references, arrays, functions).
        Returns "" for an unnamed union member and for unhandled tags (after
        printing an error), and None for any other unnamed TAG_member.
        """
        if (not forType) and hasattr(self, "AT_name"):
            return self.AT_name
        elif self.tag == "TAG_structure_type":
            return "(anonymous struct)"
        elif self.tag == "TAG_union_type":
            return "(anonymous union)"
        elif self.tag == "TAG_class_type":
            return "(anonymous class)"
        elif self.tag == "TAG_enumeration_type":
            return "(anonymous enum)"
        elif self.tag == "TAG_const_type":
            if self.HasType():
                return "const " + self.GetType().GetName()
            return "const void"
        elif self.tag == "TAG_volatile_type":
            if self.HasType():
                return "volatile " + self.GetType().GetName()
            return "volatile void"
        elif self.tag == "TAG_pointer_type":
            if not self.HasType():
                return "void *"
            result = self.GetType().GetName()
            if result[-1:] == "*":
                return result + "*"
            return result + " *"
        elif self.tag == "TAG_reference_type":
            if self.HasType():
                return "&" + self.GetType().GetName()
            return "& void"
        elif self.tag == "TAG_ptr_to_member_type":
            if self.HasType():
                result = self.GetType().GetName()
            else:
                print("Error getting type %s" % self.GetAddress())
                result = "void"
            if result[-1:] == "*":
                return result + "*"
            return result + " *"
        elif self.tag == "TAG_array_type":
            counts = self.GetCounts()
            if self.HasType():
                arrtype = self.GetType().GetName()
            else:
                print("Error getting type %s" % self.GetAddress())
                arrtype = "void"
            countstr = ""
            for count in counts:
                if count is None:
                    countstr += "[]"
                else:
                    countstr += "[%d]" % count
            p = arrtypeRE.match(arrtype)
            if p:
                # handles array of array: our dimensions go before the element's
                return p.group(1) + countstr + p.group(2)
            print("Error parsing type name for array %s %s" % (self.GetAddress(), arrtype))
            return arrtype + countstr
        elif self.tag == "TAG_subroutine_type" or self.tag == "TAG_subprogram":
            if self.HasType():
                returntype = self.GetType().GetName()
            else:
                returntype = "void"
            i = 0
            result = returntype + " ()("
            if hasattr(self, "children"):
                for child in self.children:
                    i += 1
                    if child.HasType():
                        if hasattr(child, "AT_artificial"):
                            # artificial parameters are shown commented out
                            if i < len(self.children):
                                result += "/* " + child.GetType().GetName() + ", */ "
                            else:
                                result += "/* " + child.GetType().GetName() + " */"
                        else:
                            result += child.GetType().GetName()
                            if i < len(self.children):
                                result += ", "
                    elif child.tag == "TAG_unspecified_parameters":
                        result += "..."
                    else:
                        # report the offending child, not the function itself
                        print("Error: unknown parameter type %s" % child.GetAddress())
            result += ")"
            return result
        elif self.tag == "TAG_member":
            if self.GetType().GetClass() == lldb.eTypeClassUnion:
                return ""  # unnamed union member
            return None
        elif self.tag == "TAG_typedef":
            return self.GetType().GetNameForType(forType)
        else:
            print("Error getting name for %s %s" % (self.GetAddress(), self.tag))
            return ""

    def GetName(self):
        """Return ``GetNameForType(False)``."""
        return self.GetNameForType(False)

    def GetByteSizeForAlign0(self, forAlign, class_depth=0):
        """Compute the byte size, or with ``forAlign`` the size that drives alignment.

        With ``forAlign`` on a class/struct/union the result is the largest
        alignment among direct base classes, fields and (at ``class_depth`` 0)
        the entries of ``AllVirtualBaseClasses``.  Returns 1 after printing an
        error when nothing applies.  Call through GetByteSizeForAlign, which
        adds the recursion guard.
        """
        if forAlign and (self.tag == "TAG_class_type" or self.tag == "TAG_structure_type" or self.tag == "TAG_union_type"):
            max_align = 1
            numClasses = self.GetNumberOfDirectBaseClasses()
            for i in range(numClasses):
                member = self.GetDirectBaseClassAtIndex(i)
                m_type = member.GetBaseType()
                m_align = m_type.GetAlign(class_depth+1)
                if m_align > max_align:
                    max_align = m_align
            numFields = self.GetNumberOfFields()
            for i in range(numFields):
                member = self.GetFieldAtIndex(i)
                m_type = member.GetBaseType()
                m_align = m_type.GetAlign()
                if m_align > max_align:
                    max_align = m_align
            if class_depth == 0 and hasattr(self, "AllVirtualBaseClasses"):
                for virtualbaseclassinfo in self.AllVirtualBaseClasses:
                    member = virtualbaseclassinfo.member
                    m_type = member.GetBaseType()
                    m_align = m_type.GetAlign()
                    if m_align > max_align:
                        max_align = m_align
            return max_align
        if hasattr(self, "AT_byte_size"):
            if self.AT_byte_size == 1 and (self.tag == "TAG_class_type" or self.tag == "TAG_structure_type") and self.GetNumberOfFields() == 0:
                # classes have size 1 when they don't contain any fields
                return 0
            #print("byte size for %s class_depth:%d forAlign:%d" % (self.GetAddress(), class_depth, forAlign))
            return self.AT_byte_size
        if self.tag == "TAG_const_type":
            return self.GetType().GetByteSizeForAlign(forAlign)
        if self.tag == "TAG_volatile_type":
            return self.GetType().GetByteSizeForAlign(forAlign)
        if self.tag == "TAG_typedef":
            return self.GetType().GetByteSizeForAlign(forAlign)
        if self.tag == "TAG_pointer_type":
            return self.compile_unit.addr_size
        if self.tag == "TAG_ptr_to_member_type":
            return self.compile_unit.addr_size * 2
        if self.tag == "TAG_array_type":
            if forAlign:
                return self.GetType().GetAlign()
            size = self.GetType().GetByteSize()
            counts = self.GetCounts()
            for count in counts:
                if count is None:
                    count = 0
                size *= count
            return size
        if self.HasType():
            if forAlign:
                return self.GetType().GetAlign()
            return self.GetType().GetByteSize()
        if hasattr(self, "AT_bit_size") and hasattr(self, "AT_data_bit_offset"):
            # '+' binds tighter than '&', so the mask must be parenthesised;
            # unparenthesised this computed offset & (7 + bit_size).
            return ((self.AT_data_bit_offset & 7) + self.AT_bit_size) >> 3
        if self.tag == "TAG_structure_type" and self.GetNumberOfFields() == 0:
            return 0
        print("Error getting byte size for %s %s %s class_depth:%d forAlign:%d" % (self.GetAddress(), self.tag, self.GetName(), class_depth, forAlign))
        return 1

    def GetByteSizeForAlign(self, forAlign, class_depth=0):
        """Recursion-guarded wrapper around GetByteSizeForAlign0.

        Returns 1 (after printing an error) when re-entered for the same DIE.
        The guard flag is removed even if the computation raises.
        """
        if hasattr(self, "GetByteSizeForAlignFlag"):
            print("Error recursive GetByteSizeForAlign %s" % self.GetAddress())
            return 1
        self.GetByteSizeForAlignFlag = 1
        try:
            return self.GetByteSizeForAlign0(forAlign, class_depth)
        finally:
            del self.GetByteSizeForAlignFlag

    def GetByteSize(self):
        """Return the byte size of this DIE."""
        return self.GetByteSizeForAlign(False)

    def GetCompactSize(self, class_depth=0):
        """Return the end offset of the last field (or last direct base class).

        Only class and struct DIEs are handled specially; every other tag
        returns GetByteSize().  Returns 1 (after printing an error) on
        re-entry for the same DIE, and 0 when there are neither fields nor
        direct base classes.
        """
        # doesn't include virtual classes
        if self.tag != "TAG_class_type" and self.tag != "TAG_structure_type":
            return self.GetByteSize()
        if hasattr(self, "GetCompactSizeFlag"):
            print("Error recursive GetCompactSize %s" % self.GetAddress())
            return 1
        self.GetCompactSizeFlag = 1
        try:
            m_offset = None
            numFields = self.GetNumberOfFields()
            if numFields > 0:
                member = self.GetFieldAtIndex(numFields-1)
                # The bit-field attributes belong to the member, not to the
                # enclosing class, so they must be tested on the member.
                if hasattr(member, "AT_bit_size") and hasattr(member, "AT_data_bit_offset"):
                    m_offset = (member.AT_data_bit_offset + member.AT_bit_size + 7) >> 3
                else:
                    m_offset = member.GetOffsetInBytes() + member.GetByteSize()
            if m_offset is None:
                numClasses = self.GetNumberOfDirectBaseClasses()
                if numClasses > 0:
                    member = self.GetDirectBaseClassAtIndex(numClasses - 1)
                    # NOTE(review): this is called on the inheritance entry, so
                    # it resolves to the base class's byte size; confirm whether
                    # member.GetBaseType().GetCompactSize(...) was intended.
                    m_offset = member.GetOffsetInBytes() + member.GetCompactSize(class_depth + 1)
            if m_offset is None:
                m_offset = 0
            return m_offset
        finally:
            del self.GetCompactSizeFlag

    def GetAlign(self, class_depth=0):
        """Return 8, 4, 2 or 1: the largest of those not exceeding the alignment size."""
        m_size = self.GetByteSizeForAlign(True, class_depth)
        if m_size >= 8:
            return 8
        if m_size >= 4:
            return 4
        if m_size >= 2:
            return 2
        return 1

    def GetCounts(self):
        """Return one element count per child; None marks a subrange without bounds.

        Non-subrange children print an error and contribute nothing.
        """
        counts = []
        if hasattr(self, "children"):
            for child in self.children:
                if child.tag == "TAG_subrange_type":
                    if hasattr(child, "AT_count"):
                        counts.append(child.AT_count)
                    elif hasattr(child, "AT_upper_bound"):
                        if hasattr(child, "AT_lower_bound"):
                            counts.append(child.AT_upper_bound + 1 - child.AT_lower_bound)
                        else:
                            counts.append(child.AT_upper_bound + 1)
                    else:
                        counts.append(None)
                else:
                    print ("Error getting count %s" % self.GetAddress())
        return counts

    def GetBitSize(self):
        """Return AT_bit_size when present, otherwise the byte size times 8."""
        if hasattr(self, "AT_bit_size"):
            return self.AT_bit_size
        return self.GetByteSize() * 8

    def UpdateBaseClassLists(self):
        """Append this entry to the parent's VirtualBaseClasses (if it has AT_virtuality) or DirectBaseClasses."""
        if hasattr(self, "AT_virtuality"):
            if debug: dbgprint("UpdateBaseClassLists virtual base class %s->:%08x: added to %s %s" % (self.GetAddress(), self.AT_type, self.parent.GetAddress(), self.parent.GetName()))
            if not hasattr(self.parent, "VirtualBaseClasses"):
                self.parent.VirtualBaseClasses = []
            self.parent.VirtualBaseClasses.append(self)
        else:
            if debug: dbgprint("UpdateBaseClassLists direct base class %s" % self.parent.GetAddress())
            if not hasattr(self.parent, "DirectBaseClasses"):
                self.parent.DirectBaseClasses = []
            self.parent.DirectBaseClasses.append(self)

    def CheckName(self):
        """Remove AT_name when it is "" or " ", setting ``isnameblank`` instead."""
        if self.AT_name != " " and self.AT_name != "":
            return
        self.isnameblank = 1
        #print("Error: DIE (%s) blank name (%s)" % (self.GetAddress(), self.AT_name))
        del self.AT_name

    def SetName(self, AT_name):
        """Set AT_name (printing an error if it changes) and drop it again if blank."""
        if hasattr(self, "AT_name") and self.AT_name != AT_name:
            print("Error: DIE (%s) name (%s) changed to (%s)" % (self.GetAddress(), self.AT_name, AT_name))
        self.AT_name = AT_name
        self.CheckName()

    def SetNameWithClass(self, nameLong):
        """Set ``nameLong``, printing an error if it changes."""
        if hasattr(self, "nameLong") and self.nameLong != nameLong:
            print("Error: DIE (%s) name (%s) changed to (%s)" % (self.GetAddress(), self.nameLong, nameLong))
        self.nameLong = nameLong

    def SetType(self, AT_type):
        """Set AT_type, printing an error if it changes."""
        if hasattr(self, "AT_type") and self.AT_type != AT_type:
            print("Error: DIE (%s) type (%s) changed to (%s)" % (self.GetAddress(), self.AT_type, AT_type))
        self.AT_type = AT_type

    def SetSize(self, AT_byte_size):
        """Set AT_byte_size; an existing AT_bit_size is removed and changes are reported."""
        if hasattr(self, "AT_bit_size"):
            print("Error: DIE (%s) size (%d bits) changed to (%d)" % (self.GetAddress(), self.AT_bit_size, AT_byte_size))
            del self.AT_bit_size
        elif (hasattr(self, "AT_byte_size") and self.AT_byte_size != AT_byte_size):
            print("Error: DIE (%s) size (%d) changed to (%d)" % (self.GetAddress(), self.AT_byte_size, AT_byte_size))
        self.AT_byte_size = AT_byte_size

    def SetTag(self, tag):
        """Set the tag.

        A change is reported as an error unless ``firsttag`` is set, in which
        case that marker is removed instead.
        """
        if hasattr(self, "tag"):
            if self.tag != tag:
                if hasattr(self, "firsttag"):
                    del self.firsttag
                else:
                    print("Error: DIE (%s) tag (%s) changed to (%s)" % (self.GetAddress(), self.tag, tag))
        self.tag = tag

    def Settypenumber(self, typenumber):
        """Record ``typenumber`` and register this DIE under it in ``dSYM.DIELookup``."""
        self.typenumber = typenumber
        self.dSYM.DIELookup[typenumber] = self
        if debug: dbgprint("Added to DIELookup «%s»" % (self.dSYM.DIELookup[typenumber].typenumber))

    def AppendChild(self, DIE):
        """Append ``DIE`` to ``children``, creating the list on first use."""
        if not hasattr(self, "children"):
            self.children = []
        self.children.append(DIE)

    def GetAddress(self):
        """Return a printable identifier for this DIE.

        stabs input: "[address]" plus the type number when known.
        Otherwise: the address as ":%08x:".
        A string address (used for synthetic DIEs such as the ones
        Hopper.AddPointerToMember creates) is returned unchanged because it
        cannot be formatted numerically.
        """
        if isinstance(self.address, str):
            return self.address
        if self.dSYM.sourceType == "stabs":
            if hasattr(self, "typenumber"):
                return ("[%d] «%s»" % (self.address, self.typenumber))
            return ("[%d]" % (self.address))
        return (":%08x:" % (self.address))
#========================================================================================= | |
# make VTables | |
class VTableInfo(dict):
    """Attribute bag describing one vtable (filled in by MakeVTables)."""
class VTableItem(dict):
    """Attribute bag for one vtable slot."""
class DerivationItem(dict):
    """Attribute bag for one step of a derivation path."""
class VirtualBaseClassInfo(dict):
    """Attribute bag pairing a virtual base class member with its offset."""
def CheckVTables(msg, parent):
    """Walk the DIE tree below ``parent`` and visit every virtual base class.

    For each descendant, every virtual base class entry is fetched, reported
    through dbgprint when ``debug`` is set (tagged with ``msg``), and resolved
    with GetBaseType().  Nothing is returned.
    """
    # for every class or structure, make a list of vtables (more than one vtable exists for multiple inheritance)
    if not hasattr(parent, "children"):
        return
    for node in parent.children:
        for index in range(node.GetNumberOfVirtualBaseClasses()):
            vbase = node.GetVirtualBaseClassAtIndex(index)
            if debug: dbgprint("%d %s Got virtual base class %s %s" % (index, node.GetAddress(), vbase.GetAddress(), msg))
            vbase.GetBaseType()
        CheckVTables(msg, node)
def dumpderivationpath(derivationPath):
    """Render a derivation path as comma-separated "index:[virtual ]name" entries."""
    pieces = []
    for step in derivationPath:
        marker = "virtual " if step.isvirtual else ""
        pieces.append("%d:%s%s" % (step.index, marker, step.DIE.GetName()))
    return ",".join(pieces)
# MakeVTables: recursive worker that builds the vtable list for the class at
# derivationPath[0] ("thefirst") by examining the class at derivationPath[-1]
# ("thelast"), which is located begin_offset bytes into thefirst.
#
# NOTE(review): this copy of the file has lost all indentation, so the nesting
# of the blocks below cannot be read from the text.  In particular, confirm
# against the original whether the virtual-base-class loop sits inside
# `if vPtrOffset == None:` or at function level before restoring indentation.
def MakeVTables(derivationPath, begin_offset=0): | |
# prefix indents the debug output by four spaces per derivation path entry
prefix = "%*s" % (4 * len(derivationPath), "") | |
thefirst = derivationPath[0].DIE | |
thelast = derivationPath[-1].DIE | |
if debug: print("%sMakeVTables offset:%d first:%s last:%s" % (prefix, begin_offset, thefirst.GetAddress(), thelast.GetAddress())) | |
# Does this class contain a vPtr? | |
# A vPtr is a field that has AT_artificial and whose name contains "vptr";
# its offset is made relative to thefirst by adding begin_offset.
numFields = thelast.GetNumberOfFields() | |
vPtrOffset = None | |
for i in range(numFields): | |
member = thelast.GetFieldAtIndex(i) | |
thename = member.GetName() | |
if thename != None: | |
#print("%smember:%s" % (prefix, thename)) | |
if "vptr" in thename and hasattr(member, "AT_artificial"): | |
vPtrOffset = member.GetOffsetInBytes() + begin_offset | |
if debug: print("%sfound vptr (%s) classoffset:0x%x vptroffset:0x%x" % (prefix, dumpderivationpath(derivationPath), begin_offset, vPtrOffset)) | |
break | |
if vPtrOffset == None: | |
# No vPtr exists, follow base classes | |
# Each direct base class is pushed onto derivationPath, processed
# recursively at its own offset, then popped again.  Its path text gets
# the two-digit suffix i + 1.
numClasses = thelast.GetNumberOfDirectBaseClasses() | |
for i in range(numClasses): | |
member = thelast.GetDirectBaseClassAtIndex(i) | |
m_offset = member.GetOffsetInBytes() + begin_offset | |
m_type = member.GetBaseType() | |
derivationItem = DerivationItem() | |
derivationItem.DIE = m_type | |
derivationItem.index = i | |
derivationItem.isvirtual = False | |
derivationItem.VTables = thefirst.VTables | |
derivationItem.derivationPathText = derivationPath[-1].derivationPathText + "%02d" % (i + 1) | |
derivationPath.append(derivationItem) | |
if debug: print("%s[ derived (%s) classoffset:0x%x baseclassoffset:0x%x" % (prefix, dumpderivationpath(derivationPath), begin_offset, m_offset)) | |
MakeVTables(derivationPath, m_offset) | |
if debug: print("%s] derived (%s) classoffset:0x%x baseclassoffset:0x%x" % (prefix, dumpderivationpath(derivationPath), begin_offset, m_offset)) | |
derivationPath.pop() | |
# Virtual base classes.  A base already recorded in
# thefirst.IncludedVirtualBaseClasses reuses its recorded offset (path suffix
# i + 31); a new one is placed at thefirst.VirtualBaseClassOffset rounded up
# to the base's alignment and is recorded (path suffix i + 61).
# NOTE(review): VirtualBaseClassOffset is advanced by the base's byte size
# from its unaligned value, not from the aligned m_offset - confirm intended.
numClasses = thelast.GetNumberOfVirtualBaseClasses() | |
for i in range(numClasses): | |
member = thelast.GetVirtualBaseClassAtIndex(i) | |
m_type = member.GetBaseType() | |
if debug: print("%s%d/%d Checking virtual base class %s type:%s:%08x: numv:%d thefirst:%s" % (prefix, i, numClasses, member.GetAddress(), m_type.GetAddress(), m_type.address, len(thefirst.AllVirtualBaseClasses), thefirst.GetAddress())) | |
if m_type.GetAddress() in thefirst.IncludedVirtualBaseClasses: | |
virtualbaseclassinfo = thefirst.IncludedVirtualBaseClasses[m_type.GetAddress()] | |
m_offset = virtualbaseclassinfo.offset | |
derivationItem = DerivationItem() | |
derivationItem.DIE = m_type | |
derivationItem.index = 0 | |
derivationItem.isvirtual = True | |
derivationItem.VTables = thefirst.VTablesVirtual | |
derivationItem.derivationPathText = derivationPath[-1].derivationPathText + "%02d" % (i + 31) | |
derivationPath.append(derivationItem) | |
if debug: print("%s[ virtual 2nd derived (%s) classoffset:0x%x baseclassoffset:0x%x member%s type%s" % (prefix, dumpderivationpath(derivationPath), begin_offset, m_offset, member.GetAddress(), m_type.GetAddress())) | |
MakeVTables(derivationPath, m_offset) | |
if debug: print("%s] virtual 2nd derived (%s) classoffset:0x%x baseclassoffset:0x%x member%s type%s" % (prefix, dumpderivationpath(derivationPath), begin_offset, m_offset, member.GetAddress(), m_type.GetAddress())) | |
derivationPath.pop() | |
else: | |
m_offset = thefirst.VirtualBaseClassOffset | |
m_align = m_type.GetAlign() | |
m_offset = ((m_offset + m_align-1) & -m_align) | |
thefirst.VirtualBaseClassOffset += m_type.GetByteSize() | |
derivationItem = DerivationItem() | |
derivationItem.DIE = m_type | |
derivationItem.index = 0 | |
derivationItem.isvirtual = True | |
derivationItem.VTables = thefirst.VTablesVirtual | |
derivationItem.derivationPathText = derivationPath[-1].derivationPathText + "%02d" % (i + 61) | |
virtualbaseclassinfo = VirtualBaseClassInfo() | |
virtualbaseclassinfo.member = member | |
virtualbaseclassinfo.offset = m_offset | |
thefirst.AllVirtualBaseClasses.append(virtualbaseclassinfo) | |
if debug: print("%sAdded %s to %s.IncludedVirtualBaseClasses" % (prefix, m_type.GetAddress(), thefirst.GetAddress())) | |
thefirst.IncludedVirtualBaseClasses[m_type.GetAddress()] = virtualbaseclassinfo | |
derivationPath.append(derivationItem) | |
if debug: print("%s[ virtual 1st derived (%s) m_type.address:%08x: classoffset:0x%x baseclassoffset:0x%x member%s type%s" % (prefix, dumpderivationpath(derivationPath), m_type.address, begin_offset, m_offset, member.GetAddress(), m_type.GetAddress())) | |
MakeVTables(derivationPath, m_offset) | |
if debug: print("%s] virtual 1st derived (%s) m_type.address:%08x: classoffset:0x%x baseclassoffset:0x%x member%s type%s" % (prefix, dumpderivationpath(derivationPath), m_type.address, begin_offset, m_offset, member.GetAddress(), m_type.GetAddress())) | |
derivationPath.pop() | |
if vPtrOffset != None: | |
# A vPtr exists, make a vtable for it. | |
# The VTableInfo for this vPtr offset is reused when one is already in
# thefirst.VTablesByOffset; otherwise a new one is created, appended to
# derivationPath[-1].VTables and indexed by offset.  vmax tracks the highest
# slot index added so far (-1 for a new table).
if vPtrOffset in thefirst.VTablesByOffset: | |
vtableinfo = thefirst.VTablesByOffset[vPtrOffset] | |
vmax = vtableinfo.vmax | |
else: | |
vtableinfo = VTableInfo() | |
vtableinfo.vPtrOffset = vPtrOffset | |
vtableinfo.mergedVTableEntries = {} | |
vtableinfo.derivationPathText = None | |
derivationPath[-1].VTables.append(vtableinfo) | |
thefirst.VTablesByOffset[vPtrOffset] = vtableinfo | |
vmax = -1 | |
mergedVTableEntries = vtableinfo.mergedVTableEntries | |
gotmultiinherit = False | |
gotvirtual = False | |
# Walk the derivation path from thelast back to thefirst, merging the
# VTableEntries of every class that has them.  A class is merged only when
# the table has no path text yet or the class's path text is shorter than it.
for derivationItem in reversed(derivationPath): | |
baseClass = derivationItem.DIE | |
if hasattr(baseClass, "VTableEntries"): | |
if debug: print("%sProcessing %s path:%s previouspath:%s" % (prefix, baseClass.GetName(), derivationItem.derivationPathText, vtableinfo.derivationPathText)) | |
if vtableinfo.derivationPathText == None or len(derivationItem.derivationPathText) < len(vtableinfo.derivationPathText): | |
if gotvirtual or gotmultiinherit: | |
# Trying to build vtable of multiple inheritance is hard. | |
# This is probably wrong - maybe check virtuality, and parameters, but then I might as well try to code a C++ compiler. | |
# We'll just check the name. | |
for k,w in iter(baseClass.VTableEntries.items()): | |
namesuper = w.GetName() | |
if debug: print("%s Looking for vtableitem %s" % (prefix, namesuper)) | |
for j,v1 in iter(mergedVTableEntries.items()): | |
v = v1.DIE | |
namebase = v.GetName() | |
if (namesuper == namebase or (namesuper[:1] == "~" and namebase[:1] == "~")): | |
# a "non-virtual thunk" to w is what this probably is: | |
v1.DIE = w | |
if gotvirtual: | |
v1.ThunkType = "virtual thunk to " | |
else: | |
v1.ThunkType = "non-virtual thunk to " | |
if debug: print("%s Changed vtableitem 0x%x %s%s" % (prefix, v1.DIE.compile_unit.addr_size * j, v1.ThunkType, namesuper)) | |
break | |
else: | |
# Single-inheritance case: entries are merged by slot index j.  An existing
# slot is replaced only when the names agree (any two names starting with
# "~" count as agreeing); otherwise an error is printed.
# NOTE(review): in both error messages v.GetAddress() is paired with namebase
# although v's own name is namesuper - the names look swapped; confirm.
for j,v in iter(baseClass.VTableEntries.items()): | |
namesuper = v.GetName() | |
if debug: print("%s Adding vtableitem 0x%x %s" % (prefix, v.compile_unit.addr_size * j, namesuper)) | |
if j in mergedVTableEntries: | |
namebase = mergedVTableEntries[j].DIE.GetName() | |
if namesuper != namebase and not (namesuper[:1] == "~" and namebase[:1] == "~"): | |
if namesuper[:1] == "~" and j == 0 and 1 in mergedVTableEntries and mergedVTableEntries[1].DIE.GetName()[:1] == "~": | |
vtableitem = VTableItem() | |
vtableitem.DIE = v | |
mergedVTableEntries[1] = vtableitem | |
print("Error: performed workaround for vtable entry function name %s%s differing from super %s%s" % (v.GetAddress(), namebase, mergedVTableEntries[j].DIE.GetAddress(), namesuper)) | |
else: | |
print("Error: vtable entry function name %s%s differs from super %s%s" % (v.GetAddress(), namebase, mergedVTableEntries[j].DIE.GetAddress(), namesuper)) | |
else: | |
vtableitem = VTableItem() | |
vtableitem.DIE = v | |
mergedVTableEntries[j] = vtableitem | |
else: | |
if j > vmax: | |
vmax = j | |
vtableitem = VTableItem() | |
vtableitem.DIE = v | |
mergedVTableEntries[j] = vtableitem | |
else: | |
if debug: print("%sSkipping" % prefix) | |
if derivationItem.index > 0 and not gotmultiinherit: | |
# index is > 0 for non primary base class of multiple inheritance class. These requires a different method to build vtable. | |
if debug: print("%sgotmultiinherit vmax:0x%x" % (prefix, vmax * baseClass.compile_unit.addr_size)) | |
gotmultiinherit = True | |
if derivationItem.isvirtual and not gotvirtual: | |
if debug: print("%sgotvirtual vmax:0x%x" % (prefix, vmax * baseClass.compile_unit.addr_size)) | |
gotvirtual = True | |
# Remember the highest slot and, the first time only, the path that built it.
vtableinfo.vmax = vmax | |
if vtableinfo.derivationPathText == None: | |
vtableinfo.derivationPathText = derivationPath[-1].derivationPathText | |
if debug: print("%sadded vtable (%s) classoffset:0x%x vptroffset:0x%x vmax:%d numvtables:%d numvirtualvtables:%d" % (prefix, dumpderivationpath(derivationPath), begin_offset, vPtrOffset, vmax, len(thefirst.VTables), len(thefirst.VTablesVirtual))) | |
def _StartDerivationPath(classDIE):
    # Build the one-element derivation path that represents classDIE itself (no base classes yet).
    rootItem = DerivationItem()
    rootItem.DIE = classDIE
    rootItem.index = 0
    rootItem.isvirtual = False
    rootItem.VTables = classDIE.VTables
    rootItem.derivationPathText = "%02d" % 1
    return [rootItem]

def MakeAllVTables(parent):
    """Recursively visit every DIE below parent and build vtable lists for classes and structures.

    For each child tagged TAG_class_type or TAG_structure_type the vtable bookkeeping
    attributes are reset and MakeVTables() is called with a derivation path containing
    only that child (more than one vtable exists for multiple inheritance).
    Every child, whatever its tag, is then searched for nested classes and structures.
    """
    if not hasattr(parent, "children"):
        return
    for node in parent.children:
        if node.tag in ("TAG_class_type", "TAG_structure_type"):
            # start from empty vtable bookkeeping for this class
            node.VTables = []
            node.VTablesByOffset = {}
            node.VTablesVirtual = []
            node.AllVirtualBaseClasses = []
            node.IncludedVirtualBaseClasses = {}
            node.VirtualBaseClassOffset = node.GetCompactSize()
            path = _StartDerivationPath(node)
            if debug:
                print("[ starting (parent%s child%s %s)" % (parent.GetAddress(), node.GetAddress(), dumpderivationpath(path)))
            MakeVTables(path)
            if debug:
                print("]")
        # descend regardless of tag so classes inside compile units, namespaces, etc. are found
        MakeAllVTables(node)
#========================================================================================= | |
class DSYM_Reader:
    """Parse the text output of dwarfdump into a tree of DIEDict records.

    Process_dSYM() is the entry point. ReadDIEList() builds the parent/child tree from the
    indentation of the tag lines, and ReadNextDIE() parses one DIE (tag line plus its
    attribute lines) at a time.
    """

    # "0x...: Compile Unit: ... addr_size = 0x.. ..." header line; group 2 is the address size.
    compileUnitRE = re.compile(r'(0x[0-9a-f]+): Compile Unit: .* addr_size = (0x[0-9a-f]+) .*\n')
    # Tag line: group 1 = DIE address, group 2 = the spaces after the colon (their count is
    # used as the nesting depth), group 3 = tag name with any "DW_" prefix removed.
    tagRE = re.compile(r'(0x[0-9a-f]+):( +)(?:Unknown )?(?:DW_)?(TAG(?:_\w+| constant: 0x[0-9a-f]+)).*\n')
    # "0x...: NULL" line; ReadNextDIE stops there and returns None.
    nullRE = re.compile(r'(0x[0-9a-f]+): +(NULL)\n')
    # Empty line; ends the attribute list of a DIE.
    blankRE = re.compile(r'\n')
    # Continuation lines of multi-line AT_location, AT_ranges and AT_byte_size attributes.
    AT_locationRE = re.compile(r' +(.*?) *(\))?\n')
    AT_rangesRE = re.compile(r' +(?:\[(0x[0-9a-f]+)(?: - |, )(0x[0-9a-f]+)\)(\)?))| *(End \))\n')
    AT_byte_sizeRE = re.compile(r' +(?:\[(0x[0-9a-f]+)(?: - |, )(0x[0-9a-f]+)\)(?::?[^)\n]*)(\)?))| *(End \))\n')
    # Attribute line patterns, tried in order; the first one that matches wins.
    # Patterns with exactly two groups are (name, value); patterns with more groups get
    # special handling in _ReadAttribute.
    AT_REList = [
        re.compile(r' +(?:DW_)?(AT_location)\t?\( *(0x[0-9a-f]+)(\n)'), # loop until ')\n' is found
        re.compile(r' +(?:DW_)?(AT_ranges)\t?\( *(0x[0-9a-f]+)(\n)'), # loop until 'End )\n' is found
        re.compile(r' +(?:DW_)?(AT_byte_size)\t?\( *(0x[0-9a-f]+):? *(\n)'), # loop until 'End )\n' is found
        re.compile(r' +(?:DW_)?(AT_type)\t?\( *\{(0x[0-9a-f]+)\} \( .*? *\)\n'),
        re.compile(r' +(?:DW_)?(AT_vtable_elem_location)\t?\( *<(0x[0-9a-f]+)> ([0-9a-f]{2}) ([0-9a-f]{2})(?: ([0-9a-f]{2})?)? *\)\n'),
        re.compile(r' +(?:DW_)?(AT_vtable_elem_location)\t?\( *(?:DW_)?(OP_constu) (0x[0-9a-f]+) *\)\n'),
        re.compile(r' +(?:DW_)?(AT_data_member_location)\t?\( *(?:DW_)?(OP_plus_uconst) (0x[0-9a-f]+) *\)\n'), # found this in 10.10.5_14F2511 kernel
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *\{(0x[0-9a-f]+)\}".*" *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *\[(.*)\] *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *\{(.*)\} *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *"(.*)" *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *(0x\w+) ".*" *\)\n'),
        re.compile(r' +(?:Unknown )?(?:DW_)?(AT(?:_\w+| constant: 0x[0-9a-f]+))\t?\( *(.*) *\)\n'),
    ]
    # 16 hex digits with the top bit set (a negative value printed as unsigned 64 bit).
    neghexRE = re.compile(r'^0x[8-9a-f][0-9a-f]{15} *$')
    hexRE = re.compile(r'^(0x[0-9a-f]+):? *$')
    decRE = re.compile(r'^[-+]?\d+ *$')

    def ReadDIEList(self, f, parent):
        """Read DIEs from f and attach them below parent until the sibling list ends.

        f is the open dwarfdump text and parent is the record that receives the DIEs.
        The list ends when ReadNextDIE returns None (a NULL line or the end of the file)
        or when a DIE with a smaller indent than its siblings is read (reported as an error).
        A DIE with a larger indent becomes the first child of the most recently added
        sibling and the rest of that child list is read by a recursive call.
        """
        indent = None
        unexpectedlist = False
        if hasattr(parent, "children"):
            if len(parent.children) == 1:
                # called recursively right after the first child was attached
                indent = parent.children[0].indent
            elif len(parent.children) != 0:
                print("Error: unexpected list %s %s" % (parent.GetAddress(), parent.tag))
                for child in parent.children:
                    print(" %s %s" % (child.GetAddress(), child.tag))
                unexpectedlist = True
        while True:
            DIE = self.ReadNextDIE(f, parent.dSYM)
            if DIE is None:
                break
            if unexpectedlist:
                print("Error: first item of unexpected list %s %s" % (DIE.tag, DIE.GetAddress()))
                unexpectedlist = False
            if indent is None:
                indent = DIE.indent
            if DIE.indent > indent:
                # indent increased, this record is the first child of the last added record
                DIE.parent = parent.children[-1]
                DIE.parent.AppendChild(DIE)
            elif DIE.indent < indent:
                print("Error: indentation")
                break
            else:
                DIE.parent = parent
                parent.AppendChild(DIE)
            self._RecordInParent(DIE)
            if DIE.indent > indent:
                # read the remaining siblings of the child that was just attached
                self.ReadDIEList(f, DIE.parent)

    @staticmethod
    def _RecordInParent(DIE):
        # File DIE under the matching per-parent collection (base classes, Friends, Fields, VTableEntries).
        owner = DIE.parent
        if DIE.tag == "TAG_inheritance":
            DIE.UpdateBaseClassLists()
        elif DIE.tag == "TAG_friend":
            if not hasattr(owner, "Friends"):
                owner.Friends = []
            owner.Friends.append(DIE)
        elif hasattr(DIE, "AT_data_member_location") or hasattr(DIE, "AT_data_bit_offset"):
            if not hasattr(owner, "Fields"):
                owner.Fields = []
            owner.Fields.append(DIE)
        elif DIE.tag == "TAG_member" and not hasattr(DIE, "AT_external"):
            # a non-external member without a location is filed as a field at offset 0
            DIE.AT_data_member_location = 0
            DIE.noLocation = 1
            if not hasattr(owner, "Fields"):
                owner.Fields = []
            owner.Fields.append(DIE)
        elif hasattr(DIE, "AT_vtable_elem_location"):
            if not hasattr(owner, "VTableEntries"):
                owner.VTableEntries = {}
            slot = DIE.AT_vtable_elem_location
            if slot in owner.VTableEntries:
                nameold = owner.VTableEntries[slot].GetName()
                namenew = DIE.GetName()
                if namenew != nameold:
                    print("Error: %s has duplicate VTableEntries 0x%x %s%s %s%s" % (owner.GetAddress(), slot * DIE.compile_unit.addr_size, owner.VTableEntries[slot].GetAddress(), nameold, DIE.GetAddress(), namenew))
                    # workaround problem for Mammal and WingedAnimal examples
                    if slot == 0 and namenew[:1] == "~" and not nameold[:1] == "~" and 1 not in owner.VTableEntries:
                        owner.VTableEntries[1] = DIE
            else:
                owner.VTableEntries[slot] = DIE

    def ReadNextDIE(self, f, dSYM):
        """Parse the next DIE from f and return it, or None at a NULL line or end of file.

        Lines are consumed until a tag line is found; a "Compile Unit:" header seen on the
        way supplies addr_size for a following TAG_compile_unit. The attribute lines up to
        the next empty line are stored as attributes of the new DIEDict. The DIE is
        registered in dSYM.DIELookup by address; compile units are also appended to
        dSYM.CompileUnits and become dSYM.currentCompileUnit, while every other DIE gets
        compile_unit set to the current compile unit.
        """
        DIE = None
        addr_size = None
        for line in f:
            if self.nullRE.match(line):
                break
            c = self.compileUnitRE.match(line)
            if c:
                addr_size = int(c.group(2), 16)
                continue
            t = self.tagRE.match(line)
            if not t:
                continue
            DIE = DIEDict()
            DIE.dSYM = dSYM
            DIE.address = int(t.group(1), 16)
            DIE.indent = len(t.group(2))
            DIE.tag = t.group(3)
            # nested iteration over the same file object continues where the outer loop stopped
            for line in f:
                if self.blankRE.match(line):
                    break
                self._ReadAttribute(f, DIE, line)
            dSYM.DIELookup[DIE.address] = DIE
            if DIE.tag == "TAG_compile_unit":
                if addr_size is None:
                    # previously this raised an unbound local error at the assignment below
                    print("Error: compile unit %s has no Compile Unit header, addr_size is unknown" % DIE.GetAddress())
                dSYM.CompileUnits.append(DIE)
                dSYM.currentCompileUnit = DIE
                DIE.addr_size = addr_size
            else:
                DIE.compile_unit = dSYM.currentCompileUnit
            break # DIE created
        return DIE

    def _ReadAttribute(self, f, DIE, line):
        # Store the attribute described by line on DIE; multi-line attributes read their continuation lines from f.
        for atRE in self.AT_REList:
            m = atRE.match(line)
            if not m:
                continue
            name = m.group(1)
            if atRE.groups == 2:
                setattr(DIE, name, self._ConvertAttributeValue(name, m.group(2)))
            # the rest of these have more than 2 capture groups (sometimes the third capture group
            # is the linefeed just so we can do the following special processing)
            elif name == "AT_data_member_location":
                if m.group(2) == "OP_plus_uconst":
                    DIE.AT_data_member_location = int(m.group(3), 16)
            elif name == "AT_vtable_elem_location":
                DIE.AT_vtable_elem_location = self._DecodeVTableElemLocation(m, DIE)
            elif name == "AT_location":
                setattr(DIE, name, int(m.group(2), 16))
                DIE.AT_location_list = self._ReadLocationList(f, DIE)
            elif name == "AT_ranges":
                DIE.AT_ranges = int(m.group(2), 16)
                DIE.AT_ranges_list = self._ReadRangeList(f, DIE, self.AT_rangesRE, "AT_ranges_list")
            elif name == "AT_byte_size":
                DIE.AT_byte_size = int(m.group(2), 16)
                DIE.AT_byte_size_list = self._ReadRangeList(f, DIE, self.AT_byte_sizeRE, "AT_byte_size_list")
            else:
                print("Error in tag %s with line %s" % (DIE.GetAddress(), line))
            return # only the first matching pattern is used

    @classmethod
    def _ConvertAttributeValue(cls, name, text):
        # Convert a two-group attribute value: hex and decimal text become int, anything else stays a string.
        if name == "AT_bit_offset" and cls.neghexRE.match(text):
            # 64 bit two's complement: a value with the top bit set is a negative offset
            return -int(2**64 - int(text, 16))
        hexmatch = cls.hexRE.match(text)
        if hexmatch:
            return int(hexmatch.group(1), 16)
        if cls.decRE.match(text):
            return int(text, 10)
        return text

    @staticmethod
    def _DecodeVTableElemLocation(m, DIE):
        # Return the vtable slot number from either form of AT_vtable_elem_location.
        if m.group(2) == "OP_constu":
            return int(m.group(3), 16)
        # Byte dump form "<0xNN> 10 xx [yy]": group 2 is the byte count, group 3 the first
        # byte and groups 4.. the value bytes, 7 bits each, least significant byte first.
        numbytes = int(m.group(2), 16)
        thenum = 0
        for i in range(numbytes + 2, 3, -1):
            part = int(m.group(i), 16)
            # only the last value byte may have its high bit clear
            if (i == numbytes + 2) == ((part & 128) != 0):
                # report the offending byte (the original printed the first byte, group 3)
                print("Error: unexpected high bit of elem location byte (%s) %s" % (m.group(i), DIE.GetAddress()))
            thenum = thenum * 128 + (part & 127)
        if m.group(3) != "10":
            # 0x10 is the DWARF opcode DW_OP_constu, the only one handled here
            print("Error: unexpected elem location type (%s) %s" % (m.group(3), DIE.GetAddress()))
        return thenum

    def _ReadLocationList(self, f, DIE):
        # Collect the continuation lines of an AT_location attribute up to the line that ends with ')'.
        lines = []
        for line in f:
            m = self.AT_locationRE.match(line)
            if m:
                lines.append(m.group(1))
                if m.group(2) == ")":
                    break # AT_location_list finished
            else:
                print("Error in tag %s (AT_location) with line %s" % (DIE.GetAddress(), line))
                break # AT_location_list finished with error
        return lines

    @staticmethod
    def _ReadRangeList(f, DIE, listRE, listname):
        # Collect [start, end] pairs of an AT_ranges or AT_byte_size list until ')' or 'End )' is found.
        lines = []
        for line in f:
            m = listRE.match(line)
            if m:
                if m.group(4) == 'End )':
                    break # list finished
                lines.append([m.group(1), m.group(2)])
                if m.group(3) == ')':
                    break # list finished
            else:
                print("Error in tag %s (%s) with line %s" % (DIE.GetAddress(), listname, line))
                break # list finished with error
        return lines

    def Process_dSYM(self, filename):
        """Parse filename and return the root DIEDict describing its DWARF information.

        A filename that matches '.*\\.txt' (the pattern is not anchored at the end, so any
        name containing ".txt" qualifies) is read as saved dwarfdump output. Anything else
        is passed to the dwarfdump command and the captured output is parsed. After the
        tree is built, MakeAllVTables() is run on it.
        """
        if re.match(r'.*\.txt', filename):
            f = open(filename, "r")
            rundwarfdump = False
        else:
            # Text mode is required: the default "w+b" yields bytes on Python 3, which the
            # str regular expressions of this class cannot match.
            f = tempfile.NamedTemporaryFile(mode="w+")
            rundwarfdump = True
        try:
            if rundwarfdump:
                status = subprocess.call(["dwarfdump", filename], stdout=f)
                if status != 0:
                    print("Error: dwarfdump exited with status %d for %s" % (status, filename))
                f.seek(0)
            dSYM = DIEDict()
            dSYM.dSYM = dSYM
            dSYM.sourceType = "dwarf"
            dSYM.address = 0
            dSYM.currentCompileUnit = None
            dSYM.DIELookup = {}
            dSYM.CompileUnits = []
            dSYM.filename = filename
            self.ReadDIEList(f, dSYM)
            del dSYM.currentCompileUnit
        finally:
            # close (and for the temporary file, delete) the input even if parsing fails
            f.close()
        MakeAllVTables(dSYM)
        return dSYM
#========================================================================================= | |
# Some (not all) stabs notes | |
""" | |
{name}:{symbol-descriptor}{type-information} The overall format of the string field for most stab types. | |
{name} can be omitted for unnamed types. | |
{symbol-descriptor} can be omitted for variables. | |
{type-information} is one of these: | |
({filenumber},{type-number})|{type-number} - A {type-number}. | |
{type-number} - A {type reference}, referring directly to a type that has already been defined. | |
{type-number}= - A {type definition}, where the number represents a new type which is about to be defined. The type definition may refer to other types by number, and those type numbers may be followed by ‘=’ and nested definitions. Also, the Lucid compiler will repeat ‘type-number=’ more than once if it wants to define several type numbers at once. | |
a{boundary} - {boundary} is an integer specifying the alignment. I assume it applies to all variables of this type. | |
p{integer} - Pointer class (for checking). Not sure what this means, or how integer is interpreted. | |
P - Indicate this is a packed type, meaning that structure fields or array elements are placed more closely in memory, to save memory at the expense of speed. | |
s{size} - Size in bits of a variable of this type. This is fully supported by GDB 4.11 and later. | |
S - Indicate that this type is a string instead of an array of characters, or a bitstring instead of a set. It doesn’t change the layout of the data being represented, but does enable the debugger to know which type it is. | |
V - Indicate that this type is a vector instead of an array. The only major difference between vectors and arrays is that vectors are passed by value instead of by reference (vector coprocessor extension). | |
Stab Symbol Types: | |
The following symbol types indicate that this is a stab. This is the full list of stab numbers, including stab types that are used in languages other than C. | |
0x20 N_GSYM - Global symbol; see Global Variables. | |
0x22 N_FNAME - Function name (for BSD Fortran); see Procedures. | |
0x24 N_FUN - Function name (see Procedures) or text segment variable (see Statics). | |
0x26 N_STSYM - Data segment file-scope variable; see Statics. | |
0x28 N_LCSYM - BSS segment file-scope variable; see Statics. | |
0x2a N_MAIN - Name of main routine; see Main Program. | |
0x2c N_ROSYM - Variable in .rodata section; see Statics. | |
0x30 N_PC - Global symbol (for Pascal); see N_PC. | |
0x32 N_NSYMS - Number of symbols (according to Ultrix V4.0); see N_NSYMS. | |
0x34 N_NOMAP - No DST map; see N_NOMAP. | |
0x36 N_MAC_DEFINE - Name and body of a #defined macro; see Macro define and undefine. | |
0x38 N_OBJ - Object file (Solaris2). | |
0x3a N_MAC_UNDEF - Name of an #undefed macro; see Macro define and undefine. | |
0x3c N_OPT - Debugger options (Solaris2). | |
0x40 N_RSYM - Register variable; see Register Variables. | |
0x42 N_M2C - Modula-2 compilation unit; see N_M2C. | |
0x44 N_SLINE - Line number in text segment; see Line Numbers. | |
0x46 N_DSLINE - Line number in data segment; see Line Numbers. | |
0x48 N_BSLINE - Line number in bss segment; see Line Numbers. | |
0x48 N_BROWS - Sun source code browser, path to .cb file; see N_BROWS. | |
0x4a N_DEFD - GNU Modula2 definition module dependency; see N_DEFD. | |
0x4c N_FLINE - Function start/body/end line numbers (Solaris2). | |
0x50 N_EHDECL - GNU C++ exception variable; see N_EHDECL. | |
0x50 N_MOD2 - Modula2 info "for imc" (according to Ultrix V4.0); see N_MOD2. | |
0x54 N_CATCH - GNU C++ catch clause; see N_CATCH. | |
0x60 N_SSYM - Structure or union element; see N_SSYM.
0x62 N_ENDM - Last stab for module (Solaris2). | |
0x64 N_SO - Path and name of source file; see Source Files. | |
0x80 N_LSYM - Stack variable (see Stack Variables) or type (see Typedefs). | |
0x82 N_BINCL - Beginning of an include file (Sun only); see Include Files. | |
0x84 N_SOL - Name of include file; see Include Files. | |
0xa0 N_PSYM - Parameter variable; see Parameters. | |
0xa2 N_EINCL - End of an include file; see Include Files. | |
0xa4 N_ENTRY - Alternate entry point; see Alternate Entry Points. | |
0xc0 N_LBRAC - Beginning of a lexical block; see Block Structure. | |
0xc2 N_EXCL - Place holder for a deleted include file; see Include Files. | |
0xc4 N_SCOPE - Modula2 scope information (Sun linker); see N_SCOPE. | |
0xe0 N_RBRAC - End of a lexical block; see Block Structure. | |
0xe2 N_BCOMM - Begin named common block; see Common Blocks. | |
0xe4 N_ECOMM - End named common block; see Common Blocks. | |
0xe8 N_ECOML - Member of a common block; see Common Blocks. | |
0xea N_WITH - Pascal with statement: type,,0,0,offset (Solaris2). | |
0xf0 N_NBTEXT - Gould non-base registers; see Gould. | |
0xf2 N_NBDATA - Gould non-base registers; see Gould. | |
0xf4 N_NBBSS - Gould non-base registers; see Gould. | |
0xf6 N_NBSTS - Gould non-base registers; see Gould. | |
0xf8 N_NBLCS - Gould non-base registers; see Gould. | |
Table of Symbol Descriptors: | |
{symbol-descriptor} The symbol descriptor is the character which follows the colon in many stabs, and which tells what kind of stab it is. See String Field, for more information about their use. | |
none - Variable on the stack; see Stack Variables. | |
: - C++ nested symbol; see Nested Symbols.
a - Parameter passed by reference in register; see Reference Parameters. | |
b - Based variable; see Based Variables. | |
c - Constant; see Constants. | |
C - Conformant array bound (Pascal, maybe other languages); Conformant Arrays. Name of a caught exception (GNU C++). These can be distinguished because the latter uses N_CATCH and the former uses another symbol type. | |
d - Floating point register variable; see Register Variables. | |
D - Parameter in floating point register; see Register Parameters. | |
f - File scope function; see Procedures. | |
F - Global function; see Procedures. | |
G - Global variable; see Global Variables. | |
i - See Register Parameters. | |
I - Internal (nested) procedure; see Nested Procedures. | |
J - Internal (nested) function; see Nested Procedures. | |
L - Label name (documented by AIX, no further information known). | |
m - Module; see Procedures. | |
p - Argument list parameter; see Parameters. | |
pP - See Parameters. | |
pF - Fortran Function parameter; see Parameters. | |
P - Unfortunately, three separate meanings have been independently invented for this symbol descriptor. At least the GNU and Sun uses can be distinguished by the symbol type. Global Procedure (AIX) (symbol type used unknown); see Procedures. Register parameter (GNU) (symbol type N_PSYM); see Parameters. Prototype of function referenced by this file (Sun acc) (symbol type N_FUN). | |
Q - Static Procedure; see Procedures. | |
R - Register parameter; see Register Parameters. | |
r - Register variable; see Register Variables. | |
S - File scope variable; see Statics. | |
s - Local variable (OS9000). | |
t - Type name; see Typedefs. | |
T - Enumeration, structure, or union tag; see Typedefs. | |
v - Parameter passed by reference; see Reference Parameters. | |
V - Procedure scope static variable; see Statics. | |
x - Conformant array; see Conformant Arrays. | |
X - Function return variable; see Parameters. | |
Table of Type Descriptors: | |
The type descriptor is the character which follows the type number and an equals sign. It specifies what kind of type is being defined. See String Field, for more information about their use. | |
digit|( - Type reference; see String Field. | |
- - Reference to builtin type; see Negative Type Numbers. | |
# - Method (C++); see Method Type Descriptor. | |
* - Pointer; see Miscellaneous Types. | |
& - Reference (C++). | |
@ - Type Attributes (AIX); see String Field. Member (class and variable) type (GNU C++); see Member Type Descriptor. | |
a - Array; see Arrays. | |
A - Open array; see Arrays. | |
b - Pascal space type (AIX); see Miscellaneous Types. Builtin integer type (Sun); see Builtin Type Descriptors. Const and volatile qualified type (OS9000). | |
B - Volatile-qualified type; see Miscellaneous Types. | |
c - Complex builtin type (AIX); see Builtin Type Descriptors. Const-qualified type (OS9000). | |
C - COBOL Picture type. See AIX documentation for details. | |
d - File type; see Miscellaneous Types. | |
D - N-dimensional dynamic array; see Arrays. | |
e - Enumeration type; see Enumerations. | |
E - N-dimensional subarray; see Arrays. | |
f - Function type; see Function Types. | |
F - Pascal function parameter; see Function Types | |
g - Builtin floating point type; see Builtin Type Descriptors. | |
G - COBOL Group. See AIX documentation for details. | |
i - Imported type (AIX); see Cross-References. Volatile-qualified type (OS9000). | |
k - Const-qualified type; see Miscellaneous Types. | |
K - COBOL File Descriptor. See AIX documentation for details. | |
M - Multiple instance type; see Miscellaneous Types. | |
n - String type; see Strings. | |
N - Stringptr; see Strings. | |
o - Opaque type; see Typedefs. | |
p - Procedure; see Function Types. | |
P - Packed array; see Arrays. | |
r - Range type; see Subranges. | |
R - Builtin floating type; see Builtin Type Descriptors (Sun). Pascal subroutine parameter; see Function Types (AIX). Detecting this conflict is possible with careful parsing (hint: a Pascal subroutine parameter type will always contain a comma, and a builtin type descriptor never will). | |
s - Structure type; see Structures. | |
S - Set type; see Miscellaneous Types. | |
u - Union; see Unions. | |
v - Variant record. This is a Pascal and Modula-2 feature which is like a union within a struct in C. See AIX documentation for details. | |
w - Wide character; see Builtin Type Descriptors. | |
x - Cross-reference; see Cross-References. | |
Y - Used by IBM’s xlC C++ compiler (for structures, I think). | |
z - gstring; see Strings. | |
Cross-References to Other Types | |
x{s|u|e}{name}: - Another way is with the ‘x’ type descriptor, which is followed by ‘s’ for a structure tag, ‘u’ for a union tag, or ‘e’ for a enumerator tag, followed by the name of the tag, followed by ‘:’. If the name contains ‘::’ between a ‘<’ and ‘>’ pair (for C++ templates), such a ‘::’ does not end the name—only a single ‘:’ ends the name; see Nested Symbols. | |
Subrange Types: | |
r{type-information};{lower-bound};{upper-bound}; The ‘r’ type descriptor defines a type as a subrange of another type. It is followed by type information for the type of which it is a subrange, a semicolon, an integral lower bound, a semicolon, an integral upper bound, and a semicolon. The AIX documentation does not specify the trailing semicolon, in an effort to specify array indexes more cleanly, but a subrange which is not an array index has always included a trailing semicolon (see Arrays). | |
Instead of an integer, either bound can be one of the following: | |
- | |
A{offset} - The bound is passed by reference on the stack at offset offset from the argument list. See Parameters, for more information on such offsets. | |
T{offset} - The bound is passed by value on the stack at offset offset from the argument list. | |
a{register-number} - The bound is passed by reference in register number register-number. | |
t{register-number} - The bound is passed by value in register number register-number. | |
J - There is no bound. | |
Subranges are also used for builtin types; see Traditional Builtin Types. | |
Array Types: | |
Negative Type Numbers: | |
Here is the list of negative type numbers. The phrase integral type is used to mean twos-complement (I strongly suspect that all machines which use stabs use twos-complement; most machines use twos-complement these days). | |
-1 - int, 32 bit signed integral type. | |
-2 - char, 8 bit type holding a character. Both GDB and dbx on AIX treat this as signed. GCC uses this type whether char is signed or not, which seems like a bad idea. The AIX compiler (xlc) seems to avoid this type; it uses -5 instead for char. | |
-3 - short, 16 bit signed integral type. | |
-4 - long, 32 bit signed integral type. | |
-5 - unsigned char, 8 bit unsigned integral type. | |
-6 - signed char, 8 bit signed integral type. | |
-7 - unsigned short, 16 bit unsigned integral type. | |
-8 - unsigned int, 32 bit unsigned integral type. | |
-9 - unsigned, 32 bit unsigned integral type. | |
-10 - unsigned long, 32 bit unsigned integral type. | |
-11 - void, type indicating the lack of a value. | |
-12 - float, IEEE single precision. | |
-13 - double, IEEE double precision. | |
-14 - long double, IEEE double precision. The compiler claims the size will increase in a future release, and for binary compatibility you have to avoid using long double. I hope when they increase it they use a new negative type number. | |
-15 - integer. 32 bit signed integral type. | |
-16 - boolean. 32 bit type. GDB and GCC assume that zero is false, one is true, and other values have unspecified meaning. I hope this agrees with how the IBM tools use the type. | |
-17 - short real. IEEE single precision. | |
-18 - real. IEEE double precision. | |
-19 - stringptr. See Strings. | |
-20 - character, 8 bit unsigned character type. | |
-21 - logical*1, 8 bit type. This Fortran type has a split personality in that it is used for boolean variables, but can also be used for unsigned integers. 0 is false, 1 is true, and other values are non-boolean. | |
-22 - logical*2, 16 bit type. This Fortran type has a split personality in that it is used for boolean variables, but can also be used for unsigned integers. 0 is false, 1 is true, and other values are non-boolean. | |
-23 - logical*4, 32 bit type. This Fortran type has a split personality in that it is used for boolean variables, but can also be used for unsigned integers. 0 is false, 1 is true, and other values are non-boolean. | |
-24 - logical, 32 bit type. This Fortran type has a split personality in that it is used for boolean variables, but can also be used for unsigned integers. 0 is false, 1 is true, and other values are non-boolean. | |
-25 - complex. A complex type consisting of two IEEE single-precision floating point values. | |
-26 - complex. A complex type consisting of two IEEE double-precision floating point values. | |
-27 - integer*1, 8 bit signed integral type. | |
-28 - integer*2, 16 bit signed integral type. | |
-29 - integer*4, 32 bit signed integral type. | |
-30 - wchar. Wide character, 16 bits wide, unsigned (what format? Unicode?). | |
-31 - long long, 64 bit signed integral type. | |
-32 - unsigned long long, 64 bit unsigned integral type. | |
-33 - logical*8, 64 bit unsigned integral type. | |
-34 - integer*8, 64 bit signed integral type. | |
Miscellaneous Types: | |
b{type-information};bytes - Pascal space type. This is documented by IBM; what does it mean? This use of the ‘b’ type descriptor can be distinguished from its use for builtin integral types (see Builtin Type Descriptors) because the character following the type descriptor is always a digit, ‘(’, or ‘-’. | |
B{type-information} - A volatile-qualified version of type-information. This is a Sun extension. References and stores to a variable with a volatile-qualified type must not be optimized or cached; they must occur as the user specifies them. | |
d{type-information} - File of type type-information. As far as I know this is only used by Pascal. | |
k{type-information} - A const-qualified version of type-information. This is a Sun extension. A variable with a const-qualified type cannot be modified. | |
M{type-information};{length} - Multiple instance type. The type seems to be composed of length repetitions of type-information, for example character*3 is represented by ‘M-2;3’, where ‘-2’ is a reference to a character type (see Negative Type Numbers). I’m not sure how this differs from an array. This appears to be a Fortran feature. length is a bound, like those in range types; see Subranges.
S{type-information} - Pascal set type. type-information must be a small type such as an enumeration or a subrange, and the type is a bitmask whose length is specified by the number of elements in type-information. In CHILL, if it is a bitstring instead of a set, also use the ‘S’ type attribute (see String Field). | |
*{type-information} - Pointer to type-information. | |
""" | |
#========================================================================================= | |
class stab_Reader: | |
stabRE = re.compile(r'\[ *(\d+)\] [0-9a-f]{8} ([0-9a-f]{2}) \( *(.+?) *\) ([0-9a-f]{2}) ([0-9a-f]{4}) ([0-9a-f]{16})(?: \'(.*)?\')?') | |
dirpathRE = re.compile(r'(.*)/$') | |
stringRE = re.compile(r'((?:[^:\n]|::)*):((?:pF|pP|Tt|[a-zA-Z])?)(.*)') # \2\t\1\t\t\t\t\t\3 | |
typeRE = re.compile(r'(\(\d+,\d+\)|\d+)(=?)(.*)') | |
negativeTypeRE = re.compile(r'(-\d+);(.*)') | |
pointerRE = re.compile(r'([*&kB])(.*)') | |
methodRE = re.compile(r'([#f])(.*)') | |
parameterRE = re.compile(r'(;|,)(.*)') | |
methodpropertiesRE = re.compile(r':([^;]+);([0-2])([A-D])(?:(\?)|(\.)|(\*)(\d+))(.*)') | |
attributeRE = re.compile(r'@(?:s(\d+);)(.*)') | |
enumerationRE = re.compile(r'e(.*)') | |
enumeratorRE = re.compile(r'(?:(;)|([^:]+):(-?\d+),)(.*)') | |
subrangeRE = re.compile(r'r(.*)') | |
subrangeLimitsRE = re.compile(r';(-?\d+);(-?\d+);(.*)') | |
arrayRE = re.compile(r'a(.*)') | |
crossReferenceRE = re.compile(r'x([sue])((?:::|\w|\$)+):(.*)') | |
structRE = re.compile(r'([su])(\d+)(?:!(\d+),)?(.*)') | |
baseclassRE = re.compile(r'(?:(;)|(\d)(\d)(\d+),)(.*)') | |
fieldnameRE = re.compile(r'(?:(;)|(?:([^:\n]*)(?:(?:(::)|(:))(?:/([0-2]))?)))(.*)') | |
fieldlocationRE = re.compile(r'(?:(,)(\d+),(\d+)|:(\w+))(.*)') | |
methodEndRE = re.compile(r';(.*)') | |
fieldEndRE = re.compile(r';(.*)') | |
structContainingRE = re.compile(r"~%(.*)") | |
structContainingEndRE = re.compile(r';(.*)') | |
path = None | |
currentfile = None | |
addr_size = 4 | |
compilationUnitNumber = 0 | |
def makeDIE(self, parent, Index, tag):
    """Create a DIEDict with the given tag, append it to parent and return it.

    Index becomes the address of the new record and the reader's current file becomes
    its AT_decl_file. A TAG_compile_unit record is also added to the CompileUnits list
    of the root, made the current compile unit and given the reader's addr_size and the
    next unit_number. Every other record is linked to the current compile unit.
    """
    root = parent.dSYM
    record = DIEDict()
    record.dSYM = root
    record.address = Index
    record.parent = parent
    record.AT_decl_file = self.currentfile
    parent.AppendChild(record)
    record.tag = tag
    if record.tag != "TAG_compile_unit":
        record.compile_unit = root.currentCompileUnit
        return record
    # compile units are numbered in creation order, starting at 1
    root.CompileUnits.append(record)
    root.currentCompileUnit = record
    record.addr_size = self.addr_size
    self.compilationUnitNumber += 1
    record.unit_number = self.compilationUnitNumber
    return record
def parseStabType(self, leftDIE, parent, Index, tstr): | |
p = self.typeRE.match(tstr) | |
if p: | |
typenumber = ("%d %s" % (parent.dSYM.currentCompileUnit.unit_number, p.group(1))) | |
if typenumber in parent.dSYM.DIELookup: | |
DIE = parent.dSYM.DIELookup[typenumber] | |
if debug: dbgprint("found existing type «%s»" % (typenumber)) | |
else: | |
DIE = self.makeDIE(parent, Index, "TAG_typedef") | |
DIE.firsttag = 1 | |
DIE.Settypenumber(typenumber) | |
if leftDIE != None: | |
if not hasattr(DIE, "createdFrom"): | |
DIE.createdFrom = [] | |
DIE.createdFrom.append(leftDIE) | |
if not hasattr(leftDIE, "creates"): | |
leftDIE.creates = [] | |
leftDIE.creates.append(DIE) | |
if p.group(2) == "=": | |
if debug: dbgprint("{ = «%s» «%s» «%s»" % (p.group(1), p.group(2), p.group(3))) | |
typeDIE, remaining = self.parseStabType(DIE, parent, Index, p.group(3)) | |
if leftDIE != None: | |
if hasattr(typeDIE, "iscrossreference"): | |
leftDIE.SetName(typeDIE.AT_name) | |
leftDIE.isnamefromcrossreference = 1 | |
if debug: dbgprint("} =") | |
else: | |
remaining = p.group(3) | |
if leftDIE != None: | |
if hasattr(leftDIE, "AT_type") and leftDIE.AT_type != typenumber: | |
print("Error: [%d] Left DIE (%s) type (%s) changed to (%s): %s" % (Index, leftDIE.typenumber, leftDIE.AT_type, typenumber, tstr)) | |
if leftDIE.typenumber == typenumber: | |
if debug: dbgprint("Warning: [%d] Type «%s» assumed to be void" % (Index, typenumber)) | |
leftDIE.isvoid = 1 | |
leftDIE.SetName("void") | |
else: | |
leftDIE.AT_type = typenumber | |
return DIE, remaining | |
if leftDIE != None: | |
if debug: dbgprint("{ leftDIE «%s»" % tstr) | |
remaining = tstr | |
while True: | |
p = self.attributeRE.match(remaining) | |
if p: | |
bits = int(p.group(1), 10) | |
if bits & 7 == 0: | |
leftDIE.AT_byte_size = bits >> 3 | |
else: | |
leftDIE.AT_bit_size = bits | |
remaining = p.group(2) | |
else: | |
break | |
p = self.negativeTypeRE.match(remaining) | |
if p: | |
leftDIE.SetTag("TAG_base_type") | |
leftDIE.stabsbasetype = int(p.group(1), 10) | |
if 1 == 0: | |
pass | |
#elif leftDIE.stabsbasetype == -1 : # int, 32 bit signed integral type. | |
#elif leftDIE.stabsbasetype == -2 : # char, 8 bit type holding a character. Both GDB and dbx on AIX treat this as signed. GCC uses this type whether char is signed or not, which seems like a bad idea. The AIX compiler (xlc) seems to avoid this type; it uses -5 instead for char. | |
#elif leftDIE.stabsbasetype == -3 : # short, 16 bit signed integral type. | |
#elif leftDIE.stabsbasetype == -4 : # long, 32 bit signed integral type. | |
#elif leftDIE.stabsbasetype == -5 : # unsigned char, 8 bit unsigned integral type. | |
#elif leftDIE.stabsbasetype == -6 : # signed char, 8 bit signed integral type. | |
#elif leftDIE.stabsbasetype == -7 : # unsigned short, 16 bit unsigned integral type. | |
#elif leftDIE.stabsbasetype == -8 : # unsigned int, 32 bit unsigned integral type. | |
#elif leftDIE.stabsbasetype == -9 : # unsigned, 32 bit unsigned integral type. | |
#elif leftDIE.stabsbasetype == -10: # unsigned long, 32 bit unsigned integral type. | |
#elif leftDIE.stabsbasetype == -11: # void, type indicating the lack of a value. | |
#elif leftDIE.stabsbasetype == -12: # float, IEEE single precision. | |
#elif leftDIE.stabsbasetype == -13: # double, IEEE double precision. | |
#elif leftDIE.stabsbasetype == -14: # long double, IEEE double precision. The compiler claims the size will increase in a future release, and for binary compatibility you have to avoid using long double. I hope when they increase it they use a new negative type number. | |
#elif leftDIE.stabsbasetype == -15: # integer. 32 bit signed integral type. | |
elif leftDIE.stabsbasetype == -16: # boolean. 32 bit type. GDB and GCC assume that zero is false, one is true, and other values have unspecified meaning. I hope this agrees with how the IBM tools use the type. | |
leftDIE.AT_encoding = "ATE_boolean" | |
leftDIE.SetName("bool") | |
#leftDIE.AT_byte_size = 1 | |
#elif leftDIE.stabsbasetype == -17: # short real. IEEE single precision. | |
#elif leftDIE.stabsbasetype == -18: # real. IEEE double precision. | |
#elif leftDIE.stabsbasetype == -19: # stringptr. See Strings. | |
#elif leftDIE.stabsbasetype == -20: # character, 8 bit unsigned character type. | |
#elif leftDIE.stabsbasetype == -21: # logical*1, 8 bit type. This Fortran type has a split personality in that it is used for boolean variables, but can also be used for unsigned integers. 0 is false, 1 is true, and other values are non-boolean. | |
#elif leftDIE.stabsbasetype == -22: # logical*2, 16 bit type. This Fortran type has a split personality in that it is used for boolean variables, but can also be used for unsigned integers. 0 is false, 1 is true, and other values are non-boolean. | |
#elif leftDIE.stabsbasetype == -23: # logical*4, 32 bit type. This Fortran type has a split personality in that it is used for boolean variables, but can also be used for unsigned integers. 0 is false, 1 is true, and other values are non-boolean. | |
#elif leftDIE.stabsbasetype == -24: # logical, 32 bit type. This Fortran type has a split personality in that it is used for boolean variables, but can also be used for unsigned integers. 0 is false, 1 is true, and other values are non-boolean. | |
#elif leftDIE.stabsbasetype == -25: # complex. A complex type consisting of two IEEE single-precision floating point values. | |
#elif leftDIE.stabsbasetype == -26: # complex. A complex type consisting of two IEEE double-precision floating point values. | |
#elif leftDIE.stabsbasetype == -27: # integer*1, 8 bit signed integral type. | |
#elif leftDIE.stabsbasetype == -28: # integer*2, 16 bit signed integral type. | |
#elif leftDIE.stabsbasetype == -29: # integer*4, 32 bit signed integral type. | |
#elif leftDIE.stabsbasetype == -30: # wchar. Wide character, 16 bits wide, unsigned (what format? Unicode?). | |
#elif leftDIE.stabsbasetype == -31: # long long, 64 bit signed integral type. | |
#elif leftDIE.stabsbasetype == -32: # unsigned long long, 64 bit unsigned integral type. | |
#elif leftDIE.stabsbasetype == -33: # logical*8, 64 bit unsigned integral type. | |
#elif leftDIE.stabsbasetype == -34: # integer*8, 64 bit signed integral type. | |
else: | |
print("Error: unknown negative type «%s»" % p.group(1)) | |
if debug: dbgprint("} leftDIE negativeType") | |
return None, p.group(2) | |
p = self.pointerRE.match(remaining) | |
if p: | |
DIE, remaining = self.parseStabType(None, parent, Index, p.group(2)) | |
if DIE == None: | |
print("Error: No type for %s type: %d %s" % (p.group(1), Index, remaining)) | |
leftDIE.SetType(DIE.typenumber) | |
if p.group(1) == "*": | |
leftDIE.SetTag("TAG_pointer_type") | |
elif p.group(1) == "&": | |
leftDIE.SetTag("TAG_reference_type") | |
elif p.group(1) == "k": | |
leftDIE.SetTag("TAG_const_type") | |
elif p.group(1) == "B": | |
leftDIE.SetTag("TAG_volatile_type") | |
if debug: dbgprint("} leftDIE %s" % leftDIE.tag) | |
return None, remaining | |
p = self.methodRE.match(remaining) | |
if p: | |
classDIE, remaining = self.parseStabType(None, parent, Index, p.group(2)) | |
if classDIE == None: | |
print("Error: No type for method: [%d] «%s»" % (Index, remaining)) | |
if debug: dbgprint("} leftDIE") | |
return None, remaining | |
if p.group(1) == "#": | |
if debug: dbgprint("{ ismethod «%s»" % (leftDIE.typenumber)) | |
leftDIE.ismethod = 1 | |
leftDIE.methodClassDIE = classDIE | |
leftDIE.SetTag("TAG_subprogram") | |
#if not hasattr(classDIE, "pointerclasstypenumber"): | |
# pointerclasstypenumber = ("* %s" % classDIE.typenumber) | |
# pointerclassDIE = self.makeDIE(parent.parent, Index, "TAG_pointer_type") | |
# pointerclassDIE.Settypenumber(pointerclasstypenumber) | |
# pointerclassDIE.AT_type = classDIE.typenumber | |
# classDIE.pointerclasstypenumber = pointerclasstypenumber | |
# | |
#artificalDIE = self.makeDIE(leftDIE, Index, "TAG_formal_parameter") | |
#artificalDIE.AT_type = classDIE.pointerclasstypenumber | |
#artificalDIE.AT_artificial = 1 # the this parameter is artificial | |
#leftDIE.artificalDIE = artificalDIE | |
numParameters = 0 | |
while True: | |
q = self.parameterRE.match(remaining) | |
if not q: | |
print("Error: Expected , or ; : [%d] «%s»" % (Index, remaining)) | |
break | |
if q.group(1) == ";": | |
remaining = q.group(2) | |
break | |
parametertypeDIE, remaining = self.parseStabType(None, parent, Index, q.group(2)) | |
if parametertypeDIE == None: | |
print("Error: Expected parameter type: [%d] «%s»" % (Index, q.group(2))) | |
break | |
if numParameters == 0: | |
leftDIE.AT_type = parametertypeDIE.typenumber | |
else: | |
parameterDIE = self.makeDIE(leftDIE, Index, "TAG_formal_parameter") | |
parameterDIE.AT_type = parametertypeDIE.typenumber | |
parameterDIE.parameterNumber = numParameters | |
if leftDIE.ismethod and numParameters == 1: | |
parameterDIE.AT_artificial = 1 | |
leftDIE.artificalDIE = parameterDIE | |
numParameters += 1 | |
if numParameters < 1: | |
print("Error: Expected return type: [%d] «%s»" % (Index, remaining)) | |
elif numParameters < 2: | |
print("Error: Expected artifical parameter: [%d] «%s»" % (Index, remaining)) | |
elif not hasattr(parametertypeDIE, "isvoid"): | |
#print("Error: Expected terminating void: [%d] «%s»" % (Index, remaining)) | |
pass | |
else: | |
leftDIE.children.pop() | |
if debug: dbgprint("} ismethod") | |
else: # p.group(1) == "f" | |
if debug: dbgprint("{ isstaticmethod «%s»" % (leftDIE.typenumber)) | |
leftDIE.isstaticmethod = 1 | |
leftDIE.AT_type = classDIE.typenumber | |
leftDIE.SetTag("TAG_subroutine_type") | |
if debug: dbgprint("} isstaticmethod") | |
if debug: dbgprint("} leftDIE") | |
return None, remaining | |
p = self.enumerationRE.match(remaining) | |
if p: | |
leftDIE.SetTag("TAG_enumeration_type") | |
if not hasattr(leftDIE, "AT_byte_size"): | |
leftDIE.AT_byte_size = 1 | |
leftDIE.enumsizemin = 1 | |
leftDIE.enumsizemax = 16 | |
remaining = p.group(1) | |
while True: | |
q = self.enumeratorRE.match(remaining) | |
if q: | |
remaining = q.group(4) | |
if q.group(1) == ";": | |
break | |
DIE = self.makeDIE(leftDIE, Index, "TAG_enumerator") | |
DIE.SetName(q.group(2)) | |
if q.group(3)[0] == "0": | |
DIE.AT_const_value = int(q.group(3), 8) | |
else: | |
DIE.AT_const_value = int(q.group(3), 10) | |
if hasattr(leftDIE, "enumsizemin"): | |
if leftDIE.AT_byte_size <= 1 and DIE.AT_const_value >= -0x80 and DIE.AT_const_value <= 0x7f: | |
leftDIE.AT_byte_size = 1 | |
leftDIE.enumsizemin = 1 | |
elif leftDIE.AT_byte_size <= 2 and DIE.AT_const_value >= -0x8000 and DIE.AT_const_value <= 0x7fff: | |
leftDIE.AT_byte_size = 2 | |
leftDIE.enumsizemin = 2 | |
elif leftDIE.AT_byte_size <= 4 and DIE.AT_const_value >= -0x80000000 and DIE.AT_const_value <= 0x7fffffff: | |
leftDIE.AT_byte_size = 4 | |
leftDIE.enumsizemin = 4 | |
elif leftDIE.AT_byte_size <= 8 and DIE.AT_const_value >= -0x8000000000000000 and DIE.AT_const_value <= 0x7fffffffffffffff: | |
leftDIE.AT_byte_size = 8 | |
leftDIE.enumsizemin = 8 | |
else: | |
print("Error: Expected enumerator: [%d] «%s»" % (Index, remaining)) | |
break | |
if debug: dbgprint("} leftDIE") | |
return None, remaining | |
p = self.subrangeRE.match(remaining) | |
if p: | |
if debug: dbgprint("{ subrange type: [%d] «%s»" % (Index, p.group(1))) | |
leftDIE.SetTag("TAG_subrange_type") | |
typeDIE, remaining = self.parseStabType(None, parent, Index, p.group(1)) | |
if typeDIE == None: | |
print("Error: Expected subrange type: [%d] «%s»" % (Index, p.group(1))) | |
if debug: dbgprint("} subrange type") | |
if debug: dbgprint("} leftDIE") | |
return None, remaining | |
if leftDIE.typenumber == typeDIE.typenumber: | |
leftDIE.selfReference = 1 | |
else: | |
leftDIE.AT_type = typeDIE.typenumber | |
boundsDIE = leftDIE | |
# This block matches array sub range block below [ | |
q = self.subrangeLimitsRE.match(remaining) | |
if not q: | |
print("Error: Expected subrange limits: [%d] «%s»" % (Index, remaining)) | |
if debug: dbgprint("} subrange type error") | |
if debug: dbgprint("} leftDIE") | |
return None, remaining | |
if q.group(1)[0] == "0" or (q.group(1)[0] == "-" and q.group(1)[1] == "0"): | |
boundsDIE.AT_lower_bound = int(q.group(1), 8) | |
else: | |
boundsDIE.AT_lower_bound = int(q.group(1), 10) | |
if q.group(2)[0] == "0" or (q.group(2)[0] == "-" and q.group(2)[1] == "0"): | |
boundsDIE.AT_upper_bound = int(q.group(2), 8) | |
else: | |
boundsDIE.AT_upper_bound = int(q.group(2), 10) | |
if boundsDIE.AT_lower_bound > boundsDIE.AT_upper_bound: | |
if boundsDIE.AT_lower_bound == 0x8000000000000000: | |
# 64-bit lower bound | |
boundsDIE.AT_lower_bound = -0x8000000000000000 | |
elif boundsDIE.AT_lower_bound == 0 and boundsDIE.AT_upper_bound == -1: | |
# unbounded array [] | |
del boundsDIE.AT_lower_bound | |
del boundsDIE.AT_upper_bound | |
boundsDIE.unbounded = 1 | |
# ] | |
elif boundsDIE.AT_lower_bound > 0 and boundsDIE.AT_upper_bound == 0: | |
# floating point number | |
boundsDIE.tag = "TAG_base_type" | |
boundsDIE.AT_encoding = "ATE_float" | |
boundsDIE.AT_byte_size = boundsDIE.AT_lower_bound | |
boundsDIE.floatAtType = boundsDIE.AT_type | |
del boundsDIE.AT_lower_bound | |
del boundsDIE.AT_upper_bound | |
del boundsDIE.AT_type | |
else: | |
print("Error: Expected subrange type lower bound %d to be less than upper bound %d: [%d] «%s»" % (boundsDIE.AT_lower_bound, boundsDIE.AT_upper_bound, Index, remaining)) | |
if hasattr(leftDIE, "selfReference"): | |
leftDIE.tag = "TAG_base_type" | |
if leftDIE.AT_lower_bound < 0: | |
leftDIE.AT_encoding = "ATE_signed" | |
if leftDIE.AT_lower_bound >= -0x80 and leftDIE.AT_upper_bound <= 0x7f: | |
leftDIE.AT_encoding = "ATE_signed_char" | |
leftDIE.AT_byte_size = 1 | |
elif leftDIE.AT_lower_bound >= -0x8000 and leftDIE.AT_upper_bound <= 0x7fff: | |
leftDIE.AT_byte_size = 2 | |
elif leftDIE.AT_lower_bound >= -0x80000000 and leftDIE.AT_upper_bound <= 0x7fffffff: | |
leftDIE.AT_byte_size = 4 | |
elif leftDIE.AT_lower_bound >= -0x8000000000000000 and leftDIE.AT_upper_bound <= 0x7fffffffffffffff: | |
leftDIE.AT_byte_size = 8 | |
else: | |
leftDIE.AT_encoding = "ATE_unsigned" | |
if leftDIE.AT_upper_bound <= 0xff: | |
leftDIE.AT_encoding = "ATE_unsigned_char" | |
leftDIE.AT_byte_size = 1 | |
elif leftDIE.AT_upper_bound <= 0xffff: | |
leftDIE.AT_byte_size = 2 | |
elif leftDIE.AT_upper_bound <= 0xffffffff: | |
leftDIE.AT_byte_size = 4 | |
elif leftDIE.AT_upper_bound <= 0xffffffffffffffff: | |
leftDIE.AT_byte_size = 8 | |
elif leftDIE.tag != "TAG_base_type": | |
print("Error: Expected subrange type to be a base type: [%d] «%s»" % (Index, remaining)) | |
remaining = q.group(3) | |
if debug: dbgprint("} subrange type") | |
if debug: dbgprint("} leftDIE") | |
return None, remaining | |
# arrayRE = re.compile(r'a(.*)') | |
p = self.arrayRE.match(remaining) | |
if p: | |
if debug: dbgprint("{ array type: [%d] «%s»" % (Index, p.group(1))) | |
leftDIE.SetTag("TAG_array_type") | |
q = self.subrangeRE.match(p.group(1)) | |
if not q: | |
print("Error: Expected array subrange: [%d] «%s»" % (Index, p.group(1))) | |
if debug: dbgprint("} array type error 1") | |
if debug: dbgprint("} leftDIE") | |
return None, remaining | |
boundsDIE = self.makeDIE(leftDIE, Index, "TAG_subrange_type") | |
arrayNdxTypeDIE, remaining = self.parseStabType(None, boundsDIE, Index, q.group(1)) # leftDIE, parent, Index, tstr | |
if arrayNdxTypeDIE == None: | |
print("Error: Expected array subrange type: [%d] «%s»" % (Index, q.group(1))) | |
if debug: dbgprint("} array type error 2") | |
if debug: dbgprint("} leftDIE") | |
return None, remaining | |
boundsDIE.AT_type = arrayNdxTypeDIE.typenumber | |
# This block matches subrange block above [ | |
q = self.subrangeLimitsRE.match(remaining) | |
if not q: | |
print("Error: Expected subrange limits: [%d] «%s»" % (Index, remaining)) | |
if debug: dbgprint("} array type error 3") | |
if debug: dbgprint("} leftDIE") | |
return None, remaining | |
if q.group(1)[0] == "0" or (q.group(1)[0] == "-" and q.group(1)[1] == "0"): | |
boundsDIE.AT_lower_bound = int(q.group(1), 8) | |
else: | |
boundsDIE.AT_lower_bound = int(q.group(1), 10) | |
if q.group(2)[0] == "0" or (q.group(2)[0] == "-" and q.group(2)[1] == "0"): | |
boundsDIE.AT_upper_bound = int(q.group(2), 8) | |
else: | |
boundsDIE.AT_upper_bound = int(q.group(2), 10) | |
if boundsDIE.AT_lower_bound > boundsDIE.AT_upper_bound: | |
if boundsDIE.AT_lower_bound == 0x8000000000000000: | |
# 64-bit lower bound | |
boundsDIE.AT_lower_bound = -0x8000000000000000 | |
elif boundsDIE.AT_lower_bound == 0 and boundsDIE.AT_upper_bound == -1: | |
# unbounded array [] | |
del boundsDIE.AT_lower_bound | |
del boundsDIE.AT_upper_bound | |
boundsDIE.unbounded = 1 | |
# ] | |
else: | |
print("Error: Expected subrange type lower bound %d to be less than upper bound %d: [%d] «%s»" % (boundsDIE.AT_lower_bound, boundsDIE.AT_upper_bound, Index, remaining)) | |
elif boundsDIE.AT_lower_bound == 0: | |
boundsDIE.AT_count = boundsDIE.AT_upper_bound + 1 | |
del boundsDIE.AT_lower_bound | |
del boundsDIE.AT_upper_bound | |
remaining = q.group(3) | |
typeDIE, remaining = self.parseStabType(None, parent, Index, remaining) | |
if typeDIE == None: | |
print("Error: Expected array element type: [%d] «%s»" % (Index, remaining)) | |
if debug: dbgprint("} array type error 4") | |
if debug: dbgprint("} leftDIE") | |
return None, remaining | |
if typeDIE.tag == "TAG_array_type": | |
# handle array of array | |
arrayNdx2DIE = DIEDict() | |
arrayNdx2DIE.__dict__.update(typeDIE.children[0].__dict__) | |
arrayNdx2DIE.parent = leftDIE | |
leftDIE.children.append(arrayNdx2DIE) | |
leftDIE.AT_type = typeDIE.AT_type | |
else: | |
leftDIE.AT_type = typeDIE.typenumber | |
if debug: dbgprint("} array type") | |
if debug: dbgprint("} leftDIE") | |
return None, remaining | |
p = self.crossReferenceRE.match(remaining) | |
if p: | |
if debug: dbgprint("{ cross reference -- [%d] type:«%s» name:«%s» remaining:«%s»" % (Index, p.group(1), p.group(2), p.group(3))) | |
if p.group(1) == "s": | |
leftDIE.SetTag("TAG_structure_type") | |
elif p.group(1) == "u": | |
leftDIE.SetTag("TAG_union_type") | |
elif p.group(1) == "e": | |
leftDIE.SetTag("TAG_enumeration_type") | |
leftDIE.SetSize(4) | |
leftDIE.SetName(p.group(2)) | |
leftDIE.iscrossreference = 1 | |
if debug: dbgprint("} cross reference") | |
if debug: dbgprint("} leftDIE") | |
return leftDIE, p.group(3) | |
p = self.structRE.match(remaining) | |
if p: | |
if debug: dbgprint("{ struct -- type:«%s» size:«%s» numBaseClasses:«%s» remaining:«%s»" % (p.group(1), p.group(2), p.group(3), p.group(4))) | |
if p.group(1) == "s": | |
leftDIE.SetTag("TAG_structure_type") | |
else: | |
leftDIE.SetTag("TAG_union_type") | |
leftDIE.AT_byte_size = int(p.group(2), 10) | |
remaining = p.group(4) | |
if p.group(3) != None: | |
if debug: dbgprint("{ baseclasses") | |
numBaseClasses = int(p.group(3), 10) | |
numFoundBaseClasses = 0 | |
#baseclassRE = re.compile(r'(?:(;)|(\d)(\d)(\d+),)(.*)') | |
while True: | |
if debug: dbgprint("{ baseclassRE") | |
q = self.baseclassRE.match(remaining) | |
if not q: | |
print("Error: Expected base class: [%d] «%s»" % (Index, remaining)) | |
if debug: dbgprint("} baseclassRE break 1") | |
break | |
if q.group(1) == ";": | |
remaining = q.group(5) | |
if debug: dbgprint("} baseclassRE break 2") | |
break | |
if debug: dbgprint("got base class -- last:«%s» virtuality:«%s» access:«%s» location:«%s» remaining:«%s»" % (q.group(1), q.group(2), q.group(3), q.group(4), q.group(5))) | |
numFoundBaseClasses += 1 | |
DIE = self.makeDIE(leftDIE, Index, "TAG_inheritance") | |
if q.group(2) == "1": | |
DIE.AT_virtuality = "VIRTUALITY_virtual" | |
elif q.group(2) == "0": | |
pass | |
else: | |
print("Error: Unexpected virtuality: [%d] «%s» «%s»" % (Index, q.group(2), remaining)) | |
if q.group(3) == "2": | |
DIE.AT_accessibility = "ACCESS_public" | |
elif q.group(3) == "1": | |
DIE.AT_accessibility = "ACCESS_protected" | |
elif q.group(3) == "0": | |
DIE.AT_accessibility = "ACCESS_private" | |
else: | |
print("Error: Unexpected field accessibility: [%d] «%s» «%s»" % (Index, q.group(2), remaining)) | |
DIE.AT_data_member_location = int(q.group(4), 10) | |
typeDIE, remaining = self.parseStabType(None, leftDIE, Index, q.group(5)) | |
DIE.AT_type = typeDIE.typenumber | |
if debug: dbgprint("{ UpdateBaseClassLists") | |
DIE.UpdateBaseClassLists() | |
if debug: dbgprint("} UpdateBaseClassLists") | |
if debug: dbgprint("} baseclassRE") | |
if numFoundBaseClasses > numBaseClasses: | |
print("Error: Unexpected number of listed base classes: [%d] «%s» «%s»" % (Index, q.group2, remaining)) | |
if debug: dbgprint("} baseclasses") | |
if debug: dbgprint("{ members") | |
while True: | |
if debug: dbgprint("{ member «%s»" % remaining) | |
q = self.fieldnameRE.match(remaining) | |
if not q: | |
print("Error: Expected field name: [%d] «%s»" % (Index, remaining)) | |
if debug: dbgprint("} member error 1") | |
break | |
# 1 2 3 4 5 6 | |
# (?:(;)|(?:([^:\n]*)(?:(?:(::)|(:))(?:/([0-2]))?)))(.*) | |
if debug: dbgprint("got field -- last:«%s» name:«%s» method:«%s» field:«%s» access:«%s» remaining:«%s»" % (q.group(1), q.group(2), q.group(3), q.group(4), q.group(5), q.group(6))) | |
remaining = q.group(6) | |
if q.group(1) == ";": | |
if debug: dbgprint("} member simple") | |
break | |
# we can use the same name for multiple fields | |
if debug: dbgprint("{ names") | |
while len(remaining) > 0 and (remaining[0] != ";"): | |
if q.group(3) == "::": | |
if debug: dbgprint("{ method") | |
DIE, remaining = self.parseStabType(None, leftDIE, Index, remaining) | |
if DIE == None: | |
print("Error: Expected type «%s»" % remaining) | |
if debug: dbgprint("} method break 1") | |
break | |
DIE.tag = "TAG_subprogram" | |
if debug: dbgprint("Duplicating: [%d] «%s»" % (Index, q.group(6))) | |
if debug: DumpDIE(DIE,0) | |
newDIE = DIEDict() | |
newDIE.__dict__.update(DIE.__dict__) | |
#DIE.copy()) # need a new DIE for a different method with same parameters | |
if hasattr(DIE, "AT_name"): | |
del newDIE.AT_name | |
newDIE.duplicateof = DIE | |
del newDIE.typenumber | |
DIE = newDIE | |
newDIE.parent.AppendChild(newDIE) | |
DIE.SetName(q.group(2)) | |
DIE.isclassmethod = 1 | |
if debug: dbgprint("newDIE:") | |
if debug: DumpDIE(DIE,0) | |
# 1 2 3 4 5 6 7 8 | |
# :([^;]+);([0-2])([A-D])(?:(\?)|(\.)|(\*)(\d+))(.*) | |
r = self.methodpropertiesRE.match(remaining) | |
if not r: | |
print("Error: Expected method properties: [%d] «%s»" % (Index, remaining)) | |
if debug: dbgprint("} method break 2") | |
break | |
if debug: dbgprint ("got method properties -- linkage:«%s» access:«%s» modifier«%s» static:«%s» method:«%s» virtual:«%s» location:«%s» remaining:«%s»" % (r.group(1), r.group(2), r.group(3), r.group(4), r.group(5), r.group(6), r.group(7), r.group(8))) | |
DIE.AT_MIPS_linkage_name = r.group(1) | |
if r.group(2) == "2": | |
pass | |
# DIE.AT_accessibility = "ACCESS_public" | |
elif r.group(2) == "1": | |
DIE.AT_accessibility = "ACCESS_protected" | |
elif r.group(2) == "0": | |
DIE.AT_accessibility = "ACCESS_private" | |
# method properties (static/method/virtual): | |
if r.group(6) == "*" and hasattr(DIE, "ismethod"): | |
DIE.AT_virtuality = "VIRTUALITY_virtual" | |
DIE.AT_vtable_elem_location = int(r.group(7),10) | |
s = self.methodEndRE.match(r.group(8)) | |
if not s: | |
print("Error: Expected ; before virtual method containing type: [%d] «%s»" % (Index, r.group(8))) | |
if debug: dbgprint("} method break 3") | |
break | |
containingDIE, remaining = self.parseStabType(None, parent, Index, s.group(1)) | |
DIE.AT_containing_type = containingDIE.typenumber | |
if containingDIE.typenumber != leftDIE.typenumber: | |
print("Error: containing type «%s» ≠ struct type «%s»" % (containingDIE.typenumber, leftDIE.typenumber)) | |
s = self.methodEndRE.match(remaining) | |
if not s: | |
print("Error: Expected ; after virtual method containing type: [%d] «%s»" % (Index, remaining)) | |
if debug: dbgprint("} method break 4") | |
break | |
remaining = s.group(1) | |
if not hasattr(DIE.parent, "VTableEntries"): | |
DIE.parent.VTableEntries = {} | |
if DIE.AT_vtable_elem_location in DIE.parent.VTableEntries: | |
nameold = DIE.parent.VTableEntries[DIE.AT_vtable_elem_location].GetName() | |
namenew = DIE.GetName() | |
if namenew != nameold: | |
print("Error: %s has duplicate VTableEntries 0x%x %s%s %s%s" % (DIE.parent.GetAddress(), DIE.AT_vtable_elem_location * DIE.compile_unit.addr_size, DIE.parent.VTableEntries[DIE.AT_vtable_elem_location].GetAddress(), nameold, DIE.GetAddress(), namenew)) | |
else: | |
DIE.parent.VTableEntries[DIE.AT_vtable_elem_location] = DIE | |
else: | |
if r.group(4) == "?": | |
if hasattr(DIE, "isstaticmethod"): | |
pass | |
elif hasattr(DIE, "ismethod"): | |
print("Error: Invalid method properties. Expectining a static method but got a regular method: [%d] «%s»" % (Index, remaining)) | |
else: | |
DIE.isstaticmethod = 1 | |
DIE.staticfrommethodproperties = 1 | |
elif r.group(5) == ".": | |
if hasattr(DIE, "ismethod"): | |
pass | |
elif hasattr(DIE, "isstaticmethod"): | |
print("Error: Invalid method properties. Expectining a regular method but got a static method: [%d] «%s»" % (Index, remaining)) | |
else: | |
DIE.ismethod = 1 | |
DIE.methodfrommethodproperties = 1 | |
else: | |
print("Error: Invalid method properties: [%d] «%s»" % (Index, remaining)) | |
if debug: DumpDIE(DIE, 0) | |
if debug: dbgprint("} method break 5") | |
break | |
remaining = r.group(8) | |
# method modifier (A,B,C,D): | |
#if r.group(3) == "A": | |
# pass | |
#else: | |
# classDIE = DIE.dSYM.DIELookup[leftDIE.typenumber] #classtypenumber | |
# | |
# if r.group(3) == "B": | |
# if not hasattr(leftDIE, "pointerconstclasstypenumber"): | |
# constclasstypenumber = ("c %s" % classDIE.typenumber) | |
# constclassDIE = self.makeDIE(parent.parent, Index, "TAG_const_type") | |
# constclassDIE.Settypenumber(constclasstypenumber) | |
# constclassDIE.AT_type = classDIE.typenumber | |
# | |
# pointerconstclasstypenumber = ("* %s" % constclassDIE.typenumber) | |
# pointerconstclassDIE = self.makeDIE(parent.parent, Index, "TAG_pointer_type") | |
# pointerconstclassDIE.Settypenumber(pointerconstclasstypenumber) | |
# pointerconstclassDIE.AT_type = constclassDIE.typenumber | |
# classDIE.pointerconstclasstypenumber = pointerconstclasstypenumber | |
# | |
# DIE.artificalDIE.AT_type = classDIE.pointerconstclasstypenumber | |
# elif r.group(3) == "C": | |
# if not hasattr(leftDIE, "pointervolatileclasstypenumber"): | |
# volatileclasstypenumber = ("v %s" % classDIE.typenumber) | |
# volatileclassDIE = self.makeDIE(parent.parent, Index, "TAG_volatile_type") | |
# volatileclassDIE.Settypenumber(volatileclasstypenumber) | |
# volatileclassDIE.AT_type = classDIE.typenumber | |
# | |
# pointervolatileclasstypenumber = ("* %s" % volatileclassDIE.typenumber) | |
# pointervolatileclassDIE = self.makeDIE(parent.parent, Index, "TAG_pointer_type") | |
# pointervolatileclassDIE.Settypenumber(pointervolatileclasstypenumber) | |
# pointervolatileclassDIE.AT_type = volatileclassDIE.typenumber | |
# classDIE.pointervolatileclasstypenumber = pointervolatileclasstypenumber | |
# | |
# DIE.artificalDIE.AT_type = classDIE.pointervolatileclasstypenumber | |
# elif r.group(3) == "D": | |
# if not hasattr(leftDIE, "pointerconstvolatileclasstypenumber"): | |
# volatileclasstypenumber = ("v %s" % classDIE.typenumber) | |
# volatileclassDIE = self.makeDIE(parent.parent, Index, "TAG_volatile_type") | |
# volatileclassDIE.Settypenumber(volatileclasstypenumber) | |
# volatileclassDIE.AT_type = classDIE.typenumber | |
# | |
# constvolatileclasstypenumber = ("c %s" % volatileclassDIE.typenumber) | |
# constvolatileclassDIE = self.makeDIE(parent.parent, Index, "TAG_const_type") | |
# constvolatileclassDIE.Settypenumber(constvolatileclasstypenumber) | |
# constvolatileclassDIE.AT_type = volatileclassDIE.typenumber | |
# | |
# pointerconstvolatileclasstypenumber = ("* %s" % constvolatileclassDIE.typenumber) | |
# pointerconstvolatileclassDIE = self.makeDIE(parent.parent, Index, "TAG_pointer_type") | |
# pointerconstvolatileclassDIE.Settypenumber(typenumber) | |
# pointerconstvolatileclassDIE.AT_type = classDIE.typenumber | |
# classDIE.pointerconstvolatileclasstypenumber = pointerconstvolatileclasstypenumber | |
# | |
# DIE.artificalDIE.AT_type = classDIE.pointerconstvolatileclasstypenumber | |
if debug: dbgprint("} method") | |
elif q.group(4) == ":": | |
if debug: dbgprint("{ field") | |
DIE = self.makeDIE(leftDIE, Index, "TAG_member") | |
DIE.SetName(q.group(2)) | |
if q.group(5) == "2": | |
pass | |
# DIE.AT_accessibility = "ACCESS_public" | |
elif q.group(5) == "1": | |
DIE.AT_accessibility = "ACCESS_protected" | |
elif q.group(5) == "0": | |
DIE.AT_accessibility = "ACCESS_private" | |
elif q.group(5) != None: | |
print("Error: Unexpected member accessibility: [%d] «%s» «%s»" % (Index, q.group(5), remaining)) | |
typeDIE, remaining = self.parseStabType(None, leftDIE, Index, q.group(6)) | |
DIE.AT_type = typeDIE.typenumber | |
q = self.fieldlocationRE.match(remaining) | |
if q: | |
if q.group(1) == ",": | |
bits = int(q.group(3), 10) | |
if (bits == 0): | |
if debug: dbgprint("got 0 size field: [%d]" % (Index)) | |
leftDIE.children.pop() | |
else: | |
location = int(q.group(2), 10) | |
if (location & 7) or (bits & 7): | |
DIE.AT_data_bit_offset = location | |
DIE.AT_bit_size = bits | |
else: | |
DIE.AT_data_member_location = location >> 3 | |
DIE.AT_byte_size = bits >> 3 | |
DIE.checkmembersize = 1 | |
if not hasattr(leftDIE, "Fields"): | |
leftDIE.Fields = [] | |
leftDIE.Fields.append(DIE) | |
if hasattr(DIE, "AT_name") and "vptr$" in DIE.AT_name: | |
DIE.AT_artificial = 1 | |
else: # == ":": | |
# :{variable_name} for static members | |
if debug: dbgprint("got static variable -- linkage:«%s» remaining:«%s»" % (q.group(4), remaining)) | |
DIE.tag = "TAG_variable" # don't call SetTag because we are changing the tag from "TAG_member" | |
DIE.AT_MIPS_linkage_name = q.group(4) | |
remaining = q.group(5) | |
else: | |
print("Error: Expected field location: [%d] «%s»" % (Index, remaining)) | |
if debug: dbgprint("} field break") | |
break | |
if debug: dbgprint("} field") | |
if debug: dbgprint("} names") | |
s = self.fieldEndRE.match(remaining) | |
if not s: | |
print("Error: Expected ; at end of field: [%d] «%s»" % (Index, remaining)) | |
if debug: dbgprint("} member break 4") | |
break | |
else: | |
remaining = s.group(1) | |
if debug: dbgprint("} member") | |
if debug: dbgprint("} members") | |
#structContainingRE = re.compile(r"~%(.*)") | |
#structContainingEndRE = re.compile(r';(.*)') | |
s = self.structContainingRE.match(remaining) | |
if s: | |
structContainingDIE, remaining = self.parseStabType(None, parent, Index, s.group(1)) | |
leftDIE.AT_containing_type = structContainingDIE.typenumber | |
t = self.structContainingEndRE.match(remaining) | |
if not t: | |
print("Error: Expected ; at end of containing class: [%d] «%s»" % (Index, remaining)) | |
#break | |
remaining = t.group(1) | |
if debug: dbgprint("} struct") | |
if debug: dbgprint("} leftDIE") | |
return None, remaining | |
print("Error: Uknown type: [%d] «%s»" % (Index, tstr)) | |
return None, tstr | |
def Readstabs(self, f, parent):
    """Parse stab lines from ``f`` and attach the resulting DIEs to ``parent``.

    Each line is matched against ``self.stabRE``; lines that do not match are
    ignored. Stabs that open a scope (N_SO with a file name, N_BINCL, N_BNSYM,
    N_FUN with a name, N_LBRAC) create a DIE and recurse with that DIE as the
    new parent, so the nested call consumes the following lines of the same
    iterator. The matching closing stab (N_SO/N_FUN without a string, N_EINCL,
    N_ENSYM, N_RBRAC) breaks out of the loop, which returns to the caller.

    When a closing stab arrives in the wrong scope, ``parent.dSYM.popuntiltag``
    is set and every enclosing call keeps breaking until a parent with that
    tag has been reached.

    Parameters:
        f: iterable of text lines; shared with the recursive calls.
        parent: DIE that receives new children; must provide ``tag`` and ``dSYM``.
    """
    for line in f:
        if debug: dbgprint("%.*s" % (len(line) - 1, line))
        s = self.stabRE.match(line)
        if s:
            Index = int(s.group(1), 10)
            n_type = int(s.group(2), 16)
            n_type_str = s.group(3)
            n_sect = int(s.group(4), 16)
            n_desc = int(s.group(5), 16)
            n_value = int(s.group(6), 16)
            n_str = s.group(7)
            # whatever parseStabType leaves unparsed; reported at the end of this iteration
            remaining = ""
            if n_type == 0x64: # N_SO
                if n_str == None:
                    # N_SO without a name ends the current compile unit
                    if parent.tag != "TAG_compile_unit":
                        if parent.tag == "TAG_include" and parent.dSYM.currentCompileUnit.AT_name == parent.AT_name:
                            #print("Warning: parent tag (%s) is not TAG_compile_unit: [%6d] %02x (%-13s) %02x %04x %16x '%s'" % (parent.tag, Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
                            pass
                        else:
                            print("Error: parent tag (%s) is not TAG_compile_unit: [%6d] %02x (%-13s) %02x %04x %16x '%s'" % (parent.tag, Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
                        # make the enclosing calls unwind up to the compile unit
                        parent.dSYM.popuntiltag = "TAG_compile_unit"
                    break
                else:
                    p = self.dirpathRE.match(n_str)
                    if p:
                        # directory stab: remembered as the comp_dir of the compile unit that follows
                        if self.path != None or parent.dSYM.currentCompileUnit != None:
                            print("Error: path (%s) or currentCompileUnit (%s) is already set: [%6d] %02x (%-13s) %02x %04x %16x '%s'" % (self.path, "yes" if parent.dSYM.currentCompileUnit != None else "None", Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
                        self.path = p.group(1)
                        if debug: dbgprint("[%d] Path: «%s»" % (Index, self.path))
                    else:
                        if parent.dSYM.currentCompileUnit != None:
                            print("Error: starting new currentCompileUnit when previous hasn't been finished yet: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
                        DIE = self.makeDIE(parent, Index, "TAG_compile_unit")
                        self.currentfile = n_str
                        DIE.SetName(n_str)
                        DIE.AT_comp_dir = self.path
                        if n_desc == 0: pass
                        elif n_desc == 0x1: DIE.AT_language = "N_SO_AS" # Assembly language
                        elif n_desc == 0x2: DIE.AT_language = "N_SO_C" # K&R traditional C
                        elif n_desc == 0x3: DIE.AT_language = "N_SO_ANSI_C" # ANSI C
                        elif n_desc == 0x4: DIE.AT_language = "N_SO_CC" # C++
                        elif n_desc == 0x5: DIE.AT_language = "N_SO_FORTRAN" # Fortran
                        elif n_desc == 0x6: DIE.AT_language = "N_SO_PASCAL" # Pascal
                        elif n_desc == 0x7: DIE.AT_language = "N_SO_FORTRAN90" # Fortran90
                        elif n_desc == 0x32: DIE.AT_language = "N_SO_OBJC" # Objective-C
                        elif n_desc == 0x33: DIE.AT_language = "N_SO_OBJCPLUS" # Objective-C++
                        else: print("Error: [%d] Unknown souce language %x" % (Index, n_desc))
                        self.Readstabs(f, DIE)
                        # the compile unit is finished; reset the per-unit state
                        parent.dSYM.currentCompileUnit = None
                        self.path = None
                        self.currentfile = None
            elif n_type == 0x66: # N_OSO
                if n_str != None:
                    if parent.dSYM.currentCompileUnit != None:
                        parent.dSYM.currentCompileUnit.objectfile = n_str
            elif n_type == 0x84: # N_SOL
                self.currentfile = n_str
            elif n_type == 0x82: # N_BINCL
                DIE = self.makeDIE(parent, Index, "TAG_include") # fake dwarf tag
                savecurrentfile = self.currentfile
                self.currentfile = n_str
                DIE.AT_name = n_str
                self.Readstabs(f, DIE)
                self.currentfile = savecurrentfile
            elif n_type == 0xa2: # N_EINCL
                if parent.tag != "TAG_include":
                    print("Error: parent tag (%s) is not TAG_include: [%6d] %02x (%-13s) %02x %04x %16x '%s'" % (parent.tag, Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
                    parent.dSYM.popuntiltag = "TAG_include"
                break
            elif n_type == 0xc2: # N_EXCL
                DIE = self.makeDIE(parent, Index, "TAG_exclude") # fake dwarf tag
                DIE.AT_name = n_str
            elif n_type == 0x2e: # N_BNSYM
                DIE = self.makeDIE(parent, Index, "TAG_symbol") # fake dwarf tag
                self.Readstabs(f, DIE)
            elif n_type == 0x4e: # N_ENSYM
                if parent.tag != "TAG_symbol":
                    print("Error: parent tag (%s) is not TAG_symbol: [%6d] %02x (%-13s) %02x %04x %16x '%s'" % (parent.tag, Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
                break
            elif n_type == 0x3c: # N_OPT
                # currentCompileUnit is reset to None after each N_SO scope, so
                # the attribute exists even outside a compile unit; test its
                # value rather than its mere presence.
                if getattr(parent.dSYM, "currentCompileUnit", None) is None:
                    print("Error: expected N_OPT to be inside an N_SO: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
                else:
                    parent.dSYM.currentCompileUnit.AT_producer = n_str
            elif n_type == 0x24: # N_FUN
                if n_str == None:
                    # N_FUN without a name ends the current function
                    if parent.tag != "TAG_subprogram":
                        print("Error: parent tag (%s) is not TAG_subprogram: [%6d] %02x (%-13s) %02x %04x %16x '%s'" % (parent.tag, Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
                    break
                else:
                    p = self.stringRE.match(n_str)
                    if p:
                        if p.group(2) == "F" or p.group(2) == "f":
                            DIE = self.makeDIE(parent, Index, "TAG_subprogram")
                            typeDIE, remaining = self.parseStabType(None, parent, Index, p.group(3))
                            DIE.AT_type = typeDIE.typenumber
                            DIE.SetName(p.group(1))
                            if p.group(2) == "F":
                                DIE.AT_external = 1
                            if n_desc > 0:
                                DIE.AT_decl_line = n_desc
                            DIE.AT_low_pc = n_value
                            self.Readstabs(f, DIE)
                        else:
                            print("Error: Expected F or f for N_FUN: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
                    else:
                        print("Error: Unknown N_FUN format: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
            elif n_type == 0xc0: # N_LBRAC
                DIE = self.makeDIE(parent, Index, "TAG_lexical_block")
                DIE.AT_low_pc = n_value
                self.Readstabs(f, DIE)
            elif n_type == 0xe0: # N_RBRAC
                if parent.tag != "TAG_lexical_block":
                    print("Error: parent tag (%s) is not TAG_lexical_block: [%6d] %02x (%-13s) %02x %04x %16x '%s'" % (parent.tag, Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
                else:
                    parent.AT_high_pc = n_value
                break
            elif n_type == 0x80: # N_LSYM
                # a stab without a string cannot be matched; report it as an unknown format instead of raising
                p = self.stringRE.match(n_str) if n_str is not None else None
                if p:
                    if debug: dbgprint("{ N_LSYM [%d] «%s» «%s» «%s»" % (Index, p.group(1), p.group(2), p.group(3)))
                    DIE = None
                    typeDIE = None
                    if p.group(2) == "" or p.group(2) == "t" or p.group(2) == "T" or p.group(2) == "Tt":
                        typeDIE, remaining = self.parseStabType(None, parent, Index, p.group(3))
                        if debug: DumpDIE(typeDIE, 0)
                        if p.group(2) != "":
                            # t / T / Tt: the stab names a type, so reconcile that name with the parsed type
                            if not hasattr(typeDIE, "AT_name"):
                                if debug: dbgprint("setting type «%s» name to «%s»" % (typeDIE.typenumber, p.group(1)))
                                typeDIE.SetName(p.group(1))
                            elif typeDIE.AT_name != p.group(1):
                                if hasattr(typeDIE, "iscrossreference"):
                                    # This is the usual form of the message:
                                    # Warning: New name «IORegistryEntry::ExpansionData» ≠ old name «ExpansionData»
                                    if p.group(1) != "" and p.group(1) != " ":
                                        if p.group(1).endswith("::" + typeDIE.AT_name):
                                            typeDIE.SetNameWithClass(p.group(1))
                                        else:
                                            print("Warning: [%d] «%s» New name «%s» ≠ old name «%s»" % (Index, typeDIE.typenumber, p.group(1), typeDIE.AT_name))
                                elif hasattr(typeDIE, "isnamefromcrossreference"):
                                    print("Warning: [%d] «%s» Name «%s» from cross reference is replaced by typedef name «%s»" % (Index, typeDIE.typenumber, typeDIE.AT_name, p.group(1)))
                                    del typeDIE.isnamefromcrossreference
                                    typeDIE.AT_name = p.group(1)
                                    typeDIE.CheckName()
                                elif hasattr(typeDIE, "namesetfromtyperef"):
                                    # the earlier name was only borrowed from a typedef; the real name wins
                                    del typeDIE.namesetfromtyperef
                                    typeDIE.AT_name = p.group(1)
                                    typeDIE.CheckName()
                                elif hasattr(typeDIE, "namesetfromtyperef2"):
                                    del typeDIE.namesetfromtyperef2
                                    typeDIE.AT_name = p.group(1)
                                    typeDIE.CheckName()
                                else:
                                    if p.group(1) != "" and p.group(1) != " ":
                                        if p.group(1).endswith("::" + typeDIE.AT_name):
                                            typeDIE.SetNameWithClass(p.group(1))
                                        else:
                                            print("Error: [%d] «%s» New name «%s» ≠ old name «%s»" % (Index, typeDIE.typenumber, p.group(1), typeDIE.AT_name))
                            if typeDIE.tag == "TAG_typedef" and typeDIE.HasType():
                                # lend the typedef name to a still-anonymous target type
                                attype = typeDIE.GetType()
                                if attype != None and attype.tag != "TAG_pointer_type" and attype.tag != "TAG_reference_type" and attype.tag != "TAG_const_type" and attype.tag != "TAG_volatile_type":
                                    if not hasattr(attype, "AT_name"):
                                        attype.AT_name = p.group(1)
                                        attype.CheckName()
                                        attype.namesetfromtyperef = 1
                            if hasattr(typeDIE, "createdFrom"):
                                for attype in typeDIE.createdFrom:
                                    if attype != None and attype.tag != "TAG_pointer_type" and attype.tag != "TAG_reference_type" and attype.tag != "TAG_const_type" and attype.tag != "TAG_volatile_type":
                                        if not hasattr(attype, "AT_name"):
                                            attype.AT_name = p.group(1)
                                            attype.CheckName()
                                            attype.namesetfromtyperef2 = 1
                        if p.group(2) == "Tt":
                            #DIE = self.makeDIE(parent, Index, "TAG_typedef")
                            #DIE.SetName(p.group(1))
                            #DIE.AT_type = typeDIE.typenumber
                            DIE = typeDIE
                        elif p.group(2) == "t":
                            if typeDIE.HasType() and typeDIE.tag != "TAG_typedef" and typeDIE.tag != "TAG_base_type" and typeDIE.tag != "TAG_pointer_type" and typeDIE.tag != "TAG_reference_type" and typeDIE.tag != "TAG_const_type" and typeDIE.tag != "TAG_volatile_type" and typeDIE.tag != "TAG_array_type" and typeDIE.tag != "TAG_subroutine_type" and typeDIE.tag != "TAG_subprogram":
                                if hasattr(typeDIE, "iscrossreference"):
                                    #print("Warning: [%d] «%s» typedef «%s» created from crossreference had tag «%s»" % (Index, typeDIE.typenumber, typeDIE.AT_name, typeDIE.tag))
                                    typeDIE.tag = "TAG_typedef"
                                elif typeDIE.HasType():
                                    print("Error: [%d] «%s» typedef «%s» has unexpected tag «%s»" % (Index, typeDIE.typenumber, "None" if not hasattr(typeDIE, "AT_name") else typeDIE.AT_name, typeDIE.tag))
                            DIE = typeDIE
                        elif p.group(2) == "":
                            # no symbol descriptor: a local variable
                            DIE = self.makeDIE(parent, Index, "TAG_variable")
                            DIE.SetName(p.group(1))
                            DIE.AT_type = typeDIE.typenumber
                        else:
                            DIE = typeDIE
                        if DIE != None:
                            if n_desc > 0:
                                DIE.AT_decl_line = n_desc
                            if n_value > 0:
                                DIE.AT_location = ("local %x" % n_value)
                    else:
                        print("Error: Expected \"\" or t or T or Tt for N_LSYM: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
                    if debug: dbgprint("} N_LSYM")
                else:
                    print("Error: Unknown N_LSYM format: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
            elif n_type == 0xa0: # N_PSYM
                p = self.stringRE.match(n_str) if n_str is not None else None
                if p:
                    if p.group(2) == "p":
                        typeDIE, remaining = self.parseStabType(None, parent, Index, p.group(3))
                        #typeDIE.SetName(p.group(1))
                        DIE = self.makeDIE(parent, Index, "TAG_formal_parameter")
                        DIE.SetName(p.group(1))
                        DIE.AT_type = typeDIE.typenumber
                        if n_desc > 0:
                            DIE.AT_decl_line = n_desc
                        if n_value > 0:
                            DIE.AT_location = ("stack %x" % n_value)
                    else:
                        print("Error: Expected p for N_PSYM: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
                else:
                    print("Error: Unknown N_PSYM format: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
            elif n_type == 0x40: # N_RSYM
                p = self.stringRE.match(n_str) if n_str is not None else None
                if p:
                    DIE = None
                    typeDIE = None
                    if p.group(2) == "r" or p.group(2) == "P":
                        if debug: dbgprint("{ N_RSYM r or P: [%d] «%s» «%s» «%s»" % (Index, p.group(1), p.group(2), p.group(3)))
                        typeDIE, remaining = self.parseStabType(None, parent, Index, p.group(3))
                        if p.group(2) == "r":
                            DIE = self.makeDIE(parent, Index, "TAG_variable")
                        elif p.group(2) == "P":
                            DIE = self.makeDIE(parent, Index, "TAG_formal_parameter")
                        if DIE != None:
                            DIE.SetName(p.group(1))
                            DIE.AT_type = typeDIE.typenumber
                            if n_desc > 0:
                                DIE.AT_decl_line = n_desc
                            if n_value > 0:
                                DIE.AT_location = ("register %x" % n_value)
                        if debug: dbgprint("} N_RSYM")
                    else:
                        print("Error: Expected r or P for N_RSYM: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
                else:
                    print("Error: Unknown N_RSYM format: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
            elif n_type == 0x26: # N_STSYM
                p = self.stringRE.match(n_str) if n_str is not None else None
                if p:
                    if p.group(2) == "S" or p.group(2) == "V":
                        typeDIE, remaining = self.parseStabType(None, parent, Index, p.group(3))
                        #typeDIE.SetName(p.group(1))
                        DIE = self.makeDIE(parent, Index, "TAG_variable")
                        DIE.SetName(p.group(1))
                        DIE.AT_type = typeDIE.typenumber
                        if n_desc > 0:
                            DIE.AT_decl_line = n_desc
                        if n_value > 0:
                            DIE.AT_location = ("data %x" % n_value)
                    else:
                        print("Error: Expected S or V for N_STSYM: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
                else:
                    print("Error: Unknown N_STSYM format: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
            elif n_type == 0x28: # N_LCSYM
                p = self.stringRE.match(n_str) if n_str is not None else None
                if p:
                    if p.group(2) == "S" or p.group(2) == "V":
                        typeDIE, remaining = self.parseStabType(None, parent, Index, p.group(3))
                        #typeDIE.SetName(p.group(1))
                        DIE = self.makeDIE(parent, Index, "TAG_variable")
                        DIE.SetName(p.group(1))
                        DIE.AT_type = typeDIE.typenumber
                        if n_desc > 0:
                            DIE.AT_decl_line = n_desc
                        if n_value > 0:
                            DIE.AT_location = ("BSS %x" % n_value)
                    else:
                        print("Error: Expected S or V for N_LCSYM: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
                else:
                    print("Error: Unknown N_LCSYM format: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
            elif n_type == 0x20: # N_GSYM
                p = self.stringRE.match(n_str) if n_str is not None else None
                if p:
                    if p.group(2) == "G":
                        typeDIE, remaining = self.parseStabType(None, parent, Index, p.group(3))
                        #typeDIE.SetName(p.group(1))
                        DIE = self.makeDIE(parent, Index, "TAG_variable")
                        DIE.SetName(p.group(1))
                        DIE.AT_type = typeDIE.typenumber
                        DIE.AT_external = 1
                        if n_desc > 0:
                            DIE.AT_decl_line = n_desc
                    else:
                        print("Error: Expected G for N_GSYM: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
                else:
                    print("Error: Unknown N_GSYM format: [%d] %02x (%-13s) %02x %04x %16x '%s'" % (Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
            elif n_type == 0x01: # UNDF EXT
                pass
            elif n_type == 0x0d: # PBUD EXT
                pass
            elif n_type == 0x0e: # SECT
                pass
            elif n_type == 0x0f: # SECT EXT
                pass
            elif n_type == 0x12: # PEXT ABS
                pass
            elif n_type == 0x1e: # PEXT SECT
                pass
            elif n_type == 0x44: # N_SLINE # line numbers
                pass
            elif n_type == 0x02: # ABS
                pass
            elif n_type == 0x03: # ABS EXT
                pass
            else:
                print("Error: Unknown stab type (0x%x = %s): [%6d] %02x (%-13s) %02x %04x %16x '%s'" % (n_type, n_type_str, Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
            if remaining != "":
                print("Error: Unparsed stuff (%s): [%6d] %02x (%-13s) %02x %04x %16x '%s'" % (remaining, Index, n_type, n_type_str, n_sect, n_desc, n_value, n_str))
        else: # if s:
            #print("%s" % line)
            pass
        # A nested call asked to unwind: keep leaving scopes until the
        # requested tag is the one being closed.
        if hasattr(parent.dSYM, "popuntiltag"):
            if parent.tag != parent.dSYM.popuntiltag:
                print("Warning: got %s breaking until %s [%6d]" % (parent.tag, parent.dSYM.popuntiltag, Index))
            else:
                #print("Warning: reached tag %s [%6d]" % (parent.tag, Index))
                del parent.dSYM.popuntiltag
            break
def CheckDIEs(self, parent):
    """Post-process members flagged with ``checkmembersize`` below ``parent``.

    For every flagged child the flag is removed, then:
    - if the child's base type carries ``enumsizemax`` and the member is
      smaller than that, the enum type is shrunk to the member's size;
    - if the member is still smaller than its base type, it is converted to
      a bit field (byte offset and size become bit offset and size).

    The whole tree is visited recursively.
    """
    if hasattr(parent, "children"):
        for child in parent.children:
            if hasattr(child, "checkmembersize"):
                del child.checkmembersize
                typeDIE = child.GetBaseType()
                # Without a resolvable base type there is nothing to compare
                # the member size against, so leave the member untouched.
                if typeDIE is not None:
                    if hasattr(typeDIE, "enumsizemax") and child.AT_byte_size < typeDIE.enumsizemax:
                        # set enum type size to minimum of struct member size
                        typeDIE.AT_byte_size = child.AT_byte_size
                        typeDIE.enumsizemax = child.AT_byte_size
                    if child.AT_byte_size < typeDIE.GetByteSize():
                        # assume struct members that are smaller than their type are actually bit fields
                        child.AT_data_bit_offset = child.AT_data_member_location * 8
                        child.AT_bit_size = child.AT_byte_size * 8
                        del child.AT_data_member_location
                        del child.AT_byte_size
            self.CheckDIEs(child)
def Process_stab(self, filename):
    """Build a DIE tree from the stabs of ``filename`` and return its root.

    If the name matches the ``.txt`` pattern the file is read directly as a
    text dump; otherwise ``dsymutil -s`` is run on it and its output is
    captured in a temporary file. The lines are parsed with ``Readstabs``,
    fixed up with ``CheckDIEs`` and finally passed to ``MakeAllVTables``.
    """
    # NOTE(review): re.match with '.*\.txt' accepts ".txt" anywhere in the
    # name, not only as a suffix - confirm whether that is intended.
    isTextDump = bool(re.match(r'.*\.txt', filename))
    if isTextDump:
        f = open(filename, "r")
    else:
        # Text mode, so the lines are str just like in the open() branch;
        # the default binary mode would yield bytes that the str regular
        # expressions cannot match.
        f = tempfile.NamedTemporaryFile(mode="w+")
        #print("Created temp file: %s" % f.name)
    try:
        if not isTextDump:
            subprocess.call(["dsymutil", "-s", filename], stdout=f)
            f.seek(0)
        #print("Processing file: %s" % f.name)
        dSYM = DIEDict()
        dSYM.dSYM = dSYM # the root refers to itself so every DIE can be asked for .dSYM
        dSYM.sourceType = "stabs"
        dSYM.address = 0
        dSYM.currentCompileUnit = None
        dSYM.DIELookup = {}
        dSYM.CompileUnits = []
        dSYM.filename = filename
        #print(dSYM)
        self.Readstabs(f, dSYM)
        self.CheckDIEs(dSYM)
        # parsing state only; not part of the finished tree
        del dSYM.currentCompileUnit
    finally:
        # release the file (and delete the temporary one) even if parsing fails
        f.close()
    MakeAllVTables(dSYM)
    return dSYM
#========================================================================================= | |
# DumpAllStructs | |
def MakeOffsetStr(offset):
    """Return ``offset`` as "0x"-prefixed lowercase hex, right-aligned to at least 6 characters."""
    # decimal alternative: "%4d" % offset
    hexText = "0x%x" % offset
    return hexText.rjust(6)
# from /Library/Developer/KDKs/KDK_10.11.5_15F34.kdk/System/Library/Kernels/kernel.dSYM/Contents/Resources/Python/lldbmacros/structanalyze.py | |
def _typeClassName(typeClass):
    """Return "class", "union" or "struct" for a type class, else an "_unknown_<hex>" placeholder."""
    if typeClass == lldb.eTypeClassClass :
        return "class"
    if typeClass == lldb.eTypeClassUnion :
        return "union"
    if typeClass == lldb.eTypeClassStruct :
        return "struct"
    return "_unknown_%x" % (typeClass or 0)

def _showStructPacking(symbol, typename, fieldname, prefix, depth, class_depth, begin_offset=0):
    """Render the memory layout of a struct/class/union as indented text.

    Emits, in order: a header line, one line per friend, the direct base
    classes, the fields (nested aggregates are rendered recursively) and,
    when ``class_depth`` is 0, the virtual base classes. Each entry shows
    its offset; entries whose offset disagrees with the running "compact"
    offset are annotated with an overlap / hole / alignment warning.

    Parameters:
        symbol: the type to render.
        typename: name to print; when None the symbol's own name (or an
            "_anon_" placeholder) is used.
        fieldname: member name printed after the type name, if not empty.
        prefix: text prepended to every line emitted for the members.
        depth: nesting depth; only passed on (incremented) to recursive calls.
        class_depth: 0 for a most-derived object; base classes are rendered
            with class_depth + 1 so that virtual bases are listed only once.
        begin_offset: offset added to every member offset.

    Returns:
        (text, compact size in bytes, largest member alignment seen).
    """
    classType = symbol.GetClass()
    ctype = _typeClassName(classType)
    if typename == None:
        typename = symbol.GetName() or "_anon_%s" % symbol.GetAddress()
    if fieldname != None and fieldname != "":
        outstr = "[%4d] (%s) %s %s {" % (symbol.GetByteSize(), ctype, typename, fieldname) + "\n"
    else:
        outstr = "[%4d] (%s) %s {" % (symbol.GetByteSize(), ctype, typename) + "\n"
    if hasattr(symbol, "Friends"):
        # A friend refers to its type either through a type attribute or
        # through AT_friend:
        #
        # Mac OS X 10.8:
        # 0x009fcff0: TAG_structure_type
        # AT_name ("IOStatistics")
        # AT_declaration (0x01)
        # 0x009fd4d4: TAG_friend
        # AT_type (0x009fcff0 "IOStatistics")
        # AT_data_member_location (OP_plus_uconst 0x0)
        # AT_accessibility (ACCESS_public)
        #
        # Mac OS X 10.9:
        # 0x00b67148: TAG_class_type
        # AT_name ("IOStatistics")
        # AT_declaration (0x01)
        # 0x00b675ca: TAG_friend
        # AT_friend (0x00b67148)
        for friend in symbol.Friends:
            friendType = None
            if friend.HasType():
                friendType = friend.GetType()
            elif hasattr(friend, "AT_friend"):
                friendType = friend.dSYM.DIELookup[friend.AT_friend]
            if friendType is None:
                # no type to name, so there is nothing to print for this friend
                continue
            # describe the friend by its own class kind, not by the kind of the enclosing type
            friendKind = _typeClassName(friendType.GetClass())
            # "%s" like every other GetAddress() formatting in this file
            outstr = outstr + prefix + " friend %s %s\n" % (friendKind, friendType.GetName() or "_anon_%s" % friendType.GetAddress())
    # _compact_offset is where the next member would start if the members seen so far were packed
    _compact_offset = begin_offset
    max_union_member_size = 0
    max_align = 1
    m_align = 1
    numClasses = symbol.GetNumberOfDirectBaseClasses()
    for i in range(numClasses):
        member = symbol.GetDirectBaseClassAtIndex(i)
        m_offset = member.GetOffsetInBytes() + begin_offset
        m_type = member.GetType()
        membertypename = m_type.GetName()
        m_type = member.GetBaseType()
        m_size = m_type.GetByteSize()
        warningstr = ""
        debugstr = "" # + str((begin_offset, m_offset, _compact_offset, m_size))
        #print(prefix, "V", membertypename, debugstr)
        if _compact_offset > m_offset:
            warningstr = " *** Possible memory overlap ***"
        elif _compact_offset < m_offset:
            # m_align is still the alignment of the previously rendered member here
            align_offset = ((_compact_offset + m_align-1) & -m_align)
            if align_offset != m_offset :
                # ignore memory holes that may be caused by field alignment
                #_has_memory_hole = True
                warningstr = " *** Possible memory hole (msize:%d align:%d calc:0x%x calcaligned:0x%x actual:0x%x) ***" % (m_size, m_align, _compact_offset, align_offset, m_offset)
            _compact_offset = m_offset
        s, compact_size, m_align = _showStructPacking(m_type, membertypename, None, prefix+" ", depth + 1, class_depth + 1, m_offset)
        if m_align > max_align:
            max_align = m_align
        #print(prefix, "V", membertypename, "m_align:%d max_align:%d compact_size:%d" % (m_align, max_align, compact_size))
        outstr += prefix + ("*%s," % MakeOffsetStr(m_offset)) + s + warningstr + debugstr + "\n"
        _compact_offset += compact_size
    numFields = symbol.GetNumberOfFields()
    #_has_memory_hole = False
    # Bit field tracking: consecutive bit fields share one storage unit that
    # starts at bitFieldStartByte and spans bitFieldTotalBytes; used_bits is
    # the mask of bits already claimed in that unit. next_used_bits carries
    # bits of a field with a negative bit offset over to the next unit.
    inBitField = False
    totalBits = 0
    bitFieldStartByte = -1
    bitFieldTotalBytes = -1
    used_bits = 0
    next_used_bits = 0
    next_totalBits = 0
    for i in range(numFields):
        member = symbol.GetFieldAtIndex(i)
        m_offset = member.GetOffsetInBytes() + begin_offset
        m_size_bits = member.GetBitSize()
        m_offset_bits = member.GetOffsetInBits()
        isBitField = hasattr(member, "AT_bit_size")
        m_name = member.GetName() or "" # "_anon_member_%s" % member.GetAddress()
        m_type = member.GetType()
        membertypename = m_type.GetName()
        m_type = member.GetBaseType()
        membertypeclass = None
        m_size = 1
        if m_type != None:
            membertypeclass = m_type.GetClass()
            m_size = m_type.GetByteSize()
        if inBitField:
            # continuing previously started bit fields?
            if (not isBitField) or classType == lldb.eTypeClassUnion or (m_offset >= (bitFieldStartByte + bitFieldTotalBytes)):
                # no, finish previously started bit field
                # count the bytes actually touched by the bits that were used
                compactbitsbytes = 0
                tmp_used_bits = used_bits
                while tmp_used_bits != 0:
                    tmp_used_bits >>= 8
                    compactbitsbytes += 1
                #if compactbitsbytes != bitFieldTotalBytes:
                # if debug: dbgprint("*** usedbits:%x bitsbytes:%d bitFieldTotalBytes:%d ***" % (used_bits, compactbitsbytes, bitFieldTotalBytes))
                if classType != lldb.eTypeClassUnion:
                    #_compact_offset += bitFieldTotalBytes
                    _compact_offset += compactbitsbytes
                inBitField = False
                bitFieldStartByte = m_offset
                used_bits = 0
                totalBits = 0
        if isBitField:
            if not inBitField:
                # new set of bit fields started
                inBitField = True
                bitFieldStartByte = m_offset
                bitFieldTotalBytes = m_size
                used_bits = next_used_bits
                totalBits = next_totalBits
                next_used_bits = 0
                next_totalBits = 0
            if next_used_bits:
                # only reachable when continuing an existing set (a new set has just consumed the carry)
                print("Error: have carry over bits but not in new bit field next_used_bits:0x%08x at %s" % (next_used_bits, member.GetAddress()))
                next_used_bits = 0
                next_totalBits = 0
            totalBits += m_size_bits
            # express the bit offset relative to the start of the shared storage unit
            m_offset_bits += (m_offset - bitFieldStartByte) * 8
            m_offset = bitFieldStartByte
            # the type of a bitfield does not always mean the total size of all bit fields because you can mix sized types in a sequence of bit fields
            while bitFieldTotalBytes * 8 < m_offset_bits + m_size_bits:
                #print("[ bitFieldTotalBytes %d" % bitFieldTotalBytes)
                bitFieldTotalBytes *= 2
                #print("] bitFieldTotalBytes %d" % bitFieldTotalBytes)
            if m_offset_bits >= 0:
                bitfield = " : %d // %d..%d" % (m_size_bits, m_offset_bits, m_offset_bits + m_size_bits - 1)
            elif m_size_bits + m_offset_bits > 0:
                bitfield = " : %d // %d..%d,%d..%d" % (m_size_bits, bitFieldTotalBytes * 8 + m_offset_bits, bitFieldTotalBytes * 8 - 1, 0, m_size_bits + m_offset_bits - 1)
            else:
                bitfield = " : %d // %d..%d,nothing" % (m_size_bits, bitFieldTotalBytes * 8 + m_offset_bits, bitFieldTotalBytes * 8 - 1)
        else:
            bitFieldStartByte = m_offset
            bitFieldTotalBytes = m_size
            used_bits = 0
            totalBits = 0
            bitfield = ""
            if next_used_bits:
                print("Error: have carry over bits but not in bit field next_used_bits:0x%08x at %s" % (next_used_bits, member.GetAddress()))
                next_used_bits = 0
                next_totalBits = 0
        warningstr = ""
        thebits = 0
        try:
            if m_offset_bits >= 0:
                thebits = (~(-1 << m_size_bits)) << m_offset_bits
            else:
                next_totalBits = -m_offset_bits
                next_used_bits = (~(-1 << next_totalBits)) << (bitFieldTotalBytes * 8 + m_offset_bits)
                thebits = (~(-1 << (m_size_bits + m_offset_bits))) << 0
        except (TypeError, ValueError, OverflowError):
            # a negative shift count raises ValueError
            # negative bit offset means something like bit field overlaps next member... complicated
            print("Error with bits used_bits:0x%08x thebits(previous):0x%08x size:%d offset:%d type:%s at %s" % (used_bits, thebits, m_size_bits, m_offset_bits, m_offset_bits.__class__.__name__, member.GetAddress()))
            thebits = 0
        if ((thebits & used_bits) != 0) or m_size_bits < 0 or m_size_bits + m_offset_bits > bitFieldTotalBytes * 8:
            warningstr = " *** Possible bit field error ***"
            used_bits = 0
        used_bits |= thebits
        debugstr = "" # + str((begin_offset, m_offset, _compact_offset, m_offset_bits, m_size, m_size_bits, thebits, used_bits))
        if membertypeclass == lldb.eTypeClassStruct or membertypeclass == lldb.eTypeClassUnion or membertypeclass == lldb.eTypeClassClass :
            s, compact_size, m_align = _showStructPacking(m_type, membertypename, m_name, prefix+" ", depth + 1, 0, m_offset)
            outstr += prefix + ("*%s," % MakeOffsetStr(m_offset)) + s
        else:
            outstr += prefix + ("+%s,[%4d] (%s)%s%s%s" % (MakeOffsetStr(m_offset), m_size, membertypename, "" if m_name == "" else " ", m_name, bitfield))
            compact_size = m_size
            m_align = 1
            if m_type != None:
                m_align = m_type.GetAlign()
        if m_align > max_align:
            max_align = m_align
        #print(prefix, membertypename, m_name, "calc:0x%x actual:0x%x calcsize:%d actualsize:%d m_align:%d max_align:%d" % (_compact_offset, m_offset, compact_size, m_size, m_align, max_align))
        if _compact_offset > m_offset:
            warningstr = " *** Possible memory overlap (msize:%d align:%d calc:%d actual:%d) ***" % (m_size, m_align, _compact_offset, m_offset)
        elif _compact_offset < m_offset:
            align_offset = ((_compact_offset + m_align-1) & -m_align)
            if align_offset != m_offset :
                # ignore memory holes that may be caused by field alignment
                #_has_memory_hole = True
                if m_align == 8:
                    # 8-byte types are sometimes only 4-byte aligned; report that separately
                    align_offset4 = ((_compact_offset + 4-1) & -4)
                    if align_offset4 == m_offset :
                        warningstr = " *** Alignment is 4 instead of 8 ***"
                    else:
                        warningstr = " *** Possible memory hole (msize:%d align:%d calc:0x%x calcaligned:0x%x actual:0x%x) ***" % (m_size, m_align, _compact_offset, align_offset, m_offset)
                else:
                    warningstr = " *** Possible memory hole (msize:%d align:%d calc:0x%x calcaligned:0x%x actual:0x%x) ***" % (m_size, m_align, _compact_offset, align_offset, m_offset)
            _compact_offset = m_offset
        if classType == lldb.eTypeClassUnion:
            # union members overlap; only the largest one counts towards the size
            if m_size > max_union_member_size:
                max_union_member_size = m_size
        elif not inBitField:
            _compact_offset += m_size
        outstr += warningstr + debugstr + "\n"
    if next_used_bits:
        print("Error: have carry over bits after fields next_used_bits:0x%08x at %s" % (next_used_bits, member.GetAddress()))
        next_used_bits = 0
        next_totalBits = 0
    if classType != lldb.eTypeClassUnion and inBitField:
        # account for a bit field set that was still open at the last field
        _compact_offset += bitFieldTotalBytes
        inBitField = False
    if class_depth == 0 and hasattr(symbol, "AllVirtualBaseClasses"):
        for virtualbaseclassinfo in symbol.AllVirtualBaseClasses:
            member = virtualbaseclassinfo.member
            m_type = member.GetType()
            membertypename = m_type.GetName()
            m_type = member.GetBaseType()
            m_size = m_type.GetByteSize()
            m_align = m_type.GetAlign()
            # virtual bases are placed at the next aligned offset after everything else
            m_offset = ((_compact_offset + m_align-1) & -m_align)
            warningstr = " virtual"
            debugstr = "" # + str((begin_offset, m_offset, _compact_offset, m_size))
            #print(prefix, "V", membertypename, debugstr)
            _compact_offset = m_offset
            s, compact_size, a = _showStructPacking(m_type, membertypename, None, prefix+" ", depth + 1, 0, m_offset)
            outstr += prefix + ("*%s," % MakeOffsetStr(m_offset)) + s + warningstr + debugstr + "\n"
            _compact_offset += compact_size
    outstr += prefix + "}"
    if classType == lldb.eTypeClassUnion:
        _compact_offset += max_union_member_size
    #if _has_memory_hole == True :
    # outstr += " *** Warning: Struct layout leaves memory hole ***"
    return outstr, _compact_offset - begin_offset, max_align
# Splits the type name of a virtual member function into its parts. The
# pattern expects the artificial first parameter to be wrapped in a
# /* ... */ comment right after the opening parenthesis.
vtableFunctionRE = re.compile(r'(.*?) \(\)\(/\*( const)? (.*?) \*,? \*/ ?(.*)\)')
# group(1) = function return type
# group(2) = ' const' (None when the artificial parameter is not const)
# group(3) = class from artificial parameter
# group(4) = parameters (the remaining, explicit ones)
def doOneVTable(symbol, prefix, vtableinfo):
    """Render one vtable of ``symbol`` as text, one line per slot.

    Slots 0..vtableinfo.vmax are listed; a slot missing from
    ``mergedVTableEntries`` (or whose member has no name) is printed as
    offset and size only. Returns "" when the table has no slots.
    """
    slotCount = vtableinfo.vmax + 1
    if not slotCount > 0:
        return ""
    pieces = []
    ownerName = symbol.GetName()
    if vtableinfo.vPtrOffset == 0:
        pieces.append(prefix + "vtable for %s {\n" % (ownerName))
    else:
        pieces.append(prefix + "vtable for %s 0x%x {\n" % (ownerName, vtableinfo.vPtrOffset))
    for slot in range(slotCount):
        funcName = None
        if slot in vtableinfo.mergedVTableEntries:
            entry = vtableinfo.mergedVTableEntries[slot]
            funcDIE = entry.DIE
            funcName = funcDIE.GetName()
            funcTypeName = funcDIE.GetNameForType(True)
            owner = funcDIE.GetContainingType()
            if owner != None:
                ownerTypeName = owner.GetName()
                ownerQualifier = ownerTypeName+"::"
            else:
                ownerTypeName = ""
                ownerQualifier = ""
            thunkPrefix = entry.ThunkType if hasattr(entry, "ThunkType") else ""
        slotSize = symbol.compile_unit.addr_size
        where = MakeOffsetStr(slot * slotSize)
        if funcName == None:
            # empty slot
            pieces.append(prefix + ("+%s,[%4d]\n" % (where, slotSize)))
            continue
        # fallback form, used whenever the type name cannot be taken apart
        plainLine = prefix + ("+%s,[%4d] (%s) %s%s\n" % (where, slotSize, funcTypeName, ownerQualifier, funcName))
        parsed = vtableFunctionRE.match(funcTypeName)
        if not parsed:
            #print("unknownfunc", funcTypeName)
            pieces.append(plainLine)
        elif ownerTypeName != parsed.group(3):
            pieces.append(plainLine)
            print("Error: containingtype '%s' doesn't match artifical parameter '%s'" % (ownerTypeName, parsed.group(3)))
        else:
            #print("vtablefunc", parsed.group(0), parsed.group(1), parsed.group(2), parsed.group(3), parsed.group(4))
            constSuffix = "" if parsed.group(2) == None else parsed.group(2)
            returnType = "" if parsed.group(1) == "void" else parsed.group(1)
            pieces.append(prefix + ("+%s,[%4d] %s %s%s::%s(%s)%s\n" % (where, slotSize, returnType, thunkPrefix, ownerTypeName, funcName, parsed.group(4), constSuffix)))
    pieces.append(prefix + "}")
    return "".join(pieces)
def _showVTablePacking(symbol, prefix): | |
outstr = "" | |
if hasattr(symbol, "VTables"): | |
for vtableinfo in symbol.VTables: | |
vstr = doOneVTable(symbol, prefix, vtableinfo) | |
if len(vstr) > 0: | |
if len(outstr) > 0: | |
outstr += "\n\n" | |
outstr += "%s" % vstr | |
if hasattr(symbol, "VTablesVirtual"): | |
for vtableinfo in symbol.VTablesVirtual: | |
vstr = doOneVTable(symbol, prefix, vtableinfo) | |
if len(vstr) > 0: | |
if len(outstr) > 0: | |
outstr += "\n\n" | |
outstr += "%s" % vstr | |
return outstr | |
def DumpAllStructs(parent, names, level):
    """Print the layout and vtables of every named aggregate below ``parent``.

    Typedefs are followed to the type they name. ``names`` restricts the
    output to types whose name is contained in it (None dumps everything).
    ``level`` is the recursion depth and is only passed along.
    """
    #print("[ Structs %d" % level)
    for child in (parent.children if hasattr(parent, "children") else ()):
        # only dump named types
        if hasattr(child, "AT_name"):
            #print("address %s" % target.GetAddress())
            # also dump typedef'd structs too
            target = child
            shownName = target.GetName()
            if target.tag == "TAG_typedef":
                hops = 0
                while target.tag == "TAG_typedef" and target.HasType():
                    target = target.GetType() # follow typedefs
                    hops += 1
                    if hops > 20:
                        print("Error: type depth is too big «%s» «%s»" % (target.typenumber, target.GetName()))
                        break
                if target.GetName() == shownName:
                    # don't do typedef if struct has same name, we'll do the struct when we get there
                    target = None
            hasMembers = target is not None and target.GetClass() is not None and hasattr(target, "children") and len(target.children) > 0
            if hasMembers and (names is None or target.GetName() in names):
                print("==========================================================================================")
                if hasattr(child, "AT_decl_file"):
                    print('%s "%s"\n' % (child.GetAddress(), child.AT_decl_file))
                else:
                    print("%s\n" % child.GetAddress())
                layoutText, _size, _align = _showStructPacking(target, shownName, None, "", 0, 0, 0)
                print(layoutText)
                vtableText = _showVTablePacking(target, "")
                if vtableText != "":
                    print(vtableText)
        # nested definitions live below their parent, so visit every child
        DumpAllStructs(child, names, level + 1)
    # print("] Structs %d" % level)
#========================================================================================= | |
# DumpAllTypes | |
# Tags that DumpAllTypes does not register on their own.  The trailing notes
# record which other tag is expected to cover them.
_DUMPALLTYPES_SKIPPED_TAGS = frozenset((
    "TAG_compile_unit",
    "TAG_variable",
    "TAG_inheritance",                  # handled by TAG_class_type
    "TAG_member",                       # handled by TAG_class_type, TAG_structure_type, TAG_union_type (AT_data_member_location)
    "TAG_subprogram",                   # handled by TAG_class_type (AT_data_member_location)
    "TAG_formal_parameter",
    "TAG_subroutine_type",              # handled by TAG_pointer_type
    "TAG_subrange_type",                # handled by TAG_array_type
    "TAG_unspecified_parameters",       # handled by TAG_subroutine_type
    "TAG_enumerator",                   # handled by TAG_enumeration_type
    "TAG_lexical_block",
    "TAG_inlined_subroutine",
    "TAG_GNU_template_parameter_pack",  # template
    "TAG_imported_declaration",
    "TAG_imported_module",              # points to TAG_namespace
    "TAG_namespace",
    "TAG_template_type_parameter",
    "TAG_template_value_parameter",
    "TAG_unspecified_type",
))


def DumpAllTypes(Hopper, parent):
    """Register every type DIE below *parent* with *Hopper*.

    Walks the tree depth-first.  For each child the matching ``Hopper.Add*``
    method is called according to ``child.tag``; tags listed in
    ``_DUMPALLTYPES_SKIPPED_TAGS`` are ignored and any other unrecognised tag
    is reported with an error message.  Unnamed reference, const and volatile
    types get a name synthesised from the type they refer to.

    Hopper -- object providing the ``Add*`` registration methods
    parent -- node whose ``children`` are processed; a node without a
              ``children`` attribute is treated as a leaf
    """
    # Leaf DIEs may have no "children" attribute (the other tree walkers in
    # this file guard for that), so fall back to an empty sequence instead of
    # raising AttributeError during the recursion.
    for child in getattr(parent, "children", ()):
        name = getattr(child, "AT_name", None)
        attype = child.GetType() if child.HasType() else None
        tag = child.tag

        if tag in _DUMPALLTYPES_SKIPPED_TAGS:
            pass
        elif tag == "TAG_pointer_type":
            if attype is not None and attype.tag == "TAG_subroutine_type":
                Hopper.AddFunctionPointer(child, name, attype)
            else:
                Hopper.AddPointer(child, name, attype)
        elif tag == "TAG_reference_type":
            if name is None:
                name = "&" if attype is None else attype.GetName() + " &"
            Hopper.AddPointer(child, name, attype)
        elif tag == "TAG_rvalue_reference_type":
            # I don't know what an rvalue_reference_type looks like so I use &_
            if name is None:
                name = "&_" if attype is None else attype.GetName() + " &_"
            Hopper.AddPointer(child, name, attype)
        elif tag == "TAG_const_type":
            if name is None:
                name = 'const' if attype is None else 'const ' + attype.GetName()
            Hopper.AddTypedef(child, name, attype, None)
        elif tag == "TAG_volatile_type":
            if name is None:
                name = 'volatile' if attype is None else 'volatile ' + attype.GetName()
            Hopper.AddTypedef(child, name, attype, None)
        elif tag == "TAG_class_type":
            Hopper.AddClass(child, name, child)
            # might just be a declaration AT_declaration( true )
            # or it might contain children with AT_data_member_location
            # Direct inheritance TAG_inheritance DirectBaseClasses
        elif tag == "TAG_structure_type":
            Hopper.AddStruct(child, name, child)
            # might be a declaration - replace with define if it exists in same compileunit
        elif tag == "TAG_union_type":
            Hopper.AddStruct(child, name, child)
            # might be a declaration - replace with define if it exists in same compileunit
        elif tag == "TAG_typedef":
            Hopper.AddTypedef(child, name, attype, None)
        elif tag == "TAG_base_type":
            Hopper.AddBaseType(child)
        elif tag == "TAG_ptr_to_member_type":
            if attype is not None and attype.tag == "TAG_subroutine_type":
                Hopper.AddPointerToMember(child, name, attype)
            else:
                print("Error: unexpected tag %s" % child.GetAddress())
        elif tag == "TAG_array_type":
            Hopper.AddArray(child, name, attype)
        elif tag == "TAG_enumeration_type":
            Hopper.AddEnumeration(child, name)
            #bytesize
        else:
            print("Error: unknown tag '%s':" % tag)

        DumpAllTypes(Hopper, child)
#========================================================================================= | |
def DumpDIE(DIE, level):
    """Log *DIE* and, recursively, all of its children through ``dbgprint``.

    One line is produced per DIE: its address, its tag (empty when the DIE has
    no ``tag`` attribute) indented by two spaces per *level*, followed by the
    instance attributes.  Bookkeeping attributes are omitted, collections are
    shown by length, links to other DIEs by address, and everything else as
    ``name:«value»``.

    DIE   -- node to dump
    level -- nesting depth of *DIE*, used for the indentation
    """
    omitted = ("dSYM", "address", "typenumber", "compile_unit", "children", "tag", "AT_decl_file")
    sized = ("DIELookup", "Fields", "VTables", "VTablesByOffset", "VTablesVirtual", "VTableEntries")
    linked = ("parent", "duplicateof", "methodClassDIE")

    pieces = ["%-30s %*s%*s" % (DIE.GetAddress(), level * 2, "", -40 + level * 2, DIE.tag if hasattr(DIE, "tag") else "")]
    for attr, value in vars(DIE).items():
        if attr in omitted:
            continue
        if attr in sized:
            # only the number of entries is interesting here
            piece = " %s:[%d]" % (attr, len(value))
        elif attr in linked:
            piece = " %s:%s" % (attr, value.GetAddress())
        elif attr == "artificalDIE":
            piece = " %s:%s:parameter:%d" % (attr, value.parent.GetAddress(), value.parameterNumber)
        elif attr in ("creates", "createdFrom"):
            inner = ", ".join("{%s}" % thetype.GetAddress() for thetype in value)
            piece = " %s:[%s]" % (attr, inner)
        elif attr in ("DirectBaseClasses", "VirtualBaseClasses"):
            inner = ", ".join("{%s}" % inh.GetType().GetAddress() for inh in value)
            piece = " %s:[%s]" % (attr, inner)
        elif attr == "AllVirtualBaseClasses":
            inner = ", ".join("{%s}" % info.member.GetType().GetName() for info in value)
            piece = " %s:[%s]" % (attr, inner)
        elif attr == "IncludedVirtualBaseClasses":
            # keyed collection: show each key followed by the base class name
            inner = ", ".join(
                "{%s%s}" % (address, value[address].member.GetType().GetName())
                for address in value
            )
            piece = " %s:[%s]" % (attr, inner)
        else:
            piece = " %s:«%s»" % (attr, value)
        pieces.append(piece)
    dbgprint("".join(pieces))

    for child in getattr(DIE, "children", ()):
        DumpDIE(child, level + 1)
#========================================================================================= | |
# Main | |
if __name__ == '__main__':
    # sys.argv[0] is always the script name, so an input file is present only
    # when there are at least two entries.  (The previous "< 1" test could
    # never be true, which made the usage message unreachable.)
    if len(sys.argv) < 2:
        print('Expected usage: {0} <dsym>'.format(sys.argv[0]))
        sys.exit(1)

    H = Hopper()
    dostab = False
    for arg in sys.argv[1:]:
        if arg == "-s":
            # The next file argument is read with stab_Reader instead of
            # DSYM_Reader.
            dostab = True
        else:
            print("==========================================================================================")
            print("The file: %s\n" % arg)
            if dostab:
                stabr = stab_Reader()
                dSYM = stabr.Process_stab(arg)
            else:
                dSYMr = DSYM_Reader()
                dSYM = dSYMr.Process_dSYM(arg)

            if debug:
                print("==========================================================================================")
                DumpDIE(dSYM, 0)

            DumpAllStructs(dSYM, None, 0)

            #••••••• TO DO: Finish DumpAllTypes
            #DumpAllTypes(H, dSYM)
            # Disabled debugging dump of the collected Hopper tables:
            #pp = pprint.PrettyPrinter(indent=4, depth=10)
            #pp.pprint(H.Types)
            #pp.pprint(H.UUIDs)
            #for k,v in H.UUIDs.items():
            #    pp.pprint(k)
            #    for attr, value in iter(v.__dict__.items()):
            #        print(attr, value)

            # "-s" only applies to the file that follows it
            dostab = False
Very useful script indeed.
I made a couple changes here to support python3 (s/xrange/range/)
https://gist.github.com/jquirke/132d18b7bba9e3f96368bbc43230b755
Thanks @jquirke. I also changed `d.iteritems()` to `iter(d.items())` as described at https://peps.python.org/pep-0469/
- Added support for stabs so that KDKs for Mac OS X 10.2.6 to 10.4.11 can be processed.
- Fixed some issues with arrays and unions.
- Updated for python 3.12
- Fix duplicate virtual base class inclusion.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Description
This script is based on the structanalyze.py script from KDK_10.11.5_15F34.kdk. It outputs offsets of fields in structs and classes and offsets of methods in vtables. The result can help in disassembly and debugging.
How To Use
The information comes from DWARF or stabs.
DWARF
First, use `dwarfdump` to convert a dSYM (DWARF debug symbol file) to text. You must specify an architecture if there is more than one in the file. You can use the `file` command to check what architectures a dSYM file contains.
Then use the `joedwarftohpt.py` script to output the struct, class and vtable offsets:
stabs
First, use `dsymutil -s` to extract stabs from a binary. If the binary contains more than one architecture then use `lipo` to extract an architecture to a separate file. You can use the `file` command to check what architectures a binary contains.
Then use `joedwarftohpt.py -s` to output the struct, class and vtable offsets:
KDKs
If you have downloaded and installed many KDKs to /Library/Developer/KDKs (such as the KDK for the last revision of each macOS version), then the following script will create commands to dump the info from the kernel files.
> Output commands to dump debug symbols of all kernels from all KDKs
> Output commands to output text files with struct/class/vtable offsets for all dumps of kernel types from all KDKs
Here's a script to list all offsets of certain fields in a specific struct.
> Search for fields in struct vm_map_entry
Notes
`*** Possible memory overlap ***` for nearby fields.
Change `debug = 0` to `debug = 1` to see log messages in the output and to see the generated DWARF dictionary that is used to produce the output.
To Do