Skip to content

Instantly share code, notes, and snippets.

@Jinmo
Last active May 28, 2024 15:08
Show Gist options
  • Save Jinmo/5f131a8bf3335f747e0ae7d6d6b881a4 to your computer and use it in GitHub Desktop.
Save Jinmo/5f131a8bf3335f747e0ae7d6d6b881a4 to your computer and use it in GitHub Desktop.
C/C++ header to IDA

Usage

In IDAPython,

execfile('<path>/cxxparser.py')
parse_file('<path>/a.cpp',[r'-I<path>\LuaJIT-2.0.5\src', '-D__NT__', '-D__X64__', '-D__EA64__'])
parse_file('<path>/malloc.c',['-target=x86_64-linux-gnu'])

a.cpp

#include <lj_crecord.h>

struct MyStruct2 {
	int meh;
};

struct MyStruct: public MyStruct2 {
	struct MyStruct2 *a;
	char b;
	char c;
	int d;
};

TODO

  • create an IDE-agnostic database
  • optimize
import re
from functools import reduce
from clang.cindex import Index, CursorKind, TypeKind, BaseEnumeration, conf
try:
import idaapi
from ida_bytes import *
from ida_typeinf import *
from idc import *
except:
pass
class CallingConv(BaseEnumeration):
_kinds = []
_name_map = None
pass
CallingConv.Default = CallingConv(0)
CallingConv.C = CallingConv(1)
CallingConv.X86StdCall = CallingConv(2)
CallingConv.X86FastCall = CallingConv(3)
CallingConv.X86ThisCall = CallingConv(4)
CallingConv.X86Pascal = CallingConv(5)
CallingConv.AAPCS = CallingConv(6)
CallingConv.AAPCS_VFP = CallingConv(7)
CallingConv.X86RegCall = CallingConv(8)
CallingConv.IntelOclBicc = CallingConv(9)
CallingConv.Win64 = CallingConv(10)
CallingConv.X86_64Win64 = CallingConv.Win64
CallingConv.X86_64SysV = CallingConv(11)
CallingConv.X86VectorCall = CallingConv(12)
CallingConv.Swift = CallingConv(13)
CallingConv.PreserveMost = CallingConv(14)
CallingConv.PreserveAll = CallingConv(15)
CallingConv.AArch64VectorCall = CallingConv(16)
CallingConv.Invalid = CallingConv(100)
CallingConv.Unexposed = CallingConv(200)
handlers = {}
idati = idaapi.get_idati()
# idati = idaapi.til_t()
if idaapi.BADADDR == 2 ** 64 - 1:
FF_POINTER = FF_QWORD
POINTER_SIZE = 8
else:
FF_POINTER = FF_DWORD
POINTER_SIZE = 4
def preprocess(dict):
return {
key: (ida_type & 0xffffffff, string if string is None else (
parse_decl(string, 0)[1] if string != 'void' else b'\x01'
))
for key, (ida_type, string) in dict.items()
}
def _size_to_flags(size):
return {
1: FF_BYTE,
2: FF_WORD,
4: FF_DWORD,
8: FF_QWORD
}[size]
builtin_types = preprocess({
TypeKind.RECORD: (FF_STRUCT, None),
TypeKind.ENUM: (FF_DWORD, 'int'),
TypeKind.BOOL: (_size_to_flags(idati.cc.size_b), 'bool'),
TypeKind.DOUBLE: (FF_DOUBLE, 'double'),
TypeKind.LONGDOUBLE: (FF_DOUBLE, 'double'),
TypeKind.FLOAT: (FF_FLOAT, 'float'),
TypeKind.WCHAR: (FF_WORD, 'unsigned short'),
TypeKind.CHAR16: (FF_WORD, 'unsigned short'),
TypeKind.CHAR32: (FF_DWORD, 'unsigned int'),
TypeKind.SHORT: (_size_to_flags(idati.cc.size_s), 'short'),
TypeKind.USHORT: (_size_to_flags(idati.cc.size_s), 'unsigned short'),
TypeKind.INT: (_size_to_flags(idati.cc.size_i), 'int'),
TypeKind.LONG: (_size_to_flags(idati.cc.size_l), 'long'),
TypeKind.LONGLONG: (_size_to_flags(idati.cc.size_ll), 'long long'),
TypeKind.UINT: (_size_to_flags(idati.cc.size_i), 'unsigned int'),
TypeKind.ULONG: (_size_to_flags(idati.cc.size_l), 'unsigned long'),
TypeKind.ULONGLONG: (_size_to_flags(idati.cc.size_ll), 'unsigned long long'),
TypeKind.CHAR_S: (FF_BYTE, 'signed char'),
TypeKind.CHAR_U: (FF_BYTE, 'unsigned char'),
TypeKind.SCHAR: (FF_BYTE, 'signed char'),
TypeKind.UCHAR: (FF_BYTE, 'unsigned char'),
TypeKind.INT128: (FF_OWORD, '__int128'),
TypeKind.UINT128: (FF_OWORD, 'unsigned __int128'),
TypeKind.VOID: (FF_0VOID, 'void'),
TypeKind.POINTER: (FF_0OFF | FF_POINTER, None),
TypeKind.LVALUEREFERENCE: (FF_0OFF | FF_POINTER, None)
})
callingconv_map = {
CallingConv.C: idaapi.CM_CC_CDECL,
CallingConv.X86FastCall: idaapi.CM_CC_FASTCALL,
CallingConv.X86ThisCall: idaapi.CM_CC_THISCALL,
CallingConv.X86StdCall: idaapi.CM_CC_STDCALL,
CallingConv.X86Pascal: idaapi.CM_CC_PASCAL,
}
visited = dict()
virtuals_mapping = dict()
def handle(kind):
def decorator(f):
handlers[kind] = f
return f
return decorator
@handle(CursorKind.ENUM_DECL)
def handle_enum(item, context):
members = []
for member in item.get_children():
members.append((member.spelling, member.enum_value))
enum_id = add_enum(idaapi.BADADDR, item.spelling, 0)
for name, value in members:
add_enum_member(enum_id, name, value, -1)
class Struct:
def __init__(self, name, is_union, flags=0):
self.is_union = is_union
self.ti = idaapi.tinfo_t()
self.udt = idaapi.udt_type_data_t()
self.udt.taudt_bits = flags
self.name = name
self.save(True)
def add_member(self, name, offset, flag, size, tif):
member = idaapi.udt_member_t()
member.offset = offset
member.name = name
member.size = size
member.type = tif
if name.endswith('_vftable'):
member.set_vftable()
self.udt.push_back(member)
def set_align(self, align):
self.udt.effalign = align
def save(self, replace=True):
name = self.name
while True:
self.ti.create_udt(
self.udt, idaapi.BTF_STRUCT if not is_union else idaapi.BTF_UNION)
res = self.ti.set_named_type(
idati, name, idaapi.NTF_REPLACE if replace else 0)
if res == idaapi.TERR_OK:
break
elif res == idaapi.TERR_SAVE: # name conflict
assert replace == False, '?!'
name = '_' + name
elif res == idaapi.TERR_WRONGNAME:
raise Exception('not allowed name: %r' % name)
self.name = name
return self.ti
def is_primitive(kind):
if kind not in builtin_types:
return False
return builtin_types[kind][1]
def resolve_pointer(type, context):
tif = idaapi.tinfo_t()
pointee = type.get_pointee()
_register_type(pointee, context)
pointee_type = idaapi.tinfo_t()
if pointee.kind in (TypeKind.UNEXPOSED,):
pointee = pointee.get_canonical()
if pointee.kind in (TypeKind.TYPEDEF, TypeKind.POINTER, TypeKind.LVALUEREFERENCE, TypeKind.ELABORATED):
pointee_type = _register_type(pointee, context)
elif pointee.kind == TypeKind.INVALID:
pointee_type.deserialize(idati, builtin_types[TypeKind.VOID][1], b"")
elif pointee.kind == TypeKind.FUNCTIONPROTO or is_primitive(pointee.kind):
pointee_type = _register_type(pointee, context)
else:
name = pointee.spelling
if not context.resolve(name, lambda name: pointee_type.get_named_type(idati, name)):
pointee_type.create_forward_decl(idati, BTF_STRUCT, name)
if pointee_type is None:
pointee_type = idaapi.tinfo_t()
assert pointee_type.create_forward_decl(
idati, BTF_STRUCT, pointee.spelling)
tif.create_ptr(pointee_type)
return tif
def _make_vtable(name, virtuals, context):
# Creates a special struct(record) for vtable
class FakeType(object):
def __init__(self, kind, spelling, size=POINTER_SIZE, pointee=None):
self.kind = kind
self.spelling = spelling
self.size = size
self.pointee = pointee
def get_size(self):
return self.size
def get_canonical(self):
return self
def get_pointee(self):
return self.pointee
vtable_name = "%s_vftable" % (name)
struct = Struct(vtable_name, False, idaapi.TAUDT_VFTABLE)
for i, func in enumerate(virtuals):
size = POINTER_SIZE
flag = FF_POINTER | FF_0OFF
member_name = '%s' % (func.spelling)
tif = resolve_function(func.type, context, class_=name)
tif.create_ptr(tif)
struct.add_member(member_name, i * POINTER_SIZE, flag, size, tif)
visited[vtable_name] = {
'bases': [],
'is_typedef': False,
'resolved': None
}
struct.save()
return FakeType(TypeKind.POINTER, vtable_name + " *",
pointee=FakeType(TypeKind.RECORD, vtable_name))
def resolve_function(type, context, flags=0, class_=None):
func = idaapi.tinfo_t()
data = idaapi.func_type_data_t()
data.flags = flags
data.rettype = _register_type(type.get_result(), context)
data.stkargs = 0
data.spoiled.clear()
data.clear()
cc = CallingConv.from_id(conf.lib.clang_getFunctionTypeCallingConv(type))
# ida only supports cdecl + ellipsis when varargs exists
if type.is_function_variadic():
data.cc = idaapi.CM_CC_ELLIPSIS
elif class_:
# you can use one of these
data.cc = idaapi.CM_CC_THISCALL
# data.cc = idaapi.CM_CC_FASTCALL
else:
data.cc = callingconv_map.get(cc, idaapi.CM_CC_CDECL)
if class_:
funcarg = idaapi.funcarg_t()
class_type_ = _create_forward_declaration(class_)
class_type_.create_ptr(class_type_)
funcarg.type = class_type_
data.push_back(funcarg)
for argument in type.argument_types():
funcarg = idaapi.funcarg_t()
funcarg.type = _register_type(argument, context)
data.push_back(funcarg)
func.create_func(data)
func.get_func_details(data)
return func
def _create_forward_declaration(typename):
tif = idaapi.tinfo_t()
if tif.get_named_type(idati, typename):
return tif
tif.create_forward_decl(idati, BTF_STRUCT, typename)
return tif
def _register_type(type, context, bases=[], virtuals=[]):
global debug
typename = context.name(type.spelling)
found = visited.get(typename)
if found:
return found['resolved']
result = {
'bases': bases,
'is_typedef': type.kind == TypeKind.TYPEDEF,
'resolved': None
}
if type.kind == TypeKind.UNEXPOSED:
type = type.get_canonical()
if type.kind not in (TypeKind.TYPEDEF, TypeKind.ELABORATED):
visited[typename] = result
if type.kind == TypeKind.ELABORATED:
result['resolved'] = tif = idaapi.tinfo_t()
tif.create_typedef(idati, typename, BTF_STRUCT)
visited[typename] = result
return tif
if type.kind == TypeKind.VARIABLEARRAY:
tif = idaapi.tinfo_t()
tif.create_ptr(_register_type(type.element_type, context))
result['resolved'] = tif
return tif
if type.kind in (TypeKind.CONSTANTARRAY, TypeKind.VECTOR, TypeKind.INCOMPLETEARRAY):
count = type.element_count if type.kind != TypeKind.INCOMPLETEARRAY else 1
tif = idaapi.tinfo_t()
debug = type
tif.create_array(_register_type(type.element_type, context), count)
result['resolved'] = tif
return tif
if is_primitive(type.kind):
tif = idaapi.tinfo_t()
assert tif.deserialize(
idati, builtin_types[type.kind][1], b"")
result['resolved'] = tif
return tif
if type.kind == TypeKind.FUNCTIONPROTO:
result['resolved'] = tif = resolve_function(type, context)
return tif
if type.kind in (TypeKind.POINTER, TypeKind.LVALUEREFERENCE):
result['resolved'] = tif = resolve_pointer(type, context)
return tif
if type.kind == TypeKind.TYPEDEF:
canonical = type.get_canonical()
original = canonical.spelling
# if original == typename:
# del visited[typename]
tif = _register_type(canonical, context)
if original != typename and tif:
tif.set_named_type(idati, typename, NTF_TYPE)
result['resolved'] = tif
visited[typename] = result
origkey = typename.split("<")[0]
target = canonical.spelling.split("<")[0]
if target in virtuals_mapping:
virtuals_mapping[origkey] = virtuals_mapping[target]
return tif
else:
debug = type
return tif
if type.kind == TypeKind.RECORD:
align = type.get_align()
item = type
unique_sizes = set(x.get_field_offsetof() for x in type.get_fields())
if len(unique_sizes) == 1 and list(unique_sizes)[0] == -2:
is_union = False
should_guess = True
else:
is_union = len(unique_sizes) == 1 and len(
list(type.get_fields())) != 1
should_guess = False
if item.get_size() == -2:
# forward declaration
tif = _create_forward_declaration(typename)
del visited[typename]
result['resolved'] = tif
return tif
members = []
offset = 0
# populate_bases(members, base)
delta = 0
has_virtuals = False
for i, base in enumerate(bases):
base_type = base.type
base_size = base.type.get_size()
base_align = base.type.get_align()
if virtuals_mapping[context.name(base_type.spelling.split('<')[0])]:
has_virtuals = True
has_virtuals = has_virtuals or len(virtuals)
for i, base in enumerate(bases):
base_type = base.type
base_size = base.type.get_size()
base_align = base.type.get_align()
base_virtuals = virtuals_mapping[context.name(
base_type.spelling.split('<')[0])]
vtable_delta = POINTER_SIZE if has_virtuals and not base_virtuals and i == 0 else 0
for member in base_type.get_fields():
# If has virtuals and not first, we should substract vtable pointer size
members.append((vtable_delta * 8 + offset + member.get_field_offsetof() - delta * 8,
member.type, 'base%d_%s' % (i, member.spelling)))
print(delta, offset // 8, member.get_field_offsetof() // 8)
if i:
members.insert(0, (offset, _make_vtable(
context.name(base.spelling), base_virtuals, context), "base%d__vftable" % i))
_register_type(base_type, context)
offset += (base_size + base_align - 1) // \
base_align * base_align * 8
if not i:
virtuals = base_virtuals + virtuals
__visited = set()
virtuals = [x for x in virtuals if (x.spelling, x.type.spelling) not in __visited and (
__visited.add((x.spelling, x.type.spelling)) or True)]
if virtuals:
members.insert(
0, (0, _make_vtable(typename, virtuals, context), "__vftable"))
virtuals_mapping[typename] = virtuals
for member in item.get_fields():
if member.is_bitfield():
continue
if member.kind == CursorKind.FIELD_DECL:
members.append((member.get_field_offsetof(),
member.type, member.spelling))
else:
continue
struc = Struct(typename, is_union,
idaapi.TAUDT_CPPOBJ if virtuals else 0)
struc.set_align(align.bit_length() - 1)
for offset, type, name_ in members:
size = type.get_size()
if not name_:
name_ = '__offset%x' % (offset >> 3)
if size < 0:
if type.kind == TypeKind.INCOMPLETEARRAY:
# later fixed to array
size = type.element_type.get_size()
else:
debug = type
wtf
continue
flag = 0
canonical = type.get_canonical()
tif = None
if canonical.kind == TypeKind.RECORD:
tif = _register_type(canonical, context)
elif canonical.kind in (TypeKind.POINTER, TypeKind.LVALUEREFERENCE):
tif = resolve_pointer(canonical, context)
else:
if canonical.kind in builtin_types:
flag |= builtin_types[canonical.kind][0]
tif = _register_type(canonical, context)
if offset % 8:
continue
res = struc.add_member(name_, offset >> 3, flag, size, tif)
tif = result['resolved'] = struc.save()
return tif
@handle(CursorKind.CLASS_DECL)
@handle(CursorKind.CLASS_TEMPLATE)
@handle(CursorKind.STRUCT_DECL)
@handle(CursorKind.UNION_DECL)
def handle_struct(item, context):
# Is there any way to check if it's forward declaration or not?
if len(list(item.get_children())) == 0 and item.type.get_size() > 1:
# forward class/struct declaration
return
bases = []
virtuals = []
virtuals_mapping[context.name(item.spelling)] = virtuals
for member in item.get_children():
if member.kind == CursorKind.CXX_BASE_SPECIFIER:
bases.append(member)
elif member.kind in (CursorKind.CXX_METHOD, CursorKind.DESTRUCTOR, CursorKind.CONSTRUCTOR):
if member.is_virtual_method():
virtuals.append(member)
_register_type(item.type, context, bases, virtuals)
@handle(CursorKind.TYPEDEF_DECL)
@handle(CursorKind.TYPE_ALIAS_DECL)
@handle(CursorKind.FUNCTION_DECL)
@handle(CursorKind.VAR_DECL)
def typedefs(item, context):
type = _register_type(item.type, context)
if item.kind in (CursorKind.FUNCTION_DECL, CursorKind.VAR_DECL):
address = get_name_ea_simple(item.spelling)
if address != idaapi.BADADDR:
res = idaapi.apply_tinfo(
address, type, idaapi.TINFO_DELAYFUNC | idaapi.TINFO_DEFINITE)
@handle(CursorKind.NAMESPACE)
def namespace(item, context):
process_cursor(item, context.nest_namespace(item.spelling))
@handle(CursorKind.LINKAGE_SPEC)
@handle(CursorKind.UNEXPOSED_DECL)
def linkage(item, context):
process_cursor(item, context)
def parse_file(path, args=[]):
index = Index.create()
tx = index.parse(path, args)
if idaapi.BADADDR == 2 ** 64 - 1:
args.insert(0, '-m64')
else:
args.insert(0, '-m32')
process_cursor(tx.cursor)
def parse_ast(path, args=[]):
index = Index.create()
tx = TranslationUnit.from_ast_file(path, index)
if idaapi.BADADDR == 2 ** 64 - 1:
args.insert(0, '-m64')
else:
args.insert(0, '-m32')
process_cursor(tx.cursor)
class Context(object):
def __init__(self, namespaces=[]):
self.namespaces = namespaces
def nest_namespace(self, namespace):
return Context(namespaces=self.namespaces+[namespace])
def name(self, name):
return Context._generate_name(name, self.namespaces)
@staticmethod
def _generate_name(name, namespaces):
name = re.sub('^(const |volatile |struct |union |class )+', '', name)
name = re.sub(r'[^a-zA-Z0-9:<>=]', '_', name)
if not namespaces:
return name
prefix = "::".join(namespaces) + "::"
if name.startswith(prefix):
return name
return "%s%s" % (prefix, name)
def resolve(self, name, predicate):
return reduce(
lambda acc, item: acc or predicate(
Context._generate_name(name, item)),
(self.namespaces[:- i] for i in range(len(self.namespaces) + 1)),
False
)
def process_cursor(cursor, context=None):
if context is None:
context = Context()
for item in cursor.get_children():
print(item.location.file.name, item.location.line,
item.kind, item.spelling)
if item.kind in handlers:
handlers[item.kind](item, context)
else:
continue
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment