XertroV · April 28, 2025 13:55
diff --git a/NameFromStartProfileCall.py b/NameFromStartProfileCall.py
 # -*- coding: utf-8 -*-

 #Find instances of calls to ProfileStart(ctx, name) and get the name.
 #@author xert
 #@category tm2020
 #@keybinding
 #@menupath
 #@toolbar
 #@runtime Jython

 '''
 # USAGE

 Requirements:

 1. load the TM exe in ghidra and analyze it.
 2. find the profile function and name it. (You can find it easily by
   searching for strings with :: and looking for what looks like a method name.)
    - there are two profile functions called, one takes extra arguments.
      We are looking for the outer one first (which takes two arguments)
    - Name it "ProfileStart" if you don't want to have to modify this script.

 1. Add this via Script Manager:
  - Window > Script Manager
  - create new script > jython
  - name it "NameFromStartProfileCall" or whatever
  - paste the code in

 2. change PROFILE_FUNCITON_NAME below to whatever the name is in your
   ghidra instance if your profile function is not called "ProfileStart".

 3. Run the script:
    - output will be in the console window (it was in the same place as bookmarks
      for me by default; should be able to Window > Console too)
    - takes 4-5 minutes to run

 Note: this will work for the inner profile function too, just update the name.

 Note: run it a second time to get a convenient list of all the places that might still need manual naming.

 '''

 PROFILE_FUNCITON_NAME = "ProfileStart"

 from time import time, sleep

 start_time = time()

 '''
 example to extract name from:

 void CFastAlgo::ComputeMD5_Digest(longlong *param_1,undefined8 param_2) {
  uStack_10 = 0x140123ab0;
  local_18 = DAT_141e2b060 ^ (ulonglong)auStack_40b8;
  local_4088 = 0;
  ProfileStart(local_4098,"CFastAlgo::ComputeMD5_Digest");
  (**(code **)(*param_1 + 0x18))(param_1);
  FUN_1414f4850(local_4078);
  iVar1 = (**(code **)(*param_1 + 0x48))(param_1,local_4018,0x4000);
  while (iVar1 != 0) {

  ...

                             **************************************************************
                             *                          FUNCTION                          *
                             **************************************************************
                             undefined ComputeMD5_Digest()
                               assume GS_OFFSET = 0xff00000000
             undefined         <UNASSIGNED>   <RETURN>
             undefined8        Stack[0x18]:8  local_res18                             XREF[2]:     140123aa0(W),
                                                                                                   140123b7b(R)
             undefined8        Stack[-0x18]:8 local_18                                XREF[2]:     140123abd(W),
                                                                                                   140123b6b(R)
             undefined1        Stack[-0x401   local_4018                              XREF[3]:     140123afa(*),
                                                                                                   140123b23(*),
                                                                                                   140123b38(*)
             undefined1        Stack[-0x407   local_4078                              XREF[3]:     140123aed(*),
                                                                                                   140123b2b(*),
                                                                                                   140123b50(*)
             undefined4        Stack[-0x408   local_4088                              XREF[2]:     140123ac8(W),
                                                                                                   140123b5d(R)
             undefined1        Stack[-0x409   local_4098                              XREF[2]:     140123ada(*),
                                                                                                   140123b61(*)
                             CFastAlgo::ComputeMD5_Digest                    XREF[2]:     FUN_1408bb020:1408bb070(c),
                                                                                          142ad2ec4(*)
       140123aa0 48 89 5c        MOV        qword ptr [RSP + local_res18],RBX
                 24 18
       140123aa5 57              PUSH       RDI
       140123aa6 b8 b0 40        MOV        EAX,0x40b0
                 00 00
       140123aab e8 80 06        CALL       __chkstk                                         undefined __chkstk()
                 3d 01
       140123ab0 48 2b e0        SUB        RSP,RAX
       140123ab3 48 8b 05        MOV        RAX,qword ptr [DAT_141e2b060]                    = 00002B992DDFA232h
                 a6 75 d0 01
       140123aba 48 33 c4        XOR        RAX,RSP
       140123abd 48 89 84        MOV        qword ptr [RSP + local_18],RAX
                 24 a0 40
                 00 00
       140123ac5 48 8b fa        MOV        RDI,RDX
       140123ac8 c7 44 24        MOV        dword ptr [RSP + local_4088],0x0
                 30 00 00
                 00 00
       140123ad0 48 8b d9        MOV        RBX,RCX
       140123ad3 48 8d 15        LEA        RDX,[s_CFastAlgo::ComputeMD5_Digest_141b25a78]   = "CFastAlgo::ComputeMD5_Digest"
                 9e 1f a0 01
       140123ada 48 8d 4c        LEA        RCX=>local_4098,[RSP + 0x20]
                 24 20
       140123adf e8 3c 25        CALL       ProfileStart                                     undefined ProfileStart()
                 ff ff
       140123ae4 48 8b 03        MOV        RAX,qword ptr [RBX]



 '''




 from ghidra.program.model.symbol import SourceType
 from ghidra.program.model.listing import Function
 from ghidra.program.model.listing import Instruction
 from ghidra.program.model.address import GenericAddress, Address
 from ghidra.program.model.scalar import Scalar

 namespaces = {}
 global_ns = currentProgram.getGlobalNamespace()
 symbol_table = currentProgram.getSymbolTable()

 def get_namespace(parts):
    ''' Return the namespace for a list of name components, creating it if needed.

    parts: list[str] of namespace components, outermost first.
    returns: the namespace for "::".join(parts), or None for an empty list.

    Every namespace that is looked up or created is remembered in the
    module-level `namespaces` dict, keyed by its "::"-joined name.
    '''
    if len(parts) == 0:
        return None

    key = "::".join(parts)
    if key not in namespaces:
        ancestors, leaf = parts[:-1], parts[-1]
        # a single component hangs directly off the global namespace;
        # anything deeper hangs off the namespace of its ancestors
        owner = get_namespace(ancestors) if len(ancestors) > 0 else global_ns
        created = symbol_table.getOrCreateNameSpace(owner, leaf, SourceType.ANALYSIS)
        namespaces[key] = created
    return namespaces[key]


 def find_profile_start(func):
    ''' Find the first CALL to the profile function in func.

    Only the first 101 instructions of the function body are inspected
    (presumably because the profile call sits near the function start).

    func: the ghidra Function to scan.
    returns: (instruction, symbol) of the first matching CALL, or (None, None)
             when no call to the profile function was found.
    '''
    listing = currentProgram.getListing()
    # True = iterate the body in forward address order
    instructions = listing.getInstructions(func.getBody(), True)
    for i, instr in enumerate(instructions):
        if i > 100:
            break
        if instr.getMnemonicString() != "CALL":
            continue
        for op in instr.getOpObjects(0):
            # only a direct address operand can be looked up as a symbol
            if not isinstance(op, Address):
                continue
            symbol = getSymbolAt(op)
            # getSymbolAt returns None when nothing is named at the target;
            # skip it instead of crashing on symbol.getName()
            if symbol is None:
                continue
            # substring match: any symbol whose name contains
            # PROFILE_FUNCITON_NAME counts as the profile function
            if PROFILE_FUNCITON_NAME in symbol.getName():
                return instr, symbol
    return None, None

 def _string_data_for(operand):
    ''' Resolve the source operand of a LEA to the ghidra data it refers to.

    operand: first object of the LEA's second operand. A Scalar is converted
             to an address with toAddr; an Address is used directly.
    returns: the Data found (after following at most one pointer), or None.
    '''
    if isinstance(operand, Scalar):
        addr = toAddr(operand.getValue())
    elif isinstance(operand, Address):
        addr = operand
    else:
        return None

    data = getDataAt(addr)
    if data and isinstance(data.value, Address):
        # the data holds an address rather than the text itself: follow it once
        print("String data is an Address: %s / %s" % (operand, data.value))
        data = getDataAt(data.value)
    # getDataAt returns None where no data is defined
    return data if data else None


 def get_string_argument(instr):
    ''' Get the name string passed to the profile call at instr.

    Walks backwards from the CALL (at most 100 instructions, stopping early at
    an INT3) looking for a LEA that loads RDX. RDX carries the second argument
    (the name) in the Windows x64 calling convention; see the sample listing
    at the top of this script. The value of the data that LEA refers to is
    returned.

    instr: the CALL instruction, as returned by find_profile_start.
    returns: the value of the referenced data, or None if nothing was resolved.
    '''
    prev_instr = instr.getPrevious()
    count = 0
    while prev_instr and count < 100:
        count += 1
        mnemonic = prev_instr.getMnemonicString()
        if mnemonic.startswith("INT3"):
            print("INT3 found: %s; count = %d; addr = %s" % (prev_instr, count, instr.getAddress()))
            break
        if mnemonic.startswith("LEA"):
            objs = prev_instr.getOpObjects(0)
            if len(objs) != 1:
                print("LEA found but not 1 object: %s" % prev_instr)
                print("location %s" % prev_instr.getAddress())
                break
            # only a LEA whose destination is RDX (2nd param) is of interest
            if str(objs[0]) == "RDX":
                sources = prev_instr.getOpObjects(1)
                # guard against an operand with no objects before indexing it
                if len(sources) > 0:
                    data = _string_data_for(sources[0])
                    if data is not None:
                        return data.value
        prev_instr = prev_instr.getPrevious()
    return None

 # Main pass: rename every default-named function after the string it hands
 # to the profile function, then print a summary.
 fm = currentProgram.getFunctionManager()
 functions = fm.getFunctions(True)

 nonalpha_names = []
 skipped = []
 renamed = 0
 names_set = set()
 names_list = []
 # number of functions renamed to each profile name so far; replaces the
 # repeated names_list.count() scans
 name_counts = {}

 for func in functions:
    # only functions that still carry a default FUN_ / thunk_FUN_ name are candidates
    if not func.getName().startswith(("FUN_", "thunk_FUN_")):
        skipped.append(func.getName())
        continue
    instr, symbol = find_profile_start(func)
    if not instr:
        continue
    string = get_string_argument(instr)
    if not string:
        continue
    if string.startswith("[WebM]"):
        string = "WebM_" + string[6:]

    # Names containing anything besides alphanumerics, '_' and '::' are only
    # reported, not applied. The same goes for names with an empty component
    # (e.g. "Foo::"), which would otherwise reach setName('') and abort the run.
    is_plain = string.replace('::', '').replace('_', '').isalnum()
    if not is_plain or not all(string.split('::')):
        print("Non-alphanumeric characters in name: %s" % string)
        nonalpha_names.append((string, instr.getAddress()))
        print("  %d: %s @ %s" % (len(nonalpha_names), string, instr.getAddress()))
        continue

    # check for duplicates, add a numeric suffix if so
    full_name = string
    seen = name_counts.get(full_name, 0)
    if seen:
        new_name = "%s_%d" % (full_name, seen)
        print("Duplicate name found: %s -> %s @ %s" % (string, new_name, instr.getAddress()))
        string = new_name

    # logged only once the name is known to be applied
    print("Renaming %s to %s @ %s" % (func.getName(), string, func.entryPoint))

    try:
        # everything before the last '::' becomes the parent namespace
        if '::' in string:
            parts = string.split('::')
            func.setParentNamespace(get_namespace(parts[:-1]))
            string = parts[-1]
        func.setName(string, SourceType.ANALYSIS)
    except Exception as e:
        print("Failed to rename %s to %s: %s" % (func.getName(), string, e))
        # bare raise keeps the original traceback
        raise

    # track stats and names
    renamed += 1
    names_set.add(full_name)
    names_list.append(full_name)
    name_counts[full_name] = seen + 1

 end_time = time()

 print("Done, found %d functions with non-alphanumeric names" % len(nonalpha_names))
 # numbered from 1 to match the numbering printed while the loop was running
 for i, (name, addr) in enumerate(nonalpha_names, 1):
    print("  %d: %s @ %s" % (i, name, addr))

 print("Skipped %d functions: %s" % (len(skipped), "<omitted>"))

 print("Done, renamed %d functions" % renamed)

 print("Found %d unique names" % len(names_set))
 print("Found %d names" % len(names_list))
 nb_dupes = len(names_list) - len(names_set)
 print('-> duplicates; %d' % nb_dupes)
 if nb_dupes > 0:
    duplicates = sorted((name, count) for name, count in name_counts.items() if count > 1)
    print("Duplicates:")
    for name in duplicates:
        print("  %s: %d" % name)

 print("Elapsed time: %f seconds" % (end_time - start_time))
	# -*- coding: utf-8 -*-

	#Find instances of calls to ProfileStart(ctx, name) and get the name.
	#@author xert
	#@category tm2020
	#@keybinding
	#@menupath
	#@toolbar
	#@runtime Jython

	'''
	# USAGE

	Requirements:

	1. load the TM exe in ghidra and analyze it.
	2. find the profile function and name it. (You can find it easily by
	searching for strings with :: and looking for what looks like a method name.)
	- there are two profile functions called, one takes extra arguments.
	We are looking for the outer one first (which takes two arguments)
	- Name it "ProfileStart" if you don't want to have to modify this script.

	1. Add this via Script Manager:
	- Window > Script Manager
	- create new script > jython
	- name it "NameFromStartProfileCall" or whatever
	- paste the code in

	2. change PROFILE_FUNCITON_NAME below to whatever the name is in your
	ghidra instance if your profile function is not called "ProfileStart".

	3. Run the script:
	- output will be in the console window (it was in the same place as bookmarks
	for me by default; should be able to Window > Console too)
	- takes 4-5 minutes to run

	Note: this will work for the inner profile function too, just update the name.

	Note: run it a second time to get a convenient list of all the places that might still need manual naming.

	'''

	PROFILE_FUNCITON_NAME = "ProfileStart"

	from time import time, sleep

	start_time = time()

	'''
	example to extract name from:

	void CFastAlgo::ComputeMD5_Digest(longlong *param_1,undefined8 param_2) {
	uStack_10 = 0x140123ab0;
	local_18 = DAT_141e2b060 ^ (ulonglong)auStack_40b8;
	local_4088 = 0;
	ProfileStart(local_4098,"CFastAlgo::ComputeMD5_Digest");
	(**(code **)(*param_1 + 0x18))(param_1);
	FUN_1414f4850(local_4078);
	iVar1 = (**(code **)(*param_1 + 0x48))(param_1,local_4018,0x4000);
	while (iVar1 != 0) {

	...

	**************************************************************
	* FUNCTION *
	**************************************************************
	undefined ComputeMD5_Digest()
	assume GS_OFFSET = 0xff00000000
	undefined <UNASSIGNED> <RETURN>
	undefined8 Stack[0x18]:8 local_res18 XREF[2]: 140123aa0(W),
	140123b7b(R)
	undefined8 Stack[-0x18]:8 local_18 XREF[2]: 140123abd(W),
	140123b6b(R)
	undefined1 Stack[-0x401 local_4018 XREF[3]: 140123afa(*),
	140123b23(*),
	140123b38(*)
	undefined1 Stack[-0x407 local_4078 XREF[3]: 140123aed(*),
	140123b2b(*),
	140123b50(*)
	undefined4 Stack[-0x408 local_4088 XREF[2]: 140123ac8(W),
	140123b5d(R)
	undefined1 Stack[-0x409 local_4098 XREF[2]: 140123ada(*),
	140123b61(*)
	CFastAlgo::ComputeMD5_Digest XREF[2]: FUN_1408bb020:1408bb070(c),
	142ad2ec4(*)
	140123aa0 48 89 5c MOV qword ptr [RSP + local_res18],RBX
	24 18
	140123aa5 57 PUSH RDI
	140123aa6 b8 b0 40 MOV EAX,0x40b0
	00 00
	140123aab e8 80 06 CALL __chkstk undefined __chkstk()
	3d 01
	140123ab0 48 2b e0 SUB RSP,RAX
	140123ab3 48 8b 05 MOV RAX,qword ptr [DAT_141e2b060] = 00002B992DDFA232h
	a6 75 d0 01
	140123aba 48 33 c4 XOR RAX,RSP
	140123abd 48 89 84 MOV qword ptr [RSP + local_18],RAX
	24 a0 40
	00 00
	140123ac5 48 8b fa MOV RDI,RDX
	140123ac8 c7 44 24 MOV dword ptr [RSP + local_4088],0x0
	30 00 00
	00 00
	140123ad0 48 8b d9 MOV RBX,RCX
	140123ad3 48 8d 15 LEA RDX,[s_CFastAlgo::ComputeMD5_Digest_141b25a78] = "CFastAlgo::ComputeMD5_Digest"
	9e 1f a0 01
	140123ada 48 8d 4c LEA RCX=>local_4098,[RSP + 0x20]
	24 20
	140123adf e8 3c 25 CALL ProfileStart undefined ProfileStart()
	ff ff
	140123ae4 48 8b 03 MOV RAX,qword ptr [RBX]



	'''




	from ghidra.program.model.symbol import SourceType
	from ghidra.program.model.listing import Function
	from ghidra.program.model.listing import Instruction
	from ghidra.program.model.address import GenericAddress, Address
	from ghidra.program.model.scalar import Scalar

	namespaces = {}
	global_ns = currentProgram.getGlobalNamespace()
	symbol_table = currentProgram.getSymbolTable()

	def get_namespace(parts):
	    ''' Get or create a namespace from a list of name components.

	    parts: list[str] of namespace components, outermost first.
	    returns: the namespace for "::".join(parts), or None for an empty list.

	    Every namespace that is looked up or created is remembered in the
	    module-level `namespaces` dict, keyed by its "::"-joined name.
	    '''
	    if len(parts) == 0:
	        return None

	    full_name = "::".join(parts)
	    if full_name in namespaces:
	        return namespaces[full_name]

	    # a single component hangs directly off the global namespace;
	    # anything deeper hangs off the namespace of its leading components
	    parent_ns = global_ns
	    if len(parts) > 1:
	        parent_ns = get_namespace(parts[:-1])

	    ns = symbol_table.getOrCreateNameSpace(parent_ns, parts[-1], SourceType.ANALYSIS)
	    namespaces[full_name] = ns
	    return ns


	def find_profile_start(func):
	    ''' Find the first CALL to the profile function in func.

	    Only the first 101 instructions of the function body are inspected
	    (presumably because the profile call sits near the function start).

	    func: the ghidra Function to scan.
	    returns: (instruction, symbol) of the first matching CALL, or (None, None)
	             when no call to the profile function was found.
	    '''
	    listing = currentProgram.getListing()
	    # True = iterate the body in forward address order
	    instructions = listing.getInstructions(func.getBody(), True)
	    for i, instr in enumerate(instructions):
	        if i > 100:
	            break
	        if instr.getMnemonicString() != "CALL":
	            continue
	        for op in instr.getOpObjects(0):
	            # only a direct address operand can be looked up as a symbol
	            if not isinstance(op, Address):
	                continue
	            symbol = getSymbolAt(op)
	            # getSymbolAt returns None when nothing is named at the target;
	            # skip it instead of crashing on symbol.getName()
	            if symbol is None:
	                continue
	            # substring match: any symbol whose name contains
	            # PROFILE_FUNCITON_NAME counts as the profile function
	            if PROFILE_FUNCITON_NAME in symbol.getName():
	                return instr, symbol
	    return None, None

	def _string_data_for(operand):
	    ''' Resolve the source operand of a LEA to the ghidra data it refers to.

	    operand: first object of the LEA's second operand. A Scalar is converted
	             to an address with toAddr; an Address is used directly.
	    returns: the Data found (after following at most one pointer), or None.
	    '''
	    if isinstance(operand, Scalar):
	        addr = toAddr(operand.getValue())
	    elif isinstance(operand, Address):
	        addr = operand
	    else:
	        return None

	    data = getDataAt(addr)
	    if data and isinstance(data.value, Address):
	        # the data holds an address rather than the text itself: follow it once
	        print("String data is an Address: %s / %s" % (operand, data.value))
	        data = getDataAt(data.value)
	    # getDataAt returns None where no data is defined
	    return data if data else None


	def get_string_argument(instr):
	    ''' Get the name string passed to the profile call at instr.

	    Walks backwards from the CALL (at most 100 instructions, stopping early at
	    an INT3) looking for a LEA that loads RDX. RDX carries the second argument
	    (the name) in the Windows x64 calling convention; see the sample listing
	    at the top of this script. The value of the data that LEA refers to is
	    returned.

	    instr: the CALL instruction, as returned by find_profile_start.
	    returns: the value of the referenced data, or None if nothing was resolved.
	    '''
	    prev_instr = instr.getPrevious()
	    count = 0
	    while prev_instr and count < 100:
	        count += 1
	        mnemonic = prev_instr.getMnemonicString()
	        if mnemonic.startswith("INT3"):
	            print("INT3 found: %s; count = %d; addr = %s" % (prev_instr, count, instr.getAddress()))
	            break
	        if mnemonic.startswith("LEA"):
	            objs = prev_instr.getOpObjects(0)
	            if len(objs) != 1:
	                print("LEA found but not 1 object: %s" % prev_instr)
	                print("location %s" % prev_instr.getAddress())
	                break
	            # only a LEA whose destination is RDX (2nd param) is of interest
	            if str(objs[0]) == "RDX":
	                sources = prev_instr.getOpObjects(1)
	                # guard against an operand with no objects before indexing it
	                if len(sources) > 0:
	                    data = _string_data_for(sources[0])
	                    if data is not None:
	                        return data.value
	        prev_instr = prev_instr.getPrevious()
	    return None

	# Main pass: rename every default-named function after the string it hands
	# to the profile function, then print a summary.
	fm = currentProgram.getFunctionManager()
	functions = fm.getFunctions(True)

	nonalpha_names = []
	skipped = []
	renamed = 0
	names_set = set()
	names_list = []
	# number of functions renamed to each profile name so far; replaces the
	# repeated names_list.count() scans
	name_counts = {}

	for func in functions:
	    # only functions that still carry a default FUN_ / thunk_FUN_ name are candidates
	    if not func.getName().startswith(("FUN_", "thunk_FUN_")):
	        skipped.append(func.getName())
	        continue
	    instr, symbol = find_profile_start(func)
	    if not instr:
	        continue
	    string = get_string_argument(instr)
	    if not string:
	        continue
	    if string.startswith("[WebM]"):
	        string = "WebM_" + string[6:]

	    # Names containing anything besides alphanumerics, '_' and '::' are only
	    # reported, not applied. The same goes for names with an empty component
	    # (e.g. "Foo::"), which would otherwise reach setName('') and abort the run.
	    is_plain = string.replace('::', '').replace('_', '').isalnum()
	    if not is_plain or not all(string.split('::')):
	        print("Non-alphanumeric characters in name: %s" % string)
	        nonalpha_names.append((string, instr.getAddress()))
	        print(" %d: %s @ %s" % (len(nonalpha_names), string, instr.getAddress()))
	        continue

	    # check for duplicates, add a numeric suffix if so
	    full_name = string
	    seen = name_counts.get(full_name, 0)
	    if seen:
	        new_name = "%s_%d" % (full_name, seen)
	        print("Duplicate name found: %s -> %s @ %s" % (string, new_name, instr.getAddress()))
	        string = new_name

	    # logged only once the name is known to be applied
	    print("Renaming %s to %s @ %s" % (func.getName(), string, func.entryPoint))

	    try:
	        # everything before the last '::' becomes the parent namespace
	        if '::' in string:
	            parts = string.split('::')
	            func.setParentNamespace(get_namespace(parts[:-1]))
	            string = parts[-1]
	        func.setName(string, SourceType.ANALYSIS)
	    except Exception as e:
	        print("Failed to rename %s to %s: %s" % (func.getName(), string, e))
	        # bare raise keeps the original traceback
	        raise

	    # track stats and names
	    renamed += 1
	    names_set.add(full_name)
	    names_list.append(full_name)
	    name_counts[full_name] = seen + 1

	end_time = time()

	print("Done, found %d functions with non-alphanumeric names" % len(nonalpha_names))
	# numbered from 1 to match the numbering printed while the loop was running
	for i, (name, addr) in enumerate(nonalpha_names, 1):
	    print(" %d: %s @ %s" % (i, name, addr))

	print("Skipped %d functions: %s" % (len(skipped), "<omitted>"))

	print("Done, renamed %d functions" % renamed)

	print("Found %d unique names" % len(names_set))
	print("Found %d names" % len(names_list))
	nb_dupes = len(names_list) - len(names_set)
	print('-> duplicates; %d' % nb_dupes)
	if nb_dupes > 0:
	    duplicates = sorted((name, count) for name, count in name_counts.items() if count > 1)
	    print("Duplicates:")
	    for name in duplicates:
	        print(" %s: %d" % name)

	print("Elapsed time: %f seconds" % (end_time - start_time))