- Open the firmware binary in Ghidra
- Go to the data view and copy all the string addresses, and paste them into a Jupyter notebook with the code below
- Sort and copy all the possible pointers (probably of type `undefined4`) in the same way
- Use the following Python code to process the data:
from collections import Counter


def process_list_to_hex(listraw=None):
    """Convert Ghidra-style hex address strings into integers.

    Args:
        listraw: Iterable of strings such as ``"10048a30h"`` (surrounding
            whitespace and a trailing ``h`` are stripped). When omitted,
            the lines currently on the clipboard are used instead.

    Returns:
        A list of ints, one per non-blank entry, in input order.

    Raises:
        ValueError: If a non-blank entry is not valid hexadecimal.
    """
    if listraw is None:
        # Imported lazily so the conversion itself works without the
        # optional clipboard package installed.
        import pyperclip

        listraw = pyperclip.paste().splitlines()

    # Start from an empty list: the result must contain integers only,
    # otherwise the offset arithmetic in find_best_offset fails.
    listhex = []
    for entry in listraw:
        text = entry.strip().rstrip('h')
        if not text:
            # Blank lines (common at the end of a paste) carry no address.
            continue
        listhex.append(int(text, 16))
    print(listhex[:5])
    return listhex


def find_best_offset(list1, list2):
    """Rank the differences ``addr2 - addr1`` by how often they occur.

    Every address in ``list1`` is paired with every distinct value in
    ``list2``; the offset shared by the most pairs is the most likely
    base-address shift.

    Args:
        list1: Integer addresses (e.g. where the strings are located).
        list2: Integer candidate pointer values; duplicates are ignored.

    Returns:
        Up to 20 ``(offset, count)`` tuples, most frequent first.
    """
    targets = set(list2)
    offset_count = Counter(
        addr2 - addr1 for addr1 in list1 for addr2 in targets
    )
    return offset_count.most_common(20)


def visualize_offsets(offset_counts):
    """Show a bar chart of match counts for the given offsets.

    Args:
        offset_counts: Sequence of ``(offset, count)`` tuples, as returned
            by ``find_best_offset``.
    """
    # Imported lazily so the analysis functions do not require matplotlib.
    import matplotlib.pyplot as plt

    labels = [hex(offset) for offset, _ in offset_counts]
    counts = [count for _, count in offset_counts]
    # Plot one bar per candidate at evenly spaced positions; plotting at the
    # raw offset values would spread thin bars over a huge numeric range.
    positions = list(range(len(counts)))

    plt.figure(figsize=(12, 6))
    plt.bar(positions, counts)
    plt.xlabel('Offset')
    plt.ylabel('Number of Matches')
    plt.title('Best Offsets by Number of Matches')
    plt.xticks(positions, labels, rotation=45)
    plt.grid(True)
    plt.show()
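- Identify the offset that has significantly more matches than the others. Go to Memory Map and move the starting address to that offset. Now you should see many strings whose references align.
- Alternatively, run the following script from within Ghidra (it uses the Ghidra scripting API); it performs the same offset search directly on the open program: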
from ghidra.program.model.listing import CodeUnit
from ghidra.program.model.data import DataType
from ghidra.program.model.mem import MemoryBlock
from ghidra.program.model.symbol import SymbolType
from ghidra.util.task import TaskMonitor
# Handles used by the rest of the script: the program obtained from the
# scripting environment and its listing, which is walked below to enumerate
# the defined data items.
program = getCurrentProgram()
listing = program.getListing()
# Function to get all undefined4 and string addresses
def get_all_undefined4_and_strings():
    """Collect candidate pointer values and string locations from the listing.

    Walks every defined data item returned by ``listing.getDefinedData(True)``
    and classifies it by the display name of its data type.

    Returns:
        A tuple ``(undefined4_addresses, string_addresses)``:
        the first list holds the values (``data.getValue()``) of items whose
        type display name is exactly "undefined4" or contains "pointer";
        the second holds the minimum address of every item whose type
        display name contains "string" (case-insensitive).
    """
    undefined4_addresses = []
    string_addresses = []
    dataIterator = listing.getDefinedData(True)
    while dataIterator.hasNext():
        data = dataIterator.next()
        type_name = data.getDataType().getDisplayName()
        if type_name == "undefined4" or "pointer" in type_name:
            value = data.getValue()
            # getValue() may return null/None when the item has no value;
            # such entries cannot be converted to a number later on.
            if value is not None:
                undefined4_addresses.append(value)
        elif "string" in type_name.lower():
            string_addresses.append(data.getMinAddress())
    return undefined4_addresses, string_addresses
# Function to process list to hex
def process_list_to_hex(listraw):
    """Convert values to integers by parsing their string form as hex.

    Each item is passed through ``str()`` and the result is read as a
    hexadecimal number, so the items are expected to stringify to hex text
    (with or without a leading "0x").

    Args:
        listraw: Iterable of values to convert. ``None`` entries are skipped.

    Returns:
        A list of ints, in input order, one per non-None entry.

    Raises:
        ValueError: If an entry's string form is not valid hexadecimal.
    """
    listhex = []
    for item in listraw:
        if item is None:
            # Nothing to convert; str(None) would not parse as hex.
            continue
        # rstrip('L') drops a trailing long-integer suffix (presumably from
        # Python 2 / Jython number formatting). lstrip('0x') removes any
        # leading '0' and 'x' characters (a character set, not a prefix),
        # which is harmless for hex text; an all-zero value becomes '' and
        # falls back to '0'.
        digits = str(item).rstrip('L').lstrip('0x') or '0'
        listhex.append(int(digits, 16))
    return listhex
# Function to find best offset
def find_best_offset(list1, list2):
    """Rank the differences between two address lists by frequency.

    Every address in ``list1`` is paired with every distinct value in
    ``list2`` and the difference ``value - address`` is tallied.

    Args:
        list1: Integer addresses to subtract.
        list2: Integer values to subtract from; duplicates are ignored.

    Returns:
        Up to 20 ``(offset, count)`` tuples, highest count first.
    """
    targets = set(list2)
    tally = {}
    for base in list1:
        for target in targets:
            delta = target - base
            if delta in tally:
                tally[delta] += 1
            else:
                tally[delta] = 1
    ranked = list(tally.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked[:20]
# Main script execution
pointer_addresses, string_addresses = get_all_undefined4_and_strings()
hex_string_addresses = process_list_to_hex(string_addresses)
hex_pointer_addresses = process_list_to_hex(pointer_addresses)
best_offsets = find_best_offset(hex_string_addresses, hex_pointer_addresses)
print(best_offsets[:5])

# Print the best offset
# The top candidate counts as "much better" when its lead over the runner-up
# is more than four times the gap between the runner-up and third place.
# The comparison is written as a multiplication so that a tie between second
# and third place cannot cause a division by zero, and so that the result does
# not depend on integer vs. true division. Three candidates are required.
clear_winner = False
if len(best_offsets) >= 3:
    lead = best_offsets[0][1] - best_offsets[1][1]
    next_gap = best_offsets[1][1] - best_offsets[2][1]
    clear_winner = lead > 4 * next_gap

if clear_winner:
    best_offset = best_offsets[0][0]
    # Concatenate instead of passing two arguments so the message is not
    # printed as a tuple under Python 2 / Jython.
    print("Best offset much better than any others: " + hex(best_offset))
    memory = program.getMemory()
    minAddress = memory.getMinAddress()
    # Use the numeric offset of the address rather than parsing its string
    # form, which is not guaranteed to be bare hex digits.
    new_start = minAddress.getOffset() + best_offset
    print("Assuming you only have one memory block, you'll want to move its starting point by " + hex(best_offset) + " to " + hex(new_start))
    # Optional: uncomment to move the block automatically when the program
    # has exactly one memory block.
    # if len(memory.getBlocks())==1:
    #     memory.moveBlock(memory.getBlocks()[0],memory.getBlocks()[0].getStart().add(best_offsets[0][0]),TaskMonitor.DUMMY)
else:
    print("No single offset clearly stands out; inspect the candidates printed above.")
- Find the top-level function that contains code similar to the following:
puVar4 = pointer_to_20001730;
puVar3 = pointer_to_20000000;
puVar2 = pointer_to_10048a30;
bVar1 = (bool)isCurrentModePrivileged();
if (bVar1) {
setMainStackPointer(pointer_to_2000fff0);
}
for (iVar6 = 0; puVar5 = pointer_to_2000b358, puVar7 = (undefined4 *)pointer_to_20001730,
(int)(puVar3 + iVar6) < (int)puVar4; iVar6 = iVar6 + 4) {
*(undefined4 *)(puVar3 + iVar6) = *(undefined4 *)(puVar2 + iVar6);
}
- Rename the pointers based on the addresses they point to (keeping in mind that Ghidra names them by data address, not the pointed-to address). Determine which part of ROM is being copied to RAM (e.g., 0x200...). In the example, 0x1730 bytes are being copied from 0x10048a30 to 0x20000000.
- Go to the Memory Map again and add a section:
- Start address: 0x20000000 (substitute with your actual value)
- Length: 0x1730 (substitute with your actual value)
- Block Type: Byte Mapped
- Byte Mapped Start Address: 0x10048a30 (substitute with your actual value)
- Add another memory block for the remaining 0x200... RAM segment:
- Start address: previous block's end address + 1
- End address: possibly the address passed to setMainStackPointer (0x2000fff0 in the example above); this needs further investigation