Last active
March 15, 2024 13:41
-
-
Save fariss/792f31e5f5d92458a60727464b93180e to your computer and use it in GitHub Desktop.
Just a POC on an idea
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Extract instruction-level number features with P-code
#@category PCode
#@author Soufiane Fariss
#@menupath
#@toolbar
from ghidra.program.model.pcode import HighParam, PcodeOp, PcodeOpAST | |
from ghidra.program.model.address import AddressSet | |
from capa.features.extractors.ghidra.insn import * | |
from capa.features.address import AbsoluteVirtualAddress | |
def extract_insn_number_features_via_pcode(insn):
    """
    Print the constant operands found in an instruction's P-code.

    Every P-code op returned by ``insn.getPcode()`` is inspected:

    * ``COPY``: input 0 is reported when it is a constant.
    * ``INT_ADD``: input 1 is reported when it is a constant; otherwise
      input 0 is reported when it is a constant. At most one constant is
      reported per op.

    Other opcodes are skipped. Ops such as ``LOAD`` or ``INT_SUB`` could
    also reference constant values but are not handled yet.

    Args:
        insn: object exposing ``getPcode()`` and ``getAddress()``; the
            script passes items taken from the listing's instruction
            iterator.

    Returns:
        None. One line per reported constant is written to stdout.
    """
    # Build the "absolute(0x...)" label from the numeric offset rather than
    # by slicing the address text: dropping the first two characters is only
    # right when the address happens to start with two padding zeros
    # (e.g. "10001000" would be printed as "001000").
    # The mask renders the offset as an unsigned 64-bit value.
    offset_hex = "{:x}".format(insn.getAddress().getOffset() & 0xFFFFFFFFFFFFFFFF)

    for op in insn.getPcode():
        opcode = op.getOpcode()
        if opcode == PcodeOp.COPY:
            input_order = (0,)
        elif opcode == PcodeOp.INT_ADD:
            # Prefer the second operand, fall back to the first one.
            input_order = (1, 0)
        else:
            continue

        for index in input_order:
            varnode = op.getInput(index)
            if not varnode.isConstant():
                continue
            value = varnode.getOffset()
            print("absolute(0x{}):\t\t {}\t\t\tin [{}] \t\t @ {}".format(offset_hex, hex(value), op.toString(), insn.getAddress()))
            # Only the first constant operand of an op is reported.
            break
# Get all instructions in the .text section
text_block = getMemoryBlock('.text')
if text_block is None:
    # No memory block with that name: report it instead of failing with an
    # AttributeError on None.
    print("no .text memory block found; nothing to analyze")
else:
    # Named text_range so the builtin range() is not shadowed.
    text_range = AddressSet(text_block.getStart(), text_block.getEnd())
    # Pass the iteration direction explicitly (True = forward) together
    # with the address set.
    instructions = currentProgram().getListing().getInstructions(text_range, True)

    # Iterate over each instruction and extract features using the two methods
    while instructions.hasNext():
        insn = instructions.next()

        # Method 1: via P-code
        extract_insn_number_features_via_pcode(insn)

        # Method 2: via parsing machine code instructions (from capa)
        insn2 = InsnHandle(address=AbsoluteVirtualAddress(insn.getAddress().getOffset()), inner=insn)
        features = extract_insn_number_features(0, 0, insn2)
        for f in features:
            print(insn2.address, f)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment