Skip to content

Instantly share code, notes, and snippets.

@LeadroyaL
Last active March 8, 2024 12:04
Show Gist options
  • Save LeadroyaL/80a5f6fbb83ee1c102c860aaf2bc594d to your computer and use it in GitHub Desktop.
Save LeadroyaL/80a5f6fbb83ee1c102c860aaf2bc594d to your computer and use it in GitHub Desktop.
Unicorn实战(一):去掉libcms.so的花指令
from elftools.elf.constants import P_FLAGS
from elftools.elf.elffile import ELFFile
from unicorn import Uc, UC_ARCH_ARM, UC_MODE_LITTLE_ENDIAN, UC_HOOK_CODE, UC_PROT_READ, UC_PROT_WRITE, UC_PROT_EXEC
from unicorn.arm_const import *
from capstone import Cs, CS_ARCH_ARM, CS_MODE_THUMB, CsInsn
from keystone import Ks, KS_MODE_THUMB, KS_ARCH_ARM
# Locate the .text section and read its raw bytes.
filename = "./libcms.so"
# Kept open on purpose: `elf` is still used after this section, and the file
# is closed at the end of the script.
fd = open(filename, 'rb')
elf = ELFFile(fd)
text_section = elf.get_section_by_name(".text")
if text_section is None:
    fd.close()
    raise ValueError("no .text section found in " + filename)
sh_offset = text_section.header['sh_offset']  # where the bytes live in the file
sh_addr = text_section.header['sh_addr']      # where the bytes live once loaded
sh_size = text_section.header['sh_size']
fd.seek(sh_offset)
text_data = fd.read(sh_size)

# Look for the sequence  PUSH {...}; MOV Rx, PC; MOV Rx, PC  (Thumb only).
cs = Cs(CS_ARCH_ARM, CS_MODE_THUMB)
# List of (address, push_regs): address of the PUSH and its operand string.
entries = []
seen = set()
step = 1000
overlap = 10
# The section is disassembled in fixed-size windows (presumably so that bytes
# capstone cannot decode only cost the remainder of one window -- confirm).
for i in range(0, len(text_data), step):
    # Start a little before the window so a pattern straddling the boundary is
    # still seen. The slice must end at i + step (not _i + step), otherwise the
    # windows do not actually overlap.
    _i = max(0, i - overlap)
    insns = list(cs.disasm(text_data[_i:i + step], 0))
    # Slide over every run of three consecutive instructions, so a PUSH that
    # directly follows another PUSH is still considered as a pattern start.
    for ins, ins2, ins3 in zip(insns, insns[1:], insns[2:]):
        # push {rx, rx}
        if ins.mnemonic != 'push':
            continue
        # mov rx, pc
        if not (ins2.mnemonic.startswith('mov') and ins2.op_str.endswith('pc')):
            continue
        # mov rx, pc
        if not (ins3.mnemonic.startswith('mov') and ins3.op_str.endswith('pc')):
            continue
        # disasm() was given base 0, so ins.address is an offset inside the
        # window. The result is used as an emulator address later on, so it
        # is based on the section's load address, not its file offset.
        address = sh_addr + _i + ins.address
        if address in seen:
            # Already reported from the previous window's overlap region.
            continue
        seen.add(address)
        entries.append((address, ins.op_str))
print(entries)
# Load the .so into emulator memory
def align(addr, size, align):
    """Widen the range [addr, addr + size) to multiples of `align`.

    The start is rounded down and the end is rounded up.

    Args:
        addr: start address of the range.
        size: length of the range in bytes.
        align: alignment in bytes; 0 is treated like 1 (no alignment).

    Returns:
        A tuple (aligned_start, aligned_length).
    """
    if align == 0:
        # ELF allows p_align == 0 to mean "no alignment required"; without
        # this guard the divisions below raise ZeroDivisionError.
        align = 1
    fr_addr = addr // align * align
    to_addr = (addr + size + align - 1) // align * align
    return fr_addr, to_addr - fr_addr
def pflags2prot(p_flags):
    """Translate ELF segment flags (PF_R / PF_W / PF_X) into a UC_PROT_* mask.

    Args:
        p_flags: the `p_flags` value of a program header.

    Returns:
        The bitwise OR of UC_PROT_READ / UC_PROT_WRITE / UC_PROT_EXEC for
        every corresponding PF_* bit set in `p_flags` (0 if none is set).
    """
    flag_pairs = (
        (P_FLAGS.PF_R, UC_PROT_READ),
        (P_FLAGS.PF_W, UC_PROT_WRITE),
        (P_FLAGS.PF_X, UC_PROT_EXEC),
    )
    prot = 0
    for elf_bit, uc_bit in flag_pairs:
        if p_flags & elf_bit:
            prot |= uc_bit
    return prot
# Create the emulator and copy every PT_LOAD segment to load_base + p_vaddr.
load_base = 0
emu = Uc(UC_ARCH_ARM, UC_MODE_LITTLE_ENDIAN)
load_segments = [seg for seg in elf.iter_segments() if seg.header.p_type == 'PT_LOAD']
for seg in load_segments:
    hdr = seg.header
    seg_start = load_base + hdr.p_vaddr
    # Map the aligned range that encloses the segment, using the segment's
    # own permissions, then write the segment bytes at its exact address.
    map_start, map_len = align(seg_start, hdr.p_memsz, hdr.p_align)
    emu.mem_map(map_start, map_len, pflags2prot(hdr.p_flags))
    emu.mem_write(seg_start, seg.data())
# Enter every candidate entry in turn and stop once the stack is balanced again.
STACK_ADDR = 0x7F000000
STACK_SIZE = 1024 * 1024
# Address of the candidate currently being emulated; read by hook_code.
start_addr = None


def hook_code(mu: Uc, address, size, user_data):
    """Code hook that stops emulation once the stack pointer is back at the top.

    Emulation is stopped when PC is no longer `start_addr` and SP equals
    STACK_ADDR + STACK_SIZE. The PC check keeps the hook from firing on the
    candidate's first instruction, where SP has not moved yet.

    Args:
        mu: the emulator instance that invoked the hook.
        address: unused.
        size: unused.
        user_data: unused.
    """
    if mu.reg_read(UC_ARM_REG_PC) != start_addr and mu.reg_read(UC_ARM_REG_SP) == STACK_ADDR + STACK_SIZE:
        # Stop the instance that called us instead of reaching for the global.
        mu.emu_stop()


emu.mem_map(STACK_ADDR, STACK_SIZE)
emu.hook_add(UC_HOOK_CODE, hook_code)
# Maps a register name, as it appears in a disassembled operand string, to the
# matching UC_ARM_REG_* constant.
_to_reg_id = {
    # numbered registers
    "r0": UC_ARM_REG_R0,
    "r1": UC_ARM_REG_R1,
    "r2": UC_ARM_REG_R2,
    "r3": UC_ARM_REG_R3,
    "r4": UC_ARM_REG_R4,
    "r5": UC_ARM_REG_R5,
    "r6": UC_ARM_REG_R6,
    "r7": UC_ARM_REG_R7,
    "r8": UC_ARM_REG_R8,
    "r9": UC_ARM_REG_R9,
    "r10": UC_ARM_REG_R10,
    "r11": UC_ARM_REG_R11,
    "r12": UC_ARM_REG_R12,
    "r13": UC_ARM_REG_R13,
    "r14": UC_ARM_REG_R14,
    "r15": UC_ARM_REG_R15,
    # named registers
    "lr": UC_ARM_REG_LR,
    "pc": UC_ARM_REG_PC,
    "sp": UC_ARM_REG_SP,
    "sb": UC_ARM_REG_SB,
    "sl": UC_ARM_REG_SL,
    "fp": UC_ARM_REG_FP,
    "ip": UC_ARM_REG_IP,
}
# Emulate every candidate. A candidate is accepted when, at the point the
# emulation stops, the stack pointer is back at the top of the stack and every
# pushed register still holds the marker value written before the run.
from unicorn import UcError  # raised by emu_start() when emulation faults

# List of (start_addr, stop_addr) pairs for the accepted candidates.
ret = []
MAGIC32 = 0x12345678
for push_entry, push_regs in entries:
    # push_regs looks like "{r4, r5, lr}"; resolve it to register ids once.
    reg_ids = [_to_reg_id[r] for r in push_regs.strip('{}').replace(' ', '').split(',')]
    emu.reg_write(UC_ARM_REG_SP, STACK_ADDR + STACK_SIZE)
    print("Emulate arm code start", hex(push_entry))
    start_addr = push_entry  # read by hook_code
    for reg_id in reg_ids:
        emu.reg_write(reg_id, MAGIC32)
    try:
        # The +1 sets the low address bit to start in Thumb mode. The run is
        # capped at 100 instructions.
        emu.emu_start(push_entry + 1, 0, 0, 100)
    except UcError as exc:
        # Candidates come from a heuristic scan; a faulting one must not
        # abort the remaining candidates.
        print("Emulation failed:", exc)
        print("Cannot handle:", start_addr)
        continue
    print("Emulation arm code done")
    # If the run ended on the instruction cap rather than in hook_code, SP is
    # not back at the top and the stop address is meaningless.
    balanced = emu.reg_read(UC_ARM_REG_SP) == STACK_ADDR + STACK_SIZE
    restored = all(emu.reg_read(reg_id) == MAGIC32 for reg_id in reg_ids)
    stop_addr = emu.reg_read(UC_ARM_REG_PC)
    if balanced and restored:
        print("Match:", start_addr, stop_addr)
        ret.append((start_addr, stop_addr))
    else:
        print("Cannot handle:", start_addr)
fd.close()
print(ret)
# [(55302, 55386), (55390, 55474), (55538, 55624), (55916, 56002), (56006, 56090), (56114, 56200), (60314, 60398), (60780, 60866), (61258, 61342), (61346, 61432), (96392, 96476), (107254, 107338), (107412, 107498), (130468, 130552), (131490, 131574), (131578, 131664), (132818, 132902), (135238, 135324), (135456, 135542), (136624, 136710), (144270, 144354), (144434, 144520), (144856, 144940), (145070, 145156), (147232, 147316), (151298, 151382), (151512, 151598), (151662, 151748), (152022, 152106), (152110, 152196), (157910, 157996), (158170, 158256), (159260, 159346), (159348, 159434), (161294, 161380), (161476, 161562), (161574, 161660), (161806, 161890), (161930, 162016), (165348, 165434), (165504, 165588), (165600, 165686), (165756, 165840), (165852, 165938), (166008, 166092), (166104, 166190), (166260, 166344), (166356, 166442), (166512, 166596), (166608, 166694), (166764, 166848), (166860, 166946), (167016, 167100), (167112, 167198), (167268, 167352), (167364, 167450), (169646, 169732), (169744, 169830), (170418, 170504), (170518, 170604), (172880, 172964), (173540, 173626), (173638, 173724), (173732, 173816), (173820, 173906), (185340, 185424), (185568, 185652), (185830, 185914), (186568, 186654), (211188, 211272), (211418, 211504), (211550, 211634), (211680, 211766), (211790, 211874), (211880, 211968), (211974, 212058), (212068, 212154), (212162, 212246), (214604, 214690), (217610, 217694), (222522, 222608), (224688, 224772), (225110, 225194), (225280, 225366), (226396, 226482), (226680, 226766), (226874, 226960), (227020, 227104), (227172, 227258), (484228, 484314), (486500, 486586), (487556, 487642), (487696, 487782), (487828, 487914), (487916, 488000), (490978, 491064), (492458, 492544), (494114, 494200), (494444, 494528), (494568, 494654), (505414, 505498), (506140, 506226), (507860, 507944), (509882, 509968), (510050, 510136), (510540, 510624), (512724, 512810), (512858, 512942), (513104, 513188), (513282, 513366), (513406, 513490), (516024, 516108), 
# (516152, 516238), (516476, 516562)]
# in idapython
# for start, stop in ret:
# ks = Ks(KS_ARCH_ARM, KS_MODE_THUMB)
# a = ks.asm("B.W $+" + str(stop - start))
# PatchByte(start, a[0][0])
# PatchByte(start + 1, a[0][1])
# PatchByte(start + 2, a[0][2])
# PatchByte(start + 3, a[0][3])
@wqsui
Copy link

wqsui commented Oct 9, 2020

好的,非常感谢,我试试,第一次接触。要学习一下。

@wqsui
Copy link

wqsui commented Oct 9, 2020

LeadroyaL,你好,我在ida里运行了python脚本,运行好没有报错,同时也没有反应。不知道新的SO文件在哪里?我用的是IDA 7.0版本,我的操作方法是:在IDA里打开Libcms.so,然后选择文件菜单->Scripting Command...,然后在Scripting language 选择 Python。然后运行如下代码:
from keystone import Ks, KS_MODE_THUMB, KS_ARCH_ARM
ret=[(13974, 14062), (14812, 14900), (15246, 15334), ...... 545662), (547720, 547808), (547858, 547946)]

#in idapython
for start, stop in ret:
ks = Ks(KS_ARCH_ARM, KS_MODE_THUMB)
a = ks.asm("B.W $+" + str(stop - start))
PatchByte(start, a[0][0])
PatchByte(start + 1, a[0][1])
PatchByte(start + 2, a[0][2])
PatchByte(start + 3, a[0][3])
运行好后IDA的Output Windows 里没有任何输出。修改后的SO文件怎么获得呢?非常感谢。

@wqsui
Copy link

wqsui commented Oct 10, 2020

LeadroyaL,你好,我好像研究出来方法了,用 Edit -> Patch program -> Apply patches to input file 覆盖保存了so文件。但是同样遇到找不到JNI_onload 入口文件的情况,我用上面的脚本运行,但是报错:

for item in rst:
patch_in_ghidra(item) error: Traceback (most recent call last):
File "", line 1, in
ImportError: No module named ghidra.app.plugin.assembler

好像是ghidra.app.plugin.assembler包没有安装,但是我在网上也找不到这个包。你能告诉我如何安装吗?非常感谢。

@huhuang03
Copy link

@wqsui 你用的是Ghidra的脚本,作者用的是ida的脚本。

Ghidra和ida都是反编译工具。Ghidra的脚本并不能在ida中运行

@wqsui
Copy link

wqsui commented Oct 18, 2020

好的,非常感谢!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment