Skip to content

Instantly share code, notes, and snippets.

@LeadroyaL
Last active March 8, 2024 12:04
Show Gist options
  • Save LeadroyaL/80a5f6fbb83ee1c102c860aaf2bc594d to your computer and use it in GitHub Desktop.
Save LeadroyaL/80a5f6fbb83ee1c102c860aaf2bc594d to your computer and use it in GitHub Desktop.
Unicorn实战(一):去掉libcms.so的花指令
from capstone import Cs, CS_ARCH_ARM, CS_MODE_THUMB, CsInsn
from elftools.elf.constants import P_FLAGS
from elftools.elf.elffile import ELFFile
from keystone import Ks, KS_MODE_THUMB, KS_ARCH_ARM
from unicorn import Uc, UcError, UC_ARCH_ARM, UC_MODE_LITTLE_ENDIAN, UC_HOOK_CODE, UC_PROT_READ, UC_PROT_WRITE, UC_PROT_EXEC
from unicorn.arm_const import *
# Locate the .text section and read its raw bytes.
filename = "./libcms.so"
# NOTE: fd is intentionally not wrapped in `with`: the ELFFile object keeps
# reading from it later in the script, and it is closed at the very end.
fd = open(filename, 'rb')
elf = ELFFile(fd)
text_section = elf.get_section_by_name(".text")
if text_section is None:
    raise ValueError("no .text section found in " + filename)
sh_offset = text_section.header['sh_offset']
sh_size = text_section.header['sh_size']
# Virtual address of .text. Candidates are later used as emulator / IDA
# addresses, so they must be expressed as virtual addresses, not file offsets.
sh_addr = text_section.header['sh_addr']
fd.seek(sh_offset)
text_data = fd.read(sh_size)

# Find the sequence  PUSH {...}; MOV Rx, PC; MOV Rx, PC  (Thumb only).
cs = Cs(CS_ARCH_ARM, CS_MODE_THUMB)


def _is_mov_from_pc(insn):
    """Return True when insn is a mov-family instruction whose operands end in pc."""
    return insn.mnemonic.startswith('mov') and insn.op_str.endswith('pc')


# (address, push_regs)
entries = []
_seen_addresses = set()
step = 1000
overlap = 10
# Capstone stops at the first undecodable bytes, so the section is scanned in
# windows to get a fresh start every `step` bytes.  Each window begins
# `overlap` bytes early and runs up to the nominal end of its chunk, so a
# sequence that straddles a chunk boundary is still seen in one piece.
for i in range(0, len(text_data), step):
    _i = max(0, i - overlap)
    window = []
    for ins in cs.disasm(text_data[_i:i + step], 0):
        # Keep the last three decoded instructions so that no instruction is
        # consumed by a partial match.
        window.append(ins)
        del window[:-3]
        if len(window) < 3:
            continue
        first, second, third = window
        if first.mnemonic != 'push':
            continue
        if not _is_mov_from_pc(second) or not _is_mov_from_pc(third):
            continue
        address = sh_addr + _i + first.address
        # Overlapping windows can report the same candidate twice.
        if address in _seen_addresses:
            continue
        _seen_addresses.add(address)
        entries.append((address, first.op_str))
print(entries)
# Load the .so into emulator memory.
def align(addr, size, align):
    """Expand the range [addr, addr + size) outward to multiples of `align`.

    Args:
        addr: start address of the range.
        size: length of the range in bytes.
        align: alignment granularity; 0 is treated like 1 (no alignment),
            which is how the ELF format defines a p_align of 0.

    Returns:
        A tuple (aligned_start, aligned_length): the start rounded down and
        the distance from there to the end rounded up.
    """
    if align == 0:
        # Avoid ZeroDivisionError for segments that declare no alignment.
        align = 1
    fr_addr = addr // align * align
    to_addr = (addr + size + align - 1) // align * align
    return fr_addr, to_addr - fr_addr
def pflags2prot(p_flags):
    """Translate ELF segment permission flags into Unicorn protection bits.

    Each of PF_R / PF_W / PF_X that is set in `p_flags` contributes the
    matching UC_PROT_READ / UC_PROT_WRITE / UC_PROT_EXEC bit to the result.
    """
    flag_to_prot = (
        (P_FLAGS.PF_R, UC_PROT_READ),
        (P_FLAGS.PF_W, UC_PROT_WRITE),
        (P_FLAGS.PF_X, UC_PROT_EXEC),
    )
    prot = 0
    for elf_flag, uc_prot in flag_to_prot:
        if p_flags & elf_flag:
            prot |= uc_prot
    return prot
load_base = 0
emu = Uc(UC_ARCH_ARM, UC_MODE_LITTLE_ENDIAN)
# Only PT_LOAD segments are mapped into the emulator.
load_segments = [seg for seg in elf.iter_segments() if seg.header.p_type == 'PT_LOAD']
for segment in load_segments:
    header = segment.header
    # Map the aligned range covering the segment's in-memory size, then
    # copy the segment's file contents to its (unaligned) virtual address.
    map_start, map_len = align(load_base + header.p_vaddr, header.p_memsz, header.p_align)
    emu.mem_map(map_start, map_len, pflags2prot(header.p_flags))
    emu.mem_write(load_base + header.p_vaddr, segment.data())
# Enter every candidate in turn; stop emulating once the stack is balanced.
STACK_ADDR = 0x7F000000
STACK_SIZE = 1024 * 1024
# Address of the candidate currently being emulated (set by the driver loop).
start_addr = None


def hook_code(mu: Uc, address, size, user_data):
    """Code hook: stop emulation when SP is back at the top of the stack.

    Nothing happens while PC still equals `start_addr`, because SP has not
    yet moved away from its initial value at that point.
    """
    if mu.reg_read(UC_ARM_REG_PC) == start_addr:
        return
    if mu.reg_read(UC_ARM_REG_SP) != STACK_ADDR + STACK_SIZE:
        return
    emu.emu_stop()


emu.mem_map(STACK_ADDR, STACK_SIZE)
emu.hook_add(UC_HOOK_CODE, hook_code)
# Maps a register name as it appears in a disassembled operand string to the
# corresponding Unicorn register id.
_numbered_regs = (
    UC_ARM_REG_R0, UC_ARM_REG_R1, UC_ARM_REG_R2, UC_ARM_REG_R3,
    UC_ARM_REG_R4, UC_ARM_REG_R5, UC_ARM_REG_R6, UC_ARM_REG_R7,
    UC_ARM_REG_R8, UC_ARM_REG_R9, UC_ARM_REG_R10, UC_ARM_REG_R11,
    UC_ARM_REG_R12, UC_ARM_REG_R13, UC_ARM_REG_R14, UC_ARM_REG_R15,
)
# "r0" .. "r15"
_to_reg_id = {"r%d" % index: reg_id for index, reg_id in enumerate(_numbered_regs)}
# Named aliases.
_to_reg_id.update(
    lr=UC_ARM_REG_LR,
    pc=UC_ARM_REG_PC,
    sp=UC_ARM_REG_SP,
    sb=UC_ARM_REG_SB,
    sl=UC_ARM_REG_SL,
    fp=UC_ARM_REG_FP,
    ip=UC_ARM_REG_IP,
)
# Emulate each candidate.  The pushed registers are seeded with a sentinel;
# if they all still hold it when emulation stops, the span from the push to
# the stop address is recorded as a (start, stop) pair.
ret = []
MAGIC32 = 0x12345678
for push_entry, push_regs in entries:
    # push_regs is the operand string of the push, e.g. "{r4, r5, lr}".
    reg_ids = [_to_reg_id[r] for r in push_regs.strip('{}').replace(' ', '').split(',')]
    emu.reg_write(UC_ARM_REG_SP, STACK_ADDR + STACK_SIZE)
    print("Emulate arm code start", hex(push_entry))
    # hook_code reads this module-level name to recognise the first instruction.
    start_addr = push_entry
    for reg_id in reg_ids:
        emu.reg_write(reg_id, MAGIC32)
    try:
        # Low bit set => start in Thumb state; run at most 100 instructions.
        emu.emu_start(push_entry + 1, 0, 0, 100)
    except UcError as err:
        # A false-positive candidate (e.g. one that touches unmapped memory)
        # must not abort the scan of the remaining candidates.
        print("Emulation failed:", err)
        print("Cannot handle:", start_addr)
        continue
    print("Emulation arm code done")
    changed = any(emu.reg_read(reg_id) != MAGIC32 for reg_id in reg_ids)
    stop_addr = emu.reg_read(UC_ARM_REG_PC)
    if not changed:
        print("Match:", start_addr, stop_addr)
        ret.append((start_addr, stop_addr))
    else:
        print("Cannot handle:", start_addr)
fd.close()
print(ret)
# [(55302, 55386), (55390, 55474), (55538, 55624), (55916, 56002), (56006, 56090), (56114, 56200), (60314, 60398), (60780, 60866), (61258, 61342), (61346, 61432), (96392, 96476), (107254, 107338), (107412, 107498), (130468, 130552), (131490, 131574), (131578, 131664), (132818, 132902), (135238, 135324), (135456, 135542), (136624, 136710), (144270, 144354), (144434, 144520), (144856, 144940), (145070, 145156), (147232, 147316), (151298, 151382), (151512, 151598), (151662, 151748), (152022, 152106), (152110, 152196), (157910, 157996), (158170, 158256), (159260, 159346), (159348, 159434), (161294, 161380), (161476, 161562), (161574, 161660), (161806, 161890), (161930, 162016), (165348, 165434), (165504, 165588), (165600, 165686), (165756, 165840), (165852, 165938), (166008, 166092), (166104, 166190), (166260, 166344), (166356, 166442), (166512, 166596), (166608, 166694), (166764, 166848), (166860, 166946), (167016, 167100), (167112, 167198), (167268, 167352), (167364, 167450), (169646, 169732), (169744, 169830), (170418, 170504), (170518, 170604), (172880, 172964), (173540, 173626), (173638, 173724), (173732, 173816), (173820, 173906), (185340, 185424), (185568, 185652), (185830, 185914), (186568, 186654), (211188, 211272), (211418, 211504), (211550, 211634), (211680, 211766), (211790, 211874), (211880, 211968), (211974, 212058), (212068, 212154), (212162, 212246), (214604, 214690), (217610, 217694), (222522, 222608), (224688, 224772), (225110, 225194), (225280, 225366), (226396, 226482), (226680, 226766), (226874, 226960), (227020, 227104), (227172, 227258), (484228, 484314), (486500, 486586), (487556, 487642), (487696, 487782), (487828, 487914), (487916, 488000), (490978, 491064), (492458, 492544), (494114, 494200), (494444, 494528), (494568, 494654), (505414, 505498), (506140, 506226), (507860, 507944), (509882, 509968), (510050, 510136), (510540, 510624), (512724, 512810), (512858, 512942), (513104, 513188), (513282, 513366), (513406, 513490), (516024, 516108), 
(516152, 516238), (516476, 516562)]
# Run the following in IDAPython, with `ret` set to the list printed above:
# for start, stop in ret:
#     ks = Ks(KS_ARCH_ARM, KS_MODE_THUMB)
#     a = ks.asm("B.W $+" + str(stop - start))
#     PatchByte(start, a[0][0])
#     PatchByte(start + 1, a[0][1])
#     PatchByte(start + 2, a[0][2])
#     PatchByte(start + 3, a[0][3])
@hanjackcyw
Copy link

你好能否把去掉花指令之后的这个cms发我一份,谢谢了 qq 1666274840

自己没手???

哈哈哈哈哈哈哈

@hanjackcyw
Copy link

我按照你这个方法运行了一下,确实有很多原来不能反编译的函数可以反编译了,但是jni_onload还是报sp值不对,想问一下,知道是什么原因吗?

@huhuang03
Copy link

huhuang03 commented Aug 4, 2020

使用ghidra的patch脚本:

from ghidra.app.plugin.assembler import Assemblers


# Only tested with an image base of 0.
base = currentProgram.getImageBase()
asm = Assemblers.getAssembler(currentProgram)


def patch_in_ghidra(item):
    """Assemble a `b.w` at item[0] (relative to the image base).

    The branch operand is item[1] + 2, rendered as a decimal string.
    """
    start = item[0]
    end = item[1]
    patch_addr = base.add(start)
    print("patch at: ", patch_addr)
    branch_text = "b.w " + str(end + 2)
    asm.assemble(patch_addr, branch_text)


# [start, end] pairs to patch.
rst = [
    [0x23cc, 0x2422],
    [0x24a8, 0x24fe],
]

for item in rst:
    patch_in_ghidra(item)

@wqsui
Copy link

wqsui commented Oct 8, 2020

LeadroyaL,你好,运行这个python脚本需要安装什么环境?我第一次python,配置了一天运行编译还是报错。
第一个错误:
File "unflower_cms.py", line 82
def hook_code(mu:Uc, address, size, user_data):
^
SyntaxError: invalid syntax
如果把改为: def hook_code(mu,Uc, address, size, user_data):则报下面的错误:

File "unflower_cms.py", line 107, in
emu.emu_start(push_entry + 1, 0, 0, 100)
File "build/bdist.linux-x86_64/egg/unicorn/unicorn.py", line 317, in emu_start
unicorn.unicorn.UcError: Invalid memory read (UC_ERR_READ_UNMAPPED)
写些问题应该是环境没有安装好,希望得到你的帮助,非常感谢!下面是我的
QQ:1092429189

@wqsui
Copy link

wqsui commented Oct 9, 2020

LeadroyaL,你好,今天终于搞定了。输出的部分信息如下:
Emulation arm code done
Match: 513282 513370
Emulate arm code start 0x7d57e
Emulation arm code done
Match: 513406 513494
Emulate arm code start 0x7dfb8
Emulation arm code done
Match: 516024 516112
Emulate arm code start 0x7e038
Emulation arm code done
Match: 516152 516240
Emulate arm code start 0x7e17c
Emulation arm code done
Match: 516476 516564
[(55302, 55388), (55390, 55478), (55538, 55626), (55916, 56004), (56006, 56094), (56114, 56202), (60314, 60402), (60780, 60868), (61258, 61346), (61346, 61434), (96392, 96480), (107254, 107342), (107412, 107500), (130468, 130556), (131490, 131578), (131578, 131666), (132818, 132906), (135238, 135326), (135456, 135544), (136624, 136712), (144270, 144358), (144434, 144522), (144856, 144944), (145070, 145158), (147232, 147320), (151298, 151386), (151512, 151600), (151662, 151750), (152022, 152110), (152110, 152198), (157910, 157998), (158170, 158258), (159260, 159348), (159348, 159436), (161294, 161382), (161476, 161564), (161574, 161662), (161806, 161894), (161930, 162018), (165348, 165436), (165504, 165592), (165600, 165688), (165756, 165844), (165852, 165940), (166008, 166096), (166104, 166192), (166260, 166348), (166356, 166444), (166512, 166600), (166608, 166696), (166764, 166852), (166860, 166948), (167016, 167104), (167112, 167200), (167268, 167356), (167364, 167452), (169646, 169734), (169744, 169832), (170418, 170506), (170518, 170606), (172880, 172968), (173540, 173628), (173638, 173726), (173732, 173820), (173820, 173908), (185340, 185428), (185568, 185656), (185830, 185918), (186568, 186656), (211188, 211276), (211418, 211506), (211550, 211638), (211680, 211768), (211790, 211878), (211880, 211968), (211974, 212062), (212068, 212156), (212162, 212250), (214604, 214692), (217610, 217698), (222522, 222610), (224688, 224776), (225110, 225198), (225280, 225368), (226396, 226484), (226680, 226768), (226874, 226962), (227020, 227108), (227172, 227260), (484228, 484316), (486500, 486588), (487556, 487644), (487696, 487784), (487828, 487916), (487916, 488004), (490978, 491066), (492458, 492546), (494114, 494202), (494444, 494532), (494568, 494656), (505414, 505502), (506140, 506228), (507860, 507948), (509882, 509970), (510050, 510138), (510540, 510628), (512724, 512812), (512858, 512946), (513104, 513192), (513282, 513370), (513406, 513494), (516024, 516112), 
(516152, 516240), (516476, 516564)]

但是改好的文件呢?

@LeadroyaL
Copy link
Author

但是改好的文件呢?

patch 文件是通过 idaapi 完成的,把这组数据和最后几行代码在 ida 里运行即可。

@wqsui
Copy link

wqsui commented Oct 9, 2020

好的,非常感谢,我试试,第一次接触。要学习一下。

@wqsui
Copy link

wqsui commented Oct 9, 2020

LeadroyaL,你好,我在ida里运行了python脚本,运行好没有报错,同时也没有反应。不知道新的SO文件在哪里?我用的是IDA 7.0版本,我的操作方法是:在IDA里打开Libcms.so,然后选择文件菜单->Scripting Command...,然后在Scripting language 选择 Python。然后运行如下代码:
from keystone import Ks, KS_MODE_THUMB, KS_ARCH_ARM
ret=[(13974, 14062), (14812, 14900), (15246, 15334), ...... 545662), (547720, 547808), (547858, 547946)]

#in idapython
for start, stop in ret:
ks = Ks(KS_ARCH_ARM, KS_MODE_THUMB)
a = ks.asm("B.W $+" + str(stop - start))
PatchByte(start, a[0][0])
PatchByte(start + 1, a[0][1])
PatchByte(start + 2, a[0][2])
PatchByte(start + 3, a[0][3])
运行好后IDA的Output Windows 里没有任何输出。修改后的SO文件怎么获得呢?非常感谢。

@wqsui
Copy link

wqsui commented Oct 10, 2020

LeadroyaL,你好,我好像研究出来方法了,用edit->patch program ->Apply patches to input file.覆盖保存了so文件。但是同样遇到找不到JNI_onload 入口文件的情况,我用上面的脚本运行,但是报错:

for item in rst:
patch_in_ghidra(item) error: Traceback (most recent call last):
File "", line 1, in
ImportError: No module named ghidra.app.plugin.assembler

好像是ghidra.app.plugin.assembler包没有安装,但是我在网上也找不到这个包。你能告诉我如何安装吗?非常感谢。

@huhuang03
Copy link

@wqsui 你用的是Ghidra的脚本,作者用的是ida的脚本。

Ghidra和ida都是反编译工具。Ghidra的脚本并不能在ida中运行

@wqsui
Copy link

wqsui commented Oct 18, 2020

好的,非常感谢!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment