Last active
February 27, 2021 14:38
-
-
Save duangsuse/90366621af2d206867496f7eb1e42438 to your computer and use it in GitHub Desktop.
Lua opcode reordering and its analysis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import struct | |
binPrefix = "<"  # struct byte-order prefix: little-endian, standard sizes


def binItem(fmt):
    """Build a reader for a single binary field of struct format *fmt*.

    The byte-order prefix is taken from ``binPrefix`` at the moment the
    reader is built.  The returned callable takes a binary file object,
    reads exactly as many bytes as the format occupies and returns the
    first unpacked value.
    """
    layout = binPrefix + fmt
    width = struct.calcsize(layout)

    def read_one(f):
        raw = f.read(width)
        return struct.unpack(layout, raw)[0]

    return read_one
def binAry(fmt_h, fmt):
    """Build a reader for a length-prefixed array.

    *fmt_h* is the struct format of the element count that precedes the
    data and *fmt* the format of each element.  The returned callable
    takes a binary file object, reads the count, then reads that many
    elements and returns them as a list.
    """
    read_count = binItem(fmt_h)
    read_elem = binItem(fmt)

    def read_array(f):
        count = read_count(f)
        return [read_elem(f) for _ in range(count)]

    return read_array
def mask1(n):
    """Return an integer whose lowest *n* bits are set (0 when n <= 0)."""
    if n <= 0:
        return 0
    return (1 << n) - 1
# For Lua 5.4 the original note says: nbit=7, ifmt="L", and the 0x22 base
# offset shrinks by 2 (number/int size bytes in the header) -- pass
# offset=0x22 - 2 + 12 in that case (TODO confirm against a 5.4 chunk).
def main(args, nbit=6, ifmt="I", offset=0x22 + 12):
    """Print the opcode mapping between two compiled Lua chunks.

    args   -- exactly two paths; the opcodes of the first file become the
              keys and the opcodes of the second file the values.
    nbit   -- number of low bits of an instruction that hold the opcode.
    ifmt   -- struct format character of one instruction word.
    offset -- byte offset of the 32-bit instruction count that precedes
              the instruction array (default 0x22 + 12).

    Raises ValueError when *args* does not hold exactly two paths.
    """
    if len(args) != 2:
        raise ValueError(
            "expected exactly two paths (reordered chunk, original chunk), "
            "got %d" % len(args)
        )
    read_code = binAry("I", ifmt)
    opmask = mask1(nbit)

    def opcodes(fp):
        # The list is built before the with-block exits, so the file can be
        # closed deterministically instead of leaking the handle.
        with open(fp, "rb") as f:
            f.seek(offset)
            return [word & opmask for word in read_code(f)]

    shuffled, reference = map(opcodes, args)
    # Instructions are paired by position; a later pair overrides an earlier
    # one with the same key.
    print(dict(zip(shuffled, reference)))


if __name__ == "__main__":
    from sys import argv

    main(argv[1:])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
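# Lua 5.4 changed the header completely: it dropped the last two entries of the sizes block at 0xC (int, size_t, Instruction, Integer, Number)... damn
# The header starts with "\x1BLua" 53 00 "\25\147\r\n\26\n"; after the sizes come (int)5678, (Number)370.5 and a 1-byte nUpvalues
# Starting at 0x22: name, lno, lno1, b_nParam, isVararg, b_maxStack
# 0x2E (46) is where the header (length) of the main chunk's instruction array sits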
# Download and build Lua 5.3.6 unless we are already inside its src/ directory.
if [ "$(basename "$PWD")" != src ]; then
  curl -O https://www.lua.org/ftp/lua-5.3.6.tar.gz
  # Use explicit names instead of lua-* globs: the glob also matches the
  # tarball once the directory exists.  Stop if extraction or cd fails so
  # make does not run in the wrong directory.
  tar xf lua-5.3.6.tar.gz && cd lua-5.3.6/src/ || exit 1
  make luac lua
  curl -O https://raw.githubusercontent.com/fengberd/xLuaDumper/master/xLuaDumper/opcode.lua
fi
cat>opcode_map.py <<EOF | |
import struct | |
def mask1(n):
    """Return an int with the lowest n bits set (0 when n <= 0)."""
    if n <= 0:
        return 0
    return (1 << n) - 1
ifmt = "<I"  # little-endian unsigned int
ifmtN = struct.calcsize(ifmt)  # number of bytes one such int occupies


def readInt(f):
    """Read ifmtN bytes from the binary file f and decode them as one ifmt integer."""
    (value,) = struct.unpack(ifmt, f.read(ifmtN))
    return value
# For Lua 5.4: nbit=7, ifmt="L", and the 0x22 offset shrinks by 2
# (number/int size in the header).
def readOpcodes(f, nbit=6):
    """Yield the opcode field of every instruction of a length-prefixed array.

    f must be positioned on the integer holding the instruction count.
    Each following instruction word is masked down to its lowest nbit bits.
    Nothing is read until the generator is first advanced.
    """
    mask = mask1(nbit)
    count = readInt(f)
    yield from (readInt(f) & mask for _ in range(count))
from sys import argv


def main(args=argv[1:]):
    """Print a dict mapping the opcodes of the first chunk to those of the second.

    args holds two paths to compiled chunks.  Instructions are paired by
    position; the opcodes of the first file are the keys and those of the
    second file the values.
    """
    def opcodes(fp):
        # readOpcodes is lazy, so drain it while the file is still open and
        # let the with-block close the handle afterwards.
        with open(fp, "rb") as f:
            f.seek(0x22 + 12)
            return list(readOpcodes(f))

    shuffled, reference = map(opcodes, args)
    print(dict(zip(shuffled, reference)))


if __name__ == "__main__":
    main()
EOF | |
./luac -s -o l0.luac opcode.lua | |
cat>shuf_opcode_h.py <<EOF | |
from re import sub, split | |
# Rewrite rules applied to lopcodes.h, each written as "regex => replacement".
# Patterns that contain backslashes are raw strings so the regex escapes
# reach the re module untouched (no invalid-escape warnings).
#   1. remove the "name args description" table-header comment
#   2. and 3. replace two descriptions that contain braces by their operand list
#   4. move the /* comment */ that follows "NAME," in front of that name
subsH = [
    r'''/\*-*\n\s*name args description\s*-*\*/ => ''',
    "A B C k R.* := {} => A B C k",
    "A Bx if R.* ~= nil then { R.*=R.*; pc -= Bx } => A Bx",
    r'''(\w+),/\*((.|\n)*?)\*/ => /*\2*/\1,''',
]
def applyTransform(op, transf, s):
    """Apply every "pattern => replacement" rule of transf to s, in order.

    op is called as op(pattern, replacement, text) and must return the new
    text (re.sub has this shape).  Each rule is split on its first " => "
    only, so a replacement may itself contain " => ".
    """
    acc = s
    for rule in transf:
        pattern, replacement = rule.split(" => ", 1)
        acc = op(pattern, replacement, acc)
    return acc
def insertAll(a, i, vs):
    """Insert the items of vs into the list a at position i, keeping their order.

    The list is modified in place and nothing is returned.  A single slice
    assignment does the work, so the items keep their order even when i is
    negative or past the end, and the cost is linear instead of one
    list.insert per item.
    """
    a[i:i] = vs
def find(xs, p):
    """Return the index of the first item of xs accepted by p, or None if there is none."""
    matches = (pos for pos, item in enumerate(xs) if p(item))
    return next(matches, None)
class separator:
    """Pair the regex used to split a text with the literal used to rejoin it."""

    def __init__(self, re, sep):
        self.re = re
        self.sep = sep

    def list(self, s):
        """Split s wherever the regex self.re matches."""
        pieces = split(self.re, s)
        return pieces

    def join(self, vs):
        """Concatenate the strings in vs with self.sep between them."""
        glue = self.sep
        return glue.join(vs)
def randomIndicesWithin(r, n):
    """Return the indices 0..n-1 in random order with those in r kept together.

    Every index below n that is not in r is shuffled; the items of r are
    then inserted, in their own order, as one contiguous run at a random
    position.  The result is a new list.
    """
    from random import shuffle, randint
    kept = list(r)
    xs = [i for i in range(n) if i not in r]
    shuffle(xs)
    # randint is inclusive on both ends: len(xs) lets the run land at the
    # very end and keeps the call valid when nothing is left to shuffle.
    at = randint(0, len(xs))
    xs[at:at] = kept
    return xs
def pre(fp, s):
    """Prepare the text s of file fp for shuffling.

    Only paths ending in lopcodes.h are touched: the subsH rules are
    applied first, then every comma and brace inside a /* ... */ comment
    is replaced by a space.  Any other file is returned unchanged.
    """
    if not fp.endswith("lopcodes.h"):
        return s

    def blank_out(m):
        return "/*%s*/" % sub("[,}{]", " ", m[1])

    rewritten = applyTransform(sub, subsH, s)
    return sub(r'''/\*((.|\n)*?)\*/''', blank_out, rewritten)
indices = []  # permutation computed on the first shufOpc call, reused afterwards
comma = separator(",(?! )", ",")  # entries are split on commas not followed by a space


def shufOpc(s):
    """Return the comma-separated entries of s reordered by the shared permutation.

    On the first call the permutation is created: the entries from the
    first one containing OP_ADD up to (not including) the first one
    containing OP_NOT stay together in their original order, everything
    else is shuffled.  Later calls reuse it.  Entries beyond the length of
    the permutation (the trailing NULL the assertion allows) are kept at
    the end instead of being dropped.
    """
    global indices
    xs = comma.list(s)
    print("".join(xs))
    if not indices:
        iAdd, iNot = (
            find(xs, lambda entry, name=name: name in entry)
            for name in ("OP_ADD", "OP_NOT")
        )
        indices = randomIndicesWithin(range(iAdd, iNot), len(xs))
    assert len(xs) == len(indices) or xs[-1].strip() == "NULL"
    return comma.join([xs[i] for i in indices] + xs[len(indices):])
from sys import argv


def main(args=argv[1:]):
    """Shuffle the opcode tables of every file named in args, in place.

    For a path ending in lopcodes.h the braces after "typedef enum " are
    rewritten; for any other file the first two brace groups that follow
    "= " are rewritten.  The first file decides the permutation, so it has
    to contain the OP_ADD and OP_NOT entries.
    """
    for fp in args:
        if fp.endswith("lopcodes.h"):
            pattern = "(?<=typedef enum ){((.|\n)*?)}"
        else:
            pattern = "(?<== ){((.|\n)*?)}"
        with open(fp, "r+") as f:
            text = pre(fp, f.read())
            sf1 = sub(pattern, lambda m: "{ %s }" % shufOpc(m[1]), text, count=2)
            # read() left the position at end of file and truncate() does not
            # move it; rewind first, otherwise the new text is written after
            # a run of NUL bytes.
            f.seek(0)
            f.truncate()
            f.write(sf1)


if __name__ == "__main__":
    main()
    print(indices)
EOF | |
# NOTE: the order of TM_OP in tm.h relative to OP_ADD has not been adjusted,
# so metatables will misbehave; this is only barely good enough for testing.
# The script is named explicitly: a shuf_*.py glob would also pick up any
# other shuf_ script lying in this directory and run that one instead.
python shuf_opcode_h.py lopcodes.h lopcodes.c
make luac
./luac -s -o l1.luac opcode.lua
python opcode_map.py l1.luac l0.luac
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name="lua-5.3.6.tar.gz"
# Download and build Lua unless we are already inside its src/ directory.
if [ "$(basename "$PWD")" != src ]; then
  curl -O "http://www.lua.org/ftp/${name}"
  # Stop if extraction or cd fails so make does not run in the wrong directory.
  tar xf "${name}" && cd "$(basename "$name" .tar.gz)/src/" || exit 1
  make luac lua
  curl -O https://raw.githubusercontent.com/fengberd/xLuaDumper/master/xLuaDumper/opcode.lua
fi
# TODO: use cp "$(basename "$0")"
# A here-document ends only at a line holding the delimiter and nothing
# else, so the TODO above must not share a line with EOF.
cat>shuf_opcode.py <<EOF
EOF
cat>dump_opcodetab.py <<EOF
EOF
# "[ ]" is always false, so the steps below never run
# (presumably disabled until the two scripts above have content).
if [ ]; then
  ./luac -s -o l0.luac opcode.lua
  python shuf_opcode.py lopcodes.h lopcodes.c
  make luac
  ./luac -s -o l1.luac opcode.lua
  python dump_opcodetab.py l1.luac l0.luac
fi
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from re import compile as Re | |
from random import shuffle | |
from functools import reduce | |
def flatten(vxs):
    """Yield the items of *vxs*, expanding one level of nesting.

    An item that has an ``__iter__`` attribute is replaced by its own
    elements; any other item is yielded unchanged.
    """
    for entry in vxs:
        if not hasattr(entry, "__iter__"):
            yield entry
            continue
        yield from entry
def randomIndicesWithin(r, n):
    """Return an iterator over 0..n-1 in random order, keeping *r* together.

    The indices below *n* that are not in *r* are shuffled together with
    *r* itself as a single element; flattening the result expands *r* in
    place, so its items stay adjacent and in their own order.
    """
    outside = [i for i in range(n) if i not in r]
    slots = outside + [r]
    shuffle(slots)
    return flatten(slots)
# Per-file configuration: (entry separator, regex locating the brace-delimited
# table, preprocessing rules written as "regex => replacement").
fileProc = {
    # tables that follow "= ": entries are separated by ",\n" once every
    # ",opmode" has been turned into ",<newline>opmode"
    "lopcodes.c": (
        ",\n",
        Re(r'''(?s)(?<== ){(.*?)}'''),
        ",opmode => ,\\nopmode",
    ),
    # the enum that follows "typedef enum ": comments are removed first
    "lopcodes.h": (
        ",",
        Re(r'''(?s)(?<=typedef enum ){(.*?)}'''),
        r'''(?s)/\*(.*?)\*/ => ''',
    ),
}
def reSubstFor(srule):
    """Compile newline-separated "regex => replacement" rules into one function.

    Each non-blank line of *srule* is split on its first " => "; the left
    part is compiled as a regular expression and the right part is the
    replacement.  The returned callable applies all rules to a string, in
    order, and returns the result.  Empty and whitespace-only lines are
    skipped; a line without " => " raises IndexError.
    """
    rules = []
    for ln in srule.split("\n"):
        # "".isspace() is False, so test emptiness through strip() to skip
        # empty lines as well as whitespace-only ones.
        if not ln.strip():
            continue
        parts = ln.split(" => ", 1)
        rules.append((Re(parts[0]), parts[1]))
    return lambda s: reduce(lambda acc, r: r[0].sub(r[1], acc), rules, s)
def multiShuffler(indices, p_filt_noshuf, p_noerr):
    """Build a function that reorders lists with one shared permutation.

    *indices* is a list owned by the caller.  While it is empty, the first
    list handed to the returned function fills it: the positions accepted
    by *p_filt_noshuf* are used as the arguments of ``range`` and that
    range is kept together while the other positions are shuffled.  Every
    call then returns the items picked in permuted order followed by any
    items beyond the length of the permutation.  A length mismatch is only
    tolerated when *p_noerr* accepts the list.
    """
    def shuf(xs):
        if not indices:
            bounds = tuple(filterIndex(p_filt_noshuf, xs))
            keep = range(*bounds)
            indices.extend(randomIndicesWithin(keep, len(xs)))
        assert len(xs) == len(indices) or p_noerr(xs)
        picked = [xs[i] for i in indices]
        return picked + xs[len(indices):]

    return shuf
def commaMap(op, s, sep=","):
    """Split *s* on *sep*, pass the list of fields through *op*, and rejoin with *sep*."""
    fields = s.split(sep)
    return sep.join(op(fields))
def filterIndex(p, xs):
    """Return a generator of the positions in *xs* whose item is accepted by *p*."""
    numbered = enumerate(xs)
    return (pos for pos, item in numbered if p(item))
def also(op, x):
    """Call *op* on *x* for its side effect, then hand *x* back unchanged."""
    op(x)
    return x
import sys | |
def eprint(s):
    """Print *s* to standard error; returns None."""
    print(s, file=sys.stderr)
def replaceShufOn(f, shuf, cfg):
    """Rewrite the open file *f* in place with its tables reordered by *shuf*.

    *cfg* is a ``(sep, re, rules)`` tuple: *rules* is preprocessed text for
    ``reSubstFor``, *re* is a compiled pattern whose group 1 is the body of
    a table, and *sep* separates the entries inside that body.  At most the
    first two matches are rewritten; each new body is echoed to stderr.
    *f* must be opened for reading and writing.
    """
    (sep, re, rules) = cfg
    pre = reSubstFor(rules)

    def rebuild(m):
        body = commaMap(shuf, m[1], sep).strip()
        eprint(body)
        return "{\n%s\n}" % body

    s1 = re.sub(rebuild, pre(f.read()), count=2)
    # read() left the position at end of file and truncate() does not move
    # it; rewind first, otherwise the new text lands after a run of NUL bytes.
    f.seek(0)
    f.truncate()
    f.write(s1)
def main(args):
    """Shuffle the opcode tables of the files in *args* and print the permutation.

    The configuration is looked up in ``fileProc`` by the file's base name,
    so paths with a directory part work too; unknown names use the
    ``lopcodes.c`` settings.  The permutation is created while the first
    file is processed, so that file has to contain the OP_ADD and OP_NOT
    entries.
    """
    from os.path import basename

    deftProc = fileProc["lopcodes.c"]
    ishuff = []
    # Raw string: "\s" is not a valid escape in an ordinary string literal.
    keep_edge = Re(r"\s*OP_(ADD|NOT)").match
    shuf = multiShuffler(ishuff, keep_edge, lambda xs: xs[-1].strip() == "NULL")
    for fp in args:
        with open(fp, "r+") as f:
            eprint(fp)
            replaceShufOn(f, shuf, fileProc.get(basename(fp)) or deftProc)
    print(ishuff)


if __name__ == "__main__":
    from sys import argv

    main(argv[1:])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
关于 randIdxWithin ,我觉得切三块处理,再 randomize 拼接或许能实现 zerocopy ,后来发现三个的无序化本身就不能 0copy ,所以最好的方法大概是直接用 flatten 吧