Observation: Compile some naive demo and run you can find each character in the input of dice_macro::dice!{};
macro was 1-to-1 mapped to some integer arrays. So just enumerate all printable characters and figure out the flag byte by byte.
EXP:
'''
$ cat rust-toolchain
stable-2023-01-10
$ cat ./compile_run.sh
rustc ./src/main.rs --extern dice_macro=../libdice_macro.so -o dice
'''
enc = [
1,
3,
2,
1,
1,
1,
3,
1,
1,
1,
1,
2,
2,
1,
1,
1,
1,
2,
2,
1,
3,
1,
1,
1,
5,
1,
3,
2,
1,
1,
2,
2,
2,
2,
1,
3,
1,
2,
1,
2,
2,
2,
2,
1,
3,
2,
1,
1,
2,
2,
1,
1,
1,
1,
2,
2,
3,
1,
1,
2,
1,
3,
1,
1,
2,
2,
3,
1,
1,
1,
5,
1,
3,
1,
3,
2,
2,
3,
1,
1,
3,
1,
1,
2,
1,
2,
1,
1,
3,
1,
1,
1,
5,
2,
2,
1,
1,
2,
1,
1,
1,
5,
1,
2,
1,
2,
1,
1,
2,
2,
1,
1,
2,
1,
2,
3,
2,
1,
3,
2,
1,
1,
2,
2,
4,
1,
1,
1,
5,
1,
3,
1,
1,
2,
1,
3,
1,
3,
2,
2,
3,
1,
2,
2,
1,
1,
1,
1,
1,
3,
1,
1,
2,
]
import os
import subprocess
import string
from tqdm import tqdm
# charset = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ '
charset = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
dec_table = {}
for i,c in tqdm(enumerate(charset)):
code = 'extern crate dice_macro;\n\n'+\
'fn main() {\n' + \
' let tmp = dice_macro::dice!{A'+c+'};\n' + \
' println!("{:#?}", tmp);\n' + \
'}'
with open("src/main.rs", "w") as f:
f.write(code)
# break
subprocess.run("./compile_run.sh")
with open("result.log", "r") as f:
result = f.readlines()
result = eval(''.join(result))
dec_table[c] = result[4:]
reverse_dec_table = {}
for k,v in dec_table.items():
rk = ''.join([str(x) for x in v])
reverse_dec_table[rk]=k
def matches(seg):
if len(seg) >= 3:
k = ''.join([str(x) for x in seg[:3]])
if k in reverse_dec_table:
return reverse_dec_table[k], 3
if len(seg) >= 4:
k = ''.join([str(x) for x in seg[:4]])
if k in reverse_dec_table:
return reverse_dec_table[k], 4
if len(seg) >= 5:
k = ''.join([str(x) for x in seg[:5]])
if k in reverse_dec_table:
return reverse_dec_table[k], 5
if len(seg) >= 6:
k = ''.join([str(x) for x in seg[:6]])
if k in reverse_dec_table:
return reverse_dec_table[k], 6
if len(seg) >= 7:
k = ''.join([str(x) for x in seg[:7]])
if k in reverse_dec_table:
return reverse_dec_table[k], 7
if len(seg) >= 8:
k = ''.join([str(x) for x in seg[:8]])
if k in reverse_dec_table:
return reverse_dec_table[k], 8
return "", 0
idx = 0
flag = ""
while idx < len(enc):
c, step = matches(enc[idx:])
if c != "":
flag+=c
idx += step
print(f"{c} {step}")
else:
print(f"{c} {idx}")
break
print(flag)
# ru57_r3v3r51ng_w1th_4_m4cr0_tw15t
GNU C++ reversing.
In the main
function it just reads some integers from the input file and stored them in a vec<int>
. Then it starts n
threads to compute the output.
The interesting function is sub_336C
(renamed as thread_func_real
), where the calculations were performed on the vec<int>
. The sync of multiple threads was managed in sub_2CB0
(renamed as sem_wait_post
), using two semaphore
which were initialized in sub_2C1E
(renamed as init_sem_obj
):
union sem_t
{
char __size[32];
__int64 __align;
};
struct object1
{
int working_thread;
int total_thread;
char some_data[40];
sem_t sem1;
sem_t sem2;
};
Take a carefully look at thread_func_real
and sem_wait_post
, you can see that it parallelized the operations for disjoint segments of the vec<int>
.
struct args
{
vector *vec_1;
void *funcs;
int *cnt;
object1 *obj1;
};
char __fastcall thread_func_real(args *a1, double func_idx)
{
__int64 (__fastcall *v2)(_QWORD, __int64); // r12
__int64 v3; // rbx
_QWORD *ref_by_index; // rax
__int64 v5; // rbx
unsigned __int64 v6; // rbx
char result; // al
__int64 (__fastcall *v8)(_QWORD, __int64); // r12
__int64 v9; // rbx
_QWORD *v10; // rax
__int64 v11; // rbx
unsigned __int64 v12; // rbx
unsigned __int64 idx1; // [rsp+40h] [rbp-30h]
__int64 idx2; // [rsp+50h] [rbp-20h]
unsigned int i; // [rsp+5Ch] [rbp-14h]
for ( i = 1; ; i *= 2 )
{
result = (unsigned __int64)len_of_vec(&a1->vec_1->_M_start) >> 1 >= i;
if ( !result )
break;
while ( 1 )
{
v6 = 2 * i * (unsigned int)deref((unsigned int *)a1->cnt) - 1;
if ( v6 >= len_of_vec(&a1->vec_1->_M_start) )
break;
idx1 = 2 * i * _InterlockedExchangeAdd(a1->cnt, 1u) - 1;
if ( idx1 < len_of_vec(&a1->vec_1->_M_start) )
{
log2_0(i);
func_idx = round(func_idx);
v2 = (__int64 (__fastcall *)(_QWORD, __int64))*((_QWORD *)a1->funcs + (int)func_idx % 3);
v3 = *(_QWORD *)get_ref_by_index(&a1->vec_1->_M_start, idx1 - i);
ref_by_index = (_QWORD *)get_ref_by_index(&a1->vec_1->_M_start, idx1);
v5 = v2(*ref_by_index, v3);
*(_QWORD *)get_ref_by_index(&a1->vec_1->_M_start, idx1) = v5;
}
}
sem_wait_post(a1->obj1);
sub_3E48(a1->cnt, 1u);
sem_wait_post(a1->obj1);
}
while ( i )
{
while ( 1 )
{
v12 = 2 * i * (unsigned int)deref((unsigned int *)a1->cnt) - 1;
if ( v12 >= len_of_vec(&a1->vec_1->_M_start) )
break;
idx2 = 2 * i * _InterlockedExchangeAdd(a1->cnt, 1u) - 1;
if ( (unsigned __int64)i + idx2 < len_of_vec(&a1->vec_1->_M_start) )
{
log2_0(i);
func_idx = round(func_idx);
v8 = (__int64 (__fastcall *)(_QWORD, __int64))*((_QWORD *)a1->funcs + (int)func_idx % 3);
v9 = *(_QWORD *)get_ref_by_index(&a1->vec_1->_M_start, i + idx2);
v10 = (_QWORD *)get_ref_by_index(&a1->vec_1->_M_start, idx2);
v11 = v8(*v10, v9);
*(_QWORD *)get_ref_by_index(&a1->vec_1->_M_start, i + idx2) = v11;
}
}
sem_wait_post(a1->obj1);
i >>= 1;
sub_3E48(a1->cnt, 1u);
result = sem_wait_post(a1->obj1);
}
return result;
}
And the actual algorithm is realative naive and every operation can be reversed so just write the whole thing backwards. Note that the number of threads determines the random seed (in sub_3CD2
), which affects the order of 3 operation functions, so we have to guess the correct permutation. Luckily that's 6 tries at most.
EXP:
import math
enc = [
100,
13,
110,
19,
104,
30,
42,
1539,
1591,
1544,
1593,
136971,
137063,
137022,
137038,
5230,
5131,
5233,
5184,
397480,
397559,
397524,
397495,
12938,
13028,
12967,
13048,
892722,
892674,
892788,
892703,
19864,
19965,
19867,
19965,
357552,
357587,
357562,
357599,
19682,
19606,
19725,
19823,
1299012,
1298992,
1299055,
1298972,
24582,
24630,
24653,
24624
]
def add_inv(idx1, idx2):
if idx1 > idx2:
idx1, idx2 = idx2, idx1
t1 = enc[idx1]
t2 = enc[idx2]
t2 -= t1
enc[idx2] = t2
def mul_inv(idx1, idx2):
if idx1 > idx2:
idx1, idx2 = idx2, idx1
t1 = enc[idx1]
t2 = enc[idx2]
# if (t2 // t1) * t1 != t2:
# raise
t2 = t2 // t1
enc[idx2] = t2
def idx_inv(idx1, idx2):
if idx1 > idx2:
idx1, idx2 = idx2, idx1
t1 = enc[idx1]
t2 = enc[idx2]
t2 = t2 ^ t1
enc[idx2] = t2
# funcs = [add_inv, mul_inv, idx_inv]
# funcs = [add_inv, idx_inv, mul_inv]
# funcs = [mul_inv, add_inv, idx_inv]
# funcs = [mul_inv, idx_inv, add_inv]
# funcs = [idx_inv, mul_inv, add_inv]
funcs = [idx_inv, add_inv, mul_inv]
# last half
i = 1
while i <= len(enc) // 2:
cnt = 1
while 2 * i * cnt - 1 < len(enc):
idx = 2 * i * cnt - 1
cnt += 1
if idx + i < len(enc):
choice = round(math.log2(i))
func = funcs[choice%3]
func(idx, idx + i)
i = i << 1
# front half
while i >= 1:
cnt = 1
while 2 * i * cnt - 1 < len(enc):
idx = 2 * i * cnt - 1
cnt += 1
if idx < len(enc):
choice = round(math.log2(i))
func = funcs[choice%3]
func(idx - i, idx)
i = i >> 1
print(enc)
flag = ""
for i in enc:
flag += chr(i&0xff)
print(flag)