unicorn startup

目的:用 unicorn 模拟目标程序,并优化其中递归的 fibonacci 计算,从而快速解密出 flag

1
2
3
4
5
6
7
(gdb) shell cat /proc/`pidof fibonacci`/maps
00400000-00401000 r-xp 00000000 08:05 23233010 /home/squ/prac/unicore-prac/fibonacci
00600000-00602000 rw-p 00000000 08:05 23233010 /home/squ/prac/unicore-prac/fibonacci
7ffff7fc9000-7ffff7fcd000 r--p 00000000 00:00 0 [vvar]
7ffff7fcd000-7ffff7fcf000 r-xp 00000000 00:00 0 [vdso]
7ffff7fcf000-7ffff7fd0000 r--p 00000000 08:05 49939588 /usr/lib/x86_64-linux-gnu/ld-2.31.so
7ffff7fd0000-7ffff7ff3000 r-xp 00001000 08:05 49939588 /usr/lib/x86_64-linux-gnu/ld-2.31.so

先确定程序基址是 0x00400000

用 unicorn 的 mem_map 映射程序和栈所需的内存,再添加一个用于调试的指令级 hook,
然后开始模拟

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from unicorn import *
from unicorn.x86_const import *
from loguru import logger
from pathlib import Path
import struct

def read(f: Path) -> bytes:
    """Return the entire contents of the file at *f* as raw bytes.

    *f* may be a ``Path`` or a plain path string (the script passes a
    string); it is normalised through ``Path`` before reading.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    return Path(f).read_bytes()

def u32(data: bytes) -> int:
    """Unpack exactly 4 bytes as a little-endian unsigned 32-bit integer.

    Any bytes-like object (``bytes``, ``bytearray``) is accepted. The
    explicit ``<`` makes the result independent of the host byte order;
    the emulated target is x86, which is little-endian.

    Raises:
        struct.error: if *data* is not exactly 4 bytes long.
    """
    return struct.unpack("<I", data)[0]

def p32(data: int) -> bytes:
    """Pack *data* into 4 bytes as a little-endian unsigned 32-bit integer.

    The explicit ``<`` makes the output independent of the host byte order;
    the emulated target is x86, which is little-endian.

    Raises:
        struct.error: if *data* is outside ``0 .. 2**32 - 1``.
    """
    return struct.pack("<I", data)

# Create the emulator instance.
# Uc(arch, mode): the first argument is the main architecture (a UC_ARCH_*
# constant), the second the architecture mode (a UC_MODE_* constant) --
# here x86 in 64-bit mode.
mu = Uc (UC_ARCH_X86, UC_MODE_64)

# Memory layout used for the emulation.
BASE_ADDR = 0x400000  # address the binary's bytes are written to
BASE_SIZE = 0x100000 # TODO: why don't need more? (this maps 0x400000-0x4fffff only)
STACK_ADDR = 0x0
STACK_SIZE = 0x100000

# Map one region for the program image and one for the stack.
mu.mem_map(BASE_ADDR, BASE_SIZE)
mu.mem_map(STACK_ADDR, STACK_SIZE)

# Copy the raw file bytes to BASE_ADDR as-is (no ELF segment loading is done).
res = read("./fibonacci") # raw bytes of the target binary
logger.debug(type(res))
mu.mem_write(BASE_ADDR ,res)
# Point RSP at the last byte of the stack mapping.
mu.reg_write(UC_X86_REG_RSP, STACK_ADDR + STACK_SIZE - 1)

# Address range handed to emu_start below.
main_start = 0x4004E0  # first instruction of main (push rbp) in the disassembly
main_end = 0x400582  # NOTE(review): presumably the end of main -- confirm in the disassembly

def hook_code(uc, addr, size, data):
    """Log the address and size of each instruction the hook is called for.

    Registered with ``mu.hook_add(UC_HOOK_CODE, hook_code)`` as a tracing
    aid while debugging the emulation.

    Args:
        uc: emulator instance passed in by the hook machinery (unused).
        addr: address of the instruction.
        size: size of the instruction in bytes.
        data: user data passed in by the hook machinery (unused).
    """
    logger.info(">>> Tracing instruction at 0x%x, instruction size = 0x%x" % (addr, size))
# Trace every emulated instruction through hook_code (debugging aid).
mu.hook_add(UC_HOOK_CODE, hook_code)

# Emulate starting at main_start, stopping when main_end is reached.
mu.emu_start(main_start, main_end)

报错

1
2
3
4
5
6
7
2023-03-15 20:11:26.366 | INFO     | __main__:hook_code:39 - >>> Tracing instruction at 0x4004ef, instruction size = 0x7
Traceback (most recent call last):
File "exp.py", line 44, in <module>
mu.emu_start(main_start, main_end)
File "/home/squ/.local/lib/python3.8/site-packages/unicorn/unicorn.py", line 547, in emu_start
raise UcError(status)
unicorn.unicorn.UcError: Invalid memory read (UC_ERR_READ_UNMAPPED)
1
2
3
4
5
6
7
8
9
.text:00000000004004E0 ; __unwind {
.text:00000000004004E0 push rbp
.text:00000000004004E1 push rbx
.text:00000000004004E2 xor esi, esi ; buf
.text:00000000004004E4 mov ebp, offset unk_4007E1
.text:00000000004004E9 xor ebx, ebx
.text:00000000004004EB sub rsp, 18h
.text:00000000004004EF mov rdi, cs:stdout ; stream <<-
.text:00000000004004F6 call _setbuf
1
2
.bss:0000000000601038 stdout          dq ?                    ; DATA XREF: LOAD:0000000000400350↑o
.bss:0000000000601038 ; main+F↑r ...

stdout 位于 .bss 的 0x601038,而我们只映射了 0x400000–0x4FFFFF(BASE_SIZE = 0x100000),读取它时访问到了未映射的内存,所以是映射太少了

但是我们并不需要 glibc:所有涉及 glibc 的指令在模拟时都可以直接跳过

1
2
3
4
5
6
.text:00000000004004EF                 mov     rdi, cs:stdout  ; stream
.text:00000000004004F6 call _setbuf

.text:0000000000400502 call _printf

.text:000000000040054F mov rsi, cs:stdout ; fp

为了把 flag 打印出来,还需要在 call __IO_putc 处读取存放待输出字符的寄存器(rdi / dil),由我们自己输出

1
2
3
.text:0000000000400558                 movsx   edi, dil        ; c
.text:000000000040055C add rbp, 1
.text:0000000000400560 call __IO_putc

# re

这里用了一点混淆来干扰 ida

1
2
3
4
5
.text:00000000004004E2                 xor     esi, esi        ; buf
.text:00000000004004E4 mov ebp, offset unk_4007E1
.text:00000000004004E9 xor ebx, ebx

v3 = (char *)&unk_4007E1;
1
2
3
4
5
6
7
8
9
10
.text:000000000040054F                 mov     rsi, cs:stdout  ; fp
.text:0000000000400556 jz short loc_400570
.text:0000000000400558 movsx edi, dil ; c
.text:000000000040055C add rbp, 1
.text:0000000000400560 call __IO_putc
.text:0000000000400565 movzx r9d, byte ptr [rbp-1]
.text:000000000040056A jmp short loc_400510


_IO_putc(v6 ^ (unsigned __int8)(LOBYTE(v10[0]) << v8), stdout);

rbp 就是指向加密的 flag 的指针

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42

// Ghidra decompilation of main: prints "The flag is: " and then decrypts the
// flag one character per pass of the outer loop.
undefined8 main(void)

{
int base;
byte *enc_flag_ptr;
uint result;
uint enc_flag_char;
long idx;
uint flag_char_copy;
int ptr [3];
byte idx_copy;

base = 0;
setbuf(stdout,(char *)0x0);
printf("The flag is: ");
// The first byte to decrypt is the constant 0x49; later bytes are read
// from encrypted_flag.
enc_flag_char = 0x49;
enc_flag_ptr = &encrypted_flag;

while( true ) {

idx = 0;
// Eight fibonacci calls per character, one per bit position 0..7.
do {
flag_char_copy = enc_flag_char;
ptr[0] = 0; // clear the output cell before every call
fibonacci(base + (int)idx,ptr);
idx_copy = (byte)idx;
idx = idx + 1;
// Move the value fibonacci left in ptr[0] to bit position idx_copy and
// XOR it into the current byte.
result = ptr[0] << (idx_copy & 0x1f);
enc_flag_char = result ^ flag_char_copy;
} while (idx != 8);

base = base + 8; // the next character uses the next eight fibonacci arguments
// Stop once the low byte after the final XOR is zero
// (result and the previous value agree in their low byte).
if ((char)result == (char)flag_char_copy) break;
_IO_putc((int)(char)enc_flag_char,stdout);
// Load the next encrypted byte and advance the pointer.
enc_flag_char = (uint)*enc_flag_ptr;
enc_flag_ptr = enc_flag_ptr + 1;
}
_IO_putc(10,stdout); // trailing newline
return 0;
}

ghidra 这里更友好点

每一轮(解出一个字符)会调用 8 次 fibonacci;它的第二个参数是一个参与计算的指针
计算结果通过 ptr 传出,而写入 ptr 的值又由 fibonacci 的返回值决定,所以可以把结果缓存起来
乍看之下 fibonacci 的结果和 ptr 没有关系,只用 N 做缓存的键就够了,但实际不然:当 N 为一个固定值时,ptr 指向的位置可以有不同的值,而 ptr 的新内容又依赖于 ptr 原来的内容,所以缓存的键必须是 (N, ptr 指向的内容)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
// IDA decompilation of fibonacci. Returns the recursively computed value for n
// and, as a side effect of this call and every recursive call, XORs one bit
// into *ptr.
__int64 __fastcall fibonacci(int n, _DWORD *ptr)
{
int v3; // er12
__int64 result; // rax
unsigned int v5; // esi
unsigned int v6; // esi

if ( n )
{
if ( n == 1 )
{
result = fibonacci(0, ptr);
}
else
{
v3 = fibonacci(n - 2, ptr);
result = v3 + (unsigned int)fibonacci(n - 1, ptr);// F(N) = F(N-1) + F(N-2) (N >= 2)
}
// v5, v6 and the expression below read as a bit-parallel population count of
// the low 32 bits of result (0x55555555 / 0x33333333 / 0x0F0F0F0F masks);
// only its lowest bit, i.e. the parity of the set-bit count, is XORed into *ptr.
v5 = (((unsigned int)result - (((unsigned int)result >> 1) & 0x55555555)) >> 2) & 0x33333333;
v6 = v5
+ ((result - (((unsigned int)result >> 1) & 0x55555555)) & 0x33333333)
+ ((v5 + (((_DWORD)result - (((unsigned int)result >> 1) & 0x55555555)) & 0x33333333)) >> 4);
*ptr ^= ((BYTE1(v6) & 0xF) + (v6 & 0xF) + (unsigned __int8)((((v6 >> 8) & 0xF0F0F) + (v6 & 0xF0F0F0F)) >> 16)) & 1;
}
else // n == 0: base case, flips the low bit of *ptr and returns 1
{
*ptr ^= 1u;
result = 1LL;
}
return result;
}

总结:

  • 进入函数时:rdi、rsi 以及 rsi 指向的内容都可以读到
  • 返回的时候:rdi、rsi 已经被改变了(函数内部会用到这些寄存器),原 rsi 指向的内容也已更新;我们需要缓存的是新的 rax 和原 rsi 所指位置现在的内容
    • 所以在函数入口处要保存 rdi、rsi(返回时用它取出新内容)以及 rsi 指向的旧内容(用来做索引)
  • 退出函数的时候:拿到 rax,以及入口处保存的 rdi、rsi 和 rsi 指向的旧内容;用 (rdi, 旧内容) 做索引,缓存返回值 rax 和新的内容

总感觉有一种状态机的感觉 但是我微薄的数理基础描绘不出来

# exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from unicorn import *
from unicorn.x86_const import *
from loguru import logger
from pathlib import Path
from typing import Dict, Tuple

def read(f: Path) -> bytes:
    """Return the entire contents of the file at *f* as raw bytes.

    *f* may be a ``Path`` or a plain path string (the script passes a
    string); it is normalised through ``Path`` before reading.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    return Path(f).read_bytes()

# Create the emulator instance.
# Uc(arch, mode): the first argument is the main architecture (a UC_ARCH_*
# constant), the second the architecture mode (a UC_MODE_* constant) --
# here x86 in 64-bit mode.
mu = Uc (UC_ARCH_X86, UC_MODE_64)

# Memory layout used for the emulation.
BASE_ADDR = 0x400000  # address the binary's bytes are written to
BASE_SIZE = 0x300000 # maps 0x400000-0x6fffff, so .text, .data and .bss are all covered
STACK_ADDR = 0x0
STACK_SIZE = 0x100000

# Map one region for the program image and one for the stack.
mu.mem_map(BASE_ADDR, BASE_SIZE)
mu.mem_map(STACK_ADDR, STACK_SIZE)

# Copy the raw file bytes to BASE_ADDR as-is (no ELF segment loading is done).
mu.mem_write(BASE_ADDR ,read("./fibonacci"))
# Point RSP at the last byte of the stack mapping.
mu.reg_write(UC_X86_REG_RSP, STACK_ADDR + STACK_SIZE - 1)

# Address range handed to emu_start below.
main_start = 0x4004E0  # first instruction of main (push rbp) in the disassembly
main_end = 0x400582  # NOTE(review): presumably the end of main -- confirm in the disassembly

# Call instructions into glibc; hook_code steps over them instead of executing them.
skip_instructions = [
0x4004F6, # call _setbuf
0x400502, # call _printf
0x400575, # call __IO_putc
]

# `call __IO_putc` inside the decryption loop; hook_code prints RDI's low byte there.
__flag_putc = 0x400560
fibonacci_entry = 0x400670  # address hook_code treats as the entry of fibonacci
# NOTE(review): presumably the two return sites of fibonacci -- confirm in the disassembly.
fibonacci_ends = [0x4006F1, 0x400709]
# Address hook_code jumps to on a cache hit to leave fibonacci immediately.
ret_of_fibonacci = fibonacci_ends[1]
# (rdi, rsi, bytes at rsi) recorded at each fibonacci entry, popped at the matching end.
stack = []
# (rdi, bytes at rsi on entry) -> (rax, bytes at rsi on return)
cache : Dict[Tuple[int, bytes], Tuple[int, bytes]] = {}

def hook_code(uc: Uc, addr, size, data):
    """Per-instruction hook: skip glibc calls, print flag bytes, memoise fibonacci.

    Registered with ``mu.hook_add(UC_HOOK_CODE, hook_code)``. Depending on
    the address of the instruction it is called for, it does one of:

    * ``skip_instructions``: step over the instruction by moving RIP past it.
    * ``__flag_putc``: print the low byte of RDI (the decrypted character)
      and step over the call.
    * ``fibonacci_entry``: record ``(rdi, rsi, *rsi)`` on ``stack``; if
      ``(rdi, *rsi)`` is already in ``cache``, install the cached RAX and
      ``*rsi`` and jump straight to ``ret_of_fibonacci``.
    * ``fibonacci_ends``: pop the matching entry record and store
      ``(rax, new *rsi)`` in ``cache`` under ``(rdi, old *rsi)``.

    Args:
        uc: emulator instance the hook is running in.
        addr: address of the instruction.
        size: size of the instruction in bytes.
        data: user data passed in by the hook machinery (unused).
    """
    # fibonacci's second argument is a `_DWORD *` in the decompilation, so
    # only 4 bytes behind it belong to the value being read and updated.
    ptr_width = 4

    if addr in skip_instructions:
        uc.reg_write(UC_X86_REG_RIP, addr + size)
        return

    if addr == __flag_putc:
        c = uc.reg_read(UC_X86_REG_RDI)
        # Flush so each character shows up as soon as it is decrypted.
        print(chr(c & 0xFF), end="", flush=True)
        uc.reg_write(UC_X86_REG_RIP, addr + size)
        return

    if addr == fibonacci_entry:
        rdi = uc.reg_read(UC_X86_REG_RDI)
        rsi = uc.reg_read(UC_X86_REG_RSI)
        old = bytes(uc.mem_read(rsi, ptr_width))
        # Pushed even on a cache hit: the jump target below is itself one of
        # fibonacci_ends, whose branch is expected to pop this record again.
        stack.append((rdi, rsi, old))

        hit = cache.get((rdi, old))
        if hit is not None:
            rax, new = hit
            uc.reg_write(UC_X86_REG_RAX, rax)
            uc.mem_write(rsi, new)
            uc.reg_write(UC_X86_REG_RIP, ret_of_fibonacci)
        return

    if addr in fibonacci_ends:
        rax: int = uc.reg_read(UC_X86_REG_RAX)
        # RDI/RSI may have been overwritten inside the function, so use the
        # values saved at entry.
        rdi, rsi, old = stack.pop()
        cache[(rdi, old)] = (rax, bytes(uc.mem_read(rsi, ptr_width)))

# Install hook_code as the per-instruction code hook.
mu.hook_add(UC_HOOK_CODE, hook_code)

# Emulate starting at main_start, stopping when main_end is reached;
# hook_code prints the flag characters along the way.
mu.emu_start(main_start, main_end)
print("")  # end the flag line with a newline

ref : https://eternal.red/2018/unicorn-engine-tutorial/