llvm-init

1
2
3
sudo apt install lld-12
sudo ln -s /lib/llvm-9/bin/llc /bin/llc
sudo ln -s /lib/llvm-9/bin/opt /bin/opt

官方文档:LLVM Language Reference Manual — LLVM 16.0.0git documentation

1
clang -Xclang -ast-dump -fsyntax-only test.c

生成 AST

  • -S Only run preprocess and compilation steps
  • -emit-llvm Use the LLVM representation for assembler and object files
  • -c Only run preprocess, compile, and assemble steps (与 -emit-llvm 一起使用时,生成字节码形式的 bc 文件)
1
clang -S -emit-llvm test.c 

生成的 ir 中

1
2
3
4
5
define dso_local i32 @main() #0 {
%1 = alloca i32, align 4
store i32 0, i32* %1, align 4
ret i32 0
}

如果开启优化

1
clang -S -emit-llvm -O3 test.c

会直接变成

1
2
3
define dso_local i32 @main() local_unnamed_addr #0 {
ret i32 0
}

然后用 llc 生成汇编

1
llc test.ll

ll 到 bc 文件可以用 llvm-as
反过来用 llvm-dis
注意,ll(文本形式)、bc(字节码形式)以及内存中的 IR 这三种表示形式是等价的。

dso_local 是一个 Runtime Preemption 说明符,表明编译器可以假定该函数或变量会被解析为同一个链接单元(linkage unit,即最终链接生成的同一个可执行文件或共享库)内的符号。

对于

1
2
3
4
5
6
7
8
9
10
11
// main.c
int foo(int first, int second) {
return first + second;
}

int a = 5;

int main() {
int b = 4;
return foo(a, b);
}

生成

  • alloca 就是在栈中分配空间
  • 先把传入的值放入栈中再拿出来(很麻烦)
  • nsw : no signed wrap
  • 所有的全局变量都以 @ 为前缀
  • 这里 #0 与之后的 attributes #0 相对应
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
; ModuleID = 'main.c'
source_filename = "main.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

@a = dso_local global i32 5, align 4

; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @foo(i32, i32) #0 {
%3 = alloca i32, align 4
%4 = alloca i32, align 4
store i32 %0, i32* %3, align 4
store i32 %1, i32* %4, align 4
%5 = load i32, i32* %3, align 4
%6 = load i32, i32* %4, align 4
%7 = add nsw i32 %5, %6
ret i32 %7
}

; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @main() #0 {
%1 = alloca i32, align 4
%2 = alloca i32, align 4
store i32 0, i32* %1, align 4
store i32 4, i32* %2, align 4
%3 = load i32, i32* @a, align 4
%4 = load i32, i32* %2, align 4
%5 = call i32 @foo(i32 %3, i32 %4)
ret i32 %5
}

attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 9.0.1-12 "}

对于在当前模块之外定义的函数(例如库函数),要使用它就需要先用 declare 声明这个函数的签名

1
declare i32 @getint()

# 条件分支

1
2
3
4
5
6
7
8
9
10
11
12
13
//if.c
int main() {
int a = getint();
int b = getint();
int c = 0;
if (a == b) {
c = 5;
} else {
c = 10;
}
putint(c);
return 0;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
define dso_local i32 @main() #0 {
%1 = alloca i32, align 4
%2 = alloca i32, align 4
%3 = alloca i32, align 4
%4 = alloca i32, align 4
store i32 0, i32* %1, align 4
%5 = call i32 (...) @getint()
store i32 %5, i32* %2, align 4
%6 = call i32 (...) @getint()
store i32 %6, i32* %3, align 4
store i32 0, i32* %4, align 4
%7 = load i32, i32* %2, align 4
%8 = load i32, i32* %3, align 4
%9 = icmp eq i32 %7, %8
br i1 %9, label %10, label %11

10: ; preds = %0
store i32 5, i32* %4, align 4
br label %12

11: ; preds = %0
store i32 10, i32* %4, align 4
br label %12

12: ; preds = %11, %10
%13 = load i32, i32* %4, align 4
%14 = call i32 (i32, ...) bitcast (i32 (...)* @putint to i32 (i32, ...)*)(i32 %13)
ret i32 0
}

条件跳转的语法:br i1 <条件值>, label <条件为真时的目标>, label <条件为假时的目标>

# cfg 图

1
2
3
4
5
6
7
8
9
10
int max(int a, int b) {
if (a > b) {
return a;
} else {
return b;
}
}
int main(){
return max(1,2);
}

opt -dot-cfg test.ll 生成

为了可视化

1
sudo apt-get install -y graphviz-doc libgraphviz-dev graphviz
1
dot .max.dot  -Tpng -o max.png

image-20221018103046162
如果用 clang 编译时开启 -O3 优化,
这里的分支就会被优化成一条 select 指令

1
2
3
4
5
define dso_local i32 @max(i32, i32) local_unnamed_addr #0 {
%3 = icmp sgt i32 %0, %1
%4 = select i1 %3, i32 %0, i32 %1
ret i32 %4
}

# SSA & phi node

这个概念在 ghidra,南大静态分析,各种文章里都看过了。。
SSA form enables and simplifies a vast number of compiler optimizations, and is the de-facto standard for intermediate representations in compilers of imperative programming languages.
看这个就行 SSA Explained

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
define i32 @max(i32 %a, i32 %b) {
entry:
%0 = icmp sgt i32 %a, %b
br i1 %0, label %btrue, label %bfalse

btrue: ; preds = %entry
br label %end

bfalse: ; preds = %entry
br label %end

end: ; preds = %btrue, %bfalse
%retval = phi i32 [%a, %btrue], [%b, %bfalse]
ret i32 %retval
}

phi 指令依据控制流实际是从哪个前驱基本块跳转过来的,来选择对应的值。

1
llc -O0 -filetype=asm test.ll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# %bb.0:                                # %entry
cmpl %esi, %edi
movl %edi, -4(%rsp) # 4-byte Spill
movl %esi, -8(%rsp) # 4-byte Spill
jle .LBB0_2
# %bb.1: # %btrue
movl -4(%rsp), %eax # 4-byte Reload
movl %eax, -12(%rsp) # 4-byte Spill
jmp .LBB0_3
.LBB0_2: # %bfalse
movl -8(%rsp), %eax # 4-byte Reload
movl %eax, -12(%rsp) # 4-byte Spill
jmp .LBB0_3
.LBB0_3: # %end
movl -12(%rsp), %eax # 4-byte Reload
retq

汇编里的实现就是每一条分支都往 -12(%rsp) 上放置数据,然后 end 分支读取。