1 2 3 sudo apt install lld-12 sudo ln -s /lib/llvm-9/bin/llc /bin/llc sudo ln -s /lib/llvm-9/bin/opt /bin/opt
官方文档:LLVM Language Reference Manual — LLVM 16.0.0git documentation
1 clang -Xclang -ast-dump -fsyntax-only test.c
生成 AST
-S Only run preprocess and compilation steps
-emit-llvm Use the LLVM representation for assembler and object files
-c Only run preprocess, compile, and assemble steps(与 -emit-llvm 一起使用时,生成字节码形式的 .bc 文件)
1 clang -S -emit-llvm test.c
生成的 ir 中
1 2 3 4 5 define dso_local i32 @main() #0 { %1 = alloca i32, align 4 store i32 0 , i32* %1 , align 4 ret i32 0 }
如果开启优化
1 clang -S -emit-llvm -O3 test.c
会直接变成
1 2 3 define dso_local i32 @main() local_unnamed_addr #0 { ret i32 0 }
然后用 llc 生成汇编
ll 到 bc 文件可以用 llvm-as
反过来用 llvm-dis
注意:.ll(文本形式)、.bc(字节码形式)以及内存中的表示,是 LLVM IR 的三种等价形式。
dso_local
是一个 Runtime Preemption 说明符,表明编译器可以假定该函数或变量会被解析为同一个链接单元(即最终链接生成的同一个可执行文件或共享库)内的符号。
对于
1 2 3 4 5 6 7 8 9 10 11 int foo (int first, int second) { return first + second; } int a = 5 ;int main () { int b = 4 ; return foo(a, b); }
生成
alloca 就是在栈中分配空间
先把传入的值放入栈中再拿出来(很麻烦)
nsw : no signed wrap
所有的全局变量都以 @ 为前缀
这里 #0
与之后的 attributes #0
相对应
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 ; ModuleID = 'main.c' source_filename = "main.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-linux-gnu" @a = dso_local global i32 5 , align 4 ; Function Attrs: noinline nounwind optnone uwtable define dso_local i32 @foo(i32, i32) #0 { %3 = alloca i32, align 4 %4 = alloca i32, align 4 store i32 %0 , i32* %3 , align 4 store i32 %1 , i32* %4 , align 4 %5 = load i32, i32* %3 , align 4 %6 = load i32, i32* %4 , align 4 %7 = add nsw i32 %5 , %6 ret i32 %7 } ; Function Attrs: noinline nounwind optnone uwtable define dso_local i32 @main() #0 { %1 = alloca i32, align 4 %2 = alloca i32, align 4 store i32 0 , i32* %1 , align 4 store i32 4 , i32* %2 , align 4 %3 = load i32, i32* @a, align 4 %4 = load i32, i32* %2 , align 4 %5 = call i32 @foo(i32 %3 , i32 %4 ) ret i32 %5 } attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math" ="false" "disable-tail-calls" ="false" "less-precise-fpmad" ="false" "min-legal-vector-width" ="0" "no-frame-pointer-elim" ="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math" ="false" "no-jump-tables" ="false" "no-nans-fp-math" ="false" "no-signed-zeros-fp-math" ="false" "no-trapping-math" ="false" "stack-protector-buffer-size" ="8" "target-cpu" ="x86-64" "target-features" ="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math" ="false" "use-soft-float" ="false" } !llvm.module.flags = !{!0 } !llvm.ident = !{!1 } !0 = !{i32 1 , !"wchar_size" , i32 4 } !1 = !{!"clang version 9.0.1-12 " }
对于在当前模块之外定义的函数(例如库函数),要使用它就需要先用 declare 声明这个函数的签名
# 条件分支
1 2 3 4 5 6 7 8 9 10 11 12 13 int main () { int a = getint(); int b = getint(); int c = 0 ; if (a == b) { c = 5 ; } else { c = 10 ; } putint(c); return 0 ; }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 define dso_local i32 @main() #0 { %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 %4 = alloca i32, align 4 store i32 0 , i32* %1 , align 4 %5 = call i32 (...) @getint() store i32 %5 , i32* %2 , align 4 %6 = call i32 (...) @getint() store i32 %6 , i32* %3 , align 4 store i32 0 , i32* %4 , align 4 %7 = load i32, i32* %2 , align 4 %8 = load i32, i32* %3 , align 4 %9 = icmp eq i32 %7 , %8 br i1 %9 , label %10 , label %11 10 : ; preds = %0 store i32 5 , i32* %4 , align 4 br label %12 11 : ; preds = %0 store i32 10 , i32* %4 , align 4 br label %12 12 : ; preds = %11 , %10 %13 = load i32, i32* %4 , align 4 %14 = call i32 (i32, ...) bitcast (i32 (...)* @putint to i32 (i32, ...)*)(i32 %13 ) ret i32 0 }
语法:br i1 <条件值>, label <条件为真时跳转的标签>, label <条件为假时跳转的标签>
# cfg 图
1 2 3 4 5 6 7 8 9 10 int max (int a, int b) { if (a > b) { return a; } else { return b; } } int main () { return max(1 ,2 ); }
opt -dot-cfg test.ll
生成
为了可视化
1 sudo apt-get install -y graphviz-doc libgraphviz-dev graphviz
1 dot .max.dot -Tpng -o max.png
如果在 clang 的时候用 O3 编译
这里就变成了 select 指令
1 2 3 4 5 define dso_local i32 @max(i32, i32) local_unnamed_addr #0 { %3 = icmp sgt i32 %0 , %1 %4 = select i1 %3 , i32 %0 , i32 %1 ret i32 %4 }
# SSA & phi node
这个概念在 ghidra,南大静态分析,各种文章里都看过了。。
SSA form enables and simplifies a vast number of compiler optimizations, and is the de-facto standard for intermediate representations in compilers of imperative programming languages.
看这个就行 SSA Explained
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 define i32 @max(i32 %a, i32 %b) { entry: %0 = icmp sgt i32 %a, %b br i1 %0 , label %btrue, label %bfalse btrue: ; preds = %2 br label %end bfalse: ; preds = %2 br label %end end: ; preds = %btrue, %bfalse %retval = phi i32 [%a, %btrue], [%b, %bfalse] ret i32 %retval }
phi 指令依据控制流是从哪个前驱基本块跳转过来的,来选择对应的值。
1 llc -O0 -filetype=asm test.ll
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 # %bb.0 : # %entry cmpl %esi, %edi movl %edi, -4 (%rsp) # 4 -byte Spill movl %esi, -8 (%rsp) # 4 -byte Spill jle .LBB0_2 # %bb.1 : # %btrue movl -4 (%rsp), %eax # 4 -byte Reload movl %eax, -12 (%rsp) # 4 -byte Spill jmp .LBB0_3 .LBB0_2: # %bfalse movl -8 (%rsp), %eax # 4 -byte Reload movl %eax, -12 (%rsp) # 4 -byte Spill jmp .LBB0_3 .LBB0_3: # %end movl -12 (%rsp), %eax # 4 -byte Reload retq
汇编里的实现就是每一条分支都往 -12(%rsp)
上放置数据,然后 end 分支读取。