概述
BCF即虚假控制流,将原程序基本块进行拆分和克隆,利用不透明谓词将克隆的基本块作为不可达分支用来混淆静态代码,达到迷惑攻击者的目的.
这里的不透明谓词是指一个表达式,它的值在执行到某处时,对代码编写者来说是确定的,但是由于某种原因,编译器或者静态分析器无法推断出这个值,只能在运行时确定.
其技术实现过程如下:
虚假控制流后,在IDA中的示例如下:
测试文件
1
2
3
4
5
6
7
8
9
#include <stdlib.h>
int main ( int argc , char ** argv ) {
int a = atoi ( argv [ 1 ]);
if ( a == 0 )
return 1 ;
else
return 10 ;
return 0 ;
}
流程分析
入口函数
bcf的声明在ollvm/include/llvm/Transforms/Obfuscation/BogusControlFlow.h
1
2
Pass * createBogus ();
Pass * createBogus ( bool flag );
bcf的入口函数是runOnFunction,在ollvm/lib/Transforms/Obfuscation/BogusControlFlow.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
virtual bool runOnFunction ( Function & F ){
// Check if the percentage is correct
if ( ObfTimes <= 0 ) {
errs () << "BogusControlFlow application number -bcf_loop=x must be x > 0" ;
return false ;
}
// Check if the number of applications is correct
if ( ! (( ObfProbRate > 0 ) && ( ObfProbRate <= 100 )) ) {
errs () << "BogusControlFlow application basic blocks percentage -bcf_prob=x must be 0 < x <= 100" ;
return false ;
}
std :: vector < BasicBlock *> orginalBBs ;
// check for compatible
for ( BasicBlock & bb : F . getBasicBlockList ()) {
if ( isa < InvokeInst > ( bb . getTerminator ())) {
return false ;
}
}
// If fla annotations
if ( toObfuscate ( flag , & F , "bcf" )) {
bogus ( F );
doF ( * F . getParent ());
return true ;
}
return false ;
} // end of runOnFunction()
ObfTimes是通过以下参数进行传递的,默认是1
1
2
//进行3次bcf混淆
- mllvm - bcf_loop = 3
ObfProbRate是通过以下参数进行传递的,默认是30
1
2
//每个基本块有40%的概率进行bcf混淆
- mllvm - bcf_prob = 40
toObfuscate函数用于判断是否需要进行bcf
虚假控制流构造
bogus函数内部会遍历函数的所有基本块,然后调用addBogusFlow函数进行虚假控制流的添加,其关键逻辑代码如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
void bogus ( Function & F ) {
//①通过命令行传参(-mllvm -bcf_loop=3)来构建循环体,决定进行多少次bcf混淆
int NumObfTimes = ObfTimes ;
do {
//②保存该函数所有基本块
std :: list < BasicBlock *> basicBlocks ;
for ( Function :: iterator i = F . begin (); i != F . end (); ++ i ) {
basicBlocks . push_back ( &* i );
}
//③遍历所有基本块
while ( ! basicBlocks . empty ()){
//④通过随机产生100以内的数来和命令行传参(-mllvm -bcf_prob=40)进行比较,决定该基本块是否进行bcf混淆
if (( int ) llvm :: cryptoutils -> get_range ( 100 ) <= ObfProbRate ){
//⑤调用addBogusFlow进行虚假控制流的添加
BasicBlock * basicBlock = basicBlocks . front ();
addBogusFlow ( basicBlock , F );
}
// remove the block from the list
basicBlocks . pop_front ();
} // end of while(!basicBlocks.empty())
} while ( -- NumObfTimes > 0 );
}
接下来我们分析下进行bcf混淆的核心函数addBogusFlow
1
virtual void addBogusFlow ( BasicBlock * basicBlock , Function & F )
进行基本块的分割
1
2
3
4
5
6
7
8
//获取基本块中的第一个非PHI、非调试信息、非生命周期传递指令
Instruction * i1 = &* basicBlock -> begin ();
if ( basicBlock -> getFirstNonPHIOrDbgOrLifetime ())
i1 = basicBlock -> getFirstNonPHIOrDbgOrLifetime ();
//调用splitBasicBlock,进行基本块的分割
Twine * var ;
var = new Twine ( "originalBB" );
BasicBlock * originalBB = basicBlock -> splitBasicBlock ( i1 , * var );
假设要处理的基本块IR指令如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
entry :
% retval = alloca i32 , align 4
% argc . addr = alloca i32 , align 4
% argv . addr = alloca i8 ** , align 8
% a = alloca i32 , align 4
store i32 0 , i32 * % retval , align 4
store i32 % argc , i32 * % argc . addr , align 4
store i8 ** % argv , i8 *** % argv . addr , align 8
% 0 = load i8 ** , i8 *** % argv . addr , align 8
% arrayidx = getelementptr inbounds i8 * , i8 ** % 0 , i64 1
% 1 = load i8 * , i8 ** % arrayidx , align 8
% call = call i32 @ atoi ( i8 * % 1 ) #2
store i32 % call , i32 * % a , align 4
% 2 = load i32 , i32 * % a , align 4
% cmp = icmp eq i32 % 2 , 0
br i1 % cmp , label % if . then , label % if . else
经过上述分割后的基本块IR指令如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
entry :
br label % originalBB
originalBB :
% retval = alloca i32 , align 4
% argc . addr = alloca i32 , align 4
% argv . addr = alloca i8 ** , align 8
% a = alloca i32 , align 4
store i32 0 , i32 * % retval , align 4
store i32 % argc , i32 * % argc . addr , align 4
store i8 ** % argv , i8 *** % argv . addr , align 8
% 0 = load i8 ** , i8 *** % argv . addr , align 8
% arrayidx = getelementptr inbounds i8 * , i8 ** % 0 , i64 1
% 1 = load i8 * , i8 ** % arrayidx , align 8
% call = call i32 @ atoi ( i8 * % 1 ) #2
store i32 % call , i32 * % a , align 4
% 2 = load i32 , i32 * % a , align 4
% cmp = icmp eq i32 % 2 , 0
br i1 % cmp , label % if . then , label % if . else
进行基本块的克隆
1
2
3
//仿originalBB进行基本块的克隆
Twine * var3 = new Twine ( "alteredBB" );
BasicBlock * alteredBB = createAlteredBasicBlock ( originalBB , * var3 , & F );
此时的IR指令如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
entry :
br label % originalBB
originalBB :
% retval = alloca i32 , align 4
% argc . addr = alloca i32 , align 4
% argv . addr = alloca i8 ** , align 8
% a = alloca i32 , align 4
store i32 0 , i32 * % retval , align 4
store i32 % argc , i32 * % argc . addr , align 4
store i8 ** % argv , i8 *** % argv . addr , align 8
% 0 = load i8 ** , i8 *** % argv . addr , align 8
% arrayidx = getelementptr inbounds i8 * , i8 ** % 0 , i64 1
% 1 = load i8 * , i8 ** % arrayidx , align 8
% call = call i32 @ atoi ( i8 * % 1 ) #2
store i32 % call , i32 * % a , align 4
% 2 = load i32 , i32 * % a , align 4
% cmp = icmp eq i32 % 2 , 0
br i1 % cmp , label % if . then , label % if . else
originalBBalteredBB :
% retvalalteredBB = alloca i32 , align 4
% argc . addralteredBB = alloca i32 , align 4
% argv . addralteredBB = alloca i8 ** , align 8
% aalteredBB = alloca i32 , align 4
store i32 0 , i32 * % retvalalteredBB , align 4
store i32 % argc , i32 * % argc . addralteredBB , align 4
store i8 ** % argv , i8 *** % argv . addralteredBB , align 8
% 4 = load i8 ** , i8 *** % argv . addralteredBB , align 8
% arrayidxalteredBB = getelementptr inbounds i8 * , i8 ** % 4 , i64 1
% 5 = load i8 * , i8 ** % arrayidxalteredBB , align 8
% callalteredBB = call i32 @ atoi ( i8 * % 5 ) #2
store i32 % callalteredBB , i32 * % aalteredBB , align 4
% 6 = load i32 , i32 * % aalteredBB , align 4
% cmpalteredBB = icmp eq i32 % 6 , 0
br i1 % cmpalteredBB , label % if . then , label % if . else
移除基本块的最后一条指令(通常来说是分支或返回指令),即删除其与后继块的关系
1
2
alteredBB -> getTerminator () -> eraseFromParent ();
basicBlock -> getTerminator () -> eraseFromParent ();
此时的IR指令如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
entry :
originalBB :
% retval = alloca i32 , align 4
% argc . addr = alloca i32 , align 4
% argv . addr = alloca i8 ** , align 8
% a = alloca i32 , align 4
store i32 0 , i32 * % retval , align 4
store i32 % argc , i32 * % argc . addr , align 4
store i8 ** % argv , i8 *** % argv . addr , align 8
% 0 = load i8 ** , i8 *** % argv . addr , align 8
% arrayidx = getelementptr inbounds i8 * , i8 ** % 0 , i64 1
% 1 = load i8 * , i8 ** % arrayidx , align 8
% call = call i32 @ atoi ( i8 * % 1 ) #2
store i32 % call , i32 * % a , align 4
% 2 = load i32 , i32 * % a , align 4
% cmp = icmp eq i32 % 2 , 0
br i1 % cmp , label % if . then , label % if . else
originalBBalteredBB :
% retvalalteredBB = alloca i32 , align 4
% argc . addralteredBB = alloca i32 , align 4
% argv . addralteredBB = alloca i8 ** , align 8
% aalteredBB = alloca i32 , align 4
store i32 0 , i32 * % retvalalteredBB , align 4
store i32 % argc , i32 * % argc . addralteredBB , align 4
store i8 ** % argv , i8 *** % argv . addralteredBB , align 8
% 4 = load i8 ** , i8 *** % argv . addralteredBB , align 8
% arrayidxalteredBB = getelementptr inbounds i8 * , i8 ** % 4 , i64 1
% 5 = load i8 * , i8 ** % arrayidxalteredBB , align 8
% callalteredBB = call i32 @ atoi ( i8 * % 5 ) #2
store i32 % callalteredBB , i32 * % aalteredBB , align 4
% 6 = load i32 , i32 * % aalteredBB , align 4
% cmpalteredBB = icmp eq i32 % 6 , 0
构建一个永真条件
1
2
3
4
5
6
7
Value * LHS = ConstantFP :: get ( Type :: getFloatTy ( F . getContext ()), 1.0 );
Value * RHS = ConstantFP :: get ( Type :: getFloatTy ( F . getContext ()), 1.0 );
DEBUG_WITH_TYPE ( "gen" , errs () << "bcf: Value LHS and RHS created \n " );
//在第一个块即上述entry块处插入永真条件
Twine * var4 = new Twine ( "condition" );
FCmpInst * condition = new FCmpInst ( * basicBlock , FCmpInst :: FCMP_TRUE , LHS , RHS , * var4 );
构建跳转逻辑
1
2
3
4
//在basicBlock即上述entry块处创建分支判断,条件为真则跳转到originalBB处,否则跳转到alteredBB块
BranchInst :: Create ( originalBB , alteredBB , ( Value * ) condition , basicBlock );
//在alteredBB块末尾添加分支指令,无条件跳转到originalBB块
BranchInst :: Create ( originalBB , alteredBB );
此时的IR指令如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
entry :
% condition = fcmp true float 1.000000e+00 , 1.000000e+00
br i1 % condition , label % originalBB , label % originalBBalteredBB
originalBB :
% retval = alloca i32 , align 4
% argc . addr = alloca i32 , align 4
% argv . addr = alloca i8 ** , align 8
% a = alloca i32 , align 4
store i32 0 , i32 * % retval , align 4
store i32 % argc , i32 * % argc . addr , align 4
store i8 ** % argv , i8 *** % argv . addr , align 8
% 0 = load i8 ** , i8 *** % argv . addr , align 8
% arrayidx = getelementptr inbounds i8 * , i8 ** % 0 , i64 1
% 1 = load i8 * , i8 ** % arrayidx , align 8
% call = call i32 @ atoi ( i8 * % 1 ) #2
store i32 % call , i32 * % a , align 4
% 2 = load i32 , i32 * % a , align 4
% cmp = icmp eq i32 % 2 , 0
br i1 % cmp , label % if . then , label % if . else
originalBBalteredBB :
% retvalalteredBB = alloca i32 , align 4
% argc . addralteredBB = alloca i32 , align 4
% argv . addralteredBB = alloca i8 ** , align 8
% aalteredBB = alloca i32 , align 4
store i32 0 , i32 * % retvalalteredBB , align 4
store i32 % argc , i32 * % argc . addralteredBB , align 4
store i8 ** % argv , i8 *** % argv . addralteredBB , align 8
% 4 = load i8 ** , i8 *** % argv . addralteredBB , align 8
% arrayidxalteredBB = getelementptr inbounds i8 * , i8 ** % 4 , i64 1
% 5 = load i8 * , i8 ** % arrayidxalteredBB , align 8
% callalteredBB = call i32 @ atoi ( i8 * % 5 ) #2
store i32 % callalteredBB , i32 * % aalteredBB , align 4
% 6 = load i32 , i32 * % aalteredBB , align 4
% cmpalteredBB = icmp eq i32 % 6 , 0
br label % originalBB
分割originalBB
1
2
3
4
5
BasicBlock :: iterator i = originalBB -> end ();
//以originalBB块的最后一条指令为界进行分割
Twine * var5 = new Twine ( "originalBBpart2" );
BasicBlock * originalBBpart2 = originalBB -> splitBasicBlock ( -- i , * var5 );
originalBB块的原始IR指令如上,经过分割后的IR指令如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
originalBB :
% retval = alloca i32 , align 4
% argc . addr = alloca i32 , align 4
% argv . addr = alloca i8 ** , align 8
% a = alloca i32 , align 4
store i32 0 , i32 * % retval , align 4
store i32 % argc , i32 * % argc . addr , align 4
store i8 ** % argv , i8 *** % argv . addr , align 8
% 0 = load i8 ** , i8 *** % argv . addr , align 8
% arrayidx = getelementptr inbounds i8 * , i8 ** % 0 , i64 1
% 1 = load i8 * , i8 ** % arrayidx , align 8
% call = call i32 @ atoi ( i8 * % 1 ) #2
store i32 % call , i32 * % a , align 4
% 2 = load i32 , i32 * % a , align 4
% cmp = icmp eq i32 % 2 , 0
br label % originalBBpart2
originalBBpart2 :
br i1 % cmp , label % if . then , label % if . else
删除originalBB块经过分割后的最后一条指令,即割断originalBB块和originalBBpart2块的联系
1
originalBB -> getTerminator () -> eraseFromParent ();
在originalBB块后构建永真条件,跳转为真则跳向originalBBpart2块,否则跳向alteredBB块
1
2
3
Twine * var6 = new Twine ( "condition2" );
FCmpInst * condition2 = new FCmpInst ( * originalBB , CmpInst :: FCMP_TRUE , LHS , RHS , * var6 );
BranchInst :: Create ( originalBBpart2 , alteredBB , ( Value * ) condition2 , originalBB );
如上面假设要处理的基本块是entry,经过上述处理后的IR指令如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
entry :
% condition = fcmp true float 1.000000e+00 , 1.000000e+00
br i1 % condition , label % originalBB , label % originalBBalteredBB
originalBB :
% retval = alloca i32 , align 4
% argc . addr = alloca i32 , align 4
% argv . addr = alloca i8 ** , align 8
% a = alloca i32 , align 4
store i32 0 , i32 * % retval , align 4
store i32 % argc , i32 * % argc . addr , align 4
store i8 ** % argv , i8 *** % argv . addr , align 8
% 0 = load i8 ** , i8 *** % argv . addr , align 8
% arrayidx = getelementptr inbounds i8 * , i8 ** % 0 , i64 1
% 1 = load i8 * , i8 ** % arrayidx , align 8
% call = call i32 @ atoi ( i8 * % 1 ) #2
store i32 % call , i32 * % a , align 4
% 2 = load i32 , i32 * % a , align 4
% cmp = icmp eq i32 % 2 , 0
% condition2 = fcmp true float 1.000000e+00 , 1.000000e+00
br i1 % condition2 , label % originalBBpart2 , label % originalBBalteredBB
originalBBpart2 :
br i1 % cmp , label % if . then , label % if . else
originalBBalteredBB :
% retvalalteredBB = alloca i32 , align 4
% argc . addralteredBB = alloca i32 , align 4
% argv . addralteredBB = alloca i8 ** , align 8
% aalteredBB = alloca i32 , align 4
store i32 0 , i32 * % retvalalteredBB , align 4
store i32 % argc , i32 * % argc . addralteredBB , align 4
store i8 ** % argv , i8 *** % argv . addralteredBB , align 8
% 4 = load i8 ** , i8 *** % argv . addralteredBB , align 8
% arrayidxalteredBB = getelementptr inbounds i8 * , i8 ** % 4 , i64 1
% 5 = load i8 * , i8 ** % arrayidxalteredBB , align 8
% callalteredBB = call i32 @ atoi ( i8 * % 5 ) #2
store i32 % callalteredBB , i32 * % aalteredBB , align 4
% 6 = load i32 , i32 * % aalteredBB , align 4
% cmpalteredBB = icmp eq i32 % 6 , 0
br label % originalBB
替换永真表达式
接下来看下最后一个函数doF,其函数声明如下:
这个函数主要是找到当前模块中的永真表达式,并将其替换为如下的表达式.
1
( y < 10 || x * ( x + 1 ) % 2 == 0 )
可以看到这个表达式也是个永真表达式,只是相对复杂一点.
创建全局变量x和y
1
2
3
4
5
6
7
8
9
10
11
Twine * varX = new Twine ( "x" );
Twine * varY = new Twine ( "y" );
Value * x1 = ConstantInt :: get ( Type :: getInt32Ty ( M . getContext ()), 0 , false );
Value * y1 = ConstantInt :: get ( Type :: getInt32Ty ( M . getContext ()), 0 , false );
GlobalVariable * x = new GlobalVariable ( M , Type :: getInt32Ty ( M . getContext ()), false ,
GlobalValue :: CommonLinkage , ( Constant * ) x1 ,
* varX );
GlobalVariable * y = new GlobalVariable ( M , Type :: getInt32Ty ( M . getContext ()), false ,
GlobalValue :: CommonLinkage , ( Constant * ) y1 ,
* varY );
找到表达式中的永真表达式
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
std :: vector < Instruction *> toEdit , toDelete ;
BinaryOperator * op , * op1 = NULL ;
LoadInst * opX , * opY ;
ICmpInst * condition , * condition2 ;
// Looking for the conditions and branches to transform
for ( Module :: iterator mi = M . begin (), me = M . end (); mi != me ; ++ mi ){
for ( Function :: iterator fi = mi -> begin (), fe = mi -> end (); fi != fe ; ++ fi ){
Instruction * tbb = fi -> getTerminator ();
if ( tbb -> getOpcode () == Instruction :: Br ){
BranchInst * br = ( BranchInst * )( tbb );
if ( br -> isConditional ()){
FCmpInst * cond = ( FCmpInst * ) br -> getCondition ();
unsigned opcode = cond -> getOpcode ();
if ( opcode == Instruction :: FCmp ){
if ( cond -> getPredicate () == FCmpInst :: FCMP_TRUE ){
DEBUG_WITH_TYPE ( "gen" ,
errs () << "bcf: an always true predicate ! \n " );
toDelete . push_back ( cond ); // The condition
toEdit . push_back ( tbb ); // The branch using the condition
}
}
}
}
}
}
创建永真表达式
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
for ( std :: vector < Instruction *>:: iterator i = toEdit . begin (); i != toEdit . end (); ++ i ){
//if y < 10 || x*(x+1) % 2 == 0
opX = new LoadInst (( Value * ) x , "" , ( * i ));
opY = new LoadInst (( Value * ) y , "" , ( * i ));
op = BinaryOperator :: Create ( Instruction :: Sub , ( Value * ) opX ,
ConstantInt :: get ( Type :: getInt32Ty ( M . getContext ()), 1 ,
false ), "" , ( * i ));
op1 = BinaryOperator :: Create ( Instruction :: Mul , ( Value * ) opX , op , "" , ( * i ));
op = BinaryOperator :: Create ( Instruction :: URem , op1 ,
ConstantInt :: get ( Type :: getInt32Ty ( M . getContext ()), 2 ,
false ), "" , ( * i ));
condition = new ICmpInst (( * i ), ICmpInst :: ICMP_EQ , op ,
ConstantInt :: get ( Type :: getInt32Ty ( M . getContext ()), 0 ,
false ));
condition2 = new ICmpInst (( * i ), ICmpInst :: ICMP_SLT , opY ,
ConstantInt :: get ( Type :: getInt32Ty ( M . getContext ()), 10 ,
false ));
op1 = BinaryOperator :: Create ( Instruction :: Or , ( Value * ) condition ,
( Value * ) condition2 , "" , ( * i ));
BranchInst :: Create ((( BranchInst * ) * i ) -> getSuccessor ( 0 ),
(( BranchInst * ) * i ) -> getSuccessor ( 1 ),( Value * ) op1 ,
(( BranchInst * ) * i ) -> getParent ());
( * i ) -> eraseFromParent (); // erase the branch
}
上述操作就是在指令i前创建如下表达式并构造分支跳转:
1
(y < 10 || x * (x + 1) % 2 == 0)
假设要处理的基本块IR指令如下:
1
2
3
entry:
%condition = fcmp true float 1.000000e+00, 1.000000e+00
br i1 %condition, label %originalBB, label %originalBBalteredBB
创建永真表达式后的IR指令如下:
1
2
3
4
5
6
7
8
9
10
11
entry :
% condition = fcmp true float 1.000000e+00 , 1.000000e+00
% 0 = load i32 , i32 * @ x
% 1 = load i32 , i32 * @ y
% 2 = sub i32 % 0 , 1
% 3 = mul i32 % 0 , % 2
% 4 = urem i32 % 3 , 2
% 5 = icmp eq i32 % 4 , 0
% 6 = icmp slt i32 % 1 , 10
% 7 = or i1 % 5 , % 6
br i1 % 7 , label % originalBB , label % originalBBalteredBB
删除原永真条件
1
2
3
4
5
for ( std :: vector < Instruction *>:: iterator i = toDelete . begin (); i != toDelete . end (); ++ i ){
DEBUG_WITH_TYPE ( "gen" , errs () << "bcf: Erase condition instruction:"
<< * (( Instruction * ) * i ) << " \n " );
( * i ) -> eraseFromParent ();
}
假设要处理的基本块IR指令如上,则经过doF函数处理后的IR指令如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
@ x = common global i32 0
@ y = common global i32 0
entry :
% 0 = load i32 , i32 * @ x
% 1 = load i32 , i32 * @ y
% 2 = sub i32 % 0 , 1
% 3 = mul i32 % 0 , % 2
% 4 = urem i32 % 3 , 2
% 5 = icmp eq i32 % 4 , 0
% 6 = icmp slt i32 % 1 , 10
% 7 = or i1 % 5 , % 6
br i1 % 7 , label % originalBB , label % originalBBalteredBB
参考链接
BogusControlFlow.cpp
OLLVM混淆研究之BCF篇
OLLVM虚假控制流源码学习笔记
代码混淆技术入门