OLLVM研究之BCF

概述

BCF即虚假控制流,将原程序基本块进行拆分和克隆,利用不透明谓词将克隆的基本块作为不可达分支用来混淆静态代码,达到迷惑攻击者的目的.

这里的不透明谓词是指一个表达式,它的值在执行到某处时,对代码编写者来说是确定的,但是由于某种原因,编译器或者静态分析器无法推断出这个值,只能在运行时确定.

其技术实现过程如下:

虚假控制流实现

虚假控制流后,在IDA中的示例如下:

IDA示例

测试文件

1
2
3
4
5
6
7
8
9
#include <stdlib.h>
int main(int argc, char** argv) {
  int a = atoi(argv[1]);
  if(a == 0)
    return 1;
  else
    return 10;
  return 0;
}

流程分析

入口函数

  1. bcf的声明在ollvm/include/llvm/Transforms/Obfuscation/BogusControlFlow.h
1
2
Pass *createBogus ();
Pass *createBogus (bool flag);
  1. bcf的入口函数是runOnFunction,在ollvm/lib/Transforms/Obfuscation/BogusControlFlow.cpp
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
virtual bool runOnFunction(Function &F){
  // Check if the percentage is correct
  if (ObfTimes <= 0) {
    errs()<<"BogusControlFlow application number -bcf_loop=x must be x > 0";
	return false;
  }

  // Check if the number of applications is correct
  if ( !((ObfProbRate > 0) && (ObfProbRate <= 100)) ) {
    errs()<<"BogusControlFlow application basic blocks percentage -bcf_prob=x must be 0 < x <= 100";
	return false;
  }
  std::vector<BasicBlock *> orginalBBs;
  // check for compatible
  for (BasicBlock &bb : F.getBasicBlockList()) {
    if (isa<InvokeInst>(bb.getTerminator())) {
      return false;
    }
  }
  // If fla annotations
  if(toObfuscate(flag,&F,"bcf")) {
    bogus(F);
    doF(*F.getParent());
    return true;
  }

  return false;
} // end of runOnFunction()

ObfTimes是通过以下参数进行传递的,默认是1

1
2
//进行3次bcf混淆
-mllvm -bcf_loop=3

ObfProbRate是通过以下参数进行传递的,默认是30

1
2
//每个基本块有40%的概率进行bcf混淆
-mllvm -bcf_prob=40
  1. toObfuscate函数用于判断是否需要进行bcf

toObfuscate函数

虚假控制流构造

bogus函数内部会遍历函数的所有基本块,然后调用addBogusFlow函数进行虚假控制流的添加,其关键逻辑代码如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
void bogus(Function &F) {
  //①通过命令行传参(-mllvm -bcf_loop=3)来构建循环体,决定进行多少次bcf混淆
  int NumObfTimes = ObfTimes;
    do{
      //②保存该函数所有基本块
      std::list<BasicBlock *> basicBlocks;
      for (Function::iterator i=F.begin();i!=F.end();++i) {
        basicBlocks.push_back(&*i);
      }

	  //③遍历所有基本块
      while(!basicBlocks.empty()){
        //④通过随机产生100以内的数来和命令行传参(-mllvm -bcf_prob=40)进行比较,决定该基本块是否进行bcf混淆
        if((int)llvm::cryptoutils->get_range(100) <= ObfProbRate){
          //⑤调用addBogusFlow进行虚假控制流的添加
          BasicBlock *basicBlock = basicBlocks.front();
          addBogusFlow(basicBlock, F);
        }
        // remove the block from the list
        basicBlocks.pop_front();

      } // end of while(!basicBlocks.empty())
    }while(--NumObfTimes > 0);
}

接下来我们分析下进行bcf混淆的核心函数addBogusFlow

1
virtual void addBogusFlow(BasicBlock * basicBlock, Function &F)
  1. 进行基本块的分割
1
2
3
4
5
6
7
8
//获取基本块中的第一个非PHI、非调试信息、非生命周期传递指令
Instruction *i1 = &*basicBlock->begin();
 if(basicBlock->getFirstNonPHIOrDbgOrLifetime())
   i1 = basicBlock->getFirstNonPHIOrDbgOrLifetime();
//调用splitBasicBlock,进行基本块的分割
 Twine *var;
 var = new Twine("originalBB");
 BasicBlock *originalBB = basicBlock->splitBasicBlock(i1, *var);

假设要处理的基本块IR指令如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
entry:
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %a = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
  %1 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %1) #2
  store i32 %call, i32* %a, align 4
  %2 = load i32, i32* %a, align 4
  %cmp = icmp eq i32 %2, 0
  br i1 %cmp, label %if.then, label %if.else

经过上述分割后的基本块IR指令如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
entry:
  br label %originalBB

originalBB:
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %a = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
  %1 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %1) #2
  store i32 %call, i32* %a, align 4
  %2 = load i32, i32* %a, align 4
  %cmp = icmp eq i32 %2, 0
  br i1 %cmp, label %if.then, label %if.else
  1. 进行基本块的克隆
1
2
3
//仿originalBB进行基本块的克隆
Twine * var3 = new Twine("alteredBB");
BasicBlock *alteredBB = createAlteredBasicBlock(originalBB, *var3, &F);

此时的IR指令如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
entry:
  br label %originalBB

originalBB:
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %a = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
  %1 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %1) #2
  store i32 %call, i32* %a, align 4
  %2 = load i32, i32* %a, align 4
  %cmp = icmp eq i32 %2, 0
  br i1 %cmp, label %if.then, label %if.else

originalBBalteredBB:
  %retvalalteredBB = alloca i32, align 4
  %argc.addralteredBB = alloca i32, align 4
  %argv.addralteredBB = alloca i8**, align 8
  %aalteredBB = alloca i32, align 4
  store i32 0, i32* %retvalalteredBB, align 4
  store i32 %argc, i32* %argc.addralteredBB, align 4
  store i8** %argv, i8*** %argv.addralteredBB, align 8
  %4 = load i8**, i8*** %argv.addralteredBB, align 8
  %arrayidxalteredBB = getelementptr inbounds i8*, i8** %4, i64 1
  %5 = load i8*, i8** %arrayidxalteredBB, align 8
  %callalteredBB = call i32 @atoi(i8* %5) #2
  store i32 %callalteredBB, i32* %aalteredBB, align 4
  %6 = load i32, i32* %aalteredBB, align 4
  %cmpalteredBB = icmp eq i32 %6, 0
  br i1 %cmpalteredBB, label %if.then, label %if.else
  1. 移除基本块的最后一条指令(通常来说是分支或返回指令),即删除其与后继块的关系
1
2
alteredBB->getTerminator()->eraseFromParent();
basicBlock->getTerminator()->eraseFromParent();

此时的IR指令如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
entry:

originalBB:
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %a = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
  %1 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %1) #2
  store i32 %call, i32* %a, align 4
  %2 = load i32, i32* %a, align 4
  %cmp = icmp eq i32 %2, 0
  br i1 %cmp, label %if.then, label %if.else
	  
originalBBalteredBB:
  %retvalalteredBB = alloca i32, align 4
  %argc.addralteredBB = alloca i32, align 4
  %argv.addralteredBB = alloca i8**, align 8
  %aalteredBB = alloca i32, align 4
  store i32 0, i32* %retvalalteredBB, align 4
  store i32 %argc, i32* %argc.addralteredBB, align 4
  store i8** %argv, i8*** %argv.addralteredBB, align 8
  %4 = load i8**, i8*** %argv.addralteredBB, align 8
  %arrayidxalteredBB = getelementptr inbounds i8*, i8** %4, i64 1
  %5 = load i8*, i8** %arrayidxalteredBB, align 8
  %callalteredBB = call i32 @atoi(i8* %5) #2
  store i32 %callalteredBB, i32* %aalteredBB, align 4
  %6 = load i32, i32* %aalteredBB, align 4
  %cmpalteredBB = icmp eq i32 %6, 0
  1. 构建一个永真条件
1
2
3
4
5
6
7
Value * LHS = ConstantFP::get(Type::getFloatTy(F.getContext()), 1.0);
Value * RHS = ConstantFP::get(Type::getFloatTy(F.getContext()), 1.0);
DEBUG_WITH_TYPE("gen", errs() << "bcf: Value LHS and RHS created\n");

//在第一个块即上述entry块处插入永真条件
Twine * var4 = new Twine("condition");
FCmpInst * condition = new FCmpInst(*basicBlock, FCmpInst::FCMP_TRUE , LHS, RHS, *var4);
  1. 构建跳转逻辑
1
2
3
4
//在basicBlock即上述entry块处创建分支判断,条件为真则跳转到originalBB处,否则跳转到alteredBB块
BranchInst::Create(originalBB, alteredBB, (Value *)condition, basicBlock);
//在alteredBB块末尾添加分支指令,无条件跳转到originalBB块
BranchInst::Create(originalBB, alteredBB);

此时的IR指令如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
entry:
  %condition = fcmp true float 1.000000e+00, 1.000000e+00
  br i1 %condition, label %originalBB, label %originalBBalteredBB

originalBB:
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %a = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
  %1 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %1) #2
  store i32 %call, i32* %a, align 4
  %2 = load i32, i32* %a, align 4
  %cmp = icmp eq i32 %2, 0
  br i1 %cmp, label %if.then, label %if.else

originalBBalteredBB:
  %retvalalteredBB = alloca i32, align 4
  %argc.addralteredBB = alloca i32, align 4
  %argv.addralteredBB = alloca i8**, align 8
  %aalteredBB = alloca i32, align 4
  store i32 0, i32* %retvalalteredBB, align 4
  store i32 %argc, i32* %argc.addralteredBB, align 4
  store i8** %argv, i8*** %argv.addralteredBB, align 8
  %4 = load i8**, i8*** %argv.addralteredBB, align 8
  %arrayidxalteredBB = getelementptr inbounds i8*, i8** %4, i64 1
  %5 = load i8*, i8** %arrayidxalteredBB, align 8
  %callalteredBB = call i32 @atoi(i8* %5) #2
  store i32 %callalteredBB, i32* %aalteredBB, align 4
  %6 = load i32, i32* %aalteredBB, align 4
  %cmpalteredBB = icmp eq i32 %6, 0
  br label %originalBB
  1. 分割originalBB
1
2
3
4
5
BasicBlock::iterator i = originalBB->end();

//以originalBB块的最后一条指令为界进行分割
Twine * var5 = new Twine("originalBBpart2");
BasicBlock * originalBBpart2 = originalBB->splitBasicBlock(--i , *var5);

originalBB块的原始IR指令如上,经过分割后的IR指令如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
originalBB:
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %a = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
  %1 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %1) #2
  store i32 %call, i32* %a, align 4
  %2 = load i32, i32* %a, align 4
  %cmp = icmp eq i32 %2, 0
  br label %originalBBpart2

originalBBpart2:
  br i1 %cmp, label %if.then, label %if.else
  1. 删除originalBB块经过分割后的最后一条指令,即割断originalBB块和originalBBpart2块的联系
1
originalBB->getTerminator()->eraseFromParent();
  1. 在originalBB块后构建永真条件,跳转为真则跳向originalBBpart2块,否则跳向alteredBB块
1
2
3
Twine * var6 = new Twine("condition2");
FCmpInst * condition2 = new FCmpInst(*originalBB, CmpInst::FCMP_TRUE , LHS, RHS, *var6);
BranchInst::Create(originalBBpart2, alteredBB, (Value *)condition2, originalBB);

如上面假设要处理的基本块是entry,经过上述处理后的IR指令如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
entry:
  %condition = fcmp true float 1.000000e+00, 1.000000e+00
  br i1 %condition, label %originalBB, label %originalBBalteredBB

originalBB:
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %a = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
  %1 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %1) #2
  store i32 %call, i32* %a, align 4
  %2 = load i32, i32* %a, align 4
  %cmp = icmp eq i32 %2, 0
  %condition2 = fcmp true float 1.000000e+00, 1.000000e+00
  br i1 %condition2, label %originalBBpart2, label %originalBBalteredBB

originalBBpart2:
  br i1 %cmp, label %if.then, label %if.else

originalBBalteredBB:
  %retvalalteredBB = alloca i32, align 4
  %argc.addralteredBB = alloca i32, align 4
  %argv.addralteredBB = alloca i8**, align 8
  %aalteredBB = alloca i32, align 4
  store i32 0, i32* %retvalalteredBB, align 4
  store i32 %argc, i32* %argc.addralteredBB, align 4
  store i8** %argv, i8*** %argv.addralteredBB, align 8
  %4 = load i8**, i8*** %argv.addralteredBB, align 8
  %arrayidxalteredBB = getelementptr inbounds i8*, i8** %4, i64 1
  %5 = load i8*, i8** %arrayidxalteredBB, align 8
  %callalteredBB = call i32 @atoi(i8* %5) #2
  store i32 %callalteredBB, i32* %aalteredBB, align 4
  %6 = load i32, i32* %aalteredBB, align 4
  %cmpalteredBB = icmp eq i32 %6, 0
  br label %originalBB

替换永真表达式

接下来看下最后一个函数doF,其函数声明如下:

1
bool doF(Module &M)

这个函数主要是找到当前模块中的永真表达式,并将其替换为如下的表达式.

1
(y < 10 || x * (x + 1) % 2 == 0)

可以看到这个表达式也是个永真表达式,只是相对复杂一点.

  1. 创建全局变量x和y
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
Twine * varX = new Twine("x");
Twine * varY = new Twine("y");
Value * x1 =ConstantInt::get(Type::getInt32Ty(M.getContext()), 0, false);
Value * y1 =ConstantInt::get(Type::getInt32Ty(M.getContext()), 0, false);

GlobalVariable 	* x = new GlobalVariable(M, Type::getInt32Ty(M.getContext()), false,
    GlobalValue::CommonLinkage, (Constant * )x1,
    *varX);
GlobalVariable 	* y = new GlobalVariable(M, Type::getInt32Ty(M.getContext()), false,
    GlobalValue::CommonLinkage, (Constant * )y1,
    *varY);
  1. 找到表达式中的永真表达式
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
std::vector<Instruction*> toEdit, toDelete;
BinaryOperator *op,*op1 = NULL;
LoadInst * opX , * opY;
ICmpInst * condition, * condition2;
// Looking for the conditions and branches to transform
for(Module::iterator mi = M.begin(), me = M.end(); mi != me; ++mi){
  for(Function::iterator fi = mi->begin(), fe = mi->end(); fi != fe; ++fi){
    Instruction * tbb= fi->getTerminator();
    if(tbb->getOpcode() == Instruction::Br){
      BranchInst * br = (BranchInst *)(tbb);
      if(br->isConditional()){
        FCmpInst * cond = (FCmpInst *)br->getCondition();
        unsigned opcode = cond->getOpcode();
        if(opcode == Instruction::FCmp){
          if (cond->getPredicate() == FCmpInst::FCMP_TRUE){
            DEBUG_WITH_TYPE("gen",
                errs()<<"bcf: an always true predicate !\n");
            toDelete.push_back(cond); // The condition
            toEdit.push_back(tbb);    // The branch using the condition
          }
        }
      }
    }
  }
}
  1. 创建永真表达式
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
for(std::vector<Instruction*>::iterator i =toEdit.begin();i!=toEdit.end();++i){
  //if y < 10 || x*(x+1) % 2 == 0
  opX = new LoadInst ((Value *)x, "", (*i));
  opY = new LoadInst ((Value *)y, "", (*i));

  op = BinaryOperator::Create(Instruction::Sub, (Value *)opX,
      ConstantInt::get(Type::getInt32Ty(M.getContext()), 1,
        false), "", (*i));
  op1 = BinaryOperator::Create(Instruction::Mul, (Value *)opX, op, "", (*i));
  op = BinaryOperator::Create(Instruction::URem, op1,
      ConstantInt::get(Type::getInt32Ty(M.getContext()), 2,
        false), "", (*i));
  condition = new ICmpInst((*i), ICmpInst::ICMP_EQ, op,
      ConstantInt::get(Type::getInt32Ty(M.getContext()), 0,
        false));
  condition2 = new ICmpInst((*i), ICmpInst::ICMP_SLT, opY,
      ConstantInt::get(Type::getInt32Ty(M.getContext()), 10,
        false));
  op1 = BinaryOperator::Create(Instruction::Or, (Value *)condition,
      (Value *)condition2, "", (*i));

  BranchInst::Create(((BranchInst*)*i)->getSuccessor(0),
      ((BranchInst*)*i)->getSuccessor(1),(Value *) op1,
      ((BranchInst*)*i)->getParent());

  (*i)->eraseFromParent(); // erase the branch
}

上述操作就是在指令i前创建如下表达式并构造分支跳转:

1
(y < 10 || x * (x + 1) % 2 == 0)

假设要处理的基本块IR指令如下:

1
2
3
entry:
  %condition = fcmp true float 1.000000e+00, 1.000000e+00
  br i1 %condition, label %originalBB, label %originalBBalteredBB

创建永真表达式后的IR指令如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
entry:
  %condition = fcmp true float 1.000000e+00, 1.000000e+00
  %0 = load i32, i32* @x
  %1 = load i32, i32* @y
  %2 = sub i32 %0, 1
  %3 = mul i32 %0, %2
  %4 = urem i32 %3, 2
  %5 = icmp eq i32 %4, 0
  %6 = icmp slt i32 %1, 10
  %7 = or i1 %5, %6
  br i1 %7, label %originalBB, label %originalBBalteredBB
  1. 删除原永真条件
1
2
3
4
5
for(std::vector<Instruction*>::iterator i =toDelete.begin();i!=toDelete.end();++i){
  DEBUG_WITH_TYPE("gen", errs() << "bcf: Erase condition instruction:"
      << *((Instruction*)*i)<< "\n");
  (*i)->eraseFromParent();
}

假设要处理的基本块IR指令如上,则经过doF函数处理后的IR指令如下:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
@x = common global i32 0
@y = common global i32 0

entry:
  %0 = load i32, i32* @x
  %1 = load i32, i32* @y
  %2 = sub i32 %0, 1
  %3 = mul i32 %0, %2
  %4 = urem i32 %3, 2
  %5 = icmp eq i32 %4, 0
  %6 = icmp slt i32 %1, 10
  %7 = or i1 %5, %6
  br i1 %7, label %originalBB, label %originalBBalteredBB

参考链接

BogusControlFlow.cpp

OLLVM混淆研究之BCF篇

OLLVM虚假控制流源码学习笔记

代码混淆技术入门


相关内容

0%