Hi all, I was trying to implement an obfuscation tool for C-code on the basis of LLVM. I got a prototype of the simple obfuscation transformation which converting control flow graph to something like a state machine. I am not sure I will have time to work on extending further this tool with new transformations like opaque predicates and decided to put here source code I have by now with hope that it can be helpful to somebody. I am newbie with LLVM and am sure there are a lot of bugs in current implementation, but I have tested it with complex enough C-program and in my case it worked. Sorry for the poor English and grammar. Regards, Evgeny Podlepaev --------------------------------------------- MakeDispatcherPass.h--------------------------------------------- #include <map> #include <string> #include "llvm/Pass.h" #include "llvm/Function.h" #include "llvm/BasicBlock.h" #include "llvm/Instructions.h" using namespace llvm; class MakeDispatcherPass : public FunctionPass { public: virtual bool runOnFunction( Function& currFunction ); private: typedef std::map< BasicBlock*, unsigned > BBindexBBMap; static unsigned IndexSourceBasicBlocks( Function& function, BBindexBBMap& indexBBMap ); static BasicBlock* CreateNewEntryBlock( const std::string& name, BasicBlock *oldEntryBB ); static void LinkBasicBlockToDispatcher( BasicBlock* basicBlock, BasicBlock* dispatcherBlock, Value* statePtr, BBindexBBMap& indexBBMap ); static void MovePHINodesToDispatcher( BBindexBBMap& indexBBMap, BasicBlock* dispatcherBB ); static void ConvertSwitch( Function& function ); static BasicBlock* ProcessCase( SwitchInst* switchInst, int caseIdx, BasicBlock* prevBB, Value* testValuePtr ); static void ReduceTempVarsLifetime( Function& function ); static bool IsUsedOutsideParentBlock( Instruction* value ); static const std::string PassName; }; --------------------------------------------- MakeDispatcherPass.cpp--------------------------------------------- #include <map> #include <vector> #include <cassert> #include <iostream> #include <exception> #include "llvm/Type.h" #include "llvm/Module.h" #include "llvm/Function.h" #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Casting.h" #include "llvm/Support/InstIterator.h" #include "MakeDispatcherPass.h" // // Static variables. // const std::string MakeDispatcherPass::PassName = "MakeDispatcherPass"; // // Public methods. // bool MakeDispatcherPass::runOnFunction( Function& currFunction ) { DEBUG( std::cerr << PassName << ": Processing " << currFunction.getName() << " ...\n" ); bool madeChange = true; // Because of changes which we will make in control flow graph it is necessary // to get rid of temporary variables used outside their parent basic block. ReduceTempVarsLifetime( currFunction ); // Replace all SwitchInst instructions with chained branch instructions // in order to simplify the dispatcher embedding. // // NOTE: We can use LowerSwitch Pass instead of our implementation. ConvertSwitch( currFunction ); // Keep a reference to the old entry basic block. BasicBlock* oldEntryBB = &currFunction.getEntryBlock(); // Collect basic blocks to be reordered and assign unique index for each. BBindexBBMap indexBBMap; unsigned firstBBIndex = IndexSourceBasicBlocks( currFunction, indexBBMap ); // Create basic blocks for initialization and dispatching. BasicBlock* disInitBB = CreateNewEntryBlock( "dispatcher_init", oldEntryBB ); BasicBlock* disMainBB = new BasicBlock( "dispatcher_main", &currFunction, oldEntryBB ); // Add instructions that allocate and initialize the dispatcher's state variable // to the initialization block. AllocaInst* statePtr = new AllocaInst( Type::UIntTy, 0, "state", disInitBB ); new StoreInst( ConstantUInt::get( Type::UIntTy, firstBBIndex ), statePtr, disInitBB ); new BranchInst( disMainBB, disInitBB ); // Connection to the dispatcher's main block. // Add instructions to the dispatcher's main block that implement switching // between the basic blocks depending on the state variable's value. LoadInst* loadState = new LoadInst( statePtr, "next", disMainBB ); SwitchInst* switchInst = new SwitchInst( loadState, oldEntryBB, unsigned( indexBBMap.size() ), disMainBB ); for( BBindexBBMap::iterator idxBBPair = indexBBMap.begin(); idxBBPair !indexBBMap.end(); idxBBPair++ ) { switchInst->addCase( ConstantUInt::get( Type::UIntTy, idxBBPair->second ), idxBBPair->first ); } // Unlink all source basic blocks and relink them to the dispatcher's main block. for( BBindexBBMap::iterator idxBBPair = indexBBMap.begin(); idxBBPair !indexBBMap.end(); idxBBPair++ ) { LinkBasicBlockToDispatcher( idxBBPair->first, disMainBB, statePtr, indexBBMap ); } // Since the predecessors of source basic blocks was replaced by the dispatcher's // basic block it is necessary to update all PHI nodes in source blocks. PHI is a // special artificial definition for supporting the Static Single Assignment form // used for data flow analysis ( http://gcc.gnu.org/onlinedocs/gccint/SSA.html). MovePHINodesToDispatcher( indexBBMap, disMainBB ); return madeChange; } // // Private methods. // unsigned MakeDispatcherPass::IndexSourceBasicBlocks( Function& function, BBindexBBMap& indexBBMap ) { unsigned startIdx = 0; // Keep all source basic blocks, including entry and exit blocks. unsigned idx = startIdx; for( Function::iterator i = function.begin(); i != function.end(); i++ ) { // Add basic block with index. indexBBMap[ &*i ] = idx++; } // Return the index of the first basic block that is an entry to the function. return startIdx; } BasicBlock* MakeDispatcherPass::CreateNewEntryBlock( const std::string& name, BasicBlock *oldEntryBB ) { // Create a new basic block and insert it before old entry block. BasicBlock* newEntryBB = new BasicBlock( name, oldEntryBB->getParent(), oldEntryBB ); // Move all allocation instructions for stack variables from the old entry block to the new one. // These instructions should be placed at the beginning of the function and should dominate // all their uses. for( BasicBlock::iterator i = oldEntryBB->begin(); i !oldEntryBB->end(); ) { if( AllocationInst* ai = dyn_cast< AllocationInst >( i ) ) { oldEntryBB->getInstList().remove( i++ ); // Trick to preserve the valid iterator after removing an element. newEntryBB->getInstList().push_back( ai ); } else { i++; } } oldEntryBB->setName( "old_entry" ); return newEntryBB; } void MakeDispatcherPass::LinkBasicBlockToDispatcher( BasicBlock* basicBlock, BasicBlock* dispatcherBlock, Value* statePtr, BBindexBBMap& indexBBMap ) { // Get a reference to the basic block terminator. We should replace it // by jump to the dispatcher's basic block. TerminatorInst* terminator = basicBlock->getTerminator(); assert( terminator && "Basic block is not well formed and has no terminator!" ); if( isa< ReturnInst >( terminator ) ) { // This block is exit from the function. We will not change it. return; } else if( isa< BranchInst >( terminator ) ) { // This block is terminating by branch instruction. We will replace target // of the branch by the dispatcher block and set current state variable to // a proper basic block's index. BranchInst* branchInst = dynamic_cast< BranchInst* >( terminator ); // Add instruction selecting a proper basic block's index. Value* stateValue = 0; if( !branchInst->isConditional() ) { stateValue = ConstantUInt::get( Type::UIntTy, indexBBMap[ branchInst->getSuccessor( 0 ) ] ); } else { stateValue = new SelectInst( branchInst->getCondition(), ConstantUInt::get( Type::UIntTy, indexBBMap[ branchInst->getSuccessor( 0 ) ] ), ConstantUInt::get( Type::UIntTy, indexBBMap[ branchInst->getSuccessor( 1 ) ] ), "select_state", basicBlock ); } // Add instructions for load of the state variable and branch to the dispatcher. new StoreInst( stateValue, statePtr, basicBlock ); new BranchInst( dispatcherBlock, basicBlock ); basicBlock->getInstList().erase( branchInst ); } else { // TODO: Support other possible terminators. throw std::exception( ( PassName + ": Unsupported basic block terminator: " + typeid( terminator ).name() ).c_str() ); } } void MakeDispatcherPass::MovePHINodesToDispatcher( BBindexBBMap& indexBBMap, BasicBlock* dispatcherBB ) { // Iterate through the list of indexed basic blocks and move all PHIs nodes // to the dispatcher's basic block (target basic block) that is new predecessor // for all them. for( BBindexBBMap::iterator idxBBPair = indexBBMap.begin(); idxBBPair !indexBBMap.end(); idxBBPair++ ) { // Iterate through the PHIs nodes that can be located only at the // beginning of the basic block. PHINode* phi = 0; for( BasicBlock::iterator inst = idxBBPair->first->begin(); ( phi dyn_cast< PHINode >( inst ) ) != 0; ) { // Move the PHI node to the target basic block. phi->getParent()->getInstList().remove( inst++ ); dispatcherBB->getInstList().insert( dispatcherBB->getInstList().begin(), phi ); // Since a PHI node must have an incoming value for every predecessor of // the parent basic block we should add all predecessors of the target // basic block that have no corresponding incoming value in the PHI node // as new incoming undefined value to the PHI node. int wasIncomingCount = 0; for( pred_iterator pred = pred_begin( dispatcherBB ); pred !pred_end( dispatcherBB ); pred++ ) { int idx = phi->getBasicBlockIndex( *pred ); if( idx == -1 ) { phi->addIncoming( UndefValue::get( phi->getType() ), *pred ); } else { wasIncomingCount++; } } assert( wasIncomingCount && "No one of the predecessors was not incoming value to the PHI!" ); } } } void MakeDispatcherPass::ConvertSwitch( Function& function ) { BasicBlock* entryBB = &function.getEntryBlock(); for( Function::iterator i = function.begin(); i != function.end(); i++ ) { BasicBlock* basicBlock = &*i; TerminatorInst* terminator = basicBlock->getTerminator(); assert( terminator && "Basic block is not well formed and has no terminator!" ); if( isa< SwitchInst >( terminator ) ) { SwitchInst* switchInst = dynamic_cast< SwitchInst * >( basicBlock->getTerminator() ); // Allocate a local variable and add an instruction saving the value // being tested by the switch instruction to this variable. // // Allocating of the new variable is necessary because initial order // of the basic blocks will be changed by dispatcher embedding. AllocaInst* testValuePtr = new AllocaInst( switchInst->getCondition()->getType(), 0, "switch_test_val", entryBB->getTerminator() ); new StoreInst( switchInst->getCondition(), testValuePtr, false, switchInst ); // Replace the switch instruction by basic blocks with conditional branches. BasicBlock* caseBlock = ProcessCase( switchInst, 1, basicBlock, testValuePtr ); switchInst->eraseFromParent(); new BranchInst( caseBlock, basicBlock ); } } } BasicBlock* MakeDispatcherPass::ProcessCase( SwitchInst* switchInst, int caseIdx, BasicBlock* prevBB, Value* testValuePtr ) { // Create a new basic block for instructions checking the current case of the switch instruction. BasicBlock* caseBlock = new BasicBlock( "case", prevBB->getParent(), prevBB->getNext() ); // If not all cases was handled ... if( caseIdx != switchInst->getNumCases() ) { // Instructions for checking the case. LoadInst* testValue = new LoadInst( testValuePtr, "test_value", caseBlock ); SetCondInst* setCondInst = new SetCondInst( Instruction::SetEQ, testValue, switchInst->getCaseValue( caseIdx ), "is_case", caseBlock ); // Get a reference to the basic block for the next case. BasicBlock* nextCaseBlock = ProcessCase( switchInst, caseIdx + 1, caseBlock, testValuePtr ); // Add an instruction for branching to the basic block // corresponding to the condition of this case or to // the basic block checking the next case. BranchInst* branchInst = new BranchInst( switchInst->getSuccessor( caseIdx ), nextCaseBlock, setCondInst, caseBlock ); } else if( switchInst->getDefaultDest() != 0 ) { BranchInst* branchInst = new BranchInst( switchInst->getDefaultDest(), caseBlock ); } return caseBlock; } void MakeDispatcherPass::ReduceTempVarsLifetime( Function& function ) { BasicBlock* entryBB = &function.getEntryBlock(); typedef std::vector< Instruction * > InstList; // Collect all instructions (temporary variables) that have result used outside // their parent basic block. Skip the AllocInst instructions because it will be // handled separately during creation of new entry basic block. InstList insts; for( inst_iterator i = inst_begin( function ); i != inst_end( function ); i++ ) { Instruction* inst = &*i; if( IsUsedOutsideParentBlock( inst ) && !isa< AllocaInst >( inst ) ) { insts.push_back( inst ); } } // Walk through the list of temporary variables to reduce their lifetime // to its parent basic block. for( InstList::iterator i = insts.begin(); i != insts.end(); i++ ) { Instruction* inst = *i; // Collect all instructions that are using the temporary variable. InstList users; for( Value::use_iterator j = inst->use_begin(); j !inst->use_end(); j++ ) { users.push_back( dynamic_cast< Instruction* >( *j ) ); } // Allocate local variable on the function's stack frame and save there // the temporary variable (result of the instruction). AllocaInst* valuePtr = new AllocaInst( inst->getType(), 0, "reduced_var", entryBB->getTerminator() ); new StoreInst( inst, valuePtr, false, inst->getNext() ); // Patch users of the temporary variable to use the local function variable. for( InstList::iterator j = users.begin(); j != users.end(); j++ ) { Instruction* user = dynamic_cast< Instruction* >( *j ); if( isa< PHINode >( user ) ) { // For the phi node we should look through every incoming value and, // if it is necessary, add to the preceding basic block an instruction // for loading the saved temporary variable's value. PHINode* phiNode = dynamic_cast< PHINode* >( user ); for( unsigned idx = 0; idx < phiNode->getNumIncomingValues(); idx++ ) { if( phiNode->getIncomingValue( idx ) == inst ) { LoadInst* loadedValue = new LoadInst( valuePtr, "value_of_reduced_var", phiNode->getIncomingBlock( idx )->getTerminator() ); phiNode->setIncomingValue( idx, loadedValue ); } } } else { user->replaceUsesOfWith( inst, new LoadInst( valuePtr, "value_of_reduced_var", user ) ); } } } } bool MakeDispatcherPass::IsUsedOutsideParentBlock( Instruction* value ) { for( Value::use_iterator i = value->use_begin(); i != value->use_end(); i++ ) { Instruction* user = dynamic_cast< Instruction* >( *i ); if( user->getParent() != value->getParent() ) { return true; } } return false; } --------------------------------------------- Main.cpp--------------------------------------------- #include <iostream> #include <fstream> #include "llvm/Pass.h" #include "llvm/Module.h" #include "llvm/PassManager.h" #include "llvm/Bytecode/Reader.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Bytecode/WriteBytecodePass.h" #include "MakeDispatcherPass.h" using namespace llvm; static cl::opt< std::string > InputFilename( cl::Positional, cl::desc( "<input .bc file>" ), cl::Required ); static cl::opt< std::string > OutputFilename( cl::Positional, cl::desc( "<output .bc file>" ), cl::Required ); static cl::opt< bool > NoVerify( "disable-verify", cl::desc( "Do not verify result module" ), cl::Optional ); int main( int argc, char* argv[] ) { std::string programName = argv[ 0 ] = "obfuscator"; try { cl::ParseCommandLineOptions( argc, argv ); std::string errorMessage; // Load the input module. std::auto_ptr< Module > M( ParseBytecodeFile( InputFilename, &errorMessage ) ); if( M.get() == 0 ) { std::cerr << programName << ": " << errorMessage << "\n"; return 1; } // Open the stream we are supposed to write to. std::ostream *Out = new std::ofstream( OutputFilename.c_str(), std::ios::out | std::ios::trunc | std::ios::binary ); if( !Out->good() ) { std::cerr << programName << ": Error opening " << OutputFilename << "!\n"; return 1; } // Create a PassManager to hold and optimize the collection of passes we are about to build... PassManager Passes; // Add an appropriate TargetData instance for this module. Passes.add( new TargetData( "obfuscator", M.get() ) ); Passes.add( new MakeDispatcherPass() ); if( !NoVerify ) { Passes.add( createVerifierPass() ); } Passes.add( new WriteBytecodePass( Out, true ) ); // Now that we have all of the passes ready, run them. Passes.run( *M.get() ); } catch( std::exception e ) { std::cerr << programName << ": " << e.what() << "\n"; return 1; } catch( const std::string& msg ) { std::cerr << programName << ": " << msg << "\n"; return 1; } catch( ... ) { std::cerr << programName << ": Unexpected exception occurred.\n"; return 1; } return 0; } -------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20060517/0d482cb3/attachment.html>