Hi folks!

/lib/Transforms/Utils/SimplifyCFG.cpp contains optimization code that converts a switch statement to a table lookup, but it has problems when there are holes in the case list and the default case cannot be served with the table. My first attempt to fix this is done by additionally testing a small bit set. As an example, the function

==>
unsigned test(unsigned x) {
  switch (x) {
    case 100: return 0;
    case 101: return 1;
    case 103: return 2;
    case 105: return 3;
    case 107: return 4;
    case 109: return 5;
    case 110: return 6;
    default: return x*3;
  }
}
<=

can be converted to

==>
        .file   "t.cpp"
        .text
        .globl  _Z4testj
        .align  16, 0x90
        .type   _Z4testj,@function
_Z4testj:                               # @_Z4testj
        .cfi_startproc
# BB#0:                                 # %entry
                                        # kill: EDI<def> EDI<kill> RDI<def>
        leal    -100(%rdi), %eax
        cmpl    $11, %eax
        jae     .LBB0_3
# BB#1:                                 # %switch.lookup
        movl    $1707, %ecx             # imm = 0x6AB
        btl     %eax, %ecx
        jae     .LBB0_3
# BB#2:                                 # %switch.lookup2
        cltq
        movl    .Lswitch.table(,%rax,4), %eax
        retq
.LBB0_3:                                # %sw.default
        leal    (%rdi,%rdi,2), %eax
        retq
.Ltmp0:
        .size   _Z4testj, .Ltmp0-_Z4testj
        .cfi_endproc

        .type   .Lswitch.table,@object  # @switch.table
        .section        .rodata,"a",@progbits
        .align  16
.Lswitch.table:
        .long   0                       # 0x0
        .long   1                       # 0x1
        .long   0                       # 0x0
        .long   2                       # 0x2
        .long   0                       # 0x0
        .long   3                       # 0x3
        .long   0                       # 0x0
        .long   4                       # 0x4
        .long   0                       # 0x0
        .long   5                       # 0x5
        .long   6                       # 0x6
        .size   .Lswitch.table, 44

        .ident  "clang version 3.5 (trunk 200617)"
        .section        ".note.GNU-stack","",@progbits
<=

By the way, what is the "cltq" for, just before accessing the value table? The upper dword of rax should be zero.

Unfortunately my patch does not always work and the compiler crashes. What have I overlooked? Please help.

Best regards
Jasper

PS: I posted a very similar message to llvm-commits at cs.uiuc.edu but got no response; perhaps this was the wrong place to discuss it.
-------------- next part -------------- Index: SimplifyCFG.cpp ==================================================================--- SimplifyCFG.cpp (revision 200617) +++ SimplifyCFG.cpp (working copy) @@ -66,6 +66,10 @@ "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes")); +static cl::opt<bool> CheckHoles( + "simplifycfg-check-holes", cl::Hidden, cl::init(true), + cl::desc("Allow holes in a value table by testing a mask")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); @@ -3745,12 +3749,33 @@ uint64_t TableSize = RangeSpread.getLimitedValue() + 1; bool TableHasHoles = (NumResults < TableSize); - // If the table has holes, we need a constant result for the default case. + unsigned NeededMaskSize = 0; + if (CheckHoles) { + if (TableSize <= 32 && TD->fitsInLegalInteger(32)) + NeededMaskSize = 32; + else if (TableSize <= 64 && TD->fitsInLegalInteger(64)) + NeededMaskSize = 64; + } + + // If the table has holes, we need a constant result for the default case... SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; - if (TableHasHoles && !GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest, - DefaultResultsList, TD)) + bool NeedMask = (TableHasHoles && + !GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest, + DefaultResultsList, TD)); + // ...or need to check for valid values with a sufficiently small bit set. + if (NeedMask && NeededMaskSize == 0) return false; + if (NeedMask) { + // As an extra penalty for the validity test we require more cases. + if (SI->getNumCases() < 4) // TODO: Make threshold configurable. + return false; + // We need any value to fill the table; the first one suffices. 
+ SwitchInst::CaseIt CI = SI->case_begin(); + GetCaseResults(SI, CI.getCaseValue(), CI.getCaseSuccessor(), + &CommonDest, DefaultResultsList, TD); + } + for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { PHINode *PHI = DefaultResultsList[I].first; Constant *Result = DefaultResultsList[I].second; @@ -3796,6 +3821,43 @@ // Populate the BB that does the lookups. Builder.SetInsertPoint(LookupBB); + + if (NeedMask) { + uint64_t UseMask = 0; + for (SwitchInst::CaseIt CI = SI->case_begin(), E = SI->case_end(); + CI != E; ++CI) { + ConstantInt *CaseVal = CI.getCaseValue(); + UseMask |= 1ULL << + (CaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue(); + } + + BasicBlock *MaskBB = BasicBlock::Create(Mod.getContext(), + "switch.lookup2", + CommonDest->getParent(), + CommonDest); + ConstantInt *Zero; + ConstantInt *One; + ConstantInt *Mask; + Value *CmpVal; + if (NeededMaskSize == 32) { + Zero = Builder.getInt32(0); + One = Builder.getInt32(1); + Mask = Builder.getInt32(UseMask); + CmpVal = Builder.CreateZExtOrTrunc(TableIndex, Builder.getInt32Ty()); + } else { + Zero = Builder.getInt64(0); + One = Builder.getInt64(1); + Mask = Builder.getInt64(UseMask); + CmpVal = Builder.CreateZExtOrTrunc(TableIndex, Builder.getInt64Ty()); + } + Value *Shr = Builder.CreateLShr(Mask, CmpVal); + Value *And = Builder.CreateAnd(Shr, One); + Value *CmpZ = Builder.CreateICmpNE(And, Zero); + Builder.CreateCondBr(CmpZ, MaskBB, SI->getDefaultDest()); + Builder.SetInsertPoint(MaskBB); + LookupBB = MaskBB; + } + bool ReturnedEarly = false; for (size_t I = 0, E = PHIs.size(); I != E; ++I) { PHINode *PHI = PHIs[I];