robert muth
2009-Jun-03 01:26 UTC
[LLVMdev] patch for llc/ARM: added mechanism to move switch tables from .text -> .data; also cleanup and documentation
Hi: This is my first patch submission. Hopefully, this is the proper the protocol. Attached is a patch for the llc ARM backend: Added mechanism to generate switch table in a data section rather than having it interleaved with the code. This is controlled by command line flags and off by default. Also, tried to document and improve the code where I modified it. Robert -------------- next part -------------- Index: lib/Target/ARM/ARMISelLowering.h ==================================================================--- lib/Target/ARM/ARMISelLowering.h (revision 72728) +++ lib/Target/ARM/ARMISelLowering.h (working copy) @@ -147,9 +147,10 @@ /// make the right decision when generating code for different targets. const ARMSubtarget *Subtarget; - /// ARMPCLabelIndex - Keep track the number of ARM PC labels created. - /// + /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created. unsigned ARMPCLabelIndex; + /// ARMJumpTableIndex - Keep track of the number ofJump Tables + unsigned ARMJumpTableIndex; SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, const SDValue &StackPtr, const CCValAssign &VA, Index: lib/Target/ARM/ARMConstantPoolValue.h ==================================================================--- lib/Target/ARM/ARMConstantPoolValue.h (revision 72728) +++ lib/Target/ARM/ARMConstantPoolValue.h (working copy) @@ -25,6 +25,7 @@ enum ARMCPKind { CPValue, CPNonLazyPtr, + CPDataSegmentJumpTable, CPStub }; } @@ -54,7 +55,6 @@ ARMConstantPoolValue(GlobalValue *GV, ARMCP::ARMCPKind Kind, const char *Modifier); - GlobalValue *getGV() const { return GV; } const char *getSymbol() const { return S; } const char *getModifier() const { return Modifier; } @@ -63,6 +63,9 @@ unsigned getLabelId() const { return LabelId; } bool isNonLazyPointer() const { return Kind == ARMCP::CPNonLazyPtr; } bool isStub() const { return Kind == ARMCP::CPStub; } + bool isValue() const { return Kind == ARMCP::CPValue; } + bool isDataSegmentJumpTable() const { + return Kind == ARMCP::CPDataSegmentJumpTable; } unsigned char getPCAdjustment() const { return PCAdjust; } virtual int getExistingMachineCPValue(MachineConstantPool *CP, Index: lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp ==================================================================--- lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp (revision 72728) +++ lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp (working copy) @@ -33,13 +33,19 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Mangler.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <cctype> +#include <sstream> using namespace llvm; +extern cl::opt<bool> FlagOutlineJumpTables; +cl::opt<std::string> FlagJumpTableSection("jumptable-section", + cl::init(".data.jtab")); + STATISTIC(EmittedInsts, "Number of machine instrs printed"); namespace { @@ -139,6 +145,7 @@ /// EmitMachineConstantPoolValue - Print a machine constantpool value to /// the .s file. virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { + // NOTE: A lot of this code is replicated in ARMConstantPoolValue::print printDataDirective(MCPV->getType()); ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV); @@ -155,8 +162,13 @@ } else if (ACPV->isStub()) { FnStubs.insert(Name); printSuffixedName(Name, "$stub"); - } else + } else if (ACPV->isDataSegmentJumpTable()) { + // requires synchronization with code (grep for "$jumptable$") + O << Name << "$jumptable$" << ACPV->getLabelId(); + } else { + assert(ACPV->isValue() && "unknown CP kind"); O << Name; + } if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")"; if (ACPV->getPCAdjustment() != 0) { O << "-(" << TAI->getPrivateGlobalPrefix() << "PC" @@ -695,12 +707,31 @@ } } + void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNo) { - const MachineOperand &MO1 = MI->getOperand(OpNo); - const MachineOperand &MO2 = MI->getOperand(OpNo+1); // Unique Id - unsigned JTI = MO1.getIndex(); - O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << JTI << '_' << MO2.getImm() << ":\n"; + std::stringstream prefix; + + const unsigned JTI = MI->getOperand(OpNo).getIndex(); + const unsigned uid = MI->getOperand(OpNo+1).getImm(); + + if (FlagOutlineJumpTables) { + // needs to be synchronized with ARMConstantPoolValue.cpp + prefix << ".T$jumptable$" << uid; + } else { + // needs to be synchronized with other places in this file + prefix << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << JTI << '_' << uid; + } + + if (FlagOutlineJumpTables) { + // switch out of text section + O << ".section " << FlagJumpTableSection << ",\"aMS\",%progbits,1\n"; + O << ".align 4\n"; + O << "\n\n"; + } + + // the table label + O << prefix.str() << ":\n"; const char *JTEntryDirective = TAI->getJumpTableDirective(); if (!JTEntryDirective) @@ -715,24 +746,28 @@ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { MachineBasicBlock *MBB = JTBBs[i]; if (UseSet && JTSets.insert(MBB).second) - printPICJumpTableSetLabel(JTI, MO2.getImm(), MBB); + printPICJumpTableSetLabel(JTI, uid, MBB); O << JTEntryDirective << ' '; if (UseSet) - O << TAI->getPrivateGlobalPrefix() << getFunctionNumber() - << '_' << JTI << '_' << MO2.getImm() - << "_set_" << MBB->getNumber(); + O << prefix.str() << "_set_" << MBB->getNumber(); else if (TM.getRelocationModel() == Reloc::PIC_) { printBasicBlockLabel(MBB, false, false, false); // If the arch uses custom Jump Table directives, don't calc relative to JT if (!TAI->getJumpTableDirective()) - O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" - << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm(); + O << '-' << prefix.str(); } else printBasicBlockLabel(MBB, false, false, false); if (i != e-1) O << '\n'; } + + if (FlagOutlineJumpTables) { + // switch back into the text section + O << "\n\n"; + O << ".text\n"; + O << "\n\n"; + } } Index: lib/Target/ARM/ARMISelLowering.cpp ==================================================================--- lib/Target/ARM/ARMISelLowering.cpp (revision 72728) +++ lib/Target/ARM/ARMISelLowering.cpp (working copy) @@ -36,9 +36,16 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/MathExtras.h" + using namespace llvm; + + +cl::opt<bool> FlagOutlineJumpTables("outline-jumptables", + cl::desc("move jumptables from text to data")); + static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, @@ -1390,27 +1397,119 @@ return Res; } -SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { + + +// Similar to LowerBR_JT_Inline except that the jumptable +// is moved to the data segment. +// This causes a extra load to access the table but keeps the +// text segment small and avoids some issues with generating PIC +// Needs to be activated with a commandline flag. +static SDValue LowerBR_JT_Outline(SDValue Op, + SelectionDAG &DAG, + unsigned Num, + MVT PTy) { + SDValue Chain = Op.getOperand(0); - SDValue Table = Op.getOperand(1); - SDValue Index = Op.getOperand(2); - DebugLoc dl = Op.getDebugLoc(); - MVT PTy = getPointerTy(); - JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); + const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op.getOperand(1)); + const SDValue Index = Op.getOperand(2); + + + const DebugLoc dl = Op.getDebugLoc(); + + const SDValue UId = DAG.getConstant(Num, PTy); + const SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); + + // create a new global symbol for the jumptable + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(".T", Num, + ARMCP::CPDataSegmentJumpTable); + const SDValue CPAddr = DAG.getTargetConstantPool(CPV, PTy, 4); + + // An ARM idiosyncrasy: wrap each constant pool entry before accessing it + const SDValue Wrapper = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + + // Load Table start from constan pool + const SDValue Table = DAG.getLoad(PTy, dl, DAG.getEntryNode(), Wrapper, NULL, 0); + + // table entries are 4 bytes, so multiple index by 4 + const SDValue ScaledIndex = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); + + // add scaled index to table beginning + const SDValue TabEntryAddr = DAG.getNode(ISD::ADD, dl, PTy, ScaledIndex, Table); + const SDValue JumpTarget = DAG.getLoad(PTy, dl, Chain, TabEntryAddr, NULL, 0); + + return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, JumpTarget, JTI, UId); +} + + +static SDValue LowerBR_JT_Inline(SDValue Op, + SelectionDAG &DAG, + bool isPIC, + MVT PTy) { + // The Jumptable idiom we are aiming for looks somthing like this: + // + // .set PCRELV0, (.LJTI9_0_0-(.LPCRELL0+8)) + // .LPCRELL0: + // add r3, pc, #PCRELV0 + // ldr pc, [r3, +r0, lsl #2] + // .LJTI9_0_0: + // .long .LBB9_2 + // .long .LBB9_5 + // .long .LBB9_7 + // .long .LBB9_4 + // .long .LBB9_8 + // + // In pic mode the table entries are relative to table beginning + // requiring and extra addition + // + // The code generation logic for ARMISD::BR_JT will also + // emit the table (c.f. ARMAsmPrinter::printJTBlockOperand()) + // Also check "def BR_JTm" in ARMInstrInfo.td + + // allocate constant pool entry ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); - SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); - SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); - Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); - Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); - SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); - bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; - Addr = DAG.getLoad(isPIC ? (MVT)MVT::i32 : PTy, dl, - Chain, Addr, NULL, 0); - Chain = Addr.getValue(1); - if (isPIC) - Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); - return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); + + SDValue Chain = Op.getOperand(0); + + const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op.getOperand(1)); + const SDValue Index = Op.getOperand(2); + const DebugLoc dl = Op.getDebugLoc(); + + + const SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); + + const SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); + + // this uses pcrel magic to materialize the table start address + const SDValue Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); + + // table entries are 4 bytes, so multiple index by 4 + const SDValue ScaledIndex = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); + + // add scaled index to table beginning + const SDValue TabEntryAddr = DAG.getNode(ISD::ADD, dl, PTy, ScaledIndex, Table); + SDValue JumpTarget; + if (isPIC) { + JumpTarget = DAG.getLoad((MVT)MVT::i32, dl, Chain, TabEntryAddr, NULL, 0); + Chain = JumpTarget.getValue(1); + // the table entry is not the actual target but relative to other stuff + // TODO: explain the exact magic here + JumpTarget = DAG.getNode(ISD::ADD, dl, PTy, JumpTarget, Table); + } else { + JumpTarget = DAG.getLoad(PTy, dl, Chain, TabEntryAddr, NULL, 0); + Chain = JumpTarget.getValue(1); + } + + return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, JumpTarget, JTI, UId); +} + +SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { + if (FlagOutlineJumpTables) { + return LowerBR_JT_Outline(Op, DAG, ++ARMJumpTableIndex, getPointerTy()); + } else { + const bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; + return LowerBR_JT_Inline(Op, DAG, isPIC, getPointerTy()); + } } static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { Index: lib/Target/ARM/ARMConstantPoolValue.cpp ==================================================================--- lib/Target/ARM/ARMConstantPoolValue.cpp (revision 72728) +++ lib/Target/ARM/ARMConstantPoolValue.cpp (working copy) @@ -85,12 +85,26 @@ } void ARMConstantPoolValue::print(raw_ostream &O) const { + // NOTE: this is not used for codegeneration, moreover + // some of the logic is replicated in + // EmitMachineConstantPoolValue() + if (GV) O << GV->getName(); else O << S; - if (isNonLazyPointer()) O << "$non_lazy_ptr"; - else if (isStub()) O << "$stub"; + + if (isNonLazyPointer()) { + O << "$non_lazy_ptr"; + } else if (isStub()) { + O << "$stub"; + } else if (isDataSegmentJumpTable()) { + // requires synchronization with ARMAsmPrinter.cpp + O << "$jumptable$" << LabelId; + } else { + assert(isValue() && "unknown CP kind"); + } + if (Modifier) O << "(" << Modifier << ")"; if (PCAdjust != 0) { O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust;
Eli Friedman
2009-Jun-03 01:40 UTC
[LLVMdev] patch for llc/ARM: added mechanism to move switch tables from .text -> .data; also cleanup and documentation
On Tue, Jun 2, 2009 at 6:26 PM, robert muth<robert at muth.org> wrote:> This is my first patch submission. Hopefully, this is the proper the protocol.This is fine, although it's usually better to submit patches to llvm-commits.> Added mechanism to generate switch table in a data section > rather than having it interleaved with the code. > This is controlled by command line flags and off by default.When would you want the flag on? When would you want the flag off? -Eli
robert muth
2009-Jun-03 18:33 UTC
[LLVMdev] patch for llc/ARM: added mechanism to move switch tables from .text -> .data; also cleanup and documentation
On Tue, Jun 2, 2009 at 9:40 PM, Eli Friedman <eli.friedman at gmail.com> wrote:> On Tue, Jun 2, 2009 at 6:26 PM, robert muth<robert at muth.org> wrote: >> This is my first patch submission. Hopefully, this is the proper the protocol. > > This is fine, although it's usually better to submit patches to llvm-commits. > >> Added mechanism to generate switch table in a data section >> rather than having it interleaved with the code. >> This is controlled by command line flags and off by default. > > When would you want the flag on? When would you want the flag off? > > -EliThe disadvantage of "outlining" the switch tables is the extra constant pool load incurred for loading the start address of the table into the register The advantages are: * cleaner code both on the llvm side and the generated side which may be beneficial for the ARM JIT (though I have not looked at this). * no PIC problems in the text section as we just move the relocatable entries elsewhere. * better instruction density in the text section as jumptables can grow quite large subsequently and may cause certain offsets to overflow * also AFAIK jumptables are the only "bigger than wordsize" data item in text. The constant pools usually just contain 32bit quantities. In a past life this made it hard for me to binary rewrite arm executables.> _______________________________________________ > LLVM Developers mailing list > LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev >
Evan Cheng
2009-Jun-04 00:53 UTC
[LLVMdev] patch for llc/ARM: added mechanism to move switch tables from .text -> .data; also cleanup and documentation
I will have a look at this in a couple of days... Evan On Jun 2, 2009, at 6:26 PM, robert muth wrote:> Hi: > > This is my first patch submission. Hopefully, this is the proper the > protocol. > Attached is a patch for the llc ARM backend: > > Added mechanism to generate switch table in a data section > rather than having it interleaved with the code. > This is controlled by command line flags and off by default. > Also, tried to document and improve the code where I modified it. > > Robert > <llc.patch.txt>_______________________________________________ > LLVM Developers mailing list > LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
Evan Cheng
2009-Jun-06 20:51 UTC
[LLVMdev] patch for llc/ARM: added mechanism to move switch tables from .text -> .data; also cleanup and documentation
+cl::opt<std::string> FlagJumpTableSection("jumptable-section", + cl::init(".data.jtab")); + Is this necessary? Why not just put it in the normal data section? Also "outline" jumptable seems like a strange term. Can you think of something else? Thanks, Evan Sent from my iPhone On Jun 2, 2009, at 6:26 PM, robert muth <robert at muth.org> wrote:> Hi: > > This is my first patch submission. Hopefully, this is the proper the > protocol. > Attached is a patch for the llc ARM backend: > > Added mechanism to generate switch table in a data section > rather than having it interleaved with the code. > This is controlled by command line flags and off by default. > Also, tried to document and improve the code where I modified it. > > Robert > <llc.patch.txt> > _______________________________________________ > LLVM Developers mailing list > LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20090606/bbfb602d/attachment.html>
robert muth
2009-Jun-07 13:59 UTC
[LLVMdev] patch for llc/ARM: added mechanism to move switch tables from .text -> .data; also cleanup and documentation
On Sat, Jun 6, 2009 at 4:51 PM, Evan Cheng<evan.cheng at apple.com> wrote:> +cl::opt<std::string> FlagJumpTableSection("jumptable-section", > + cl::init(".data.jtab")); > +I thought it would be nice to group all the jumptables together. But as long as it stays configurable, I am fine to change the default to ".data".> Is this necessary? Why not just put it in the normal data section? > Also "outline" jumptable seems like a strange term. Can you think of > something else?Yes, that is a tough one. How about "NonInline" instead. Robert> Thanks, > Evan > > Sent from my iPhone > On Jun 2, 2009, at 6:26 PM, robert muth <robert at muth.org> wrote: > > Hi: > > This is my first patch submission. Hopefully, this is the proper the > protocol. > Attached is a patch for the llc ARM backend: > > Added mechanism to generate switch table in a data section > rather than having it interleaved with the code. > This is controlled by command line flags and off by default. > Also, tried to document and improve the code where I modified it. > > Robert > > <llc.patch.txt> > > _______________________________________________ > LLVM Developers mailing list > LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev > > _______________________________________________ > LLVM Developers mailing list > LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev > >
Apparently Analagous Threads
- [LLVMdev] patch for llc/ARM: added mechanism to move switch tables from .text -> .data; also cleanup and documentation
- [LLVMdev] patch for llc/ARM: added mechanism to move switch tables from .text -> .data; also cleanup and documentation
- [LLVMdev] patch for llc/ARM: added mechanism to move switch tables from .text -> .data; also cleanup and documentation
- [LLVMdev] patch for llc/ARM: added mechanism to move switch tables from .text -> .data; also cleanup and documentation
- [LLVMdev] patch for llc/ARM: added mechanism to move switch tables from .text -> .data; also cleanup and documentation