Hello,
I did a little experiment modifying LLVM to be able to use alias-analysis
information in scheduling so that independent memory operations may be
reordered.
Attached is a patch which implements this. I copied some routines from
DAGCombiner.cpp for using SDOperands with alias queries; it should
probably be factored out somewhere so the code can be shared. I
reorganized SelectionDAGLowering::getLoadFrom a little, to make it
simpler to use in other contexts.
Also, the patch fixes a bug where SelectionDAG::getLoad and
SelectionDAG::getStore were being called with the wrong arguments, with
a default argument helping to hide it.
I'm interested in any comments or feedback that people might have.
Dan
--
Dan Gohman, Cray Inc. <djg at cray.com>
-------------- next part --------------
Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
==================================================================RCS file:
/var/cvs/llvm/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp,v
retrieving revision 1.332
diff -u -r1.332 SelectionDAGISel.cpp
--- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp 16 Dec 2006 21:14:48 -0000
1.332
+++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp 19 Dec 2006 19:06:06 -0000
@@ -81,6 +81,16 @@
static RegisterScheduler
defaultListDAGScheduler("default", " Best scheduler for the
target",
createDefaultScheduler);
+
+ static cl::opt<bool>
+ SchedulerMemoryDisambiguation(
+ "sched-memory-disambiguation", cl::Hidden,
+ cl::desc("Turn on memory disambiguation for scheduling"));
+
+ static cl::opt<bool>
+ SchedulerMemoryDisambiguationAA(
+ "sched-alias-analysis", cl::Hidden,
+ cl::desc("Include alias analysis in memory disambiguation for
scheduling"));
} // namespace
namespace {
@@ -364,10 +374,9 @@
std::map<const Value*, SDOperand> NodeMap;
- /// PendingLoads - Loads are not emitted to the program immediately. We
bunch
- /// them up and then emit token factor nodes when possible. This allows us
to
- /// get simple disambiguation between loads without worrying about alias
- /// analysis.
+ /// PendingLoads - Some loads and stores are not emitted to the program
immediately.
+ /// We bunch them up and then emit token factor nodes when possible. This
allows us
+ /// to avoid dependencies between independent memory operations.
std::vector<SDOperand> PendingLoads;
/// Case - A pair of values to record the Value for a switch case, and the
@@ -412,6 +421,7 @@
// implemented with a libcall, etc.
TargetLowering &TLI;
SelectionDAG &DAG;
+ AliasAnalysis &AA;
const TargetData *TD;
/// SwitchCases - Vector of CaseBlock structures used to communicate
@@ -424,8 +434,9 @@
FunctionLoweringInfo &FuncInfo;
SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli,
+ AliasAnalysis &aa,
FunctionLoweringInfo &funcinfo)
- : TLI(tli), DAG(dag), TD(DAG.getTarget().getTargetData()),
+ : TLI(tli), DAG(dag), AA(aa), TD(DAG.getTarget().getTargetData()),
JT(0,0,0,0), FuncInfo(funcinfo) {
}
@@ -450,6 +461,139 @@
return Root;
}
+ /* FIXME: copied from DAGCombiner.cpp */
+ /// FindBaseOffset - Return true if base is known not to alias with anything
+ /// but itself. Provides base object and offset as results.
+ bool FindBaseOffset(SDOperand Ptr, SDOperand &Base, int64_t &Offset)
{
+ // Assume it is a primitive operation.
+ Base = Ptr; Offset = 0;
+
+ // If it's an adding a simple constant then integrate the offset.
+ if (Base.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+ Base = Base.getOperand(0);
+ Offset += C->getValue();
+ }
+ }
+
+ // If it's any of the following then it can't alias with anything
but itself.
+ return isa<FrameIndexSDNode>(Base) ||
+ isa<ConstantPoolSDNode>(Base) ||
+ isa<GlobalAddressSDNode>(Base);
+ }
+
+ /* FIXME: copied from DAGCombiner.cpp */
+ /// isAlias - Return true if there is any possibility that the two addresses
+ /// overlap.
+ bool isAlias(SDOperand Ptr1, int64_t Size1,
+ const Value *SrcValue1, int SrcValueOffset1,
+ SDOperand Ptr2, int64_t Size2,
+ const Value *SrcValue2, int SrcValueOffset2)
+ {
+ // If they are the same then they must be aliases.
+ if (Ptr1 == Ptr2) return true;
+
+ // Gather base node and offset information.
+ SDOperand Base1, Base2;
+ int64_t Offset1, Offset2;
+ bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1);
+ bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2);
+
+ // If they have a same base address then...
+ if (Base1 == Base2) {
+ // Check to see if the addresses overlap.
+ return!((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <=
Offset1);
+ }
+
+ // If we know both bases then they can't alias.
+ if (KnownBase1 && KnownBase2) return false;
+
+ if (SchedulerMemoryDisambiguationAA) {
+ // Use alias analysis information.
+ int Overlap1 = Size1 + SrcValueOffset1 + Offset1;
+ int Overlap2 = Size2 + SrcValueOffset2 + Offset2;
+ AliasAnalysis::AliasResult AAResult =
+ AA.alias(SrcValue1, Overlap1, SrcValue2,
Overlap2);
+ if (AAResult == AliasAnalysis::NoAlias)
+ return false;
+ }
+
+ // Otherwise we have to assume they alias.
+ return true;
+ }
+
+ /* FIXME: copied from DAGCombiner.cpp */
+ /// FindAliasInfo - Extracts the relevant alias information from the memory
+ /// node. Returns true if the operand was a load.
+ bool FindAliasInfo(SDNode *N,
+ SDOperand &Ptr, int64_t &Size,
+ const Value *&SrcValue, int &SrcValueOffset) {
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ Size = MVT::getSizeInBits(LD->getLoadedVT()) >> 3;
+ SrcValue = LD->getSrcValue();
+ SrcValueOffset = LD->getSrcValueOffset();
+ return true;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ Size = MVT::getSizeInBits(ST->getStoredVT()) >> 3;
+ SrcValue = ST->getSrcValue();
+ SrcValueOffset = ST->getSrcValueOffset();
+ } else {
+ assert(0 && "FindAliasInfo expected a memory operand");
+ }
+
+ return false;
+ }
+
+ /// getStoreRoot - Return a virtual root for a store to a specified address.
+ ///
+ SDOperand getStoreRoot(SDOperand StoreAddr,
+ int64_t StoreSize,
+ const Value *StoreAddrValue,
+ int StoreOffset) {
+ // The special behavior may be disabled by an option.
+ if (!SchedulerMemoryDisambiguation)
+ return getRoot();
+
+ std::vector<SDOperand> DependentMemOps, IndependentMemOps;
+
+ for (std::vector<SDOperand>::iterator I = PendingLoads.begin(),
+ E = PendingLoads.end(); I != E; ++I) {
+ SDOperand PendingMemOp = *I;
+
+ SDOperand OpPtr;
+ int64_t OpSize;
+ const Value *OpValue;
+ int OpOffset;
+ FindAliasInfo(PendingMemOp.Val, OpPtr, OpSize, OpValue, OpOffset);
+
+ if (isAlias(OpPtr, OpSize, OpValue, OpOffset,
+ StoreAddr, StoreSize, StoreAddrValue, StoreOffset)) {
+ DependentMemOps.push_back(PendingMemOp);
+ } else {
+ IndependentMemOps.push_back(PendingMemOp);
+ }
+ }
+
+ if (DependentMemOps.empty())
+ return DAG.getRoot();
+
+ if (DependentMemOps.size() == 1) {
+ SDOperand Root = DependentMemOps[0];
+ DAG.setRoot(Root);
+ PendingLoads = IndependentMemOps;
+ return Root;
+ }
+
+ // Otherwise, we have to make a token factor node.
+ SDOperand Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &DependentMemOps[0],
DependentMemOps.size());
+ PendingLoads = IndependentMemOps;
+ DAG.setRoot(Root);
+ return Root;
+ }
+
SDOperand CopyValueToVirtualRegister(Value *V, unsigned Reg);
void visit(Instruction &I) { visit(I.getOpcode(), I); }
@@ -470,9 +614,13 @@
void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; }
SDOperand getLoadFrom(const Type *Ty, SDOperand Ptr,
- const Value *SV, SDOperand Root,
+ const Value *PtrV, int Offset,
bool isVolatile);
+ void getStoreTo(SDOperand Src, const Value *SrcV,
+ SDOperand Ptr, const Value *PtrV,
+ int Offset, bool isVolatile);
+
SDOperand getIntPtrConstant(uint64_t Val) {
return DAG.getConstant(Val, TLI.getPointerTy());
}
@@ -1814,28 +1962,28 @@
void SelectionDAGLowering::visitLoad(LoadInst &I) {
SDOperand Ptr = getValue(I.getOperand(0));
+ setValue(&I, getLoadFrom(I.getType(), Ptr, I.getOperand(0),
+ 0, I.isVolatile()));
+}
+
+SDOperand SelectionDAGLowering::getLoadFrom(const Type *Ty,
+ SDOperand Ptr, const Value *PtrV,
+ int Offset, bool isVolatile) {
SDOperand Root;
- if (I.isVolatile())
+ if (isVolatile)
Root = getRoot();
else {
// Do not serialize non-volatile loads against each other.
Root = DAG.getRoot();
}
- setValue(&I, getLoadFrom(I.getType(), Ptr, I.getOperand(0),
- Root, I.isVolatile()));
-}
-
-SDOperand SelectionDAGLowering::getLoadFrom(const Type *Ty, SDOperand Ptr,
- const Value *SV, SDOperand Root,
- bool isVolatile) {
SDOperand L;
if (const PackedType *PTy = dyn_cast<PackedType>(Ty)) {
MVT::ValueType PVT = TLI.getValueType(PTy->getElementType());
L = DAG.getVecLoad(PTy->getNumElements(), PVT, Root, Ptr,
- DAG.getSrcValue(SV));
+ DAG.getSrcValue(PtrV));
} else {
- L = DAG.getLoad(TLI.getValueType(Ty), Root, Ptr, SV, isVolatile);
+ L = DAG.getLoad(TLI.getValueType(Ty), Root, Ptr, PtrV, Offset, isVolatile);
}
if (isVolatile)
@@ -1848,13 +1996,37 @@
void SelectionDAGLowering::visitStore(StoreInst &I) {
- Value *SrcV = I.getOperand(0);
- SDOperand Src = getValue(SrcV);
+ SDOperand Src = getValue(I.getOperand(0));
SDOperand Ptr = getValue(I.getOperand(1));
- DAG.setRoot(DAG.getStore(getRoot(), Src, Ptr, I.getOperand(1),
- I.isVolatile()));
+
+ getStoreTo(Src, I.getOperand(0),
+ Ptr, I.getOperand(1),
+ 0, I.isVolatile());
+}
+
+void SelectionDAGLowering::getStoreTo(SDOperand Src, const Value *SrcV,
+ SDOperand Ptr, const Value *PtrV,
+ int Offset, bool isVolatile) {
+
+ SDOperand Root;
+ if (isVolatile)
+ Root = getRoot();
+ else {
+ // Do not serialize independent non-volatile references.
+ Root = getStoreRoot(Ptr, MVT::getSizeInBits(Src.getValueType()),
+ PtrV, Offset);
+ }
+
+ SDOperand S = DAG.getStore(Root, Src, Ptr,
+ PtrV, Offset, isVolatile);
+
+ if (isVolatile)
+ DAG.setRoot(S.getValue(0));
+ else
+ PendingLoads.push_back(S.getValue(0));
}
+
/// IntrinsicCannotAccessMemory - Return true if the specified intrinsic cannot
/// access memory and has no other side effects at all.
static bool IntrinsicCannotAccessMemory(unsigned IntrinsicID) {
@@ -3987,7 +4159,7 @@
void SelectionDAGISel::BuildSelectionDAG(SelectionDAG &DAG, BasicBlock
*LLVMBB,
std::vector<std::pair<MachineInstr*, unsigned> >
&PHINodesToUpdate,
FunctionLoweringInfo &FuncInfo) {
- SelectionDAGLowering SDL(DAG, TLI, FuncInfo);
+ SelectionDAGLowering SDL(DAG, TLI, getAnalysis<AliasAnalysis>(),
FuncInfo);
std::vector<SDOperand> UnorderedChains;
@@ -4191,7 +4363,7 @@
assert(SwitchCases.empty() && "Cannot have jump table and
lowered switch");
SelectionDAG SDAG(TLI, MF, getAnalysisToUpdate<MachineDebugInfo>());
CurDAG = &SDAG;
- SelectionDAGLowering SDL(SDAG, TLI, FuncInfo);
+ SelectionDAGLowering SDL(SDAG, TLI, getAnalysis<AliasAnalysis>(),
FuncInfo);
MachineBasicBlock *RangeBB = BB;
// Set the current basic block to the mbb we wish to insert the code into
BB = JT.MBB;
@@ -4235,7 +4407,7 @@
for (unsigned i = 0, e = SwitchCases.size(); i != e; ++i) {
SelectionDAG SDAG(TLI, MF, getAnalysisToUpdate<MachineDebugInfo>());
CurDAG = &SDAG;
- SelectionDAGLowering SDL(SDAG, TLI, FuncInfo);
+ SelectionDAGLowering SDL(SDAG, TLI, getAnalysis<AliasAnalysis>(),
FuncInfo);
// Set the current basic block to the mbb we wish to insert the code into
BB = SwitchCases[i].ThisBB;