Hi, Scalar evolution seems to be wrapping around the trip count in the following loop. void add (int *restrict a, int *restrict b, int *restrict c) { char i; for (i = 0; i < 255; i++) a[i] = b[i] + c[i]; } When I run scalar evolution on the bit code, I get a backedge-taken count which is obviously wrong. $> cat loop.ll ; Function Attrs: nounwind define void @add(i32* noalias nocapture %a, i32* noalias nocapture %b, i32* noalias nocapture %c) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %arrayidx.phi = phi i32* [ %b, %entry ], [ %arrayidx.inc, %for.body ] %arrayidx3.phi = phi i32* [ %c, %entry ], [ %arrayidx3.inc, %for.body ] %arrayidx5.phi = phi i32* [ %a, %entry ], [ %arrayidx5.inc, %for.body ] %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %0 = load i32* %arrayidx.phi, align 4, !tbaa !0 %1 = load i32* %arrayidx3.phi, align 4, !tbaa !0 %add = add nsw i32 %1, %0 store i32 %add, i32* %arrayidx5.phi, align 4, !tbaa !0 %indvars.iv.next = add i32 %indvars.iv, 1 %lftr.wideiv = trunc i32 %indvars.iv.next to i8 %exitcond = icmp eq i8 %lftr.wideiv, -1 %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1 %arrayidx3.inc = getelementptr i32* %arrayidx3.phi, i32 1 %arrayidx5.inc = getelementptr i32* %arrayidx5.phi, i32 1 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body ret void } $> opt -scalar-evolution -analyze loop.ll Printing analysis 'Scalar Evolution Analysis' for function 'add': Classifying expressions for: @add %arrayidx.phi = phi i32* [ %b, %entry ], [ %arrayidx.inc, %for.body ] --> {%b,+,4}<%for.body> Exits: (1016 + %b) %arrayidx3.phi = phi i32* [ %c, %entry ], [ %arrayidx3.inc, %for.body ] --> {%c,+,4}<%for.body> Exits: (1016 + %c) %arrayidx5.phi = phi i32* [ %a, %entry ], [ %arrayidx5.inc, %for.body ] --> {%a,+,4}<%for.body> Exits: (1016 + %a) %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ] --> {0,+,1}<%for.body> Exits: 254 %0 = load i32* %arrayidx.phi, align 4, !tbaa !0 --> %0 Exits: <<Unknown>> %1 = load i32* %arrayidx3.phi, align 4, !tbaa !0 --> %1 Exits: <<Unknown>> %add = add nsw i32 %1, %0 --> (%0 + %1) Exits: <<Unknown>> %indvars.iv.next = add i32 %indvars.iv, 1 --> {1,+,1}<%for.body> Exits: 255 %lftr.wideiv = trunc i32 %indvars.iv.next to i8 --> {1,+,1}<%for.body> Exits: -1 %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1 --> {(4 + %b),+,4}<%for.body> Exits: (1020 + %b) %arrayidx3.inc = getelementptr i32* %arrayidx3.phi, i32 1 --> {(4 + %c),+,4}<%for.body> Exits: (1020 + %c) %arrayidx5.inc = getelementptr i32* %arrayidx5.phi, i32 1 --> {(4 + %a),+,4}<%for.body> Exits: (1020 + %a) Determining loop execution counts for: @add Loop %for.body: backedge-taken count is -2 Loop %for.body: max backedge-taken count is -2 The problem seems to be in SCEVAddRecExpr:getNumIterationsInRange, specifically // If this is an affine expression then we have this situation: // Solve {0,+,A} in Range === Ax in Range // We know that zero is in the range. If A is positive then we know that // the upper value of the range must be the first possible exit value. // If A is negative then the lower of the range is the last possible loop // value. Also note that we already checked for a full range. APInt One(BitWidth,1); APInt A = cast<SCEVConstant>(getOperand(1))->getValue()->getValue(); APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower(); // The exit value should be (End+A)/A. APInt ExitVal = (End + A).udiv(A); ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal); In gdb, $6 = {BitWidth = 8, {VAL = 254, pVal = 0xfe}} (gdb) p ExitVal.isNegative() $7 = true (gdb) p ExitValue->dump() i8 -2 $8 = void It looks like whenever the value of ExitVal is greater than 128, ExitValue is going to be negative. This makes getBackedgeTakenCount return a negative number, which does not make sense. Any thoughts ? Pranav -- Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation
On Thu, Jul 11, 2013 at 11:16 AM, Pranav Bhandarkar <pranavb at codeaurora.org> wrote:> Hi, > > Scalar evolution seems to be wrapping around the trip count in the following > loop. > > void add (int *restrict a, int *restrict b, int *restrict c) { > char i; > for (i = 0; i < 255; i++) > a[i] = b[i] + c[i]; > } > > When I run scalar evolution on the bit code, I get a backedge-taken count > which is obviously wrong. > $> cat loop.ll > ; Function Attrs: nounwind > define void @add(i32* noalias nocapture %a, i32* noalias nocapture %b, i32* > noalias nocapture %c) #0 { > entry: > br label %for.body > > for.body: ; preds = %entry, > %for.body > %arrayidx.phi = phi i32* [ %b, %entry ], [ %arrayidx.inc, %for.body ] > %arrayidx3.phi = phi i32* [ %c, %entry ], [ %arrayidx3.inc, %for.body ] > %arrayidx5.phi = phi i32* [ %a, %entry ], [ %arrayidx5.inc, %for.body ] > %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ] > %0 = load i32* %arrayidx.phi, align 4, !tbaa !0 > %1 = load i32* %arrayidx3.phi, align 4, !tbaa !0 > %add = add nsw i32 %1, %0 > store i32 %add, i32* %arrayidx5.phi, align 4, !tbaa !0 > %indvars.iv.next = add i32 %indvars.iv, 1 > %lftr.wideiv = trunc i32 %indvars.iv.next to i8 > %exitcond = icmp eq i8 %lftr.wideiv, -1 > %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1 > %arrayidx3.inc = getelementptr i32* %arrayidx3.phi, i32 1 > %arrayidx5.inc = getelementptr i32* %arrayidx5.phi, i32 1 > br i1 %exitcond, label %for.end, label %for.body > > for.end: ; preds = %for.body > ret void > } > > $> opt -scalar-evolution -analyze loop.ll > Printing analysis 'Scalar Evolution Analysis' for function 'add': > Classifying expressions for: @add > %arrayidx.phi = phi i32* [ %b, %entry ], [ %arrayidx.inc, %for.body ] > --> {%b,+,4}<%for.body> Exits: (1016 + %b) > %arrayidx3.phi = phi i32* [ %c, %entry ], [ %arrayidx3.inc, %for.body ] > --> {%c,+,4}<%for.body> Exits: (1016 + %c) > %arrayidx5.phi = phi i32* [ %a, %entry ], [ %arrayidx5.inc, %for.body ] > --> {%a,+,4}<%for.body> Exits: (1016 + %a) > %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ] > --> {0,+,1}<%for.body> Exits: 254 > %0 = load i32* %arrayidx.phi, align 4, !tbaa !0 > --> %0 Exits: <<Unknown>> > %1 = load i32* %arrayidx3.phi, align 4, !tbaa !0 > --> %1 Exits: <<Unknown>> > %add = add nsw i32 %1, %0 > --> (%0 + %1) Exits: <<Unknown>> > %indvars.iv.next = add i32 %indvars.iv, 1 > --> {1,+,1}<%for.body> Exits: 255 > %lftr.wideiv = trunc i32 %indvars.iv.next to i8 > --> {1,+,1}<%for.body> Exits: -1 > %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1 > --> {(4 + %b),+,4}<%for.body> Exits: (1020 + %b) > %arrayidx3.inc = getelementptr i32* %arrayidx3.phi, i32 1 > --> {(4 + %c),+,4}<%for.body> Exits: (1020 + %c) > %arrayidx5.inc = getelementptr i32* %arrayidx5.phi, i32 1 > --> {(4 + %a),+,4}<%for.body> Exits: (1020 + %a) > Determining loop execution counts for: @add > Loop %for.body: backedge-taken count is -2 > Loop %for.body: max backedge-taken count is -2 > > The problem seems to be in SCEVAddRecExpr:getNumIterationsInRange, > specifically > // If this is an affine expression then we have this situation: > // Solve {0,+,A} in Range === Ax in Range > > // We know that zero is in the range. If A is positive then we know > that > // the upper value of the range must be the first possible exit value. > // If A is negative then the lower of the range is the last possible > loop > // value. Also note that we already checked for a full range. > APInt One(BitWidth,1); > APInt A > cast<SCEVConstant>(getOperand(1))->getValue()->getValue(); > APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower(); > > // The exit value should be (End+A)/A. > APInt ExitVal = (End + A).udiv(A); > ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal); > > In gdb, > $6 = {BitWidth = 8, {VAL = 254, pVal = 0xfe}} > (gdb) p ExitVal.isNegative() > $7 = true > (gdb) p ExitValue->dump() > i8 -2 > $8 = void > > It looks like whenever the value of ExitVal is greater than 128, ExitValue > is going to be negative. This makes getBackedgeTakenCount return a negative > number, which does not make sense. Any thoughts ?getBackedgeTakenCount returns an unsigned number; we're just printing it wrong. -Eli
Possibly Parallel Threads
- [LLVMdev] alias analysis on llvm internal globals
- Canonicalize induction variables
- [LLVMdev] scalar-evolution + indvars fail to get the loop trip count?
- [LLVMdev] [llvm] r184698 - Add a flag to defer vectorization into a phase after the inliner and its
- [LLVMdev] scalar-evolution + indvars fail to get the loop trip count?