Preston Briggs via llvm-dev
2015-Dec-09 06:56 UTC
[llvm-dev] persuading licm to do the right thing
When I compile two different modules using clang -O -S -emit-llvm I get different .ll files, no surprise. The first looks like double *v; double zap(long n) { double sum = 0; for (long i = 0; i < n; i++) sum += v[i]; return sum; } yielding @v = common global double* null, align 8 ; Function Attrs: nounwind readonly uwtable define double @zap(i64 %n) #0 { entry: %cmp4 = icmp sgt i64 %n, 0 br i1 %cmp4, label %for.body.lr.ph, label %for.end for.body.lr.ph: ; preds = %entry %0 = load double** @v, align 8, !tbaa !1 br label %for.body for.body: ; preds = %for.body, %for.body.lr.ph %i.06 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] %sum.05 = phi double [ 0.000000e+00, %for.body.lr.ph ], [ %add, %for.body ] %arrayidx = getelementptr inbounds double* %0, i64 %i.06 %1 = load double* %arrayidx, align 8, !tbaa !5 %add = fadd double %sum.05, %1 %inc = add nsw i64 %i.06, 1 %exitcond = icmp eq i64 %inc, %n br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body, %entry %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ] ret double %sum.0.lcssa } and the second looks like double v[10000]; double zap(long n) { double sum = 0; for (long i = 0; i < n; i++) sum += v[i]; return sum; } yielding ; ModuleID = 'z.c' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @v = common global [10000 x double] zeroinitializer, align 16 ; Function Attrs: nounwind readonly uwtable define double @zap(i64 %n) #0 { entry: %cmp4 = icmp sgt i64 %n, 0 br i1 %cmp4, label %for.body, label %for.end for.body: ; preds = %entry, %for.body %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ] %sum.05 = phi double [ %add, %for.body ], [ 0.000000e+00, %entry ] %arrayidx = getelementptr inbounds [10000 x double]* @v, i64 0, i64 %i.06 %0 = load double* %arrayidx, align 8, !tbaa !1 %add = fadd double 
%sum.05, %0 %inc = add nsw i64 %i.06, 1 %exitcond = icmp eq i64 %inc, %n br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body, %entry %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ] ret double %sum.0.lcssa } attributes #0 = { nounwind readonly uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.ident = !{!0} !0 = metadata !{metadata !"Clang Front-End version 3.4.1 (tags/RELEASE_34/final)"} !1 = metadata !{metadata !2, metadata !2, i64 0} !2 = metadata !{metadata !"double", metadata !3, i64 0} !3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} !4 = metadata !{metadata !"Simple C/C++ TBAA"} (I included all the metadata and such for the 2nd case, on the off chance it matters.) Is there any way I can convince licm (or something) to rip open the GEP and hoist the reference to @v outside the loop, similar to the first example? Thanks, Preston -------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20151208/4c81fefb/attachment.html>
Mehdi Amini via llvm-dev
2015-Dec-09 07:21 UTC
[llvm-dev] persuading licm to do the right thing
Hi Preston,> On Dec 8, 2015, at 10:56 PM, Preston Briggs via llvm-dev <llvm-dev at lists.llvm.org> wrote: > > When I compile two different modules using > > clang -O -S -emit-llvm > > I get different .ll files, no surprise. > > The first looks like > > double *v; > > double zap(long n) { > double sum = 0; > for (long i = 0; i < n; i++) > sum += v[i]; > return sum; > } > > yielding > > @v = common global double* null, align 8 > > ; Function Attrs: nounwind readonly uwtable > define double @zap(i64 %n) #0 { > entry: > %cmp4 = icmp sgt i64 %n, 0 > br i1 %cmp4, label %for.body.lr.ph <http://for.body.lr.ph/>, label %for.end > > for.body.lr.ph <http://for.body.lr.ph/>: ; preds = %entry > %0 = load double** @v, align 8, !tbaa !1 > br label %for.body > > for.body: ; preds = %for.body, %for.body.lr.ph <http://for.body.lr.ph/> > %i.06 = phi i64 [ 0, %for.body.lr.ph <http://for.body.lr.ph/> ], [ %inc, %for.body ] > %sum.05 = phi double [ 0.000000e+00, %for.body.lr.ph <http://for.body.lr.ph/> ], [ %add, %for.body ] > %arrayidx = getelementptr inbounds double* %0, i64 %i.06 > %1 = load double* %arrayidx, align 8, !tbaa !5 > %add = fadd double %sum.05, %1 > %inc = add nsw i64 %i.06, 1 > > %exitcond = icmp eq i64 %inc, %n > br i1 %exitcond, label %for.end, label %for.body > > for.end: ; preds = %for.body, %entry > %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ] > ret double %sum.0.lcssa > } > > and the second looks like > > double v[10000]; > > double zap(long n) { > double sum = 0; > for (long i = 0; i < n; i++) > sum += v[i]; > return sum; > } > > yielding > > ; ModuleID = 'z.c' > target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64-S128" > target triple = "x86_64-unknown-linux-gnu" > > @v = common global [10000 x double] zeroinitializer, align 16 > > ; Function Attrs: nounwind readonly uwtable > define double @zap(i64 %n) #0 { > 
entry: > %cmp4 = icmp sgt i64 %n, 0 > br i1 %cmp4, label %for.body, label %for.end > > for.body: ; preds = %entry, %for.body > %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ] > %sum.05 = phi double [ %add, %for.body ], [ 0.000000e+00, %entry ] > %arrayidx = getelementptr inbounds [10000 x double]* @v, i64 0, i64 %i.06 > %0 = load double* %arrayidx, align 8, !tbaa !1 > %add = fadd double %sum.05, %0 > %inc = add nsw i64 %i.06, 1 > %exitcond = icmp eq i64 %inc, %n > br i1 %exitcond, label %for.end, label %for.body > > for.end: ; preds = %for.body, %entry > %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ] > ret double %sum.0.lcssa > } > > attributes #0 = { nounwind readonly uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } > > !llvm.ident = !{!0} > > !0 = metadata !{metadata !"Clang Front-End version 3.4.1 (tags/RELEASE_34/final)"} > !1 = metadata !{metadata !2, metadata !2, i64 0} > !2 = metadata !{metadata !"double", metadata !3, i64 0} > !3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} > !4 = metadata !{metadata !"Simple C/C++ TBAA"} > > (I included all the metadata and such for the 2nd case, on the off chance it matters.) > > Is there any way I can convince licm (or something) to rip open the GEP and hoist the reference to @v outside the loop, similar to the first example? I believe that in the second case, there is no need to load the address of v as it is constant. However, you have a constant address to an array, which is represented by [10000 x double]* @v in the IR, which requires the use of a two-level GEP. 
You “could” manage to represent it this way: define double @zap(i64 %n) #0 { entry: %cmp6 = icmp sgt i64 %n, 0 %hoisted = bitcast [10000 x double]* @v to double* br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: ; preds = %entry br label %for.body for.cond.cleanup.loopexit: ; preds = %for.body %add.lcssa = phi double [ %add, %for.body ] br label %for.cond.cleanup for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %for.cond.cleanup.loopexit ] ret double %sum.0.lcssa for.body: ; preds = %for.body.preheader, %for.body %i.08 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ] %sum.07 = phi double [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ] %arrayidx = getelementptr double, double* %hoisted, i64 %i.08 %0 = load double, double* %arrayidx, align 8, !tbaa !2 %add = fadd double %sum.07, %0 %inc = add nuw nsw i64 %i.08, 1 %exitcond = icmp eq i64 %inc, %n br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body } However, instcombine will recanonicalize it back to its original form. Since it is a GEP that operates on a constant address, this shouldn’t matter; why would you want to split this? Best, — Mehdi -------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20151208/4541186a/attachment.html>
Preston Briggs via llvm-dev
2015-Dec-09 15:13 UTC
[llvm-dev] persuading licm to do the right thing
On some targets with limited addressing modes, getting that 64-bit relocatable but loop-invariant value into a register requires several instructions. I'd like those several instructions outside the loop, where they belong. Yes, my experience is that something (I assume instcombine) recanonicalizes. Thanks, Preston On Tue, Dec 8, 2015 at 11:21 PM, Mehdi Amini <mehdi.amini at apple.com> wrote: > Hi Preston, > > On Dec 8, 2015, at 10:56 PM, Preston Briggs via llvm-dev < > llvm-dev at lists.llvm.org> wrote: > > When I compile two different modules using > > clang -O -S -emit-llvm > > > I get different .ll files, no surprise. > > The first looks like > > double *v; > > double zap(long n) { > double sum = 0; > for (long i = 0; i < n; i++) > sum += v[i]; > return sum; > } > > > yielding > > @v = common global double* null, align 8 > > ; Function Attrs: nounwind readonly uwtable > define double @zap(i64 %n) #0 { > entry: > %cmp4 = icmp sgt i64 %n, 0 > br i1 %cmp4, label %for.body.lr.ph, label %for.end > > for.body.lr.ph: ; preds = %entry > %0 = load double** @v, align 8, !tbaa !1 > br label %for.body > > for.body: ; preds = %for.body, % > for.body.lr.ph > %i.06 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] > %sum.05 = phi double [ 0.000000e+00, %for.body.lr.ph ], [ %add, > %for.body ] > %arrayidx = getelementptr inbounds double* %0, i64 %i.06 > %1 = load double* %arrayidx, align 8, !tbaa !5 > %add = fadd double %sum.05, %1 > %inc = add nsw i64 %i.06, 1 > > %exitcond = icmp eq i64 %inc, %n > br i1 %exitcond, label %for.end, label %for.body > > for.end: ; preds = %for.body, > %entry > %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ] > ret double %sum.0.lcssa > } > > > and the second looks like > > double v[10000]; > > double zap(long n) { > double sum = 0; > for (long i = 0; i < n; i++) > sum += v[i]; > return sum; > } > > > yielding > > ; ModuleID = 'z.c' > target datalayout > 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64-S128" > target triple = "x86_64-unknown-linux-gnu" > > @v = common global [10000 x double] zeroinitializer, align 16 > > ; Function Attrs: nounwind readonly uwtable > define double @zap(i64 %n) #0 { > entry: > %cmp4 = icmp sgt i64 %n, 0 > br i1 %cmp4, label %for.body, label %for.end > > for.body: ; preds = %entry, > %for.body > %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ] > %sum.05 = phi double [ %add, %for.body ], [ 0.000000e+00, %entry ] > %arrayidx = getelementptr inbounds [10000 x double]* @v, i64 0, i64 %i.06 > %0 = load double* %arrayidx, align 8, !tbaa !1 > %add = fadd double %sum.05, %0 > %inc = add nsw i64 %i.06, 1 > %exitcond = icmp eq i64 %inc, %n > br i1 %exitcond, label %for.end, label %for.body > > for.end: ; preds = %for.body, > %entry > %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ] > ret double %sum.0.lcssa > } > > attributes #0 = { nounwind readonly uwtable "less-precise-fpmad"="false" > "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" > "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" > "unsafe-fp-math"="false" "use-soft-float"="false" } > > !llvm.ident = !{!0} > > !0 = metadata !{metadata !"Clang Front-End version 3.4.1 > (tags/RELEASE_34/final)"} > !1 = metadata !{metadata !2, metadata !2, i64 0} > !2 = metadata !{metadata !"double", metadata !3, i64 0} > !3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} > !4 = metadata !{metadata !"Simple C/C++ TBAA"} > > > (I included all the metadata and such for the 2nd case, on the off chance > it matters.) > > Is there any way I can convince licm (or something) to rip open the GEP > and hoist the reference to @v outside the loop, similar to the first > example? > > > > I believe that in the second case, there is no need to load the address of > v as it is constant. 
However you have a constant address to an array, which > is represented by [10000 x double]* @v in the IR, which requires to use the > two-level GEP. > > You “could” manage to represent it this way: > > define double @zap(i64 %n) #0 { > entry: > %cmp6 = icmp sgt i64 %n, 0 > %hoisted = bitcast [10000 x double]* @v to double* > br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup > > for.body.preheader: ; preds = %entry > br label %for.body > > for.cond.cleanup.loopexit: ; preds = %for.body > %add.lcssa = phi double [ %add, %for.body ] > br label %for.cond.cleanup > > for.cond.cleanup: ; preds > %for.cond.cleanup.loopexit, %entry > %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, > %for.cond.cleanup.loopexit ] > ret double %sum.0.lcssa > > for.body: ; preds > %for.body.preheader, %for.body > %i.08 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ] > %sum.07 = phi double [ %add, %for.body ], [ 0.000000e+00, > %for.body.preheader ] > %arrayidx = getelementptr double, double* %hoisted, i64 %i.08 > %0 = load double, double* %arrayidx, align 8, !tbaa !2 > %add = fadd double %sum.07, %0 > %inc = add nuw nsw i64 %i.08, 1 > %exitcond = icmp eq i64 %inc, %n > br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body > } > > > However instcombine will recanonicalize it like it was originally. > > Since it is a GEP that operate on a constant address, this shouldn’t > matter, why would you want to split this? > > Best, > > — > Mehdi > >-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20151209/3af77054/attachment-0001.html>