thr3ads.net - llvm dev - [llvm-dev] persuading licm to do the right thing [Dec 2015]

If this information is useful, please help other people find it:
Share via:

Preston Briggs via llvm-dev

2015-Dec-09 06:56 UTC

[llvm-dev] persuading licm to do the right thing

When I compile two different modules using

clang -O -S -emit-llvm


I get different .ll files, no surprise.

The first looks like

double *v;

double zap(long n) {
  double sum = 0;
  for (long i = 0; i < n; i++)
    sum += v[i];
  return sum;
}


yielding

@v = common global double* null, align 8

; Function Attrs: nounwind readonly uwtable
define double @zap(i64 %n) #0 {
entry:
  %cmp4 = icmp sgt i64 %n, 0
  br i1 %cmp4, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  %0 = load double** @v, align 8, !tbaa !1
  br label %for.body

for.body:                                         ; preds = %for.body, %
for.body.lr.ph
  %i.06 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
  %sum.05 = phi double [ 0.000000e+00, %for.body.lr.ph ], [ %add, %for.body
]
  %arrayidx = getelementptr inbounds double* %0, i64 %i.06
  %1 = load double* %arrayidx, align 8, !tbaa !5
  %add = fadd double %sum.05, %1
  %inc = add nsw i64 %i.06, 1

%exitcond = icmp eq i64 %inc, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body,
%entry
  %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ]
  ret double %sum.0.lcssa
}


and the second looks like

double v[10000];

double zap(long n) {
  double sum = 0;
  for (long i = 0; i < n; i++)
    sum += v[i];
  return sum;
}


yielding

; ModuleID = 'z.c'
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@v = common global [10000 x double] zeroinitializer, align 16

; Function Attrs: nounwind readonly uwtable
define double @zap(i64 %n) #0 {
entry:
  %cmp4 = icmp sgt i64 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry,
%for.body
  %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %sum.05 = phi double [ %add, %for.body ], [ 0.000000e+00, %entry ]
  %arrayidx = getelementptr inbounds [10000 x double]* @v, i64 0, i64 %i.06
  %0 = load double* %arrayidx, align 8, !tbaa !1
  %add = fadd double %sum.05, %0
  %inc = add nsw i64 %i.06, 1
  %exitcond = icmp eq i64 %inc, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body,
%entry
  %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ]
  ret double %sum.0.lcssa
}

attributes #0 = { nounwind readonly uwtable
"less-precise-fpmad"="false"
"no-frame-pointer-elim"="false"
"no-infs-fp-math"="false"
"no-nans-fp-math"="false"
"stack-protector-buffer-size"="8"
"unsafe-fp-math"="false"
"use-soft-float"="false" }

!llvm.ident = !{!0}

!0 = metadata !{metadata !"Clang Front-End version 3.4.1
(tags/RELEASE_34/final)"}
!1 = metadata !{metadata !2, metadata !2, i64 0}
!2 = metadata !{metadata !"double", metadata !3, i64 0}
!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
!4 = metadata !{metadata !"Simple C/C++ TBAA"}


(I included all the metadata and such for the 2nd case, on the off chance
it matters.)

Is there any way I can convince licm (or something) to rip open the GEP and
hoist the reference to @v outside the loop, similar to the first example?

Thanks,
Preston
-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20151208/4c81fefb/attachment.html>

Mehdi Amini via llvm-dev

2015-Dec-09 07:21 UTC

head link

[llvm-dev] persuading licm to do the right thing

Hi Preston,
> On Dec 8, 2015, at 10:56 PM, Preston Briggs via llvm-dev <llvm-dev at
lists.llvm.org> wrote:
> 
> When I compile two different modules using
> 
> clang -O -S -emit-llvm
> 
> I get different .ll files, no surprise.
> 
> The first looks like
> 
> double *v;
> 
> double zap(long n) {
>   double sum = 0;
>   for (long i = 0; i < n; i++)
>     sum += v[i];
>   return sum;
> }
> 
> yielding
> 
> @v = common global double* null, align 8
> 
> ; Function Attrs: nounwind readonly uwtable
> define double @zap(i64 %n) #0 {
> entry:
>   %cmp4 = icmp sgt i64 %n, 0
>   br i1 %cmp4, label %for.body.lr.ph, label %for.end
> 
> for.body.lr.ph:                                   ; preds = %entry
>   %0 = load double** @v, align 8, !tbaa !1
>   br label %for.body
> 
> for.body:                                         ; preds = %for.body, %for.body.lr.ph
>   %i.06 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
>   %sum.05 = phi double [ 0.000000e+00, %for.body.lr.ph ], [ %add, %for.body ]
>   %arrayidx = getelementptr inbounds double* %0, i64 %i.06
>   %1 = load double* %arrayidx, align 8, !tbaa !5
>   %add = fadd double %sum.05, %1
>   %inc = add nsw i64 %i.06, 1
>   
> %exitcond = icmp eq i64 %inc, %n
>   br i1 %exitcond, label %for.end, label %for.body
> 
> for.end:                                          ; preds = %for.body,
%entry
>   %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ]
>   ret double %sum.0.lcssa
> }
> 
> and the second looks like
> 
> double v[10000];
> 
> double zap(long n) {
>   double sum = 0;
>   for (long i = 0; i < n; i++)
>     sum += v[i];
>   return sum;
> }
> 
> yielding
> 
> ; ModuleID = 'z.c'
> target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64-S128"
> target triple = "x86_64-unknown-linux-gnu"
> 
> @v = common global [10000 x double] zeroinitializer, align 16
> 
> ; Function Attrs: nounwind readonly uwtable
> define double @zap(i64 %n) #0 {
> entry:
>   %cmp4 = icmp sgt i64 %n, 0
>   br i1 %cmp4, label %for.body, label %for.end
> 
> for.body:                                         ; preds = %entry,
%for.body
>   %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
>   %sum.05 = phi double [ %add, %for.body ], [ 0.000000e+00, %entry ]
>   %arrayidx = getelementptr inbounds [10000 x double]* @v, i64 0, i64 %i.06
>   %0 = load double* %arrayidx, align 8, !tbaa !1
>   %add = fadd double %sum.05, %0
>   %inc = add nsw i64 %i.06, 1
>   %exitcond = icmp eq i64 %inc, %n
>   br i1 %exitcond, label %for.end, label %for.body
> 
> for.end:                                          ; preds = %for.body,
%entry
>   %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ]
>   ret double %sum.0.lcssa
> }
> 
> attributes #0 = { nounwind readonly uwtable
"less-precise-fpmad"="false"
"no-frame-pointer-elim"="false"
"no-infs-fp-math"="false"
"no-nans-fp-math"="false"
"stack-protector-buffer-size"="8"
"unsafe-fp-math"="false"
"use-soft-float"="false" }
> 
> !llvm.ident = !{!0}
> 
> !0 = metadata !{metadata !"Clang Front-End version 3.4.1
(tags/RELEASE_34/final)"}
> !1 = metadata !{metadata !2, metadata !2, i64 0}
> !2 = metadata !{metadata !"double", metadata !3, i64 0}
> !3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
> !4 = metadata !{metadata !"Simple C/C++ TBAA"}
> 
> (I included all the metadata and such for the 2nd case, on the off chance
it matters.)
> 
> Is there any way I can convince licm (or something) to rip open the GEP and
hoist the reference to @v outside the loop, similar to the first example?

I believe that in the second case there is no need to load the address of v,
because it is a constant. However, what you have is the constant address of an
array, represented by [10000 x double]* @v in the IR, and indexing into it
requires the two-level GEP (a leading index of 0 to step through the pointer,
followed by the element index).

You “could” manage to represent it this way:

define double @zap(i64 %n) #0 {
entry:
  %cmp6 = icmp sgt i64 %n, 0
  %hoisted = bitcast [10000 x double]* @v to double*
  br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.cond.cleanup.loopexit:                        ; preds = %for.body
  %add.lcssa = phi double [ %add, %for.body ]
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds =
%for.cond.cleanup.loopexit, %entry
  %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa,
%for.cond.cleanup.loopexit ]
  ret double %sum.0.lcssa

for.body:                                         ; preds = %for.body.preheader,
%for.body
  %i.08 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
  %sum.07 = phi double [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader
]
  %arrayidx = getelementptr double, double* %hoisted, i64 %i.08
  %0 = load double, double* %arrayidx, align 8, !tbaa !2
  %add = fadd double %sum.07, %0
  %inc = add nuw nsw i64 %i.08, 1
  %exitcond = icmp eq i64 %inc, %n
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}


However, instcombine will recanonicalize it back to its original form.

Since it is a GEP that operates on a constant address, this shouldn’t matter.
Why would you want to split it?

Best,

— 
Mehdi

-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20151208/4541186a/attachment.html>

Preston Briggs via llvm-dev

2015-Dec-09 15:13 UTC

head link

[llvm-dev] persuading licm to do the right thing

On some targets with limited addressing modes,
getting that 64-bit relocatable but loop-invariant value into a register
requires several instructions. I'd like those several instructions outside
the loop, where they belong.

Yes, my experience is that something (I assume instcombine) recanonicalizes.

Thanks,
Preston


On Tue, Dec 8, 2015 at 11:21 PM, Mehdi Amini <mehdi.amini at apple.com>
wrote:
> Hi Preston,
>
> On Dec 8, 2015, at 10:56 PM, Preston Briggs via llvm-dev <
> llvm-dev at lists.llvm.org> wrote:
>
> When I compile two different modules using
>
> clang -O -S -emit-llvm
>
>
> I get different .ll files, no surprise.
>
> The first looks like
>
> double *v;
>
> double zap(long n) {
>   double sum = 0;
>   for (long i = 0; i < n; i++)
>     sum += v[i];
>   return sum;
> }
>
>
> yielding
>
> @v = common global double* null, align 8
>
> ; Function Attrs: nounwind readonly uwtable
> define double @zap(i64 %n) #0 {
> entry:
>   %cmp4 = icmp sgt i64 %n, 0
>   br i1 %cmp4, label %for.body.lr.ph, label %for.end
>
> for.body.lr.ph:                                   ; preds = %entry
>   %0 = load double** @v, align 8, !tbaa !1
>   br label %for.body
>
> for.body:                                         ; preds = %for.body, %
> for.body.lr.ph
>   %i.06 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
>   %sum.05 = phi double [ 0.000000e+00, %for.body.lr.ph ], [ %add,
> %for.body ]
>   %arrayidx = getelementptr inbounds double* %0, i64 %i.06
>   %1 = load double* %arrayidx, align 8, !tbaa !5
>   %add = fadd double %sum.05, %1
>   %inc = add nsw i64 %i.06, 1
>
> %exitcond = icmp eq i64 %inc, %n
>   br i1 %exitcond, label %for.end, label %for.body
>
> for.end:                                          ; preds = %for.body,
> %entry
>   %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ]
>   ret double %sum.0.lcssa
> }
>
>
> and the second looks like
>
> double v[10000];
>
> double zap(long n) {
>   double sum = 0;
>   for (long i = 0; i < n; i++)
>     sum += v[i];
>   return sum;
> }
>
>
> yielding
>
> ; ModuleID = 'z.c'
> target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64-S128"
> target triple = "x86_64-unknown-linux-gnu"
>
> @v = common global [10000 x double] zeroinitializer, align 16
>
> ; Function Attrs: nounwind readonly uwtable
> define double @zap(i64 %n) #0 {
> entry:
>   %cmp4 = icmp sgt i64 %n, 0
>   br i1 %cmp4, label %for.body, label %for.end
>
> for.body:                                         ; preds = %entry,
> %for.body
>   %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
>   %sum.05 = phi double [ %add, %for.body ], [ 0.000000e+00, %entry ]
>   %arrayidx = getelementptr inbounds [10000 x double]* @v, i64 0, i64 %i.06
>   %0 = load double* %arrayidx, align 8, !tbaa !1
>   %add = fadd double %sum.05, %0
>   %inc = add nsw i64 %i.06, 1
>   %exitcond = icmp eq i64 %inc, %n
>   br i1 %exitcond, label %for.end, label %for.body
>
> for.end:                                          ; preds = %for.body,
> %entry
>   %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ]
>   ret double %sum.0.lcssa
> }
>
> attributes #0 = { nounwind readonly uwtable
"less-precise-fpmad"="false"
> "no-frame-pointer-elim"="false"
"no-infs-fp-math"="false"
> "no-nans-fp-math"="false"
"stack-protector-buffer-size"="8"
> "unsafe-fp-math"="false"
"use-soft-float"="false" }
>
> !llvm.ident = !{!0}
>
> !0 = metadata !{metadata !"Clang Front-End version 3.4.1
> (tags/RELEASE_34/final)"}
> !1 = metadata !{metadata !2, metadata !2, i64 0}
> !2 = metadata !{metadata !"double", metadata !3, i64 0}
> !3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
> !4 = metadata !{metadata !"Simple C/C++ TBAA"}
>
>
> (I included all the metadata and such for the 2nd case, on the off chance
> it matters.)
>
> Is there any way I can convince licm (or something) to rip open the GEP
> and hoist the reference to @v outside the loop, similar to the first
> example?
>
>
>
> I believe that in the second case, there is no need to load the address of
> v as it is constant. However you have a constant address to an array, which
> is represented by [10000 x double]* @v in the IR, which requires to use the
> two-level GEP.
>
> You “could” manage to represent it this way:
>
> define double @zap(i64 %n) #0 {
> entry:
>   %cmp6 = icmp sgt i64 %n, 0
>   %hoisted = bitcast [10000 x double]* @v to double*
>   br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
>
> for.body.preheader:                               ; preds = %entry
>   br label %for.body
>
> for.cond.cleanup.loopexit:                        ; preds = %for.body
>   %add.lcssa = phi double [ %add, %for.body ]
>   br label %for.cond.cleanup
>
> for.cond.cleanup:                                 ; preds =
> %for.cond.cleanup.loopexit, %entry
>   %sum.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa,
> %for.cond.cleanup.loopexit ]
>   ret double %sum.0.lcssa
>
> for.body:                                         ; preds =
> %for.body.preheader, %for.body
>   %i.08 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
>   %sum.07 = phi double [ %add, %for.body ], [ 0.000000e+00,
> %for.body.preheader ]
>   %arrayidx = getelementptr double, double* %hoisted, i64 %i.08
>   %0 = load double, double* %arrayidx, align 8, !tbaa !2
>   %add = fadd double %sum.07, %0
>   %inc = add nuw nsw i64 %i.08, 1
>   %exitcond = icmp eq i64 %inc, %n
>   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
> }
>
>
> However instcombine will recanonicalize it like it was originally.
>
> Since it is a GEP that operate on a constant address, this shouldn’t
> matter, why would you want to split this?
>
> Best,
>
> —
> Mehdi
>
>-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20151209/3af77054/attachment-0001.html>

llvm dev - Dec 2015 - persuading licm to do the right thing

[llvm-dev] persuading licm to do the right thing

[llvm-dev] persuading licm to do the right thing

[llvm-dev] persuading licm to do the right thing