The following function compiles with -O3 into the following IR.
http://llvm.org/docs/Passes.html#simplifycfg-simplify-the-cfg says
- Eliminates a basic block that only contains an unconditional branch.
but the first and third blocks in the compiled function only contain an
unconditional branch; I would have expected them to be eliminated. What am
I missing?
/*
 * Reproducer for the question above: doubles the elements a[0..999] in place
 * twice, using two separate but identical loops, then returns a[0] + a[1].
 * Both loops index a[0] through a[999], so the caller must supply at least
 * 1000 doubles.
 */
double f(double *a) {
/* First pass: a[i] = a[i] * 2 for i in [0, 1000). */
for (int i = 0; i < 1000; i++)
a[i] *= 2;
/* Second pass: same operation over the same range. */
for (int i = 0; i < 1000; i++)
a[i] *= 2;
/* Sum of the first two (now twice-doubled) elements. */
return a[0] + a[1];
}
; Function Attrs: nounwind uwtable
;
; -O3 output for `double f(double *a)` from the C source above.
; The symbol is the MSVC-mangled name ?f@@YANPEAN@Z (the mailing-list archive
; had obfuscated the trailing "@Z" as " at Z").
;
; Layout:
;   overflow.checked         entry block, only branches to the first loop
;   vector.body              first loop:  a[i] *= 2 for i in [0, 1000)
;   vector.body10.preheader  only branches to the second loop
;   vector.body10            second loop: a[i] *= 2 for i in [0, 1000)
;   middle.block11           returns a[0] + a[1]
;
; Each loop iteration handles 8 doubles: two <2 x double> load/fmul/store
; pairs at %index and two more at %index | 4, then advances the index by 8.
define double @"\01?f@@YANPEAN@Z"(double* nocapture %a) #1 {
; Entry block. It only forwards to %vector.body, but %vector.body is a loop
; header with a back edge to itself, and the LLVM LangRef forbids the entry
; block of a function from having predecessors, so the two cannot be merged.
overflow.checked:
  br label %vector.body, !dbg !18

; First loop: %index runs 0, 8, 16, ... and the loop exits when it reaches 1000.
vector.body:                                      ; preds = %vector.body, %overflow.checked
  %index = phi i64 [ 0, %overflow.checked ], [ %index.next.1, %vector.body ], !dbg !18
  ; Elements [index, index+4): two 2-wide vectors at %0 and %0 + 2.
  %0 = getelementptr inbounds double, double* %a, i64 %index, !dbg !19
  %1 = bitcast double* %0 to <2 x double>*, !dbg !20
  %wide.load = load <2 x double>, <2 x double>* %1, align 8, !dbg !20
  %2 = getelementptr double, double* %0, i64 2, !dbg !20
  %3 = bitcast double* %2 to <2 x double>*, !dbg !20
  %wide.load8 = load <2 x double>, <2 x double>* %3, align 8, !dbg !20
  %4 = fmul <2 x double> %wide.load, <double 2.000000e+00, double 2.000000e+00>, !dbg !20
  %5 = fmul <2 x double> %wide.load8, <double 2.000000e+00, double 2.000000e+00>, !dbg !20
  %6 = bitcast double* %0 to <2 x double>*, !dbg !20
  store <2 x double> %4, <2 x double>* %6, align 8, !dbg !20
  %7 = bitcast double* %2 to <2 x double>*, !dbg !20
  store <2 x double> %5, <2 x double>* %7, align 8, !dbg !20
  ; Elements [index+4, index+8). %index is always a multiple of 8 (starts at 0,
  ; steps by 8), so `or %index, 4` yields %index + 4.
  %index.next = or i64 %index, 4, !dbg !18
  %8 = getelementptr inbounds double, double* %a, i64 %index.next, !dbg !19
  %9 = bitcast double* %8 to <2 x double>*, !dbg !20
  %wide.load.1 = load <2 x double>, <2 x double>* %9, align 8, !dbg !20
  %10 = getelementptr double, double* %8, i64 2, !dbg !20
  %11 = bitcast double* %10 to <2 x double>*, !dbg !20
  %wide.load8.1 = load <2 x double>, <2 x double>* %11, align 8, !dbg !20
  %12 = fmul <2 x double> %wide.load.1, <double 2.000000e+00, double 2.000000e+00>, !dbg !20
  %13 = fmul <2 x double> %wide.load8.1, <double 2.000000e+00, double 2.000000e+00>, !dbg !20
  %14 = bitcast double* %8 to <2 x double>*, !dbg !20
  store <2 x double> %12, <2 x double>* %14, align 8, !dbg !20
  %15 = bitcast double* %10 to <2 x double>*, !dbg !20
  store <2 x double> %13, <2 x double>* %15, align 8, !dbg !20
  %index.next.1 = add nsw i64 %index, 8, !dbg !18
  %16 = icmp eq i64 %index.next.1, 1000, !dbg !18
  br i1 %16, label %vector.body10.preheader, label %vector.body, !dbg !18, !llvm.loop !21

; Contains only an unconditional branch into the second loop.
; NOTE(review): presumably retained as the dedicated preheader of the second
; loop (loop-simplify form) -- confirm against the -O3 pass pipeline.
vector.body10.preheader:                          ; preds = %vector.body
  br label %vector.body10, !dbg !24

; Second loop: same shape as the first, over the same range of %a.
vector.body10:                                    ; preds = %vector.body10, %vector.body10.preheader
  %index13 = phi i64 [ 0, %vector.body10.preheader ], [ %index.next21.1, %vector.body10 ], !dbg !25
  ; Elements [index13, index13+4).
  %17 = getelementptr inbounds double, double* %a, i64 %index13, !dbg !24
  %18 = bitcast double* %17 to <2 x double>*, !dbg !26
  %wide.load26 = load <2 x double>, <2 x double>* %18, align 8, !dbg !26
  %19 = getelementptr double, double* %17, i64 2, !dbg !26
  %20 = bitcast double* %19 to <2 x double>*, !dbg !26
  %wide.load27 = load <2 x double>, <2 x double>* %20, align 8, !dbg !26
  %21 = fmul <2 x double> %wide.load26, <double 2.000000e+00, double 2.000000e+00>, !dbg !26
  %22 = fmul <2 x double> %wide.load27, <double 2.000000e+00, double 2.000000e+00>, !dbg !26
  %23 = bitcast double* %17 to <2 x double>*, !dbg !26
  store <2 x double> %21, <2 x double>* %23, align 8, !dbg !26
  %24 = bitcast double* %19 to <2 x double>*, !dbg !26
  store <2 x double> %22, <2 x double>* %24, align 8, !dbg !26
  ; Elements [index13+4, index13+8).
  %index.next21 = or i64 %index13, 4, !dbg !25
  %25 = getelementptr inbounds double, double* %a, i64 %index.next21, !dbg !24
  %26 = bitcast double* %25 to <2 x double>*, !dbg !26
  %wide.load26.1 = load <2 x double>, <2 x double>* %26, align 8, !dbg !26
  %27 = getelementptr double, double* %25, i64 2, !dbg !26
  %28 = bitcast double* %27 to <2 x double>*, !dbg !26
  %wide.load27.1 = load <2 x double>, <2 x double>* %28, align 8, !dbg !26
  %29 = fmul <2 x double> %wide.load26.1, <double 2.000000e+00, double 2.000000e+00>, !dbg !26
  %30 = fmul <2 x double> %wide.load27.1, <double 2.000000e+00, double 2.000000e+00>, !dbg !26
  %31 = bitcast double* %25 to <2 x double>*, !dbg !26
  store <2 x double> %29, <2 x double>* %31, align 8, !dbg !26
  %32 = bitcast double* %27 to <2 x double>*, !dbg !26
  store <2 x double> %30, <2 x double>* %32, align 8, !dbg !26
  %index.next21.1 = add nsw i64 %index13, 8, !dbg !25
  %33 = icmp eq i64 %index.next21.1, 1000, !dbg !25
  br i1 %33, label %middle.block11, label %vector.body10, !dbg !25, !llvm.loop !27

; Exit: reload a[0] and a[1] from memory and return their sum.
middle.block11:                                   ; preds = %vector.body10
  %34 = load double, double* %a, align 8, !dbg !28
  %35 = getelementptr inbounds double, double* %a, i64 1, !dbg !29
  %36 = load double, double* %35, align 8, !dbg !29
  %37 = fadd double %34, %36, !dbg !30
  ret double %37, !dbg !31
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20150920/4ada174f/attachment.html>
Hi, if you use opt -simplifycfg, the third BB can be eliminated. 2015-09-20 0:03 GMT-04:00 Russell Wallace via llvm-dev < llvm-dev at lists.llvm.org>:> The following function compiles with -O3 into the following IR. > http://llvm.org/docs/Passes.html#simplifycfg-simplify-the-cfg says > > - Eliminates a basic block that only contains an unconditional branch. > > but the first and third blocks in the compiled function only contain an > unconditional branch; I would have expected them to be eliminated. What am > I missing? > > double f(double *a) { > for (int i = 0; i < 1000; i++) > a[i] *= 2; > for (int i = 0; i < 1000; i++) > a[i] *= 2; > return a[0] + a[1]; > } > > ; Function Attrs: nounwind uwtable > define double @"\01?f@@YANPEAN at Z"(double* nocapture %a) #1 { > overflow.checked: > br label %vector.body, !dbg !18 > > vector.body: ; preds = %vector.body, > %overflow.checked > %index = phi i64 [ 0, %overflow.checked ], [ %index.next.1, %vector.body > ], !dbg !18 > %0 = getelementptr inbounds double, double* %a, i64 %index, !dbg !19 > %1 = bitcast double* %0 to <2 x double>*, !dbg !20 > %wide.load = load <2 x double>, <2 x double>* %1, align 8, !dbg !20 > %2 = getelementptr double, double* %0, i64 2, !dbg !20 > %3 = bitcast double* %2 to <2 x double>*, !dbg !20 > %wide.load8 = load <2 x double>, <2 x double>* %3, align 8, !dbg !20 > %4 = fmul <2 x double> %wide.load, <double 2.000000e+00, double > 2.000000e+00>, !dbg !20 > %5 = fmul <2 x double> %wide.load8, <double 2.000000e+00, double > 2.000000e+00>, !dbg !20 > %6 = bitcast double* %0 to <2 x double>*, !dbg !20 > store <2 x double> %4, <2 x double>* %6, align 8, !dbg !20 > %7 = bitcast double* %2 to <2 x double>*, !dbg !20 > store <2 x double> %5, <2 x double>* %7, align 8, !dbg !20 > %index.next = or i64 %index, 4, !dbg !18 > %8 = getelementptr inbounds double, double* %a, i64 %index.next, !dbg !19 > %9 = bitcast double* %8 to <2 x double>*, !dbg !20 > %wide.load.1 = load <2 x double>, <2 x double>* %9, align 
8, !dbg !20 > %10 = getelementptr double, double* %8, i64 2, !dbg !20 > %11 = bitcast double* %10 to <2 x double>*, !dbg !20 > %wide.load8.1 = load <2 x double>, <2 x double>* %11, align 8, !dbg !20 > %12 = fmul <2 x double> %wide.load.1, <double 2.000000e+00, double > 2.000000e+00>, !dbg !20 > %13 = fmul <2 x double> %wide.load8.1, <double 2.000000e+00, double > 2.000000e+00>, !dbg !20 > %14 = bitcast double* %8 to <2 x double>*, !dbg !20 > store <2 x double> %12, <2 x double>* %14, align 8, !dbg !20 > %15 = bitcast double* %10 to <2 x double>*, !dbg !20 > store <2 x double> %13, <2 x double>* %15, align 8, !dbg !20 > %index.next.1 = add nsw i64 %index, 8, !dbg !18 > %16 = icmp eq i64 %index.next.1, 1000, !dbg !18 > br i1 %16, label %vector.body10.preheader, label %vector.body, !dbg !18, > !llvm.loop !21 > > vector.body10.preheader: ; preds = %vector.body > br label %vector.body10, !dbg !24 > > vector.body10: ; preds > %vector.body10, %vector.body10.preheader > %index13 = phi i64 [ 0, %vector.body10.preheader ], [ %index.next21.1, > %vector.body10 ], !dbg !25 > %17 = getelementptr inbounds double, double* %a, i64 %index13, !dbg !24 > %18 = bitcast double* %17 to <2 x double>*, !dbg !26 > %wide.load26 = load <2 x double>, <2 x double>* %18, align 8, !dbg !26 > %19 = getelementptr double, double* %17, i64 2, !dbg !26 > %20 = bitcast double* %19 to <2 x double>*, !dbg !26 > %wide.load27 = load <2 x double>, <2 x double>* %20, align 8, !dbg !26 > %21 = fmul <2 x double> %wide.load26, <double 2.000000e+00, double > 2.000000e+00>, !dbg !26 > %22 = fmul <2 x double> %wide.load27, <double 2.000000e+00, double > 2.000000e+00>, !dbg !26 > %23 = bitcast double* %17 to <2 x double>*, !dbg !26 > store <2 x double> %21, <2 x double>* %23, align 8, !dbg !26 > %24 = bitcast double* %19 to <2 x double>*, !dbg !26 > store <2 x double> %22, <2 x double>* %24, align 8, !dbg !26 > %index.next21 = or i64 %index13, 4, !dbg !25 > %25 = getelementptr inbounds double, double* %a, i64 
%index.next21, !dbg > !24 > %26 = bitcast double* %25 to <2 x double>*, !dbg !26 > %wide.load26.1 = load <2 x double>, <2 x double>* %26, align 8, !dbg !26 > %27 = getelementptr double, double* %25, i64 2, !dbg !26 > %28 = bitcast double* %27 to <2 x double>*, !dbg !26 > %wide.load27.1 = load <2 x double>, <2 x double>* %28, align 8, !dbg !26 > %29 = fmul <2 x double> %wide.load26.1, <double 2.000000e+00, double > 2.000000e+00>, !dbg !26 > %30 = fmul <2 x double> %wide.load27.1, <double 2.000000e+00, double > 2.000000e+00>, !dbg !26 > %31 = bitcast double* %25 to <2 x double>*, !dbg !26 > store <2 x double> %29, <2 x double>* %31, align 8, !dbg !26 > %32 = bitcast double* %27 to <2 x double>*, !dbg !26 > store <2 x double> %30, <2 x double>* %32, align 8, !dbg !26 > %index.next21.1 = add nsw i64 %index13, 8, !dbg !25 > %33 = icmp eq i64 %index.next21.1, 1000, !dbg !25 > br i1 %33, label %middle.block11, label %vector.body10, !dbg !25, > !llvm.loop !27 > > middle.block11: ; preds = %vector.body10 > %34 = load double, double* %a, align 8, !dbg !28 > %35 = getelementptr inbounds double, double* %a, i64 1, !dbg !29 > %36 = load double, double* %35, align 8, !dbg !29 > %37 = fadd double %34, %36, !dbg !30 > ret double %37, !dbg !31 > } > > > _______________________________________________ > LLVM Developers mailing list > llvm-dev at lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev > >-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20150920/c1fada28/attachment.html>
You're right, it can indeed. Is there a reason -O3 doesn't do this? I had been expecting -O3 to perform full optimization. The first block still remains in any case. Is the first block needed for some purpose I'm not taking into account? On Sun, Sep 20, 2015 at 5:27 AM, Xiangyang Guo <eceguo at gmail.com> wrote:> Hi, > > if you use opt -simplifycfg, the third BB can be eliminated. > > 2015-09-20 0:03 GMT-04:00 Russell Wallace via llvm-dev < > llvm-dev at lists.llvm.org>: > >> The following function compiles with -O3 into the following IR. >> http://llvm.org/docs/Passes.html#simplifycfg-simplify-the-cfg says >> >> - Eliminates a basic block that only contains an unconditional branch. >> >> but the first and third blocks in the compiled function only contain an >> unconditional branch; I would have expected them to be eliminated. What am >> I missing? >> >> double f(double *a) { >> for (int i = 0; i < 1000; i++) >> a[i] *= 2; >> for (int i = 0; i < 1000; i++) >> a[i] *= 2; >> return a[0] + a[1]; >> } >> >> ; Function Attrs: nounwind uwtable >> define double @"\01?f@@YANPEAN at Z"(double* nocapture %a) #1 { >> overflow.checked: >> br label %vector.body, !dbg !18 >> >> vector.body: ; preds = %vector.body, >> %overflow.checked >> %index = phi i64 [ 0, %overflow.checked ], [ %index.next.1, >> %vector.body ], !dbg !18 >> %0 = getelementptr inbounds double, double* %a, i64 %index, !dbg !19 >> %1 = bitcast double* %0 to <2 x double>*, !dbg !20 >> %wide.load = load <2 x double>, <2 x double>* %1, align 8, !dbg !20 >> %2 = getelementptr double, double* %0, i64 2, !dbg !20 >> %3 = bitcast double* %2 to <2 x double>*, !dbg !20 >> %wide.load8 = load <2 x double>, <2 x double>* %3, align 8, !dbg !20 >> %4 = fmul <2 x double> %wide.load, <double 2.000000e+00, double >> 2.000000e+00>, !dbg !20 >> %5 = fmul <2 x double> %wide.load8, <double 2.000000e+00, double >> 2.000000e+00>, !dbg !20 >> %6 = bitcast double* %0 to <2 x double>*, !dbg !20 >> store <2 x double> %4, <2 x double>* 
%6, align 8, !dbg !20 >> %7 = bitcast double* %2 to <2 x double>*, !dbg !20 >> store <2 x double> %5, <2 x double>* %7, align 8, !dbg !20 >> %index.next = or i64 %index, 4, !dbg !18 >> %8 = getelementptr inbounds double, double* %a, i64 %index.next, !dbg >> !19 >> %9 = bitcast double* %8 to <2 x double>*, !dbg !20 >> %wide.load.1 = load <2 x double>, <2 x double>* %9, align 8, !dbg !20 >> %10 = getelementptr double, double* %8, i64 2, !dbg !20 >> %11 = bitcast double* %10 to <2 x double>*, !dbg !20 >> %wide.load8.1 = load <2 x double>, <2 x double>* %11, align 8, !dbg !20 >> %12 = fmul <2 x double> %wide.load.1, <double 2.000000e+00, double >> 2.000000e+00>, !dbg !20 >> %13 = fmul <2 x double> %wide.load8.1, <double 2.000000e+00, double >> 2.000000e+00>, !dbg !20 >> %14 = bitcast double* %8 to <2 x double>*, !dbg !20 >> store <2 x double> %12, <2 x double>* %14, align 8, !dbg !20 >> %15 = bitcast double* %10 to <2 x double>*, !dbg !20 >> store <2 x double> %13, <2 x double>* %15, align 8, !dbg !20 >> %index.next.1 = add nsw i64 %index, 8, !dbg !18 >> %16 = icmp eq i64 %index.next.1, 1000, !dbg !18 >> br i1 %16, label %vector.body10.preheader, label %vector.body, !dbg >> !18, !llvm.loop !21 >> >> vector.body10.preheader: ; preds = %vector.body >> br label %vector.body10, !dbg !24 >> >> vector.body10: ; preds >> %vector.body10, %vector.body10.preheader >> %index13 = phi i64 [ 0, %vector.body10.preheader ], [ %index.next21.1, >> %vector.body10 ], !dbg !25 >> %17 = getelementptr inbounds double, double* %a, i64 %index13, !dbg !24 >> %18 = bitcast double* %17 to <2 x double>*, !dbg !26 >> %wide.load26 = load <2 x double>, <2 x double>* %18, align 8, !dbg !26 >> %19 = getelementptr double, double* %17, i64 2, !dbg !26 >> %20 = bitcast double* %19 to <2 x double>*, !dbg !26 >> %wide.load27 = load <2 x double>, <2 x double>* %20, align 8, !dbg !26 >> %21 = fmul <2 x double> %wide.load26, <double 2.000000e+00, double >> 2.000000e+00>, !dbg !26 >> %22 = fmul <2 x double> 
%wide.load27, <double 2.000000e+00, double >> 2.000000e+00>, !dbg !26 >> %23 = bitcast double* %17 to <2 x double>*, !dbg !26 >> store <2 x double> %21, <2 x double>* %23, align 8, !dbg !26 >> %24 = bitcast double* %19 to <2 x double>*, !dbg !26 >> store <2 x double> %22, <2 x double>* %24, align 8, !dbg !26 >> %index.next21 = or i64 %index13, 4, !dbg !25 >> %25 = getelementptr inbounds double, double* %a, i64 %index.next21, >> !dbg !24 >> %26 = bitcast double* %25 to <2 x double>*, !dbg !26 >> %wide.load26.1 = load <2 x double>, <2 x double>* %26, align 8, !dbg !26 >> %27 = getelementptr double, double* %25, i64 2, !dbg !26 >> %28 = bitcast double* %27 to <2 x double>*, !dbg !26 >> %wide.load27.1 = load <2 x double>, <2 x double>* %28, align 8, !dbg !26 >> %29 = fmul <2 x double> %wide.load26.1, <double 2.000000e+00, double >> 2.000000e+00>, !dbg !26 >> %30 = fmul <2 x double> %wide.load27.1, <double 2.000000e+00, double >> 2.000000e+00>, !dbg !26 >> %31 = bitcast double* %25 to <2 x double>*, !dbg !26 >> store <2 x double> %29, <2 x double>* %31, align 8, !dbg !26 >> %32 = bitcast double* %27 to <2 x double>*, !dbg !26 >> store <2 x double> %30, <2 x double>* %32, align 8, !dbg !26 >> %index.next21.1 = add nsw i64 %index13, 8, !dbg !25 >> %33 = icmp eq i64 %index.next21.1, 1000, !dbg !25 >> br i1 %33, label %middle.block11, label %vector.body10, !dbg !25, >> !llvm.loop !27 >> >> middle.block11: ; preds = %vector.body10 >> %34 = load double, double* %a, align 8, !dbg !28 >> %35 = getelementptr inbounds double, double* %a, i64 1, !dbg !29 >> %36 = load double, double* %35, align 8, !dbg !29 >> %37 = fadd double %34, %36, !dbg !30 >> ret double %37, !dbg !31 >> } >> >> >> _______________________________________________ >> LLVM Developers mailing list >> llvm-dev at lists.llvm.org >> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev >> >> >-------------- next part -------------- An HTML attachment was scrubbed... 
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20150920/94971409/attachment.html>
Apparently Analogous Threads
- simplifycfg not happening?
- [LLVMdev] [llvm-commits] [PATCH] BasicBlock Autovectorization Pass
- Question about the order of predecessors in LoopVectorizer with VPlanNativePath
- [LLVMdev] [llvm-commits] [PATCH] BasicBlock Autovectorization Pass
- How to invoke simplifycfg from code