The following function compiles with -O3 into the IR shown below. http://llvm.org/docs/Passes.html#simplifycfg-simplify-the-cfg says that SimplifyCFG "Eliminates a basic block that only contains an unconditional branch", but the first and third blocks in the compiled function only contain an unconditional branch; I would have expected them to be eliminated. What am I missing? double f(double *a) { for (int i = 0; i < 1000; i++) a[i] *= 2; for (int i = 0; i < 1000; i++) a[i] *= 2; return a[0] + a[1]; } ; Function Attrs: nounwind uwtable define double @"\01?f@@YANPEAN@Z"(double* nocapture %a) #1 { overflow.checked: br label %vector.body, !dbg !18 vector.body: ; preds = %vector.body, %overflow.checked %index = phi i64 [ 0, %overflow.checked ], [ %index.next.1, %vector.body ], !dbg !18 %0 = getelementptr inbounds double, double* %a, i64 %index, !dbg !19 %1 = bitcast double* %0 to <2 x double>*, !dbg !20 %wide.load = load <2 x double>, <2 x double>* %1, align 8, !dbg !20 %2 = getelementptr double, double* %0, i64 2, !dbg !20 %3 = bitcast double* %2 to <2 x double>*, !dbg !20 %wide.load8 = load <2 x double>, <2 x double>* %3, align 8, !dbg !20 %4 = fmul <2 x double> %wide.load, <double 2.000000e+00, double 2.000000e+00>, !dbg !20 %5 = fmul <2 x double> %wide.load8, <double 2.000000e+00, double 2.000000e+00>, !dbg !20 %6 = bitcast double* %0 to <2 x double>*, !dbg !20 store <2 x double> %4, <2 x double>* %6, align 8, !dbg !20 %7 = bitcast double* %2 to <2 x double>*, !dbg !20 store <2 x double> %5, <2 x double>* %7, align 8, !dbg !20 %index.next = or i64 %index, 4, !dbg !18 %8 = getelementptr inbounds double, double* %a, i64 %index.next, !dbg !19 %9 = bitcast double* %8 to <2 x double>*, !dbg !20 %wide.load.1 = load <2 x double>, <2 x double>* %9, align 8, !dbg !20 %10 = getelementptr double, double* %8, i64 2, !dbg !20 %11 = bitcast double* %10 to <2 x double>*, !dbg !20 %wide.load8.1 = load <2 x double>, <2 x double>* %11, align 8, !dbg !20 %12 = fmul <2 x double> %wide.load.1, <double 
2.000000e+00, double 2.000000e+00>, !dbg !20 %13 = fmul <2 x double> %wide.load8.1, <double 2.000000e+00, double 2.000000e+00>, !dbg !20 %14 = bitcast double* %8 to <2 x double>*, !dbg !20 store <2 x double> %12, <2 x double>* %14, align 8, !dbg !20 %15 = bitcast double* %10 to <2 x double>*, !dbg !20 store <2 x double> %13, <2 x double>* %15, align 8, !dbg !20 %index.next.1 = add nsw i64 %index, 8, !dbg !18 %16 = icmp eq i64 %index.next.1, 1000, !dbg !18 br i1 %16, label %vector.body10.preheader, label %vector.body, !dbg !18, !llvm.loop !21 vector.body10.preheader: ; preds = %vector.body br label %vector.body10, !dbg !24 vector.body10: ; preds = %vector.body10, %vector.body10.preheader %index13 = phi i64 [ 0, %vector.body10.preheader ], [ %index.next21.1, %vector.body10 ], !dbg !25 %17 = getelementptr inbounds double, double* %a, i64 %index13, !dbg !24 %18 = bitcast double* %17 to <2 x double>*, !dbg !26 %wide.load26 = load <2 x double>, <2 x double>* %18, align 8, !dbg !26 %19 = getelementptr double, double* %17, i64 2, !dbg !26 %20 = bitcast double* %19 to <2 x double>*, !dbg !26 %wide.load27 = load <2 x double>, <2 x double>* %20, align 8, !dbg !26 %21 = fmul <2 x double> %wide.load26, <double 2.000000e+00, double 2.000000e+00>, !dbg !26 %22 = fmul <2 x double> %wide.load27, <double 2.000000e+00, double 2.000000e+00>, !dbg !26 %23 = bitcast double* %17 to <2 x double>*, !dbg !26 store <2 x double> %21, <2 x double>* %23, align 8, !dbg !26 %24 = bitcast double* %19 to <2 x double>*, !dbg !26 store <2 x double> %22, <2 x double>* %24, align 8, !dbg !26 %index.next21 = or i64 %index13, 4, !dbg !25 %25 = getelementptr inbounds double, double* %a, i64 %index.next21, !dbg !24 %26 = bitcast double* %25 to <2 x double>*, !dbg !26 %wide.load26.1 = load <2 x double>, <2 x double>* %26, align 8, !dbg !26 %27 = getelementptr double, double* %25, i64 2, !dbg !26 %28 = bitcast double* %27 to <2 x double>*, !dbg !26 %wide.load27.1 = load <2 x double>, <2 x double>* %28, align 
8, !dbg !26 %29 = fmul <2 x double> %wide.load26.1, <double 2.000000e+00, double 2.000000e+00>, !dbg !26 %30 = fmul <2 x double> %wide.load27.1, <double 2.000000e+00, double 2.000000e+00>, !dbg !26 %31 = bitcast double* %25 to <2 x double>*, !dbg !26 store <2 x double> %29, <2 x double>* %31, align 8, !dbg !26 %32 = bitcast double* %27 to <2 x double>*, !dbg !26 store <2 x double> %30, <2 x double>* %32, align 8, !dbg !26 %index.next21.1 = add nsw i64 %index13, 8, !dbg !25 %33 = icmp eq i64 %index.next21.1, 1000, !dbg !25 br i1 %33, label %middle.block11, label %vector.body10, !dbg !25, !llvm.loop !27 middle.block11: ; preds = %vector.body10 %34 = load double, double* %a, align 8, !dbg !28 %35 = getelementptr inbounds double, double* %a, i64 1, !dbg !29 %36 = load double, double* %35, align 8, !dbg !29 %37 = fadd double %34, %36, !dbg !30 ret double %37, !dbg !31 } -------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20150920/4ada174f/attachment.html>
Hi, if you use opt -simplifycfg, the third BB can be eliminated. 2015-09-20 0:03 GMT-04:00 Russell Wallace via llvm-dev < llvm-dev at lists.llvm.org>:> The following function compiles with -O3 into the following IR. > http://llvm.org/docs/Passes.html#simplifycfg-simplify-the-cfg says > > - Eliminates a basic block that only contains an unconditional branch. > > but the first and third blocks in the compiled function only contain an > unconditional branch; I would have expected them to be eliminated. What am > I missing? > > double f(double *a) { > for (int i = 0; i < 1000; i++) > a[i] *= 2; > for (int i = 0; i < 1000; i++) > a[i] *= 2; > return a[0] + a[1]; > } > > ; Function Attrs: nounwind uwtable > define double @"\01?f@@YANPEAN at Z"(double* nocapture %a) #1 { > overflow.checked: > br label %vector.body, !dbg !18 > > vector.body: ; preds = %vector.body, > %overflow.checked > %index = phi i64 [ 0, %overflow.checked ], [ %index.next.1, %vector.body > ], !dbg !18 > %0 = getelementptr inbounds double, double* %a, i64 %index, !dbg !19 > %1 = bitcast double* %0 to <2 x double>*, !dbg !20 > %wide.load = load <2 x double>, <2 x double>* %1, align 8, !dbg !20 > %2 = getelementptr double, double* %0, i64 2, !dbg !20 > %3 = bitcast double* %2 to <2 x double>*, !dbg !20 > %wide.load8 = load <2 x double>, <2 x double>* %3, align 8, !dbg !20 > %4 = fmul <2 x double> %wide.load, <double 2.000000e+00, double > 2.000000e+00>, !dbg !20 > %5 = fmul <2 x double> %wide.load8, <double 2.000000e+00, double > 2.000000e+00>, !dbg !20 > %6 = bitcast double* %0 to <2 x double>*, !dbg !20 > store <2 x double> %4, <2 x double>* %6, align 8, !dbg !20 > %7 = bitcast double* %2 to <2 x double>*, !dbg !20 > store <2 x double> %5, <2 x double>* %7, align 8, !dbg !20 > %index.next = or i64 %index, 4, !dbg !18 > %8 = getelementptr inbounds double, double* %a, i64 %index.next, !dbg !19 > %9 = bitcast double* %8 to <2 x double>*, !dbg !20 > %wide.load.1 = load <2 x double>, <2 x double>* %9, align 
8, !dbg !20 > %10 = getelementptr double, double* %8, i64 2, !dbg !20 > %11 = bitcast double* %10 to <2 x double>*, !dbg !20 > %wide.load8.1 = load <2 x double>, <2 x double>* %11, align 8, !dbg !20 > %12 = fmul <2 x double> %wide.load.1, <double 2.000000e+00, double > 2.000000e+00>, !dbg !20 > %13 = fmul <2 x double> %wide.load8.1, <double 2.000000e+00, double > 2.000000e+00>, !dbg !20 > %14 = bitcast double* %8 to <2 x double>*, !dbg !20 > store <2 x double> %12, <2 x double>* %14, align 8, !dbg !20 > %15 = bitcast double* %10 to <2 x double>*, !dbg !20 > store <2 x double> %13, <2 x double>* %15, align 8, !dbg !20 > %index.next.1 = add nsw i64 %index, 8, !dbg !18 > %16 = icmp eq i64 %index.next.1, 1000, !dbg !18 > br i1 %16, label %vector.body10.preheader, label %vector.body, !dbg !18, > !llvm.loop !21 > > vector.body10.preheader: ; preds = %vector.body > br label %vector.body10, !dbg !24 > > vector.body10: ; preds > %vector.body10, %vector.body10.preheader > %index13 = phi i64 [ 0, %vector.body10.preheader ], [ %index.next21.1, > %vector.body10 ], !dbg !25 > %17 = getelementptr inbounds double, double* %a, i64 %index13, !dbg !24 > %18 = bitcast double* %17 to <2 x double>*, !dbg !26 > %wide.load26 = load <2 x double>, <2 x double>* %18, align 8, !dbg !26 > %19 = getelementptr double, double* %17, i64 2, !dbg !26 > %20 = bitcast double* %19 to <2 x double>*, !dbg !26 > %wide.load27 = load <2 x double>, <2 x double>* %20, align 8, !dbg !26 > %21 = fmul <2 x double> %wide.load26, <double 2.000000e+00, double > 2.000000e+00>, !dbg !26 > %22 = fmul <2 x double> %wide.load27, <double 2.000000e+00, double > 2.000000e+00>, !dbg !26 > %23 = bitcast double* %17 to <2 x double>*, !dbg !26 > store <2 x double> %21, <2 x double>* %23, align 8, !dbg !26 > %24 = bitcast double* %19 to <2 x double>*, !dbg !26 > store <2 x double> %22, <2 x double>* %24, align 8, !dbg !26 > %index.next21 = or i64 %index13, 4, !dbg !25 > %25 = getelementptr inbounds double, double* %a, i64 
%index.next21, !dbg > !24 > %26 = bitcast double* %25 to <2 x double>*, !dbg !26 > %wide.load26.1 = load <2 x double>, <2 x double>* %26, align 8, !dbg !26 > %27 = getelementptr double, double* %25, i64 2, !dbg !26 > %28 = bitcast double* %27 to <2 x double>*, !dbg !26 > %wide.load27.1 = load <2 x double>, <2 x double>* %28, align 8, !dbg !26 > %29 = fmul <2 x double> %wide.load26.1, <double 2.000000e+00, double > 2.000000e+00>, !dbg !26 > %30 = fmul <2 x double> %wide.load27.1, <double 2.000000e+00, double > 2.000000e+00>, !dbg !26 > %31 = bitcast double* %25 to <2 x double>*, !dbg !26 > store <2 x double> %29, <2 x double>* %31, align 8, !dbg !26 > %32 = bitcast double* %27 to <2 x double>*, !dbg !26 > store <2 x double> %30, <2 x double>* %32, align 8, !dbg !26 > %index.next21.1 = add nsw i64 %index13, 8, !dbg !25 > %33 = icmp eq i64 %index.next21.1, 1000, !dbg !25 > br i1 %33, label %middle.block11, label %vector.body10, !dbg !25, > !llvm.loop !27 > > middle.block11: ; preds = %vector.body10 > %34 = load double, double* %a, align 8, !dbg !28 > %35 = getelementptr inbounds double, double* %a, i64 1, !dbg !29 > %36 = load double, double* %35, align 8, !dbg !29 > %37 = fadd double %34, %36, !dbg !30 > ret double %37, !dbg !31 > } > > > _______________________________________________ > LLVM Developers mailing list > llvm-dev at lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev > >-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20150920/c1fada28/attachment.html>
You're right, it can indeed. Is there a reason -O3 doesn't do this? I had been expecting -O3 to perform full optimization. The first block still remains in any case. Is the first block needed for some purpose I'm not taking into account? On Sun, Sep 20, 2015 at 5:27 AM, Xiangyang Guo <eceguo at gmail.com> wrote:> Hi, > > if you use opt -simplifycfg, the third BB can be eliminated. > > 2015-09-20 0:03 GMT-04:00 Russell Wallace via llvm-dev < > llvm-dev at lists.llvm.org>: > >> The following function compiles with -O3 into the following IR. >> http://llvm.org/docs/Passes.html#simplifycfg-simplify-the-cfg says >> >> - Eliminates a basic block that only contains an unconditional branch. >> >> but the first and third blocks in the compiled function only contain an >> unconditional branch; I would have expected them to be eliminated. What am >> I missing? >> >> double f(double *a) { >> for (int i = 0; i < 1000; i++) >> a[i] *= 2; >> for (int i = 0; i < 1000; i++) >> a[i] *= 2; >> return a[0] + a[1]; >> } >> >> ; Function Attrs: nounwind uwtable >> define double @"\01?f@@YANPEAN at Z"(double* nocapture %a) #1 { >> overflow.checked: >> br label %vector.body, !dbg !18 >> >> vector.body: ; preds = %vector.body, >> %overflow.checked >> %index = phi i64 [ 0, %overflow.checked ], [ %index.next.1, >> %vector.body ], !dbg !18 >> %0 = getelementptr inbounds double, double* %a, i64 %index, !dbg !19 >> %1 = bitcast double* %0 to <2 x double>*, !dbg !20 >> %wide.load = load <2 x double>, <2 x double>* %1, align 8, !dbg !20 >> %2 = getelementptr double, double* %0, i64 2, !dbg !20 >> %3 = bitcast double* %2 to <2 x double>*, !dbg !20 >> %wide.load8 = load <2 x double>, <2 x double>* %3, align 8, !dbg !20 >> %4 = fmul <2 x double> %wide.load, <double 2.000000e+00, double >> 2.000000e+00>, !dbg !20 >> %5 = fmul <2 x double> %wide.load8, <double 2.000000e+00, double >> 2.000000e+00>, !dbg !20 >> %6 = bitcast double* %0 to <2 x double>*, !dbg !20 >> store <2 x double> %4, <2 x double>* 
%6, align 8, !dbg !20 >> %7 = bitcast double* %2 to <2 x double>*, !dbg !20 >> store <2 x double> %5, <2 x double>* %7, align 8, !dbg !20 >> %index.next = or i64 %index, 4, !dbg !18 >> %8 = getelementptr inbounds double, double* %a, i64 %index.next, !dbg >> !19 >> %9 = bitcast double* %8 to <2 x double>*, !dbg !20 >> %wide.load.1 = load <2 x double>, <2 x double>* %9, align 8, !dbg !20 >> %10 = getelementptr double, double* %8, i64 2, !dbg !20 >> %11 = bitcast double* %10 to <2 x double>*, !dbg !20 >> %wide.load8.1 = load <2 x double>, <2 x double>* %11, align 8, !dbg !20 >> %12 = fmul <2 x double> %wide.load.1, <double 2.000000e+00, double >> 2.000000e+00>, !dbg !20 >> %13 = fmul <2 x double> %wide.load8.1, <double 2.000000e+00, double >> 2.000000e+00>, !dbg !20 >> %14 = bitcast double* %8 to <2 x double>*, !dbg !20 >> store <2 x double> %12, <2 x double>* %14, align 8, !dbg !20 >> %15 = bitcast double* %10 to <2 x double>*, !dbg !20 >> store <2 x double> %13, <2 x double>* %15, align 8, !dbg !20 >> %index.next.1 = add nsw i64 %index, 8, !dbg !18 >> %16 = icmp eq i64 %index.next.1, 1000, !dbg !18 >> br i1 %16, label %vector.body10.preheader, label %vector.body, !dbg >> !18, !llvm.loop !21 >> >> vector.body10.preheader: ; preds = %vector.body >> br label %vector.body10, !dbg !24 >> >> vector.body10: ; preds >> %vector.body10, %vector.body10.preheader >> %index13 = phi i64 [ 0, %vector.body10.preheader ], [ %index.next21.1, >> %vector.body10 ], !dbg !25 >> %17 = getelementptr inbounds double, double* %a, i64 %index13, !dbg !24 >> %18 = bitcast double* %17 to <2 x double>*, !dbg !26 >> %wide.load26 = load <2 x double>, <2 x double>* %18, align 8, !dbg !26 >> %19 = getelementptr double, double* %17, i64 2, !dbg !26 >> %20 = bitcast double* %19 to <2 x double>*, !dbg !26 >> %wide.load27 = load <2 x double>, <2 x double>* %20, align 8, !dbg !26 >> %21 = fmul <2 x double> %wide.load26, <double 2.000000e+00, double >> 2.000000e+00>, !dbg !26 >> %22 = fmul <2 x double> 
%wide.load27, <double 2.000000e+00, double >> 2.000000e+00>, !dbg !26 >> %23 = bitcast double* %17 to <2 x double>*, !dbg !26 >> store <2 x double> %21, <2 x double>* %23, align 8, !dbg !26 >> %24 = bitcast double* %19 to <2 x double>*, !dbg !26 >> store <2 x double> %22, <2 x double>* %24, align 8, !dbg !26 >> %index.next21 = or i64 %index13, 4, !dbg !25 >> %25 = getelementptr inbounds double, double* %a, i64 %index.next21, >> !dbg !24 >> %26 = bitcast double* %25 to <2 x double>*, !dbg !26 >> %wide.load26.1 = load <2 x double>, <2 x double>* %26, align 8, !dbg !26 >> %27 = getelementptr double, double* %25, i64 2, !dbg !26 >> %28 = bitcast double* %27 to <2 x double>*, !dbg !26 >> %wide.load27.1 = load <2 x double>, <2 x double>* %28, align 8, !dbg !26 >> %29 = fmul <2 x double> %wide.load26.1, <double 2.000000e+00, double >> 2.000000e+00>, !dbg !26 >> %30 = fmul <2 x double> %wide.load27.1, <double 2.000000e+00, double >> 2.000000e+00>, !dbg !26 >> %31 = bitcast double* %25 to <2 x double>*, !dbg !26 >> store <2 x double> %29, <2 x double>* %31, align 8, !dbg !26 >> %32 = bitcast double* %27 to <2 x double>*, !dbg !26 >> store <2 x double> %30, <2 x double>* %32, align 8, !dbg !26 >> %index.next21.1 = add nsw i64 %index13, 8, !dbg !25 >> %33 = icmp eq i64 %index.next21.1, 1000, !dbg !25 >> br i1 %33, label %middle.block11, label %vector.body10, !dbg !25, >> !llvm.loop !27 >> >> middle.block11: ; preds = %vector.body10 >> %34 = load double, double* %a, align 8, !dbg !28 >> %35 = getelementptr inbounds double, double* %a, i64 1, !dbg !29 >> %36 = load double, double* %35, align 8, !dbg !29 >> %37 = fadd double %34, %36, !dbg !30 >> ret double %37, !dbg !31 >> } >> >> >> _______________________________________________ >> LLVM Developers mailing list >> llvm-dev at lists.llvm.org >> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev >> >> >-------------- next part -------------- An HTML attachment was scrubbed... 
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20150920/94971409/attachment.html>
Maybe Matching Threads
- simplifycfg not happening?
- Question about the order of predecessors in LoopVectorizer with VPlanNativePath
- [LLVMdev] [llvm-commits] [PATCH] BasicBlock Autovectorization Pass
- [InstCombine] rL292492 affected LoopVectorizer and caused 17.30%/11.37% perf regressions on Cortex-A53/Cortex-A15 LNT machines
- [LLVMdev] [llvm-commits] [PATCH] BasicBlock Autovectorization Pass