thr3ads.net - llvm dev - [llvm-dev] LLVM 6.0's LoopUnroll PASS is not able to work? [Oct 2017]

If this information is useful, please help other people find it:
Share via:

Jatin Bhateja via llvm-dev

2017-Oct-26 16:54 UTC

[llvm-dev] LLVM 6.0's LoopUnroll PASS is not able to work?

Hi Leslie,

There is unroll-disable metadata (!llvm.loop !2) attached to the unCatN
loop's basic block; probably "#pragma clang loop unroll(disable)" was used
before the loop in the source.

I tried removing that and used -unroll-count=4; both the catN and unCatN
loops were then unrolled.

Options :  -mem2reg -loops -loop-simplify -loop-rotate -lcssa -loop-unroll
-unroll-count=4 -sccp -simplifycfg -o /tmp/1  -debug-only=loop-unroll.

Loop Unroll: F[catN] Loop %for.body
  Loop Size = 17
UNROLLING loop %for.body by 4!
Loop Unroll: F[unCatN] Loop %for.body
  Loop Size = 9
UNROLLING loop %for.body by 4 with run-time trip count!

Thanks,
Jatin

On Wed, Oct 25, 2017 at 10:17 AM, Leslie Zhai via llvm-dev <
llvm-dev at lists.llvm.org> wrote:
> Hi Michael,
>
> Dropped *optnone* still failed to unroll loops for this testcase:
>
> $ cat cat_state.n044.ll
> ; ModuleID = 'cat_state.n045a.ll'
> source_filename = "cat_state.n04_merged.scaffold"
> target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
> target triple = "x86_64-unknown-linux-gnu"
>
> ; Function Attrs: noinline nounwind uwtable
> define void @catN(i16* %bit, i32 %n) local_unnamed_addr #0 {
> entry:
>   %0 = load i16, i16* %bit, align 2
>   tail call void @llvm.H.i16(i16 %0)
>   %cmp1 = icmp sgt i32 %n, 1
>   br i1 %cmp1, label %for.body.lr.ph, label %for.end
>
> for.body.lr.ph:                                   ; preds = %entry
>   %1 = add i32 %n, -1
>   %2 = add i32 %n, -2
>   %xtraiter = and i32 %1, 1
>   %3 = icmp ult i32 %2, 1
>   br i1 %3, label %for.cond.for.end_crit_edge.unr-lcssa, label
> %for.body.lr.ph.new
>
> for.body.lr.ph.new:                               ; preds = %
> for.body.lr.ph
>   %unroll_iter = sub i32 %1, %xtraiter
>   br label %for.body
>
> for.body:                                         ; preds = %for.body,
> %for.body.lr.ph.new
>   %inc3 = phi i32 [ 1, %for.body.lr.ph.new ], [ %inc.1, %for.body ]
>   %niter = phi i32 [ %unroll_iter, %for.body.lr.ph.new ], [ %niter.nsub.1,
> %for.body ]
>   %idxprom = sext i32 %inc3 to i64
>   %arrayidx1 = getelementptr inbounds i16, i16* %bit, i64 %idxprom
>   %4 = load i16, i16* %arrayidx1, align 2
>   %sub = add nsw i32 %inc3, -1
>   %idxprom2 = sext i32 %sub to i64
>   %arrayidx3 = getelementptr inbounds i16, i16* %bit, i64 %idxprom2
>   %5 = load i16, i16* %arrayidx3, align 2
>   tail call void @llvm.CNOT.i16.i16(i16 %4, i16 %5)
>   %inc = add nsw i32 %inc3, 1
>   %niter.nsub = sub i32 %niter, 1
>   %idxprom.1 = sext i32 %inc to i64
>   %arrayidx1.1 = getelementptr inbounds i16, i16* %bit, i64 %idxprom.1
>   %6 = load i16, i16* %arrayidx1.1, align 2
>   %idxprom2.1 = sext i32 %inc3 to i64
>   %arrayidx3.1 = getelementptr inbounds i16, i16* %bit, i64 %idxprom2.1
>   %7 = load i16, i16* %arrayidx3.1, align 2
>   tail call void @llvm.CNOT.i16.i16(i16 %6, i16 %7)
>   %inc.1 = add nsw i32 %inc, 1
>   %niter.nsub.1 = sub i32 %niter.nsub, 1
>   %niter.ncmp.1 = icmp ne i32 %niter.nsub.1, 0
>   br i1 %niter.ncmp.1, label %for.body, label
> %for.cond.for.end_crit_edge.unr-lcssa
>
> for.cond.for.end_crit_edge.unr-lcssa:             ; preds = %for.body, %
> for.body.lr.ph
>   %inc3.unr = phi i32 [ 1, %for.body.lr.ph ], [ %inc.1, %for.body ]
>   %lcmp.mod = icmp ne i32 %xtraiter, 0
>   br i1 %lcmp.mod, label %for.body.epil, label %for.end
>
> for.body.epil:                                    ; preds =
> %for.cond.for.end_crit_edge.unr-lcssa
>   %inc3.epil = phi i32 [ %inc3.unr, %for.cond.for.end_crit_edge.unr-lcssa
> ]
>   %idxprom.epil = sext i32 %inc3.epil to i64
>   %arrayidx1.epil = getelementptr inbounds i16, i16* %bit, i64
> %idxprom.epil
>   %8 = load i16, i16* %arrayidx1.epil, align 2
>   %sub.epil = add nsw i32 %inc3.epil, -1
>   %idxprom2.epil = sext i32 %sub.epil to i64
>   %arrayidx3.epil = getelementptr inbounds i16, i16* %bit, i64
> %idxprom2.epil
>   %9 = load i16, i16* %arrayidx3.epil, align 2
>   tail call void @llvm.CNOT.i16.i16(i16 %8, i16 %9)
>   %inc.epil = add nsw i32 %inc3.epil, 1
>   %cmp.epil = icmp slt i32 %inc.epil, %n
>   br label %for.end
>
> for.end:                                          ; preds =
> %for.body.epil, %for.cond.for.end_crit_edge.unr-lcssa, %entry
>   ret void
> }
>
> ; Function Attrs: nounwind
> declare void @llvm.H.i16(i16) #1
>
> ; Function Attrs: nounwind
> declare void @llvm.CNOT.i16.i16(i16, i16) #1
>
> ; Function Attrs: noinline nounwind uwtable
> define void @unCatN(i16* %bit, i32 %n) local_unnamed_addr #0 {
> entry:
>   %storemerge1 = add nsw i32 %n, -1
>   %cmp2 = icmp sgt i32 %n, 1
>   br i1 %cmp2, label %for.body.peel, label %for.end
>
> for.body.peel:                                    ; preds = %entry
>   %idxprom.peel = sext i32 %storemerge1 to i64
>   %arrayidx.peel = getelementptr inbounds i16, i16* %bit, i64 %idxprom.peel
>   %0 = load i16, i16* %arrayidx.peel, align 2
>   %sub1.peel = add nsw i32 %n, -2
>   %idxprom2.peel = sext i32 %sub1.peel to i64
>   %arrayidx3.peel = getelementptr inbounds i16, i16* %bit, i64
> %idxprom2.peel
>   %1 = load i16, i16* %arrayidx3.peel, align 2
>   tail call void @llvm.CNOT.i16.i16(i16 %0, i16 %1)
>   %storemerge.peel = add nsw i32 %storemerge1, -1
>   %cmp.peel = icmp sgt i32 %storemerge1, 1
>   br i1 %cmp.peel, label %for.body.lr.ph.peel.newph, label %for.end
>
> for.body.lr.ph.peel.newph:                        ; preds = %for.body.peel
>   br label %for.body
>
> for.body:                                         ; preds = %for.body,
> %for.body.lr.ph.peel.newph
>   %storemerge5 = phi i32 [ %storemerge.peel, %for.body.lr.ph.peel.newph ],
> [ %storemerge, %for.body ]
>   %storemerge.in3 = phi i32 [ %storemerge1, %for.body.lr.ph.peel.newph ],
> [ %storemerge5, %for.body ]
>   %idxprom = sext i32 %storemerge5 to i64
>   %arrayidx = getelementptr inbounds i16, i16* %bit, i64 %idxprom
>   %2 = load i16, i16* %arrayidx, align 2
>   %sub1 = add nsw i32 %storemerge.in3, -2
>   %idxprom2 = sext i32 %sub1 to i64
>   %arrayidx3 = getelementptr inbounds i16, i16* %bit, i64 %idxprom2
>   %3 = load i16, i16* %arrayidx3, align 2
>   tail call void @llvm.CNOT.i16.i16(i16 %2, i16 %3)
>   %storemerge = add nsw i32 %storemerge5, -1
>   %cmp = icmp sgt i32 %storemerge5, 1
>   br i1 %cmp, label %for.body, label %for.end, !llvm.loop !2
>
> for.end:                                          ; preds = %for.body,
> %for.body.peel, %entry
>   %.lcssa = phi i16* [ %bit, %entry ], [ %bit, %for.body.peel ], [ %bit,
> %for.body ]
>   %4 = load i16, i16* %.lcssa, align 2
>   tail call void @llvm.H.i16(i16 %4)
>   ret void
> }
>
> ; Function Attrs: noinline nounwind uwtable
> define i32 @main() local_unnamed_addr #0 {
> entry:
>   %bits = alloca [4 x i16], align 2
>   %arraydecay = getelementptr inbounds [4 x i16], [4 x i16]* %bits, i64 0,
> i64 0
>   call void @catN_IP4_IPx_IPx_IPx_DPx_DPx_DPx_DPx(i16* %arraydecay, i32
> undef)
>   ret i32 0
> }
>
> define void @catN_IP4_IPx_IPx_IPx_DPx_DPx_DPx_DPx(i16* %bit, i32 %n) {
> entry.:
>   %0 = load i16, i16* %bit, align 2
>   tail call void @llvm.H.i16(i16 %0)
>   %arrayidx1. = getelementptr inbounds i16, i16* %bit, i64 1
> <https://maps.google.com/?q=i64+1&entry=gmail&source=g>
>   %1 = load i16, i16* %arrayidx1., align 2
>   %2 = load i16, i16* %bit, align 2
>   tail call void @llvm.CNOT.i16.i16(i16 %1, i16 %2)
>   %arrayidx1.1. = getelementptr inbounds i16, i16* %bit, i64 2
> <https://maps.google.com/?q=i64+2&entry=gmail&source=g>
>   %3 = load i16, i16* %arrayidx1.1., align 2
>   %arrayidx3.1. = getelementptr inbounds i16, i16* %bit, i64 1
> <https://maps.google.com/?q=i64+1&entry=gmail&source=g>
>   %4 = load i16, i16* %arrayidx3.1., align 2
>   tail call void @llvm.CNOT.i16.i16(i16 %3, i16 %4)
>   %arrayidx1.epil. = getelementptr inbounds i16, i16* %bit, i64 3
> <https://maps.google.com/?q=i64+3&entry=gmail&source=g>
>   %5 = load i16, i16* %arrayidx1.epil., align 2
>   %arrayidx3.epil. = getelementptr inbounds i16, i16* %bit, i64 2
> <https://maps.google.com/?q=i64+2&entry=gmail&source=g>
>   %6 = load i16, i16* %arrayidx3.epil., align 2
>   tail call void @llvm.CNOT.i16.i16(i16 %5, i16 %6)
>   ret void
> }
>
> attributes #0 = { noinline nounwind uwtable
"correctly-rounded-divide-sqrt-fp-math"="false"
> "disable-tail-calls"="false"
"less-precise-fpmad"="false"
> "no-frame-pointer-elim"="true"
"no-frame-pointer-elim-non-leaf"
> "no-infs-fp-math"="false"
"no-jump-tables"="false"
> "no-nans-fp-math"="false"
"no-signed-zeros-fp-math"="false"
> "no-trapping-math"="false"
"stack-protector-buffer-size"="8"
> "target-cpu"="x86-64"
"target-features"="+fxsr,+mmx,+sse,+sse2,+x87"
> "unsafe-fp-math"="false"
"use-soft-float"="false" }
> attributes #1 = { nounwind }
>
> !llvm.module.flags = !{!0}
> !llvm.ident = !{!1}
>
> !0 = !{i32 1, !"wchar_size", i32 4}
> !1 = !{!"clang version 6.0.0 (git at github.com:llvm-mirror/clang.git
> 0aed123216ad4a38a9c2b16f1783895fd5cb1a04) (git at
github.com:llvm-mirror/llvm.git
> d209b37aec1e392dabbf9b5324ea4a60c36fbc55)"}
> !2 = distinct !{!2, !3}
> !3 = !{!"llvm.loop.unroll.disable"}
>
> $(OPT) -S cat_state.n044.ll -mem2reg -loops -loop-simplify -loop-rotate
> -lcssa -loop-unroll -unroll-threshold=100000000 -sccp -simplifycfg -o
> cat_state.n045.ll
>
> $ cat cat_state.n045.ll
> ; ModuleID = 'cat_state.n044.ll'
> source_filename = "cat_state.n04_merged.scaffold"
> target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
> target triple = "x86_64-unknown-linux-gnu"
>
> ; Function Attrs: noinline nounwind uwtable
> define void @catN(i16* %bit, i32 %n) local_unnamed_addr #0 {
> entry:
>   %0 = load i16, i16* %bit, align 2
>   tail call void @llvm.H.i16(i16 %0)
>   %cmp1 = icmp sgt i32 %n, 1
>   br i1 %cmp1, label %for.body.lr.ph, label %for.end
>
> for.body.lr.ph:                                   ; preds = %entry
>   %1 = add i32 %n, -1
>   %2 = add i32 %n, -2
>   %xtraiter = and i32 %1, 1
>   %3 = icmp ult i32 %2, 1
>   br i1 %3, label %for.cond.for.end_crit_edge.unr-lcssa, label
> %for.body.lr.ph.new
>
> for.body.lr.ph.new:                               ; preds = %
> for.body.lr.ph
>   %unroll_iter = sub i32 %1, %xtraiter
>   br label %for.body
>
> for.body:                                         ; preds = %for.body,
> %for.body.lr.ph.new
>   %inc3 = phi i32 [ 1, %for.body.lr.ph.new ], [ %inc.1, %for.body ]
>   %niter = phi i32 [ %unroll_iter, %for.body.lr.ph.new ], [ %niter.nsub.1,
> %for.body ]
>   %idxprom = sext i32 %inc3 to i64
>   %arrayidx1 = getelementptr inbounds i16, i16* %bit, i64 %idxprom
>   %4 = load i16, i16* %arrayidx1, align 2
>   %sub = add nsw i32 %inc3, -1
>   %idxprom2 = sext i32 %sub to i64
>   %arrayidx3 = getelementptr inbounds i16, i16* %bit, i64 %idxprom2
>   %5 = load i16, i16* %arrayidx3, align 2
>   tail call void @llvm.CNOT.i16.i16(i16 %4, i16 %5)
>   %inc = add nsw i32 %inc3, 1
>   %niter.nsub = sub i32 %niter, 1
>   %idxprom.1 = sext i32 %inc to i64
>   %arrayidx1.1 = getelementptr inbounds i16, i16* %bit, i64 %idxprom.1
>   %6 = load i16, i16* %arrayidx1.1, align 2
>   %idxprom2.1 = sext i32 %inc3 to i64
>   %arrayidx3.1 = getelementptr inbounds i16, i16* %bit, i64 %idxprom2.1
>   %7 = load i16, i16* %arrayidx3.1, align 2
>   tail call void @llvm.CNOT.i16.i16(i16 %6, i16 %7)
>   %inc.1 = add nsw i32 %inc, 1
>   %niter.nsub.1 = sub i32 %niter.nsub, 1
>   %niter.ncmp.1 = icmp ne i32 %niter.nsub.1, 0
>   br i1 %niter.ncmp.1, label %for.body, label
> %for.cond.for.end_crit_edge.unr-lcssa
>
> for.cond.for.end_crit_edge.unr-lcssa:             ; preds = %for.body, %
> for.body.lr.ph
>   %inc3.unr = phi i32 [ 1, %for.body.lr.ph ], [ %inc.1, %for.body ]
>   %lcmp.mod = icmp ne i32 %xtraiter, 0
>   br i1 %lcmp.mod, label %for.body.epil, label %for.end
>
> for.body.epil:                                    ; preds =
> %for.cond.for.end_crit_edge.unr-lcssa
>   %inc3.epil = phi i32 [ %inc3.unr, %for.cond.for.end_crit_edge.unr-lcssa
> ]
>   %idxprom.epil = sext i32 %inc3.epil to i64
>   %arrayidx1.epil = getelementptr inbounds i16, i16* %bit, i64
> %idxprom.epil
>   %8 = load i16, i16* %arrayidx1.epil, align 2
>   %sub.epil = add nsw i32 %inc3.epil, -1
>   %idxprom2.epil = sext i32 %sub.epil to i64
>   %arrayidx3.epil = getelementptr inbounds i16, i16* %bit, i64
> %idxprom2.epil
>   %9 = load i16, i16* %arrayidx3.epil, align 2
>   tail call void @llvm.CNOT.i16.i16(i16 %8, i16 %9)
>   %inc.epil = add nsw i32 %inc3.epil, 1
>   %cmp.epil = icmp slt i32 %inc.epil, %n
>   br label %for.end
>
> for.end:                                          ; preds =
> %for.body.epil, %for.cond.for.end_crit_edge.unr-lcssa, %entry
>   ret void
> }
>
> ; Function Attrs: nounwind
> declare void @llvm.H.i16(i16) #1
>
> ; Function Attrs: nounwind
> declare void @llvm.CNOT.i16.i16(i16, i16) #1
>
> ; Function Attrs: noinline nounwind uwtable
> define void @unCatN(i16* %bit, i32 %n) local_unnamed_addr #0 {
> entry:
>   %storemerge1 = add nsw i32 %n, -1
>   %cmp2 = icmp sgt i32 %n, 1
>   br i1 %cmp2, label %for.body.peel, label %for.end
>
> for.body.peel:                                    ; preds = %entry
>   %idxprom.peel = sext i32 %storemerge1 to i64
>   %arrayidx.peel = getelementptr inbounds i16, i16* %bit, i64 %idxprom.peel
>   %0 = load i16, i16* %arrayidx.peel, align 2
>   %sub1.peel = add nsw i32 %n, -2
>   %idxprom2.peel = sext i32 %sub1.peel to i64
>   %arrayidx3.peel = getelementptr inbounds i16, i16* %bit, i64
> %idxprom2.peel
>   %1 = load i16, i16* %arrayidx3.peel, align 2
>   tail call void @llvm.CNOT.i16.i16(i16 %0, i16 %1)
>   %storemerge.peel = add nsw i32 %storemerge1, -1
>   %cmp.peel = icmp sgt i32 %storemerge1, 1
>   br i1 %cmp.peel, label %for.body.lr.ph.peel.newph, label %for.end
>
> for.body.lr.ph.peel.newph:                        ; preds = %for.body.peel
>   br label %for.body
>
> for.body:                                         ; preds = %for.body,
> %for.body.lr.ph.peel.newph
>   %storemerge5 = phi i32 [ %storemerge.peel, %for.body.lr.ph.peel.newph ],
> [ %storemerge, %for.body ]
>   %storemerge.in3 = phi i32 [ %storemerge1, %for.body.lr.ph.peel.newph ],
> [ %storemerge5, %for.body ]
>   %idxprom = sext i32 %storemerge5 to i64
>   %arrayidx = getelementptr inbounds i16, i16* %bit, i64 %idxprom
>   %2 = load i16, i16* %arrayidx, align 2
>   %sub1 = add nsw i32 %storemerge.in3, -2
>   %idxprom2 = sext i32 %sub1 to i64
>   %arrayidx3 = getelementptr inbounds i16, i16* %bit, i64 %idxprom2
>   %3 = load i16, i16* %arrayidx3, align 2
>   tail call void @llvm.CNOT.i16.i16(i16 %2, i16 %3)
>   %storemerge = add nsw i32 %storemerge5, -1
>   %cmp = icmp sgt i32 %storemerge5, 1
>   br i1 %cmp, label %for.body, label %for.end, !llvm.loop !2
>
> for.end:                                          ; preds = %for.body,
> %for.body.peel, %entry
>   %.lcssa = phi i16* [ %bit, %entry ], [ %bit, %for.body.peel ], [ %bit,
> %for.body ]
>   %4 = load i16, i16* %.lcssa, align 2
>   tail call void @llvm.H.i16(i16 %4)
>   ret void
> }
>
> ; Function Attrs: noinline nounwind uwtable
> define i32 @main() local_unnamed_addr #0 {
> entry:
>   %bits = alloca [4 x i16], align 2
>   %arraydecay = getelementptr inbounds [4 x i16], [4 x i16]* %bits, i64 0,
> i64 0
>   call void @catN_IP4_IPx_IPx_IPx_DPx_DPx_DPx_DPx(i16* %arraydecay, i32
> undef)
>   ret i32 0
> }
>
> define void @catN_IP4_IPx_IPx_IPx_DPx_DPx_DPx_DPx(i16* %bit, i32 %n) {
> entry.:
>   %0 = load i16, i16* %bit, align 2
>   tail call void @llvm.H.i16(i16 %0)
>   %arrayidx1. = getelementptr inbounds i16, i16* %bit, i64 1
> <https://maps.google.com/?q=i64+1&entry=gmail&source=g>
>   %1 = load i16, i16* %arrayidx1., align 2
>   %2 = load i16, i16* %bit, align 2
>   tail call void @llvm.CNOT.i16.i16(i16 %1, i16 %2)
>   %arrayidx1.1. = getelementptr inbounds i16, i16* %bit, i64 2
> <https://maps.google.com/?q=i64+2&entry=gmail&source=g>
>   %3 = load i16, i16* %arrayidx1.1., align 2
>   %arrayidx3.1. = getelementptr inbounds i16, i16* %bit, i64 1
> <https://maps.google.com/?q=i64+1&entry=gmail&source=g>
>   %4 = load i16, i16* %arrayidx3.1., align 2
>   tail call void @llvm.CNOT.i16.i16(i16 %3, i16 %4)
>   %arrayidx1.epil. = getelementptr inbounds i16, i16* %bit, i64 3
> <https://maps.google.com/?q=i64+3&entry=gmail&source=g>
>   %5 = load i16, i16* %arrayidx1.epil., align 2
>   %arrayidx3.epil. = getelementptr inbounds i16, i16* %bit, i64 2
> <https://maps.google.com/?q=i64+2&entry=gmail&source=g>
>   %6 = load i16, i16* %arrayidx3.epil., align 2
>   tail call void @llvm.CNOT.i16.i16(i16 %5, i16 %6)
>   ret void
> }
>
> attributes #0 = { noinline nounwind uwtable
"correctly-rounded-divide-sqrt-fp-math"="false"
> "disable-tail-calls"="false"
"less-precise-fpmad"="false"
> "no-frame-pointer-elim"="true"
"no-frame-pointer-elim-non-leaf"
> "no-infs-fp-math"="false"
"no-jump-tables"="false"
> "no-nans-fp-math"="false"
"no-signed-zeros-fp-math"="false"
> "no-trapping-math"="false"
"stack-protector-buffer-size"="8"
> "target-cpu"="x86-64"
"target-features"="+fxsr,+mmx,+sse,+sse2,+x87"
> "unsafe-fp-math"="false"
"use-soft-float"="false" }
> attributes #1 = { nounwind }
>
> !llvm.module.flags = !{!0}
> !llvm.ident = !{!1}
>
> !0 = !{i32 1, !"wchar_size", i32 4}
> !1 = !{!"clang version 6.0.0 (git at github.com:llvm-mirror/clang.git
> 0aed123216ad4a38a9c2b16f1783895fd5cb1a04) (git at
github.com:llvm-mirror/llvm.git
> d209b37aec1e392dabbf9b5324ea4a60c36fbc55)"}
> !2 = distinct !{!2, !3}
> !3 = !{!"llvm.loop.unroll.disable"}
>
>
> There are still for *loops* in the catN and unCatN functions; a workaround
> might be to run the GlobalDCE pass on cat_state.n045.ll to remove the
> functions that are not live (dead functions).
>
> Cat_State testcase:
> https://github.com/ScaffCC/ScaffCC/blob/master/Algorithms/Cat_State/cat_state.n04.scaffold
>
> Scaffold builtin gates:
> https://github.com/ScaffCC/scaff-clang/blob/master/include/clang/Basic/Builtins.def#L108
>
> Ali JavadiAbhari, Shruti Patil, Daniel Kudrow, Jeff Heckey, Alexey Lvov,
> Frederic Chong and Margaret Martonosi, ScaffCC: A Framework for Compilation
> and Analysis of Quantum Computing Programs, ACM International Conference on
> Computing Frontiers (CF 2014), Cagliari, Italy, May 2014
>
>
> 在 2017年10月24日 12:52, Michael Kruse 写道:
>
>> 2017-10-24 6:19 GMT+02:00 Leslie Zhai via llvm-dev <
>> llvm-dev at lists.llvm.org <mailto:llvm-dev at
lists.llvm.org>>:
>> > attributes #0 = { noinline nounwind *optnone *uwtable
>>
>> The optnone attribute (added by clang at -O0) tells LLVM not to apply any
>> transformation. Avoid it with -Xclang -disable-O0-optnone
>>
>> Michael
>>
>>
> --
> Regards,
> Leslie Zhai - https://reviews.llvm.org/p/xiangzhai/
>
>
>
> _______________________________________________
> LLVM Developers mailing list
> llvm-dev at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
>-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20171026/30cfa42a/attachment-0001.html>

Leslie Zhai via llvm-dev

2017-Nov-01 03:45 UTC

head link

[llvm-dev] LLVM 6.0's LoopUnroll PASS is not able to work?

Hi Jatin,

Incorrect use of -loop-simplify caused this issue; see
https://github.com/epiqc/ScaffCC/issues/11


在 2017年10月27日 00:54, Jatin Bhateja 写道:
> Hi Leslie,
>
> There is a disable unroll meta data (!llvm.loop !2) associated with 
> unCatN loop basic block , probaly in the source pragma clang loop 
> unroll (disable) was used before the loop.
>
> I tried removing that and used -unroll-count=4 both the catN and 
> uncatN were unrolled.
>
> Options :  -mem2reg -loops -loop-simplify -loop-rotate -lcssa 
> -loop-unroll -unroll-count=4 -sccp -simplifycfg -o /tmp/1  
> -debug-only=loop-unroll.
>
> Loop Unroll: F[catN] Loop %for.body
>   Loop Size = 17
> UNROLLING loop %for.body by 4!
> Loop Unroll: F[unCatN] Loop %for.body
>   Loop Size = 9
> UNROLLING loop %for.body by 4 with run-time trip count!
>
> Thanks,
> Jatin
>
> On Wed, Oct 25, 2017 at 10:17 AM, Leslie Zhai via llvm-dev 
> <llvm-dev at lists.llvm.org <mailto:llvm-dev at
lists.llvm.org>> wrote:
>
>     Hi Michael,
>
>     Dropped *optnone* still failed to unroll loops for this testcase:
>
>     $ cat cat_state.n044.ll
>     ; ModuleID = 'cat_state.n045a.ll'
>     source_filename = "cat_state.n04_merged.scaffold"
>     target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
>     target triple = "x86_64-unknown-linux-gnu"
>
>     ; Function Attrs: noinline nounwind uwtable
>     define void @catN(i16* %bit, i32 %n) local_unnamed_addr #0 {
>     entry:
>       %0 = load i16, i16* %bit, align 2
>       tail call void @llvm.H.i16(i16 %0)
>       %cmp1 = icmp sgt i32 %n, 1
>       br i1 %cmp1, label %for.body.lr.ph <http://for.body.lr.ph>,
>     label %for.end
>
>     for.body.lr.ph <http://for.body.lr.ph>: ; preds = %entry
>       %1 = add i32 %n, -1
>       %2 = add i32 %n, -2
>       %xtraiter = and i32 %1, 1
>       %3 = icmp ult i32 %2, 1
>       br i1 %3, label %for.cond.for.end_crit_edge.unr-lcssa, label
>     %for.body.lr.ph.new
>
>     for.body.lr.ph.new:                     ; preds = %for.body.lr.ph
>     <http://for.body.lr.ph>
>       %unroll_iter = sub i32 %1, %xtraiter
>       br label %for.body
>
>     for.body:                     ; preds = %for.body, %for.body.lr.ph.new
>       %inc3 = phi i32 [ 1, %for.body.lr.ph.new ], [ %inc.1, %for.body ]
>       %niter = phi i32 [ %unroll_iter, %for.body.lr.ph.new ], [
>     %niter.nsub.1, %for.body ]
>       %idxprom = sext i32 %inc3 to i64
>       %arrayidx1 = getelementptr inbounds i16, i16* %bit, i64 %idxprom
>       %4 = load i16, i16* %arrayidx1, align 2
>       %sub = add nsw i32 %inc3, -1
>       %idxprom2 = sext i32 %sub to i64
>       %arrayidx3 = getelementptr inbounds i16, i16* %bit, i64 %idxprom2
>       %5 = load i16, i16* %arrayidx3, align 2
>       tail call void @llvm.CNOT.i16.i16(i16 %4, i16 %5)
>       %inc = add nsw i32 %inc3, 1
>       %niter.nsub = sub i32 %niter, 1
>       %idxprom.1 = sext i32 %inc to i64
>       %arrayidx1.1 = getelementptr inbounds i16, i16* %bit, i64 %idxprom.1
>       %6 = load i16, i16* %arrayidx1.1, align 2
>       %idxprom2.1 = sext i32 %inc3 to i64
>       %arrayidx3.1 = getelementptr inbounds i16, i16* %bit, i64
>     %idxprom2.1
>       %7 = load i16, i16* %arrayidx3.1, align 2
>       tail call void @llvm.CNOT.i16.i16(i16 %6, i16 %7)
>       %inc.1 = add nsw i32 %inc, 1
>       %niter.nsub.1 = sub i32 %niter.nsub, 1
>       %niter.ncmp.1 = icmp ne i32 %niter.nsub.1, 0
>       br i1 %niter.ncmp.1, label %for.body, label
>     %for.cond.for.end_crit_edge.unr-lcssa
>
>     for.cond.for.end_crit_edge.unr-lcssa:             ; preds =
>     %for.body, %for.body.lr.ph <http://for.body.lr.ph>
>       %inc3.unr = phi i32 [ 1, %for.body.lr.ph
<http://for.body.lr.ph>
>     ], [ %inc.1, %for.body ]
>       %lcmp.mod = icmp ne i32 %xtraiter, 0
>       br i1 %lcmp.mod, label %for.body.epil, label %for.end
>
>     for.body.epil:                     ; preds =
>     %for.cond.for.end_crit_edge.unr-lcssa
>       %inc3.epil = phi i32 [ %inc3.unr,
>     %for.cond.for.end_crit_edge.unr-lcssa ]
>       %idxprom.epil = sext i32 %inc3.epil to i64
>       %arrayidx1.epil = getelementptr inbounds i16, i16* %bit, i64
>     %idxprom.epil
>       %8 = load i16, i16* %arrayidx1.epil, align 2
>       %sub.epil = add nsw i32 %inc3.epil, -1
>       %idxprom2.epil = sext i32 %sub.epil to i64
>       %arrayidx3.epil = getelementptr inbounds i16, i16* %bit, i64
>     %idxprom2.epil
>       %9 = load i16, i16* %arrayidx3.epil, align 2
>       tail call void @llvm.CNOT.i16.i16(i16 %8, i16 %9)
>       %inc.epil = add nsw i32 %inc3.epil, 1
>       %cmp.epil = icmp slt i32 %inc.epil, %n
>       br label %for.end
>
>     for.end:                     ; preds = %for.body.epil,
>     %for.cond.for.end_crit_edge.unr-lcssa, %entry
>       ret void
>     }
>
>     ; Function Attrs: nounwind
>     declare void @llvm.H.i16(i16) #1
>
>     ; Function Attrs: nounwind
>     declare void @llvm.CNOT.i16.i16(i16, i16) #1
>
>     ; Function Attrs: noinline nounwind uwtable
>     define void @unCatN(i16* %bit, i32 %n) local_unnamed_addr #0 {
>     entry:
>       %storemerge1 = add nsw i32 %n, -1
>       %cmp2 = icmp sgt i32 %n, 1
>       br i1 %cmp2, label %for.body.peel, label %for.end
>
>     for.body.peel:                     ; preds = %entry
>       %idxprom.peel = sext i32 %storemerge1 to i64
>       %arrayidx.peel = getelementptr inbounds i16, i16* %bit, i64
>     %idxprom.peel
>       %0 = load i16, i16* %arrayidx.peel, align 2
>       %sub1.peel = add nsw i32 %n, -2
>       %idxprom2.peel = sext i32 %sub1.peel to i64
>       %arrayidx3.peel = getelementptr inbounds i16, i16* %bit, i64
>     %idxprom2.peel
>       %1 = load i16, i16* %arrayidx3.peel, align 2
>       tail call void @llvm.CNOT.i16.i16(i16 %0, i16 %1)
>       %storemerge.peel = add nsw i32 %storemerge1, -1
>       %cmp.peel = icmp sgt i32 %storemerge1, 1
>       br i1 %cmp.peel, label %for.body.lr.ph.peel.newph, label %for.end
>
>     for.body.lr.ph.peel.newph:                     ; preds =
>     %for.body.peel
>       br label %for.body
>
>     for.body:                     ; preds = %for.body,
>     %for.body.lr.ph.peel.newph
>       %storemerge5 = phi i32 [ %storemerge.peel,
>     %for.body.lr.ph.peel.newph ], [ %storemerge, %for.body ]
>       %storemerge.in3 = phi i32 [ %storemerge1,
>     %for.body.lr.ph.peel.newph ], [ %storemerge5, %for.body ]
>       %idxprom = sext i32 %storemerge5 to i64
>       %arrayidx = getelementptr inbounds i16, i16* %bit, i64 %idxprom
>       %2 = load i16, i16* %arrayidx, align 2
>       %sub1 = add nsw i32 %storemerge.in3, -2
>       %idxprom2 = sext i32 %sub1 to i64
>       %arrayidx3 = getelementptr inbounds i16, i16* %bit, i64 %idxprom2
>       %3 = load i16, i16* %arrayidx3, align 2
>       tail call void @llvm.CNOT.i16.i16(i16 %2, i16 %3)
>       %storemerge = add nsw i32 %storemerge5, -1
>       %cmp = icmp sgt i32 %storemerge5, 1
>       br i1 %cmp, label %for.body, label %for.end, !llvm.loop !2
>
>     for.end:                     ; preds = %for.body, %for.body.peel,
>     %entry
>       %.lcssa = phi i16* [ %bit, %entry ], [ %bit, %for.body.peel ], [
>     %bit, %for.body ]
>       %4 = load i16, i16* %.lcssa, align 2
>       tail call void @llvm.H.i16(i16 %4)
>       ret void
>     }
>
>     ; Function Attrs: noinline nounwind uwtable
>     define i32 @main() local_unnamed_addr #0 {
>     entry:
>       %bits = alloca [4 x i16], align 2
>       %arraydecay = getelementptr inbounds [4 x i16], [4 x i16]*
>     %bits, i64 0, i64 0
>       call void @catN_IP4_IPx_IPx_IPx_DPx_DPx_DPx_DPx(i16*
>     %arraydecay, i32 undef)
>       ret i32 0
>     }
>
>     define void @catN_IP4_IPx_IPx_IPx_DPx_DPx_DPx_DPx(i16* %bit, i32 %n) {
>     entry.:
>       %0 = load i16, i16* %bit, align 2
>       tail call void @llvm.H.i16(i16 %0)
>       %arrayidx1. = getelementptr inbounds i16, i16* %bit, i64 1
>     <https://maps.google.com/?q=i64+1&entry=gmail&source=g>
>       %1 = load i16, i16* %arrayidx1., align 2
>       %2 = load i16, i16* %bit, align 2
>       tail call void @llvm.CNOT.i16.i16(i16 %1, i16 %2)
>       %arrayidx1.1. = getelementptr inbounds i16, i16* %bit, i64 2
>     <https://maps.google.com/?q=i64+2&entry=gmail&source=g>
>       %3 = load i16, i16* %arrayidx1.1., align 2
>       %arrayidx3.1. = getelementptr inbounds i16, i16* %bit, i64 1
>     <https://maps.google.com/?q=i64+1&entry=gmail&source=g>
>       %4 = load i16, i16* %arrayidx3.1., align 2
>       tail call void @llvm.CNOT.i16.i16(i16 %3, i16 %4)
>       %arrayidx1.epil. = getelementptr inbounds i16, i16* %bit, i64 3
>     <https://maps.google.com/?q=i64+3&entry=gmail&source=g>
>       %5 = load i16, i16* %arrayidx1.epil., align 2
>       %arrayidx3.epil. = getelementptr inbounds i16, i16* %bit, i64 2
>     <https://maps.google.com/?q=i64+2&entry=gmail&source=g>
>       %6 = load i16, i16* %arrayidx3.epil., align 2
>       tail call void @llvm.CNOT.i16.i16(i16 %5, i16 %6)
>       ret void
>     }
>
>     attributes #0 = { noinline nounwind uwtable
>     "correctly-rounded-divide-sqrt-fp-math"="false"
>     "disable-tail-calls"="false"
"less-precise-fpmad"="false"
>     "no-frame-pointer-elim"="true"
"no-frame-pointer-elim-non-leaf"
>     "no-infs-fp-math"="false"
"no-jump-tables"="false"
>     "no-nans-fp-math"="false"
"no-signed-zeros-fp-math"="false"
>     "no-trapping-math"="false"
"stack-protector-buffer-size"="8"
>     "target-cpu"="x86-64"
>     "target-features"="+fxsr,+mmx,+sse,+sse2,+x87"
>     "unsafe-fp-math"="false"
"use-soft-float"="false" }
>     attributes #1 = { nounwind }
>
>     !llvm.module.flags = !{!0}
>     !llvm.ident = !{!1}
>
>     !0 = !{i32 1, !"wchar_size", i32 4}
>     !1 = !{!"clang version 6.0.0 (git at
github.com:llvm-mirror/clang.git
>     0aed123216ad4a38a9c2b16f1783895fd5cb1a04)
>     (git at github.com:llvm-mirror/llvm.git
>     d209b37aec1e392dabbf9b5324ea4a60c36fbc55)"}
>     !2 = distinct !{!2, !3}
>     !3 = !{!"llvm.loop.unroll.disable"}
>
>     $(OPT) -S cat_state.n044.ll -mem2reg -loops -loop-simplify
>     -loop-rotate -lcssa -loop-unroll -unroll-threshold=100000000 -sccp
>     -simplifycfg -o cat_state.n045.ll
>
>     $ cat cat_state.n045.ll
>     ; ModuleID = 'cat_state.n044.ll'
>     source_filename = "cat_state.n04_merged.scaffold"
>     target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
>     target triple = "x86_64-unknown-linux-gnu"
>
>     ; Function Attrs: noinline nounwind uwtable
>     define void @catN(i16* %bit, i32 %n) local_unnamed_addr #0 {
>     entry:
>       %0 = load i16, i16* %bit, align 2
>       tail call void @llvm.H.i16(i16 %0)
>       %cmp1 = icmp sgt i32 %n, 1
>       br i1 %cmp1, label %for.body.lr.ph <http://for.body.lr.ph>,
>     label %for.end
>
>     for.body.lr.ph <http://for.body.lr.ph>: ; preds = %entry
>       %1 = add i32 %n, -1
>       %2 = add i32 %n, -2
>       %xtraiter = and i32 %1, 1
>       %3 = icmp ult i32 %2, 1
>       br i1 %3, label %for.cond.for.end_crit_edge.unr-lcssa, label
>     %for.body.lr.ph.new
>
>     for.body.lr.ph.new:                     ; preds = %for.body.lr.ph
>     <http://for.body.lr.ph>
>       %unroll_iter = sub i32 %1, %xtraiter
>       br label %for.body
>
>     for.body:                     ; preds = %for.body, %for.body.lr.ph.new
>       %inc3 = phi i32 [ 1, %for.body.lr.ph.new ], [ %inc.1, %for.body ]
>       %niter = phi i32 [ %unroll_iter, %for.body.lr.ph.new ], [
>     %niter.nsub.1, %for.body ]
>       %idxprom = sext i32 %inc3 to i64
>       %arrayidx1 = getelementptr inbounds i16, i16* %bit, i64 %idxprom
>       %4 = load i16, i16* %arrayidx1, align 2
>       %sub = add nsw i32 %inc3, -1
>       %idxprom2 = sext i32 %sub to i64
>       %arrayidx3 = getelementptr inbounds i16, i16* %bit, i64 %idxprom2
>       %5 = load i16, i16* %arrayidx3, align 2
>       tail call void @llvm.CNOT.i16.i16(i16 %4, i16 %5)
>       %inc = add nsw i32 %inc3, 1
>       %niter.nsub = sub i32 %niter, 1
>       %idxprom.1 = sext i32 %inc to i64
>       %arrayidx1.1 = getelementptr inbounds i16, i16* %bit, i64 %idxprom.1
>       %6 = load i16, i16* %arrayidx1.1, align 2
>       %idxprom2.1 = sext i32 %inc3 to i64
>       %arrayidx3.1 = getelementptr inbounds i16, i16* %bit, i64
>     %idxprom2.1
>       %7 = load i16, i16* %arrayidx3.1, align 2
>       tail call void @llvm.CNOT.i16.i16(i16 %6, i16 %7)
>       %inc.1 = add nsw i32 %inc, 1
>       %niter.nsub.1 = sub i32 %niter.nsub, 1
>       %niter.ncmp.1 = icmp ne i32 %niter.nsub.1, 0
>       br i1 %niter.ncmp.1, label %for.body, label
>     %for.cond.for.end_crit_edge.unr-lcssa
>
>     for.cond.for.end_crit_edge.unr-lcssa:             ; preds =
>     %for.body, %for.body.lr.ph <http://for.body.lr.ph>
>       %inc3.unr = phi i32 [ 1, %for.body.lr.ph
<http://for.body.lr.ph>
>     ], [ %inc.1, %for.body ]
>       %lcmp.mod = icmp ne i32 %xtraiter, 0
>       br i1 %lcmp.mod, label %for.body.epil, label %for.end
>
>     for.body.epil:                     ; preds = %for.cond.for.end_crit_edge.unr-lcssa
>       %inc3.epil = phi i32 [ %inc3.unr,
>     %for.cond.for.end_crit_edge.unr-lcssa ]
>       %idxprom.epil = sext i32 %inc3.epil to i64
>       %arrayidx1.epil = getelementptr inbounds i16, i16* %bit, i64
>     %idxprom.epil
>       %8 = load i16, i16* %arrayidx1.epil, align 2
>       %sub.epil = add nsw i32 %inc3.epil, -1
>       %idxprom2.epil = sext i32 %sub.epil to i64
>       %arrayidx3.epil = getelementptr inbounds i16, i16* %bit, i64
>     %idxprom2.epil
>       %9 = load i16, i16* %arrayidx3.epil, align 2
>       tail call void @llvm.CNOT.i16.i16(i16 %8, i16 %9)
>       %inc.epil = add nsw i32 %inc3.epil, 1
>       %cmp.epil = icmp slt i32 %inc.epil, %n
>       br label %for.end
>
>     for.end:                     ; preds = %for.body.epil,
>     %for.cond.for.end_crit_edge.unr-lcssa, %entry
>       ret void
>     }
>
>     ; Function Attrs: nounwind
>     declare void @llvm.H.i16(i16) #1
>
>     ; Function Attrs: nounwind
>     declare void @llvm.CNOT.i16.i16(i16, i16) #1
>
>     ; Function Attrs: noinline nounwind uwtable
>     define void @unCatN(i16* %bit, i32 %n) local_unnamed_addr #0 {
>     entry:
>       %storemerge1 = add nsw i32 %n, -1
>       %cmp2 = icmp sgt i32 %n, 1
>       br i1 %cmp2, label %for.body.peel, label %for.end
>
>     for.body.peel:                     ; preds = %entry
>       %idxprom.peel = sext i32 %storemerge1 to i64
>       %arrayidx.peel = getelementptr inbounds i16, i16* %bit, i64
>     %idxprom.peel
>       %0 = load i16, i16* %arrayidx.peel, align 2
>       %sub1.peel = add nsw i32 %n, -2
>       %idxprom2.peel = sext i32 %sub1.peel to i64
>       %arrayidx3.peel = getelementptr inbounds i16, i16* %bit, i64
>     %idxprom2.peel
>       %1 = load i16, i16* %arrayidx3.peel, align 2
>       tail call void @llvm.CNOT.i16.i16(i16 %0, i16 %1)
>       %storemerge.peel = add nsw i32 %storemerge1, -1
>       %cmp.peel = icmp sgt i32 %storemerge1, 1
>       br i1 %cmp.peel, label %for.body.lr.ph.peel.newph, label %for.end
>
>     for.body.lr.ph.peel.newph:                     ; preds = %for.body.peel
>       br label %for.body
>
>     for.body:                     ; preds = %for.body,
>     %for.body.lr.ph.peel.newph
>       %storemerge5 = phi i32 [ %storemerge.peel,
>     %for.body.lr.ph.peel.newph ], [ %storemerge, %for.body ]
>       %storemerge.in3 = phi i32 [ %storemerge1,
>     %for.body.lr.ph.peel.newph ], [ %storemerge5, %for.body ]
>       %idxprom = sext i32 %storemerge5 to i64
>       %arrayidx = getelementptr inbounds i16, i16* %bit, i64 %idxprom
>       %2 = load i16, i16* %arrayidx, align 2
>       %sub1 = add nsw i32 %storemerge.in3, -2
>       %idxprom2 = sext i32 %sub1 to i64
>       %arrayidx3 = getelementptr inbounds i16, i16* %bit, i64 %idxprom2
>       %3 = load i16, i16* %arrayidx3, align 2
>       tail call void @llvm.CNOT.i16.i16(i16 %2, i16 %3)
>       %storemerge = add nsw i32 %storemerge5, -1
>       %cmp = icmp sgt i32 %storemerge5, 1
>       br i1 %cmp, label %for.body, label %for.end, !llvm.loop !2
>
>     for.end:                     ; preds = %for.body, %for.body.peel,
>     %entry
>       %.lcssa = phi i16* [ %bit, %entry ], [ %bit, %for.body.peel ], [
>     %bit, %for.body ]
>       %4 = load i16, i16* %.lcssa, align 2
>       tail call void @llvm.H.i16(i16 %4)
>       ret void
>     }
>
>     ; Function Attrs: noinline nounwind uwtable
>     define i32 @main() local_unnamed_addr #0 {
>     entry:
>       %bits = alloca [4 x i16], align 2
>       %arraydecay = getelementptr inbounds [4 x i16], [4 x i16]*
>     %bits, i64 0, i64 0
>       call void @catN_IP4_IPx_IPx_IPx_DPx_DPx_DPx_DPx(i16*
>     %arraydecay, i32 undef)
>       ret i32 0
>     }
>
>     define void @catN_IP4_IPx_IPx_IPx_DPx_DPx_DPx_DPx(i16* %bit, i32 %n) {
>     entry.:
>       %0 = load i16, i16* %bit, align 2
>       tail call void @llvm.H.i16(i16 %0)
>       %arrayidx1. = getelementptr inbounds i16, i16* %bit, i64 1
>       %1 = load i16, i16* %arrayidx1., align 2
>       %2 = load i16, i16* %bit, align 2
>       tail call void @llvm.CNOT.i16.i16(i16 %1, i16 %2)
>       %arrayidx1.1. = getelementptr inbounds i16, i16* %bit, i64 2
>       %3 = load i16, i16* %arrayidx1.1., align 2
>       %arrayidx3.1. = getelementptr inbounds i16, i16* %bit, i64 1
>       %4 = load i16, i16* %arrayidx3.1., align 2
>       tail call void @llvm.CNOT.i16.i16(i16 %3, i16 %4)
>       %arrayidx1.epil. = getelementptr inbounds i16, i16* %bit, i64 3
>       %5 = load i16, i16* %arrayidx1.epil., align 2
>       %arrayidx3.epil. = getelementptr inbounds i16, i16* %bit, i64 2
>       %6 = load i16, i16* %arrayidx3.epil., align 2
>       tail call void @llvm.CNOT.i16.i16(i16 %5, i16 %6)
>       ret void
>     }
>
>     attributes #0 = { noinline nounwind uwtable
>     "correctly-rounded-divide-sqrt-fp-math"="false"
>     "disable-tail-calls"="false" "less-precise-fpmad"="false"
>     "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"
>     "no-infs-fp-math"="false" "no-jump-tables"="false"
>     "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false"
>     "no-trapping-math"="false" "stack-protector-buffer-size"="8"
>     "target-cpu"="x86-64"
>     "target-features"="+fxsr,+mmx,+sse,+sse2,+x87"
>     "unsafe-fp-math"="false" "use-soft-float"="false" }
>     attributes #1 = { nounwind }
>
>     !llvm.module.flags = !{!0}
>     !llvm.ident = !{!1}
>
>     !0 = !{i32 1, !"wchar_size", i32 4}
>     !1 = !{!"clang version 6.0.0 (git@github.com:llvm-mirror/clang.git
>     0aed123216ad4a38a9c2b16f1783895fd5cb1a04)
>     (git@github.com:llvm-mirror/llvm.git
>     d209b37aec1e392dabbf9b5324ea4a60c36fbc55)"}
>     !2 = distinct !{!2, !3}
>     !3 = !{!"llvm.loop.unroll.disable"}
>
>
>     There are still for *loops* in the catN and unCatN functions. A
>     workaround might be to run the GlobalDCE pass on cat_state.n045.ll
>     to remove the dead (non-live) functions.
>
>     Cat_State testcase:
>     https://github.com/ScaffCC/ScaffCC/blob/master/Algorithms/Cat_State/cat_state.n04.scaffold
>
>     Scaffold builtin gates:
>     https://github.com/ScaffCC/scaff-clang/blob/master/include/clang/Basic/Builtins.def#L108
>
>     Ali JavadiAbhari, Shruti Patil, Daniel Kudrow, Jeff Heckey, Alexey
>     Lvov, Frederic Chong and Margaret Martonosi, ScaffCC: A Framework
>     for Compilation and Analysis of Quantum Computing Programs, ACM
>     International Conference on Computing Frontiers (CF 2014),
>     Cagliari, Italy, May 2014
>
>
>     在 2017年10月24日 12:52, Michael Kruse 写道:
>
>         2017-10-24 6:19 GMT+02:00 Leslie Zhai via llvm-dev
>         <llvm-dev at lists.llvm.org>:
>         > attributes #0 = { noinline nounwind *optnone *uwtable
>
>         The optnone attribute (added by clang at -O0) tells LLVM not to
>         apply any transformations. Avoid it with -Xclang -disable-O0-optnone
>
>         Michael
>
>
>     -- 
>     Regards,
>     Leslie Zhai - https://reviews.llvm.org/p/xiangzhai/
>     <https://reviews.llvm.org/p/xiangzhai/>
>
>
>
>     _______________________________________________
>     LLVM Developers mailing list
>     llvm-dev at lists.llvm.org <mailto:llvm-dev at lists.llvm.org>
>     http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
>     <http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev>
>
>
-- 
Regards,
Leslie Zhai - https://reviews.llvm.org/p/xiangzhai/

llvm dev - Oct 2017 - LLVM 6.0's LoopUnroll PASS is not able to work?

[llvm-dev] LLVM 6.0's LoopUnroll PASS is not able to work?

[llvm-dev] LLVM 6.0's LoopUnroll PASS is not able to work?