Xiangyang Guo via llvm-dev
2015-Aug-22 14:27 UTC
[llvm-dev] loop unrolling introduces conditional branch
Hi, Mehdi, For example, I have this very simple source code: void foo( int n, int array_x[]) { for (int i=0; i < n; i++) array_x[i] = i; } After I use "clang -emit-llvm -o bc_from_clang.bc -c try.cc", I get bc_from_clang.bc. With my code (using LLVM IRbuilder API), I get bc_from_api.bc. Attachment please find thse two files. I also past the IR here. ******************************** Clang Generate IR Start *********************************************************** ; ModuleID = 'bc_from_clang.bc' target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" ; Function Attrs: nounwind uwtable define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 { %1 = alloca i32, align 4 %2 = alloca i32*, align 8 %i = alloca i32, align 4 store i32 %n, i32* %1, align 4 store i32* %array_x, i32** %2, align 8 store i32 0, i32* %i, align 4 br label %3 ; <label>:3 ; preds = %13, %0 %4 = load i32, i32* %i, align 4 %5 = load i32, i32* %1, align 4 %6 = icmp slt i32 %4, %5 br i1 %6, label %7, label %16 ; <label>:7 ; preds = %3 %8 = load i32, i32* %i, align 4 %9 = load i32, i32* %i, align 4 %10 = sext i32 %9 to i64 %11 = load i32*, i32** %2, align 8 %12 = getelementptr inbounds i32, i32* %11, i64 %10 store i32 %8, i32* %12, align 4 br label %13 ; <label>:13 ; preds = %7 %14 = load i32, i32* %i, align 4 %15 = add nsw i32 %14, 1 store i32 %15, i32* %i, align 4 br label %3 ; <label>:16 ; preds = %3 ret void } attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.ident = !{!0} !0 = !{!"clang version 3.8.0 (trunk 245730) (llvm/trunk 245727)"} ******************************** Clang Generate IR End *********************************************************** 
******************************** API Generate IR Start *********************************************************** ; ModuleID = 'bc_from_api.bc' target triple = "x86_64-unkown-linux-gnu" ; Function Attrs: nounwind define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 { entry: %n.addr = alloca i32, align 4 %array_x.addr = alloca i32*, align 8 %i = alloca i32, align 4 store i32 %n, i32* %n.addr, align 4 store i32* %array_x, i32** %array_x.addr, align 8 store i32 0, i32* %i, align 4 br label %for.cond for.cond: ; preds = %for.inc, %entry %0 = load i32, i32* %i, align 4 %1 = load i32, i32* %n.addr, align 4 %cmp = icmp slt i32 %0, %1 br i1 %cmp, label %for.body, label %for.end for.body: ; preds = %for.cond %2 = load i32, i32* %i, align 4 %3 = load i32, i32* %i, align 4 %idxprom = sext i32 %3 to i64 %4 = load i32*, i32** %array_x.addr, align 8 %arrayidx = getelementptr inbounds i32, i32* %4, i64 %idxprom store i32 %2, i32* %arrayidx, align 4 br label %for.inc for.inc: ; preds = %for.body %5 = load i32, i32* %i, align 4 %inc = add i32 %5, 1 store i32 %inc, i32* %i, align 4 br label %for.cond for.end: ; preds = %for.cond ret void } attributes #0 = { nounwind } ******************************** API Generate IR End *********************************************************** Then I use "opt file.bc -mem2reg -loops -loop-simplify -loop-rotate -lcssa -indvars -loop-unroll -unroll-count=4 -irce -simplifycfg -S" to run both .bc files. 
The first .bc file give me this: ***************************** Clang Generate IR with LoopUnrolling Start********************************************** ; ModuleID = 'bc_from_clang.bc' target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" ; Function Attrs: nounwind uwtable define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 { %1 = icmp slt i32 0, %n br i1 %1, label %.lr.ph, label %._crit_edge .lr.ph: ; preds = %0 %2 = add i32 %n, -1 %xtraiter = and i32 %n, 3 %lcmp.mod = icmp ne i32 %xtraiter, 0 br i1 %lcmp.mod, label %3, label %.lr.ph.split ; <label>:3 ; preds = %3, %.lr.ph %indvars.iv.prol = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.prol, %3 ] %prol.iter = phi i32 [ %xtraiter, %.lr.ph ], [ %prol.iter.sub, %3 ] %4 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.prol %5 = trunc i64 %indvars.iv.prol to i32 store i32 %5, i32* %4, align 4 %indvars.iv.next.prol = add nuw nsw i64 %indvars.iv.prol, 1 %lftr.wideiv.prol = trunc i64 %indvars.iv.next.prol to i32 %exitcond.prol = icmp ne i32 %lftr.wideiv.prol, %n %prol.iter.sub = sub i32 %prol.iter, 1 %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0 br i1 %prol.iter.cmp, label %3, label %.lr.ph.split, !llvm.loop !1 .lr.ph.split: ; preds = %3, %.lr.ph %indvars.iv.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.prol, %3 ] %6 = icmp ult i32 %2, 3 br i1 %6, label %._crit_edge, label %.lr.ph.split.split .lr.ph.split.split: ; preds = %.lr.ph.split, %.lr.ph.split.split %indvars.iv = phi i64 [ %indvars.iv.next.3, %.lr.ph.split.split ], [ %indvars.iv.unr, %.lr.ph.split ] %7 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv %8 = trunc i64 %indvars.iv to i32 store i32 %8, i32* %7, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %9 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.next %10 = trunc i64 %indvars.iv.next to i32 store i32 %10, i32* %9, align 4 %indvars.iv.next.1 = add nuw nsw i64 
%indvars.iv.next, 1 %lftr.wideiv.1 = trunc i64 %indvars.iv.next.1 to i32 %11 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.next.1 %12 = trunc i64 %indvars.iv.next.1 to i32 store i32 %12, i32* %11, align 4 %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1 %lftr.wideiv.2 = trunc i64 %indvars.iv.next.2 to i32 %13 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.next.2 %14 = trunc i64 %indvars.iv.next.2 to i32 store i32 %14, i32* %13, align 4 %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1 %lftr.wideiv.3 = trunc i64 %indvars.iv.next.3 to i32 %exitcond.3 = icmp ne i32 %lftr.wideiv.3, %n br i1 %exitcond.3, label %.lr.ph.split.split, label %._crit_edge ._crit_edge: ; preds = %.lr.ph.split, %.lr.ph.split.split, %0 ret void } attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.ident = !{!0} !0 = !{!"clang version 3.8.0 (trunk 245730) (llvm/trunk 245727)"} !1 = distinct !{!1, !2} !2 = !{!"llvm.loop.unroll.disable"} ******************************Clang Generate IR with LoopUnrolling End*********************************************** The second .bc file gives me this: ******************************API Generate IR with LoopUnrolling Start************************************************* ; ModuleID = 'bc_from_api.bc' target triple = "x86_64-unkown-linux-gnu" ; Function Attrs: nounwind define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 { entry: %cmp.1 = icmp slt i32 0, %n br i1 %cmp.1, label %for.body, label %for.end for.body: ; preds = %entry, %for.body.3 %i.02 = phi i32 [ %inc.3, %for.body.3 ], [ 0, %entry ] %idxprom = sext i32 %i.02 to i64 %arrayidx = getelementptr inbounds i32, i32* %array_x, i64 %idxprom store i32 %i.02, i32* 
%arrayidx, align 4 %inc = add nuw nsw i32 %i.02, 1 %cmp = icmp slt i32 %inc, %n br i1 %cmp, label %for.body.1, label %for.end for.end: ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry ret void for.body.1: ; preds = %for.body %idxprom.1 = sext i32 %inc to i64 %arrayidx.1 = getelementptr inbounds i32, i32* %array_x, i64 %idxprom.1 store i32 %inc, i32* %arrayidx.1, align 4 %inc.1 = add nuw nsw i32 %inc, 1 %cmp.1.3 = icmp slt i32 %inc.1, %n br i1 %cmp.1.3, label %for.body.2, label %for.end for.body.2: ; preds = %for.body.1 %idxprom.2 = sext i32 %inc.1 to i64 %arrayidx.2 = getelementptr inbounds i32, i32* %array_x, i64 %idxprom.2 store i32 %inc.1, i32* %arrayidx.2, align 4 %inc.2 = add nuw nsw i32 %inc.1, 1 %cmp.2 = icmp slt i32 %inc.2, %n br i1 %cmp.2, label %for.body.3, label %for.end for.body.3: ; preds = %for.body.2 %idxprom.3 = sext i32 %inc.2 to i64 %arrayidx.3 = getelementptr inbounds i32, i32* %array_x, i64 %idxprom.3 store i32 %inc.2, i32* %arrayidx.3, align 4 %inc.3 = add nuw nsw i32 %inc.2, 1 %cmp.3 = icmp slt i32 %inc.3, %n br i1 %cmp.3, label %for.body, label %for.end } attributes #0 = { nounwind } ******************************API Generate IR with LoopUnrolling End************************************************** Sorry I post two many code here. Can you give me any suggestion? Thanks -------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20150822/fd12e18f/attachment.html> -------------- next part -------------- A non-text attachment was scrubbed... Name: bc_from_clang.bc Type: application/octet-stream Size: 1260 bytes Desc: not available URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20150822/fd12e18f/attachment.obj> -------------- next part -------------- A non-text attachment was scrubbed... 
Name: bc_from_api.bc Type: application/octet-stream Size: 788 bytes Desc: not available URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20150822/fd12e18f/attachment-0001.obj>
Mehdi Amini via llvm-dev
2015-Aug-22 15:36 UTC
[llvm-dev] loop unrolling introduces conditional branch
> On Aug 22, 2015, at 7:27 AM, Xiangyang Guo <xguo6 at ncsu.edu> wrote: > > Hi, Mehdi, > > For example, I have this very simple source code: > void foo( int n, int array_x[]) > { > for (int i=0; i < n; i++) > array_x[i] = i; > } > > After I use "clang -emit-llvm -o bc_from_clang.bc -c try.cc", I get bc_from_clang.bc. With my code (using LLVM IRbuilder API), I get bc_from_api.bc. Attachment please find thse two files. I also past the IR here. > ******************************** Clang Generate IR Start *********************************************************** > ; ModuleID = 'bc_from_clang.bc' > target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" > target triple = "x86_64-unknown-linux-gnu" > > ; Function Attrs: nounwind uwtable > define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 { > %1 = alloca i32, align 4 > %2 = alloca i32*, align 8 > %i = alloca i32, align 4 > store i32 %n, i32* %1, align 4 > store i32* %array_x, i32** %2, align 8 > store i32 0, i32* %i, align 4 > br label %3 > > ; <label>:3 ; preds = %13, %0 > %4 = load i32, i32* %i, align 4 > %5 = load i32, i32* %1, align 4 > %6 = icmp slt i32 %4, %5 > br i1 %6, label %7, label %16 > > ; <label>:7 ; preds = %3 > %8 = load i32, i32* %i, align 4 > %9 = load i32, i32* %i, align 4 > %10 = sext i32 %9 to i64 > %11 = load i32*, i32** %2, align 8 > %12 = getelementptr inbounds i32, i32* %11, i64 %10 > store i32 %8, i32* %12, align 4 > br label %13 > > ; <label>:13 ; preds = %7 > %14 = load i32, i32* %i, align 4 > %15 = add nsw i32 %14, 1 > store i32 %15, i32* %i, align 4 > br label %3 > > ; <label>:16 ; preds = %3 > ret void > } > > attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } > > !llvm.ident = !{!0} > > !0 = 
!{!"clang version 3.8.0 (trunk 245730) (llvm/trunk 245727)"} > > ******************************** Clang Generate IR End *********************************************************** > > ******************************** API Generate IR Start *********************************************************** > ; ModuleID = 'bc_from_api.bc' > target triple = "x86_64-unkown-linux-gnu" > > ; Function Attrs: nounwind > define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 { > entry: > %n.addr = alloca i32, align 4 > %array_x.addr = alloca i32*, align 8 > %i = alloca i32, align 4 > store i32 %n, i32* %n.addr, align 4 > store i32* %array_x, i32** %array_x.addr, align 8 > store i32 0, i32* %i, align 4 > br label %for.cond > > for.cond: ; preds = %for.inc, %entry > %0 = load i32, i32* %i, align 4 > %1 = load i32, i32* %n.addr, align 4 > %cmp = icmp slt i32 %0, %1 > br i1 %cmp, label %for.body, label %for.end > > for.body: ; preds = %for.cond > %2 = load i32, i32* %i, align 4 > %3 = load i32, i32* %i, align 4 > %idxprom = sext i32 %3 to i64 > %4 = load i32*, i32** %array_x.addr, align 8 > %arrayidx = getelementptr inbounds i32, i32* %4, i64 %idxprom > store i32 %2, i32* %arrayidx, align 4 > br label %for.inc > > for.inc: ; preds = %for.body > %5 = load i32, i32* %i, align 4 > %inc = add i32 %5, 1 > store i32 %inc, i32* %i, align 4 > br label %for.cond > > for.end: ; preds = %for.cond > ret void > } > > attributes #0 = { nounwind } > > ******************************** API Generate IR End *********************************************************** > > Then I use "opt file.bc -mem2reg -loops -loop-simplify -loop-rotate -lcssa -indvars -loop-unroll -unroll-count=4 -irce -simplifycfg -S" to run both .bc files. 
> The first .bc file give me this: > > ***************************** Clang Generate IR with LoopUnrolling Start********************************************** > ; ModuleID = 'bc_from_clang.bc' > target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" > target triple = "x86_64-unknown-linux-gnu" > > ; Function Attrs: nounwind uwtable > define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 { > %1 = icmp slt i32 0, %n > br i1 %1, label %.lr.ph, label %._crit_edge > > .lr.ph: ; preds = %0 > %2 = add i32 %n, -1 > %xtraiter = and i32 %n, 3 > %lcmp.mod = icmp ne i32 %xtraiter, 0 > br i1 %lcmp.mod, label %3, label %.lr.ph.split > > ; <label>:3 ; preds = %3, %.lr.ph > %indvars.iv.prol = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.prol, %3 ] > %prol.iter = phi i32 [ %xtraiter, %.lr.ph ], [ %prol.iter.sub, %3 ] > %4 = 
getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.prol > %5 = trunc i64 %indvars.iv.prol to i32 > store i32 %5, i32* %4, align 4 > %indvars.iv.next.prol = add nuw nsw i64 %indvars.iv.prol, 1 > %lftr.wideiv.prol = trunc i64 %indvars.iv.next.prol to i32 > %exitcond.prol = icmp ne i32 %lftr.wideiv.prol, %n > %prol.iter.sub = sub i32 %prol.iter, 1 > %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0 > br i1 %prol.iter.cmp, label %3, label %.lr.ph.split, !llvm.loop !1 > > .lr.ph.split: ; preds = %3, %.lr.ph > %indvars.iv.unr = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next.prol, %3 ] > %6 = icmp ult i32 %2, 3 > br i1 %6, label %._crit_edge, label %.lr.ph.split.split > > .lr.ph.split.split: ; preds = %.lr.ph.split, %.lr.ph.split.split > %indvars.iv = phi i64 [ %indvars.iv.next.3, %.lr.ph.split.split ], [ %indvars.iv.unr, %.lr.ph.split ] > %7 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv > %8 = trunc i64 %indvars.iv to i32 > store i32 %8, i32* %7, align 4 > %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 > %lftr.wideiv = trunc i64 %indvars.iv.next to i32 > %9 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.next > %10 = trunc i64 %indvars.iv.next to i32 > store i32 %10, i32* %9, align 4 > %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1 > %lftr.wideiv.1 = trunc i64 %indvars.iv.next.1 to i32 > %11 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.next.1 > %12 = trunc i64 %indvars.iv.next.1 to i32 > store i32 %12, i32* %11, align 4 > %indvars.iv.next.2 = add nuw 
nsw i64 %indvars.iv.next.1, 1 > %lftr.wideiv.2 = trunc i64 %indvars.iv.next.2 to i32 > %13 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.next.2 > %14 = trunc i64 %indvars.iv.next.2 to i32 > store i32 %14, i32* %13, align 4 > %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1 > %lftr.wideiv.3 = trunc i64 %indvars.iv.next.3 to i32 > %exitcond.3 = icmp ne i32 %lftr.wideiv.3, %n > br i1 %exitcond.3, label %.lr.ph.split.split, label %._crit_edge > > ._crit_edge: ; preds = %.lr.ph.split, %.lr.ph.split.split, %0 > ret void > } > > attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } > > !llvm.ident = !{!0} > > !0 = !{!"clang version 3.8.0 (trunk 245730) (llvm/trunk 245727)"} > !1 = distinct !{!1, !2} > !2 = !{!"llvm.loop.unroll.disable"} > > ******************************Clang Generate IR with LoopUnrolling End*********************************************** > > The second .bc file gives me this: > ******************************API Generate IR with LoopUnrolling Start************************************************* > ; ModuleID = 'bc_from_api.bc' > target triple = "x86_64-unkown-linux-gnu" > > ; Function Attrs: nounwind > define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 { > entry: > %cmp.1 = icmp slt i32 0, %n > br i1 %cmp.1, label %for.body, label %for.end > > for.body: ; preds = %entry, %for.body.3 > %i.02 = phi i32 [ %inc.3, %for.body.3 ], [ 0, %entry ] > %idxprom = sext i32 %i.02 to i64 > %arrayidx = getelementptr inbounds i32, i32* %array_x, i64 %idxprom > store i32 %i.02, i32* %arrayidx, align 4 > %inc = add nuw nsw i32 %i.02, 1 > %cmp = icmp slt i32 %inc, %n > br i1 %cmp, label %for.body.1, label %for.end > > for.end: ; preds = %for.body, %for.body.1, 
%for.body.2, %for.body.3, %entry > ret void > > for.body.1: ; preds = %for.body > %idxprom.1 = sext i32 %inc to i64 > %arrayidx.1 = getelementptr inbounds i32, i32* %array_x, i64 %idxprom.1 > store i32 %inc, i32* %arrayidx.1, align 4 > %inc.1 = add nuw nsw i32 %inc, 1 > %cmp.1.3 = icmp slt i32 %inc.1, %n > br i1 %cmp.1.3, label %for.body.2, label %for.end > > for.body.2: ; preds = %for.body.1 > %idxprom.2 = sext i32 %inc.1 to i64 > %arrayidx.2 = getelementptr inbounds i32, i32* %array_x, i64 %idxprom.2 > store i32 %inc.1, i32* %arrayidx.2, align 4 > %inc.2 = add nuw nsw i32 %inc.1, 1 > %cmp.2 = icmp slt i32 %inc.2, %n > br i1 %cmp.2, label %for.body.3, label %for.end > > for.body.3: ; preds = %for.body.2 > %idxprom.3 = sext i32 %inc.2 to i64 > %arrayidx.3 = getelementptr inbounds i32, i32* %array_x, i64 %idxprom.3 > store i32 %inc.2, i32* %arrayidx.3, align 4 > %inc.3 = add nuw nsw i32 %inc.2, 1 > %cmp.3 = icmp slt i32 %inc.3, %n > br i1 %cmp.3, label %for.body, label %for.end > } > > attributes #0 = { nounwind } > ******************************API Generate IR with LoopUnrolling End************************************************** > > Sorry I post two many code here. Can you give me any suggestion? Yes, use an online service like pastebin :) You haven’t defined the DataLayout in the API case; doing so should help. — Mehdi -------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20150822/4cfa0a2a/attachment.html>
Xiangyang Guo via llvm-dev
2015-Aug-22 15:47 UTC
[llvm-dev] loop unrolling introduces conditional branch
Thanks for your point that out. I just add DataLayout in my code such as "mod->setDataLayout("e-m:e-i64:64-f80:128-n8:16:32:64-S128");", still no luck. I'm really confused about this. Do I need to add more passes before -loop-unroll? On Sat, Aug 22, 2015 at 11:36 AM, Mehdi Amini <mehdi.amini at apple.com> wrote:> > On Aug 22, 2015, at 7:27 AM, Xiangyang Guo <xguo6 at ncsu.edu> wrote: > > Hi, Mehdi, > > For example, I have this very simple source code: > void foo( int n, int array_x[]) > { > for (int i=0; i < n; i++) > array_x[i] = i; > } > > After I use "clang -emit-llvm -o bc_from_clang.bc -c try.cc", I get > bc_from_clang.bc. With my code (using LLVM IRbuilder API), I get > bc_from_api.bc. Attachment please find thse two files. I also past the IR > here. > ******************************** Clang Generate IR Start > *********************************************************** > ; ModuleID = 'bc_from_clang.bc' > target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" > target triple = "x86_64-unknown-linux-gnu" > > ; Function Attrs: nounwind uwtable > define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 { > %1 = alloca i32, align 4 > %2 = alloca i32*, align 8 > %i = alloca i32, align 4 > store i32 %n, i32* %1, align 4 > store i32* %array_x, i32** %2, align 8 > store i32 0, i32* %i, align 4 > br label %3 > > ; <label>:3 ; preds = %13, %0 > %4 = load i32, i32* %i, align 4 > %5 = load i32, i32* %1, align 4 > %6 = icmp slt i32 %4, %5 > br i1 %6, label %7, label %16 > > ; <label>:7 ; preds = %3 > %8 = load i32, i32* %i, align 4 > %9 = load i32, i32* %i, align 4 > %10 = sext i32 %9 to i64 > %11 = load i32*, i32** %2, align 8 > %12 = getelementptr inbounds i32, i32* %11, i64 %10 > store i32 %8, i32* %12, align 4 > br label %13 > > ; <label>:13 ; preds = %7 > %14 = load i32, i32* %i, align 4 > %15 = add nsw i32 %14, 1 > store i32 %15, i32* %i, align 4 > br label %3 > > ; <label>:16 ; preds = %3 > ret void > } > > attributes #0 = { nounwind uwtable 
"disable-tail-calls"="false" > "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" > "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" > "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" > "target-cpu"="x86-64" "target-features"="+sse,+sse2" > "unsafe-fp-math"="false" "use-soft-float"="false" } > > !llvm.ident = !{!0} > > !0 = !{!"clang version 3.8.0 (trunk 245730) (llvm/trunk 245727)"} > > ******************************** Clang Generate IR End > *********************************************************** > > ******************************** API Generate IR Start > *********************************************************** > ; ModuleID = 'bc_from_api.bc' > target triple = "x86_64-unkown-linux-gnu" > > ; Function Attrs: nounwind > define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 { > entry: > %n.addr = alloca i32, align 4 > %array_x.addr = alloca i32*, align 8 > %i = alloca i32, align 4 > store i32 %n, i32* %n.addr, align 4 > store i32* %array_x, i32** %array_x.addr, align 8 > store i32 0, i32* %i, align 4 > br label %for.cond > > for.cond: ; preds = %for.inc, > %entry > %0 = load i32, i32* %i, align 4 > %1 = load i32, i32* %n.addr, align 4 > %cmp = icmp slt i32 %0, %1 > br i1 %cmp, label %for.body, label %for.end > > for.body: ; preds = %for.cond > %2 = load i32, i32* %i, align 4 > %3 = load i32, i32* %i, align 4 > %idxprom = sext i32 %3 to i64 > %4 = load i32*, i32** %array_x.addr, align 8 > %arrayidx = getelementptr inbounds i32, i32* %4, i64 %idxprom > store i32 %2, i32* %arrayidx, align 4 > br label %for.inc > > for.inc: ; preds = %for.body > %5 = load i32, i32* %i, align 4 > %inc = add i32 %5, 1 > store i32 %inc, i32* %i, align 4 > br label %for.cond > > for.end: ; preds = %for.cond > ret void > } > > attributes #0 = { nounwind } > > ******************************** API Generate IR End > *********************************************************** > > Then I use "opt file.bc -mem2reg -loops -loop-simplify -loop-rotate -lcssa > 
-indvars -loop-unroll -unroll-count=4 -irce -simplifycfg -S" to run both > .bc files. > The first .bc file give me this: > > ***************************** Clang Generate IR with LoopUnrolling > Start********************************************** > ; ModuleID = 'bc_from_clang.bc' > target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" > target triple = "x86_64-unknown-linux-gnu" > > ; Function Attrs: nounwind uwtable > define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 { > %1 = icmp slt i32 0, %n > br i1 %1, label %.lr.ph > <https://urldefense.proofpoint.com/v2/url?u=http-3A__lr.ph&d=BQMFaQ&c=eEvniauFctOgLOKGJOplqw&r=v-ruWq0KCv2O3thJZiK6naxuXK8mQHZUmGq5FBtAmZ4&m=9pD2qNHBiPJscAs-ZzWT7q61NeZNcwsY8Io52JnaI3E&s=OaVJWcn8TI03qp9lgqeWMIVMySoyBIPowkHDEyR7lvQ&e=>, > label %._crit_edge > > .lr.ph > <https://urldefense.proofpoint.com/v2/url?u=http-3A__lr.ph&d=BQMFaQ&c=eEvniauFctOgLOKGJOplqw&r=v-ruWq0KCv2O3thJZiK6naxuXK8mQHZUmGq5FBtAmZ4&m=9pD2qNHBiPJscAs-ZzWT7q61NeZNcwsY8Io52JnaI3E&s=OaVJWcn8TI03qp9lgqeWMIVMySoyBIPowkHDEyR7lvQ&e=>: > ; preds = %0 > %2 = add i32 %n, -1 > %xtraiter = and i32 %n, 3 > %lcmp.mod = icmp ne i32 %xtraiter, 0 > br i1 %lcmp.mod, label %3, label %.lr.ph.split > > ; <label>:3 ; preds = %3, %.lr.ph > <https://urldefense.proofpoint.com/v2/url?u=http-3A__lr.ph&d=BQMFaQ&c=eEvniauFctOgLOKGJOplqw&r=v-ruWq0KCv2O3thJZiK6naxuXK8mQHZUmGq5FBtAmZ4&m=9pD2qNHBiPJscAs-ZzWT7q61NeZNcwsY8Io52JnaI3E&s=OaVJWcn8TI03qp9lgqeWMIVMySoyBIPowkHDEyR7lvQ&e=> > %indvars.iv.prol = phi i64 [ 0, %.lr.ph > <https://urldefense.proofpoint.com/v2/url?u=http-3A__lr.ph&d=BQMFaQ&c=eEvniauFctOgLOKGJOplqw&r=v-ruWq0KCv2O3thJZiK6naxuXK8mQHZUmGq5FBtAmZ4&m=9pD2qNHBiPJscAs-ZzWT7q61NeZNcwsY8Io52JnaI3E&s=OaVJWcn8TI03qp9lgqeWMIVMySoyBIPowkHDEyR7lvQ&e=> > ], [ %indvars.iv.next.prol, %3 ] > %prol.iter = phi i32 [ %xtraiter, %.lr.ph > 
<https://urldefense.proofpoint.com/v2/url?u=http-3A__lr.ph&d=BQMFaQ&c=eEvniauFctOgLOKGJOplqw&r=v-ruWq0KCv2O3thJZiK6naxuXK8mQHZUmGq5FBtAmZ4&m=9pD2qNHBiPJscAs-ZzWT7q61NeZNcwsY8Io52JnaI3E&s=OaVJWcn8TI03qp9lgqeWMIVMySoyBIPowkHDEyR7lvQ&e=> > ], [ %prol.iter.sub, %3 ] > %4 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.prol > %5 = trunc i64 %indvars.iv.prol to i32 > store i32 %5, i32* %4, align 4 > %indvars.iv.next.prol = add nuw nsw i64 %indvars.iv.prol, 1 > %lftr.wideiv.prol = trunc i64 %indvars.iv.next.prol to i32 > %exitcond.prol = icmp ne i32 %lftr.wideiv.prol, %n > %prol.iter.sub = sub i32 %prol.iter, 1 > %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0 > br i1 %prol.iter.cmp, label %3, label %.lr.ph.split, !llvm.loop !1 > > .lr.ph.split: ; preds = %3, %.lr.ph > <https://urldefense.proofpoint.com/v2/url?u=http-3A__lr.ph&d=BQMFaQ&c=eEvniauFctOgLOKGJOplqw&r=v-ruWq0KCv2O3thJZiK6naxuXK8mQHZUmGq5FBtAmZ4&m=9pD2qNHBiPJscAs-ZzWT7q61NeZNcwsY8Io52JnaI3E&s=OaVJWcn8TI03qp9lgqeWMIVMySoyBIPowkHDEyR7lvQ&e=> > %indvars.iv.unr = phi i64 [ 0, %.lr.ph > <https://urldefense.proofpoint.com/v2/url?u=http-3A__lr.ph&d=BQMFaQ&c=eEvniauFctOgLOKGJOplqw&r=v-ruWq0KCv2O3thJZiK6naxuXK8mQHZUmGq5FBtAmZ4&m=9pD2qNHBiPJscAs-ZzWT7q61NeZNcwsY8Io52JnaI3E&s=OaVJWcn8TI03qp9lgqeWMIVMySoyBIPowkHDEyR7lvQ&e=> > ], [ %indvars.iv.next.prol, %3 ] > %6 = icmp ult i32 %2, 3 > br i1 %6, label %._crit_edge, label %.lr.ph.split.split > > .lr.ph.split.split: ; preds = %.lr.ph.split, > %.lr.ph.split.split > %indvars.iv = phi i64 [ %indvars.iv.next.3, %.lr.ph.split.split ], [ > %indvars.iv.unr, %.lr.ph.split ] > %7 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv > %8 = trunc i64 %indvars.iv to i32 > store i32 %8, i32* %7, align 4 > %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 > %lftr.wideiv = trunc i64 %indvars.iv.next to i32 > %9 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.next > %10 = trunc i64 %indvars.iv.next to i32 > store i32 %10, i32* %9, align 4 > %indvars.iv.next.1 
= add nuw nsw i64 %indvars.iv.next, 1 > %lftr.wideiv.1 = trunc i64 %indvars.iv.next.1 to i32 > %11 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.next.1 > %12 = trunc i64 %indvars.iv.next.1 to i32 > store i32 %12, i32* %11, align 4 > %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1 > %lftr.wideiv.2 = trunc i64 %indvars.iv.next.2 to i32 > %13 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.next.2 > %14 = trunc i64 %indvars.iv.next.2 to i32 > store i32 %14, i32* %13, align 4 > %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1 > %lftr.wideiv.3 = trunc i64 %indvars.iv.next.3 to i32 > %exitcond.3 = icmp ne i32 %lftr.wideiv.3, %n > br i1 %exitcond.3, label %.lr.ph.split.split, label %._crit_edge > > ._crit_edge: ; preds = %.lr.ph.split, > %.lr.ph.split.split, %0 > ret void > } > > attributes #0 = { nounwind uwtable "disable-tail-calls"="false" > "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" > "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" > "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" > "target-cpu"="x86-64" "target-features"="+sse,+sse2" > "unsafe-fp-math"="false" "use-soft-float"="false" } > > !llvm.ident = !{!0} > > !0 = !{!"clang version 3.8.0 (trunk 245730) (llvm/trunk 245727)"} > !1 = distinct !{!1, !2} > !2 = !{!"llvm.loop.unroll.disable"} > > ******************************Clang Generate IR with LoopUnrolling > End*********************************************** > > The second .bc file gives me this: > ******************************API Generate IR with LoopUnrolling > Start************************************************* > ; ModuleID = 'bc_from_api.bc' > target triple = "x86_64-unkown-linux-gnu" > > ; Function Attrs: nounwind > define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 { > entry: > %cmp.1 = icmp slt i32 0, %n > br i1 %cmp.1, label %for.body, label %for.end > > for.body: ; preds = %entry, > %for.body.3 > %i.02 = phi i32 [ %inc.3, %for.body.3 ], [ 0, %entry ] > %idxprom = 
sext i32 %i.02 to i64 > %arrayidx = getelementptr inbounds i32, i32* %array_x, i64 %idxprom > store i32 %i.02, i32* %arrayidx, align 4 > %inc = add nuw nsw i32 %i.02, 1 > %cmp = icmp slt i32 %inc, %n > br i1 %cmp, label %for.body.1, label %for.end > > for.end: ; preds = %for.body, > %for.body.1, %for.body.2, %for.body.3, %entry > ret void > > for.body.1: ; preds = %for.body > %idxprom.1 = sext i32 %inc to i64 > %arrayidx.1 = getelementptr inbounds i32, i32* %array_x, i64 %idxprom.1 > store i32 %inc, i32* %arrayidx.1, align 4 > %inc.1 = add nuw nsw i32 %inc, 1 > %cmp.1.3 = icmp slt i32 %inc.1, %n > br i1 %cmp.1.3, label %for.body.2, label %for.end > > for.body.2: ; preds = %for.body.1 > %idxprom.2 = sext i32 %inc.1 to i64 > %arrayidx.2 = getelementptr inbounds i32, i32* %array_x, i64 %idxprom.2 > store i32 %inc.1, i32* %arrayidx.2, align 4 > %inc.2 = add nuw nsw i32 %inc.1, 1 > %cmp.2 = icmp slt i32 %inc.2, %n > br i1 %cmp.2, label %for.body.3, label %for.end > > for.body.3: ; preds = %for.body.2 > %idxprom.3 = sext i32 %inc.2 to i64 > %arrayidx.3 = getelementptr inbounds i32, i32* %array_x, i64 %idxprom.3 > store i32 %inc.2, i32* %arrayidx.3, align 4 > %inc.3 = add nuw nsw i32 %inc.2, 1 > %cmp.3 = icmp slt i32 %inc.3, %n > br i1 %cmp.3, label %for.body, label %for.end > } > > attributes #0 = { nounwind } > ******************************API Generate IR with LoopUnrolling > End************************************************** > > Sorry I post two many code here. Can you give me any suggestion? > > > Yes, use an online service like pastebin :) > > You don’t have defined the DataLayout in the API cases, it should help to > do so. > > — > Mehdi > >-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20150822/43bb8ab8/attachment-0001.html>
Xiangyang Guo via llvm-dev
2015-Aug-22 15:53 UTC
[llvm-dev] loop unrolling introduces conditional branch
Actually, My code to generate the IR is really simple. I attach the code here for reference. I appreciate it if you can give me any suggestion about this loop-unrolling thing. Regards, Xiangyang On Sat, Aug 22, 2015 at 11:36 AM, Mehdi Amini <mehdi.amini at apple.com> wrote:> > On Aug 22, 2015, at 7:27 AM, Xiangyang Guo <xguo6 at ncsu.edu> wrote: > > Hi, Mehdi, > > For example, I have this very simple source code: > void foo( int n, int array_x[]) > { > for (int i=0; i < n; i++) > array_x[i] = i; > } > > After I use "clang -emit-llvm -o bc_from_clang.bc -c try.cc", I get > bc_from_clang.bc. With my code (using LLVM IRbuilder API), I get > bc_from_api.bc. Attachment please find thse two files. I also past the IR > here. > ******************************** Clang Generate IR Start > *********************************************************** > ; ModuleID = 'bc_from_clang.bc' > target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" > target triple = "x86_64-unknown-linux-gnu" > > ; Function Attrs: nounwind uwtable > define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 { > %1 = alloca i32, align 4 > %2 = alloca i32*, align 8 > %i = alloca i32, align 4 > store i32 %n, i32* %1, align 4 > store i32* %array_x, i32** %2, align 8 > store i32 0, i32* %i, align 4 > br label %3 > > ; <label>:3 ; preds = %13, %0 > %4 = load i32, i32* %i, align 4 > %5 = load i32, i32* %1, align 4 > %6 = icmp slt i32 %4, %5 > br i1 %6, label %7, label %16 > > ; <label>:7 ; preds = %3 > %8 = load i32, i32* %i, align 4 > %9 = load i32, i32* %i, align 4 > %10 = sext i32 %9 to i64 > %11 = load i32*, i32** %2, align 8 > %12 = getelementptr inbounds i32, i32* %11, i64 %10 > store i32 %8, i32* %12, align 4 > br label %13 > > ; <label>:13 ; preds = %7 > %14 = load i32, i32* %i, align 4 > %15 = add nsw i32 %14, 1 > store i32 %15, i32* %i, align 4 > br label %3 > > ; <label>:16 ; preds = %3 > ret void > } > > attributes #0 = { nounwind uwtable "disable-tail-calls"="false" > "less-precise-fpmad"="false" 
"no-frame-pointer-elim"="true" > "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" > "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" > "target-cpu"="x86-64" "target-features"="+sse,+sse2" > "unsafe-fp-math"="false" "use-soft-float"="false" } > > !llvm.ident = !{!0} > > !0 = !{!"clang version 3.8.0 (trunk 245730) (llvm/trunk 245727)"} > > ******************************** Clang Generate IR End > *********************************************************** > > ******************************** API Generate IR Start > *********************************************************** > ; ModuleID = 'bc_from_api.bc' > target triple = "x86_64-unkown-linux-gnu" > > ; Function Attrs: nounwind > define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 { > entry: > %n.addr = alloca i32, align 4 > %array_x.addr = alloca i32*, align 8 > %i = alloca i32, align 4 > store i32 %n, i32* %n.addr, align 4 > store i32* %array_x, i32** %array_x.addr, align 8 > store i32 0, i32* %i, align 4 > br label %for.cond > > for.cond: ; preds = %for.inc, > %entry > %0 = load i32, i32* %i, align 4 > %1 = load i32, i32* %n.addr, align 4 > %cmp = icmp slt i32 %0, %1 > br i1 %cmp, label %for.body, label %for.end > > for.body: ; preds = %for.cond > %2 = load i32, i32* %i, align 4 > %3 = load i32, i32* %i, align 4 > %idxprom = sext i32 %3 to i64 > %4 = load i32*, i32** %array_x.addr, align 8 > %arrayidx = getelementptr inbounds i32, i32* %4, i64 %idxprom > store i32 %2, i32* %arrayidx, align 4 > br label %for.inc > > for.inc: ; preds = %for.body > %5 = load i32, i32* %i, align 4 > %inc = add i32 %5, 1 > store i32 %inc, i32* %i, align 4 > br label %for.cond > > for.end: ; preds = %for.cond > ret void > } > > attributes #0 = { nounwind } > > ******************************** API Generate IR End > *********************************************************** > > Then I use "opt file.bc -mem2reg -loops -loop-simplify -loop-rotate -lcssa > -indvars -loop-unroll -unroll-count=4 -irce -simplifycfg -S" to run 
both > .bc files. > The first .bc file give me this: > > ***************************** Clang Generate IR with LoopUnrolling > Start********************************************** > ; ModuleID = 'bc_from_clang.bc' > target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" > target triple = "x86_64-unknown-linux-gnu" > > ; Function Attrs: nounwind uwtable > define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 { > %1 = icmp slt i32 0, %n > br i1 %1, label %.lr.ph, > label %._crit_edge > > .lr.ph: > ; preds = %0 > %2 = add i32 %n, -1 > %xtraiter = and i32 %n, 3 > %lcmp.mod = icmp ne i32 %xtraiter, 0 > br i1 %lcmp.mod, label %3, label %.lr.ph.split > > ; <label>:3 ; preds = %3, %.lr.ph > %indvars.iv.prol = phi i64 [ 0, %.lr.ph > ], [ %indvars.iv.next.prol, %3 ] > %prol.iter = phi i32 [ %xtraiter, %.lr.ph > ], 
[ %prol.iter.sub, %3 ] > %4 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.prol > %5 = trunc i64 %indvars.iv.prol to i32 > store i32 %5, i32* %4, align 4 > %indvars.iv.next.prol = add nuw nsw i64 %indvars.iv.prol, 1 > %lftr.wideiv.prol = trunc i64 %indvars.iv.next.prol to i32 > %exitcond.prol = icmp ne i32 %lftr.wideiv.prol, %n > %prol.iter.sub = sub i32 %prol.iter, 1 > %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0 > br i1 %prol.iter.cmp, label %3, label %.lr.ph.split, !llvm.loop !1 > > .lr.ph.split: ; preds = %3, %.lr.ph > %indvars.iv.unr = phi i64 [ 0, %.lr.ph > ], [ %indvars.iv.next.prol, %3 ] > %6 = icmp ult i32 %2, 3 > br i1 %6, label %._crit_edge, label %.lr.ph.split.split > > .lr.ph.split.split: ; preds = %.lr.ph.split, > %.lr.ph.split.split > %indvars.iv = phi i64 [ %indvars.iv.next.3, %.lr.ph.split.split ], [ > %indvars.iv.unr, %.lr.ph.split ] > %7 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv > %8 = trunc i64 %indvars.iv to i32 > store i32 %8, i32* %7, align 4 > %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 > %lftr.wideiv = trunc i64 %indvars.iv.next to i32 > %9 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.next > %10 = trunc i64 %indvars.iv.next to i32 > store i32 %10, i32* %9, align 4 > %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1 > %lftr.wideiv.1 = trunc i64 %indvars.iv.next.1 to i32 > %11 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.next.1 > %12 = trunc i64 %indvars.iv.next.1 to i32 > store i32 %12, i32* %11, 
align 4 > %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1 > %lftr.wideiv.2 = trunc i64 %indvars.iv.next.2 to i32 > %13 = getelementptr inbounds i32, i32* %array_x, i64 %indvars.iv.next.2 > %14 = trunc i64 %indvars.iv.next.2 to i32 > store i32 %14, i32* %13, align 4 > %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1 > %lftr.wideiv.3 = trunc i64 %indvars.iv.next.3 to i32 > %exitcond.3 = icmp ne i32 %lftr.wideiv.3, %n > br i1 %exitcond.3, label %.lr.ph.split.split, label %._crit_edge > > ._crit_edge: ; preds = %.lr.ph.split, > %.lr.ph.split.split, %0 > ret void > } > > attributes #0 = { nounwind uwtable "disable-tail-calls"="false" > "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" > "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" > "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" > "target-cpu"="x86-64" "target-features"="+sse,+sse2" > "unsafe-fp-math"="false" "use-soft-float"="false" } > > !llvm.ident = !{!0} > > !0 = !{!"clang version 3.8.0 (trunk 245730) (llvm/trunk 245727)"} > !1 = distinct !{!1, !2} > !2 = !{!"llvm.loop.unroll.disable"} > > ******************************Clang Generate IR with LoopUnrolling > End*********************************************** > > The second .bc file gives me this: > ******************************API Generate IR with LoopUnrolling > Start************************************************* > ; ModuleID = 'bc_from_api.bc' > target triple = "x86_64-unkown-linux-gnu" > > ; Function Attrs: nounwind > define void @_Z3fooiPi(i32 %n, i32* %array_x) #0 { > entry: > %cmp.1 = icmp slt i32 0, %n > br i1 %cmp.1, label %for.body, label %for.end > > for.body: ; preds = %entry, > %for.body.3 > %i.02 = phi i32 [ %inc.3, %for.body.3 ], [ 0, %entry ] > %idxprom = sext i32 %i.02 to i64 > %arrayidx = getelementptr inbounds i32, i32* %array_x, i64 %idxprom > store i32 %i.02, i32* %arrayidx, align 4 > %inc = add nuw nsw i32 %i.02, 1 > %cmp = icmp slt i32 %inc, %n > br i1 %cmp, label %for.body.1, label 
%for.end > > for.end: ; preds = %for.body, > %for.body.1, %for.body.2, %for.body.3, %entry > ret void > > for.body.1: ; preds = %for.body > %idxprom.1 = sext i32 %inc to i64 > %arrayidx.1 = getelementptr inbounds i32, i32* %array_x, i64 %idxprom.1 > store i32 %inc, i32* %arrayidx.1, align 4 > %inc.1 = add nuw nsw i32 %inc, 1 > %cmp.1.3 = icmp slt i32 %inc.1, %n > br i1 %cmp.1.3, label %for.body.2, label %for.end > > for.body.2: ; preds = %for.body.1 > %idxprom.2 = sext i32 %inc.1 to i64 > %arrayidx.2 = getelementptr inbounds i32, i32* %array_x, i64 %idxprom.2 > store i32 %inc.1, i32* %arrayidx.2, align 4 > %inc.2 = add nuw nsw i32 %inc.1, 1 > %cmp.2 = icmp slt i32 %inc.2, %n > br i1 %cmp.2, label %for.body.3, label %for.end > > for.body.3: ; preds = %for.body.2 > %idxprom.3 = sext i32 %inc.2 to i64 > %arrayidx.3 = getelementptr inbounds i32, i32* %array_x, i64 %idxprom.3 > store i32 %inc.2, i32* %arrayidx.3, align 4 > %inc.3 = add nuw nsw i32 %inc.2, 1 > %cmp.3 = icmp slt i32 %inc.3, %n > br i1 %cmp.3, label %for.body, label %for.end > } > > attributes #0 = { nounwind } > ******************************API Generate IR with LoopUnrolling > End************************************************** > > Sorry I post two many code here. Can you give me any suggestion? > > > Yes, use an online service like pastebin :) > > You don’t have defined the DataLayout in the API cases, it should help to > do so. > > — > Mehdi > >-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20150822/98e074c9/attachment.html> -------------- next part -------------- A non-text attachment was scrubbed... Name: main.cpp Type: text/x-c++src Size: 7082 bytes Desc: not available URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20150822/98e074c9/attachment.cpp>