Nat! via llvm-dev
2016-Feb-11 16:58 UTC
[llvm-dev] Expected constant simplification not happening
Hi, the appended IR code does not optimize to my liking :) This is the interesting part of the x86_64 output produced via clang -Os: --- movq -16(%r12), %rax movl -4(%rax), %ecx andl $2298949, %ecx ## imm = 0x231445 cmpq $2298949, (%rax,%rcx) ## imm = 0x231445 leaq 8(%rax,%rcx), %rax cmovneq %r15, %rax movl $2298949, %esi ## imm = 0x231445 movq %r12, %rdi movq %r14, %rdx callq *(%rax) --- and clang -O3: --- movq -16(%r12), %rax movl -4(%rax), %ecx andl $2298949, %ecx ## imm = 0x231445 cmpl $2298949, (%rax,%rcx) ## imm = 0x231445 jne LBB1_4 leaq 8(%rax,%rcx), %rax jmp LBB1_5 .align 4, 0x90 LBB1_4: movq %r15, %rax LBB1_5: movl $2298949, %esi ## imm = 0x231445 movq %r12, %rdi movq %r14, %rdx callq *(%rax) --- As you can see, in both cases the constant $2298949 is replicated 3 times. I would have expected something like the following code, at least for -Os: --- movq -16(%r12), %rax movl $2298949, %esi ### **** move on up movl -4(%rax), %ecx andl %esi, %ecx ### cmpl %esi, (%rax,%rcx) ### leaq 8(%rax,%rcx), %rax cmovneq %r15, %rax movq %r12, %rdi movq %r14, %rdx callq *(%rax) --- It is much shorter (33 bytes vs. 42 bytes), and I would assume it is at least as fast, if not faster. This is with llvm 3.7.0. And yes, this pains me at the moment :) Ciao Nat! 
---- ; ModuleID = 'optimize-fail.c' target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" %struct._foo = type {} %struct._entry = type { i32, i32, i8* (%struct._foo*, i32, i8*)* } %struct._table = type { i64, i32, i32, [1 x %struct._entry] } %struct.test = type { %struct.__foo, i32 } %struct.__foo = type { %struct._dispatch } %struct._dispatch = type { %struct._entry*, i8* (%struct._foo*, i32, i8*)* } @str = private unnamed_addr constant [8 x i8] c"table_f\00" @str.2 = private unnamed_addr constant [11 x i8] c"dispatch_f\00" ; Function Attrs: nounwind declare void @llvm.lifetime.start(i64, i8* nocapture) #1 ; Function Attrs: nounwind declare void @llvm.lifetime.end(i64, i8* nocapture) #1 ; Function Attrs: noinline nounwind ssp uwtable define i8* @foo(%struct._foo* %obj, i32 %unused, i8* %value) #2 { entry: %tobool.i = icmp eq %struct._foo* %obj, null %0 = bitcast %struct._foo* %obj to i8* %arrayidx.i.i = getelementptr inbounds i8, i8* %0, i64 -16 %entries2.i = bitcast i8* %arrayidx.i.i to %struct._entry** %f7.i = getelementptr inbounds i8, i8* %0, i64 -8 br i1 %tobool.i, label %for.end, label %call.exit.preheader call.exit.preheader: ; preds = %entry br label %call.exit call.exit: ; preds = %call.exit.preheader, %call.exit %i.04 = phi i32 [ %inc, %call.exit ], [ 0, %call.exit.preheader ] %1 = load %struct._entry*, %struct._entry** %entries2.i, align 8, !tbaa !2 %arrayidx1.i.i = getelementptr inbounds %struct._entry, %struct._entry* %1, i64 -1 %2 = bitcast %struct._entry* %arrayidx1.i.i to %struct._table* %mask4.i = getelementptr inbounds %struct._table, %struct._table* %2, i64 0, i32 2 %3 = load i32, i32* %mask4.i, align 4, !tbaa !7 %and.i = and i32 %3, 2298949 %idxprom.i = zext i32 %and.i to i64 %4 = bitcast %struct._entry* %1 to i8* %arrayidx.i = getelementptr inbounds i8, i8* %4, i64 %idxprom.i %key5.i = bitcast i8* %arrayidx.i to i32* %5 = load i32, i32* %key5.i, align 4, !tbaa !11 %cmp.i = icmp eq i32 %5, 
2298949 %f6.i = getelementptr inbounds i8, i8* %arrayidx.i, i64 8 %cond.in.v.i = select i1 %cmp.i, i8* %f6.i, i8* %f7.i %cond.in.i = bitcast i8* %cond.in.v.i to i8* (%struct._foo*, i32, i8*)** %cond.i = load i8* (%struct._foo*, i32, i8*)*, i8* (%struct._foo*, i32, i8*)** %cond.in.i, align 8 %call8.i = tail call i8* %cond.i(%struct._foo* %obj, i32 2298949, i8* %value) #1 %inc = add nuw nsw i32 %i.04, 1 %exitcond = icmp eq i32 %inc, 100 br i1 %exitcond, label %for.end.loopexit, label %call.exit for.end.loopexit: ; preds = %call.exit %call8.i.lcssa = phi i8* [ %call8.i, %call.exit ] br label %for.end for.end: ; preds = %for.end.loopexit, %entry %rval.0.lcssa = phi i8* [ %0, %entry ], [ %call8.i.lcssa, %for.end.loopexit ] ret i8* %rval.0.lcssa } attributes #1 = { nounwind } attributes #2 = { noinline nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.module.flags = !{!0} !llvm.ident = !{!1} ---
Sanjay Patel via llvm-dev
2016-Feb-11 23:02 UTC
[llvm-dev] Expected constant simplification not happening
[cc'ing Zia] We have this transform with -Os for some cases after: http://reviews.llvm.org/rL244601 http://reviews.llvm.org/D11363 but something in this example is causing the transform to not trigger. I filed a related bug here: https://llvm.org/bugs/show_bug.cgi?id=24448 If you can file your test case(s) in a bug report, that would be the best way to track progress on solving it. Thanks! On Thu, Feb 11, 2016 at 9:58 AM, Nat! via llvm-dev <llvm-dev at lists.llvm.org> wrote:> Hi > > the appended IR code does not optimize to my liking :) > > this is the interesting part in x86_64, that got produced via clang -Os: > --- > movq -16(%r12), %rax > movl -4(%rax), %ecx > andl $2298949, %ecx ## imm = 0x231445 > cmpq $2298949, (%rax,%rcx) ## imm = 0x231445 > leaq 8(%rax,%rcx), %rax > cmovneq %r15, %rax > movl $2298949, %esi ## imm = 0x231445 > movq %r12, %rdi > movq %r14, %rdx > callq *(%rax) > --- > > > and clang -O3: > --- > movq -16(%r12), %rax > movl -4(%rax), %ecx > andl $2298949, %ecx ## imm = 0x231445 > cmpl $2298949, (%rax,%rcx) ## imm = 0x231445 > jne LBB1_4 > leaq 8(%rax,%rcx), %rax > jmp LBB1_5 > .align 4, 0x90 > LBB1_4: > movq %r15, %rax > LBB1_5: > movl $2298949, %esi ## imm = 0x231445 > movq %r12, %rdi > movq %r14, %rdx > callq *(%rax) > --- > > As you can see in both cases the constant $2298949 is replicated 3 times. > I would have expected something like the following code at least for -Os: > > --- > movq -16(%r12), %rax > movl $2298949, %esi ### **** move on up > movl -4(%rax), %ecx > andl %esi, %ecx ### > cmpl %esi, (%rax,%rcx) ### > leaq 8(%rax,%rcx), %rax > cmovneq %r15, %rax > movq %r12, %rdi > movq %r14, %rdx > callq *(%rax) > --- > > It is much shorter (33 bytes vs. 42 bytes) and I would assume at least the > same speed or better. This is with llvm 3.7.0. And yes this pains me at the > moment :) > > Ciao > Nat! 
> ---- > ; ModuleID = 'optimize-fail.c' > target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" > target triple = "x86_64-apple-macosx10.10.0" > > %struct._foo = type {} > %struct._entry = type { i32, i32, i8* (%struct._foo*, i32, i8*)* } > %struct._table = type { i64, i32, i32, [1 x %struct._entry] } > %struct.test = type { %struct.__foo, i32 } > %struct.__foo = type { %struct._dispatch } > %struct._dispatch = type { %struct._entry*, i8* (%struct._foo*, i32, i8*)* > } > > @str = private unnamed_addr constant [8 x i8] c"table_f\00" > @str.2 = private unnamed_addr constant [11 x i8] c"dispatch_f\00" > > ; Function Attrs: nounwind > declare void @llvm.lifetime.start(i64, i8* nocapture) #1 > > ; Function Attrs: nounwind > declare void @llvm.lifetime.end(i64, i8* nocapture) #1 > > ; Function Attrs: noinline nounwind ssp uwtable > define i8* @foo(%struct._foo* %obj, i32 %unused, i8* %value) #2 { > entry: > %tobool.i = icmp eq %struct._foo* %obj, null > %0 = bitcast %struct._foo* %obj to i8* > %arrayidx.i.i = getelementptr inbounds i8, i8* %0, i64 -16 > %entries2.i = bitcast i8* %arrayidx.i.i to %struct._entry** > %f7.i = getelementptr inbounds i8, i8* %0, i64 -8 > br i1 %tobool.i, label %for.end, label %call.exit.preheader > > call.exit.preheader: ; preds = %entry > br label %call.exit > > call.exit: ; preds > %call.exit.preheader, %call.exit > %i.04 = phi i32 [ %inc, %call.exit ], [ 0, %call.exit.preheader ] > %1 = load %struct._entry*, %struct._entry** %entries2.i, align 8, !tbaa > !2 > %arrayidx1.i.i = getelementptr inbounds %struct._entry, %struct._entry* > %1, i64 -1 > %2 = bitcast %struct._entry* %arrayidx1.i.i to %struct._table* > %mask4.i = getelementptr inbounds %struct._table, %struct._table* %2, > i64 0, i32 2 > %3 = load i32, i32* %mask4.i, align 4, !tbaa !7 > %and.i = and i32 %3, 2298949 > %idxprom.i = zext i32 %and.i to i64 > %4 = bitcast %struct._entry* %1 to i8* > %arrayidx.i = getelementptr inbounds i8, i8* %4, i64 %idxprom.i > %key5.i = bitcast 
i8* %arrayidx.i to i32* > %5 = load i32, i32* %key5.i, align 4, !tbaa !11 > %cmp.i = icmp eq i32 %5, 2298949 > %f6.i = getelementptr inbounds i8, i8* %arrayidx.i, i64 8 > %cond.in.v.i = select i1 %cmp.i, i8* %f6.i, i8* %f7.i > %cond.in.i = bitcast i8* %cond.in.v.i to i8* (%struct._foo*, i32, i8*)** > %cond.i = load i8* (%struct._foo*, i32, i8*)*, i8* (%struct._foo*, i32, > i8*)** %cond.in.i, align 8 > %call8.i = tail call i8* %cond.i(%struct._foo* %obj, i32 2298949, i8* > %value) #1 > %inc = add nuw nsw i32 %i.04, 1 > %exitcond = icmp eq i32 %inc, 100 > br i1 %exitcond, label %for.end.loopexit, label %call.exit > > for.end.loopexit: ; preds = %call.exit > %call8.i.lcssa = phi i8* [ %call8.i, %call.exit ] > br label %for.end > > for.end: ; preds > %for.end.loopexit, %entry > %rval.0.lcssa = phi i8* [ %0, %entry ], [ %call8.i.lcssa, > %for.end.loopexit ] > ret i8* %rval.0.lcssa > } > > > attributes #1 = { nounwind } > attributes #2 = { noinline nounwind ssp uwtable > "disable-tail-calls"="false" "less-precise-fpmad"="false" > "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" > "no-infs-fp-math"="false" "no-nans-fp-math"="false" > "stack-protector-buffer-size"="8" "target-cpu"="core2" > "target-features"="+cx16,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" > "use-soft-float"="false" } > > !llvm.module.flags = !{!0} > !llvm.ident = !{!1} > > --- > > _______________________________________________ > LLVM Developers mailing list > llvm-dev at lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev >-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20160211/2181720a/attachment.html>
Ansari, Zia via llvm-dev
2016-Feb-12 19:50 UTC
[llvm-dev] Expected constant simplification not happening
I took a quick look at this and relaxed the conditions under which we prevent immediate subsumption (specifically, optsize and instruction shape), and I managed to merge 2 of those immediates. I need to dig in a little more to see why it didn’t catch the third. I suspect a phase ordering issue. I’ll take a closer look at this as soon as I get a chance. If you could please attach a testcase to the bug report, I’d appreciate it. Thanks, Zia. From: Sanjay Patel [mailto:spatel at rotateright.com] Sent: Thursday, February 11, 2016 3:02 PM To: Nat! <nat at mulle-kybernetik.com> Cc: llvm-dev <llvm-dev at lists.llvm.org>; Ansari, Zia <zia.ansari at intel.com> Subject: Re: [llvm-dev] Expected constant simplification not happening [cc'ing Zia] We have this transform with -Os for some cases after: http://reviews.llvm.org/rL244601 http://reviews.llvm.org/D11363 but something in this example is causing the transform to not trigger. I filed a related bug here: https://llvm.org/bugs/show_bug.cgi?id=24448 If you can file your test case(s) in a bug report, that would be the best way to track progress on solving it. Thanks! On Thu, Feb 11, 2016 at 9:58 AM, Nat! 
via llvm-dev <llvm-dev at lists.llvm.org<mailto:llvm-dev at lists.llvm.org>> wrote: Hi the appended IR code does not optimize to my liking :) this is the interesting part in x86_64, that got produced via clang -Os: --- movq -16(%r12), %rax movl -4(%rax), %ecx andl $2298949, %ecx ## imm = 0x231445 cmpq $2298949, (%rax,%rcx) ## imm = 0x231445 leaq 8(%rax,%rcx), %rax cmovneq %r15, %rax movl $2298949, %esi ## imm = 0x231445 movq %r12, %rdi movq %r14, %rdx callq *(%rax) --- and clang -O3: --- movq -16(%r12), %rax movl -4(%rax), %ecx andl $2298949, %ecx ## imm = 0x231445 cmpl $2298949, (%rax,%rcx) ## imm = 0x231445 jne LBB1_4 leaq 8(%rax,%rcx), %rax jmp LBB1_5 .align 4, 0x90 LBB1_4: movq %r15, %rax LBB1_5: movl $2298949, %esi ## imm = 0x231445 movq %r12, %rdi movq %r14, %rdx callq *(%rax) --- As you can see in both cases the constant $2298949 is replicated 3 times. I would have expected something like the following code at least for -Os: --- movq -16(%r12), %rax movl $2298949, %esi ### **** move on up movl -4(%rax), %ecx andl %esi, %ecx ### cmpl %esi, (%rax,%rcx) ### leaq 8(%rax,%rcx), %rax cmovneq %r15, %rax movq %r12, %rdi movq %r14, %rdx callq *(%rax) --- It is much shorter (33 bytes vs. 42 bytes) and I would assume at least the same speed or better. This is with llvm 3.7.0. And yes this pains me at the moment :) Ciao Nat! 
---- ; ModuleID = 'optimize-fail.c' target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" %struct._foo = type {} %struct._entry = type { i32, i32, i8* (%struct._foo*, i32, i8*)* } %struct._table = type { i64, i32, i32, [1 x %struct._entry] } %struct.test = type { %struct.__foo, i32 } %struct.__foo = type { %struct._dispatch } %struct._dispatch = type { %struct._entry*, i8* (%struct._foo*, i32, i8*)* } @str = private unnamed_addr constant [8 x i8] c"table_f\00" @str.2 = private unnamed_addr constant [11 x i8] c"dispatch_f\00" ; Function Attrs: nounwind declare void @llvm.lifetime.start(i64, i8* nocapture) #1 ; Function Attrs: nounwind declare void @llvm.lifetime.end(i64, i8* nocapture) #1 ; Function Attrs: noinline nounwind ssp uwtable define i8* @foo(%struct._foo* %obj, i32 %unused, i8* %value) #2 { entry: %tobool.i = icmp eq %struct._foo* %obj, null %0 = bitcast %struct._foo* %obj to i8* %arrayidx.i.i = getelementptr inbounds i8, i8* %0, i64 -16 %entries2.i = bitcast i8* %arrayidx.i.i to %struct._entry** %f7.i = getelementptr inbounds i8, i8* %0, i64 -8 br i1 %tobool.i, label %for.end, label %call.exit.preheader call.exit.preheader: ; preds = %entry br label %call.exit call.exit: ; preds = %call.exit.preheader, %call.exit %i.04 = phi i32 [ %inc, %call.exit ], [ 0, %call.exit.preheader ] %1 = load %struct._entry*, %struct._entry** %entries2.i, align 8, !tbaa !2 %arrayidx1.i.i = getelementptr inbounds %struct._entry, %struct._entry* %1, i64 -1 %2 = bitcast %struct._entry* %arrayidx1.i.i to %struct._table* %mask4.i = getelementptr inbounds %struct._table, %struct._table* %2, i64 0, i32 2 %3 = load i32, i32* %mask4.i, align 4, !tbaa !7 %and.i = and i32 %3, 2298949 %idxprom.i = zext i32 %and.i to i64 %4 = bitcast %struct._entry* %1 to i8* %arrayidx.i = getelementptr inbounds i8, i8* %4, i64 %idxprom.i %key5.i = bitcast i8* %arrayidx.i to i32* %5 = load i32, i32* %key5.i, align 4, !tbaa !11 %cmp.i = icmp eq i32 %5, 
2298949 %f6.i = getelementptr inbounds i8, i8* %arrayidx.i, i64 8 %cond.in.v.i = select i1 %cmp.i, i8* %f6.i, i8* %f7.i %cond.in.i = bitcast i8* %cond.in.v.i to i8* (%struct._foo*, i32, i8*)** %cond.i = load i8* (%struct._foo*, i32, i8*)*, i8* (%struct._foo*, i32, i8*)** %cond.in.i, align 8 %call8.i = tail call i8* %cond.i(%struct._foo* %obj, i32 2298949, i8* %value) #1 %inc = add nuw nsw i32 %i.04, 1 %exitcond = icmp eq i32 %inc, 100 br i1 %exitcond, label %for.end.loopexit, label %call.exit for.end.loopexit: ; preds = %call.exit %call8.i.lcssa = phi i8* [ %call8.i, %call.exit ] br label %for.end for.end: ; preds = %for.end.loopexit, %entry %rval.0.lcssa = phi i8* [ %0, %entry ], [ %call8.i.lcssa, %for.end.loopexit ] ret i8* %rval.0.lcssa } attributes #1 = { nounwind } attributes #2 = { noinline nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.module.flags = !{!0} !llvm.ident = !{!1} --- _______________________________________________ LLVM Developers mailing list llvm-dev at lists.llvm.org<mailto:llvm-dev at lists.llvm.org> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev -------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20160212/7a5d1fdf/attachment.html>
Nat! via llvm-dev
2016-Dec-07 17:13 UTC
[llvm-dev] Expected constant simplification not happening
Hello, has there been any progress on this topic? I just checked, and the 3.9 optimizer output is still the same. https://llvm.org/bugs/show_bug.cgi?id=24448 Ciao Nat! Sanjay Patel schrieb: > [cc'ing Zia] > > We have this transform with -Os for some cases after: > http://reviews.llvm.org/rL244601 > http://reviews.llvm.org/D11363 > > but something in this example is causing the transform to not trigger. > > I filed a related bug here: > https://llvm.org/bugs/show_bug.cgi?id=24448 > > If you can file your test case(s) in a bug report, that would be the > best way to track progress on solving it. Thanks! > > > On Thu, Feb 11, 2016 at 9:58 AM, Nat! via llvm-dev > <llvm-dev at lists.llvm.org <mailto:llvm-dev at lists.llvm.org>> wrote: > > Hi > > the appended IR code does not optimize to my liking :) > > this is the interesting part in x86_64, that got produced via clang -Os: > --- > movq -16(%r12), %rax > movl -4(%rax), %ecx > andl $2298949, %ecx ## imm = 0x231445 > cmpq $2298949, (%rax,%rcx) ## imm = 0x231445 > leaq 8(%rax,%rcx), %rax > cmovneq %r15, %rax > movl $2298949, %esi ## imm = 0x231445 > movq %r12, %rdi > movq %r14, %rdx > callq *(%rax) > --- > > > and clang -O3: > --- > movq -16(%r12), %rax > movl -4(%rax), %ecx > andl $2298949, %ecx ## imm = 0x231445 > cmpl $2298949, (%rax,%rcx) ## imm = 0x231445 > jne LBB1_4 > leaq 8(%rax,%rcx), %rax > jmp LBB1_5 > .align 4, 0x90 > LBB1_4: > movq %r15, %rax > LBB1_5: > movl $2298949, %esi ## imm = 0x231445 > movq %r12, %rdi > movq %r14, %rdx > callq *(%rax) > --- > > As you can see in both cases the constant $2298949 is replicated 3 > times. I would have expected something like the following code at > least for -Os: > > --- > movq -16(%r12), %rax > movl $2298949, %esi ### **** move on up > movl -4(%rax), %ecx > andl %esi, %ecx ### > cmpl %esi, (%rax,%rcx) ### > leaq 8(%rax,%rcx), %rax > cmovneq %r15, %rax > movq %r12, %rdi > movq %r14, %rdx > callq *(%rax) > --- > > It is much shorter (33 bytes vs. 
42 bytes) and I would assume at > least the same speed or better. This is with llvm 3.7.0. And yes > this pains me at the moment :) > > Ciao > Nat! > ---- > ; ModuleID = 'optimize-fail.c' > target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" > target triple = "x86_64-apple-macosx10.10.0" > > %struct._foo = type {} > %struct._entry = type { i32, i32, i8* (%struct._foo*, i32, i8*)* } > %struct._table = type { i64, i32, i32, [1 x %struct._entry] } > %struct.test = type { %struct.__foo, i32 } > %struct.__foo = type { %struct._dispatch } > %struct._dispatch = type { %struct._entry*, i8* (%struct._foo*, i32, > i8*)* } > > @str = private unnamed_addr constant [8 x i8] c"table_f\00" > @str.2 = private unnamed_addr constant [11 x i8] c"dispatch_f\00" > > ; Function Attrs: nounwind > declare void @llvm.lifetime.start(i64, i8* nocapture) #1 > > ; Function Attrs: nounwind > declare void @llvm.lifetime.end(i64, i8* nocapture) #1 > > ; Function Attrs: noinline nounwind ssp uwtable > define i8* @foo(%struct._foo* %obj, i32 %unused, i8* %value) #2 { > entry: > %tobool.i = icmp eq %struct._foo* %obj, null > %0 = bitcast %struct._foo* %obj to i8* > %arrayidx.i.i = getelementptr inbounds i8, i8* %0, i64 -16 > %entries2.i = bitcast i8* %arrayidx.i.i to %struct._entry** > %f7.i = getelementptr inbounds i8, i8* %0, i64 -8 > br i1 %tobool.i, label %for.end, label %call.exit.preheader > > call.exit.preheader: ; preds = %entry > br label %call.exit > > call.exit: ; preds > %call.exit.preheader, %call.exit > %i.04 = phi i32 [ %inc, %call.exit ], [ 0, %call.exit.preheader ] > %1 = load %struct._entry*, %struct._entry** %entries2.i, align 8, > !tbaa !2 > %arrayidx1.i.i = getelementptr inbounds %struct._entry, > %struct._entry* %1, i64 -1 > %2 = bitcast %struct._entry* %arrayidx1.i.i to %struct._table* > %mask4.i = getelementptr inbounds %struct._table, %struct._table* > %2, i64 0, i32 2 > %3 = load i32, i32* %mask4.i, align 4, !tbaa !7 > %and.i = and i32 %3, 2298949 > %idxprom.i = 
zext i32 %and.i to i64 > %4 = bitcast %struct._entry* %1 to i8* > %arrayidx.i = getelementptr inbounds i8, i8* %4, i64 %idxprom.i > %key5.i = bitcast i8* %arrayidx.i to i32* > %5 = load i32, i32* %key5.i, align 4, !tbaa !11 > %cmp.i = icmp eq i32 %5, 2298949 > %f6.i = getelementptr inbounds i8, i8* %arrayidx.i, i64 8 > %cond.in.v.i = select i1 %cmp.i, i8* %f6.i, i8* %f7.i > %cond.in.i = bitcast i8* %cond.in.v.i to i8* (%struct._foo*, i32, > i8*)** > %cond.i = load i8* (%struct._foo*, i32, i8*)*, i8* > (%struct._foo*, i32, i8*)** %cond.in.i, align 8 > %call8.i = tail call i8* %cond.i(%struct._foo* %obj, i32 2298949, > i8* %value) #1 > %inc = add nuw nsw i32 %i.04, 1 > %exitcond = icmp eq i32 %inc, 100 > br i1 %exitcond, label %for.end.loopexit, label %call.exit > > for.end.loopexit: ; preds = %call.exit > %call8.i.lcssa = phi i8* [ %call8.i, %call.exit ] > br label %for.end > > for.end: ; preds > %for.end.loopexit, %entry > %rval.0.lcssa = phi i8* [ %0, %entry ], [ %call8.i.lcssa, > %for.end.loopexit ] > ret i8* %rval.0.lcssa > } > > > attributes #1 = { nounwind } > attributes #2 = { noinline nounwind ssp uwtable > "disable-tail-calls"="false" "less-precise-fpmad"="false" > "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" > "no-infs-fp-math"="false" "no-nans-fp-math"="false" > "stack-protector-buffer-size"="8" "target-cpu"="core2" > "target-features"="+cx16,+sse,+sse2,+sse3,+ssse3" > "unsafe-fp-math"="false" "use-soft-float"="false" } > > !llvm.module.flags = !{!0} > !llvm.ident = !{!1} > > --- > > _______________________________________________ > LLVM Developers mailing list > llvm-dev at lists.llvm.org <mailto:llvm-dev at lists.llvm.org> > http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev > >
Reasonably Related Threads
- Expected constant simplification not happening
- [LLVMdev] Need a clue to improve the optimization of some C code
- [LLVMdev] Need a clue to improve the optimization of some C code
- Help required regarding IPRA and Local Function optimization
- Rather poor code optimisation of current clang/LLVM targeting Intel x86 (both -64 and -32)