thr3ads.net - llvm dev - [llvm-dev] Expected constant simplification not happening [Dec 2016]

If this information is useful, please help other people find it:
Share via:

Nat! via llvm-dev

2016-Feb-11 16:58 UTC

[llvm-dev] Expected constant simplification not happening

Hi

the appended IR code does not optimize to my liking :)

this is the interesting part in x86_64, that got produced via clang -Os:
---
	movq	-16(%r12), %rax
	movl	-4(%rax), %ecx
	andl	$2298949, %ecx          ## imm = 0x231445
	cmpq	$2298949, (%rax,%rcx)   ## imm = 0x231445
	leaq	8(%rax,%rcx), %rax
	cmovneq	%r15, %rax			
	movl	$2298949, %esi          ## imm = 0x231445
	movq	%r12, %rdi
	movq	%r14, %rdx
	callq	*(%rax)
---


and clang -O3:
---
	movq	-16(%r12), %rax
	movl	-4(%rax), %ecx
	andl	$2298949, %ecx          ## imm = 0x231445
	cmpl	$2298949, (%rax,%rcx)   ## imm = 0x231445
	jne	LBB1_4
	leaq	8(%rax,%rcx), %rax
	jmp	LBB1_5
	.align	4, 0x90
LBB1_4:
	movq	%r15, %rax
LBB1_5:
	movl	$2298949, %esi          ## imm = 0x231445
	movq	%r12, %rdi
	movq	%r14, %rdx
	callq	*(%rax)
---

As you can see in both cases the constant $2298949 is replicated 3 
times. I would have expected something like the following code at least 
for -Os:

---
	movq	-16(%r12), %rax
	movl	$2298949, %esi          ### **** move on up	
	movl	-4(%rax), %ecx
	andl	%esi, %ecx         	###
	cmpl	%esi, (%rax,%rcx)   	###
	leaq	8(%rax,%rcx), %rax
	cmovneq	%r15, %rax			
	movq	%r12, %rdi
	movq	%r14, %rdx
	callq	*(%rax)
---

It is much shorter (33 bytes vs. 42 bytes) and I would assume at least 
the same speed or better. This is with llvm 3.7.0. And yes this pains me 
at the moment :)

Ciao
    Nat!
----
; ModuleID = 'optimize-fail.c'
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"

%struct._foo = type {}
%struct._entry = type { i32, i32, i8* (%struct._foo*, i32, i8*)* }
%struct._table = type { i64, i32, i32, [1 x %struct._entry] }
%struct.test = type { %struct.__foo, i32 }
%struct.__foo = type { %struct._dispatch }
%struct._dispatch = type { %struct._entry*, i8* (%struct._foo*, i32, i8*)* }

@str = private unnamed_addr constant [8 x i8] c"table_f\00"
@str.2 = private unnamed_addr constant [11 x i8] c"dispatch_f\00"

; Function Attrs: nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) #1

; Function Attrs: nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) #1

; Function Attrs: noinline nounwind ssp uwtable
define i8* @foo(%struct._foo* %obj, i32 %unused, i8* %value) #2 {
entry:
   %tobool.i = icmp eq %struct._foo* %obj, null
   %0 = bitcast %struct._foo* %obj to i8*
   %arrayidx.i.i = getelementptr inbounds i8, i8* %0, i64 -16
   %entries2.i = bitcast i8* %arrayidx.i.i to %struct._entry**
   %f7.i = getelementptr inbounds i8, i8* %0, i64 -8
   br i1 %tobool.i, label %for.end, label %call.exit.preheader

call.exit.preheader:                              ; preds = %entry
   br label %call.exit

call.exit:                                        ; preds = %call.exit.preheader, %call.exit
   %i.04 = phi i32 [ %inc, %call.exit ], [ 0, %call.exit.preheader ]
   %1 = load %struct._entry*, %struct._entry** %entries2.i, align 8, 
!tbaa !2
   %arrayidx1.i.i = getelementptr inbounds %struct._entry, 
%struct._entry* %1, i64 -1
   %2 = bitcast %struct._entry* %arrayidx1.i.i to %struct._table*
   %mask4.i = getelementptr inbounds %struct._table, %struct._table* %2, 
i64 0, i32 2
   %3 = load i32, i32* %mask4.i, align 4, !tbaa !7
   %and.i = and i32 %3, 2298949
   %idxprom.i = zext i32 %and.i to i64
   %4 = bitcast %struct._entry* %1 to i8*
   %arrayidx.i = getelementptr inbounds i8, i8* %4, i64 %idxprom.i
   %key5.i = bitcast i8* %arrayidx.i to i32*
   %5 = load i32, i32* %key5.i, align 4, !tbaa !11
   %cmp.i = icmp eq i32 %5, 2298949
   %f6.i = getelementptr inbounds i8, i8* %arrayidx.i, i64 8
   %cond.in.v.i = select i1 %cmp.i, i8* %f6.i, i8* %f7.i
   %cond.in.i = bitcast i8* %cond.in.v.i to i8* (%struct._foo*, i32, i8*)**
   %cond.i = load i8* (%struct._foo*, i32, i8*)*, i8* (%struct._foo*, 
i32, i8*)** %cond.in.i, align 8
   %call8.i = tail call i8* %cond.i(%struct._foo* %obj, i32 2298949, i8* 
%value) #1
   %inc = add nuw nsw i32 %i.04, 1
   %exitcond = icmp eq i32 %inc, 100
   br i1 %exitcond, label %for.end.loopexit, label %call.exit

for.end.loopexit:                                 ; preds = %call.exit
   %call8.i.lcssa = phi i8* [ %call8.i, %call.exit ]
   br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
   %rval.0.lcssa = phi i8* [ %0, %entry ], [ %call8.i.lcssa, 
%for.end.loopexit ]
   ret i8* %rval.0.lcssa
}


attributes #1 = { nounwind }
attributes #2 = { noinline nounwind ssp uwtable 
"disable-tail-calls"="false"
"less-precise-fpmad"="false"
"no-frame-pointer-elim"="true"
"no-frame-pointer-elim-non-leaf"
"no-infs-fp-math"="false"
"no-nans-fp-math"="false"
"stack-protector-buffer-size"="8"
"target-cpu"="core2"
"target-features"="+cx16,+sse,+sse2,+sse3,+ssse3" 
"unsafe-fp-math"="false"
"use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

---

Sanjay Patel via llvm-dev

2016-Feb-11 23:02 UTC

head link

[llvm-dev] Expected constant simplification not happening

[cc'ing Zia]

We have this transform with -Os for some cases after:
http://reviews.llvm.org/rL244601
http://reviews.llvm.org/D11363

but something in this example is causing the transform to not trigger.

I filed a related bug here:
https://llvm.org/bugs/show_bug.cgi?id=24448

If you can file your test case(s) in a bug report, that would be the best
way to track progress on solving it. Thanks!


On Thu, Feb 11, 2016 at 9:58 AM, Nat! via llvm-dev <llvm-dev at lists.llvm.org> wrote:
> Hi
>
> the appended IR code does not optimize to my liking :)
>
> this is the interesting part in x86_64, that got produced via clang -Os:
> ---
>         movq    -16(%r12), %rax
>         movl    -4(%rax), %ecx
>         andl    $2298949, %ecx          ## imm = 0x231445
>         cmpq    $2298949, (%rax,%rcx)   ## imm = 0x231445
>         leaq    8(%rax,%rcx), %rax
>         cmovneq %r15, %rax
>         movl    $2298949, %esi          ## imm = 0x231445
>         movq    %r12, %rdi
>         movq    %r14, %rdx
>         callq   *(%rax)
> ---
>
>
> and clang -O3:
> ---
>         movq    -16(%r12), %rax
>         movl    -4(%rax), %ecx
>         andl    $2298949, %ecx          ## imm = 0x231445
>         cmpl    $2298949, (%rax,%rcx)   ## imm = 0x231445
>         jne     LBB1_4
>         leaq    8(%rax,%rcx), %rax
>         jmp     LBB1_5
>         .align  4, 0x90
> LBB1_4:
>         movq    %r15, %rax
> LBB1_5:
>         movl    $2298949, %esi          ## imm = 0x231445
>         movq    %r12, %rdi
>         movq    %r14, %rdx
>         callq   *(%rax)
> ---
>
> As you can see in both cases the constant $2298949 is replicated 3 times.
> I would have expected something like the following code at least for -Os:
>
> ---
>         movq    -16(%r12), %rax
>         movl    $2298949, %esi          ### **** move on up
>         movl    -4(%rax), %ecx
>         andl    %esi, %ecx              ###
>         cmpl    %esi, (%rax,%rcx)       ###
>         leaq    8(%rax,%rcx), %rax
>         cmovneq %r15, %rax
>         movq    %r12, %rdi
>         movq    %r14, %rdx
>         callq   *(%rax)
> ---
>
> It is much shorter (33 bytes vs. 42 bytes) and I would assume at least the
> same speed or better. This is with llvm 3.7.0. And yes this pains me at the
> moment :)
>
> Ciao
>    Nat!
> ----
> ; ModuleID = 'optimize-fail.c'
> target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
> target triple = "x86_64-apple-macosx10.10.0"
>
> %struct._foo = type {}
> %struct._entry = type { i32, i32, i8* (%struct._foo*, i32, i8*)* }
> %struct._table = type { i64, i32, i32, [1 x %struct._entry] }
> %struct.test = type { %struct.__foo, i32 }
> %struct.__foo = type { %struct._dispatch }
> %struct._dispatch = type { %struct._entry*, i8* (%struct._foo*, i32, i8*)*
> }
>
> @str = private unnamed_addr constant [8 x i8] c"table_f\00"
> @str.2 = private unnamed_addr constant [11 x i8] c"dispatch_f\00"
>
> ; Function Attrs: nounwind
> declare void @llvm.lifetime.start(i64, i8* nocapture) #1
>
> ; Function Attrs: nounwind
> declare void @llvm.lifetime.end(i64, i8* nocapture) #1
>
> ; Function Attrs: noinline nounwind ssp uwtable
> define i8* @foo(%struct._foo* %obj, i32 %unused, i8* %value) #2 {
> entry:
>   %tobool.i = icmp eq %struct._foo* %obj, null
>   %0 = bitcast %struct._foo* %obj to i8*
>   %arrayidx.i.i = getelementptr inbounds i8, i8* %0, i64 -16
>   %entries2.i = bitcast i8* %arrayidx.i.i to %struct._entry**
>   %f7.i = getelementptr inbounds i8, i8* %0, i64 -8
>   br i1 %tobool.i, label %for.end, label %call.exit.preheader
>
> call.exit.preheader:                              ; preds = %entry
>   br label %call.exit
>
> call.exit:                                        ; preds = %call.exit.preheader, %call.exit
>   %i.04 = phi i32 [ %inc, %call.exit ], [ 0, %call.exit.preheader ]
>   %1 = load %struct._entry*, %struct._entry** %entries2.i, align 8, !tbaa
> !2
>   %arrayidx1.i.i = getelementptr inbounds %struct._entry, %struct._entry*
> %1, i64 -1
>   %2 = bitcast %struct._entry* %arrayidx1.i.i to %struct._table*
>   %mask4.i = getelementptr inbounds %struct._table, %struct._table* %2,
> i64 0, i32 2
>   %3 = load i32, i32* %mask4.i, align 4, !tbaa !7
>   %and.i = and i32 %3, 2298949
>   %idxprom.i = zext i32 %and.i to i64
>   %4 = bitcast %struct._entry* %1 to i8*
>   %arrayidx.i = getelementptr inbounds i8, i8* %4, i64 %idxprom.i
>   %key5.i = bitcast i8* %arrayidx.i to i32*
>   %5 = load i32, i32* %key5.i, align 4, !tbaa !11
>   %cmp.i = icmp eq i32 %5, 2298949
>   %f6.i = getelementptr inbounds i8, i8* %arrayidx.i, i64 8
>   %cond.in.v.i = select i1 %cmp.i, i8* %f6.i, i8* %f7.i
>   %cond.in.i = bitcast i8* %cond.in.v.i to i8* (%struct._foo*, i32, i8*)**
>   %cond.i = load i8* (%struct._foo*, i32, i8*)*, i8* (%struct._foo*, i32,
> i8*)** %cond.in.i, align 8
>   %call8.i = tail call i8* %cond.i(%struct._foo* %obj, i32 2298949, i8*
> %value) #1
>   %inc = add nuw nsw i32 %i.04, 1
>   %exitcond = icmp eq i32 %inc, 100
>   br i1 %exitcond, label %for.end.loopexit, label %call.exit
>
> for.end.loopexit:                                 ; preds = %call.exit
>   %call8.i.lcssa = phi i8* [ %call8.i, %call.exit ]
>   br label %for.end
>
> for.end:                                          ; preds = %for.end.loopexit, %entry
>   %rval.0.lcssa = phi i8* [ %0, %entry ], [ %call8.i.lcssa,
> %for.end.loopexit ]
>   ret i8* %rval.0.lcssa
> }
>
>
> attributes #1 = { nounwind }
> attributes #2 = { noinline nounwind ssp uwtable
> "disable-tail-calls"="false"
> "less-precise-fpmad"="false"
> "no-frame-pointer-elim"="true"
> "no-frame-pointer-elim-non-leaf"
> "no-infs-fp-math"="false"
> "no-nans-fp-math"="false"
> "stack-protector-buffer-size"="8"
> "target-cpu"="core2"
> "target-features"="+cx16,+sse,+sse2,+sse3,+ssse3"
> "unsafe-fp-math"="false"
> "use-soft-float"="false" }
>
> !llvm.module.flags = !{!0}
> !llvm.ident = !{!1}
>
> ---
>
> _______________________________________________
> LLVM Developers mailing list
> llvm-dev at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20160211/2181720a/attachment.html>

Ansari, Zia via llvm-dev

2016-Feb-12 19:50 UTC

head link

[llvm-dev] Expected constant simplification not happening

I took a quick look at this and relaxed the conditions under which we prevent
immediate subsumption (specifically, optsize and instruction shape), and I
managed to merge 2 of those immediates. I need to dig in a little more to see
why it didn’t catch the third. I suspect a phase ordering issue.

I’ll take a closer look at this as soon as I get a chance. If you could please
attach a testcase to the bug report, I’d appreciate it.

Thanks,
Zia.

From: Sanjay Patel [mailto:spatel at rotateright.com]
Sent: Thursday, February 11, 2016 3:02 PM
To: Nat! <nat at mulle-kybernetik.com>
Cc: llvm-dev <llvm-dev at lists.llvm.org>; Ansari, Zia <zia.ansari at intel.com>
Subject: Re: [llvm-dev] Expected constant simplification not happening

[cc'ing Zia]
We have this transform with -Os for some cases after:
http://reviews.llvm.org/rL244601
http://reviews.llvm.org/D11363
but something in this example is causing the transform to not trigger.

I filed a related bug here:
https://llvm.org/bugs/show_bug.cgi?id=24448
If you can file your test case(s) in a bug report, that would be the best way to
track progress on solving it. Thanks!


On Thu, Feb 11, 2016 at 9:58 AM, Nat! via llvm-dev <llvm-dev at lists.llvm.org<mailto:llvm-dev at lists.llvm.org>> wrote:
Hi

the appended IR code does not optimize to my liking :)

this is the interesting part in x86_64, that got produced via clang -Os:
---
        movq    -16(%r12), %rax
        movl    -4(%rax), %ecx
        andl    $2298949, %ecx          ## imm = 0x231445
        cmpq    $2298949, (%rax,%rcx)   ## imm = 0x231445
        leaq    8(%rax,%rcx), %rax
        cmovneq %r15, %rax
        movl    $2298949, %esi          ## imm = 0x231445
        movq    %r12, %rdi
        movq    %r14, %rdx
        callq   *(%rax)
---


and clang -O3:
---
        movq    -16(%r12), %rax
        movl    -4(%rax), %ecx
        andl    $2298949, %ecx          ## imm = 0x231445
        cmpl    $2298949, (%rax,%rcx)   ## imm = 0x231445
        jne     LBB1_4
        leaq    8(%rax,%rcx), %rax
        jmp     LBB1_5
        .align  4, 0x90
LBB1_4:
        movq    %r15, %rax
LBB1_5:
        movl    $2298949, %esi          ## imm = 0x231445
        movq    %r12, %rdi
        movq    %r14, %rdx
        callq   *(%rax)
---

As you can see in both cases the constant $2298949 is replicated 3 times. I
would have expected something like the following code at least for -Os:

---
        movq    -16(%r12), %rax
        movl    $2298949, %esi          ### **** move on up
        movl    -4(%rax), %ecx
        andl    %esi, %ecx              ###
        cmpl    %esi, (%rax,%rcx)       ###
        leaq    8(%rax,%rcx), %rax
        cmovneq %r15, %rax
        movq    %r12, %rdi
        movq    %r14, %rdx
        callq   *(%rax)
---

It is much shorter (33 bytes vs. 42 bytes) and I would assume at least the same
speed or better. This is with llvm 3.7.0. And yes this pains me at the moment :)

Ciao
   Nat!
----
; ModuleID = 'optimize-fail.c'
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"

%struct._foo = type {}
%struct._entry = type { i32, i32, i8* (%struct._foo*, i32, i8*)* }
%struct._table = type { i64, i32, i32, [1 x %struct._entry] }
%struct.test = type { %struct.__foo, i32 }
%struct.__foo = type { %struct._dispatch }
%struct._dispatch = type { %struct._entry*, i8* (%struct._foo*, i32, i8*)* }

@str = private unnamed_addr constant [8 x i8] c"table_f\00"
@str.2 = private unnamed_addr constant [11 x i8] c"dispatch_f\00"

; Function Attrs: nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) #1

; Function Attrs: nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) #1

; Function Attrs: noinline nounwind ssp uwtable
define i8* @foo(%struct._foo* %obj, i32 %unused, i8* %value) #2 {
entry:
  %tobool.i = icmp eq %struct._foo* %obj, null
  %0 = bitcast %struct._foo* %obj to i8*
  %arrayidx.i.i = getelementptr inbounds i8, i8* %0, i64 -16
  %entries2.i = bitcast i8* %arrayidx.i.i to %struct._entry**
  %f7.i = getelementptr inbounds i8, i8* %0, i64 -8
  br i1 %tobool.i, label %for.end, label %call.exit.preheader

call.exit.preheader:                              ; preds = %entry
  br label %call.exit

call.exit:                                        ; preds = %call.exit.preheader, %call.exit
  %i.04 = phi i32 [ %inc, %call.exit ], [ 0, %call.exit.preheader ]
  %1 = load %struct._entry*, %struct._entry** %entries2.i, align 8, !tbaa !2
  %arrayidx1.i.i = getelementptr inbounds %struct._entry, %struct._entry* %1,
i64 -1
  %2 = bitcast %struct._entry* %arrayidx1.i.i to %struct._table*
  %mask4.i = getelementptr inbounds %struct._table, %struct._table* %2, i64 0,
i32 2
  %3 = load i32, i32* %mask4.i, align 4, !tbaa !7
  %and.i = and i32 %3, 2298949
  %idxprom.i = zext i32 %and.i to i64
  %4 = bitcast %struct._entry* %1 to i8*
  %arrayidx.i = getelementptr inbounds i8, i8* %4, i64 %idxprom.i
  %key5.i = bitcast i8* %arrayidx.i to i32*
  %5 = load i32, i32* %key5.i, align 4, !tbaa !11
  %cmp.i = icmp eq i32 %5, 2298949
  %f6.i = getelementptr inbounds i8, i8* %arrayidx.i, i64 8
  %cond.in.v.i = select i1 %cmp.i, i8* %f6.i, i8* %f7.i
  %cond.in.i = bitcast i8* %cond.in.v.i to i8* (%struct._foo*, i32, i8*)**
  %cond.i = load i8* (%struct._foo*, i32, i8*)*, i8* (%struct._foo*, i32, i8*)**
%cond.in.i, align 8
  %call8.i = tail call i8* %cond.i(%struct._foo* %obj, i32 2298949, i8* %value)
#1
  %inc = add nuw nsw i32 %i.04, 1
  %exitcond = icmp eq i32 %inc, 100
  br i1 %exitcond, label %for.end.loopexit, label %call.exit

for.end.loopexit:                                 ; preds = %call.exit
  %call8.i.lcssa = phi i8* [ %call8.i, %call.exit ]
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  %rval.0.lcssa = phi i8* [ %0, %entry ], [ %call8.i.lcssa, %for.end.loopexit ]
  ret i8* %rval.0.lcssa
}


attributes #1 = { nounwind }
attributes #2 = { noinline nounwind ssp uwtable
"disable-tail-calls"="false"
"less-precise-fpmad"="false"
"no-frame-pointer-elim"="true"
"no-frame-pointer-elim-non-leaf"
"no-infs-fp-math"="false"
"no-nans-fp-math"="false"
"stack-protector-buffer-size"="8"
"target-cpu"="core2"
"target-features"="+cx16,+sse,+sse2,+sse3,+ssse3"
"unsafe-fp-math"="false"
"use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

---

_______________________________________________
LLVM Developers mailing list
llvm-dev at lists.llvm.org<mailto:llvm-dev at lists.llvm.org>
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev

-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20160212/7a5d1fdf/attachment.html>

Nat! via llvm-dev

2016-Dec-07 17:13 UTC

head link

[llvm-dev] Expected constant simplification not happening

Hello

Has there been any progress on this topic? I just checked, and the 3.9
optimizer output is still the same.

https://llvm.org/bugs/show_bug.cgi?id=24448

Ciao
    Nat!



Sanjay Patel schrieb:
> [cc'ing Zia]
>
> We have this transform with -Os for some cases after:
> http://reviews.llvm.org/rL244601
> http://reviews.llvm.org/D11363
>
> but something in this example is causing the transform to not trigger.
>
> I filed a related bug here:
> https://llvm.org/bugs/show_bug.cgi?id=24448
>
> If you can file your test case(s) in a bug report, that would be the
> best way to track progress on solving it. Thanks!
>
>
> On Thu, Feb 11, 2016 at 9:58 AM, Nat! via llvm-dev
> <llvm-dev at lists.llvm.org <mailto:llvm-dev at lists.llvm.org>> wrote:
>
>     Hi
>
>     the appended IR code does not optimize to my liking :)
>
>     this is the interesting part in x86_64, that got produced via clang -Os:
>     ---
>              movq    -16(%r12), %rax
>              movl    -4(%rax), %ecx
>              andl    $2298949, %ecx          ## imm = 0x231445
>              cmpq    $2298949, (%rax,%rcx)   ## imm = 0x231445
>              leaq    8(%rax,%rcx), %rax
>              cmovneq %r15, %rax
>              movl    $2298949, %esi          ## imm = 0x231445
>              movq    %r12, %rdi
>              movq    %r14, %rdx
>              callq   *(%rax)
>     ---
>
>
>     and clang -O3:
>     ---
>              movq    -16(%r12), %rax
>              movl    -4(%rax), %ecx
>              andl    $2298949, %ecx          ## imm = 0x231445
>              cmpl    $2298949, (%rax,%rcx)   ## imm = 0x231445
>              jne     LBB1_4
>              leaq    8(%rax,%rcx), %rax
>              jmp     LBB1_5
>              .align  4, 0x90
>     LBB1_4:
>              movq    %r15, %rax
>     LBB1_5:
>              movl    $2298949, %esi          ## imm = 0x231445
>              movq    %r12, %rdi
>              movq    %r14, %rdx
>              callq   *(%rax)
>     ---
>
>     As you can see in both cases the constant $2298949 is replicated 3
>     times. I would have expected something like the following code at
>     least for -Os:
>
>     ---
>              movq    -16(%r12), %rax
>              movl    $2298949, %esi          ### **** move on up
>              movl    -4(%rax), %ecx
>              andl    %esi, %ecx              ###
>              cmpl    %esi, (%rax,%rcx)       ###
>              leaq    8(%rax,%rcx), %rax
>              cmovneq %r15, %rax
>              movq    %r12, %rdi
>              movq    %r14, %rdx
>              callq   *(%rax)
>     ---
>
>     It is much shorter (33 bytes vs. 42 bytes) and I would assume at
>     least the same speed or better. This is with llvm 3.7.0. And yes
>     this pains me at the moment :)
>
>     Ciao
>         Nat!
>     ----
>     ; ModuleID = 'optimize-fail.c'
>     target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
>     target triple = "x86_64-apple-macosx10.10.0"
>
>     %struct._foo = type {}
>     %struct._entry = type { i32, i32, i8* (%struct._foo*, i32, i8*)* }
>     %struct._table = type { i64, i32, i32, [1 x %struct._entry] }
>     %struct.test = type { %struct.__foo, i32 }
>     %struct.__foo = type { %struct._dispatch }
>     %struct._dispatch = type { %struct._entry*, i8* (%struct._foo*, i32,
>     i8*)* }
>
>     @str = private unnamed_addr constant [8 x i8] c"table_f\00"
>     @str.2 = private unnamed_addr constant [11 x i8] c"dispatch_f\00"
>
>     ; Function Attrs: nounwind
>     declare void @llvm.lifetime.start(i64, i8* nocapture) #1
>
>     ; Function Attrs: nounwind
>     declare void @llvm.lifetime.end(i64, i8* nocapture) #1
>
>     ; Function Attrs: noinline nounwind ssp uwtable
>     define i8* @foo(%struct._foo* %obj, i32 %unused, i8* %value) #2 {
>     entry:
>        %tobool.i = icmp eq %struct._foo* %obj, null
>        %0 = bitcast %struct._foo* %obj to i8*
>        %arrayidx.i.i = getelementptr inbounds i8, i8* %0, i64 -16
>        %entries2.i = bitcast i8* %arrayidx.i.i to %struct._entry**
>        %f7.i = getelementptr inbounds i8, i8* %0, i64 -8
>        br i1 %tobool.i, label %for.end, label %call.exit.preheader
>
>     call.exit.preheader:                              ; preds = %entry
>        br label %call.exit
>
>     call.exit:                                        ; preds = %call.exit.preheader, %call.exit
>        %i.04 = phi i32 [ %inc, %call.exit ], [ 0, %call.exit.preheader ]
>        %1 = load %struct._entry*, %struct._entry** %entries2.i, align 8,
>     !tbaa !2
>        %arrayidx1.i.i = getelementptr inbounds %struct._entry,
>     %struct._entry* %1, i64 -1
>        %2 = bitcast %struct._entry* %arrayidx1.i.i to %struct._table*
>        %mask4.i = getelementptr inbounds %struct._table, %struct._table*
>     %2, i64 0, i32 2
>        %3 = load i32, i32* %mask4.i, align 4, !tbaa !7
>        %and.i = and i32 %3, 2298949
>        %idxprom.i = zext i32 %and.i to i64
>        %4 = bitcast %struct._entry* %1 to i8*
>        %arrayidx.i = getelementptr inbounds i8, i8* %4, i64 %idxprom.i
>        %key5.i = bitcast i8* %arrayidx.i to i32*
>        %5 = load i32, i32* %key5.i, align 4, !tbaa !11
>        %cmp.i = icmp eq i32 %5, 2298949
>        %f6.i = getelementptr inbounds i8, i8* %arrayidx.i, i64 8
>        %cond.in.v.i = select i1 %cmp.i, i8* %f6.i, i8* %f7.i
>        %cond.in.i = bitcast i8* %cond.in.v.i to i8* (%struct._foo*, i32,
>     i8*)**
>        %cond.i = load i8* (%struct._foo*, i32, i8*)*, i8*
>     (%struct._foo*, i32, i8*)** %cond.in.i, align 8
>        %call8.i = tail call i8* %cond.i(%struct._foo* %obj, i32 2298949,
>     i8* %value) #1
>        %inc = add nuw nsw i32 %i.04, 1
>        %exitcond = icmp eq i32 %inc, 100
>        br i1 %exitcond, label %for.end.loopexit, label %call.exit
>
>     for.end.loopexit:                                 ; preds = %call.exit
>        %call8.i.lcssa = phi i8* [ %call8.i, %call.exit ]
>        br label %for.end
>
>     for.end:                                          ; preds = %for.end.loopexit, %entry
>        %rval.0.lcssa = phi i8* [ %0, %entry ], [ %call8.i.lcssa,
>     %for.end.loopexit ]
>        ret i8* %rval.0.lcssa
>     }
>
>
>     attributes #1 = { nounwind }
>     attributes #2 = { noinline nounwind ssp uwtable
>     "disable-tail-calls"="false"
>     "less-precise-fpmad"="false"
>     "no-frame-pointer-elim"="true"
>     "no-frame-pointer-elim-non-leaf"
>     "no-infs-fp-math"="false"
>     "no-nans-fp-math"="false"
>     "stack-protector-buffer-size"="8"
>     "target-cpu"="core2"
>     "target-features"="+cx16,+sse,+sse2,+sse3,+ssse3"
>     "unsafe-fp-math"="false"
>     "use-soft-float"="false" }
>
>     !llvm.module.flags = !{!0}
>     !llvm.ident = !{!1}
>
>     ---
>
>     _______________________________________________
>     LLVM Developers mailing list
>     llvm-dev at lists.llvm.org <mailto:llvm-dev at lists.llvm.org>
>     http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
>
>

Possibly Parallel Threads

Search for more reasonably related threads

llvm dev - Dec 2016 - Expected constant simplification not happening

[llvm-dev] Expected constant simplification not happening

[llvm-dev] Expected constant simplification not happening

[llvm-dev] Expected constant simplification not happening

[llvm-dev] Expected constant simplification not happening

Possibly Parallel Threads