Displaying 20 results from an estimated 274 matches for "xmm1".
2014 Oct 13
2
[LLVMdev] Unexpected spilling of vector register during lane extraction on some x86_64 targets
...ing */
$ objdump -dC --no-show-raw-insn ./a.out
...
00000000004004f0 <main>:
4004f0: vmovdqa 0x2004c8(%rip),%xmm0 # 6009c0 <x>
4004f8: vpsrld $0x17,%xmm0,%xmm0
4004fd: vpaddd 0x17b(%rip),%xmm0,%xmm0 # 400680 <__dso_handle+0x8>
400505: vcvtdq2ps %xmm0,%xmm1
400509: vdivps 0x17f(%rip),%xmm1,%xmm1 # 400690 <__dso_handle+0x18>
400511: vcvttps2dq %xmm1,%xmm1
400515: vpmullw 0x183(%rip),%xmm1,%xmm1 # 4006a0 <__dso_handle+0x28>
40051d: vpsubd %xmm1,%xmm0,%xmm0
400521: vmovq %xmm0,%rax
400526: movslq %eax,%r...
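
The C that produced this dump is not included in the snippet. Purely as a hypothetical sketch of the pattern at issue (vector integer arithmetic on a global, followed by extraction of a lane into a general-purpose register), using SSE intrinsics; the constant and surrounding float math from the dump are omitted:

/* Hypothetical reconstruction, not the poster's code: vector arithmetic
 * on a global __m128i, then lane 0 extracted as a scalar. The report is
 * that some targets spill the vector to memory for the extraction. */
#include <immintrin.h>

__m128i x;  /* stands in for the global <x> in the disassembly */

int extract_lane0(void)
{
    __m128i t = _mm_srli_epi32(x, 23);        /* vpsrld $0x17            */
    t = _mm_add_epi32(t, _mm_set1_epi32(1));  /* vpaddd with a constant
                                                 (actual value not shown) */
    return _mm_cvtsi128_si32(t);              /* lane 0 out as a scalar  */
}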
2014 Sep 05
3
[LLVMdev] Please benchmark new x86 vector shuffle lowering, planning to make it the default very soon!
On Fri, Sep 5, 2014 at 9:32 AM, Robert Lougher <rob.lougher at gmail.com>
wrote:
> Unfortunately, another team, while doing internal testing has seen the
> new path generating illegal insertps masks. A sample here:
>
> vinsertps $256, %xmm0, %xmm13, %xmm4 # xmm4 = xmm0[0],xmm13[1,2,3]
> vinsertps $256, %xmm1, %xmm0, %xmm6 # xmm6 = xmm1[0],xmm0[1,2,3]
> vinsertps $256, %xmm13, %xmm1, %xmm7 # xmm7 = xmm13[0],xmm1[1,2,3]
> vinsertps $416, %xmm1, %xmm4, %xmm14 # xmm14 = xmm4[0,1],xmm1[2],xmm4[3]
> vinser...
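
As background not quoted in the thread: the insertps immediate is a single byte, with bits 7:6 selecting the source lane, bits 5:4 the destination lane, and bits 3:0 a zero mask, so literals such as $256 (0x100) and $416 (0x1A0) do not fit in eight bits, which is what makes the masks above illegal. A small illustrative helper (hypothetical, not LLVM code):

/* Sketch: how a legal insertps immediate is packed. It is one byte, so
 * $256 or $416 overflows and gets truncated by the assembler. */
#include <stdio.h>

static unsigned insertps_imm(unsigned src_lane, unsigned dst_lane, unsigned zmask)
{
    return ((src_lane & 3) << 6) | ((dst_lane & 3) << 4) | (zmask & 0xF);
}

int main(void)
{
    /* xmm4 = xmm0[0],xmm13[1,2,3]: take source lane 0, write dest lane 0. */
    printf("imm = 0x%02X\n", insertps_imm(0, 0, 0));  /* 0x00, not 0x100 */
    return 0;
}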
2013 Jul 19
0
[LLVMdev] llvm.x86.sse2.sqrt.pd not using sqrtpd, calling a function that modifies ECX
...002E00F3 movddup xmm0,mmword ptr [eax+8]
002E00F8 movapd xmmword ptr [esp+70h],xmm0
002E00FE movddup xmm0,mmword ptr [eax]
002E0102 movapd xmmword ptr [esp+60h],xmm0
002E0108 xorpd xmm0,xmm0
002E010C movapd xmmword ptr [esp+0C0h],xmm0
002E0115 xorpd xmm1,xmm1
002E0119 xorpd xmm7,xmm7
002E011D movapd xmmword ptr [esp+0A0h],xmm1
002E0126 movapd xmmword ptr [esp+0B0h],xmm7
002E012F movapd xmm3,xmm1
002E0133 movlpd qword ptr [esp+0F0h],xmm3
002E013C movhpd qword ptr [esp+0E0h],xmm3
002E0145 movlpd qword...
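
For context (not part of the quoted message): in the Clang versions of that era, _mm_sqrt_pd maps to the llvm.x86.sse2.sqrt.pd intrinsic, and with SSE2 enabled it is expected to lower to a single sqrtpd rather than a library call. A minimal sketch:

/* Minimal sketch: with SSE2 enabled this should compile to one sqrtpd. */
#include <emmintrin.h>

__m128d sqrt_pair(__m128d v)
{
    return _mm_sqrt_pd(v);   /* expected to become sqrtpd, not a call */
}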
2008 Jul 12
2
[LLVMdev] Shuffle regression
...2.3 but revision 52648 fails, and I suspect that the issue is still present.
2.3 generates the following x86 code:
03A10010 push ebp
03A10011 mov ebp,esp
03A10013 and esp,0FFFFFFF0h
03A10019 movups xmm0,xmmword ptr ds:[141D280h]
03A10020 xorps xmm1,xmm1
03A10023 movaps xmm2,xmm0
03A10026 shufps xmm2,xmm1,32h
03A1002A movaps xmm1,xmm0
03A1002D shufps xmm1,xmm2,84h
03A10031 shufps xmm0,xmm1,23h
03A10035 shufps xmm1,xmm1,40h
03A10039 shufps xmm1,xmm0,2Eh
03A1003D movups xmmword ptr...
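
As an aside, each shufps above takes two lanes from its destination operand and two from its source, selected by the immediate. The intrinsic-level equivalent of the first shuffle step, with placeholder variable names, would be:

/* Sketch of one step of the sequence above at the intrinsic level. The
 * low two index pairs of the imm8 select from the first operand, the
 * high two from the second. */
#include <xmmintrin.h>

__m128 shuffle_step(__m128 xmm2, __m128 xmm1)
{
    /* shufps xmm2, xmm1, 32h  ->  { xmm2[2], xmm2[0], xmm1[3], xmm1[0] } */
    return _mm_shuffle_ps(xmm2, xmm1, 0x32);
}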
2014 Sep 05
2
[LLVMdev] Please benchmark new x86 vector shuffle lowering, planning to make it the default very soon!
...:32 AM, Robert Lougher <rob.lougher at gmail.com>
>> wrote:
>>>
>>> Unfortunately, another team, while doing internal testing has seen the
>>> new path generating illegal insertps masks. A sample here:
>>>
>>> vinsertps $256, %xmm0, %xmm13, %xmm4 # xmm4 = xmm0[0],xmm13[1,2,3]
>>> vinsertps $256, %xmm1, %xmm0, %xmm6 # xmm6 = xmm1[0],xmm0[1,2,3]
>>> vinsertps $256, %xmm13, %xmm1, %xmm7 # xmm7 = xmm13[0],xmm1[1,2,3]
>>> vinsertps $416, %xmm1, %xmm4, %xmm14 # xmm14 = xmm4[0,1],xm...
2012 Mar 28
2
[LLVMdev] Suboptimal code due to excessive spilling
...orl %ebx, %ebx
movl 108(%esp), %ecx
movl 104(%esp), %edx
xorl %esi, %esi
.align 16, 0x90
.LBB1_2: # %.lr.ph.i
# =>This Inner Loop Header: Depth=1
movsd (%edx,%ebx,8), %xmm2
addsd .LCPI1_0, %xmm2
movsd 16(%edx,%ebx,8), %xmm1
movsd %xmm1, (%esp) # 8-byte Spill
movl %ebx, %edi
addl $1, %edi
addsd (%edx,%edi,8), %xmm2
movsd 136(%edx,%ebx,8), %xmm1
movsd %xmm1, 72(%esp) # 8-byte Spill
movsd 128(%edx,%ebx,8), %xmm1
movsd %xmm1, 64(%esp) # 8-byte Spill
movsd 120(%edx,%ebx,8), %xmm1
movsd %...
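
The source loop is not quoted. Purely as a loose illustration of the kind of code that keeps many doubles live at once on a target with only eight xmm registers (hypothetical, not the poster's program):

/* Loose illustration only: a loop body with many simultaneously live
 * doubles quickly exceeds the eight xmm registers available to 32-bit
 * x86 code, forcing 8-byte spills like those shown above. */
void accumulate(const double *a, double *out, int n)
{
    double s0 = 0.0, s1 = 0.0, s2 = 0.0;
    for (int i = 0; i < n; i++) {
        double lo = a[i] + a[i + 1] + a[i + 2];        /* neighbours          */
        double hi = a[i + 15] + a[i + 16] + a[i + 17]; /* farther offsets, as
                                                          in the dump above   */
        s0 += lo;
        s1 += hi;
        s2 += lo * hi;
    }
    out[0] = s0; out[1] = s1; out[2] = s2;
}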
2004 Aug 06
2
[PATCH] Make SSE Run Time option. Add Win32 SSE code
...*/
+ _asm
+ {
+ mov eax, num
+ mov ebx, den
+ mov ecx, mem
+
+ mov edx, in1
+ movss xmm0, [edx]
+
+ movss xmm1, [ecx]
+ addss xmm1, xmm0
+
+ mov edx, in2
+ movss [edx], xmm1
+
+ shufps xmm0, xmm0, 0x00
+ shufps xmm1, xmm1, 0x00
+
+ movaps xmm2, [eax+4]
+ m...
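
A rough intrinsics rendering of the visible part of the _asm fragment above, reusing the names mem, in1 and in2 from the patch; the remainder of the loop body is truncated in the quote:

/* Rough sketch, not a drop-in replacement for the patch. */
#include <xmmintrin.h>

void sse_fragment(float *mem, const float *in1, float *in2)
{
    __m128 x0 = _mm_load_ss(in1);       /* movss xmm0, [in1] */
    __m128 x1 = _mm_load_ss(mem);       /* movss xmm1, [mem] */
    x1 = _mm_add_ss(x1, x0);            /* addss xmm1, xmm0  */
    _mm_store_ss(in2, x1);              /* movss [in2], xmm1 */

    x0 = _mm_shuffle_ps(x0, x0, 0x00);  /* broadcast lane 0 to all lanes */
    x1 = _mm_shuffle_ps(x1, x1, 0x00);
    /* ...the rest of the loop body is truncated in the quoted patch */
    (void)x0; (void)x1;
}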
2008 Jul 12
0
[LLVMdev] Shuffle regression
...and I suspect
> that the issue is still present.
>
> 2.3 generates the following x86 code:
>
> 03A10010 push ebp
> 03A10011 mov ebp,esp
> 03A10013 and esp,0FFFFFFF0h
> 03A10019 movups xmm0,xmmword ptr ds:[141D280h]
> 03A10020 xorps xmm1,xmm1
> 03A10023 movaps xmm2,xmm0
> 03A10026 shufps xmm2,xmm1,32h
> 03A1002A movaps xmm1,xmm0
> 03A1002D shufps xmm1,xmm2,84h
> 03A10031 shufps xmm0,xmm1,23h
> 03A10035 shufps xmm1,xmm1,40h
> 03A10039 shufps xmm1,xmm0,2Eh
> 03A1003D...
2015 Jul 29
2
[LLVMdev] x86-64 backend generates aligned ADDPS with unaligned address
....align 16, 0x90
.LBB0_1: # %loop2
# =>This Inner Loop Header: Depth=1
movq offset_array3(,%rsi,8), %rdi
movq offset_array2(,%rsi,8), %r10
movss -28(%rax), %xmm0
movss -8(%rax), %xmm1
movss -4(%rax), %xmm2
unpcklps %xmm0, %xmm2 # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
movss (%rax), %xmm0
unpcklps %xmm0, %xmm1 # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
unpcklps %xmm2, %xmm1 # xmm1 = xmm1[0],xmm2[0],xm...
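
For background (not from the quoted message): the complaint is that an alignment-requiring instruction form was emitted for an address that is not guaranteed to be 16-byte aligned. At the intrinsic level the distinction looks like this (pointer names are illustrative):

/* Sketch of the aligned/unaligned distinction at issue: folding a load
 * into addps requires a 16-byte-aligned address, a movups load does not. */
#include <xmmintrin.h>

__m128 sum4(const float *p_aligned, const float *p_unaligned)
{
    __m128 a = _mm_load_ps(p_aligned);    /* movaps: faults if misaligned */
    __m128 b = _mm_loadu_ps(p_unaligned); /* movups: any address is fine  */
    return _mm_add_ps(a, b);              /* addps may fold only the aligned load */
}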
2014 Sep 06
2
[LLVMdev] Please benchmark new x86 vector shuffle lowering, planning to make it the default very soon!
...5, 2014 at 9:32 AM, Robert Lougher <rob.lougher at gmail.com>
>> wrote:
>>
>>
>> Unfortunately, another team, while doing internal testing has seen the
>> new path generating illegal insertps masks. A sample here:
>>
>> vinsertps $256, %xmm0, %xmm13, %xmm4 # xmm4 = xmm0[0],xmm13[1,2,3]
>> vinsertps $256, %xmm1, %xmm0, %xmm6 # xmm6 = xmm1[0],xmm0[1,2,3]
>> vinsertps $256, %xmm13, %xmm1, %xmm7 # xmm7 = xmm13[0],xmm1[1,2,3]
>> vinsertps $416, %xmm1, %xmm4, %xmm14 # xmm14 = xmm4[0,1],xmm1[2],xmm4[3]
...
2014 Sep 08
2
[LLVMdev] Please benchmark new x86 vector shuffle lowering, planning to make it the default very soon!
...
>>>> wrote:
>>>>>
>>>>> Unfortunately, another team, while doing internal testing has seen the
>>>>> new path generating illegal insertps masks. A sample here:
>>>>>
>>>>> vinsertps $256, %xmm0, %xmm13, %xmm4 # xmm4 = xmm0[0],xmm13[1,2,3]
>>>>> vinsertps $256, %xmm1, %xmm0, %xmm6 # xmm6 = xmm1[0],xmm0[1,2,3]
>>>>> vinsertps $256, %xmm13, %xmm1, %xmm7 # xmm7 = xmm13[0],xmm1[1,2,3]
>>>>> vinsertps $416, %xmm1, %xmm4, %xmm14 # xmm14 = ...
2012 Jul 06
2
[LLVMdev] Excessive register spilling in large automatically generated functions, such as is found in FFTW
...tes much
>> better code. Here is an example of a sequence of instructions from a
>> 32-point FFT, compiled with clang/LLVM 3.1 for x86_64 with SSE:
>>
>> [...]
>> movaps 32(%rdi), %xmm3
>> movaps 48(%rdi), %xmm2
>> movaps %xmm3, %xmm1 ### <-- xmm3 mov'ed into xmm1
>> movaps %xmm3, %xmm4 ### <-- xmm3 mov'ed into xmm4
>> addps %xmm0, %xmm1
>> movaps %xmm1, -16(%rbp) ## 16-byte Spill
>> movaps 144(%rdi), %xmm3 ### <-- new data mov'ed into xmm...
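
Not quoted here, but for orientation: the basic FFT step behind generated code like this is the radix-2 butterfly, whose many intermediate values drive the register pressure under discussion. A hypothetical sketch, unrelated to FFTW's actual generator:

/* Hypothetical radix-2 butterfly on packed floats: each step produces a
 * sum and a difference, and a 32-point FFT has many such live values. */
#include <xmmintrin.h>

void butterfly(__m128 *a, __m128 *b)
{
    __m128 x = *a, y = *b;
    *a = _mm_add_ps(x, y);   /* x + y */
    *b = _mm_sub_ps(x, y);   /* x - y */
}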
2012 Apr 05
0
[LLVMdev] Suboptimal code due to excessive spilling
...orl %ebx, %ebx
movl 108(%esp), %ecx
movl 104(%esp), %edx
xorl %esi, %esi
.align 16, 0x90
.LBB1_2: # %.lr.ph.i
# =>This Inner Loop Header: Depth=1
movsd (%edx,%ebx,8), %xmm2
addsd .LCPI1_0, %xmm2
movsd 16(%edx,%ebx,8), %xmm1
movsd %xmm1, (%esp) # 8-byte Spill
movl %ebx, %edi
addl $1, %edi
addsd (%edx,%edi,8), %xmm2
movsd 136(%edx,%ebx,8), %xmm1
movsd %xmm1, 72(%esp) # 8-byte Spill
movsd 128(%edx,%ebx,8), %xmm1
movsd %xmm1, 64(%esp) # 8-byte Spill
movsd 120(%edx,%ebx,8), %xmm1
movsd %...
2012 Jul 06
0
[LLVMdev] Excessive register spilling in large automatically generated functions, such as is found in FFTW
...PM, Jakob Stoklund Olesen <stoklund at 2pi.dk> wrote:
>> On Jul 5, 2012, at 9:06 PM, Anthony Blake <amb33 at cs.waikato.ac.nz> wrote:
>>> [...]
>>> movaps 32(%rdi), %xmm3
>>> movaps 48(%rdi), %xmm2
>>> movaps %xmm3, %xmm1 ### <-- xmm3 mov'ed into xmm1
>>> movaps %xmm3, %xmm4 ### <-- xmm3 mov'ed into xmm4
>>> addps %xmm0, %xmm1
>>> movaps %xmm1, -16(%rbp) ## 16-byte Spill
>>> movaps 144(%rdi), %xmm3 ### <-- new data mov...
2014 Jul 23
4
[LLVMdev] the clang 3.5 loop optimizer seems to jump in unintentional for simple loops
...FFFFFFFFFFFFF8
andq %rsi, %rax
pxor %xmm0, %xmm0
je .LBB0_1
# BB#2: # %vector.body.preheader
leaq (%rdi,%rax,4), %r8
addq $16, %rdi
movq %rsi, %rdx
andq $-8, %rdx
pxor %xmm0, %xmm0
pxor %xmm1, %xmm1
.align 16, 0x90
.LBB0_3: # %vector.body
# =>This Inner Loop Header: Depth=1
movdqa %xmm1, %xmm2
movdqa %xmm0, %xmm3
movdqu -16(%rdi), %xmm0
movdqu (%rdi), %xmm1
padd...
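
The source is not shown in the snippet, but the generated vector body has the shape the loop vectorizer produces for a plain integer reduction (two pxor-cleared accumulators, unaligned movdqu loads, paddd). A hypothetical input of that shape:

/* Hypothetical loop, not the poster's code: a simple integer reduction
 * that the vectorizer turns into the unrolled vector body shown above. */
int sum_ints(const int *a, long n)
{
    int s = 0;
    for (long i = 0; i < n; i++)
        s += a[i];
    return s;
}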
2014 Sep 09
5
[LLVMdev] Please benchmark new x86 vector shuffle lowering, planning to make it the default very soon!
...fps instructions.
Example:
;;;
define <4 x float> @foo(<4 x float> %A, <4 x float> %B) {
%1 = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x float> %1
}
;;;
llc (-mcpu=corei7-avx):
vblendps $10, %xmm1, %xmm0, %xmm0 # xmm0 = xmm0[0],xmm1[5],xmm0[2],xmm1[7]
llc -x86-experimental-vector-shuffle-lowering (-mcpu=corei7-avx):
vshufps $-40, %xmm0, %xmm1, %xmm0 # xmm0 = xmm1[0,2],xmm0[1,3]
vshufps $-40, %xmm0, %xmm0, %xmm0 # xmm0[0,2,1,3]
2) On SSE4.1, we should try not to emit an insertps if t...
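
At the intrinsic level the single-instruction form corresponds to a blend: the mask <0,5,2,7> takes A in the even lanes and B in the odd lanes, i.e. blend immediate 0b1010 (= 10), which matches the vblendps above. A sketch (SSE4.1; function name taken from the IR example):

/* Sketch of the one-instruction lowering: a per-lane blend of A and B. */
#include <smmintrin.h>   /* SSE4.1 */

__m128 foo(__m128 A, __m128 B)
{
    return _mm_blend_ps(A, B, 0x0A);  /* lanes: A[0], B[1], A[2], B[3] */
}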
2010 Aug 31
5
[LLVMdev] "equivalent" .ll files diverge after optimizations are applied
...24, %rsp
movq %rsi, %rdx
movl $0, 16(%rsp)
movl $0, 20(%rsp)
movl $0, 8(%rsp)
movl $0, 12(%rsp)
movq 8(%rdi), %rsi
leaq 16(%rsp), %rcx
leaq 8(%rsp), %r8
callq __ZN7WebCore5mouniEPNS_15GraphicsContextEPNS_30GraphicsContextPlatformPrivateERKNS_9FloatRectERNS_10FloatPointES8_
movss 8(%rsp), %xmm1
movss 12(%rsp), %xmm0
subss 20(%rsp), %xmm0
subss 16(%rsp), %xmm1
## kill: XMM1<def> XMM1<kill> XMM1<def>
insertps $16, %xmm0, %xmm1 ## xmm1 = xmm1[0],xmm0[0],xmm1[2,3]
movq 16(%rsp), %xmm0
addq $24, %rsp
ret
$ opt -std-compile-opts...
2010 Aug 31
0
[LLVMdev] "equivalent" .ll files diverge after optimizations are applied
...is doing an aggregate copy field-by-field while the
failing code has lowered this to a memcpy. I would certainly expect
the memcpy expansion to be smart enough to avoid using MM registers,
though; that's a serious bug if it isn't.
movd %xmm0, %rax
movd %rax, %mm0
movq2dq %mm0, %xmm1
movq2dq %mm0, %xmm2
punpcklqdq %xmm2, %xmm1 ## xmm1 = xmm1[0],xmm2[0]
movq 16(%rsp), %rax
movd %rax, %mm0
movq2dq %mm0, %xmm0
punpcklqdq %xmm2, %xmm0 ## xmm0 = xmm0[0],xmm2[0]
On Aug 31, 2010, at 11:18 AM PDT, Argyrios Kyrtzidis wrote:
> Hi,
>
> I've attached 2 .ll files...
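
The attached .ll files are not reproduced in the snippet. Purely for illustration, a small aggregate copy of the kind that can be lowered either field-by-field or as a memcpy (hypothetical, loosely modelled on the FloatRect type named in the mangled symbol above):

/* Illustrative only: a 16-byte aggregate copied by value may be lowered
 * field-by-field or as a memcpy; the complaint above is that the memcpy
 * expansion picked MMX (%mm0) registers for the move. */
struct FloatRect { float x, y, w, h; };

struct FloatRect copy_rect(const struct FloatRect *src)
{
    struct FloatRect dst = *src;   /* may become a 16-byte memcpy */
    return dst;
}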
2013 Jul 19
4
[LLVMdev] SIMD instructions and memory alignment on X86
Hmm, I'm not able to get those .ll files to compile if I disable SSE and I
end up with SSE instructions (including sqrtpd) if I don't disable it.
On Thu, Jul 18, 2013 at 10:53 PM, Peter Newman <peter at uformia.com> wrote:
> Is there something specifically required to enable SSE? If it's not
> detected as available (based on the target triple?) then I don't think
2010 Aug 31
2
[LLVMdev] "equivalent" .ll files diverge after optimizations are applied
...ggregate copy field-by-field while the failing code has lowered this to a memcpy. I would certainly expect the memcpy expansion to be smart enough to avoid using MM registers, though; that's a serious bug if it isn't.
>
> movd %xmm0, %rax
> movd %rax, %mm0
> movq2dq %mm0, %xmm1
> movq2dq %mm0, %xmm2
> punpcklqdq %xmm2, %xmm1 ## xmm1 = xmm1[0],xmm2[0]
> movq 16(%rsp), %rax
> movd %rax, %mm0
> movq2dq %mm0, %xmm0
> punpcklqdq %xmm2, %xmm0 ## xmm0 = xmm0[0],xmm2[0]
>
>
> On Aug 31, 2010, at 11:18 AM PDT, Argyrios Kyrtzidis wrote:
>...