Displaying 11 results from an estimated 11 matches for "subss".
Did you mean:
subsys
2017 Apr 19
3
[cfe-dev] FE_INEXACT being set for an exact conversion from float to unsigned long long
...d.
>
>
> $ more llvm/lib/Target/X86//README-X86-64.txt
> …
> Are we better off using branches instead of cmove to implement FP to
> unsigned i64?
>
> _conv:
> ucomiss LC0(%rip), %xmm0
> cvttss2siq %xmm0, %rdx
> jb L3
> subss LC0(%rip), %xmm0
> movabsq $-9223372036854775808, %rax
> cvttss2siq %xmm0, %rdx
> xorq %rax, %rdx
> L3:
> movq %rdx, %rax
> ret
>
> instead of
>
> _conv:
> movss LCPI1_0(%rip), %xmm1
> cvttss2...
2010 Aug 31
5
[LLVMdev] "equivalent" .ll files diverge after optimizations are applied
...vl $0, 16(%rsp)
movl $0, 20(%rsp)
movl $0, 8(%rsp)
movl $0, 12(%rsp)
movq 8(%rdi), %rsi
leaq 16(%rsp), %rcx
leaq 8(%rsp), %r8
callq __ZN7WebCore5mouniEPNS_15GraphicsContextEPNS_30GraphicsContextPlatformPrivateERKNS_9FloatRectERNS_10FloatPointES8_
movss 8(%rsp), %xmm1
movss 12(%rsp), %xmm0
subss 20(%rsp), %xmm0
subss 16(%rsp), %xmm1
## kill: XMM1<def> XMM1<kill> XMM1<def>
insertps $16, %xmm0, %xmm1 ## xmm1 = xmm1[0],xmm0[0],xmm1[2,3]
movq 16(%rsp), %xmm0
addq $24, %rsp
ret
$ opt -std-compile-opts unopt-fail.ll -o - | llc -o -...
2010 Aug 31
0
[LLVMdev] "equivalent" .ll files diverge after optimizations are applied
...%rsp)
> movl $0, 12(%rsp)
> movq 8(%rdi), %rsi
> leaq 16(%rsp), %rcx
> leaq 8(%rsp), %r8
> callq
> __ZN7WebCore5mouniEPNS_15GraphicsContextEPNS_30GraphicsContextPlatformPrivateERKNS_9FloatRectERNS_10FloatPointES8_
> movss 8(%rsp), %xmm1
> movss 12(%rsp), %xmm0
> subss 20(%rsp), %xmm0
> subss 16(%rsp), %xmm1
> ## kill: XMM1<def> XMM1<kill>
> XMM1<def>
> insertps $16, %xmm0, %xmm1 ## xmm1 = xmm1[0],xmm0[0],xmm1[2,3]
> movq 16(%rsp), %xmm0
> addq $24, %rsp
> ret
>
>
> $ op...
2010 Aug 31
2
[LLVMdev] "equivalent" .ll files diverge after optimizations are applied
...12(%rsp)
>> movq 8(%rdi), %rsi
>> leaq 16(%rsp), %rcx
>> leaq 8(%rsp), %r8
>> callq __ZN7WebCore5mouniEPNS_15GraphicsContextEPNS_30GraphicsContextPlatformPrivateERKNS_9FloatRectERNS_10FloatPointES8_
>> movss 8(%rsp), %xmm1
>> movss 12(%rsp), %xmm0
>> subss 20(%rsp), %xmm0
>> subss 16(%rsp), %xmm1
>> ## kill: XMM1<def> XMM1<kill> XMM1<def>
>> insertps $16, %xmm0, %xmm1 ## xmm1 = xmm1[0],xmm0[0],xmm1[2,3]
>> movq 16(%rsp), %xmm0
>> addq $24, %rsp
>> ret
>...
2010 Aug 31
0
[LLVMdev] "equivalent" .ll files diverge after optimizations are applied
...>>> leaq 16(%rsp), %rcx
>>> leaq 8(%rsp), %r8
>>> callq
>>> __ZN7WebCore5mouniEPNS_15GraphicsContextEPNS_30GraphicsContextPlatformPrivateERKNS_9FloatRectERNS_10FloatPointES8_
>>> movss 8(%rsp), %xmm1
>>> movss 12(%rsp), %xmm0
>>> subss 20(%rsp), %xmm0
>>> subss 16(%rsp), %xmm1
>>> ## kill: XMM1<def> XMM1<kill>
>>> XMM1<def>
>>> insertps $16, %xmm0, %xmm1 ## xmm1 = xmm1[0],xmm0[0],xmm1[2,3]
>>> movq 16(%rsp), %xmm0
>>>...
2017 Apr 20
4
[cfe-dev] FE_INEXACT being set for an exact conversion from float to unsigned long long
...the target layer so I can find where the sequence is emitted.
$ more llvm/lib/Target/X86//README-X86-64.txt
…
Are we better off using branches instead of cmove to implement FP to
unsigned i64?
_conv:
ucomiss LC0(%rip), %xmm0
cvttss2siq %xmm0, %rdx
jb L3
subss LC0(%rip), %xmm0
movabsq $-9223372036854775808, %rax
cvttss2siq %xmm0, %rdx
xorq %rax, %rdx
L3:
movq %rdx, %rax
ret
instead of
_conv:
movss LCPI1_0(%rip), %xmm1
cvttss2siq %xmm0, %rcx
movaps %xmm0, %xmm2
subss %x...
2004 Aug 06
2
[PATCH] Make SSE Run Time option. Add Win32 SSE code
...3, [ebx+36]
+ mulss xmm3, xmm1
+ addss xmm2, [ecx+36]
+ movss xmm4, [eax+40]
+ mulss xmm4, xmm0
+ movss xmm5, [ebx+40]
+ mulss xmm5, xmm1
+ subss xmm2, xmm3
+ movss [ecx+32], xmm2
+ subss xmm4, xmm5
+ movss [ecx+36], xmm4
+
+ mov edx, in1
+ movss xmm0, [edx+4]
+
+ movss xmm1, [ecx]
+ ad...
2017 Sep 29
2
Trouble when suppressing a portion of fast-math-transformations
...ly return the first argument (and that reassociation does happen with
'-ffast-math', with both the old and new compilers):
$ clang -c -O2 -o x.o assoc.cpp
$ llvm-objdump -d x.o | grep "^ .*: "
0: f3 0f 58 c1 addss %xmm1, %xmm0
4: f3 0f 5c c1 subss %xmm1, %xmm0
8: c3 retq
$ clang -c -O2 -ffast-math -o x.o assoc.cpp
$ llvm-objdump -d x.o | grep "^ .*: "
0: c3 retq
$
FTR, GCC also does the reassociation transformation here when '-ffast-math' is
used, as expected.
But when using '-...
2017 Sep 29
0
Trouble when suppressing a portion of fast-math-transformations
...eassociation does happen with
>
> '-ffast-math', with both the old and new compilers):
>
> $ clang -c -O2 -o x.o assoc.cpp
>
> $ llvm-objdump -d x.o | grep "^ .*: "
>
> 0: f3 0f 58 c1 addss %xmm1, %xmm0
>
> 4: f3 0f 5c c1 subss %xmm1, %xmm0
>
> 8: c3 retq
>
> $ clang -c -O2 -ffast-math -o x.o assoc.cpp
>
> $ llvm-objdump -d x.o | grep "^ .*: "
>
> 0: c3 retq
>
> $
>
> FTR, GCC also does the reassociation transformation here when
> '-ffast-mat...
2002 Oct 18
7
RAM usage
Hi,
I'm having problems while working with large data sets with R 1.5.1 in
Windows 2000. Given an integer matrix of 30 columns and 15000 rows,
my function should return a boolean matrix of about 5000 rows and
15000 columns.
First of all, I tried to run this function on a computer with 256 MB of
RAM. I increased the memory limit of R with memory.limit() up to 512 MB. I
was inspecting
2017 Apr 21
2
[cfe-dev] FE_INEXACT being set for an exact conversion from float to unsigned long long
...the target layer so I can find where the sequence is emitted.
$ more llvm/lib/Target/X86//README-X86-64.txt
…
Are we better off using branches instead of cmove to implement FP to
unsigned i64?
_conv:
ucomiss LC0(%rip), %xmm0
cvttss2siq %xmm0, %rdx
jb L3
subss LC0(%rip), %xmm0
movabsq $-9223372036854775808, %rax
cvttss2siq %xmm0, %rdx
xorq %rax, %rdx
L3:
movq %rdx, %rax
ret
instead of
_conv:
movss LCPI1_0(%rip), %xmm1
cvttss2siq %xmm0, %rcx
movaps %xmm0, %xmm2
subss %x...