Displaying 3 results from an estimated 3 matches for "vfmsub213ss".
2016 Jun 29
2
avx512 JIT backend generates wrong code on <4 x float>
...addq 8(%rsp), %rdx
.align 16, 0x90
.LBB0_1:
vmovaps -16(%rdx), %xmm0
vmovaps (%rdx), %xmm1
vmovaps -16(%rdi), %xmm2
vmovaps (%rdi), %xmm3
vmulps %xmm3, %xmm1, %xmm4
vmulps %xmm2, %xmm1, %xmm1
vfmadd213ss %xmm4, %xmm0, %xmm2
vfmsub213ss %xmm1, %xmm0, %xmm3
vmovaps %xmm2, -16(%rsi)
vmovaps %xmm3, (%rsi)
addq $1, %rax
addq $32, %rsi
addq $32, %rdi
addq $32, %rdx
cmpq %rcx, %rax
jl .LBB0_1
retq
.Lfunc_end0:
.size adjmul, .Lfunc_end0-adjmul
.cfi_endpr...
2016 Jun 29
0
avx512 JIT backend generates wrong code on <4 x float>
...0x90
> .LBB0_1:
> vmovaps -16(%rdx), %xmm0
> vmovaps (%rdx), %xmm1
> vmovaps -16(%rdi), %xmm2
> vmovaps (%rdi), %xmm3
> vmulps %xmm3, %xmm1, %xmm4
> vmulps %xmm2, %xmm1, %xmm1
> vfmadd213ss %xmm4, %xmm0, %xmm2
> vfmsub213ss %xmm1, %xmm0, %xmm3
> vmovaps %xmm2, -16(%rsi)
> vmovaps %xmm3, (%rsi)
> addq $1, %rax
> addq $32, %rsi
> addq $32, %rdi
> addq $32, %rdx
> cmpq %rcx, %rax
> jl .LBB0_1
> retq
> .Lfunc_end0:
>...
2016 Jun 30
1
avx512 JIT backend generates wrong code on <4 x float>
...s -16(%rdx), %xmm0
>> vmovaps (%rdx), %xmm1
>> vmovaps -16(%rdi), %xmm2
>> vmovaps (%rdi), %xmm3
>> vmulps %xmm3, %xmm1, %xmm4
>> vmulps %xmm2, %xmm1, %xmm1
>> vfmadd213ss %xmm4, %xmm0, %xmm2
>> vfmsub213ss %xmm1, %xmm0, %xmm3
>> vmovaps %xmm2, -16(%rsi)
>> vmovaps %xmm3, (%rsi)
>> addq $1, %rax
>> addq $32, %rsi
>> addq $32, %rdi
>> addq $32, %rdx
>> cmpq %rcx, %rax
>> jl .LBB0_1...