Displaying 19 results from an estimated 19 matches for "shlq".
2015 Jul 27
3
[LLVMdev] i1* function argument on x86-64
I am running into a problem with 'i1*' as a function argument which seems
to have appeared since I switched to LLVM 3.6 (though it may have another
cause, of course). If I look at the assembler that MCJIT generates for an
x86-64 target, I see that the array 'i1*' is treated as a sequence of
1-bit-wide elements. (I guess that's correct.) However, I used to
call the function
2010 Jul 05
0
[LLVMdev] Vector promotions for calling conventions
...of XMM1. Both llvm-gcc and clang currently generate atrocious IR for these structs, which you can see if you compile this:
struct x { float a,b,c,d; };
struct x foo(struct x *P) { return *P; };
The machine code generated by llvm-gcc[*] for this is:
_foo:
movl (%rdi), %eax
movl 4(%rdi), %ecx
shlq $32, %rcx
addq %rax, %rcx
movd %rcx, %xmm0
movl 8(%rdi), %eax
movl 12(%rdi), %ecx
shlq $32, %rcx
addq %rax, %rcx
movd %rcx, %xmm1
ret
when we really just want:
_foo:
movq (%rdi), %xmm0
movq 8(%rdi), %xmm1
ret
I'm looking at having clang generate IR for this by passing and returnin...
2013 Oct 30
1
[LLVMdev] Optimization bug - spurious shift in partial word test
...a partial word is tested, let's say > 0, by shifting
left to get the sign bit into the MSB and then testing, LLVM inserts a
spurious right-shift instruction.
For example this IR:
...
%0 = load i64* %a.addr, align 8
%shl = shl i64 %0, 28
%cmp = icmp sgt i64 %shl, 0
...
results in
...
shlq $28, %rdi
sarq $28, %rdi ; <<< spurious shift
testq %rdi, %rdi
gcc doesn't have this problem; it just emits the shift and the test.
The reason appears to be that the instruction-combining pass decides that
the shift and test are equivalent to a test on the partial word,...
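For reference, a C-level reproducer (a hypothetical reconstruction, not the thread's original source): the shift count of 28 and the signed > 0 comparison come from the IR above, everything else is invented.
```
/* Hypothetical reconstruction of the source behind the IR above: shift the
   low bits of a 64-bit word up so their sign bit lands in the MSB, then
   compare against zero. */
int partial_word_positive(long a) {
    return (a << 28) > 0;   /* shl i64 ..., 28 + icmp sgt ..., 0 */
}
```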
2016 Jun 29
2
avx512 JIT backend generates wrong code on <4 x float>
...q %rdi, %rdx
cmoveq %rsi, %r8
movq %rdx, %rax
sarq $63, %rax
shrq $62, %rax
addq %rdx, %rax
sarq $2, %rax
movq %r8, %rcx
sarq $63, %rcx
shrq $62, %rcx
addq %r8, %rcx
sarq $2, %rcx
movq %rax, %rdx
shlq $5, %rdx
leaq 16(%r9,%rdx), %rsi
orq $16, %rdx
movq 16(%rsp), %rdi
addq %rdx, %rdi
addq 8(%rsp), %rdx
.align 16, 0x90
.LBB0_1:
vmovaps -16(%rdx), %xmm0
vmovaps (%rdx), %xmm1
vmovaps -16(%rdi), %xmm2
vmovaps (%rdi), %...
2015 Nov 04
2
Vectorizing structure reads, writes, etc on X86-64 AVX
...runk. I made a change
that comes into play if you don't specify a particular CPU:
http://llvm.org/viewvc/llvm-project?view=revision&revision=245950
$ ./clang -O1 -mavx copy.c -S -o -
...
movslq %edi, %rax
movq _spr_dynamic@GOTPCREL(%rip), %rcx
movq (%rcx), %rcx
shlq $5, %rax
movslq %esi, %rdx
movq _spr_static@GOTPCREL(%rip), %rsi
movq (%rsi), %rsi
shlq $5, %rdx
vmovups (%rsi,%rdx), %ymm0 <--- 32-byte load
vmovups %ymm0, (%rcx,%rax) <--- 32-byte store
popq %rbp
vze...
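For context, a hypothetical reconstruction of copy.c inferred from the listing above (the 32-byte stride from `shlq $5` and the `spr_dynamic`/`spr_static` globals loaded through the GOT); the struct name and field layout are invented, not the original source.
```
typedef struct {                 /* 32 bytes, matching the shlq $5 stride */
    float x, y, dx, dy, r, g, b, a;
} sprite;

sprite *spr_dynamic;             /* the globals accessed through the GOT above */
sprite *spr_static;

void copy(int i, int j) {
    /* one 32-byte element copy; with -mavx this becomes the single
       vmovups load/store pair shown in the listing */
    spr_dynamic[i] = spr_static[j];
}
```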
2016 Jun 29
0
avx512 JIT backend generates wrong code on <4 x float>
...rdx, %rax
> sarq $63, %rax
> shrq $62, %rax
> addq %rdx, %rax
> sarq $2, %rax
> movq %r8, %rcx
> sarq $63, %rcx
> shrq $62, %rcx
> addq %r8, %rcx
> sarq $2, %rcx
> movq %rax, %rdx
> shlq $5, %rdx
> leaq 16(%r9,%rdx), %rsi
> orq $16, %rdx
> movq 16(%rsp), %rdi
> addq %rdx, %rdi
> addq 8(%rsp), %rdx
> .align 16, 0x90
> .LBB0_1:
> vmovaps -16(%rdx), %xmm0
> vmovaps (%rdx), %xmm1
> vmo...
2016 Jun 30
1
avx512 JIT backend generates wrong code on <4 x float>
...shrq $62, %rax
>> addq %rdx, %rax
>> sarq $2, %rax
>> movq %r8, %rcx
>> sarq $63, %rcx
>> shrq $62, %rcx
>> addq %r8, %rcx
>> sarq $2, %rcx
>> movq %rax, %rdx
>> shlq $5, %rdx
>> leaq 16(%r9,%rdx), %rsi
>> orq $16, %rdx
>> movq 16(%rsp), %rdi
>> addq %rdx, %rdi
>> addq 8(%rsp), %rdx
>> .align 16, 0x90
>> .LBB0_1:
>> vmovaps -16(%rdx), %xmm0
>>...
2016 Jun 23
2
AVX512 instruction generated when JIT compiling for an avx2 architecture
...$2, %rax
movq %r8, %rcx
sarq $63, %rcx
shrq $62, %rcx
addq %r8, %rcx
sarq $2, %rcx
movq (%r10), %r8
movq 8(%r10), %r10
movq %r8, %rdi
shrq $32, %rdi
movq %r10, %rsi
shrq $32, %rsi
movq %rax, %rdx
shlq $6, %rdx
leaq 48(%rdx,%r9), %rdx
.align 16, 0x90
.LBB0_1:
vmovd %r8d, %xmm0
vpbroadcastd %xmm0, %xmm0
vmovd %edi, %xmm1
vpbroadcastd %xmm1, %xmm1
vmovd %r10d, %xmm2
vpbroadcastd %xmm2, %xmm2
vmovd %esi, %xmm3
vpbroadcas...
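One way to keep MCJIT from emitting AVX-512 encodings when you want plain AVX2 code is to pin the CPU and feature string explicitly instead of relying on host detection. A minimal C++ sketch, assuming the standard EngineBuilder API; the CPU name and feature list are illustrative choices, not taken from the thread.
```
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/MCJIT.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/TargetSelect.h"
#include <memory>
#include <string>
#include <vector>

llvm::ExecutionEngine *makeAvx2JIT(std::unique_ptr<llvm::Module> M) {
  llvm::InitializeNativeTarget();
  llvm::InitializeNativeTargetAsmPrinter();
  std::vector<std::string> Attrs = {"+avx2", "-avx512f"}; // explicit features
  std::string Err;
  return llvm::EngineBuilder(std::move(M))
      .setErrorStr(&Err)
      .setEngineKind(llvm::EngineKind::JIT)
      .setMCPU("core-avx2")   // pin the CPU instead of using the host's
      .setMAttrs(Attrs)
      .create();              // returns nullptr on failure; Err has details
}
```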
2016 Jun 23
2
AVX512 instruction generated when JIT compiling for an avx2 architecture
...> addq %r8, %rcx
> sarq $2, %rcx
> movq (%r10), %r8
> movq 8(%r10), %r10
> movq %r8, %rdi
> shrq $32, %rdi
> movq %r10, %rsi
> shrq $32, %rsi
> movq %rax, %rdx
> shlq $6, %rdx
> leaq 48(%rdx,%r9), %rdx
> .align 16, 0x90
> .LBB0_1:
> vmovd %r8d, %xmm0
> vpbroadcastd %xmm0, %xmm0
> vmovd %edi, %xmm1
> vpbroadcastd %xmm1, %xmm1
> vmovd %r10d, %xmm2
>...
2010 Jul 08
1
[LLVMdev] X86 gcc and clang have incompatible calling conventions for returning some small structs?
Hello,
I think I have come across an inconsistency between gcc and clang/llvm with respect to returning small structs. Given the
following code:
> struct s {
> int a;
> int b;
> };
>
> struct s3 {
> int a;
> int b;
> int c;
> };
>
> struct s new_s(int v){
> struct s res;
> res.a = v;
> res.b = -v;
> return res;
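A hedged sketch of how such an inconsistency is typically demonstrated (not the thread's complete test case): compile new_s() with one compiler and the caller below with the other, then check whether both sides agree on where the fields come back.
```
#include <stdio.h>

struct s { int a; int b; };
struct s new_s(int v);           /* defined in the other translation unit */

int main(void) {
    struct s r = new_s(7);
    /* if the return conventions agree, this prints a=7 b=-7 */
    printf("a=%d b=%d\n", r.a, r.b);
    return 0;
}
```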
2016 Oct 12
4
[test-suite] making polybench/symm succeed with "-Ofast" and "-ffp-contract=on"
On Wed, Oct 12, 2016 at 10:53 AM, Hal Finkel <hfinkel at anl.gov> wrote:
> I don't think that Clang/LLVM uses it by default on x86_64. If you're using -Ofast, however, that would explain it. I recommend looking at -O3 vs -O0 and make sure those are the same. -Ofast enables -ffast-math, which can legitimately cause differences.
>
The following tests pass at "-O3" and
2015 Nov 03
2
Vectorizing structure reads, writes, etc on X86-64 AVX
----- Original Message -----
> From: "Sanjay Patel via llvm-dev" <llvm-dev at lists.llvm.org>
> To: "Jay McCarthy" <jay.mccarthy at gmail.com>
> Cc: "llvm-dev" <llvm-dev at lists.llvm.org>
> Sent: Tuesday, November 3, 2015 12:30:51 PM
> Subject: Re: [llvm-dev] Vectorizing structure reads, writes, etc on X86-64 AVX
>
> If the
2007 Jun 27
0
[PATCH 1/10] Provide basic Xen PM infrastructure
...movq GREG(x), SAVED_GREG(x)
+#define LOAD_GREG(x) movq SAVED_GREG(x), GREG(x)
+
+#define REF(x) x(%rip)
+
+#define RDMSR(ind, m) \
+ xorq %rdx, %rdx; \
+ mov $ind, %ecx; \
+ rdmsr; \
+ shlq $0x20, %rdx; \
+ orq %rax, %rdx; \
+ movq %rdx, m(%rip);
+
+#define WRMSR(ind, m) \
+ mov $ind, %ecx; \
+ movq m(%rip), %rdx; \
+ mov %edx, %eax; \
+ shrq $0...
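For readers less used to AT&T assembly, the combine step of the RDMSR macro above (shlq $0x20 followed by orq) is the usual edx:eax merge; a C sketch of the same arithmetic, with an invented helper name:
```
#include <stdint.h>

/* rdmsr leaves the low half in %eax and the high half in %edx; the macro
   above merges them into one 64-bit value before storing it. */
static inline uint64_t msr_merge(uint32_t lo /* eax */, uint32_t hi /* edx */) {
    return ((uint64_t)hi << 32) | lo;   /* shlq $0x20, %rdx ; orq %rax, %rdx */
}
```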
2018 Mar 23
5
RFC: Speculative Load Hardening (a Spectre variant #1 mitigation)
...ct when it has been marked due to mis-speculation. The callsite instruction
sequence looks like (assuming a mis-speculated state value of `-1`):
```
...
.LBB0_4: # %danger
        cmovneq %r8, %rax               # Conditionally update predicate state.
        shlq    $47, %rax
        orq     %rax, %rsp
        callq   other_function
        movq    %rsp, %rax
        sarq    $63, %rax               # Sign extend the high bit to all bits.
```
This first puts the predicate state into the high bits of `%rsp` before calling
the function and then reads it back...
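The arithmetic behind that sequence, sketched in C with invented helper names (the predicate state is 0 when speculation is known-good and -1 when mis-speculated):
```
#include <stdint.h>

/* shlq $47, %rax ; orq %rax, %rsp -- fold the state into the stack pointer's
   high bits, so a mis-speculated path ends up with a non-canonical %rsp. */
static inline uint64_t fold_state(uint64_t rsp, int64_t state) {
    return rsp | ((uint64_t)state << 47);
}

/* movq %rsp, %rax ; sarq $63, %rax -- recover 0 or -1 after the call by
   sign-extending the high bit. */
static inline int64_t recover_state(uint64_t rsp) {
    return (int64_t)rsp >> 63;
}
```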
2007 Mar 27
0
[PATCH] make all performance counter per-cpu
...\
- popq %rdx;
+#define PERFC_INCR(_name,_idx,_cur) \
+ pushq _cur; \
+ movslq VCPU_processor(_cur),_cur; \
+ pushq %rdx; \
+ leaq per_cpu__perfcounters(%rip),%rdx; \
+ shlq $PERCPU_SHIFT,_cur; \
+ addq %rdx,_cur; \
+ popq %rdx; \
+ incl _name*4(_cur,_idx,4); \
+ popq _cur
#else
-#define PERFC_INCR(_name,_idx)
+#define PERFC_INCR(_name,_idx,_cur)
#endif
/*...
2013 Oct 15
0
[LLVMdev] [llvm-commits] r192750 - Enable MI Sched for x86.
...t6:
>> -; X32: movl 12(%esp), %edx
>> +; X32: movl 4(%esp), %eax
>> +; X32-NEXT: movl 12(%esp), %edx
>> ; X32-NEXT: addl 8(%esp), %edx
>> -; X32-NEXT: movl 4(%esp), %eax
>> ; X32-NEXT: ret
>> -
>> +
>> ; X64-LABEL: test6:
>> ; X64: shlq $32, %r[[A1]]
>> ; X64: leaq (%r[[A1]],%r[[A0]]), %rax
>>
>> Modified: llvm/trunk/test/CodeGen/X86/alloca-align-rounding.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/alloca-align-rounding.ll?rev=192750&r1=192749&r2=192750&view...
2012 Nov 22
41
[PATCH V3] vmx/nmi: Do not use self_nmi() in VMEXIT handler
The self_nmi() code causes an NMI to be triggered by sending an APIC
message to the local processor. However, NMIs are blocked by the
VMEXIT until the next iret or VMENTER.
Volume 3 Chapter 27 Section 1 of the Intel SDM states:
An NMI causes subsequent NMIs to be blocked, but only after the VM exit
completes.
As a result, as soon as the VMENTER happens, an immediate VMEXIT
happens
2007 Apr 18
1
[RFC/PATCH LGUEST X86_64 03/13] lguest64 core
...and RH must be %ah)
+ *
+ * This clobbers both SEG and RESULT regs.
+ */
+/* Why does Intel need to make everything so darn complex! */
+.macro DECODE_SSEG SEG RESULT RW RH
+ movzbq 7(\SEG), \RESULT
+ shl $16, \RESULT
+ movb 4(\SEG), \RH
+ shl $8, \RESULT
+ movw 2(\SEG), \RW
+ movq 8(\SEG), \SEG
+ shlq $32, \SEG
+ orq \SEG, \RESULT
+.endm
+
+.global switch_to_guest
+ .type switch_to_guest, @function
+/* rdi holds the pointer to vcpu.
+ * Interrupts are off on entry */
+switch_to_guest:
+ SAVE_REGS
+ /* save host stack */
+ movq %rsp, LGUEST_VCPU_host_stack(%rdi)
+ /* put the guest's stack i...
2007 Apr 18
1
[RFC/PATCH LGUEST X86_64 03/13] lguest64 core
...and RH must be %ah)
+ *
+ * This clobbers both SEG and RESULT regs.
+ */
+/* Why does Intel need to make everything so darn complex! */
+.macro DECODE_SSEG SEG RESULT RW RH
+ movzbq 7(\SEG), \RESULT
+ shl $16, \RESULT
+ movb 4(\SEG), \RH
+ shl $8, \RESULT
+ movw 2(\SEG), \RW
+ movq 8(\SEG), \SEG
+ shlq $32, \SEG
+ orq \SEG, \RESULT
+.endm
+
+.global switch_to_guest
+ .type switch_to_guest, @function
+/* rdi holds the pointer to vcpu.
+ * Interrupts are off on entry */
+switch_to_guest:
+ SAVE_REGS
+ /* save host stack */
+ movq %rsp, LGUEST_VCPU_host_stack(%rdi)
+ /* put the guest's stack i...