Displaying 20 results from an estimated 135 matches for "cmpq".
2017 Oct 11
1
[PATCH v1 06/27] x86/entry/64: Adapt assembly for PIE support
...gument registers are clobbered.
*/
- call *sys_call_table(, %rax, 8)
+ call *(%r11, %rax, 8)
.Lentry_SYSCALL_64_after_fastpath_call:
movq %rax, RAX(%rsp)
@@ -334,7 +337,8 @@ ENTRY(stub_ptregs_64)
* RAX stores a pointer to the C function implementing the syscall.
* IRQs are on.
*/
- cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
+ leaq .Lentry_SYSCALL_64_after_fastpath_call(%rip), %r11
+ cmpq %r11, (%rsp)
jne 1f
/*
@@ -1172,7 +1176,8 @@ ENTRY(error_entry)
movl %ecx, %eax /* zero extend */
cmpq %rax, RIP+8(%rsp)
je .Lbstep_iret
- cmpq $.Lgs_change, RIP+8(%rsp)
+...
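The recurring pattern in this series is mechanical: an instruction that encodes a 32-bit absolute address (invalid once the kernel text can be relocated anywhere) is rewritten to materialize the address RIP-relatively in a scratch register first. A minimal standalone sketch of the before/after shape (placeholder label, not the actual kernel symbol):

	.text
target:
	nop
	.globl	compare_return_addr
compare_return_addr:
	# Before: cmpq $target, (%rsp)   -- absolute immediate, needs a fixed address
	leaq	target(%rip), %r11       # after: address computed relative to RIP
	cmpq	%r11, (%rsp)             # the compare now needs a scratch register
	jne	1f
1:
	ret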
2017 Oct 20
3
[PATCH v1 06/27] x86/entry/64: Adapt assembly for PIE support
...8)
>> .Lentry_SYSCALL_64_after_fastpath_call:
>>
>> movq %rax, RAX(%rsp)
>> @@ -334,7 +337,8 @@ ENTRY(stub_ptregs_64)
>> * RAX stores a pointer to the C function implementing the syscall.
>> * IRQs are on.
>> */
>> - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
>> + leaq .Lentry_SYSCALL_64_after_fastpath_call(%rip), %r11
>> + cmpq %r11, (%rsp)
>> jne 1f
>>
>> /*
>> @@ -1172,7 +1176,8 @@ ENTRY(error_entry)
>> movl %ecx, %eax...
2016 May 13
4
RFC: callee saved register verifier
...xCA5FCA5FCA5FCA5F, %rbp
movabsq $0xCA5FCA5FCA5FCA5F, %rbx # can also be movq %rbp, %rbx etc.
movabsq $0xCA5FCA5FCA5FCA5F, %r12
movabsq $0xCA5FCA5FCA5FCA5F, %r13
movabsq $0xCA5FCA5FCA5FCA5F, %r14
movabsq $0xCA5FCA5FCA5FCA5F, %r15
callq foo
movabsq $0xCA5FCA5FCA5FCA5F, %rax
cmpq %rax, %rbp
jne .LBB1_5
movabsq $0xCA5FCA5FCA5FCA5F, %rax
cmpq %rax, %rbx
jne .LBB1_5
movabsq $0xCA5FCA5FCA5FCA5F, %rax
cmpq %rax, %r12
jne .LBB1_5
movabsq $0xCA5FCA5FCA5FCA5F, %rax
cmpq %rax, %r13
jne .LBB1_5
movabsq $0xCA5FCA5FCA5FCA5F,...
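Condensed to one register, the verifier's scheme is: plant a sentinel in each callee-saved register, make the call, then compare against a fresh copy of the sentinel and branch to a report/abort block on any mismatch. A sketch of the core step (the foo and .LBB1_5 names follow the excerpt):

	movabsq	$0xCA5FCA5FCA5FCA5F, %rbx  # plant sentinel in a callee-saved reg
	callq	foo                        # foo must preserve %rbx per the ABI
	movabsq	$0xCA5FCA5FCA5FCA5F, %rax  # reload sentinel (%rax is caller-saved)
	cmpq	%rax, %rbx                 # mismatch means foo clobbered %rbx
	jne	.LBB1_5                    # jump to the abort/report path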
2017 Oct 20
0
[PATCH v1 06/27] x86/entry/64: Adapt assembly for PIE support
...*sys_call_table(, %rax, 8)
> + call *(%r11, %rax, 8)
> .Lentry_SYSCALL_64_after_fastpath_call:
>
> movq %rax, RAX(%rsp)
> @@ -334,7 +337,8 @@ ENTRY(stub_ptregs_64)
> * RAX stores a pointer to the C function implementing the syscall.
> * IRQs are on.
> */
> - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
> + leaq .Lentry_SYSCALL_64_after_fastpath_call(%rip), %r11
> + cmpq %r11, (%rsp)
> jne 1f
>
> /*
> @@ -1172,7 +1176,8 @@ ENTRY(error_entry)
> movl %ecx, %eax /* zero extend */
> cmpq %rax, RIP+8(%rsp)
> je .Lb...
2015 Oct 27
4
How can I tell llvm, that a branch is preferred ?
...cify
the preferred branch, correct? I see nothing in the specs for "branch"
or "switch", and __builtin_expect does nothing, of that I am sure.
Unfortunately llvm has this knack for ordering the one most crucial part
of my code exactly the opposite of how I want it. It does: (x86_64)
cmpq %r15, (%rax,%rdx)
jne LBB0_3
Ltmp18:
leaq 8(%rax,%rdx), %rcx
jmp LBB0_4
LBB0_3:
addq $8, %rcx
LBB0_4:
when I want,
cmpq %r15, (%rax,%rdx)
je LBB0_3
addq $8, %rcx
jmp LBB0_4
LBB0_3:
leaq 8(%rax,%rdx), %rcx
LBB0_4:
since that saves me executing a jump 99.9% of the time. Is there
anyt...
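This is purely a block-placement question: keep the 99.9% path on the fall-through side so a taken branch is rarely executed. In C source this preference is conventionally hinted with __builtin_expect on the branch condition, and inside LLVM IR with branch-weight (!prof) metadata. The requested layout, annotated with my comments:

	cmpq	%r15, (%rax,%rdx)
	je	LBB0_3              # rare case (0.1%): take the branch
	addq	$8, %rcx            # common case (99.9%): straight-line fall-through
	jmp	LBB0_4
LBB0_3:
	leaq	8(%rax,%rdx), %rcx  # the infrequent path pays the extra jump
LBB0_4: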
2014 Jul 23
4
[LLVMdev] the clang 3.5 loop optimizer seems to jump in unintentional for simple loops
...----
compiled with gcc 4.9.1 and clang 3.5
with clang 3.5 + #define ITER, the_func contains masses of code
the code in main is also sometimes different (not just inlined) from the_func
clang -DITER -O2
clang -DITER -O3
gives:
the_func:
leaq 12(%rdi), %rcx
leaq 4(%rdi), %rax
cmpq %rax, %rcx
cmovaq %rcx, %rax
movq %rdi, %rsi
notq %rsi
addq %rax, %rsi
shrq $2, %rsi
incq %rsi
xorl %edx, %edx
movabsq $9223372036854775800, %rax # imm = 0x7FFFFFFFFFFFFFF8
andq %rsi, %rax
pxor %xmm0, %xmm0...
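For anyone decoding the preamble above: before the "masses of code", the compiler computes a runtime trip count and rounds it down to a multiple of 8 for the vectorized body. The visible lines, annotated (my reading; the 4-byte element size suggests an int array at %rdi):

	leaq	12(%rdi), %rcx      # candidate end pointer
	leaq	4(%rdi), %rax       # end pointer for a minimum-trip loop
	cmpq	%rax, %rcx
	cmovaq	%rcx, %rax          # %rax = umax of the two: clamped end pointer
	movq	%rdi, %rsi
	notq	%rsi                # ~%rdi == -%rdi - 1
	addq	%rax, %rsi          # %rsi = end - start - 1 (byte distance)
	shrq	$2, %rsi            # divide by the 4-byte element size
	incq	%rsi                # +1 gives the exact element trip count
	movabsq	$9223372036854775800, %rax  # mask = ~7
	andq	%rsi, %rax          # trip count rounded down to a multiple of 8
	pxor	%xmm0, %xmm0        # zeroed vector register for the wide body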
2018 Mar 13
0
[PATCH v2 06/27] x86/entry/64: Adapt assembly for PIE support
...oline)
* spill RDI and restore it in a second-stage trampoline.
*/
pushq %rdi
- movq $entry_SYSCALL_64_stage2, %rdi
+ movabsq $entry_SYSCALL_64_stage2, %rdi
JMP_NOSPEC %rdi
END(entry_SYSCALL_64_trampoline)
@@ -1275,7 +1275,8 @@ ENTRY(error_entry)
movl %ecx, %eax /* zero extend */
cmpq %rax, RIP+8(%rsp)
je .Lbstep_iret
- cmpq $.Lgs_change, RIP+8(%rsp)
+ leaq .Lgs_change(%rip), %rcx
+ cmpq %rcx, RIP+8(%rsp)
jne .Lerror_entry_done
/*
@@ -1480,10 +1481,10 @@ ENTRY(nmi)
* resume the outer NMI.
*/
- movq $repeat_nmi, %rdx
+ leaq repeat_nmi(%rip), %rdx
cmpq 8(%rsp), %...
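The two replacements in this hunk address the same encoding limit in different ways: movq $sym only carries a 32-bit sign-extended immediate, while movabsq carries a full 64-bit absolute address (still fixed up by relocation), and leaq sym(%rip) sidesteps absolute addressing entirely. A standalone sketch of the three forms (placeholder label, not the kernel symbols):

	.text
sym:
	ret
	.globl	forms
forms:
	movq	$sym, %rdi      # R_X86_64_32S: 32-bit absolute, breaks beyond +/-2 GiB or under PIE
	movabsq	$sym, %rdi      # R_X86_64_64: full 64-bit absolute, still needs a load-time fixup
	leaq	sym(%rip), %rdi # PC-relative: no absolute address at all
	ret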
2016 May 13
2
RFC: callee saved register verifier
...%rbx # can also be movq %rbp, %rbx etc.
> > movabsq $0xCA5FCA5FCA5FCA5F, %r12
> > movabsq $0xCA5FCA5FCA5FCA5F, %r13
> > movabsq $0xCA5FCA5FCA5FCA5F, %r14
> > movabsq $0xCA5FCA5FCA5FCA5F, %r15
> > callq foo
> > movabsq $0xCA5FCA5FCA5FCA5F, %rax
> > cmpq %rax, %rbp
> > jne .LBB1_5
> > movabsq $0xCA5FCA5FCA5FCA5F, %rax
> > cmpq %rax, %rbx
> > jne .LBB1_5
> > movabsq $0xCA5FCA5FCA5FCA5F, %rax
> > cmpq %rax, %r12
> > jne .LBB1_5
> > movabsq $0xCA5FCA5FCA5FCA5F, %rax
> >...
2018 Apr 04
0
SCEV and LoopStrengthReduction Formulae
> cmpq %rbx, %r14
> jne .LBB0_1
>
> LLVM can perform compare-jump fusion; it already does in certain cases, but
> not in the case above. We can remove the cmp above if we were to perform
> the following transformation:
Do you mean branch-fusion (https://en.wikichip.org/wiki/macro-operati...
2017 Dec 19
4
A code layout related side-effect introduced by rL318299
...workarea/llvm-r318298/dbuild/bin/llc
.cfi_startproc
# BB#0: # %entry
pushq %rax
.cfi_def_cfa_offset 16
movl $i, %eax
.p2align 4, 0x90
.LBB0_1: # %while.cond
# =>This Inner Loop Header: Depth=1
cmpq %rax, %rsi
ja .LBB0_4
# BB#2: # %while.body
# in Loop: Header=BB0_1 Depth=1
movq (%rdi), %rcx
movq %rcx, (%rsi)
movq 8(%rdi), %rcx
movq %rcx, (%rsi)
addq $6, %rdi
addq $6, %rsi
cmpq %rdx, %rsi
jb .LBB0_1
# BB#3:...
2018 Apr 03
4
SCEV and LoopStrengthReduction Formulae
...Example which can be optimized via cmp/jmp fusion.
// clang -O3 -S test.c
extern void g(int);
void f(int *p, long long n) {
do {
g(*p++);
} while (--n);
}
LLVM currently generates the following sequence for x86_64 targets:
LBB0_1:
movl (%r15,%rbx,4), %edi
callq g
addq $1, %rbx
cmpq %rbx, %r14
jne .LBB0_1
LLVM can perform compare-jump fusion; it already does in certain cases, but not
in the case above. We can remove the cmp above if we were to perform
the following transformation:
1.0) Initialize the induction variable, %rbx, to be 'n' instead of zero.
1.1) Negate t...
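Filled out, the idea behind steps 1.0/1.1 is to run the induction variable from -n up to zero, so the addq itself sets ZF on the final iteration and the separate cmpq disappears. A hand-written sketch, not compiler output, assuming %rbx holds n on entry and %r15 has been advanced to point one past the last element of p so the negative index still reaches p[0]..p[n-1]:

	negq	%rbx                # IV starts at -n instead of 0
.LBB0_1:
	movl	(%r15,%rbx,4), %edi # %r15 is one-past-the-end, so this walks p[]
	callq	g
	addq	$1, %rbx            # the add sets ZF on the final iteration...
	jne	.LBB0_1             # ...so no separate cmpq is needed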
2010 Jun 13
2
[LLVMdev] Bignum development
I was able to get the loop to increment from -999 to 0 using IR
directly. That got rid of the cmpq.
The carry I was after could be obtained using the intrinsic
@llvm.uadd.with.overflow.i64; however, there is no way to add with
carry and have it realise that the resulting *carry out* cannot exceed
1. It actually writes the carry to a byte, and then uses logical
operations on it, which slows...
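What the poster is describing would, in the ideal lowering, keep the carry in the CPU's carry flag across limbs instead of spilling it to a byte and reloading it with logical operations. A hand-written sketch of a three-limb add-with-carry chain (illustration only, not what LLVM emitted):

	# %rdi = dst limbs, %rsi = a limbs, %rdx = b limbs (3 x 64-bit, little-endian)
	movq	(%rsi), %rax
	addq	(%rdx), %rax        # limb 0: plain add, sets CF
	movq	%rax, (%rdi)        # plain movs do not disturb the flags
	movq	8(%rsi), %rax
	adcq	8(%rdx), %rax       # limb 1: add with carry-in from CF
	movq	%rax, 8(%rdi)
	movq	16(%rsi), %rax
	adcq	16(%rdx), %rax      # limb 2: the carry never leaves CF
	movq	%rax, 16(%rdi)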
2017 Dec 19
2
A code layout related side-effect introduced by rL318299
...# %entry
>> pushq %rax
>> .cfi_def_cfa_offset 16
>> movl $i, %eax
>> .p2align 4, 0x90
>> .LBB0_1: # %while.cond
>> # =>This Inner Loop Header: Depth=1
>> cmpq %rax, %rsi
>> ja .LBB0_4
>> # BB#2: # %while.body
>> # in Loop: Header=BB0_1 Depth=1
>> movq (%rdi), %rcx
>> movq %rcx, (%rsi)
>> movq 8(%rdi), %rcx
>> movq %rcx, (%rsi)
>> addq...
2012 Feb 15
0
[LLVMdev] LLVM GHC Backend: Tables Next To Code
...of this form:
>
> .text
> .align 8
> .long Main_main1_srt-(Main_main1_info)+0
> .long 0
> .quad 4294967299
> .quad 0
> .quad 270582939663
> .globl Main_main1_info
> .type Main_main1_info, @object
> Main_main1_info:
> .Lc1Df:
> leaq -8(%rbp),%rax
> cmpq %r15,%rax
> jb .Lc1Dh
Ok. I'd strongly recommend the approach of generating the table inside the prolog of the function. This means you'd get something like this:
.text
.align 8
.globl Main_main1_info
.type Main_main1_info, @object
Main_main1_info:
.Lc1Df:
jmp .Ltmp
.long Main_ma...
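The shape being recommended here, reduced to essentials: the info symbol stays at the front, the entry point immediately jumps over the table words embedded in the prologue, and the real code follows. A minimal sketch with placeholder names and table contents:

	.text
	.globl	My_info
My_info:
	jmp	.Ltmp               # entry point hops over the embedded table
	.long	0                   # table words live inside the function body
	.quad	0
.Ltmp:
	ret                         # real code begins here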
2012 Feb 15
2
[LLVMdev] LLVM GHC Backend: Tables Next To Code
...8
>> .long Main_main1_srt-(Main_main1_info)+0
>> .long 0
>> .quad 4294967299
>> .quad 0
>> .quad 270582939663
>> .globl Main_main1_info
>> .type Main_main1_info, @object
>> Main_main1_info:
>> .Lc1Df:
>> leaq -8(%rbp),%rax
>> cmpq %r15,%rax
>> jb .Lc1Dh
>
> Ok. I'd strongly recommend the approach of generating the table inside the prolog of the function. This means you'd get something like this:
>
This is starting to look very similar to how ARM constant islands work, without the extra ugliness f...
2015 Mar 03
2
[LLVMdev] Need a clue to improve the optimization of some C code
...feedback.
Ciao
Nat!
P.S. In case someone is interested, here is the assembler code and the IR that produced it.
Relevant LLVM generated x86_64 assembler portion with -Os
~~~
testq %r12, %r12
je LBB0_5
## BB#1:
movq -8(%r12), %rcx
movq (%rcx), %rax
movq -8(%rax), %rdx
andq %r15, %rdx
cmpq %r15, (%rax,%rdx)
je LBB0_2
## BB#3:
addq $8, %rcx
jmp LBB0_4
LBB0_2:
leaq 8(%rdx,%rax), %rcx
LBB0_4:
movq %r12, %rdi
movq %r15, %rsi
movq %r14, %rdx
callq *(%rcx)
movq %rax, %rbx
LBB0_5:
~~~
Better/tighter assembler code would be (saves 2 instructions, one jump less)
~~~
testq %r12, %r1...
2012 Feb 14
3
[LLVMdev] LLVM GHC Backend: Tables Next To Code
...eapObjects#InfoTables
With TNTC enabled we generate code for closures of this form:
.text
.align 8
.long Main_main1_srt-(Main_main1_info)+0
.long 0
.quad 4294967299
.quad 0
.quad 270582939663
.globl Main_main1_info
.type Main_main1_info, @object
Main_main1_info:
.Lc1Df:
leaq -8(%rbp),%rax
cmpq %r15,%rax
jb .Lc1Dh
[...]
.data
.globl Main_main1_closure
.type Main_main1_closure, @object
Main_main1_closure:
.quad Main_main1_info
.quad 0
Without TNTC we instead generated code of this form:
.text
.globl Main_main1_entry
.type Main_main1_entry, @function
Main_main1_entry:
.LFB15:
lea...
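The point of the with-TNTC layout above is that the table words are laid down immediately before the entry label, so one address serves double duty: Main_main1_info is both the info pointer stored in the closure and the code entry point, and the table is reached at fixed negative offsets from it. A stripped-down sketch (placeholder names and contents):

	.text
	.align	8
	.quad	0                   # info-table word, found at my_info - 8
	.globl	my_info
my_info:                            # one symbol: info pointer AND entry point
	ret                         # code starts right after the table

	.data
my_closure:
	.quad	my_info             # the closure stores the shared pointer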
2017 Jul 17
2
A bug related with undef value when bootstrap MemorySSA.cpp
...ip). I hope this translation is
now correct.
```
.globl hoo # -- Begin function hoo
.p2align 4, 0x90
.type hoo,@function
hoo: # @hoo
.cfi_startproc
# BB#0:
movq a(%rip), %rax
movq cnt(%rip), %rcx
cmpq $0, i_hasval(%rip)
sete %sil
xorl %edx, %edx
.p2align 4, 0x90
.LBB1_1: # =>This Inner Loop Header: Depth=1
testb $1, %sil
je .LBB1_3
# BB#2: # in Loop: Header=BB1_1 Depth=1
movq b(%rip), %r...
2010 Jun 13
0
[LLVMdev] Bignum development
...ision arithmetic to add-with-carry or subtract-with-borrow through i65/i33. That would remove the need for the overflow intrinsics entirely.
Alistair
On 13 Jun 2010, at 02:27, Bill Hart wrote:
> I was able to get the loop to increment from -999 to 0 using IR
> directly. That got rid of the cmpq.
>
> The carry I was after could be obtained using the intrinsic
> @llvm.uadd.with.overflow.i64; however, there is no way to add with
> carry and have it realise that the resulting *carry out* cannot exceed
> 1. It actually writes the carry to a byte, and then uses logical
>...