Displaying 20 results from an estimated 1184 matches for "rbx".
2020 Aug 31
2
Vectorization of math function failed?
...vinsertps $0x30,%xmm0,%xmm1,%xmm0
ce: 48 83 c4 48 add $0x48,%rsp
d2: c3 retq
d3: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
da: 00 00 00
dd: 0f 1f 00 nopl (%rax)
00000000000000e0 <_Z4fct3Pf>:
e0: 53 push %rbx
e1: 48 83 ec 10 sub $0x10,%rsp
e5: 48 89 fb mov %rdi,%rbx
e8: c5 fa 10 07 vmovss (%rdi),%xmm0
ec: c5 fa 10 4f 04 vmovss 0x4(%rdi),%xmm1
f1: c5 fa 11 4c 24 0c vmovss %xmm1,0xc(%rsp)
f7: e8 00 00 00 00 callq fc <_Z4fct3Pf+0x1c>...
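A hedged reconstruction of the kind of source behind this disassembly (only _Z4fct3Pf, i.e. fct3(float*), is visible; the element count and the particular libm routine are assumptions): a scalar math call in the loop body keeps the loop from being vectorized, so the compiler emits per-element vmovss loads and a callq instead of a packed SIMD operation.
// Hedged sketch -- fct3 reconstructed from its mangled name; the use of
// std::exp is an assumption.  The scalar libm call blocks vectorization
// unless a vector math library is available to the compiler.
#include <cmath>
void fct3(float *a) {
  for (int i = 0; i < 4; ++i)
    a[i] = std::exp(a[i]);   // compiled to a scalar call per element
}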
2018 Sep 11
2
Byte-wide stores aren't coalesced if interspersed with other stores
Andres:
FWIW, codegen will do the merge if you turn on global alias analysis for it
"-combiner-global-alias-analysis". That said, we should be able to do this
merging earlier.
-Nirav
On Mon, Sep 10, 2018 at 8:33 PM, Andres Freund via llvm-dev <
llvm-dev at lists.llvm.org> wrote:
> Hi,
>
> On 2018-09-10 13:42:21 -0700, Andres Freund wrote:
> > I have, in postgres,
2018 Sep 11
2
Byte-wide stores aren't coalesced if interspersed with other stores
...; In the case at hand, with a manual 64-bit store (this is on a 64-bit
> target), LLVM then combines 8 byte-wide stores into one.
>
>
> Without -combiner-global-alias-analysis it generates:
>
> movb $0, 1(%rdx)
> movl 4(%rsi,%rdi), %ebx
> movq %rbx, 8(%rcx)
> movb $0, 2(%rdx)
> movl 8(%rsi,%rdi), %ebx
> movq %rbx, 16(%rcx)
> movb $0, 3(%rdx)
> movl 12(%rsi,%rdi), %ebx
> movq %rbx, 24(%rcx)
> movb $0, 4(%rdx)
> movq 16(%rsi,%rdi), %r...
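A hedged sketch of the store pattern being discussed (identifiers are invented; the report apparently concerns generated tuple-deforming code in PostgreSQL): each iteration writes one byte into a null-flags array and one 8-byte value into a values array, and the byte-wide stores could be merged into a single wider store, but the interleaved 8-byte stores stop the DAG combiner unless -combiner-global-alias-analysis is enabled.
#include <cstdint>
#include <cstring>
void deform(uint8_t *isnull, uint64_t *values, const char *tuple) {
  for (int i = 0; i < 4; ++i) {
    isnull[i] = 0;                              // byte-wide store (movb $0, ...)
    uint32_t v;
    std::memcpy(&v, tuple + 4 * i, sizeof v);   // 32-bit load (movl ...(%rsi,%rdi), %ebx)
    values[i] = v;                              // zero-extended 64-bit store (movq %rbx, ...)
  }
}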
2010 Sep 01
5
[LLVMdev] equivalent IR, different asm
...ext,regular,pure_instructions
.globl __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE
.align 4, 0x90
__ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE: ## @_ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE
## BB#0:
pushq %r14
pushq %rbx
subq $8, %rsp
movq %rsi, %rbx
movq %rdi, %r14
movq %rdx, %rdi
movq %rcx, %rsi
callq __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE
movq %rax, %rcx
shrq $32, %rcx
testl %ecx, %ecx
je LBB0_2
## BB#1:
imull (%rbx), %eax
cltd
idivl %ecx
movl %eax, (%r14)
LBB0_2:
addq $8,...
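A hedged reconstruction of what the code above appears to compute (parameter names and the meaning of vizi's packed return value are guesses; the signature is decoded from the mangled name, kolos1(int&, int&, RenderBoxModelObject const*, StyleImage*)):
#include <cstdint>
struct RenderBoxModelObject;
struct StyleImage;
// Assumed: vizi() packs two 32-bit values into its 64-bit return value.
int64_t vizi(const RenderBoxModelObject *obj, StyleImage *image);
void kolos1(int &out, int &scale,
            const RenderBoxModelObject *obj, StyleImage *image) {
  int64_t r = vizi(obj, image);
  int lo = (int)r;            // low half, kept in %eax
  int hi = (int)(r >> 32);    // high half, moved to %ecx
  if (hi != 0)
    out = (scale * lo) / hi;  // imull (%rbx),%eax ; idivl %ecx ; movl %eax,(%r14)
}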
2018 Apr 03
4
SCEV and LoopStrengthReduction Formulae
...ould stand alone as its own pass:
// Example which can be optimized via cmp/jmp fusion.
// clang -O3 -S test.c
extern void g(int);
void f(int *p, long long n) {
do {
g(*p++);
} while (--n);
}
LLVM currently generates the following sequence for x86_64 targets:
LBB0_1:
movl (%r15,%rbx,4), %edi
callq g
addq $1, %rbx
cmpq %rbx, %r14
jne .LBB0_1
LLVM can perform compare-jump fusion; it already does so in certain cases, but not
in the case above. We could remove the cmp above by performing
the following transformation:
1.0) Initialize the induction variable, %rbx, to be ...
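The remaining steps are cut off in the snippet; a hedged illustration of the general idea (not necessarily the thread's exact formulation): count the induction variable from -n up to zero and index off the end of the array, so the increment itself produces the flags and the separate cmpq disappears.
extern void g(int);
void f(int *p, long long n) {
  int *end = p + n;    // base for negative indexing
  long long i = -n;
  do {
    g(end[i]);         // address formed as end + i*4, in the style of (%r15,%rbx,4)
  } while (++i != 0);  // the increment sets ZF; no explicit compare is needed
}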
2018 Apr 04
0
SCEV and LoopStrengthReduction Formulae
> cmpq %rbx, %r14
> jne .LBB0_1
>
> LLVM can perform compare-jump fusion, it already does in certain cases, but
> not in the case above. We can remove the cmp above if we were to perform
> the following transformation:
Do you mean branch-fusion (https://en.wikichip.org/wiki/macro-operation_fu...
2014 May 11
2
[LLVMdev] [cfe-dev] Code generation for noexcept functions
...>
> .LHOTB0:
> .p2align 4,,15
> .globl _Z5test2PFvvE
> .type _Z5test2PFvvE, @function
> _Z5test2PFvvE:
> .LFB1:
> .cfi_startproc
> .cfi_personality 0x3,__gxx_personality_v0
> .cfi_lsda 0x3,.LLSDA1
> pushq %rbx
> .cfi_def_cfa_offset 16
> .cfi_offset 3, -16
> movq %rdi, %rbx
> call *%rdi
> movq %rbx, %rax
> popq %rbx
> .cfi_def_cfa_offset 8
> jmp *%rax
> .cfi_endproc
> .LFE1:
> ....
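A hedged guess at the test case behind this output (only the mangled name _Z5test2PFvvE and the instruction sequence are visible; whether the original was declared noexcept is not shown in the snippet, though that is what the thread discusses):
void test2(void (*fn)()) {
  fn();   // call *%rdi, with the pointer preserved in %rbx
  fn();   // emitted as movq %rbx, %rax ; jmp *%rax (tail call)
}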
2010 Sep 01
0
[LLVMdev] equivalent IR, different asm
...> .globl __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE
> .align 4, 0x90
> __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE: ## @_ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE
> ## BB#0:
> pushq %r14
> pushq %rbx
> subq $8, %rsp
> movq %rsi, %rbx
> movq %rdi, %r14
> movq %rdx, %rdi
> movq %rcx, %rsi
> callq __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE
> movq %rax, %rcx
> shrq $32, %rcx
> testl %ecx, %ecx
> je LBB0_2
> ## BB#1:
> imull (%rbx),...
2015 Jul 13
5
[LLVMdev] Poor register allocations vs gcc
...against gcc:
gcc_custom -std=c11 -O3 -march=native -c app2.c -S
Versions (latest for each, downloaded just a few days ago):
gcc : 5.1
clang/llvm: clang+llvm-3.6.1-x86_64-apple-darwin
Host:
osx yosemite.
The assembly (cut to the essential):
LLVM:
pushq %rbp
movq %rsp, %rbp
pushq %r14
pushq %rbx
movl %edi, %r14d
leal 71(%r14), %eax
xorl %ecx, %ecx
cmpl $56, %eax
movl $92, %ebx
cmovnel %ecx, %ebx
leaq L_.str(%rip), %rdi
callq _puts
leal 71(%rbx,%r14), %eax
popq %rbx
popq %r14
popq %rbp
retq
and the gcc one:
pushq %rbp
movl $0, %eax
movl $92, %ebp
pushq %rbx
leal 7...
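A hedged reconstruction of the sort of source that would produce the LLVM code above (app2.c itself is not shown; the constants 71, 56 and 92 are taken from the assembly, everything else is invented):
#include <stdio.h>
int f(int x) {
  int bonus = (x + 71 == 56) ? 92 : 0;  // cmpl $56, %eax ; cmovnel %ecx, %ebx
  puts("some message");                 // callq _puts
  return x + bonus + 71;                // leal 71(%rbx,%r14), %eax
}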
2012 Jul 26
2
[PATCH] x86-64: drop updating of UREGS_rip when converting sysenter to #GP
...RY(sysenter_entry)
pushfq
.globl sysenter_eflags_saved
sysenter_eflags_saved:
- pushq $0
- pushq $0
+ pushq $3 /* ring 3 null cs */
+ pushq $0 /* null rip */
pushq $0
movl $TRAP_syscall,4(%rsp)
SAVE_ALL
GET_CURRENT(%rbx)
cmpb $0,VCPU_sysenter_disables_events(%rbx)
- movq $0,UREGS_rip(%rsp) /* null rip */
- movl $3,UREGS_cs(%rsp) /* ring 3 null cs */
movq VCPU_sysenter_addr(%rbx),%rax
setne %cl
leaq VCPU_trap_bounce(%rbx),%rdx
@@ -292,7 +290,6 @@ sysenter_efla...
2007 Aug 08
2
[PATCH] x86-64: syscall/sysenter support for 32-bit apps
...00
+++ 2007-08-08/xen/arch/x86/x86_64/compat/entry.S 2007-08-08 11:37:08.000000000 +0200
@@ -187,6 +187,39 @@ ENTRY(compat_post_handle_exception)
movb $0,TRAPBOUNCE_flags(%rdx)
jmp compat_test_all_events
+ENTRY(compat_syscall)
+ cmpb $0,VCPU_syscall32_disables_events(%rbx)
+ movzwl VCPU_syscall32_sel(%rbx),%esi
+ movq VCPU_syscall32_addr(%rbx),%rax
+ setne %cl
+ leaq VCPU_trap_bounce(%rbx),%rdx
+ testl $~3,%esi
+ leal (,%rcx,TBF_INTERRUPT),%ecx
+ jz 2f
+1: movq %rax,TRAPBOUNCE_eip(%rdx)
+ movw %si,...
2016 May 06
3
Unnecessary spill/fill issue
...s of constant vectors at compile time.
Each vector has a single use. In the final asm, I see a series of spills at
the top of the function that store all the constant vectors to the stack,
and each use then references the stack slot directly:
Lots of these at top of function:
movabsq $.LCPI0_212, %rbx
vmovaps (%rbx), %ymm0
vmovaps %ymm0, 2816(%rsp) # 32-byte Spill
Later on, each use references the stack pointer:
vpaddd 2816(%rsp), %ymm4, %ymm1 # 32-byte Folded Reload
It seems the spill to stack is unnecessary. In one particularly bad kernel,
I have 128 8-wide constant vectors, and so th...
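A hedged, scaled-down sketch of the pattern described (AVX2 intrinsics; the constants and the kernel body are invented). With a hundred-plus single-use constant vectors they no longer fit in registers, so the allocator materializes them all up front, spills them, and folds each reload into its single use as a memory operand:
#include <immintrin.h>
__m256i kernel(__m256i v) {
  // Each constant vector is built once and used exactly once; scale this up
  // to ~128 constants to get the vmovaps-to-stack spills and folded reloads
  // shown above.
  const __m256i c0 = _mm256_set1_epi32(0x01010101);
  const __m256i c1 = _mm256_set1_epi32(0x02020202);
  const __m256i c2 = _mm256_set1_epi32(0x03030303);
  v = _mm256_add_epi32(v, c0);
  v = _mm256_add_epi32(v, c1);
  v = _mm256_add_epi32(v, c2);
  return v;
}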
2015 Jul 29
2
[LLVMdev] optimizer clobber EFLAGS
...d using the following command line:
---
$ clang -O2 -c -o clang-eflag.o clang-eflag.c
---
Produces this output:
---
$ objdump -S clang-eflag.o
clang-eflag.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <bar>:
0: 53 push %rbx
1: e8 00 00 00 00 callq 6 <bar+0x6>
6: ff 0d 00 00 00 00 decl 0x0(%rip) # c <bar+0xc>
c: 9c pushfq
d: 5b pop %rbx
e: e8 00 00 00 00 callq 13 <bar+0x13>
13: b8 01 00 0...
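A hedged guess at the shape of the code from the disassembly alone (all names are invented): a flag-saving pushfq/pop sequence written as inline asm, with a nearby counter update that compiles to a flag-clobbering decl.
static int counter;
extern void external_call(void);
extern void another_call(void);
// Reads EFLAGS the way the disassembly shows: pushfq followed by a pop.
static inline unsigned long save_flags(void) {
  unsigned long flags;
  asm volatile("pushfq; popq %0" : "=r"(flags) : : "memory");
  return flags;
}
int bar(void) {
  external_call();                  // first callq
  --counter;                        // decl counter(%rip) -- clobbers EFLAGS
  unsigned long f = save_flags();   // pushfq ; pop %rbx
  another_call();                   // second callq
  return f != 0;
}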
2015 Dec 17
2
llvm-3.6 MCAsmParser x64 Error "invalid operand for instruction" when msb set
Hello,
I am experiencing problems when trying to assemble these two x86-64 opcodes:
"add r64, imm32"
"imul r64, r64, imm32"
When the most significant bit of imm32 is set, for example:
"add rax, 0x80000000", "add rax, 0xffffffff", ...
"imul rbx, rsi, 0x80000000", "imul rbx, rsi, 0xffffffff", ...
The Error Message I receive is the following:
"Instruction:1:1: error: invalid operand for instruction"
I was using the MCAsmParser, with the RelaxAll flag set to true, for the MCStreamer.
Can someone clarify why...
2013 Jan 04
3
[LLVMdev] instruction scheduling issue
...s: the function call at 402a99 has a parameter passed to %rdi at 402a91. One can see that the function call comes immediately before the
memory access I want to monitor, because the effective address used by 402a9e is passed to the function call.
402a91: 48 8d bc 1c 48 02 00 lea 0x248(%rsp,%rbx,1),%rdi
402a98: 00
402a99: e8 82 e0 ff ff callq 400b20 <llvm_memory_profiling at plt>
/home/xl10/llvm/test//luleshOMP-0611.cc:1974
402a9e: f2 0f 10 84 1c 48 02 movsd 0x248(%rsp,%rbx,1),%xmm0
402aa5: 00 00
402aa7: f2 0f 11 84 24 b8 01 mo...
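A hedged sketch of the instrumentation pattern visible here (the signature of llvm_memory_profiling and the surrounding function are assumptions): the profiling hook receives the effective address of the load it is placed in front of.
extern "C" void llvm_memory_profiling(void *addr);
double monitored_load(char *base, long idx) {
  double *p = (double *)(base + idx);  // lea 0x248(%rsp,%rbx,1),%rdi-style address
  llvm_memory_profiling(p);            // callq llvm_memory_profiling@plt
  return *p;                           // movsd ...,%xmm0 -- the monitored access
}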
2013 May 22
1
[LLVMdev] x86 frame pointer and __builtin_setjmp/__builtin_longjmp
...ain(int argc, char *argv[]) {
void *buf[20];
__attribute__((__aligned__(64))) char q; // realign the stack
char *p = __builtin_alloca(argc); // dynamic alloca
if (__builtin_setjmp(buf)) {
*p = 'p';
q = 'q';
return 0;
}
asm("movq $0, %rbx");
__builtin_longjmp(buf, 1);
}
LLVM is choosing to use rbx as a base pointer to access p and q, but __builtin_setjmp does not save rbx; when the longjmp is executed, rbx may hold a garbage value. GCC, on the other hand, uses rbp, which is saved in the jump buffer. Is this a bug in...
2015 Jul 13
2
[LLVMdev] Poor register allocations vs gcc
...m-3.6.1-x86_64-apple-darwin
Host:
osx yosemite.
The assembly (cut to the essential):
LLVM:
pushq %rbp
movq %rsp, %rbp
pushq %r14
pushq %rbx
movl %edi, %r14d
leal 71(%r14), %eax
xorl %ecx, %ecx
cmpl $56, %eax
movl $92, %ebx
cmovnel %ecx, %ebx
leaq L_.str(%rip), %rdi
callq _puts...
2023 May 30
1
[syzbot] [kvm?] [net?] [virt?] general protection fault in vhost_work_queue
.../vhost.c:248
> Code: 00 00 fc ff df 48 89 da 48 c1 ea 03 80 3c 02 00 75 56 48 b8 00 00 00 00 00 fc ff df 48 8b 1b 48 8d 7b 70 48 89 fa 48 c1 ea 03 <80> 3c 02 00 75 42 48 8b 7b 70 e8 95 9e ae f9 5b 5d 41 5c 41 5d e9
> RSP: 0018:ffffc9000333faf8 EFLAGS: 00010202
> RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffc9000d84d000
> RDX: 000000000000000e RSI: ffffffff841221d7 RDI: 0000000000000070
> RBP: ffff88804b6b95b0 R08: 0000000000000001 R09: 0000000000000000
> R10: 0000000000000001 R11: 0000000000000000 R12: ffff88804b6b00b0
> R13: 0000000000000000 R14: ffff88804b6b95...
2012 Oct 02
18
[PATCH 0/3] x86: adjust entry frame generation
This set of patches converts the way frames get created from
using PUSHes/POPs to using MOVes, thus making it possible (in certain
cases) to avoid saving/restoring part of the register set.
While exactly where the (small) win comes from varies
between CPUs, the net effect is a 1 to 2% reduction in the cost of a
combined interruption entry and exit when the full state save
can be avoided.
1: use MOV
2013 Sep 05
2
[LLVMdev] CFI Directives
...have something like this:
.cfi_startproc
Lfunc_begin175:
pushq %rbp
Ltmp1532:
.cfi_def_cfa_offset 16
Ltmp1533:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp1534:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %rbx
subq $3224, %rsp ## imm = 0xC98
Ltmp1535:
.cfi_offset %rbx, -40
Ltmp1536:
.cfi_offset %r14, -32
Ltmp1537:
.cfi_offset %r15, -24
I need to be able to know that `%rsp' was adjusted by 3224. However, there are no CFI directives that encode this infor...