Displaying 20 results from an estimated 583 matches for "movq".
2017 Mar 01
2
[Codegen bug in LLVM 3.8?] br following `fcmp une` is present in ll, absent in asm
...align 8
%603 = fcmp une double %rtb_Sum3_737, 0.000000e+00
%_rtB_739 = load %B_repro_T*, %B_repro_T** %_rtB_, align 8
br i1 %603, label %true73, label %false74
Now, in broken.asm, notice that the same merge128 block is missing the branch instruction:
.LBB6_55: # %merge128
movq 184(%rsp), %rcx
movq %rax, 728(%rcx)
movq 184(%rsp), %rax
movq 728(%rax), %rcx
movq %rcx, 736(%rax)
movq 184(%rsp), %rax
movq $0, 744(%rax)
movq 184(%rsp), %rax
movq $0, 752(%rax)
movq 184(%rsp), %rax
movq $0, 760(%rax)
movq 176(%rsp), %rax
movsd 5608(%rax), %xmm0 # xmm0 = mem[0],zero
movq 18...
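For reference, a healthy lowering of that `fcmp une` plus conditional branch would end the block with a compare-and-branch sequence. A minimal sketch of the usual x86-64 pattern (register choice and labels are illustrative, not taken from broken.asm):
	# hypothetical lowering of:  %603 = fcmp une double %x, 0.0 ; br i1 %603, ...
	xorpd	%xmm1, %xmm1		# materialize the 0.0 constant
	ucomisd	%xmm1, %xmm0		# compare %x against 0.0
	jne	.Ltrue73		# ordered and not equal
	jp	.Ltrue73		# unordered (NaN): une is also true
	jmp	.Lfalse74		# equal: go to the false successor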
2007 Nov 14
10
[GE users] Apple Leopard has dtrace -- anyone used the SGE probes/scripts yet?
Hi,
Chris (cc'd) and I are trying to get the SGE master monitor to work with Apple Leopard's
dtrace. Unfortunately, we are stuck on the error message below.
Does anyone have an idea what the cause could be? What I can rule out as a
cause is function inlining, for the reasons explained below.
Background information on SGE master monitor implementation is under
http://wiki.gridengine.info/wiki/index.php/Dtrace
2018 May 23
0
[PATCH v3 09/27] x86/acpi: Adapt assembly for PIE support
.../kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 50b8ed0317a3..472659c0f811 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -14,7 +14,7 @@
* Hooray, we are in Long 64-bit mode (but still running in low memory)
*/
ENTRY(wakeup_long64)
- movq saved_magic, %rax
+ movq saved_magic(%rip), %rax
movq $0x123456789abcdef0, %rdx
cmpq %rdx, %rax
jne bogus_64_magic
@@ -25,14 +25,14 @@ ENTRY(wakeup_long64)
movw %ax, %es
movw %ax, %fs
movw %ax, %gs
- movq saved_rsp, %rsp
+ movq saved_rsp(%rip), %rsp
- movq saved_rbx, %rbx
- movq saved...
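The pattern of the change is the same throughout the patch: a load through an absolute symbol address is replaced by a RIP-relative one, so the code keeps working when the kernel text is not running at its link-time address. A minimal sketch of the two addressing forms (symbol names reused from the hunks, otherwise illustrative):
	# position-dependent: absolute address of saved_magic encoded in the instruction
	movq	saved_magic, %rax
	# position-independent: address computed as an offset from the next instruction
	movq	saved_magic(%rip), %rax
	# where the address itself (not the value) is wanted, an immediate becomes a lea:
	leaq	saved_rsp(%rip), %rbx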
2012 Oct 02
18
[PATCH 0/3] x86: adjust entry frame generation
This set of patches converts the way frames get created from
using PUSHes/POPs to using MOVs, which (in certain cases) makes
it possible to avoid saving/restoring part of the register set.
While where exactly the (small) win comes from varies between
CPUs, the net effect is a 1 to 2% reduction in combined
interrupt entry and exit cost when the full state save
can be avoided.
1: use MOV
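The point of the conversion is that a frame built with a single stack-pointer adjustment plus MOVs lets individual register slots be skipped, which a strict PUSH/POP sequence cannot do. A rough sketch of the two entry styles (register choice illustrative):
	# PUSH-based entry: every register is saved unconditionally
	pushq	%rdi
	pushq	%rsi
	pushq	%rdx

	# MOV-based entry: allocate the frame once, then store only what is needed
	subq	$24, %rsp
	movq	%rdi, 16(%rsp)
	movq	%rsi, 8(%rsp)
	# the %rdx slot can simply be left untouched when its value is not needed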
2005 Jul 20
1
MMX IDCT for theora-exp
...] = p[3] = idctcosTbl[ j - 1];
+ }
+ while( ++j <= 7);
+
+ idctconstants[44] = idctconstants[45] = idctconstants[46] = idctconstants[47] = IdctAdjustBeforeShift;
+}
+
+
+#define MtoSTR(s) #s
+
+#define Dump "call MMX_dump\n"
+
+#define BeginIDCT "#BeginIDCT\n"\
+ \
+ " movq " I(3)","r2"\n" \
+ \
+ " movq " C(3)","r6"\n" \
+ " movq " r2","r4"\n" \
+ " movq " J(5)","r7"\n" \
+ " pmulhw " r6","r4"\n" \
+ " movq " C(...
2013 Aug 20
0
[LLVMdev] Memory optimizations for LLVM JIT
...>a[5] = p->a[4]; mov %rdx,0x28(%rax)
--------------------------------------------------------------------
JIT (map p to GlobalVariable) ==> JIT (map p to constant GlobalVariable)
1* movabsq $0x18c6b88, %rax 1* movabsq $0x18c6b88, %rax
2* movq (%rax), %rcx // p 2* movq (%rax), %rax
3* movq 0x8(%rcx), %rdx // a[1] 3* movq 0x8(%rax), %rcx
4* movq %rdx, 0x10(%rcx) // a[2] 4* movq %rcx, 0x10(%rax)
5 movq (%rax), %rcx 5
6 movq 0x8(%rcx), %rdx 6 movq 0x8(...
2011 Sep 09
1
[LLVMdev] Reserved call frame
...p are not
correctly updated between the sub instruction and the call.
Is this some kind of bug, or should I make more changes to disable the
reserved call frame?
Here is a piece of the output of the testcase with the reservedCallFrame
enabled:
# BB#26: # %L51
movq 536(%rsp), %rbp
movq $-5, 536(%rsp)
movq 552(%rsp), %rsi
movq 560(%rsp), %rdx
movq 544(%rsp), %rcx
movq %rdi, (%rsp)
movq $2, 8(%rsp)
sarq $4, %rbp
leaq *128*(%rsp), %rdi
movq %rbp, %r8
movq *40*(%rsp), %r9 # 8-b...
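For context, with a reserved call frame the space for outgoing call arguments is allocated once as part of the fixed frame, so argument stores use constant %rsp offsets; with it disabled, the stack is grown and shrunk around each call, and any %rsp-relative offsets issued in between have to account for that adjustment. A rough before/after sketch (callee name and offsets are illustrative, not from the testcase):
	# reserved call frame: argument area is part of the prologue allocation
	movq	%rdi, (%rsp)
	movq	$2, 8(%rsp)
	callq	callee

	# no reserved call frame: the argument area is created around each call
	subq	$16, %rsp
	movq	%rdi, (%rsp)
	movq	$2, 8(%rsp)
	callq	callee
	addq	$16, %rsp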
2018 Sep 11
2
Byte-wide stores aren't coalesced if interspersed with other stores
Andres:
FWIW, codegen will do the merge if you turn on global alias analysis for it
"-combiner-global-alias-analysis". That said, we should be able to do this
merging earlier.
-Nirav
On Mon, Sep 10, 2018 at 8:33 PM, Andres Freund via llvm-dev <
llvm-dev at lists.llvm.org> wrote:
> Hi,
>
> On 2018-09-10 13:42:21 -0700, Andres Freund wrote:
> > I have, in postgres,
2013 Aug 20
4
[LLVMdev] Memory optimizations for LLVM JIT
...>a[5] = p->a[4]; mov %rdx,0x28(%rax)
--------------------------------------------------------------------
JIT (map p to GlobalVariable) ==> JIT (map p to constant
GlobalVariable)
1* movabsq $0x18c6b88, %rax 1* movabsq $0x18c6b88, %rax
2* movq (%rax), %rcx // p 2* movq (%rax), %rax
3* movq 0x8(%rcx), %rdx // a[1] 3* movq 0x8(%rax), %rcx
4* movq %rdx, 0x10(%rcx) // a[2] 4* movq %rcx, 0x10(%rax)
5 movq (%rax), %rcx 5
6 movq 0x8(%rcx), %rdx 6 movq 0x8(...
2020 Aug 17
3
Code generation option for wide integers on x86_64?
Is there an existing option in the X86_64 target code generator to emit a loop for the following code:
define i4096 @add(i4096 %a, i4096 %b) alwaysinline {
%c = add i4096 %a, %b
ret i4096 %c
}
instead of:
movq %rdi, %rax
addq 96(%rsp), %rsi
adcq 104(%rsp), %rdx
movq %rdx, 8(%rdi)
movq %rsi, (%rdi)
adcq 112(%rsp), %rcx
movq %rcx, 16(%rdi)
adcq 120(%rsp), %r8
movq %r8, 24(%rdi)
adcq 128(%rsp), %r9
movq %r9, 32(%rdi)
movq 8(%rsp), %rcx
adcq 136(%rs...
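A loop-based lowering would walk the 64 limbs of the i4096 values with adc, exploiting the fact that inc/dec leave the carry flag untouched. A hand-written sketch, assuming the result and both operands are reached through pointers in %rdi, %rsi and %rdx (the real ABI, as the unrolled output above shows, splits the values between registers and the stack):
	movl	$64, %ecx		# 4096 bits / 64 = 64 limbs
	xorq	%r8, %r8		# limb index; xor also clears CF
.Ladd_loop:
	movq	(%rsi,%r8,8), %rax
	adcq	(%rdx,%r8,8), %rax	# add with the carry from the previous limb
	movq	%rax, (%rdi,%r8,8)
	incq	%r8			# inc does not modify CF
	decl	%ecx			# neither does dec
	jnz	.Ladd_loop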
2010 Sep 01
5
[LLVMdev] equivalent IR, different asm
...tructions
.globl __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE
.align 4, 0x90
__ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE: ## @_ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE
## BB#0:
pushq %r14
pushq %rbx
subq $8, %rsp
movq %rsi, %rbx
movq %rdi, %r14
movq %rdx, %rdi
movq %rcx, %rsi
callq __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE
movq %rax, %rcx
shrq $32, %rcx
testl %ecx, %ecx
je LBB0_2
## BB#1:
imull (%rbx), %eax
cltd
idivl %ecx
movl %eax, (%r14)
LBB0_2:
addq $8, %rsp
popq %rbx
pop...
2018 Sep 11
2
Byte-wide stores aren't coalesced if interspersed with other stores
...>
> In the case at hand, with a manual 64bit store (this is on a 64bit
> target), llvm then combines 8 byte-wide stores into one.
>
>
> Without -combiner-global-alias-analysis it generates:
>
> movb $0, 1(%rdx)
> movl 4(%rsi,%rdi), %ebx
> movq %rbx, 8(%rcx)
> movb $0, 2(%rdx)
> movl 8(%rsi,%rdi), %ebx
> movq %rbx, 16(%rcx)
> movb $0, 3(%rdx)
> movl 12(%rsi,%rdi), %ebx
> movq %rbx, 24(%rcx)
> movb $0, 4(%rdx)
> movq 16(%rsi,%...
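The merge being discussed turns the interleaved byte-wide zero stores into a single 8-byte store once the combiner can prove the stores in between do not alias. A rough before/after sketch (base register and offsets illustrative):
	# unmerged: separate byte stores of zero
	movb	$0, (%rdx)
	movb	$0, 1(%rdx)
	# ... continuing through offset 7 ...
	movb	$0, 7(%rdx)

	# merged, as produced with -combiner-global-alias-analysis:
	movq	$0, (%rdx)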
2005 Mar 23
3
[PATCH] promised MMX patches rc1
...\n\t" /* pack with next(high) four values */
+);
+}
+
+void oc_frag_recon_intra__mmx(unsigned char *_dst,int _dst_ystride,
+ const ogg_int16_t *_residue) {
+
+ __asm__ __volatile__ (
+" mov $0x7, %%ecx \n\t" /* 8x loop */
+" .balign 16 \n\t"
+"1: movq (V128), %%mm0 \n\t" /* Set mm0 to 0x0080008000800080 */
+" movq (%1), %%mm2 \n\t" /* First four input values */
+" movq %%mm0, %%mm1 \n\t" /* Set mm1 == mm0 */
+" movq 8(%1), %%mm3 \n\t" /* Next four input values */
+" decl %%...
2010 Sep 01
0
[LLVMdev] equivalent IR, different asm
...s1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE
> .align 4, 0x90
> __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE: ## @_ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE
> ## BB#0:
> pushq %r14
> pushq %rbx
> subq $8, %rsp
> movq %rsi, %rbx
> movq %rdi, %r14
> movq %rdx, %rdi
> movq %rcx, %rsi
> callq __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE
> movq %rax, %rcx
> shrq $32, %rcx
> testl %ecx, %ecx
> je LBB0_2
> ## BB#1:
> imull (%rbx), %eax
> cltd
> idivl %e...
2004 Aug 24
5
MMX/mmxext optimisations
quite some speed improvement indeed.
attached the updated patch to apply to svn/trunk.
j
-------------- next part --------------
A non-text attachment was scrubbed...
Name: theora-mmx.patch.gz
Type: application/x-gzip
Size: 8648 bytes
Desc: not available
Url : http://lists.xiph.org/pipermail/theora-dev/attachments/20040824/5a5f2731/theora-mmx.patch-0001.bin
2006 May 25
2
Compilation issues with s390
Hi all,
I'm trying to compile asterisk on the mainframe (s390 / s390x) and I am
running into issues. I was wondering if somebody could give a hand?
I'm thinking that I should be able to do this. I have noticed that Debian
even has binary RPM's out for Asterisk now. I'm trying to do this on SuSE
SLES8 (with the 2.4 kernel).
What I see is an issue that arch=s390 isn't
2007 Apr 18
0
[RFC/PATCH PV_OPS X86_64 03/17] paravirt_ops - system routines
...t we're using (ie
- * Pentium 4MB enable and PPro Global page
- * enable), so that any CPU's that boot up
- * after us can get the correct flags.
- */
-extern unsigned long mmu_cr4_features;
-
-static inline void set_in_cr4 (unsigned long mask)
-{
- mmu_cr4_features |= mask;
- __asm__("movq %%cr4,%%rax\n\t"
- "orq %0,%%rax\n\t"
- "movq %%rax,%%cr4\n"
- : : "irg" (mask)
- :"ax");
-}
-
-static inline void clear_in_cr4 (unsigned long mask)
-{
- mmu_cr4_features &= ~mask;
- __asm__("movq %%cr4,%%rax\n\t"
- "andq %0,%%rax...
2007 Apr 18
0
[RFC/PATCH PV_OPS X86_64 03/17] paravirt_ops - system routines
...t we're using (ie
- * Pentium 4MB enable and PPro Global page
- * enable), so that any CPU's that boot up
- * after us can get the correct flags.
- */
-extern unsigned long mmu_cr4_features;
-
-static inline void set_in_cr4 (unsigned long mask)
-{
- mmu_cr4_features |= mask;
- __asm__("movq %%cr4,%%rax\n\t"
- "orq %0,%%rax\n\t"
- "movq %%rax,%%cr4\n"
- : : "irg" (mask)
- :"ax");
-}
-
-static inline void clear_in_cr4 (unsigned long mask)
-{
- mmu_cr4_features &= ~mask;
- __asm__("movq %%cr4,%%rax\n\t"
- "andq %0,%%rax...
2015 Mar 03
2
[LLVMdev] Need a clue to improve the optimization of some C code
...to the code generator and try to add an optimization pass?
Thanks for any feedback.
Ciao
Nat!
P.S. In case someone is interested, here is the assembler code and the IR that produced it.
Relevant LLVM generated x86_64 assembler portion with -Os
~~~
testq %r12, %r12
je LBB0_5
## BB#1:
movq -8(%r12), %rcx
movq (%rcx), %rax
movq -8(%rax), %rdx
andq %r15, %rdx
cmpq %r15, (%rax,%rdx)
je LBB0_2
## BB#3:
addq $8, %rcx
jmp LBB0_4
LBB0_2:
leaq 8(%rdx,%rax), %rcx
LBB0_4:
movq %r12, %rdi
movq %r15, %rsi
movq %r14, %rdx
callq *(%rcx)
movq %rax, %rbx
LBB0_5:
~~~
Better/tighter asse...
2018 May 24
2
[PATCH v3 11/27] x86/power/64: Adapt assembly for PIE support
...ower/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
> index ce8da3a0412c..6fdd7bbc3c33 100644
> --- a/arch/x86/power/hibernate_asm_64.S
> +++ b/arch/x86/power/hibernate_asm_64.S
> @@ -24,7 +24,7 @@
> #include <asm/frame.h>
>
> ENTRY(swsusp_arch_suspend)
> - movq $saved_context, %rax
> + leaq saved_context(%rip), %rax
> movq %rsp, pt_regs_sp(%rax)
> movq %rbp, pt_regs_bp(%rax)
> movq %rsi, pt_regs_si(%rax)
> @@ -115,7 +115,7 @@ ENTRY(restore_registers)
> movq %rax, %cr4; # turn PGE back on
>
> /* We don't restore %ra...