Displaying 20 results from an estimated 2314 matches for "ecx".
Did you mean:
eax
2020 May 22
2
[PATCH] Optimized assembler version of md5_process() for x86-64
...er)
+ # rsi = arg #2 (ptr, data pointer)
+ # rdx = arg #3 (nbr, number of 16-word blocks to process)
+ mov %rdi, %rbp # rbp = ctx
+ shl $6, %rdx # rdx = nbr in bytes
+ lea (%rsi,%rdx), %rdi # rdi = end
+ mov 0*4(%rbp), %eax # eax = ctx->A
+ mov 1*4(%rbp), %ebx # ebx = ctx->B
+ mov 2*4(%rbp), %ecx # ecx = ctx->C
+ mov 3*4(%rbp), %edx # edx = ctx->D
+ # end is 'rdi'
+ # ptr is 'rsi'
+ # A is 'eax'
+ # B is 'ebx'
+ # C is 'ecx'
+ # D is 'edx'
+
+ cmp %rdi, %rsi # cmp end with ptr
+ je 1f # jmp if ptr == end
+
+ # BEGIN of loop over 16-word...
2008 May 27
3
[LLVMdev] Float compare-for-equality and select optimization opportunity
...nated. In C syntax the
code looks like this:
float x, y;
int a, b, c
if(x == y) // Rotate the integers
{
int t;
t = a;
a = b;
b = c;
c = t;
}
This is the resulting x86 assembly code:
movss xmm0,dword ptr [ecx+4]
ucomiss xmm0,dword ptr [ecx+8]
sete al
setnp dl
test dl,al
mov edx,edi
cmovne edx,ecx
cmovne ecx,esi
cmovne esi,edi
While I'm pleasantly surprised that my branch does get turned into several
select operations as intended...
2007 Aug 09
0
[PATCH] x86/hvm: miscellaneous CPUID handling changes
...============================
--- 2007-08-08.orig/xen/arch/x86/hvm/hvm.c 2007-08-06 15:08:40.000000000 +0200
+++ 2007-08-08/xen/arch/x86/hvm/hvm.c 2007-08-08 11:45:25.000000000 +0200
@@ -614,37 +614,38 @@ void hvm_cpuid(unsigned int input, unsig
{
if ( !cpuid_hypervisor_leaves(input, eax, ebx, ecx, edx) )
{
+ struct vcpu *v = current;
+
cpuid(input, eax, ebx, ecx, edx);
- if ( input == 0x00000001 )
+ switch ( input )
{
- struct vcpu *v = current;
-
- clear_bit(X86_FEATURE_MWAIT & 31, ecx);
+ case 0x00000001:
+...
2013 Sep 23
11
[PATCH v4 0/4] x86/HVM: miscellaneous improvements
The first and third patches are cleaned up versions of an earlier v3
submission by Yang.
1: Nested VMX: check VMX capability before read VMX related MSRs
2: VMX: clean up capability checks
3: Nested VMX: fix IA32_VMX_CR4_FIXED1 msr emulation
4: x86: make hvm_cpuid() tolerate NULL pointers
Signed-off-by: Jan Beulich <jbeulich@suse.com>
2008 May 27
1
[LLVMdev] Float compare-for-equality and select optimization opportunity
Both ZF and PF will be set if unordered, so the code below is IEEE
correct...you want to generate 'fcmp ueq' instead of 'fcmp oeq'
This is the resulting x86 assembly code:
movss xmm0,dword ptr [ecx+4]
ucomiss xmm0,dword ptr [ecx+8]
sete al
setnp dl
test dl,al
mov edx,edi
cmovne edx,ecx
cmovne ecx,esi
cmovne esi,edi
While I'm pleasantly surprised that my branch does get turned into several
select operations as intended...
2007 Apr 18
1
[RFC, PATCH 12/24] i386 Vmi processor header
...rocessor.h 2006-03-10 12:55:09.000000000 -0800
+++ linux-2.6.16-rc5/include/asm-i386/processor.h 2006-03-10 13:03:35.000000000 -0800
@@ -137,79 +137,6 @@ static inline void detect_ht(struct cpui
#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
/*
- * Generic CPUID function
- * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
- * resulting in stale register contents being returned.
- */
-static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
-{
- __asm__("cpuid"
- : "=a" (*eax),
- &...
2007 Apr 18
1
[RFC, PATCH 12/24] i386 Vmi processor header
...rocessor.h 2006-03-10 12:55:09.000000000 -0800
+++ linux-2.6.16-rc5/include/asm-i386/processor.h 2006-03-10 13:03:35.000000000 -0800
@@ -137,79 +137,6 @@ static inline void detect_ht(struct cpui
#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
/*
- * Generic CPUID function
- * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
- * resulting in stale register contents being returned.
- */
-static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
-{
- __asm__("cpuid"
- : "=a" (*eax),
- &...
2010 Sep 01
5
[LLVMdev] equivalent IR, different asm
...@_ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE
## BB#0:
pushq %r14
pushq %rbx
subq $8, %rsp
movq %rsi, %rbx
movq %rdi, %r14
movq %rdx, %rdi
movq %rcx, %rsi
callq __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE
movq %rax, %rcx
shrq $32, %rcx
testl %ecx, %ecx
je LBB0_2
## BB#1:
imull (%rbx), %eax
cltd
idivl %ecx
movl %eax, (%r14)
LBB0_2:
addq $8, %rsp
popq %rbx
popq %r14
ret
$ llc opt-fail.ll -o -
.section __TEXT,__text,regular,pure_instructions
.globl __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE
.align 4,...
2012 Mar 28
2
[LLVMdev] Suboptimal code due to excessive spilling
...hl %esi
.Ltmp15:
.cfi_def_cfa_offset 16
subl $88, %esp
.Ltmp16:
.cfi_def_cfa_offset 104
.Ltmp17:
.cfi_offset %esi, -16
.Ltmp18:
.cfi_offset %edi, -12
.Ltmp19:
.cfi_offset %ebx, -8
pxor %xmm0, %xmm0
movl 112(%esp), %eax
testl %eax, %eax
je .LBB1_3
# BB#1:
xorl %ebx, %ebx
movl 108(%esp), %ecx
movl 104(%esp), %edx
xorl %esi, %esi
.align 16, 0x90
.LBB1_2: # %.lr.ph.i
# =>This Inner Loop Header: Depth=1
movsd (%edx,%ebx,8), %xmm2
addsd .LCPI1_0, %xmm2
movsd 16(%edx,%ebx,8), %xmm1
movsd %xmm1, (%esp)...
2007 Apr 18
2
[PATCH] Fix CONFIG_PARAVIRT for 2.6.19-rc5-mm1
...-rc5-mm1/include/asm-i386/paravirt.h 2006-11-09 11:22:25.000000000 +1100
+++ working-2.6.19-rc5-mm1-paravirt/include/asm-i386/paravirt.h 2006-11-09 11:29:20.000000000 +1100
@@ -449,20 +449,16 @@ static inline unsigned long __raw_local_
return f;
}
-#define CLI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \
- "call *paravirt_ops+%c[irq_disable];" \
- "popl %%edx; popl %%ecx", \
+#define CLI_STRING paravirt_alt("pushl %ecx; pushl %edx;" \
+ "call *paravirt_ops+PARAVIRT_irq_disable;" \
+ "popl %edx; popl %e...
2007 Apr 18
2
[PATCH] Fix CONFIG_PARAVIRT for 2.6.19-rc5-mm1
...-rc5-mm1/include/asm-i386/paravirt.h 2006-11-09 11:22:25.000000000 +1100
+++ working-2.6.19-rc5-mm1-paravirt/include/asm-i386/paravirt.h 2006-11-09 11:29:20.000000000 +1100
@@ -449,20 +449,16 @@ static inline unsigned long __raw_local_
return f;
}
-#define CLI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \
- "call *paravirt_ops+%c[irq_disable];" \
- "popl %%edx; popl %%ecx", \
+#define CLI_STRING paravirt_alt("pushl %ecx; pushl %edx;" \
+ "call *paravirt_ops+PARAVIRT_irq_disable;" \
+ "popl %edx; popl %e...
2007 Apr 18
0
[RFC/PATCH PV_OPS X86_64 02/17] paravirt_ops - msr
...========================
--- clean-start.orig/include/asm-x86_64/msr.h
+++ clean-start/include/asm-x86_64/msr.h
@@ -2,6 +2,62 @@
#define X86_64_MSR_H 1
#ifndef __ASSEMBLY__
+#include <asm/errno.h>
+
+static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ __asm__("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+/* wrmsr with exception handling */
+static inline int native_write_msr_safe(unsi...
2007 Apr 18
0
[RFC/PATCH PV_OPS X86_64 02/17] paravirt_ops - msr
...========================
--- clean-start.orig/include/asm-x86_64/msr.h
+++ clean-start/include/asm-x86_64/msr.h
@@ -2,6 +2,62 @@
#define X86_64_MSR_H 1
#ifndef __ASSEMBLY__
+#include <asm/errno.h>
+
+static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ __asm__("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+/* wrmsr with exception handling */
+static inline int native_write_msr_safe(unsi...
2015 Feb 13
2
[LLVMdev] trunk's optimizer generates slower code than 3.5
...mov rsi, offset __mh_execute_header
add rsi, rax
sar rsi, 20h ; size_t
mov edi, 4 ; size_t
call _calloc
lea edx, [r15-1]
movsxd r8, edx
mov ecx, r15d
add ecx, 0FFFFFFFEh
js loc_100000DFA
test r15d, r15d
mov r11d, [rax+r8*4]
jle loc_100000EAE
mov ecx, r15d
add ecx, 0FFFFFFFEh
mov [rs...
2019 Mar 04
2
Where's the optimiser gone (part 11): use the proper instruction for sign extension
...> 0) - (x < 0);
}
While the code generated for the "long" version of this function is quite
OK, the code for the "long long" version misses an obvious optimisation:
lsign: # @lsign
mov eax, dword ptr [esp + 4] | mov eax, dword ptr [esp + 4]
xor ecx, ecx |
test eax, eax | cdq
setg cl | neg eax
sar eax, 31 | adc edx, edx
add eax, ecx | mov eax, edx
ret |...
2015 Feb 14
2
[LLVMdev] trunk's optimizer generates slower code than 3.5
...add rsi, rax
>> sar rsi, 20h ; size_t
>> mov edi, 4 ; size_t
>> call _calloc
>> lea edx, [r15-1]
>> movsxd r8, edx
>> mov ecx, r15d
>> add ecx, 0FFFFFFFEh
>> js loc_100000DFA
>> test r15d, r15d
>> mov r11d, [rax+r8*4]
>> jle loc_100000EAE
>> mov ecx, r15d
>>...
2013 Aug 28
3
[PATCH] x86: AVX instruction emulation fixes
...into the same variable)
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -94,13 +94,25 @@ static inline uint64_t xgetbv(uint32_t x
}
#define cpu_has_avx ({ \
- unsigned int eax = 1, ecx = 0, edx; \
- cpuid(&eax, &edx, &ecx, &edx, NULL); \
+ unsigned int eax = 1, ecx = 0; \
+ cpuid(&eax, &eax, &ecx, &eax, NULL); \
if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \
ecx = 0; \
(ecx & (1U << 28)) !...
2012 Apr 05
0
[LLVMdev] Suboptimal code due to excessive spilling
...hl %esi
.Ltmp15:
.cfi_def_cfa_offset 16
subl $88, %esp
.Ltmp16:
.cfi_def_cfa_offset 104
.Ltmp17:
.cfi_offset %esi, -16
.Ltmp18:
.cfi_offset %edi, -12
.Ltmp19:
.cfi_offset %ebx, -8
pxor %xmm0, %xmm0
movl 112(%esp), %eax
testl %eax, %eax
je .LBB1_3
# BB#1:
xorl %ebx, %ebx
movl 108(%esp), %ecx
movl 104(%esp), %edx
xorl %esi, %esi
.align 16, 0x90
.LBB1_2: # %.lr.ph.i
# =>This Inner Loop Header: Depth=1
movsd (%edx,%ebx,8), %xmm2
addsd .LCPI1_0, %xmm2
movsd 16(%edx,%ebx,8), %xmm1
movsd %xmm1, (%esp)...
2015 Feb 14
2
[LLVMdev] trunk's optimizer generates slower code than 3.5
...sar rsi, 20h ; size_t
>>>> mov edi, 4 ; size_t
>>>> call _calloc
>>>> lea edx, [r15-1]
>>>> movsxd r8, edx
>>>> mov ecx, r15d
>>>> add ecx, 0FFFFFFFEh
>>>> js loc_100000DFA
>>>> test r15d, r15d
>>>> mov r11d, [rax+r8*4]
>>>> jle loc_100000EAE
>>>>...
2011 Nov 02
5
[LLVMdev] About JIT by LLVM 2.9 or later
...ementations, ASMs are:
* Caller:*
float4x4 f;
float4 b = fetch(&f);
// Calling function. first address is a temporary result generated by
caller. And secondary is the &f.
013C1428 lea eax,[ebp-48h]
013C142B push eax
013C142C lea ecx,[ebp-138h]
013C1432 push ecx
013C1433 call fetch (13C11D6h)
013C1438 add esp,8
// Copy result to another temporary variable.
013C143B mov edx,dword ptr [eax]
013C143D mov dword ptr [ebp-150h],edx
013C1443 mov ecx,dword...