Displaying 20 results from an estimated 2242 matches for "edx".
2020 May 22 (2 replies): [PATCH] Optimized assembler version of md5_process() for x86-64
...+ # rdx = arg #3 (nbr, number of 16-word blocks to process)
+ mov %rdi, %rbp # rbp = ctx
+ shl $6, %rdx # rdx = nbr in bytes
+ lea (%rsi,%rdx), %rdi # rdi = end
+ mov 0*4(%rbp), %eax # eax = ctx->A
+ mov 1*4(%rbp), %ebx # ebx = ctx->B
+ mov 2*4(%rbp), %ecx # ecx = ctx->C
+ mov 3*4(%rbp), %edx # edx = ctx->D
+ # end is 'rdi'
+ # ptr is 'rsi'
+ # A is 'eax'
+ # B is 'ebx'
+ # C is 'ecx'
+ # D is 'edx'
+
+ cmp %rdi, %rsi # cmp end with ptr
+ je 1f # jmp if ptr == end
+
+ # BEGIN of loop over 16-word blocks
+2: # save old values of A, B, C,...
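For orientation, here is a hypothetical C equivalent of the loop shape this prologue sets up; the struct fields A..D, the 64-byte block size, and the end-pointer test come straight from the comments above, while everything else (names, the elided round function) is an assumption:

    #include <stddef.h>
    #include <stdint.h>

    struct md5_ctx { uint32_t A, B, C, D; };      /* field names from the comments */

    void md5_process(struct md5_ctx *ctx, const unsigned char *ptr, size_t nbr)
    {
        const unsigned char *end = ptr + (nbr << 6);  /* shl $6: blocks -> bytes */
        uint32_t A = ctx->A, B = ctx->B, C = ctx->C, D = ctx->D;

        while (ptr != end) {                      /* cmp %rdi, %rsi / je 1f */
            /* ... 64 MD5 rounds over the current 16-word block, elided ... */
            ptr += 64;
        }
        (void)A; (void)B; (void)C; (void)D;       /* silence unused warnings in this sketch */
    }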
2018 Nov 06 (4 replies): Rather poor code optimisation of current clang/LLVM targeting Intel x86 (both -64 and -32)
...32be: # @crc32be
xor eax, eax
test esi, esi
jne .LBB0_2
jmp .LBB0_5
.LBB0_4: # in Loop: Header=BB0_2 Depth=1
add rdi, 1
test esi, esi
je .LBB0_5
.LBB0_2: # =>This Loop Header: Depth=1
add esi, -1
movzx edx, byte ptr [rdi]
shl edx, 24
xor edx, eax
mov ecx, -8
mov eax, edx
.LBB0_3: # Parent Loop BB0_2 Depth=1 | # 4 instructions instead of 6, r8 not clobbered!
lea r8d, [rax + rax] | add eax, eax
mov edx, r8d...
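The code reads like a bitwise big-endian CRC-32 compiled one bit at a time; a plausible C source is sketched below. The polynomial 0x04C11DB7 and the exact signature are assumptions, since the excerpt shows only the generated code:

    #include <stdint.h>

    uint32_t crc32be(const uint8_t *p, unsigned int len)
    {
        uint32_t crc = 0;                         /* xor eax, eax */
        while (len--) {                           /* test esi, esi / add esi, -1 */
            crc ^= (uint32_t)*p++ << 24;          /* movzx edx, byte ptr [rdi] / shl edx, 24 */
            for (int i = 0; i < 8; i++)           /* the 8-step .LBB0_3 inner loop */
                crc = (crc << 1) ^ ((crc & 0x80000000u) ? 0x04C11DB7u : 0);
        }
        return crc;
    }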
2007 Aug 09 (0 replies): [PATCH] x86/hvm: miscellaneous CPUID handling changes
...=======================
--- 2007-08-08.orig/xen/arch/x86/hvm/hvm.c 2007-08-06 15:08:40.000000000 +0200
+++ 2007-08-08/xen/arch/x86/hvm/hvm.c 2007-08-08 11:45:25.000000000 +0200
@@ -614,37 +614,38 @@ void hvm_cpuid(unsigned int input, unsig
{
if ( !cpuid_hypervisor_leaves(input, eax, ebx, ecx, edx) )
{
+ struct vcpu *v = current;
+
cpuid(input, eax, ebx, ecx, edx);
- if ( input == 0x00000001 )
+ switch ( input )
{
- struct vcpu *v = current;
-
- clear_bit(X86_FEATURE_MWAIT & 31, ecx);
+ case 0x00000001:
+...
2005 Feb 22 (5 replies): [LLVMdev] Area for improvement
...following X86 code:
.text
.align 16
.globl init_board
.type init_board, @function
init_board:
subl $4, %esp
movl %esi, (%esp)
movl 8(%esp), %eax
movl $0, %ecx
.LBBinit_board_1: # loopexit.1
imull $7, %ecx, %edx
movl %eax, %esi
addl %edx, %esi
movb $46, (%esi)
imull $7, %ecx, %edx
movl %eax, %esi
addl %edx, %esi
leal 1(%esi), %edx
movb $46, (%edx)
imull $7, %ecx, %edx
movl %eax, %esi
addl %edx, %esi
leal 2(%esi)...
2019 Mar 04 (2 replies): Where's the optimiser gone (part 11): use the proper instruction for sign extension
...n:
lsign: # @lsign
mov eax, dword ptr [esp + 4] | mov eax, dword ptr [esp + 4]
xor ecx, ecx |
test eax, eax | cdq
setg cl | neg eax
sar eax, 31 | adc edx, edx
add eax, ecx | mov eax, edx
ret | ret
llsign: # @llsign
xor ecx, ecx | xor edx, edx
mov eax, dword ptr [esp + 8] | mov eax, dword ptr [esp + 8]
cmp ecx, dword p...
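A plausible source for the two routines, given only the generated code, is the classic branchless sign function; the right-hand columns compute it with cdq/neg/adc rather than setcc plus sar:

    /* sign(x): -1, 0, or +1 (source reconstruction is an assumption) */
    int lsign(int x)        { return (x > 0) - (x < 0); }
    int llsign(long long x) { return (x > 0) - (x < 0); }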
2005 Feb 22 (0 replies): [LLVMdev] Area for improvement
...gt; int i,j;
>
> for (i=0;i<COLS;i++)
> for (j=0;j<ROWS;j++)
> b[i][j]='.';
> for (i=0;i<COLS;i++)
> b[i][ROWS]=0;
> }
>
> This generates the following X86 code:
> imull $7, %ecx, %edx
> movl %eax, %esi
> addl %edx, %esi
> movb $46, (%esi)
> imull $7, %ecx, %edx
> movl %eax, %esi
> addl %edx, %esi
> leal 1(%esi), %edx
... (many many copies of this, see the end of the email for full output)
...
> The code generate...
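The complaint is the imull $7 recomputed before every store. A strength-reduced form along the following lines is what the poster expects the optimizer to produce; ROWS = 6 is inferred from the 7-byte row stride, while COLS and the signature are assumptions:

    #define ROWS 6                      /* row stride 7 = ROWS + 1 terminator byte */
    #define COLS 7                      /* assumption; not visible in the excerpt */

    void init_board(char b[COLS][ROWS + 1])
    {
        char *p = &b[0][0];
        for (int i = 0; i < COLS; i++, p += ROWS + 1) {
            for (int j = 0; j < ROWS; j++)
                p[j] = '.';             /* movb $46, ... with no imull in sight */
            p[ROWS] = 0;                /* the second source loop, fused here */
        }
    }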
2005 Feb 22 (2 replies): [LLVMdev] Area for improvement
...or (i=0;i<COLS;i++)
>> for (j=0;j<ROWS;j++)
>> b[i][j]='.';
>> for (i=0;i<COLS;i++)
>> b[i][ROWS]=0;
>> }
>>
>> This generates the following X86 code:
>> imull $7, %ecx, %edx
>> movl %eax, %esi
>> addl %edx, %esi
>> movb $46, (%esi)
>> imull $7, %ecx, %edx
>> movl %eax, %esi
>> addl %edx, %esi
>> leal 1(%esi), %edx
>
> ... (many many copies of this, see the end of the email for ful...
2019 Aug 15 (2 replies): Slow XCHG in arch/i386/libgcc/__ashrdi3.S and arch/i386/libgcc/__lshrdi3.S
...ps://git.kernel.org/pub/scm/libs/klibc/klibc.git/plain/usr/klibc/arch/i386/libgcc/__ashldi3.S
and
https://git.kernel.org/pub/scm/libs/klibc/klibc.git/plain/usr/klibc/arch/i386/libgcc/__lshrdi3.S
use the following code sequences for shift counts greater than 31:
1:                                  1:
        xorl    %edx,%edx                   shrl    %cl,%edx
        shl     %cl,%eax                    xorl    %eax,%eax
        xchgl   %edx,%eax                   xchgl   %edx,%eax
        ret                                 ret
On Intel processors especially, XCHG was and still is a rather
slow instruction and should be avoided.
Use the fo...
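As a semantic cross-check, here is what the two count-greater-than-31 paths compute, written as C (function names are mine; on i386 the 64-bit result is returned in edx:eax, which is the pair the trailing xchgl shuffles into place):

    #include <stdint.h>

    /* __ashldi3 tail for counts 32..63: low word of the result becomes 0 */
    uint64_t ashl_ge32(uint64_t v, unsigned int c)
    {
        return (uint64_t)((uint32_t)v << (c - 32)) << 32;
    }

    /* __lshrdi3 tail for counts 32..63: high word of the result becomes 0 */
    uint64_t lshr_ge32(uint64_t v, unsigned int c)
    {
        return (uint32_t)(v >> 32) >> (c - 32);
    }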
2004 Sep 10 (2 replies): An assembly optimization and fix
...ub esp, byte 16
; qword [esp] == temp space for loading FLAC__uint64s to FPU regs
- ; dword [esp] == last_error_0
- ; dword [esp + 4] == last_error_1
- ; dword [esp + 8] == last_error_2
- ; dword [esp + 12] == last_error_3
- ; eax == error
; ebx == &data[i]
; ecx == loop counter (i)
- ; edx == temp
- ; edi == save
; ebp == order
; mm0 == total_error_1:total_error_0
- ; mm1 == total_error_3:total_error_2
- ; mm2 == 0:total_error_4
- ; mm3/4 == 0:unpackarea
- ; mm5 == abs(error_1):abs(error_0)
- ; mm5 == abs(error_3):abs(error_2)
+ ; mm1 == total_error_2:total_error_3
+ ; mm2 == :to...
2004 Sep 10 (3 replies): patch
...edi, [esp + 28] ; edi == autoc
+ mov esi, [esp + 16] ; esi == data
inc ecx ; we are looping <= limit so we add one to the counter
; for(sample = 0; sample <= limit; sample++) {
@@ -97,7 +98,11 @@
; each iteration is 11 bytes so we need (-eax)*11, so we do (-12*eax + eax)
lea edx, [eax + eax*2]
neg edx
- lea edx, [eax + edx*4 + .jumper1_0]
+ lea edx, [eax + edx*4 + .jumper1_0 - .get_eip1]
+ call .get_eip1
+.get_eip1:
+ pop ebx
+ add edx, ebx
inc edx ; compensate for the shorter opcode on the last iteration
inc edx ; compensate for the shorter opcode on the last...
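Re-traced in C, the lea/neg/lea sequence builds offset - 11*eax (eax + (-(3*eax))*4 = eax - 12*eax), and the call/pop pair fetches the runtime EIP that turns the link-time constant .jumper1_0 - .get_eip1 into an absolute address; names below are mine, and the two inc edx opcode-length compensations are left out:

    #include <stdint.h>

    uintptr_t jump_target(uintptr_t get_eip1_addr,  /* popped into ebx */
                          uintptr_t table_offset,   /* .jumper1_0 - .get_eip1 */
                          uintptr_t n)              /* loop count in eax */
    {
        uintptr_t edx = n + n * 2;           /* lea edx, [eax + eax*2]       */
        edx = (uintptr_t)0 - edx;            /* neg edx                      */
        edx = n + edx * 4 + table_offset;    /* lea edx, [eax + edx*4 + ...] */
        return edx + get_eip1_addr;          /* add edx, ebx (PIC fix-up)    */
    }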
2018 Nov 27 (2 replies): Rather poor code optimisation of current clang/LLVM targeting Intel x86 (both -64 and -32)
...jne .LBB0_2
>> jmp .LBB0_5
>> .LBB0_4: # in Loop: Header=BB0_2 Depth=1
>> add rdi, 1
>> test esi, esi
>> je .LBB0_5
>> .LBB0_2: # =>This Loop Header: Depth=1
>> add esi, -1
>> movzx edx, byte ptr [rdi]
>> shl edx, 24
>> xor edx, eax
>> mov ecx, -8
>> mov eax, edx
>> .LBB0_3: # Parent Loop BB0_2 Depth=1 | # 4 instructions instead of 6, r8 not clobbered!
>> lea r8d, [rax + rax]...
2012 Mar 28 (2 replies): [LLVMdev] Suboptimal code due to excessive spilling
..._def_cfa_offset 16
subl $88, %esp
.Ltmp16:
.cfi_def_cfa_offset 104
.Ltmp17:
.cfi_offset %esi, -16
.Ltmp18:
.cfi_offset %edi, -12
.Ltmp19:
.cfi_offset %ebx, -8
pxor %xmm0, %xmm0
movl 112(%esp), %eax
testl %eax, %eax
je .LBB1_3
# BB#1:
xorl %ebx, %ebx
movl 108(%esp), %ecx
movl 104(%esp), %edx
xorl %esi, %esi
.align 16, 0x90
.LBB1_2: # %.lr.ph.i
# =>This Inner Loop Header: Depth=1
movsd (%edx,%ebx,8), %xmm2
addsd .LCPI1_0, %xmm2
movsd 16(%edx,%ebx,8), %xmm1
movsd %xmm1, (%esp) # 8-byte Spill
movl %...
2005 Feb 22 (0 replies): [LLVMdev] Area for improvement
...lign 16
> .globl init_board
> .type init_board, @function
> init_board:
> subl $4, %esp
> movl %esi, (%esp)
> movl 8(%esp), %eax
> movl $0, %ecx
> .LBBinit_board_1: # loopexit.1
> imull $7, %ecx, %edx
> movl %eax, %esi
> addl %edx, %esi
> movb $46, (%esi)
> imull $7, %ecx, %edx
> movl %eax, %esi
> addl %edx, %esi
> leal 1(%esi), %edx
> movb $46, (%edx)
> imull $7, %ecx, %edx
> movl %eax,...
2013 Sep 23 (11 replies): [PATCH v4 0/4] x86/HVM: miscellaneous improvements
The first and third patches are cleaned up versions of an earlier v3
submission by Yang.
1: Nested VMX: check VMX capability before read VMX related MSRs
2: VMX: clean up capability checks
3: Nested VMX: fix IA32_VMX_CR4_FIXED1 msr emulation
4: x86: make hvm_cpuid() tolerate NULL pointers
Signed-off-by: Jan Beulich <jbeulich@suse.com>
2010 Oct 15 (6 replies): [PATCH 01/13] Nested Virtualization: tools
2005 Feb 22 (0 replies): [LLVMdev] Area for improvement
...t; for (j=0;j<ROWS;j++)
>>> b[i][j]='.';
>>> for (i=0;i<COLS;i++)
>>> b[i][ROWS]=0;
>>> }
>>>
>>> This generates the following X86 code:
>>> imull $7, %ecx, %edx
>>> movl %eax, %esi
>>> addl %edx, %esi
>>> movb $46, (%esi)
>>> imull $7, %ecx, %edx
>>> movl %eax, %esi
>>> addl %edx, %esi
>>> leal 1(%esi), %edx
>>
>> ... (many many copies of t...
2013 Jun 28 (3 replies): [LLVMdev] Question regarding the x86 SBB instruction.
Hi,
I have the x86 SBB instruction. How should I represent it in LLVM
IR? (This is part of a decompiler from binary to LLVM IR.)
Pre-conditions:
%eax = 0xffffffff
%edx = 0xffffffff
%carry = 1
SBB %eax, %edx // %edx is the destination doing %edx = %edx -
(%eax + carry)
JC jump_destination1 // If the Carry flag is set, jump to jump_destination1
How do I represent this correctly in LLVM IR?
In the above case, the carry flag should be set by the SBB becau...
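A minimal C model of the semantics being asked about (names are mine), which is what any LLVM IR lowering has to reproduce, a subtract with borrow-in plus a borrow-out:

    #include <stdint.h>

    /* SBB: dst = dst - (src + carry_in); the borrow comes back out as the
       carry flag. With dst = src = 0xffffffff and carry_in = 1 the result
       is 0xffffffff and the borrow is set, so the JC above is taken. */
    uint32_t sbb32(uint32_t dst, uint32_t src, uint32_t carry_in, uint32_t *carry_out)
    {
        uint64_t wide = (uint64_t)dst - (uint64_t)src - (uint64_t)carry_in;
        *carry_out = (uint32_t)(wide >> 32) & 1;
        return (uint32_t)wide;
    }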
2015 Feb 13 (2 replies): [LLVMdev] trunk's optimizer generates slower code than 3.5
...sub rsp, 18h
mov ebx, 0FFFFFFFFh
cmp edi, 2
jnz loc_100000F29
mov rdi, [rsi+8] ; char *
xor r14d, r14d
xor esi, esi ; char **
mov edx, 0Ah ; int
call _strtol
mov r15, rax
shl rax, 20h
mov rsi, offset __mh_execute_header
add rsi, rax
sar rsi, 20h ; size_t
mov edi, 4 ; siz...
2012 Apr 05 (0 replies): [LLVMdev] Suboptimal code due to excessive spilling
..._def_cfa_offset 16
subl $88, %esp
.Ltmp16:
.cfi_def_cfa_offset 104
.Ltmp17:
.cfi_offset %esi, -16
.Ltmp18:
.cfi_offset %edi, -12
.Ltmp19:
.cfi_offset %ebx, -8
pxor %xmm0, %xmm0
movl 112(%esp), %eax
testl %eax, %eax
je .LBB1_3
# BB#1:
xorl %ebx, %ebx
movl 108(%esp), %ecx
movl 104(%esp), %edx
xorl %esi, %esi
.align 16, 0x90
.LBB1_2: # %.lr.ph.i
# =>This Inner Loop Header: Depth=1
movsd (%edx,%ebx,8), %xmm2
addsd .LCPI1_0, %xmm2
movsd 16(%edx,%ebx,8), %xmm1
movsd %xmm1, (%esp) # 8-byte Spill
movl %...
2007 Apr 18 (1 reply): [RFC, PATCH 12/24] i386 Vmi processor header
...PUID detection flag */
/*
- * Generic CPUID function
- * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
- * resulting in stale register contents being returned.
- */
-static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
-{
- __asm__("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (op), "c"(0));
-}
-
-/* Some CPUID calls want 'count' to be placed in ecx */
-static inline void cpuid_count(int op, in...
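For context, a self-contained copy of the helper being deleted, plus a trivial caller; the caller and its leaf choice are illustrations of mine, not part of the patch:

    /* generic CPUID; %ecx is pre-cleared because some CPUs (Cyrix MII)
       otherwise return stale register contents, per the removed comment */
    static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx,
                             unsigned int *ecx, unsigned int *edx)
    {
        __asm__("cpuid"
                : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
                : "0" (op), "c" (0));
    }

    unsigned int max_std_leaf(void)          /* hypothetical caller */
    {
        unsigned int a, b, c, d;
        cpuid(0, &a, &b, &c, &d);            /* leaf 0: max standard leaf in eax */
        return a;
    }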