I don't know how to file a PR, but I have a patch (see below) that should
work regardless of ABI differences, since it relies on the compiler to do
the tough job.

void X86CompilationCallback_SSE(void) {
   char * SAVEBUF = (char*) alloca(64+12); // alloca is 16-byte aligned

   asm volatile (
     "movl %%eax,(%0)\n"
     "movl %%edx,4(%0)\n"    // Save EAX/EDX/ECX
     "movl %%ecx,8(%0)\n"
     :: "r"(SAVEBUF+64) : "memory" );

   asm volatile (
     // Save all XMM arg registers
     "movaps %%xmm0, (%0)\n"
     "movaps %%xmm1, 16(%0)\n"
     "movaps %%xmm2, 32(%0)\n"
     "movaps %%xmm3, 48(%0)\n"
     :: "r"(SAVEBUF) : "memory" );

   intptr_t *StackPtr = 0, RetAddr = 0;

   asm volatile ( // get stack ptr and retaddr
     "movl %%ebp,%0\n"
     "movl 4(%%ebp),%1\n"
     : "=r"(StackPtr), "=r"(RetAddr) :: "memory" );

   X86CompilationCallback2(StackPtr, RetAddr); // gcc knows how to call this according to the ABI

   asm volatile ( // restore XMM arg registers
     "movaps 48(%0), %%xmm3\n"
     "movaps 32(%0), %%xmm2\n"
     "movaps 16(%0), %%xmm1\n"
     "movaps (%0), %%xmm0\n"
     :: "r"(SAVEBUF) : "memory" );

   asm volatile (
     "movl (%0),%%eax\n"
     "movl 4(%0),%%edx\n"    // Restore EAX/EDX/ECX
     "movl 8(%0),%%ecx\n"
     :: "r"(SAVEBUF+64) : "memory" );
}

The generated code is as follows:

Dump of assembler code for function X86CompilationCallback_SSE:
0xb74b98e0 <X86CompilationCallback_SSE+0>:    push   %ebp
0xb74b98e1 <X86CompilationCallback_SSE+1>:    mov    %esp,%ebp
0xb74b98e3 <X86CompilationCallback_SSE+3>:    sub    $0x78,%esp
0xb74b98e6 <X86CompilationCallback_SSE+6>:    mov    %esi,-0x8(%ebp)
0xb74b98e9 <X86CompilationCallback_SSE+9>:    lea    0x17(%esp),%esi
0xb74b98ed <X86CompilationCallback_SSE+13>:   and    $0xfffffff0,%esi
0xb74b98f0 <X86CompilationCallback_SSE+16>:   mov    %ebx,-0xc(%ebp)
0xb74b98f3 <X86CompilationCallback_SSE+19>:   mov    %edi,-0x4(%ebp)
0xb74b98f6 <X86CompilationCallback_SSE+22>:   lea    0x40(%esi),%edi
0xb74b98f9 <X86CompilationCallback_SSE+25>:   call   0xb7315577 <__i686.get_pc_thunk.bx>
0xb74b98fe <X86CompilationCallback_SSE+30>:   add    $0x76d71e,%ebx
0xb74b9904 <X86CompilationCallback_SSE+36>:   mov    %eax,(%edi)
0xb74b9906 <X86CompilationCallback_SSE+38>:   mov    %edx,0x4(%edi)
0xb74b9909 <X86CompilationCallback_SSE+41>:   mov    %ecx,0x8(%edi)
0xb74b990c <X86CompilationCallback_SSE+44>:   movaps %xmm0,(%esi)
0xb74b990f <X86CompilationCallback_SSE+47>:   movaps %xmm1,0x10(%esi)
0xb74b9913 <X86CompilationCallback_SSE+51>:   movaps %xmm2,0x20(%esi)
0xb74b9917 <X86CompilationCallback_SSE+55>:   movaps %xmm3,0x30(%esi)
0xb74b991b <X86CompilationCallback_SSE+59>:   mov    %ebp,%edx
0xb74b991d <X86CompilationCallback_SSE+61>:   mov    0x4(%ebp),%eax
0xb74b9920 <X86CompilationCallback_SSE+64>:   mov    %eax,0x4(%esp)
0xb74b9924 <X86CompilationCallback_SSE+68>:   mov    %edx,(%esp)
0xb74b9927 <X86CompilationCallback_SSE+71>:   call   0xb7303348 <X86CompilationCallback2@plt>
0xb74b992c <X86CompilationCallback_SSE+76>:   movaps 0x30(%esi),%xmm3
0xb74b9930 <X86CompilationCallback_SSE+80>:   movaps 0x20(%esi),%xmm2
0xb74b9934 <X86CompilationCallback_SSE+84>:   movaps 0x10(%esi),%xmm1
0xb74b9938 <X86CompilationCallback_SSE+88>:   movaps (%esi),%xmm0
0xb74b993b <X86CompilationCallback_SSE+91>:   mov    (%edi),%eax
0xb74b993d <X86CompilationCallback_SSE+93>:   mov    0x4(%edi),%edx
0xb74b9940 <X86CompilationCallback_SSE+96>:   mov    0x8(%edi),%ecx
0xb74b9943 <X86CompilationCallback_SSE+99>:   mov    -0xc(%ebp),%ebx
0xb74b9946 <X86CompilationCallback_SSE+102>:  mov    -0x8(%ebp),%esi
0xb74b9949 <X86CompilationCallback_SSE+105>:  mov    -0x4(%ebp),%edi
0xb74b994c <X86CompilationCallback_SSE+108>:  mov    %ebp,%esp
0xb74b994e <X86CompilationCallback_SSE+110>:  pop    %ebp
0xb74b994f <X86CompilationCallback_SSE+111>:  ret
End of assembler dump.

And I verified that it works in my use case.
Clearly the same should be done for the other asm functions in that same
file (e.g. the non-SSE case).

Corrado
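[Editor's note: Corrado suggests the non-SSE callback should get the same
treatment. A minimal sketch of what that variant might look like, derived
only from the patch above by dropping the XMM block; the function name and
the extern declaration are illustrative assumptions, not part of the
posted patch.]

#include <stdint.h>

extern void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);

/* Hypothetical non-SSE variant: same structure as the SSE callback, but
 * without the XMM save/restore, so only EAX/EDX/ECX need a save area. */
void X86CompilationCallback(void) {
  char SAVEBUF[12];                       /* room for EAX/EDX/ECX only */

  asm volatile (
    "movl %%eax,(%0)\n"
    "movl %%edx,4(%0)\n"                  /* Save EAX/EDX/ECX */
    "movl %%ecx,8(%0)\n"
    :: "r"(SAVEBUF) : "memory" );

  intptr_t *StackPtr = 0, RetAddr = 0;

  asm volatile (                          /* get stack ptr and retaddr */
    "movl %%ebp,%0\n"
    "movl 4(%%ebp),%1\n"
    : "=r"(StackPtr), "=r"(RetAddr) :: "memory" );

  X86CompilationCallback2(StackPtr, RetAddr);

  asm volatile (
    "movl (%0),%%eax\n"
    "movl 4(%0),%%edx\n"                  /* Restore EAX/EDX/ECX */
    "movl 8(%0),%%ecx\n"
    :: "r"(SAVEBUF) : "memory" );
}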
On Mar 11, 2009, at 2:39 PM, Corrado Zoccolo wrote:

> void X86CompilationCallback_SSE(void) {
>    char * SAVEBUF = (char*) alloca(64+12); // alloca is 16-byte aligned

How do you ensure it's 16-byte aligned? Can you declare a local array
and specify alignment using attribute __aligned?

Evan
On Thu, Mar 12, 2009 at 6:46 AM, Evan Cheng <echeng at apple.com> wrote:

> How do you ensure it's 16-byte aligned? Can you declare a local array
> and specify alignment using attribute __aligned?

Well, my gcc ensures it is properly aligned (following the malloc
specification), but alloca is not fully specified, so it is probably
better to use the local array with the attribute, to be sure this will
also work on different versions of gcc.

--
__________________________________________________________________________
dott. Corrado Zoccolo                          mailto:czoccolo at gmail.com
PhD - Department of Computer Science - University of Pisa, Italy
--------------------------------------------------------------------------
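[Editor's note: a minimal sketch of the buffer declaration Evan suggests,
using GCC's aligned attribute in place of alloca. It reuses the names from
the patch for clarity, but only the first asm block is reproduced; this is
an illustration of the alignment fix, not a tested replacement.]

/* A local array with __attribute__((aligned(16))) gives a guaranteed
 * 16-byte-aligned save area, unlike alloca, whose alignment is
 * implementation-defined.  64 bytes hold the four XMM registers, plus
 * 12 bytes for EAX/EDX/ECX, as in the patch. */
void X86CompilationCallback_SSE(void) {
  char SAVEBUF[64 + 12] __attribute__((aligned(16)));

  asm volatile (
    "movaps %%xmm0, (%0)\n"          /* movaps faults on unaligned   */
    "movaps %%xmm1, 16(%0)\n"        /* addresses, so the alignment  */
    "movaps %%xmm2, 32(%0)\n"        /* guarantee matters here       */
    "movaps %%xmm3, 48(%0)\n"
    :: "r"(SAVEBUF) : "memory" );

  /* ... remainder as in Corrado's original patch ... */
}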
This looks like an interesting idea. As written, the inline asms aren't
safe though; they reference %eax, %edx, etc. without declaring such things
in constraints, so the compiler wouldn't know that it can't clobber those
registers.

Dan

On Mar 11, 2009, at 2:39 PM, Corrado Zoccolo wrote:

> I don't know how to file a PR, but I have a patch (see below), that
> should work regardless of ABI differences, since it relies on the
> compiler to do the tough job.
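[Editor's note: for the restore side, the declaration Dan is asking for is
the clobber list of GCC's extended asm. A minimal sketch under that
assumption; the helper name is illustrative, and this is not a tested
change to the patch.]

/* The restore block with EAX/EDX/ECX declared as clobbers, so GCC knows
 * those registers are overwritten by the asm and must not hold live
 * values across it.  The save side is harder, since the asm *reads* the
 * incoming register values (see the sketch after Corrado's reply below). */
static void restore_int_regs(const char *savebuf) {
  asm volatile (
    "movl (%0),%%eax\n"
    "movl 4(%0),%%edx\n"
    "movl 8(%0),%%ecx\n"
    :: "r"(savebuf)
    : "memory", "eax", "edx", "ecx");   /* declare the written registers */
}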
On Thu, Mar 12, 2009 at 5:18 PM, Dan Gohman <gohman at apple.com> wrote:

> This looks like an interesting idea. As written, the inline asms aren't
> safe though; they reference %eax, %edx, etc. without declaring such
> things in constraints, so the compiler wouldn't know that it can't
> clobber those registers.
>
> Dan

I don't know how to specify such a constraint: it would have to forbid gcc
from using those registers *before* the asm block, while constraints can
only say how information is passed to/from the asm block and how the block
modifies the environment.

Actually, I don't think it is needed here, since those asm blocks sit at
the very beginning and the very end of the function (and are declared
volatile, i.e. they cannot be moved), so the compiler has no reason to use
those registers:

 * they are not used by the standard prologue/epilogue;
 * the only thing done before them is the allocation of the stack space,
   and since we use that buffer both before and after a call, the compiler
   must keep its address in one (or more) callee-saved registers, so it
   cannot use any of the registers we need to save.

Corrado
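[Editor's note: GCC does offer something close to what Corrado says cannot
be expressed: local register variables bound to specific registers, passed
as input operands. The documentation only guarantees the register choice
for the asm operands, not that an uninitialized variable observes the
caller's incoming value, so the following is purely a sketch of the
constraint syntax, with illustrative names, and not a verified fix for the
callback.]

#include <stdint.h>

/* Sketch: pin locals to EAX/EDX/ECX and pass them as inputs, so the
 * compiler at least knows the asm consumes whatever those registers hold
 * at this point.  Whether the uninitialized reads reliably carry the
 * function's incoming argument registers is not guaranteed. */
static void save_int_arg_regs(char *savebuf) {
  register uint32_t in_eax asm("eax");
  register uint32_t in_edx asm("edx");
  register uint32_t in_ecx asm("ecx");

  asm volatile (
    "movl %1,(%0)\n"
    "movl %2,4(%0)\n"
    "movl %3,8(%0)\n"
    :: "r"(savebuf), "r"(in_eax), "r"(in_edx), "r"(in_ecx)
    : "memory");
}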