Stefan Kanthak via llvm-dev
2018-Nov-25  15:52 UTC
[llvm-dev] BUGS in code generated for target i386 compiling __bswapdi2(), and for target x86-64 compiling __bswapsi2()
Hi @ll,
targeting i386, LLVM/clang generates wrong code for the following
functions:
unsigned long __bswapsi2 (unsigned long ul)
{
    return (((ul) & 0xff000000ul) >> 3 * 8)
         | (((ul) & 0x00ff0000ul) >>     8)
         | (((ul) & 0x0000ff00ul) <<     8)
         | (((ul) & 0x000000fful) << 3 * 8);
}
unsigned long long __bswapdi2(unsigned long long ull)
{
    return ((ull & 0xff00000000000000ull) >> 7 * 8)
         | ((ull & 0x00ff000000000000ull) >> 5 * 8)
         | ((ull & 0x0000ff0000000000ull) >> 3 * 8)
         | ((ull & 0x000000ff00000000ull) >>     8)
         | ((ull & 0x00000000ff000000ull) <<     8)
         | ((ull & 0x0000000000ff0000ull) << 3 * 8)
         | ((ull & 0x000000000000ff00ull) << 5 * 8)
         | ((ull & 0x00000000000000ffull) << 7 * 8);
}
You can find these sources in "compiler-rt/lib/builtins/bswapsi2.c"
and "compiler-rt/lib/builtins/bswapdi2.c", for example!
Compiled with "-O3 -target i386" this yields the following code
(see <https://godbolt.org/z/F4UIl4>):
__bswapsi2: # @__bswapsi2
    push  ebp
    mov   ebp, esp
    mov   eax, dword ptr [ebp + 8]
    bswap eax
    pop   ebp
    ret
__bswapdi2: # @__bswapdi2
    push  ebp
    mov   ebp, esp
    mov   edx, dword ptr [ebp + 8]
    mov   eax, dword ptr [ebp + 12]
    bswap eax
    bswap edx
    pop   ebp
    ret
__bswapsi2() is correct, but __bswapdi2() is NOT: swapping just the
bytes within each half of a "long long" is OBVIOUSLY WRONG!
From the C source, the expected result for the input value
0x0123456789ABCDEF is 0xEFCDAB8967452301, but the compiled code
produces 0x67452301EFCDAB89.
And compiled for x86-64 this yields the following code (see
<https://godbolt.org/z/uM9nvN>):
__bswapsi2: # @__bswapsi2
    mov   eax, edi
    shr   eax, 24
    mov   rcx, rdi
    shr   rcx, 8
    and   ecx, 65280
    or    rax, rcx
    mov   rcx, rdi
    shl   rcx, 8
    and   ecx, 16711680
    or    rax, rcx
    and   rdi, 255
    shl   rdi, 24
    or    rax, rdi
    ret
__bswapdi2: # @__bswapdi2
    bswap rdi
    mov   rax, rdi
    ret
Both are correct, but __bswapsi2() should of course use BSWAP too!
Stefan Kanthak
PS: for comparison with another compiler, take a look at
    <https://skanthak.homepage.t-online.de/msvc.html#example5>
Craig Topper via llvm-dev
2018-Nov-25  18:36 UTC
[llvm-dev] BUGS in code generated for target i386 compiling __bswapdi2(), and for target x86-64 compiling __bswapsi2()
__bswapsi2() on x86-64 isn't using the bswap instruction because "unsigned long" is 64 bits on x86-64 Linux, but it's 32 bits on x86-64 MSVC. Not sure about the __bswapdi2() i386 case. ~Craig On Sun, Nov 25, 2018 at 8:03 AM Stefan Kanthak via llvm-dev < llvm-dev at lists.llvm.org> wrote: > Hi @ll, > > targetting i386, LLVM/clang generates wrong code for the following > functions: > > unsigned long __bswapsi2 (unsigned long ul) > { > return (((ul) & 0xff000000ul) >> 3 * 8) > | (((ul) & 0x00ff0000ul) >> 8) > | (((ul) & 0x0000ff00ul) << 8) > | (((ul) & 0x000000fful) << 3 * 8); > } > > unsigned long long __bswapdi2(unsigned long long ull) > { > return ((ull & 0xff00000000000000ull) >> 7 * 8) > | ((ull & 0x00ff000000000000ull) >> 5 * 8) > | ((ull & 0x0000ff0000000000ull) >> 3 * 8) > | ((ull & 0x000000ff00000000ull) >> 8) > | ((ull & 0x00000000ff000000ull) << 8) > | ((ull & 0x0000000000ff0000ull) << 3 * 8) > | ((ull & 0x000000000000ff00ull) << 5 * 8) > | ((ull & 0x00000000000000ffull) << 7 * 8); > } > > You can find these sources in "compiler-rt/lib/builtins/bswapsi2.c" > and "compiler-rt/lib/builtins/bswapdi2.c", for example! > > > Compiled with "-O3 -target i386" this yields the following code > (see <https://godbolt.org/z/F4UIl4>): > > __bswapsi2: # @__bswapsi2 > push ebp > mov ebp, esp > mov eax, dword ptr [ebp + 8] > bswap eax > pop ebp > ret > > __bswapdi2: # @__bswapdi2 > push ebp > mov ebp, esp > mov edx, dword ptr [ebp + 8] > mov eax, dword ptr [ebp + 12] > bswap eax > bswap edx > pop ebp > ret > > __bswapsi2() is correct, but __bswapdi2() NOT: swapping just the > halves of a "long long" is OBVIOUSLY WRONG! 
> > From the C source, the expected result for the input value > 0x0123456789ABCDEF is 0xEFCDAB8967452301; the compiled code but > produces 0x67452301EFCDAB89 > > > And compiled for x86-64 this yields the following code (see > <https://godbolt.org/z/uM9nvN>): > > __bswapsi2: # @__bswapsi2 > mov eax, edi > shr eax, 24 > mov rcx, rdi > shr rcx, 8 > and ecx, 65280 > or rax, rcx > mov rcx, rdi > shl rcx, 8 > and ecx, 16711680 > or rax, rcx > and rdi, 255 > shl rdi, 24 > or rax, rdi > ret > > __bswapdi2: # @__bswapdi2 > bswap rdi > mov rax, rdi > ret > > Both are correct, but __bswapsi2() should of course use BSWAP too! > > > Stefan Kanthak > > PS: for comparision with another compiler, take a look at > <https://skanthak.homepage.t-online.de/msvc.html#example5> > _______________________________________________ > LLVM Developers mailing list > llvm-dev at lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev >-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20181125/d406b958/attachment.html>
Craig Topper via llvm-dev
2018-Nov-25  18:39 UTC
[llvm-dev] BUGS in code generated for target i386 compiling __bswapdi2(), and for target x86-64 compiling __bswapsi2()
__bswapdi2() for i386 is correct. Bits 31:0 of the source are loaded into edx, and bits 63:32 are loaded into eax. Each register is then bswapped. Per the ABI, a 64-bit value is returned with bits [63:32] in edx and bits [31:0] in eax, which is the opposite of how the registers were loaded, so the two halves are exchanged as well as byte-swapped. ~Craig On Sun, Nov 25, 2018 at 10:36 AM Craig Topper <craig.topper at gmail.com> wrote: > bswapsi2 on the x86-64 isn't using the bswap instruction because "unsigned > long" is 64-bits on x86-64 linux. But its 32-bits on x86-64 msvc. > > Not sure about the bswapdi2 i386 case. > > > ~Craig > > > On Sun, Nov 25, 2018 at 8:03 AM Stefan Kanthak via llvm-dev < > llvm-dev at lists.llvm.org> wrote: > >> Hi @ll, >> >> targetting i386, LLVM/clang generates wrong code for the following >> functions: >> >> unsigned long __bswapsi2 (unsigned long ul) >> { >> return (((ul) & 0xff000000ul) >> 3 * 8) >> | (((ul) & 0x00ff0000ul) >> 8) >> | (((ul) & 0x0000ff00ul) << 8) >> | (((ul) & 0x000000fful) << 3 * 8); >> } >> >> unsigned long long __bswapdi2(unsigned long long ull) >> { >> return ((ull & 0xff00000000000000ull) >> 7 * 8) >> | ((ull & 0x00ff000000000000ull) >> 5 * 8) >> | ((ull & 0x0000ff0000000000ull) >> 3 * 8) >> | ((ull & 0x000000ff00000000ull) >> 8) >> | ((ull & 0x00000000ff000000ull) << 8) >> | ((ull & 0x0000000000ff0000ull) << 3 * 8) >> | ((ull & 0x000000000000ff00ull) << 5 * 8) >> | ((ull & 0x00000000000000ffull) << 7 * 8); >> } >> >> You can find these sources in "compiler-rt/lib/builtins/bswapsi2.c" >> and "compiler-rt/lib/builtins/bswapdi2.c", for example! 
>> >> >> Compiled with "-O3 -target i386" this yields the following code >> (see <https://godbolt.org/z/F4UIl4>): >> >> __bswapsi2: # @__bswapsi2 >> push ebp >> mov ebp, esp >> mov eax, dword ptr [ebp + 8] >> bswap eax >> pop ebp >> ret >> >> __bswapdi2: # @__bswapdi2 >> push ebp >> mov ebp, esp >> mov edx, dword ptr [ebp + 8] >> mov eax, dword ptr [ebp + 12] >> bswap eax >> bswap edx >> pop ebp >> ret >> >> __bswapsi2() is correct, but __bswapdi2() NOT: swapping just the >> halves of a "long long" is OBVIOUSLY WRONG! >> >> From the C source, the expected result for the input value >> 0x0123456789ABCDEF is 0xEFCDAB8967452301; the compiled code but >> produces 0x67452301EFCDAB89 >> >> >> And compiled for x86-64 this yields the following code (see >> <https://godbolt.org/z/uM9nvN>): >> >> __bswapsi2: # @__bswapsi2 >> mov eax, edi >> shr eax, 24 >> mov rcx, rdi >> shr rcx, 8 >> and ecx, 65280 >> or rax, rcx >> mov rcx, rdi >> shl rcx, 8 >> and ecx, 16711680 >> or rax, rcx >> and rdi, 255 >> shl rdi, 24 >> or rax, rdi >> ret >> >> __bswapdi2: # @__bswapdi2 >> bswap rdi >> mov rax, rdi >> ret >> >> Both are correct, but __bswapsi2() should of course use BSWAP too! >> >> >> Stefan Kanthak >> >> PS: for comparision with another compiler, take a look at >> <https://skanthak.homepage.t-online.de/msvc.html#example5> >> _______________________________________________ >> LLVM Developers mailing list >> llvm-dev at lists.llvm.org >> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev >> >-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20181125/f8fa6869/attachment.html>
Stefan Kanthak via llvm-dev
2018-Nov-25  19:04 UTC
[llvm-dev] BUGS in code generated for target i386 compiling __bswapdi2(), and for target x86-64 compiling __bswapsi2()
"Craig Topper" <craig.topper at gmail.com> wrote: > bswapsi2 on the x86-64 isn't using the bswap instruction because "unsigned > long" is 64-bits on x86-64 linux. Believe me: I KNOW THIS! Now take one more THOROUGH look at the C source of __bswapsi2(): 1. ALL 4 & operations clear the higher 32 bits; 2. the 4 shift operations (2 >> and 2 <<) BSWAP the lower 32 bits. Assuming that the argument was loaded into RDI, this is a BSWAP EDI in Intel x86-64 machine code! Remember that in "long mode", writing to a 32-bit register clears the higher 32 bits of the respective 64-bit register. AGAIN: why doesn't LLVM generate the BSWAP operation here? The expected code is __bswapsi2: # @__bswapsi2 bswap edi mov rax, rdi ret not amused Stefan Kanthak > But its 32-bits on x86-64 msvc. > > Not sure about the bswapdi2 i386 case. > > > ~Craig > > > On Sun, Nov 25, 2018 at 8:03 AM Stefan Kanthak via llvm-dev < > llvm-dev at lists.llvm.org> wrote: > >> Hi @ll, >> >> targetting i386, LLVM/clang generates wrong code for the following >> functions: >> >> unsigned long __bswapsi2 (unsigned long ul) >> { >> return (((ul) & 0xff000000ul) >> 3 * 8) >> | (((ul) & 0x00ff0000ul) >> 8) >> | (((ul) & 0x0000ff00ul) << 8) >> | (((ul) & 0x000000fful) << 3 * 8); >> } >> >> unsigned long long __bswapdi2(unsigned long long ull) >> { >> return ((ull & 0xff00000000000000ull) >> 7 * 8) >> | ((ull & 0x00ff000000000000ull) >> 5 * 8) >> | ((ull & 0x0000ff0000000000ull) >> 3 * 8) >> | ((ull & 0x000000ff00000000ull) >> 8) >> | ((ull & 0x00000000ff000000ull) << 8) >> | ((ull & 0x0000000000ff0000ull) << 3 * 8) >> | ((ull & 0x000000000000ff00ull) << 5 * 8) >> | ((ull & 0x00000000000000ffull) << 7 * 8); >> } >> >> You can find these sources in "compiler-rt/lib/builtins/bswapsi2.c" >> and "compiler-rt/lib/builtins/bswapdi2.c", for example! 
>> >> >> Compiled with "-O3 -target i386" this yields the following code >> (see <https://godbolt.org/z/F4UIl4>): >> >> __bswapsi2: # @__bswapsi2 >> push ebp >> mov ebp, esp >> mov eax, dword ptr [ebp + 8] >> bswap eax >> pop ebp >> ret >> >> __bswapdi2: # @__bswapdi2 >> push ebp >> mov ebp, esp >> mov edx, dword ptr [ebp + 8] >> mov eax, dword ptr [ebp + 12] >> bswap eax >> bswap edx >> pop ebp >> ret >> >> __bswapsi2() is correct, but __bswapdi2() NOT: swapping just the >> halves of a "long long" is OBVIOUSLY WRONG! >> >> From the C source, the expected result for the input value >> 0x0123456789ABCDEF is 0xEFCDAB8967452301; the compiled code but >> produces 0x67452301EFCDAB89 >> >> >> And compiled for x86-64 this yields the following code (see~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~>> <https://godbolt.org/z/uM9nvN>): >> >> __bswapsi2: # @__bswapsi2 >> mov eax, edi >> shr eax, 24 >> mov rcx, rdi >> shr rcx, 8 >> and ecx, 65280 >> or rax, rcx >> mov rcx, rdi >> shl rcx, 8 >> and ecx, 16711680 >> or rax, rcx >> and rdi, 255 >> shl rdi, 24 >> or rax, rdi >> ret >> >> __bswapdi2: # @__bswapdi2 >> bswap rdi >> mov rax, rdi >> ret >> >> Both are correct, but __bswapsi2() should of course use BSWAP too! >> >> >> Stefan Kanthak >> >> PS: for comparision with another compiler, take a look at >> <https://skanthak.homepage.t-online.de/msvc.html#example5> >> _______________________________________________ >> LLVM Developers mailing list >> llvm-dev at lists.llvm.org >> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev >> >
Possibly Parallel Threads
- BUGS n code generated for target i386 compiling __bswapdi3, and for target x86-64 compiling __bswapsi2()
- BUGS n code generated for target i386 compiling __bswapdi3, and for target x86-64 compiling __bswapsi2()
- "No such file or directory" error setting up dict quota for mdbox
- Compiling for baremetal ARMv4 on Ubuntu Linux
- unsigned long long suffix