Stefan Kanthak via llvm-dev
2018-Dec-17  23:40 UTC
[llvm-dev] Where's the optimiser gone (part 9): bit twiddling not recognised
Hi @ll,
compile with -O3 -m32 -march=i686 (see <https://godbolt.org/z/fjjUl6>)
unsigned long lreverse(unsigned long x)    // swap all bits
{
    x = ((x &  0xAAAAAAAAUL) >> 1)
      | ((x & ~0xAAAAAAAAUL) << 1);
    x = ((x &  0xCCCCCCCCUL) >> 2)
      | ((x & ~0xCCCCCCCCUL) << 2);
    x = ((x &  0xF0F0F0F0UL) >> 4)
      | ((x & ~0xF0F0F0F0UL) << 4);
    x = ((x &  0xFF00FF00UL) >> 8)
      | ((x & ~0xFF00FF00UL) << 8);
    x = ((x &  0xFFFF0000UL) >> 16)
      | ((x & ~0xFFFF0000UL) << 16);
    return x;
}
lreverse: # @lreverse
    mov   eax, dword ptr [esp + 4]
    mov   ecx, eax
    shr   ecx, 1
    and   ecx, 1431655765
    and   eax, 1431655765
    lea   eax, [ecx + 2*eax]
    mov   ecx, eax
    shr   ecx, 2
    and   ecx, 858993459
    and   eax, 858993459
    lea   eax, [ecx + 4*eax]
    mov   ecx, eax
    shr   ecx, 4
    and   ecx, 252645135
    shl   eax, 4
    and   eax, -252645136
    or    eax, ecx
#ifndef _OPTIMISER_GONE_FISHING // until here, almost perfect;
    bswap eax                   //  the next 7 instructions but can
#else                           //   be replaced by a single BSWAP
    mov   ecx, eax
    shr   ecx, 8
    and   ecx, 16711935               
    shl   eax, 8
    and   eax, -16711936
    or    eax, ecx
    rol   eax, 16
#endif
    ret
unsigned long long llreverse(unsigned long long x)
{
#if 0
    x = ((x & 0xAAAAAAAAAAAAAAAAULL) >> 1)
      | ((x & ~0xAAAAAAAAAAAAAAAAULL) << 1);
    x = ((x & 0xCCCCCCCCCCCCCCCCULL) >> 2)
      | ((x & ~0xCCCCCCCCCCCCCCCCULL) << 2);
    x = ((x & 0xF0F0F0F0F0F0F0F0ULL) >> 4)
      | ((x & ~0xF0F0F0F0F0F0F0F0ULL) << 4);
    x = ((x & 0xFF00FF00FF00FF00ULL) >> 8)
      | ((x & ~0xFF00FF00FF00FF00ULL) << 8);
    x = ((x & 0xFFFF0000FFFF0000ULL) >> 16)
      | ((x & ~0xFFFF0000FFFF0000ULL) << 16);
    x = ((x & 0xFFFFFFFF00000000ULL) >> 32)
      | ((x & ~0xFFFFFFFF00000000ULL) << 32);
    return x;
#else
    x = ((x >> 1) & ~0xAAAAAAAAAAAAAAAAULL)
      | ((x << 1) &  0xAAAAAAAAAAAAAAAAULL);
    x = ((x >> 2) & ~0xCCCCCCCCCCCCCCCCULL)
      | ((x << 2) &  0xCCCCCCCCCCCCCCCCULL);
    x = ((x >> 4) & ~0xF0F0F0F0F0F0F0F0ULL)
      | ((x << 4) &  0xF0F0F0F0F0F0F0F0ULL);
    x = ((x >> 8) & ~0xFF00FF00FF00FF00ULL)
      | ((x << 8) &  0xFF00FF00FF00FF00ULL);
    x = ((x >> 16) & ~0xFFFF0000FFFF0000ULL)
      | ((x << 16) &  0xFFFF0000FFFF0000ULL);
    return (x >> 32) | (x << 32);
#endif
}
llreverse: # @llreverse
    push  esi
    mov   eax, dword ptr [esp + 8]
    mov   ecx, dword ptr [esp + 12]
    mov   edx, eax
    shr   edx
    mov   esi, ecx
    shr   esi
#ifdef OPTIMISER_GONE_FISHING
    and   esi, 1431655765
    and   edx, 1431655765
    and   ecx, 1431655765
    and   eax, 1431655765
#else
    push  ebx
    mov   ebx, 1431655765
    and   esi, ebx
    and   edx, ebx
    and   ecx, ebx
    and   eax, ebx
#endif
    lea   edx, [edx + 2*eax]
    lea   eax, [esi + 2*ecx]
    mov   ecx, eax
    shr   ecx, 2
    mov   esi, edx
    shr   esi, 2
#ifdef OPTIMISER_GONE_FISHING
    and   esi, 858993459
    and   ecx, 858993459
    and   edx, 858993459
    and   eax, 858993459
#else
    mov   ebx, 858993459
    and   esi, ebx
    and   ecx, ebx
    and   edx, ebx
    and   eax, ebx
#endif
    lea   eax, [ecx + 4*eax]
    lea   edx, [esi + 4*edx]
    mov   ecx, edx
    shr   ecx, 4
    mov   esi, eax
    shr   esi, 4
#ifdef OPTIMISER_GONE_FISHING
    and   esi, 252645135
    and   ecx, 252645135
#else
    mov   ebx, 252645135
    and   esi, ebx
    and   ecx, ebx
#endif
    shl   edx, 4
    shl   eax, 4
#ifdef OPTIMISER_GONE_FISHING
    and   eax, -252645136
#else
    not   ebx
    and   eax, ebx
#endif
    or    eax, esi
#ifdef OPTIMISER_GONE_FISHING
    and   edx, -252645136
#else
    and   edx, ebx
    pop   ebx
#endif
    or    edx, ecx
#ifndef OPTIMISER_GONE_FISHING // until here, quite good;
    bswap eax                  //  the next 14 instructions but can
    bswap edx                  //   be replaced by a two BSWAPs
#else
    mov   ecx, eax
    shr   ecx, 8
    mov   esi, edx
    shr   esi, 8
    and   esi, 16711935
    and   ecx, 16711935
    shl   eax, 8
    shl   edx, 8
    and   edx, -16711936
    or    edx, esi
    and   eax, -16711936
    or    eax, ecx
    rol   edx, 16
    rol   eax, 16
#endif
    pop   esi
    ret
Craig Topper via llvm-dev
2018-Dec-18  07:41 UTC
[llvm-dev] Where's the optimiser gone (part 9): bit twiddling not recognised
I've filed a bugzilla for this https://bugs.llvm.org/show_bug.cgi?id=40058 ~Craig On Mon, Dec 17, 2018 at 3:48 PM Stefan Kanthak via llvm-dev < llvm-dev at lists.llvm.org> wrote:> Hi @ll, > > compile with -O3 -m32 -march=i686 (see <https://godbolt.org/z/fjjUl6>) > > unsigned long lreverse(unsigned long x) // swap all bits > { > x = ((x & 0xAAAAAAAAUL) >> 1) > | ((x & ~0xAAAAAAAAUL) << 1); > x = ((x & 0xCCCCCCCCUL) >> 2) > | ((x & ~0xCCCCCCCCUL) << 2); > x = ((x & 0xF0F0F0F0UL) >> 4) > | ((x & ~0xF0F0F0F0UL) << 4); > x = ((x & 0xFF00FF00UL) >> 8) > | ((x & ~0xFF00FF00UL) << 8); > x = ((x & 0xFFFF0000UL) >> 16) > | ((x & ~0xFFFF0000UL) << 16); > > return x; > } > > lreverse: # @lreverse > mov eax, dword ptr [esp + 4] > mov ecx, eax > shr ecx, 1 > and ecx, 1431655765 > and eax, 1431655765 > lea eax, [ecx + 2*eax] > mov ecx, eax > shr ecx, 2 > and ecx, 858993459 > and eax, 858993459 > lea eax, [ecx + 4*eax] > mov ecx, eax > shr ecx, 4 > and ecx, 252645135 > shl eax, 4 > and eax, -252645136 > or eax, ecx > #ifndef _OPTIMISER_GONE_FISHING // until here, almost perfect; > bswap eax // the next 7 instructions but can > #else // be replaced by a single BSWAP > mov ecx, eax > shr ecx, 8 > and ecx, 16711935 > shl eax, 8 > and eax, -16711936 > or eax, ecx > rol eax, 16 > #endif > ret > > > unsigned long long llreverse(unsigned long long x) > { > #if 0 > x = ((x & 0xAAAAAAAAAAAAAAAAULL) >> 1) > | ((x & ~0xAAAAAAAAAAAAAAAAULL) << 1); > x = ((x & 0xCCCCCCCCCCCCCCCCULL) >> 2) > | ((x & ~0xCCCCCCCCCCCCCCCCULL) << 2); > x = ((x & 0xF0F0F0F0F0F0F0F0ULL) >> 4) > | ((x & ~0xF0F0F0F0F0F0F0F0ULL) << 4); > x = ((x & 0xFF00FF00FF00FF00ULL) >> 8) > | ((x & ~0xFF00FF00FF00FF00ULL) << 8); > x = ((x & 0xFFFF0000FFFF0000ULL) >> 16) > | ((x & ~0xFFFF0000FFFF0000ULL) << 16); > x = ((x & 0xFFFFFFFF00000000ULL) >> 32) > | ((x & ~0xFFFFFFFF00000000ULL) << 32); > > return x; > #else > x = ((x >> 1) & ~0xAAAAAAAAAAAAAAAAULL) > | ((x << 1) & 0xAAAAAAAAAAAAAAAAULL); > x = ((x >> 2) & ~0xCCCCCCCCCCCCCCCCULL) > | ((x << 2) & 0xCCCCCCCCCCCCCCCCULL); > x = ((x >> 4) & ~0xF0F0F0F0F0F0F0F0ULL) > | ((x << 4) & 0xF0F0F0F0F0F0F0F0ULL); > x = ((x >> 8) & ~0xFF00FF00FF00FF00ULL) > | ((x << 8) & 0xFF00FF00FF00FF00ULL); > x = ((x >> 16) & ~0xFFFF0000FFFF0000ULL) > | ((x << 16) & 0xFFFF0000FFFF0000ULL); > > return (x >> 32) | (x << 32); > #endif > } > > llreverse: # @llreverse > push esi > mov eax, dword ptr [esp + 8] > mov ecx, dword ptr [esp + 12] > mov edx, eax > shr edx > mov esi, ecx > shr esi > > #ifdef OPTIMISER_GONE_FISHING > and esi, 1431655765 > and edx, 1431655765 > and ecx, 1431655765 > and eax, 1431655765 > #else > push ebx > mov ebx, 1431655765 > and esi, ebx > and edx, ebx > and ecx, ebx > and eax, ebx > #endif > lea edx, [edx + 2*eax] > lea eax, [esi + 2*ecx] > mov ecx, eax > shr ecx, 2 > mov esi, edx > shr esi, 2 > #ifdef OPTIMISER_GONE_FISHING > and esi, 858993459 > and ecx, 858993459 > and edx, 858993459 > and eax, 858993459 > > #else > mov ebx, 858993459 > and esi, ebx > and ecx, ebx > and edx, ebx > and eax, ebx > #endif > lea eax, [ecx + 4*eax] > lea edx, [esi + 4*edx] > mov ecx, edx > shr ecx, 4 > mov esi, eax > shr esi, 4 > > #ifdef OPTIMISER_GONE_FISHING > and esi, 252645135 > and ecx, 252645135 > > #else > mov ebx, 252645135 > and esi, ebx > and ecx, ebx > #endif > shl edx, 4 > shl eax, 4 > > #ifdef OPTIMISER_GONE_FISHING > and eax, -252645136 > #else > not ebx > and eax, ebx > #endif > or eax, esi > > #ifdef OPTIMISER_GONE_FISHING > and edx, -252645136 > #else > and edx, ebx > pop ebx > #endif > or edx, ecx > #ifndef OPTIMISER_GONE_FISHING // until here, quite good; > bswap eax // the next 14 instructions but can > bswap edx // be replaced by a two BSWAPs > #else > mov ecx, eax > shr ecx, 8 > mov esi, edx > shr esi, 8 > and esi, 16711935 > and ecx, 16711935 > shl eax, 8 > shl edx, 8 > and edx, -16711936 > or edx, esi > and eax, -16711936 > or eax, ecx > rol edx, 16 > rol eax, 16 > #endif > pop esi > ret > _______________________________________________ > LLVM Developers mailing list > llvm-dev at lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev >-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20181217/17eccae2/attachment.html>