Stefan Kanthak via llvm-dev
2018-Dec-17 23:40 UTC
[llvm-dev] Where's the optimiser gone (part 9): bit twiddling not recognised
Hi @ll, compile with -O3 -m32 -march=i686 (see <https://godbolt.org/z/fjjUl6>) unsigned long lreverse(unsigned long x) // swap all bits { x = ((x & 0xAAAAAAAAUL) >> 1) | ((x & ~0xAAAAAAAAUL) << 1); x = ((x & 0xCCCCCCCCUL) >> 2) | ((x & ~0xCCCCCCCCUL) << 2); x = ((x & 0xF0F0F0F0UL) >> 4) | ((x & ~0xF0F0F0F0UL) << 4); x = ((x & 0xFF00FF00UL) >> 8) | ((x & ~0xFF00FF00UL) << 8); x = ((x & 0xFFFF0000UL) >> 16) | ((x & ~0xFFFF0000UL) << 16); return x; } lreverse: # @lreverse mov eax, dword ptr [esp + 4] mov ecx, eax shr ecx, 1 and ecx, 1431655765 and eax, 1431655765 lea eax, [ecx + 2*eax] mov ecx, eax shr ecx, 2 and ecx, 858993459 and eax, 858993459 lea eax, [ecx + 4*eax] mov ecx, eax shr ecx, 4 and ecx, 252645135 shl eax, 4 and eax, -252645136 or eax, ecx #ifndef _OPTIMISER_GONE_FISHING // until here, almost perfect; bswap eax // the next 7 instructions but can #else // be replaced by a single BSWAP mov ecx, eax shr ecx, 8 and ecx, 16711935 shl eax, 8 and eax, -16711936 or eax, ecx rol eax, 16 #endif ret unsigned long long llreverse(unsigned long long x) { #if 0 x = ((x & 0xAAAAAAAAAAAAAAAAULL) >> 1) | ((x & ~0xAAAAAAAAAAAAAAAAULL) << 1); x = ((x & 0xCCCCCCCCCCCCCCCCULL) >> 2) | ((x & ~0xCCCCCCCCCCCCCCCCULL) << 2); x = ((x & 0xF0F0F0F0F0F0F0F0ULL) >> 4) | ((x & ~0xF0F0F0F0F0F0F0F0ULL) << 4); x = ((x & 0xFF00FF00FF00FF00ULL) >> 8) | ((x & ~0xFF00FF00FF00FF00ULL) << 8); x = ((x & 0xFFFF0000FFFF0000ULL) >> 16) | ((x & ~0xFFFF0000FFFF0000ULL) << 16); x = ((x & 0xFFFFFFFF00000000ULL) >> 32) | ((x & ~0xFFFFFFFF00000000ULL) << 32); return x; #else x = ((x >> 1) & ~0xAAAAAAAAAAAAAAAAULL) | ((x << 1) & 0xAAAAAAAAAAAAAAAAULL); x = ((x >> 2) & ~0xCCCCCCCCCCCCCCCCULL) | ((x << 2) & 0xCCCCCCCCCCCCCCCCULL); x = ((x >> 4) & ~0xF0F0F0F0F0F0F0F0ULL) | ((x << 4) & 0xF0F0F0F0F0F0F0F0ULL); x = ((x >> 8) & ~0xFF00FF00FF00FF00ULL) | ((x << 8) & 0xFF00FF00FF00FF00ULL); x = ((x >> 16) & ~0xFFFF0000FFFF0000ULL) | ((x << 16) & 0xFFFF0000FFFF0000ULL); return (x >> 32) | (x << 32); #endif } llreverse: # @llreverse push esi mov eax, dword ptr [esp + 8] mov ecx, dword ptr [esp + 12] mov edx, eax shr edx mov esi, ecx shr esi #ifdef OPTIMISER_GONE_FISHING and esi, 1431655765 and edx, 1431655765 and ecx, 1431655765 and eax, 1431655765 #else push ebx mov ebx, 1431655765 and esi, ebx and edx, ebx and ecx, ebx and eax, ebx #endif lea edx, [edx + 2*eax] lea eax, [esi + 2*ecx] mov ecx, eax shr ecx, 2 mov esi, edx shr esi, 2 #ifdef OPTIMISER_GONE_FISHING and esi, 858993459 and ecx, 858993459 and edx, 858993459 and eax, 858993459 #else mov ebx, 858993459 and esi, ebx and ecx, ebx and edx, ebx and eax, ebx #endif lea eax, [ecx + 4*eax] lea edx, [esi + 4*edx] mov ecx, edx shr ecx, 4 mov esi, eax shr esi, 4 #ifdef OPTIMISER_GONE_FISHING and esi, 252645135 and ecx, 252645135 #else mov ebx, 252645135 and esi, ebx and ecx, ebx #endif shl edx, 4 shl eax, 4 #ifdef OPTIMISER_GONE_FISHING and eax, -252645136 #else not ebx and eax, ebx #endif or eax, esi #ifdef OPTIMISER_GONE_FISHING and edx, -252645136 #else and edx, ebx pop ebx #endif or edx, ecx #ifndef OPTIMISER_GONE_FISHING // until here, quite good; bswap eax // the next 14 instructions but can bswap edx // be replaced by a two BSWAPs #else mov ecx, eax shr ecx, 8 mov esi, edx shr esi, 8 and esi, 16711935 and ecx, 16711935 shl eax, 8 shl edx, 8 and edx, -16711936 or edx, esi and eax, -16711936 or eax, ecx rol edx, 16 rol eax, 16 #endif pop esi ret
Craig Topper via llvm-dev
2018-Dec-18 07:41 UTC
[llvm-dev] Where's the optimiser gone (part 9): bit twiddling not recognised
I've filed a bugzilla for this https://bugs.llvm.org/show_bug.cgi?id=40058 ~Craig On Mon, Dec 17, 2018 at 3:48 PM Stefan Kanthak via llvm-dev < llvm-dev at lists.llvm.org> wrote:> Hi @ll, > > compile with -O3 -m32 -march=i686 (see <https://godbolt.org/z/fjjUl6>) > > unsigned long lreverse(unsigned long x) // swap all bits > { > x = ((x & 0xAAAAAAAAUL) >> 1) > | ((x & ~0xAAAAAAAAUL) << 1); > x = ((x & 0xCCCCCCCCUL) >> 2) > | ((x & ~0xCCCCCCCCUL) << 2); > x = ((x & 0xF0F0F0F0UL) >> 4) > | ((x & ~0xF0F0F0F0UL) << 4); > x = ((x & 0xFF00FF00UL) >> 8) > | ((x & ~0xFF00FF00UL) << 8); > x = ((x & 0xFFFF0000UL) >> 16) > | ((x & ~0xFFFF0000UL) << 16); > > return x; > } > > lreverse: # @lreverse > mov eax, dword ptr [esp + 4] > mov ecx, eax > shr ecx, 1 > and ecx, 1431655765 > and eax, 1431655765 > lea eax, [ecx + 2*eax] > mov ecx, eax > shr ecx, 2 > and ecx, 858993459 > and eax, 858993459 > lea eax, [ecx + 4*eax] > mov ecx, eax > shr ecx, 4 > and ecx, 252645135 > shl eax, 4 > and eax, -252645136 > or eax, ecx > #ifndef _OPTIMISER_GONE_FISHING // until here, almost perfect; > bswap eax // the next 7 instructions but can > #else // be replaced by a single BSWAP > mov ecx, eax > shr ecx, 8 > and ecx, 16711935 > shl eax, 8 > and eax, -16711936 > or eax, ecx > rol eax, 16 > #endif > ret > > > unsigned long long llreverse(unsigned long long x) > { > #if 0 > x = ((x & 0xAAAAAAAAAAAAAAAAULL) >> 1) > | ((x & ~0xAAAAAAAAAAAAAAAAULL) << 1); > x = ((x & 0xCCCCCCCCCCCCCCCCULL) >> 2) > | ((x & ~0xCCCCCCCCCCCCCCCCULL) << 2); > x = ((x & 0xF0F0F0F0F0F0F0F0ULL) >> 4) > | ((x & ~0xF0F0F0F0F0F0F0F0ULL) << 4); > x = ((x & 0xFF00FF00FF00FF00ULL) >> 8) > | ((x & ~0xFF00FF00FF00FF00ULL) << 8); > x = ((x & 0xFFFF0000FFFF0000ULL) >> 16) > | ((x & ~0xFFFF0000FFFF0000ULL) << 16); > x = ((x & 0xFFFFFFFF00000000ULL) >> 32) > | ((x & ~0xFFFFFFFF00000000ULL) << 32); > > return x; > #else > x = ((x >> 1) & ~0xAAAAAAAAAAAAAAAAULL) > | ((x << 1) & 0xAAAAAAAAAAAAAAAAULL); > x = ((x >> 2) & ~0xCCCCCCCCCCCCCCCCULL) > | ((x << 2) & 0xCCCCCCCCCCCCCCCCULL); > x = ((x >> 4) & ~0xF0F0F0F0F0F0F0F0ULL) > | ((x << 4) & 0xF0F0F0F0F0F0F0F0ULL); > x = ((x >> 8) & ~0xFF00FF00FF00FF00ULL) > | ((x << 8) & 0xFF00FF00FF00FF00ULL); > x = ((x >> 16) & ~0xFFFF0000FFFF0000ULL) > | ((x << 16) & 0xFFFF0000FFFF0000ULL); > > return (x >> 32) | (x << 32); > #endif > } > > llreverse: # @llreverse > push esi > mov eax, dword ptr [esp + 8] > mov ecx, dword ptr [esp + 12] > mov edx, eax > shr edx > mov esi, ecx > shr esi > > #ifdef OPTIMISER_GONE_FISHING > and esi, 1431655765 > and edx, 1431655765 > and ecx, 1431655765 > and eax, 1431655765 > #else > push ebx > mov ebx, 1431655765 > and esi, ebx > and edx, ebx > and ecx, ebx > and eax, ebx > #endif > lea edx, [edx + 2*eax] > lea eax, [esi + 2*ecx] > mov ecx, eax > shr ecx, 2 > mov esi, edx > shr esi, 2 > #ifdef OPTIMISER_GONE_FISHING > and esi, 858993459 > and ecx, 858993459 > and edx, 858993459 > and eax, 858993459 > > #else > mov ebx, 858993459 > and esi, ebx > and ecx, ebx > and edx, ebx > and eax, ebx > #endif > lea eax, [ecx + 4*eax] > lea edx, [esi + 4*edx] > mov ecx, edx > shr ecx, 4 > mov esi, eax > shr esi, 4 > > #ifdef OPTIMISER_GONE_FISHING > and esi, 252645135 > and ecx, 252645135 > > #else > mov ebx, 252645135 > and esi, ebx > and ecx, ebx > #endif > shl edx, 4 > shl eax, 4 > > #ifdef OPTIMISER_GONE_FISHING > and eax, -252645136 > #else > not ebx > and eax, ebx > #endif > or eax, esi > > #ifdef OPTIMISER_GONE_FISHING > and edx, -252645136 > #else > and edx, ebx > pop ebx > #endif > or edx, ecx > #ifndef OPTIMISER_GONE_FISHING // until here, quite good; > bswap eax // the next 14 instructions but can > bswap edx // be replaced by a two BSWAPs > #else > mov ecx, eax > shr ecx, 8 > mov esi, edx > shr esi, 8 > and esi, 16711935 > and ecx, 16711935 > shl eax, 8 > shl edx, 8 > and edx, -16711936 > or edx, esi > and eax, -16711936 > or eax, ecx > rol edx, 16 > rol eax, 16 > #endif > pop esi > ret > _______________________________________________ > LLVM Developers mailing list > llvm-dev at lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev >-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20181217/17eccae2/attachment.html>