Stefan Kanthak via llvm-dev
2018-Nov-28 11:14 UTC
[llvm-dev] Where's the optimiser gone? (part 2)
Hi @ll,

when called with "-O3 -target i386-win32", the compiler fails to generate
properly optimised code (the expected code is shown on the right side) for
two of the following (rather trivial) functions:
(see <https://godbolt.org/z/jQxIIi>)

Especially notice the difference between the signed and unsigned variants:
the latter are properly optimised!

regards
Stefan Kanthak

--- sample2.c ---

__inline
__int64 __fastcall Int32x32To64(long x, long y) {
    return (__int64) x * y;
}

long Int32x32To64Div32(long a, long b, long c) {
    return Int32x32To64(a, b) / c;
}

_Int32x32To64Div32: # @Int32x32To64Div32
    push esi
    mov eax, dword ptr [esp + 12]
    mov ecx, dword ptr [esp + 16]   | mov eax, dword ptr [esp + 12]
    imul dword ptr [esp + 8]        | cdq
    mov esi, ecx                    | push edx
    sar esi, 31                     | push eax
    push esi                        | mov eax, dword ptr [esp + 8]
    push ecx                        | imul dword ptr [esp + 4]
    push edx                        | push edx
    push eax                        | push eax
    call __alldiv                   | jmp __alldiv
    pop esi
    ret

long Int32x32To64Rem32(long a, long b, long c) {
    return Int32x32To64(a, b) % c;
}

_Int32x32To64Rem32: # @Int32x32To64Rem32
    push esi
    mov eax, dword ptr [esp + 12]
    mov ecx, dword ptr [esp + 16]   | mov eax, dword ptr [esp + 12]
    imul dword ptr [esp + 8]        | cdq
    mov esi, ecx                    | push edx
    sar esi, 31                     | push eax
    push esi                        | mov eax, dword ptr [esp + 8]
    push ecx                        | imul dword ptr [esp + 4]
    push edx                        | push edx
    push eax                        | push eax
    call __allrem                   | jmp __allrem
    pop esi
    ret

__inline
unsigned __int64 __fastcall UInt32x32To64(unsigned long x, unsigned long y) {
    return (unsigned __int64) x * y;
}

unsigned long UInt32x32To64Div32(unsigned long a, unsigned long b, unsigned long c) {
    return UInt32x32To64(a, b) / c;
}

_UInt32x32To64Div32: # @UInt32x32To64Div32
    mov eax, dword ptr [esp + 8]
    mul dword ptr [esp + 4]
    push 0
    push dword ptr [esp + 16]
    push edx
    push eax
    call __aulldiv
    ret

unsigned long UInt32x32To64Rem32(unsigned long a, unsigned long b, unsigned long c) {
    return UInt32x32To64(a, b) % c;
}

_UInt32x32To64Rem32: # @UInt32x32To64Rem32
    mov eax, dword ptr [esp + 8]
    mul dword ptr [esp + 4]
    push 0
    push dword ptr [esp + 16]
    push edx
    push eax
    call __aullrem
    ret