Stefan Kanthak via llvm-dev
2018-Nov-28 11:13 UTC
[llvm-dev] Where's the optimiser gone? (part 3)
Hi @ll,

when called with "-O3 -target i386", the compiler fails to generate
properly optimised code (as shown on the right side) for two of the
following (rather trivial) functions: (see <https://godbolt.org/z/tiwJqX>)

Especially notice the difference between the signed and unsigned
variants: the latter are properly optimised!

regards
Stefan Kanthak

--- sample3.c ---

__inline
long long Int32x32To64(long x, long y) {
    return (long long) x * y;
}

long Int32x32To64Div32(long a, long b, long c) {
    return Int32x32To64(a, b) / c;
}

Int32x32To64Div32: # @Int32x32To64Div32
    push ebp                        | push ebp
    mov ebp, esp                    | mov ebp, esp
    push esi
    mov eax, dword ptr [ebp + 12]
    mov ecx, dword ptr [ebp + 16]   | mov eax, dword ptr [ebp + 16]
    imul dword ptr [ebp + 8]        | cdq
    mov esi, ecx                    | push edx
    sar esi, 31                     | push eax
    push esi                        | mov eax, dword ptr [ebp + 12]
    push ecx                        | imul dword ptr [ebp + 8]
    push edx                        | push edx
    push eax                        | push eax
    call __divdi3                   | call __divdi3
    add esp, 16                     | add esp, 16
    pop esi
    pop ebp                         | pop ebp
    ret                             | ret

long Int32x32To64Rem32(long a, long b, long c) {
    return Int32x32To64(a, b) % c;
}

Int32x32To64Rem32: # @Int32x32To64Rem32
    push ebp                        | push ebp
    mov ebp, esp                    | mov ebp, esp
    push esi
    mov eax, dword ptr [ebp + 12]
    mov ecx, dword ptr [ebp + 16]   | mov eax, dword ptr [ebp + 16]
    imul dword ptr [ebp + 8]        | cdq
    mov esi, ecx                    | push edx
    sar esi, 31                     | push eax
    push esi                        | mov eax, dword ptr [ebp + 12]
    push ecx                        | imul dword ptr [ebp + 8]
    push edx                        | push edx
    push eax                        | push eax
    call __moddi3                   | call __moddi3
    add esp, 16                     | add esp, 16
    pop esi
    pop ebp                         | pop ebp
    ret                             | ret

__inline
unsigned long long __fastcall UInt32x32To64(unsigned long x, unsigned long y) {
    return (unsigned long long) x * y;
}

unsigned long UInt32x32To64Div32(unsigned long a, unsigned long b, unsigned long c) {
    return UInt32x32To64(a, b) / c;
}

UInt32x32To64Div32: # @UInt32x32To64Div32
    push ebp
    mov ebp, esp
    mov eax, dword ptr [ebp + 12]
    mul dword ptr [ebp + 8]
    push 0
    push dword ptr [ebp + 16]
    push edx
    push eax
    call __udivdi3
    add esp, 16
    pop ebp
    ret

unsigned long UInt32x32To64Rem32(unsigned long a, unsigned long b, unsigned long c) {
    return UInt32x32To64(a, b) % c;
}

UInt32x32To64Rem32: # @UInt32x32To64Rem32
    push ebp
    mov ebp, esp
    mov eax, dword ptr [ebp + 12]
    mul dword ptr [ebp + 8]
    push 0
    push dword ptr [ebp + 16]
    push edx
    push eax
    call __umoddi3
    add esp, 16
    pop ebp
    ret