Riyaz Puthiyapurayil via llvm-dev
2021-Jan-26 18:28 UTC
[llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd`
It appears that LLVM 10 is miscompiling the following test on x86_64 but the trunk version seems to work fine. Before I spend any time debugging this, it would be helpful if someone can point me to a bug fix if there was one. #include <stdio.h> #include <stdint.h> __attribute__ ((noinline)) uint64_t foo4(unsigned* arr) { unsigned chunk = 34 >> 5; unsigned bitIndex = 34 - chunk * 32; arr = arr + chunk; __int128_t* arr1 = (__int128_t*) arr; __int128_t v1 = *arr1; v1 = v1 >> bitIndex; return (uint64_t) v1; } __attribute__ ((noinline)) uint64_t foo3(unsigned* arr, unsigned index) { unsigned chunk = index >> 5; unsigned bitIndex = index - chunk * 32; arr = arr + chunk; __int128_t* arr1 = (__int128_t*) arr; __int128_t v1 = *arr1; v1 = v1 >> bitIndex; return (uint64_t) v1; } int main() { unsigned arr[5]; arr[0]= 0x6f7cfd6f; arr[1]= 0xd96c9806; arr[2]= 0x89420144; arr[3]= 0x8a20f548; printf("foo3 %lx\n",foo3(arr,34) & 0x3ffffffff); // prints 222508051 instead of 1365b2601 printf("foo4 %lx\n",foo4(arr) & 0x3ffffffff); // prints 1365b2601 as expected } If you look at the assembly, there is an 'and cl, 31' missing in foo3 which is needed to make sure that the shift count is in the range [0...31]. This is missing in the generated code. Note that if the count in CL register is greater than 31, x86_64 manual states that the behavior of shrd is undefined. # clang 10 foo3(unsigned int*, unsigned int): # @foo3(unsigned int*, unsigned int) mov ecx, esi mov edx, esi shr edx, 5 mov rax, qword ptr [rdi + 4*rdx] mov rdx, qword ptr [rdi + 4*rdx + 8] shrd rax, rdx, cl ret # Trunk foo3(unsigned int*, unsigned int): # @foo3(unsigned int*, unsigned int) mov ecx, esi mov edx, esi shr edx, 5 mov rax, qword ptr [rdi + 4*rdx] mov rdx, qword ptr [rdi + 4*rdx + 8] and cl, 31 shrd rax, rdx, cl ret -------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20210126/57c41c82/attachment.html>
Roman Lebedev via llvm-dev
2021-Jan-26 18:34 UTC
[llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd`
On Tue, Jan 26, 2021 at 9:28 PM Riyaz Puthiyapurayil via llvm-dev <llvm-dev at lists.llvm.org> wrote: > > It appears that LLVM 10 is miscompiling the following test on x86_64 but the trunk version seems to work fine. Before I spend any time debugging this, it would be helpful if someone can point me to a bug fix if there was one. > > > > #include <stdio.h> > > #include <stdint.h> > > > > __attribute__ ((noinline)) uint64_t foo4(unsigned* arr) > > { > > unsigned chunk = 34 >> 5; > > unsigned bitIndex = 34 - chunk * 32; > > > > arr = arr + chunk; > > __int128_t* arr1 = (__int128_t*) arr;
I'm pretty sure this is UB, because the alignment of __int128_t is bigger than that of unsigned.
> > __int128_t v1 = *arr1; > > v1 = v1 >> bitIndex; > > > > return (uint64_t) v1; > > } > > > > __attribute__ ((noinline)) uint64_t foo3(unsigned* arr, unsigned index) > > { > > unsigned chunk = index >> 5; > > unsigned bitIndex = index - chunk * 32; > > > > arr = arr + chunk; > > __int128_t* arr1 = (__int128_t*) arr; > > __int128_t v1 = *arr1; > > v1 = v1 >> bitIndex; > > > > return (uint64_t) v1; > > } > > > > > > int main() { > > > > unsigned arr[5]; > > > > arr[0]= 0x6f7cfd6f; > > arr[1]= 0xd96c9806; > > arr[2]= 0x89420144; > > arr[3]= 0x8a20f548; > > > > printf("foo3 %lx\n",foo3(arr,34) & 0x3ffffffff); // prints 222508051 instead of 1365b2601 > > printf("foo4 %lx\n",foo4(arr) & 0x3ffffffff); // prints 1365b2601 as expected > > } > > > > If you look at the assembly, there is an ‘and cl, 31’ missing in foo3 which is needed to make sure that the shift count is in the range [0...31]. This is missing in the generated code. Note that if the count in CL register is greater than 31, x86_64 manual states that the behavior of shrd is undefined. 
> > > > # clang 10 > > foo3(unsigned int*, unsigned int): # @foo3(unsigned int*, unsigned int) > > mov ecx, esi > > mov edx, esi > > shr edx, 5 > > mov rax, qword ptr [rdi + 4*rdx] > > mov rdx, qword ptr [rdi + 4*rdx + 8] > > shrd rax, rdx, cl > > ret > > > > # Trunk > > foo3(unsigned int*, unsigned int): # @foo3(unsigned int*, unsigned int) > > mov ecx, esi > > mov edx, esi > > shr edx, 5 > > mov rax, qword ptr [rdi + 4*rdx] > > mov rdx, qword ptr [rdi + 4*rdx + 8] > > and cl, 31 > > shrd rax, rdx, cl > > ret >
Roman.
> _______________________________________________ > LLVM Developers mailing list > llvm-dev at lists.llvm.org > https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev