Riyaz Puthiyapurayil via llvm-dev
2021-Jan-26 19:25 UTC
[llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd`
Do you see any UB in the following modified test? Here also, I see a difference between foo3 and foo4. Again, trunk and gcc match. But not Clang-10 #include <stdio.h> #include <stdint.h> #include <string.h> __attribute__ ((noinline)) uint64_t foo4(__int128_t* arr1) { unsigned chunk = 34 >> 5; unsigned bitIndex = 34 - chunk * 32; unsigned* arr = (unsigned *)arr + chunk; __int128_t v1 = *arr1; v1 = v1 >> bitIndex; return (uint64_t) v1; } __attribute__ ((noinline)) uint64_t foo3(__int128_t* arr1, unsigned index) { unsigned chunk = index >> 5; unsigned bitIndex = index - chunk * 32; unsigned *arr = (unsigned *)arr + chunk; __int128_t v1 = *arr1; v1 = v1 >> bitIndex; return (uint64_t) v1; } int main() { unsigned arr[4]; arr[0]= 0x6f7cfd6f; arr[1]= 0xd96c9806; arr[2]= 0x89420144; arr[3]= 0x8a20f548; __int128_t arr1; memcpy(&arr1, arr, sizeof(arr1)); printf("foo3 %lx\n",foo3(&arr1,34) & 0x3ffffffff); // prints 1365b2601 (clang-10) 19bdf3f5b (gcc, clang-12) printf("foo4 %lx\n",foo4(&arr1) & 0x3ffffffff); // prints 19bdf3f5b } -----Original Message----- From: Riyaz Puthiyapurayil Sent: Tuesday, January 26, 2021 11:10 AM To: 'Roman Lebedev' <lebedev.ri at gmail.com> Subject: RE: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd` You may be right about that the cast from unsigned* to __int128_t* being UB. But I think that has nothing to do with the bug. That code can be fixed to use a __int128_t array and change all the casts from __int128_t* to unsigned* (instead of the other way around) and we would still have the issue. -----Original Message----- From: Roman Lebedev <lebedev.ri at gmail.com<mailto:lebedev.ri at gmail.com>> Sent: Tuesday, January 26, 2021 10:34 AM To: Riyaz Puthiyapurayil <riyaz at synopsys.com<mailto:riyaz at synopsys.com>> Cc: llvm-dev at lists.llvm.org<mailto:llvm-dev at lists.llvm.org> Subject: Re: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd` On Tue, Jan 26, 2021 at 9:28 PM Riyaz Puthiyapurayil via llvm-dev <llvm-dev at lists.llvm.org<mailto:llvm-dev at lists.llvm.org>> wrote:>> It appears that LLVM 10 is miscompiling the following test on x86_64 but the trunk version seems to work fine. Before I spend any time debugging this, it would be helpful if someone can point me to a bug fix if there was one.>>>> #include <stdio.h>>> #include <stdint.h>>>>> __attribute__ ((noinline)) uint64_t foo4(unsigned* arr)>> {>> unsigned chunk = 34 >> 5;>> unsigned bitIndex = 34 - chunk * 32;>>>> arr = arr + chunk;>> __int128_t* arr1 = (__int128_t*) arr;I'm pretty sure this is UB, because the alignment of __int128_t is bigger than that of unsigned.>> __int128_t v1 = *arr1;>> v1 = v1 >> bitIndex;>>>> return (uint64_t) v1;>> }>>>> __attribute__ ((noinline)) uint64_t foo3(unsigned* arr, unsigned> index)>> {>> unsigned chunk = index >> 5;>> unsigned bitIndex = index - chunk * 32;>>>> arr = arr + chunk;>> __int128_t* arr1 = (__int128_t*) arr;>> __int128_t v1 = *arr1;>> v1 = v1 >> bitIndex;>>>> return (uint64_t) v1;>> }>>>>>> int main() {>>>> unsigned arr[5];>>>> arr[0]= 0x6f7cfd6f;>> arr[1]= 0xd96c9806;>> arr[2]= 0x89420144;>> arr[3]= 0x8a20f548;>>>> printf("foo3 %lx\n",foo3(arr,34) & 0x3ffffffff); // prints> 222508051 instead of 1365b2601>> printf("foo4 %lx\n",foo4(arr) & 0x3ffffffff); // prints 1365b2601> as expected>> }>>>> If you look at the assembly, there is an ‘and cl, 31’ missing in foo3 which is needed to make sure that the shift count is in the range [0...31]. This is missing in the generated code. Note that if the count in CL register is greater than 31, x86_64 manual states that the behavior of shrd is undefined.>>>> # clang 10>> foo3(unsigned int*, unsigned int): # @foo3(unsigned int*, unsigned int)>> mov ecx, esi>> mov edx, esi>> shr edx, 5>> mov rax, qword ptr [rdi + 4*rdx]>> mov rdx, qword ptr [rdi + 4*rdx + 8]>> shrd rax, rdx, cl>> ret>>>> # Trunk>> foo3(unsigned int*, unsigned int): # @foo3(unsigned int*, unsigned int)>> mov ecx, esi>> mov edx, esi>> shr edx, 5>> mov rax, qword ptr [rdi + 4*rdx]>> mov rdx, qword ptr [rdi + 4*rdx + 8]>> and cl, 31>> shrd rax, rdx, cl>> ret>Roman.> _______________________________________________> LLVM Developers mailing list> llvm-dev at lists.llvm.org<mailto:llvm-dev at lists.llvm.org>> https://urldefense.com/v3/__https://lists.llvm.org/cgi-bin/mailman/lis<https://urldefense.com/v3/__https:/lists.llvm.org/cgi-bin/mailman/lis>> tinfo/llvm-dev__;!!A4F2R9G_pg!Mq1ZgXCY7kwDMwNwdn4Tcz-KgkoXMlP_rjq7_U9M> rh1gbrtqvDoSi6cMuSLZaXIPgtijPW55JJ_8$-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20210126/6835eefa/attachment-0001.html>
Riyaz Puthiyapurayil via llvm-dev
2021-Jan-26 19:29 UTC
[llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd`
Apologies, outlook messed up the formatting in the previous email. Here is it again. Do you see any UB in the following modified test? Here also, I see a difference between foo3 and foo4. Again, trunk and gcc match. But not Clang-10. #include <stdio.h> #include <stdint.h> #include <string.h> __attribute__ ((noinline)) uint64_t foo4(__int128_t* arr1) { unsigned chunk = 34 >> 5; unsigned bitIndex = 34 - chunk * 32; unsigned* arr = (unsigned *)arr + chunk; __int128_t v1 = *arr1; v1 = v1 >> bitIndex; return (uint64_t) v1; } __attribute__ ((noinline)) uint64_t foo3(__int128_t* arr1, unsigned index) { unsigned chunk = index >> 5; unsigned bitIndex = index - chunk * 32; unsigned *arr = (unsigned *)arr + chunk; __int128_t v1 = *arr1; v1 = v1 >> bitIndex; return (uint64_t) v1; } int main() { unsigned arr[4]; arr[0]= 0x6f7cfd6f; arr[1]= 0xd96c9806; arr[2]= 0x89420144; arr[3]= 0x8a20f548; __int128_t arr1; memcpy(&arr1, arr, sizeof(arr1)); printf("foo3 %lx\n",foo3(&arr1,34) & 0x3ffffffff); printf("foo4 %lx\n",foo4(&arr1) & 0x3ffffffff); } From: Riyaz Puthiyapurayil Sent: Tuesday, January 26, 2021 11:25 AM To: 'Roman Lebedev' <lebedev.ri at gmail.com> Cc: llvm-dev at lists.llvm.org Subject: RE: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd` Do you see any UB in the following modified test? Here also, I see a difference between foo3 and foo4. Again, trunk and gcc match. But not Clang-10 #include <stdio.h> #include <stdint.h> #include <string.h> __attribute__ ((noinline)) uint64_t foo4(__int128_t* arr1) { unsigned chunk = 34 >> 5; unsigned bitIndex = 34 - chunk * 32; unsigned* arr = (unsigned *)arr + chunk; __int128_t v1 = *arr1; v1 = v1 >> bitIndex; return (uint64_t) v1; } __attribute__ ((noinline)) uint64_t foo3(__int128_t* arr1, unsigned index) { unsigned chunk = index >> 5; unsigned bitIndex = index - chunk * 32; unsigned *arr = (unsigned *)arr + chunk; __int128_t v1 = *arr1; v1 = v1 >> bitIndex; return (uint64_t) v1; } int main() { unsigned arr[4]; arr[0]= 0x6f7cfd6f; arr[1]= 0xd96c9806; arr[2]= 0x89420144; arr[3]= 0x8a20f548; __int128_t arr1; memcpy(&arr1, arr, sizeof(arr1)); printf("foo3 %lx\n",foo3(&arr1,34) & 0x3ffffffff); // prints 1365b2601 (clang-10) 19bdf3f5b (gcc, clang-12) printf("foo4 %lx\n",foo4(&arr1) & 0x3ffffffff); // prints 19bdf3f5b } -----Original Message----- From: Riyaz Puthiyapurayil Sent: Tuesday, January 26, 2021 11:10 AM To: 'Roman Lebedev' <lebedev.ri at gmail.com<mailto:lebedev.ri at gmail.com>> Subject: RE: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd` You may be right about that the cast from unsigned* to __int128_t* being UB. But I think that has nothing to do with the bug. That code can be fixed to use a __int128_t array and change all the casts from __int128_t* to unsigned* (instead of the other way around) and we would still have the issue. -----Original Message----- From: Roman Lebedev <lebedev.ri at gmail.com<mailto:lebedev.ri at gmail.com>> Sent: Tuesday, January 26, 2021 10:34 AM To: Riyaz Puthiyapurayil <riyaz at synopsys.com<mailto:riyaz at synopsys.com>> Cc: llvm-dev at lists.llvm.org<mailto:llvm-dev at lists.llvm.org> Subject: Re: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd` On Tue, Jan 26, 2021 at 9:28 PM Riyaz Puthiyapurayil via llvm-dev <llvm-dev at lists.llvm.org<mailto:llvm-dev at lists.llvm.org>> wrote:>> It appears that LLVM 10 is miscompiling the following test on x86_64 but the trunk version seems to work fine. Before I spend any time debugging this, it would be helpful if someone can point me to a bug fix if there was one.>>>> #include <stdio.h>>> #include <stdint.h>>>>> __attribute__ ((noinline)) uint64_t foo4(unsigned* arr)>> {>> unsigned chunk = 34 >> 5;>> unsigned bitIndex = 34 - chunk * 32;>>>> arr = arr + chunk;>> __int128_t* arr1 = (__int128_t*) arr;I'm pretty sure this is UB, because the alignment of __int128_t is bigger than that of unsigned.>> __int128_t v1 = *arr1;>> v1 = v1 >> bitIndex;>>>> return (uint64_t) v1;>> }>>>> __attribute__ ((noinline)) uint64_t foo3(unsigned* arr, unsigned> index)>> {>> unsigned chunk = index >> 5;>> unsigned bitIndex = index - chunk * 32;>>>> arr = arr + chunk;>> __int128_t* arr1 = (__int128_t*) arr;>> __int128_t v1 = *arr1;>> v1 = v1 >> bitIndex;>>>> return (uint64_t) v1;>> }>>>>>> int main() {>>>> unsigned arr[5];>>>> arr[0]= 0x6f7cfd6f;>> arr[1]= 0xd96c9806;>> arr[2]= 0x89420144;>> arr[3]= 0x8a20f548;>>>> printf("foo3 %lx\n",foo3(arr,34) & 0x3ffffffff); // prints> 222508051 instead of 1365b2601>> printf("foo4 %lx\n",foo4(arr) & 0x3ffffffff); // prints 1365b2601> as expected>> }>>>> If you look at the assembly, there is an ‘and cl, 31’ missing in foo3 which is needed to make sure that the shift count is in the range [0...31]. This is missing in the generated code. Note that if the count in CL register is greater than 31, x86_64 manual states that the behavior of shrd is undefined.>>>> # clang 10>> foo3(unsigned int*, unsigned int): # @foo3(unsigned int*, unsigned int)>> mov ecx, esi>> mov edx, esi>> shr edx, 5>> mov rax, qword ptr [rdi + 4*rdx]>> mov rdx, qword ptr [rdi + 4*rdx + 8]>> shrd rax, rdx, cl>> ret>>>> # Trunk>> foo3(unsigned int*, unsigned int): # @foo3(unsigned int*, unsigned int)>> mov ecx, esi>> mov edx, esi>> shr edx, 5>> mov rax, qword ptr [rdi + 4*rdx]>> mov rdx, qword ptr [rdi + 4*rdx + 8]>> and cl, 31>> shrd rax, rdx, cl>> ret>Roman.> _______________________________________________> LLVM Developers mailing list> llvm-dev at lists.llvm.org<mailto:llvm-dev at lists.llvm.org>> https://urldefense.com/v3/__https://lists.llvm.org/cgi-bin/mailman/lis<https://urldefense.com/v3/__https:/lists.llvm.org/cgi-bin/mailman/lis>> tinfo/llvm-dev__;!!A4F2R9G_pg!Mq1ZgXCY7kwDMwNwdn4Tcz-KgkoXMlP_rjq7_U9M> rh1gbrtqvDoSi6cMuSLZaXIPgtijPW55JJ_8$-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20210126/33600961/attachment-0001.html>
Roman Lebedev via llvm-dev
2021-Jan-26 19:31 UTC
[llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd`
https://godbolt.org/z/5bcsj9 example.c:43:37: runtime error: subtraction of unsigned offset from 0x000000000022 overflowed to 0x7fffd01217c8 SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior example.c:43:37 in example.c:19:37: runtime error: applying non-zero offset 140576899311424 to null pointer SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior example.c:19:37 in foo3 19bdf3f5b foo4 19bdf3f5b On Tue, Jan 26, 2021 at 10:29 PM Riyaz Puthiyapurayil <Riyaz.Puthiyapurayil at synopsys.com> wrote:> > Apologies, outlook messed up the formatting in the previous email. Here is it again. > > > > Do you see any UB in the following modified test? Here also, I see a difference between foo3 and foo4. Again, trunk and gcc match. But not Clang-10. > > > > #include <stdio.h> > > #include <stdint.h> > > #include <string.h> > > > > __attribute__ ((noinline)) uint64_t foo4(__int128_t* arr1) > > { > > unsigned chunk = 34 >> 5; > > unsigned bitIndex = 34 - chunk * 32; > > > > unsigned* arr = (unsigned *)arr + chunk; > > __int128_t v1 = *arr1; > > v1 = v1 >> bitIndex; > > > > return (uint64_t) v1; > > } > > > > __attribute__ ((noinline)) uint64_t foo3(__int128_t* arr1, unsigned index) > > { > > unsigned chunk = index >> 5; > > unsigned bitIndex = index - chunk * 32; > > > > unsigned *arr = (unsigned *)arr + chunk; > > __int128_t v1 = *arr1; > > v1 = v1 >> bitIndex; > > > > return (uint64_t) v1; > > } > > > > int main() { > > > > unsigned arr[4]; > > > > arr[0]= 0x6f7cfd6f; > > arr[1]= 0xd96c9806; > > arr[2]= 0x89420144; > > arr[3]= 0x8a20f548; > > > > __int128_t arr1; > > > > memcpy(&arr1, arr, sizeof(arr1)); > > > > printf("foo3 %lx\n",foo3(&arr1,34) & 0x3ffffffff); > > printf("foo4 %lx\n",foo4(&arr1) & 0x3ffffffff); > > } > > > > > > > > From: Riyaz Puthiyapurayil > Sent: Tuesday, January 26, 2021 11:25 AM > To: 'Roman Lebedev' <lebedev.ri at gmail.com> > Cc: llvm-dev at lists.llvm.org > Subject: RE: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd` > > > > Do you see any UB in the following modified test? Here also, I see a difference between foo3 and foo4. Again, trunk and gcc match. But not Clang-10 > > > > #include <stdio.h> > > #include <stdint.h> > > #include <string.h> > > > > __attribute__ ((noinline)) uint64_t foo4(__int128_t* arr1) > > { > > unsigned chunk = 34 >> 5; > > unsigned bitIndex = 34 - chunk * 32; > > > > unsigned* arr = (unsigned *)arr + chunk; > > __int128_t v1 = *arr1; > > v1 = v1 >> bitIndex; > > > > return (uint64_t) v1; > > } > > > > __attribute__ ((noinline)) uint64_t foo3(__int128_t* arr1, unsigned index) > > { > > unsigned chunk = index >> 5; > > unsigned bitIndex = index - chunk * 32; > > > > unsigned *arr = (unsigned *)arr + chunk; > > __int128_t v1 = *arr1; > > v1 = v1 >> bitIndex; > > > > return (uint64_t) v1; > > } > > > > int main() { > > > > unsigned arr[4]; > > > > arr[0]= 0x6f7cfd6f; > > arr[1]= 0xd96c9806; > > arr[2]= 0x89420144; > > arr[3]= 0x8a20f548; > > > > __int128_t arr1; > > > > memcpy(&arr1, arr, sizeof(arr1)); > > > > printf("foo3 %lx\n",foo3(&arr1,34) & 0x3ffffffff); // prints 1365b2601 (clang-10) 19bdf3f5b (gcc, clang-12) > > printf("foo4 %lx\n",foo4(&arr1) & 0x3ffffffff); // prints 19bdf3f5b > > } > > > > -----Original Message----- > From: Riyaz Puthiyapurayil > Sent: Tuesday, January 26, 2021 11:10 AM > To: 'Roman Lebedev' <lebedev.ri at gmail.com> > Subject: RE: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd` > > > > You may be right about that the cast from unsigned* to __int128_t* being UB. But I think that has nothing to do with the bug. That code can be fixed to use a __int128_t array and change all the casts from __int128_t* to unsigned* (instead of the other way around) and we would still have the issue. > > > > -----Original Message----- > > From: Roman Lebedev <lebedev.ri at gmail.com> > > Sent: Tuesday, January 26, 2021 10:34 AM > > To: Riyaz Puthiyapurayil <riyaz at synopsys.com> > > Cc: llvm-dev at lists.llvm.org > > Subject: Re: [llvm-dev] LLVM 10 is miscompiling this code on x86_64 with an invalid shift count for `shrd` > > > > On Tue, Jan 26, 2021 at 9:28 PM Riyaz Puthiyapurayil via llvm-dev <llvm-dev at lists.llvm.org> wrote: > > > > > > It appears that LLVM 10 is miscompiling the following test on x86_64 but the trunk version seems to work fine. Before I spend any time debugging this, it would be helpful if someone can point me to a bug fix if there was one. > > > > > > > > > > > > #include <stdio.h> > > > > > > #include <stdint.h> > > > > > > > > > > > > __attribute__ ((noinline)) uint64_t foo4(unsigned* arr) > > > > > > { > > > > > > unsigned chunk = 34 >> 5; > > > > > > unsigned bitIndex = 34 - chunk * 32; > > > > > > > > > > > > arr = arr + chunk; > > > > > > __int128_t* arr1 = (__int128_t*) arr; > > I'm pretty sure this is UB, because the alignment of __int128_t is bigger than that of unsigned. > > > > > > > > __int128_t v1 = *arr1; > > > > > > v1 = v1 >> bitIndex; > > > > > > > > > > > > return (uint64_t) v1; > > > > > > } > > > > > > > > > > > > __attribute__ ((noinline)) uint64_t foo3(unsigned* arr, unsigned > > > index) > > > > > > { > > > > > > unsigned chunk = index >> 5; > > > > > > unsigned bitIndex = index - chunk * 32; > > > > > > > > > > > > arr = arr + chunk; > > > > > > __int128_t* arr1 = (__int128_t*) arr; > > > > > > __int128_t v1 = *arr1; > > > > > > v1 = v1 >> bitIndex; > > > > > > > > > > > > return (uint64_t) v1; > > > > > > } > > > > > > > > > > > > > > > > > > int main() { > > > > > > > > > > > > unsigned arr[5]; > > > > > > > > > > > > arr[0]= 0x6f7cfd6f; > > > > > > arr[1]= 0xd96c9806; > > > > > > arr[2]= 0x89420144; > > > > > > arr[3]= 0x8a20f548; > > > > > > > > > > > > printf("foo3 %lx\n",foo3(arr,34) & 0x3ffffffff); // prints > > > 222508051 instead of 1365b2601 > > > > > > printf("foo4 %lx\n",foo4(arr) & 0x3ffffffff); // prints 1365b2601 > > > as expected > > > > > > } > > > > > > > > > > > > If you look at the assembly, there is an ‘and cl, 31’ missing in foo3 which is needed to make sure that the shift count is in the range [0...31]. This is missing in the generated code. Note that if the count in CL register is greater than 31, x86_64 manual states that the behavior of shrd is undefined. > > > > > > > > > > > > # clang 10 > > > > > > foo3(unsigned int*, unsigned int): # @foo3(unsigned int*, unsigned int) > > > > > > mov ecx, esi > > > > > > mov edx, esi > > > > > > shr edx, 5 > > > > > > mov rax, qword ptr [rdi + 4*rdx] > > > > > > mov rdx, qword ptr [rdi + 4*rdx + 8] > > > > > > shrd rax, rdx, cl > > > > > > ret > > > > > > > > > > > > # Trunk > > > > > > foo3(unsigned int*, unsigned int): # @foo3(unsigned int*, unsigned int) > > > > > > mov ecx, esi > > > > > > mov edx, esi > > > > > > shr edx, 5 > > > > > > mov rax, qword ptr [rdi + 4*rdx] > > > > > > mov rdx, qword ptr [rdi + 4*rdx + 8] > > > > > > and cl, 31 > > > > > > shrd rax, rdx, cl > > > > > > ret > > > > > > > Roman. > > > > > _______________________________________________ > > > LLVM Developers mailing list > > > llvm-dev at lists.llvm.org > > > https://urldefense.com/v3/__https://lists.llvm.org/cgi-bin/mailman/lis > > > tinfo/llvm-dev__;!!A4F2R9G_pg!Mq1ZgXCY7kwDMwNwdn4Tcz-KgkoXMlP_rjq7_U9M > > > rh1gbrtqvDoSi6cMuSLZaXIPgtijPW55JJ_8$