Taral
2011-Aug-02 00:36 UTC
[LLVMdev] clang: Manual unfolding doesn't match automatic unfolding
Here's the code and compilation steps:

    #include <stdint.h>

    typedef unsigned int uint128_t __attribute__((mode(TI)));
    typedef struct{ uint64_t l[5]; } s;

    void f(s * restrict r, const s * restrict x, const s * restrict y) {
        uint128_t t[5] = {0, 0, 0, 0, 0};
    #define BODY(i,j) { int i_ = i < j ? i : j; int j_ = i < j ? j : i; uint128_t m = (uint128_t) x->l[i_] * (y->l[j_] * (i + j > 4 ? 19 : 1)); if (i + j > 4) { t[i + j - 5] += m; } else { t[i + j] += m; } }
    #define LOOP(i) BODY(i, 0); BODY(i, 1); BODY(i, 2); BODY(i, 3); BODY(i, 4);
        LOOP(0); LOOP(1); LOOP(2); LOOP(3); LOOP(4);
        const uint64_t mask = (1LL << 51) - 1;
        for (int i = 0; i < 5; i++) {
            r->l[i] = ((uint64_t) t[i] & mask) + (i == 0 ? 19 : 1) * (uint64_t)(t[(i + 4) % 5] >> 51);
        }
    }

    % clang -O4 -S -o f.l f.c

If you change the loop to the unrolled version:

    #define FOLD1(i) r->l[i] = ((uint64_t) t[i] & mask) + (i == 0 ? 19 : 1) * (uint64_t)(t[(i + 4) % 5] >> 51)
        FOLD1(0); FOLD1(1); FOLD1(2); FOLD1(3); FOLD1(4);

you get different code, which is very sad-making. Any ideas?

--
Taral <taralx at gmail.com>
"Please let me know if there's any further trouble I can give you."
    -- Unknown