Juliusz Sompolski
2010-Nov-23 07:23 UTC
[LLVMdev] Unrolling loops into constant-time expressions
Hello, I've come across another example: I'm compiling with clang -S -emit-llvm -std=gnu99 -O3 clang version 2.9 (trunk 118238) Target: x86_64-unknown-linux-gnu Thread model: posix I take the code: int loops(int x) { int ret = 0; for(int i = 0; i < x; i++) { for(int j = 0; j < x; j++) { ret += 1; } } return ret; } and the generated code is: define i32 @loops(i32 %x) nounwind readnone { ; <label>:0 %1 = icmp sgt i32 %x, 0 br i1 %1, label %2, label %._crit_edge6 ; <label>:2 ; preds = %0, %2 %i.03.us = phi i32 [ %3, %2 ], [ 0, %0 ] %ret.14.us = phi i32 [ %tmp, %2 ], [ 0, %0 ] %tmp = add i32 %ret.14.us, %x %3 = add nsw i32 %i.03.us, 1 %exitcond = icmp eq i32 %3, %x br i1 %exitcond, label %._crit_edge6, label %2 ._crit_edge6: ; preds = %2, %0 %ret.1.lcssa = phi i32 [ 0, %0 ], [ %tmp, %2 ] ret i32 %ret.1.lcssa } This code: * unrolls the inner loop simplifying the code to sth like: for(int i = 0; i < x; i++) { ret += x; * But it is unable to also unroll the outer loop, even though it is a simple expression. On the other hand it is able to unroll a complicated expression inside one loop into a constant-time expression, like here: int loop(int x) { int ret = 0; for(int i = 0; i < x; i++) { ret += 1+i*i + i*(i+2); } return ret; } generates: define i32 @loop(i32 %x) nounwind readnone { %1 = icmp sgt i32 %x, 0 br i1 %1, label %bb.nph, label %3 bb.nph: ; preds = %0 %tmp4 = add i32 %x, -1 %tmp6 = add i32 %x, -2 %tmp16 = add i32 %x, -3 %tmp7 = zext i32 %tmp6 to i33 %tmp5 = zext i32 %tmp4 to i33 %tmp17 = zext i32 %tmp16 to i33 %tmp15 = mul i33 %tmp5, %tmp7 %tmp18 = mul i33 %tmp15, %tmp17 %tmp8 = mul i32 %tmp4, %tmp6 %tmp19 = lshr i33 %tmp18, 1 %2 = shl i32 %tmp8, 2 %tmp20 = trunc i33 %tmp19 to i32 %tmp12 = mul i32 %x, 5 %tmp1125 = and i32 %2, -8 %tmp21 = mul i32 %tmp20, 1431655764 %tmp13 = add i32 %tmp1125, %tmp12 %tmp14 = add i32 %tmp13, -4 %tmp22 = sub i32 %tmp14, %tmp21 ret i32 %tmp22 ; <label>:3 ; preds = %0 ret i32 0 } which has no loop, which means that clang -O3 is capable of: * unrolling expressions like for(int i = 0; i < n; i++) ret += i (or i*i, or i*i*i... i checked up to i6 AFAIR) into a constant-time formula * unrolling such polynomial-like expressions into a constant time formula, and discarding the loop at all. Even though it can perform such complicated tricks on one loop, it never unrolls two nested loop into a constant even in a simple case. Is there a way to make it do so? Cheers, Juliusz Sompolski
Duncan Sands
2010-Nov-23 08:51 UTC
[LLVMdev] Unrolling loops into constant-time expressions
Hi Juliusz,> I've come across another example:this looks like a different issue to the previous example. If you run the output of clang a second time through the optimizers it gets simplified. Ciao, Duncan.> > I'm compiling with > clang -S -emit-llvm -std=gnu99 -O3 > clang version 2.9 (trunk 118238) > Target: x86_64-unknown-linux-gnu > Thread model: posix > > I take the code: > int loops(int x) { > int ret = 0; > for(int i = 0; i< x; i++) { > for(int j = 0; j< x; j++) { > ret += 1; > } > } > return ret; > } > > and the generated code is: > define i32 @loops(i32 %x) nounwind readnone { > ;<label>:0 > %1 = icmp sgt i32 %x, 0 > br i1 %1, label %2, label %._crit_edge6 > > ;<label>:2 ; preds = %0, %2 > %i.03.us = phi i32 [ %3, %2 ], [ 0, %0 ] > %ret.14.us = phi i32 [ %tmp, %2 ], [ 0, %0 ] > %tmp = add i32 %ret.14.us, %x > %3 = add nsw i32 %i.03.us, 1 > %exitcond = icmp eq i32 %3, %x > br i1 %exitcond, label %._crit_edge6, label %2 > > ._crit_edge6: ; preds = %2, %0 > %ret.1.lcssa = phi i32 [ 0, %0 ], [ %tmp, %2 ] > ret i32 %ret.1.lcssa > } > > This code: > * unrolls the inner loop simplifying the code to sth like: > for(int i = 0; i< x; i++) { > ret += x; > * But it is unable to also unroll the outer loop, even though it is a > simple expression. > > On the other hand it is able to unroll a complicated expression inside > one loop into a constant-time expression, like here: > int loop(int x) { > int ret = 0; > for(int i = 0; i< x; i++) { > ret += 1+i*i + i*(i+2); > } > return ret; > } > generates: > define i32 @loop(i32 %x) nounwind readnone { > %1 = icmp sgt i32 %x, 0 > br i1 %1, label %bb.nph, label %3 > > bb.nph: ; preds = %0 > %tmp4 = add i32 %x, -1 > %tmp6 = add i32 %x, -2 > %tmp16 = add i32 %x, -3 > %tmp7 = zext i32 %tmp6 to i33 > %tmp5 = zext i32 %tmp4 to i33 > %tmp17 = zext i32 %tmp16 to i33 > %tmp15 = mul i33 %tmp5, %tmp7 > %tmp18 = mul i33 %tmp15, %tmp17 > %tmp8 = mul i32 %tmp4, %tmp6 > %tmp19 = lshr i33 %tmp18, 1 > %2 = shl i32 %tmp8, 2 > %tmp20 = trunc i33 %tmp19 to i32 > %tmp12 = mul i32 %x, 5 > %tmp1125 = and i32 %2, -8 > %tmp21 = mul i32 %tmp20, 1431655764 > %tmp13 = add i32 %tmp1125, %tmp12 > %tmp14 = add i32 %tmp13, -4 > %tmp22 = sub i32 %tmp14, %tmp21 > ret i32 %tmp22 > > ;<label>:3 ; preds = %0 > ret i32 0 > } > which has no loop, which means that clang -O3 is capable of: > * unrolling expressions like > for(int i = 0; i< n; i++) > ret += i (or i*i, or i*i*i... i checked up to i6 AFAIR) > into a constant-time formula > * unrolling such polynomial-like expressions into a constant time > formula, and discarding the loop at all. > > Even though it can perform such complicated tricks on one loop, it never > unrolls two nested loop into a constant even in a simple case. > > Is there a way to make it do so? > > Cheers, > Juliusz Sompolski > _______________________________________________ > LLVM Developers mailing list > LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev