On Wed, Oct 1, 2008 at 1:19 PM, Villmow, Micah <Micah.Villmow at amd.com>
wrote:
> LLVM seems to be generating overly complex branching based on the
> short-circuit optimization. The code in question is as follows:
>
> define void @ test_fc_while_and(float %x, float %y, float addrspace(11)*
> %result) nounwind {
>
> entry:
>
> %tobool3 = fcmp une float %x, 0.000000e+000 ;
<i1>
> [#uses=1]
>
> %tobool24 = fcmp une float %y, 0.000000e+000 ;
<i1>
> [#uses=2]
>
> %or.cond5 = and i1 %tobool3, %tobool24 ; <i1>
[#uses=1]
>
> br i1 %or.cond5, label %bb.nph, label %whileexit
>
>
>
> bb.nph: ; preds = %entry
>
> br i1 %tobool24, label %whilebody.us, label %whilebody
>
>
>
> whilebody.us: ; preds = %whilebody.us, %bb.nph
>
> …code here…
>
> br i1 %phitmp, label %whilebody.us, label %whileexit
>
>
>
> whilebody: ; preds = %bb.nph
>
> …code here…
>
> br label %whileexit
>
>
>
> whileexit: ; preds = %whilebody, %whilebody.us, %entry
>
> %z.0.lcssa = phi float [ 0.000000e+000, %entry ], [ %add,
%whilebody
> ], [ %add.us, %whilebody.us ] ; <float> [#uses=1]
>
> store float %z.0.lcssa, float addrspace(11)* %result
>
> ret void
>
> }
>
> based on original code of:
>
> void test_fc_while_and(float x, float y, float* result)
>
> {
>
> float z = (float)0;
>
> while (x && y) {
>
> z += (x * y);
>
> ++x
>
> }
>
> *result = z;
>
> }
>
>
>
> Now the problem is with the bolded code: the two comparisons and the
> and instruction that are mapped to the bolded while statement. What I am
> trying to figure out is why the bb.nph branch is even required and how I
> can disable it from being generated. The first branch instruction correctly
> handles the condition that I wanted, so there should be no reason that
> bb.nph is generated. The same goes for whilebody, as it shouldn't be there.
>
What optimization level are you running at? Here's what I get at -Os:
llvm-gcc -S -emit-llvm -o - a.c -Os
; ModuleID = 'a.c'
target datalayout
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.5"
define void @test_fc_while_and(float %x, float %y, float* %result)
nounwind optsize {
entry:
%.not7 = fcmp une float %x, 0.000000e+00 ; <i1> [#uses=1]
%0 = fcmp une float %y, 0.000000e+00 ; <i1> [#uses=2]
%or.cond8 = and i1 %.not7, %0 ; <i1> [#uses=1]
br i1 %or.cond8, label %bb, label %bb4
bb: ; preds = %bb, %entry
%x_addr.06 = phi float [ %x, %entry ], [ %3, %bb ] ; <float> [#uses=2]
%z.05 = phi float [ 0.000000e+00, %entry ], [ %2, %bb ] ; <float>
[#uses=1]
%1 = mul float %x_addr.06, %y ; <float> [#uses=1]
%2 = add float %z.05, %1 ; <float> [#uses=2]
%3 = add float %x_addr.06, 1.000000e+00 ; <float> [#uses=2]
%phitmp = fcmp une float %3, 0.000000e+00 ; <i1> [#uses=1]
%or.cond = and i1 %phitmp, %0 ; <i1> [#uses=1]
br i1 %or.cond, label %bb, label %bb4
bb4: ; preds = %bb, %entry
%z.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %2, %bb ] ;
<float> [#uses=1]
store float %z.0.lcssa, float* %result, align 4
ret void
}
-Os runs these passes more than -O2: -domfrontier -lcssa -loop-unroll
-bw