The following C code : #include <stdio.h> #include <stdlib.h> int TESTE2( int parami , int paraml ,double paramd ) { int varx=0,vary; int nI =0; //varx= parami; if( parami > 0 ) { varx = parami; vary = varx + 1; } else { varx = vary + 1; vary = paraml; } varx = varx + parami + paraml; for( nI = 1 ; nI <= paraml; nI++) { varx = varx + parami + 1 ; vary = varx + nI; } vary = varx + 5; varx = vary + paraml; return varx ; } generates the following IR : ; ModuleID = '/tmp/webcompile/_9908_0.bc' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" target triple = "i386-pc-linux-gnu" define i32 @TESTE2(i32 %parami, i32 %paraml, double %paramd) nounwind readnone { entry: %0 = shl i32 %parami, 1 ; <i32> [#uses=1] %varx.110 = add i32 %0, %paraml ; <i32> [#uses=2] %1 = icmp slt i32 %paraml, 1 ; <i1> [#uses=1] br i1 %1, label %bb5, label %bb3 bb3: ; preds = %bb3, %entry %indvar = phi i32 [ %indvar.next, %bb3 ], [ 0, %entry ] ; <i32> [#uses=3] %2 = add i32 %indvar, 2 ; <i32> [#uses=1] %3 = icmp sgt i32 %2, %paraml ; <i1> [#uses=1] %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] br i1 %3, label %bb5.loopexit, label %bb3 bb5.loopexit: ; preds = %bb3 %tmp14 = add i32 %parami, 1 ; <i32> [#uses=1] %tmp15 = mul i32 %indvar, %tmp14 ; <i32> [#uses=1] %varx.111 = add i32 %tmp15, %varx.110 ; <i32> [#uses=1] %4 = add i32 %varx.111, %parami ; <i32> [#uses=1] %phitmp = add i32 %4, 1 ; <i32> [#uses=1] br label %bb5 bb5: ; preds = %bb5.loopexit, %entry %varx.1.lcssa = phi i32 [ %varx.110, %entry ], [ %phitmp, %bb5.loopexit ] ; <i32> [#uses=1] %5 = add i32 %paraml, 5 ; <i32> [#uses=1] %6 = add i32 %5, %varx.1.lcssa ; <i32> [#uses=1] ret i32 %6 } while MSVC generates the following assembly : PUBLIC _TESTE ; Function compile flags: /Ogtpy ; File c:\msys\1.0\home\mteixeira\testeadvpl.c ; COMDAT _TESTE _TEXT SEGMENT _parami$ = 8 ; size = 4 _paraml$ = 12 ; size = 4 _paramd$ = 16 ; size = 8 _TESTE PROC ; COMDAT ; 6 : int 
varx=0,vary; ; 7 : int nI =0; ; 8 : //varx= parami; ; 9 : if( parami > 0 ) mov ecx, DWORD PTR _parami$[esp-4] ; 10 : { ; 11 : varx = parami; ; 12 : vary = 0; ; 13 : } ; 14 : else ; 15 : { ; 16 : varx = 0; ; 17 : vary = paraml; ; 18 : } ; 19 : for( nI = 1 ; nI <= paraml; nI++) mov edx, DWORD PTR _paraml$[esp-4] xor eax, eax test ecx, ecx setle al sub eax, 1 and eax, ecx cmp edx, 1 jl SHORT $LN3@TESTE add ecx, 1 imul ecx, edx add eax, ecx $LN3@TESTE: ; 20 : { ; 21 : varx = varx + parami + 1 ; ; 22 : vary = varx + nI; ; 23 : } ; 24 : ; 25 : return varx ; ; 26 : } ret 0 _TESTE ENDP _TEXT ENDS Running the same code, the object code generated with MSVC is 600 times faster than that generated with the LLVM compiler. Is there any way to get the same optimization with LLVM? Manoel Teixeira
Hi Manoel, More info is needed. What optimization level did you use when compiling LLVM? What does the LLVM x86 assembly code look like? How is this run? What are the times you're seeing? What is your system's configuration? -bw On Tue, Jan 6, 2009 at 3:02 AM, Manoel Teixeira <manoel at fonetica.com.br> wrote:> > The following C code : > #include <stdio.h> > #include <stdlib.h> > > int TESTE2( int parami , int paraml ,double paramd ) > { > int varx=0,vary; > int nI =0; > //varx= parami; > if( parami > 0 ) > { > varx = parami; > vary = varx + 1; > } > else > { > varx = vary + 1; > vary = paraml; > } > varx = varx + parami + paraml; > for( nI = 1 ; nI <= paraml; nI++) > { > varx = varx + parami + 1 ; > vary = varx + nI; > } > vary = varx + 5; > varx = vary + paraml; > > return varx ; > } > > Generates the IR : > ; ModuleID = '/tmp/webcompile/_9908_0.bc' > target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" > target triple = "i386-pc-linux-gnu" > > define i32 @TESTE2(i32 %parami, i32 %paraml, double %paramd) nounwind readnone { > entry: > %0 = shl i32 %parami, 1 ; <i32> [#uses=1] > %varx.110 = add i32 %0, %paraml ; <i32> [#uses=2] > %1 = icmp slt i32 %paraml, 1 ; <i1> [#uses=1] > br i1 %1, label %bb5, label %bb3 > > bb3: ; preds = %bb3, %entry > %indvar = phi i32 [ %indvar.next, %bb3 ], [ 0, %entry ] ; <i32> [#uses=3] > %2 = add i32 %indvar, 2 ; <i32> [#uses=1] > %3 = icmp sgt i32 %2, %paraml ; <i1> [#uses=1] > %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] > br i1 %3, label %bb5.loopexit, label %bb3 > > bb5.loopexit: ; preds = %bb3 > %tmp14 = add i32 %parami, 1 ; <i32> [#uses=1] > %tmp15 = mul i32 %indvar, %tmp14 ; <i32> [#uses=1] > %varx.111 = add i32 %tmp15, %varx.110 ; <i32> [#uses=1] > %4 = add i32 %varx.111, %parami ; <i32> [#uses=1] > %phitmp = add i32 %4, 1 ; <i32> [#uses=1] > br label %bb5 > > bb5: ; preds = %bb5.loopexit, %entry > %varx.1.lcssa = phi i32 [ %varx.110, 
%entry ], [ %phitmp, %bb5.loopexit ] ; <i32> [#uses=1] > %5 = add i32 %paraml, 5 ; <i32> [#uses=1] > %6 = add i32 %5, %varx.1.lcssa ; <i32> [#uses=1] > ret i32 %6 > } > > > While the MSVC generates the assemble : > PUBLIC _TESTE > ; Function compile flags: /Ogtpy > ; File c:\msys\1.0\home\mteixeira\testeadvpl.c > ; COMDAT _TESTE > _TEXT SEGMENT > _parami$ = 8 ; size = 4 > _paraml$ = 12 ; size = 4 > _paramd$ = 16 ; size = 8 > _TESTE PROC ; COMDAT > > ; 6 : int varx=0,vary; > ; 7 : int nI =0; > ; 8 : //varx= parami; > ; 9 : if( parami > 0 ) > > mov ecx, DWORD PTR _parami$[esp-4] > > ; 10 : { > ; 11 : varx = parami; > ; 12 : vary = 0; > ; 13 : } > ; 14 : else > ; 15 : { > ; 16 : varx = 0; > ; 17 : vary = paraml; > ; 18 : } > ; 19 : for( nI = 1 ; nI <= paraml; nI++) > > mov edx, DWORD PTR _paraml$[esp-4] > xor eax, eax > test ecx, ecx > setle al > sub eax, 1 > and eax, ecx > cmp edx, 1 > jl SHORT $LN3 at TESTE > add ecx, 1 > imul ecx, edx > add eax, ecx > $LN3 at TESTE: > > ; 20 : { > ; 21 : varx = varx + parami + 1 ; > ; 22 : vary = varx + nI; > ; 23 : } > ; 24 : > ; 25 : return varx ; > ; 26 : } > > ret 0 > _TESTE ENDP > _TEXT ENDS > > Running the same code, the objetc generated with MSVC is 600 times faster than that generate with the LLVM compiler > Is threre any way to get the same optimzation with the LLVM? > > > Manoel Teixeira > > > > _______________________________________________ > LLVM Developers mailing list > LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev >
The code here is invalid, it executes an operation with undefined behavior in the else branch of the first if (read from uninitialized vary). John On Tue, 6 Jan 2009, Manoel Teixeira wrote:> > The following C code : > #include <stdio.h> > #include <stdlib.h> > > int TESTE2( int parami , int paraml ,double paramd ) > { > int varx=0,vary; > int nI =0; > //varx= parami; > if( parami > 0 ) > { > varx = parami; > vary = varx + 1; > } > else > { > varx = vary + 1; > vary = paraml; > } > varx = varx + parami + paraml; > for( nI = 1 ; nI <= paraml; nI++) > { > varx = varx + parami + 1 ; > vary = varx + nI; > } > vary = varx + 5; > varx = vary + paraml; > > return varx ; > } > > Generates the IR : > ; ModuleID = '/tmp/webcompile/_9908_0.bc' > target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" > target triple = "i386-pc-linux-gnu" > > define i32 @TESTE2(i32 %parami, i32 %paraml, double %paramd) nounwind readnone { > entry: > %0 = shl i32 %parami, 1 ; <i32> [#uses=1] > %varx.110 = add i32 %0, %paraml ; <i32> [#uses=2] > %1 = icmp slt i32 %paraml, 1 ; <i1> [#uses=1] > br i1 %1, label %bb5, label %bb3 > > bb3: ; preds = %bb3, %entry > %indvar = phi i32 [ %indvar.next, %bb3 ], [ 0, %entry ] ; <i32> [#uses=3] > %2 = add i32 %indvar, 2 ; <i32> [#uses=1] > %3 = icmp sgt i32 %2, %paraml ; <i1> [#uses=1] > %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] > br i1 %3, label %bb5.loopexit, label %bb3 > > bb5.loopexit: ; preds = %bb3 > %tmp14 = add i32 %parami, 1 ; <i32> [#uses=1] > %tmp15 = mul i32 %indvar, %tmp14 ; <i32> [#uses=1] > %varx.111 = add i32 %tmp15, %varx.110 ; <i32> [#uses=1] > %4 = add i32 %varx.111, %parami ; <i32> [#uses=1] > %phitmp = add i32 %4, 1 ; <i32> [#uses=1] > br label %bb5 > > bb5: ; preds = %bb5.loopexit, %entry > %varx.1.lcssa = phi i32 [ %varx.110, %entry ], [ %phitmp, %bb5.loopexit ] ; <i32> [#uses=1] > %5 = add i32 %paraml, 5 ; <i32> [#uses=1] > %6 = add i32 %5, 
%varx.1.lcssa ; <i32> [#uses=1] > ret i32 %6 > } > > > While the MSVC generates the assemble : > PUBLIC _TESTE > ; Function compile flags: /Ogtpy > ; File c:\msys\1.0\home\mteixeira\testeadvpl.c > ; COMDAT _TESTE > _TEXT SEGMENT > _parami$ = 8 ; size = 4 > _paraml$ = 12 ; size = 4 > _paramd$ = 16 ; size = 8 > _TESTE PROC ; COMDAT > > ; 6 : int varx=0,vary; > ; 7 : int nI =0; > ; 8 : //varx= parami; > ; 9 : if( parami > 0 ) > > mov ecx, DWORD PTR _parami$[esp-4] > > ; 10 : { > ; 11 : varx = parami; > ; 12 : vary = 0; > ; 13 : } > ; 14 : else > ; 15 : { > ; 16 : varx = 0; > ; 17 : vary = paraml; > ; 18 : } > ; 19 : for( nI = 1 ; nI <= paraml; nI++) > > mov edx, DWORD PTR _paraml$[esp-4] > xor eax, eax > test ecx, ecx > setle al > sub eax, 1 > and eax, ecx > cmp edx, 1 > jl SHORT $LN3 at TESTE > add ecx, 1 > imul ecx, edx > add eax, ecx > $LN3 at TESTE: > > ; 20 : { > ; 21 : varx = varx + parami + 1 ; > ; 22 : vary = varx + nI; > ; 23 : } > ; 24 : > ; 25 : return varx ; > ; 26 : } > > ret 0 > _TESTE ENDP > _TEXT ENDS > > Running the same code, the objetc generated with MSVC is 600 times faster than that generate with the LLVM compiler > Is threre any way to get the same optimzation with the LLVM? > > > Manoel Teixeira > > > > _______________________________________________ > LLVM Developers mailing list > LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev >