The following C code:
#include <stdio.h>
#include <stdlib.h>
// TESTE2: test case that computes an int from parami and paraml.
//
// parami - added into the result several times; also selects the branch below.
// paraml - added into the result and used as the loop's upper bound.
// paramd - never referenced in the body.
//
// Returns the final value of varx.
//
// WARNING: when parami <= 0 the else branch reads vary before anything has
// been stored in it. Reading an uninitialized automatic variable is undefined
// behavior in C, so the result is only well defined for parami > 0.
int TESTE2( int parami , int paraml ,double paramd )
{
// varx starts at 0; vary is declared WITHOUT an initializer.
int varx=0,vary;
int nI =0;
//varx= parami;
if( parami > 0 )
{
// Well-defined path: both variables are assigned before being read.
varx = parami;
vary = varx + 1;
}
else
{
// Undefined behavior: vary has no value yet when it is read here.
// Initializing vary at its declaration would make this path well defined.
varx = vary + 1;
vary = paraml;
}
varx = varx + parami + paraml;
// The body runs paraml times when paraml >= 1, otherwise not at all.
for( nI = 1 ; nI <= paraml; nI++)
{
// Each iteration adds (parami + 1) to varx.
varx = varx + parami + 1 ;
// This store is overwritten after the loop before vary is read again,
// so it does not influence the returned value.
vary = varx + nI;
}
// Net effect of the next two statements: varx = varx + 5 + paraml.
vary = varx + 5;
varx = vary + paraml;
return varx ;
}
generates the IR:
; ModuleID = '/tmp/webcompile/_9908_0.bc'
target datalayout =
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
define i32 @TESTE2(i32 %parami, i32 %paraml, double %paramd) nounwind readnone {
entry:
%0 = shl i32 %parami, 1 ; <i32> [#uses=1]
%varx.110 = add i32 %0, %paraml ; <i32> [#uses=2]
%1 = icmp slt i32 %paraml, 1 ; <i1> [#uses=1]
br i1 %1, label %bb5, label %bb3
bb3: ; preds = %bb3, %entry
%indvar = phi i32 [ %indvar.next, %bb3 ], [ 0, %entry ] ; <i32> [#uses=3]
%2 = add i32 %indvar, 2 ; <i32> [#uses=1]
%3 = icmp sgt i32 %2, %paraml ; <i1> [#uses=1]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
br i1 %3, label %bb5.loopexit, label %bb3
bb5.loopexit: ; preds = %bb3
%tmp14 = add i32 %parami, 1 ; <i32> [#uses=1]
%tmp15 = mul i32 %indvar, %tmp14 ; <i32> [#uses=1]
%varx.111 = add i32 %tmp15, %varx.110 ; <i32> [#uses=1]
%4 = add i32 %varx.111, %parami ; <i32> [#uses=1]
%phitmp = add i32 %4, 1 ; <i32> [#uses=1]
br label %bb5
bb5: ; preds = %bb5.loopexit, %entry
%varx.1.lcssa = phi i32 [ %varx.110, %entry ], [ %phitmp, %bb5.loopexit ] ; <i32> [#uses=1]
%5 = add i32 %paraml, 5 ; <i32> [#uses=1]
%6 = add i32 %5, %varx.1.lcssa ; <i32> [#uses=1]
ret i32 %6
}
While MSVC generates the assembly:
PUBLIC _TESTE
; Function compile flags: /Ogtpy
; File c:\msys\1.0\home\mteixeira\testeadvpl.c
; COMDAT _TESTE
_TEXT SEGMENT
_parami$ = 8 ; size = 4
_paraml$ = 12 ; size = 4
_paramd$ = 16 ; size = 8
_TESTE PROC ; COMDAT
; 6 : int varx=0,vary;
; 7 : int nI =0;
; 8 : //varx= parami;
; 9 : if( parami > 0 )
mov ecx, DWORD PTR _parami$[esp-4]
; 10 : {
; 11 : varx = parami;
; 12 : vary = 0;
; 13 : }
; 14 : else
; 15 : {
; 16 : varx = 0;
; 17 : vary = paraml;
; 18 : }
; 19 : for( nI = 1 ; nI <= paraml; nI++)
mov edx, DWORD PTR _paraml$[esp-4]
xor eax, eax
test ecx, ecx
setle al
sub eax, 1
and eax, ecx
cmp edx, 1
jl SHORT $LN3@TESTE
add ecx, 1
imul ecx, edx
add eax, ecx
$LN3@TESTE:
; 20 : {
; 21 : varx = varx + parami + 1 ;
; 22 : vary = varx + nI;
; 23 : }
; 24 :
; 25 : return varx ;
; 26 : }
ret 0
_TESTE ENDP
_TEXT ENDS
Running the same code, the object code generated with MSVC is 600 times faster than
that generated with the LLVM compiler.
Is there any way to get the same optimization with LLVM?
Manoel Teixeira
Hi Manoel, More info is needed. What optimization level did you use when compiling LLVM? What does the LLVM x86 assembly code look like? How is this run? What are the times you're seeing? What is your system's configuration? -bw On Tue, Jan 6, 2009 at 3:02 AM, Manoel Teixeira <manoel at fonetica.com.br> wrote:> > The following C code : > #include <stdio.h> > #include <stdlib.h> > > int TESTE2( int parami , int paraml ,double paramd ) > { > int varx=0,vary; > int nI =0; > //varx= parami; > if( parami > 0 ) > { > varx = parami; > vary = varx + 1; > } > else > { > varx = vary + 1; > vary = paraml; > } > varx = varx + parami + paraml; > for( nI = 1 ; nI <= paraml; nI++) > { > varx = varx + parami + 1 ; > vary = varx + nI; > } > vary = varx + 5; > varx = vary + paraml; > > return varx ; > } > > Generates the IR : > ; ModuleID = '/tmp/webcompile/_9908_0.bc' > target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" > target triple = "i386-pc-linux-gnu" > > define i32 @TESTE2(i32 %parami, i32 %paraml, double %paramd) nounwind readnone { > entry: > %0 = shl i32 %parami, 1 ; <i32> [#uses=1] > %varx.110 = add i32 %0, %paraml ; <i32> [#uses=2] > %1 = icmp slt i32 %paraml, 1 ; <i1> [#uses=1] > br i1 %1, label %bb5, label %bb3 > > bb3: ; preds = %bb3, %entry > %indvar = phi i32 [ %indvar.next, %bb3 ], [ 0, %entry ] ; <i32> [#uses=3] > %2 = add i32 %indvar, 2 ; <i32> [#uses=1] > %3 = icmp sgt i32 %2, %paraml ; <i1> [#uses=1] > %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] > br i1 %3, label %bb5.loopexit, label %bb3 > > bb5.loopexit: ; preds = %bb3 > %tmp14 = add i32 %parami, 1 ; <i32> [#uses=1] > %tmp15 = mul i32 %indvar, %tmp14 ; <i32> [#uses=1] > %varx.111 = add i32 %tmp15, %varx.110 ; <i32> [#uses=1] > %4 = add i32 %varx.111, %parami ; <i32> [#uses=1] > %phitmp = add i32 %4, 1 ; <i32> [#uses=1] > br label %bb5 > > bb5: ; preds = %bb5.loopexit, %entry > %varx.1.lcssa = phi i32 [ %varx.110, 
%entry ], [ %phitmp, %bb5.loopexit ] ; <i32> [#uses=1] > %5 = add i32 %paraml, 5 ; <i32> [#uses=1] > %6 = add i32 %5, %varx.1.lcssa ; <i32> [#uses=1] > ret i32 %6 > } > > > While the MSVC generates the assemble : > PUBLIC _TESTE > ; Function compile flags: /Ogtpy > ; File c:\msys\1.0\home\mteixeira\testeadvpl.c > ; COMDAT _TESTE > _TEXT SEGMENT > _parami$ = 8 ; size = 4 > _paraml$ = 12 ; size = 4 > _paramd$ = 16 ; size = 8 > _TESTE PROC ; COMDAT > > ; 6 : int varx=0,vary; > ; 7 : int nI =0; > ; 8 : //varx= parami; > ; 9 : if( parami > 0 ) > > mov ecx, DWORD PTR _parami$[esp-4] > > ; 10 : { > ; 11 : varx = parami; > ; 12 : vary = 0; > ; 13 : } > ; 14 : else > ; 15 : { > ; 16 : varx = 0; > ; 17 : vary = paraml; > ; 18 : } > ; 19 : for( nI = 1 ; nI <= paraml; nI++) > > mov edx, DWORD PTR _paraml$[esp-4] > xor eax, eax > test ecx, ecx > setle al > sub eax, 1 > and eax, ecx > cmp edx, 1 > jl SHORT $LN3 at TESTE > add ecx, 1 > imul ecx, edx > add eax, ecx > $LN3 at TESTE: > > ; 20 : { > ; 21 : varx = varx + parami + 1 ; > ; 22 : vary = varx + nI; > ; 23 : } > ; 24 : > ; 25 : return varx ; > ; 26 : } > > ret 0 > _TESTE ENDP > _TEXT ENDS > > Running the same code, the objetc generated with MSVC is 600 times faster than that generate with the LLVM compiler > Is threre any way to get the same optimzation with the LLVM? > > > Manoel Teixeira > > > > _______________________________________________ > LLVM Developers mailing list > LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev >
The code here is invalid, it executes an operation with undefined behavior in the else branch of the first if (read from uninitialized vary). John On Tue, 6 Jan 2009, Manoel Teixeira wrote:> > The following C code : > #include <stdio.h> > #include <stdlib.h> > > int TESTE2( int parami , int paraml ,double paramd ) > { > int varx=0,vary; > int nI =0; > //varx= parami; > if( parami > 0 ) > { > varx = parami; > vary = varx + 1; > } > else > { > varx = vary + 1; > vary = paraml; > } > varx = varx + parami + paraml; > for( nI = 1 ; nI <= paraml; nI++) > { > varx = varx + parami + 1 ; > vary = varx + nI; > } > vary = varx + 5; > varx = vary + paraml; > > return varx ; > } > > Generates the IR : > ; ModuleID = '/tmp/webcompile/_9908_0.bc' > target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" > target triple = "i386-pc-linux-gnu" > > define i32 @TESTE2(i32 %parami, i32 %paraml, double %paramd) nounwind readnone { > entry: > %0 = shl i32 %parami, 1 ; <i32> [#uses=1] > %varx.110 = add i32 %0, %paraml ; <i32> [#uses=2] > %1 = icmp slt i32 %paraml, 1 ; <i1> [#uses=1] > br i1 %1, label %bb5, label %bb3 > > bb3: ; preds = %bb3, %entry > %indvar = phi i32 [ %indvar.next, %bb3 ], [ 0, %entry ] ; <i32> [#uses=3] > %2 = add i32 %indvar, 2 ; <i32> [#uses=1] > %3 = icmp sgt i32 %2, %paraml ; <i1> [#uses=1] > %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] > br i1 %3, label %bb5.loopexit, label %bb3 > > bb5.loopexit: ; preds = %bb3 > %tmp14 = add i32 %parami, 1 ; <i32> [#uses=1] > %tmp15 = mul i32 %indvar, %tmp14 ; <i32> [#uses=1] > %varx.111 = add i32 %tmp15, %varx.110 ; <i32> [#uses=1] > %4 = add i32 %varx.111, %parami ; <i32> [#uses=1] > %phitmp = add i32 %4, 1 ; <i32> [#uses=1] > br label %bb5 > > bb5: ; preds = %bb5.loopexit, %entry > %varx.1.lcssa = phi i32 [ %varx.110, %entry ], [ %phitmp, %bb5.loopexit ] ; <i32> [#uses=1] > %5 = add i32 %paraml, 5 ; <i32> [#uses=1] > %6 = add i32 %5, 
%varx.1.lcssa ; <i32> [#uses=1] > ret i32 %6 > } > > > While the MSVC generates the assemble : > PUBLIC _TESTE > ; Function compile flags: /Ogtpy > ; File c:\msys\1.0\home\mteixeira\testeadvpl.c > ; COMDAT _TESTE > _TEXT SEGMENT > _parami$ = 8 ; size = 4 > _paraml$ = 12 ; size = 4 > _paramd$ = 16 ; size = 8 > _TESTE PROC ; COMDAT > > ; 6 : int varx=0,vary; > ; 7 : int nI =0; > ; 8 : //varx= parami; > ; 9 : if( parami > 0 ) > > mov ecx, DWORD PTR _parami$[esp-4] > > ; 10 : { > ; 11 : varx = parami; > ; 12 : vary = 0; > ; 13 : } > ; 14 : else > ; 15 : { > ; 16 : varx = 0; > ; 17 : vary = paraml; > ; 18 : } > ; 19 : for( nI = 1 ; nI <= paraml; nI++) > > mov edx, DWORD PTR _paraml$[esp-4] > xor eax, eax > test ecx, ecx > setle al > sub eax, 1 > and eax, ecx > cmp edx, 1 > jl SHORT $LN3 at TESTE > add ecx, 1 > imul ecx, edx > add eax, ecx > $LN3 at TESTE: > > ; 20 : { > ; 21 : varx = varx + parami + 1 ; > ; 22 : vary = varx + nI; > ; 23 : } > ; 24 : > ; 25 : return varx ; > ; 26 : } > > ret 0 > _TESTE ENDP > _TEXT ENDS > > Running the same code, the objetc generated with MSVC is 600 times faster than that generate with the LLVM compiler > Is threre any way to get the same optimzation with the LLVM? > > > Manoel Teixeira > > > > _______________________________________________ > LLVM Developers mailing list > LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev >