The following C test program was compiled with LLVM using the -O3 option and
with MSVC using /O2.
The MSVC build runs about 600 times faster than the LLVM build.
We can see from the MSVC assembly that its optimization pass resolves the for
loop more efficiently than LLVM does.
Is there a way to get LLVM to produce an optimization result like MSVC's?
Manoel Teixeira
#include <windows.h>
#include <stdio.h>
/*
 * Optimizer test kernel.
 *
 * Seeds an accumulator with parami when parami is positive (0 otherwise),
 * then adds (parami + 1) to it once for every integer in [1, paraml].
 *
 * parami  seed value and base of the per-iteration increment
 * paraml  upper bound of the loop counter; the body never runs when
 *         paraml < 1
 * paramd  never referenced
 *
 * Returns the final accumulator value.
 */
int TESTE(int parami, int paraml, double paramd)
{
    int acc = (parami > 0) ? parami : 0;
    /* Written on every iteration but never read back. */
    int shadow = (parami > 0) ? 0 : paraml;
    int step = 1;

    while (step <= paraml) {
        acc = acc + parami + 1;
        shadow = acc + step;
        step++;
    }
    return acc;
}
unsigned long thread_call( LPVOID c )
{
int num = 1;
int (*fp)(int, int, double) = (int (*)(int, int,double)) c;
//printf("\n(1)threadid = %ld seqt=%ld
inum=%d",GetCurrentThreadId(),num,inum);
int ret = fp(num,1000000000,1);
printf("\n(2)leu %ld threadid = %ld seqt=%ld ",ret ,
GetCurrentThreadId(),num);
return (unsigned long) ret;
}
/* stopwatch */
unsigned long tini;  /* tick count stored when timing starts */
unsigned long tfim;  /* tick count stored when timing ends */
#define getmilisecs(x) (x)
#define num_th 100   /* number of worker threads created by main */

/* Elapsed time tfim - tini, in the same unit as the stored tick counts.
 * The subtraction is unsigned, so it wraps instead of going negative. */
unsigned long milisecs(void) { return getmilisecs(tfim - tini); }

/* Elapsed time in whole seconds (milisecs() / 1000, truncated). */
unsigned long secs(void) { return milisecs() / 1000; }

/*
 * Formats the elapsed time as "HH:MM:SS:mmm".
 *
 * Returns a pointer to a static buffer that is overwritten by the next call,
 * so the result must be consumed (or copied) before calling again.
 */
const char *spenttime(void)
{
    static char buffer[64];
    unsigned long systime = secs();
    unsigned long milisectime = milisecs() % 1000;

    /* All four values are unsigned long, hence %lu. Even a 64-bit value
     * prints at most 20 digits per field, well inside the 64-byte buffer. */
    sprintf(buffer, "%02lu:%02lu:%02lu:%03lu",
            systime / 3600,
            (systime % 3600) / 60,
            (systime % 3600) % 60,
            milisectime);
    return buffer;
}
/* end of stopwatch */
/*
 * Benchmark driver.
 *
 * Starts num_th threads that each run thread_call on TESTE, waits for every
 * thread that was started, then prints the elapsed wall-clock time between
 * the two GetTickCount() samples.
 *
 * a, b  command-line argument count and vector; not used
 *
 * Returns 0 when all num_th threads were created, 1 if a CreateThread call
 * failed (threads created before the failure are still waited on).
 */
int main(int a, char **b)
{
    int i;
    int created = 0;
    DWORD iThreadId;
    HANDLE mainThread[num_th];

    tfim = 0;
    tini = GetTickCount();
    for (i = 0; i < num_th; i++) {
        mainThread[i] = CreateThread(NULL, 0,
                                     (LPTHREAD_START_ROUTINE)thread_call,
                                     (LPVOID)TESTE, 0, &iThreadId);
        if (mainThread[i] == NULL) {
            /* Do not wait on or close a handle that was never created. */
            fprintf(stderr, "\nCreateThread failed for thread %d (error %lu)",
                    i, (unsigned long)GetLastError());
            break;
        }
        created++;
    }

    /* Disabled alternative, kept for reference:
     *   WaitForMultipleObjects( num_th, (const HANDLE* )mainThread, TRUE, INFINITE);
     * NOTE: WaitForMultipleObjects accepts at most MAXIMUM_WAIT_OBJECTS (64)
     * handles per call, fewer than num_th (100). */
    for (i = 0; i < created; i++) {
        WaitForSingleObject(mainThread[i], INFINITE);
        CloseHandle(mainThread[i]);
    }
    tfim = GetTickCount();
    printf("\n chamou = %s", spenttime());
    return (created == num_th) ? 0 : 1;
}
//////////////////////////
; ModuleID = 'testeadvpl.c'
target datalayout =
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-mingw32"
%struct._SECURITY_ATTRIBUTES = type { i32, i8*, i32 }
@tfim = common global i32 0 ; <i32*> [#uses=5]
@tini = common global i32 0 ; <i32*> [#uses=5]
@.str = internal constant [38 x i8] c"\0A(2)leu %ld threadid = %ld
seqt=%ld \00" ; <[38 x i8]*> [#uses=1]
@buffer.30732 = internal global [64 x i8] zeroinitializer, align 32 ; <[64 x
i8]*> [#uses=1]
@.str1 = internal constant [20 x i8] c"%02d:%02d:%02d:%03d\00" ;
<[20 x i8]*> [#uses=1]
@.str2 = internal constant [14 x i8] c"\0A chamou = %s\00" ; <[14
x i8]*> [#uses=1]
; TESTE as emitted by LLVM at -O3.
; The running sum is no longer carried around the loop: each iteration rebuilds
; it from the counter as smax(parami, 0) + (indvar + 1) * (parami + 1).
; The loop itself is still executed once per iteration of the original for
; loop, and only the value from the last trip reaches the return.
define i32 @TESTE(i32 %parami, i32 %paraml, double %paramd) nounwind readnone {
entry:
; Initial varx: parami when parami > 0, otherwise 0.
%0 = icmp sgt i32 %parami, 0 ; <i1> [#uses=1]
%varx.0 = select i1 %0, i32 %parami, i32 0 ; <i32> [#uses=1]
; Skip the loop entirely when paraml < 1.
%1 = icmp slt i32 %paraml, 1 ; <i1> [#uses=1]
br i1 %1, label %bb5, label %bb.nph
bb.nph: ; preds = %entry
; Per-iteration increment, parami + 1.
%2 = add i32 %parami, 1 ; <i32> [#uses=2]
br label %bb3
bb3: ; preds = %bb3, %bb.nph
; %indvar counts completed iterations, starting at 0 (source nI = indvar + 1).
%indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb3 ] ; <i32> [#uses=3]
; Same clamped seed as %varx.0, recomputed inside the loop.
%tmp = icmp slt i32 %parami, 0 ; <i1> [#uses=1]
%smax = select i1 %tmp, i32 0, i32 %parami ; <i32> [#uses=1]
; varx after this iteration = smax + indvar * (parami + 1) + (parami + 1).
%tmp11 = mul i32 %indvar, %2 ; <i32> [#uses=1]
%varx.18 = add i32 %tmp11, %smax ; <i32> [#uses=1]
%3 = add i32 %2, %varx.18 ; <i32> [#uses=1]
; Leave the loop once the next nI (indvar + 2) would exceed paraml.
%4 = add i32 %indvar, 2 ; <i32> [#uses=1]
%5 = icmp sgt i32 %4, %paraml ; <i1> [#uses=1]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
br i1 %5, label %bb5, label %bb3
bb5: ; preds = %bb3, %entry
; Result: the seed if the loop was skipped, else the last value computed in bb3.
%varx.1.lcssa = phi i32 [ %varx.0, %entry ], [ %3, %bb3 ] ; <i32> [#uses=1]
ret i32 %varx.1.lcssa
}
; milisecs(): loads the two i32 globals and returns @tfim - @tini.
; The getmilisecs() macro has disappeared; only the subtraction remains.
define i32 @milisecs() nounwind readonly {
entry:
%0 = load i32* @tfim, align 4 ; <i32> [#uses=1]
%1 = load i32* @tini, align 4 ; <i32> [#uses=1]
%2 = sub i32 %0, %1 ; <i32> [#uses=1]
ret i32 %2
}
define i32 @thread_call(i8* %c) nounwind {
entry:
%0 = bitcast i8* %c to i32 (i32, i32, double)* ; <i32 (i32, i32,
double)*> [#uses=1]
%1 = tail call i32 %0(i32 1, i32 1000000000, double 1.000000e+000) nounwind ;
<i32> [#uses=2]
%2 = tail call x86_stdcallcc i32 @GetCurrentThreadId() nounwind ; <i32>
[#uses=1]
%3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([38 x i8]* @.str, i32
0, i32 0), i32 %1, i32 %2, i32 1) nounwind ; <i32> [#uses=0]
ret i32 %1
}
declare x86_stdcallcc i32 @GetCurrentThreadId()
declare i32 @printf(i8*, ...) nounwind
; secs(): returns (@tfim - @tini) / 1000 as an unsigned division.
; There is no call to @milisecs here; its load/load/sub sequence appears inline.
define i32 @secs() nounwind readonly {
entry:
%0 = load i32* @tfim, align 4 ; <i32> [#uses=1]
%1 = load i32* @tini, align 4 ; <i32> [#uses=1]
%2 = sub i32 %0, %1 ; <i32> [#uses=1]
%3 = udiv i32 %2, 1000 ; <i32> [#uses=1]
ret i32 %3
}
define i8* @spenttime() nounwind {
entry:
%0 = load i32* @tfim, align 4 ; <i32> [#uses=1]
%1 = load i32* @tini, align 4 ; <i32> [#uses=1]
%2 = sub i32 %0, %1 ; <i32> [#uses=3]
%3 = udiv i32 %2, 1000 ; <i32> [#uses=1]
%4 = urem i32 %2, 1000 ; <i32> [#uses=1]
%5 = urem i32 %3, 3600 ; <i32> [#uses=2]
%6 = urem i32 %5, 60 ; <i32> [#uses=1]
%7 = udiv i32 %5, 60 ; <i32> [#uses=1]
%8 = udiv i32 %2, 3600000 ; <i32> [#uses=1]
%9 = tail call i32 (i8*, i8*, ...)* @sprintf(i8* getelementptr ([64 x i8]*
@buffer.30732, i32 0, i32 0), i8* getelementptr ([20 x i8]* @.str1, i32 0, i32
0), i32 %8, i32 %7, i32 %6, i32 %4) nounwind ; <i32> [#uses=0]
ret i8* getelementptr ([64 x i8]* @buffer.30732, i32 0, i32 0)
}
declare i32 @sprintf(i8*, i8*, ...) nounwind
define i32 @main(i32 %a, i8** %b) nounwind {
entry:
%mainThread = alloca [100 x i8*] ; <[100 x i8*]*> [#uses=2]
%iThreadId = alloca i32 ; <i32*> [#uses=1]
store i32 0, i32* @tfim, align 4
%0 = call x86_stdcallcc i32 @GetTickCount() nounwind ; <i32> [#uses=1]
store i32 %0, i32* @tini, align 4
br label %bb
bb: ; preds = %bb, %entry
%i.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %indvar.next14, %bb ] ; <i32>
[#uses=2]
%1 = call x86_stdcallcc i8* @CreateThread(%struct._SECURITY_ATTRIBUTES* null,
i32 0, i32 (i8*)* @thread_call, i8* bitcast (i32 (i32, i32, double)* @TESTE to
i8*), i32 0, i32* %iThreadId) nounwind ; <i8*> [#uses=1]
%2 = getelementptr [100 x i8*]* %mainThread, i32 0, i32 %i.0.reg2mem.0 ;
<i8**> [#uses=1]
store i8* %1, i8** %2, align 4
%indvar.next14 = add i32 %i.0.reg2mem.0, 1 ; <i32> [#uses=2]
%exitcond15 = icmp eq i32 %indvar.next14, 100 ; <i1> [#uses=1]
br i1 %exitcond15, label %bb3, label %bb
bb3: ; preds = %bb3, %bb
%i.1.reg2mem.0 = phi i32 [ 0, %bb ], [ %indvar.next, %bb3 ] ; <i32>
[#uses=2]
%3 = getelementptr [100 x i8*]* %mainThread, i32 0, i32 %i.1.reg2mem.0 ;
<i8**> [#uses=2]
%4 = load i8** %3, align 4 ; <i8*> [#uses=1]
%5 = call x86_stdcallcc i32 @WaitForSingleObject(i8* %4, i32 -1) nounwind ;
<i32> [#uses=0]
%6 = load i8** %3, align 4 ; <i8*> [#uses=1]
%7 = call x86_stdcallcc i32 @CloseHandle(i8* %6) nounwind ; <i32>
[#uses=0]
%indvar.next = add i32 %i.1.reg2mem.0, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, 100 ; <i1> [#uses=1]
br i1 %exitcond, label %bb5, label %bb3
bb5: ; preds = %bb3
%8 = call x86_stdcallcc i32 @GetTickCount() nounwind ; <i32> [#uses=2]
store i32 %8, i32* @tfim, align 4
%9 = load i32* @tini, align 4 ; <i32> [#uses=1]
%10 = sub i32 %8, %9 ; <i32> [#uses=3]
%11 = udiv i32 %10, 1000 ; <i32> [#uses=1]
%12 = urem i32 %10, 1000 ; <i32> [#uses=1]
%13 = urem i32 %11, 3600 ; <i32> [#uses=2]
%14 = urem i32 %13, 60 ; <i32> [#uses=1]
%15 = udiv i32 %13, 60 ; <i32> [#uses=1]
%16 = udiv i32 %10, 3600000 ; <i32> [#uses=1]
%17 = call i32 (i8*, i8*, ...)* @sprintf(i8* getelementptr ([64 x i8]*
@buffer.30732, i32 0, i32 0), i8* getelementptr ([20 x i8]* @.str1, i32 0, i32
0), i32 %16, i32 %15, i32 %14, i32 %12) nounwind ; <i32> [#uses=0]
%18 = call i32 (i8*, ...)* @printf(i8* getelementptr ([14 x i8]* @.str2, i32 0,
i32 0), i8* getelementptr ([64 x i8]* @buffer.30732, i32 0, i32 0)) nounwind ;
<i32> [#uses=0]
ret i32 0
}
declare x86_stdcallcc i32 @GetTickCount()
declare x86_stdcallcc i8* @CreateThread(%struct._SECURITY_ATTRIBUTES*, i32, i32
(i8*)*, i8*, i32, i32*)
declare x86_stdcallcc i32 @WaitForSingleObject(i8*, i32)
declare x86_stdcallcc i32 @CloseHandle(i8*)
////////////////////////
; Listing generated by Microsoft (R) Optimizing Compiler Version 14.00.50727.762
TITLE C:\msys\1.0\home\mteixeira\testeadvpl.c
.686P
.XMM
include listing.inc
.model flat
INCLUDELIB LIBCMT
INCLUDELIB OLDNAMES
_DATA SEGMENT
COMM _tini:DWORD
COMM _tfim:DWORD
_DATA ENDS
PUBLIC _TESTE
; Function compile flags: /Ogtpy
; File c:\msys\1.0\home\mteixeira\testeadvpl.c
; COMDAT _TESTE
; TESTE as emitted by MSVC at /O2.
; There is no loop in the generated code: the result is computed directly as
;   (parami > 0 ? parami : 0) + (parami + 1) * paraml
; with the multiply-add skipped when paraml < 1.
_TEXT SEGMENT
_parami$ = 8 ; size = 4
_paraml$ = 12 ; size = 4
_paramd$ = 16 ; size = 8
_TESTE PROC ; COMDAT
; 6 : int varx=0,vary=0;
; 7 : int nI =0;
; 8 : //varx= parami;
; 9 : if( parami > 0 )
; ecx = parami
mov ecx, DWORD PTR _parami$[esp-4]
; 10 : {
; 11 : varx = parami;
; 12 : vary = 0;
; 13 : }
; 14 : else
; 15 : {
; 16 : varx = 0;
; 17 : vary = paraml;
; 18 : }
; 19 : for( nI = 1 ; nI <= paraml; nI++)
; edx = paraml
mov edx, DWORD PTR _paraml$[esp-4]
; Branch-free select: eax becomes all ones when parami > 0 and zero otherwise,
; so the AND leaves eax = parami or 0 (the initial varx).
xor eax, eax
test ecx, ecx
setle al
sub eax, 1
and eax, ecx
; paraml < 1: the loop body would never run, return the initial varx.
cmp edx, 1
jl SHORT $LN3@TESTE
; eax += (parami + 1) * paraml, the sum of all loop increments.
add ecx, 1
imul ecx, edx
add eax, ecx
$LN3@TESTE:
; 20 : {
; 21 : varx = varx + parami + 1 ;
; 22 : vary = varx + nI;
; 23 : }
; 24 :
; 25 : return varx ;
; 26 : }
ret 0
_TESTE ENDP
_TEXT ENDS
PUBLIC ??_C at
_0CG@LBAPCNHJ@?6?$CI2?$CJleu?5?$CFld?5threadid?5?$DN?5?5?$CFld?5seqt@ ;
`string'
PUBLIC __real at 3ff0000000000000
PUBLIC _thread_call
EXTRN _printf:PROC
EXTRN __imp__GetCurrentThreadId at 0:PROC
EXTRN __fltused:DWORD
; COMDAT ??_C at
_0CG@LBAPCNHJ@?6?$CI2?$CJleu?5?$CFld?5threadid?5?$DN?5?5?$CFld?5seqt@
CONST SEGMENT
??_C at _0CG@LBAPCNHJ@?6?$CI2?$CJleu?5?$CFld?5threadid?5?$DN?5?5?$CFld?5seqt@ DB
0aH
DB '(2)leu %ld threadid = %ld seqt=%ld ', 00H ; `string'
CONST ENDS
; COMDAT __real at 3ff0000000000000
CONST SEGMENT
__real at 3ff0000000000000 DQ 03ff0000000000000r ; 1
; Function compile flags: /Ogtpy
CONST ENDS
; COMDAT _thread_call
_TEXT SEGMENT
_c$ = 8 ; size = 4
_thread_call PROC ; COMDAT
; 29 : int num = 1;
; 30 : int (*fp)(int, int, double) = (int (*)(int, int,double)) c;
; 31 : //printf("\n(1)threadid = %ld seqt=%ld
inum=%d",GetCurrentThreadId(),num,inum);
; 32 : int ret = fp(num,1000000000,1);
fld1
push esi
sub esp, 8
fstp QWORD PTR [esp]
push 1000000000 ; 3b9aca00H
push 1
call DWORD PTR _c$[esp+16]
add esp, 16 ; 00000010H
; 33 : printf("\n(2)leu %ld threadid = %ld seqt=%ld ",ret ,
GetCurrentThreadId(),num);
push 1
mov esi, eax
call DWORD PTR __imp__GetCurrentThreadId at 0
push eax
push esi
push OFFSET ??_C at
_0CG@LBAPCNHJ@?6?$CI2?$CJleu?5?$CFld?5threadid?5?$DN?5?5?$CFld?5seqt@
call _printf
add esp, 16 ; 00000010H
; 34 : return (unsigned long) ret;
mov eax, esi
pop esi
; 35 : }
ret 0
_thread_call ENDP
_TEXT ENDS
PUBLIC _milisecs
; Function compile flags: /Ogtpy
; COMDAT _milisecs
; milisecs(): returns _tfim - _tini in eax.
_TEXT SEGMENT
_milisecs PROC ; COMDAT
; 41 : unsigned long milisecs() { return getmilisecs(tfim-tini);};
mov eax, DWORD PTR _tfim
sub eax, DWORD PTR _tini
ret 0
_milisecs ENDP
_TEXT ENDS
PUBLIC _secs
; Function compile flags: /Ogtpy
; COMDAT _secs
; secs(): returns (_tfim - _tini) / 1000 in eax.
; No div instruction is used: the difference is multiplied by 10624dd3H
; (274877907, i.e. 2^38 / 1000 rounded up) and the 64-bit product in edx:eax
; is shifted right by 38 in total (taking edx, then shr 6).
_TEXT SEGMENT
_secs PROC ; COMDAT
; 42 : unsigned long secs() { return milisecs()/1000;};
mov ecx, DWORD PTR _tfim
sub ecx, DWORD PTR _tini
mov eax, 274877907 ; 10624dd3H
mul ecx
shr edx, 6
mov eax, edx
ret 0
_secs ENDP
_TEXT ENDS
PUBLIC ??_C at _0BE@FFMOMMDD@?$CF02d?3?$CF02d?3?$CF02d?3?$CF03d?$AA@ ;
`string'
PUBLIC _spenttime
EXTRN _sprintf:PROC
_BSS SEGMENT
?buffer@?1??spenttime@@9 at 9 DB 040H DUP (?) ;
`spenttime'::`2'::buffer
_BSS ENDS
; COMDAT ??_C at _0BE@FFMOMMDD@?$CF02d?3?$CF02d?3?$CF02d?3?$CF03d?$AA@
CONST SEGMENT
??_C at _0BE@FFMOMMDD@?$CF02d?3?$CF02d?3?$CF02d?3?$CF03d?$AA@ DB
'%02d:%02d:'
DB '%02d:%03d', 00H ; `string'
; Function compile flags: /Ogtpy
CONST ENDS
; COMDAT _spenttime
_TEXT SEGMENT
_spenttime PROC ; COMDAT
; 45 : static char buffer[64];
; 46 : unsigned long systime = secs();
mov eax, DWORD PTR _tfim
sub eax, DWORD PTR _tini
xor edx, edx
mov ecx, 1000 ; 000003e8H
div ecx
push esi
push edi
mov esi, eax
mov edi, edx
; 47 : unsigned long milisectime = milisecs()%1000;
; 48 :
sprintf(buffer,"%02d:%02d:%02d:%03d",systime/3600,(systime%3600)/60,(systime%3600)%60,milisectime);
mov eax, -1851608123 ; 91a2b3c5H
mul esi
mov ecx, edx
shr ecx, 11 ; 0000000bH
mov edx, ecx
imul edx, 3600 ; 00000e10H
mov eax, esi
sub eax, edx
xor edx, edx
mov esi, 60 ; 0000003cH
div esi
push edi
push edx
push eax
push ecx
push OFFSET ??_C at _0BE@FFMOMMDD@?$CF02d?3?$CF02d?3?$CF02d?3?$CF03d?$AA@
push OFFSET ?buffer@?1??spenttime@@9 at 9
call _sprintf
add esp, 24 ; 00000018H
pop edi
; 49 : return (const char*) buffer;
mov eax, OFFSET ?buffer@?1??spenttime@@9 at 9
pop esi
; 50 : };
ret 0
_spenttime ENDP
_TEXT ENDS
PUBLIC ??_C at _0O@BKPPOCPE@?6?5chamou?5?$DN?5?$CFs?$AA@ ; `string'
PUBLIC _main
EXTRN __imp__CloseHandle at 4:PROC
EXTRN __imp__WaitForSingleObject at 8:PROC
EXTRN __imp__CreateThread at 24:PROC
EXTRN __imp__GetTickCount at 0:PROC
; COMDAT ??_C at _0O@BKPPOCPE@?6?5chamou?5?$DN?5?$CFs?$AA@
CONST SEGMENT
??_C at _0O@BKPPOCPE@?6?5chamou?5?$DN?5?$CFs?$AA@ DB 0aH, ' chamou =
%s', 00H ; `string'
; Function compile flags: /Ogtpy
CONST ENDS
; COMDAT _main
_TEXT SEGMENT
_iThreadId$ = -404 ; size = 4
_mainThread$ = -400 ; size = 400
_a$ = 8 ; size = 4
_b$ = 12 ; size = 4
_main PROC ; COMDAT
; 53 : {
sub esp, 404 ; 00000194H
push ebx
push ebp
push esi
push edi
; 54 : int i;
; 55 : DWORD iThreadId;
; 56 : HANDLE mainThread[num_th];
; 57 : tfim = 0;
mov DWORD PTR _tfim, 0
; 58 : tini = GetTickCount();
call DWORD PTR __imp__GetTickCount at 0
; 59 : for(i=0; i< num_th;i++)
mov edi, DWORD PTR __imp__CreateThread at 24
mov DWORD PTR _tini, eax
xor esi, esi
$LL6 at main:
; 60 : mainThread[i] = CreateThread(NULL, 0,
(LPTHREAD_START_ROUTINE)thread_call, (LPVOID)TESTE, 0, (DWORD *)&iThreadId);
lea eax, DWORD PTR _iThreadId$[esp+420]
push eax
push 0
push OFFSET _TESTE
push OFFSET _thread_call
push 0
push 0
call edi
mov DWORD PTR _mainThread$[esp+esi*4+420], eax
add esi, 1
cmp esi, 100 ; 00000064H
jl SHORT $LL6 at main
; 61 :
; 62 : //WaitForMultipleObjects( num_th, (const HANDLE* )mainThread, TRUE,
INFINITE);
; 63 : for( i=0; i < num_th; i++)
mov ebx, DWORD PTR __imp__WaitForSingleObject at 8
mov ebp, DWORD PTR __imp__CloseHandle at 4
xor esi, esi
$LL3 at main:
; 64 : {
; 65 : WaitForSingleObject( mainThread[i], INFINITE );
mov edi, DWORD PTR _mainThread$[esp+esi*4+420]
push -1
push edi
call ebx
; 66 : CloseHandle(mainThread[i]);
push edi
call ebp
add esi, 1
cmp esi, 100 ; 00000064H
jl SHORT $LL3 at main
; 67 : }
; 68 : tfim = GetTickCount();
call DWORD PTR __imp__GetTickCount at 0
mov DWORD PTR _tfim, eax
; 69 :
; 70 : printf("\n chamou = %s",spenttime () );
sub eax, DWORD PTR _tini
xor edx, edx
mov ecx, 1000 ; 000003e8H
div ecx
mov esi, eax
mov edi, edx
mov eax, -1851608123 ; 91a2b3c5H
mul esi
mov ecx, edx
shr ecx, 11 ; 0000000bH
mov edx, ecx
imul edx, 3600 ; 00000e10H
mov eax, esi
sub eax, edx
xor edx, edx
mov esi, 60 ; 0000003cH
div esi
push edi
push edx
push eax
push ecx
push OFFSET ??_C at _0BE@FFMOMMDD@?$CF02d?3?$CF02d?3?$CF02d?3?$CF03d?$AA@
push OFFSET ?buffer@?1??spenttime@@9 at 9
call _sprintf
push OFFSET ?buffer@?1??spenttime@@9 at 9
push OFFSET ??_C at _0O@BKPPOCPE@?6?5chamou?5?$DN?5?$CFs?$AA@
call _printf
add esp, 32 ; 00000020H
pop edi
pop esi
pop ebp
; 71 : return 0;
xor eax, eax
pop ebx
; 72 : }
add esp, 404 ; 00000194H
ret 0
_main ENDP
_TEXT ENDS
END