lixiong hz
2012-Jan-04 15:43 UTC
[LLVMdev] How can I compile a c source file to use SSE2 Data Movement Instructions?
I wrote a small function and tested it with both clang and gcc.
File test.c:
double X[100]; double Y[100]; double DA = 0.3;
int f()
{
int i;
for (i = 0; i < 100; i++)
Y[i] = Y[i] - DA * X[i];
return 0;
}
clang -S -O3 -o test.s test.c -march=native -ccc-echo
result:
"D:/work/trunk/bin/Release/clang.exe" -cc1 -triple i686-pc-win32 -S
-disable-free -disable-llvm-verifier -main-file-name test.c
-mrelocation-model static -mdisable-fp-elim -masm-verbose
-mconstructor-aliases -target-cpu corei7 -momit-leaf-frame-pointer
-coverage-file test.s
-resource-dir "D:/work/trunk/bin/Release\\..\\lib\\clang\\3.1"
-fmodule-cache-path "C:\\DOCUME~1\\ADMINI~1\\LOCALS~1\\Temp\\clang-module-cache"
-internal-isystem D:/work/trunk/bin/Release/../lib/clang/3.1/include
-internal-isystem "C:\\Program Files\\Microsoft Visual Studio 9.0\\VC\\include"
-internal-isystem "C:\\Program Files\\Microsoft SDKs\\Windows\\v6.0A\\\\include"
-O3 -ferror-limit 19 -fmessage-length 80 -mstackrealign -fms-extensions
-fms-compatibility -fmsc-version=1300 -fdelayed-template-parsing -fgnu-runtime
-fobjc-runtime-has-arc -fobjc-runtime-has-weak -fobjc-fragile-abi
-fdiagnostics-show-option -fcolor-diagnostics -o test.s -x c test.c
.def _f;
.scl 2;
.type 32;
.endef
.text
.globl _f
.align 16, 0x90
_f: # @f
# BB#0:
movl $-800, %eax # imm = 0xFFFFFFFFFFFFFCE0
movsd _DA, %xmm0
.align 16, 0x90
LBB0_1: # =>This Inner Loop Header: Depth=1
movsd _X+800(%eax), %xmm1
mulsd %xmm0, %xmm1
movsd _Y+800(%eax), %xmm2
subsd %xmm1, %xmm2
movsd %xmm2, _Y+800(%eax)
addl $8, %eax
jne LBB0_1
# BB#2:
xorl %eax, %eax
ret
.data
.globl _DA # @DA
.align 8
_DA:
.quad 4599075939470750515 # double 3.000000e-01
.comm _Y,800,3 # @Y
.comm _X,800,3 # @X
gcc -S -O3 -o test2.s test.c -march=native
result:
.file "test.c"
.text
.p2align 4,,15
.globl _f
.def _f; .scl 2; .type 32; .endef
_f:
pushl %ebp
movddup _DA, %xmm2
movl %esp, %ebp
xorl %eax, %eax
.p2align 4,,10
L2:
movapd _Y(%eax), %xmm0
movapd _X(%eax), %xmm1
mulpd %xmm2, %xmm1
subpd %xmm1, %xmm0
movapd %xmm0, _Y(%eax)
addl $16, %eax
cmpl $800, %eax
jne L2
xorw %ax, %ax
leave
ret
.globl _DA
.data
.align 16
_DA:
.long 858993459
.long 1070805811
.comm _X, 800, 5
.comm _Y, 800, 5
It seems gcc emits more efficient instructions (packed SSE2 operations instead of scalar ones).
Are there any clang command-line arguments that produce a similar result?
-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20120104/96cd942c/attachment.html>
Duncan Sands
2012-Jan-04 16:09 UTC
[LLVMdev] How can I compile a c source file to use SSE2 Data Movement Instructions?
Hi lixiong hz, clang doesn't have an autovectorizer yet, so with clang for the moment you have to use vectors explicitly in your C code. However dragonegg can produce vector code using the -fplugin-arg-dragonegg-enable-gcc-optzns (it is actually the gcc optimizers that do the vectorizing). Ciao, Duncan. On 04/01/12 16:43, lixiong hz wrote:> I write a small function and test it under clang and gcc, > filet test.c: > double X[100]; double Y[100]; double DA = 0.3; > int f() > { > int i; > for (i = 0; i < 100; i++) > Y[i] = Y[i] - DA * X[i]; > return 0; > } > clang -S -O3 -o test.s test.c -march=native -ccc-echo > result: > "D:/work/trunk/bin/Release/clang.exe" -cc1 -triple i686-pc-win32 -S -disable-fr > e -disable-llvm-verifier -main-file-name test.c -mrelocation-model static -mdis > ble-fp-elim -masm-verbose -mconstructor-aliases -target-cpu corei7 -momit-leaf- > rame-pointer -coverage-file test.s -resource-dir "D:/work/trunk/bin/Release\\.. > \lib\\clang\\3.1" -fmodule-cache-path "C:\\DOCUME~1\\ADMINI~1\\LOCALS~1\\Temp\\ > lang-module-cache" -internal-isystem D:/work/trunk/bin/Release/../lib/clang/3.1 > include -internal-isystem "C:\\Program Files\\Microsoft Visual Studio 9.0\\VC\\ > nclude" -internal-isystem "C:\\Program Files\\Microsoft SDKs\\Windows\\v6.0A\\\ > include" -O3 -ferror-limit 19 -fmessage-length 80 -mstackrealign -fms-extension > -fms-compatibility -fmsc-version=1300 -fdelayed-template-parsing -fgnu-runtime > -fobjc-runtime-has-arc -fobjc-runtime-has-weak -fobjc-fragile-abi -fdiagnostics > show-option -fcolor-diagnostics -o test.s -x c test.c > .def _f; > .scl 2; > .type 32; > .endef > .text > .globl _f > .align 16, 0x90 > _f: # @f > # BB#0: > movl $-800, %eax # imm = 0xFFFFFFFFFFFFFCE0 > movsd _DA, %xmm0 > .align 16, 0x90 > LBB0_1: # =>This Inner Loop Header: Depth=1 > movsd _X+800(%eax), %xmm1 > mulsd %xmm0, %xmm1 > movsd _Y+800(%eax), %xmm2 > subsd %xmm1, %xmm2 > movsd %xmm2, _Y+800(%eax) > addl $8, %eax > jne LBB0_1 > # BB#2: > xorl %eax, %eax > 
ret > .data > .globl _DA # @DA > .align 8 > _DA: > .quad 4599075939470750515 # double 3.000000e-01 > .comm _Y,800,3 # @Y > .comm _X,800,3 # @X > gcc -S -O3 -o test2.s test.c -march=native > result: > .file "test.c" > .text > .p2align 4,,15 > .globl _f > .def _f; .scl 2; .type 32; .endef > _f: > pushl %ebp > movddup _DA, %xmm2 > movl %esp, %ebp > xorl %eax, %eax > .p2align 4,,10 > L2: > movapd _Y(%eax), %xmm0 > movapd _X(%eax), %xmm1 > mulpd %xmm2, %xmm1 > subpd %xmm1, %xmm0 > movapd %xmm0, _Y(%eax) > addl $16, %eax > cmpl $800, %eax > jne L2 > xorw %ax, %ax > leave > ret > .globl _DA > .data > .align 16 > _DA: > .long 858993459 > .long 1070805811 > .comm _X, 800, 5 > .comm _Y, 800, 5 > It seems gcc emit more effectivenss instuction. Are there any clang command > arguments to get the similar result? > > > _______________________________________________ > LLVM Developers mailing list > LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
Possibly Parallel Threads
- [LLVMdev] llvm.x86.sse2.sqrt.pd not using sqrtpd, calling a function that modifies ECX
- [LLVMdev] SIMD instructions and memory alignment on X86
- [LLVMdev] Suboptimal code due to excessive spilling
- [LLVMdev] Suboptimal code due to excessive spilling
- [LLVMdev] llc -O# / opt -O# differences