thr3ads.net - llvm dev - [LLVMdev] How can I compile a c source file to use SSE2 Data Movement Instructions? [Jan 2012]

If this information is useful, please help other people find it:
Share via:

lixiong hz

2012-Jan-04 15:43 UTC

[LLVMdev] How can I compile a c source file to use SSE2 Data Movement Instructions?

I wrote a small function and tested it under clang and gcc.

file test.c:
double X[100];  double Y[100];  double DA = 0.3;
int f()
{
  int i;
  for (i = 0; i < 100; i++)
   Y[i] = Y[i] - DA * X[i];
  return 0;
}
clang -S -O3 -o test.s test.c -march=native -ccc-echo
result:
"D:/work/trunk/bin/Release/clang.exe" -cc1 -triple i686-pc-win32 -S
-disable-free -disable-llvm-verifier -main-file-name test.c -mrelocation-model static
-mdisable-fp-elim -masm-verbose -mconstructor-aliases -target-cpu corei7
-momit-leaf-frame-pointer -coverage-file test.s -resource-dir
"D:/work/trunk/bin/Release\\..\\lib\\clang\\3.1" -fmodule-cache-path
"C:\\DOCUME~1\\ADMINI~1\\LOCALS~1\\Temp\\clang-module-cache" -internal-isystem
D:/work/trunk/bin/Release/../lib/clang/3.1/include -internal-isystem
"C:\\Program Files\\Microsoft Visual Studio 9.0\\VC\\include" -internal-isystem
"C:\\Program Files\\Microsoft SDKs\\Windows\\v6.0A\\\\include" -O3 -ferror-limit 19
-fmessage-length 80 -mstackrealign -fms-extensions -fms-compatibility -fmsc-version=1300
-fdelayed-template-parsing -fgnu-runtime -fobjc-runtime-has-arc -fobjc-runtime-has-weak
-fobjc-fragile-abi -fdiagnostics-show-option -fcolor-diagnostics -o test.s -x c test.c

 .def  _f;
 .scl 2;
 .type 32;
 .endef
 .text
 .globl _f
 .align 16, 0x90
_f:                                     # @f
# BB#0:
 movl $-800, %eax             # imm = 0xFFFFFFFFFFFFFCE0
 movsd _DA, %xmm0
 .align 16, 0x90
LBB0_1:                                 # =>This Inner Loop Header: Depth=1
 movsd _X+800(%eax), %xmm1
 mulsd %xmm0, %xmm1
 movsd _Y+800(%eax), %xmm2
 subsd %xmm1, %xmm2
 movsd %xmm2, _Y+800(%eax)
 addl $8, %eax
 jne LBB0_1
# BB#2:
 xorl %eax, %eax
 ret
 .data
 .globl _DA                     # @DA
 .align 8
_DA:
 .quad 4599075939470750515     # double 3.000000e-01
 .comm _Y,800,3                # @Y
 .comm _X,800,3                # @X


gcc -S -O3 -o test2.s test.c -march=native
result:
 .file "test.c"
 .text
 .p2align 4,,15
.globl _f
 .def _f; .scl 2; .type 32; .endef
_f:
 pushl %ebp
 movddup _DA, %xmm2
 movl %esp, %ebp
 xorl %eax, %eax
 .p2align 4,,10
L2:
 movapd _Y(%eax), %xmm0
 movapd _X(%eax), %xmm1
 mulpd %xmm2, %xmm1
 subpd %xmm1, %xmm0
 movapd %xmm0, _Y(%eax)
 addl $16, %eax
 cmpl $800, %eax
 jne L2
 xorw %ax, %ax
 leave
 ret
.globl _DA
 .data
 .align 16
_DA:
 .long 858993459
 .long 1070805811
 .comm _X, 800, 5
 .comm _Y, 800, 5

It seems gcc emits more efficient instructions. Are there any clang command-line
arguments to get a similar result?
-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20120104/96cd942c/attachment.html>

Duncan Sands

2012-Jan-04 16:09 UTC

head link

[LLVMdev] How can I compile a c source file to use SSE2 Data Movement Instructions?

Hi lixiong hz, clang doesn't have an autovectorizer yet, so with clang for the
moment you have to use vectors explicitly in your C code.  However, dragonegg can
produce vector code using the -fplugin-arg-dragonegg-enable-gcc-optzns option (it
is actually the gcc optimizers that do the vectorizing).

Ciao, Duncan.

On 04/01/12 16:43, lixiong hz wrote:
> I write a small function and test it under clang and gcc,
> filet test.c:
> double X[100];  double Y[100];  double DA = 0.3;
> int f()
> {
>    int i;
>    for (i = 0; i < 100; i++)
>     Y[i] = Y[i] - DA * X[i];
>    return 0;
> }
> clang -S -O3 -o test.s test.c -march=native -ccc-echo
> result:
> "D:/work/trunk/bin/Release/clang.exe" -cc1 -triple i686-pc-win32
> -S -disable-free -disable-llvm-verifier -main-file-name test.c -mrelocation-model static
> -mdisable-fp-elim -masm-verbose -mconstructor-aliases -target-cpu corei7
> -momit-leaf-frame-pointer -coverage-file test.s -resource-dir
> "D:/work/trunk/bin/Release\\..\\lib\\clang\\3.1" -fmodule-cache-path
> "C:\\DOCUME~1\\ADMINI~1\\LOCALS~1\\Temp\\clang-module-cache" -internal-isystem
> D:/work/trunk/bin/Release/../lib/clang/3.1/include -internal-isystem
> "C:\\Program Files\\Microsoft Visual Studio 9.0\\VC\\include" -internal-isystem
> "C:\\Program Files\\Microsoft SDKs\\Windows\\v6.0A\\\\include" -O3 -ferror-limit 19
> -fmessage-length 80 -mstackrealign -fms-extensions -fms-compatibility -fmsc-version=1300
> -fdelayed-template-parsing -fgnu-runtime -fobjc-runtime-has-arc -fobjc-runtime-has-weak
> -fobjc-fragile-abi -fdiagnostics-show-option -fcolor-diagnostics -o test.s -x c test.c
>   .def  _f;
>   .scl 2;
>   .type 32;
>   .endef
>   .text
>   .globl _f
>   .align 16, 0x90
> _f:                                     # @f
> # BB#0:
>   movl $-800, %eax             # imm = 0xFFFFFFFFFFFFFCE0
>   movsd _DA, %xmm0
>   .align 16, 0x90
> LBB0_1:                                 # =>This Inner Loop Header: Depth=1
>   movsd _X+800(%eax), %xmm1
>   mulsd %xmm0, %xmm1
>   movsd _Y+800(%eax), %xmm2
>   subsd %xmm1, %xmm2
>   movsd %xmm2, _Y+800(%eax)
>   addl $8, %eax
>   jne LBB0_1
> # BB#2:
>   xorl %eax, %eax
>   ret
>   .data
>   .globl _DA                     # @DA
>   .align 8
> _DA:
>   .quad 4599075939470750515     # double 3.000000e-01
>   .comm _Y,800,3                # @Y
>   .comm _X,800,3                # @X
> gcc -S -O3 -o test2.s test.c -march=native
> result:
>   .file "test.c"
>   .text
>   .p2align 4,,15
> .globl _f
>   .def _f; .scl 2; .type 32; .endef
> _f:
>   pushl %ebp
>   movddup _DA, %xmm2
>   movl %esp, %ebp
>   xorl %eax, %eax
>   .p2align 4,,10
> L2:
>   movapd _Y(%eax), %xmm0
>   movapd _X(%eax), %xmm1
>   mulpd %xmm2, %xmm1
>   subpd %xmm1, %xmm0
>   movapd %xmm0, _Y(%eax)
>   addl $16, %eax
>   cmpl $800, %eax
>   jne L2
>   xorw %ax, %ax
>   leave
>   ret
> .globl _DA
>   .data
>   .align 16
> _DA:
>   .long 858993459
>   .long 1070805811
>   .comm _X, 800, 5
>   .comm _Y, 800, 5
> It seems gcc emit more effectivenss instuction. Are there any clang command
> arguments to get the similar result?
>
>
> _______________________________________________
> LLVM Developers mailing list
> LLVMdev at cs.uiuc.edu         http://llvm.cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev

Maybe Matching Threads

Search for more reasonably related threads

llvm dev - Jan 2012 - [LLVMdev] How can I compile a c source file to use SSE2 Data Movement Instructions?

[LLVMdev] How can I compile a c source file to use SSE2 Data Movement Instructions?

[LLVMdev] How can I compile a c source file to use SSE2 Data Movement Instructions?

Maybe Matching Threads