Sandoval Gonzalez, Leonardo via llvm-dev
2019-Nov-28 17:45 UTC
[llvm-dev] SLP example not being vectorized
Hi, I am new to LLVM with a particular interest in the optimization area, especially SLP. While working through the tutorial, I ran this example [1] hoping to see SLP vectorization in action, but for some reason I do not see it in the LLVM assembly shown below. Is there anything I am missing? I am using Clear Linux as the build machine, which has clang version 9.0.0. --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- $ cat arraysum.c void foo(int a1, int a2, int b1, int b2, int *A) { A[0] = a1*(a1 + b1)/b1 + 50*b1/a1; A[1] = a2*(a2 + b2)/b2 + 50*b2/a2; } $ clang -emit-llvm -S -O3 arraysum.c -o - ; ModuleID = 'arraysum.c' source_filename = "arraysum.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-generic-linux" ; Function Attrs: nofree norecurse nounwind uwtable writeonly define dso_local void @foo(i32, i32, i32, i32, i32* nocapture) local_unnamed_addr #0 { %6 = add nsw i32 %2, %0 %7 = mul nsw i32 %6, %0 %8 = sdiv i32 %7, %2 %9 = mul nsw i32 %2, 50 %10 = sdiv i32 %9, %0 %11 = add nsw i32 %10, %8 store i32 %11, i32* %4, align 4, !tbaa !2 %12 = add nsw i32 %3, %1 %13 = mul nsw i32 %12, %1 %14 = sdiv i32 %13, %3 %15 = mul nsw i32 %3, 50 %16 = sdiv i32 %15, %1 %17 = add nsw i32 %16, %14 %18 = getelementptr inbounds i32, i32* %4, i64 1 store i32 %17, i32* %18, align 4, !tbaa !2 ret void } attributes #0 = { nofree norecurse nounwind uwtable writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" 
"use-soft-float"="false" } !llvm.module.flags = !{!0} !llvm.ident = !{!1} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{!"clang version 9.0.0 (tags/RELEASE_900/final)"} !2 = !{!3, !3, i64 0} !3 = !{!"int", !4, i64 0} !4 = !{!"omnipotent char", !5, i64 0} !5 = !{!"Simple C/C++ TBAA"} -------------------------------------------- Thanks for your help. [1] https://llvm.org/docs/Vectorizers.html#the-slp-vectorizer
Adrien Guinet via llvm-dev
2019-Nov-28 18:43 UTC
[llvm-dev] SLP example not being vectorized
On 11/28/19 6:45 PM, Sandoval Gonzalez, Leonardo via llvm-dev wrote: > Hi, > > I am new to llvm with a particular interested in the optimization area, > specially on SLP. While working through the tutorial, I ran this example > [1] with the hope to see SLP vectorization in action but for some > reason, I do not see it on the LLVM assembly as seen below. Is there > anything I am missing? I am using Clearlinux as build machine and this > has clang version 9.0.0. >If you're on Intel hardware, I'd say that AFAIK there is no vector integer division instruction, so LLVM won't vectorize this code. Moreover, you should specify which instruction set you want to use, either by specifying the CPU architecture with -march, or by enabling specific instruction sets by hand (e.g. with -mavx2). For instance, the SLP vectorizer will work here: $ cat a.c void foo(int a1, int a2, int b1, int b2, int *A) { A[0] = a1+a2; A[1] = b1+b2; A[2] = a1+b2; A[3] = a2+b1; A[4] = a2+a2; A[5] = b2+b2; A[6] = a2+b2; A[7] = a1+b1; } $ clang-9 -S -emit-llvm -O3 -march=native -o - a.c define dso_local void @foo(i32, i32, i32, i32, i32* nocapture) local_unnamed_addr #0 { [...] %20 = add nsw <8 x i32> %13, %19 [...] }
Sandoval Gonzalez, Leonardo via llvm-dev
2019-Nov-29 15:31 UTC
[llvm-dev] SLP example not being vectorized
Thanks Adrien, I did not realize that integer division has no vector instruction. IMO, we should provide a better code snippet in this section, showing a 'simpler' arithmetic expression that would work (in terms of SLP vectorization) on most architectures, and indicate the -march parameter. I will propose it and see the comments. lsg On 11/28/2019 12:43 PM, Adrien Guinet via llvm-dev wrote: > On 11/28/19 6:45 PM, Sandoval Gonzalez, Leonardo via llvm-dev wrote: >> Hi, >> >> I am new to llvm with a particular interested in the optimization area, >> specially on SLP. While working through the tutorial, I ran this example >> [1] with the hope to see SLP vectorization in action but for some >> reason, I do not see it on the LLVM assembly as seen below. Is there >> anything I am missing? I am using Clearlinux as build machine and this >> has clang version 9.0.0. >> > > If you're on Intel hardware, I'd say that AFAIK there is no vectorial > integer division instruction, so LLVM won't vectorize this code. > Moreover, you should specify which instruction set you want to use, > whether by specifying the CPU architecure with -march, or by activating > various instruction sets by "hand" (e.g. with -mavx2). > > For instance, the SLP vectorizer will work here: > > $ cat a.c > void foo(int a1, int a2, int b1, int b2, int *A) { > A[0] = a1+a2; > A[1] = b1+b2; > A[2] = a1+b2; > A[3] = a2+b1; > A[4] = a2+a2; > A[5] = b2+b2; > A[6] = a2+b2; > A[7] = a1+b1; > } > > $ clang-9 -S -emit-llvm -O3 -march=native -o - a.c > define dso_local void @foo(i32, i32, i32, i32, i32* nocapture) > local_unnamed_addr #0 { > [...] > %20 = add nsw <8 x i32> %13, %19 > [...] > } > _______________________________________________ > LLVM Developers mailing list > llvm-dev at lists.llvm.org > https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev