This is my example (for.c):
#include <stdio.h>
int add(int a, int b) {
return a + b;
}
int main() {
int a, b, c, d;
a = 5;
b = 15;
c = add(a, b);
d = 0;
for(int i=0;i<16;i++)
d = add(c, d);
}
I run:
$ clang -O0 -Xclang -disable-O0-optnone -emit-llvm for.c -S -o forO0.ll
$ opt -O0 -S --loop-unroll --unroll-count=4 -view-cfg forO0.ll -o for-opt00-unroll4.ll
And this is the LLVM IR code that I get:
; ModuleID = 'forO0.ll'
source_filename = "for.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: noinline nounwind uwtable
define dso_local i32 @add(i32 %a, i32 %b) #0 {
entry:
%a.addr = alloca i32, align 4
%b.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
store i32 %b, i32* %b.addr, align 4
%0 = load i32, i32* %a.addr, align 4
%1 = load i32, i32* %b.addr, align 4
%add = add nsw i32 %0, %1
ret i32 %add
}
; Function Attrs: noinline nounwind uwtable
define dso_local i32 @main() #0 {
entry:
%retval = alloca i32, align 4
%a = alloca i32, align 4
%b = alloca i32, align 4
%c = alloca i32, align 4
%d = alloca i32, align 4
%i = alloca i32, align 4
store i32 0, i32* %retval, align 4
store i32 5, i32* %a, align 4
store i32 15, i32* %b, align 4
%0 = load i32, i32* %a, align 4
%1 = load i32, i32* %b, align 4
%call = call i32 @add(i32 %0, i32 %1)
store i32 %call, i32* %c, align 4
store i32 0, i32* %d, align 4
store i32 0, i32* %i, align 4
br label %for.cond
for.cond: ; preds = %for.inc.3, %entry
%2 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %2, 16
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
%3 = load i32, i32* %c, align 4
%4 = load i32, i32* %d, align 4
%call1 = call i32 @add(i32 %3, i32 %4)
store i32 %call1, i32* %d, align 4
br label %for.inc
for.inc: ; preds = %for.body
%5 = load i32, i32* %i, align 4
%inc = add nsw i32 %5, 1
store i32 %inc, i32* %i, align 4
%6 = load i32, i32* %i, align 4
%cmp.1 = icmp slt i32 %6, 16
br i1 %cmp.1, label %for.body.1, label %for.end
for.end: ; preds = %for.inc.2, %for.inc.1, %for.inc, %for.cond
%7 = load i32, i32* %d, align 4
%call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str, i64 0, i64 0), i32 %7)
%8 = load i32, i32* %retval, align 4
ret i32 %8
for.body.1: ; preds = %for.inc
%9 = load i32, i32* %c, align 4
%10 = load i32, i32* %d, align 4
%call1.1 = call i32 @add(i32 %9, i32 %10)
store i32 %call1.1, i32* %d, align 4
br label %for.inc.1
for.inc.1: ; preds = %for.body.1
%11 = load i32, i32* %i, align 4
%inc.1 = add nsw i32 %11, 1
store i32 %inc.1, i32* %i, align 4
%12 = load i32, i32* %i, align 4
%cmp.2 = icmp slt i32 %12, 16
br i1 %cmp.2, label %for.body.2, label %for.end
for.body.2: ; preds = %for.inc.1
%13 = load i32, i32* %c, align 4
%14 = load i32, i32* %d, align 4
%call1.2 = call i32 @add(i32 %13, i32 %14)
store i32 %call1.2, i32* %d, align 4
br label %for.inc.2
for.inc.2: ; preds = %for.body.2
%15 = load i32, i32* %i, align 4
%inc.2 = add nsw i32 %15, 1
store i32 %inc.2, i32* %i, align 4
%16 = load i32, i32* %i, align 4
%cmp.3 = icmp slt i32 %16, 16
br i1 %cmp.3, label %for.body.3, label %for.end
for.body.3: ; preds = %for.inc.2
%17 = load i32, i32* %c, align 4
%18 = load i32, i32* %d, align 4
%call1.3 = call i32 @add(i32 %17, i32 %18)
store i32 %call1.3, i32* %d, align 4
br label %for.inc.3
for.inc.3: ; preds = %for.body.3
%19 = load i32, i32* %i, align 4
%inc.3 = add nsw i32 %19, 1
store i32 %inc.3, i32* %i, align 4
br label %for.cond, !llvm.loop !2
}
declare dso_local i32 @printf(i8*, ...) #1
attributes #0 = { noinline nounwind uwtable
"correctly-rounded-divide-sqrt-fp-math"="false"
"disable-tail-calls"="false"
"frame-pointer"="all"
"less-precise-fpmad"="false"
"min-legal-vector-width"="0"
"no-infs-fp-math"="false"
"no-jump-tables"="false"
"no-nans-fp-math"="false"
"no-signed-zeros-fp-math"="false"
"no-trapping-math"="false"
"stack-protector-buffer-size"="8"
"target-cpu"="x86-64"
"target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87"
"unsafe-fp-math"="false"
"use-soft-float"="false" }
attributes #1 = {
"correctly-rounded-divide-sqrt-fp-math"="false"
"disable-tail-calls"="false"
"frame-pointer"="all"
"less-precise-fpmad"="false"
"no-infs-fp-math"="false"
"no-nans-fp-math"="false"
"no-signed-zeros-fp-math"="false"
"no-trapping-math"="false"
"stack-protector-buffer-size"="8"
"target-cpu"="x86-64"
"target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87"
"unsafe-fp-math"="false"
"use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git a3485301d4870f57590d7b69eed7959134a694ab)"}
!2 = distinct !{!2, !3}
!3 = !{!"llvm.loop.unroll.disable"}
So my problem is:
With an unroll count of 4 on a loop whose bound is 16, I expected to see one
single block for the increment i = i + 4, then 4 instructions for each
instruction of the original loop body, and a condition that checks whether
i < 16. That is the code I would intuitively expect. However, the increment
that I get is still i = i + 1, and there are only 4 blocks.
Do you know why this happens?
Thanks.
El vie., 22 may. 2020 a las 19:49, Florian Hahn (<florian_hahn at apple.com>) escribió:
>
>
> > On May 22, 2020, at 09:55, legend xx via llvm-dev <llvm-dev at lists.llvm.org> wrote:
> >
> > Hi,
> >
> > I'm interested in finding a pass for loop unrolling in the LLVM compiler. I
> > tried opt --loop-unroll --unroll-count=4, but it doesn't work well.
> >
> > What pass can I use, and how?
> >
>
> -loop-unroll should be the right pass. There are multiple possible reasons
> why the loop is not unrolled and the pass has a bunch of options to
> enable/force unrolling for more cases (see
> https://github.com/llvm/llvm-project/blob/master/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp#L81).
>
>
> Passing `-debug` should give you a better idea why the loop is not
> unrolled. If you would share the IR, someone might be able to provide
> additional insight.
>
> > I would also like to know if there is any way to mark the loops that I
> > want to be unrolled.
>
> Yes it is possible to explicitly mark loops for unrolling using metadata
> in LLVM IR: https://llvm.org/docs/LangRef.html#llvm-loop-unroll. But the
> metadata might not help, if the loop contains code the unroller does not
> support.
>
> Cheers,
> Florian
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20200523/e1e2bd42/attachment-0001.html>