Leslie Zhai via llvm-dev
2017-Oct-24 04:19 UTC
[llvm-dev] LLVM 6.0's LoopUnroll PASS is not able to work?
Hi LLVM developers, $ cat hello.c #include <stdio.h> int main(int argc, char *argv[]) { for (int i = 0; i < 10; i++) { printf("%d\n", i); } return 0; } $ /opt/llvm-svn/bin/clang --version Fedora clang version 6.0.0 (trunk 316308) (based on LLVM 6.0.0svn) Target: x86_64-redhat-linux Thread model: posix InstalledDir: /opt/llvm-svn/bin $ /opt/llvm-svn/bin/clang -S -emit-llvm hello.c -o hello3.ll $ cat hello3.ll ; ModuleID = 'hello.c' source_filename = "hello.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-redhat-linux" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: noinline nounwind optnone uwtable define i32 @main(i32, i8**) #0 { %3 = alloca i32, align 4 %4 = alloca i32, align 4 %5 = alloca i8**, align 8 %6 = alloca i32, align 4 store i32 0, i32* %3, align 4 store i32 %0, i32* %4, align 4 store i8** %1, i8*** %5, align 8 store i32 0, i32* %6, align 4 br label %7 ; <label>:7: ; preds = %13, %2 %8 = load i32, i32* %6, align 4 %9 = icmp slt i32 %8, 10 br i1 %9, label %10, label %16 ; <label>:10: ; preds = %7 %11 = load i32, i32* %6, align 4 %12 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %11) br label %13 ; <label>:13: ; preds = %10 %14 = load i32, i32* %6, align 4 %15 = add nsw i32 %14, 1 store i32 %15, i32* %6, align 4 br label %7 ; <label>:16: ; preds = %7 ret i32 0 } declare i32 @printf(i8*, ...) #1 attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.module.flags = !{!0} !llvm.ident = !{!1} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{!"Fedora clang version 6.0.0 (trunk 316308) (based on LLVM 6.0.0svn)"} $ /opt/llvm-svn//bin/opt -S -mem2reg -loops -loop-simplify -loop-rotate -lcssa -loop-unroll -sccp -simplifycfg hello3.ll -o hello3.loop.ll $ cat hello3.loop.ll ; ModuleID = 'hello3.ll' source_filename = "hello.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-redhat-linux" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: noinline nounwind optnone uwtable define i32 @main(i32, i8**) #0 { %3 = alloca i32, align 4 %4 = alloca i32, align 4 %5 = alloca i8**, align 8 %6 = alloca i32, align 4 store i32 0, i32* %3, align 4 store i32 %0, i32* %4, align 4 store i8** %1, i8*** %5, align 8 store i32 0, i32* %6, align 4 br label %7 ; <label>:7: ; preds = %13, %2 %8 = load i32, i32* %6, align 4 %9 = icmp slt i32 %8, 10 br i1 %9, label %10, label %16 ; <label>:10: ; preds = %7 %11 = load i32, i32* %6, align 4 %12 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %11) br label %13 ; <label>:13: ; preds = %10 %14 = load i32, i32* %6, align 4 %15 = add nsw i32 %14, 1 store i32 %15, i32* %6, align 4 br label %7 ; <label>:16: ; preds = %7 ret i32 0 } declare i32 @printf(i8*, ...) #1 attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.module.flags = !{!0} !llvm.ident = !{!1} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{!"Fedora clang version 6.0.0 (trunk 316308) (based on LLVM 6.0.0svn)"} LLVM 6.0's LoopUnroll PASS failed to unroll loops? But LLVM 3.9.1 is able to work: $ clang --version clang version 3.9.1 (tags/RELEASE_391/final) Target: x86_64-unknown-linux-gnu Thread model: posix InstalledDir: /usr/bin $ clang -S -emit-llvm hello.c -o hello1.ll $ cat hello1.ll ; ModuleID = 'hello.c' source_filename = "hello.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: nounwind uwtable define i32 @main(i32, i8**) #0 { %3 = alloca i32, align 4 %4 = alloca i32, align 4 %5 = alloca i8**, align 8 %6 = alloca i32, align 4 store i32 0, i32* %3, align 4 store i32 %0, i32* %4, align 4 store i8** %1, i8*** %5, align 8 store i32 0, i32* %6, align 4 br label %7 ; <label>:7: ; preds = %13, %2 %8 = load i32, i32* %6, align 4 %9 = icmp slt i32 %8, 10 br i1 %9, label %10, label %16 ; <label>:10: ; preds = %7 %11 = load i32, i32* %6, align 4 %12 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %11) br label %13 ; <label>:13: ; preds = %10 %14 = load i32, i32* %6, align 4 %15 = add nsw i32 %14, 1 store i32 %15, i32* %6, align 4 br label %7 ; <label>:16: ; preds = %7 ret i32 0 } declare i32 @printf(i8*, ...) #1 attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.ident = !{!0} !0 = !{!"clang version 3.9.1 (tags/RELEASE_391/final)"} $ opt -S -mem2reg -loops -loop-simplify -loop-rotate -lcssa -loop-unroll -sccp -simplifycfg hello1.ll -o hello1.loop.ll $ cat hello1.loop.ll ; ModuleID = 'hello1.ll' source_filename = "hello.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: nounwind uwtable define i32 @main(i32, i8**) #0 { %3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 0) %4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 1) %5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 2) %6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 3) %7 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 4) %8 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 5) %9 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 6) %10 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 7) %11 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 8) %12 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 9) ret i32 0 } declare i32 @printf(i8*, ...) #1 attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.ident = !{!0} !0 = !{!"clang version 3.9.1 (tags/RELEASE_391/final)"} And also LLVM 3.1 is still able to work: $ ./build/Release+Asserts/bin/opt --version LLVM (http://llvm.org/): LLVM version 3.1 Optimized build with assertions. Built Oct 23 2017 (16:22:51). Default target: x86_64-unknown-linux-gnu Host CPU: corei7-avx $ ./build/Release+Asserts/bin/clang -S -emit-llvm hello.c -o hello.ll $ cat hello.ll ; ModuleID = 'hello.c' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 define i32 @main(i32 %argc, i8** %argv) nounwind uwtable { entry: %retval = alloca i32, align 4 %argc.addr = alloca i32, align 4 %argv.addr = alloca i8**, align 8 %bits = alloca [4 x i32], align 16 %iter = alloca i32, align 4 store i32 0, i32* %retval store i32 %argc, i32* %argc.addr, align 4 store i8** %argv, i8*** %argv.addr, align 8 store i32 0, i32* %iter, align 4 br label %for.cond for.cond: ; preds = %for.inc, %entry %0 = load i32* %iter, align 4 %cmp = icmp slt i32 %0, 10 br i1 %cmp, label %for.body, label %for.end for.body: ; preds = %for.cond %1 = load i32* %iter, align 4 %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %1) br label %for.inc for.inc: ; preds = %for.body %2 = load i32* %iter, align 4 %inc = add nsw i32 %2, 1 store i32 %inc, i32* %iter, align 4 br label %for.cond for.end: ; preds = %for.cond ret i32 0 } declare i32 @printf(i8*, ...) $ ./build/Release+Asserts/bin/opt -S -mem2reg -loops -loop-simplify -loop-rotate -lcssa -loop-unroll -sccp -simplifycfg hello.ll -o hello.loop.ll $ cat hello.loop.ll ; ModuleID = 'hello.ll' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 define i32 @main(i32 %argc, i8** %argv) nounwind uwtable { entry: %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 0) %call.1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 1) %call.2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 2) %call.3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 3) %call.4 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 4) %call.5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 5) %call.6 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 6) %call.7 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 7) %call.8 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 8) %call.9 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 9) ret i32 0 } declare i32 @printf(i8*, ...) Succeeded unrolled Optimized build with assertions. Maybe I wrongly use the LoopUnroll PASS? or please give me some hints, thanks a lot! -- Regards, Leslie Zhai - https://reviews.llvm.org/p/xiangzhai/