Andrew Kelley via llvm-dev
2018-Nov-23 20:32 UTC
[llvm-dev] is this a bug in an optimization pass?
The frontend code is a pretty simple for loop, that counts from i = 0; i != 10; i += 1. It gets optimized into an endless loop. export fn entry() void { var array: [10]Bar = undefined; var x = for (array) |elem, i| { if (i == 1) break elem; } else bar2(); } Here's the generated IR: ; ModuleID = 'test' source_filename = "test" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" %Bar = type { i32, i32 } %"[]u8" = type { i8*, i64 } %StackTrace = type { i64, %"[]usize" } %"[]usize" = type { i64*, i64 } @0 = internal unnamed_addr constant %Bar { i32 3, i32 4 }, align 4 ; Function Attrs: nounwind readnone speculatable declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 ; Function Attrs: nobuiltin nounwind define void @entry() #2 !dbg !41 { Entry: %array = alloca [10 x %Bar], align 4 %x = alloca %Bar, align 4 %for_index = alloca i64, align 8 %0 = bitcast [10 x %Bar]* %array to i8*, !dbg !60 call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 -86, i64 80, i1 false), !dbg !60 call void @llvm.dbg.declare(metadata [10 x %Bar]* %array, metadata !45, metadata !DIExpression()), !dbg !60 store i64 0, i64* %for_index, align 8, !dbg !61 call void @llvm.dbg.declare(metadata i64* %for_index, metadata !55, metadata !DIExpression()), !dbg !63 br label %ForCond, !dbg !61 ForCond: ; preds = %Else, %Entry %1 = load i64, i64* %for_index, align 8, !dbg !61 %2 = icmp ne i64 %1, 10, !dbg !61 br i1 %2, label %ForBody, label %ForElse, !dbg !61 ForBody: ; preds = %ForCond %3 = getelementptr inbounds [10 x %Bar], [10 x %Bar]* %array, i64 0, i64 %1, !dbg !61 call void @llvm.dbg.declare(metadata %Bar* %3, metadata !58, metadata !DIExpression()), !dbg !64 %4 = load i64, i64* %for_index, align 8, !dbg !65 %5 = icmp eq i64 %4, 1, !dbg !67 br i1 %5, label %Then, label %Else, !dbg !67 Then: ; preds = %ForBody %6 = bitcast %Bar* %3 to i8*, !dbg !68 %7 = bitcast %Bar* %x to i8*, !dbg !68 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %7, i8* 
align 4 %6, i64 8, i1 false), !dbg !68 br label %ForEnd, !dbg !69 Else: ; preds = %ForBody %8 = add nuw i64 %1, 1, !dbg !61 store i64 %8, i64* %for_index, align 8, !dbg !61 br label %ForCond, !dbg !61 ForElse: ; preds = %ForCond call fastcc void @bar2(%Bar* sret %x), !dbg !70 br label %ForEnd, !dbg !71 ForEnd: ; preds = %ForElse, %Then call void @llvm.dbg.declare(metadata %Bar* %x, metadata !59, metadata !DIExpression()), !dbg !72 ret void, !dbg !73 } ; Function Attrs: argmemonly nounwind declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #3 ; Function Attrs: argmemonly nounwind declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #3 ; Function Attrs: nobuiltin nounwind define internal fastcc void @bar2(%Bar* nonnull sret) unnamed_addr #2 !dbg !74 { Entry: call fastcc void @bar(%Bar* sret %0), !dbg !79 ret void, !dbg !81 } ; Function Attrs: nobuiltin nounwind define internal fastcc void @bar(%Bar* nonnull sret) unnamed_addr #2 !dbg !82 { Entry: %1 = bitcast %Bar* %0 to i8*, !dbg !83 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%Bar* @0 to i8*), i64 8, i1 false), !dbg !83 ret void, !dbg !83 } Here's the optimized IR, an endless loop: define void @entry() local_unnamed_addr #0 !dbg !8 { Entry: call void @llvm.dbg.value(metadata i64 0, metadata !24, metadata !DIExpression()), !dbg !30 br label %ForCond, !dbg !31 ForCond: ; preds = %Else, %Entry %for_index.0 = phi i64 [ 0, %Entry ], [ %0, %Else ], !dbg !31 call void @llvm.dbg.value(metadata i64 %for_index.0, metadata !24, metadata !DIExpression()), !dbg !30 switch i64 %for_index.0, label %Else [ Else: ; preds = %ForCond %0 = add nuw i64 %for_index.0, 1, !dbg !31 call void @llvm.dbg.value(metadata i64 %0, metadata !24, metadata !DIExpression()), !dbg !30 br label %ForCond, !dbg !31 ForEnd: ; preds = %ForCond, %ForCond ret void, !dbg !33 } I don't see how this follows from the original source. Is it a bug?
Andrew Kelley via llvm-dev
2018-Nov-23 20:47 UTC
[llvm-dev] is this a bug in an optimization pass?
Here's a godbolt link: https://godbolt.org/z/BaWZrM And here's the expected @entry function after optimization: define void @entry() local_unnamed_addr #0 { Entry: ret void } On Fri, Nov 23, 2018 at 3:32 PM Andrew Kelley <superjoe30 at gmail.com> wrote:> > The frontend code is a pretty simple for loop, that counts from i = 0; > i != 10; i += 1 > It gets optimized into and endless loop. > > export fn entry() void { > var array: [10]Bar = undefined; > var x = for (array) |elem, i| { > if (i == 1) break elem; > } else bar2(); > } > > Here's the generated IR: > > ; ModuleID = 'test' > source_filename = "test" > target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" > target triple = "x86_64-unknown-linux-gnu" > > %Bar = type { i32, i32 } > %"[]u8" = type { i8*, i64 } > %StackTrace = type { i64, %"[]usize" } > %"[]usize" = type { i64*, i64 } > > @0 = internal unnamed_addr constant %Bar { i32 3, i32 4 }, align 4 > > ; Function Attrs: nounwind readnone speculatable > declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 > > ; Function Attrs: nobuiltin nounwind > define void @entry() #2 !dbg !41 { > Entry: > %array = alloca [10 x %Bar], align 4 > %x = alloca %Bar, align 4 > %for_index = alloca i64, align 8 > %0 = bitcast [10 x %Bar]* %array to i8*, !dbg !60 > call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 -86, i64 80, i1 > false), !dbg !60 > call void @llvm.dbg.declare(metadata [10 x %Bar]* %array, metadata > !45, metadata !DIExpression()), !dbg !60 > store i64 0, i64* %for_index, align 8, !dbg !61 > call void @llvm.dbg.declare(metadata i64* %for_index, metadata !55, > metadata !DIExpression()), !dbg !63 > br label %ForCond, !dbg !61 > > ForCond: ; preds = %Else, %Entry > %1 = load i64, i64* %for_index, align 8, !dbg !61 > %2 = icmp ne i64 %1, 10, !dbg !61 > br i1 %2, label %ForBody, label %ForElse, !dbg !61 > > ForBody: ; preds = %ForCond > %3 = getelementptr inbounds [10 x %Bar], [10 x %Bar]* %array, i64 0, > i64 %1, !dbg !61 > call void 
@llvm.dbg.declare(metadata %Bar* %3, metadata !58, > metadata !DIExpression()), !dbg !64 > %4 = load i64, i64* %for_index, align 8, !dbg !65 > %5 = icmp eq i64 %4, 1, !dbg !67 > br i1 %5, label %Then, label %Else, !dbg !67 > > Then: ; preds = %ForBody > %6 = bitcast %Bar* %3 to i8*, !dbg !68 > %7 = bitcast %Bar* %x to i8*, !dbg !68 > call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %7, i8* align 4 %6, > i64 8, i1 false), !dbg !68 > br label %ForEnd, !dbg !69 > > Else: ; preds = %ForBody > %8 = add nuw i64 %1, 1, !dbg !61 > store i64 %8, i64* %for_index, align 8, !dbg !61 > br label %ForCond, !dbg !61 > > ForElse: ; preds = %ForCond > call fastcc void @bar2(%Bar* sret %x), !dbg !70 > br label %ForEnd, !dbg !71 > > ForEnd: ; preds = %ForElse, %Then > call void @llvm.dbg.declare(metadata %Bar* %x, metadata !59, > metadata !DIExpression()), !dbg !72 > ret void, !dbg !73 > } > > ; Function Attrs: argmemonly nounwind > declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #3 > > ; Function Attrs: argmemonly nounwind > declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* > nocapture readonly, i64, i1) #3 > > ; Function Attrs: nobuiltin nounwind > define internal fastcc void @bar2(%Bar* nonnull sret) unnamed_addr #2 !dbg !74 { > Entry: > call fastcc void @bar(%Bar* sret %0), !dbg !79 > ret void, !dbg !81 > } > > ; Function Attrs: nobuiltin nounwind > define internal fastcc void @bar(%Bar* nonnull sret) unnamed_addr #2 !dbg !82 { > Entry: > %1 = bitcast %Bar* %0 to i8*, !dbg !83 > call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 > bitcast (%Bar* @0 to i8*), i64 8, i1 false), !dbg !83 > ret void, !dbg !83 > } > > > Here's the optimized IR, an endless loop: > > define void @entry() local_unnamed_addr #0 !dbg !8 { > Entry: > call void @llvm.dbg.value(metadata i64 0, metadata !24, metadata > !DIExpression()), !dbg !30 > br label %ForCond, !dbg !31 > > ForCond: ; preds = %Else, %Entry > %for_index.0 = phi i64 [ 0, %Entry ], [ 
%0, %Else ], !dbg !31 > call void @llvm.dbg.value(metadata i64 %for_index.0, metadata !24, > metadata !DIExpression()), !dbg !30 > switch i64 %for_index.0, label %Else [ > > Else: ; preds = %ForCond > %0 = add nuw i64 %for_index.0, 1, !dbg !31 > call void @llvm.dbg.value(metadata i64 %0, metadata !24, metadata > !DIExpression()), !dbg !30 > br label %ForCond, !dbg !31 > > ForEnd: ; preds = %ForCond, %ForCond > ret void, !dbg !33 > } > > > I don't see how this follows from the original source. Is it a bug?
Andrew Kelley via llvm-dev
2018-Nov-23 22:10 UTC
[llvm-dev] is this a bug in an optimization pass?
2 things 1. I misread the LLVM IR. It's not an infinite loop, it's just a missed optimization. 2. I can reproduce this with a dead simple C program: void entry(void) { for (unsigned i = 0; i != 10; i += 1) { if (i == 1) break; } } https://godbolt.org/z/m3sjkA On Fri, Nov 23, 2018 at 3:47 PM Andrew Kelley <superjoe30 at gmail.com> wrote:> > Here's a godbolt link: https://godbolt.org/z/BaWZrM > And here's the expected @entry function after optimization: > > define void @entry() local_unnamed_addr #0 { > Entry: > ret void > } > > > > On Fri, Nov 23, 2018 at 3:32 PM Andrew Kelley <superjoe30 at gmail.com> wrote: > > > > The frontend code is a pretty simple for loop, that counts from i = 0; > > i != 10; i += 1 > > It gets optimized into and endless loop. > > > > export fn entry() void { > > var array: [10]Bar = undefined; > > var x = for (array) |elem, i| { > > if (i == 1) break elem; > > } else bar2(); > > } > > > > Here's the generated IR: > > > > ; ModuleID = 'test' > > source_filename = "test" > > target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" > > target triple = "x86_64-unknown-linux-gnu" > > > > %Bar = type { i32, i32 } > > %"[]u8" = type { i8*, i64 } > > %StackTrace = type { i64, %"[]usize" } > > %"[]usize" = type { i64*, i64 } > > > > @0 = internal unnamed_addr constant %Bar { i32 3, i32 4 }, align 4 > > > > ; Function Attrs: nounwind readnone speculatable > > declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 > > > > ; Function Attrs: nobuiltin nounwind > > define void @entry() #2 !dbg !41 { > > Entry: > > %array = alloca [10 x %Bar], align 4 > > %x = alloca %Bar, align 4 > > %for_index = alloca i64, align 8 > > %0 = bitcast [10 x %Bar]* %array to i8*, !dbg !60 > > call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 -86, i64 80, i1 > > false), !dbg !60 > > call void @llvm.dbg.declare(metadata [10 x %Bar]* %array, metadata > > !45, metadata !DIExpression()), !dbg !60 > > store i64 0, i64* %for_index, align 8, !dbg !61 > > call void 
@llvm.dbg.declare(metadata i64* %for_index, metadata !55, > > metadata !DIExpression()), !dbg !63 > > br label %ForCond, !dbg !61 > > > > ForCond: ; preds = %Else, %Entry > > %1 = load i64, i64* %for_index, align 8, !dbg !61 > > %2 = icmp ne i64 %1, 10, !dbg !61 > > br i1 %2, label %ForBody, label %ForElse, !dbg !61 > > > > ForBody: ; preds = %ForCond > > %3 = getelementptr inbounds [10 x %Bar], [10 x %Bar]* %array, i64 0, > > i64 %1, !dbg !61 > > call void @llvm.dbg.declare(metadata %Bar* %3, metadata !58, > > metadata !DIExpression()), !dbg !64 > > %4 = load i64, i64* %for_index, align 8, !dbg !65 > > %5 = icmp eq i64 %4, 1, !dbg !67 > > br i1 %5, label %Then, label %Else, !dbg !67 > > > > Then: ; preds = %ForBody > > %6 = bitcast %Bar* %3 to i8*, !dbg !68 > > %7 = bitcast %Bar* %x to i8*, !dbg !68 > > call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %7, i8* align 4 %6, > > i64 8, i1 false), !dbg !68 > > br label %ForEnd, !dbg !69 > > > > Else: ; preds = %ForBody > > %8 = add nuw i64 %1, 1, !dbg !61 > > store i64 %8, i64* %for_index, align 8, !dbg !61 > > br label %ForCond, !dbg !61 > > > > ForElse: ; preds = %ForCond > > call fastcc void @bar2(%Bar* sret %x), !dbg !70 > > br label %ForEnd, !dbg !71 > > > > ForEnd: ; preds = %ForElse, %Then > > call void @llvm.dbg.declare(metadata %Bar* %x, metadata !59, > > metadata !DIExpression()), !dbg !72 > > ret void, !dbg !73 > > } > > > > ; Function Attrs: argmemonly nounwind > > declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #3 > > > > ; Function Attrs: argmemonly nounwind > > declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* > > nocapture readonly, i64, i1) #3 > > > > ; Function Attrs: nobuiltin nounwind > > define internal fastcc void @bar2(%Bar* nonnull sret) unnamed_addr #2 !dbg !74 { > > Entry: > > call fastcc void @bar(%Bar* sret %0), !dbg !79 > > ret void, !dbg !81 > > } > > > > ; Function Attrs: nobuiltin nounwind > > define internal fastcc void @bar(%Bar* nonnull 
sret) unnamed_addr #2 !dbg !82 { > > Entry: > > %1 = bitcast %Bar* %0 to i8*, !dbg !83 > > call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 > > bitcast (%Bar* @0 to i8*), i64 8, i1 false), !dbg !83 > > ret void, !dbg !83 > > } > > > > > > Here's the optimized IR, an endless loop: > > > > define void @entry() local_unnamed_addr #0 !dbg !8 { > > Entry: > > call void @llvm.dbg.value(metadata i64 0, metadata !24, metadata > > !DIExpression()), !dbg !30 > > br label %ForCond, !dbg !31 > > > > ForCond: ; preds = %Else, %Entry > > %for_index.0 = phi i64 [ 0, %Entry ], [ %0, %Else ], !dbg !31 > > call void @llvm.dbg.value(metadata i64 %for_index.0, metadata !24, > > metadata !DIExpression()), !dbg !30 > > switch i64 %for_index.0, label %Else [ > > > > Else: ; preds = %ForCond > > %0 = add nuw i64 %for_index.0, 1, !dbg !31 > > call void @llvm.dbg.value(metadata i64 %0, metadata !24, metadata > > !DIExpression()), !dbg !30 > > br label %ForCond, !dbg !31 > > > > ForEnd: ; preds = %ForCond, %ForCond > > ret void, !dbg !33 > > } > > > > > > I don't see how this follows from the original source. Is it a bug?
Apparently Analogous Threads
- [LLVMdev] Additional Optimization I'm Missing?
- [LLVMdev] LICM/store-aliasing of global loads
- can debug info for coroutines be improved?
- Question on Aliasing and invariant load hoisting
- [Debuginfo] Changing llvm.dbg.value and DBG_VALUE to support multiple location operands