Andrew Kelley via llvm-dev
2018-Nov-23 20:32 UTC
[llvm-dev] is this a bug in an optimization pass?
The frontend code is a pretty simple for loop, that counts from i = 0;
i != 10; i += 1
It gets optimized into an endless loop.
export fn entry() void {
var array: [10]Bar = undefined;
var x = for (array) |elem, i| {
if (i == 1) break elem;
} else bar2();
}
Here's the generated IR:
; ModuleID = 'test'
source_filename = "test"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%Bar = type { i32, i32 }
%"[]u8" = type { i8*, i64 }
%StackTrace = type { i64, %"[]usize" }
%"[]usize" = type { i64*, i64 }
@0 = internal unnamed_addr constant %Bar { i32 3, i32 4 }, align 4
; Function Attrs: nounwind readnone speculatable
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: nobuiltin nounwind
define void @entry() #2 !dbg !41 {
Entry:
%array = alloca [10 x %Bar], align 4
%x = alloca %Bar, align 4
%for_index = alloca i64, align 8
%0 = bitcast [10 x %Bar]* %array to i8*, !dbg !60
call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 -86, i64 80, i1
false), !dbg !60
call void @llvm.dbg.declare(metadata [10 x %Bar]* %array, metadata
!45, metadata !DIExpression()), !dbg !60
store i64 0, i64* %for_index, align 8, !dbg !61
call void @llvm.dbg.declare(metadata i64* %for_index, metadata !55,
metadata !DIExpression()), !dbg !63
br label %ForCond, !dbg !61
ForCond: ; preds = %Else, %Entry
%1 = load i64, i64* %for_index, align 8, !dbg !61
%2 = icmp ne i64 %1, 10, !dbg !61
br i1 %2, label %ForBody, label %ForElse, !dbg !61
ForBody: ; preds = %ForCond
%3 = getelementptr inbounds [10 x %Bar], [10 x %Bar]* %array, i64 0,
i64 %1, !dbg !61
call void @llvm.dbg.declare(metadata %Bar* %3, metadata !58,
metadata !DIExpression()), !dbg !64
%4 = load i64, i64* %for_index, align 8, !dbg !65
%5 = icmp eq i64 %4, 1, !dbg !67
br i1 %5, label %Then, label %Else, !dbg !67
Then: ; preds = %ForBody
%6 = bitcast %Bar* %3 to i8*, !dbg !68
%7 = bitcast %Bar* %x to i8*, !dbg !68
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %7, i8* align 4 %6,
i64 8, i1 false), !dbg !68
br label %ForEnd, !dbg !69
Else: ; preds = %ForBody
%8 = add nuw i64 %1, 1, !dbg !61
store i64 %8, i64* %for_index, align 8, !dbg !61
br label %ForCond, !dbg !61
ForElse: ; preds = %ForCond
call fastcc void @bar2(%Bar* sret %x), !dbg !70
br label %ForEnd, !dbg !71
ForEnd: ; preds = %ForElse, %Then
call void @llvm.dbg.declare(metadata %Bar* %x, metadata !59,
metadata !DIExpression()), !dbg !72
ret void, !dbg !73
}
; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #3
; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8*
nocapture readonly, i64, i1) #3
; Function Attrs: nobuiltin nounwind
define internal fastcc void @bar2(%Bar* nonnull sret) unnamed_addr #2 !dbg !74 {
Entry:
call fastcc void @bar(%Bar* sret %0), !dbg !79
ret void, !dbg !81
}
; Function Attrs: nobuiltin nounwind
define internal fastcc void @bar(%Bar* nonnull sret) unnamed_addr #2 !dbg !82 {
Entry:
%1 = bitcast %Bar* %0 to i8*, !dbg !83
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4
bitcast (%Bar* @0 to i8*), i64 8, i1 false), !dbg !83
ret void, !dbg !83
}
Here's the optimized IR, an endless loop:
define void @entry() local_unnamed_addr #0 !dbg !8 {
Entry:
call void @llvm.dbg.value(metadata i64 0, metadata !24, metadata
!DIExpression()), !dbg !30
br label %ForCond, !dbg !31
ForCond: ; preds = %Else, %Entry
%for_index.0 = phi i64 [ 0, %Entry ], [ %0, %Else ], !dbg !31
call void @llvm.dbg.value(metadata i64 %for_index.0, metadata !24,
metadata !DIExpression()), !dbg !30
switch i64 %for_index.0, label %Else [
i64 10, label %ForEnd
i64 1, label %ForEnd
]
Else: ; preds = %ForCond
%0 = add nuw i64 %for_index.0, 1, !dbg !31
call void @llvm.dbg.value(metadata i64 %0, metadata !24, metadata
!DIExpression()), !dbg !30
br label %ForCond, !dbg !31
ForEnd: ; preds = %ForCond, %ForCond
ret void, !dbg !33
}
I don't see how this follows from the original source. Is it a bug?
Andrew Kelley via llvm-dev
2018-Nov-23 20:47 UTC
[llvm-dev] is this a bug in an optimization pass?
Here's a godbolt link: https://godbolt.org/z/BaWZrM
And here's the expected @entry function after optimization:
define void @entry() local_unnamed_addr #0 {
Entry:
ret void
}
On Fri, Nov 23, 2018 at 3:32 PM Andrew Kelley <superjoe30 at gmail.com> wrote:
>
> The frontend code is a pretty simple for loop, that counts from i = 0;
> i != 10; i += 1
> It gets optimized into and endless loop.
>
> export fn entry() void {
> var array: [10]Bar = undefined;
> var x = for (array) |elem, i| {
> if (i == 1) break elem;
> } else bar2();
> }
>
> Here's the generated IR:
>
> ; ModuleID = 'test'
> source_filename = "test"
> target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
> target triple = "x86_64-unknown-linux-gnu"
>
> %Bar = type { i32, i32 }
> %"[]u8" = type { i8*, i64 }
> %StackTrace = type { i64, %"[]usize" }
> %"[]usize" = type { i64*, i64 }
>
> @0 = internal unnamed_addr constant %Bar { i32 3, i32 4 }, align 4
>
> ; Function Attrs: nounwind readnone speculatable
> declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
>
> ; Function Attrs: nobuiltin nounwind
> define void @entry() #2 !dbg !41 {
> Entry:
> %array = alloca [10 x %Bar], align 4
> %x = alloca %Bar, align 4
> %for_index = alloca i64, align 8
> %0 = bitcast [10 x %Bar]* %array to i8*, !dbg !60
> call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 -86, i64 80, i1
> false), !dbg !60
> call void @llvm.dbg.declare(metadata [10 x %Bar]* %array, metadata
> !45, metadata !DIExpression()), !dbg !60
> store i64 0, i64* %for_index, align 8, !dbg !61
> call void @llvm.dbg.declare(metadata i64* %for_index, metadata !55,
> metadata !DIExpression()), !dbg !63
> br label %ForCond, !dbg !61
>
> ForCond: ; preds = %Else, %Entry
> %1 = load i64, i64* %for_index, align 8, !dbg !61
> %2 = icmp ne i64 %1, 10, !dbg !61
> br i1 %2, label %ForBody, label %ForElse, !dbg !61
>
> ForBody: ; preds = %ForCond
> %3 = getelementptr inbounds [10 x %Bar], [10 x %Bar]* %array, i64 0,
> i64 %1, !dbg !61
> call void @llvm.dbg.declare(metadata %Bar* %3, metadata !58,
> metadata !DIExpression()), !dbg !64
> %4 = load i64, i64* %for_index, align 8, !dbg !65
> %5 = icmp eq i64 %4, 1, !dbg !67
> br i1 %5, label %Then, label %Else, !dbg !67
>
> Then: ; preds = %ForBody
> %6 = bitcast %Bar* %3 to i8*, !dbg !68
> %7 = bitcast %Bar* %x to i8*, !dbg !68
> call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %7, i8* align 4 %6,
> i64 8, i1 false), !dbg !68
> br label %ForEnd, !dbg !69
>
> Else: ; preds = %ForBody
> %8 = add nuw i64 %1, 1, !dbg !61
> store i64 %8, i64* %for_index, align 8, !dbg !61
> br label %ForCond, !dbg !61
>
> ForElse: ; preds = %ForCond
> call fastcc void @bar2(%Bar* sret %x), !dbg !70
> br label %ForEnd, !dbg !71
>
> ForEnd: ; preds = %ForElse, %Then
> call void @llvm.dbg.declare(metadata %Bar* %x, metadata !59,
> metadata !DIExpression()), !dbg !72
> ret void, !dbg !73
> }
>
> ; Function Attrs: argmemonly nounwind
> declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #3
>
> ; Function Attrs: argmemonly nounwind
> declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8*
> nocapture readonly, i64, i1) #3
>
> ; Function Attrs: nobuiltin nounwind
> define internal fastcc void @bar2(%Bar* nonnull sret) unnamed_addr #2 !dbg !74 {
> Entry:
> call fastcc void @bar(%Bar* sret %0), !dbg !79
> ret void, !dbg !81
> }
>
> ; Function Attrs: nobuiltin nounwind
> define internal fastcc void @bar(%Bar* nonnull sret) unnamed_addr #2 !dbg !82 {
> Entry:
> %1 = bitcast %Bar* %0 to i8*, !dbg !83
> call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4
> bitcast (%Bar* @0 to i8*), i64 8, i1 false), !dbg !83
> ret void, !dbg !83
> }
>
>
> Here's the optimized IR, an endless loop:
>
> define void @entry() local_unnamed_addr #0 !dbg !8 {
> Entry:
> call void @llvm.dbg.value(metadata i64 0, metadata !24, metadata
> !DIExpression()), !dbg !30
> br label %ForCond, !dbg !31
>
> ForCond: ; preds = %Else, %Entry
> %for_index.0 = phi i64 [ 0, %Entry ], [ %0, %Else ], !dbg !31
> call void @llvm.dbg.value(metadata i64 %for_index.0, metadata !24,
> metadata !DIExpression()), !dbg !30
> switch i64 %for_index.0, label %Else [
>
> Else: ; preds = %ForCond
> %0 = add nuw i64 %for_index.0, 1, !dbg !31
> call void @llvm.dbg.value(metadata i64 %0, metadata !24, metadata
> !DIExpression()), !dbg !30
> br label %ForCond, !dbg !31
>
> ForEnd: ; preds = %ForCond, %ForCond
> ret void, !dbg !33
> }
>
>
> I don't see how this follows from the original source. Is it a bug?
Andrew Kelley via llvm-dev
2018-Nov-23 22:10 UTC
[llvm-dev] is this a bug in an optimization pass?
2 things
1. I misread the LLVM IR. It's not an infinite loop, it's just a
missed optimization.
2. I can reproduce this with a dead simple C program:
void entry(void) {
for (unsigned i = 0; i != 10; i += 1) {
if (i == 1) break;
}
}
https://godbolt.org/z/m3sjkA
On Fri, Nov 23, 2018 at 3:47 PM Andrew Kelley <superjoe30 at gmail.com> wrote:
>
> Here's a godbolt link: https://godbolt.org/z/BaWZrM
> And here's the expected @entry function after optimization:
>
> define void @entry() local_unnamed_addr #0 {
> Entry:
> ret void
> }
>
>
>
> On Fri, Nov 23, 2018 at 3:32 PM Andrew Kelley <superjoe30 at gmail.com> wrote:
> >
> > The frontend code is a pretty simple for loop, that counts from i = 0;
> > i != 10; i += 1
> > It gets optimized into and endless loop.
> >
> > export fn entry() void {
> > var array: [10]Bar = undefined;
> > var x = for (array) |elem, i| {
> > if (i == 1) break elem;
> > } else bar2();
> > }
> >
> > Here's the generated IR:
> >
> > ; ModuleID = 'test'
> > source_filename = "test"
> > target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
> > target triple = "x86_64-unknown-linux-gnu"
> >
> > %Bar = type { i32, i32 }
> > %"[]u8" = type { i8*, i64 }
> > %StackTrace = type { i64, %"[]usize" }
> > %"[]usize" = type { i64*, i64 }
> >
> > @0 = internal unnamed_addr constant %Bar { i32 3, i32 4 }, align 4
> >
> > ; Function Attrs: nounwind readnone speculatable
> > declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
> >
> > ; Function Attrs: nobuiltin nounwind
> > define void @entry() #2 !dbg !41 {
> > Entry:
> > %array = alloca [10 x %Bar], align 4
> > %x = alloca %Bar, align 4
> > %for_index = alloca i64, align 8
> > %0 = bitcast [10 x %Bar]* %array to i8*, !dbg !60
> > call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 -86, i64 80, i1
> > false), !dbg !60
> > call void @llvm.dbg.declare(metadata [10 x %Bar]* %array, metadata
> > !45, metadata !DIExpression()), !dbg !60
> > store i64 0, i64* %for_index, align 8, !dbg !61
> > call void @llvm.dbg.declare(metadata i64* %for_index, metadata !55,
> > metadata !DIExpression()), !dbg !63
> > br label %ForCond, !dbg !61
> >
> > ForCond: ; preds = %Else,
%Entry
> > %1 = load i64, i64* %for_index, align 8, !dbg !61
> > %2 = icmp ne i64 %1, 10, !dbg !61
> > br i1 %2, label %ForBody, label %ForElse, !dbg !61
> >
> > ForBody: ; preds = %ForCond
> > %3 = getelementptr inbounds [10 x %Bar], [10 x %Bar]* %array, i64 0,
> > i64 %1, !dbg !61
> > call void @llvm.dbg.declare(metadata %Bar* %3, metadata !58,
> > metadata !DIExpression()), !dbg !64
> > %4 = load i64, i64* %for_index, align 8, !dbg !65
> > %5 = icmp eq i64 %4, 1, !dbg !67
> > br i1 %5, label %Then, label %Else, !dbg !67
> >
> > Then: ; preds = %ForBody
> > %6 = bitcast %Bar* %3 to i8*, !dbg !68
> > %7 = bitcast %Bar* %x to i8*, !dbg !68
> > call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %7, i8* align 4 %6,
> > i64 8, i1 false), !dbg !68
> > br label %ForEnd, !dbg !69
> >
> > Else: ; preds = %ForBody
> > %8 = add nuw i64 %1, 1, !dbg !61
> > store i64 %8, i64* %for_index, align 8, !dbg !61
> > br label %ForCond, !dbg !61
> >
> > ForElse: ; preds = %ForCond
> > call fastcc void @bar2(%Bar* sret %x), !dbg !70
> > br label %ForEnd, !dbg !71
> >
> > ForEnd: ; preds = %ForElse,
%Then
> > call void @llvm.dbg.declare(metadata %Bar* %x, metadata !59,
> > metadata !DIExpression()), !dbg !72
> > ret void, !dbg !73
> > }
> >
> > ; Function Attrs: argmemonly nounwind
> > declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64,
i1) #3
> >
> > ; Function Attrs: argmemonly nounwind
> > declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8*
> > nocapture readonly, i64, i1) #3
> >
> > ; Function Attrs: nobuiltin nounwind
> > define internal fastcc void @bar2(%Bar* nonnull sret) unnamed_addr #2
!dbg !74 {
> > Entry:
> > call fastcc void @bar(%Bar* sret %0), !dbg !79
> > ret void, !dbg !81
> > }
> >
> > ; Function Attrs: nobuiltin nounwind
> > define internal fastcc void @bar(%Bar* nonnull sret) unnamed_addr #2
!dbg !82 {
> > Entry:
> > %1 = bitcast %Bar* %0 to i8*, !dbg !83
> > call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4
> > bitcast (%Bar* @0 to i8*), i64 8, i1 false), !dbg !83
> > ret void, !dbg !83
> > }
> >
> >
> > Here's the optimized IR, an endless loop:
> >
> > define void @entry() local_unnamed_addr #0 !dbg !8 {
> > Entry:
> > call void @llvm.dbg.value(metadata i64 0, metadata !24, metadata
> > !DIExpression()), !dbg !30
> > br label %ForCond, !dbg !31
> >
> > ForCond: ; preds = %Else,
%Entry
> > %for_index.0 = phi i64 [ 0, %Entry ], [ %0, %Else ], !dbg !31
> > call void @llvm.dbg.value(metadata i64 %for_index.0, metadata !24,
> > metadata !DIExpression()), !dbg !30
> > switch i64 %for_index.0, label %Else [
> >
> > Else: ; preds = %ForCond
> > %0 = add nuw i64 %for_index.0, 1, !dbg !31
> > call void @llvm.dbg.value(metadata i64 %0, metadata !24, metadata
> > !DIExpression()), !dbg !30
> > br label %ForCond, !dbg !31
> >
> > ForEnd: ; preds = %ForCond,
%ForCond
> > ret void, !dbg !33
> > }
> >
> >
> > I don't see how this follows from the original source. Is it a bug?
Apparently Analogous Threads
- [LLVMdev] Additional Optimization I'm Missing?
- [LLVMdev] LICM/store-aliasing of global loads
- can debug info for coroutines be improved?
- Question on Aliasing and invariant load hoisting
- [Debuginfo] Changing llvm.dbg.value and DBG_VALUE to support multiple location operands