Ryan Taylor via llvm-dev
2021-Jan-14 17:27 UTC
[llvm-dev] GVN removing loads that are affected by call
This is right before GVN: define i32 @foo(<4 x i16> %p, <4 x i16> %p1, i16* nocapture %res) local_unnamed_addr #0 !dbg !6 { entry: %temp = alloca i64, align 8 %tmpcast = bitcast i64* %temp to [4 x i16]* %0 = bitcast i64* %temp to i8*, !dbg !8 call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0) #3, !dbg !8 store i64 0, i64* %temp, align 8, !dbg !9 %1 = bitcast i64* %temp to <4 x i16>*, !dbg !10 %2 = call <4 x i16>* @llvm.XXX.intrinsic(<4 x i16>* nonnull %1, <4 x i16> %p, i32 0), !dbg !11, !tbaa !12 %arrayidx = bitcast i64* %temp to i16*, !dbg !16 %3 = load i16, i16* %arrayidx, align 8, !dbg !16, !tbaa !17 br label %for.body, !dbg !19 for.body: ; preds = %entry %arrayidx1 = getelementptr inbounds [4 x i16], [4 x i16]* %tmpcast, i32 0, i32 1, !dbg !20 %4 = load i16, i16* %arrayidx1, align 2, !dbg !20, !tbaa !17 %cmp3 = icmp sgt i16 %3, %4, !dbg !21 %spec.select = select i1 %cmp3, i16 %4, i16 %3, !dbg !22 %arrayidx1.1 = getelementptr inbounds [4 x i16], [4 x i16]* %tmpcast, i32 0, i32 2, !dbg !20 %5 = load i16, i16* %arrayidx1.1, align 2, !dbg !20, !tbaa !17 %cmp3.1 = icmp sgt i16 %spec.select, %5, !dbg !21 %spec.select.1 = select i1 %cmp3.1, i16 %5, i16 %spec.select, !dbg !22 %arrayidx1.2 = getelementptr inbounds [4 x i16], [4 x i16]* %tmpcast, i32 0, i32 3, !dbg !20 %6 = load i16, i16* %arrayidx1.2, align 2, !dbg !20, !tbaa !17 %cmp3.2 = icmp sgt i16 %spec.select.1, %6, !dbg !21 %spec.select.2 = select i1 %cmp3.2, i16 %6, i16 %spec.select.1, !dbg !22 store i16 %spec.select.2, i16* %res, align 2, !dbg !23, !tbaa !17 %7 = tail call <4 x i16>* @llvm.XXX.intrinsic(<4 x i16>* %2, <4 x i16> %p1, i32 0), !dbg !24, !tbaa !12 %8 = load i16, i16* %arrayidx, align 8, !dbg !25, !tbaa !17 br label %for.body12, !dbg !26 for.body12: ; preds = %for.body %arrayidx14 = getelementptr inbounds [4 x i16], [4 x i16]* %tmpcast, i32 0, i32 1, !dbg !27 %9 = load i16, i16* %arrayidx14, align 2, !dbg !27, !tbaa !17 %cmp16 = icmp sgt i16 %8, %9, !dbg !28 %spec.select39 = select i1 
%cmp16, i16 %9, i16 %8, !dbg !29 %arrayidx14.1 = getelementptr inbounds [4 x i16], [4 x i16]* %tmpcast, i32 0, i32 2, !dbg !27 %10 = load i16, i16* %arrayidx14.1, align 2, !dbg !27, !tbaa !17 %cmp16.1 = icmp sgt i16 %spec.select39, %10, !dbg !28 %spec.select39.1 = select i1 %cmp16.1, i16 %10, i16 %spec.select39, !dbg !29 %arrayidx14.2 = getelementptr inbounds [4 x i16], [4 x i16]* %tmpcast, i32 0, i32 3, !dbg !27 %11 = load i16, i16* %arrayidx14.2, align 2, !dbg !27, !tbaa !17 %cmp16.2 = icmp sgt i16 %spec.select39.1, %11, !dbg !28 %spec.select39.2 = select i1 %cmp16.2, i16 %11, i16 %spec.select39.1, !dbg !29 %conv24 = sext i16 %spec.select39.2 to i32, !dbg !30 call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0) #3, !dbg !31 ret i32 %conv24, !dbg !32 On Thu, Jan 14, 2021 at 11:54 AM Roman Lebedev <lebedev.ri at gmail.com> wrote:> It would be good to have an actual IR reproducer here. > > On Thu, Jan 14, 2021 at 7:51 PM Ryan Taylor via llvm-dev > <llvm-dev at lists.llvm.org> wrote: > > > > So given an intrinsic that has a pointer as in/out and IntrWriteMem > property. > > > > call intrinsic(address a, ....); > > loop over address a > > load from address a + offset > > call intrinsic (address a, ...); > > loop over address a > > load from address a + offset > > > > GVN is removing the second loads, despite the second call overwriting > the memory starting at address a. AA has the intrinsics marked as unknown > instructions but has all of these as mayAlias in a set. I'm not seeing this > issue with -fno-unroll-loops. > > > > Thanks. > > > > > > > > > > _______________________________________________ > > LLVM Developers mailing list > > llvm-dev at lists.llvm.org > > https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev >-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20210114/50cc4201/attachment.html>
Johannes Doerfert via llvm-dev
2021-Jan-14 17:37 UTC
[llvm-dev] GVN removing loads that are affected by call
There is still not enough information here. My first guess: the `!tbaa` annotations on the `XXX.intrinsic` calls and on the `load`s basically encode that there is no alias. This is easy to verify: remove the ones on the intrinsic. ~ Johannes P.S. If this was a bug in GVN, and I assume it is not, a reproducer would help a lot, i.e., a small IR sample that shows the problem and which we can run. This is a "redacted?" IR fragment in which I don't know which transformation is problematic. I also cannot run it through GVN, which makes it impossible to reproduce. On 1/14/21 11:27 AM, Ryan Taylor via llvm-dev wrote:> This is right before GVN: > > define i32 @foo(<4 x i16> %p, <4 x i16> %p1, i16* nocapture %res) > local_unnamed_addr #0 !dbg !6 { > entry: > %temp = alloca i64, align 8 > %tmpcast = bitcast i64* %temp to [4 x i16]* > %0 = bitcast i64* %temp to i8*, !dbg !8 > call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0) #3, !dbg !8 > store i64 0, i64* %temp, align 8, !dbg !9 > %1 = bitcast i64* %temp to <4 x i16>*, !dbg !10 > %2 = call <4 x i16>* @llvm.XXX.intrinsic(<4 x i16>* nonnull %1, <4 x i16> > %p, i32 0), !dbg !11, !tbaa !12 > %arrayidx = bitcast i64* %temp to i16*, !dbg !16 > %3 = load i16, i16* %arrayidx, align 8, !dbg !16, !tbaa !17 > br label %for.body, !dbg !19 > > for.body: ; preds = %entry > %arrayidx1 = getelementptr inbounds [4 x i16], [4 x i16]* %tmpcast, i32 > 0, i32 1, !dbg !20 > %4 = load i16, i16* %arrayidx1, align 2, !dbg !20, !tbaa !17 > %cmp3 = icmp sgt i16 %3, %4, !dbg !21 > %spec.select = select i1 %cmp3, i16 %4, i16 %3, !dbg !22 > %arrayidx1.1 = getelementptr inbounds [4 x i16], [4 x i16]* %tmpcast, i32 > 0, i32 2, !dbg !20 > %5 = load i16, i16* %arrayidx1.1, align 2, !dbg !20, !tbaa !17 > %cmp3.1 = icmp sgt i16 %spec.select, %5, !dbg !21 > %spec.select.1 = select i1 %cmp3.1, i16 %5, i16 %spec.select, !dbg !22 > %arrayidx1.2 = getelementptr inbounds [4 x i16], [4 x i16]* %tmpcast, i32 > 0, i32 3, !dbg !20 > %6 = load i16, i16* %arrayidx1.2, align 2, !dbg !20, 
!tbaa !17 > %cmp3.2 = icmp sgt i16 %spec.select.1, %6, !dbg !21 > %spec.select.2 = select i1 %cmp3.2, i16 %6, i16 %spec.select.1, !dbg !22 > store i16 %spec.select.2, i16* %res, align 2, !dbg !23, !tbaa !17 > %7 = tail call <4 x i16>* @llvm.XXX.intrinsic(<4 x i16>* %2, <4 x i16> > %p1, i32 0), !dbg !24, !tbaa !12 > %8 = load i16, i16* %arrayidx, align 8, !dbg !25, !tbaa !17 > br label %for.body12, !dbg !26 > > for.body12: ; preds = %for.body > %arrayidx14 = getelementptr inbounds [4 x i16], [4 x i16]* %tmpcast, i32 > 0, i32 1, !dbg !27 > %9 = load i16, i16* %arrayidx14, align 2, !dbg !27, !tbaa !17 > %cmp16 = icmp sgt i16 %8, %9, !dbg !28 > %spec.select39 = select i1 %cmp16, i16 %9, i16 %8, !dbg !29 > %arrayidx14.1 = getelementptr inbounds [4 x i16], [4 x i16]* %tmpcast, > i32 0, i32 2, !dbg !27 > %10 = load i16, i16* %arrayidx14.1, align 2, !dbg !27, !tbaa !17 > %cmp16.1 = icmp sgt i16 %spec.select39, %10, !dbg !28 > %spec.select39.1 = select i1 %cmp16.1, i16 %10, i16 %spec.select39, !dbg > !29 > %arrayidx14.2 = getelementptr inbounds [4 x i16], [4 x i16]* %tmpcast, > i32 0, i32 3, !dbg !27 > %11 = load i16, i16* %arrayidx14.2, align 2, !dbg !27, !tbaa !17 > %cmp16.2 = icmp sgt i16 %spec.select39.1, %11, !dbg !28 > %spec.select39.2 = select i1 %cmp16.2, i16 %11, i16 %spec.select39.1, > !dbg !29 > %conv24 = sext i16 %spec.select39.2 to i32, !dbg !30 > call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0) #3, !dbg !31 > ret i32 %conv24, !dbg !32 > > On Thu, Jan 14, 2021 at 11:54 AM Roman Lebedev <lebedev.ri at gmail.com> wrote: > >> It would be good to have an actual IR reproducer here. >> >> On Thu, Jan 14, 2021 at 7:51 PM Ryan Taylor via llvm-dev >> <llvm-dev at lists.llvm.org> wrote: >>> So given an intrinsic that has a pointer as in/out and IntrWriteMem >> property. 
>>> call intrinsic(address a, ....); >>> loop over address a >>> load from address a + offset >>> call intrinsic (address a, ...); >>> loop over address a >>> load from address a + offset >>> >>> GVN is removing the second loads, despite the second call overwriting >> the memory starting at address a. AA has the intrinsics marked as unknown >> instructions but has all of these as mayAlias in a set. I'm not seeing this >> issue with -fno-unroll-loops. >>> Thanks. >>> >>> >>> >>> >>> _______________________________________________ >>> LLVM Developers mailing list >>> llvm-dev at lists.llvm.org >>> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev > > _______________________________________________ > LLVM Developers mailing list > llvm-dev at lists.llvm.org > https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev