Richard Kenner via llvm-dev
2021-Feb-17 23:57 UTC
[llvm-dev] Potentially unsafe loop optimization
I'm seeing a case where an add instruction with "nuw" is moved to what I think is an unsafe place and I'm wondering if there really is a bug here. This comes from the small Ada code: PROCEDURE C26006A IS procedure C_abort with Import, Convention => C, External_Name => "abort"; S1 : STRING (1..3) := "A 1"; S2 : STRING (1..3) := "A 2"; BEGIN FOR C IN CHARACTER'FIRST .. CHARACTER'LAST LOOP S1 (2) := C; S2 (2) := C; if S1 = S2 then C_Abort; end if; END LOOP; END C26006A; and the issue is the loop, which is from (interpreting the i8 as unsigned) 0 to 255 (-1). The loop end test looks like: loop.cond.iter: ; preds = %8 %6 = load i8, i8* %c, align 1, !tbaa !5 %loop.iter.cond = icmp eq i8 %6, -1 br i1 %loop.iter.cond, label %loop.exit, label %loop.iter loop.iter: ; preds = %loop.cond.iter %next.loop.var = add nuw i8 %6, 1 store i8 %next.loop.var, i8* %c, align 1, !tbaa !5 br label %loop.cond This looks correct. We do the add after the conditional branch, so we'll never see it overflow. But when we optimize with -O2 on this, we get: loop.cond.iter: ; preds = %loop.cond, %3 %loop.iter.cond = icmp eq i8 %c.0, -1 %next.loop.var = add nuw i8 %c.0, 1 br i1 %loop.iter.cond, label %loop.exit, label %loop.cond But now the add is done unconditionally, whatever the result of the comparison. That means that in the case where the icmp is true, we execute an undefined instruction. When we generate code from this, the code generator notices that, eliminates the test, and makes this an an conditional jump back, with the relevant code being: .LBB0_3: # %loop.cond.iter # in Loop: Header=BB0_1 Depth=1 incb %bl .LBB0_1: # %loop.cond # =>This Inner Loop Header: Depth=1 movb %bl, 17(%rsp) movb %bl, 1(%rsp) movzwl (%rsp), %eax xorw 16(%rsp), %ax movzbl 2(%rsp), %ecx xorb 18(%rsp), %cl movzbl %cl, %ecx orw %ax, %cx jne .LBB0_3 # %bb.2: # in Loop: Header=BB0_1 Depth=1 callq abort jmp .LBB0_3 I think the initial IR is correct, but the loop optimization here is unsafe: if it wants to promote the add instruction, it needs to remove any "nsw" or "nuw". Does that sound correct? The full .ll is below: ; ModuleID = 'c26006a.adb' source_filename = "c26006a.adb" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 :32:64-S128" target triple = "x86_64-unknown-linux-gnu" define void @_ada_c26006a() { entry: %s1 = alloca { { i32, i32 }, [3 x i8] }, align 4 %s2 = alloca { { i32, i32 }, [3 x i8] }, align 4 %c = alloca i8, align 1 %0 = bitcast { { i32, i32 }, [3 x i8] }* %s1 to [3 x i8]* store [3 x i8] c"A 1", [3 x i8]* %0, align 4, !tbaa !0 %1 = bitcast { { i32, i32 }, [3 x i8] }* %s2 to [3 x i8]* store [3 x i8] c"A 2", [3 x i8]* %1, align 4, !tbaa !0 store i8 0, i8* %c, align 1, !tbaa !5 br i1 true, label %loop.cond, label %loop.exit loop.cond: ; preds = %loop.iter, %entry %2 = load i8, i8* %c, align 1, !tbaa !5 %3 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i64 0, i32 1 store i8 %2, i8* %3, align 1, !tbaa !8 %4 = load i8, i8* %c, align 1, !tbaa !5 %5 = getelementptr inbounds [3 x i8], [3 x i8]* %1, i64 0, i32 1 store i8 %4, i8* %5, align 1, !tbaa !8 br i1 false, label %rhs.has.0.dim, label %normal.tests loop.iter: ; preds = %loop.cond.iter %next.loop.var = add nuw i8 %6, 1 store i8 %next.loop.var, i8* %c, align 1, !tbaa !5 br label %loop.cond loop.exit: ; preds = %loop.cond.iter, %entry ret void loop.cond.iter: ; preds = %8 %6 = load i8, i8* %c, align 1, !tbaa !5 %loop.iter.cond = icmp eq i8 %6, -1 br i1 %loop.iter.cond, label %loop.exit, label %loop.iter 7: ; preds = %9, %rhs.has.0.dim call void @abort() br label %8 8: ; preds = %7, %9, %normal.tests br label %loop.cond.iter rhs.has.0.dim: ; preds = %loop.cond br i1 false, label %7, label %normal.tests normal.tests: ; preds = %rhs.has.0.dim, %loop.cond br i1 true, label %9, label %8 9: ; preds = %normal.tests %10 = bitcast [3 x i8]* %1 to i8* %11 = bitcast [3 x i8]* %0 to i8* %12 = call i32 @memcmp(i8* align 4 %10, i8* align 4 %11, i64 3) %13 = icmp eq i32 %12, 0 br i1 %13, label %7, label %8 } ; Function Attrs: nounwind declare dso_local i32 @memcmp(i8* nocapture nonnull readonly, i8* nocapture nonnull readonly, i64) #0 declare void @abort() attributes #0 = { nounwind } !0 = !{!1, !1, i64 0, i64 3} !1 = !{!2, i64 3, !"c26006a__T2b#TN#AD", !3, i64 0, i64 1, !3, i64 1, i64 1, !3, i64 2, i64 1} !2 = !{!"Ada Root"} !3 = !{!4, i64 1, !"character#T0"} !4 = !{!2, i64 1, !"character#TN"} !5 = !{!6, !6, i64 0, i64 1} !6 = !{!7, i64 1, !"T4b#T4"} !7 = !{!4, i64 1, !"T4b#TN"} !8 = !{!3, !3, i64 0, i64 1}
Richard Kenner via llvm-dev
2021-Feb-18 00:21 UTC
[llvm-dev] Potentially unsafe loop optimization
Let me follow up on my own message: it seems that even if there's no "nuw" in the original IR, the optimizer generates one, which seems very odd.
Michael Kruse via llvm-dev
2021-Feb-18 00:35 UTC
[llvm-dev] Potentially unsafe loop optimization
Executing the add instruction that results in an unsigned wrap despite the nuw flag is not an undefined instruction, but results in a 'poison' value. Only certain uses of a poison value results in undefined behaviour. See https://llvm.org/docs/LangRef.html#poison-values for more information. Am Mi., 17. Feb. 2021 um 17:58 Uhr schrieb Richard Kenner via llvm-dev <llvm-dev at lists.llvm.org>:> The loop end test looks like:> > loop.cond.iter: ; preds = %8 > %6 = load i8, i8* %c, align 1, !tbaa !5 > %loop.iter.cond = icmp eq i8 %6, -1 > br i1 %loop.iter.cond, label %loop.exit, label %loop.iter > > loop.iter: ; preds = %loop.cond.iter > %next.loop.var = add nuw i8 %6, 1 > store i8 %next.loop.var, i8* %c, align 1, !tbaa !5 > br label %loop.cond > > This looks correct. We do the add after the conditional branch, so > we'll never see it overflow.I think this loop already has undefined behaviour. If %6 eventually loads the value 255, %next.loop.var will be poison. Storing poison to memory is not itself undefined, but write an undefined value. Michael
Johannes Doerfert via llvm-dev
2021-Feb-18 00:38 UTC
[llvm-dev] Potentially unsafe loop optimization
Executing the addition is not a problem by itself. Adding two values that causes an overflow when there is a nuw results in a poison value. We never use that value because it is only poison if loop.iter.cond is ture and we exit the loop. Long story short, from what I can see there is no miscompilation or change in semantics for that matter. ~ Johannes On 2/17/21 5:57 PM, Richard Kenner via llvm-dev wrote:> I'm seeing a case where an add instruction with "nuw" is moved to what > I think is an unsafe place and I'm wondering if there really is a bug > here. This comes from the small Ada code: > > PROCEDURE C26006A IS > > procedure C_abort with Import, Convention => C, > External_Name => "abort"; > > S1 : STRING (1..3) := "A 1"; > S2 : STRING (1..3) := "A 2"; > > BEGIN > FOR C IN CHARACTER'FIRST .. CHARACTER'LAST LOOP > S1 (2) := C; > S2 (2) := C; > if S1 = S2 then > C_Abort; > end if; > END LOOP; > END C26006A; > > and the issue is the loop, which is from (interpreting the i8 as unsigned) > 0 to 255 (-1). > > The loop end test looks like: > > loop.cond.iter: ; preds = %8 > %6 = load i8, i8* %c, align 1, !tbaa !5 > %loop.iter.cond = icmp eq i8 %6, -1 > br i1 %loop.iter.cond, label %loop.exit, label %loop.iter > > loop.iter: ; preds = %loop.cond.iter > %next.loop.var = add nuw i8 %6, 1 > store i8 %next.loop.var, i8* %c, align 1, !tbaa !5 > br label %loop.cond > > This looks correct. We do the add after the conditional branch, so > we'll never see it overflow. > > But when we optimize with -O2 on this, we get: > > loop.cond.iter: ; preds = %loop.cond, %3 > %loop.iter.cond = icmp eq i8 %c.0, -1 > %next.loop.var = add nuw i8 %c.0, 1 > br i1 %loop.iter.cond, label %loop.exit, label %loop.cond > > But now the add is done unconditionally, whatever the result of the > comparison. That means that in the case where the icmp is true, we > execute an undefined instruction. When we generate code from this, the > code generator notices that, eliminates the test, and makes this an > an conditional jump back, with the relevant code being: > > .LBB0_3: # %loop.cond.iter > # in Loop: Header=BB0_1 Depth=1 > incb %bl > .LBB0_1: # %loop.cond > # =>This Inner Loop Header: Depth=1 > movb %bl, 17(%rsp) > movb %bl, 1(%rsp) > movzwl (%rsp), %eax > xorw 16(%rsp), %ax > movzbl 2(%rsp), %ecx > xorb 18(%rsp), %cl > movzbl %cl, %ecx > orw %ax, %cx > jne .LBB0_3 > # %bb.2: # in Loop: Header=BB0_1 Depth=1 > callq abort > jmp .LBB0_3 > > I think the initial IR is correct, but the loop optimization here is > unsafe: if it wants to promote the add instruction, it needs to remove > any "nsw" or "nuw". Does that sound correct? > > The full .ll is below: > > ; ModuleID = 'c26006a.adb' > source_filename = "c26006a.adb" > target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 > :32:64-S128" > target triple = "x86_64-unknown-linux-gnu" > > define void @_ada_c26006a() { > entry: > %s1 = alloca { { i32, i32 }, [3 x i8] }, align 4 > %s2 = alloca { { i32, i32 }, [3 x i8] }, align 4 > %c = alloca i8, align 1 > %0 = bitcast { { i32, i32 }, [3 x i8] }* %s1 to [3 x i8]* > store [3 x i8] c"A 1", [3 x i8]* %0, align 4, !tbaa !0 > %1 = bitcast { { i32, i32 }, [3 x i8] }* %s2 to [3 x i8]* > store [3 x i8] c"A 2", [3 x i8]* %1, align 4, !tbaa !0 > store i8 0, i8* %c, align 1, !tbaa !5 > br i1 true, label %loop.cond, label %loop.exit > > loop.cond: ; preds = %loop.iter, %entry > %2 = load i8, i8* %c, align 1, !tbaa !5 > %3 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i64 0, i32 1 > store i8 %2, i8* %3, align 1, !tbaa !8 > %4 = load i8, i8* %c, align 1, !tbaa !5 > %5 = getelementptr inbounds [3 x i8], [3 x i8]* %1, i64 0, i32 1 > store i8 %4, i8* %5, align 1, !tbaa !8 > br i1 false, label %rhs.has.0.dim, label %normal.tests > > loop.iter: ; preds = %loop.cond.iter > %next.loop.var = add nuw i8 %6, 1 > store i8 %next.loop.var, i8* %c, align 1, !tbaa !5 > br label %loop.cond > > loop.exit: ; preds = %loop.cond.iter, %entry > ret void > > loop.cond.iter: ; preds = %8 > %6 = load i8, i8* %c, align 1, !tbaa !5 > %loop.iter.cond = icmp eq i8 %6, -1 > br i1 %loop.iter.cond, label %loop.exit, label %loop.iter > > 7: ; preds = %9, %rhs.has.0.dim > call void @abort() > br label %8 > > 8: ; preds = %7, %9, %normal.tests > br label %loop.cond.iter > > rhs.has.0.dim: ; preds = %loop.cond > br i1 false, label %7, label %normal.tests > > normal.tests: ; preds = %rhs.has.0.dim, %loop.cond > br i1 true, label %9, label %8 > > 9: ; preds = %normal.tests > %10 = bitcast [3 x i8]* %1 to i8* > %11 = bitcast [3 x i8]* %0 to i8* > %12 = call i32 @memcmp(i8* align 4 %10, i8* align 4 %11, i64 3) > %13 = icmp eq i32 %12, 0 > br i1 %13, label %7, label %8 > } > > ; Function Attrs: nounwind > declare dso_local i32 @memcmp(i8* nocapture nonnull readonly, i8* nocapture nonnull readonly, i64) #0 > > declare void @abort() > > attributes #0 = { nounwind } > > !0 = !{!1, !1, i64 0, i64 3} > !1 = !{!2, i64 3, !"c26006a__T2b#TN#AD", !3, i64 0, i64 1, !3, i64 1, i64 1, !3, i64 2, i64 1} > !2 = !{!"Ada Root"} > !3 = !{!4, i64 1, !"character#T0"} > !4 = !{!2, i64 1, !"character#TN"} > !5 = !{!6, !6, i64 0, i64 1} > !6 = !{!7, i64 1, !"T4b#T4"} > !7 = !{!4, i64 1, !"T4b#TN"} > !8 = !{!3, !3, i64 0, i64 1} > _______________________________________________ > LLVM Developers mailing list > llvm-dev at lists.llvm.org > https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev