Richard Kenner via llvm-dev
2021-Feb-17 23:57 UTC
[llvm-dev] Potentially unsafe loop optimization
I'm seeing a case where an add instruction with "nuw" is moved to
what
I think is an unsafe place and I'm wondering if there really is a bug
here. This comes from the small Ada code:
PROCEDURE C26006A IS
procedure C_abort with Import, Convention => C,
External_Name => "abort";
S1 : STRING (1..3) := "A 1";
S2 : STRING (1..3) := "A 2";
BEGIN
FOR C IN CHARACTER'FIRST .. CHARACTER'LAST LOOP
S1 (2) := C;
S2 (2) := C;
if S1 = S2 then
C_Abort;
end if;
END LOOP;
END C26006A;
and the issue is the loop, which is from (interpreting the i8 as unsigned)
0 to 255 (-1).
The loop end test looks like:
loop.cond.iter: ; preds = %8
%6 = load i8, i8* %c, align 1, !tbaa !5
%loop.iter.cond = icmp eq i8 %6, -1
br i1 %loop.iter.cond, label %loop.exit, label %loop.iter
loop.iter: ; preds = %loop.cond.iter
%next.loop.var = add nuw i8 %6, 1
store i8 %next.loop.var, i8* %c, align 1, !tbaa !5
br label %loop.cond
This looks correct. We do the add after the conditional branch, so
we'll never see it overflow.
But when we optimize with -O2 on this, we get:
loop.cond.iter: ; preds = %loop.cond, %3
%loop.iter.cond = icmp eq i8 %c.0, -1
%next.loop.var = add nuw i8 %c.0, 1
br i1 %loop.iter.cond, label %loop.exit, label %loop.cond
But now the add is done unconditionally, whatever the result of the
comparison. That means that in the case where the icmp is true, we
execute an undefined instruction. When we generate code from this, the
code generator notices that, eliminates the test, and makes this an
an conditional jump back, with the relevant code being:
.LBB0_3: # %loop.cond.iter
# in Loop: Header=BB0_1 Depth=1
incb %bl
.LBB0_1: # %loop.cond
# =>This Inner Loop Header: Depth=1
movb %bl, 17(%rsp)
movb %bl, 1(%rsp)
movzwl (%rsp), %eax
xorw 16(%rsp), %ax
movzbl 2(%rsp), %ecx
xorb 18(%rsp), %cl
movzbl %cl, %ecx
orw %ax, %cx
jne .LBB0_3
# %bb.2: # in Loop: Header=BB0_1 Depth=1
callq abort
jmp .LBB0_3
I think the initial IR is correct, but the loop optimization here is
unsafe: if it wants to promote the add instruction, it needs to remove
any "nsw" or "nuw". Does that sound correct?
The full .ll is below:
; ModuleID = 'c26006a.adb'
source_filename = "c26006a.adb"
target datalayout =
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @_ada_c26006a() {
entry:
%s1 = alloca { { i32, i32 }, [3 x i8] }, align 4
%s2 = alloca { { i32, i32 }, [3 x i8] }, align 4
%c = alloca i8, align 1
%0 = bitcast { { i32, i32 }, [3 x i8] }* %s1 to [3 x i8]*
store [3 x i8] c"A 1", [3 x i8]* %0, align 4, !tbaa !0
%1 = bitcast { { i32, i32 }, [3 x i8] }* %s2 to [3 x i8]*
store [3 x i8] c"A 2", [3 x i8]* %1, align 4, !tbaa !0
store i8 0, i8* %c, align 1, !tbaa !5
br i1 true, label %loop.cond, label %loop.exit
loop.cond: ; preds = %loop.iter, %entry
%2 = load i8, i8* %c, align 1, !tbaa !5
%3 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i64 0, i32 1
store i8 %2, i8* %3, align 1, !tbaa !8
%4 = load i8, i8* %c, align 1, !tbaa !5
%5 = getelementptr inbounds [3 x i8], [3 x i8]* %1, i64 0, i32 1
store i8 %4, i8* %5, align 1, !tbaa !8
br i1 false, label %rhs.has.0.dim, label %normal.tests
loop.iter: ; preds = %loop.cond.iter
%next.loop.var = add nuw i8 %6, 1
store i8 %next.loop.var, i8* %c, align 1, !tbaa !5
br label %loop.cond
loop.exit: ; preds = %loop.cond.iter,
%entry
ret void
loop.cond.iter: ; preds = %8
%6 = load i8, i8* %c, align 1, !tbaa !5
%loop.iter.cond = icmp eq i8 %6, -1
br i1 %loop.iter.cond, label %loop.exit, label %loop.iter
7: ; preds = %9, %rhs.has.0.dim
call void @abort()
br label %8
8: ; preds = %7, %9,
%normal.tests
br label %loop.cond.iter
rhs.has.0.dim: ; preds = %loop.cond
br i1 false, label %7, label %normal.tests
normal.tests: ; preds = %rhs.has.0.dim,
%loop.cond
br i1 true, label %9, label %8
9: ; preds = %normal.tests
%10 = bitcast [3 x i8]* %1 to i8*
%11 = bitcast [3 x i8]* %0 to i8*
%12 = call i32 @memcmp(i8* align 4 %10, i8* align 4 %11, i64 3)
%13 = icmp eq i32 %12, 0
br i1 %13, label %7, label %8
}
; Function Attrs: nounwind
declare dso_local i32 @memcmp(i8* nocapture nonnull readonly, i8* nocapture
nonnull readonly, i64) #0
declare void @abort()
attributes #0 = { nounwind }
!0 = !{!1, !1, i64 0, i64 3}
!1 = !{!2, i64 3, !"c26006a__T2b#TN#AD", !3, i64 0, i64 1, !3, i64 1,
i64 1, !3, i64 2, i64 1}
!2 = !{!"Ada Root"}
!3 = !{!4, i64 1, !"character#T0"}
!4 = !{!2, i64 1, !"character#TN"}
!5 = !{!6, !6, i64 0, i64 1}
!6 = !{!7, i64 1, !"T4b#T4"}
!7 = !{!4, i64 1, !"T4b#TN"}
!8 = !{!3, !3, i64 0, i64 1}
Richard Kenner via llvm-dev
2021-Feb-18 00:21 UTC
[llvm-dev] Potentially unsafe loop optimization
Let me follow up on my own message: it seems that even if there's no "nuw" in the original IR, the optimizer generates one, which seems very odd.
Michael Kruse via llvm-dev
2021-Feb-18 00:35 UTC
[llvm-dev] Potentially unsafe loop optimization
Executing the add instruction that results in an unsigned wrap despite the nuw flag is not an undefined instruction, but results in a 'poison' value. Only certain uses of a poison value results in undefined behaviour. See https://llvm.org/docs/LangRef.html#poison-values for more information. Am Mi., 17. Feb. 2021 um 17:58 Uhr schrieb Richard Kenner via llvm-dev <llvm-dev at lists.llvm.org>:> The loop end test looks like:> > loop.cond.iter: ; preds = %8 > %6 = load i8, i8* %c, align 1, !tbaa !5 > %loop.iter.cond = icmp eq i8 %6, -1 > br i1 %loop.iter.cond, label %loop.exit, label %loop.iter > > loop.iter: ; preds = %loop.cond.iter > %next.loop.var = add nuw i8 %6, 1 > store i8 %next.loop.var, i8* %c, align 1, !tbaa !5 > br label %loop.cond > > This looks correct. We do the add after the conditional branch, so > we'll never see it overflow.I think this loop already has undefined behaviour. If %6 eventually loads the value 255, %next.loop.var will be poison. Storing poison to memory is not itself undefined, but write an undefined value. Michael
Johannes Doerfert via llvm-dev
2021-Feb-18 00:38 UTC
[llvm-dev] Potentially unsafe loop optimization
Executing the addition is not a problem by itself. Adding two values that causes an overflow when there is a nuw results in a poison value. We never use that value because it is only poison if loop.iter.cond is ture and we exit the loop. Long story short, from what I can see there is no miscompilation or change in semantics for that matter. ~ Johannes On 2/17/21 5:57 PM, Richard Kenner via llvm-dev wrote:> I'm seeing a case where an add instruction with "nuw" is moved to what > I think is an unsafe place and I'm wondering if there really is a bug > here. This comes from the small Ada code: > > PROCEDURE C26006A IS > > procedure C_abort with Import, Convention => C, > External_Name => "abort"; > > S1 : STRING (1..3) := "A 1"; > S2 : STRING (1..3) := "A 2"; > > BEGIN > FOR C IN CHARACTER'FIRST .. CHARACTER'LAST LOOP > S1 (2) := C; > S2 (2) := C; > if S1 = S2 then > C_Abort; > end if; > END LOOP; > END C26006A; > > and the issue is the loop, which is from (interpreting the i8 as unsigned) > 0 to 255 (-1). > > The loop end test looks like: > > loop.cond.iter: ; preds = %8 > %6 = load i8, i8* %c, align 1, !tbaa !5 > %loop.iter.cond = icmp eq i8 %6, -1 > br i1 %loop.iter.cond, label %loop.exit, label %loop.iter > > loop.iter: ; preds = %loop.cond.iter > %next.loop.var = add nuw i8 %6, 1 > store i8 %next.loop.var, i8* %c, align 1, !tbaa !5 > br label %loop.cond > > This looks correct. We do the add after the conditional branch, so > we'll never see it overflow. > > But when we optimize with -O2 on this, we get: > > loop.cond.iter: ; preds = %loop.cond, %3 > %loop.iter.cond = icmp eq i8 %c.0, -1 > %next.loop.var = add nuw i8 %c.0, 1 > br i1 %loop.iter.cond, label %loop.exit, label %loop.cond > > But now the add is done unconditionally, whatever the result of the > comparison. That means that in the case where the icmp is true, we > execute an undefined instruction. When we generate code from this, the > code generator notices that, eliminates the test, and makes this an > an conditional jump back, with the relevant code being: > > .LBB0_3: # %loop.cond.iter > # in Loop: Header=BB0_1 Depth=1 > incb %bl > .LBB0_1: # %loop.cond > # =>This Inner Loop Header: Depth=1 > movb %bl, 17(%rsp) > movb %bl, 1(%rsp) > movzwl (%rsp), %eax > xorw 16(%rsp), %ax > movzbl 2(%rsp), %ecx > xorb 18(%rsp), %cl > movzbl %cl, %ecx > orw %ax, %cx > jne .LBB0_3 > # %bb.2: # in Loop: Header=BB0_1 Depth=1 > callq abort > jmp .LBB0_3 > > I think the initial IR is correct, but the loop optimization here is > unsafe: if it wants to promote the add instruction, it needs to remove > any "nsw" or "nuw". Does that sound correct? > > The full .ll is below: > > ; ModuleID = 'c26006a.adb' > source_filename = "c26006a.adb" > target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 > :32:64-S128" > target triple = "x86_64-unknown-linux-gnu" > > define void @_ada_c26006a() { > entry: > %s1 = alloca { { i32, i32 }, [3 x i8] }, align 4 > %s2 = alloca { { i32, i32 }, [3 x i8] }, align 4 > %c = alloca i8, align 1 > %0 = bitcast { { i32, i32 }, [3 x i8] }* %s1 to [3 x i8]* > store [3 x i8] c"A 1", [3 x i8]* %0, align 4, !tbaa !0 > %1 = bitcast { { i32, i32 }, [3 x i8] }* %s2 to [3 x i8]* > store [3 x i8] c"A 2", [3 x i8]* %1, align 4, !tbaa !0 > store i8 0, i8* %c, align 1, !tbaa !5 > br i1 true, label %loop.cond, label %loop.exit > > loop.cond: ; preds = %loop.iter, %entry > %2 = load i8, i8* %c, align 1, !tbaa !5 > %3 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i64 0, i32 1 > store i8 %2, i8* %3, align 1, !tbaa !8 > %4 = load i8, i8* %c, align 1, !tbaa !5 > %5 = getelementptr inbounds [3 x i8], [3 x i8]* %1, i64 0, i32 1 > store i8 %4, i8* %5, align 1, !tbaa !8 > br i1 false, label %rhs.has.0.dim, label %normal.tests > > loop.iter: ; preds = %loop.cond.iter > %next.loop.var = add nuw i8 %6, 1 > store i8 %next.loop.var, i8* %c, align 1, !tbaa !5 > br label %loop.cond > > loop.exit: ; preds = %loop.cond.iter, %entry > ret void > > loop.cond.iter: ; preds = %8 > %6 = load i8, i8* %c, align 1, !tbaa !5 > %loop.iter.cond = icmp eq i8 %6, -1 > br i1 %loop.iter.cond, label %loop.exit, label %loop.iter > > 7: ; preds = %9, %rhs.has.0.dim > call void @abort() > br label %8 > > 8: ; preds = %7, %9, %normal.tests > br label %loop.cond.iter > > rhs.has.0.dim: ; preds = %loop.cond > br i1 false, label %7, label %normal.tests > > normal.tests: ; preds = %rhs.has.0.dim, %loop.cond > br i1 true, label %9, label %8 > > 9: ; preds = %normal.tests > %10 = bitcast [3 x i8]* %1 to i8* > %11 = bitcast [3 x i8]* %0 to i8* > %12 = call i32 @memcmp(i8* align 4 %10, i8* align 4 %11, i64 3) > %13 = icmp eq i32 %12, 0 > br i1 %13, label %7, label %8 > } > > ; Function Attrs: nounwind > declare dso_local i32 @memcmp(i8* nocapture nonnull readonly, i8* nocapture nonnull readonly, i64) #0 > > declare void @abort() > > attributes #0 = { nounwind } > > !0 = !{!1, !1, i64 0, i64 3} > !1 = !{!2, i64 3, !"c26006a__T2b#TN#AD", !3, i64 0, i64 1, !3, i64 1, i64 1, !3, i64 2, i64 1} > !2 = !{!"Ada Root"} > !3 = !{!4, i64 1, !"character#T0"} > !4 = !{!2, i64 1, !"character#TN"} > !5 = !{!6, !6, i64 0, i64 1} > !6 = !{!7, i64 1, !"T4b#T4"} > !7 = !{!4, i64 1, !"T4b#TN"} > !8 = !{!3, !3, i64 0, i64 1} > _______________________________________________ > LLVM Developers mailing list > llvm-dev at lists.llvm.org > https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev