Johannes Doerfert via llvm-dev
2021-Feb-18 00:38 UTC
[llvm-dev] Potentially unsafe loop optimization
Executing the addition is not a problem by itself. Adding two values that causes an overflow when there is a nuw results in a poison value. We never use that value because it is only poison if loop.iter.cond is ture and we exit the loop. Long story short, from what I can see there is no miscompilation or change in semantics for that matter. ~ Johannes On 2/17/21 5:57 PM, Richard Kenner via llvm-dev wrote:> I'm seeing a case where an add instruction with "nuw" is moved to what > I think is an unsafe place and I'm wondering if there really is a bug > here. This comes from the small Ada code: > > PROCEDURE C26006A IS > > procedure C_abort with Import, Convention => C, > External_Name => "abort"; > > S1 : STRING (1..3) := "A 1"; > S2 : STRING (1..3) := "A 2"; > > BEGIN > FOR C IN CHARACTER'FIRST .. CHARACTER'LAST LOOP > S1 (2) := C; > S2 (2) := C; > if S1 = S2 then > C_Abort; > end if; > END LOOP; > END C26006A; > > and the issue is the loop, which is from (interpreting the i8 as unsigned) > 0 to 255 (-1). > > The loop end test looks like: > > loop.cond.iter: ; preds = %8 > %6 = load i8, i8* %c, align 1, !tbaa !5 > %loop.iter.cond = icmp eq i8 %6, -1 > br i1 %loop.iter.cond, label %loop.exit, label %loop.iter > > loop.iter: ; preds = %loop.cond.iter > %next.loop.var = add nuw i8 %6, 1 > store i8 %next.loop.var, i8* %c, align 1, !tbaa !5 > br label %loop.cond > > This looks correct. We do the add after the conditional branch, so > we'll never see it overflow. > > But when we optimize with -O2 on this, we get: > > loop.cond.iter: ; preds = %loop.cond, %3 > %loop.iter.cond = icmp eq i8 %c.0, -1 > %next.loop.var = add nuw i8 %c.0, 1 > br i1 %loop.iter.cond, label %loop.exit, label %loop.cond > > But now the add is done unconditionally, whatever the result of the > comparison. That means that in the case where the icmp is true, we > execute an undefined instruction. When we generate code from this, the > code generator notices that, eliminates the test, and makes this an > an conditional jump back, with the relevant code being: > > .LBB0_3: # %loop.cond.iter > # in Loop: Header=BB0_1 Depth=1 > incb %bl > .LBB0_1: # %loop.cond > # =>This Inner Loop Header: Depth=1 > movb %bl, 17(%rsp) > movb %bl, 1(%rsp) > movzwl (%rsp), %eax > xorw 16(%rsp), %ax > movzbl 2(%rsp), %ecx > xorb 18(%rsp), %cl > movzbl %cl, %ecx > orw %ax, %cx > jne .LBB0_3 > # %bb.2: # in Loop: Header=BB0_1 Depth=1 > callq abort > jmp .LBB0_3 > > I think the initial IR is correct, but the loop optimization here is > unsafe: if it wants to promote the add instruction, it needs to remove > any "nsw" or "nuw". Does that sound correct? > > The full .ll is below: > > ; ModuleID = 'c26006a.adb' > source_filename = "c26006a.adb" > target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 > :32:64-S128" > target triple = "x86_64-unknown-linux-gnu" > > define void @_ada_c26006a() { > entry: > %s1 = alloca { { i32, i32 }, [3 x i8] }, align 4 > %s2 = alloca { { i32, i32 }, [3 x i8] }, align 4 > %c = alloca i8, align 1 > %0 = bitcast { { i32, i32 }, [3 x i8] }* %s1 to [3 x i8]* > store [3 x i8] c"A 1", [3 x i8]* %0, align 4, !tbaa !0 > %1 = bitcast { { i32, i32 }, [3 x i8] }* %s2 to [3 x i8]* > store [3 x i8] c"A 2", [3 x i8]* %1, align 4, !tbaa !0 > store i8 0, i8* %c, align 1, !tbaa !5 > br i1 true, label %loop.cond, label %loop.exit > > loop.cond: ; preds = %loop.iter, %entry > %2 = load i8, i8* %c, align 1, !tbaa !5 > %3 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i64 0, i32 1 > store i8 %2, i8* %3, align 1, !tbaa !8 > %4 = load i8, i8* %c, align 1, !tbaa !5 > %5 = getelementptr inbounds [3 x i8], [3 x i8]* %1, i64 0, i32 1 > store i8 %4, i8* %5, align 1, !tbaa !8 > br i1 false, label %rhs.has.0.dim, label %normal.tests > > loop.iter: ; preds = %loop.cond.iter > %next.loop.var = add nuw i8 %6, 1 > store i8 %next.loop.var, i8* %c, align 1, !tbaa !5 > br label %loop.cond > > loop.exit: ; preds = %loop.cond.iter, %entry > ret void > > loop.cond.iter: ; preds = %8 > %6 = load i8, i8* %c, align 1, !tbaa !5 > %loop.iter.cond = icmp eq i8 %6, -1 > br i1 %loop.iter.cond, label %loop.exit, label %loop.iter > > 7: ; preds = %9, %rhs.has.0.dim > call void @abort() > br label %8 > > 8: ; preds = %7, %9, %normal.tests > br label %loop.cond.iter > > rhs.has.0.dim: ; preds = %loop.cond > br i1 false, label %7, label %normal.tests > > normal.tests: ; preds = %rhs.has.0.dim, %loop.cond > br i1 true, label %9, label %8 > > 9: ; preds = %normal.tests > %10 = bitcast [3 x i8]* %1 to i8* > %11 = bitcast [3 x i8]* %0 to i8* > %12 = call i32 @memcmp(i8* align 4 %10, i8* align 4 %11, i64 3) > %13 = icmp eq i32 %12, 0 > br i1 %13, label %7, label %8 > } > > ; Function Attrs: nounwind > declare dso_local i32 @memcmp(i8* nocapture nonnull readonly, i8* nocapture nonnull readonly, i64) #0 > > declare void @abort() > > attributes #0 = { nounwind } > > !0 = !{!1, !1, i64 0, i64 3} > !1 = !{!2, i64 3, !"c26006a__T2b#TN#AD", !3, i64 0, i64 1, !3, i64 1, i64 1, !3, i64 2, i64 1} > !2 = !{!"Ada Root"} > !3 = !{!4, i64 1, !"character#T0"} > !4 = !{!2, i64 1, !"character#TN"} > !5 = !{!6, !6, i64 0, i64 1} > !6 = !{!7, i64 1, !"T4b#T4"} > !7 = !{!4, i64 1, !"T4b#TN"} > !8 = !{!3, !3, i64 0, i64 1} > _______________________________________________ > LLVM Developers mailing list > llvm-dev at lists.llvm.org > https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
Richard Kenner via llvm-dev
2021-Feb-18 00:41 UTC
[llvm-dev] Potentially unsafe loop optimization
> Long story short, from what I can see there is no miscompilation > or change in semantics for that matter.So why does the .s file not contain the loop exit test? .text .file "c26006a.adb" .globl _ada_c26006a # -- Begin function _ada_c26006a .p2align 4, 0x90 .type _ada_c26006a, at function _ada_c26006a: # @_ada_c26006a .cfi_startproc # %bb.0: # %entry pushq %rbx .cfi_def_cfa_offset 16 subq $32, %rsp .cfi_def_cfa_offset 48 .cfi_offset %rbx, -16 movw $8257, 16(%rsp) # imm = 0x2041 movb $49, 18(%rsp) movw $8257, (%rsp) # imm = 0x2041 movb $50, 2(%rsp) xorl %ebx, %ebx jmp .LBB0_1 .p2align 4, 0x90 .LBB0_3: # %loop.cond.iter # in Loop: Header=BB0_1 Depth=1 incb %bl .LBB0_1: # %loop.cond # =>This Inner Loop Header: Depth=1 movb %bl, 17(%rsp) movb %bl, 1(%rsp) movzwl (%rsp), %eax xorw 16(%rsp), %ax movzbl 2(%rsp), %ecx xorb 18(%rsp), %cl movzbl %cl, %ecx orw %ax, %cx jne .LBB0_3 # %bb.2: # in Loop: Header=BB0_1 Depth=1 callq abort jmp .LBB0_3 .Lfunc_end0: .size _ada_c26006a, .Lfunc_end0-_ada_c26006a .cfi_endproc # -- End function .section ".note.GNU-stack","", at progbits