thr3ads.net - llvm dev - [llvm-dev] Potentially unsafe loop optimization [Feb 2021]

If this information is useful, please help other people find it:
Share via:

Johannes Doerfert via llvm-dev

2021-Feb-18 00:38 UTC

[llvm-dev] Potentially unsafe loop optimization

Executing the addition is not a problem by itself.

Adding two values that causes an overflow when there is a nuw
results in a poison value. We never use that value because it
is only poison if loop.iter.cond is ture and we exit the loop.

Long story short, from what I can see there is no miscompilation
or change in semantics for that matter.

~ Johannes


On 2/17/21 5:57 PM, Richard Kenner via llvm-dev wrote:> I'm seeing a case where an add instruction with "nuw" is
moved to what
> I think is an unsafe place and I'm wondering if there really is a bug
> here.  This comes from the small Ada code:
>
> PROCEDURE C26006A IS
>
>          procedure C_abort with Import, Convention => C,
>            External_Name => "abort";
>
>          S1 : STRING (1..3) := "A 1";
>          S2 : STRING (1..3) := "A 2";
>
> BEGIN
>          FOR C IN CHARACTER'FIRST .. CHARACTER'LAST LOOP
>                  S1 (2) := C;
>                  S2 (2) := C;
>                  if S1 = S2 then
>                     C_Abort;
>                  end if;
>          END LOOP;
> END C26006A;
>
> and the issue is the loop, which is from (interpreting the i8 as unsigned)
> 0 to 255 (-1).
>
> The loop end test looks like:
>
> loop.cond.iter:                                   ; preds = %8
>    %6 = load i8, i8* %c, align 1, !tbaa !5
>    %loop.iter.cond = icmp eq i8 %6, -1
>    br i1 %loop.iter.cond, label %loop.exit, label %loop.iter
>
> loop.iter:                                        ; preds = %loop.cond.iter
>    %next.loop.var = add nuw i8 %6, 1
>    store i8 %next.loop.var, i8* %c, align 1, !tbaa !5
>    br label %loop.cond
>
> This looks correct.  We do the add after the conditional branch, so
> we'll never see it overflow.
>
> But when we optimize with -O2 on this, we get:
>
> loop.cond.iter:                                   ; preds = %loop.cond, %3
>    %loop.iter.cond = icmp eq i8 %c.0, -1
>    %next.loop.var = add nuw i8 %c.0, 1
>    br i1 %loop.iter.cond, label %loop.exit, label %loop.cond
>
> But now the add is done unconditionally, whatever the result of the
> comparison.  That means that in the case where the icmp is true, we
> execute an undefined instruction.  When we generate code from this, the
> code generator notices that, eliminates the test, and makes this an
> an conditional jump back, with the relevant code being:
>
> .LBB0_3:                                # %loop.cond.iter
>                                          #   in Loop: Header=BB0_1 Depth=1
>          incb    %bl
> .LBB0_1:                                # %loop.cond
>                                          # =>This Inner Loop Header:
Depth=1
>          movb    %bl, 17(%rsp)
>          movb    %bl, 1(%rsp)
>          movzwl  (%rsp), %eax
>          xorw    16(%rsp), %ax
>          movzbl  2(%rsp), %ecx
>          xorb    18(%rsp), %cl
>          movzbl  %cl, %ecx
>          orw     %ax, %cx
>          jne     .LBB0_3
> # %bb.2:                                #   in Loop: Header=BB0_1 Depth=1
>          callq   abort
>          jmp     .LBB0_3
>
> I think the initial IR is correct, but the loop optimization here is
> unsafe: if it wants to promote the add instruction, it needs to remove
> any "nsw" or "nuw".  Does that sound correct?
>
> The full .ll is below:
>
> ; ModuleID = 'c26006a.adb'
> source_filename = "c26006a.adb"
> target datalayout =
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
> :32:64-S128"
> target triple = "x86_64-unknown-linux-gnu"
>
> define void @_ada_c26006a() {
> entry:
>    %s1 = alloca { { i32, i32 }, [3 x i8] }, align 4
>    %s2 = alloca { { i32, i32 }, [3 x i8] }, align 4
>    %c = alloca i8, align 1
>    %0 = bitcast { { i32, i32 }, [3 x i8] }* %s1 to [3 x i8]*
>    store [3 x i8] c"A 1", [3 x i8]* %0, align 4, !tbaa !0
>    %1 = bitcast { { i32, i32 }, [3 x i8] }* %s2 to [3 x i8]*
>    store [3 x i8] c"A 2", [3 x i8]* %1, align 4, !tbaa !0
>    store i8 0, i8* %c, align 1, !tbaa !5
>    br i1 true, label %loop.cond, label %loop.exit
>
> loop.cond:                                        ; preds = %loop.iter,
%entry
>    %2 = load i8, i8* %c, align 1, !tbaa !5
>    %3 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i64 0, i32 1
>    store i8 %2, i8* %3, align 1, !tbaa !8
>    %4 = load i8, i8* %c, align 1, !tbaa !5
>    %5 = getelementptr inbounds [3 x i8], [3 x i8]* %1, i64 0, i32 1
>    store i8 %4, i8* %5, align 1, !tbaa !8
>    br i1 false, label %rhs.has.0.dim, label %normal.tests
>
> loop.iter:                                        ; preds = %loop.cond.iter
>    %next.loop.var = add nuw i8 %6, 1
>    store i8 %next.loop.var, i8* %c, align 1, !tbaa !5
>    br label %loop.cond
>
> loop.exit:                                        ; preds =
%loop.cond.iter, %entry
>    ret void
>
> loop.cond.iter:                                   ; preds = %8
>    %6 = load i8, i8* %c, align 1, !tbaa !5
>    %loop.iter.cond = icmp eq i8 %6, -1
>    br i1 %loop.iter.cond, label %loop.exit, label %loop.iter
>
> 7:                                                ; preds = %9,
%rhs.has.0.dim
>    call void @abort()
>    br label %8
>
> 8:                                                ; preds = %7, %9,
%normal.tests
>    br label %loop.cond.iter
>
> rhs.has.0.dim:                                    ; preds = %loop.cond
>    br i1 false, label %7, label %normal.tests
>
> normal.tests:                                     ; preds = %rhs.has.0.dim,
%loop.cond
>    br i1 true, label %9, label %8
>
> 9:                                                ; preds = %normal.tests
>    %10 = bitcast [3 x i8]* %1 to i8*
>    %11 = bitcast [3 x i8]* %0 to i8*
>    %12 = call i32 @memcmp(i8* align 4 %10, i8* align 4 %11, i64 3)
>    %13 = icmp eq i32 %12, 0
>    br i1 %13, label %7, label %8
> }
>
> ; Function Attrs: nounwind
> declare dso_local i32 @memcmp(i8* nocapture nonnull readonly, i8* nocapture
nonnull readonly, i64) #0
>
> declare void @abort()
>
> attributes #0 = { nounwind }
>
> !0 = !{!1, !1, i64 0, i64 3}
> !1 = !{!2, i64 3, !"c26006a__T2b#TN#AD", !3, i64 0, i64 1, !3,
i64 1, i64 1, !3, i64 2, i64 1}
> !2 = !{!"Ada Root"}
> !3 = !{!4, i64 1, !"character#T0"}
> !4 = !{!2, i64 1, !"character#TN"}
> !5 = !{!6, !6, i64 0, i64 1}
> !6 = !{!7, i64 1, !"T4b#T4"}
> !7 = !{!4, i64 1, !"T4b#TN"}
> !8 = !{!3, !3, i64 0, i64 1}
> _______________________________________________
> LLVM Developers mailing list
> llvm-dev at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev

Richard Kenner via llvm-dev

2021-Feb-18 00:41 UTC

head link

[llvm-dev] Potentially unsafe loop optimization

> Long story short, from what I can see there is no miscompilation
> or change in semantics for that matter.
So why does the .s file not contain the loop exit test?

        .text
        .file   "c26006a.adb"
        .globl  _ada_c26006a                    # -- Begin function _ada_c26006a
        .p2align        4, 0x90
        .type   _ada_c26006a, at function
_ada_c26006a:                           # @_ada_c26006a
        .cfi_startproc
# %bb.0:                                # %entry
        pushq   %rbx
        .cfi_def_cfa_offset 16
        subq    $32, %rsp
        .cfi_def_cfa_offset 48
        .cfi_offset %rbx, -16
        movw    $8257, 16(%rsp)                 # imm = 0x2041
        movb    $49, 18(%rsp)
        movw    $8257, (%rsp)                   # imm = 0x2041
        movb    $50, 2(%rsp)
        xorl    %ebx, %ebx
        jmp     .LBB0_1
        .p2align        4, 0x90
.LBB0_3:                                # %loop.cond.iter
                                        #   in Loop: Header=BB0_1 Depth=1
        incb    %bl
.LBB0_1:                                # %loop.cond
                                        # =>This Inner Loop Header: Depth=1
        movb    %bl, 17(%rsp)
        movb    %bl, 1(%rsp)
        movzwl  (%rsp), %eax
        xorw    16(%rsp), %ax
        movzbl  2(%rsp), %ecx
        xorb    18(%rsp), %cl
        movzbl  %cl, %ecx
        orw     %ax, %cx
        jne     .LBB0_3
# %bb.2:                                #   in Loop: Header=BB0_1 Depth=1
        callq   abort
        jmp     .LBB0_3
.Lfunc_end0:
        .size   _ada_c26006a, .Lfunc_end0-_ada_c26006a
        .cfi_endproc
                                        # -- End function
        .section        ".note.GNU-stack","", at progbits

llvm dev - Feb 2021 - Potentially unsafe loop optimization

[llvm-dev] Potentially unsafe loop optimization

[llvm-dev] Potentially unsafe loop optimization