Jingu Kang via llvm-dev
2021-Nov-25 14:37 UTC
[llvm-dev] Question about supporting zext on IVUsers and LSR
Hi All, I am looking at a simple example as below. target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" %struct.base_s = type { %struct.range, i64, i64, i64*, i32, [4 x i32], [274 x %struct.match], i32, i32, i8, i8, i8, i32, i32, i32, [16 x [768 x i16]], [12 x [16 x i16]], [12 x i16], [12 x i16], [12 x i16], [12 x i16], [12 x [16 x i16]], [4 x [64 x i16]], [114 x i16], [16 x i16], %struct.length, %struct.length, [4 x [64 x i32]], [4 x [128 x i32]], i32, i32, [16 x i32], i32, i32, i32, [4096 x %struct.opt] } %struct.range = type { i64, i64, i32, i8, i64, i32, i32, [53 x i32], [53 x i16*] } %struct.match = type { i32, i32 } %struct.length = type { i16, i16, [16 x [8 x i16]], [16 x [8 x i16]], [256 x i16], [16 x [272 x i32]], i32, [16 x i32] } %struct.opt = type { i32, i8, i8, i32, i32, i32, i32, i32, [4 x i32] } define i32 @test(i32 %len, %struct.base_s* nocapture readonly %obj) { entry: br label %while.cond while.cond: ; preds = %while.cond, %entry %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ] %idxprom = zext i32 %i.0 to i64 %len1 = getelementptr inbounds %struct.base_s, %struct.base_s* %obj, i64 0, i32 6, i64 %idxprom, i32 0 %0 = load i32, i32* %len1, align 4 %cmp = icmp ult i32 %0, %len %inc = add i32 %i.0, 1 br i1 %cmp, label %while.cond, label %while.end while.end: ; preds = %while.cond ret i32 %i.0 } I expected the LSR pass to extract the loop-invariant part of `%len1 = getelementptr` and hoist it to the preheader. That would create a new IV for the loop-dependent part of the gep inside the loop, which `%0 = load` could then use. However, it looks like `IVUsers` does not process the `%idxprom = zext`. I can see that `SCEVAddRecExpr` and `SCEVAddExpr` are handled in the `isInteresting` function. It seems the LSR pass also does not handle the `zext` for `IVChain`. If I manually remove the `%idxprom = zext` from the above example, I can see that LSR works as expected. 
Does anyone know why the `zext` is not supported in IVUsers and LSR? Does it make it difficult for LSR to construct formulas and compare them? If I missed something, please let me know. For reference, the assembly output of the above example with `-O3` is as below. test: mov w8, w0 mov w0, #-1 .LBB0_1: add w0, w0, #1 add x9, x1, w0, uxtw #3 ldr w9, [x9, #724] cmp w9, w8 b.lo .LBB0_1 ret If I remove the `zext`, the output is as below, and the loop has one fewer instruction than in the above output. test: add x9, x1, #724 mov x8, #-1 .LBB0_1: ldr w10, [x9], #8 add x8, x8, #1 cmp w10, w0 b.lo .LBB0_1 mov x0, x8 ret The IR code, in which the `zext` is removed, is as below. target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" %struct.base_s = type { %struct.range, i64, i64, i64*, i32, [4 x i32], [274 x %struct.match], i32, i32, i8, i8, i8, i32, i32, i32, [16 x [768 x i16]], [12 x [16 x i16]], [12 x i16], [12 x i16], [12 x i16], [12 x i16], [12 x [16 x i16]], [4 x [64 x i16]], [114 x i16], [16 x i16], %struct.length, %struct.length, [4 x [64 x i32]], [4 x [128 x i32]], i32, i32, [16 x i32], i32, i32, i32, [4096 x %struct.opt] } %struct.range = type { i64, i64, i32, i8, i64, i32, i32, [53 x i32], [53 x i16*] } %struct.match = type { i32, i32 } %struct.length = type { i16, i16, [16 x [8 x i16]], [16 x [8 x i16]], [256 x i16], [16 x [272 x i32]], i32, [16 x i32] } %struct.opt = type { i32, i8, i8, i32, i32, i32, i32, i32, [4 x i32] } ;define i32 @test(i32 %len, %struct.base_s* nocapture readonly %obj) { define i64 @test(i32 %len, %struct.base_s* nocapture readonly %obj) { entry: br label %while.cond while.cond: ; preds = %while.cond, %entry ; %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ] %i.0 = phi i64 [ 0, %entry ], [ %inc, %while.cond ] ; %idxprom = zext i32 %i.0 to i64 ; %len1 = getelementptr inbounds %struct.base_s, %struct.base_s* %obj, i64 0, i32 6, i64 %idxprom, i32 0 %len1 = getelementptr inbounds %struct.base_s, 
%struct.base_s* %obj, i64 0, i32 6, i64 %i.0, i32 0 %0 = load i32, i32* %len1, align 4 %cmp = icmp ult i32 %0, %len ; %inc = add i32 %i.0, 1 %inc = add i64 %i.0, 1 br i1 %cmp, label %while.cond, label %while.end while.end: ; preds = %while.cond ; ret i32 %i.0 ret i64 %i.0 } Thanks JinGu Kang -------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20211125/44585c82/attachment.html>
Philip Reames via llvm-dev
2021-Nov-29 17:57 UTC
[llvm-dev] Question about supporting zext on IVUsers and LSR
First, there are no "simple" questions about LSR. :) Second, I wouldn't view your example as an LSR problem, but as a failed IR canonicalization. In the example, we'd try to widen the IV in IndVars, and LSR would expect the widening to have already been done. I'd take a look into why we're not widening the IV as your next step. Philip On 11/25/21 6:37 AM, Jingu Kang via llvm-dev wrote: > > Hi All, > > I am looking at a simple example as below. > > target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" > > target triple = "aarch64-unknown-linux-gnu" > > %struct.base_s = type { %struct.range, i64, i64, i64*, i32, [4 x i32], > [274 x %struct.match], i32, i32, i8, i8, i8, i32, i32, i32, [16 x [768 > x i16]], [12 x [16 x i16]], [12 x i16], [12 x i16], [12 x i16], [12 x > i16], [12 x [16 x i16]], [4 x [64 x i16]], [114 x i16], [16 x i16], > %struct.length, %struct.length, [4 x [64 x i32]], [4 x [128 x i32]], > i32, i32, [16 x i32], i32, i32, i32, [4096 x %struct.opt] } > > %struct.range = type { i64, i64, i32, i8, i64, i32, i32, [53 x i32], > [53 x i16*] } > > %struct.match = type { i32, i32 } > > %struct.length = type { i16, i16, [16 x [8 x i16]], [16 x [8 x i16]], > [256 x i16], [16 x [272 x i32]], i32, [16 x i32] } > > %struct.opt = type { i32, i8, i8, i32, i32, i32, i32, i32, [4 x i32] } > > define i32 @test(i32 %len, %struct.base_s* nocapture readonly %obj) { > > entry: > > br label %while.cond > > while.cond: ; preds = %while.cond, %entry > > %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ] > > %idxprom = zext i32 %i.0 to i64 > > %len1 = getelementptr inbounds %struct.base_s, %struct.base_s* %obj, > i64 0, i32 6, i64 %idxprom, i32 0 > > %0 = load i32, i32* %len1, align 4 > > %cmp = icmp ult i32 %0, %len > > %inc = add i32 %i.0, 1 > > br i1 %cmp, label %while.cond, label %while.end > > while.end: ; preds = %while.cond > > ret i32 %i.0 > > } > > I expected the LSR pass extracts the loop invariant part from `%len1 = > getelementptr` and hoists it to 
preheader. It could cause a new IV for > the loop dependent part from gep inside loop and `%0 = load` could use > it. However, it looks the `IVUsers` does process the `%idxprom = > zext`. I can see the `SCEVAddRecExpr` and `SCEVAddExpr` are handled in > `isInteresting` function. It seems LSR pass does not also handle the > `zext` for `IVChain`. If I remove the `%idxprom = zext` manually on > above example, I can see LSR works as the expectation. Does anyone > know why the `zext` is not supported on IVUsers and LSR? Does it make > LSR difficult to construct formulas and compare them? If I missed > something, please let me know. > > For reference, the assembly output of above example with `-O3` is as > below. > > test: > > mov w8, w0 > > mov w0, #-1 > > .LBB0_1: > > add w0, w0, #1 > > add x9, x1, w0, uxtw #3 > > ldr w9, [x9, #724] > > cmp w9, w8 > > b.lo .LBB0_1 > > Ret > > If I remove the `zext`, the output is as below and the loop has one > less instruction against above output. > > test: > > add x9, x1, #724 > > mov x8, #-1 > > .LBB0_1: > > ldr w10, [x9], #8 > > add x8, x8, #1 > > cmp w10, w0 > > b.lo .LBB0_1 > > mov x0, x8 > > ret > > The IR code, in which the `zext` is removed, is as below. 
> > target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" > > target triple = "aarch64-unknown-linux-gnu" > > %struct.base_s = type { %struct.range, i64, i64, i64*, i32, [4 x i32], > [274 x %struct.match], i32, i32, i8, i8, i8, i32, i32, i32, [16 x [768 > x i16]], [12 x [16 x i16]], [12 x i16], [12 x i16], [12 x i16], [12 x > i16], [12 x [16 x i16]], [4 x [64 x i16]], [114 x i16], [16 x i16], > %struct.length, %struct.length, [4 x [64 x i32]], [4 x [128 x i32]], > i32, i32, [16 x i32], i32, i32, i32, [4096 x %struct.opt] } > > %struct.range = type { i64, i64, i32, i8, i64, i32, i32, [53 x i32], > [53 x i16*] } > > %struct.match = type { i32, i32 } > > %struct.length = type { i16, i16, [16 x [8 x i16]], [16 x [8 x i16]], > [256 x i16], [16 x [272 x i32]], i32, [16 x i32] } > > %struct.opt = type { i32, i8, i8, i32, i32, i32, i32, i32, [4 x i32] } > > ;define i32 @test(i32 %len, %struct.base_s* nocapture readonly %obj) { > > define i64 @test(i32 %len, %struct.base_s* nocapture readonly %obj) { > > entry: > > br label %while.cond > > while.cond: ; preds = %while.cond, %entry > > ; %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ] > > %i.0 = phi i64 [ 0, %entry ], [ %inc, %while.cond ] > > ; %idxprom = zext i32 %i.0 to i64 > > ; %len1 = getelementptr inbounds %struct.base_s, %struct.base_s* > %obj, i64 0, i32 6, i64 %idxprom, i32 0 > > %len1 = getelementptr inbounds %struct.base_s, %struct.base_s* %obj, > i64 0, i32 6, i64 %i.0, i32 0 > > %0 = load i32, i32* %len1, align 4 > > %cmp = icmp ult i32 %0, %len > > ; %inc = add i32 %i.0, 1 > > %inc = add i64 %i.0, 1 > > br i1 %cmp, label %while.cond, label %while.end > > while.end: ; preds = %while.cond > > ; ret i32 %i.0 > > ret i64 %i.0 > > } > > Thanks > > JinGu Kang > > > _______________________________________________ > LLVM Developers mailing list > llvm-dev at lists.llvm.org > https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev-------------- next part -------------- An HTML attachment was 
scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20211129/f0581341/attachment.html>
Jingu Kang via llvm-dev
2021-Dec-01 11:00 UTC
[llvm-dev] Question about supporting zext on IVUsers and LSR
Hi Philip, I appreciate your kind guidance. As you mentioned, IndVars tries to widen the IV in the example. SCEV fails to generate an AddRecExpr for the zext because `%inc = add i32 %i.0, 1` could overflow. Let me check why clang does not set the NUW flag on `%inc = add i32 %i.0, 1`. Thanks, JinGu Kang From: llvm-dev <llvm-dev-bounces at lists.llvm.org> On Behalf Of Philip Reames via llvm-dev Sent: 29 November 2021 17:58 To: Jingu Kang <Jingu.Kang at arm.com>; llvm-dev at lists.llvm.org Subject: Re: [llvm-dev] Question about supporting zext on IVUsers and LSR First, there are no "simple" question about LSR. :) Second, I wouldn't view your example as an LSR problem, but a failed IR canonicalization. In the example, we'd try to widen the IV in IndVars, and LSR would expect the widening to have already been done. I'd take a look into why we're not widening the IV as your next step. Philip On 11/25/21 6:37 AM, Jingu Kang via llvm-dev wrote: Hi All, I am looking at a simple example as below. 
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" %struct.base_s = type { %struct.range, i64, i64, i64*, i32, [4 x i32], [274 x %struct.match], i32, i32, i8, i8, i8, i32, i32, i32, [16 x [768 x i16]], [12 x [16 x i16]], [12 x i16], [12 x i16], [12 x i16], [12 x i16], [12 x [16 x i16]], [4 x [64 x i16]], [114 x i16], [16 x i16], %struct.length, %struct.length, [4 x [64 x i32]], [4 x [128 x i32]], i32, i32, [16 x i32], i32, i32, i32, [4096 x %struct.opt] } %struct.range = type { i64, i64, i32, i8, i64, i32, i32, [53 x i32], [53 x i16*] } %struct.match = type { i32, i32 } %struct.length = type { i16, i16, [16 x [8 x i16]], [16 x [8 x i16]], [256 x i16], [16 x [272 x i32]], i32, [16 x i32] } %struct.opt = type { i32, i8, i8, i32, i32, i32, i32, i32, [4 x i32] } define i32 @test(i32 %len, %struct.base_s* nocapture readonly %obj) { entry: br label %while.cond while.cond: ; preds = %while.cond, %entry %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ] %idxprom = zext i32 %i.0 to i64 %len1 = getelementptr inbounds %struct.base_s, %struct.base_s* %obj, i64 0, i32 6, i64 %idxprom, i32 0 %0 = load i32, i32* %len1, align 4 %cmp = icmp ult i32 %0, %len %inc = add i32 %i.0, 1 br i1 %cmp, label %while.cond, label %while.end while.end: ; preds = %while.cond ret i32 %i.0 } I expected the LSR pass extracts the loop invariant part from `%len1 = getelementptr` and hoists it to preheader. It could cause a new IV for the loop dependent part from gep inside loop and `%0 = load` could use it. However, it looks the `IVUsers` does process the `%idxprom = zext`. I can see the `SCEVAddRecExpr` and `SCEVAddExpr` are handled in `isInteresting` function. It seems LSR pass does not also handle the `zext` for `IVChain`. If I remove the `%idxprom = zext` manually on above example, I can see LSR works as the expectation. Does anyone know why the `zext` is not supported on IVUsers and LSR? 
Does it make LSR difficult to construct formulas and compare them? If I missed something, please let me know. For reference, the assembly output of above example with `-O3` is as below. test: mov w8, w0 mov w0, #-1 .LBB0_1: add w0, w0, #1 add x9, x1, w0, uxtw #3 ldr w9, [x9, #724] cmp w9, w8 b.lo .LBB0_1 Ret If I remove the `zext`, the output is as below and the loop has one less instruction against above output. test: add x9, x1, #724 mov x8, #-1 .LBB0_1: ldr w10, [x9], #8 add x8, x8, #1 cmp w10, w0 b.lo .LBB0_1 mov x0, x8 ret The IR code, in which the `zext` is removed, is as below. target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" %struct.base_s = type { %struct.range, i64, i64, i64*, i32, [4 x i32], [274 x %struct.match], i32, i32, i8, i8, i8, i32, i32, i32, [16 x [768 x i16]], [12 x [16 x i16]], [12 x i16], [12 x i16], [12 x i16], [12 x i16], [12 x [16 x i16]], [4 x [64 x i16]], [114 x i16], [16 x i16], %struct.length, %struct.length, [4 x [64 x i32]], [4 x [128 x i32]], i32, i32, [16 x i32], i32, i32, i32, [4096 x %struct.opt] } %struct.range = type { i64, i64, i32, i8, i64, i32, i32, [53 x i32], [53 x i16*] } %struct.match = type { i32, i32 } %struct.length = type { i16, i16, [16 x [8 x i16]], [16 x [8 x i16]], [256 x i16], [16 x [272 x i32]], i32, [16 x i32] } %struct.opt = type { i32, i8, i8, i32, i32, i32, i32, i32, [4 x i32] } ;define i32 @test(i32 %len, %struct.base_s* nocapture readonly %obj) { define i64 @test(i32 %len, %struct.base_s* nocapture readonly %obj) { entry: br label %while.cond while.cond: ; preds = %while.cond, %entry ; %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ] %i.0 = phi i64 [ 0, %entry ], [ %inc, %while.cond ] ; %idxprom = zext i32 %i.0 to i64 ; %len1 = getelementptr inbounds %struct.base_s, %struct.base_s* %obj, i64 0, i32 6, i64 %idxprom, i32 0 %len1 = getelementptr inbounds %struct.base_s, %struct.base_s* %obj, i64 0, i32 6, i64 %i.0, i32 0 %0 = load i32, 
i32* %len1, align 4 %cmp = icmp ult i32 %0, %len ; %inc = add i32 %i.0, 1 %inc = add i64 %i.0, 1 br i1 %cmp, label %while.cond, label %while.end while.end: ; preds = %while.cond ; ret i32 %i.0 ret i64 %i.0 } Thanks JinGu Kang _______________________________________________ LLVM Developers mailing list llvm-dev at lists.llvm.org<mailto:llvm-dev at lists.llvm.org> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev -------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20211201/abf6445b/attachment.html>