The attached .ll files seem equivalent, but the resulting asm from 'opt-fail.ll' causes a crash to webkit. I suspect the usage of registers is wrong, can someone take a look ? $ llc opt-pass.ll -o - .section __TEXT,__text,regular,pure_instructions .globl __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE .align 4, 0x90 __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE: ## @_ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE ## BB#0: pushq %r14 pushq %rbx subq $8, %rsp movq %rsi, %rbx movq %rdi, %r14 movq %rdx, %rdi movq %rcx, %rsi callq __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE movq %rax, %rcx shrq $32, %rcx testl %ecx, %ecx je LBB0_2 ## BB#1: imull (%rbx), %eax cltd idivl %ecx movl %eax, (%r14) LBB0_2: addq $8, %rsp popq %rbx popq %r14 ret $ llc opt-fail.ll -o - .section __TEXT,__text,regular,pure_instructions .globl __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE .align 4, 0x90 __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE: ## @_ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE ## BB#0: pushq %r14 pushq %rbx subq $8, %rsp movq %rsi, %rbx movq %rdi, %r14 movq %rdx, %rdi movq %rcx, %rsi callq __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE movq %rax, %rcx shrq $32, %rcx testl %ecx, %ecx je LBB0_2 ## BB#1: movl (%rbx), %ecx imull %ecx, %eax shrq $32, %rax movl %eax, %ecx cltd idivl %ecx movl %eax, (%r14) LBB0_2: addq $8, %rsp popq %rbx popq %r14 ret -Argiris -------------- next part -------------- A non-text attachment was scrubbed... Name: opt-fail.ll Type: application/octet-stream Size: 1816 bytes Desc: not available URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20100901/f06be303/attachment.obj> -------------- next part -------------- A non-text attachment was scrubbed... 
Name: opt-pass.ll Type: application/octet-stream Size: 1838 bytes Desc: not available URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20100901/f06be303/attachment-0001.obj>
On Sep 1, 2010, at 6:25 AM, Argyrios Kyrtzidis wrote:

> The attached .ll files seem equivalent, but the resulting asm from 'opt-fail.ll' causes a crash to webkit.
> I suspect the usage of registers is wrong, can someone take a look ?

The difference is that there is a shift right after the multiply, before the divide. In IR, the difference is:

  %5 = mul nsw i32 %4, %tmp1 ; <i32> [#uses=1]

  %btmp3 = lshr i64 %1, 32 ; <i64> [#uses=1]
  %btmp4 = trunc i64 %btmp3 to i32 ; <i32> [#uses=1]

  %6 = sdiv i32 %5, %btmp4 ; <i32> [#uses=1]

vs:

  %5 = mul nsw i32 %4, %tmp1 ; <i32> [#uses=1]

  ; removed: %btmp3 = lshr i64 %1, 32 ; <i64> [#uses=1]
  ; removed: %btmp4 = trunc i64 %btmp3 to i32 ; <i32> [#uses=1]

  %6 = sdiv i32 %5, %atmp4 ; <i32> [#uses=1]

It looks like you got these by manually editing the file. Do you think that the shift wasn't supposed to be there in the first place? Can you send me a .ii file that produces the __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE function with steps to get the bad IR?
-Chris> > $ llc opt-pass.ll -o - > > .section __TEXT,__text,regular,pure_instructions > .globl __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE > .align 4, 0x90 > __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE: ## @_ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE > ## BB#0: > pushq %r14 > pushq %rbx > subq $8, %rsp > movq %rsi, %rbx > movq %rdi, %r14 > movq %rdx, %rdi > movq %rcx, %rsi > callq __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE > movq %rax, %rcx > shrq $32, %rcx > testl %ecx, %ecx > je LBB0_2 > ## BB#1: > imull (%rbx), %eax > cltd > idivl %ecx > movl %eax, (%r14) > LBB0_2: > addq $8, %rsp > popq %rbx > popq %r14 > ret > > > $ llc opt-fail.ll -o - > > .section __TEXT,__text,regular,pure_instructions > .globl __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE > .align 4, 0x90 > __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE: ## @_ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE > ## BB#0: > pushq %r14 > pushq %rbx > subq $8, %rsp > movq %rsi, %rbx > movq %rdi, %r14 > movq %rdx, %rdi > movq %rcx, %rsi > callq __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE > movq %rax, %rcx > shrq $32, %rcx > testl %ecx, %ecx > je LBB0_2 > ## BB#1: > movl (%rbx), %ecx > imull %ecx, %eax > shrq $32, %rax > movl %eax, %ecx > cltd > idivl %ecx > movl %eax, (%r14) > LBB0_2: > addq $8, %rsp > popq %rbx > popq %r14 > ret > > > -Argiris > > > <opt-fail.ll><opt-pass.ll>_______________________________________________ > LLVM Developers mailing list > LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
I attached preprocessed files. $ llvm-g++ gcc-RenderBoxModelObject.ii -fno-exceptions -arch x86_64 -O2 -c -o part.o vs $ clang++ clang-RenderBoxModelObject.ii -fno-exceptions -arch x86_64 -O2 -c -o part.o If I compile with clang, it causes a crash to webkit. -Argiris -------------- next part -------------- A non-text attachment was scrubbed... Name: prepro.zip Type: application/zip Size: 457370 bytes Desc: not available URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20100901/106b72fb/attachment.zip> -------------- next part -------------- On 1 Sep 2010, at 17:16, Chris Lattner wrote:> > On Sep 1, 2010, at 6:25 AM, Argyrios Kyrtzidis wrote: > >> The attached .ll files seem equivalent, but the resulting asm from 'opt-fail.ll' causes a crash to webkit. >> I suspect the usage of registers is wrong, can someone take a look ? > > The difference is that there is a shift right after the multiply, before the divide. In IR, the difference is: > > %5 = mul nsw i32 %4, %tmp1 ; <i32> [#uses=1] > > %btmp3 = lshr i64 %1, 32 ; <i64> [#uses=1] > %btmp4 = trunc i64 %btmp3 to i32 ; <i32> [#uses=1] > > %6 = sdiv i32 %5, %btmp4 ; <i32> [#uses=1] > > vs: > > %5 = mul nsw i32 %4, %tmp1 ; <i32> [#uses=1] > > ; removed: %btmp3 = lshr i64 %1, 32 ; <i64> [#uses=1] > ; removed: %btmp4 = trunc i64 %btmp3 to i32 ; <i32> [#uses=1] > > %6 = sdiv i32 %5, %atmp4 ; <i32> [#uses=1] > > It looks like you got these by manually editing the file. Do you think that the shift wasn't supposed to be there in the first place? Can you send me a .ii file that produces the __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE function with steps to get the bad IR? 
> > -Chris > > >> >> $ llc opt-pass.ll -o - >> >> .section __TEXT,__text,regular,pure_instructions >> .globl __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE >> .align 4, 0x90 >> __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE: ## @_ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE >> ## BB#0: >> pushq %r14 >> pushq %rbx >> subq $8, %rsp >> movq %rsi, %rbx >> movq %rdi, %r14 >> movq %rdx, %rdi >> movq %rcx, %rsi >> callq __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE >> movq %rax, %rcx >> shrq $32, %rcx >> testl %ecx, %ecx >> je LBB0_2 >> ## BB#1: >> imull (%rbx), %eax >> cltd >> idivl %ecx >> movl %eax, (%r14) >> LBB0_2: >> addq $8, %rsp >> popq %rbx >> popq %r14 >> ret >> >> >> $ llc opt-fail.ll -o - >> >> .section __TEXT,__text,regular,pure_instructions >> .globl __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE >> .align 4, 0x90 >> __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE: ## @_ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE >> ## BB#0: >> pushq %r14 >> pushq %rbx >> subq $8, %rsp >> movq %rsi, %rbx >> movq %rdi, %r14 >> movq %rdx, %rdi >> movq %rcx, %rsi >> callq __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE >> movq %rax, %rcx >> shrq $32, %rcx >> testl %ecx, %ecx >> je LBB0_2 >> ## BB#1: >> movl (%rbx), %ecx >> imull %ecx, %eax >> shrq $32, %rax >> movl %eax, %ecx >> cltd >> idivl %ecx >> movl %eax, (%r14) >> LBB0_2: >> addq $8, %rsp >> popq %rbx >> popq %r14 >> ret >> >> >> -Argiris >> >> >> <opt-fail.ll><opt-pass.ll>_______________________________________________ >> LLVM Developers mailing list >> LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev >
On Sep 1, 2010, at 9:16 AM PDT, Chris Lattner wrote:

>
> On Sep 1, 2010, at 6:25 AM, Argyrios Kyrtzidis wrote:
>
>> The attached .ll files seem equivalent, but the resulting asm from
>> 'opt-fail.ll' causes a crash to webkit.
>> I suspect the usage of registers is wrong, can someone take a look ?
>
> The difference is that there is a shift right after the multiply,
> before the divide. In IR, the difference is:

But the multiply doesn't feed into the shift. The hand-edited version reuses an earlier computation of (int32)(%1>>32). Argyrios is right that these should be equivalent; neither of them is bad.

> %5 = mul nsw i32 %4, %tmp1 ; <i32> [#uses=1]
>
> %btmp3 = lshr i64 %1, 32 ; <i64> [#uses=1]
> %btmp4 = trunc i64 %btmp3 to i32 ; <i32> [#uses=1]
>
> %6 = sdiv i32 %5, %btmp4 ; <i32> [#uses=1]
>
> vs:
>
> %5 = mul nsw i32 %4, %tmp1 ; <i32> [#uses=1]
>
> ; removed: %btmp3 = lshr i64 %1, 32 ; <i64> [#uses=1]
> ; removed: %btmp4 = trunc i64 %btmp3 to i32 ; <i32> [#uses=1]
>
> %6 = sdiv i32 %5, %atmp4 ; <i32> [#uses=1]
>
> It looks like you got these by manually editing the file. Do you
> think that the shift wasn't supposed to be there in the first
> place? Can you send me a .ii file that produces the
> __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE
> function with steps to get the bad IR?
> > -Chris > > >> >> $ llc opt-pass.ll -o - >> >> .section __TEXT,__text,regular,pure_instructions >> .globl >> __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE >> .align 4, 0x90 >> __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE >> : ## >> @_ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE >> ## BB#0: >> pushq %r14 >> pushq %rbx >> subq $8, %rsp >> movq %rsi, %rbx >> movq %rdi, %r14 >> movq %rdx, %rdi >> movq %rcx, %rsi >> callq >> __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE >> movq %rax, %rcx >> shrq $32, %rcx >> testl %ecx, %ecx >> je LBB0_2 >> ## BB#1: >> imull (%rbx), %eax >> cltd >> idivl %ecx >> movl %eax, (%r14) >> LBB0_2: >> addq $8, %rsp >> popq %rbx >> popq %r14 >> ret >> >> >> $ llc opt-fail.ll -o - >> >> .section __TEXT,__text,regular,pure_instructions >> .globl >> __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE >> .align 4, 0x90 >> __ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE >> : ## >> @_ZN7WebCore6kolos1ERiS0_PKNS_20RenderBoxModelObjectEPNS_10StyleImageE >> ## BB#0: >> pushq %r14 >> pushq %rbx >> subq $8, %rsp >> movq %rsi, %rbx >> movq %rdi, %r14 >> movq %rdx, %rdi >> movq %rcx, %rsi >> callq >> __ZN7WebCore4viziEPKNS_20RenderBoxModelObjectEPNS_10StyleImageE >> movq %rax, %rcx >> shrq $32, %rcx >> testl %ecx, %ecx >> je LBB0_2 >> ## BB#1: >> movl (%rbx), %ecx >> imull %ecx, %eax >> shrq $32, %rax >> movl %eax, %ecx >> cltd >> idivl %ecx >> movl %eax, (%r14) >> LBB0_2: >> addq $8, %rsp >> popq %rbx >> popq %r14 >> ret >> >> >> -Argiris >> >> >> <opt-fail.ll><opt- >> pass.ll>_______________________________________________ >> LLVM Developers mailing list >> LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev > > > _______________________________________________ > LLVM Developers mailing list > LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu > 
http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
On Sep 1, 2010, at 6:25 AM PDT, Argyrios Kyrtzidis wrote:

> The attached .ll files seem equivalent, but the resulting asm from
> 'opt-fail.ll' causes a crash to webkit.
> I suspect the usage of registers is wrong, can someone take a look ?

Yes, the code here is wrong:

> movl (%rbx), %ecx
> imull %ecx, %eax

This computes h*((int32)%1) in %eax.

> shrq $32, %rax
> movl %eax, %ecx

This is trying to compute (int32)(%1>>32) into %ecx, but is using the wrong input value since %rax has been clobbered by the above code, and further is clobbering the value in %eax computed above, which is implicit input to the divide. This is some kind of back end error, probably register allocator.

> cltd
> idivl %ecx
On Sep 1, 2010, at 11:14 AM, Dale Johannesen wrote:

>
> On Sep 1, 2010, at 6:25 AM PDT, Argyrios Kyrtzidis wrote:
>
>> The attached .ll files seem equivalent, but the resulting asm from
>> 'opt-fail.ll' causes a crash to webkit.
>> I suspect the usage of registers is wrong, can someone take a look ?
>
> Yes, the code here is wrong:
>
>> movl (%rbx), %ecx
>> imull %ecx, %eax
>
> This computes h*((int32)%1) in %eax.
>
>> shrq $32, %rax
>> movl %eax, %ecx
>
> This is trying to compute (int32)(%1>>32) into %ecx, but is using the
> wrong input value since %rax has been clobbered by the above code, and
> further is clobbering the value in %eax computed above, which is
> implicit input to the divide. This is some kind of back end error,
> probably register allocator.

Jakob, can you take a look when you get a chance?

-Chris