Argyrios Kyrtzidis
2010-Aug-31 20:21 UTC
[LLVMdev] "equivalent" .ll files diverge after optimizations are applied
Here's the optimized versions: $ opt -std-compile-opts unopt-pass.ll -o - | llvm-dis -o - [...] define %3 @_ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE(%"class.WebCore::GraphicsContext"* %this, %"struct.WebCore::FloatRect"* %rect) nounwind ssp align 2 { %roundedOrigin = alloca %"class.WebCore::FloatSize", align 4 ; <%"class.WebCore::FloatSize"*> [#uses=3] %roundedLowerRight = alloca %"class.WebCore::FloatSize", align 4 ; <%"class.WebCore::FloatSize"*> [#uses=3] %1 = getelementptr inbounds %"class.WebCore::FloatSize"* %roundedOrigin, i64 0, i32 0 ; <float*> [#uses=2] store float 0.000000e+00, float* %1, align 4 %2 = getelementptr inbounds %"class.WebCore::FloatSize"* %roundedOrigin, i64 0, i32 1 ; <float*> [#uses=2] store float 0.000000e+00, float* %2, align 4 %3 = getelementptr inbounds %"class.WebCore::FloatSize"* %roundedLowerRight, i64 0, i32 0 ; <float*> [#uses=2] store float 0.000000e+00, float* %3, align 4 %4 = getelementptr inbounds %"class.WebCore::FloatSize"* %roundedLowerRight, i64 0, i32 1 ; <float*> [#uses=2] store float 0.000000e+00, float* %4, align 4 %5 = getelementptr inbounds %"class.WebCore::GraphicsContext"* %this, i64 0, i32 1 ; <%"class.WebCore::GraphicsContextPlatformPrivate"**> [#uses=1] %6 = load %"class.WebCore::GraphicsContextPlatformPrivate"** %5, align 8 ; <%"class.WebCore::GraphicsContextPlatformPrivate"*> [#uses=1] call void @_ZN7WebCore5mouniEPNS_15GraphicsContextEPNS_30GraphicsContextPlatformPrivateERKNS_9FloatRectERNS_10FloatPointES8_(%"class.WebCore::GraphicsContext"* %this, %"class.WebCore::GraphicsContextPlatformPrivate"* %6, %"struct.WebCore::FloatRect"* %rect, %"class.WebCore::FloatSize"* %roundedOrigin, %"class.WebCore::FloatSize"* %roundedLowerRight) nounwind %7 = load float* %3, align 4 ; <float> [#uses=1] %8 = load float* %1, align 4 ; <float> [#uses=2] %9 = fsub float %7, %8 ; <float> [#uses=1] %10 = load float* %4, align 4 ; <float> [#uses=1] %11 = load float* %2, align 4 ; <float> [#uses=2] %12 = 
fsub float %10, %11 ; <float> [#uses=1] %13 = insertelement <2 x float> undef, float %8, i32 0 ; <<2 x float>> [#uses=1] %14 = insertelement <2 x float> %13, float %11, i32 1 ; <<2 x float>> [#uses=1] %tmp8 = insertvalue %3 undef, <2 x float> %14, 0 ; <%3> [#uses=1] %15 = insertelement <2 x float> undef, float %9, i32 0 ; <<2 x float>> [#uses=1] %16 = insertelement <2 x float> %15, float %12, i32 1 ; <<2 x float>> [#uses=1] %tmp12 = insertvalue %3 %tmp8, <2 x float> %16, 1 ; <%3> [#uses=1] ret %3 %tmp12 } $ opt -std-compile-opts unopt-fail.ll -o - | llvm-dis -o - [...] define %3 @_ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE(%"class.WebCore::GraphicsContext"* %this, %"struct.WebCore::FloatRect"* %rect) nounwind ssp align 2 { %roundedOrigin = alloca i64, align 8 ; <i64*> [#uses=3] %tmpcast = bitcast i64* %roundedOrigin to %"class.WebCore::FloatSize"* ; <%"class.WebCore::FloatSize"*> [#uses=2] %roundedLowerRight = alloca %"class.WebCore::FloatSize", align 4 ; <%"class.WebCore::FloatSize"*> [#uses=3] %1 = bitcast i64* %roundedOrigin to float* ; <float*> [#uses=2] store float 0.000000e+00, float* %1, align 8 %2 = getelementptr inbounds %"class.WebCore::FloatSize"* %tmpcast, i64 0, i32 1 ; <float*> [#uses=2] store float 0.000000e+00, float* %2, align 4 %3 = getelementptr inbounds %"class.WebCore::FloatSize"* %roundedLowerRight, i64 0, i32 0 ; <float*> [#uses=2] store float 0.000000e+00, float* %3, align 4 %4 = getelementptr inbounds %"class.WebCore::FloatSize"* %roundedLowerRight, i64 0, i32 1 ; <float*> [#uses=2] store float 0.000000e+00, float* %4, align 4 %5 = getelementptr inbounds %"class.WebCore::GraphicsContext"* %this, i64 0, i32 1 ; <%"class.WebCore::GraphicsContextPlatformPrivate"**> [#uses=1] %6 = load %"class.WebCore::GraphicsContextPlatformPrivate"** %5, align 8 ; <%"class.WebCore::GraphicsContextPlatformPrivate"*> [#uses=1] call void 
@_ZN7WebCore5mouniEPNS_15GraphicsContextEPNS_30GraphicsContextPlatformPrivateERKNS_9FloatRectERNS_10FloatPointES8_(%"class.WebCore::GraphicsContext"* %this, %"class.WebCore::GraphicsContextPlatformPrivate"* %6, %"struct.WebCore::FloatRect"* %rect, %"class.WebCore::FloatSize"* %tmpcast, %"class.WebCore::FloatSize"* %roundedLowerRight) nounwind %7 = load float* %3, align 4 ; <float> [#uses=1] %8 = load float* %1, align 8 ; <float> [#uses=1] %9 = fsub float %7, %8 ; <float> [#uses=1] %10 = load float* %4, align 4 ; <float> [#uses=1] %11 = load float* %2, align 4 ; <float> [#uses=1] %12 = fsub float %10, %11 ; <float> [#uses=1] %tmp3.i = insertelement <2 x float> undef, float %9, i32 0 ; <<2 x float>> [#uses=1] %tmp1.i = insertelement <2 x float> %tmp3.i, float %12, i32 1 ; <<2 x float>> [#uses=1] %tmp = bitcast <2 x float> %tmp1.i to i64 ; <i64> [#uses=1] %tmp.i.i = load i64* %roundedOrigin, align 8 ; <i64> [#uses=1] %tmp9 = insertelement <1 x i64> undef, i64 %tmp.i.i, i32 0 ; <<1 x i64>> [#uses=1] %tmp6 = insertelement <1 x i64> undef, i64 %tmp, i32 0 ; <<1 x i64>> [#uses=1] %tmp11 = bitcast <1 x i64> %tmp9 to <2 x float> ; <<2 x float>> [#uses=1] %insert = insertvalue %3 undef, <2 x float> %tmp11, 0 ; <%3> [#uses=1] %tmp8 = bitcast <1 x i64> %tmp6 to <2 x float> ; <<2 x float>> [#uses=1] %insert4 = insertvalue %3 %insert, <2 x float> %tmp8, 1 ; <%3> [#uses=1] ret %3 %insert4 } Just to be clear, are you saying that the fact that, after using llc on the second IR, the produced asm is using MM registers, indicates a bug ? -Argiris On Aug 31, 2010, at 7:51 PM, Dale Johannesen wrote:> Using MM registers is wrong unless the user has specifically asked for it, which doesn't seem to be the case here. 
> In the awesome MMX architecture, touching an MM register makes subsequent x87 operations fail unless an EMMS instruction is issued first; none of the compilers here are smart enough to insert EMMS instructions in the right places, so the only safe thing is not to use these registers. There is no x87 instruction shown here, but you've probably got one in the full test suite and not in the test by itself, which fits your data. > > Why this is happening is not immediately clear. It looks like the successful code is doing an aggregate copy field-by-field while the failing code has lowered this to a memcpy. I would certainly expect the memcpy expansion to be smart enough to avoid using MM registers, though; that's a serious bug if it isn't. > > movd %xmm0, %rax > movd %rax, %mm0 > movq2dq %mm0, %xmm1 > movq2dq %mm0, %xmm2 > punpcklqdq %xmm2, %xmm1 ## xmm1 = xmm1[0],xmm2[0] > movq 16(%rsp), %rax > movd %rax, %mm0 > movq2dq %mm0, %xmm0 > punpcklqdq %xmm2, %xmm0 ## xmm0 = xmm0[0],xmm2[0] > > > On Aug 31, 2010, at 11:18 AMPDT, Argyrios Kyrtzidis wrote: > >> Hi, >> >> I've attached 2 .ll files which are supposed to be equivalent but 'unopt-fail.ll' causes a crash in webkit's test suite while 'unopt-pass.ll' does not. I can't give more details about the crash, when I run the crashing test it in isolation it passes, when I run the full suite it crashes; it boggles the mind. >> >> Below I provide the optimized asm that is produced from each file. Could you give a hint on what is the problem ? >> I also attached 't.cpp' which approximates the source that the .ll files came from. 
>> >> -Argiris >> >> > <unopt-fail.ll><unopt-pass.ll><t.cpp> >> >> >> >> >> $ opt -std-compile-opts unopt-pass.ll -o - | llc -o - >> >> .section __TEXT,__text,regular,pure_instructions >> .globl __ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE >> .align 4, 0x90 >> __ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE: ## @_ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE >> ## BB#0: >> subq $24, %rsp >> movq %rsi, %rdx >> movl $0, 16(%rsp) >> movl $0, 20(%rsp) >> movl $0, 8(%rsp) >> movl $0, 12(%rsp) >> movq 8(%rdi), %rsi >> leaq 16(%rsp), %rcx >> leaq 8(%rsp), %r8 >> callq __ZN7WebCore5mouniEPNS_15GraphicsContextEPNS_30GraphicsContextPlatformPrivateERKNS_9FloatRectERNS_10FloatPointES8_ >> movss 8(%rsp), %xmm1 >> movss 12(%rsp), %xmm0 >> subss 20(%rsp), %xmm0 >> subss 16(%rsp), %xmm1 >> ## kill: XMM1<def> XMM1<kill> XMM1<def> >> insertps $16, %xmm0, %xmm1 ## xmm1 = xmm1[0],xmm0[0],xmm1[2,3] >> movq 16(%rsp), %xmm0 >> addq $24, %rsp >> ret >> >> >> $ opt -std-compile-opts unopt-fail.ll -o - | llc -o - >> >> .section __TEXT,__text,regular,pure_instructions >> .globl __ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE >> .align 4, 0x90 >> __ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE: ## @_ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE >> ## BB#0: >> subq $24, %rsp >> movq %rsi, %rdx >> movl $0, 16(%rsp) >> movl $0, 20(%rsp) >> movl $0, 8(%rsp) >> movl $0, 12(%rsp) >> movq 8(%rdi), %rsi >> leaq 16(%rsp), %rcx >> leaq 8(%rsp), %r8 >> callq __ZN7WebCore5mouniEPNS_15GraphicsContextEPNS_30GraphicsContextPlatformPrivateERKNS_9FloatRectERNS_10FloatPointES8_ >> movss 8(%rsp), %xmm0 >> movss 12(%rsp), %xmm1 >> subss 20(%rsp), %xmm1 >> subss 16(%rsp), %xmm0 >> insertps $16, %xmm1, %xmm0 ## xmm0 = xmm0[0],xmm1[0],xmm0[2,3] >> movd %xmm0, %rax >> movd %rax, %mm0 >> movq2dq %mm0, %xmm1 >> movq2dq %mm0, %xmm2 >> punpcklqdq %xmm2, %xmm1 ## xmm1 = xmm1[0],xmm2[0] >> movq 
16(%rsp), %rax >> movd %rax, %mm0 >> movq2dq %mm0, %xmm0 >> punpcklqdq %xmm2, %xmm0 ## xmm0 = xmm0[0],xmm2[0] >> addq $24, %rsp >> ret >> >> _______________________________________________ >> LLVM Developers mailing list >> LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev >
Dale Johannesen
2010-Aug-31 20:29 UTC
[LLVMdev] "equivalent" .ll files diverge after optimizations are applied
On Aug 31, 2010, at 1:21 PM PDT, Argyrios Kyrtzidis wrote: > > Just to be clear, are you saying that the fact that, after using llc > on the second IR, the produced asm is using MM registers, indicates > a bug ? Yes. It's not immediately obvious whether it's in the opt or llc, though. Chris was doing work involving <2 x float> and may know about this. > -Argiris > > > On Aug 31, 2010, at 7:51 PM, Dale Johannesen wrote: > >> Using MM registers is wrong unless the user has specifically asked >> for it, which doesn't seem to be the case here. >> In the awesome MMX architecture, touching an MM register makes >> subsequent x87 operations fail unless an EMMS instruction is issued >> first; none of the compilers here are smart enough to insert EMMS >> instructions in the right places, so the only safe thing is not to >> use these registers. There is no x87 instruction shown here, but >> you've probably got one in the full test suite and not in the test >> by itself, which fits your data. >> >> Why this is happening is not immediately clear. It looks like the >> successful code is doing an aggregate copy field-by-field while the >> failing code has lowered this to a memcpy. I would certainly >> expect the memcpy expansion to be smart enough to avoid using MM >> registers, though; that's a serious bug if it isn't. >> >> movd %xmm0, %rax >> movd %rax, %mm0 >> movq2dq %mm0, %xmm1 >> movq2dq %mm0, %xmm2 >> punpcklqdq %xmm2, %xmm1 ## xmm1 = xmm1[0],xmm2[0] >> movq 16(%rsp), %rax >> movd %rax, %mm0 >> movq2dq %mm0, %xmm0 >> punpcklqdq %xmm2, %xmm0 ## xmm0 = xmm0[0],xmm2[0] >> >> >> On Aug 31, 2010, at 11:18 AM PDT, Argyrios Kyrtzidis wrote: >> >>> Hi, >>> >>> I've attached 2 .ll files which are supposed to be equivalent but >>> 'unopt-fail.ll' causes a crash in webkit's test suite while 'unopt- >>> pass.ll' does not. I can't give more details about the crash, when >>> I run the crashing test it in isolation it passes, when I run the >>> full suite it crashes; it boggles the mind. 
>>> >>> Below I provide the optimized asm that is produced from each file. >>> Could you give a hint on what is the problem ? >>> I also attached 't.cpp' which approximates the source that the .ll >>> files came from. >>> >>> -Argiris >>> >>> >> <unopt-fail.ll><unopt-pass.ll><t.cpp> >>> >>> >>> >>> >>> $ opt -std-compile-opts unopt-pass.ll -o - | llc -o - >>> >>> .section __TEXT,__text,regular,pure_instructions >>> .globl >>> __ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE >>> .align 4, 0x90 >>> __ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE >>> : ## >>> @_ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE >>> ## BB#0: >>> subq $24, %rsp >>> movq %rsi, %rdx >>> movl $0, 16(%rsp) >>> movl $0, 20(%rsp) >>> movl $0, 8(%rsp) >>> movl $0, 12(%rsp) >>> movq 8(%rdi), %rsi >>> leaq 16(%rsp), %rcx >>> leaq 8(%rsp), %r8 >>> callq >>> __ZN7WebCore5mouniEPNS_15GraphicsContextEPNS_30GraphicsContextPlatformPrivateERKNS_9FloatRectERNS_10FloatPointES8_ >>> movss 8(%rsp), %xmm1 >>> movss 12(%rsp), %xmm0 >>> subss 20(%rsp), %xmm0 >>> subss 16(%rsp), %xmm1 >>> ## kill: XMM1<def> XMM1<kill> >>> XMM1<def> >>> insertps $16, %xmm0, %xmm1 ## xmm1 = xmm1[0],xmm0[0],xmm1[2,3] >>> movq 16(%rsp), %xmm0 >>> addq $24, %rsp >>> ret >>> >>> >>> $ opt -std-compile-opts unopt-fail.ll -o - | llc -o - >>> >>> .section __TEXT,__text,regular,pure_instructions >>> .globl >>> __ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE >>> .align 4, 0x90 >>> __ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE >>> : ## >>> @_ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE >>> ## BB#0: >>> subq $24, %rsp >>> movq %rsi, %rdx >>> movl $0, 16(%rsp) >>> movl $0, 20(%rsp) >>> movl $0, 8(%rsp) >>> movl $0, 12(%rsp) >>> movq 8(%rdi), %rsi >>> leaq 16(%rsp), %rcx >>> leaq 8(%rsp), %r8 >>> callq >>> __ZN7WebCore5mouniEPNS_15GraphicsContextEPNS_30GraphicsContextPlatformPrivateERKNS_9FloatRectERNS_10FloatPointES8_ >>> 
movss 8(%rsp), %xmm0 >>> movss 12(%rsp), %xmm1 >>> subss 20(%rsp), %xmm1 >>> subss 16(%rsp), %xmm0 >>> insertps $16, %xmm1, %xmm0 ## xmm0 = xmm0[0],xmm1[0],xmm0[2,3] >>> movd %xmm0, %rax >>> movd %rax, %mm0 >>> movq2dq %mm0, %xmm1 >>> movq2dq %mm0, %xmm2 >>> punpcklqdq %xmm2, %xmm1 ## xmm1 = xmm1[0],xmm2[0] >>> movq 16(%rsp), %rax >>> movd %rax, %mm0 >>> movq2dq %mm0, %xmm0 >>> punpcklqdq %xmm2, %xmm0 ## xmm0 = xmm0[0],xmm2[0] >>> addq $24, %rsp >>> ret >>> >>> _______________________________________________ >>> LLVM Developers mailing list >>> LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu >>> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev >> >
Chris Lattner
2010-Aug-31 21:20 UTC
[LLVMdev] "equivalent" .ll files diverge after optimizations are applied
On Aug 31, 2010, at 1:29 PM, Dale Johannesen wrote: > > On Aug 31, 2010, at 1:21 PM PDT, Argyrios Kyrtzidis wrote: >> >> Just to be clear, are you saying that the fact that, after using llc >> on the second IR, the produced asm is using MM registers, indicates >> a bug ? > > Yes. It's not immediately obvious whether it's in the opt or llc, > though. > Chris was doing work involving <2 x float> and may know about this. I did. <2 x float> doesn't use MMX, but <2 x int> probably does. It is possible that the optimizer is turning <2 x float> operations into <2 x int> ones or something... -Chris
Reasonably Related Threads
- [LLVMdev] "equivalent" .ll files diverge after optimizations are applied
- [LLVMdev] "equivalent" .ll files diverge after optimizations are applied
- [LLVMdev] "equivalent" .ll files diverge after optimizations are applied
- [PATCH] Make SSE Run Time option. Add Win32 SSE code
- [LLVMdev] LLVM Build Bot failure on llmv-x86_64-ubuntu