Kévin Szkudlapski
2013-Aug-19 19:21 UTC
[LLVMdev] Issue with X86FrameLowering __chkstk on Windows 8 64-bit / Visual Studio 2012
Hi, I'm using LLVM to convert expressions to native assembly, the problem is when LLVM compiles this code: define void @fn_0000000000000000(i8*, i8*, i8*) { bb: %res = alloca i32 %3 = load i32* %res %4 = bitcast i8* %0 to i32* %5 = load i32* %4 %6 = bitcast i8* %0 to i32* %7 = load i32* %6 %8 = xor i32 %5, %7 store volatile i32 %8, i32* %res %9 = load i32* %res %10 = icmp eq i32 %9, 0 br i1 %10, label %then, label %else merged: ; preds = %else, %then %11 = load i32* %res %12 = and i32 %11, -2147483648 %13 = icmp eq i32 %12, 0 br i1 %13, label %then3, label %else4 then: ; preds = %bb %zf = alloca i1 %14 = load i1* %zf %15 = getelementptr i8* %0, i32 148 %16 = bitcast i8* %15 to i1* %17 = load i1* %16 store volatile i1 true, i1* %16 br label %merged else: ; preds = %bb %zf1 = alloca i1 %18 = load i1* %zf1 %19 = getelementptr i8* %0, i32 148 %20 = bitcast i8* %19 to i1* %21 = load i1* %20 store volatile i1 false, i1* %20 br label %merged merged2: ; preds = %else4, %then3 %22 = bitcast i8* %0 to i32* %23 = load i32* %22 %24 = load i32* %res store volatile i32 %24, i32* %22 %af = alloca i1 %25 = load i1* %af %26 = getelementptr i8* %0, i32 148 %27 = bitcast i8* %26 to i1* %28 = load i1* %27 store volatile i1 false, i1* %27 %of = alloca i1 %29 = load i1* %of %30 = getelementptr i8* %0, i32 148 %31 = bitcast i8* %30 to i1* %32 = load i1* %31 store volatile i1 false, i1* %31 %cf = alloca i1 %33 = load i1* %cf %34 = getelementptr i8* %0, i32 148 %35 = bitcast i8* %34 to i1* %36 = load i1* %35 store volatile i1 false, i1* %35 %37 = getelementptr i8* %0, i32 64 %38 = bitcast i8* %37 to i32* %39 = load i32* %38 %40 = getelementptr i8* %0, i32 64 %41 = bitcast i8* %40 to i32* %42 = load i32* %41 %43 = add i32 %42, 2 store volatile i32 %43, i32* %38 ret void then3: ; preds = %merged %sf = alloca i1 %44 = load i1* %sf %45 = getelementptr i8* %0, i32 148 %46 = bitcast i8* %45 to i1* %47 = load i1* %46 store volatile i1 false, i1* %46 br label %merged2 else4: ; preds = %merged %sf5 
= alloca i1 %48 = load i1* %sf5 %49 = getelementptr i8* %0, i32 148 %50 = bitcast i8* %49 to i1* %51 = load i1* %50 store volatile i1 true, i1* %50 br label %merged2 } It generates the following assembly: 0000000581D30010 push rbp 0000000581D30011 mov rbp,rsp 0000000581D30014 sub rsp,10h 0000000581D30018 mov dword ptr [rbp-4],0 0000000581D3001F mov al,1 0000000581D30021 test al,al 0000000581D30023 jne 0000000581D30042 0000000581D30029 mov eax,10h 0000000581D3002E call 00000005F08425D0 0000000581D30033 sub rsp,rax 0000000581D30036 mov byte ptr [rcx+94h],0 0000000581D3003D jmp 0000000581D30056 0000000581D30042 mov eax,10h 0000000581D30047 call 00000005F08425D0 0000000581D3004C sub rsp,rax 0000000581D3004F mov byte ptr [rcx+94h],1 0000000581D30056 test byte ptr [rbp-1],80h 0000000581D3005A je 0000000581D30079 0000000581D30060 mov eax,10h 0000000581D30065 call 00000005F08425D0 0000000581D3006A sub rsp,rax 0000000581D3006D mov byte ptr [rcx+94h],1 0000000581D30074 jmp 0000000581D3008D 0000000581D30079 mov eax,10h 0000000581D3007E call 00000005F08425D0 0000000581D30083 sub rsp,rax 0000000581D30086 mov byte ptr [rcx+94h],0 0000000581D3008D mov eax,dword ptr [rbp-4] 0000000581D30090 mov dword ptr [rcx],eax 0000000581D30092 mov eax,10h 0000000581D30097 call 00000005F08425D0 0000000581D3009C sub rsp,rax 0000000581D3009F mov byte ptr [rcx+94h],0 0000000581D300A6 mov eax,10h 0000000581D300AB call 00000005F08425D0 0000000581D300B0 sub rsp,rax 0000000581D300B3 mov byte ptr [rcx+94h],0 0000000581D300BA mov eax,10h 0000000581D300BF call 00000005F08425D0 0000000581D300C4 sub rsp,rax 0000000581D300C7 mov byte ptr [rcx+94h],0 0000000581D300CE add dword ptr [rcx+40h],2 0000000581D300D2 mov rsp,rbp 0000000581D300D5 pop rbp 0000000581D300D6 ret The function located at 0x00000005F08425D0 is not valid (according to visual studio: 00000005F08425D0 ?? ??). 
If I compile LLVM bytecode using llc, this function is __chkstk: .def fn_0000000000000000; .scl 2; .type 32; .endef .text .globl fn_0000000000000000 .align 16, 0x90 fn_0000000000000000: # @fn_0000000000000000 # BB#0: # %bb push rbp mov rbp, rsp sub rsp, 16 mov dword ptr [rbp - 4], 0 mov al, 1 test al, al jne .LBB0_1 # BB#2: # %else mov eax, 16 call __chkstk sub rsp, rax mov byte ptr [rcx + 148], 0 jmp .LBB0_3 .LBB0_1: # %then mov eax, 16 call __chkstk sub rsp, rax mov byte ptr [rcx + 148], 1 .LBB0_3: # %merged test byte ptr [rbp - 1], -128 je .LBB0_4 # BB#5: # %else4 mov eax, 16 call __chkstk sub rsp, rax mov byte ptr [rcx + 148], 1 jmp .LBB0_6 .LBB0_4: # %then3 mov eax, 16 call __chkstk sub rsp, rax mov byte ptr [rcx + 148], 0 .LBB0_6: # %merged2 mov eax, dword ptr [rbp - 4] mov dword ptr [rcx], eax mov eax, 16 call __chkstk sub rsp, rax mov byte ptr [rcx + 148], 0 mov eax, 16 call __chkstk sub rsp, rax mov byte ptr [rcx + 148], 0 mov eax, 16 call __chkstk sub rsp, rax mov byte ptr [rcx + 148], 0 add dword ptr [rcx + 64], 2 mov rsp, rbp pop rbp ret It seems this issue has already been described here https://groups.google.com/forum/#!topic/llvm-commit/htNjwbWsNe8 I'm using this code https://github.com/wisk/medusa/blob/master/src/emul/llvm/llvm_emulator.cpp which is pretty basic. Please, tell me if you need further information about this issue. -- Kevin Szkudlapski
Michael Lewis
2013-Aug-27 03:18 UTC
[LLVMdev] Issue with X86FrameLowering __chkstk on Windows 8 64-bit / Visual Studio 2012
It's not a solution to the actual bug (which is, as the thread you linked discusses, a problem with the assumption on LLVM's part that the __chkstk function lies within 2GB of the emitted code's address space) but there is a simple workaround: hoist all allocas to the first basic block of your function. This allows the JIT to perform all stack allocations in a single adjustment of the SP instead of needing to use dynamic stack allocation, and thereby avoids the call to __chkstk entirely. On Mon, Aug 19, 2013 at 12:21 PM, Kévin Szkudlapski <szkudl.k at gmail.com>wrote:> Hi, > > I'm using LLVM to convert expressions to native assembly, the problem > is when LLVM compiles this code: > > define void @fn_0000000000000000(i8*, i8*, i8*) { > bb: > %res = alloca i32 > %3 = load i32* %res > %4 = bitcast i8* %0 to i32* > %5 = load i32* %4 > %6 = bitcast i8* %0 to i32* > %7 = load i32* %6 > %8 = xor i32 %5, %7 > store volatile i32 %8, i32* %res > %9 = load i32* %res > %10 = icmp eq i32 %9, 0 > br i1 %10, label %then, label %else > > merged: ; preds = %else, %then > %11 = load i32* %res > %12 = and i32 %11, -2147483648 > %13 = icmp eq i32 %12, 0 > br i1 %13, label %then3, label %else4 > > then: ; preds = %bb > %zf = alloca i1 > %14 = load i1* %zf > %15 = getelementptr i8* %0, i32 148 > %16 = bitcast i8* %15 to i1* > %17 = load i1* %16 > store volatile i1 true, i1* %16 > br label %merged > > else: ; preds = %bb > %zf1 = alloca i1 > %18 = load i1* %zf1 > %19 = getelementptr i8* %0, i32 148 > %20 = bitcast i8* %19 to i1* > %21 = load i1* %20 > store volatile i1 false, i1* %20 > br label %merged > > merged2: ; preds = %else4, %then3 > %22 = bitcast i8* %0 to i32* > %23 = load i32* %22 > %24 = load i32* %res > store volatile i32 %24, i32* %22 > %af = alloca i1 > %25 = load i1* %af > %26 = getelementptr i8* %0, i32 148 > %27 = bitcast i8* %26 to i1* > %28 = load i1* %27 > store volatile i1 false, i1* %27 > %of = alloca i1 > %29 = load i1* %of > %30 = getelementptr i8* %0, i32 148 > 
%31 = bitcast i8* %30 to i1* > %32 = load i1* %31 > store volatile i1 false, i1* %31 > %cf = alloca i1 > %33 = load i1* %cf > %34 = getelementptr i8* %0, i32 148 > %35 = bitcast i8* %34 to i1* > %36 = load i1* %35 > store volatile i1 false, i1* %35 > %37 = getelementptr i8* %0, i32 64 > %38 = bitcast i8* %37 to i32* > %39 = load i32* %38 > %40 = getelementptr i8* %0, i32 64 > %41 = bitcast i8* %40 to i32* > %42 = load i32* %41 > %43 = add i32 %42, 2 > store volatile i32 %43, i32* %38 > ret void > > then3: ; preds = %merged > %sf = alloca i1 > %44 = load i1* %sf > %45 = getelementptr i8* %0, i32 148 > %46 = bitcast i8* %45 to i1* > %47 = load i1* %46 > store volatile i1 false, i1* %46 > br label %merged2 > > else4: ; preds = %merged > %sf5 = alloca i1 > %48 = load i1* %sf5 > %49 = getelementptr i8* %0, i32 148 > %50 = bitcast i8* %49 to i1* > %51 = load i1* %50 > store volatile i1 true, i1* %50 > br label %merged2 > } > > > It generates the following assembly: > 0000000581D30010 push rbp > 0000000581D30011 mov rbp,rsp > 0000000581D30014 sub rsp,10h > 0000000581D30018 mov dword ptr [rbp-4],0 > 0000000581D3001F mov al,1 > 0000000581D30021 test al,al > 0000000581D30023 jne 0000000581D30042 > 0000000581D30029 mov eax,10h > 0000000581D3002E call 00000005F08425D0 > 0000000581D30033 sub rsp,rax > 0000000581D30036 mov byte ptr [rcx+94h],0 > 0000000581D3003D jmp 0000000581D30056 > 0000000581D30042 mov eax,10h > 0000000581D30047 call 00000005F08425D0 > 0000000581D3004C sub rsp,rax > 0000000581D3004F mov byte ptr [rcx+94h],1 > 0000000581D30056 test byte ptr [rbp-1],80h > 0000000581D3005A je 0000000581D30079 > 0000000581D30060 mov eax,10h > 0000000581D30065 call 00000005F08425D0 > 0000000581D3006A sub rsp,rax > 0000000581D3006D mov byte ptr [rcx+94h],1 > 0000000581D30074 jmp 0000000581D3008D > 0000000581D30079 mov eax,10h > 0000000581D3007E call 00000005F08425D0 > 0000000581D30083 sub rsp,rax > 0000000581D30086 mov byte ptr [rcx+94h],0 > 0000000581D3008D mov eax,dword ptr 
[rbp-4] > 0000000581D30090 mov dword ptr [rcx],eax > 0000000581D30092 mov eax,10h > 0000000581D30097 call 00000005F08425D0 > 0000000581D3009C sub rsp,rax > 0000000581D3009F mov byte ptr [rcx+94h],0 > 0000000581D300A6 mov eax,10h > 0000000581D300AB call 00000005F08425D0 > 0000000581D300B0 sub rsp,rax > 0000000581D300B3 mov byte ptr [rcx+94h],0 > 0000000581D300BA mov eax,10h > 0000000581D300BF call 00000005F08425D0 > 0000000581D300C4 sub rsp,rax > 0000000581D300C7 mov byte ptr [rcx+94h],0 > 0000000581D300CE add dword ptr [rcx+40h],2 > 0000000581D300D2 mov rsp,rbp > 0000000581D300D5 pop rbp > 0000000581D300D6 ret > > The function located at 0x00000005F08425D0 is not valid (according to > visual studio: 00000005F08425D0 ?? ??). > > If I compile LLVM bytecode using llc, this function is __chkstk: > .def fn_0000000000000000; > .scl 2; > .type 32; > .endef > .text > .globl fn_0000000000000000 > .align 16, 0x90 > fn_0000000000000000: # @fn_0000000000000000 > # BB#0: # %bb > push rbp > mov rbp, rsp > sub rsp, 16 > mov dword ptr [rbp - 4], 0 > mov al, 1 > test al, al > jne .LBB0_1 > # BB#2: # %else > mov eax, 16 > call __chkstk > sub rsp, rax > mov byte ptr [rcx + 148], 0 > jmp .LBB0_3 > .LBB0_1: # %then > mov eax, 16 > call __chkstk > sub rsp, rax > mov byte ptr [rcx + 148], 1 > .LBB0_3: # %merged > test byte ptr [rbp - 1], -128 > je .LBB0_4 > # BB#5: # %else4 > mov eax, 16 > call __chkstk > sub rsp, rax > mov byte ptr [rcx + 148], 1 > jmp .LBB0_6 > .LBB0_4: # %then3 > mov eax, 16 > call __chkstk > sub rsp, rax > mov byte ptr [rcx + 148], 0 > .LBB0_6: # %merged2 > mov eax, dword ptr [rbp - 4] > mov dword ptr [rcx], eax > mov eax, 16 > call __chkstk > sub rsp, rax > mov byte ptr [rcx + 148], 0 > mov eax, 16 > call __chkstk > sub rsp, rax > mov byte ptr [rcx + 148], 0 > mov eax, 16 > call __chkstk > sub rsp, rax > mov byte ptr [rcx + 148], 0 > add dword ptr [rcx + 64], 2 > mov rsp, rbp > pop rbp > ret > > It seems this issue has already been described here > 
https://groups.google.com/forum/#!topic/llvm-commit/htNjwbWsNe8 > > I'm using this code > https://github.com/wisk/medusa/blob/master/src/emul/llvm/llvm_emulator.cpp > which is pretty basic. > > Please, tell me if you need further information about this issue. > > -- > Kevin Szkudlapski > _______________________________________________ > LLVM Developers mailing list > LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev >-------------- next part -------------- An HTML attachment was scrubbed... URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20130826/0339e342/attachment.html>
Kévin Szkudlapski
2013-Sep-01 19:37 UTC
[LLVMdev] Issue with X86FrameLowering __chkstk on Windows 8 64-bit / Visual Studio 2012
Hi Michael, On Tue, Aug 27, 2013 at 5:18 AM, Michael Lewis <don.apoch at gmail.com> wrote: > It's not a solution to the actual bug (which is, as the thread you linked > discusses, a problem with the assumption on LLVM's part that the __chkstk > function lies within 2GB of the emitted code's address space) but there is a > simple workaround: hoist all allocas to the first basic block of your > function. This allows the JIT to perform all stack allocations in a single > adjustment of the SP instead of needing to use dynamic stack allocation, and > thereby avoids the call to __chkstk entirely. Thanks for your answer. I redesigned my code and it looks OK now. However, I tried to insert a breakpoint at ``if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho())'' but the condition is never met. So I'm still wondering where, in LLVM, the call to __chkstk is emitted. If anybody has an idea, please let me know. :) -- Kevin Szkudlapski
Anton Korobeynikov
2013-Sep-01 20:10 UTC
[LLVMdev] Issue with X86FrameLowering __chkstk on Windows 8 64-bit / Visual Studio 2012
> It's not a solution to the actual bug (which is, as the thread you linked > discusses, a problem with the assumption on LLVM's part that the __chkstk > function lies within 2GB of the emitted code's address space) but there is a > simple workaround: hoist all allocas to the first basic block of your > function. This allows the JIT to perform all stack allocations in a single > adjustment of the SP instead of needing to use dynamic stack allocation, and > thereby avoids the call to __chkstk entirely. __chkstk is not connected with dynamic stack allocation at all, in general. On Windows, when one allocates more than 1 page (4 KB) of stack, it is necessary to touch all the allocated space in order to ensure the proper order of guard page allocation. Surely, it's always required for dynamic stack allocation, because the amount of allocation is not known in advance; however, it can be triggered for static code as well. Consider e.g. void bar(int*); void baz() { int foo[2000]; bar(foo); } -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University
Maybe Matching Threads
- [LLVMdev] Issue with X86FrameLowering __chkstk on Windows 8 64-bit / Visual Studio 2012
- [LLVMdev] Question regarding basic-block placement optimization
- LLVM behavior different depending on function symbol name
- LLVM behavior different depending on function symbol name
- [LLVMdev] Question regarding basic-block placement optimization