Hi Lang,
I have attached the patch to this mail for your consideration.
Notes:
-I basically just added OrcX86_64_Win32 with the custom resolver code for
windows. All the other static methods simply relay to their OrcX86_64
versions (therefore there is no need for any more refactoring).
-The asm code in my initial post doesn't work in release mode, because I
forgot to account for shadow space allocation on the stack. The code in the
attached patch has that fixed.
-I decided to name the support class OrcX86_64_Win32 in order to be
consistent with Triple::OSType::Win32.
-I didn't rename any of the other classes in OrcArchitectureSupport, as I
didn't feel comfortable touching those. I guess renaming those is a quick
thing to do.
-In lli, I made changes to stub and compilecallback creation functions
accordingly. Here I test for the os type in triple and return the win32
support class if required. There might be other tools/places I am not aware
of where this needs to be done.
-I didn't run any tests apart from my little sandbox example.
Best,
David
On Thu, May 5, 2016 at 3:09 AM, David <dk402538 at googlemail.com> wrote:
> Hi Lang,
> I also agree that the second option should probably be the way to go. I
> will give it a shot and submit a patch.
> Best,
> David
>
> On Wed, May 4, 2016 at 11:39 PM, Lang Hames <lhames at gmail.com> wrote:
>
>> Hi David,
>>
>> This is really cool. I'd love to get this in-tree.
>>
>> There are two ways we could go about this:
>>
>> (1) Make the OrcArchitecture interface ABI-aware so that it can choose
>> the right resolver code,
>> or
>> (2) Replace the OrcArchitecture classes with OrcABI classes. I.e. We'd
>> just rename OrcX86_64 -> Orc_X86_64_SysV (and rename I386 & AArch64
>> similarly), then we add your code as Orc_X86_64_Windows.
>>
>> I think the second is probably the way to go, with a little refactoring
>> so that the various X86 ABIs could share the stub and resolver code.
>>
>> Any interest in submitting a patch?
>>
>> - Lang.
>>
>>
>>
>>
>> On Wed, May 4, 2016 at 11:21 AM, David Blaikie <dblaikie at gmail.com>
>> wrote:
>>
>>> +Lang, JIT Cowboy
>>>
>>> On Wed, May 4, 2016 at 11:17 AM, David via llvm-dev <
>>> llvm-dev at lists.llvm.org> wrote:
>>>
>>>> Hi There,
>>>>
>>>> I am currently exploring C++ JIT compilation for a project where this
>>>> would be very useful. I started with the code from the lli tool, which uses
>>>> OrcLazyJIT, and changed it such that the module is compiled from C++
>>>> source in memory and OrcLazyJIT is used exclusively.
>>>>
>>>> Since I am on Windows, I found that my application crashes when
>>>> trying to run the main function from the JIT-compiled module (which was
>>>> found by casting the symbol address to the main prototype). After some
>>>> digging I found that the crash is caused by
>>>> LocalJITCompileCallbackManager::reenter not getting the correct
>>>> CompileCallback and TrampolineId references. This in turn is caused by
>>>> OrcX86_64::writeResolverCode not respecting the Windows calling convention
>>>> in the asm code for calling the reentry function.
>>>>
>>>> After making changes to the asm code in OrcX86_64::writeResolverCode,
>>>> the code runs without any problems. I thought I'd share it here with the
>>>> public so that others who would like to use OrcLazyJIT on Windows could
>>>> benefit. Please let me know if a different channel would be more
>>>> appropriate.
>>>>
>>>> Best,
>>>> David
>>>>
>>>> In order to get OrcLazyJIT to work under Windows, replace the prebaked
>>>> asm code in OrcX86_64::writeResolverCode in file
>>>> llvm/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp with the following.
>>>> Note that more work is needed to support both Linux and Windows, but I am not
>>>> sure how this is best dealt with in LLVM.
>>>>
>>>>
>>>> // windows (arguments go to rcx and rdx and have reversed
order)---
>>>>
>>>> const uint8_t ResolverCode[] = {
>>>>
>>>> // resolver_entry:
>>>>
>>>> 0x55, // 0x00: pushq
%rbp
>>>>
>>>> 0x48, 0x89, 0xe5, // 0x01: movq
%rsp, %rbp
>>>>
>>>> 0x50, // 0x04: pushq
%rax
>>>>
>>>> 0x53, // 0x05: pushq
%rbx
>>>>
>>>> 0x51, // 0x06: pushq
%rcx
>>>>
>>>> 0x52, // 0x07: pushq
%rdx
>>>>
>>>> 0x56, // 0x08: pushq
%rsi
>>>>
>>>> 0x57, // 0x09: pushq
%rdi
>>>>
>>>> 0x41, 0x50, // 0x0a: pushq
%r8
>>>>
>>>> 0x41, 0x51, // 0x0c: pushq
%r9
>>>>
>>>> 0x41, 0x52, // 0x0e: pushq
%r10
>>>>
>>>> 0x41, 0x53, // 0x10: pushq
%r11
>>>>
>>>> 0x41, 0x54, // 0x12: pushq
%r12
>>>>
>>>> 0x41, 0x55, // 0x14: pushq
%r13
>>>>
>>>> 0x41, 0x56, // 0x16: pushq
%r14
>>>>
>>>> 0x41, 0x57, // 0x18: pushq
%r15
>>>>
>>>> 0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00, // 0x1a: subq
0x208, %rsp
>>>>
>>>> 0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64
(%rsp)
>>>>
>>>>
>>>>
>>>> 0x48, 0xb9, // 0x26: movabsq
<CBMgr>, %rcx
>>>>
>>>> // 0x28: Callback manager addr.
>>>>
>>>> 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
>>>>
>>>>
>>>>
>>>> 0x48, 0x8B, 0x55, 0x08, // mov rdx,QWORD
PTR [rbp+0x8]
>>>>
>>>> 0x48, 0x83, 0xea, 0x06, // sub rdx,0x6
>>>>
>>>>
>>>>
>>>> 0x48, 0xb8, // 0x38: movabsq
<REntry>, %rax
>>>>
>>>> // 0x3a: JIT re-entry fn addr:
>>>>
>>>> 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
>>>>
>>>>
>>>> 0xff, 0xd0, // 0x42: callq
*%rax
>>>>
>>>> 0x48, 0x89, 0x45, 0x08, // 0x44: movq
%rax, 8(%rbp)
>>>>
>>>> 0x48, 0x0f, 0xae, 0x0c, 0x24, // 0x48: fxrstor64
(%rsp)
>>>>
>>>> 0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00, // 0x4d: addq
0x208, %rsp
>>>>
>>>> 0x41, 0x5f, // 0x54: popq
%r15
>>>>
>>>> 0x41, 0x5e, // 0x56: popq
%r14
>>>>
>>>> 0x41, 0x5d, // 0x58: popq
%r13
>>>>
>>>> 0x41, 0x5c, // 0x5a: popq
%r12
>>>>
>>>> 0x41, 0x5b, // 0x5c: popq
%r11
>>>>
>>>> 0x41, 0x5a, // 0x5e: popq
%r10
>>>>
>>>> 0x41, 0x59, // 0x60: popq
%r9
>>>>
>>>> 0x41, 0x58, // 0x62: popq
%r8
>>>>
>>>> 0x5f, // 0x64: popq
%rdi
>>>>
>>>> 0x5e, // 0x65: popq
%rsi
>>>>
>>>> 0x5a, // 0x66: popq
%rdx
>>>>
>>>> 0x59, // 0x67: popq
%rcx
>>>>
>>>> 0x5b, // 0x68: popq
%rbx
>>>>
>>>> 0x58, // 0x69: popq
%rax
>>>>
>>>> 0x5d, // 0x6a: popq
%rbp
>>>>
>>>> 0xc3, // 0x6b: retq
>>>>
>>>> };
>>>>
>>>>
>>>> const unsigned ReentryFnAddrOffset = 0x3a;
>>>>
>>>> const unsigned CallbackMgrAddrOffset = 0x28;
>>>>
>>>>
>>>> _______________________________________________
>>>> LLVM Developers mailing list
>>>> llvm-dev at lists.llvm.org
>>>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
>>>>
>>>>
>>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20160506/2f8da730/attachment-0001.html>
-------------- next part --------------
Index: include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h
==================================================================---
include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h (Revision 268716)
+++ include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h (Arbeitskopie)
@@ -172,6 +172,40 @@
unsigned MinStubs, void *InitialPtrVal);
};
+/// @brief X86_64 support for windows.
+///
+/// X86_64 supports lazy JITing.
+class OrcX86_64_Win32 {
+public:
+ static const unsigned PointerSize = 8;
+ static const unsigned TrampolineSize = 8;
+ static const unsigned ResolverCodeSize = 0x74;
+
+ typedef GenericIndirectStubsInfo<8> IndirectStubsInfo;
+
+ typedef TargetAddress(*JITReentryFn)(void *CallbackMgr, void *TrampolineId);
+
+ /// @brief Write the resolver code into the given memory. The user is
+ /// responsible for allocating the memory and setting permissions.
+ static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,
+ void *CallbackMgr);
+
+ /// @brief Write the requested number of trampolines into the given memory,
+ /// which must be big enough to hold 1 pointer, plus NumTrampolines
+ /// trampolines.
+ static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
+ unsigned NumTrampolines);
+
+ /// @brief Emit at least MinStubs worth of indirect call stubs, rounded out to
+ /// the nearest page size.
+ ///
+ /// E.g. Asking for 4 stubs on x86-64, where stubs are 8-bytes, with 4k
+ /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513
+ /// will return a block of 1024 (2-pages worth).
+ static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
+ unsigned MinStubs, void *InitialPtrVal);
+};
+
/// @brief I386 support.
///
/// I386 supports lazy JITing.
Index: lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp
==================================================================---
lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp (Revision 268716)
+++ lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp (Arbeitskopie)
@@ -336,6 +336,91 @@
return Error::success();
}
+
+void OrcX86_64_Win32::writeResolverCode(uint8_t *ResolverMem, JITReentryFn
ReentryFn,
+ void *CallbackMgr) {
+
+ // resolverCode is similar to OrcX86_64 with differences specific to windows
x64 calling convention:
+ // arguments go into rcx, rdx and come in reverse order, shadow space
allocation on stack
+ const uint8_t ResolverCode[] = {
+ // resolver_entry:
+ 0x55, // 0x00: pushq %rbp
+ 0x48, 0x89, 0xe5, // 0x01: movq %rsp, %rbp
+ 0x50, // 0x04: pushq %rax
+ 0x53, // 0x05: pushq %rbx
+ 0x51, // 0x06: pushq %rcx
+ 0x52, // 0x07: pushq %rdx
+ 0x56, // 0x08: pushq %rsi
+ 0x57, // 0x09: pushq %rdi
+ 0x41, 0x50, // 0x0a: pushq %r8
+ 0x41, 0x51, // 0x0c: pushq %r9
+ 0x41, 0x52, // 0x0e: pushq %r10
+ 0x41, 0x53, // 0x10: pushq %r11
+ 0x41, 0x54, // 0x12: pushq %r12
+ 0x41, 0x55, // 0x14: pushq %r13
+ 0x41, 0x56, // 0x16: pushq %r14
+ 0x41, 0x57, // 0x18: pushq %r15
+ 0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00, // 0x1a: subq 0x208, %rsp
+ 0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp)
+
+ 0x48, 0xb9, // 0x26: movabsq <CBMgr>,
%rcx
+ // 0x28: Callback manager addr.
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+
+ 0x48, 0x8B, 0x55, 0x08, // mov rdx,QWORD PTR [rbp+0x8]
+ 0x48, 0x83, 0xea, 0x06, // sub rdx,0x6
+
+ 0x48, 0xb8, // 0x38: movabsq <REntry>,
%rax
+ // 0x3a: JIT re-entry fn addr:
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+
+ 0x48, 0x83, 0xEC, 0x20, // sub rsp,0x20 (reserve shadow
space)
+ 0xff, 0xd0, // 0x42: callq *%rax
+ 0x48, 0x83, 0xC4, 0x20, // add rsp,0x20 (free shadow
space)
+
+ 0x48, 0x89, 0x45, 0x08, // 0x44: movq %rax, 8(%rbp)
+ 0x48, 0x0f, 0xae, 0x0c, 0x24, // 0x48: fxrstor64 (%rsp)
+ 0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00, // 0x4d: addq 0x208, %rsp
+ 0x41, 0x5f, // 0x54: popq %r15
+ 0x41, 0x5e, // 0x56: popq %r14
+ 0x41, 0x5d, // 0x58: popq %r13
+ 0x41, 0x5c, // 0x5a: popq %r12
+ 0x41, 0x5b, // 0x5c: popq %r11
+ 0x41, 0x5a, // 0x5e: popq %r10
+ 0x41, 0x59, // 0x60: popq %r9
+ 0x41, 0x58, // 0x62: popq %r8
+ 0x5f, // 0x64: popq %rdi
+ 0x5e, // 0x65: popq %rsi
+ 0x5a, // 0x66: popq %rdx
+ 0x59, // 0x67: popq %rcx
+ 0x5b, // 0x68: popq %rbx
+ 0x58, // 0x69: popq %rax
+ 0x5d, // 0x6a: popq %rbp
+ 0xc3, // 0x6b: retq
+ };
+
+
+
+ const unsigned ReentryFnAddrOffset = 0x3a;
+ const unsigned CallbackMgrAddrOffset = 0x28;
+
+ memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode));
+ memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn));
+ memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr,
+ sizeof(CallbackMgr));
+}
+
+void OrcX86_64_Win32::writeTrampolines(uint8_t *TrampolineMem, void
*ResolverAddr,
+ unsigned NumTrampolines) {
+ return OrcX86_64::writeTrampolines(TrampolineMem, ResolverAddr,
NumTrampolines);
+}
+
+Error OrcX86_64_Win32::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
+ unsigned MinStubs,
+ void *InitialPtrVal) {
+ return OrcX86_64::emitIndirectStubsBlock(StubsInfo, MinStubs, InitialPtrVal);
+}
+
void OrcI386::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn,
void *CallbackMgr) {
Index: tools/lli/OrcLazyJIT.cpp
==================================================================---
tools/lli/OrcLazyJIT.cpp (Revision 268716)
+++ tools/lli/OrcLazyJIT.cpp (Arbeitskopie)
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "OrcLazyJIT.h"
+
#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DynamicLibrary.h"
@@ -14,6 +15,8 @@
#include <cstdio>
#include <system_error>
+
+
using namespace llvm;
namespace {
@@ -57,8 +60,14 @@
}
case Triple::x86_64: {
- typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64> CCMgrT;
- return llvm::make_unique<CCMgrT>(0);
+ if ( T.getOS() == Triple::OSType::Win32 ) {
+ typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64_Win32>
CCMgrT;
+ return llvm::make_unique<CCMgrT>(0);
+ }
+ else {
+ typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64> CCMgrT;
+ return llvm::make_unique<CCMgrT>(0);
+ }
}
}
}
@@ -75,10 +84,17 @@
};
case Triple::x86_64:
- return [](){
- return llvm::make_unique<
-
orc::LocalIndirectStubsManager<orc::OrcX86_64>>();
- };
+ if (T.getOS() == Triple::OSType::Win32) {
+ return [](){
+ return llvm::make_unique <
+ orc::LocalIndirectStubsManager < orc::OrcX86_64_Win32 >> ();
+ };
+ } else {
+ return [](){
+ return llvm::make_unique <
+ orc::LocalIndirectStubsManager < orc::OrcX86_64 >> ();
+ };
+ }
}
}
@@ -192,3 +208,4 @@
auto Main = fromTargetAddress<MainFnPtr>(MainSym.getAddress());
return Main(ArgC, ArgV);
}
+
Index: tools/lli/lli.cpp
==================================================================---
tools/lli/lli.cpp (Revision 268716)
+++ tools/lli/lli.cpp (Arbeitskopie)
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "OrcLazyJIT.h"
+
#include "RemoteJITUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/ADT/StringExtras.h"