Hi Lang,
I have attached the patch to this mail for your consideration.
Notes:
-I basically just added OrcX86_64_Win32 with the custom resolver code for
windows. All the other static methods simply relay to their OrcX86_64
versions (therefore there is no need for any more refactoring).
-The asm code in my initial post doesn't work in release mode, because I
forgot to account for shadow space allocation on the stack. The code in the
attached patch has that fixed.
-I decided to name the support class OrcX86_64_Win32 in order to be
consistent with Triple::OSType::Win32.
-I didn't rename any of the other classes in OrcArchitectureSupport, as I
didn't feel comfortable touching those. I guess renaming those is a quick
thing to do.
-In lli, I made changes to stub and compilecallback creation functions
accordingly. Here I test for the os type in triple and return the win32
support class if required. There might be other tools/places I am not aware
of where this needs to be done.
-I didn't run any tests apart from my little sandbox example.
Best,
David
On Thu, May 5, 2016 at 3:09 AM, David <dk402538 at googlemail.com> wrote:
> Hi Lang,
> I also agree that the second option should probably be the way to go. I
> will give it a shot and submit a patch.
> Best,
> David
>
> On Wed, May 4, 2016 at 11:39 PM, Lang Hames <lhames at gmail.com>
wrote:
>
>> Hi David,
>>
>> This is really cool. I'd love to get this in-tree.
>>
>> There are two ways we could go about this:
>>
>> (1) Make the OrcArchitecture interface ABI-aware so that it can choose
>> the right resolver code,
>> or
>> (2) Replace the OrcArchitecture classes with OrcABI classes. I.e. we'd
>> just rename OrcX86_64 -> Orc_X86_64_SysV (and rename I386 & AArch64
>> similarly), then we add your code as Orc_X86_64_Windows.
>>
>> I think the second is probably the way to go, with a little refactoring
>> so that the various X86 ABIs could share the stub and resolver code.
>>
>> Any interest in submitting a patch?
>>
>> - Lang.
>>
>>
>>
>>
>> On Wed, May 4, 2016 at 11:21 AM, David Blaikie <dblaikie at
gmail.com>
>> wrote:
>>
>>> +Lang, JIT Cowboy
>>>
>>> On Wed, May 4, 2016 at 11:17 AM, David via llvm-dev <
>>> llvm-dev at lists.llvm.org> wrote:
>>>
>>>> Hi There,
>>>>
>>>> I am currently exploring C++ JIT-compilation for a project
where this
>>>> would be very useful. I started with the code from the lli tool
which uses
>>>> OrcLazyJIT and changed it, such that the module is being
compiled from c++
>>>> source in memory and OrcLazyJIT is used exclusively.
>>>>
>>>> Now since I am on windows, I found that my application is
crashing when
>>>> trying to run the main function from the jit-compiled module (
which was
>>>> found by casting the symbol address to the main prototype). Now
after some
>>>> digging I found that the crash is caused by
>>>> LocalJITCompileCallbackManager::reenter not getting the correct
>>>> CompileCallback and trampolineid references. This in turn is
being caused by
>>>> OrcX86_64::writeResolverCode not respecting windows calling
convention
>>>> in the asm code for calling the reentry function.
>>>>
>>>> After making changes to the asm code in OrcX86_64::writeResolverCode,
>>>> the code runs without any problems. I thought I'd share it here with the
>>>> public so that others who would like to use OrcLazyJIT on Windows could
>>>> benefit. Please let me know if a different channel would be more
>>>> appropriate.
>>>>
>>>> Best,
>>>> David
>>>>
>>>> In order to get OrcLazyJIT to work under Windows, replace the prebaked
>>>> asm code in OrcX86_64::writeResolverCode in file
>>>> llvm/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp with the following.
>>>> Note that more work is needed to support both Linux and Windows, but I am
>>>> not sure how this is best dealt with in LLVM.
>>>>
>>>>
>>>> // windows (arguments go to rcx and rdx and have reversed
order)---
>>>>
>>>> const uint8_t ResolverCode[] = {
>>>>
>>>> 					   // resolver_entry:
>>>>
>>>> 0x55,                                      // 0x00: pushq    
%rbp
>>>>
>>>> 0x48, 0x89, 0xe5,                          // 0x01: movq     
%rsp, %rbp
>>>>
>>>> 0x50,                                      // 0x04: pushq    
%rax
>>>>
>>>> 0x53,                                      // 0x05: pushq    
%rbx
>>>>
>>>> 0x51,                                      // 0x06: pushq    
%rcx
>>>>
>>>> 0x52,                                      // 0x07: pushq    
%rdx
>>>>
>>>> 0x56,                                      // 0x08: pushq    
%rsi
>>>>
>>>> 0x57,                                      // 0x09: pushq    
%rdi
>>>>
>>>> 0x41, 0x50,                                // 0x0a: pushq    
%r8
>>>>
>>>> 0x41, 0x51,                                // 0x0c: pushq    
%r9
>>>>
>>>> 0x41, 0x52,                                // 0x0e: pushq    
%r10
>>>>
>>>> 0x41, 0x53,                                // 0x10: pushq    
%r11
>>>>
>>>> 0x41, 0x54,                                // 0x12: pushq    
%r12
>>>>
>>>> 0x41, 0x55,                                // 0x14: pushq    
%r13
>>>>
>>>> 0x41, 0x56,                                // 0x16: pushq    
%r14
>>>>
>>>> 0x41, 0x57,                                // 0x18: pushq    
%r15
>>>>
>>>> 0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00,  // 0x1a: subq     
0x208, %rsp
>>>>
>>>> 0x48, 0x0f, 0xae, 0x04, 0x24,              // 0x21: fxsave64 
(%rsp)
>>>>
>>>>
>>>>
>>>> 0x48, 0xb9,                                // 0x26: movabsq  
<CBMgr>, %rcx
>>>>
>>>> // 0x28: Callback manager addr.
>>>>
>>>> 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
>>>>
>>>>
>>>>
>>>> 0x48, 0x8B, 0x55, 0x08,                    // mov    rdx,QWORD
PTR [rbp+0x8]
>>>>
>>>> 0x48, 0x83, 0xea, 0x06,                    // sub    rdx,0x6
>>>>
>>>>
>>>>
>>>> 0x48, 0xb8,                                // 0x38: movabsq  
<REntry>, %rax
>>>>
>>>> // 0x3a: JIT re-entry fn addr:
>>>>
>>>> 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
>>>>
>>>>
>>>> 0xff, 0xd0,                                // 0x42: callq    
*%rax
>>>>
>>>> 0x48, 0x89, 0x45, 0x08,                    // 0x44: movq     
%rax, 8(%rbp)
>>>>
>>>> 0x48, 0x0f, 0xae, 0x0c, 0x24,              // 0x48: fxrstor64
(%rsp)
>>>>
>>>> 0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00,  // 0x4d: addq     
0x208, %rsp
>>>>
>>>> 0x41, 0x5f,                                // 0x54: popq     
%r15
>>>>
>>>> 0x41, 0x5e,                                // 0x56: popq     
%r14
>>>>
>>>> 0x41, 0x5d,                                // 0x58: popq     
%r13
>>>>
>>>> 0x41, 0x5c,                                // 0x5a: popq     
%r12
>>>>
>>>> 0x41, 0x5b,                                // 0x5c: popq     
%r11
>>>>
>>>> 0x41, 0x5a,                                // 0x5e: popq     
%r10
>>>>
>>>> 0x41, 0x59,                                // 0x60: popq     
%r9
>>>>
>>>> 0x41, 0x58,                                // 0x62: popq     
%r8
>>>>
>>>> 0x5f,                                      // 0x64: popq     
%rdi
>>>>
>>>> 0x5e,                                      // 0x65: popq     
%rsi
>>>>
>>>> 0x5a,                                      // 0x66: popq     
%rdx
>>>>
>>>> 0x59,                                      // 0x67: popq     
%rcx
>>>>
>>>> 0x5b,                                      // 0x68: popq     
%rbx
>>>>
>>>> 0x58,                                      // 0x69: popq     
%rax
>>>>
>>>> 0x5d,                                      // 0x6a: popq     
%rbp
>>>>
>>>> 0xc3,                                      // 0x6b: retq
>>>>
>>>> };
>>>>
>>>>
>>>> const unsigned ReentryFnAddrOffset = 0x3a;
>>>>
>>>> const unsigned CallbackMgrAddrOffset = 0x28;
>>>>
>>>>
>>>> _______________________________________________
>>>> LLVM Developers mailing list
>>>> llvm-dev at lists.llvm.org
>>>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
>>>>
>>>>
>>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://lists.llvm.org/pipermail/llvm-dev/attachments/20160506/2f8da730/attachment-0001.html>
-------------- next part --------------
Index: include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h
==================================================================
--- include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h	(Revision 268716)
+++ include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h	(Arbeitskopie)
@@ -172,6 +172,40 @@
                                       unsigned MinStubs, void *InitialPtrVal);
 };
 
+/// @brief X86_64 support for windows.
+///
+/// X86_64 supports lazy JITing.
+class OrcX86_64_Win32 {
+public:
+	static const unsigned PointerSize = 8;
+	static const unsigned TrampolineSize = 8;
+	static const unsigned ResolverCodeSize = 0x74;
+
+	typedef GenericIndirectStubsInfo<8> IndirectStubsInfo;
+
+	typedef TargetAddress(*JITReentryFn)(void *CallbackMgr, void *TrampolineId);
+
+	/// @brief Write the resolver code into the given memory. The user is be
+	///        responsible for allocating the memory and setting permissions.
+	static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,
+		void *CallbackMgr);
+
+	/// @brief Write the requsted number of trampolines into the given memory,
+	///        which must be big enough to hold 1 pointer, plus NumTrampolines
+	///        trampolines.
+	static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
+		unsigned NumTrampolines);
+
+	/// @brief Emit at least MinStubs worth of indirect call stubs, rounded out to
+	///        the nearest page size.
+	///
+	///   E.g. Asking for 4 stubs on x86-64, where stubs are 8-bytes, with 4k
+	/// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513
+	/// will return a block of 1024 (2-pages worth).
+	static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
+		unsigned MinStubs, void *InitialPtrVal);
+};
+
 /// @brief I386 support.
 ///
 /// I386 supports lazy JITing.
Index: lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp
==================================================================
--- lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp	(Revision 268716)
+++ lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp	(Arbeitskopie)
@@ -336,6 +336,91 @@
   return Error::success();
 }
 
+
+void OrcX86_64_Win32::writeResolverCode(uint8_t *ResolverMem, JITReentryFn
ReentryFn,
+	void *CallbackMgr) {
+
+	// resolverCode is similar to OrcX86_64 with differences specific to windows
x64 calling convention:
+	// arguments go into rcx, rdx and come in reverse order, shadow space
allocation on stack
+	const uint8_t ResolverCode[] = {
+		// resolver_entry:
+		0x55,                                      // 0x00: pushq     %rbp
+		0x48, 0x89, 0xe5,                          // 0x01: movq      %rsp, %rbp
+		0x50,                                      // 0x04: pushq     %rax
+		0x53,                                      // 0x05: pushq     %rbx
+		0x51,                                      // 0x06: pushq     %rcx
+		0x52,                                      // 0x07: pushq     %rdx
+		0x56,                                      // 0x08: pushq     %rsi
+		0x57,                                      // 0x09: pushq     %rdi
+		0x41, 0x50,                                // 0x0a: pushq     %r8
+		0x41, 0x51,                                // 0x0c: pushq     %r9
+		0x41, 0x52,                                // 0x0e: pushq     %r10
+		0x41, 0x53,                                // 0x10: pushq     %r11
+		0x41, 0x54,                                // 0x12: pushq     %r12
+		0x41, 0x55,                                // 0x14: pushq     %r13
+		0x41, 0x56,                                // 0x16: pushq     %r14
+		0x41, 0x57,                                // 0x18: pushq     %r15
+		0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00,  // 0x1a: subq      0x208, %rsp
+		0x48, 0x0f, 0xae, 0x04, 0x24,              // 0x21: fxsave64  (%rsp)
+
+		0x48, 0xb9,                                // 0x26: movabsq   <CBMgr>,
%rcx
+		// 0x28: Callback manager addr.
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+
+		0x48, 0x8B, 0x55, 0x08,                    // mov    rdx,QWORD PTR [rbp+0x8]
+		0x48, 0x83, 0xea, 0x06,                    // sub    rdx,0x6
+
+		0x48, 0xb8,                                // 0x38: movabsq   <REntry>,
%rax
+		// 0x3a: JIT re-entry fn addr:
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+
+		0x48, 0x83, 0xEC, 0x20,                    // sub    rsp,0x20 (reserve shadow
space)
+		0xff, 0xd0,                                // 0x42: callq     *%rax
+		0x48, 0x83, 0xC4, 0x20,                    // add    rsp,0x20 (free shadow
space)
+
+		0x48, 0x89, 0x45, 0x08,                    // 0x44: movq      %rax, 8(%rbp)
+		0x48, 0x0f, 0xae, 0x0c, 0x24,              // 0x48: fxrstor64 (%rsp)
+		0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00,  // 0x4d: addq      0x208, %rsp
+		0x41, 0x5f,                                // 0x54: popq      %r15
+		0x41, 0x5e,                                // 0x56: popq      %r14
+		0x41, 0x5d,                                // 0x58: popq      %r13
+		0x41, 0x5c,                                // 0x5a: popq      %r12
+		0x41, 0x5b,                                // 0x5c: popq      %r11
+		0x41, 0x5a,                                // 0x5e: popq      %r10
+		0x41, 0x59,                                // 0x60: popq      %r9
+		0x41, 0x58,                                // 0x62: popq      %r8
+		0x5f,                                      // 0x64: popq      %rdi
+		0x5e,                                      // 0x65: popq      %rsi
+		0x5a,                                      // 0x66: popq      %rdx
+		0x59,                                      // 0x67: popq      %rcx
+		0x5b,                                      // 0x68: popq      %rbx
+		0x58,                                      // 0x69: popq      %rax
+		0x5d,                                      // 0x6a: popq      %rbp
+		0xc3,                                      // 0x6b: retq
+	};
+
+
+
+	const unsigned ReentryFnAddrOffset = 0x3a;
+	const unsigned CallbackMgrAddrOffset = 0x28;
+
+	memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode));
+	memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn));
+	memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr,
+		sizeof(CallbackMgr));
+}
+
+void OrcX86_64_Win32::writeTrampolines(uint8_t *TrampolineMem, void
*ResolverAddr,
+	unsigned NumTrampolines) {
+	return OrcX86_64::writeTrampolines(TrampolineMem, ResolverAddr,
NumTrampolines);
+}
+
+Error OrcX86_64_Win32::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
+	unsigned MinStubs,
+	void *InitialPtrVal) {
+	return OrcX86_64::emitIndirectStubsBlock(StubsInfo, MinStubs, InitialPtrVal);
+}
+
 void OrcI386::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn,
                                 void *CallbackMgr) {
 
Index: tools/lli/OrcLazyJIT.cpp
==================================================================
--- tools/lli/OrcLazyJIT.cpp	(Revision 268716)
+++ tools/lli/OrcLazyJIT.cpp	(Arbeitskopie)
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "OrcLazyJIT.h"
+
 #include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/DynamicLibrary.h"
@@ -14,6 +15,8 @@
 #include <cstdio>
 #include <system_error>
 
+
+
 using namespace llvm;
 
 namespace {
@@ -57,8 +60,14 @@
     }
 
     case Triple::x86_64: {
-      typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64> CCMgrT;
-      return llvm::make_unique<CCMgrT>(0);
+		if ( T.getOS() == Triple::OSType::Win32 ) {
+			typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64_Win32>
CCMgrT;
+			return llvm::make_unique<CCMgrT>(0);
+		}
+		else {
+			typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64> CCMgrT;
+			return llvm::make_unique<CCMgrT>(0);
+		}
     }
   }
 }
@@ -75,10 +84,17 @@
       };
 
     case Triple::x86_64:
-      return [](){
-        return llvm::make_unique<
-                      
orc::LocalIndirectStubsManager<orc::OrcX86_64>>();
-      };
+      if (T.getOS() == Triple::OSType::Win32) {
+		  return [](){
+			  return llvm::make_unique <
+				  orc::LocalIndirectStubsManager < orc::OrcX86_64_Win32 >> ();
+		  };
+      } else {
+			  return [](){
+				  return llvm::make_unique <
+					  orc::LocalIndirectStubsManager < orc::OrcX86_64 >> ();
+			  };
+      }
   }
 }
 
@@ -192,3 +208,4 @@
   auto Main = fromTargetAddress<MainFnPtr>(MainSym.getAddress());
   return Main(ArgC, ArgV);
 }
+
Index: tools/lli/lli.cpp
==================================================================
--- tools/lli/lli.cpp	(Revision 268716)
+++ tools/lli/lli.cpp	(Arbeitskopie)
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "OrcLazyJIT.h"
+
 #include "RemoteJITUtils.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/ADT/StringExtras.h"