On 19/08/2009, at 4:00 AM, Anton Korobeynikov wrote:
> Hello, Nathan
>
>> or if it should be a configure test, which might be safer. Are there
>> any x86 platforms (other than apple) that don't need PLT-indirect
>> calls?
> Yes, mingw. However just tweaking the define is not enough - we're not
OK, so configure might be the way to go then; maybe something like
the following? I haven't tested this on mingw, but it appears to do the
right thing on Solaris, Linux and OS X.
Index: lib/Target/X86/X86JITInfo.cpp
===================================================================
--- X86/X86JITInfo.cpp (revision 79974)
+++ X86/X86JITInfo.cpp (working copy)
@@ -18,6 +18,7 @@
#include "llvm/Function.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Config/config.h"
#include <cstdlib>
#include <cstring>
using namespace llvm;
@@ -52,7 +53,7 @@
#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
// Check if building with -fPIC
-#if defined(__PIC__) && __PIC__ && defined(__linux__)
+#if defined(__PIC__) && __PIC__ && defined(NEED_PLT_CALL)
#define ASMCALLSUFFIX "@PLT"
#else
#define ASMCALLSUFFIX
Index: autoconf/configure.ac
===================================================================
--- autoconf/configure.ac (revision 79390)
+++ autoconf/configure.ac (working copy)
@@ -1150,6 +1150,11 @@
dnl Check, whether __dso_handle is present
AC_CHECK_FUNCS([__dso_handle])
+AC_MSG_CHECKING([if your platform uses PLT-indirect calls])
+AC_COMPILE_IFELSE([[ __asm__ ("call dummy@PLT"); ]], [
+  AC_DEFINE(NEED_PLT_CALL, [1], [PIC code requires PLT-indirect calls])
+  AC_MSG_RESULT(yes)], [AC_MSG_RESULT(no)])
+
dnl See if the llvm-gcc executable can compile to LLVM assembly
AC_CACHE_CHECK([whether llvm-gcc is sane],[llvm_cv_llvmgcc_sanity],
[llvm_cv_llvmgcc_sanity="no"
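In case it helps anyone try the check by hand (on mingw especially), the
probe above boils down to roughly the following standalone file. The file
and symbol names are made up, and it's compile-only, just like
AC_COMPILE_IFELSE:

// plt_probe.cpp -- a rough standalone version of the configure probe above.
// "probe" and "dummy" are invented names; dummy never needs to be defined,
// since we only care whether the assembler accepts the @PLT suffix.
//   g++ -fPIC -c plt_probe.cpp
// should succeed where @PLT call syntax is supported (Linux, Solaris) and
// fail where it isn't (OS X); mingw is the case that still needs checking.
void probe() {
  __asm__ ("call dummy@PLT");
}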
> loading address of GOT into ebx before the call (on 32 bit ABIs) thus
> the call will be to nowhere.
Good point, I didn't look closely enough at the calling sequence. I
assume this has to be broken on Linux/x86 at the moment too? I've done
up a quick-and-dirty implementation below for the sake of discussion;
it compiles (and doesn't break lli), but hasn't been tested beyond
that point yet. Admittedly this adds even more versions of
X86CompilationCallback, but the mixed version with #ifs seemed
pretty hard to follow.
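For reference, the way the callbacks below satisfy that requirement is the
usual i386 PIC sequence: take the address of a local label with a call/pop
pair, add the _GLOBAL_OFFSET_TABLE_ relocation to turn that into the GOT
address in %ebx, and only then call through the PLT. A stripped-down sketch
(not part of the patch; "callee", the function and the label names are all
invented) would be:

// got_idiom.cpp -- illustration only, assuming some external function
// "callee"; build on Linux with:  g++ -m32 -fPIC -c got_idiom.cpp
extern "C" void callee();
asm(
    ".text\n"
    ".globl got_call_sketch\n"
    "got_call_sketch:\n"
    "pushl %ebx\n"      // %ebx is call-saved, so keep the caller's value
    "subl $8, %esp\n"   // keep the stack 16-byte aligned across the call
    "call .Lpic_base\n" // pushes the address of .Lpic_base ...
    ".Lpic_base:\n"
    "popl %ebx\n"       // ... which we pop straight back into %ebx
    "addl $_GLOBAL_OFFSET_TABLE_+[.-.Lpic_base], %ebx\n" // %ebx = GOT address
    "call callee@PLT\n" // the PLT stub expects the GOT address in %ebx
    "addl $8, %esp\n"
    "popl %ebx\n"
    "ret\n"
    );

Without the GOT load, the @PLT call would go through a stub that indexes off
whatever happens to be in %ebx, which is the "call to nowhere" you describe.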
Cheers,
Nathan
Index: X86/X86JITInfo.cpp
===================================================================
--- X86/X86JITInfo.cpp (revision 79974)
+++ X86/X86JITInfo.cpp (working copy)
@@ -180,15 +181,148 @@
# endif
#elif defined (X86_32_JIT)
# ifndef _MSC_VER
+
+#if defined(__PIC__) && __PIC__ && defined(NEED_PLT_CALL)
void X86CompilationCallback(void);
asm(
".text\n"
".align 8\n"
".globl " ASMPREFIX "X86CompilationCallback\n"
+ ".local .local_got_stub\n"
TYPE_FUNCTION(X86CompilationCallback)
ASMPREFIX "X86CompilationCallback:\n"
CFI(".cfi_startproc\n")
"pushl %ebp\n"
+ CFI(".cfi_def_cfa_offset 12\n")
+ CFI(".cfi_offset %ebp, -12\n")
+ "movl %esp, %ebp\n" // Standard prologue
+ CFI(".cfi_def_cfa_register %ebp\n")
+ "pushl %eax\n"
+ CFI(".cfi_rel_offset %eax, 0\n")
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ CFI(".cfi_rel_offset %edx, 4\n")
+ "pushl %ecx\n"
+ CFI(".cfi_rel_offset %ecx, 8\n")
+ "pushl %ebx\n"
+ CFI(".cfi_rel_offset %ebx, 12\n")
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call .local_got_stub\n"
+ ".local_got_stub: \n"
+ "popl %ebx\n"
+ "addl $_GLOBAL_OFFSET_TABLE_+[.-.local_got_stub], %ebx\n"
+ "call " ASMPREFIX "X86CompilationCallback2"
ASMCALLSUFFIX "\n"
+ "movl %ebp, %esp\n" // Restore ESP
+ CFI(".cfi_def_cfa_register %esp\n")
+ "subl $16, %esp\n"
+ CFI(".cfi_adjust_cfa_offset 16\n")
+ "popl %ebx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ebx\n")
+ "popl %ecx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ecx\n")
+ "popl %edx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %edx\n")
+ "popl %eax\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %eax\n")
+ "popl %ebp\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ebp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback)
+ );
+
+ // Same as X86CompilationCallback but also saves XMM argument registers.
+ void X86CompilationCallback_SSE(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback_SSE\n"
+ ".local .local_got_stub_SSE\n"
+ TYPE_FUNCTION(X86CompilationCallback_SSE)
+ ASMPREFIX "X86CompilationCallback_SSE:\n"
+ CFI(".cfi_startproc\n")
+ "pushl %ebp\n"
+ CFI(".cfi_def_cfa_offset 12\n")
+ CFI(".cfi_offset %ebp, -12\n")
+ "movl %esp, %ebp\n" // Standard prologue
+ CFI(".cfi_def_cfa_register %ebp\n")
+ "pushl %eax\n"
+ CFI(".cfi_rel_offset %eax, 0\n")
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ CFI(".cfi_rel_offset %edx, 4\n")
+ "pushl %ecx\n"
+ CFI(".cfi_rel_offset %ecx, 8\n")
+ "pushl %ebx\n"
+ CFI(".cfi_rel_offset %ebx, 12\n")
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+ // Save all XMM arg registers
+ "subl $64, %esp\n"
+ // FIXME: provide frame move information for xmm registers.
+ // This can be tricky, because CFA register is ebp (unaligned)
+ // and we need to produce offsets relative to it.
+ "movaps %xmm0, (%esp)\n"
+ "movaps %xmm1, 16(%esp)\n"
+ "movaps %xmm2, 32(%esp)\n"
+ "movaps %xmm3, 48(%esp)\n"
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call .local_got_stub_SSE\n"
+ ".local_got_stub_SSE: \n"
+ "popl %ebx\n"
+ "addl $_GLOBAL_OFFSET_TABLE_+[.-.local_got_stub], %ebx\n"
+ "call " ASMPREFIX "X86CompilationCallback2"
ASMCALLSUFFIX "\n"
+ "addl $16, %esp\n"
+ "movaps 48(%esp), %xmm3\n"
+ CFI(".cfi_restore %xmm3\n")
+ "movaps 32(%esp), %xmm2\n"
+ CFI(".cfi_restore %xmm2\n")
+ "movaps 16(%esp), %xmm1\n"
+ CFI(".cfi_restore %xmm1\n")
+ "movaps (%esp), %xmm0\n"
+ CFI(".cfi_restore %xmm0\n")
+ "movl %ebp, %esp\n" // Restore ESP
+ CFI(".cfi_def_cfa_register esp\n")
+ "subl $16, %esp\n"
+ CFI(".cfi_adjust_cfa_offset 16\n")
+ "popl %ebx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ebx\n")
+ "popl %ecx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ecx\n")
+ "popl %edx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %edx\n")
+ "popl %eax\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %eax\n")
+ "popl %ebp\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ebp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback_SSE)
+ );
+
+#else /* __PIC__ && NEED_PLT_CALL */
+ void X86CompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback\n"
+ TYPE_FUNCTION(X86CompilationCallback)
+ ASMPREFIX "X86CompilationCallback:\n"
+ CFI(".cfi_startproc\n")
+ "pushl %ebp\n"
CFI(".cfi_def_cfa_offset 8\n")
CFI(".cfi_offset %ebp, -8\n")
"movl %esp, %ebp\n" // Standard prologue
@@ -292,6 +426,8 @@
CFI(".cfi_endproc\n")
SIZE(X86CompilationCallback_SSE)
);
+#endif /* !(__PIC__ && NEED_PLT_CALL) */
+
# else
void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);