Jeremy Fitzhardinge
2007-Apr-18  17:50 UTC
[patch 2/2] Make COMPAT_VDSO runtime selectable.
Now that relocation of the VDSO for COMPAT_VDSO users is done at
runtime rather than compile time, it is possible to enable/disable
compat mode at runtime.
This patch allows you to enable COMPAT_VDSO mode with "vdso=2" on the
kernel command line, or via sysctl.  (Switching on a running system
shouldn't be done lightly; any process which was relying on the compat
VDSO will be upset if it goes away.)
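[ A minimal sketch of the runtime switch from userspace, assuming the
  vdso_enabled knob is exposed as /proc/sys/kernel/vdso -- the exact
  sysctl path is an assumption here, not something this patch defines:

	/* Hypothetical: flip a running system into compat-VDSO mode.
	 * The proc path is an assumption; verify it on your kernel. */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/kernel/vdso", "w");

		if (!f)
			return 1;
		fputs("2\n", f);	/* 2 == VDSO_COMPAT */
		return fclose(f) != 0;
	}
]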
The COMPAT_VDSO config option still exists, but if enabled it just
makes vdso_enabled default to VDSO_COMPAT.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Zachary Amsden <zach@vmware.com>
Cc: "Jan Beulich" <JBeulich@novell.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roland McGrath <roland@redhat.com>
---
 Documentation/kernel-parameters.txt |    1 
 arch/i386/kernel/sysenter.c         |  141 +++++++++++++++++++++++------------
 include/asm-i386/page.h             |    2 
 3 files changed, 94 insertions(+), 50 deletions(-)
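[ For reference, after the diffstat: the "vdso=" argument reaches
  vdso_enabled through the existing vdso_setup() hook, visible as
  context below (__setup("vdso=", vdso_setup)). Roughly -- the body is
  a sketch, since this diff does not touch it:

	static int __init vdso_setup(char *s)
	{
		/* parse the digit after "vdso=" into the mode value */
		vdso_enabled = simple_strtoul(s, NULL, 0);
		return 1;
	}
	__setup("vdso=", vdso_setup);
]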
===================================================================
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1807,6 +1807,7 @@ and is between 256 and 4096 characters. 
 			[USBHID] The interval which mice are to be polled at.
 
 	vdso=		[IA-32,SH]
+			vdso=2: enable compat VDSO (default with COMPAT_VDSO)
 			vdso=1: enable VDSO (default)
 			vdso=0: disable VDSO mapping
 
===================================================================
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -23,12 +23,25 @@
 #include <asm/pgtable.h>
 #include <asm/unistd.h>
 #include <asm/elf.h>
+#include <asm/tlbflush.h>
+
+enum {
+	VDSO_DISABLED = 0,
+	VDSO_ENABLED = 1,
+	VDSO_COMPAT = 2,
+};
+
+#ifdef CONFIG_COMPAT_VDSO
+#define VDSO_DEFAULT	VDSO_COMPAT
+#else
+#define VDSO_DEFAULT	VDSO_ENABLED
+#endif
 
 /*
  * Should the kernel map a VDSO page into processes and pass its
  * address down to glibc upon exec()?
  */
-unsigned int __read_mostly vdso_enabled = 1;
+unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT;
 
 EXPORT_SYMBOL_GPL(vdso_enabled);
 
@@ -43,7 +56,6 @@ __setup("vdso=", vdso_setup);
 
 extern asmlinkage void sysenter_entry(void);
 
-#ifdef CONFIG_COMPAT_VDSO
 static __cpuinit void reloc_symtab(Elf32_Ehdr *ehdr,
 				   unsigned offset, unsigned size)
 {
@@ -146,11 +158,6 @@ static __cpuinit void relocate_vdso(Elf3
 				     shdr[i].sh_size);
 	}
 }
-#else
-static inline void relocate_vdso(Elf32_Ehdr *ehdr)
-{
-}
-#endif	/* COMPAT_VDSO */
 
 void enable_sep_cpu(void)
 {
@@ -168,6 +175,25 @@ void enable_sep_cpu(void)
 	wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0);
 	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
 	put_cpu();	
+}
+
+static struct vm_area_struct gate_vma;
+
+static int __cpuinit gate_vma_init(void)
+{
+	gate_vma.vm_mm = NULL;
+	gate_vma.vm_start = FIXADDR_USER_START;
+	gate_vma.vm_end = FIXADDR_USER_END;
+	gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
+	gate_vma.vm_page_prot = __P101;
+	/*
+	 * Make sure the vDSO gets into every core dump.
+	 * Dumping its contents makes post-mortem fully interpretable later
+	 * without matching up the same kernel and hardware config to see
+	 * what PC values meant.
+	 */
+	gate_vma.vm_flags |= VM_ALWAYSDUMP;
+	return 0;
 }
 
 /*
@@ -178,6 +204,22 @@ extern const char vsyscall_sysenter_star
 extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
 static struct page *syscall_pages[1];
 
+static void map_compat_vdso(int map)
+{
+	static int vdso_mapped;
+
+	if (map == vdso_mapped)
+		return;
+
+	vdso_mapped = map;
+
+	__set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT,
+		     map ? PAGE_READONLY_EXEC : PAGE_NONE);
+
+	/* flush stray tlbs */
+	flush_tlb_all();
+}
+
 int __cpuinit sysenter_setup(void)
 {
 	void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
@@ -186,10 +228,9 @@ int __cpuinit sysenter_setup(void)
 
 	syscall_pages[0] = virt_to_page(syscall_page);
 
-#ifdef CONFIG_COMPAT_VDSO
-	__set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC);
+	gate_vma_init();
+
 	printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
-#endif
 
 	if (!boot_cpu_has(X86_FEATURE_SEP)) {
 		vsyscall = &vsyscall_int80_start;
@@ -208,42 +249,57 @@ int __cpuinit sysenter_setup(void)
 /* Defined in vsyscall-sysenter.S */
 extern void SYSENTER_RETURN;
 
-#ifdef __HAVE_ARCH_GATE_AREA
 /* Setup a VMA at program startup for the vsyscall page */
 int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long addr;
 	int ret;
+	bool compat;
 
 	down_write(&mm->mmap_sem);
-	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
-	if (IS_ERR_VALUE(addr)) {
-		ret = addr;
-		goto up_fail;
-	}
-
-	/*
-	 * MAYWRITE to allow gdb to COW and set breakpoints
-	 *
-	 * Make sure the vDSO gets into every core dump.
-	 * Dumping its contents makes post-mortem fully interpretable later
-	 * without matching up the same kernel and hardware config to see
-	 * what PC values meant.
-	 */
-	ret = install_special_mapping(mm, addr, PAGE_SIZE,
-				      VM_READ|VM_EXEC|
-				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
-				      VM_ALWAYSDUMP,
-				      syscall_pages);
-	if (ret)
-		goto up_fail;
+
+	/* Test compat mode once here, in case someone
+	   changes it via sysctl */
+	compat = (vdso_enabled == VDSO_COMPAT);
+
+	map_compat_vdso(compat);
+
+	if (compat)
+		addr = VDSO_HIGH_BASE;
+	else {
+		addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+		if (IS_ERR_VALUE(addr)) {
+			ret = addr;
+			goto up_fail;
+		}
+
+		/*
+		 * MAYWRITE to allow gdb to COW and set breakpoints
+		 *
+		 * Make sure the vDSO gets into every core dump.
+		 * Dumping its contents makes post-mortem fully
+		 * interpretable later without matching up the same
+		 * kernel and hardware config to see what PC values
+		 * meant.
+		 */
+		ret = install_special_mapping(mm, addr, PAGE_SIZE,
+					      VM_READ|VM_EXEC|
+					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+					      VM_ALWAYSDUMP,
+					      syscall_pages);
+
+		if (ret)
+			goto up_fail;
+	}
 
 	current->mm->context.vdso = (void *)addr;
 	current_thread_info()->sysenter_return =
-				    (void *)VDSO_SYM(&SYSENTER_RETURN);
-up_fail:
+		(void *)VDSO_SYM(&SYSENTER_RETURN);
+
+  up_fail:
 	up_write(&mm->mmap_sem);
+
 	return ret;
 }
 
@@ -256,6 +312,9 @@ const char *arch_vma_name(struct vm_area
 
 struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
 {
+	/* Check to see if this task was created in compat vdso mode */
+	if (tsk->mm->context.vdso == (void *)VDSO_HIGH_BASE)
+		return &gate_vma;
 	return NULL;
 }
 
@@ -268,17 +327,3 @@ int in_gate_area_no_task(unsigned long a
 {
 	return 0;
 }
-#else  /* !__HAVE_ARCH_GATE_AREA */
-int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
-{
-	/*
-	 * If not creating userspace VMA, simply set vdso to point to
-	 * fixmap page.
-	 */
-	current->mm->context.vdso = (void *)VDSO_HIGH_BASE;
-	current_thread_info()->sysenter_return =
-		(void *)VDSO_SYM(&SYSENTER_RETURN);
-
-	return 0;
-}
-#endif	/* __HAVE_ARCH_GATE_AREA */
===================================================================
--- a/include/asm-i386/page.h
+++ b/include/asm-i386/page.h
@@ -143,9 +143,7 @@ extern int page_is_ram(unsigned long pag
 #include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
-#ifndef CONFIG_COMPAT_VDSO
 #define __HAVE_ARCH_GATE_AREA 1
-#endif
 #endif /* __KERNEL__ */
 
 #endif /* _I386_PAGE_H */
--
Hi Andi,

Here's a couple of patches to fix up COMPAT_VDSO:

The first is a straightforward implementation of Jan's original idea of
relocating the VDSO to match its mapped location.  Unlike Jan and Zach's
version, I changed it to relocate based on the phdrs rather than the
sections; the result is pleasantly compact.

The second patch takes advantage of the fact that all the COMPAT_VDSO
work happens at runtime now, and allows compat mode to be enabled
dynamically.  If you specify vdso=2 on the kernel command line, it comes
up in compat mode; vdso=1 is normal vdso mode, and vdso=0 disables vdso
altogether.  You can also switch modes with sysctl.

Changes since last posting:
 - rebase sections as well as phdrs
 - relocate symbols
 - more robust DT_tag handling
 - handle dynamic compat mode switches a bit better

Thanks to Jan and Roland for the review.

Thanks,
	J
--
Jeremy Fitzhardinge
2007-Apr-18  17:50 UTC
[patch 2/2] Make COMPAT_VDSO runtime selectable.
Now that relocation of the VDSO for COMPAT_VDSO users is done at
runtime rather than compile time, it is possible to enable/disable
compat mode at runtime.
This patch allows you to enable COMPAT_VDSO mode with "vdso=2" on the
kernel command line, or via sysctl.
The COMPAT_VDSO config option still exists, but if enabled it just
makes vdso_enabled default to VDSO_COMPAT.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Zachary Amsden <zach@vmware.com>
Cc: "Jan Beulich" <JBeulich@novell.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roland McGrath <roland@redhat.com>
---
 Documentation/kernel-parameters.txt |    1 
 arch/i386/kernel/sysenter.c         |  131 ++++++++++++++++++++++-------------
 include/asm-i386/page.h             |    2 
 3 files changed, 84 insertions(+), 50 deletions(-)
===================================================================
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1807,6 +1807,7 @@ and is between 256 and 4096 characters. 
 			[USBHID] The interval which mice are to be polled at.
 
 	vdso=		[IA-32,SH]
+			vdso=2: enable compat VDSO (default with COMPAT_VDSO)
 			vdso=1: enable VDSO (default)
 			vdso=0: disable VDSO mapping
 
===================================================================
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -24,11 +24,23 @@
 #include <asm/unistd.h>
 #include <asm/elf.h>
 
+enum {
+	VDSO_DISABLED = 0,
+	VDSO_ENABLED = 1,
+	VDSO_COMPAT = 2,
+};
+
+#ifdef CONFIG_COMPAT_VDSO
+#define VDSO_DEFAULT	VDSO_COMPAT
+#else
+#define VDSO_DEFAULT	VDSO_ENABLED
+#endif
+
 /*
  * Should the kernel map a VDSO page into processes and pass its
  * address down to glibc upon exec()?
  */
-unsigned int __read_mostly vdso_enabled = 1;
+unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT;
 
 EXPORT_SYMBOL_GPL(vdso_enabled);
 
@@ -43,7 +55,6 @@ __setup("vdso=", vdso_setup);
 
 extern asmlinkage void sysenter_entry(void);
 
-#ifdef CONFIG_COMPAT_VDSO
 static __cpuinit void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
 {
 	Elf32_Dyn *dyn = (void *)ehdr + offset;
@@ -85,11 +96,6 @@ static __cpuinit void relocate_vdso(Elf3
 			reloc_dyn(ehdr, phdr[i].p_offset);
 	}
 }
-#else
-static inline void relocate_vdso(Elf32_Ehdr *ehdr)
-{
-}
-#endif	/* COMPAT_VDSO */
 
 void enable_sep_cpu(void)
 {
@@ -109,6 +115,25 @@ void enable_sep_cpu(void)
 	put_cpu();	
 }
 
+static struct vm_area_struct gate_vma;
+
+static int __cpuinit gate_vma_init(void)
+{
+	gate_vma.vm_mm = NULL;
+	gate_vma.vm_start = FIXADDR_USER_START;
+	gate_vma.vm_end = FIXADDR_USER_END;
+	gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
+	gate_vma.vm_page_prot = __P101;
+	/*
+	 * Make sure the vDSO gets into every core dump.
+	 * Dumping its contents makes post-mortem fully interpretable later
+	 * without matching up the same kernel and hardware config to see
+	 * what PC values meant.
+	 */
+	gate_vma.vm_flags |= VM_ALWAYSDUMP;
+	return 0;
+}
+
 /*
  * These symbols are defined by vsyscall.o to mark the bounds
  * of the ELF DSO images included therein.
@@ -117,6 +142,19 @@ extern const char vsyscall_sysenter_star
 extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
 static struct page *syscall_pages[1];
 
+static void map_compat_vdso(int map)
+{
+	static int vdso_mapped;
+
+	if (map == vdso_mapped)
+		return;
+
+	vdso_mapped = map;
+
+	__set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT,
+		     map ? PAGE_READONLY_EXEC : PAGE_NONE);
+}
+
 int __cpuinit sysenter_setup(void)
 {
 	void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
@@ -125,10 +163,9 @@ int __cpuinit sysenter_setup(void)
 
 	syscall_pages[0] = virt_to_page(syscall_page);
 
-#ifdef CONFIG_COMPAT_VDSO
-	__set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC);
+	gate_vma_init();
+
 	printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
-#endif
 
 	if (!boot_cpu_has(X86_FEATURE_SEP)) {
 		vsyscall = &vsyscall_int80_start;
@@ -147,7 +184,6 @@ int __cpuinit sysenter_setup(void)
 /* Defined in vsyscall-sysenter.S */
 extern void SYSENTER_RETURN;
 
-#ifdef __HAVE_ARCH_GATE_AREA
 /* Setup a VMA at program startup for the vsyscall page */
 int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
 {
@@ -156,33 +192,44 @@ int arch_setup_additional_pages(struct l
 	int ret;
 
 	down_write(&mm->mmap_sem);
-	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
-	if (IS_ERR_VALUE(addr)) {
-		ret = addr;
-		goto up_fail;
-	}
-
-	/*
-	 * MAYWRITE to allow gdb to COW and set breakpoints
-	 *
-	 * Make sure the vDSO gets into every core dump.
-	 * Dumping its contents makes post-mortem fully interpretable later
-	 * without matching up the same kernel and hardware config to see
-	 * what PC values meant.
-	 */
-	ret = install_special_mapping(mm, addr, PAGE_SIZE,
-				      VM_READ|VM_EXEC|
-				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
-				      VM_ALWAYSDUMP,
-				      syscall_pages);
-	if (ret)
-		goto up_fail;
+
+	map_compat_vdso(vdso_enabled == VDSO_COMPAT);
+
+	if (vdso_enabled == VDSO_COMPAT)
+		addr = VDSO_HIGH_BASE;
+	else {
+		addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+		if (IS_ERR_VALUE(addr)) {
+			ret = addr;
+			goto up_fail;
+		}
+
+		/*
+		 * MAYWRITE to allow gdb to COW and set breakpoints
+		 *
+		 * Make sure the vDSO gets into every core dump.
+		 * Dumping its contents makes post-mortem fully
+		 * interpretable later without matching up the same
+		 * kernel and hardware config to see what PC values
+		 * meant.
+		 */
+		ret = install_special_mapping(mm, addr, PAGE_SIZE,
+					      VM_READ|VM_EXEC|
+					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+					      VM_ALWAYSDUMP,
+					      syscall_pages);
+
+		if (ret)
+			goto up_fail;
+	}
 
 	current->mm->context.vdso = (void *)addr;
 	current_thread_info()->sysenter_return =
-				    (void *)VDSO_SYM(&SYSENTER_RETURN);
-up_fail:
+		(void *)VDSO_SYM(&SYSENTER_RETURN);
+
+  up_fail:
 	up_write(&mm->mmap_sem);
+
 	return ret;
 }
 
@@ -195,6 +242,8 @@ const char *arch_vma_name(struct vm_area
 
 struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
 {
+	if (vdso_enabled == VDSO_COMPAT)
+		return &gate_vma;
 	return NULL;
 }
 
@@ -207,17 +256,3 @@ int in_gate_area_no_task(unsigned long a
 {
 	return 0;
 }
-#else  /* !__HAVE_ARCH_GATE_AREA */
-int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
-{
-	/*
-	 * If not creating userspace VMA, simply set vdso to point to
-	 * fixmap page.
-	 */
-	current->mm->context.vdso = (void *)VDSO_HIGH_BASE;
-	current_thread_info()->sysenter_return =
-		(void *)VDSO_SYM(&SYSENTER_RETURN);
-
-	return 0;
-}
-#endif	/* __HAVE_ARCH_GATE_AREA */
===================================================================
--- a/include/asm-i386/page.h
+++ b/include/asm-i386/page.h
@@ -143,9 +143,7 @@ extern int page_is_ram(unsigned long pag
 #include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
 
-#ifndef CONFIG_COMPAT_VDSO
 #define __HAVE_ARCH_GATE_AREA 1
-#endif
 #endif /* __KERNEL__ */
 
 #endif /* _I386_PAGE_H */
--
Jeremy Fitzhardinge
2007-Apr-18  17:50 UTC
[patch 1/2] Relocate VDSO ELF headers to match mapped location with COMPAT_VDSO
Some versions of libc can't deal with a VDSO which doesn't have its
ELF headers matching its mapped address.  COMPAT_VDSO maps the VDSO at
a specific system-wide fixed address.  Previously this was all done at
build time, on the grounds that the fixed VDSO address is always at
the top of the address space.  However, a hypervisor may reserve some
of that address space, pushing the fixmap address down.
This patch does the adjustment dynamically at runtime, depending on
the runtime location of the VDSO fixmap.
[ Patch has been through several hands: Jan Beulich wrote the orignal
  version; Zach reworked it, and Jeremy converted it to relocate phdrs
  rather than sections. ]
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Zachary Amsden <zach@vmware.com>
Cc: "Jan Beulich" <JBeulich@novell.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roland McGrath <roland@redhat.com>
---
 arch/i386/kernel/entry.S    |    4 -
 arch/i386/kernel/sysenter.c |   95 ++++++++++++++++++++++++++++++++++++-------
 arch/i386/mm/pgtable.c      |    6 --
 include/asm-i386/elf.h      |   28 ++++--------
 include/asm-i386/fixmap.h   |    8 ---
 include/linux/elf.h         |    3 +
 6 files changed, 95 insertions(+), 49 deletions(-)
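[ For context: the fixmap moves when a hypervisor port reserves the top
  of the address space with reserve_top_address(), shown in the
  pgtable.c hunk below. A sketch of such a call -- the function name
  and the 64MB figure are illustrative, not from this patch:

	void __init example_hypervisor_reserve(void)
	{
		/* Lowering __FIXADDR_TOP shifts __fix_to_virt(FIX_VDSO),
		 * i.e. VDSO_HIGH_BASE, so the VDSO's addresses can only
		 * be fixed up at runtime. */
		reserve_top_address(64 * 1024 * 1024);
	}
]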
===================================================================
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -305,16 +305,12 @@ sysenter_past_esp:
 	pushl $(__USER_CS)
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET cs, 0*/
-#ifndef CONFIG_COMPAT_VDSO
 	/*
 	 * Push current_thread_info()->sysenter_return to the stack.
 	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
 	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
 	 */
 	pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
-#else
-	pushl $SYSENTER_RETURN
-#endif
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET eip, 0
 
===================================================================
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -22,6 +22,7 @@
 #include <asm/msr.h>
 #include <asm/pgtable.h>
 #include <asm/unistd.h>
+#include <asm/elf.h>
 
 /*
  * Should the kernel map a VDSO page into processes and pass its
@@ -41,6 +42,54 @@ __setup("vdso=", vdso_setup);
 __setup("vdso=", vdso_setup);
 
 extern asmlinkage void sysenter_entry(void);
+
+#ifdef CONFIG_COMPAT_VDSO
+static __cpuinit void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
+{
+	Elf32_Dyn *dyn = (void *)ehdr + offset;
+
+	for(; dyn->d_tag != DT_NULL; dyn++)
+		switch(dyn->d_tag) {
+		case DT_PLTGOT:
+		case DT_HASH:
+		case DT_STRTAB:
+		case DT_SYMTAB:
+		case DT_RELA:
+		case DT_INIT:
+		case DT_FINI:
+		case DT_REL:
+		case DT_JMPREL:
+		case DT_VERSYM:
+		case DT_VERDEF:
+		case DT_VERNEED:
+			dyn->d_un.d_val += VDSO_HIGH_BASE;
+		}
+}
+
+static __cpuinit void relocate_vdso(Elf32_Ehdr *ehdr)
+{
+	Elf32_Phdr *phdr;
+	int i;
+
+	BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 ||
+	       !elf_check_arch(ehdr) ||
+	       ehdr->e_type != ET_DYN);
+
+	ehdr->e_entry += VDSO_HIGH_BASE;
+
+	phdr = (void *)ehdr + ehdr->e_phoff;
+	for (i = 0; i < ehdr->e_phnum; i++) {
+		phdr[i].p_vaddr += VDSO_HIGH_BASE;
+
+		if (phdr[i].p_type == PT_DYNAMIC)
+			reloc_dyn(ehdr, phdr[i].p_offset);
+	}
+}
+#else
+static inline void relocate_vdso(Elf32_Ehdr *ehdr)
+{
+}
+#endif	/* COMPAT_VDSO */
 
 void enable_sep_cpu(void)
 {
@@ -71,6 +120,9 @@ int __cpuinit sysenter_setup(void)
 int __cpuinit sysenter_setup(void)
 {
 	void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
+	const void *vsyscall;
+	size_t vsyscall_len;
+
 	syscall_pages[0] = virt_to_page(syscall_page);
 
 #ifdef CONFIG_COMPAT_VDSO
@@ -79,23 +131,23 @@ int __cpuinit sysenter_setup(void)
 #endif
 
 	if (!boot_cpu_has(X86_FEATURE_SEP)) {
-		memcpy(syscall_page,
-		       &vsyscall_int80_start,
-		       &vsyscall_int80_end - &vsyscall_int80_start);
-		return 0;
-	}
-
-	memcpy(syscall_page,
-	       &vsyscall_sysenter_start,
-	       &vsyscall_sysenter_end - &vsyscall_sysenter_start);
-
-	return 0;
-}
-
-#ifndef CONFIG_COMPAT_VDSO
+		vsyscall = &vsyscall_int80_start;
+		vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start;
+	} else {
+		vsyscall = &vsyscall_sysenter_start;
+		vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start;
+	}
+
+	memcpy(syscall_page, vsyscall, vsyscall_len);
+	relocate_vdso(syscall_page);
+
+	return 0;
+}
+
 /* Defined in vsyscall-sysenter.S */
 extern void SYSENTER_RETURN;
 
+#ifdef __HAVE_ARCH_GATE_AREA
 /* Setup a VMA at program startup for the vsyscall page */
 int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
 {
@@ -155,4 +207,17 @@ int in_gate_area_no_task(unsigned long a
 {
 	return 0;
 }
-#endif
+#else  /* !__HAVE_ARCH_GATE_AREA */
+int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
+{
+	/*
+	 * If not creating userspace VMA, simply set vdso to point to
+	 * fixmap page.
+	 */
+	current->mm->context.vdso = (void *)VDSO_HIGH_BASE;
+	current_thread_info()->sysenter_return =
+		(void *)VDSO_SYM(&SYSENTER_RETURN);
+
+	return 0;
+}
+#endif	/* __HAVE_ARCH_GATE_AREA */
===================================================================
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -144,10 +144,8 @@ void set_pmd_pfn(unsigned long vaddr, un
 }
 
 static int fixmaps;
-#ifndef CONFIG_COMPAT_VDSO
 unsigned long __FIXADDR_TOP = 0xfffff000;
 EXPORT_SYMBOL(__FIXADDR_TOP);
-#endif
 
 void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
 {
@@ -173,12 +171,8 @@ void reserve_top_address(unsigned long r
 	BUG_ON(fixmaps > 0);
 	printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
 	       (int)-reserve);
-#ifdef CONFIG_COMPAT_VDSO
-	BUG_ON(reserve != 0);
-#else
 	__FIXADDR_TOP = -reserve - PAGE_SIZE;
 	__VMALLOC_RESERVE += reserve;
-#endif
 }
 
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
===================================================================
--- a/include/asm-i386/elf.h
+++ b/include/asm-i386/elf.h
@@ -133,39 +133,31 @@ extern int dump_task_extended_fpu (struc
 #define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs)
 
 #define VDSO_HIGH_BASE		(__fix_to_virt(FIX_VDSO))
-#define VDSO_BASE		((unsigned long)current->mm->context.vdso)
-
-#ifdef CONFIG_COMPAT_VDSO
-# define VDSO_COMPAT_BASE	VDSO_HIGH_BASE
-# define VDSO_PRELINK		VDSO_HIGH_BASE
-#else
-# define VDSO_COMPAT_BASE	VDSO_BASE
-# define VDSO_PRELINK		0
-#endif
+#define VDSO_CURRENT_BASE	((unsigned long)current->mm->context.vdso)
+#define VDSO_PRELINK		0
 
 #define VDSO_SYM(x) \
-		(VDSO_COMPAT_BASE + (unsigned long)(x) - VDSO_PRELINK)
+		(VDSO_CURRENT_BASE + (unsigned long)(x) - VDSO_PRELINK)
 
 #define VDSO_HIGH_EHDR		((const struct elfhdr *) VDSO_HIGH_BASE)
-#define VDSO_EHDR		((const struct elfhdr *) VDSO_COMPAT_BASE)
+#define VDSO_EHDR		((const struct elfhdr *) VDSO_CURRENT_BASE)
 
 extern void __kernel_vsyscall;
 
 #define VDSO_ENTRY		VDSO_SYM(&__kernel_vsyscall)
 
-#ifndef CONFIG_COMPAT_VDSO
+struct linux_binprm;
+
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES
-struct linux_binprm;
 extern int arch_setup_additional_pages(struct linux_binprm *bprm,
                                        int executable_stack);
-#endif
 
 extern unsigned int vdso_enabled;
 
-#define ARCH_DLINFO						\
-do if (vdso_enabled) {						\
-		NEW_AUX_ENT(AT_SYSINFO,	VDSO_ENTRY);		\
-		NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_COMPAT_BASE);	\
+#define ARCH_DLINFO							\
+do if (vdso_enabled) {							\
+		NEW_AUX_ENT(AT_SYSINFO,	VDSO_ENTRY);			\
+		NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE);	\
 } while (0)
 
 #endif
===================================================================
--- a/include/asm-i386/fixmap.h
+++ b/include/asm-i386/fixmap.h
@@ -19,13 +19,9 @@
  * Leave one empty page between vmalloc'ed areas and
  * the start of the fixmap.
  */
-#ifndef CONFIG_COMPAT_VDSO
 extern unsigned long __FIXADDR_TOP;
-#else
-#define __FIXADDR_TOP  0xfffff000
-#define FIXADDR_USER_START	__fix_to_virt(FIX_VDSO)
-#define FIXADDR_USER_END	__fix_to_virt(FIX_VDSO - 1)
-#endif
+#define FIXADDR_USER_START     __fix_to_virt(FIX_VDSO)
+#define FIXADDR_USER_END       __fix_to_virt(FIX_VDSO - 1)
 
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>
===================================================================
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -83,6 +83,9 @@ typedef __s64	Elf64_Sxword;
 #define DT_DEBUG	21
 #define DT_TEXTREL	22
 #define DT_JMPREL	23
+#define DT_VERSYM	0x6ffffff0
+#define DT_VERDEF	0x6ffffffc
+#define DT_VERNEED	0x6ffffffe
 #define DT_LOPROC	0x70000000
 #define DT_HIPROC	0x7fffffff
 
--
Hi Andi,

Here's a couple of patches to fix up COMPAT_VDSO:

The first is a straightforward implementation of Jan's original idea of
relocating the VDSO to match its mapped location.  Unlike Jan and Zach's
version, I changed it to relocate based on the phdrs rather than the
sections; the result is pleasantly compact.

The second patch takes advantage of the fact that all the COMPAT_VDSO
work happens at runtime now, and allows compat mode to be enabled
dynamically.  If you specify vdso=2 on the kernel command line, it comes
up in compat mode; vdso=1 is normal vdso mode, and vdso=0 disables vdso
altogether.  You can also switch modes with sysctl.

Thanks,
	J
--