Jeremy Fitzhardinge
2007-Apr-18 17:50 UTC
[patch 2/2] Make COMPAT_VDSO runtime selectable.
Now that relocation of the VDSO for COMPAT_VDSO users is done at runtime rather than compile time, it is possible to enable/disable compat mode at runtime. This patch allows you to enable COMPAT_VDSO mode with "vdso=2" on the kernel command line, or via sysctl. (Switching on a running system shouldn't be done lightly; any process which was relying on the compat VDSO will be upset if it goes away.) The COMPAT_VDSO config option still exists, but if enabled it just makes vdso_enabled default to VDSO_COMPAT. Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com> Cc: Zachary Amsden <zach@vmware.com> Cc: "Jan Beulich" <JBeulich@novell.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Andi Kleen <ak@suse.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Roland McGrath <roland@redhat.com> --- Documentation/kernel-parameters.txt | 1 arch/i386/kernel/sysenter.c | 141 +++++++++++++++++++++++------------ include/asm-i386/page.h | 2 3 files changed, 94 insertions(+), 50 deletions(-) ==================================================================--- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1807,6 +1807,7 @@ and is between 256 and 4096 characters. [USBHID] The interval which mice are to be polled at. vdso= [IA-32,SH] + vdso=2: enable compat VDSO (default with COMPAT_VDSO) vdso=1: enable VDSO (default) vdso=0: disable VDSO mapping ==================================================================--- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -23,12 +23,25 @@ #include <asm/pgtable.h> #include <asm/unistd.h> #include <asm/elf.h> +#include <asm/tlbflush.h> + +enum { + VDSO_DISABLED = 0, + VDSO_ENABLED = 1, + VDSO_COMPAT = 2, +}; + +#ifdef CONFIG_COMPAT_VDSO +#define VDSO_DEFAULT VDSO_COMPAT +#else +#define VDSO_DEFAULT VDSO_ENABLED +#endif /* * Should the kernel map a VDSO page into processes and pass its * address down to glibc upon exec()? */ -unsigned int __read_mostly vdso_enabled = 1; +unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT; EXPORT_SYMBOL_GPL(vdso_enabled); @@ -43,7 +56,6 @@ __setup("vdso=", vdso_setup); extern asmlinkage void sysenter_entry(void); -#ifdef CONFIG_COMPAT_VDSO static __cpuinit void reloc_symtab(Elf32_Ehdr *ehdr, unsigned offset, unsigned size) { @@ -146,11 +158,6 @@ static __cpuinit void relocate_vdso(Elf3 shdr[i].sh_size); } } -#else -static inline void relocate_vdso(Elf32_Ehdr *ehdr) -{ -} -#endif /* COMPAT_VDSO */ void enable_sep_cpu(void) { @@ -168,6 +175,25 @@ void enable_sep_cpu(void) wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0); wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0); put_cpu(); +} + +static struct vm_area_struct gate_vma; + +static int __cpuinit gate_vma_init(void) +{ + gate_vma.vm_mm = NULL; + gate_vma.vm_start = FIXADDR_USER_START; + gate_vma.vm_end = FIXADDR_USER_END; + gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; + gate_vma.vm_page_prot = __P101; + /* + * Make sure the vDSO gets into every core dump. + * Dumping its contents makes post-mortem fully interpretable later + * without matching up the same kernel and hardware config to see + * what PC values meant. + */ + gate_vma.vm_flags |= VM_ALWAYSDUMP; + return 0; } /* @@ -178,6 +204,22 @@ extern const char vsyscall_sysenter_star extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; static struct page *syscall_pages[1]; +static void map_compat_vdso(int map) +{ + static int vdso_mapped; + + if (map == vdso_mapped) + return; + + vdso_mapped = map; + + __set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT, + map ? PAGE_READONLY_EXEC : PAGE_NONE); + + /* flush stray tlbs */ + flush_tlb_all(); +} + int __cpuinit sysenter_setup(void) { void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); @@ -186,10 +228,9 @@ int __cpuinit sysenter_setup(void) syscall_pages[0] = virt_to_page(syscall_page); -#ifdef CONFIG_COMPAT_VDSO - __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC); + gate_vma_init(); + printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); -#endif if (!boot_cpu_has(X86_FEATURE_SEP)) { vsyscall = &vsyscall_int80_start; @@ -208,42 +249,57 @@ int __cpuinit sysenter_setup(void) /* Defined in vsyscall-sysenter.S */ extern void SYSENTER_RETURN; -#ifdef __HAVE_ARCH_GATE_AREA /* Setup a VMA at program startup for the vsyscall page */ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) { struct mm_struct *mm = current->mm; unsigned long addr; int ret; + bool compat; down_write(&mm->mmap_sem); - addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); - if (IS_ERR_VALUE(addr)) { - ret = addr; - goto up_fail; - } - - /* - * MAYWRITE to allow gdb to COW and set breakpoints - * - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. - */ - ret = install_special_mapping(mm, addr, PAGE_SIZE, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, - syscall_pages); - if (ret) - goto up_fail; + + /* Test compat mode once here, in case someone + changes it via sysctl */ + compat = (vdso_enabled == VDSO_COMPAT); + + map_compat_vdso(compat); + + if (compat) + addr = VDSO_HIGH_BASE; + else { + addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } + + /* + * MAYWRITE to allow gdb to COW and set breakpoints + * + * Make sure the vDSO gets into every core dump. + * Dumping its contents makes post-mortem fully + * interpretable later without matching up the same + * kernel and hardware config to see what PC values + * meant. + */ + ret = install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_ALWAYSDUMP, + syscall_pages); + + if (ret) + goto up_fail; + } current->mm->context.vdso = (void *)addr; current_thread_info()->sysenter_return - (void *)VDSO_SYM(&SYSENTER_RETURN); -up_fail: + (void *)VDSO_SYM(&SYSENTER_RETURN); + + up_fail: up_write(&mm->mmap_sem); + return ret; } @@ -256,6 +312,9 @@ const char *arch_vma_name(struct vm_area struct vm_area_struct *get_gate_vma(struct task_struct *tsk) { + /* Check to see if this task was created in compat vdso mode */ + if (tsk->mm->context.vdso == (void *)VDSO_HIGH_BASE) + return &gate_vma; return NULL; } @@ -268,17 +327,3 @@ int in_gate_area_no_task(unsigned long a { return 0; } -#else /* !__HAVE_ARCH_GATE_AREA */ -int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) -{ - /* - * If not creating userspace VMA, simply set vdso to point to - * fixmap page. - */ - current->mm->context.vdso = (void *)VDSO_HIGH_BASE; - current_thread_info()->sysenter_return - (void *)VDSO_SYM(&SYSENTER_RETURN); - - return 0; -} -#endif /* __HAVE_ARCH_GATE_AREA */ ==================================================================--- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -143,9 +143,7 @@ extern int page_is_ram(unsigned long pag #include <asm-generic/memory_model.h> #include <asm-generic/page.h> -#ifndef CONFIG_COMPAT_VDSO #define __HAVE_ARCH_GATE_AREA 1 -#endif #endif /* __KERNEL__ */ #endif /* _I386_PAGE_H */ --
Hi Andi, Here's a couple of patches to fix up COMPAT_VDSO: The first is a straightforward implementation of Jan's original idea of relocating the VDSO to match its mapped location. Unlike Jan and Zach's version, I changed it to relocate based on the phdrs rather than the sections; the result is pleasantly compact. The second patch takes advantage of the fact that all the COMPAT_VDSO work happens at runtime now, and allows compat mode to be enabled dynamically. If you specify vdso=2 on the kernel command line, it comes up in compat mode; vdso=1 is normal vdso mode, and vdso=0 disables vdso altogether. You can also switch modes with sysctl. Changes since last posting: - rebase sections as well as phdrs - relocate symbols - more robust DT_tag handling - handle dynamic compat mode switches a bit better Thanks to Jan and Roland for the review. Thanks, J --
Jeremy Fitzhardinge
2007-Apr-18 17:50 UTC
[patch 2/2] Make COMPAT_VDSO runtime selectable.
Now that relocation of the VDSO for COMPAT_VDSO users is done at runtime rather than compile time, it is possible to enable/disable compat mode at runtime. This patch allows you to enable COMPAT_VDSO mode with "vdso=2" on the kernel command line, or via sysctl. The COMPAT_VDSO config option still exists, but if enabled it just makes vdso_enabled default to VDSO_COMPAT. Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com> Cc: Zachary Amsden <zach@vmware.com> Cc: "Jan Beulich" <JBeulich@novell.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Andi Kleen <ak@suse.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Roland McGrath <roland@redhat.com> --- Documentation/kernel-parameters.txt | 1 arch/i386/kernel/sysenter.c | 131 ++++++++++++++++++++++------------- include/asm-i386/page.h | 2 3 files changed, 84 insertions(+), 50 deletions(-) ==================================================================--- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1807,6 +1807,7 @@ and is between 256 and 4096 characters. [USBHID] The interval which mice are to be polled at. vdso= [IA-32,SH] + vdso=2: enable compat VDSO (default with COMPAT_VDSO) vdso=1: enable VDSO (default) vdso=0: disable VDSO mapping ==================================================================--- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -24,11 +24,23 @@ #include <asm/unistd.h> #include <asm/elf.h> +enum { + VDSO_DISABLED = 0, + VDSO_ENABLED = 1, + VDSO_COMPAT = 2, +}; + +#ifdef CONFIG_COMPAT_VDSO +#define VDSO_DEFAULT VDSO_COMPAT +#else +#define VDSO_DEFAULT VDSO_ENABLED +#endif + /* * Should the kernel map a VDSO page into processes and pass its * address down to glibc upon exec()? */ -unsigned int __read_mostly vdso_enabled = 1; +unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT; EXPORT_SYMBOL_GPL(vdso_enabled); @@ -43,7 +55,6 @@ __setup("vdso=", vdso_setup); extern asmlinkage void sysenter_entry(void); -#ifdef CONFIG_COMPAT_VDSO static __cpuinit void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) { Elf32_Dyn *dyn = (void *)ehdr + offset; @@ -85,11 +96,6 @@ static __cpuinit void relocate_vdso(Elf3 reloc_dyn(ehdr, phdr[i].p_offset); } } -#else -static inline void relocate_vdso(Elf32_Ehdr *ehdr) -{ -} -#endif /* COMPAT_VDSO */ void enable_sep_cpu(void) { @@ -109,6 +115,25 @@ void enable_sep_cpu(void) put_cpu(); } +static struct vm_area_struct gate_vma; + +static int __cpuinit gate_vma_init(void) +{ + gate_vma.vm_mm = NULL; + gate_vma.vm_start = FIXADDR_USER_START; + gate_vma.vm_end = FIXADDR_USER_END; + gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; + gate_vma.vm_page_prot = __P101; + /* + * Make sure the vDSO gets into every core dump. + * Dumping its contents makes post-mortem fully interpretable later + * without matching up the same kernel and hardware config to see + * what PC values meant. + */ + gate_vma.vm_flags |= VM_ALWAYSDUMP; + return 0; +} + /* * These symbols are defined by vsyscall.o to mark the bounds * of the ELF DSO images included therein. @@ -117,6 +142,19 @@ extern const char vsyscall_sysenter_star extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; static struct page *syscall_pages[1]; +static void map_compat_vdso(int map) +{ + static int vdso_mapped; + + if (map == vdso_mapped) + return; + + vdso_mapped = map; + + __set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT, + map ? PAGE_READONLY_EXEC : PAGE_NONE); +} + int __cpuinit sysenter_setup(void) { void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); @@ -125,10 +163,9 @@ int __cpuinit sysenter_setup(void) syscall_pages[0] = virt_to_page(syscall_page); -#ifdef CONFIG_COMPAT_VDSO - __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC); + gate_vma_init(); + printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); -#endif if (!boot_cpu_has(X86_FEATURE_SEP)) { vsyscall = &vsyscall_int80_start; @@ -147,7 +184,6 @@ int __cpuinit sysenter_setup(void) /* Defined in vsyscall-sysenter.S */ extern void SYSENTER_RETURN; -#ifdef __HAVE_ARCH_GATE_AREA /* Setup a VMA at program startup for the vsyscall page */ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) { @@ -156,33 +192,44 @@ int arch_setup_additional_pages(struct l int ret; down_write(&mm->mmap_sem); - addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); - if (IS_ERR_VALUE(addr)) { - ret = addr; - goto up_fail; - } - - /* - * MAYWRITE to allow gdb to COW and set breakpoints - * - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. - */ - ret = install_special_mapping(mm, addr, PAGE_SIZE, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, - syscall_pages); - if (ret) - goto up_fail; + + map_compat_vdso(vdso_enabled == VDSO_COMPAT); + + if (vdso_enabled == VDSO_COMPAT) + addr = VDSO_HIGH_BASE; + else { + addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } + + /* + * MAYWRITE to allow gdb to COW and set breakpoints + * + * Make sure the vDSO gets into every core dump. + * Dumping its contents makes post-mortem fully + * interpretable later without matching up the same + * kernel and hardware config to see what PC values + * meant. + */ + ret = install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_ALWAYSDUMP, + syscall_pages); + + if (ret) + goto up_fail; + } current->mm->context.vdso = (void *)addr; current_thread_info()->sysenter_return - (void *)VDSO_SYM(&SYSENTER_RETURN); -up_fail: + (void *)VDSO_SYM(&SYSENTER_RETURN); + + up_fail: up_write(&mm->mmap_sem); + return ret; } @@ -195,6 +242,8 @@ const char *arch_vma_name(struct vm_area struct vm_area_struct *get_gate_vma(struct task_struct *tsk) { + if (vdso_enabled == VDSO_COMPAT) + return &gate_vma; return NULL; } @@ -207,17 +256,3 @@ int in_gate_area_no_task(unsigned long a { return 0; } -#else /* !__HAVE_ARCH_GATE_AREA */ -int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) -{ - /* - * If not creating userspace VMA, simply set vdso to point to - * fixmap page. - */ - current->mm->context.vdso = (void *)VDSO_HIGH_BASE; - current_thread_info()->sysenter_return - (void *)VDSO_SYM(&SYSENTER_RETURN); - - return 0; -} -#endif /* __HAVE_ARCH_GATE_AREA */ ==================================================================--- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -143,9 +143,7 @@ extern int page_is_ram(unsigned long pag #include <asm-generic/memory_model.h> #include <asm-generic/page.h> -#ifndef CONFIG_COMPAT_VDSO #define __HAVE_ARCH_GATE_AREA 1 -#endif #endif /* __KERNEL__ */ #endif /* _I386_PAGE_H */ --
Jeremy Fitzhardinge
2007-Apr-18 17:50 UTC
[patch 1/2] Relocate VDSO ELF headers to match mapped location with COMPAT_VDSO
Some versions of libc can't deal with a VDSO which doesn't have its ELF headers matching its mapped address. COMPAT_VDSO maps the VDSO at a specific system-wide fixed address. Previously this was all done at build time, on the grounds that the fixed VDSO address is always at the top of the address space. However, a hypervisor may reserve some of that address space, pushing the fixmap address down. This patch does the adjustment dynamically at runtime, depending on the runtime location of the VDSO fixmap. [ Patch has been through several hands: Jan Beulich wrote the orignal version; Zach reworked it, and Jeremy converted it to relocate phdrs rather than sections. ] Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com> Cc: Zachary Amsden <zach@vmware.com> Cc: "Jan Beulich" <JBeulich@novell.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Andi Kleen <ak@suse.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Roland McGrath <roland@redhat.com> --- arch/i386/kernel/entry.S | 4 - arch/i386/kernel/sysenter.c | 95 ++++++++++++++++++++++++++++++++++++------- arch/i386/mm/pgtable.c | 6 -- include/asm-i386/elf.h | 28 ++++-------- include/asm-i386/fixmap.h | 8 --- include/linux/elf.h | 3 + 6 files changed, 95 insertions(+), 49 deletions(-) ==================================================================--- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -305,16 +305,12 @@ sysenter_past_esp: pushl $(__USER_CS) CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET cs, 0*/ -#ifndef CONFIG_COMPAT_VDSO /* * Push current_thread_info()->sysenter_return to the stack. * A tiny bit of offset fixup is necessary - 4*4 means the 4 words * pushed above; +8 corresponds to copy_thread's esp0 setting. */ pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) -#else - pushl $SYSENTER_RETURN -#endif CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET eip, 0 ==================================================================--- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -22,6 +22,7 @@ #include <asm/msr.h> #include <asm/pgtable.h> #include <asm/unistd.h> +#include <asm/elf.h> /* * Should the kernel map a VDSO page into processes and pass its @@ -41,6 +42,54 @@ __setup("vdso=", vdso_setup); __setup("vdso=", vdso_setup); extern asmlinkage void sysenter_entry(void); + +#ifdef CONFIG_COMPAT_VDSO +static __cpuinit void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) +{ + Elf32_Dyn *dyn = (void *)ehdr + offset; + + for(; dyn->d_tag != DT_NULL; dyn++) + switch(dyn->d_tag) { + case DT_PLTGOT: + case DT_HASH: + case DT_STRTAB: + case DT_SYMTAB: + case DT_RELA: + case DT_INIT: + case DT_FINI: + case DT_REL: + case DT_JMPREL: + case DT_VERSYM: + case DT_VERDEF: + case DT_VERNEED: + dyn->d_un.d_val += VDSO_HIGH_BASE; + } +} + +static __cpuinit void relocate_vdso(Elf32_Ehdr *ehdr) +{ + Elf32_Phdr *phdr; + int i; + + BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 || + !elf_check_arch(ehdr) || + ehdr->e_type != ET_DYN); + + ehdr->e_entry += VDSO_HIGH_BASE; + + phdr = (void *)ehdr + ehdr->e_phoff; + for (i = 0; i < ehdr->e_phnum; i++) { + phdr[i].p_vaddr += VDSO_HIGH_BASE; + + if (phdr[i].p_type == PT_DYNAMIC) + reloc_dyn(ehdr, phdr[i].p_offset); + } +} +#else +static inline void relocate_vdso(Elf32_Ehdr *ehdr) +{ +} +#endif /* COMPAT_VDSO */ void enable_sep_cpu(void) { @@ -71,6 +120,9 @@ int __cpuinit sysenter_setup(void) int __cpuinit sysenter_setup(void) { void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); + const void *vsyscall; + size_t vsyscall_len; + syscall_pages[0] = virt_to_page(syscall_page); #ifdef CONFIG_COMPAT_VDSO @@ -79,23 +131,23 @@ int __cpuinit sysenter_setup(void) #endif if (!boot_cpu_has(X86_FEATURE_SEP)) { - memcpy(syscall_page, - &vsyscall_int80_start, - &vsyscall_int80_end - &vsyscall_int80_start); - return 0; - } - - memcpy(syscall_page, - &vsyscall_sysenter_start, - &vsyscall_sysenter_end - &vsyscall_sysenter_start); - - return 0; -} - -#ifndef CONFIG_COMPAT_VDSO + vsyscall = &vsyscall_int80_start; + vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start; + } else { + vsyscall = &vsyscall_sysenter_start; + vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start; + } + + memcpy(syscall_page, vsyscall, vsyscall_len); + relocate_vdso(syscall_page); + + return 0; +} + /* Defined in vsyscall-sysenter.S */ extern void SYSENTER_RETURN; +#ifdef __HAVE_ARCH_GATE_AREA /* Setup a VMA at program startup for the vsyscall page */ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) { @@ -155,4 +207,17 @@ int in_gate_area_no_task(unsigned long a { return 0; } -#endif +#else /* !__HAVE_ARCH_GATE_AREA */ +int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) +{ + /* + * If not creating userspace VMA, simply set vdso to point to + * fixmap page. + */ + current->mm->context.vdso = (void *)VDSO_HIGH_BASE; + current_thread_info()->sysenter_return + (void *)VDSO_SYM(&SYSENTER_RETURN); + + return 0; +} +#endif /* __HAVE_ARCH_GATE_AREA */ ==================================================================--- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -144,10 +144,8 @@ void set_pmd_pfn(unsigned long vaddr, un } static int fixmaps; -#ifndef CONFIG_COMPAT_VDSO unsigned long __FIXADDR_TOP = 0xfffff000; EXPORT_SYMBOL(__FIXADDR_TOP); -#endif void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) { @@ -173,12 +171,8 @@ void reserve_top_address(unsigned long r BUG_ON(fixmaps > 0); printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", (int)-reserve); -#ifdef CONFIG_COMPAT_VDSO - BUG_ON(reserve != 0); -#else __FIXADDR_TOP = -reserve - PAGE_SIZE; __VMALLOC_RESERVE += reserve; -#endif } pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) ==================================================================--- a/include/asm-i386/elf.h +++ b/include/asm-i386/elf.h @@ -133,39 +133,31 @@ extern int dump_task_extended_fpu (struc #define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs) #define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO)) -#define VDSO_BASE ((unsigned long)current->mm->context.vdso) - -#ifdef CONFIG_COMPAT_VDSO -# define VDSO_COMPAT_BASE VDSO_HIGH_BASE -# define VDSO_PRELINK VDSO_HIGH_BASE -#else -# define VDSO_COMPAT_BASE VDSO_BASE -# define VDSO_PRELINK 0 -#endif +#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) +#define VDSO_PRELINK 0 #define VDSO_SYM(x) \ - (VDSO_COMPAT_BASE + (unsigned long)(x) - VDSO_PRELINK) + (VDSO_CURRENT_BASE + (unsigned long)(x) - VDSO_PRELINK) #define VDSO_HIGH_EHDR ((const struct elfhdr *) VDSO_HIGH_BASE) -#define VDSO_EHDR ((const struct elfhdr *) VDSO_COMPAT_BASE) +#define VDSO_EHDR ((const struct elfhdr *) VDSO_CURRENT_BASE) extern void __kernel_vsyscall; #define VDSO_ENTRY VDSO_SYM(&__kernel_vsyscall) -#ifndef CONFIG_COMPAT_VDSO +struct linux_binprm; + #define ARCH_HAS_SETUP_ADDITIONAL_PAGES -struct linux_binprm; extern int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack); -#endif extern unsigned int vdso_enabled; -#define ARCH_DLINFO \ -do if (vdso_enabled) { \ - NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ - NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_COMPAT_BASE); \ +#define ARCH_DLINFO \ +do if (vdso_enabled) { \ + NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ } while (0) #endif ==================================================================--- a/include/asm-i386/fixmap.h +++ b/include/asm-i386/fixmap.h @@ -19,13 +19,9 @@ * Leave one empty page between vmalloc'ed areas and * the start of the fixmap. */ -#ifndef CONFIG_COMPAT_VDSO extern unsigned long __FIXADDR_TOP; -#else -#define __FIXADDR_TOP 0xfffff000 -#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) -#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) -#endif +#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) +#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) #ifndef __ASSEMBLY__ #include <linux/kernel.h> ==================================================================--- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -83,6 +83,9 @@ typedef __s64 Elf64_Sxword; #define DT_DEBUG 21 #define DT_TEXTREL 22 #define DT_JMPREL 23 +#define DT_VERSYM 0x6ffffff0 +#define DT_VERDEF 0x6ffffffc +#define DT_VERNEED 0x6ffffffe #define DT_LOPROC 0x70000000 #define DT_HIPROC 0x7fffffff --
Hi Andi, Here's a couple of patches to fix up COMPAT_VDSO: The first is a straightforward implementation of Jan's original idea of relocating the VDSO to match its mapped location. Unlike Jan and Zach's version, I changed it to relocate based on the phdrs rather than the sections; the result is pleasantly compact. The second patch takes advantage of the fact that all the COMPAT_VDSO work happens at runtime now, and allows compat mode to be enabled dynamically. If you specify vdso=2 on the kernel command line, it comes up in compat mode; vdso=1 is normal vdso mode, and vdso=0 disables vdso altogether. You can also switch modes with sysctl. Thanks, J --
Seemingly Similar Threads
- [patch 0/2] Updates to compat VDSOs
- [patch 0/4] Clean up asm/bugs.h, identify_cpu() and update COMPAT_VDSO
- [patch 0/4] Clean up asm/bugs.h, identify_cpu() and update COMPAT_VDSO
- [RFC, PATCH] Fixup COMPAT_VDSO to work with CONFIG_PARAVIRT
- [RFC, PATCH] Fixup COMPAT_VDSO to work with CONFIG_PARAVIRT