Jeremy Fitzhardinge
2007-Apr-18 13:02 UTC
[patch 4/4] Make COMPAT_VDSO runtime selectable.
Now that relocation of the VDSO for COMPAT_VDSO users is done at runtime rather than compile time, it is possible to enable/disable compat mode at runtime. This patch allows you to enable COMPAT_VDSO mode with "vdso=2" on the kernel command line, or via sysctl. (Switching on a running system shouldn't be done lightly; any process which was relying on the compat VDSO will be upset if it goes away.) The COMPAT_VDSO config option still exists, but if enabled it just makes vdso_enabled default to VDSO_COMPAT. Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com> Cc: Zachary Amsden <zach@vmware.com> Cc: "Jan Beulich" <JBeulich@novell.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Andi Kleen <ak@suse.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Roland McGrath <roland@redhat.com> --- Documentation/kernel-parameters.txt | 1 arch/i386/kernel/sysenter.c | 141 +++++++++++++++++++++++------------ include/asm-i386/page.h | 2 3 files changed, 94 insertions(+), 50 deletions(-) ==================================================================--- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1807,6 +1807,7 @@ and is between 256 and 4096 characters. [USBHID] The interval which mice are to be polled at. vdso= [IA-32,SH] + vdso=2: enable compat VDSO (default with COMPAT_VDSO) vdso=1: enable VDSO (default) vdso=0: disable VDSO mapping ==================================================================--- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -23,12 +23,25 @@ #include <asm/pgtable.h> #include <asm/unistd.h> #include <asm/elf.h> +#include <asm/tlbflush.h> + +enum { + VDSO_DISABLED = 0, + VDSO_ENABLED = 1, + VDSO_COMPAT = 2, +}; + +#ifdef CONFIG_COMPAT_VDSO +#define VDSO_DEFAULT VDSO_COMPAT +#else +#define VDSO_DEFAULT VDSO_ENABLED +#endif /* * Should the kernel map a VDSO page into processes and pass its * address down to glibc upon exec()? */ -unsigned int __read_mostly vdso_enabled = 1; +unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT; EXPORT_SYMBOL_GPL(vdso_enabled); @@ -43,7 +56,6 @@ __setup("vdso=", vdso_setup); extern asmlinkage void sysenter_entry(void); -#ifdef CONFIG_COMPAT_VDSO static __init void reloc_symtab(Elf32_Ehdr *ehdr, unsigned offset, unsigned size) { @@ -160,11 +172,6 @@ static __init void relocate_vdso(Elf32_E shdr[i].sh_size); } } -#else -static inline void relocate_vdso(Elf32_Ehdr *ehdr) -{ -} -#endif /* COMPAT_VDSO */ void enable_sep_cpu(void) { @@ -182,6 +189,25 @@ void enable_sep_cpu(void) wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0); wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0); put_cpu(); +} + +static struct vm_area_struct gate_vma; + +static int __init gate_vma_init(void) +{ + gate_vma.vm_mm = NULL; + gate_vma.vm_start = FIXADDR_USER_START; + gate_vma.vm_end = FIXADDR_USER_END; + gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; + gate_vma.vm_page_prot = __P101; + /* + * Make sure the vDSO gets into every core dump. + * Dumping its contents makes post-mortem fully interpretable later + * without matching up the same kernel and hardware config to see + * what PC values meant. + */ + gate_vma.vm_flags |= VM_ALWAYSDUMP; + return 0; } /* @@ -192,6 +218,22 @@ extern const char vsyscall_sysenter_star extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; static struct page *syscall_pages[1]; +static void map_compat_vdso(int map) +{ + static int vdso_mapped; + + if (map == vdso_mapped) + return; + + vdso_mapped = map; + + __set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT, + map ? PAGE_READONLY_EXEC : PAGE_NONE); + + /* flush stray tlbs */ + flush_tlb_all(); +} + int __init sysenter_setup(void) { void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); @@ -200,10 +242,9 @@ int __init sysenter_setup(void) syscall_pages[0] = virt_to_page(syscall_page); -#ifdef CONFIG_COMPAT_VDSO - __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC); + gate_vma_init(); + printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); -#endif if (!boot_cpu_has(X86_FEATURE_SEP)) { vsyscall = &vsyscall_int80_start; @@ -222,42 +263,57 @@ int __init sysenter_setup(void) /* Defined in vsyscall-sysenter.S */ extern void SYSENTER_RETURN; -#ifdef __HAVE_ARCH_GATE_AREA /* Setup a VMA at program startup for the vsyscall page */ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) { struct mm_struct *mm = current->mm; unsigned long addr; int ret; + bool compat; down_write(&mm->mmap_sem); - addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); - if (IS_ERR_VALUE(addr)) { - ret = addr; - goto up_fail; - } - - /* - * MAYWRITE to allow gdb to COW and set breakpoints - * - * Make sure the vDSO gets into every core dump. - * Dumping its contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to see - * what PC values meant. - */ - ret = install_special_mapping(mm, addr, PAGE_SIZE, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, - syscall_pages); - if (ret) - goto up_fail; + + /* Test compat mode once here, in case someone + changes it via sysctl */ + compat = (vdso_enabled == VDSO_COMPAT); + + map_compat_vdso(compat); + + if (compat) + addr = VDSO_HIGH_BASE; + else { + addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } + + /* + * MAYWRITE to allow gdb to COW and set breakpoints + * + * Make sure the vDSO gets into every core dump. + * Dumping its contents makes post-mortem fully + * interpretable later without matching up the same + * kernel and hardware config to see what PC values + * meant. + */ + ret = install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_ALWAYSDUMP, + syscall_pages); + + if (ret) + goto up_fail; + } current->mm->context.vdso = (void *)addr; current_thread_info()->sysenter_return - (void *)VDSO_SYM(&SYSENTER_RETURN); -up_fail: + (void *)VDSO_SYM(&SYSENTER_RETURN); + + up_fail: up_write(&mm->mmap_sem); + return ret; } @@ -270,6 +326,9 @@ const char *arch_vma_name(struct vm_area struct vm_area_struct *get_gate_vma(struct task_struct *tsk) { + /* Check to see if this task was created in compat vdso mode */ + if (tsk->mm->context.vdso == (void *)VDSO_HIGH_BASE) + return &gate_vma; return NULL; } @@ -282,17 +341,3 @@ int in_gate_area_no_task(unsigned long a { return 0; } -#else /* !__HAVE_ARCH_GATE_AREA */ -int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) -{ - /* - * If not creating userspace VMA, simply set vdso to point to - * fixmap page. - */ - current->mm->context.vdso = (void *)VDSO_HIGH_BASE; - current_thread_info()->sysenter_return - (void *)VDSO_SYM(&SYSENTER_RETURN); - - return 0; -} -#endif /* __HAVE_ARCH_GATE_AREA */ ==================================================================--- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -143,9 +143,7 @@ extern int page_is_ram(unsigned long pag #include <asm-generic/memory_model.h> #include <asm-generic/page.h> -#ifndef CONFIG_COMPAT_VDSO #define __HAVE_ARCH_GATE_AREA 1 -#endif #endif /* __KERNEL__ */ #endif /* _I386_PAGE_H */ --
Most of asm-i386/bugs.h is code which should be in a C file, so put it there. Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> --- arch/i386/kernel/cpu/Makefile | 2 arch/i386/kernel/cpu/bugs.c | 191 +++++++++++++++++++++++++++++++++++++++ include/asm-i386/alternative.h | 1 include/asm-i386/bugs.h | 194 ---------------------------------------- 4 files changed, 197 insertions(+), 191 deletions(-) ==================================================================--- a/arch/i386/kernel/cpu/Makefile +++ b/arch/i386/kernel/cpu/Makefile @@ -2,7 +2,7 @@ # Makefile for x86-compatible CPU details and quirks # -obj-y := common.o proc.o +obj-y := common.o proc.o bugs.o obj-y += amd.o obj-y += cyrix.o ==================================================================--- /dev/null +++ b/arch/i386/kernel/cpu/bugs.c @@ -0,0 +1,191 @@ +/* + * arch/i386/cpu/bugs.c + * + * Copyright (C) 1994 Linus Torvalds + * + * Cyrix stuff, June 1998 by: + * - Rafael R. Reilova (moved everything from head.S), + * <rreilova@ececs.uc.edu> + * - Channing Corn (tests & fixes), + * - Andrew D. Balsa (code cleanup). + */ +#include <linux/init.h> +#include <linux/utsname.h> +#include <asm/processor.h> +#include <asm/i387.h> +#include <asm/msr.h> +#include <asm/paravirt.h> +#include <asm/alternative.h> + +static int __init no_halt(char *s) +{ + boot_cpu_data.hlt_works_ok = 0; + return 1; +} + +__setup("no-hlt", no_halt); + +static int __init mca_pentium(char *s) +{ + mca_pentium_flag = 1; + return 1; +} + +__setup("mca-pentium", mca_pentium); + +static int __init no_387(char *s) +{ + boot_cpu_data.hard_math = 0; + write_cr0(0xE | read_cr0()); + return 1; +} + +__setup("no387", no_387); + +static double __initdata x = 4195835.0; +static double __initdata y = 3145727.0; + +/* + * This used to check for exceptions.. + * However, it turns out that to support that, + * the XMM trap handlers basically had to + * be buggy. So let's have a correct XMM trap + * handler, and forget about printing out + * some status at boot. + * + * We should really only care about bugs here + * anyway. Not features. + */ +static void __init check_fpu(void) +{ + if (!boot_cpu_data.hard_math) { +#ifndef CONFIG_MATH_EMULATION + printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); + printk(KERN_EMERG "Giving up.\n"); + for (;;) ; +#endif + return; + } + +/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */ + /* Test for the divl bug.. */ + __asm__("fninit\n\t" + "fldl %1\n\t" + "fdivl %2\n\t" + "fmull %2\n\t" + "fldl %1\n\t" + "fsubp %%st,%%st(1)\n\t" + "fistpl %0\n\t" + "fwait\n\t" + "fninit" + : "=m" (*&boot_cpu_data.fdiv_bug) + : "m" (*&x), "m" (*&y)); + if (boot_cpu_data.fdiv_bug) + printk("Hmm, FPU with FDIV bug.\n"); +} + +static void __init check_hlt(void) +{ + if (paravirt_enabled()) + return; + + printk(KERN_INFO "Checking 'hlt' instruction... "); + if (!boot_cpu_data.hlt_works_ok) { + printk("disabled\n"); + return; + } + halt(); + halt(); + halt(); + halt(); + printk("OK.\n"); +} + +/* + * Most 386 processors have a bug where a POPAD can lock the + * machine even from user space. + */ + +static void __init check_popad(void) +{ +#ifndef CONFIG_X86_POPAD_OK + int res, inp = (int) &res; + + printk(KERN_INFO "Checking for popad bug... "); + __asm__ __volatile__( + "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " + : "=&a" (res) + : "d" (inp) + : "ecx", "edi" ); + /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ + if (res != 12345678) printk( "Buggy.\n" ); + else printk( "OK.\n" ); +#endif +} + +/* + * Check whether we are able to run this kernel safely on SMP. + * + * - In order to run on a i386, we need to be compiled for i386 + * (for due to lack of "invlpg" and working WP on a i386) + * - In order to run on anything without a TSC, we need to be + * compiled for a i486. + * - In order to support the local APIC on a buggy Pentium machine, + * we need to be compiled with CONFIG_X86_GOOD_APIC disabled, + * which happens implicitly if compiled for a Pentium or lower + * (unless an advanced selection of CPU features is used) as an + * otherwise config implies a properly working local APIC without + * the need to do extra reads from the APIC. +*/ + +static void __init check_config(void) +{ +/* + * We'd better not be a i386 if we're configured to use some + * i486+ only features! (WP works in supervisor mode and the + * new "invlpg" and "bswap" instructions) + */ +#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP) + if (boot_cpu_data.x86 == 3) + panic("Kernel requires i486+ for 'invlpg' and other features"); +#endif + +/* + * If we configured ourselves for a TSC, we'd better have one! + */ +#ifdef CONFIG_X86_TSC + if (!cpu_has_tsc && !tsc_disable) + panic("Kernel compiled for Pentium+, requires TSC feature!"); +#endif + +/* + * If we were told we had a good local APIC, check for buggy Pentia, + * i.e. all B steppings and the C2 stepping of P54C when using their + * integrated APIC (see 11AP erratum in "Pentium Processor + * Specification Update"). + */ +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC) + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL + && cpu_has_apic + && boot_cpu_data.x86 == 5 + && boot_cpu_data.x86_model == 2 + && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11)) + panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!"); +#endif +} + + +void __init check_bugs(void) +{ + identify_boot_cpu(); +#ifndef CONFIG_SMP + printk("CPU: "); + print_cpu_info(&boot_cpu_data); +#endif + check_config(); + check_fpu(); + check_hlt(); + check_popad(); + init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); + alternative_instructions(); +} ==================================================================--- a/include/asm-i386/alternative.h +++ b/include/asm-i386/alternative.h @@ -14,6 +14,7 @@ struct alt_instr { u8 pad; }; +extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); struct module; ==================================================================--- a/include/asm-i386/bugs.h +++ b/include/asm-i386/bugs.h @@ -1,198 +1,12 @@ -/* - * include/asm-i386/bugs.h - * - * Copyright (C) 1994 Linus Torvalds - * - * Cyrix stuff, June 1998 by: - * - Rafael R. Reilova (moved everything from head.S), - * <rreilova@ececs.uc.edu> - * - Channing Corn (tests & fixes), - * - Andrew D. Balsa (code cleanup). - */ - /* * This is included by init/main.c to check for architecture-dependent bugs. * * Needs: * void check_bugs(void); */ +#ifndef _ASM_I386_BUG_H +#define _ASM_I386_BUG_H -#include <linux/init.h> -#include <asm/processor.h> -#include <asm/i387.h> -#include <asm/msr.h> -#include <asm/paravirt.h> +extern void __init check_bugs(void); -static int __init no_halt(char *s) -{ - boot_cpu_data.hlt_works_ok = 0; - return 1; -} - -__setup("no-hlt", no_halt); - -static int __init mca_pentium(char *s) -{ - mca_pentium_flag = 1; - return 1; -} - -__setup("mca-pentium", mca_pentium); - -static int __init no_387(char *s) -{ - boot_cpu_data.hard_math = 0; - write_cr0(0xE | read_cr0()); - return 1; -} - -__setup("no387", no_387); - -static double __initdata x = 4195835.0; -static double __initdata y = 3145727.0; - -/* - * This used to check for exceptions.. - * However, it turns out that to support that, - * the XMM trap handlers basically had to - * be buggy. So let's have a correct XMM trap - * handler, and forget about printing out - * some status at boot. - * - * We should really only care about bugs here - * anyway. Not features. - */ -static void __init check_fpu(void) -{ - if (!boot_cpu_data.hard_math) { -#ifndef CONFIG_MATH_EMULATION - printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); - printk(KERN_EMERG "Giving up.\n"); - for (;;) ; -#endif - return; - } - -/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */ - /* Test for the divl bug.. */ - __asm__("fninit\n\t" - "fldl %1\n\t" - "fdivl %2\n\t" - "fmull %2\n\t" - "fldl %1\n\t" - "fsubp %%st,%%st(1)\n\t" - "fistpl %0\n\t" - "fwait\n\t" - "fninit" - : "=m" (*&boot_cpu_data.fdiv_bug) - : "m" (*&x), "m" (*&y)); - if (boot_cpu_data.fdiv_bug) - printk("Hmm, FPU with FDIV bug.\n"); -} - -static void __init check_hlt(void) -{ - if (paravirt_enabled()) - return; - - printk(KERN_INFO "Checking 'hlt' instruction... "); - if (!boot_cpu_data.hlt_works_ok) { - printk("disabled\n"); - return; - } - halt(); - halt(); - halt(); - halt(); - printk("OK.\n"); -} - -/* - * Most 386 processors have a bug where a POPAD can lock the - * machine even from user space. - */ - -static void __init check_popad(void) -{ -#ifndef CONFIG_X86_POPAD_OK - int res, inp = (int) &res; - - printk(KERN_INFO "Checking for popad bug... "); - __asm__ __volatile__( - "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " - : "=&a" (res) - : "d" (inp) - : "ecx", "edi" ); - /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ - if (res != 12345678) printk( "Buggy.\n" ); - else printk( "OK.\n" ); -#endif -} - -/* - * Check whether we are able to run this kernel safely on SMP. - * - * - In order to run on a i386, we need to be compiled for i386 - * (for due to lack of "invlpg" and working WP on a i386) - * - In order to run on anything without a TSC, we need to be - * compiled for a i486. - * - In order to support the local APIC on a buggy Pentium machine, - * we need to be compiled with CONFIG_X86_GOOD_APIC disabled, - * which happens implicitly if compiled for a Pentium or lower - * (unless an advanced selection of CPU features is used) as an - * otherwise config implies a properly working local APIC without - * the need to do extra reads from the APIC. -*/ - -static void __init check_config(void) -{ -/* - * We'd better not be a i386 if we're configured to use some - * i486+ only features! (WP works in supervisor mode and the - * new "invlpg" and "bswap" instructions) - */ -#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP) - if (boot_cpu_data.x86 == 3) - panic("Kernel requires i486+ for 'invlpg' and other features"); -#endif - -/* - * If we configured ourselves for a TSC, we'd better have one! - */ -#ifdef CONFIG_X86_TSC - if (!cpu_has_tsc && !tsc_disable) - panic("Kernel compiled for Pentium+, requires TSC feature!"); -#endif - -/* - * If we were told we had a good local APIC, check for buggy Pentia, - * i.e. all B steppings and the C2 stepping of P54C when using their - * integrated APIC (see 11AP erratum in "Pentium Processor - * Specification Update"). - */ -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC) - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL - && cpu_has_apic - && boot_cpu_data.x86 == 5 - && boot_cpu_data.x86_model == 2 - && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11)) - panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!"); -#endif -} - -extern void alternative_instructions(void); - -static void __init check_bugs(void) -{ - identify_cpu(&boot_cpu_data); -#ifndef CONFIG_SMP - printk("CPU: "); - print_cpu_info(&boot_cpu_data); -#endif - check_config(); - check_fpu(); - check_hlt(); - check_popad(); - init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); - alternative_instructions(); -} +#endif /* _ASM_I386_BUG_H */ --
Jeremy Fitzhardinge
2007-Apr-18 13:02 UTC
[patch 3/4] Relocate VDSO ELF headers to match mapped location with COMPAT_VDSO
Some versions of libc can't deal with a VDSO which doesn't have its ELF headers matching its mapped address. COMPAT_VDSO maps the VDSO at a specific system-wide fixed address. Previously this was all done at build time, on the grounds that the fixed VDSO address is always at the top of the address space. However, a hypervisor may reserve some of that address space, pushing the fixmap address down. This patch does the adjustment dynamically at runtime, depending on the runtime location of the VDSO fixmap. [ Patch has been through several hands: Jan Beulich wrote the orignal version; Zach reworked it, and Jeremy converted it to relocate phdrs as well as sections. ] Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com> Cc: Zachary Amsden <zach@vmware.com> Cc: "Jan Beulich" <JBeulich@novell.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Andi Kleen <ak@suse.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Roland McGrath <roland@redhat.com> --- arch/i386/kernel/entry.S | 4 - arch/i386/kernel/sysenter.c | 170 +++++++++++++++++++++++++++++++++++++++---- arch/i386/mm/pgtable.c | 6 - include/asm-i386/elf.h | 28 ++----- include/asm-i386/fixmap.h | 8 -- include/linux/elf.h | 17 ++++ 6 files changed, 184 insertions(+), 49 deletions(-) ==================================================================--- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -305,16 +305,12 @@ sysenter_past_esp: pushl $(__USER_CS) CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET cs, 0*/ -#ifndef CONFIG_COMPAT_VDSO /* * Push current_thread_info()->sysenter_return to the stack. * A tiny bit of offset fixup is necessary - 4*4 means the 4 words * pushed above; +8 corresponds to copy_thread's esp0 setting. */ pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) -#else - pushl $SYSENTER_RETURN -#endif CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET eip, 0 ==================================================================--- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -22,6 +22,7 @@ #include <asm/msr.h> #include <asm/pgtable.h> #include <asm/unistd.h> +#include <asm/elf.h> /* * Should the kernel map a VDSO page into processes and pass its @@ -41,6 +42,129 @@ __setup("vdso=", vdso_setup); __setup("vdso=", vdso_setup); extern asmlinkage void sysenter_entry(void); + +#ifdef CONFIG_COMPAT_VDSO +static __init void reloc_symtab(Elf32_Ehdr *ehdr, + unsigned offset, unsigned size) +{ + Elf32_Sym *sym = (void *)ehdr + offset; + unsigned nsym = size / sizeof(*sym); + unsigned i; + + for(i = 0; i < nsym; i++, sym++) { + if (sym->st_shndx == SHN_UNDEF || + sym->st_shndx == SHN_ABS) + continue; /* skip */ + + if (sym->st_shndx > SHN_LORESERVE) { + printk(KERN_INFO "VDSO: unexpected st_shndx %x\n", + sym->st_shndx); + continue; + } + + switch(ELF_ST_TYPE(sym->st_info)) { + case STT_OBJECT: + case STT_FUNC: + case STT_SECTION: + case STT_FILE: + sym->st_value += VDSO_HIGH_BASE; + } + } +} + +static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) +{ + Elf32_Dyn *dyn = (void *)ehdr + offset; + + for(; dyn->d_tag != DT_NULL; dyn++) + switch(dyn->d_tag) { + case DT_PLTGOT: + case DT_HASH: + case DT_STRTAB: + case DT_SYMTAB: + case DT_RELA: + case DT_INIT: + case DT_FINI: + case DT_REL: + case DT_DEBUG: + case DT_JMPREL: + case DT_VERSYM: + case DT_VERDEF: + case DT_VERNEED: + case DT_ADDRRNGLO ... DT_ADDRRNGHI: + /* definitely pointers needing relocation */ + dyn->d_un.d_ptr += VDSO_HIGH_BASE; + break; + + case DT_ENCODING ... OLD_DT_LOOS-1: + case DT_LOOS ... DT_HIOS-1: + /* Tags above DT_ENCODING are pointers if + they're even */ + if (dyn->d_tag >= DT_ENCODING && + (dyn->d_tag & 1) == 0) + dyn->d_un.d_ptr += VDSO_HIGH_BASE; + break; + + case DT_VERDEFNUM: + case DT_VERNEEDNUM: + case DT_FLAGS_1: + case DT_RELACOUNT: + case DT_RELCOUNT: + case DT_VALRNGLO ... DT_VALRNGHI: + /* definitely not pointers */ + break; + + case OLD_DT_LOOS ... DT_LOOS-1: + case DT_HIOS ... DT_VALRNGLO-1: + default: + if (dyn->d_tag > DT_ENCODING) + printk(KERN_INFO "VDSO: unexpected DT_tag %x\n", + dyn->d_tag); + break; + } +} + +static __init void relocate_vdso(Elf32_Ehdr *ehdr) +{ + Elf32_Phdr *phdr; + Elf32_Shdr *shdr; + int i; + + BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 || + !elf_check_arch(ehdr) || + ehdr->e_type != ET_DYN); + + ehdr->e_entry += VDSO_HIGH_BASE; + + /* rebase phdrs */ + phdr = (void *)ehdr + ehdr->e_phoff; + for (i = 0; i < ehdr->e_phnum; i++) { + phdr[i].p_vaddr += VDSO_HIGH_BASE; + + /* relocate dynamic stuff */ + if (phdr[i].p_type == PT_DYNAMIC) + reloc_dyn(ehdr, phdr[i].p_offset); + } + + /* rebase sections */ + shdr = (void *)ehdr + ehdr->e_shoff; + for(i = 0; i < ehdr->e_shnum; i++) { + if (!(shdr[i].sh_flags & SHF_ALLOC)) + continue; + + shdr[i].sh_addr += VDSO_HIGH_BASE; + + if (shdr[i].sh_type == SHT_SYMTAB || + shdr[i].sh_type == SHT_DYNSYM) + reloc_symtab(ehdr, shdr[i].sh_offset, + shdr[i].sh_size); + } +} +#else +static inline void relocate_vdso(Elf32_Ehdr *ehdr) +{ +} +#endif /* COMPAT_VDSO */ void enable_sep_cpu(void) { @@ -71,6 +195,9 @@ int __init sysenter_setup(void) int __init sysenter_setup(void) { void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); + const void *vsyscall; + size_t vsyscall_len; + syscall_pages[0] = virt_to_page(syscall_page); #ifdef CONFIG_COMPAT_VDSO @@ -79,23 +206,23 @@ int __init sysenter_setup(void) #endif if (!boot_cpu_has(X86_FEATURE_SEP)) { - memcpy(syscall_page, - &vsyscall_int80_start, - &vsyscall_int80_end - &vsyscall_int80_start); - return 0; - } - - memcpy(syscall_page, - &vsyscall_sysenter_start, - &vsyscall_sysenter_end - &vsyscall_sysenter_start); - - return 0; -} - -#ifndef CONFIG_COMPAT_VDSO + vsyscall = &vsyscall_int80_start; + vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start; + } else { + vsyscall = &vsyscall_sysenter_start; + vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start; + } + + memcpy(syscall_page, vsyscall, vsyscall_len); + relocate_vdso(syscall_page); + + return 0; +} + /* Defined in vsyscall-sysenter.S */ extern void SYSENTER_RETURN; +#ifdef __HAVE_ARCH_GATE_AREA /* Setup a VMA at program startup for the vsyscall page */ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) { @@ -155,4 +282,17 @@ int in_gate_area_no_task(unsigned long a { return 0; } -#endif +#else /* !__HAVE_ARCH_GATE_AREA */ +int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) +{ + /* + * If not creating userspace VMA, simply set vdso to point to + * fixmap page. + */ + current->mm->context.vdso = (void *)VDSO_HIGH_BASE; + current_thread_info()->sysenter_return + (void *)VDSO_SYM(&SYSENTER_RETURN); + + return 0; +} +#endif /* __HAVE_ARCH_GATE_AREA */ ==================================================================--- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -144,10 +144,8 @@ void set_pmd_pfn(unsigned long vaddr, un } static int fixmaps; -#ifndef CONFIG_COMPAT_VDSO unsigned long __FIXADDR_TOP = 0xfffff000; EXPORT_SYMBOL(__FIXADDR_TOP); -#endif void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) { @@ -173,12 +171,8 @@ void reserve_top_address(unsigned long r BUG_ON(fixmaps > 0); printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", (int)-reserve); -#ifdef CONFIG_COMPAT_VDSO - BUG_ON(reserve != 0); -#else __FIXADDR_TOP = -reserve - PAGE_SIZE; __VMALLOC_RESERVE += reserve; -#endif } pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) ==================================================================--- a/include/asm-i386/elf.h +++ b/include/asm-i386/elf.h @@ -133,39 +133,31 @@ extern int dump_task_extended_fpu (struc #define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs) #define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO)) -#define VDSO_BASE ((unsigned long)current->mm->context.vdso) - -#ifdef CONFIG_COMPAT_VDSO -# define VDSO_COMPAT_BASE VDSO_HIGH_BASE -# define VDSO_PRELINK VDSO_HIGH_BASE -#else -# define VDSO_COMPAT_BASE VDSO_BASE -# define VDSO_PRELINK 0 -#endif +#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) +#define VDSO_PRELINK 0 #define VDSO_SYM(x) \ - (VDSO_COMPAT_BASE + (unsigned long)(x) - VDSO_PRELINK) + (VDSO_CURRENT_BASE + (unsigned long)(x) - VDSO_PRELINK) #define VDSO_HIGH_EHDR ((const struct elfhdr *) VDSO_HIGH_BASE) -#define VDSO_EHDR ((const struct elfhdr *) VDSO_COMPAT_BASE) +#define VDSO_EHDR ((const struct elfhdr *) VDSO_CURRENT_BASE) extern void __kernel_vsyscall; #define VDSO_ENTRY VDSO_SYM(&__kernel_vsyscall) -#ifndef CONFIG_COMPAT_VDSO +struct linux_binprm; + #define ARCH_HAS_SETUP_ADDITIONAL_PAGES -struct linux_binprm; extern int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack); -#endif extern unsigned int vdso_enabled; -#define ARCH_DLINFO \ -do if (vdso_enabled) { \ - NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ - NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_COMPAT_BASE); \ +#define ARCH_DLINFO \ +do if (vdso_enabled) { \ + NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ } while (0) #endif ==================================================================--- a/include/asm-i386/fixmap.h +++ b/include/asm-i386/fixmap.h @@ -19,13 +19,9 @@ * Leave one empty page between vmalloc'ed areas and * the start of the fixmap. */ -#ifndef CONFIG_COMPAT_VDSO extern unsigned long __FIXADDR_TOP; -#else -#define __FIXADDR_TOP 0xfffff000 -#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) -#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) -#endif +#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) +#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) #ifndef __ASSEMBLY__ #include <linux/kernel.h> ==================================================================--- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -83,6 +83,23 @@ typedef __s64 Elf64_Sxword; #define DT_DEBUG 21 #define DT_TEXTREL 22 #define DT_JMPREL 23 +#define DT_ENCODING 32 +#define OLD_DT_LOOS 0x60000000 +#define DT_LOOS 0x6000000d +#define DT_HIOS 0x6ffff000 +#define DT_VALRNGLO 0x6ffffd00 +#define DT_VALRNGHI 0x6ffffdff +#define DT_ADDRRNGLO 0x6ffffe00 +#define DT_ADDRRNGHI 0x6ffffeff +#define DT_VERSYM 0x6ffffff0 +#define DT_RELACOUNT 0x6ffffff9 +#define DT_RELCOUNT 0x6ffffffa +#define DT_FLAGS_1 0x6ffffffb +#define DT_VERDEF 0x6ffffffc +#define DT_VERDEFNUM 0x6ffffffd +#define DT_VERNEED 0x6ffffffe +#define DT_VERNEEDNUM 0x6fffffff +#define OLD_DT_HIOS 0x6fffffff #define DT_LOPROC 0x70000000 #define DT_HIPROC 0x7fffffff --
Jeremy Fitzhardinge
2007-Apr-18 13:02 UTC
[patch 0/4] Clean up asm/bugs.h, identify_cpu() and update COMPAT_VDSO
Hi Andi, Four patches: - clean up asm/bugs.h, by moving all the C code into its own C file - split identify_cpu() into boot and secondary variants, so that boot-time setup functions can be marked __init - repost of the COMPAT_VDSO patches with a bit more robustness from unknown DT_tags, and functions marked __init, since all this is boot-time only setup. Thanks, J --
identify_cpu() is used to identify both the boot CPU and secondary CPUs, but it performs some actions which only apply to the boot CPU. Those functions are therefore really __init functions, but because they're called by identify_cpu(), they must be marked __cpuinit. This patch splits identify_cpu() into identify_boot_cpu() and identify_secondary_cpu(), and calls the appropriate init functions from each. Also, identify_boot_cpu() and all the functions it dominates are marked __init. Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com> --- arch/i386/kernel/cpu/common.c | 25 ++++++++++++++++--------- arch/i386/kernel/cpu/mtrr/main.c | 4 ++-- arch/i386/kernel/smpboot.c | 2 +- arch/i386/kernel/sysenter.c | 2 +- include/asm-i386/processor.h | 3 ++- 5 files changed, 22 insertions(+), 14 deletions(-) ==================================================================--- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -390,7 +390,7 @@ __setup("serialnumber", x86_serial_nr_se /* * This does the hard work of actually picking apart the CPU stuff... */ -void __cpuinit identify_cpu(struct cpuinfo_x86 *c) +static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) { int i; @@ -501,15 +501,22 @@ void __cpuinit identify_cpu(struct cpuin /* Init Machine Check Exception if available. */ mcheck_init(c); - - if (c == &boot_cpu_data) - sysenter_setup(); +} + +void __init identify_boot_cpu(void) +{ + identify_cpu(&boot_cpu_data); + sysenter_setup(); enable_sep_cpu(); - - if (c == &boot_cpu_data) - mtrr_bp_init(); - else - mtrr_ap_init(); + mtrr_bp_init(); +} + +void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) +{ + BUG_ON(c == &boot_cpu_data); + identify_cpu(c); + enable_sep_cpu(); + mtrr_ap_init(); } #ifdef CONFIG_X86_HT ==================================================================--- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -571,7 +571,7 @@ extern void cyrix_init_mtrr(void); extern void cyrix_init_mtrr(void); extern void centaur_init_mtrr(void); -static void __cpuinit init_ifs(void) +static void __init init_ifs(void) { #ifndef CONFIG_X86_64 amd_init_mtrr(); @@ -639,7 +639,7 @@ static struct sysdev_driver mtrr_sysdev_ * initialized (i.e. before smp_init()). * */ -void __cpuinit mtrr_bp_init(void) +void __init mtrr_bp_init(void) { init_ifs(); ==================================================================--- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -157,7 +157,7 @@ static void __cpuinit smp_store_cpu_info *c = boot_cpu_data; if (id!=0) - identify_cpu(c); + identify_secondary_cpu(c); /* * Mask B, Pentium, but not Pentium MMX */ ==================================================================--- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -68,7 +68,7 @@ extern const char vsyscall_sysenter_star extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; static struct page *syscall_pages[1]; -int __cpuinit sysenter_setup(void) +int __init sysenter_setup(void) { void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); syscall_pages[0] = virt_to_page(syscall_page); ==================================================================--- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -116,7 +116,8 @@ extern char ignore_fpu_irq; void __init cpu_detect(struct cpuinfo_x86 *c); -extern void identify_cpu(struct cpuinfo_x86 *); +extern void identify_boot_cpu(void); +extern void identify_secondary_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern unsigned short num_cache_leaves; --
Maybe Matching Threads
- [patch 0/4] Clean up asm/bugs.h, identify_cpu() and update COMPAT_VDSO
- [patch 0/2] Updates to compat VDSOs
- [patch 0/2] Updates to compat VDSOs
- [RFC, PATCH] Fixup COMPAT_VDSO to work with CONFIG_PARAVIRT
- [RFC, PATCH] Fixup COMPAT_VDSO to work with CONFIG_PARAVIRT