This fixes various problems that cropped up with the vdso patches. - Patch 1 fixes an information leak to userspace. - Patches 2 and 3 fix the kernel build on gold. - Patches 4 and 5 fix Xen (I hope). Konrad, could you could test these on Xen and run 'test_vsyscall test' [1]? I don't have a usable Xen setup. Also, I'd appreciate a review of patches 4 and 5 from some Xen/paravirt people. [1] https://gitorious.org/linux-test-utils/linux-clock-tests Andy Lutomirski (5): x86-64: Pad vDSO to a page boundary x86-64: Move the "user" vsyscall segment out of the data segment. x86-64: Work around gold bug 13023 x86-64/xen: Enable the vvar mapping x86-64: Add user_64bit_mode paravirt op arch/x86/include/asm/desc.h | 4 +- arch/x86/include/asm/paravirt_types.h | 6 ++++ arch/x86/include/asm/ptrace.h | 19 +++++++++++++ arch/x86/kernel/paravirt.c | 4 +++ arch/x86/kernel/step.c | 2 +- arch/x86/kernel/vmlinux.lds.S | 46 ++++++++++++++++++--------------- arch/x86/kernel/vsyscall_64.c | 6 +--- arch/x86/mm/fault.c | 2 +- arch/x86/vdso/vdso.S | 1 + arch/x86/xen/enlighten.c | 1 + arch/x86/xen/mmu.c | 4 ++- 11 files changed, 64 insertions(+), 31 deletions(-) -- 1.7.6
This avoids an information leak to userspace. Signed-off-by: Andy Lutomirski <luto at mit.edu> --- arch/x86/vdso/vdso.S | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S index 1b979c1..01f5e3b 100644 --- a/arch/x86/vdso/vdso.S +++ b/arch/x86/vdso/vdso.S @@ -9,6 +9,7 @@ __PAGE_ALIGNED_DATA vdso_start: .incbin "arch/x86/vdso/vdso.so" vdso_end: + .align PAGE_SIZE /* extra data here leaks to userspace. */ .previous -- 1.7.6
Andy Lutomirski
2011-Jul-27 03:20 UTC
[PATCH 2/5] x86-64: Move the "user" vsyscall segment out of the data segment.
The kernel's loader doesn't seem to care, but gold complains. Signed-off-by: Andy Lutomirski <luto at mit.edu> Reported-by: Arkadiusz Miskiewicz <a.miskiewicz at gmail.com> --- arch/x86/kernel/vmlinux.lds.S | 36 ++++++++++++++++++------------------ 1 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 4aa9c54..e79fb39 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -154,6 +154,24 @@ SECTIONS #ifdef CONFIG_X86_64 + . = ALIGN(PAGE_SIZE); + __vvar_page = .; + + .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) { + + /* Place all vvars at the offsets in asm/vvar.h. */ +#define EMIT_VVAR(name, offset) \ + . = offset; \ + *(.vvar_ ## name) +#define __VVAR_KERNEL_LDS +#include <asm/vvar.h> +#undef __VVAR_KERNEL_LDS +#undef EMIT_VVAR + + } :data + + . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE); + #define VSYSCALL_ADDR (-10*1024*1024) #define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET) @@ -162,7 +180,6 @@ SECTIONS #define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0) #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) - . = ALIGN(4096); __vsyscall_0 = .; . = VSYSCALL_ADDR; @@ -185,23 +202,6 @@ SECTIONS #undef VVIRT_OFFSET #undef VVIRT - __vvar_page = .; - - .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) { - - /* Place all vvars at the offsets in asm/vvar.h. */ -#define EMIT_VVAR(name, offset) \ - . = offset; \ - *(.vvar_ ## name) -#define __VVAR_KERNEL_LDS -#include <asm/vvar.h> -#undef __VVAR_KERNEL_LDS -#undef EMIT_VVAR - - } :data - - . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE); - #endif /* CONFIG_X86_64 */ /* Init code and data - will be freed after init */ -- 1.7.6
Gold has trouble assigning numbers to the location counter inside of an output section description. The bug was triggered by 9fd67b4ed0714ab718f1f9bd14c344af336a6df7, which consolidated all of the vsyscall sections into a single section. The workaround is IMO still nicer than the old way of doing it. This produces an apparently valid kernel image and passes my vdso tests on both GNU ld version 2.21.51.0.6-2.fc15 20110118 and GNU gold (version 2.21.51.0.6-2.fc15 20110118) 1.10 as distributed by Fedora 15. Signed-off-by: Andy Lutomirski <luto at mit.edu> Reported-by: Arkadiusz Miskiewicz <a.miskiewicz at gmail.com> --- arch/x86/kernel/vmlinux.lds.S | 16 ++++++++++------ 1 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index e79fb39..8f3a265 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -158,10 +158,12 @@ SECTIONS __vvar_page = .; .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) { + /* work around gold bug 13023 */ + __vvar_beginning_hack = .; - /* Place all vvars at the offsets in asm/vvar.h. */ -#define EMIT_VVAR(name, offset) \ - . = offset; \ + /* Place all vvars at the offsets in asm/vvar.h. */ +#define EMIT_VVAR(name, offset) \ + . = __vvar_beginning_hack + offset; \ *(.vvar_ ## name) #define __VVAR_KERNEL_LDS #include <asm/vvar.h> @@ -184,15 +186,17 @@ SECTIONS . = VSYSCALL_ADDR; .vsyscall : AT(VLOAD(.vsyscall)) { + /* work around gold bug 13023 */ + __vsyscall_beginning_hack = .; *(.vsyscall_0) - . = 1024; + . = __vsyscall_beginning_hack + 1024; *(.vsyscall_1) - . = 2048; + . = __vsyscall_beginning_hack + 2048; *(.vsyscall_2) - . = 4096; /* Pad the whole page. */ + . = __vsyscall_beginning_hack + 4096; /* Pad the whole page. */ } :user =0xcc . = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE); -- 1.7.6
Xen needs special treatment for fixmaps. Signed-off-by: Andy Lutomirski <luto at mit.edu> Reported-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com> --- arch/x86/xen/mmu.c | 4 +++- 1 files changed, 3 insertions(+), 1 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f987bde..8cce339 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1916,6 +1916,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) # endif #else case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: + case VVAR_PAGE: #endif case FIX_TEXT_POKE0: case FIX_TEXT_POKE1: @@ -1956,7 +1957,8 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #ifdef CONFIG_X86_64 /* Replicate changes to map the vsyscall page into the user pagetable vsyscall mapping. */ - if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { + if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) || + idx == VVAR_PAGE) { unsigned long vaddr = __fix_to_virt(idx); set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); } -- 1.7.6
Three places in the kernel assume that the only long mode CPL 3 selector is __USER_CS. This is not true on Xen -- Xen's sysretq changes cs to the magic value 0xe033. Two of the places are corner cases, but as of "x86-64: Improve vsyscall emulation CS and RIP handling" (c9712944b2a12373cb6ff8059afcfb7e826a6c54), vsyscalls will segfault if called with Xen's extra CS selector. This causes a panic when older init builds die. It seems impossible to make Xen use __USER_CS reliably without taking a performance hit on every system call, so this fixes the tests instead with a new paravirt op. It's a little ugly because ptrace.h can't include paravirt.h. Signed-off-by: Andy Lutomirski <luto at mit.edu> Reported-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com> --- arch/x86/include/asm/desc.h | 4 ++-- arch/x86/include/asm/paravirt_types.h | 6 ++++++ arch/x86/include/asm/ptrace.h | 19 +++++++++++++++++++ arch/x86/kernel/paravirt.c | 4 ++++ arch/x86/kernel/step.c | 2 +- arch/x86/kernel/vsyscall_64.c | 6 +----- arch/x86/mm/fault.c | 2 +- arch/x86/xen/enlighten.c | 1 + 8 files changed, 35 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 7b439d9..41935fa 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -27,8 +27,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in desc->base2 = (info->base_addr & 0xff000000) >> 24; /* - * Don't allow setting of the lm bit. It is useless anyway - * because 64bit system calls require __USER_CS: + * Don't allow setting of the lm bit. It would confuse + * user_64bit_mode and would get overridden by sysret anyway. */ desc->l = 0; } diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 2c76521..8e8b9a4 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -41,6 +41,7 @@ #include <asm/desc_defs.h> #include <asm/kmap_types.h> +#include <asm/pgtable_types.h> struct page; struct thread_struct; @@ -63,6 +64,11 @@ struct paravirt_callee_save { struct pv_info { unsigned int kernel_rpl; int shared_kernel_pmd; + +#ifdef CONFIG_X86_64 + u16 extra_user_64bit_cs; /* __USER_CS if none */ +#endif + int paravirt_enabled; const char *name; }; diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 94e7618..3566454 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -131,6 +131,9 @@ struct pt_regs { #ifdef __KERNEL__ #include <linux/init.h> +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt_types.h> +#endif struct cpuinfo_x86; struct task_struct; @@ -187,6 +190,22 @@ static inline int v8086_mode(struct pt_regs *regs) #endif } +#ifdef CONFIG_X86_64 +static inline bool user_64bit_mode(struct pt_regs *regs) +{ +#ifndef CONFIG_PARAVIRT + /* + * On non-paravirt systems, this is the only long mode CPL 3 + * selector. We do not allow long mode selectors in the LDT. + */ + return regs->cs == __USER_CS; +#else + /* Headers are too twisted for this to go in paravirt.h. */ + return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs; +#endif +} +#endif + /* * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode * when it traps. The previous stack will be directly underneath the saved diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 613a793..d90272e 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -307,6 +307,10 @@ struct pv_info pv_info = { .paravirt_enabled = 0, .kernel_rpl = 0, .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ + +#ifdef CONFIG_X86_64 + .extra_user_64bit_cs = __USER_CS, +#endif }; struct pv_init_ops pv_init_ops = { diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 7977f0c..c346d11 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -74,7 +74,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) #ifdef CONFIG_X86_64 case 0x40 ... 0x4f: - if (regs->cs != __USER_CS) + if (!user_64bit_mode(regs)) /* 32-bit mode: register increment */ return 0; /* 64-bit mode: REX prefix */ diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index dda7dff..1725930 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -127,11 +127,7 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code) local_irq_enable(); - /* - * Real 64-bit user mode code has cs == __USER_CS. Anything else - * is bogus. - */ - if (regs->cs != __USER_CS) { + if (!user_64bit_mode(regs)) { /* * If we trapped from kernel mode, we might as well OOPS now * instead of returning to some random address and OOPSing diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 4d09df0..decd51a 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -105,7 +105,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, * but for now it's good enough to assume that long * mode only uses well known segments or kernel. */ - return (!user_mode(regs)) || (regs->cs == __USER_CS); + return (!user_mode(regs) || user_64bit_mode(regs)); #endif case 0x60: /* 0x64 thru 0x67 are valid prefixes in all modes. */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 974a528..a9c710a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -950,6 +950,7 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, static const struct pv_info xen_info __initconst = { .paravirt_enabled = 1, .shared_kernel_pmd = 0, + .extra_user_64bit_cs = FLAT_USER_CS64, .name = "Xen", }; -- 1.7.6
Konrad Rzeszutek Wilk
2011-Jul-27 12:59 UTC
[PATCH 0/5] Collected vdso/vsyscall fixes for 3.1
On Tue, Jul 26, 2011 at 11:20:34PM -0400, Andy Lutomirski wrote:> This fixes various problems that cropped up with the vdso patches. > > - Patch 1 fixes an information leak to userspace. > - Patches 2 and 3 fix the kernel build on gold. > - Patches 4 and 5 fix Xen (I hope). > > Konrad, could you could test these on Xen and run 'test_vsyscall test' [1]? > I don't have a usable Xen setup.Sure.
Konrad Rzeszutek Wilk
2011-Jul-27 13:06 UTC
[PATCH 4/5] x86-64/xen: Enable the vvar mapping
On Tue, Jul 26, 2011 at 11:20:38PM -0400, Andy Lutomirski wrote:> Xen needs special treatment for fixmaps.The description needs a bit more, for example: "Xen needs to handle the newly introduced VVAR_PAGE introduced by git commit 9fd67b4ed0714ab718f1f9bd14c344af336a6df7 "x86-64: Give vvars their own page" Otherwise we die during bootup with this: <and include snippets of the boot message crash> " With that included it looks good to me.> > Signed-off-by: Andy Lutomirski <luto at mit.edu> > Reported-by: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com> > --- > arch/x86/xen/mmu.c | 4 +++- > 1 files changed, 3 insertions(+), 1 deletions(-) > > diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c > index f987bde..8cce339 100644 > --- a/arch/x86/xen/mmu.c > +++ b/arch/x86/xen/mmu.c > @@ -1916,6 +1916,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) > # endif > #else > case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: > + case VVAR_PAGE: > #endif > case FIX_TEXT_POKE0: > case FIX_TEXT_POKE1: > @@ -1956,7 +1957,8 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) > #ifdef CONFIG_X86_64 > /* Replicate changes to map the vsyscall page into the user > pagetable vsyscall mapping. */ > - if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { > + if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) || > + idx == VVAR_PAGE) { > unsigned long vaddr = __fix_to_virt(idx); > set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); > } > -- > 1.7.6
On Wed, Jul 27, 2011 at 9:06 AM, Konrad Rzeszutek Wilk <konrad.wilk at oracle.com> wrote:> On Tue, Jul 26, 2011 at 11:20:38PM -0400, Andy Lutomirski wrote: >> Xen needs special treatment for fixmaps. > > The description needs a bit more, for example: > > "Xen needs to handle the newly introduced VVAR_PAGE introduced by > git commit 9fd67b4ed0714ab718f1f9bd14c344af336a6df7 > "x86-64: Give vvars their own page"Will do. --Andy
Konrad Rzeszutek Wilk
2011-Jul-27 14:57 UTC
[PATCH 0/5] Collected vdso/vsyscall fixes for 3.1
On Tue, Jul 26, 2011 at 11:20:34PM -0400, Andy Lutomirski wrote:> This fixes various problems that cropped up with the vdso patches. > > - Patch 1 fixes an information leak to userspace. > - Patches 2 and 3 fix the kernel build on gold. > - Patches 4 and 5 fix Xen (I hope). > > Konrad, could you could test these on Xen and run 'test_vsyscall test' [1]?They boot 64-bit guest succesfully. But I doesn't compile under 32-bit: home/konrad/ssd/linux/arch/x86/xen/enlighten.c:953: error: unknown field ?extra_user_64bit_cs? specified in initializer /home/konrad/ssd/linux/arch/x86/xen/enlighten.c:953: error: ?FLAT_USER_CS64? undeclared here (not in a function) Looks like it needs some #ifdef CONFIG_X86_64 magic.. and after applying that magic dust it compiles and it also boots as 32-bit (no surprise there).> I don't have a usable Xen setup.It is pretty easy to setup. Google for PVops Wiki and you will find wealth of information. FYI: I am gone next week so won't be able to test these patches.> > Also, I'd appreciate a review of patches 4 and 5 from some Xen/paravirt > people. > > [1] https://gitorious.org/linux-test-utils/linux-clock-testsGrrrrr.. g++ -o test_vsyscall -std=gnu++0x -lrt -ldl -O2 -Wall -mavx -g test_vsyscall.cc test_vsyscall.cc: In function ?int bench(int, char**)?: test_vsyscall.cc:205: error: expected primary-expression before ?[? token test_vsyscall.cc:205: error: expected primary-expression before ?]? token test_vsyscall.cc:206: error: expected primary-expression before ?[? token test_vsyscall.cc:206: error: expected primary-expression before ?]? token test_vsyscall.cc:207: error: expected primary-expression before ?[? token test_vsyscall.cc:207: error: expected primary-expression before ?]? token test_vsyscall.cc:211: error: expected primary-expression before ?[? token test_vsyscall.cc:211: error: expected primary-expression before ?]? token test_vsyscall.cc:213: error: expected primary-expression before ?[? token test_vsyscall.cc:213: error: expected primary-expression before ?]? token test_vsyscall.cc:214: error: expected primary-expression before ?[? token test_vsyscall.cc:214: error: expected primary-expression before ?]? token test_vsyscall.cc:218: error: expected primary-expression before ?[? token test_vsyscall.cc:218: error: expected primary-expression before ?]? token test_vsyscall.cc:219: error: expected primary-expression before ?[? token test_vsyscall.cc:219: error: expected primary-expression before ?]? token test_vsyscall.cc:222: error: expected primary-expression before ?[? token test_vsyscall.cc:222: error: expected primary-expression before ?]? token test_vsyscall.cc:203: warning: unused variable ?tv? test_vsyscall.cc:204: warning: unused variable ?tz? test_vsyscall.cc:210: warning: unused variable ?t? test_vsyscall.cc:217: warning: unused variable ?cpu? test_vsyscall.cc:217: warning: unused variable ?node? Is there a specific version of GCC I should be using? I seem to be using: g++ (GCC) 4.4.4 20100503 (Red Hat 4.4.4-2) Anyhow, removed the benchmark code and ran it on 64-bit: sh-4.1# /test_vsyscall test Testing gettimeofday... [ 109.552261] test_vsyscall[2462] trap invalid opcode ip:400c8d sp:7fff84fab470 error:0 in test_vsyscall[400000+2000] Illegal instruction sh-4.1# /test_vsyscall intcc About to execute int 0xcc from RIP = 400959 [ 114.137150] test_vsyscall[2463] illegal int 0xcc (exploit attempt?) ip:400959 cs:e033 sp:7fff8b328310 ax:2c si:0 di:7fff8b3280f0 Caught SIGSEGV: Segmentation fault (Signal sent by the kernel [(nil)])RIP = 400959 [This is on git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git #testing, which has todays linus/master and your patchset]
Possibly Parallel Threads
- [PATCH 0/5] Collected vdso/vsyscall fixes for 3.1
- [PATCH 0/5] Collected vdso/vsyscall fixes for 3.1
- [PATCH v2 0/6] Collected vdso/vsyscall fixes for 3.1
- [PATCH v2 0/6] Collected vdso/vsyscall fixes for 3.1
- [PATCH v2 0/6] Collected vdso/vsyscall fixes for 3.1