1: correct LDT checks
2: add address validity check to guest_map_l1e()
3: use {rd,wr}{fs,gs}base when available
4: check for canonical address before doing page walks

Signed-off-by: Jan Beulich <jbeulich@suse.com>
[PATCH 1/4] x86: correct LDT checks

- MMUEXT_SET_LDT should behave as similarly to the LLDT instruction as
  possible: fail only if the base address is non-canonical
- instead LDT descriptor accesses should fault if the descriptor
  address ends up being non-canonical (by ensuring this we at once
  avoid reading an entry from the mach-to-phys table and considering
  it a page table entry)
- fault propagation on using LDT selectors must distinguish #PF and
  #GP (the latter must be raised for a non-canonical descriptor
  address, which also applies to several other uses of
  propagate_page_fault(), and hence the problem is being fixed there)
- map_ldt_shadow_page() should properly wrap addresses for 32-bit VMs

At once remove the odd invocation of map_ldt_shadow_page() from the
MMUEXT_SET_LDT handler: There's nothing really telling us that the
first LDT page is going to be preferred over others.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -674,12 +674,7 @@ int arch_set_info_guest(
                 fixup_guest_code_selector(d, c.nat->trap_ctxt[i].cs);
         }

-        /* LDT safety checks. */
-        if ( ((c.nat->ldt_base & (PAGE_SIZE-1)) != 0) ||
-             (c.nat->ldt_ents > 8192) ||
-             !array_access_ok(c.nat->ldt_base,
-                              c.nat->ldt_ents,
-                              LDT_ENTRY_SIZE) )
+        if ( !__addr_ok(c.nat->ldt_base) )
             return -EINVAL;
     }
     else
@@ -692,15 +687,12 @@ int arch_set_info_guest(
         for ( i = 0; i < ARRAY_SIZE(c.cmp->trap_ctxt); i++ )
             fixup_guest_code_selector(d, c.cmp->trap_ctxt[i].cs);
-
-        /* LDT safety checks. */
-        if ( ((c.cmp->ldt_base & (PAGE_SIZE-1)) != 0) ||
-             (c.cmp->ldt_ents > 8192) ||
-             !compat_array_access_ok(c.cmp->ldt_base,
-                                     c.cmp->ldt_ents,
-                                     LDT_ENTRY_SIZE) )
-            return -EINVAL;
     }
+
+    /* LDT safety checks. */
+    if ( ((c(ldt_base) & (PAGE_SIZE - 1)) != 0) ||
+         (c(ldt_ents) > 8192) )
+        return -EINVAL;
 }

 v->fpu_initialised = !!(flags & VGCF_I387_VALID);
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -582,6 +582,8 @@ int map_ldt_shadow_page(unsigned int off

     BUG_ON(unlikely(in_irq()));

+    if ( is_pv_32bit_domain(d) )
+        gva = (u32)gva;
     guest_get_eff_kern_l1e(v, gva, &l1e);
     if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) )
         return 0;
@@ -3229,9 +3231,8 @@ long do_mmuext_op(
             MEM_LOG("ignoring SET_LDT hypercall from external domain");
             okay = 0;
         }
-        else if ( ((ptr & (PAGE_SIZE-1)) != 0) ||
-                  (ents > 8192) ||
-                  !array_access_ok(ptr, ents, LDT_ENTRY_SIZE) )
+        else if ( ((ptr & (PAGE_SIZE - 1)) != 0) || !__addr_ok(ptr) ||
+                  (ents > 8192) )
         {
             okay = 0;
             MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents);
@@ -3244,8 +3245,6 @@ long do_mmuext_op(
             curr->arch.pv_vcpu.ldt_base = ptr;
             curr->arch.pv_vcpu.ldt_ents = ents;
             load_LDT(curr);
-            if ( ents != 0 )
-                (void)map_ldt_shadow_page(0);
         }
         break;
     }
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1065,12 +1065,24 @@ static void reserved_bit_page_fault(
     show_execution_state(regs);
 }

-void propagate_page_fault(unsigned long addr, u16 error_code)
+struct trap_bounce *propagate_page_fault(unsigned long addr, u16 error_code)
 {
     struct trap_info *ti;
     struct vcpu *v = current;
     struct trap_bounce *tb = &v->arch.pv_vcpu.trap_bounce;

+    if ( unlikely(!is_canonical_address(addr)) )
+    {
+        ti = &v->arch.pv_vcpu.trap_ctxt[TRAP_gp_fault];
+        tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
+        tb->error_code = 0;
+        tb->cs = ti->cs;
+        tb->eip = ti->address;
+        if ( TI_GET_IF(ti) )
+            tb->flags |= TBF_INTERRUPT;
+        return tb;
+    }
+
     v->arch.pv_vcpu.ctrlreg[2] = addr;
     arch_set_cr2(v, addr);

@@ -1097,6 +1109,8 @@ void propagate_page_fault(unsigned long

     if ( unlikely(error_code & PFEC_reserved_bit) )
         reserved_bit_page_fault(addr, guest_cpu_user_regs());
+
+    return NULL;
 }

 static int handle_gdt_ldt_mapping_fault(
@@ -1130,13 +1144,16 @@ static int handle_gdt_ldt_mapping_fault(
     }
     else
     {
+        struct trap_bounce *tb;
+
         /* In hypervisor mode? Leave it to the #PF handler to fix up. */
         if ( !guest_mode(regs) )
             return 0;
-        /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */
-        propagate_page_fault(
-            curr->arch.pv_vcpu.ldt_base + offset,
-            regs->error_code);
+        /* In guest mode? Propagate fault to guest, with adjusted %cr2. */
+        tb = propagate_page_fault(curr->arch.pv_vcpu.ldt_base + offset,
+                                  regs->error_code);
+        if ( tb )
+            tb->error_code = ((u16)offset & ~3) | 4;
     }
 }
 else
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -555,7 +555,7 @@ int new_guest_cr3(unsigned long pfn);
 void make_cr3(struct vcpu *v, unsigned long mfn);
 void update_cr3(struct vcpu *v);
 int vcpu_destroy_pagetables(struct vcpu *);
-void propagate_page_fault(unsigned long addr, u16 error_code);
+struct trap_bounce *propagate_page_fault(unsigned long addr, u16 error_code);
 void *do_page_walk(struct vcpu *v, unsigned long addr);
 int __sync_local_execstate(void);
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -386,7 +386,8 @@ guest_get_eff_l1e(struct vcpu *v, unsign
     if ( likely(!paging_mode_translate(v->domain)) )
     {
         ASSERT(!paging_mode_external(v->domain));
-        if ( __copy_from_user(eff_l1e,
+        if ( !__addr_ok(addr) ||
+             __copy_from_user(eff_l1e,
                               &__linear_l1_table[l1_linear_offset(addr)],
                               sizeof(l1_pgentry_t)) != 0 )
             *(l1_pgentry_t *)eff_l1e = l1e_empty();
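A note on the error code installed via "tb->error_code = ((u16)offset & ~3) | 4"
above: x86 selector error codes pack the selector index into bits 3-15, with
EXT (bit 0), IDT (bit 1), and TI (bit 2) flags below. A byte offset into the
LDT is just index * 8, so masking off the low two bits and setting TI yields
the same form the CPU itself reports for descriptor faults. A minimal sketch
of that encoding (the helper name ldt_gp_error_code() is illustrative, not
part of the patch):

    #include <stdint.h>

    /*
     * Selector error code layout:
     *   bit 0      EXT - fault caused by an external event
     *   bit 1      IDT - selector references the IDT
     *   bit 2      TI  - selector references the LDT (IDT bit clear)
     *   bits 3-15  selector index
     */
    static inline uint16_t ldt_gp_error_code(uint16_t ldt_byte_offset)
    {
        /* offset == index * 8, so this evaluates to (index << 3) | TI. */
        return (uint16_t)((ldt_byte_offset & ~3u) | 4u);
    }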
Jan Beulich
2013-Oct-10 13:54 UTC
[PATCH 2/4] x86: add address validity check to guest_map_l1e()
Just like for guest_get_eff_l1e() this prevents accessing as page
tables (and with the wrong memory attribute) internal data inside Xen
happening to be mapped with 1Gb pages.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -360,7 +360,8 @@ guest_map_l1e(struct vcpu *v, unsigned l
         return paging_get_hostmode(v)->guest_map_l1e(v, addr, gl1mfn);

     /* Find this l1e and its enclosing l1mfn in the linear map */
-    if ( __copy_from_user(&l2e,
+    if ( !__addr_ok(addr) ||
+         __copy_from_user(&l2e,
                           &__linear_l2_table[l2_linear_offset(addr)],
                           sizeof(l2_pgentry_t)) != 0 )
         return NULL;
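For context, __addr_ok() on 64-bit Xen of this vintage is essentially a
"not inside the Xen-reserved area" test: an address passes if it lies in the
positive half of the 48-bit virtual address space or above the hypervisor
region. A self-contained sketch under those assumptions (HV_VIRT_END is an
illustrative stand-in for Xen's HYPERVISOR_VIRT_END):

    #include <stdbool.h>

    /* Illustrative stand-in for Xen's HYPERVISOR_VIRT_END. */
    #define HV_VIRT_END 0xffff880000000000UL

    /*
     * Accept addresses below 2^47 (the lower canonical half) or above
     * the Xen-reserved area; anything in between would index Xen's own
     * mappings when used as a linear page-table offset, which is what
     * the __addr_ok() checks added here guard against.
     */
    static inline bool addr_ok(unsigned long addr)
    {
        return addr < (1UL << 47) || addr >= HV_VIRT_END;
    }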
Jan Beulich
2013-Oct-10 13:54 UTC
[PATCH 3/4] x86: use {rd,wr}{fs,gs}base when available
... as being intended to be faster than MSR reads/writes.

In the case of emulate_privileged_op() also use these in favor of the
cached (but possibly stale) addresses from arch.pv_vcpu. This allows
entirely removing the code that was the subject of XSA-67.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -31,6 +31,7 @@ $(call cc-options-add,CFLAGS,CC,$(EMBEDD
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
 $(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
 $(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
+$(call as-insn-check,CFLAGS,CC,"rdfsbase %rax",-DHAVE_GAS_FSGSBASE)

 ifeq ($(supervisor_mode_kernel),y)
 CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
--- a/xen/arch/x86/acpi/suspend.c
+++ b/xen/arch/x86/acpi/suspend.c
@@ -27,8 +27,8 @@ void save_rest_processor_state(void)
     asm volatile (
         "movw %%ds,(%0); movw %%es,2(%0); movw %%fs,4(%0); movw %%gs,6(%0)"
         : : "r" (saved_segs) : "memory" );
-    rdmsrl(MSR_FS_BASE, saved_fs_base);
-    rdmsrl(MSR_GS_BASE, saved_gs_base);
+    saved_fs_base = rdfsbase();
+    saved_gs_base = rdgsbase();
     rdmsrl(MSR_SHADOW_GS_BASE, saved_kernel_gs_base);
     rdmsrl(MSR_CSTAR, saved_cstar);
     rdmsrl(MSR_LSTAR, saved_lstar);
@@ -56,8 +56,8 @@ void restore_rest_processor_state(void)
                  X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_TF,
                  0U);

-    wrmsrl(MSR_FS_BASE, saved_fs_base);
-    wrmsrl(MSR_GS_BASE, saved_gs_base);
+    wrfsbase(saved_fs_base);
+    wrgsbase(saved_gs_base);
     wrmsrl(MSR_SHADOW_GS_BASE, saved_kernel_gs_base);

     if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1092,7 +1092,7 @@ static void load_segments(struct vcpu *n
     {
         /* This can only be non-zero if selector is NULL. */
         if ( n->arch.pv_vcpu.fs_base )
-            wrmsrl(MSR_FS_BASE, n->arch.pv_vcpu.fs_base);
+            wrfsbase(n->arch.pv_vcpu.fs_base);

         /* Most kernels have non-zero GS base, so don't bother testing. */
         /* (This is also a serialising instruction, avoiding AMD erratum #88.) */
@@ -1100,7 +1100,7 @@ static void load_segments(struct vcpu *n

         /* This can only be non-zero if selector is NULL. */
         if ( n->arch.pv_vcpu.gs_base_user )
-            wrmsrl(MSR_GS_BASE, n->arch.pv_vcpu.gs_base_user);
+            wrgsbase(n->arch.pv_vcpu.gs_base_user);

         /* If in kernel mode then switch the GS bases around. */
         if ( (n->arch.flags & TF_kernel_mode) )
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1264,6 +1264,9 @@ void __init __start_xen(unsigned long mb
     if ( cpu_has_smep )
         set_in_cr4(X86_CR4_SMEP);

+    if ( cpu_has_fsgsbase )
+        set_in_cr4(X86_CR4_FSGSBASE);
+
     local_irq_enable();

     pt_pci_init();
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1985,28 +1985,18 @@ static int emulate_privileged_op(struct
         }
         else
         {
-            if ( lm_ovr == lm_seg_none || data_sel < 4 )
+            switch ( lm_ovr )
             {
-                switch ( lm_ovr )
-                {
-                case lm_seg_none:
-                    data_base = 0UL;
-                    break;
-                case lm_seg_fs:
-                    data_base = v->arch.pv_vcpu.fs_base;
-                    break;
-                case lm_seg_gs:
-                    if ( guest_kernel_mode(v, regs) )
-                        data_base = v->arch.pv_vcpu.gs_base_kernel;
-                    else
-                        data_base = v->arch.pv_vcpu.gs_base_user;
-                    break;
-                }
+            default:
+                data_base = 0UL;
+                break;
+            case lm_seg_fs:
+                data_base = rdfsbase();
+                break;
+            case lm_seg_gs:
+                data_base = rdgsbase();
+                break;
             }
-            else if ( !read_descriptor(data_sel, v, regs,
-                                       &data_base, &data_limit, &ar, 0) ||
-                      !(ar & _SEGMENT_S) || !(ar & _SEGMENT_P) )
-                goto fail;
             data_limit = ~0UL;
             ar = _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P;
         }
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -457,12 +457,12 @@ unsigned long pv_guest_cr4_fixup(const s
     (((v)->arch.pv_vcpu.ctrlreg[4]                          \
       | (mmu_cr4_features                                   \
          & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP |      \
-            X86_CR4_OSXSAVE))                               \
+            X86_CR4_OSXSAVE | X86_CR4_FSGSBASE))            \
       | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0))         \
      & ~X86_CR4_DE)
 #define real_cr4_to_pv_guest_cr4(c)                         \
-    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD        \
-             | X86_CR4_OSXSAVE | X86_CR4_SMEP))
+    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD |      \
+             X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE))

 void domain_cpuid(struct domain *d,
                   unsigned int  input,
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -9,6 +9,7 @@
 #include <xen/percpu.h>
 #include <xen/errno.h>
 #include <asm/asm_defns.h>
+#include <asm/cpufeature.h>

 #define rdmsr(msr,val1,val2) \
      __asm__ __volatile__("rdmsr" \
@@ -97,6 +98,61 @@ static inline int wrmsr_safe(unsigned in
                           : "=a" (low), "=d" (high) \
                           : "c" (counter))

+static inline unsigned long rdfsbase(void)
+{
+    unsigned long base;
+
+    if ( cpu_has_fsgsbase )
+#ifdef HAVE_GAS_FSGSBASE
+        asm volatile ( "rdfsbase %0" : "=r" (base) );
+#else
+        asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc0" : "=a" (base) );
+#endif
+    else
+        rdmsrl(MSR_FS_BASE, base);
+
+    return base;
+}
+
+static inline unsigned long rdgsbase(void)
+{
+    unsigned long base;
+
+    if ( cpu_has_fsgsbase )
+#ifdef HAVE_GAS_FSGSBASE
+        asm volatile ( "rdgsbase %0" : "=r" (base) );
+#else
+        asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc8" : "=a" (base) );
+#endif
+    else
+        rdmsrl(MSR_GS_BASE, base);
+
+    return base;
+}
+
+static inline void wrfsbase(unsigned long base)
+{
+    if ( cpu_has_fsgsbase )
+#ifdef HAVE_GAS_FSGSBASE
+        asm volatile ( "wrfsbase %0" :: "r" (base) );
+#else
+        asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd0" :: "a" (base) );
+#endif
+    else
+        wrmsrl(MSR_FS_BASE, base);
+}
+
+static inline void wrgsbase(unsigned long base)
+{
+    if ( cpu_has_fsgsbase )
+#ifdef HAVE_GAS_FSGSBASE
+        asm volatile ( "wrgsbase %0" :: "r" (base) );
+#else
+        asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd8" :: "a" (base) );
+#endif
+    else
+        wrmsrl(MSR_GS_BASE, base);
+}
+
 DECLARE_PER_CPU(u64, efer);
 u64 read_efer(void);
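The new helpers only work once CR4.FSGSBASE (bit 16) is set, which the
setup.c hunk does when CPUID advertises the feature (leaf 7, sub-leaf 0,
EBX bit 0). A standalone detection sketch, using GCC's __get_cpuid_count()
rather than Xen's cpu_has_fsgsbase plumbing:

    #include <cpuid.h>
    #include <stdbool.h>

    /*
     * FSGSBASE availability is CPUID.(EAX=7,ECX=0):EBX[0]; the OS must
     * additionally set CR4.FSGSBASE before RD/WR{FS,GS}BASE can be
     * executed without faulting.
     */
    static bool cpu_supports_fsgsbase(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if ( !__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) )
            return false;
        return ebx & 1u;    /* EBX bit 0: FSGSBASE */
    }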
Jan Beulich
2013-Oct-10 13:55 UTC
[PATCH 4/4] x86: check for canonical address before doing page walks
... as there doesn't really exist any valid mapping for them.

Particularly in the case of do_page_walk() this also avoids returning
non-NULL for such invalid input.

Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -73,7 +73,7 @@ void *do_page_walk(struct vcpu *v, unsig
     l2_pgentry_t l2e, *l2t;
     l1_pgentry_t l1e, *l1t;

-    if ( is_hvm_vcpu(v) )
+    if ( is_hvm_vcpu(v) || !is_canonical_address(addr) )
         return NULL;

     l4t = map_domain_page(mfn);
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -170,6 +170,8 @@ void show_page_walk(unsigned long addr)
     l1_pgentry_t l1e, *l1t;

     printk("Pagetable walk from %016lx:\n", addr);
+    if ( !is_canonical_address(addr) )
+        return;

     l4t = map_domain_page(mfn);
     l4e = l4t[l4_table_offset(addr)];
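For reference, with 48-bit virtual addresses is_canonical_address() boils
down to checking that bits 63:47 are all copies of bit 47. A minimal sketch
of an equivalent test:

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * An address is canonical iff sign-extending its low 48 bits
     * reproduces the original value, i.e. bits 63:47 all match.
     */
    static inline bool canonical_address(uint64_t addr)
    {
        return ((int64_t)(addr << 16) >> 16) == (int64_t)addr;
    }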
Andrew Cooper
2013-Oct-10 13:57 UTC
Re: [PATCH 2/4] x86: add address validity check to guest_map_l1e()
On 10/10/13 14:54, Jan Beulich wrote:
> Just like for guest_get_eff_l1e() this prevents accessing as page
> tables (and with the wrong memory attribute) internal data inside Xen
> happening to be mapped with 1Gb pages.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper@citrix.com>
Andrew Cooper
2013-Oct-10 14:01 UTC
Re: [PATCH 4/4] x86: check for canonical address before doing page walks
On 10/10/13 14:55, Jan Beulich wrote:
> ... as there doesn't really exist any valid mapping for them.
>
> Particularly in the case of do_page_walk() this also avoids returning
> non-NULL for such invalid input.
>
> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/x86_64/traps.c
> +++ b/xen/arch/x86/x86_64/traps.c
> @@ -170,6 +170,8 @@ void show_page_walk(unsigned long addr)
>      l1_pgentry_t l1e, *l1t;
>
>      printk("Pagetable walk from %016lx:\n", addr);
> +    if ( !is_canonical_address(addr) )
> +        return;
>
>      l4t = map_domain_page(mfn);
>      l4e = l4t[l4_table_offset(addr)];

I was intending something a little more like

    if ( !is_canonical_address(addr) )
    {
        printk(" Not canonical\n");
        return;
    }

So it doesn't appear on the console that show_page_walk() got stuck
somewhere.

~Andrew
Jan Beulich
2013-Oct-10 14:10 UTC
Re: [PATCH 4/4] x86: check for canonical address before doing page walks
>>> On 10.10.13 at 16:01, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> I was intending something a little more like
>>
>>     if ( !is_canonical_address(addr) )
>>     {
>>         printk(" Not canonical\n");
>>         return;
>>     }
>>
>> So it doesn't appear on the console that show_page_walk() got stuck
>> somewhere.

Seems pretty pointless - the address is being printed, and anyone
(I would hope) can easily see whether it is non-canonical.

Jan
Andrew Cooper
2013-Oct-10 14:15 UTC
Re: [PATCH 3/4] x86: use {rd,wr}{fs,gs}base when available
On 10/10/13 14:54, Jan Beulich wrote:
> ... as being intended to be faster than MSR reads/writes.
>
> In the case of emulate_privileged_op() also use these in favor of the
> cached (but possibly stale) addresses from arch.pv_vcpu. This allows
> entirely removing the code that was the subject of XSA-67.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Andrew Cooper
2013-Oct-10 14:17 UTC
Re: [PATCH 4/4] x86: check for canonical address before doing page walks
On 10/10/13 15:10, Jan Beulich wrote:
>>>> On 10.10.13 at 16:01, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> I was intending something a little more like
>>
>>     if ( !is_canonical_address(addr) )
>>     {
>>         printk(" Not canonical\n");
>>         return;
>>     }
>>
>> So it doesn't appear on the console that show_page_walk() got stuck
>> somewhere.
> Seems pretty pointless - the address is being printed, and anyone
> (I would hope) can easily see whether it is non-canonical.
>
> Jan

True I suppose, although it would start getting harder if/when the
non-canonical section shrinks in size.

I would prefer the printk(), but am not overly fussed and the other fix
is certainly good, so

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
On 10/10/2013 14:49, "Jan Beulich" <JBeulich@suse.com> wrote:

> 1: correct LDT checks
> 2: add address validity check to guest_map_l1e()
> 3: use {rd,wr}{fs,gs}base when available
> 4: check for canonical address before doing page walks
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

All of them.

Acked-by: Keir Fraser <keir@xen.org>