From 09743a7516a37e93034dd7895d1730fe5f400a2f Mon Sep 17 00:00:00 2001 From: Liu Jinsong <jinsong.liu@intel.com> Date: Mon, 25 Nov 2013 03:32:34 +0800 Subject: [PATCH 3/4 V2] X86: MPX IA32_BNDCFGS msr handle When MPX supported, a new guest-state field for IA32_BNDCFGS is added to the VMCS. In addition, two new controls are added: - a VM-exit control called "clear BNDCFGS" - a VM-entry control called "load BNDCFGS." VM exits always save IA32_BNDCFGS into BNDCFGS field of VMCS. Signed-off-by: Xudong Hao <xudong.hao@intel.com> Reviewed-by: Liu Jinsong <jinsong.liu@intel.com> Unlikely, but in case buggy VMX ucode, it also emulated the case when rdmsr/wrmsr intercepted by hypervisor, via adding entries at vmexit msr store area/vmentry msr load area, and vmexit msr load area. Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com> Suggested-by: Jan Beulich <jbeulich@suse.com> Signed-off-by: Liu Jinsong <jinsong.liu@intel.com> --- xen/arch/x86/hvm/hvm.c | 27 +++++++++++++++++++++++++++ xen/arch/x86/hvm/vmx/vmcs.c | 33 +++++++++++++++++++++++++++++++-- xen/include/asm-x86/cpufeature.h | 2 ++ xen/include/asm-x86/hvm/vmx/vmcs.h | 2 ++ xen/include/asm-x86/msr-index.h | 2 ++ 5 files changed, 64 insertions(+), 2 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 9c88c73..070064c 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -248,6 +248,21 @@ int hvm_set_guest_pat(struct vcpu *v, u64 guest_pat) return 1; } +static bool_t bndcfgs_invalid(u64 msr_content) +{ + /* BNDCFGS MSR reserved bits (11 ~ 2) must be zero */ + if ( msr_content & 0xffc ) + return 1; + + /* Canonical address reserved bits must be zero */ + if ( hvm_long_mode_enabled(current) ) + /* 48b linear address for x86_64 guest */ + return !!(msr_content & (~(u64)0 << 48) ); + else + /* 32b linear address for x86_32 (include PAE) guest */ + return !!(msr_content & (~(u64)0 << 32) ); +} + void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc) { uint64_t tsc; @@ -3010,6 
+3025,12 @@ int hvm_msr_read_intercept(unsigned int msr, uint64_t *msr_content) hvm_get_guest_pat(v, msr_content); break; + case MSR_IA32_BNDCFGS: + if ( !cpu_has_mpx ) + goto gp_fault; + vmx_read_guest_msr(MSR_IA32_BNDCFGS, msr_content); + break; + case MSR_MTRRcap: if ( !mtrr ) goto gp_fault; @@ -3133,6 +3154,12 @@ int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content) goto gp_fault; break; + case MSR_IA32_BNDCFGS: + if ( !cpu_has_mpx || bndcfgs_invalid(msr_content) ) + goto gp_fault; + vmx_write_guest_msr(MSR_IA32_BNDCFGS, msr_content); + break; + case MSR_MTRRcap: if ( !mtrr ) goto gp_fault; diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c index 290b42f..1114ce7 100644 --- a/xen/arch/x86/hvm/vmx/vmcs.c +++ b/xen/arch/x86/hvm/vmx/vmcs.c @@ -270,7 +270,8 @@ static int vmx_init_vmcs_config(void) } min = VM_EXIT_ACK_INTR_ON_EXIT; - opt = VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_LOAD_HOST_PAT; + opt = VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_LOAD_HOST_PAT | + VM_EXIT_CLEAR_BNDCFGS; min |= VM_EXIT_IA32E_MODE; _vmx_vmexit_control = adjust_vmx_controls( "VMExit Control", min, opt, MSR_IA32_VMX_EXIT_CTLS, &mismatch); @@ -284,7 +285,7 @@ static int vmx_init_vmcs_config(void) _vmx_pin_based_exec_control &= ~ PIN_BASED_POSTED_INTERRUPT; min = 0; - opt = VM_ENTRY_LOAD_GUEST_PAT; + opt = VM_ENTRY_LOAD_GUEST_PAT | VM_ENTRY_LOAD_BNDCFGS; _vmx_vmentry_control = adjust_vmx_controls( "VMEntry Control", min, opt, MSR_IA32_VMX_ENTRY_CTLS, &mismatch); @@ -955,6 +956,34 @@ static int construct_vmcs(struct vcpu *v) vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, MSR_TYPE_R | MSR_TYPE_W); if ( paging_mode_hap(d) && (!iommu_enabled || iommu_snoop) ) vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, MSR_TYPE_R | MSR_TYPE_W); + + if ( cpu_has_mpx ) + { + /* + * When MPX supported, a new guest-state field for IA32_BNDCFGS + * is added to the VMCS. 
In addition, two new controls are added: + * - a VM-exit control called "clear BNDCFGS" + * - a VM-entry control called "load BNDCFGS." + * VM exits always save IA32_BNDCFGS into BNDCFGS field of VMCS. + */ + if ( likely((vmexit_ctl & VM_EXIT_CLEAR_BNDCFGS) && + (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)) ) + { + vmx_disable_intercept_for_msr(v, MSR_IA32_BNDCFGS, + MSR_TYPE_R | MSR_TYPE_W); + } + /* Unlikely, just in case buggy VMX ucode */ + else + { + int ret; + ret = vmx_add_guest_msr(MSR_IA32_BNDCFGS); + if ( ret ) + return ret; + ret = vmx_add_host_load_msr(MSR_IA32_BNDCFGS); + if ( ret ) + return ret; + } + } } /* I/O access bitmap. */ diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h index 1cfaf94..930dc9b 100644 --- a/xen/include/asm-x86/cpufeature.h +++ b/xen/include/asm-x86/cpufeature.h @@ -148,6 +148,7 @@ #define X86_FEATURE_INVPCID (7*32+10) /* Invalidate Process Context ID */ #define X86_FEATURE_RTM (7*32+11) /* Restricted Transactional Memory */ #define X86_FEATURE_NO_FPU_SEL (7*32+13) /* FPU CS/DS stored as zero */ +#define X86_FEATURE_MPX (7*32+14) /* Memory Protection Extensions */ #define X86_FEATURE_SMAP (7*32+20) /* Supervisor Mode Access Prevention */ #define cpu_has(c, bit) test_bit(bit, (c)->x86_capability) @@ -197,6 +198,7 @@ #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) #define cpu_has_lwp boot_cpu_has(X86_FEATURE_LWP) +#define cpu_has_mpx boot_cpu_has(X86_FEATURE_MPX) #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h index ebaba5c..75cd653 100644 --- a/xen/include/asm-x86/hvm/vmx/vmcs.h +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h @@ -186,6 +186,7 @@ extern u32 vmx_pin_based_exec_control; #define VM_EXIT_SAVE_GUEST_EFER 0x00100000 #define VM_EXIT_LOAD_HOST_EFER 0x00200000 #define VM_EXIT_SAVE_PREEMPT_TIMER 0x00400000 +#define VM_EXIT_CLEAR_BNDCFGS 0x00800000 
extern u32 vmx_vmexit_control; #define VM_ENTRY_IA32E_MODE 0x00000200 @@ -194,6 +195,7 @@ extern u32 vmx_vmexit_control; #define VM_ENTRY_LOAD_PERF_GLOBAL_CTRL 0x00002000 #define VM_ENTRY_LOAD_GUEST_PAT 0x00004000 #define VM_ENTRY_LOAD_GUEST_EFER 0x00008000 +#define VM_ENTRY_LOAD_BNDCFGS 0x00010000 extern u32 vmx_vmentry_control; #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h index e597a28..ccad1ab 100644 --- a/xen/include/asm-x86/msr-index.h +++ b/xen/include/asm-x86/msr-index.h @@ -56,6 +56,8 @@ #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 +#define MSR_IA32_BNDCFGS 0x00000D90 + #define MSR_MTRRfix64K_00000 0x00000250 #define MSR_MTRRfix16K_80000 0x00000258 #define MSR_MTRRfix16K_A0000 0x00000259 -- 1.7.1 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
>>> On 25.11.13 at 09:25, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > +static bool_t bndcfgs_invalid(u64 msr_content) > +{ > + /* BNDCFGS MSR reserved bits (11 ~ 2) must be zero */ > + if ( msr_content & 0xffc ) > + return 1; > + > + /* Canonical address reserved bits must be zero */ > + if ( hvm_long_mode_enabled(current) ) > + /* 48b linear address for x86_64 guest */ > + return !!(msr_content & (~(u64)0 << 48) );!is_canonical_address()> + else > + /* 32b linear address for x86_32 (include PAE) guest */ > + return !!(msr_content & (~(u64)0 << 32) );!!(msr_content >> 32)> @@ -3010,6 +3025,12 @@ int hvm_msr_read_intercept(unsigned int msr, uint64_t *msr_content) > hvm_get_guest_pat(v, msr_content); > break; > > + case MSR_IA32_BNDCFGS: > + if ( !cpu_has_mpx )Wasn''t it you who started to properly use hvm_cpuid() for cases like this? We''re not after host capabilities here, but after what the guest is supposed to (not) use.> @@ -955,6 +956,34 @@ static int construct_vmcs(struct vcpu *v) > vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, MSR_TYPE_R | MSR_TYPE_W); > if ( paging_mode_hap(d) && (!iommu_enabled || iommu_snoop) ) > vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, MSR_TYPE_R | MSR_TYPE_W); > + > + if ( cpu_has_mpx ) > + { > + /* > + * When MPX supported, a new guest-state field for IA32_BNDCFGS > + * is added to the VMCS. In addition, two new controls are added: > + * - a VM-exit control called "clear BNDCFGS" > + * - a VM-entry control called "load BNDCFGS." > + * VM exits always save IA32_BNDCFGS into BNDCFGS field of VMCS. 
> + */ + if ( likely((vmexit_ctl & VM_EXIT_CLEAR_BNDCFGS) && + (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)) ) + { + vmx_disable_intercept_for_msr(v, MSR_IA32_BNDCFGS, + MSR_TYPE_R | MSR_TYPE_W); + } + /* Unlikely, just in case buggy VMX ucode */ + else + { + int ret; + ret = vmx_add_guest_msr(MSR_IA32_BNDCFGS); + if ( ret ) + return ret; + ret = vmx_add_host_load_msr(MSR_IA32_BNDCFGS); + if ( ret ) + return ret;You can't just return here - a vmx_vmcs_exit() is needed on any exit path in the middle of this function. Jan
Jan Beulich wrote:>>>> On 25.11.13 at 09:25, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >> +static bool_t bndcfgs_invalid(u64 msr_content) >> +{ >> + /* BNDCFGS MSR reserved bits (11 ~ 2) must be zero */ >> + if ( msr_content & 0xffc ) >> + return 1; >> + >> + /* Canonical address reserved bits must be zero */ >> + if ( hvm_long_mode_enabled(current) ) >> + /* 48b linear address for x86_64 guest */ >> + return !!(msr_content & (~(u64)0 << 48) ); > > !is_canonical_address()Not simply is_canonical_address() check, per MPX doc 9.3.5.1, It will #GP if canonical address reserved bits (must be zero) check fails> >> + else >> + /* 32b linear address for x86_32 (include PAE) guest */ >> + return !!(msr_content & (~(u64)0 << 32) ); > > !!(msr_content >> 32) > >> @@ -3010,6 +3025,12 @@ int hvm_msr_read_intercept(unsigned int msr, >> uint64_t *msr_content) hvm_get_guest_pat(v, msr_content); >> break; >> >> + case MSR_IA32_BNDCFGS: >> + if ( !cpu_has_mpx ) > > Wasn''t it you who started to properly use hvm_cpuid() for cases like > this? We''re not after host capabilities here, but after what the guest > is supposed to (not) use.When malicious or flawed guest access reserved or unimplemented msr, I think inject #GP is better than silently ignore?> >> @@ -955,6 +956,34 @@ static int construct_vmcs(struct vcpu *v) >> vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, >> MSR_TYPE_R | MSR_TYPE_W); if ( paging_mode_hap(d) && >> (!iommu_enabled || iommu_snoop) ) >> vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, MSR_TYPE_R | >> MSR_TYPE_W); + + if ( cpu_has_mpx ) + { >> + /* >> + * When MPX supported, a new guest-state field for >> IA32_BNDCFGS + * is added to the VMCS. In addition, two >> new controls are added: + * - a VM-exit control called >> "clear BNDCFGS" + * - a VM-entry control called "load >> BNDCFGS." + * VM exits always save IA32_BNDCFGS into >> BNDCFGS field of VMCS. 
+ */ + if ( >> likely((vmexit_ctl & VM_EXIT_CLEAR_BNDCFGS) && + >> (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)) ) + { + >> vmx_disable_intercept_for_msr(v, MSR_IA32_BNDCFGS, + >> MSR_TYPE_R | MSR_TYPE_W); + } + /* Unlikely, >> just in case buggy VMX ucode */ + else + { >> + int ret; >> + ret = vmx_add_guest_msr(MSR_IA32_BNDCFGS); + >> if ( ret ) + return ret; >> + ret = vmx_add_host_load_msr(MSR_IA32_BNDCFGS); + >> if ( ret ) + return ret; > > You can''t just return here - a vmx_vmcs_exit() is needed on any > exit path in the middle of this function. >Yes. Thanks, Jinsong
>>> On 26.11.13 at 11:11, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > Jan Beulich wrote: >>>>> On 25.11.13 at 09:25, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >>> +static bool_t bndcfgs_invalid(u64 msr_content) >>> +{ >>> + /* BNDCFGS MSR reserved bits (11 ~ 2) must be zero */ >>> + if ( msr_content & 0xffc ) >>> + return 1; >>> + >>> + /* Canonical address reserved bits must be zero */ >>> + if ( hvm_long_mode_enabled(current) ) >>> + /* 48b linear address for x86_64 guest */ >>> + return !!(msr_content & (~(u64)0 << 48) ); >> >> !is_canonical_address() > > Not simply is_canonical_address() check, per MPX doc 9.3.5.1, > It will #GP if canonical address reserved bits (must be zero) check failsYou''re checking the reserved bits (2...11) above. For the address portion it says "The base of bound directory is a 4K page aligned linear address, and is always in canonical form. Any load into BNDCFGx (XRSTOR or WRMSR) ensures that the highest implemented bit of the linear address is sign extended to guarantee the canonicality of this address", which to me means _if_ you check anything here, then you want to check the address for being canonical.>>> + else >>> + /* 32b linear address for x86_32 (include PAE) guest */ >>> + return !!(msr_content & (~(u64)0 << 32) ); >> >> !!(msr_content >> 32) >> >>> @@ -3010,6 +3025,12 @@ int hvm_msr_read_intercept(unsigned int msr, >>> uint64_t *msr_content) hvm_get_guest_pat(v, msr_content); >>> break; >>> >>> + case MSR_IA32_BNDCFGS: >>> + if ( !cpu_has_mpx ) >> >> Wasn''t it you who started to properly use hvm_cpuid() for cases like >> this? We''re not after host capabilities here, but after what the guest >> is supposed to (not) use. > > When malicious or flawed guest access reserved or unimplemented msr, I think > inject #GP is better than silently ignore?I didn''t say "silently ignore"; in fact, I asked you to _tighten_ the check (hvm_cpuid() shouldn''t return the respective flag set when !cpu_has_mpx). Jan
Jan Beulich wrote:>>>> On 26.11.13 at 11:11, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >> Jan Beulich wrote: >>>>>> On 25.11.13 at 09:25, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>> wrote: >>>> +static bool_t bndcfgs_invalid(u64 msr_content) >>>> +{ >>>> + /* BNDCFGS MSR reserved bits (11 ~ 2) must be zero */ >>>> + if ( msr_content & 0xffc ) >>>> + return 1; >>>> + >>>> + /* Canonical address reserved bits must be zero */ >>>> + if ( hvm_long_mode_enabled(current) ) >>>> + /* 48b linear address for x86_64 guest */ >>>> + return !!(msr_content & (~(u64)0 << 48) ); >>> >>> !is_canonical_address() >> >> Not simply is_canonical_address() check, per MPX doc 9.3.5.1, >> It will #GP if canonical address reserved bits (must be zero) check >> fails > > You''re checking the reserved bits (2...11) above. For the address > portion it says "The base of bound directory is a 4K page aligned > linear address, and is always in canonical form. Any load into BNDCFGx > (XRSTOR or WRMSR) ensures that the highest implemented bit of > the linear address is sign extended to guarantee the canonicality > of this address", which to me means _if_ you check anything here, > then you want to check the address for being canonical. > >>>> + else >>>> + /* 32b linear address for x86_32 (include PAE) guest */ >>>> + return !!(msr_content & (~(u64)0 << 32) ); >>> >>> !!(msr_content >> 32)If so, canonical check for x86-32 (and pae) -- bit63~32 should not always be zero, but is sign extension of bit 31?>>> >>>> @@ -3010,6 +3025,12 @@ int hvm_msr_read_intercept(unsigned int msr, >>>> uint64_t *msr_content) hvm_get_guest_pat(v, msr_content); >>>> break; >>>> >>>> + case MSR_IA32_BNDCFGS: >>>> + if ( !cpu_has_mpx ) >>> >>> Wasn''t it you who started to properly use hvm_cpuid() for cases like >>> this? We''re not after host capabilities here, but after what the >>> guest is supposed to (not) use. 
>> >> When malicious or flawed guest access reserved or unimplemented msr, >> I think inject #GP is better than silently ignore? > > I didn't say "silently ignore"; in fact, I asked you to _tighten_ the > check (hvm_cpuid() shouldn't return the respective flag set when > !cpu_has_mpx). > > Jan Not quite clear your meaning here, hvm_cpuid will not return flag when !cpu_has_mpx. Thanks, Jinsong
>>> "Liu, Jinsong" <jinsong.liu@intel.com> 11/26/13 3:12 PM >>> >Jan Beulich wrote: >>>>> On 26.11.13 at 11:11, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >>> Jan Beulich wrote: >>>>>>> On 25.11.13 at 09:25, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>>> wrote: >>>>> +static bool_t bndcfgs_invalid(u64 msr_content) >>>>> +{ >>>>> + /* BNDCFGS MSR reserved bits (11 ~ 2) must be zero */ >>>>> + if ( msr_content & 0xffc ) >>>>> + return 1; >>>>> + >>>>> + /* Canonical address reserved bits must be zero */ >>>>> + if ( hvm_long_mode_enabled(current) ) >>>>> + /* 48b linear address for x86_64 guest */ >>>>> + return !!(msr_content & (~(u64)0 << 48) ); >>>> >>>> !is_canonical_address() >>> >>> Not simply is_canonical_address() check, per MPX doc 9.3.5.1, >>> It will #GP if canonical address reserved bits (must be zero) check >>> fails >> >> You''re checking the reserved bits (2...11) above. For the address >> portion it says "The base of bound directory is a 4K page aligned >> linear address, and is always in canonical form. Any load into BNDCFGx >> (XRSTOR or WRMSR) ensures that the highest implemented bit of >> the linear address is sign extended to guarantee the canonicality >> of this address", which to me means _if_ you check anything here, >> then you want to check the address for being canonical. >> >>>>> + else >>>>> + /* 32b linear address for x86_32 (include PAE) guest */ >>>>> + return !!(msr_content & (~(u64)0 << 32) ); >>>> >>>> !!(msr_content >> 32) > >If so, canonical check for x86-32 (and pae) -- bit63~32 should not always be zero, but is sign extension of bit 31?Why? Everywhere else 32-bit addresses get zero extended when a 64-bit value is needed. 
But first of all the question needs to be answered whether this checking is necessary at all - the documentation to me reads as if the hardware would not fault on non-canonical values, but simply make them canonical.>>>>> @@ -3010,6 +3025,12 @@ int hvm_msr_read_intercept(unsigned int msr, >>>>> uint64_t *msr_content) hvm_get_guest_pat(v, msr_content); >>>>> break; >>>>> >>>>> + case MSR_IA32_BNDCFGS: >>>>> + if ( !cpu_has_mpx ) >>>> >>>> Wasn't it you who started to properly use hvm_cpuid() for cases like >>>> this? We're not after host capabilities here, but after what the >>>> guest is supposed to (not) use. >>> >>> When malicious or flawed guest access reserved or unimplemented msr, >>> I think inject #GP is better than silently ignore? >> >> I didn't say "silently ignore"; in fact, I asked you to _tighten_ the >> check (hvm_cpuid() shouldn't return the respective flag set when >> !cpu_has_mpx). > >Not quite clear your meaning here, hvm_cpuid will not return flag when !cpu_has_mpx.In your patch you check the hardware capability, but you ought to check whether the guest knows the feature to be available. The feature will obviously not show as available if the hardware capability is not there. Jan
Jan Beulich wrote:>>>> "Liu, Jinsong" <jinsong.liu@intel.com> 11/26/13 3:12 PM >>> >> Jan Beulich wrote: >>>>>> On 26.11.13 at 11:11, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>> wrote: >>>> Jan Beulich wrote: >>>>>>>> On 25.11.13 at 09:25, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>>>> wrote: >>>>>> +static bool_t bndcfgs_invalid(u64 msr_content) >>>>>> +{ >>>>>> + /* BNDCFGS MSR reserved bits (11 ~ 2) must be zero */ >>>>>> + if ( msr_content & 0xffc ) >>>>>> + return 1; >>>>>> + >>>>>> + /* Canonical address reserved bits must be zero */ >>>>>> + if ( hvm_long_mode_enabled(current) ) >>>>>> + /* 48b linear address for x86_64 guest */ >>>>>> + return !!(msr_content & (~(u64)0 << 48) ); >>>>> >>>>> !is_canonical_address() >>>> >>>> Not simply is_canonical_address() check, per MPX doc 9.3.5.1, >>>> It will #GP if canonical address reserved bits (must be zero) check >>>> fails >>> >>> You''re checking the reserved bits (2...11) above. For the address >>> portion it says "The base of bound directory is a 4K page aligned >>> linear address, and is always in canonical form. Any load into >>> BNDCFGx (XRSTOR or WRMSR) ensures that the highest implemented bit >>> of >>> the linear address is sign extended to guarantee the canonicality >>> of this address", which to me means _if_ you check anything here, >>> then you want to check the address for being canonical. >>> >>>>>> + else >>>>>> + /* 32b linear address for x86_32 (include PAE) guest */ >>>>>> + return !!(msr_content & (~(u64)0 << 32) ); >>>>> >>>>> !!(msr_content >> 32) >> >> If so, canonical check for x86-32 (and pae) -- bit63~32 should not >> always be zero, but is sign extension of bit 31? > > Why? Everywhere else 32-bit addresses get zero extended when a 64-bit > value is needed. 
> > But first of all the question needs to be answered whether this > checking is necessary at all - the documentation to me reads as if > the hardware would not fault on non-canonical values, but simply make > them canonical.Re-read doc, seems your understanding is right. So we check bit11~2 and inject #GP if check fail, and emulate making canonical address: * for x86-64 guest, sign extend bit47 * for x86-32 guest, bit63~32 set all 0''s>>>>>> @@ -3010,6 +3025,12 @@ int hvm_msr_read_intercept(unsigned int >>>>>> msr, uint64_t *msr_content) hvm_get_guest_pat(v, >>>>>> msr_content); break; >>>>>> >>>>>> + case MSR_IA32_BNDCFGS: >>>>>> + if ( !cpu_has_mpx ) >>>>> >>>>> Wasn''t it you who started to properly use hvm_cpuid() for cases >>>>> like this? We''re not after host capabilities here, but after what >>>>> the guest is supposed to (not) use. >>>> >>>> When malicious or flawed guest access reserved or unimplemented >>>> msr, I think inject #GP is better than silently ignore? >>> >>> I didn''t say "silently ignore"; in fact, I asked you to _tighten_ >>> the check (hvm_cpuid() shouldn''t return the respective flag set when >>> !cpu_has_mpx). >> >> Not quite clear your meaning here, hvm_cpuid will not return flag >> when !cpu_has_mpx. > > In your patch you check the hardware capability, but you ought to > check whether the guest knows the feature to be available. The > feature will obviously not show as available if the hardware > capability is not there.But the check !cpu_has_mpx has implicitly meant guest will not know this feature -- if h/w has not mpx cability, xen will not expose it to guest. Anything I''m missing here? Thanks, Jinsong
>>> On 27.11.13 at 03:28, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > Jan Beulich wrote: >>>>> "Liu, Jinsong" <jinsong.liu@intel.com> 11/26/13 3:12 PM >>> >>> Jan Beulich wrote: >>>>>>> On 26.11.13 at 11:11, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>>> wrote: >>>>> Jan Beulich wrote: >>>>>>>>> On 25.11.13 at 09:25, "Liu, Jinsong" <jinsong.liu@intel.com> >>>>>>>>> wrote: >>>>>>> @@ -3010,6 +3025,12 @@ int hvm_msr_read_intercept(unsigned int >>>>>>> msr, uint64_t *msr_content) hvm_get_guest_pat(v, >>>>>>> msr_content); break; >>>>>>> >>>>>>> + case MSR_IA32_BNDCFGS: >>>>>>> + if ( !cpu_has_mpx ) >>>>>> >>>>>> Wasn't it you who started to properly use hvm_cpuid() for cases >>>>>> like this? We're not after host capabilities here, but after what >>>>>> the guest is supposed to (not) use. >>>>> >>>>> When malicious or flawed guest access reserved or unimplemented >>>>> msr, I think inject #GP is better than silently ignore? >>>> >>>> I didn't say "silently ignore"; in fact, I asked you to _tighten_ >>>> the check (hvm_cpuid() shouldn't return the respective flag set when >>>> !cpu_has_mpx). >>> >>> Not quite clear your meaning here, hvm_cpuid will not return flag >>> when !cpu_has_mpx. >> >> In your patch you check the hardware capability, but you ought to >> check whether the guest knows the feature to be available. The >> feature will obviously not show as available if the hardware >> capability is not there. > > But the check !cpu_has_mpx has implicitly meant guest will not know this > feature -- if h/w has not mpx capability, xen will not expose it to guest. > Anything I'm missing here?Yes - you continue to ignore the inverse case: Hardware _has_ MPX, but the guest config disables the feature's visibility to the guest (via overriding of CPUID flags). One other note: Rather than "manually" handling the MSR if the optional VMX support for it is unavailable, we should perhaps not expose the feature to guests in the first place.
Otherwise your patch is, I'm afraid, still incomplete in that it lacks migration support (i.e. saving and restoring of the MSR value). Jan