From 1bdeea81e0ec151ec10c3341802f814956a2ff8d Mon Sep 17 00:00:00 2001
From: Liu Jinsong <jinsong.liu@intel.com>
Date: Fri, 11 Oct 2013 23:22:05 +0800
Subject: [PATCH] XSA-60 security hole: cr0.cd handling

Recently Oracle developers found a DoS-class Xen security issue,
designated XSA-60. Please refer to
http://xenbits.xen.org/xsa/advisory-60.html

Basically it concerns how to handle a guest setting cr0.cd, which in
some environments consumes enough time to produce DoS-like behavior.

This patch closes the XSA-60 security hole:
1. For a guest without VT-d, and for a guest with VT-d but snooped, Xen
need do nothing, since the hardware snoop mechanism ensures cache
coherency.

2. For a guest with VT-d but non-snooped, cache coherency cannot be
guaranteed by h/w snoop, so UC memory type must be emulated for the
guest:
2.1) if it runs w/ Intel EPT, set the guest IA32_PAT fields to UC so
that all guest memory types are UC;
2.2) if it runs w/ shadow, behave as Xen currently does, dropping all
shadows so that any new ones are created on demand w/ UC.

Signed-off-by: Liu Jinsong <jinsong.liu@intel.com>
---
 xen/arch/x86/hvm/hvm.c            | 71 ++++++++++++++++++++++---------------
 xen/arch/x86/hvm/vmx/vmx.c        | 57 ++++++++++++++++++++++++++++-
 xen/include/asm-x86/hvm/hvm.h     |  1 +
 xen/include/asm-x86/hvm/support.h |  2 +
 4 files changed, 100 insertions(+), 31 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index de81e45..0e14f49 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1703,6 +1703,36 @@ int hvm_mov_from_cr(unsigned int cr, unsigned int gpr)
     return X86EMUL_UNHANDLEABLE;
 }
 
+void hvm_handle_cd_traditional(struct vcpu *v, unsigned long value)
+{
+    if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
+    {
+        /* Entering no fill cache mode. */
+        spin_lock(&v->domain->arch.hvm_domain.uc_lock);
+        v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
+
+        if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
+        {
+            /* Flush physical caches. */
+            on_each_cpu(local_flush_cache, NULL, 1);
+            hvm_set_uc_mode(v, 1);
+        }
+        spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
+    }
+    else if ( !(value & (X86_CR0_CD | X86_CR0_NW)) &&
+              (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
+    {
+        /* Exit from no fill cache mode. */
+        spin_lock(&v->domain->arch.hvm_domain.uc_lock);
+        v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
+
+        if ( domain_exit_uc_mode(v) )
+            hvm_set_uc_mode(v, 0);
+
+        spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
+    }
+}
+
 int hvm_set_cr0(unsigned long value)
 {
     struct vcpu *v = current;
@@ -1786,35 +1816,18 @@ int hvm_set_cr0(unsigned long value)
         }
     }
 
-    if ( has_arch_mmios(v->domain) )
-    {
-        if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
-        {
-            /* Entering no fill cache mode. */
-            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
-            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
-
-            if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
-            {
-                /* Flush physical caches. */
-                on_each_cpu(local_flush_cache, NULL, 1);
-                hvm_set_uc_mode(v, 1);
-            }
-            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
-        }
-        else if ( !(value & (X86_CR0_CD | X86_CR0_NW)) &&
-                  (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
-        {
-            /* Exit from no fill cache mode. */
-            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
-            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
-
-            if ( domain_exit_uc_mode(v) )
-                hvm_set_uc_mode(v, 0);
-
-            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
-        }
-    }
+    /*
+     * When cr0.cd is set:
+     * 1. For a guest w/o VT-d, and for a guest with VT-d but snooped,
+     *    Xen need not do anything, since the hardware snoop mechanism
+     *    ensures cache coherency;
+     * 2. For a guest with VT-d but non-snooped, cache coherency cannot
+     *    be guaranteed by h/w, so UC memory type must be emulated for
+     *    the guest.
+     */
+    if ( ((value ^ old_value) & X86_CR0_CD) &&
+         has_arch_mmios(v->domain) &&
+         iommu_enabled && !iommu_snoop &&
+         hvm_funcs.handle_cd )
+        hvm_funcs.handle_cd(v, value);
 
     v->arch.hvm_vcpu.guest_cr[0] = value;
     hvm_update_guest_cr(v, 0);
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 9ca8632..1e0269b 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -642,6 +642,10 @@ static void vmx_ctxt_switch_to(struct vcpu *v)
             __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept_data), 0);
     }
 
+    /* For guest cr0.cd setting, do not use the potentially polluted cache. */
+    if ( unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
+        wbinvd();
+
     vmx_restore_guest_msrs(v);
     vmx_restore_dr(v);
 }
@@ -908,7 +912,8 @@ static unsigned long vmx_get_shadow_gs_base(struct vcpu *v)
 
 static int vmx_set_guest_pat(struct vcpu *v, u64 gpat)
 {
-    if ( !cpu_has_vmx_pat || !paging_mode_hap(v->domain) )
+    if ( !cpu_has_vmx_pat || !paging_mode_hap(v->domain) ||
+         unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
         return 0;
 
     vmx_vmcs_enter(v);
@@ -919,7 +924,8 @@ static int vmx_set_guest_pat(struct vcpu *v, u64 gpat)
 
 static int vmx_get_guest_pat(struct vcpu *v, u64 *gpat)
 {
-    if ( !cpu_has_vmx_pat || !paging_mode_hap(v->domain) )
+    if ( !cpu_has_vmx_pat || !paging_mode_hap(v->domain) ||
+         unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
         return 0;
 
     vmx_vmcs_enter(v);
@@ -1400,6 +1406,52 @@ static void vmx_set_uc_mode(struct vcpu *v)
     hvm_asid_flush_vcpu(v);
 }
 
+static void vmx_handle_cd(struct vcpu *v, unsigned long value)
+{
+    if ( !paging_mode_hap(v->domain) ||
+         unlikely(!cpu_has_vmx_pat) ) /* Too old for EPT; just to be safe. */
+    {
+        /*
+         * For shadow, the 'load IA32_PAT' VM-entry control is 0, so guest
+         * memory type cannot be set to UC via IA32_PAT. Xen drops all
+         * shadows so that any new ones are created on demand.
+         */
+        hvm_handle_cd_traditional(v, value);
+    }
+    else
+    {
+        u64 *pat = &v->arch.hvm_vcpu.pat_cr;
+
+        if ( value & X86_CR0_CD )
+        {
+            /*
+             * For EPT, set the guest IA32_PAT fields to UC so that all
+             * guest memory types are UC.
+             */
+            u64 uc_pat =
+                ((uint64_t)PAT_TYPE_UNCACHABLE) |       /* PAT0 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 8) |  /* PAT1 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 16) | /* PAT2 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 24) | /* PAT3 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 32) | /* PAT4 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 40) | /* PAT5 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 48) | /* PAT6 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 56);  /* PAT7 */
+
+            vmx_get_guest_pat(v, pat);
+            vmx_set_guest_pat(v, uc_pat);
+
+            wbinvd();
+            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
+        }
+        else
+        {
+            vmx_set_guest_pat(v, *pat);
+            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
+        }
+    }
+}
+
 static void vmx_set_info_guest(struct vcpu *v)
 {
     unsigned long intr_shadow;
@@ -1558,6 +1610,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
     .msr_read_intercept   = vmx_msr_read_intercept,
     .msr_write_intercept  = vmx_msr_write_intercept,
     .invlpg_intercept     = vmx_invlpg_intercept,
+    .handle_cd            = vmx_handle_cd,
     .set_uc_mode          = vmx_set_uc_mode,
     .set_info_guest       = vmx_set_info_guest,
     .set_rdtsc_exiting    = vmx_set_rdtsc_exiting,
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 3376418..6306587 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -156,6 +156,7 @@ struct hvm_function_table {
     int (*msr_read_intercept)(unsigned int msr, uint64_t *msr_content);
     int (*msr_write_intercept)(unsigned int msr, uint64_t msr_content);
     void (*invlpg_intercept)(unsigned long vaddr);
+    void (*handle_cd)(struct vcpu *v, unsigned long value);
     void (*set_uc_mode)(struct vcpu *v);
     void (*set_info_guest)(struct vcpu *v);
     void (*set_rdtsc_exiting)(struct vcpu *v, bool_t);
diff --git a/xen/include/asm-x86/hvm/support.h b/xen/include/asm-x86/hvm/support.h
index 7ddc806..b291168 100644
--- a/xen/include/asm-x86/hvm/support.h
+++ b/xen/include/asm-x86/hvm/support.h
@@ -130,6 +130,8 @@ void hvm_rdtsc_intercept(struct cpu_user_regs *regs);
 
 int __must_check hvm_handle_xsetbv(u32 index, u64 new_bv);
 
+void hvm_handle_cd_traditional(struct vcpu *v, unsigned long value);
+
 /* These functions all return X86EMUL return codes. */
 int hvm_set_efer(uint64_t value);
 int hvm_set_cr0(unsigned long value);
-- 
1.7.1
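A side note on the uc_pat construction in vmx_handle_cd() above: Xen's
PAT_TYPE_UNCACHABLE encoding is 0x0 (UC is type 0 in the PAT, per the
SDM), so the field-by-field OR documents intent rather than computing a
non-zero value -- the all-UC PAT is simply 0. A minimal standalone
sketch (the main() wrapper is just for illustration):

    #include <assert.h>
    #include <stdint.h>

    #define PAT_TYPE_UNCACHABLE 0x0ULL  /* UC encoding, as in Xen's headers */

    int main(void)
    {
        uint64_t uc_pat = 0;

        /* Replicate UC into all eight 8-bit PAT fields, as the patch does. */
        for ( unsigned int i = 0; i < 8; i++ )
            uc_pat |= PAT_TYPE_UNCACHABLE << (8 * i);

        assert(uc_pat == 0);  /* the all-UC PAT value is just 0 */
        return 0;
    }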
Sorry, I just found a little bug in this patch -- we should not let the
guest directly access the IA32_PAT MSR while cr0.cd is set. I will fix
it and re-send later.

Thanks,
Jinsong

Liu, Jinsong wrote:
> From 1bdeea81e0ec151ec10c3341802f814956a2ff8d Mon Sep 17 00:00:00 2001
> From: Liu Jinsong <jinsong.liu@intel.com>
> Date: Fri, 11 Oct 2013 23:22:05 +0800
> Subject: [PATCH] XSA-60 security hole: cr0.cd handling
> [...]
>>> On 11.10.13 at 20:25, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
> +void hvm_handle_cd_traditional(struct vcpu *v, unsigned long value)
> +{
> +    if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
> +    {
> +        /* Entering no fill cache mode. */
> +        spin_lock(&v->domain->arch.hvm_domain.uc_lock);
> +        v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
> +
> +        if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
> +        {
> +            /* Flush physical caches. */
> +            on_each_cpu(local_flush_cache, NULL, 1);
> +            hvm_set_uc_mode(v, 1);

So you continue to use the problematic function, and you also don't
remove the problematic code from its VMX backend - what am I missing
here? If there was no room for getting here with !paging_mode_hap(),
the problematic code should be removed from vmx_set_uc_mode(). And if
we still can get here with !paging_mode_hap(), then what's the purpose
of the patch?

> +    /*
> +     * When cr0.cd is set:
> +     * 1. For a guest w/o VT-d, and for a guest with VT-d but snooped,
> +     *    Xen need not do anything, since the hardware snoop mechanism
> +     *    ensures cache coherency;
> +     * 2. For a guest with VT-d but non-snooped, cache coherency cannot
> +     *    be guaranteed by h/w, so UC memory type must be emulated for
> +     *    the guest.
> +     */
> +    if ( ((value ^ old_value) & X86_CR0_CD) &&
> +         has_arch_mmios(v->domain) &&

has_arch_mmios() has been wrong (too weak) here originally, so I
strongly suggest replacing it here as you modify this logic anyway:
This check must consider not only non-empty iomem_caps, but also
non-empty ioport_caps.

> +static void vmx_handle_cd(struct vcpu *v, unsigned long value)
> +{
> +    if ( !paging_mode_hap(v->domain) ||
> +         unlikely(!cpu_has_vmx_pat) ) /* Too old for EPT; just to be safe. */
> +    {
> +        /*
> +         * For shadow, the 'load IA32_PAT' VM-entry control is 0, so guest
> +         * memory type cannot be set to UC via IA32_PAT. Xen drops all
> +         * shadows so that any new ones are created on demand.
> +         */
> +        hvm_handle_cd_traditional(v, value);
> +    }
> +    else
> +    {
> +        u64 *pat = &v->arch.hvm_vcpu.pat_cr;
> +
> +        if ( value & X86_CR0_CD )
> +        {
> [...]
> +            vmx_get_guest_pat(v, pat);
> +            vmx_set_guest_pat(v, uc_pat);
> +
> +            wbinvd();
> +            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
> +        }
> +        else
> +        {
> +            vmx_set_guest_pat(v, *pat);
> +            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
> +        }

All this is done only on the current vCPU, and I don't see any
mechanism by which the other vCPU-s of the domain would similarly get
their PAT overridden (yet the whole domain gets transitioned to UC
mode). This minimally needs more cleanup (i.e. it should be uniformly
just the local vCPU or uniformly the whole domain that is affected
here).

And if going the local-vCPU-only route you need to make clear (via
code comment I would think) that this is safe to do when two of the
guest's vCPU-s with different CR0.CD settings run on two hyperthreads
on the same core.

Jan
Jan Beulich wrote:
>>>> On 11.10.13 at 20:25, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>> +void hvm_handle_cd_traditional(struct vcpu *v, unsigned long value)
>> [...]
>> +            on_each_cpu(local_flush_cache, NULL, 1);
>> +            hvm_set_uc_mode(v, 1);
>
> So you continue to use the problematic function, and you also
> don't remove the problematic code from its VMX backend - what
> am I missing here? If there was no room for getting here with
> !paging_mode_hap(), the problematic code should be removed
> from vmx_set_uc_mode(). And if we still can get here with
> !paging_mode_hap(), then what's the purpose of the patch?
>

The new solution w/ PAT depends on whether the Intel processor supports
the 'load IA32_PAT' VM-entry control bit (very old Intel processors
indeed did not support it).

Though I'm pretty sure Intel processors w/ EPT capability also support
the 'load IA32_PAT' VM-entry control bit, the Intel SDM doesn't
explicitly guarantee it. Hence we still keep the old function, though
I'm sure it will never be called -- just for safety and logic
completeness.

>> +    if ( ((value ^ old_value) & X86_CR0_CD) &&
>> +         has_arch_mmios(v->domain) &&
>
> has_arch_mmios() has been wrong (too weak) here originally, so I
> strongly suggest replacing it here as you modify this logic anyway:
> This check must consider not only non-empty iomem_caps, but also
> non-empty ioport_caps.

OK, or, how about has_arch_pdevs()?

>> +static void vmx_handle_cd(struct vcpu *v, unsigned long value)
>> [...]
>> +            vmx_get_guest_pat(v, pat);
>> +            vmx_set_guest_pat(v, uc_pat);
>> +
>> +            wbinvd();
>> +            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
>> +        }
>> +        else
>> +        {
>> +            vmx_set_guest_pat(v, *pat);
>> +            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
>> +        }
>
> All this is done only on the current vCPU, and I don't see any
> mechanism by which the other vCPU-s of the domain would
> similarly get their PAT overridden (yet the whole domain gets
> transitioned to UC mode). This minimally needs more cleanup (i.e.
> it should be uniformly just the local vCPU or uniformly the whole
> domain that is affected here).
>
> And if going the local-vCPU-only route you need to make clear
> (via code comment I would think) that this is safe to do when
> two of the guest's vCPU-s with different CR0.CD settings run
> on two hyperthreads on the same core.
>

For Intel processors the CR0.CD operation is per vCPU, not per domain.
It's fine to run w/ different CR0.CD and different cacheability on
different CPUs (for AMD, CR0.CD seemingly should be identical across
cores, but AMD does not have the XSA-60 issue we are talking about).

IA32_PAT is a separate MSR per logical processor, so it's safe to run
two vCPUs on two hyperthreads of the same core. I will add code
comments.

Thanks,
Jinsong
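For reference, cpu_has_vmx_pat in this discussion is exactly a test of
that VM-entry control; a paraphrase of its definition (assumed to match
xen/include/asm-x86/hvm/vmx/vmcs.h -- worth verifying there):

    #define cpu_has_vmx_pat \
        (vmx_vmentry_control & VM_ENTRY_LOAD_GUEST_PAT)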
>>> On 14.10.13 at 11:28, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
> Jan Beulich wrote:
>> So you continue to use the problematic function, and you also
>> don't remove the problematic code from its VMX backend - what
>> am I missing here? If there was no room for getting here with
>> !paging_mode_hap(), the problematic code should be removed
>> from vmx_set_uc_mode(). And if we still can get here with
>> !paging_mode_hap(), then what's the purpose of the patch?
>
> The new solution w/ PAT depends on whether the Intel processor supports
> the 'load IA32_PAT' VM-entry control bit (very old Intel processors
> indeed did not support it).
>
> Though I'm pretty sure Intel processors w/ EPT capability also support
> the 'load IA32_PAT' VM-entry control bit, the Intel SDM doesn't
> explicitly guarantee it. Hence we still keep the old function, though
> I'm sure it will never be called -- just for safety and logic
> completeness.

If any such processor exists, the security issue would still be there
with the patch as is. Hence either use of EPT needs to be suppressed
in that case, or some other solution allowing the broken code to be
deleted (or replaced) needs to be found.

>> has_arch_mmios() has been wrong (too weak) here originally, so I
>> strongly suggest replacing it here as you modify this logic anyway:
>> This check must consider not only non-empty iomem_caps, but also
>> non-empty ioport_caps.
>
> OK, or, how about has_arch_pdevs()?

Yes, precisely that one.

>> All this is done only on the current vCPU, and I don't see any
>> mechanism by which the other vCPU-s of the domain would
>> similarly get their PAT overridden (yet the whole domain gets
>> transitioned to UC mode). This minimally needs more cleanup (i.e.
>> it should be uniformly just the local vCPU or uniformly the whole
>> domain that is affected here).
>>
>> And if going the local-vCPU-only route you need to make clear
>> (via code comment I would think) that this is safe to do when
>> two of the guest's vCPU-s with different CR0.CD settings run
>> on two hyperthreads on the same core.
>
> For Intel processors the CR0.CD operation is per vCPU, not per domain.
> It's fine to run w/ different CR0.CD and different cacheability on
> different CPUs (for AMD, CR0.CD seemingly should be identical across
> cores, but AMD does not have the XSA-60 issue we are talking about).
>
> IA32_PAT is a separate MSR per logical processor, so it's safe to run
> two vCPUs on two hyperthreads of the same core. I will add code
> comments.

So are you saying that no conflicts will arise in TLBs getting
populated with different cacheability information from the two
hyperthreads?

Jan
Jan Beulich wrote:
>>>> On 14.10.13 at 11:28, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>> The new solution w/ PAT depends on whether the Intel processor supports
>> the 'load IA32_PAT' VM-entry control bit (very old Intel processors
>> indeed did not support it).
>>
>> Though I'm pretty sure Intel processors w/ EPT capability also support
>> the 'load IA32_PAT' VM-entry control bit, the Intel SDM doesn't
>> explicitly guarantee it. Hence we still keep the old function, though
>> I'm sure it will never be called -- just for safety and logic
>> completeness.
>
> If any such processor exists, the security issue would still be there
> with the patch as is. Hence either use of EPT needs to be suppressed
> in that case, or some other solution allowing the broken code to be
> deleted (or replaced) needs to be found.
>

Well, I had to go back to the Intel library :) After a long search
through Intel VT history: neither 'load IA32_PAT' nor 'EPT' exists in
Intel SDM Order Number 253669-026US (Feb 2008), but they co-exist as
early as Intel SDM Order Number 253669-027US (July 2008).

Per my understanding, 'load IA32_PAT' co-exists/co-works with EPT:
1. shadow doesn't need the 'load IA32_PAT' feature;
2. EPT, from its earliest version, has the 'iPAT' bit to control the
combining of guest PAT and host EPT memory types -- without 'load
IA32_PAT', how could the guest PAT memory type take effect?

So we have 2 choices for 'load IA32_PAT':
1. aggressive choice: remove the vmx_set_uc_mode logic
       if ( shadow )
           drop shadows so new ones are created on demand;
       else /* Intel EPT */
           IA32_PAT solution
2. conservative choice: as this patch does, use the
if ( !cpu_has_vmx_pat ) check to keep the logic complete -- it doesn't
make things worse.

Thoughts?

>> OK, or, how about has_arch_pdevs()?
>
> Yes, precisely that one.

> So are you saying that no conflicts will arise in TLBs getting
> populated with different cacheability information from the two
> hyperthreads?
>

No, I meant it's unrelated to hyperthreads/cores; but yes, we do need
to add a TLB flush to invalidate old memory type info cached in the
TLBs, via hvm_asid_flush_vcpu().

Thanks,
Jinsong
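Presumably that means a flush at the end of the EPT branch of
vmx_handle_cd(); a minimal sketch based on the function body quoted
earlier in the thread -- the exact placement of the call is an
assumption here, not the posted v2:

        if ( value & X86_CR0_CD )
        {
            vmx_get_guest_pat(v, pat);    /* save the guest's PAT */
            vmx_set_guest_pat(v, uc_pat); /* force all PAT fields to UC */
            wbinvd();                     /* flush potentially dirty lines */
            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
        }
        else
        {
            vmx_set_guest_pat(v, *pat);   /* restore the saved guest PAT */
            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
        }
        /*
         * Invalidate this vCPU's ASID/VPID so that translations cached
         * with the old effective memory type are not reused.
         */
        hvm_asid_flush_vcpu(v);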
>>> On 15.10.13 at 18:47, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
> Well, I had to go back to the Intel library :) After a long search
> through Intel VT history: neither 'load IA32_PAT' nor 'EPT' exists in
> Intel SDM Order Number 253669-026US (Feb 2008), but they co-exist as
> early as Intel SDM Order Number 253669-027US (July 2008).
>
> Per my understanding, 'load IA32_PAT' co-exists/co-works with EPT:
> 1. shadow doesn't need the 'load IA32_PAT' feature;
> 2. EPT, from its earliest version, has the 'iPAT' bit to control the
> combining of guest PAT and host EPT memory types -- without 'load
> IA32_PAT', how could the guest PAT memory type take effect?
>
> So we have 2 choices for 'load IA32_PAT':
> 1. aggressive choice: remove the vmx_set_uc_mode logic
>        if ( shadow )
>            drop shadows so new ones are created on demand;
>        else /* Intel EPT */
>            IA32_PAT solution
> 2. conservative choice: as this patch does, use the
> if ( !cpu_has_vmx_pat ) check to keep the logic complete -- it doesn't
> make things worse.

As said before - the code in question needs to go away or be modified
in a way that's safe. If there are extra restrictions that need to be
enforced (like cpu_has_vmx_pat being a prereq for EPT plus a
non-snooped IOMMU), then so be it (i.e. your patch would need to be
extended to do that).

Jan
Jan Beulich wrote:
> As said before - the code in question needs to go away or be modified
> in a way that's safe. If there are extra restrictions that need to be
> enforced (like cpu_has_vmx_pat being a prereq for EPT plus a
> non-snooped IOMMU), then so be it (i.e. your patch would need to be
> extended to do that).
>
> Jan

OK, I will take the aggressive approach, protected by disallowing EPT
when !cpu_has_vmx_pat.

Thanks,
Jinsong
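Such gating would naturally live in vmx_init_vmcs_config()
(xen/arch/x86/hvm/vmx/vmcs.c). The following is only a sketch of the
idea, not the change that was eventually committed; the local variable
names follow that function's existing _vmx_* convention and are
assumptions here:

    /*
     * Sketch: if the 'load IA32_PAT' VM-entry control is unavailable,
     * do not offer EPT, so that vmx_handle_cd() can always rely on
     * PAT-based UC emulation whenever EPT is in use.
     */
    if ( !(_vmx_vmentry_control & VM_ENTRY_LOAD_GUEST_PAT) )
        _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;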