From: Yang Zhang <yang.z.zhang@Intel.com> This patch fixs two issues: 1. The CR_READ_SHADOW should only cover the value that L2 wirtes to CR when L2 is running. But currently, L0 wirtes wrong value to it during virtual vmentry and L2''s CR access emualtion. 2. L2 changed cr[0/4] in a way that did not change any of L1''s shadowed bits, but did change L0 shadowed bits. In this case, the effective cr[0/4] value that L1 would like to write into the hardware is consist of the L2-owned bits from the new value combined with the L1-owned bits from L1''s guest cr[0/4]. Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com> --- xen/arch/x86/hvm/hvm.c | 13 +++++++--- xen/arch/x86/hvm/vmx/vmx.c | 13 ++++++++- xen/arch/x86/hvm/vmx/vvmx.c | 44 +++++++++++++++++++++++++++++++++++ xen/include/asm-x86/hvm/nestedhvm.h | 8 ++++++ xen/include/asm-x86/hvm/vcpu.h | 3 ++ xen/include/asm-x86/hvm/vmx/vvmx.h | 1 + 6 files changed, 76 insertions(+), 6 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index e2ba9de..b1f8dfe 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1810,6 +1810,13 @@ void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value) } } +static void hvm_update_cr(struct vcpu *v, unsigned int cr, unsigned long value) +{ + v->arch.hvm_vcpu.guest_cr[cr] = value; + nestedhvm_set_cr(v, cr, value); + hvm_update_guest_cr(v, cr); +} + int hvm_set_cr0(unsigned long value) { struct vcpu *v = current; @@ -1915,8 +1922,7 @@ int hvm_set_cr0(unsigned long value) hvm_funcs.handle_cd ) hvm_funcs.handle_cd(v, value); - v->arch.hvm_vcpu.guest_cr[0] = value; - hvm_update_guest_cr(v, 0); + hvm_update_cr(v, 0, value); if ( (value ^ old_value) & X86_CR0_PG ) { if ( !nestedhvm_vmswitch_in_progress(v) && nestedhvm_vcpu_in_guestmode(v) ) @@ -1997,8 +2003,7 @@ int hvm_set_cr4(unsigned long value) goto gpf; } - v->arch.hvm_vcpu.guest_cr[4] = value; - hvm_update_guest_cr(v, 4); + hvm_update_cr(v, 4, value); hvm_memory_event_cr4(value, old_cr); /* diff --git 
a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index 8d923e7..4ab15bc 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -1171,6 +1171,11 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) vmx_update_cpu_exec_control(v); } + if ( !nestedhvm_vcpu_in_guestmode(v) ) + __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]); + else + nvmx_set_cr_read_shadow(v, 0); + if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) ) { if ( v != current ) @@ -1219,7 +1224,6 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) v->arch.hvm_vcpu.hw_cr[0] v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask; __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]); - __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]); /* Changing CR0 can change some bits in real CR4. */ vmx_update_guest_cr(v, 4); @@ -1244,6 +1248,12 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK; if ( paging_mode_hap(v->domain) ) v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE; + + if ( !nestedhvm_vcpu_in_guestmode(v) ) + __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]); + else + nvmx_set_cr_read_shadow(v, 4); + v->arch.hvm_vcpu.hw_cr[4] |= v->arch.hvm_vcpu.guest_cr[4]; if ( v->arch.hvm_vmx.vmx_realmode ) v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_VME; @@ -1263,7 +1273,6 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_SMEP; } __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]); - __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]); break; default: BUG(); diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c index 248e975..4df127d 100644 --- a/xen/arch/x86/hvm/vmx/vvmx.c +++ b/xen/arch/x86/hvm/vmx/vvmx.c @@ -1046,6 +1046,8 @@ static void load_shadow_guest_state(struct vcpu *v) vvmcs_to_shadow_bulk(v, ARRAY_SIZE(vmcs_gstate_field), vmcs_gstate_field); + nvcpu->guest_cr[0] = __get_vvmcs(vvmcs, CR0_READ_SHADOW); + nvcpu->guest_cr[4] = 
__get_vvmcs(vvmcs, CR4_READ_SHADOW); hvm_set_cr0(__get_vvmcs(vvmcs, GUEST_CR0)); hvm_set_cr4(__get_vvmcs(vvmcs, GUEST_CR4)); hvm_set_cr3(__get_vvmcs(vvmcs, GUEST_CR3)); @@ -2454,3 +2456,45 @@ int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs, return ( nvcpu->nv_vmexit_pending == 1 ); } +void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr) +{ + unsigned long cr_field, read_shadow_field, mask_field; + + switch ( cr ) + { + case 0: + cr_field = GUEST_CR0; + read_shadow_field = CR0_READ_SHADOW; + mask_field = CR0_GUEST_HOST_MASK; + break; + case 4: + cr_field = GUEST_CR4; + read_shadow_field = CR4_READ_SHADOW; + mask_field = CR4_GUEST_HOST_MASK; + break; + default: + gdprintk(XENLOG_WARNING, "Set read shadow for CR%d.\n", cr); + return; + } + + if ( !nestedhvm_vmswitch_in_progress(v) ) + { + unsigned long virtual_cr_mask = + __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, mask_field); + + /* + * We get here when L2 changed cr in a way that did not change + * any of L1''s shadowed bits (see nvmx_n2_vmexit_handler), + * but did change L0 shadowed bits. So we first calculate the + * effective cr value that L1 would like to write into the + * hardware. It consists of the L2-owned bits from the new + * value combined with the L1-owned bits from L1''s guest cr. + */ + v->arch.hvm_vcpu.guest_cr[cr] &= ~virtual_cr_mask; + v->arch.hvm_vcpu.guest_cr[cr] |= virtual_cr_mask & + __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, cr_field); + } + + /* nvcpu.guest_cr is what L2 write to cr actually. 
*/ + __vmwrite(read_shadow_field, v->arch.hvm_vcpu.nvcpu.guest_cr[cr]); +} diff --git a/xen/include/asm-x86/hvm/nestedhvm.h b/xen/include/asm-x86/hvm/nestedhvm.h index 649c511..d8124cf 100644 --- a/xen/include/asm-x86/hvm/nestedhvm.h +++ b/xen/include/asm-x86/hvm/nestedhvm.h @@ -68,4 +68,12 @@ void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m); bool_t nestedhvm_is_n2(struct vcpu *v); +static inline void nestedhvm_set_cr(struct vcpu *v, unsigned int cr, + unsigned long value) +{ + if ( !nestedhvm_vmswitch_in_progress(v) && + nestedhvm_vcpu_in_guestmode(v) ) + v->arch.hvm_vcpu.nvcpu.guest_cr[cr] = value; +} + #endif /* _HVM_NESTEDHVM_H */ diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h index 4f45060..c1abcec 100644 --- a/xen/include/asm-x86/hvm/vcpu.h +++ b/xen/include/asm-x86/hvm/vcpu.h @@ -110,6 +110,9 @@ struct nestedvcpu { */ bool_t nv_ioport80; bool_t nv_ioportED; + + /* L2''s control-resgister, just as the L2 sees them. */ + unsigned long guest_cr[5]; }; #define vcpu_nestedhvm(v) ((v)->arch.hvm_vcpu.nvcpu) diff --git a/xen/include/asm-x86/hvm/vmx/vvmx.h b/xen/include/asm-x86/hvm/vmx/vvmx.h index 848be75..c17a440 100644 --- a/xen/include/asm-x86/hvm/vmx/vvmx.h +++ b/xen/include/asm-x86/hvm/vmx/vvmx.h @@ -223,6 +223,7 @@ void nvmx_idtv_handling(void); u64 nvmx_get_tsc_offset(struct vcpu *v); int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs, unsigned int exit_reason); +void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr); uint64_t nept_get_ept_vpid_cap(void); -- 1.7.1
On 03.12.13 08:54, Yang Zhang wrote:> From: Yang Zhang <yang.z.zhang@Intel.com> > > This patch fixs two issues: > 1. The CR_READ_SHADOW should only cover the value that L2 wirtes to > CR when L2 is running. But currently, L0 wirtes wrong value to > it during virtual vmentry and L2''s CR access emualtion. > > 2. L2 changed cr[0/4] in a way that did not change any of L1''s shadowed > bits, but did change L0 shadowed bits. In this case, the effective cr[0/4] > value that L1 would like to write into the hardware is consist of > the L2-owned bits from the new value combined with the L1-owned bits > from L1''s guest cr[0/4]. > > Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com> > --- > xen/arch/x86/hvm/hvm.c | 13 +++++++--- > xen/arch/x86/hvm/vmx/vmx.c | 13 ++++++++- > xen/arch/x86/hvm/vmx/vvmx.c | 44 +++++++++++++++++++++++++++++++++++ > xen/include/asm-x86/hvm/nestedhvm.h | 8 ++++++ > xen/include/asm-x86/hvm/vcpu.h | 3 ++ > xen/include/asm-x86/hvm/vmx/vvmx.h | 1 + > 6 files changed, 76 insertions(+), 6 deletions(-) > > diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c > index e2ba9de..b1f8dfe 100644 > --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -1810,6 +1810,13 @@ void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value) > } > } > > +static void hvm_update_cr(struct vcpu *v, unsigned int cr, unsigned long value) > +{ > + v->arch.hvm_vcpu.guest_cr[cr] = value; > + nestedhvm_set_cr(v, cr, value); > + hvm_update_guest_cr(v, cr); > +} > + > int hvm_set_cr0(unsigned long value) > { > struct vcpu *v = current; > @@ -1915,8 +1922,7 @@ int hvm_set_cr0(unsigned long value) > hvm_funcs.handle_cd ) > hvm_funcs.handle_cd(v, value); > > - v->arch.hvm_vcpu.guest_cr[0] = value; > - hvm_update_guest_cr(v, 0); > + hvm_update_cr(v, 0, value); > > if ( (value ^ old_value) & X86_CR0_PG ) { > if ( !nestedhvm_vmswitch_in_progress(v) && nestedhvm_vcpu_in_guestmode(v) ) > @@ -1997,8 +2003,7 @@ int hvm_set_cr4(unsigned long value) > goto gpf; > } > > - 
v->arch.hvm_vcpu.guest_cr[4] = value; > - hvm_update_guest_cr(v, 4); > + hvm_update_cr(v, 4, value); > hvm_memory_event_cr4(value, old_cr); > > /* > diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c > index 8d923e7..4ab15bc 100644 > --- a/xen/arch/x86/hvm/vmx/vmx.c > +++ b/xen/arch/x86/hvm/vmx/vmx.c > @@ -1171,6 +1171,11 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) > vmx_update_cpu_exec_control(v); > } > > + if ( !nestedhvm_vcpu_in_guestmode(v) ) > + __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]); > + else > + nvmx_set_cr_read_shadow(v, 0); > + > if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) ) > { > if ( v != current ) > @@ -1219,7 +1224,6 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) > v->arch.hvm_vcpu.hw_cr[0] > v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask; > __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]); > - __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]); > > /* Changing CR0 can change some bits in real CR4. 
*/ > vmx_update_guest_cr(v, 4); > @@ -1244,6 +1248,12 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) > v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK; > if ( paging_mode_hap(v->domain) ) > v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE; > + > + if ( !nestedhvm_vcpu_in_guestmode(v) ) > + __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]); > + else > + nvmx_set_cr_read_shadow(v, 4); > + > v->arch.hvm_vcpu.hw_cr[4] |= v->arch.hvm_vcpu.guest_cr[4]; > if ( v->arch.hvm_vmx.vmx_realmode ) > v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_VME; > @@ -1263,7 +1273,6 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) > v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_SMEP; > } > __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]); > - __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]); > break; > default: > BUG(); > diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c > index 248e975..4df127d 100644 > --- a/xen/arch/x86/hvm/vmx/vvmx.c > +++ b/xen/arch/x86/hvm/vmx/vvmx.c > @@ -1046,6 +1046,8 @@ static void load_shadow_guest_state(struct vcpu *v) > vvmcs_to_shadow_bulk(v, ARRAY_SIZE(vmcs_gstate_field), > vmcs_gstate_field); > > + nvcpu->guest_cr[0] = __get_vvmcs(vvmcs, CR0_READ_SHADOW); > + nvcpu->guest_cr[4] = __get_vvmcs(vvmcs, CR4_READ_SHADOW); > hvm_set_cr0(__get_vvmcs(vvmcs, GUEST_CR0)); > hvm_set_cr4(__get_vvmcs(vvmcs, GUEST_CR4)); > hvm_set_cr3(__get_vvmcs(vvmcs, GUEST_CR3)); > @@ -2454,3 +2456,45 @@ int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs, > return ( nvcpu->nv_vmexit_pending == 1 ); > } > > +void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr) > +{ > + unsigned long cr_field, read_shadow_field, mask_field; > + > + switch ( cr ) > + { > + case 0: > + cr_field = GUEST_CR0; > + read_shadow_field = CR0_READ_SHADOW; > + mask_field = CR0_GUEST_HOST_MASK; > + break; > + case 4: > + cr_field = GUEST_CR4; > + read_shadow_field = CR4_READ_SHADOW; > + mask_field = CR4_GUEST_HOST_MASK; > + break; > + default: > + 
gdprintk(XENLOG_WARNING, "Set read shadow for CR%d.\n", cr); > + return; > + } > + > + if ( !nestedhvm_vmswitch_in_progress(v) ) > + { > + unsigned long virtual_cr_mask = > + __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, mask_field); > + > + /* > + * We get here when L2 changed cr in a way that did not change > + * any of L1''s shadowed bits (see nvmx_n2_vmexit_handler), > + * but did change L0 shadowed bits. So we first calculate the > + * effective cr value that L1 would like to write into the > + * hardware. It consists of the L2-owned bits from the new > + * value combined with the L1-owned bits from L1''s guest cr. > + */ > + v->arch.hvm_vcpu.guest_cr[cr] &= ~virtual_cr_mask; > + v->arch.hvm_vcpu.guest_cr[cr] |= virtual_cr_mask & > + __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, cr_field); > + } > + > + /* nvcpu.guest_cr is what L2 write to cr actually. */ > + __vmwrite(read_shadow_field, v->arch.hvm_vcpu.nvcpu.guest_cr[cr]); > +} > diff --git a/xen/include/asm-x86/hvm/nestedhvm.h b/xen/include/asm-x86/hvm/nestedhvm.h > index 649c511..d8124cf 100644 > --- a/xen/include/asm-x86/hvm/nestedhvm.h > +++ b/xen/include/asm-x86/hvm/nestedhvm.h > @@ -68,4 +68,12 @@ void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m); > > bool_t nestedhvm_is_n2(struct vcpu *v); > > +static inline void nestedhvm_set_cr(struct vcpu *v, unsigned int cr, > + unsigned long value) > +{ > + if ( !nestedhvm_vmswitch_in_progress(v) && > + nestedhvm_vcpu_in_guestmode(v) ) > + v->arch.hvm_vcpu.nvcpu.guest_cr[cr] = value; > +} > + > #endif /* _HVM_NESTEDHVM_H */ > diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h > index 4f45060..c1abcec 100644 > --- a/xen/include/asm-x86/hvm/vcpu.h > +++ b/xen/include/asm-x86/hvm/vcpu.h > @@ -110,6 +110,9 @@ struct nestedvcpu { > */ > bool_t nv_ioport80; > bool_t nv_ioportED; > + > + /* L2''s control-resgister, just as the L2 sees them. 
*/ > + unsigned long guest_cr[5]; > };I have a question to everyone thinking the nv_ prefix is pointless: Did you ever read BSD code? Christoph> #define vcpu_nestedhvm(v) ((v)->arch.hvm_vcpu.nvcpu) > diff --git a/xen/include/asm-x86/hvm/vmx/vvmx.h b/xen/include/asm-x86/hvm/vmx/vvmx.h > index 848be75..c17a440 100644 > --- a/xen/include/asm-x86/hvm/vmx/vvmx.h > +++ b/xen/include/asm-x86/hvm/vmx/vvmx.h > @@ -223,6 +223,7 @@ void nvmx_idtv_handling(void); > u64 nvmx_get_tsc_offset(struct vcpu *v); > int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs, > unsigned int exit_reason); > +void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr); > > uint64_t nept_get_ept_vpid_cap(void); > >
>>> On 03.12.13 at 11:09, "Egger, Christoph" <chegger@amazon.de> wrote: > On 03.12.13 08:54, Yang Zhang wrote: >> --- a/xen/include/asm-x86/hvm/vcpu.h >> +++ b/xen/include/asm-x86/hvm/vcpu.h >> @@ -110,6 +110,9 @@ struct nestedvcpu { >> */ >> bool_t nv_ioport80; >> bool_t nv_ioportED; >> + >> + /* L2''s control-resgister, just as the L2 sees them. */ >> + unsigned long guest_cr[5]; >> }; > > I have a question to everyone thinking the nv_ prefix is pointless: > Did you ever read BSD code?No, I didn''t. Using such prefixes is a relic of the K&R days. Jan
>>> On 03.12.13 at 08:54, Yang Zhang <yang.z.zhang@intel.com> wrote: > From: Yang Zhang <yang.z.zhang@Intel.com> > > This patch fixs two issues: > 1. The CR_READ_SHADOW should only cover the value that L2 wirtes to > CR when L2 is running. But currently, L0 wirtes wrong value to > it during virtual vmentry and L2''s CR access emualtion. > > 2. L2 changed cr[0/4] in a way that did not change any of L1''s shadowed > bits, but did change L0 shadowed bits. In this case, the effective cr[0/4] > value that L1 would like to write into the hardware is consist of > the L2-owned bits from the new value combined with the L1-owned bits > from L1''s guest cr[0/4]. > > Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com> > ---It''s been a while since v2 was sent, so please tell us what has changed since then and why. That''ll make reviewing quite a bit easier. Jan
Jan Beulich wrote on 2013-12-03:>>>> On 03.12.13 at 08:54, Yang Zhang <yang.z.zhang@intel.com> wrote: >> From: Yang Zhang <yang.z.zhang@Intel.com> >> >> This patch fixs two issues: >> 1. The CR_READ_SHADOW should only cover the value that L2 wirtes to >> CR when L2 is running. But currently, L0 wirtes wrong value to it >> during virtual vmentry and L2''s CR access emualtion. >> >> 2. L2 changed cr[0/4] in a way that did not change any of L1''s >> shadowed bits, but did change L0 shadowed bits. In this case, the >> effective cr[0/4] value that L1 would like to write into the >> hardware is consist of the L2-owned bits from the new value combined >> with the L1-owned bits from L1''s guest cr[0/4]. >> >> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com> >> --- > > It''s been a while since v2 was sent, so please tell us what hasYes, about one month ago. :)> changed since then and why. That''s make reviewing quite a bit easier.The main change in V3 is adding a new wrapper function nvmx_set_cr_read_shadow() for CR[0/4] read shadow setting.> > JanBest regards, Yang
Zhang, Yang Z wrote on 2013-12-03:> From: Yang Zhang <yang.z.zhang@Intel.com> > > This patch fixs two issues: > 1. The CR_READ_SHADOW should only cover the value that L2 wirtes to > CR when L2 is running. But currently, L0 wirtes wrong value to > it during virtual vmentry and L2''s CR access emualtion. > > 2. L2 changed cr[0/4] in a way that did not change any of L1''s shadowed > bits, but did change L0 shadowed bits. In this case, the effective cr[0/4] > value that L1 would like to write into the hardware is consist of > the L2-owned bits from the new value combined with the L1-owned bits > from L1''s guest cr[0/4]. > > Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>Hi Eddie, Could you help to review this patch again? You already gave the ack for the first version, but there are some changes in the third version. The main change is using a wrapper function to set CR_READ_SHADOW. Thanks.> --- > xen/arch/x86/hvm/hvm.c | 13 +++++++--- > xen/arch/x86/hvm/vmx/vmx.c | 13 ++++++++- > xen/arch/x86/hvm/vmx/vvmx.c | 44 > +++++++++++++++++++++++++++++++++++ xen/include/asm-x86/hvm/nestedhvm.h > | 8 ++++++ xen/include/asm-x86/hvm/vcpu.h | 3 ++ > xen/include/asm-x86/hvm/vmx/vvmx.h | 1 + 6 files changed, 76 > insertions(+), 6 deletions(-) > diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c > index e2ba9de..b1f8dfe 100644 > --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -1810,6 +1810,13 @@ void hvm_shadow_handle_cd(struct vcpu *v, > unsigned long value) > } > } > +static void hvm_update_cr(struct vcpu *v, unsigned int cr, unsigned > long value) +{ + v->arch.hvm_vcpu.guest_cr[cr] = value; + > nestedhvm_set_cr(v, cr, value); + hvm_update_guest_cr(v, cr); +} + > int hvm_set_cr0(unsigned long value) > { > struct vcpu *v = current; > @@ -1915,8 +1922,7 @@ int hvm_set_cr0(unsigned long value) > hvm_funcs.handle_cd ) > hvm_funcs.handle_cd(v, value); > - v->arch.hvm_vcpu.guest_cr[0] = value; > - hvm_update_guest_cr(v, 0); > + hvm_update_cr(v, 0, value); > 
> if ( (value ^ old_value) & X86_CR0_PG ) { > if ( !nestedhvm_vmswitch_in_progress(v) && > nestedhvm_vcpu_in_guestmode(v) ) > @@ -1997,8 +2003,7 @@ int hvm_set_cr4(unsigned long value) > goto gpf; > } > - v->arch.hvm_vcpu.guest_cr[4] = value; > - hvm_update_guest_cr(v, 4); > + hvm_update_cr(v, 4, value); > hvm_memory_event_cr4(value, old_cr); > > /* > diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c > index 8d923e7..4ab15bc 100644 > --- a/xen/arch/x86/hvm/vmx/vmx.c > +++ b/xen/arch/x86/hvm/vmx/vmx.c > @@ -1171,6 +1171,11 @@ static void vmx_update_guest_cr(struct vcpu *v, > unsigned int cr) > vmx_update_cpu_exec_control(v); > } > + if ( !nestedhvm_vcpu_in_guestmode(v) ) + > __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]); + else > + nvmx_set_cr_read_shadow(v, 0); + > if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) ) > { > if ( v != current ) > @@ -1219,7 +1224,6 @@ static void vmx_update_guest_cr(struct vcpu *v, > unsigned int cr) > v->arch.hvm_vcpu.hw_cr[0] > v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask; > __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]); > - __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]); > > /* Changing CR0 can change some bits in real CR4. 
*/ > vmx_update_guest_cr(v, 4); > @@ -1244,6 +1248,12 @@ static void vmx_update_guest_cr(struct vcpu *v, > unsigned int cr) > v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK; > if ( paging_mode_hap(v->domain) ) > v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE; > + + if ( !nestedhvm_vcpu_in_guestmode(v) ) + > __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]); + else > + nvmx_set_cr_read_shadow(v, 4); + > v->arch.hvm_vcpu.hw_cr[4] |= v->arch.hvm_vcpu.guest_cr[4]; > if ( v->arch.hvm_vmx.vmx_realmode ) > v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_VME; > @@ -1263,7 +1273,6 @@ static void vmx_update_guest_cr(struct vcpu *v, > unsigned int cr) > v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_SMEP; > } __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]); - > __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]); > break; default: BUG(); > diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c > index 248e975..4df127d 100644 > --- a/xen/arch/x86/hvm/vmx/vvmx.c > +++ b/xen/arch/x86/hvm/vmx/vvmx.c > @@ -1046,6 +1046,8 @@ static void load_shadow_guest_state(struct vcpu > *v) > vvmcs_to_shadow_bulk(v, ARRAY_SIZE(vmcs_gstate_field), > vmcs_gstate_field); > + nvcpu->guest_cr[0] = __get_vvmcs(vvmcs, CR0_READ_SHADOW); > + nvcpu->guest_cr[4] = __get_vvmcs(vvmcs, CR4_READ_SHADOW); > hvm_set_cr0(__get_vvmcs(vvmcs, GUEST_CR0)); > hvm_set_cr4(__get_vvmcs(vvmcs, GUEST_CR4)); > hvm_set_cr3(__get_vvmcs(vvmcs, GUEST_CR3)); > @@ -2454,3 +2456,45 @@ int nvmx_n2_vmexit_handler(struct cpu_user_regs > *regs, > return ( nvcpu->nv_vmexit_pending == 1 ); > } > +void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr) +{ + > unsigned long cr_field, read_shadow_field, mask_field; + + switch ( > cr ) + { + case 0: + cr_field = GUEST_CR0; + > read_shadow_field = CR0_READ_SHADOW; + mask_field > CR0_GUEST_HOST_MASK; + break; + case 4: + cr_field > GUEST_CR4; + read_shadow_field = CR4_READ_SHADOW; + > mask_field = CR4_GUEST_HOST_MASK; + break; + default: + > gdprintk(XENLOG_WARNING, "Set read shadow 
for CR%d.\n", cr); + > return; + } + + if ( !nestedhvm_vmswitch_in_progress(v) ) + { + > unsigned long virtual_cr_mask = + > __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, mask_field); + + /* + > * We get here when L2 changed cr in a way that did not change + > * any of L1''s shadowed bits (see nvmx_n2_vmexit_handler), + * > but did change L0 shadowed bits. So we first calculate the + * > effective cr value that L1 would like to write into the + * > hardware. It consists of the L2-owned bits from the new + * > value combined with the L1-owned bits from L1''s guest cr. + */ + > v->arch.hvm_vcpu.guest_cr[cr] &= ~virtual_cr_mask; + > v->arch.hvm_vcpu.guest_cr[cr] |= virtual_cr_mask & + > __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, cr_field); + } + + /* > nvcpu.guest_cr is what L2 write to cr actually. */ + > __vmwrite(read_shadow_field, v->arch.hvm_vcpu.nvcpu.guest_cr[cr]); +} > diff --git a/xen/include/asm-x86/hvm/nestedhvm.h > b/xen/include/asm-x86/hvm/nestedhvm.h index 649c511..d8124cf 100644 --- > a/xen/include/asm-x86/hvm/nestedhvm.h +++ > b/xen/include/asm-x86/hvm/nestedhvm.h @@ -68,4 +68,12 @@ void > nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m); > > bool_t nestedhvm_is_n2(struct vcpu *v); > +static inline void nestedhvm_set_cr(struct vcpu *v, unsigned int cr, > + unsigned long value) > +{ > + if ( !nestedhvm_vmswitch_in_progress(v) && > + nestedhvm_vcpu_in_guestmode(v) ) > + v->arch.hvm_vcpu.nvcpu.guest_cr[cr] = value; > +} > + > #endif /* _HVM_NESTEDHVM_H */ > diff --git a/xen/include/asm-x86/hvm/vcpu.h > b/xen/include/asm-x86/hvm/vcpu.h index 4f45060..c1abcec 100644 --- > a/xen/include/asm-x86/hvm/vcpu.h +++ b/xen/include/asm-x86/hvm/vcpu.h @@ > -110,6 +110,9 @@ struct nestedvcpu { > */ > bool_t nv_ioport80; > bool_t nv_ioportED; > + > + /* L2''s control-resgister, just as the L2 sees them. 
*/ > + unsigned long guest_cr[5]; > }; > > #define vcpu_nestedhvm(v) ((v)->arch.hvm_vcpu.nvcpu) > diff --git a/xen/include/asm-x86/hvm/vmx/vvmx.h > b/xen/include/asm-x86/hvm/vmx/vvmx.h index 848be75..c17a440 100644 --- > a/xen/include/asm-x86/hvm/vmx/vvmx.h +++ > b/xen/include/asm-x86/hvm/vmx/vvmx.h @@ -223,6 +223,7 @@ void > nvmx_idtv_handling(void); > u64 nvmx_get_tsc_offset(struct vcpu *v); > int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs, > unsigned int exit_reason); > +void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr); > > uint64_t nept_get_ept_vpid_cap(void);Best regards, Yang
Acked by Eddie Dong <eddie.dong@intel.com> -----Original Message----- From: Zhang, Yang Z Sent: Tuesday, December 03, 2013 3:55 PM To: xen-devel@lists.xenproject.org Cc: JBeulich@suse.com; Dong, Eddie; Zhang, Yang Z Subject: [PATCH v3] Nested VMX: CR emulation fix up From: Yang Zhang <yang.z.zhang@Intel.com> This patch fixs two issues: 1. The CR_READ_SHADOW should only cover the value that L2 wirtes to CR when L2 is running. But currently, L0 wirtes wrong value to it during virtual vmentry and L2''s CR access emualtion. 2. L2 changed cr[0/4] in a way that did not change any of L1''s shadowed bits, but did change L0 shadowed bits. In this case, the effective cr[0/4] value that L1 would like to write into the hardware is consist of the L2-owned bits from the new value combined with the L1-owned bits from L1''s guest cr[0/4]. Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com> --- xen/arch/x86/hvm/hvm.c | 13 +++++++--- xen/arch/x86/hvm/vmx/vmx.c | 13 ++++++++- xen/arch/x86/hvm/vmx/vvmx.c | 44 +++++++++++++++++++++++++++++++++++ xen/include/asm-x86/hvm/nestedhvm.h | 8 ++++++ xen/include/asm-x86/hvm/vcpu.h | 3 ++ xen/include/asm-x86/hvm/vmx/vvmx.h | 1 + 6 files changed, 76 insertions(+), 6 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index e2ba9de..b1f8dfe 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1810,6 +1810,13 @@ void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value) } } +static void hvm_update_cr(struct vcpu *v, unsigned int cr, unsigned +long value) { + v->arch.hvm_vcpu.guest_cr[cr] = value; + nestedhvm_set_cr(v, cr, value); + hvm_update_guest_cr(v, cr); +} + int hvm_set_cr0(unsigned long value) { struct vcpu *v = current; @@ -1915,8 +1922,7 @@ int hvm_set_cr0(unsigned long value) hvm_funcs.handle_cd ) hvm_funcs.handle_cd(v, value); - v->arch.hvm_vcpu.guest_cr[0] = value; - hvm_update_guest_cr(v, 0); + hvm_update_cr(v, 0, value); if ( (value ^ old_value) & X86_CR0_PG ) { if ( 
!nestedhvm_vmswitch_in_progress(v) && nestedhvm_vcpu_in_guestmode(v) ) @@ -1997,8 +2003,7 @@ int hvm_set_cr4(unsigned long value) goto gpf; } - v->arch.hvm_vcpu.guest_cr[4] = value; - hvm_update_guest_cr(v, 4); + hvm_update_cr(v, 4, value); hvm_memory_event_cr4(value, old_cr); /* diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index 8d923e7..4ab15bc 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -1171,6 +1171,11 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) vmx_update_cpu_exec_control(v); } + if ( !nestedhvm_vcpu_in_guestmode(v) ) + __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]); + else + nvmx_set_cr_read_shadow(v, 0); + if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) ) { if ( v != current ) @@ -1219,7 +1224,6 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) v->arch.hvm_vcpu.hw_cr[0] v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask; __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]); - __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]); /* Changing CR0 can change some bits in real CR4. 
*/ vmx_update_guest_cr(v, 4); @@ -1244,6 +1248,12 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK; if ( paging_mode_hap(v->domain) ) v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE; + + if ( !nestedhvm_vcpu_in_guestmode(v) ) + __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]); + else + nvmx_set_cr_read_shadow(v, 4); + v->arch.hvm_vcpu.hw_cr[4] |= v->arch.hvm_vcpu.guest_cr[4]; if ( v->arch.hvm_vmx.vmx_realmode ) v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_VME; @@ -1263,7 +1273,6 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_SMEP; } __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]); - __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]); break; default: BUG(); diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c index 248e975..4df127d 100644 --- a/xen/arch/x86/hvm/vmx/vvmx.c +++ b/xen/arch/x86/hvm/vmx/vvmx.c @@ -1046,6 +1046,8 @@ static void load_shadow_guest_state(struct vcpu *v) vvmcs_to_shadow_bulk(v, ARRAY_SIZE(vmcs_gstate_field), vmcs_gstate_field); + nvcpu->guest_cr[0] = __get_vvmcs(vvmcs, CR0_READ_SHADOW); + nvcpu->guest_cr[4] = __get_vvmcs(vvmcs, CR4_READ_SHADOW); hvm_set_cr0(__get_vvmcs(vvmcs, GUEST_CR0)); hvm_set_cr4(__get_vvmcs(vvmcs, GUEST_CR4)); hvm_set_cr3(__get_vvmcs(vvmcs, GUEST_CR3)); @@ -2454,3 +2456,45 @@ int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs, return ( nvcpu->nv_vmexit_pending == 1 ); } +void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr) { + unsigned long cr_field, read_shadow_field, mask_field; + + switch ( cr ) + { + case 0: + cr_field = GUEST_CR0; + read_shadow_field = CR0_READ_SHADOW; + mask_field = CR0_GUEST_HOST_MASK; + break; + case 4: + cr_field = GUEST_CR4; + read_shadow_field = CR4_READ_SHADOW; + mask_field = CR4_GUEST_HOST_MASK; + break; + default: + gdprintk(XENLOG_WARNING, "Set read shadow for CR%d.\n", cr); + return; + } + + if ( !nestedhvm_vmswitch_in_progress(v) ) + { 
+ unsigned long virtual_cr_mask = + __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, mask_field); + + /* + * We get here when L2 changed cr in a way that did not change + * any of L1''s shadowed bits (see nvmx_n2_vmexit_handler), + * but did change L0 shadowed bits. So we first calculate the + * effective cr value that L1 would like to write into the + * hardware. It consists of the L2-owned bits from the new + * value combined with the L1-owned bits from L1''s guest cr. + */ + v->arch.hvm_vcpu.guest_cr[cr] &= ~virtual_cr_mask; + v->arch.hvm_vcpu.guest_cr[cr] |= virtual_cr_mask & + __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, cr_field); + } + + /* nvcpu.guest_cr is what L2 write to cr actually. */ + __vmwrite(read_shadow_field, v->arch.hvm_vcpu.nvcpu.guest_cr[cr]); +} diff --git a/xen/include/asm-x86/hvm/nestedhvm.h b/xen/include/asm-x86/hvm/nestedhvm.h index 649c511..d8124cf 100644 --- a/xen/include/asm-x86/hvm/nestedhvm.h +++ b/xen/include/asm-x86/hvm/nestedhvm.h @@ -68,4 +68,12 @@ void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m); bool_t nestedhvm_is_n2(struct vcpu *v); +static inline void nestedhvm_set_cr(struct vcpu *v, unsigned int cr, + unsigned long value) { + if ( !nestedhvm_vmswitch_in_progress(v) && + nestedhvm_vcpu_in_guestmode(v) ) + v->arch.hvm_vcpu.nvcpu.guest_cr[cr] = value; } + #endif /* _HVM_NESTEDHVM_H */ diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h index 4f45060..c1abcec 100644 --- a/xen/include/asm-x86/hvm/vcpu.h +++ b/xen/include/asm-x86/hvm/vcpu.h @@ -110,6 +110,9 @@ struct nestedvcpu { */ bool_t nv_ioport80; bool_t nv_ioportED; + + /* L2''s control-resgister, just as the L2 sees them. 
*/ + unsigned long guest_cr[5]; }; #define vcpu_nestedhvm(v) ((v)->arch.hvm_vcpu.nvcpu) diff --git a/xen/include/asm-x86/hvm/vmx/vvmx.h b/xen/include/asm-x86/hvm/vmx/vvmx.h index 848be75..c17a440 100644 --- a/xen/include/asm-x86/hvm/vmx/vvmx.h +++ b/xen/include/asm-x86/hvm/vmx/vvmx.h @@ -223,6 +223,7 @@ void nvmx_idtv_handling(void); u64 nvmx_get_tsc_offset(struct vcpu *v); int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs, unsigned int exit_reason); +void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr); uint64_t nept_get_ept_vpid_cap(void); -- 1.7.1