From: Yang Zhang <yang.z.zhang@Intel.com> This patch fixs two issues: 1. The CR_READ_SHADOW should only cover the value that L2 wirtes to CR when L2 is running. But currently, L0 wirtes wrong value to it during virtual vmentry and L2''s CR access emualtion. 2. L2 changed cr[0/4] in a way that did not change any of L1''s shadowed bits, but did change L0 shadowed bits. In this case, the effective cr[0/4] value that L1 would like to write into the hardware is consist of the L2-owned bits from the new value combined with the L1-owned bits from L1''s guest cr[0/4]. Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com> Acked-by: "Dong, Eddie" <eddie.dong@intel.com> --- xen/arch/x86/hvm/hvm.c | 13 ++++++--- xen/arch/x86/hvm/vmx/vmx.c | 50 +++++++++++++++++++++++++++++++++- xen/arch/x86/hvm/vmx/vvmx.c | 2 + xen/include/asm-x86/hvm/nestedhvm.h | 8 +++++ xen/include/asm-x86/hvm/vcpu.h | 3 ++ 5 files changed, 70 insertions(+), 6 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 5f3a94a..e92bbef 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1703,6 +1703,13 @@ int hvm_mov_from_cr(unsigned int cr, unsigned int gpr) return X86EMUL_UNHANDLEABLE; } +static void hvm_update_cr(struct vcpu *v, unsigned int cr, unsigned long value) +{ + v->arch.hvm_vcpu.guest_cr[cr] = value; + nestedhvm_set_cr(v, cr, value); + hvm_update_guest_cr(v, cr); +} + int hvm_set_cr0(unsigned long value) { struct vcpu *v = current; @@ -1816,8 +1823,7 @@ int hvm_set_cr0(unsigned long value) } } - v->arch.hvm_vcpu.guest_cr[0] = value; - hvm_update_guest_cr(v, 0); + hvm_update_cr(v, 0, value); if ( (value ^ old_value) & X86_CR0_PG ) { if ( !nestedhvm_vmswitch_in_progress(v) && nestedhvm_vcpu_in_guestmode(v) ) @@ -1898,8 +1904,7 @@ int hvm_set_cr4(unsigned long value) goto gpf; } - v->arch.hvm_vcpu.guest_cr[4] = value; - hvm_update_guest_cr(v, 4); + hvm_update_cr(v, 4, value); hvm_memory_event_cr4(value, old_cr); /* diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index 9ca8632..ea8d0a1 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -1096,6 +1096,29 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) vmx_update_cpu_exec_control(v); } + if ( !nestedhvm_vcpu_in_guestmode(v) ) + __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]); + else if ( !nestedhvm_vmswitch_in_progress(v) ) + { + /* + * We get here when L2 changed cr0 in a way that did not change + * any of L1''s shadowed bits (see nvmx_n2_vmexit_handler), + * but did change L0 shadowed bits. So we first calculate the + * effective cr0 value that L1 would like to write into the + * hardware. It consists of the L2-owned bits from the new + * value combined with the L1-owned bits from L1''s guest cr0. + */ + v->arch.hvm_vcpu.guest_cr[0] &+ ~__get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, CR0_GUEST_HOST_MASK); + v->arch.hvm_vcpu.guest_cr[0] |+ __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, GUEST_CR0) & + __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, CR0_GUEST_HOST_MASK); + + /* nvcpu.guest_cr[0] is what L2 write to cr0 actually. */ + __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.nvcpu.guest_cr[0]); + } else + __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.nvcpu.guest_cr[0]); + if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) ) { if ( v != current ) @@ -1144,7 +1167,6 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) v->arch.hvm_vcpu.hw_cr[0] v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask; __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]); - __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]); /* Changing CR0 can change some bits in real CR4. */ vmx_update_guest_cr(v, 4); @@ -1169,6 +1191,31 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK; if ( paging_mode_hap(v->domain) ) v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE; + + if ( !nestedhvm_vcpu_in_guestmode(v) ) + __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]); + else if ( !nestedhvm_vmswitch_in_progress(v) ) + { + /* + * We get here when L2 changed cr4 in a way that did not change + * any of L1''s shadowed bits (see nvmx_n2_vmexit_handler), + * but did change L0 shadowed bits. So we first calculate the + * effective cr4 value that L1 would like to write into the + * hardware. It consists of the L2-owned bits from the new + * value combined with the L1-owned bits from L1''s guest cr4. + */ + v->arch.hvm_vcpu.guest_cr[4] &+ ~__get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, CR4_GUEST_HOST_MASK); + v->arch.hvm_vcpu.guest_cr[4] |+ __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, GUEST_CR4) & + __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, CR4_GUEST_HOST_MASK); + + /* nvcpu.guest_cr[4] is what L2 write to cr4 actually. */ + __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.nvcpu.guest_cr[4]); + } + else + __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.nvcpu.guest_cr[4]); + v->arch.hvm_vcpu.hw_cr[4] |= v->arch.hvm_vcpu.guest_cr[4]; if ( v->arch.hvm_vmx.vmx_realmode ) v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_VME; @@ -1188,7 +1235,6 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_SMEP; } __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]); - __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]); break; default: BUG(); diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c index 2b2de77..6b2d51e 100644 --- a/xen/arch/x86/hvm/vmx/vvmx.c +++ b/xen/arch/x86/hvm/vmx/vvmx.c @@ -1042,6 +1042,8 @@ static void load_shadow_guest_state(struct vcpu *v) vvmcs_to_shadow_bulk(v, ARRAY_SIZE(vmcs_gstate_field), vmcs_gstate_field); + nvcpu->guest_cr[0] = __get_vvmcs(vvmcs, CR0_READ_SHADOW); + nvcpu->guest_cr[4] = __get_vvmcs(vvmcs, CR4_READ_SHADOW); hvm_set_cr0(__get_vvmcs(vvmcs, GUEST_CR0)); hvm_set_cr4(__get_vvmcs(vvmcs, GUEST_CR4)); hvm_set_cr3(__get_vvmcs(vvmcs, GUEST_CR3)); diff --git a/xen/include/asm-x86/hvm/nestedhvm.h b/xen/include/asm-x86/hvm/nestedhvm.h index 649c511..d8124cf 100644 --- a/xen/include/asm-x86/hvm/nestedhvm.h +++ b/xen/include/asm-x86/hvm/nestedhvm.h @@ -68,4 +68,12 @@ void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m); bool_t nestedhvm_is_n2(struct vcpu *v); +static inline void nestedhvm_set_cr(struct vcpu *v, unsigned int cr, + unsigned long value) +{ + if ( !nestedhvm_vmswitch_in_progress(v) && + nestedhvm_vcpu_in_guestmode(v) ) + v->arch.hvm_vcpu.nvcpu.guest_cr[cr] = value; +} + #endif /* _HVM_NESTEDHVM_H */ diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h index 8fa5452..c1aebc4 100644 --- a/xen/include/asm-x86/hvm/vcpu.h +++ b/xen/include/asm-x86/hvm/vcpu.h @@ -108,6 +108,9 @@ struct nestedvcpu { */ bool_t nv_ioport80; bool_t nv_ioportED; + + /* L2''s control-resgister, just as the L2 sees them. */ + unsigned long guest_cr[5]; }; #define vcpu_nestedhvm(v) ((v)->arch.hvm_vcpu.nvcpu) -- 1.7.1
>>> Yang Zhang <yang.z.zhang@intel.com> 10/23/13 9:20 AM >>> >From: Yang Zhang <yang.z.zhang@Intel.com> > >This patch fixs two issues: >1. The CR_READ_SHADOW should only cover the value that L2 wirtes to >CR when L2 is running. But currently, L0 wirtes wrong value to >it during virtual vmentry and L2''s CR access emualtion. > >2. L2 changed cr[0/4] in a way that did not change any of L1''s shadowed >bits, but did change L0 shadowed bits. In this case, the effective cr[0/4] >value that L1 would like to write into the hardware is consist of >the L2-owned bits from the new value combined with the L1-owned bits >from L1''s guest cr[0/4]. > >Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com> >Acked-by: "Dong, Eddie" <eddie.dong@intel.com>I don''t think this can be counted as ack - neither do I recall having seen this ack on the list, nor did you Cc Eddie (I specifically did not alter the Cc list here), i.e. he would also have no way to communicate his objection to this. Please be transparent here (and I think I told you to Cc maintainers on your patches enough times by now). Jan
Jan Beulich wrote on 2013-10-24:>>>> Yang Zhang <yang.z.zhang@intel.com> 10/23/13 9:20 AM >>> >> From: Yang Zhang <yang.z.zhang@Intel.com> >> >> This patch fixs two issues: >> 1. The CR_READ_SHADOW should only cover the value that L2 wirtes to >> CR when L2 is running. But currently, L0 wirtes wrong value to it >> during virtual vmentry and L2''s CR access emualtion. >> >> 2. L2 changed cr[0/4] in a way that did not change any of L1''s >> shadowed bits, but did change L0 shadowed bits. In this case, the >> effective cr[0/4] value that L1 would like to write into the hardware >> is consist of the L2-owned bits from the new value combined with the >> L1-owned bits from L1''s guest cr[0/4]. >> >> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com> >> Acked-by: "Dong, Eddie" <eddie.dong@intel.com> > > I don''t think this can be counted as ack - neither do I recall having > seen this ack on the list, nor did you Cc Eddie (I specifically did not alter the Cc list here), i.e. > he would also have no way to communicate his objection to this.He already acked the first version. Please have a double check or check the attached mail (in case of his mail only send to our internal mailbox).> Please be transparent here (and I think I told you to Cc maintainers > on your patches enough times by now).Sorry. It''s my wrong git configuration. This time I used another machine to send the patch. And it obviously didn''t auto-cc to all body signatures which did in my previous machine.> > Jan > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-develBest regards, Yang _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
>>> "Zhang, Yang Z" <yang.z.zhang@intel.com> 10/24/13 3:14 AM >>> >Jan Beulich wrote on 2013-10-24: >>>>> Yang Zhang <yang.z.zhang@intel.com> 10/23/13 9:20 AM >>> >>> Acked-by: "Dong, Eddie" <eddie.dong@intel.com> >> >> I don''t think this can be counted as ack - neither do I recall having >> seen this ack on the list, nor did you Cc Eddie (I specifically did not alter the Cc list here), i.e. >> he would also have no way to communicate his objection to this. > >He already acked the first version. Please have a double check or check the attached mail (in case of his mail only send to our internal mailbox).But you see, this is again a process problem of yours: You sent a v2 without information on what changed from v1. And I hope you don''t expect me to go back and compare the two versions. Hence I have to assume there were substantial changes that made it not worth pointing them out individually, and I think it''s clear to you that substantial changes always invalidate any earlier acks/reviews (the same would imo go for eventual bugs fixed that the person having given the ack/ review overlooked). Jan
>>> On 23.10.13 at 09:13, Yang Zhang <yang.z.zhang@intel.com> wrote: > From: Yang Zhang <yang.z.zhang@Intel.com> > > This patch fixs two issues: > 1. The CR_READ_SHADOW should only cover the value that L2 wirtes to > CR when L2 is running. But currently, L0 wirtes wrong value to > it during virtual vmentry and L2''s CR access emualtion. > > 2. L2 changed cr[0/4] in a way that did not change any of L1''s shadowed > bits, but did change L0 shadowed bits. In this case, the effective cr[0/4] > value that L1 would like to write into the hardware is consist of > the L2-owned bits from the new value combined with the L1-owned bits > from L1''s guest cr[0/4]. > > Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com> > Acked-by: "Dong, Eddie" <eddie.dong@intel.com>So apart from wanting to see a proper ack on this patch (or a description of the changes in v2 that makes clear that a previous ack did not get invalidated) ...> --- a/xen/arch/x86/hvm/vmx/vmx.c > +++ b/xen/arch/x86/hvm/vmx/vmx.c > @@ -1096,6 +1096,29 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) > vmx_update_cpu_exec_control(v); > } > > + if ( !nestedhvm_vcpu_in_guestmode(v) ) > + __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]); > + else if ( !nestedhvm_vmswitch_in_progress(v) ) > + { > + /* > + * We get here when L2 changed cr0 in a way that did not change > + * any of L1''s shadowed bits (see nvmx_n2_vmexit_handler), > + * but did change L0 shadowed bits. So we first calculate the > + * effective cr0 value that L1 would like to write into the > + * hardware. It consists of the L2-owned bits from the new > + * value combined with the L1-owned bits from L1''s guest cr0. > + */ > + v->arch.hvm_vcpu.guest_cr[0] &> + ~__get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, CR0_GUEST_HOST_MASK); > + v->arch.hvm_vcpu.guest_cr[0] |> + __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, GUEST_CR0) & > + __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, CR0_GUEST_HOST_MASK);... I''d also like to see clarified whether __get_vvmcs() really is cheap enough to get invoked twice for the same register here instead of the result latched into a local variable and re-used.> + > + /* nvcpu.guest_cr[0] is what L2 write to cr0 actually. */ > + __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.nvcpu.guest_cr[0]); > + } elseAnd should the above result in the patch to be rev''ed again, please also correct the coding style violation here. Jan
Jan Beulich wrote on 2013-10-28:>>>> On 23.10.13 at 09:13, Yang Zhang <yang.z.zhang@intel.com> wrote: >> From: Yang Zhang <yang.z.zhang@Intel.com> >> >> This patch fixs two issues: >> 1. The CR_READ_SHADOW should only cover the value that L2 wirtes to >> CR when L2 is running. But currently, L0 wirtes wrong value to it >> during virtual vmentry and L2''s CR access emualtion. >> >> 2. L2 changed cr[0/4] in a way that did not change any of L1''s >> shadowed bits, but did change L0 shadowed bits. In this case, the >> effective cr[0/4] value that L1 would like to write into the >> hardware is consist of the L2-owned bits from the new value combined >> with the L1-owned bits from L1''s guest cr[0/4]. >> >> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com> >> Acked-by: "Dong, Eddie" <eddie.dong@intel.com> > > So apart from wanting to see a proper ack on this patch (or a > description of the changes in v2 that makes clear that a previous ack did not get invalidated) ... > >> --- a/xen/arch/x86/hvm/vmx/vmx.c >> +++ b/xen/arch/x86/hvm/vmx/vmx.c >> @@ -1096,6 +1096,29 @@ static void vmx_update_guest_cr(struct vcpu >> *v, > unsigned int cr) >> vmx_update_cpu_exec_control(v); >> } >> + if ( !nestedhvm_vcpu_in_guestmode(v) ) + >> __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]); + else >> if ( !nestedhvm_vmswitch_in_progress(v) ) + { + /* + >> * We get here when L2 changed cr0 in a way that did + not >> change + * any of L1''s shadowed bits (see >> nvmx_n2_vmexit_handler), + * but did change L0 shadowed >> bits. So we first calculate the + * effective cr0 value >> that L1 would like to write into the + * hardware. It >> consists of the L2-owned bits from the new + * value >> combined with the L1-owned bits from L1''s + guest cr0. + */ >> + v->arch.hvm_vcpu.guest_cr[0] &= + >> ~__get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, CR0_GUEST_HOST_MASK); + >> v->arch.hvm_vcpu.guest_cr[0] |= + >> __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, GUEST_CR0) & + >> __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, + CR0_GUEST_HOST_MASK); > > ... I''d also like to see clarified whether __get_vvmcs() really is > cheap enough to get invoked twice for the same register here instead > of the result latched into a local variable and re-used.Actually, this code is executed rarely. But you are right, using a local variable is a better choice.> >> + + /* nvcpu.guest_cr[0] is what L2 write to cr0 actually. >> */ + __vmwrite(CR0_READ_SHADOW, >> v->arch.hvm_vcpu.nvcpu.guest_cr[0]); + } else > > And should the above result in the patch to be rev''ed again, please > also correct the coding style violation here.Current code seems a little redundant. I revised it to put the redundant code into nvmx_set_cr_read_shadow(). Please review it. void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr) { unsigned long cr_field, read_shadow_field, mask_field; switch ( cr ) { case 0: cr_field = GUEST_CR0; read_shadow_field = CR0_READ_SHADOW; mask_field = CR0_GUEST_HOST_MASK; break; case 4: cr_field = GUEST_CR4; read_shadow_field = CR4_READ_SHADOW; mask_field = CR4_GUEST_HOST_MASK; break; default: gdprintk(XENLOG_WARNING, "Set read shadow for CR%d.\n", cr); return; } if ( !nestedhvm_vmswitch_in_progress(v) ) { unsigned long virtual_cr_mask = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, mask_field); /* * We get here when L2 changed cr in a way that did not change * any of L1''s shadowed bits (see nvmx_n2_vmexit_handler), * but did change L0 shadowed bits. So we first calculate the * effective cr value that L1 would like to write into the * hardware. It consists of the L2-owned bits from the new * value combined with the L1-owned bits from L1''s guest cr. */ v->arch.hvm_vcpu.guest_cr[cr] &= ~virtual_cr_mask; v->arch.hvm_vcpu.guest_cr[cr] |= virtual_cr_mask & __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, cr_field); } /* nvcpu.guest_cr is what L2 write to cr actually. */ __vmwrite(read_shadow_field, v->arch.hvm_vcpu.nvcpu.guest_cr[cr]); }> > JanBest regards, Yang
>>> On 29.10.13 at 06:10, "Zhang, Yang Z" <yang.z.zhang@intel.com> wrote: > Current code seems a little redundant. I revised it to put the redundant > code into nvmx_set_cr_read_shadow(). Please review it. > > void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr) > { > unsigned long cr_field, read_shadow_field, mask_field; > > switch ( cr ) > { > case 0: > cr_field = GUEST_CR0; > read_shadow_field = CR0_READ_SHADOW; > mask_field = CR0_GUEST_HOST_MASK; > break; > case 4: > cr_field = GUEST_CR4; > read_shadow_field = CR4_READ_SHADOW; > mask_field = CR4_GUEST_HOST_MASK; > break; > default: > gdprintk(XENLOG_WARNING, "Set read shadow for CR%d.\n", cr); > return; > } > > if ( !nestedhvm_vmswitch_in_progress(v) ) > { > unsigned long virtual_cr_mask = > __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, mask_field); > > /* > * We get here when L2 changed cr in a way that did not change > * any of L1''s shadowed bits (see nvmx_n2_vmexit_handler), > * but did change L0 shadowed bits. So we first calculate the > * effective cr value that L1 would like to write into the > * hardware. It consists of the L2-owned bits from the new > * value combined with the L1-owned bits from L1''s guest cr. > */ > v->arch.hvm_vcpu.guest_cr[cr] &= ~virtual_cr_mask; > v->arch.hvm_vcpu.guest_cr[cr] |= virtual_cr_mask & > __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, cr_field); > } > > /* nvcpu.guest_cr is what L2 write to cr actually. */ > __vmwrite(read_shadow_field, v->arch.hvm_vcpu.nvcpu.guest_cr[cr]); > }This looks quite a bit cleaner in any event, and I''m certainly in favor of eliminating unnecessary redundancy. Jan