From 54fe7e722dd4ebba91bde16a1860f49a1cce4e5e Mon Sep 17 00:00:00 2001 From: Liu Jinsong <jinsong.liu@intel.com> Date: Wed, 4 Dec 2013 00:57:23 +0800 Subject: [PATCH v5 4/7] X86: generic MSRs save/restore This patch introduced a generic MSRs save/restore mechanism, so that in the future new MSRs save/restore could be added w/ smaller change than the full blown addition of a new save/restore type. Suggested-by: Jan Beulich <jbeulich@suse.com> Signed-off-by: Liu Jinsong <jinsong.liu@intel.com> --- xen/arch/x86/hvm/hvm.c | 74 ++++++++++++++++++++++++++++++++ xen/arch/x86/hvm/vmx/vmx.c | 17 +++++++ xen/include/asm-x86/hvm/hvm.h | 3 + xen/include/public/arch-x86/hvm/save.h | 18 +++++++- 4 files changed, 111 insertions(+), 1 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 0f7178b..fb46e4b 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -702,6 +702,80 @@ static int hvm_load_tsc_adjust(struct domain *d, hvm_domain_context_t *h) HVM_REGISTER_SAVE_RESTORE(TSC_ADJUST, hvm_save_tsc_adjust, hvm_load_tsc_adjust, 1, HVMSR_PER_VCPU); +/* Temporarily NULL, could be added in the future */ +static void hvm_save_msr_common(struct vcpu *v, struct hvm_msr *ctxt) +{ +} + +/* + * Temporarily NULL, could be added in the future: + * For msr load fail, return error (other than -ENOENT); + * For msr load success, return 0; + * For msr not found, return -ENOENT; + */ +static int hvm_load_msr_common(struct vcpu *v, struct hvm_msr *ctxt) +{ + return -ENOENT; +} + +static int hvm_save_msr(struct domain *d, hvm_domain_context_t *h) +{ + struct vcpu *v; + struct hvm_msr ctxt; + int err = 0; + + for_each_vcpu ( d, v ) + { + memset(&ctxt, 0, sizeof(ctxt)); + + /* For common msrs */ + hvm_save_msr_common(v, &ctxt); + + /* For vmx/svm specific msrs */ + if ( hvm_funcs.save_msr ) + hvm_funcs.save_msr(v, &ctxt); + + err = hvm_save_entry(HVM_MSR, v->vcpu_id, h, &ctxt); + if ( err ) + break; + } + + return err; +} + +static int hvm_load_msr(struct domain *d, hvm_domain_context_t *h) +{ + unsigned int vcpuid = hvm_load_instance(h); + struct vcpu *v; + struct hvm_msr ctxt; + int ret; + + if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) + { + dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n", + d->domain_id, vcpuid); + return -EINVAL; + } + + if ( hvm_load_entry(HVM_MSR, h, &ctxt) != 0 ) + return -EINVAL; + + /* For common msrs */ + ret = hvm_load_msr_common(v, &ctxt); + if ( ret == -ENOENT ) + { + /* For vmx/svm specific msrs */ + if ( hvm_funcs.load_msr ) + return hvm_funcs.load_msr(v, &ctxt); + else + return -EINVAL; + } + return ret; +} + +HVM_REGISTER_SAVE_RESTORE(HVM_MSR, hvm_save_msr, + hvm_load_msr, 1, HVMSR_PER_VCPU); + static int hvm_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h) { struct vcpu *v; diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index f0132a4..bac19f3 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -580,6 +580,21 @@ static int vmx_load_vmcs_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt) return 0; } +/* Temporarily NULL, could be added in the future */ +static void vmx_save_msr(struct vcpu *v, struct hvm_msr *ctxt) +{ +} + +/* + * Temporarily NULL, could be added in the future: + * For msr load fail, or msr not found, return error; + * For msr load success, return 0; + */ +static int vmx_load_msr(struct vcpu *v, struct hvm_msr *ctxt) +{ + return 0; +} + static void vmx_fpu_enter(struct vcpu *v) { vcpu_restore_fpu_lazy(v); @@ -1606,6 +1621,8 @@ static struct hvm_function_table 
__initdata vmx_function_table = { .vcpu_destroy = vmx_vcpu_destroy, .save_cpu_ctxt = vmx_save_vmcs_ctxt, .load_cpu_ctxt = vmx_load_vmcs_ctxt, + .save_msr = vmx_save_msr, + .load_msr = vmx_load_msr, .get_interrupt_shadow = vmx_get_interrupt_shadow, .set_interrupt_shadow = vmx_set_interrupt_shadow, .guest_x86_mode = vmx_guest_x86_mode, diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h index a8ba06d..1c09d41 100644 --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -109,6 +109,9 @@ struct hvm_function_table { void (*save_cpu_ctxt)(struct vcpu *v, struct hvm_hw_cpu *ctxt); int (*load_cpu_ctxt)(struct vcpu *v, struct hvm_hw_cpu *ctxt); + void (*save_msr)(struct vcpu *v, struct hvm_msr *ctxt); + int (*load_msr)(struct vcpu *v, struct hvm_msr *ctxt); + /* Examine specifics of the guest state. */ unsigned int (*get_interrupt_shadow)(struct vcpu *v); void (*set_interrupt_shadow)(struct vcpu *v, unsigned int intr_shadow); diff --git a/xen/include/public/arch-x86/hvm/save.h b/xen/include/public/arch-x86/hvm/save.h index 3664aaf..e440eb5 100644 --- a/xen/include/public/arch-x86/hvm/save.h +++ b/xen/include/public/arch-x86/hvm/save.h @@ -592,9 +592,25 @@ struct hvm_tsc_adjust { DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust); +enum { + HVM_MSR_COUNT, +}; + +struct msr_save_load { + uint32_t index; + uint64_t val; +}; + +struct hvm_msr { + uint32_t count; + struct msr_save_load msr[HVM_MSR_COUNT]; +}; + +DECLARE_HVM_SAVE_TYPE(HVM_MSR, 20, struct hvm_msr); + /* * Largest type-code in use */ -#define HVM_SAVE_CODE_MAX 19 +#define HVM_SAVE_CODE_MAX 20 #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */ -- 1.7.1 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
>>> On 03.12.13 at 15:50, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
> Subject: [PATCH v5 4/7] X86: generic MSRs save/restore
>
> This patch introduced a generic MSRs save/restore mechanism, so that
> in the future new MSRs save/restore could be added w/ smaller change
> than the full blown addition of a new save/restore type.
>
> Suggested-by: Jan Beulich <jbeulich@suse.com>

Attached is (obviously compile tested only) what I really suggested.

Jan
x86: generic MSRs save/restore This patch introduces a generic MSRs save/restore mechanism, so that in the future new MSRs save/restore could be added w/ smaller change than the full blown addition of a new save/restore type. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1127,10 +1127,110 @@ static int hvm_load_cpu_xsave_states(str return 0; } -/* We need variable length data chunk for xsave area, hence customized - * declaration other than HVM_REGISTER_SAVE_RESTORE. +#define HVM_CPU_MSR_SIZE(cnt) offsetof(struct hvm_msr, msr[cnt]) +static unsigned int __read_mostly msr_count_max; + +static int hvm_save_cpu_msrs(struct domain *d, hvm_domain_context_t *h) +{ + struct vcpu *v; + + if ( !msr_count_max ) + return 0; ASSERT( msr_count_mas ); ? + + for_each_vcpu ( d, v ) + { + struct hvm_msr *ctxt; + unsigned int i; + + if ( _hvm_init_entry(h, CPU_MSR_CODE, v->vcpu_id, + HVM_CPU_MSR_SIZE(msr_count_max)) ) + return 1; + ctxt = (struct hvm_msr *)&h->data[h->cur]; + ctxt->count = 0; + + if ( hvm_funcs.save_msr ) + hvm_funcs.save_msr(v, ctxt); + + for ( i = 0; i < ctxt->count; ++i ) + ctxt->msr[i]._rsvd = 0; What _rsvd for? seems not used at the patches. + + if ( ctxt->count ) + h->cur += HVM_CPU_MSR_SIZE(ctxt->count); + else + h->cur -= sizeof(struct hvm_save_descriptor); + } + + return 0; +} + +static int hvm_load_cpu_msrs(struct domain *d, hvm_domain_context_t *h) +{ + unsigned int i, vcpuid = hvm_load_instance(h); + struct vcpu *v; + const struct hvm_save_descriptor *desc; + const struct hvm_msr *ctxt; + int err = 0; + + if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) + { + dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n", + d->domain_id, vcpuid); + return -EINVAL; + } + + /* Customized checking for entry since our entry is of variable length */ + desc = (struct hvm_save_descriptor *)&h->data[h->cur]; + if ( sizeof (*desc) > h->size - h->cur) + { + printk(XENLOG_G_WARNING + "HVM%d.%d restore: not enough data left to read MSR descriptor\n", + d->domain_id, vcpuid); + return -ENODATA; + } + if ( desc->length + sizeof (*desc) > h->size - h->cur) + { + printk(XENLOG_G_WARNING + "HVM%d.%d restore: not enough data left to read %u MSR bytes\n", + d->domain_id, vcpuid, desc->length); + return -ENODATA; + } + if ( desc->length < HVM_CPU_MSR_SIZE(1) ) + { + printk(XENLOG_G_WARNING + "HVM%d.%d restore mismatch: MSR length %u < %zu\n", + d->domain_id, vcpuid, desc->length, HVM_CPU_MSR_SIZE(1)); + return -EINVAL; + } + + h->cur += sizeof(*desc); + ctxt = (struct hvm_msr *)&h->data[h->cur]; + h->cur += desc->length; + + if ( desc->length != HVM_CPU_MSR_SIZE(ctxt->count) ) + { + printk(XENLOG_G_WARNING + "HVM%d.%d restore mismatch: MSR length %u != %zu\n", + d->domain_id, vcpuid, desc->length, + HVM_CPU_MSR_SIZE(ctxt->count)); + return -EOPNOTSUPP; + } + /* Checking finished */ + + for ( i = 0; i < ctxt->count; ++i ) + { + if ( hvm_funcs.load_msr ) + err = hvm_funcs.load_msr(v, &ctxt->msr[i]); + if ( err ) + break; + } Is it for vmx/svm specific msrs, or, will extend to generic msrs? If these patches are generic save/load mechanism supporting generic msrs and vmx/sve specific msrs, it need update here. + + return err; +} + +/* We need variable length data chunks for XSAVE area and MSRs, hence + * a custom declaration rather than HVM_REGISTER_SAVE_RESTORE. 
*/ -static int __init __hvm_register_CPU_XSAVE_save_and_restore(void) +static int __init hvm_register_CPU_save_and_restore(void) { hvm_register_savevm(CPU_XSAVE_CODE, "CPU_XSAVE", @@ -1139,9 +1239,22 @@ static int __init __hvm_register_CPU_XSA HVM_CPU_XSAVE_SIZE(xfeature_mask) + sizeof(struct hvm_save_descriptor), HVMSR_PER_VCPU); + + if ( hvm_funcs.init_msr ) + msr_count_max += hvm_funcs.init_msr(); + + if ( msr_count_max ) + hvm_register_savevm(CPU_MSR_CODE, + "CPU_MSR", + hvm_save_cpu_msrs, + hvm_load_cpu_msrs, + HVM_CPU_MSR_SIZE(msr_count_max) + + sizeof(struct hvm_save_descriptor), + HVMSR_PER_VCPU); + return 0; } -__initcall(__hvm_register_CPU_XSAVE_save_and_restore); +__initcall(hvm_register_CPU_save_and_restore); int hvm_vcpu_initialise(struct vcpu *v) { --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -109,6 +109,10 @@ struct hvm_function_table { void (*save_cpu_ctxt)(struct vcpu *v, struct hvm_hw_cpu *ctxt); int (*load_cpu_ctxt)(struct vcpu *v, struct hvm_hw_cpu *ctxt); + unsigned int (*init_msr)(void); + void (*save_msr)(struct vcpu *, struct hvm_msr *); + int (*load_msr)(struct vcpu *, const struct hvm_one_msr *); + /* Examine specifics of the guest state. */ unsigned int (*get_interrupt_shadow)(struct vcpu *v); void (*set_interrupt_shadow)(struct vcpu *v, unsigned int intr_shadow); --- a/xen/include/public/arch-x86/hvm/save.h +++ b/xen/include/public/arch-x86/hvm/save.h @@ -592,9 +592,21 @@ struct hvm_tsc_adjust { DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust); + +struct hvm_msr { + uint32_t count; + struct hvm_one_msr { + uint32_t index; + uint32_t _rsvd; + uint64_t val; + } msr[1 /* variable size */]; +}; + +#define CPU_MSR_CODE 16 + It''s incorrect, conflict with CPU_XSAVE_CODE and panic when hvm_register_savevm(CPU_MSR_CODE, ...) Why not 20? /* * Largest type-code in use */ -#define HVM_SAVE_CODE_MAX 19 +#define HVM_SAVE_CODE_MAX 20 #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */
x86: MSR_IA32_BNDCFGS save/restore Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -580,6 +580,50 @@ static int vmx_load_vmcs_ctxt(struct vcp return 0; } +static unsigned int __init vmx_init_msr(void) +{ + return !!cpu_has_mpx; +} + +static void vmx_save_msr(struct vcpu *v, struct hvm_msr *ctxt) +{ + vmx_vmcs_enter(v); + + if ( cpu_has_mpx ) + { + __vmread(GUEST_BNDCFGS, &ctxt->msr[ctxt->count].val); + if ( ctxt->msr[ctxt->count].val ) Isn''t it better to remove if()? + ctxt->msr[ctxt->count++].index = MSR_IA32_BNDCFGS; + } + + vmx_vmcs_exit(v); +} + +static int vmx_load_msr(struct vcpu *v, const struct hvm_one_msr *msr) +{ + int err = 0; + + vmx_vmcs_enter(v); + + switch ( msr->index ) + { + case MSR_IA32_BNDCFGS: + if ( cpu_has_mpx ) + __vmwrite(GUEST_BNDCFGS, msr->val); + else + err = -ENXIO; + break; + + default: + err = -ENOENT; + break; + } + + vmx_vmcs_exit(v); + + return err; +} + static void vmx_fpu_enter(struct vcpu *v) { vcpu_restore_fpu_lazy(v); @@ -1606,6 +1650,9 @@ static struct hvm_function_table __initd .vcpu_destroy = vmx_vcpu_destroy, .save_cpu_ctxt = vmx_save_vmcs_ctxt, .load_cpu_ctxt = vmx_load_vmcs_ctxt, + .init_msr = vmx_init_msr, + .save_msr = vmx_save_msr, + .load_msr = vmx_load_msr, .get_interrupt_shadow = vmx_get_interrupt_shadow, .set_interrupt_shadow = vmx_set_interrupt_shadow, .guest_x86_mode = vmx_guest_x86_mode, --- a/xen/include/asm-x86/hvm/vmx/vmcs.h +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h @@ -367,6 +367,8 @@ enum vmcs_field { GUEST_PDPTR2_HIGH = 0x0000280f, GUEST_PDPTR3 = 0x00002810, GUEST_PDPTR3_HIGH = 0x00002811, + GUEST_BNDCFGS = 0x00002812, + GUEST_BNDCFGS_HIGH = 0x00002813, HOST_PAT = 0x00002c00, HOST_PAT_HIGH = 0x00002c01, HOST_EFER = 0x00002c02,
>>> On 13.12.13 at 08:50, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:Please get your reply quoting fixed.> --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -1127,10 +1127,110 @@ static int hvm_load_cpu_xsave_states(str > return 0; > } > > -/* We need variable length data chunk for xsave area, hence customized > - * declaration other than HVM_REGISTER_SAVE_RESTORE. > +#define HVM_CPU_MSR_SIZE(cnt) offsetof(struct hvm_msr, msr[cnt]) > +static unsigned int __read_mostly msr_count_max; > + > +static int hvm_save_cpu_msrs(struct domain *d, hvm_domain_context_t *h) > +{ > + struct vcpu *v; > + > + if ( !msr_count_max ) > + return 0; > > ASSERT( msr_count_mas ); ?Right. Left from before the registration was made conditional.> + > + for_each_vcpu ( d, v ) > + { > + struct hvm_msr *ctxt; > + unsigned int i; > + > + if ( _hvm_init_entry(h, CPU_MSR_CODE, v->vcpu_id, > + HVM_CPU_MSR_SIZE(msr_count_max)) ) > + return 1; > + ctxt = (struct hvm_msr *)&h->data[h->cur]; > + ctxt->count = 0; > + > + if ( hvm_funcs.save_msr ) > + hvm_funcs.save_msr(v, ctxt); > + > + for ( i = 0; i < ctxt->count; ++i ) > + ctxt->msr[i]._rsvd = 0; > > What _rsvd for? seems not used at the patches.See XSA-77 and http://lists.xenproject.org/archives/html/xen-devel/2013-12/msg01522.html - avoid leaking hypervisor data.> +static int hvm_load_cpu_msrs(struct domain *d, hvm_domain_context_t *h) > +{ > + unsigned int i, vcpuid = hvm_load_instance(h); > + struct vcpu *v; > + const struct hvm_save_descriptor *desc; > + const struct hvm_msr *ctxt; > + int err = 0; > + > + if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) > + { > + dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n", > + d->domain_id, vcpuid); > + return -EINVAL; > + } > + > + /* Customized checking for entry since our entry is of variable length */ > + desc = (struct hvm_save_descriptor *)&h->data[h->cur]; > + if ( sizeof (*desc) > h->size - h->cur) > + { > + printk(XENLOG_G_WARNING > + "HVM%d.%d restore: not enough data left to read MSR descriptor\n", > + d->domain_id, vcpuid); > + return -ENODATA; > + } > + if ( desc->length + sizeof (*desc) > h->size - h->cur) > + { > + printk(XENLOG_G_WARNING > + "HVM%d.%d restore: not enough data left to read %u MSR bytes\n", > + d->domain_id, vcpuid, desc->length); > + return -ENODATA; > + } > + if ( desc->length < HVM_CPU_MSR_SIZE(1) ) > + { > + printk(XENLOG_G_WARNING > + "HVM%d.%d restore mismatch: MSR length %u < %zu\n", > + d->domain_id, vcpuid, desc->length, HVM_CPU_MSR_SIZE(1)); > + return -EINVAL; > + } > + > + h->cur += sizeof(*desc); > + ctxt = (struct hvm_msr *)&h->data[h->cur]; > + h->cur += desc->length; > + > + if ( desc->length != HVM_CPU_MSR_SIZE(ctxt->count) ) > + { > + printk(XENLOG_G_WARNING > + "HVM%d.%d restore mismatch: MSR length %u != %zu\n", > + d->domain_id, vcpuid, desc->length, > + HVM_CPU_MSR_SIZE(ctxt->count)); > + return -EOPNOTSUPP; > + } > + /* Checking finished */ > + > + for ( i = 0; i < ctxt->count; ++i ) > + { > + if ( hvm_funcs.load_msr ) > + err = hvm_funcs.load_msr(v, &ctxt->msr[i]); > + if ( err ) > + break; > + } > > Is it for vmx/svm specific msrs, or, will extend to generic msrs? > If these patches are generic save/load mechanism supporting generic msrs and > vmx/sve specific msrs, it need update here.In which way? 
But I'll re-write this anyway, so that we don't need to handle MSRs
one by one (leveraging the _rsvd field).

> --- a/xen/include/public/arch-x86/hvm/save.h
> +++ b/xen/include/public/arch-x86/hvm/save.h
> @@ -592,9 +592,21 @@ struct hvm_tsc_adjust {
>
>  DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust);
>
> +
> +struct hvm_msr {
> +    uint32_t count;
> +    struct hvm_one_msr {
> +        uint32_t index;
> +        uint32_t _rsvd;
> +        uint64_t val;
> +    } msr[1 /* variable size */];
> +};
> +
> +#define CPU_MSR_CODE 16
> +
>
> It's incorrect, conflict with CPU_XSAVE_CODE and panic when
> hvm_register_savevm(CPU_MSR_CODE, ...)
> Why not 20?

Copy-n-paste mistake. Thanks for spotting.

Jan
>>> On 13.12.13 at 08:57, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -580,6 +580,50 @@ static int vmx_load_vmcs_ctxt(struct vcp
>      return 0;
>  }
>
> +static unsigned int __init vmx_init_msr(void)
> +{
> +    return !!cpu_has_mpx;
> +}
> +
> +static void vmx_save_msr(struct vcpu *v, struct hvm_msr *ctxt)
> +{
> +    vmx_vmcs_enter(v);
> +
> +    if ( cpu_has_mpx )
> +    {
> +        __vmread(GUEST_BNDCFGS, &ctxt->msr[ctxt->count].val);
> +        if ( ctxt->msr[ctxt->count].val )
>
> Isn't it better to remove if()?

Definitely not: That way, if the hardware supports MPX but the
guest never used it, restoring the guest on an MPX-incapable
host will be possible. And when the MSR is zero, restoring on an
MPX-capable host will be correct too, as the respective VMCS
field starts out zeroed.

Jan
On 13/12/2013 09:47, Jan Beulich wrote:
>>>> On 13.12.13 at 08:57, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>> @@ -580,6 +580,50 @@ static int vmx_load_vmcs_ctxt(struct vcp
>>      return 0;
>>  }
>>
>> +static unsigned int __init vmx_init_msr(void)
>> +{
>> +    return !!cpu_has_mpx;
>> +}
>> +
>> +static void vmx_save_msr(struct vcpu *v, struct hvm_msr *ctxt)
>> +{
>> +    vmx_vmcs_enter(v);
>> +
>> +    if ( cpu_has_mpx )
>> +    {
>> +        __vmread(GUEST_BNDCFGS, &ctxt->msr[ctxt->count].val);
>> +        if ( ctxt->msr[ctxt->count].val )
>>
>> Isn't it better to remove if()?
> Definitely not: That way, if the hardware supports MPX but the
> guest never used it, restoring the guest on an MPX-incapable
> host will be possible. And when the MSR is zero, restoring on an
> MPX-capable host will be correct too, as the respective VMCS
> field starts out zeroed.
>
> Jan

Furthermore, this looks as if it is heading straight for an ABI breakage.

What guarantee is there that the MSRs shall always be saved and restored
in this specific order forever more in the future?

I think ctxt->count needs to be replaced with an ABI constant.

~Andrew
>>> On 13.12.13 at 13:04, Andrew Cooper <andrew.cooper3@citrix.com> wrote: > On 13/12/2013 09:47, Jan Beulich wrote: >>>>> On 13.12.13 at 08:57, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: >>> --- a/xen/arch/x86/hvm/vmx/vmx.c >>> +++ b/xen/arch/x86/hvm/vmx/vmx.c >>> @@ -580,6 +580,50 @@ static int vmx_load_vmcs_ctxt(struct vcp >>> return 0; >>> } >>> >>> +static unsigned int __init vmx_init_msr(void) >>> +{ >>> + return !!cpu_has_mpx; >>> +} >>> + >>> +static void vmx_save_msr(struct vcpu *v, struct hvm_msr *ctxt) >>> +{ >>> + vmx_vmcs_enter(v); >>> + >>> + if ( cpu_has_mpx ) >>> + { >>> + __vmread(GUEST_BNDCFGS, &ctxt->msr[ctxt->count].val); >>> + if ( ctxt->msr[ctxt->count].val ) >>> >>> >>> Isn''t it better to remove if()? >> Definitely not: That way, if the hardware support MPX but the >> guest never used it, restoring the guest on an MPX-incapable >> host will be possible. And when the MSR is zero, restoring on an >> MPX-capable host will be correct too, as the respective VMCS >> field starts out zeroed. > > Furthermore, this looks as if is heading straight for an ABI breakage. > > What guarantee is that that the MSRs shall aways be saved and restored > in this specific order forever more in the future? > > I think ctxt->count needs to be replaced with an ABI constant.??? Jan
This patch introduces a generic MSRs save/restore mechanism, so that in the future new MSRs save/restore could be added w/ smaller change than the full blown addition of a new save/restore type. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1127,10 +1127,117 @@ static int hvm_load_cpu_xsave_states(str return 0; } -/* We need variable length data chunk for xsave area, hence customized - * declaration other than HVM_REGISTER_SAVE_RESTORE. +#define HVM_CPU_MSR_SIZE(cnt) offsetof(struct hvm_msr, msr[cnt]) +static unsigned int __read_mostly msr_count_max; + +static int hvm_save_cpu_msrs(struct domain *d, hvm_domain_context_t *h) +{ + struct vcpu *v; + + for_each_vcpu ( d, v ) + { + struct hvm_msr *ctxt; + unsigned int i; + + if ( _hvm_init_entry(h, CPU_MSR_CODE, v->vcpu_id, + HVM_CPU_MSR_SIZE(msr_count_max)) ) + return 1; + ctxt = (struct hvm_msr *)&h->data[h->cur]; + ctxt->count = 0; + + if ( hvm_funcs.save_msr ) + hvm_funcs.save_msr(v, ctxt); + + for ( i = 0; i < ctxt->count; ++i ) + ctxt->msr[i]._rsvd = 0; + + if ( ctxt->count ) + h->cur += HVM_CPU_MSR_SIZE(ctxt->count); + else + h->cur -= sizeof(struct hvm_save_descriptor); + } + + return 0; +} + +static int hvm_load_cpu_msrs(struct domain *d, hvm_domain_context_t *h) +{ + unsigned int i, vcpuid = hvm_load_instance(h); + struct vcpu *v; + const struct hvm_save_descriptor *desc; + struct hvm_msr *ctxt; + int err = 0; + + if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) + { + dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n", + d->domain_id, vcpuid); + return -EINVAL; + } + + /* Customized checking for entry since our entry is of variable length */ + desc = (struct hvm_save_descriptor *)&h->data[h->cur]; + if ( sizeof (*desc) > h->size - h->cur) + { + printk(XENLOG_G_WARNING + "HVM%d.%d restore: not enough data left to read MSR descriptor\n", + d->domain_id, vcpuid); + return -ENODATA; + } + if ( desc->length + sizeof (*desc) > h->size - h->cur) + { + printk(XENLOG_G_WARNING + "HVM%d.%d restore: not enough data left to read %u MSR bytes\n", + d->domain_id, vcpuid, desc->length); + return -ENODATA; + } + if ( desc->length < HVM_CPU_MSR_SIZE(1) ) + { + printk(XENLOG_G_WARNING + "HVM%d.%d restore mismatch: MSR length %u < %zu\n", + d->domain_id, vcpuid, desc->length, HVM_CPU_MSR_SIZE(1)); + return -EINVAL; + } + + h->cur += sizeof(*desc); + ctxt = (struct hvm_msr *)&h->data[h->cur]; + h->cur += desc->length; + + if ( desc->length != HVM_CPU_MSR_SIZE(ctxt->count) ) + { + printk(XENLOG_G_WARNING + "HVM%d.%d restore mismatch: MSR length %u != %zu\n", + d->domain_id, vcpuid, desc->length, + HVM_CPU_MSR_SIZE(ctxt->count)); + return -EOPNOTSUPP; + } + + for ( i = 0; i < ctxt->count; ++i ) + if ( ctxt->msr[i]._rsvd ) + return -EOPNOTSUPP; + /* Checking finished */ + + if ( hvm_funcs.load_msr ) + err = hvm_funcs.load_msr(v, ctxt); + + for ( i = 0; !err && i < ctxt->count; ++i ) + { + switch ( ctxt->msr[i].index ) + { + default: + if ( !ctxt->msr[i]._rsvd ) + err = -ENXIO; + break; + } + } + + return err; +} + +/* We need variable length data chunks for XSAVE area and MSRs, hence + * a custom declaration rather than HVM_REGISTER_SAVE_RESTORE. 
*/ -static int __init __hvm_register_CPU_XSAVE_save_and_restore(void) +static int __init hvm_register_CPU_save_and_restore(void) { hvm_register_savevm(CPU_XSAVE_CODE, "CPU_XSAVE", @@ -1139,9 +1246,22 @@ static int __init __hvm_register_CPU_XSA HVM_CPU_XSAVE_SIZE(xfeature_mask) + sizeof(struct hvm_save_descriptor), HVMSR_PER_VCPU); + + if ( hvm_funcs.init_msr ) + msr_count_max += hvm_funcs.init_msr(); + + if ( msr_count_max ) + hvm_register_savevm(CPU_MSR_CODE, + "CPU_MSR", + hvm_save_cpu_msrs, + hvm_load_cpu_msrs, + HVM_CPU_MSR_SIZE(msr_count_max) + + sizeof(struct hvm_save_descriptor), + HVMSR_PER_VCPU); + return 0; } -__initcall(__hvm_register_CPU_XSAVE_save_and_restore); +__initcall(hvm_register_CPU_save_and_restore); int hvm_vcpu_initialise(struct vcpu *v) { --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -109,6 +109,10 @@ struct hvm_function_table { void (*save_cpu_ctxt)(struct vcpu *v, struct hvm_hw_cpu *ctxt); int (*load_cpu_ctxt)(struct vcpu *v, struct hvm_hw_cpu *ctxt); + unsigned int (*init_msr)(void); + void (*save_msr)(struct vcpu *, struct hvm_msr *); + int (*load_msr)(struct vcpu *, struct hvm_msr *); + /* Examine specifics of the guest state. */ unsigned int (*get_interrupt_shadow)(struct vcpu *v); void (*set_interrupt_shadow)(struct vcpu *v, unsigned int intr_shadow); --- a/xen/include/public/arch-x86/hvm/save.h +++ b/xen/include/public/arch-x86/hvm/save.h @@ -592,9 +592,21 @@ struct hvm_tsc_adjust { DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust); + +struct hvm_msr { + uint32_t count; + struct hvm_one_msr { + uint32_t index; + uint32_t _rsvd; + uint64_t val; + } msr[1 /* variable size */]; +}; + +#define CPU_MSR_CODE 20 + /* * Largest type-code in use */ -#define HVM_SAVE_CODE_MAX 19 +#define HVM_SAVE_CODE_MAX 20 #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -580,6 +580,55 @@ static int vmx_load_vmcs_ctxt(struct vcp return 0; } +static unsigned int __init vmx_init_msr(void) +{ + return !!cpu_has_mpx; +} + +static void vmx_save_msr(struct vcpu *v, struct hvm_msr *ctxt) +{ + vmx_vmcs_enter(v); + + if ( cpu_has_mpx ) + { + __vmread(GUEST_BNDCFGS, &ctxt->msr[ctxt->count].val); + if ( ctxt->msr[ctxt->count].val ) + ctxt->msr[ctxt->count++].index = MSR_IA32_BNDCFGS; + } + + vmx_vmcs_exit(v); +} + +static int vmx_load_msr(struct vcpu *v, struct hvm_msr *ctxt) +{ + unsigned int i; + int err = 0; + + vmx_vmcs_enter(v); + + for ( i = 0; i < ctxt->count; ++i ) + { + switch ( ctxt->msr[i].index ) + { + case MSR_IA32_BNDCFGS: + if ( cpu_has_mpx ) + __vmwrite(GUEST_BNDCFGS, ctxt->msr[i].val); + else + err = -ENXIO; + break; + default: + continue; + } + if ( err ) + break; + ctxt->msr[i]._rsvd = 1; + } + + vmx_vmcs_exit(v); + + return err; +} + static void vmx_fpu_enter(struct vcpu *v) { vcpu_restore_fpu_lazy(v); @@ -1602,6 +1651,9 @@ static struct hvm_function_table __initd .vcpu_destroy = vmx_vcpu_destroy, .save_cpu_ctxt = vmx_save_vmcs_ctxt, .load_cpu_ctxt = vmx_load_vmcs_ctxt, + .init_msr = vmx_init_msr, + .save_msr = vmx_save_msr, + .load_msr = vmx_load_msr, .get_interrupt_shadow = vmx_get_interrupt_shadow, .set_interrupt_shadow = vmx_set_interrupt_shadow, .guest_x86_mode = vmx_guest_x86_mode, --- a/xen/include/asm-x86/hvm/vmx/vmcs.h +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h @@ -367,6 +367,8 @@ enum vmcs_field { GUEST_PDPTR2_HIGH = 0x0000280f, GUEST_PDPTR3 = 0x00002810, GUEST_PDPTR3_HIGH = 0x00002811, + GUEST_BNDCFGS = 0x00002812, + GUEST_BNDCFGS_HIGH = 0x00002813, HOST_PAT = 0x00002c00, HOST_PAT_HIGH = 0x00002c01, HOST_EFER = 0x00002c02, _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
On 13/12/2013 14:01, Jan Beulich wrote:> This patch introduces a generic MSRs save/restore mechanism, so that > in the future new MSRs save/restore could be added w/ smaller change > than the full blown addition of a new save/restore type. > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > > --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -1127,10 +1127,117 @@ static int hvm_load_cpu_xsave_states(str > return 0; > } > > -/* We need variable length data chunk for xsave area, hence customized > - * declaration other than HVM_REGISTER_SAVE_RESTORE. > +#define HVM_CPU_MSR_SIZE(cnt) offsetof(struct hvm_msr, msr[cnt]) > +static unsigned int __read_mostly msr_count_max; > + > +static int hvm_save_cpu_msrs(struct domain *d, hvm_domain_context_t *h) > +{ > + struct vcpu *v; > + > + for_each_vcpu ( d, v ) > + { > + struct hvm_msr *ctxt; > + unsigned int i; > + > + if ( _hvm_init_entry(h, CPU_MSR_CODE, v->vcpu_id, > + HVM_CPU_MSR_SIZE(msr_count_max)) ) > + return 1; > + ctxt = (struct hvm_msr *)&h->data[h->cur]; > + ctxt->count = 0; > + > + if ( hvm_funcs.save_msr ) > + hvm_funcs.save_msr(v, ctxt); > + > + for ( i = 0; i < ctxt->count; ++i ) > + ctxt->msr[i]._rsvd = 0; > + > + if ( ctxt->count ) > + h->cur += HVM_CPU_MSR_SIZE(ctxt->count); > + else > + h->cur -= sizeof(struct hvm_save_descriptor);On the last iteration of the loop, this will leave a stale CPU_MSR_CODE header in the area between the end of the hvm record and the maximum possible size of the record, which then gets copied into the toolstack- provided buffer. Luckily, it does appear that xc_domain_save() does deal with this correctly and only send the valid subset of the entire record. I presume we dont care about breaking any other toolstacks which might get this wrong?> + } > + > + return 0; > +} > + > +static int hvm_load_cpu_msrs(struct domain *d, hvm_domain_context_t *h) > +{ > + unsigned int i, vcpuid = hvm_load_instance(h); > + struct vcpu *v; > + const struct hvm_save_descriptor *desc; > + struct hvm_msr *ctxt; > + int err = 0; > + > + if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) > + { > + dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n", > + d->domain_id, vcpuid); > + return -EINVAL; > + } > + > + /* Customized checking for entry since our entry is of variable length */ > + desc = (struct hvm_save_descriptor *)&h->data[h->cur]; > + if ( sizeof (*desc) > h->size - h->cur) > + { > + printk(XENLOG_G_WARNING > + "HVM%d.%d restore: not enough data left to read MSR descriptor\n", > + d->domain_id, vcpuid); > + return -ENODATA; > + } > + if ( desc->length + sizeof (*desc) > h->size - h->cur) > + { > + printk(XENLOG_G_WARNING > + "HVM%d.%d restore: not enough data left to read %u MSR bytes\n", > + d->domain_id, vcpuid, desc->length); > + return -ENODATA; > + } > + if ( desc->length < HVM_CPU_MSR_SIZE(1) ) > + { > + printk(XENLOG_G_WARNING > + "HVM%d.%d restore mismatch: MSR length %u < %zu\n", > + d->domain_id, vcpuid, desc->length, HVM_CPU_MSR_SIZE(1)); > + return -EINVAL; > + } > + > + h->cur += sizeof(*desc); > + ctxt = (struct hvm_msr *)&h->data[h->cur]; > + h->cur += desc->length; > + > + if ( desc->length != HVM_CPU_MSR_SIZE(ctxt->count) ) > + { > + printk(XENLOG_G_WARNING > + "HVM%d.%d restore mismatch: MSR length %u != %zu\n", > + d->domain_id, vcpuid, desc->length, > + HVM_CPU_MSR_SIZE(ctxt->count)); > + return -EOPNOTSUPP; > + } > + > + for ( i = 0; i < ctxt->count; ++i ) > + if ( ctxt->msr[i]._rsvd ) > + return -EOPNOTSUPP; > + /* Checking finished */ > + > + if ( 
hvm_funcs.load_msr ) > + err = hvm_funcs.load_msr(v, ctxt); > + > + for ( i = 0; !err && i < ctxt->count; ++i ) > + { > + switch ( ctxt->msr[i].index ) > + { > + default: > + if ( !ctxt->msr[i]._rsvd ) > + err = -ENXIO; > + break; > + } > + } > + > + return err; > +} > + > +/* We need variable length data chunks for XSAVE area and MSRs, hence > + * a custom declaration rather than HVM_REGISTER_SAVE_RESTORE. > */ > -static int __init __hvm_register_CPU_XSAVE_save_and_restore(void) > +static int __init hvm_register_CPU_save_and_restore(void) > { > hvm_register_savevm(CPU_XSAVE_CODE, > "CPU_XSAVE", > @@ -1139,9 +1246,22 @@ static int __init __hvm_register_CPU_XSA > HVM_CPU_XSAVE_SIZE(xfeature_mask) + > sizeof(struct hvm_save_descriptor), > HVMSR_PER_VCPU); > + > + if ( hvm_funcs.init_msr ) > + msr_count_max += hvm_funcs.init_msr();Why += as opposed to direct assignment? Changing this value anywhere other than here looks as if it will lead to problems.> + > + if ( msr_count_max ) > + hvm_register_savevm(CPU_MSR_CODE, > + "CPU_MSR", > + hvm_save_cpu_msrs, > + hvm_load_cpu_msrs, > + HVM_CPU_MSR_SIZE(msr_count_max) + > + sizeof(struct hvm_save_descriptor), > + HVMSR_PER_VCPU); > + > return 0; > } > -__initcall(__hvm_register_CPU_XSAVE_save_and_restore); > +__initcall(hvm_register_CPU_save_and_restore); > > int hvm_vcpu_initialise(struct vcpu *v) > { > --- a/xen/include/asm-x86/hvm/hvm.h > +++ b/xen/include/asm-x86/hvm/hvm.h > @@ -109,6 +109,10 @@ struct hvm_function_table { > void (*save_cpu_ctxt)(struct vcpu *v, struct hvm_hw_cpu *ctxt); > int (*load_cpu_ctxt)(struct vcpu *v, struct hvm_hw_cpu *ctxt); > > + unsigned int (*init_msr)(void);Possibly a comment to explain that this must return the number of MSRs we expect to insert into an hvm msr record?> + void (*save_msr)(struct vcpu *, struct hvm_msr *); > + int (*load_msr)(struct vcpu *, struct hvm_msr *); > + > /* Examine specifics of the guest state. */ > unsigned int (*get_interrupt_shadow)(struct vcpu *v); > void (*set_interrupt_shadow)(struct vcpu *v, unsigned int intr_shadow); > --- a/xen/include/public/arch-x86/hvm/save.h > +++ b/xen/include/public/arch-x86/hvm/save.h > @@ -592,9 +592,21 @@ struct hvm_tsc_adjust { > > DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust); > > + > +struct hvm_msr { > + uint32_t count; > + struct hvm_one_msr { > + uint32_t index; > + uint32_t _rsvd; > + uint64_t val; > + } msr[1 /* variable size */];Coverity is going to complain about using this with more than 1 record, but I can''t think of a better way of doing this without introducing a VLA to the public header files. As far as I can tell, the underlying implementation is safe to index off the end of msr[]. ~Andrew> +}; > + > +#define CPU_MSR_CODE 20 > + > /* > * Largest type-code in use > */ > -#define HVM_SAVE_CODE_MAX 19 > +#define HVM_SAVE_CODE_MAX 20 > > #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */ > >
On 13/12/2013 14:02, Jan Beulich wrote:> Signed-off-by: Jan Beulich <jbeulich@suse.com> > > --- a/xen/arch/x86/hvm/vmx/vmx.c > +++ b/xen/arch/x86/hvm/vmx/vmx.c > @@ -580,6 +580,55 @@ static int vmx_load_vmcs_ctxt(struct vcp > return 0; > } > > +static unsigned int __init vmx_init_msr(void) > +{ > + return !!cpu_has_mpx; > +} > + > +static void vmx_save_msr(struct vcpu *v, struct hvm_msr *ctxt) > +{ > + vmx_vmcs_enter(v); > + > + if ( cpu_has_mpx ) > + { > + __vmread(GUEST_BNDCFGS, &ctxt->msr[ctxt->count].val); > + if ( ctxt->msr[ctxt->count].val ) > + ctxt->msr[ctxt->count++].index = MSR_IA32_BNDCFGS; > + } > + > + vmx_vmcs_exit(v); > +} > + > +static int vmx_load_msr(struct vcpu *v, struct hvm_msr *ctxt) > +{ > + unsigned int i; > + int err = 0; > + > + vmx_vmcs_enter(v); > + > + for ( i = 0; i < ctxt->count; ++i ) > + { > + switch ( ctxt->msr[i].index ) > + { > + case MSR_IA32_BNDCFGS: > + if ( cpu_has_mpx ) > + __vmwrite(GUEST_BNDCFGS, ctxt->msr[i].val); > + else > + err = -ENXIO; > + break; > + default: > + continue;This will skip setting _rsvd for an MSR we don''t recognise. Doesn''t this interfere with the error checking in the caller? ~Andrew> + } > + if ( err ) > + break; > + ctxt->msr[i]._rsvd = 1; > + } > + > + vmx_vmcs_exit(v); > + > + return err; > +} > + > static void vmx_fpu_enter(struct vcpu *v) > { > vcpu_restore_fpu_lazy(v); > @@ -1602,6 +1651,9 @@ static struct hvm_function_table __initd > .vcpu_destroy = vmx_vcpu_destroy, > .save_cpu_ctxt = vmx_save_vmcs_ctxt, > .load_cpu_ctxt = vmx_load_vmcs_ctxt, > + .init_msr = vmx_init_msr, > + .save_msr = vmx_save_msr, > + .load_msr = vmx_load_msr, > .get_interrupt_shadow = vmx_get_interrupt_shadow, > .set_interrupt_shadow = vmx_set_interrupt_shadow, > .guest_x86_mode = vmx_guest_x86_mode, > --- a/xen/include/asm-x86/hvm/vmx/vmcs.h > +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h > @@ -367,6 +367,8 @@ enum vmcs_field { > GUEST_PDPTR2_HIGH = 0x0000280f, > GUEST_PDPTR3 = 0x00002810, > GUEST_PDPTR3_HIGH = 0x00002811, > + GUEST_BNDCFGS = 0x00002812, > + GUEST_BNDCFGS_HIGH = 0x00002813, > HOST_PAT = 0x00002c00, > HOST_PAT_HIGH = 0x00002c01, > HOST_EFER = 0x00002c02, > > >
Jan Beulich wrote:> This patch introduces a generic MSRs save/restore mechanism, so that > in the future new MSRs save/restore could be added w/ smaller change > than the full blown addition of a new save/restore type. > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > > --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -1127,10 +1127,117 @@ static int hvm_load_cpu_xsave_states(str > return 0; > } > > -/* We need variable length data chunk for xsave area, hence > customized > - * declaration other than HVM_REGISTER_SAVE_RESTORE. > +#define HVM_CPU_MSR_SIZE(cnt) offsetof(struct hvm_msr, msr[cnt]) > +static unsigned int __read_mostly msr_count_max; > + > +static int hvm_save_cpu_msrs(struct domain *d, hvm_domain_context_t > *h) +{ > + struct vcpu *v; > + > + for_each_vcpu ( d, v ) > + { > + struct hvm_msr *ctxt; > + unsigned int i; > + > + if ( _hvm_init_entry(h, CPU_MSR_CODE, v->vcpu_id, > + HVM_CPU_MSR_SIZE(msr_count_max)) ) > + return 1; > + ctxt = (struct hvm_msr *)&h->data[h->cur]; > + ctxt->count = 0; > + > + if ( hvm_funcs.save_msr ) > + hvm_funcs.save_msr(v, ctxt); > + > + for ( i = 0; i < ctxt->count; ++i ) > + ctxt->msr[i]._rsvd = 0; > + > + if ( ctxt->count ) > + h->cur += HVM_CPU_MSR_SIZE(ctxt->count); > + else > + h->cur -= sizeof(struct hvm_save_descriptor); > + } > + > + return 0; > +} > + > +static int hvm_load_cpu_msrs(struct domain *d, hvm_domain_context_t > *h) +{ > + unsigned int i, vcpuid = hvm_load_instance(h); > + struct vcpu *v; > + const struct hvm_save_descriptor *desc; > + struct hvm_msr *ctxt; > + int err = 0; > + > + if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) > + { > + dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n", > + d->domain_id, vcpuid); > + return -EINVAL; > + } > + > + /* Customized checking for entry since our entry is of variable > length */ + desc = (struct hvm_save_descriptor *)&h->data[h->cur]; > + if ( sizeof (*desc) > h->size - h->cur) > + { > + printk(XENLOG_G_WARNING > + "HVM%d.%d restore: not enough data left to read MSR > descriptor\n", + d->domain_id, vcpuid); > + return -ENODATA; > + } > + if ( desc->length + sizeof (*desc) > h->size - h->cur) > + { > + printk(XENLOG_G_WARNING > + "HVM%d.%d restore: not enough data left to read %u > MSR bytes\n", + d->domain_id, vcpuid, desc->length); > + return -ENODATA; > + } > + if ( desc->length < HVM_CPU_MSR_SIZE(1) ) > + { > + printk(XENLOG_G_WARNING > + "HVM%d.%d restore mismatch: MSR length %u < %zu\n", > + d->domain_id, vcpuid, desc->length, > HVM_CPU_MSR_SIZE(1)); + return -EINVAL; > + } > + > + h->cur += sizeof(*desc); > + ctxt = (struct hvm_msr *)&h->data[h->cur]; > + h->cur += desc->length; > + > + if ( desc->length != HVM_CPU_MSR_SIZE(ctxt->count) ) > + { > + printk(XENLOG_G_WARNING > + "HVM%d.%d restore mismatch: MSR length %u != %zu\n", > + d->domain_id, vcpuid, desc->length, > + HVM_CPU_MSR_SIZE(ctxt->count)); > + return -EOPNOTSUPP; > + } > + > + for ( i = 0; i < ctxt->count; ++i ) > + if ( ctxt->msr[i]._rsvd ) > + return -EOPNOTSUPP; > + /* Checking finished */ > + > + if ( hvm_funcs.load_msr ) > + err = hvm_funcs.load_msr(v, ctxt); > + > + for ( i = 0; !err && i < ctxt->count; ++i ) > + { > + switch ( ctxt->msr[i].index ) > + { > + default: > + if ( !ctxt->msr[i]._rsvd ) > + err = -ENXIO; > + break; > + } > + } > + > + return err; > +} > + > +/* We need variable length data chunks for XSAVE area and MSRs, hence > + * a custom declaration rather than HVM_REGISTER_SAVE_RESTORE. 
> */ > -static int __init __hvm_register_CPU_XSAVE_save_and_restore(void) > +static int __init hvm_register_CPU_save_and_restore(void) > { > hvm_register_savevm(CPU_XSAVE_CODE, > "CPU_XSAVE", > @@ -1139,9 +1246,22 @@ static int __init __hvm_register_CPU_XSA > HVM_CPU_XSAVE_SIZE(xfeature_mask) + > sizeof(struct hvm_save_descriptor), > HVMSR_PER_VCPU); > + > + if ( hvm_funcs.init_msr ) > + msr_count_max += hvm_funcs.init_msr(); > + > + if ( msr_count_max ) > + hvm_register_savevm(CPU_MSR_CODE, > + "CPU_MSR", > + hvm_save_cpu_msrs, > + hvm_load_cpu_msrs, > + HVM_CPU_MSR_SIZE(msr_count_max) + > + sizeof(struct hvm_save_descriptor), > + HVMSR_PER_VCPU); > + > return 0; > } > -__initcall(__hvm_register_CPU_XSAVE_save_and_restore); > +__initcall(hvm_register_CPU_save_and_restore); > > int hvm_vcpu_initialise(struct vcpu *v) > { > --- a/xen/include/asm-x86/hvm/hvm.h > +++ b/xen/include/asm-x86/hvm/hvm.h > @@ -109,6 +109,10 @@ struct hvm_function_table { > void (*save_cpu_ctxt)(struct vcpu *v, struct hvm_hw_cpu *ctxt); > int (*load_cpu_ctxt)(struct vcpu *v, struct hvm_hw_cpu *ctxt); > > + unsigned int (*init_msr)(void); > + void (*save_msr)(struct vcpu *, struct hvm_msr *); > + int (*load_msr)(struct vcpu *, struct hvm_msr *); > + > /* Examine specifics of the guest state. */ > unsigned int (*get_interrupt_shadow)(struct vcpu *v); > void (*set_interrupt_shadow)(struct vcpu *v, unsigned int > intr_shadow); --- a/xen/include/public/arch-x86/hvm/save.h > +++ b/xen/include/public/arch-x86/hvm/save.h > @@ -592,9 +592,21 @@ struct hvm_tsc_adjust { > > DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust); > > + > +struct hvm_msr { > + uint32_t count; > + struct hvm_one_msr { > + uint32_t index; > + uint32_t _rsvd; > + uint64_t val; > + } msr[1 /* variable size */]; > +}; > + > +#define CPU_MSR_CODE 20 > ++DECLARE_HVM_SAVE_TYPE(CPU_MSR, CPU_MSR_CODE, struct hvm_msr); +msr dump patch at tools/misc/xen-hvmctx.c Thanks, Jinsong> /* > * Largest type-code in use > */ > -#define HVM_SAVE_CODE_MAX 19 > +#define HVM_SAVE_CODE_MAX 20 > > #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */
Andrew Cooper wrote:> On 13/12/2013 14:01, Jan Beulich wrote: >> This patch introduces a generic MSRs save/restore mechanism, so that >> in the future new MSRs save/restore could be added w/ smaller change >> than the full blown addition of a new save/restore type. >> >> Signed-off-by: Jan Beulich <jbeulich@suse.com> >> >> --- a/xen/arch/x86/hvm/hvm.c >> +++ b/xen/arch/x86/hvm/hvm.c >> @@ -1127,10 +1127,117 @@ static int hvm_load_cpu_xsave_states(str >> return 0; } >> >> -/* We need variable length data chunk for xsave area, hence >> customized >> - * declaration other than HVM_REGISTER_SAVE_RESTORE. >> +#define HVM_CPU_MSR_SIZE(cnt) offsetof(struct hvm_msr, msr[cnt]) >> +static unsigned int __read_mostly msr_count_max; >> + >> +static int hvm_save_cpu_msrs(struct domain *d, hvm_domain_context_t >> *h) +{ + struct vcpu *v; >> + >> + for_each_vcpu ( d, v ) >> + { >> + struct hvm_msr *ctxt; >> + unsigned int i; >> + >> + if ( _hvm_init_entry(h, CPU_MSR_CODE, v->vcpu_id, >> + HVM_CPU_MSR_SIZE(msr_count_max)) ) + >> return 1; + ctxt = (struct hvm_msr *)&h->data[h->cur]; + >> ctxt->count = 0; + >> + if ( hvm_funcs.save_msr ) >> + hvm_funcs.save_msr(v, ctxt); >> + >> + for ( i = 0; i < ctxt->count; ++i ) >> + ctxt->msr[i]._rsvd = 0; >> + >> + if ( ctxt->count ) >> + h->cur += HVM_CPU_MSR_SIZE(ctxt->count); + else >> + h->cur -= sizeof(struct hvm_save_descriptor); > > On the last iteration of the loop, this will leave a stale > CPU_MSR_CODE header in the area between the end of the hvm record and > the maximum possible size of the record, which then gets copied into > the toolstack- provided buffer. >The stale CPU_MSR_CODE header would be covered by other header, or by END marker.> Luckily, it does appear that xc_domain_save() does deal with this > correctly and only send the valid subset of the entire record. I > presume we dont care about breaking any other toolstacks which might > get this wrong? 
> >> + } >> + >> + return 0; >> +} >> + >> +static int hvm_load_cpu_msrs(struct domain *d, hvm_domain_context_t >> *h) +{ + unsigned int i, vcpuid = hvm_load_instance(h); + >> struct vcpu *v; + const struct hvm_save_descriptor *desc; >> + struct hvm_msr *ctxt; >> + int err = 0; >> + >> + if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) >> + { + dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no >> vcpu%u\n", + d->domain_id, vcpuid); >> + return -EINVAL; >> + } >> + >> + /* Customized checking for entry since our entry is of variable >> length */ + desc = (struct hvm_save_descriptor *)&h->data[h->cur]; >> + if ( sizeof (*desc) > h->size - h->cur) >> + { >> + printk(XENLOG_G_WARNING >> + "HVM%d.%d restore: not enough data left to read MSR >> descriptor\n", + d->domain_id, vcpuid); >> + return -ENODATA; >> + } >> + if ( desc->length + sizeof (*desc) > h->size - h->cur) + { >> + printk(XENLOG_G_WARNING >> + "HVM%d.%d restore: not enough data left to read %u >> MSR bytes\n", + d->domain_id, vcpuid, desc->length); + >> return -ENODATA; + } >> + if ( desc->length < HVM_CPU_MSR_SIZE(1) ) >> + { >> + printk(XENLOG_G_WARNING >> + "HVM%d.%d restore mismatch: MSR length %u < %zu\n", >> + d->domain_id, vcpuid, desc->length, >> HVM_CPU_MSR_SIZE(1)); + return -EINVAL; + } >> + >> + h->cur += sizeof(*desc); >> + ctxt = (struct hvm_msr *)&h->data[h->cur]; >> + h->cur += desc->length; >> + >> + if ( desc->length != HVM_CPU_MSR_SIZE(ctxt->count) ) + { >> + printk(XENLOG_G_WARNING >> + "HVM%d.%d restore mismatch: MSR length %u != %zu\n", >> + d->domain_id, vcpuid, desc->length, >> + HVM_CPU_MSR_SIZE(ctxt->count)); >> + return -EOPNOTSUPP; >> + } >> + >> + for ( i = 0; i < ctxt->count; ++i ) >> + if ( ctxt->msr[i]._rsvd ) >> + return -EOPNOTSUPP; >> + /* Checking finished */ >> + >> + if ( hvm_funcs.load_msr ) >> + err = hvm_funcs.load_msr(v, ctxt); >> + >> + for ( i = 0; !err && i < ctxt->count; ++i ) >> + { >> + switch ( ctxt->msr[i].index ) >> + { >> + default: >> + if ( !ctxt->msr[i]._rsvd ) >> + err = -ENXIO; >> + break; >> + } >> + } >> + >> + return err; >> +} >> + >> +/* We need variable length data chunks for XSAVE area and MSRs, >> hence + * a custom declaration rather than >> HVM_REGISTER_SAVE_RESTORE. */ -static int __init >> __hvm_register_CPU_XSAVE_save_and_restore(void) +static int __init >> hvm_register_CPU_save_and_restore(void) { >> hvm_register_savevm(CPU_XSAVE_CODE, >> "CPU_XSAVE", @@ -1139,9 +1246,22 @@ static int __init >> __hvm_register_CPU_XSA >> HVM_CPU_XSAVE_SIZE(xfeature_mask) + >> sizeof(struct hvm_save_descriptor), >> HVMSR_PER_VCPU); + >> + if ( hvm_funcs.init_msr ) >> + msr_count_max += hvm_funcs.init_msr(); > > Why += as opposed to direct assignment? Changing this value anywhere > other than here looks as if it will lead to problems. >I guess += here is for future extension of generic msr. Thanks, Jinsong
Andrew Cooper wrote:
> On 13/12/2013 14:02, Jan Beulich wrote:
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>
>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>> @@ -580,6 +580,55 @@ static int vmx_load_vmcs_ctxt(struct vcp
>>      return 0;
>>  }
>>
>> +static unsigned int __init vmx_init_msr(void)
>> +{
>> +    return !!cpu_has_mpx;
>> +}
>> +
>> +static void vmx_save_msr(struct vcpu *v, struct hvm_msr *ctxt)
>> +{
>> +    vmx_vmcs_enter(v);
>> +
>> +    if ( cpu_has_mpx )
>> +    {
>> +        __vmread(GUEST_BNDCFGS, &ctxt->msr[ctxt->count].val);
>> +        if ( ctxt->msr[ctxt->count].val )
>> +            ctxt->msr[ctxt->count++].index = MSR_IA32_BNDCFGS;
>> +    }
>> +
>> +    vmx_vmcs_exit(v);
>> +}
>> +
>> +static int vmx_load_msr(struct vcpu *v, struct hvm_msr *ctxt)
>> +{
>> +    unsigned int i;
>> +    int err = 0;
>> +
>> +    vmx_vmcs_enter(v);
>> +
>> +    for ( i = 0; i < ctxt->count; ++i )
>> +    {
>> +        switch ( ctxt->msr[i].index )
>> +        {
>> +        case MSR_IA32_BNDCFGS:
>> +            if ( cpu_has_mpx )
>> +                __vmwrite(GUEST_BNDCFGS, ctxt->msr[i].val);
>> +            else
>> +                err = -ENXIO;
>> +            break;
>> +        default:
>> +            continue;
>
> This will skip setting _rsvd for an MSR we don't recognise. Doesn't
> this interfere with the error checking in the caller?
>

It's OK: an unrecognised MSR gets caught by the caller. If an MSR is
recognised by neither the vmx/svm specific handler nor the generic MSR
handler, the load returns an error.

Thanks,
Jinsong
Jan Beulich wrote:
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Liu Jinsong <jinsong.liu@intel.com>
>>> On 13.12.13 at 18:44, Andrew Cooper <andrew.cooper3@citrix.com> wrote: > On 13/12/2013 14:01, Jan Beulich wrote: >> +static int hvm_save_cpu_msrs(struct domain *d, hvm_domain_context_t *h) >> +{ >> + struct vcpu *v; >> + >> + for_each_vcpu ( d, v ) >> + { >> + struct hvm_msr *ctxt; >> + unsigned int i; >> + >> + if ( _hvm_init_entry(h, CPU_MSR_CODE, v->vcpu_id, >> + HVM_CPU_MSR_SIZE(msr_count_max)) ) >> + return 1; >> + ctxt = (struct hvm_msr *)&h->data[h->cur]; >> + ctxt->count = 0; >> + >> + if ( hvm_funcs.save_msr ) >> + hvm_funcs.save_msr(v, ctxt); >> + >> + for ( i = 0; i < ctxt->count; ++i ) >> + ctxt->msr[i]._rsvd = 0; >> + >> + if ( ctxt->count ) >> + h->cur += HVM_CPU_MSR_SIZE(ctxt->count); >> + else >> + h->cur -= sizeof(struct hvm_save_descriptor); > > On the last iteration of the loop, this will leave a stale CPU_MSR_CODE > header in the area between the end of the hvm record and the maximum > possible size of the record, which then gets copied into the toolstack- > provided buffer. > > Luckily, it does appear that xc_domain_save() does deal with this > correctly and only send the valid subset of the entire record. I > presume we dont care about breaking any other toolstacks which might get > this wrong?Wouldn''t that be similarly a problem for the variable size XSAVE record we already have? I.e. I think tool stacks are _required_ to cope with this.>> @@ -1139,9 +1246,22 @@ static int __init __hvm_register_CPU_XSA >> HVM_CPU_XSAVE_SIZE(xfeature_mask) + >> sizeof(struct hvm_save_descriptor), >> HVMSR_PER_VCPU); >> + >> + if ( hvm_funcs.init_msr ) >> + msr_count_max += hvm_funcs.init_msr(); > > Why += as opposed to direct assignment? Changing this value anywhere > other than here looks as if it will lead to problems.The intention is for the variable to get initialized to non-zero as soon as an architectural MSR appears that might always need saving, or for the order of the increments to not matter when further conditionals get added here.>> --- a/xen/include/public/arch-x86/hvm/save.h >> +++ b/xen/include/public/arch-x86/hvm/save.h >> @@ -592,9 +592,21 @@ struct hvm_tsc_adjust { >> >> DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust); >> >> + >> +struct hvm_msr { >> + uint32_t count; >> + struct hvm_one_msr { >> + uint32_t index; >> + uint32_t _rsvd; >> + uint64_t val; >> + } msr[1 /* variable size */]; > > Coverity is going to complain about using this with more than 1 record, > but I can''t think of a better way of doing this without introducing a > VLA to the public header files.If that''s a concern, I''ll switch to __STDC_VERSION__/__GNUC__ dependent code here, just like we do elsewhere in the public headers: struct hvm_msr { uint32_t count; struct hvm_one_msr { uint32_t index; uint32_t _rsvd; uint64_t val; #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L } msr[]; #elif defined(__GNUC__) } msr[0]; #else } msr[1 /* variable size */]; #endif }; Jan
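[Illustrative sketch, not part of the series: how the offsetof()-based
HVM_CPU_MSR_SIZE() macro sizes the variable-length record. The struct
layout follows the hunk quoted above; the standalone program wrapped
around it is purely hypothetical.]

/* Sizing a variable-length MSR record with offsetof(), as the
 * HVM_CPU_MSR_SIZE() macro does.  Layout mirrors the public header
 * hunk; the printout is illustrative only. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct hvm_msr {
    uint32_t count;
    struct hvm_one_msr {
        uint32_t index;
        uint32_t _rsvd;
        uint64_t val;
    } msr[1 /* variable size */];
};

#define HVM_CPU_MSR_SIZE(cnt) offsetof(struct hvm_msr, msr[cnt])

int main(void)
{
    /* Bytes needed for a record carrying 0, 1 or 4 MSR entries.  Xen
     * passes a runtime count here, which GCC's __builtin_offsetof
     * accepts even though msr[] is declared with one element. */
    printf("%zu %zu %zu\n",
           HVM_CPU_MSR_SIZE(0), HVM_CPU_MSR_SIZE(1), HVM_CPU_MSR_SIZE(4));
    return 0;
}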
>>> On 16.12.13 at 04:01, "Liu, Jinsong" <jinsong.liu@intel.com> wrote: > Jan Beulich wrote: >> This patch introduces a generic MSRs save/restore mechanism, so that >> in the future new MSRs save/restore could be added w/ smaller change >> than the full blown addition of a new save/restore type. >> >> Signed-off-by: Jan Beulich <jbeulich@suse.com> >> >> --- a/xen/arch/x86/hvm/hvm.c >> +++ b/xen/arch/x86/hvm/hvm.c >> @@ -1127,10 +1127,117 @@ static int hvm_load_cpu_xsave_states(str >> return 0; >> } >> >> -/* We need variable length data chunk for xsave area, hence >> customized >> - * declaration other than HVM_REGISTER_SAVE_RESTORE. >> +#define HVM_CPU_MSR_SIZE(cnt) offsetof(struct hvm_msr, msr[cnt]) >> +static unsigned int __read_mostly msr_count_max; >> + >> +static int hvm_save_cpu_msrs(struct domain *d, hvm_domain_context_t >> *h) +{ >> + struct vcpu *v; >> + >> + for_each_vcpu ( d, v ) >> + { >> + struct hvm_msr *ctxt; >> + unsigned int i; >> + >> + if ( _hvm_init_entry(h, CPU_MSR_CODE, v->vcpu_id, >> + HVM_CPU_MSR_SIZE(msr_count_max)) ) >> + return 1; >> + ctxt = (struct hvm_msr *)&h->data[h->cur]; >> + ctxt->count = 0; >> + >> + if ( hvm_funcs.save_msr ) >> + hvm_funcs.save_msr(v, ctxt); >> + >> + for ( i = 0; i < ctxt->count; ++i ) >> + ctxt->msr[i]._rsvd = 0; >> + >> + if ( ctxt->count ) >> + h->cur += HVM_CPU_MSR_SIZE(ctxt->count); >> + else >> + h->cur -= sizeof(struct hvm_save_descriptor); >> + } >> + >> + return 0; >> +} >> + >> +static int hvm_load_cpu_msrs(struct domain *d, hvm_domain_context_t >> *h) +{ >> + unsigned int i, vcpuid = hvm_load_instance(h); >> + struct vcpu *v; >> + const struct hvm_save_descriptor *desc; >> + struct hvm_msr *ctxt; >> + int err = 0; >> + >> + if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL ) >> + { >> + dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n", >> + d->domain_id, vcpuid); >> + return -EINVAL; >> + } >> + >> + /* Customized checking for entry since our entry is of variable >> length */ + desc = (struct hvm_save_descriptor *)&h->data[h->cur]; >> + if ( sizeof (*desc) > h->size - h->cur) >> + { >> + printk(XENLOG_G_WARNING >> + "HVM%d.%d restore: not enough data left to read MSR >> descriptor\n", + d->domain_id, vcpuid); >> + return -ENODATA; >> + } >> + if ( desc->length + sizeof (*desc) > h->size - h->cur) >> + { >> + printk(XENLOG_G_WARNING >> + "HVM%d.%d restore: not enough data left to read %u >> MSR bytes\n", + d->domain_id, vcpuid, desc->length); >> + return -ENODATA; >> + } >> + if ( desc->length < HVM_CPU_MSR_SIZE(1) ) >> + { >> + printk(XENLOG_G_WARNING >> + "HVM%d.%d restore mismatch: MSR length %u < %zu\n", >> + d->domain_id, vcpuid, desc->length, >> HVM_CPU_MSR_SIZE(1)); + return -EINVAL; >> + } >> + >> + h->cur += sizeof(*desc); >> + ctxt = (struct hvm_msr *)&h->data[h->cur]; >> + h->cur += desc->length; >> + >> + if ( desc->length != HVM_CPU_MSR_SIZE(ctxt->count) ) >> + { >> + printk(XENLOG_G_WARNING >> + "HVM%d.%d restore mismatch: MSR length %u != %zu\n", >> + d->domain_id, vcpuid, desc->length, >> + HVM_CPU_MSR_SIZE(ctxt->count)); >> + return -EOPNOTSUPP; >> + } >> + >> + for ( i = 0; i < ctxt->count; ++i ) >> + if ( ctxt->msr[i]._rsvd ) >> + return -EOPNOTSUPP; >> + /* Checking finished */ >> + >> + if ( hvm_funcs.load_msr ) >> + err = hvm_funcs.load_msr(v, ctxt); >> + >> + for ( i = 0; !err && i < ctxt->count; ++i ) >> + { >> + switch ( ctxt->msr[i].index ) >> + { >> + default: >> + if ( !ctxt->msr[i]._rsvd ) >> + err = -ENXIO; >> + break; >> + } >> + } >> + >> + return err; >> +} >> + >> +/* We 
need variable length data chunks for XSAVE area and MSRs, hence >> + * a custom declaration rather than HVM_REGISTER_SAVE_RESTORE. >> */ >> -static int __init __hvm_register_CPU_XSAVE_save_and_restore(void) >> +static int __init hvm_register_CPU_save_and_restore(void) >> { >> hvm_register_savevm(CPU_XSAVE_CODE, >> "CPU_XSAVE", >> @@ -1139,9 +1246,22 @@ static int __init __hvm_register_CPU_XSA >> HVM_CPU_XSAVE_SIZE(xfeature_mask) + >> sizeof(struct hvm_save_descriptor), >> HVMSR_PER_VCPU); >> + >> + if ( hvm_funcs.init_msr ) >> + msr_count_max += hvm_funcs.init_msr(); >> + >> + if ( msr_count_max ) >> + hvm_register_savevm(CPU_MSR_CODE, >> + "CPU_MSR", >> + hvm_save_cpu_msrs, >> + hvm_load_cpu_msrs, >> + HVM_CPU_MSR_SIZE(msr_count_max) + >> + sizeof(struct hvm_save_descriptor), >> + HVMSR_PER_VCPU); >> + >> return 0; >> } >> -__initcall(__hvm_register_CPU_XSAVE_save_and_restore); >> +__initcall(hvm_register_CPU_save_and_restore); >> >> int hvm_vcpu_initialise(struct vcpu *v) >> { >> --- a/xen/include/asm-x86/hvm/hvm.h >> +++ b/xen/include/asm-x86/hvm/hvm.h >> @@ -109,6 +109,10 @@ struct hvm_function_table { >> void (*save_cpu_ctxt)(struct vcpu *v, struct hvm_hw_cpu *ctxt); >> int (*load_cpu_ctxt)(struct vcpu *v, struct hvm_hw_cpu *ctxt); >> >> + unsigned int (*init_msr)(void); >> + void (*save_msr)(struct vcpu *, struct hvm_msr *); >> + int (*load_msr)(struct vcpu *, struct hvm_msr *); >> + >> /* Examine specifics of the guest state. */ >> unsigned int (*get_interrupt_shadow)(struct vcpu *v); >> void (*set_interrupt_shadow)(struct vcpu *v, unsigned int >> intr_shadow); --- a/xen/include/public/arch-x86/hvm/save.h >> +++ b/xen/include/public/arch-x86/hvm/save.h >> @@ -592,9 +592,21 @@ struct hvm_tsc_adjust { >> >> DECLARE_HVM_SAVE_TYPE(TSC_ADJUST, 19, struct hvm_tsc_adjust); >> >> + >> +struct hvm_msr { >> + uint32_t count; >> + struct hvm_one_msr { >> + uint32_t index; >> + uint32_t _rsvd; >> + uint64_t val; >> + } msr[1 /* variable size */]; >> +}; >> + >> +#define CPU_MSR_CODE 20 >> + > > +DECLARE_HVM_SAVE_TYPE(CPU_MSR, CPU_MSR_CODE, struct hvm_msr); > +msr dump patch at tools/misc/xen-hvmctx.cSorry, I don''t follow what this is to mean. If that other patch of yours needs adjustment, then this surely doesn''t belong here. Jan
>>> On 13.12.13 at 18:57, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 13/12/2013 14:02, Jan Beulich wrote:
>> +static int vmx_load_msr(struct vcpu *v, struct hvm_msr *ctxt)
>> +{
>> +    unsigned int i;
>> +    int err = 0;
>> +
>> +    vmx_vmcs_enter(v);
>> +
>> +    for ( i = 0; i < ctxt->count; ++i )
>> +    {
>> +        switch ( ctxt->msr[i].index )
>> +        {
>> +        case MSR_IA32_BNDCFGS:
>> +            if ( cpu_has_mpx )
>> +                __vmwrite(GUEST_BNDCFGS, ctxt->msr[i].val);
>> +            else
>> +                err = -ENXIO;
>> +            break;
>> +        default:
>> +            continue;
>
> This will skip setting _rsvd for an MSR we don't recognise. Doesn't
> this interfere with the error checking in the caller?

No - that's exactly the purpose: Not setting _rsvd will allow the caller
to know this MSR was unrecognized by the vendor specific code, and hence,
if the generic code also can't deal with it, the restore _must_ fail.

Jan
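[Illustrative sketch, not part of the series: the _rsvd handshake
described above, reduced to a standalone program. The *_example names
and simplified types are stand-ins, not the Xen interfaces.]

/* The vendor load_msr hook marks every entry it handled; the generic
 * loader then rejects the restore if any entry is left unmarked. */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MSR_IA32_BNDCFGS 0xd90

struct one_msr_example { uint32_t index; uint32_t _rsvd; uint64_t val; };

/* Vendor-specific hook: consume the MSRs it recognises. */
static int vendor_load_msr_example(struct one_msr_example *msr, uint32_t count)
{
    for ( uint32_t i = 0; i < count; ++i )
        if ( msr[i].index == MSR_IA32_BNDCFGS )
        {
            /* ... write msr[i].val into guest state ... */
            msr[i]._rsvd = 1;               /* mark as handled */
        }
    return 0;
}

/* Generic loader: any entry still unmarked was recognised by nobody. */
static int generic_load_msrs_example(struct one_msr_example *msr, uint32_t count)
{
    int err = vendor_load_msr_example(msr, count);

    for ( uint32_t i = 0; !err && i < count; ++i )
        if ( !msr[i]._rsvd )
            err = -ENXIO;                   /* unknown MSR: restore must fail */
    return err;
}

int main(void)
{
    struct one_msr_example rec[2] = {
        { MSR_IA32_BNDCFGS, 0, 0 },
        { 0x12345,          0, 0 },         /* hypothetical unknown index */
    };

    printf("%d\n", generic_load_msrs_example(rec, 2));  /* -ENXIO expected */
    return 0;
}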
Jan Beulich wrote:
>>>> On 16.12.13 at 04:01, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>> Jan Beulich wrote:
>>> This patch introduces a generic MSRs save/restore mechanism, so that
>>> in the future new MSRs save/restore could be added w/ smaller change
>>> than the full blown addition of a new save/restore type.
>>>
>>> [...]
>>>
>>> +#define CPU_MSR_CODE  20
>>> +
>>
>> +DECLARE_HVM_SAVE_TYPE(CPU_MSR, CPU_MSR_CODE, struct hvm_msr);
>> +msr dump patch at tools/misc/xen-hvmctx.c
>
> Sorry, I don't follow what this is to mean. If that other patch of
> yours needs adjustment, then this surely doesn't belong here.
>

OK, I will adjust the tools-side MSR dump patch.

DECLARE_HVM_SAVE_TYPE(CPU_MSR, CPU_MSR_CODE, struct hvm_msr) is for the
tools-side MSR dump patch, so if you don't add it here I can add it in a
separate patch.

Thanks,
Jinsong
>>> On 16.12.13 at 09:39, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
> Jan Beulich wrote:
>>>>> On 16.12.13 at 04:01, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>>> Jan Beulich wrote:
>>>> This patch introduces a generic MSRs save/restore mechanism, so that
>>>> in the future new MSRs save/restore could be added w/ smaller change
>>>> than the full blown addition of a new save/restore type.
>>>>
>>>> [...]
>>>>
>>>> +#define CPU_MSR_CODE  20
>>>> +
>>>
>>> +DECLARE_HVM_SAVE_TYPE(CPU_MSR, CPU_MSR_CODE, struct hvm_msr);
>>> +msr dump patch at tools/misc/xen-hvmctx.c
>>
>> Sorry, I don't follow what this is to mean. If that other patch of
>> yours needs adjustment, then this surely doesn't belong here.
>>
>
> OK, I will adjust the tools-side MSR dump patch.
>
> DECLARE_HVM_SAVE_TYPE(CPU_MSR, CPU_MSR_CODE, struct hvm_msr) is for the
> tools-side MSR dump patch, so if you don't add it here I can add it in a
> separate patch.

The main thing is - this does _not_ belong in the public header, just
like the XSAVE one has no such declaration there.

Jan
Jan Beulich wrote:
>>>> On 16.12.13 at 09:39, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>> Jan Beulich wrote:
>>>>>> On 16.12.13 at 04:01, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>>>> Jan Beulich wrote:
>>>>> This patch introduces a generic MSRs save/restore mechanism, so
>>>>> that in the future new MSRs save/restore could be added w/ smaller
>>>>> change than the full blown addition of a new save/restore type.
>>>>>
>>>>> [...]
>>>>
>>>> +DECLARE_HVM_SAVE_TYPE(CPU_MSR, CPU_MSR_CODE, struct hvm_msr);
>>>> +msr dump patch at tools/misc/xen-hvmctx.c
>>>
>>> Sorry, I don't follow what this is to mean. If that other patch of
>>> yours needs adjustment, then this surely doesn't belong here.
>>>
>>
>> OK, I will adjust the tools-side MSR dump patch.
>>
>> DECLARE_HVM_SAVE_TYPE(CPU_MSR, CPU_MSR_CODE, struct hvm_msr) is for
>> the tools-side MSR dump patch, so if you don't add it here I can add
>> it in a separate patch.
>
> The main thing is - this does _not_ belong in the public header, just
> like the XSAVE one has no such declaration there.
>
> Jan

XSAVE doesn't get dumped at xen-hvmctx.c - it seems that was simply
forgotten - hence it has no such declaration on the Xen side either.

Thanks,
Jinsong
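For completeness, a dump tool does not strictly need a public
DECLARE_HVM_SAVE_TYPE() to print the new record: it can walk the
hvm_save_descriptor headers in the context blob directly. The sketch below is
hypothetical - it is not the actual tools/misc/xen-hvmctx.c code - and only the
descriptor layout and the CPU_MSR_CODE value are taken from the public header
and the patch; error handling and how the blob is obtained are elided.

    /*
     * Hypothetical CPU_MSR dumper: walks hvm_save_descriptor headers in an
     * HVM context blob and prints the index/value pairs of CPU_MSR records.
     */
    #include <inttypes.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct hvm_save_descriptor {
        uint16_t typecode;
        uint16_t instance;
        uint32_t length;
    };

    #define CPU_MSR_CODE 20            /* from the patch under discussion */

    struct hvm_one_msr {
        uint32_t index;
        uint32_t _rsvd;
        uint64_t val;
    };

    static void dump_cpu_msrs(const uint8_t *buf, size_t len)
    {
        size_t off = 0;

        while ( off + sizeof(struct hvm_save_descriptor) <= len )
        {
            struct hvm_save_descriptor desc;

            memcpy(&desc, buf + off, sizeof(desc));
            off += sizeof(desc);

            if ( desc.typecode == CPU_MSR_CODE )
            {
                uint32_t i, count;

                memcpy(&count, buf + off, sizeof(count));
                printf("vcpu%u: %u MSR(s)\n", desc.instance, count);

                for ( i = 0; i < count; ++i )
                {
                    struct hvm_one_msr m;

                    /* msr[] starts 8 bytes in (count plus alignment padding) */
                    memcpy(&m, buf + off + 8 + i * sizeof(m), sizeof(m));
                    printf("  0x%08" PRIx32 " = 0x%016" PRIx64 "\n",
                           m.index, m.val);
                }
            }
            off += desc.length;
        }
    }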