Add HVM hardware feature suspend/resume. Signed-off-by Ke Yu <ke.yu@intel.com> Signed-off-by Kevin Tian <kevin.tian@intel.com> diff -r f217aafc1c17 xen/arch/x86/acpi/power.c --- a/xen/arch/x86/acpi/power.c Mon Jun 25 13:28:41 2007 -0400 +++ b/xen/arch/x86/acpi/power.c Mon Jun 25 17:36:32 2007 -0400 @@ -83,7 +83,7 @@ static void device_power_up(void) /* Main interface to do xen specific suspend/resume */ int enter_state(u32 state) { - struct domain *d; + struct domain *d, *pd = NULL; unsigned long flags; int error; @@ -99,7 +99,15 @@ int enter_state(u32 state) for_each_domain(d) if (d->domain_id != 0) + { domain_pause(d); + if (is_hvm_domain(d) && !hvm_suspend_domain(d)) + { + error = -EINVAL; + goto Unpause; + } + pd = d; + } pmprintk(XENLOG_INFO, "PM: Preparing system for %s sleep\n", acpi_states[state]); @@ -133,13 +141,22 @@ int enter_state(u32 state) Done: local_irq_restore(flags); - for_each_domain(d) - if (d->domain_id!=0) - domain_unpause(d); + Unpause: + if (pd) + { + for_each_domain(d) + { + /* Unpause until recorded last paused domain */ + if (d == pd) + break; + + if (d->domain_id != 0) + domain_unpause(d); + } + } spin_unlock(&pm_lock); return error; - } /* diff -r f217aafc1c17 xen/arch/x86/acpi/suspend.c --- a/xen/arch/x86/acpi/suspend.c Mon Jun 25 13:28:41 2007 -0400 +++ b/xen/arch/x86/acpi/suspend.c Mon Jun 25 14:52:17 2007 -0400 @@ -82,4 +82,6 @@ void restore_rest_processor_state(void) mtrr_ap_init(); mcheck_init(&boot_cpu_data); + if (hvm_enabled) + hvm_resume_cpu(); } diff -r f217aafc1c17 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Mon Jun 25 13:28:41 2007 -0400 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Mon Jun 25 14:03:32 2007 -0400 @@ -178,7 +178,7 @@ static void __vmx_clear_vmcs(void *info) v->arch.hvm_vmx.launched = 0; } -static void vmx_clear_vmcs(struct vcpu *v) +void vmx_clear_vmcs(struct vcpu *v) { int cpu = v->arch.hvm_vmx.active_cpu; diff -r f217aafc1c17 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Mon Jun 25 
13:28:41 2007 -0400 +++ b/xen/arch/x86/hvm/vmx/vmx.c Mon Jun 25 17:38:49 2007 -0400 @@ -53,6 +53,11 @@ char *vmx_msr_bitmap; +static DEFINE_PER_CPU(struct vmcs_struct*, host_vmcs); + +static int vmx_suspend_domain(struct domain *d); +static int vmx_resume_cpu(void); + static void vmx_ctxt_switch_from(struct vcpu *v); static void vmx_ctxt_switch_to(struct vcpu *v); @@ -1211,7 +1216,9 @@ static struct hvm_function_table vmx_fun .inject_exception = vmx_inject_exception, .init_ap_context = vmx_init_ap_context, .init_hypercall_page = vmx_init_hypercall_page, - .event_injection_faulted = vmx_event_injection_faulted + .event_injection_faulted = vmx_event_injection_faulted, + .suspend_domain = vmx_suspend_domain, + .resume_cpu = vmx_resume_cpu, }; int start_vmx(void) @@ -1265,6 +1272,8 @@ int start_vmx(void) vmx_free_host_vmcs(vmcs); return 0; } + + this_cpu(host_vmcs) = vmcs; vmx_save_host_msrs(); @@ -3013,6 +3022,42 @@ asmlinkage void vmx_trace_vmentry(void) HVMTRACE_0D(VMENTRY, v); } +/* Suspend target domain with VMCS sync-ed */ +int vmx_suspend_domain(struct domain* d){ + struct vcpu *v; + + if (!is_hvm_domain(d)) + return 1; + + if (!atomic_read(&d->pause_count)) + return 0; + + for_each_vcpu(d, v) + { + spin_lock(&v->arch.hvm_vmx.vmcs_lock); + vmx_clear_vmcs(v); + spin_unlock(&v->arch.hvm_vmx.vmcs_lock); + } + + return 1; +} + +/* Resume vmx feature on the given cpu */ +static int vmx_resume_cpu(void){ + struct vmcs_struct *vmcs = this_cpu(host_vmcs); + + if ( __vmxon(virt_to_maddr(vmcs)) ) + { + clear_in_cr4(X86_CR4_VMXE); + printk("VMXON failed\n"); + vmx_free_host_vmcs(vmcs); + return 0; + } + + printk("VMXON is done\n"); + return 1; +} + /* * Local variables: * mode: C diff -r f217aafc1c17 xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Mon Jun 25 13:28:41 2007 -0400 +++ b/xen/include/asm-x86/hvm/hvm.h Mon Jun 25 15:27:34 2007 -0400 @@ -145,6 +145,10 @@ struct hvm_function_table { void (*init_hypercall_page)(struct domain *d, void 
*hypercall_page); int (*event_injection_faulted)(struct vcpu *v); + + int (*suspend_domain)(struct domain *d); + + int (*resume_cpu)(void); }; extern struct hvm_function_table hvm_funcs; @@ -279,4 +283,26 @@ static inline int hvm_event_injection_fa return hvm_funcs.event_injection_faulted(v); } +static inline int +hvm_suspend_domain(struct domain* d) +{ + int ret = 1; + + if (hvm_funcs.suspend_domain) + ret = hvm_funcs.suspend_domain(d); + + return ret; +} + +static inline int +hvm_resume_cpu(void) +{ + int ret = 1; + + if (hvm_funcs.resume_cpu) + ret = hvm_funcs.resume_cpu(); + + return ret; +} + #endif /* __ASM_X86_HVM_HVM_H__ */ diff -r f217aafc1c17 xen/include/asm-x86/hvm/vmx/vmcs.h --- a/xen/include/asm-x86/hvm/vmx/vmcs.h Mon Jun 25 13:28:41 2007 -0400 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Mon Jun 25 14:03:32 2007 -0400 @@ -28,6 +28,7 @@ extern void vmcs_dump_vcpu(void); extern void vmcs_dump_vcpu(void); extern void vmx_init_vmcs_config(void); extern void setup_vmcs_dump(void); +extern void vmx_clear_vmcs(struct vcpu *v); struct vmcs_struct { u32 vmcs_revision_id; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Kevin, Is it documented anywhere in which CPU states VMCS synchronisation may be lost? I see it makes sense that S3 would do so since the CPUs are powered off in that state (although I don''t see this documented anywhere -- maybe it''s ''obvious''), but what about e.g., Deep Sleep (C states)? Shouldn''t you VMXOFF before S3? -- Keir On 27/6/07 14:33, "Tian, Kevin" <kevin.tian@intel.com> wrote:> Add HVM hardware feature suspend/resume. > > Signed-off-by Ke Yu <ke.yu@intel.com> > Signed-off-by Kevin Tian <kevin.tian@intel.com> > > diff -r f217aafc1c17 xen/arch/x86/acpi/power.c > --- a/xen/arch/x86/acpi/power.c Mon Jun 25 13:28:41 2007 -0400 > +++ b/xen/arch/x86/acpi/power.c Mon Jun 25 17:36:32 2007 -0400 > @@ -83,7 +83,7 @@ static void device_power_up(void) > /* Main interface to do xen specific suspend/resume */ > int enter_state(u32 state) > { > - struct domain *d; > + struct domain *d, *pd = NULL; > unsigned long flags; > int error; > > @@ -99,7 +99,15 @@ int enter_state(u32 state) > > for_each_domain(d) > if (d->domain_id != 0) > + { > domain_pause(d); > + if (is_hvm_domain(d) && !hvm_suspend_domain(d)) > + { > + error = -EINVAL; > + goto Unpause; > + } > + pd = d; > + } > > pmprintk(XENLOG_INFO, "PM: Preparing system for %s sleep\n", > acpi_states[state]); > @@ -133,13 +141,22 @@ int enter_state(u32 state) > Done: > local_irq_restore(flags); > > - for_each_domain(d) > - if (d->domain_id!=0) > - domain_unpause(d); > + Unpause: > + if (pd) > + { > + for_each_domain(d) > + { > + /* Unpause until recorded last paused domain */ > + if (d == pd) > + break; > + > + if (d->domain_id != 0) > + domain_unpause(d); > + } > + } > > spin_unlock(&pm_lock); > return error; > - > } > > /* > diff -r f217aafc1c17 xen/arch/x86/acpi/suspend.c > --- a/xen/arch/x86/acpi/suspend.c Mon Jun 25 13:28:41 2007 -0400 > +++ b/xen/arch/x86/acpi/suspend.c Mon Jun 25 14:52:17 2007 -0400 > @@ -82,4 +82,6 @@ void restore_rest_processor_state(void) > > mtrr_ap_init(); > mcheck_init(&boot_cpu_data); 
> + if (hvm_enabled) > + hvm_resume_cpu(); > } > diff -r f217aafc1c17 xen/arch/x86/hvm/vmx/vmcs.c > --- a/xen/arch/x86/hvm/vmx/vmcs.c Mon Jun 25 13:28:41 2007 -0400 > +++ b/xen/arch/x86/hvm/vmx/vmcs.c Mon Jun 25 14:03:32 2007 -0400 > @@ -178,7 +178,7 @@ static void __vmx_clear_vmcs(void *info) > v->arch.hvm_vmx.launched = 0; > } > > -static void vmx_clear_vmcs(struct vcpu *v) > +void vmx_clear_vmcs(struct vcpu *v) > { > int cpu = v->arch.hvm_vmx.active_cpu; > > diff -r f217aafc1c17 xen/arch/x86/hvm/vmx/vmx.c > --- a/xen/arch/x86/hvm/vmx/vmx.c Mon Jun 25 13:28:41 2007 -0400 > +++ b/xen/arch/x86/hvm/vmx/vmx.c Mon Jun 25 17:38:49 2007 -0400 > @@ -53,6 +53,11 @@ > > char *vmx_msr_bitmap; > > +static DEFINE_PER_CPU(struct vmcs_struct*, host_vmcs); > + > +static int vmx_suspend_domain(struct domain *d); > +static int vmx_resume_cpu(void); > + > static void vmx_ctxt_switch_from(struct vcpu *v); > static void vmx_ctxt_switch_to(struct vcpu *v); > > @@ -1211,7 +1216,9 @@ static struct hvm_function_table vmx_fun > .inject_exception = vmx_inject_exception, > .init_ap_context = vmx_init_ap_context, > .init_hypercall_page = vmx_init_hypercall_page, > - .event_injection_faulted = vmx_event_injection_faulted > + .event_injection_faulted = vmx_event_injection_faulted, > + .suspend_domain = vmx_suspend_domain, > + .resume_cpu = vmx_resume_cpu, > }; > > int start_vmx(void) > @@ -1265,6 +1272,8 @@ int start_vmx(void) > vmx_free_host_vmcs(vmcs); > return 0; > } > + > + this_cpu(host_vmcs) = vmcs; > > vmx_save_host_msrs(); > > @@ -3013,6 +3022,42 @@ asmlinkage void vmx_trace_vmentry(void) > HVMTRACE_0D(VMENTRY, v); > } > > +/* Suspend target domain with VMCS sync-ed */ > +int vmx_suspend_domain(struct domain* d){ > + struct vcpu *v; > + > + if (!is_hvm_domain(d)) > + return 1; > + > + if (!atomic_read(&d->pause_count)) > + return 0; > + > + for_each_vcpu(d, v) > + { > + spin_lock(&v->arch.hvm_vmx.vmcs_lock); > + vmx_clear_vmcs(v); > + spin_unlock(&v->arch.hvm_vmx.vmcs_lock); > + } > + 
> + return 1; > +} > + > +/* Resume vmx feature on the given cpu */ > +static int vmx_resume_cpu(void){ > + struct vmcs_struct *vmcs = this_cpu(host_vmcs); > + > + if ( __vmxon(virt_to_maddr(vmcs)) ) > + { > + clear_in_cr4(X86_CR4_VMXE); > + printk("VMXON failed\n"); > + vmx_free_host_vmcs(vmcs); > + return 0; > + } > + > + printk("VMXON is done\n"); > + return 1; > +} > + > /* > * Local variables: > * mode: C > diff -r f217aafc1c17 xen/include/asm-x86/hvm/hvm.h > --- a/xen/include/asm-x86/hvm/hvm.h Mon Jun 25 13:28:41 2007 -0400 > +++ b/xen/include/asm-x86/hvm/hvm.h Mon Jun 25 15:27:34 2007 -0400 > @@ -145,6 +145,10 @@ struct hvm_function_table { > void (*init_hypercall_page)(struct domain *d, void > *hypercall_page); > > int (*event_injection_faulted)(struct vcpu *v); > + > + int (*suspend_domain)(struct domain *d); > + > + int (*resume_cpu)(void); > }; > > extern struct hvm_function_table hvm_funcs; > @@ -279,4 +283,26 @@ static inline int hvm_event_injection_fa > return hvm_funcs.event_injection_faulted(v); > } > > +static inline int > +hvm_suspend_domain(struct domain* d) > +{ > + int ret = 1; > + > + if (hvm_funcs.suspend_domain) > + ret = hvm_funcs.suspend_domain(d); > + > + return ret; > +} > + > +static inline int > +hvm_resume_cpu(void) > +{ > + int ret = 1; > + > + if (hvm_funcs.resume_cpu) > + ret = hvm_funcs.resume_cpu(); > + > + return ret; > +} > + > #endif /* __ASM_X86_HVM_HVM_H__ */ > diff -r f217aafc1c17 xen/include/asm-x86/hvm/vmx/vmcs.h > --- a/xen/include/asm-x86/hvm/vmx/vmcs.h Mon Jun 25 13:28:41 2007 > -0400 > +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Mon Jun 25 14:03:32 2007 > -0400 > @@ -28,6 +28,7 @@ extern void vmcs_dump_vcpu(void); > extern void vmcs_dump_vcpu(void); > extern void vmx_init_vmcs_config(void); > extern void setup_vmcs_dump(void); > +extern void vmx_clear_vmcs(struct vcpu *v); > > struct vmcs_struct { > u32 vmcs_revision_id; > _______________________________________________ > Xen-devel mailing list > 
Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
>From: Keir Fraser [mailto:keir@xensource.com] >Sent: 2007年7月10日 17:51 > >Kevin, > >Is it documented anywhere in which CPU states VMCS synchronisation >may be >lost? I see it makes sense that S3 would do so since the CPUs are >powered >off in that state (although I don't see this documented anywhere -- maybe >it's 'obvious'), but what about e.g., Deep Sleep (C states)?I don't see either. Since S3 is triggered by I/O write to chipset, CPUs are immediately powered off as result and thus internal VMCS cache is also lost. All the CPU context favored by system software should be saved by system software before that I/O write. So I thought it as 'obvious'. :-) For deep sleep states, IPI is taken as break event to bring that CPU back to working state (C0).> >Shouldn't you VMXOFF before S3? >Yes, it should. Actually for normal S3 path, it's not necessary since all the CPUs are reset after resume. However it's required at least for: - failure to do S3 suspend - S1 support In above two cases, CPUs are still powered while VMXON state makes physical CPU blocking INIT IPI which makes smp resume failed. I just realized it recently. Do you like to piggyback fix together with this patch, or I may send out an incremental one later? Thanks, Kevin _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
>From: Tian, Kevin >Sent: 2007年7月10日 20:50 > >>From: Keir Fraser [mailto:keir@xensource.com] >>Sent: 2007年7月10日 17:51 >> >>Kevin, >> >>Is it documented anywhere in which CPU states VMCS synchronisation >>may be >>lost? I see it makes sense that S3 would do so since the CPUs are >>powered >>off in that state (although I don't see this documented anywhere -- >maybe >>it's 'obvious'), but what about e.g., Deep Sleep (C states)? > >I don't see either. Since S3 is triggered by I/O write to chipset, CPUs >are immediately powered off as result and thus internal VMCS cache >is also lost. All the CPU context favored by system software should >be saved by system software before that I/O write. So I thought it as >'obvious'. :-) > >For deep sleep states, IPI is taken as break event to bring that >CPU back to working state (C0).Seems that I misunderstood your question. Normally such information is described in ICH spec and I didn't see VMX related warn by far for deep C states. Some deep C state is described with implicit cache flush which, I assume, also applies to the internal VMCS cache too. Or else all the side effects will be explicitly described such as LAPIC timer stop in some deep state. In that case, software needs to tackle that effect correspondingly. Thanks, Kevin _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
On 10/7/07 14:12, "Tian, Kevin" <kevin.tian@intel.com> wrote:> Seems that I misunderstood your question. Normally such information > is described in ICH spec and I didn't see VMX related warn by far for > deep C states. Some deep C state is described with implicit cache flush > which, I assume, also applies to the internal VMCS cache too. Or else > all the side effects will be explicitly described such as LAPIC timer stop > in some deep state. In that case, software needs to tackle that effect > correspondingly.Okay. Anyway, back to your patch 3/10. With a view to cleanly adding VMXOFF on suspend, and to allow efficient VMCLEARing if we need it in future, e.g., for deep-C states, I think you should change the suspend_domain() hook into suspend_cpu(): 1. This is then symmetric with the resume_cpu() hook. 2. It's a natural place to put VMXOFF (unlike suspend_domain()). Of course, the question then is: how do you find the active VMCS's that need clearing? I suggest you add a list_head to arch_vmx_struct, have a per-cpu list of active VMCS's, enqueue on vmx_load_vmcs() and dequeue on __vmx_clear_vmcs(). Could you revise patch 3/10 and resend, please? -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
>From: Keir Fraser [mailto:keir@xensource.com] >Sent: 2007年7月10日 21:52 > >Okay. > >Anyway, back to your patch 3/10. With a view to cleanly adding VMXOFF >on >suspend, and to allow efficient VMCLEARing if we need it in future, e.g., >for deep-C states, I think you should change the suspend_domain() hook >into >suspend_cpu(): > 1. This is then symmetric with the resume_cpu() hook. > 2. It's a natural place to put VMXOFF (unlike suspend_domain()). > >Of course, the question then is: how do you find the active VMCS's that >need >clearing? I suggest you add a list_head to arch_vmx_struct, have a >per-cpu >list of active VMCS's, enqueue on vmx_load_vmcs() and dequeue on >__vmx_clear_vmcs(). > >Could you revise patch 3/10 and resend, please? > > -- Keir Sure, I'll do it. Thanks, Kevin _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel