Utilize the cpu hotplug infrastructure to pull down all cpus except
cpu0 before starting the suspend sequence. One tricky point is that
cpu0 is a bit special, and we'd better do the suspend on it. However
vcpu0/dom0 is the one to trigger the power event, and it may not be
bound to cpu0, so a new softirq is introduced to switch the flow to
the idle vcpu on cpu0 if that case happens. Be careful that lazy
context on all processors has to be sync-ed before the suspend flow,
or else it may be lost or cause incorrect behavior when guest settings
(like fs/gs) are later restored from Xen context.

Also restore S5 (power-off) support.

Signed-off-by: Kevin Tian <kevin.tian@intel.com>

diff -r 5ee7cfab745e xen/arch/x86/acpi/power.c
--- a/xen/arch/x86/acpi/power.c Mon May 14 22:51:52 2007 -0400
+++ b/xen/arch/x86/acpi/power.c Tue May 15 00:27:37 2007 -0400
@@ -24,6 +24,7 @@
 #include <xen/sched.h>
 #include <xen/domain.h>
 #include <xen/console.h>
+#include <xen/softirq.h>

 u8 sleep_states[ACPI_S_STATE_COUNT];
 DEFINE_SPINLOCK(pm_lock);
@@ -134,27 +135,57 @@ static void device_power_up(void)
     console_resume();
 }

-/* Main interface to do xen specific suspend/resume */
-int enter_state(u32 state)
+static void freeze_domains(void)
 {
     struct domain *d;
-    unsigned long flags;
-    int error;
-
-    if (state <= ACPI_STATE_S0 || state > ACPI_S_STATES_MAX)
-        return -EINVAL;
-
-    if (!spin_trylock(&pm_lock))
-        return -EBUSY;
-
+
     for_each_domain(d)
         if (d->domain_id != 0) {
            domain_pause(d);
            arch_domain_suspend(d);
        }
-
+}
+
+static void thaw_domains(void)
+{
+    struct domain *d;
+
+    for_each_domain(d)
+        if (d->domain_id != 0)
+            domain_unpause(d);
+}
+
+/* Main interface to do xen specific suspend/resume */
+int enter_state(u32 state)
+{
+    unsigned long flags;
+    cpumask_t mask = cpu_online_map;
+    int error;
+
+    if (smp_processor_id() != 0)
+        return -EPERM;
+
+    if (state <= ACPI_STATE_S0 || state > ACPI_S_STATES_MAX)
+        return -EINVAL;
+
+    if (!spin_trylock(&pm_lock))
+        return -EBUSY;
+
     pmprintk(XENLOG_INFO, "PM: Preparing system for %s sleep\n",
              acpi_states[state]);
+
+    /* Sync all lazy state on the other cpus, since the APs will be
+     * re-initialized like a fresh boot and stale context would be lost.
+     */
+    cpu_clear(0, mask);
+    flush_tlb_mask(mask);
+    pmprintk(XENLOG_INFO, "Finish lazy state sync\n");
+
+    disable_nonboot_cpus();
+    if (num_online_cpus() != 1) {
+        error = -EBUSY;
+        goto Enable_cpu;
+    }

     local_irq_save(flags);
@@ -188,12 +219,10 @@ int enter_state(u32 state)
     device_power_up();

     pmprintk(XENLOG_INFO, "PM: Finishing wakeup.\n");

-    for_each_domain(d)
-        if (d->domain_id != 0)
-            domain_unpause(d);
-
  Done:
     local_irq_restore(flags);
+
+ Enable_cpu:
+    enable_nonboot_cpus();
     spin_unlock(&pm_lock);
     return error;
@@ -221,6 +250,14 @@ int set_acpi_sleep_info(struct xenpf_set
                 acpi_sinfo.pm1a_evt, acpi_sinfo.pm1b_evt,
                 info->xen_waking_vec);
     return 0;
+}
+
+static void acpi_power_off(void)
+{
+    printk("%s called\n", __FUNCTION__);
+    local_irq_disable();
+    /* Some SMP machines can only power off from the boot CPU */
+    acpi_enter_sleep_state(ACPI_STATE_S5);
 }

 /*
@@ -262,7 +299,29 @@ int acpi_enter_sleep(struct xenpf_enter_
     acpi_video_flags = sleep->video_flags;
     saved_videomode = sleep->video_mode;

-    return enter_state(acpi_sinfo.sleep_state);
+    /* acpi power off method */
+    if (acpi_sinfo.sleep_state == ACPI_STATE_S5) {
+        acpi_power_off();
+        /* Shouldn't return */
+        while (1);
+    }
+
+    freeze_domains();
+
+    if (current->processor == 0) {
+        int ret;
+
+        pmprintk(XENLOG_INFO, "vcpu0 on cpu0, sleep directly\n");
+        ret = enter_state(acpi_sinfo.sleep_state);
+        thaw_domains();
+        return ret;
+    }
+
+    pmprintk(XENLOG_INFO, "vcpu0 on cpu%d, pause self and notify cpu0\n",
+             current->processor);
+    cpu_raise_softirq(0, PM_SOFTIRQ);
+    vcpu_pause_self();
+    /* return value doesn't matter here. */
+    return 0;
 }

 static int acpi_get_wake_status(void)
@@ -288,6 +347,59 @@ acpi_status asmlinkage acpi_enter_sleep_
     /* Wait until we enter sleep state, and spin until we wake */
     while (!acpi_get_wake_status());
     return_ACPI_STATUS(AE_OK);
+}
+
+/*
+ * Power management related softirq, handled on cpu0 only.
+ *
+ * The reason for introducing this softirq is that cpu0 is a bit
+ * special as the last one to be pulled down. However the sleep
+ * request is issued from vcpu0 of dom0, and that vcpu may not be
+ * bound to cpu0.
+ *
+ * So if the above case happens, the CPU receiving the sleep request
+ * raises a softirq to cpu0, and the idle vcpu on cpu0 then executes
+ * this handler immediately.
+ *
+ * If vcpu0 is already running on cpu0, this softirq is not triggered.
+ */
+static void pm_softirq(void)
+{
+    int cpu = smp_processor_id();
+    struct vcpu *v = dom0->vcpu[0];
+    struct cpu_user_regs *regs;
+
+    pmprintk(XENLOG_DEBUG, "In pm_softirq\n");
+    /* only cpu0 handles this irq for now */
+    if (cpu != 0)
+        return;
+
+    pmprintk(XENLOG_DEBUG, "handled by cpu0\n");
+    /* Wait for vcpu0/dom0 to be paused */
+    while ( !atomic_read(&v->pause_count) )
+        cpu_relax();
+
+    /* Then wait for the context of vcpu0/dom0 to be sync-ed */
+    while ( test_bit(_VPF_need_sync, &v->pause_flags) )
+        cpu_relax();
+
+    pmprintk(XENLOG_INFO, "vcpu0/dom0 has been paused\n");
+
+    /* Sync lazy state on this cpu, to avoid stale context from a
+     * previous domain crashing the system after resume. For example,
+     * ds/es/fs/gs won't be restored after resume for x86-64, so this
+     * step ensures they are flushed into guest context.
+     */
+    __sync_lazy_execstate();
+    pmprintk(XENLOG_INFO, "Flush lazy state\n");
+
+    /* now safe to suspend the whole system from cpu0 */
+    regs = &v->arch.guest_context.user_regs;
+    regs->eax = enter_state(acpi_sinfo.sleep_state);
+
+    /* Now unpause vcpu0/dom0 */
+    vcpu_unpause(v);
+
+    thaw_domains();
 }

 static int __init acpi_sleep_init(void)
@@ -307,6 +419,8 @@ static int __init acpi_sleep_init(void)
     printk(")\n");

     acpi_reserve_bootmem();
+
+    open_softirq(PM_SOFTIRQ, pm_softirq);
     return 0;
 }
 __initcall(acpi_sleep_init);
diff -r 5ee7cfab745e xen/include/asm-x86/smp.h
--- a/xen/include/asm-x86/smp.h Mon May 14 22:51:52 2007 -0400
+++ b/xen/include/asm-x86/smp.h Mon May 14 22:51:53 2007 -0400
@@ -70,6 +70,8 @@ extern void enable_nonboot_cpus(void);
 extern void enable_nonboot_cpus(void);
 #else
 static inline int cpu_is_offline(int cpu) {return 0;}
+static inline void disable_nonboot_cpus(void) {}
+static inline void enable_nonboot_cpus(void) {}
 #endif

 /*
diff -r 5ee7cfab745e xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h Mon May 14 22:51:52 2007 -0400
+++ b/xen/include/xen/softirq.h Mon May 14 22:51:53 2007 -0400
@@ -10,8 +10,9 @@
 #define PAGE_SCRUB_SOFTIRQ 5
 #define TRACE_SOFTIRQ 6
 #define RCU_SOFTIRQ 7
+#define PM_SOFTIRQ 8

-#define NR_COMMON_SOFTIRQS 8
+#define NR_COMMON_SOFTIRQS 9

 #include <asm/softirq.h>