Tian, Kevin
2007-Jun-27 13:29 UTC
[Xen-devel] [PATCH 1/10] Provide basic Xen PM infrastructure
Basic infrastructure for Xen S3 support, with common CPU context save/restore logic shared between 32-bit and 64-bit. The wakeup code is split into two parts:
- the first part is placed after the trampoline code, so that it can share the trampoline's tricks, such as the relocation base and the identity mapping;
- the second part is placed in the Xen code segment and performs the actual CPU context restore.

Signed-off-by: Ke Yu <ke.yu@intel.com>
Signed-off-by: Kevin Tian <kevin.tian@intel.com>

diff -r 9261686d840c xen/arch/x86/acpi/Makefile
--- a/xen/arch/x86/acpi/Makefile	Tue Jun 26 12:40:37 2007 +0100
+++ b/xen/arch/x86/acpi/Makefile	Tue Jun 26 20:28:13 2007 -0400
@@ -1,1 +1,2 @@ obj-y += boot.o
 obj-y += boot.o
+obj-y += power.o suspend.o wakeup_prot.o
diff -r 9261686d840c xen/arch/x86/acpi/power.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/acpi/power.c	Tue Jun 26 21:39:59 2007 -0400
@@ -0,0 +1,152 @@
+/* drivers/acpi/sleep/power.c - PM core functionality for Xen
+ *
+ * Copyrights from Linux side:
+ * Copyright (c) 2000-2003 Patrick Mochel
+ * Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz>
+ * Copyright (c) 2003 Open Source Development Lab
+ * Copyright (c) 2004 David Shaohua Li <shaohua.li@intel.com>
+ * Copyright (c) 2005 Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>
+ *
+ * Slimmed with Xen specific support.
+ */
+
+#include <asm/io.h>
+#define CONFIG_ACPI_SLEEP
+#include <asm/acpi.h>
+#include <xen/acpi.h>
+#include <xen/errno.h>
+#include <xen/iocap.h>
+#include <xen/sched.h>
+#include <asm/acpi.h>
+#include <asm/irq.h>
+#include <asm/init.h>
+#include <xen/spinlock.h>
+#include <xen/sched.h>
+#include <xen/domain.h>
+#include <xen/console.h>
+
+u8 sleep_states[ACPI_S_STATE_COUNT];
+DEFINE_SPINLOCK(pm_lock);
+
+extern void do_suspend_lowlevel(void);
+
+static char *acpi_states[ACPI_S_STATE_COUNT] =
+{
+    [ACPI_STATE_S1] = "standby",
+    [ACPI_STATE_S3] = "mem",
+    [ACPI_STATE_S4] = "disk",
+};
+
+unsigned long acpi_video_flags;
+unsigned long saved_videomode;
+
+/* XXX: Add suspend failure recovery later */
+static int device_power_down(void)
+{
+    console_suspend();
+
+    time_suspend();
+
+    i8259A_suspend();
+
+    ioapic_suspend();
+
+    lapic_suspend();
+
+    return 0;
+}
+
+static void device_power_up(void)
+{
+    lapic_resume();
+
+    ioapic_resume();
+
+    i8259A_resume();
+
+    time_resume();
+
+    console_resume();
+}
+
+/* Main interface to do xen specific suspend/resume */
+int enter_state(u32 state)
+{
+    struct domain *d;
+    unsigned long flags;
+    int error;
+
+    if (state <= ACPI_STATE_S0 || state > ACPI_S_STATES_MAX)
+        return -EINVAL;
+
+    /* Sync lazy state on this cpu */
+    __sync_lazy_execstate();
+    pmprintk(XENLOG_INFO, "Flush lazy state\n");
+
+    if (!spin_trylock(&pm_lock))
+        return -EBUSY;
+
+    for_each_domain(d)
+        if (d->domain_id != 0)
+            domain_pause(d);
+
+    pmprintk(XENLOG_INFO, "PM: Preparing system for %s sleep\n",
+             acpi_states[state]);
+
+    local_irq_save(flags);
+
+    if ((error = device_power_down()))
+    {
+        printk(XENLOG_ERR "Some devices failed to power down\n");
+        goto Done;
+    }
+
+    ACPI_FLUSH_CPU_CACHE();
+
+    switch (state)
+    {
+    case ACPI_STATE_S3:
+        do_suspend_lowlevel();
+        break;
+    default:
+        error = -EINVAL;
+        break;
+    }
+
+    pmprintk(XENLOG_INFO, "Back to C!\n");
+
+    device_power_up();
+
+    pmprintk(XENLOG_INFO, "PM: Finishing wakeup.\n");
+
+ Done:
+    local_irq_restore(flags);
+
+    for_each_domain(d)
+        if (d->domain_id != 0)
+            domain_unpause(d);
+
+    spin_unlock(&pm_lock);
+    return error;
+}
+
+static int __init acpi_sleep_init(void)
+{
+    int i = 0;
+
+    pmprintk(XENLOG_INFO, "ACPI (supports");
+    for (i = 0; i < ACPI_S_STATE_COUNT; i++)
+    {
+        if (i == ACPI_STATE_S3)
+        {
+            sleep_states[i] = 1;
+            printk(" S%d", i);
+        }
+        else
+            sleep_states[i] = 0;
+    }
+    printk(")\n");
+    return 0;
+}
+__initcall(acpi_sleep_init);
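For illustration, a minimal sketch of how a caller might drive this interface — enter_state() and ACPI_STATE_S3 are from the patch above, while the wrapper name do_enter_s3() and its placement are hypothetical:

    /* Hypothetical wrapper: only S3 is wired up by this patch, so reject
     * everything else up front. */
    static int do_enter_s3(void)
    {
        /* enter_state() pauses all non-dom0 domains, powers devices down
         * with interrupts disabled, then calls do_suspend_lowlevel(). */
        return enter_state(ACPI_STATE_S3);
    }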
diff -r 9261686d840c xen/arch/x86/acpi/suspend.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/acpi/suspend.c	Tue Jun 26 21:39:59 2007 -0400
@@ -0,0 +1,82 @@
+/*
+ * Suspend support specific for i386.
+ *
+ * Distribute under GPLv2
+ *
+ * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
+ */
+#include <xen/config.h>
+#include <xen/acpi.h>
+#include <xen/smp.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/flushtlb.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
+#include <asm/i387.h>
+
+/* The following context save/restore happens on the real context
+ * of the current vcpu, with a lazy state sync forced earlier.
+ */
+#if defined(CONFIG_X86_64)
+unsigned long saved_lstar, saved_cstar;
+#endif
+
+void save_rest_processor_state(void)
+{
+    /*
+     * The net effect of unlazy_fpu() is to set cr0.ts, so there's no
+     * need to restore the fpu after resume.
+     */
+    if (!is_idle_vcpu(current))
+        unlazy_fpu(current);
+
+#if defined(CONFIG_X86_64)
+    rdmsrl(MSR_CSTAR, saved_cstar);
+    rdmsrl(MSR_LSTAR, saved_lstar);
+#endif
+}
+
+#define loaddebug(_v,_reg) \
+    __asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_v)->debugreg[_reg]))
+
+void restore_rest_processor_state(void)
+{
+    int cpu = smp_processor_id();
+    struct tss_struct *t = &init_tss[cpu];
+    struct vcpu *v = current;
+
+    /* The Linux comment below is scary enough that we keep this for safety. */
+    set_tss_desc(cpu, t);  /* This just modifies memory; should not be
+                            * necessary. But... This is necessary, because
+                            * 386 hardware has concept of busy TSS or some
+                            * similar stupidity. */
+
+    load_TR(cpu);
+
+#if defined(CONFIG_X86_64)
+    /* Recover syscall MSRs */
+    wrmsrl(MSR_LSTAR, saved_lstar);
+    wrmsrl(MSR_CSTAR, saved_cstar);
+    wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);
+    wrmsr(MSR_SYSCALL_MASK, EF_VM|EF_RF|EF_NT|EF_DF|EF_IE|EF_TF, 0U);
+#else /* !defined(CONFIG_X86_64) */
+    if (supervisor_mode_kernel && cpu_has_sep)
+        wrmsr(MSR_IA32_SYSENTER_ESP, &t->esp1, 0);
+#endif
+
+    /* Maybe load the debug registers. */
+    if ( !is_idle_vcpu(v) && unlikely(v->arch.guest_context.debugreg[7]) )
+    {
+        loaddebug(&v->arch.guest_context, 0);
+        loaddebug(&v->arch.guest_context, 1);
+        loaddebug(&v->arch.guest_context, 2);
+        loaddebug(&v->arch.guest_context, 3);
+        /* no 4 and 5 */
+        loaddebug(&v->arch.guest_context, 6);
+        loaddebug(&v->arch.guest_context, 7);
+    }
+
+    /* Do we really need to start the fpu? Just set cr0.ts to monitor it. */
+    stts();
+
+    mtrr_ap_init();
+    mcheck_init(&boot_cpu_data);
+}
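As a reading aid, loaddebug() pastes its register-number argument into the instruction with the preprocessor's stringizing operator; a sketch of what the first call above expands to (v as in restore_rest_processor_state()):

    /* Expansion of loaddebug(&v->arch.guest_context, 0): a single move of
     * the saved value into hardware debug register %db0. */
    __asm__ __volatile__ (
        "mov %0,%%db0"
        : : "r" ((&v->arch.guest_context)->debugreg[0]) );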
diff -r 9261686d840c xen/arch/x86/acpi/wakeup_prot.S
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/acpi/wakeup_prot.S	Tue Jun 26 20:28:13 2007 -0400
@@ -0,0 +1,267 @@
+        .text
+
+#include <xen/config.h>
+#include <xen/multiboot.h>
+#include <public/xen.h>
+#include <asm/asm_defns.h>
+#include <asm/desc.h>
+#include <asm/page.h>
+#include <asm/msr.h>
+
+#if defined(__x86_64__)
+
+        .code64
+
+#define GREG(x)         %r##x
+#define SAVED_GREG(x)   saved_r##x(%rip)
+#define DECLARE_GREG(x) saved_r##x:     .quad   0
+#define SAVE_GREG(x)    movq GREG(x), SAVED_GREG(x)
+#define LOAD_GREG(x)    movq SAVED_GREG(x), GREG(x)
+
+#define REF(x)          x(%rip)
+
+#define RDMSR(ind, m)               \
+        xorq    %rdx, %rdx;         \
+        mov     $ind, %ecx;         \
+        rdmsr;                      \
+        shlq    $0x20, %rdx;        \
+        orq     %rax, %rdx;         \
+        movq    %rdx, m(%rip);
+
+#define WRMSR(ind, m)               \
+        mov     $ind, %ecx;         \
+        movq    m(%rip), %rdx;      \
+        mov     %edx, %eax;         \
+        shrq    $0x20, %rdx;        \
+        wrmsr;
+
+#else /* !defined(__x86_64__) */
+
+        .code32
+
+#define GREG(x)         %e##x
+#define SAVED_GREG(x)   saved_e##x
+#define DECLARE_GREG(x) saved_e##x:     .long   0
+#define SAVE_GREG(x)    movl GREG(x), SAVED_GREG(x)
+#define LOAD_GREG(x)    movl SAVED_GREG(x), GREG(x)
+
+#define REF(x)          x
+
+#endif
+
+ENTRY(do_suspend_lowlevel)
+
+        SAVE_GREG(sp)
+        SAVE_GREG(ax)
+        SAVE_GREG(bx)
+        SAVE_GREG(cx)
+        SAVE_GREG(dx)
+        SAVE_GREG(bp)
+        SAVE_GREG(si)
+        SAVE_GREG(di)
+
+#if defined(__x86_64__)
+
+        SAVE_GREG(8)     # save r8...r15
+        SAVE_GREG(9)
+        SAVE_GREG(10)
+        SAVE_GREG(11)
+        SAVE_GREG(12)
+        SAVE_GREG(13)
+        SAVE_GREG(14)
+        SAVE_GREG(15)
+
+        pushfq;
+        popq    SAVED_GREG(flags)
+
+        mov     %cr8, GREG(ax)
+        mov     GREG(ax), REF(saved_cr8)
+
+        RDMSR(MSR_FS_BASE, saved_fs_base)
+        RDMSR(MSR_GS_BASE, saved_gs_base)
+        RDMSR(MSR_SHADOW_GS_BASE, saved_kernel_gs_base)
+
+#else /* !defined(__x86_64__) */
+
+        pushfl;
+        popl    SAVED_GREG(flags)
+
+#endif
+
+        mov     %ds, REF(saved_ds)
+        mov     %es, REF(saved_es)
+        mov     %fs, REF(saved_fs)
+        mov     %gs, REF(saved_gs)
+        mov     %ss, REF(saved_ss)
+
+        sgdt    REF(saved_gdt)
+        sidt    REF(saved_idt)
+        sldt    REF(saved_ldt)
+
+        mov     %cr0, GREG(ax)
+        mov     GREG(ax), REF(saved_cr0)
+
+        mov     %cr3, GREG(ax)
+        mov     GREG(ax), REF(saved_cr3)
+
+        call    save_rest_processor_state
+
+#if defined(__x86_64__)
+
+        mov     $3, %rdi
+        xor     %eax, %eax
+
+#else /* !defined(__x86_64__) */
+
+        push    $3
+
+#endif
+
+        /* enter sleep state physically */
+        call    acpi_enter_sleep_state
+        jmp     __ret_point
+
+        .align  16
+        .globl  __ret_point
+__ret_point:
+
+        /* mmu_cr4_features contains the latest cr4 setting */
+        mov     REF(mmu_cr4_features), GREG(ax)
+        mov     GREG(ax), %cr4
+
+        mov     REF(saved_cr3), GREG(ax)
+        mov     GREG(ax), %cr3
+
+        mov     REF(saved_cr0), GREG(ax)
+        mov     GREG(ax), %cr0
+
+        lgdt    REF(saved_gdt)
+        lidt    REF(saved_idt)
+        lldt    REF(saved_ldt)
+
+        mov     REF(saved_ss), %ss
+        LOAD_GREG(sp)
+
+#if defined(__x86_64__)
+
+        mov     REF(saved_cr8), %rax
+        mov     %rax, %cr8
+
+        pushq   SAVED_GREG(flags)
+        popfq
+
+        /* The idle vcpu doesn't need its segment selectors reloaded, since
+         * they may hold stale values from other domains, and a reload may
+         * page fault if there is no matching gdt entry.
+         */
+        mov     $(STACK_SIZE - 8), %rax
+        or      %rsp, %rax
+        and     $~7, %rax
+        mov     (%rax), %rax
+        mov     0x10(%rax), %rax
+        cmpw    $0x7fff, (%rax)
+        je      1f
+
+        /* These selectors come from the guest, and thus need reloading */
+        mov     REF(saved_ds), %ds
+        mov     REF(saved_es), %es
+        mov     REF(saved_fs), %fs
+
+        /* gs load is special */
+        mov     REF(saved_gs), %rsi
+        mov     $3, %rdi        # SEGBASE_GS_USER_SEL
+        call    do_set_segment_base
+
+1:
+        # MSR restore
+        WRMSR(MSR_FS_BASE, saved_fs_base)
+        WRMSR(MSR_GS_BASE, saved_gs_base)
+        WRMSR(MSR_SHADOW_GS_BASE, saved_kernel_gs_base)
+
+#else /* !defined(__x86_64__) */
+
+        pushl   SAVED_GREG(flags)
+        popfl
+
+        /* No fs/gs reload: they are already saved at the bottom of the stack */
+        mov     REF(saved_ds), %ds
+        mov     REF(saved_es), %es
+
+#endif
+
+        call    restore_rest_processor_state
+
+        LOAD_GREG(bp)
+        LOAD_GREG(ax)
+        LOAD_GREG(bx)
+        LOAD_GREG(cx)
+        LOAD_GREG(dx)
+        LOAD_GREG(si)
+        LOAD_GREG(di)
+#if defined(__x86_64__)
+        LOAD_GREG(8)     # restore r8...r15
+        LOAD_GREG(9)
+        LOAD_GREG(10)
+        LOAD_GREG(11)
+        LOAD_GREG(12)
+        LOAD_GREG(13)
+        LOAD_GREG(14)
+        LOAD_GREG(15)
+#endif
+        ret
+
+.data
+        .align 16
+saved_ds:       .word   0
+saved_es:       .word   0
+saved_ss:       .word   0
+saved_gs:       .word   0
+saved_fs:       .word   0
+
+        .align 4
+        .globl  saved_magic
+saved_magic:    .long   0x9abcdef0
+
+        .align 8
+DECLARE_GREG(sp)
+DECLARE_GREG(bp)
+DECLARE_GREG(ax)
+DECLARE_GREG(bx)
+DECLARE_GREG(cx)
+DECLARE_GREG(dx)
+DECLARE_GREG(si)
+DECLARE_GREG(di)
+DECLARE_GREG(flags)
+
+#if defined(__x86_64__)
+
+DECLARE_GREG(8)
+DECLARE_GREG(9)
+DECLARE_GREG(10)
+DECLARE_GREG(11)
+DECLARE_GREG(12)
+DECLARE_GREG(13)
+DECLARE_GREG(14)
+DECLARE_GREG(15)
+
+saved_gdt:      .quad   0,0
+saved_idt:      .quad   0,0
+saved_ldt:      .quad   0,0
+
+saved_cr0:      .quad   0
+saved_cr3:      .quad   0
+saved_cr8:      .quad   0
+
+saved_gs_base:  .quad   0
+saved_fs_base:  .quad   0
+saved_kernel_gs_base:   .quad   0
+
+#else /* !defined(__x86_64__) */
+
+saved_gdt:      .long   0,0
+saved_idt:      .long   0,0
+saved_ldt:      .long   0
+
+saved_cr0:      .long   0
+saved_cr3:      .long   0
+
+#endif
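The RDMSR()/WRMSR() assembler macros above splice the %edx:%eax halves of an MSR into a single 64-bit memory slot and back. In C terms they behave like the rdmsrl()/wrmsrl() pair that suspend.c already uses; a rough sketch (the local variable is illustrative, not in the patch):

    /* Rough C equivalent of RDMSR(MSR_FS_BASE, saved_fs_base) before sleep
     * and WRMSR(MSR_FS_BASE, saved_fs_base) after wakeup. */
    uint64_t fs_base;                /* illustrative local */
    rdmsrl(MSR_FS_BASE, fs_base);    /* save before sleeping */
    wrmsrl(MSR_FS_BASE, fs_base);    /* restore after wakeup */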
diff -r 9261686d840c xen/arch/x86/boot/Makefile
--- a/xen/arch/x86/boot/Makefile	Tue Jun 26 12:40:37 2007 +0100
+++ b/xen/arch/x86/boot/Makefile	Tue Jun 26 20:28:13 2007 -0400
@@ -1,3 +1,3 @@ obj-y += head.o
 obj-y += head.o
 
-head.o: head.S $(TARGET_SUBARCH).S trampoline.S mem.S video.S cmdline.S edd.S
+head.o: head.S $(TARGET_SUBARCH).S trampoline.S mem.S video.S cmdline.S edd.S wakeup.S
diff -r 9261686d840c xen/arch/x86/boot/head.S
--- a/xen/arch/x86/boot/head.S	Tue Jun 26 12:40:37 2007 +0100
+++ b/xen/arch/x86/boot/head.S	Tue Jun 26 20:28:13 2007 -0400
@@ -175,9 +175,11 @@ 1:      stosl   /* low mappings cover up
 
 #include "cmdline.S"
 
+        .align 16,0x90
         .globl trampoline_start, trampoline_end
 trampoline_start:
 #include "trampoline.S"
+#include "wakeup.S"
 trampoline_end:
 
         .text
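wakeup.S below executes from the trampoline's relocated copy below 1MB, so symbols cannot be referenced by their link-time addresses; the wakesym() macro turns them into offsets within the %cs segment. The address arithmetic, as a C sketch (this helper is illustrative, not part of the patch):

    /* Illustrative only: runtime physical address of a wakeup.S symbol.
     * %cs holds the paragraph of the relocated wakeup_start, and
     * wakesym(sym) == sym - wakeup_start is the in-segment offset. */
    static uint32_t wakeup_phys_addr(uint16_t cs, uint32_t sym,
                                     uint32_t wakeup_start)
    {
        return ((uint32_t)cs << 4) + (sym - wakeup_start);
    }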
diff -r 9261686d840c xen/arch/x86/boot/wakeup.S
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/boot/wakeup.S	Tue Jun 26 21:40:42 2007 -0400
@@ -0,0 +1,215 @@
+        .code16
+
+#undef wakesym
+/* Used in real mode, to calculate the offset within the current segment */
+#define wakesym(sym) (sym - wakeup_start)
+
+ENTRY(wakeup_start)
+wakeup_code_start = .
+
+        cli
+        cld
+
+        # setup data segment
+        movw    %cs, %ax
+        movw    %ax, %ds
+        movw    %ax, %ss        # A stack is required for the BIOS call
+        movw    $wakesym(wakeup_stack), %sp
+
+        pushl   $0              # Kill dangerous flag early
+        popfl
+
+        # check magic number
+        movl    wakesym(real_magic), %eax
+        cmpl    $0x12345678, %eax
+        jne     bogus_real_magic
+
+        # for acpi_sleep=s3_bios
+        testl   $1, wakesym(video_flags)
+        jz      1f
+        lcall   $0xc000, $3
+        movw    %cs, %ax        # In case it was clobbered by the BIOS
+        movw    %ax, %ds
+        movw    %ax, %ss        # Need this? How to ret if clobbered?
+
+1:
+        # for acpi_sleep=s3_mode
+        testl   $2, wakesym(video_flags)
+        jz      1f
+        movl    wakesym(video_mode), %eax
+        call    mode_setw
+
+1:
+        # Show some progress if VGA is resumed
+        movw    $0xb800, %ax
+        movw    %ax, %fs
+        movw    $0x0e00 + 'L', %fs:(0x10)
+
+        # The boot trampoline is below 1MB; shift its start into
+        # %fs to reference symbols in that area
+        movl    $BOOT_TRAMPOLINE, %eax
+        shrl    $4, %eax
+        movl    %eax, %fs
+        lidt    %fs:bootsym(idt_48)
+        lgdt    %fs:bootsym(gdt_48)
+
+        movw    $1, %ax
+        lmsw    %ax             # Turn on CR0.PE
+        jmp     1f
+1:
+        ljmpl   $BOOT_CS32, $bootsym_phys(wakeup_32)
+
+/* This code uses an extended set of video mode numbers. These include:
+ * Aliases for standard modes
+ *      NORMAL_VGA (-1)
+ *      EXTENDED_VGA (-2)
+ *      ASK_VGA (-3)
+ * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
+ * of compatibility when extending the table. These are between 0x00 and 0xff.
+ */
+#define VIDEO_FIRST_MENU 0x0000
+
+/* Standard BIOS video modes (BIOS number + 0x0100) */
+#define VIDEO_FIRST_BIOS 0x0100
+
+/* VESA BIOS video modes (VESA number + 0x0200) */
+#define VIDEO_FIRST_VESA 0x0200
+
+/* Video7 special modes (BIOS number + 0x0900) */
+#define VIDEO_FIRST_V7 0x0900
+
+# Setting of user mode (AX=mode ID) => CF=success
+mode_setw:
+        movw    %ax, %bx
+        cmpb    $VIDEO_FIRST_VESA>>8, %ah
+        jnc     check_vesaw
+        decb    %ah
+
+setbadw: clc
+        ret
+
+check_vesaw:
+        subb    $VIDEO_FIRST_VESA>>8, %bh
+        orw     $0x4000, %bx    # Use linear frame buffer
+        movw    $0x4f02, %ax    # VESA BIOS mode set call
+        int     $0x10
+        cmpw    $0x004f, %ax    # AL=4f if implemented
+        jnz     _setbadw        # AH=0 if OK
+
+        stc
+        ret
+
+_setbadw: jmp setbadw
+
+bogus_real_magic:
+        movw    $0x0e00 + 'B', %fs:(0x12)
+        jmp     bogus_real_magic
+
+        .align 4
+real_magic:     .long 0x12345678
+        .globl video_mode, video_flags
+video_mode:     .long 0
+video_flags:    .long 0
+
+        .code32
+
+        # Now in protected mode, with paging disabled.
+        # Add an offset for any reference to xen specific symbols.
+
+wakeup_32:
+        mov     $BOOT_DS, %eax
+        mov     %eax, %ds
+        mov     %eax, %ss
+        mov     $bootsym_phys(wakeup_stack), %esp
+
+        # check saved magic again
+        mov     $sym_phys(saved_magic), %eax
+        add     bootsym_phys(trampoline_xen_phys_start), %eax
+        mov     (%eax), %eax
+        cmp     $0x9abcdef0, %eax
+        jne     bogus_saved_magic
+
+        /* fpu init? */
+
+        /* Initialise CR4. */
+#if CONFIG_PAGING_LEVELS == 2
+        mov     $X86_CR4_PSE, %ecx
+#else
+        mov     $X86_CR4_PAE, %ecx
+#endif
+        mov     %ecx, %cr4
+
+        /* Load pagetable base register */
+        mov     $sym_phys(idle_pg_table),%eax
+        add     bootsym_phys(trampoline_xen_phys_start),%eax
+        mov     %eax,%cr3
+
+        /* Will the cpuid features change after resume? */
+#if CONFIG_PAGING_LEVELS != 2
+        /* Set up EFER (Extended Feature Enable Register). */
+        mov     bootsym_phys(cpuid_ext_features),%edi
+        test    $0x20100800,%edi /* SYSCALL/SYSRET, No Execute, Long Mode? */
+        jz      .Lskip_eferw
+        movl    $MSR_EFER,%ecx
+        rdmsr
+#if CONFIG_PAGING_LEVELS == 4
+        btsl    $_EFER_LME,%eax /* Long Mode      */
+        btsl    $_EFER_SCE,%eax /* SYSCALL/SYSRET */
+#endif
+        btl     $20,%edi        /* No Execute?    */
+        jnc     1f
+        btsl    $_EFER_NX,%eax  /* No Execute     */
+1:      wrmsr
+.Lskip_eferw:
+#endif
+
+        wbinvd
+
+        mov     $0x80050033,%eax /* hi-to-lo: PG,AM,WP,NE,ET,MP,PE */
+        mov     %eax,%cr0
+        jmp     1f
+1:
+
+#if defined(__x86_64__)
+
+        /* Now in compatibility mode. Long-jump to 64-bit mode. */
+        ljmp    $BOOT_CS64, $bootsym_phys(wakeup_64)
+
+        .code64
+        .align  8
+        .word   0,0,0
+lgdt_descr:
+        .word   LAST_RESERVED_GDT_BYTE
+        .quad   gdt_table - FIRST_RESERVED_GDT_BYTE
+
+wakeup_64:
+        lgdt    lgdt_descr(%rip)
+        mov     $(__HYPERVISOR_DS64), %eax
+        mov     %eax, %ds
+
+        # long jump to the return point, with cs reload
+        rex64 ljmp *ret_point(%rip)
+
+        .align 8
+ret_point:
+        .quad   __ret_point
+        .word   __HYPERVISOR_CS64
+
+#else /* !defined(__x86_64__) */
+
+        lgdt    gdt_descr
+        mov     $(__HYPERVISOR_DS), %eax
+        mov     %eax, %ds
+
+        ljmp    $(__HYPERVISOR_CS), $__ret_point
+
+#endif
+
+bogus_saved_magic:
+        movw    $0x0e00 + 'S', 0xb8014
+        jmp     bogus_saved_magic
+
+        .align  16
+wakeup_stack_begin:     # Stack grows down
+
+        .fill PAGE_SIZE,1,0
+wakeup_stack:           # Just below the end of the first page in this section
+
+ENTRY(wakeup_end)
diff -r 9261686d840c xen/include/asm-x86/acpi.h
--- a/xen/include/asm-x86/acpi.h	Tue Jun 26 12:40:37 2007 +0100
+++ b/xen/include/asm-x86/acpi.h	Tue Jun 26 20:28:13 2007 -0400
@@ -178,4 +178,6 @@ extern u8 x86_acpiid_to_apicid[];
 extern u8 x86_acpiid_to_apicid[];
 #define MAX_LOCAL_APIC 256
 
+#define pmprintk(_l, _f, _a...) \
+    printk(_l "<PM>" _f, ## _a )
 #endif /*_ASM_ACPI_H*/
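The pmprintk() wrapper added above relies on C string-literal concatenation to tag all PM messages; for example, the call in power.c:

    /* pmprintk(XENLOG_INFO, "Back to C!\n") expands to: */
    printk(XENLOG_INFO "<PM>" "Back to C!\n");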