Magnus Damm
2006-Nov-08 13:48 UTC
[Xen-devel] [PATCH 00/04] Kexec / Kdump: Release 20061108 (xen-unstable-12281)
[PATCH 00/04] Kexec / Kdump: Release 20061108 (xen-unstable-12281) This is the 20061108 release of the Kexec / Kdump patches for x86 Xen. Test Results: Kexec Kexec Kexec Kexec Kdump Hardware Xen -> Xen -> bzImage -> Xen -> Xen -> Arch Platform Xen bzImage Xen vmlinux vmlinux i386 A PASS PASS PASS PASS PASS i386 B (VMX) PASS PASS PASS PASS PASS i386 C (SVM) PASS PASS PASS PASS PASS i386/PAE A PASS PASS PASS PASS PASS i386/PAE B (VMX) PASS PASS PASS PASS PASS i386/PAE C (SVM) PASS PASS PASS PASS PASS x86_64 D PASS PASS PASS PASS* PASS** x86_64 B (VMX) PASS PASS PASS PASS* PASS** x86_64 C (SVM) PASS PASS PASS PASS* PASS** Hardware Platforms: A: i386 - VA Linux 1220, 2 x Pentium III 866 Mhz, 2 GB B: Intel VT - Shuttle XPC SD36G5, 1 x Pentium D 930, 1 GB C: AMD VT - Shuttle XPC SK22G2, 1 x Athlon64 x2 3800+, 1 GB D: x86_64 - TYAN Transport GX28 B2881, 2 x Opteron 244 1.8 GHz, 2 GB kexec-tools: Work is being done on making kexec-tools xen aware. For now, use version 529ad18980e297efc6ac2839c82afc24eccdcd1f of the kexec-tools git tree. * use the kexec-tools-testing or apply the following patch to kexec-tools: http://www.kernel.org/git/?p=linux/kernel/git/horms/kexec-tools-testing.git;a=commit;h=0d5e71fbcb114b5a67ef7277dbad206915a20df2 ** only certain versions of kexec-tools-testing tree work properly. Changes: 20061108 - Release 20061108 for xen-unstable-12281 - Minor crash note fixes - Crash notes are now cleared on non-present cpus. - Crash notes are now 32-bit aligned - same as Linux. I feel that the patches are in a good and stable state. Please let me know if there is anything I can do to help out with the merge. Thanks, / magnus _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
[PATCH 01/04] Kexec / Kdump: Generic code This patch implements the generic portion of the Kexec / Kdump port to Xen. Signed-Off-By: Magnus Damm <magnus@valinux.co.jp> --- Applies on top of xen-unstable-12281. linux-2.6-xen-sparse/drivers/xen/core/Makefile | 1 linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c | 171 ++++++++ patches/linux-2.6.16.29/kexec-generic.patch | 224 +++++++++++ patches/linux-2.6.16.29/series | 1 xen/arch/ia64/xen/Makefile | 2 xen/arch/ia64/xen/crash.c | 19 xen/arch/ia64/xen/machine_kexec.c | 34 + xen/arch/powerpc/Makefile | 2 xen/arch/powerpc/crash.c | 19 xen/arch/powerpc/machine_kexec.c | 34 + xen/arch/x86/Makefile | 2 xen/arch/x86/crash.c | 19 xen/arch/x86/machine_kexec.c | 34 + xen/common/Makefile | 1 xen/common/kexec.c | 345 +++++++++++++++++ xen/common/page_alloc.c | 33 + xen/drivers/char/console.c | 3 xen/include/asm-ia64/elf.h | 25 + xen/include/asm-ia64/kexec.h | 25 + xen/include/asm-powerpc/elf.h | 25 + xen/include/asm-powerpc/kexec.h | 25 + xen/include/asm-x86/elf.h | 25 + xen/include/asm-x86/kexec.h | 24 + xen/include/public/kexec.h | 131 ++++++ xen/include/xen/elf.h | 16 xen/include/xen/elfcore.h | 127 ++++++ xen/include/xen/hypercall.h | 6 xen/include/xen/kexec.h | 43 ++ xen/include/xen/mm.h | 1 29 files changed, 1394 insertions(+), 23 deletions(-) --- 0001/linux-2.6-xen-sparse/drivers/xen/core/Makefile +++ work/linux-2.6-xen-sparse/drivers/xen/core/Makefile @@ -11,3 +11,4 @@ obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o obj-$(CONFIG_XEN_SKBUFF) += skbuff.o obj-$(CONFIG_XEN_REBOOT) += reboot.o machine_reboot.o obj-$(CONFIG_XEN_SMPBOOT) += smpboot.o +obj-$(CONFIG_KEXEC) += machine_kexec.o --- /dev/null +++ work/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c @@ -0,0 +1,171 @@ +/* + * drivers/xen/core/machine_kexec.c + * handle transition of Linux booting another kernel + */ + +#include <linux/kexec.h> +#include <xen/interface/kexec.h> +#include <linux/mm.h> +#include <linux/bootmem.h> +#include <asm/hypercall.h> +#include <asm/kexec-xen.h> + +extern void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, + struct kimage *image); + +int xen_max_nr_phys_cpus; +struct resource xen_hypervisor_res; +struct resource *xen_phys_cpus; + +void xen_machine_kexec_setup_resources(void) +{ + xen_kexec_range_t range; + struct resource *res; + int k = 0; + + /* determine maximum number of physical cpus */ + + while (1) { + memset(&range, 0, sizeof(range)); + range.range = KEXEC_RANGE_MA_CPU; + range.nr = k; + + if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range)) + break; + + k++; + } + + xen_max_nr_phys_cpus = k; + + /* allocate xen_phys_cpus */ + + xen_phys_cpus = alloc_bootmem_low(k * sizeof(struct resource)); + BUG_ON(!xen_phys_cpus); + + /* fill in xen_phys_cpus with per-cpu crash note information */ + + for (k = 0; k < xen_max_nr_phys_cpus; k++) { + memset(&range, 0, sizeof(range)); + range.range = KEXEC_RANGE_MA_CPU; + range.nr = k; + + BUG_ON(HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range)); + + res = xen_phys_cpus + k; + + memset(res, 0, sizeof(*res)); + res->name = "Crash note"; + res->start = range.start; + res->end = range.start + range.size - 1; + res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + } + + /* fill in xen_hypervisor_res with hypervisor machine address range */ + + memset(&range, 0, sizeof(range)); + range.range = KEXEC_RANGE_MA_XEN; + + BUG_ON(HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range)); + + xen_hypervisor_res.name = "Hypervisor code and data"; + xen_hypervisor_res.start = range.start; + xen_hypervisor_res.end = range.start + range.size - 1; + xen_hypervisor_res.flags = IORESOURCE_BUSY | IORESOURCE_MEM; + + /* fill in crashk_res if range is reserved by hypervisor */ + + memset(&range, 0, sizeof(range)); + range.range = KEXEC_RANGE_MA_CRASH; + + BUG_ON(HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range)); + + if (range.size) { + crashk_res.start = range.start; + crashk_res.end = range.start + range.size - 1; + } +} + +void xen_machine_kexec_register_resources(struct resource *res) +{ + int k; + + request_resource(res, &xen_hypervisor_res); + + for (k = 0; k < xen_max_nr_phys_cpus; k++) + request_resource(res, xen_phys_cpus + k); + +} + +static void setup_load_arg(xen_kexec_image_t *xki, struct kimage *image) +{ + machine_kexec_setup_load_arg(xki, image); + + xki->indirection_page = image->head; + xki->start_address = image->start; +} + +/* + * Load the image into xen so xen can kdump itself + * This might have been done in prepare, but prepare + * is currently called too early. It might make sense + * to move prepare, but for now, just add an extra hook. + */ +int xen_machine_kexec_load(struct kimage *image) +{ + xen_kexec_load_t xkl; + + memset(&xkl, 0, sizeof(xkl)); + xkl.type = image->type; + setup_load_arg(&xkl.image, image); + return HYPERVISOR_kexec_op(KEXEC_CMD_kexec_load, &xkl); +} + +/* + * Unload the image that was stored by machine_kexec_load() + * This might have been done in machine_kexec_cleanup() but it + * is called too late, and its possible xen could try and kdump + * using resources that have been freed. + */ +void xen_machine_kexec_unload(struct kimage *image) +{ + xen_kexec_load_t xkl; + + memset(&xkl, 0, sizeof(xkl)); + xkl.type = image->type; + HYPERVISOR_kexec_op(KEXEC_CMD_kexec_unload, &xkl); +} + +/* + * Do not allocate memory (or fail in any way) in machine_kexec(). + * We are past the point of no return, committed to rebooting now. + * + * This has the hypervisor move to the prefered reboot CPU, + * stop all CPUs and kexec. That is it combines machine_shutdown() + * and machine_kexec() in Linux kexec terms. + */ +NORET_TYPE void xen_machine_kexec(struct kimage *image) +{ + xen_kexec_exec_t xke; + + memset(&xke, 0, sizeof(xke)); + xke.type = image->type; + HYPERVISOR_kexec_op(KEXEC_CMD_kexec, &xke); + panic("KEXEC_CMD_kexec hypercall should not return\n"); +} + +void machine_shutdown(void) +{ + /* do nothing */ +} + + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ --- /dev/null +++ work/patches/linux-2.6.16.29/kexec-generic.patch @@ -0,0 +1,224 @@ +--- 0001/include/linux/kexec.h ++++ work/include/linux/kexec.h +@@ -91,6 +91,13 @@ struct kimage { + extern NORET_TYPE void machine_kexec(struct kimage *image) ATTRIB_NORET; + extern int machine_kexec_prepare(struct kimage *image); + extern void machine_kexec_cleanup(struct kimage *image); ++#ifdef CONFIG_XEN ++extern int xen_machine_kexec_load(struct kimage *image); ++extern void xen_machine_kexec_unload(struct kimage *image); ++extern NORET_TYPE void xen_machine_kexec(struct kimage *image) ATTRIB_NORET; ++extern void xen_machine_kexec_setup_resources(void); ++extern void xen_machine_kexec_register_resources(struct resource *res); ++#endif + extern asmlinkage long sys_kexec_load(unsigned long entry, + unsigned long nr_segments, + struct kexec_segment __user *segments, +--- 0001/kernel/kexec.c ++++ work/kernel/kexec.c +@@ -26,6 +26,9 @@ + #include <asm/io.h> + #include <asm/system.h> + #include <asm/semaphore.h> ++#ifdef CONFIG_XEN ++#include <asm/kexec-xen.h> ++#endif + + /* Per cpu memory for storing cpu states in case of system crash. */ + note_buf_t* crash_notes; +@@ -403,7 +406,7 @@ static struct page *kimage_alloc_normal_ + pages = kimage_alloc_pages(GFP_KERNEL, order); + if (!pages) + break; +- pfn = page_to_pfn(pages); ++ pfn = kexec_page_to_pfn(pages); + epfn = pfn + count; + addr = pfn << PAGE_SHIFT; + eaddr = epfn << PAGE_SHIFT; +@@ -437,6 +440,7 @@ static struct page *kimage_alloc_normal_ + return pages; + } + ++#ifndef CONFIG_XEN + static struct page *kimage_alloc_crash_control_pages(struct kimage *image, + unsigned int order) + { +@@ -490,7 +494,7 @@ static struct page *kimage_alloc_crash_c + } + /* If I don''t overlap any segments I have found my hole! */ + if (i == image->nr_segments) { +- pages = pfn_to_page(hole_start >> PAGE_SHIFT); ++ pages = kexec_pfn_to_page(hole_start >> PAGE_SHIFT); + break; + } + } +@@ -517,6 +521,13 @@ struct page *kimage_alloc_control_pages( + + return pages; + } ++#else /* !CONFIG_XEN */ ++struct page *kimage_alloc_control_pages(struct kimage *image, ++ unsigned int order) ++{ ++ return kimage_alloc_normal_control_pages(image, order); ++} ++#endif + + static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) + { +@@ -532,7 +543,7 @@ static int kimage_add_entry(struct kimag + return -ENOMEM; + + ind_page = page_address(page); +- *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; ++ *image->entry = kexec_virt_to_phys(ind_page) | IND_INDIRECTION; + image->entry = ind_page; + image->last_entry = ind_page + + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); +@@ -593,13 +604,13 @@ static int kimage_terminate(struct kimag + #define for_each_kimage_entry(image, ptr, entry) \ + for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ + ptr = (entry & IND_INDIRECTION)? \ +- phys_to_virt((entry & PAGE_MASK)): ptr +1) ++ kexec_phys_to_virt((entry & PAGE_MASK)): ptr +1) + + static void kimage_free_entry(kimage_entry_t entry) + { + struct page *page; + +- page = pfn_to_page(entry >> PAGE_SHIFT); ++ page = kexec_pfn_to_page(entry >> PAGE_SHIFT); + kimage_free_pages(page); + } + +@@ -611,6 +622,10 @@ static void kimage_free(struct kimage *i + if (!image) + return; + ++#ifdef CONFIG_XEN ++ xen_machine_kexec_unload(image); ++#endif ++ + kimage_free_extra_pages(image); + for_each_kimage_entry(image, ptr, entry) { + if (entry & IND_INDIRECTION) { +@@ -686,7 +701,7 @@ static struct page *kimage_alloc_page(st + * have a match. + */ + list_for_each_entry(page, &image->dest_pages, lru) { +- addr = page_to_pfn(page) << PAGE_SHIFT; ++ addr = kexec_page_to_pfn(page) << PAGE_SHIFT; + if (addr == destination) { + list_del(&page->lru); + return page; +@@ -701,12 +716,12 @@ static struct page *kimage_alloc_page(st + if (!page) + return NULL; + /* If the page cannot be used file it away */ +- if (page_to_pfn(page) > ++ if (kexec_page_to_pfn(page) > + (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { + list_add(&page->lru, &image->unuseable_pages); + continue; + } +- addr = page_to_pfn(page) << PAGE_SHIFT; ++ addr = kexec_page_to_pfn(page) << PAGE_SHIFT; + + /* If it is the destination page we want use it */ + if (addr == destination) +@@ -729,7 +744,7 @@ static struct page *kimage_alloc_page(st + struct page *old_page; + + old_addr = *old & PAGE_MASK; +- old_page = pfn_to_page(old_addr >> PAGE_SHIFT); ++ old_page = kexec_pfn_to_page(old_addr >> PAGE_SHIFT); + copy_highpage(page, old_page); + *old = addr | (*old & ~PAGE_MASK); + +@@ -779,7 +794,7 @@ static int kimage_load_normal_segment(st + result = -ENOMEM; + goto out; + } +- result = kimage_add_page(image, page_to_pfn(page) ++ result = kimage_add_page(image, kexec_page_to_pfn(page) + << PAGE_SHIFT); + if (result < 0) + goto out; +@@ -811,6 +826,7 @@ out: + return result; + } + ++#ifndef CONFIG_XEN + static int kimage_load_crash_segment(struct kimage *image, + struct kexec_segment *segment) + { +@@ -833,7 +849,7 @@ static int kimage_load_crash_segment(str + char *ptr; + size_t uchunk, mchunk; + +- page = pfn_to_page(maddr >> PAGE_SHIFT); ++ page = kexec_pfn_to_page(maddr >> PAGE_SHIFT); + if (page == 0) { + result = -ENOMEM; + goto out; +@@ -881,6 +897,13 @@ static int kimage_load_segment(struct ki + + return result; + } ++#else /* CONFIG_XEN */ ++static int kimage_load_segment(struct kimage *image, ++ struct kexec_segment *segment) ++{ ++ return kimage_load_normal_segment(image, segment); ++} ++#endif + + /* + * Exec Kernel system call: for obvious reasons only root may call it. +@@ -991,6 +1014,11 @@ asmlinkage long sys_kexec_load(unsigned + if (result) + goto out; + } ++#ifdef CONFIG_XEN ++ result = xen_machine_kexec_load(image); ++ if (result) ++ goto out; ++#endif + /* Install the new kernel, and Uninstall the old */ + image = xchg(dest_image, image); + +@@ -1045,7 +1073,6 @@ void crash_kexec(struct pt_regs *regs) + struct kimage *image; + int locked; + +- + /* Take the kexec_lock here to prevent sys_kexec_load + * running on one cpu from replacing the crash kernel + * we are using after a panic on a different cpu. +@@ -1061,7 +1088,11 @@ void crash_kexec(struct pt_regs *regs) + struct pt_regs fixed_regs; + crash_setup_regs(&fixed_regs, regs); + machine_crash_shutdown(&fixed_regs); ++#ifdef CONFIG_XEN ++ xen_machine_kexec(image); ++#else + machine_kexec(image); ++#endif + } + xchg(&kexec_lock, 0); + } +--- 0002/kernel/sys.c ++++ work/kernel/sys.c +@@ -435,8 +435,12 @@ void kernel_kexec(void) + kernel_restart_prepare(NULL); + printk(KERN_EMERG "Starting new kernel\n"); + machine_shutdown(); ++#ifdef CONFIG_XEN ++ xen_machine_kexec(image); ++#else + machine_kexec(image); + #endif ++#endif + } + EXPORT_SYMBOL_GPL(kernel_kexec); + --- 0001/patches/linux-2.6.16.29/series +++ work/patches/linux-2.6.16.29/series @@ -1,3 +1,4 @@ +kexec-generic.patch blktap-aio-16_03_06.patch device_bind.patch fix-hz-suspend.patch --- 0001/xen/arch/ia64/xen/Makefile +++ work/xen/arch/ia64/xen/Makefile @@ -1,3 +1,5 @@ +obj-y += machine_kexec.o +obj-y += crash.o obj-y += acpi.o obj-y += dom0_ops.o obj-y += domain.o --- /dev/null +++ work/xen/arch/ia64/xen/crash.c @@ -0,0 +1,19 @@ +#include <xen/lib.h> /* for printk() used in stub */ +#include <xen/types.h> +#include <public/kexec.h> + +void machine_crash_shutdown(void) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ + --- /dev/null +++ work/xen/arch/ia64/xen/machine_kexec.c @@ -0,0 +1,34 @@ +#include <xen/lib.h> /* for printk() used in stubs */ +#include <xen/types.h> +#include <public/kexec.h> + +int machine_kexec_load(int type, int slot, xen_kexec_image_t *image) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); + return -1; +} + +void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +void machine_kexec(xen_kexec_image_t *image) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +void machine_shutdown(xen_kexec_image_t *image) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- 0001/xen/arch/powerpc/Makefile +++ work/xen/arch/powerpc/Makefile @@ -40,6 +40,8 @@ obj-y += smp-tbsync.o obj-y += sysctl.o obj-y += time.o obj-y += usercopy.o +obj-y += machine_kexec.o +obj-y += crash.o obj-$(debug) += 0opt.o obj-$(crash_debug) += gdbstub.o --- /dev/null +++ work/xen/arch/powerpc/crash.c @@ -0,0 +1,19 @@ +#include <xen/lib.h> /* for printk() used in stub */ +#include <xen/types.h> +#include <public/kexec.h> + +void machine_crash_shutdown(void) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ + --- /dev/null +++ work/xen/arch/powerpc/machine_kexec.c @@ -0,0 +1,34 @@ +#include <xen/lib.h> /* for printk() used in stubs */ +#include <xen/types.h> +#include <public/kexec.h> + +int machine_kexec_load(int type, int slot, xen_kexec_image_t *image) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); + return -1; +} + +void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +void machine_kexec(xen_kexec_image_t *image) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +void machine_shutdown(xen_kexec_image_t *image) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- 0001/xen/arch/x86/Makefile +++ work/xen/arch/x86/Makefile @@ -43,6 +43,8 @@ obj-y += trampoline.o obj-y += traps.o obj-y += usercopy.o obj-y += x86_emulate.o +obj-y += machine_kexec.o +obj-y += crash.o obj-$(crash_debug) += gdbstub.o --- /dev/null +++ work/xen/arch/x86/crash.c @@ -0,0 +1,19 @@ +#include <xen/lib.h> /* for printk() used in stub */ +#include <xen/types.h> +#include <public/kexec.h> + +void machine_crash_shutdown(void) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ + --- /dev/null +++ work/xen/arch/x86/machine_kexec.c @@ -0,0 +1,34 @@ +#include <xen/lib.h> /* for printk() used in stubs */ +#include <xen/types.h> +#include <public/kexec.h> + +int machine_kexec_load(int type, int slot, xen_kexec_image_t *image) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); + return -1; +} + +void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +void machine_kexec(xen_kexec_image_t *image) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +void machine_shutdown(xen_kexec_image_t *image) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- 0001/xen/common/Makefile +++ work/xen/common/Makefile @@ -7,6 +7,7 @@ obj-y += event_channel.o obj-y += grant_table.o obj-y += kernel.o obj-y += keyhandler.o +obj-y += kexec.o obj-y += lib.o obj-y += memory.o obj-y += multicall.o --- /dev/null +++ work/xen/common/kexec.c @@ -0,0 +1,345 @@ +/****************************************************************************** + * kexec.c - Achitecture independent kexec code for Xen + * + * Xen port written by: + * - Simon ''Horms'' Horman <horms@verge.net.au> + * - Magnus Damm <magnus@valinux.co.jp> + */ + +#include <asm/kexec.h> +#include <xen/lib.h> +#include <xen/ctype.h> +#include <xen/errno.h> +#include <xen/guest_access.h> +#include <xen/sched.h> +#include <xen/types.h> +#include <xen/kexec.h> +#include <xen/keyhandler.h> +#include <public/kexec.h> +#include <xen/cpumask.h> +#include <asm/atomic.h> +#include <xen/spinlock.h> + +static char opt_crashkernel[32] = ""; +string_param("crashkernel", opt_crashkernel); + +DEFINE_PER_CPU (crash_note_t, crash_notes); +cpumask_t crash_saved_cpus; +int crashing_cpu; + +xen_kexec_image_t kexec_image[KEXEC_IMAGE_NR]; + +#define KEXEC_FLAG_DEFAULT_POS (KEXEC_IMAGE_NR + 0) +#define KEXEC_FLAG_CRASH_POS (KEXEC_IMAGE_NR + 1) +#define KEXEC_FLAG_IN_PROGRESS (KEXEC_IMAGE_NR + 2) + +unsigned long kexec_flags = 0; /* the lowest bits are for KEXEC_IMAGE... */ + +spinlock_t kexec_lock = SPIN_LOCK_UNLOCKED; + +static void one_cpu_only(void) +{ + /* Only allow the first cpu to continue - force other cpus to spin */ + if (test_and_set_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags)) + { + while (1); + } +} + +void machine_crash_save_cpu(void) +{ + int cpu = smp_processor_id(); + crash_note_t *cntp; + + if (!cpu_test_and_set(cpu, crash_saved_cpus)) + { + cntp = &per_cpu(crash_notes, cpu); + elf_core_save_regs(&cntp->core.desc.desc.pr_reg); + + /* setup crash note header */ + + cntp->core.note.note.note.namesz = CORE_STR_LEN; + cntp->core.note.note.note.descsz = sizeof(ELF_Prstatus); + cntp->core.note.note.note.type = NT_PRSTATUS; + memcpy(cntp->core.note.note.name, CORE_STR, CORE_STR_LEN); + } +} + +void machine_crash_kexec(void) +{ + int pos, cpu; + xen_kexec_image_t *image; + + one_cpu_only(); + + machine_crash_save_cpu(); + crashing_cpu = smp_processor_id(); + + machine_crash_shutdown(); + + /* the memory for per-cpu crash notes are not initialized + * for non-present cpus. make sure the crash notes are cleared. + */ + + for (cpu = 0; cpu < NR_CPUS; cpu++) + { + if (cpu != crashing_cpu && !cpu_present(cpu)) + memset(&per_cpu(crash_notes, cpu), 0, sizeof(crash_note_t)); + } + + pos = (test_bit(KEXEC_FLAG_CRASH_POS, &kexec_flags) != 0); + + if (test_bit(KEXEC_IMAGE_CRASH_BASE + pos, &kexec_flags)) + { + image = &kexec_image[KEXEC_IMAGE_CRASH_BASE + pos]; + machine_kexec(image); /* Does not return */ + } + + while (1); /* No image available - just spin */ +} + +static void do_crashdump_trigger(unsigned char key) +{ + printk("triggering crashdump\n"); + machine_crash_kexec(); +} + +static __init int register_crashdump_trigger(void) +{ + register_keyhandler(''c'', do_crashdump_trigger, "trigger a crashdump"); + return 0; +} +__initcall(register_crashdump_trigger); + +void machine_kexec_reserved(xen_kexec_reserve_t *reservation) +{ + unsigned long val[2]; + char *str = opt_crashkernel; + int k = 0; + + memset(reservation, 0, sizeof(*reservation)); + + while (k < ARRAY_SIZE(val)) { + if (*str == ''\0'') { + break; + } + val[k] = simple_strtoul(str, &str, 0); + switch (toupper(*str)) { + case ''G'': val[k] <<= 10; + case ''M'': val[k] <<= 10; + case ''K'': val[k] <<= 10; + str++; + } + if (*str == ''@'') { + str++; + } + k++; + } + + if (k == ARRAY_SIZE(val)) { + reservation->size = val[0]; + reservation->start = val[1]; + } +} + +static int kexec_get_reserve(xen_kexec_range_t *range) +{ + xen_kexec_reserve_t reservation; + + machine_kexec_reserved(&reservation); + + range->start = reservation.start; + range->size = reservation.size; + return 0; +} + +extern unsigned long _text, _end; + +static int kexec_get_xen(xen_kexec_range_t *range, int get_ma) +{ + if (get_ma) + range->start = virt_to_maddr(&_text); + else + range->start = (unsigned long) &_text; + + range->size = &_end - &_text; + return 0; +} + +static int kexec_get_cpu(xen_kexec_range_t *range) +{ + if (range->nr < 0 || range->nr >= NR_CPUS) + return -EINVAL; + + range->start = __pa((unsigned long)&per_cpu(crash_notes, range->nr)); + range->size = sizeof(crash_note_t); + return 0; +} + +static int kexec_get_range(XEN_GUEST_HANDLE(void) uarg) +{ + xen_kexec_range_t range; + int ret = -EINVAL; + + if (unlikely(copy_from_guest(&range, uarg, 1))) + return -EFAULT; + + switch (range.range) + { + case KEXEC_RANGE_MA_CRASH: + ret = kexec_get_reserve(&range); + break; + case KEXEC_RANGE_MA_XEN: + ret = kexec_get_xen(&range, 1); + break; + case KEXEC_RANGE_VA_XEN: + ret = kexec_get_xen(&range, 0); + break; + case KEXEC_RANGE_MA_CPU: + ret = kexec_get_cpu(&range); + break; + } + + if (ret == 0 && unlikely(copy_to_guest(uarg, &range, 1))) + return -EFAULT; + + return ret; +} + +static int kexec_load_get_bits(int type, int *base, int *bit) +{ + switch (type) + { + case KEXEC_TYPE_DEFAULT: + *base = KEXEC_IMAGE_DEFAULT_BASE; + *bit = KEXEC_FLAG_DEFAULT_POS; + break; + case KEXEC_TYPE_CRASH: + *base = KEXEC_IMAGE_CRASH_BASE; + *bit = KEXEC_FLAG_CRASH_POS; + break; + default: + return -1; + } + return 0; +} + +static int kexec_load_unload(unsigned long op, XEN_GUEST_HANDLE(void) uarg) +{ + xen_kexec_load_t load; + xen_kexec_image_t *image; + int base, bit, pos; + int ret = 0; + + if (unlikely(copy_from_guest(&load, uarg, 1))) + return -EFAULT; + + if (kexec_load_get_bits(load.type, &base, &bit)) + return -EINVAL; + + pos = (test_bit(bit, &kexec_flags) != 0); + + /* Load the user data into an unused image */ + if (op == KEXEC_CMD_kexec_load) + { + image = &kexec_image[base + !pos]; + + BUG_ON(test_bit((base + !pos), &kexec_flags)); /* must be free */ + + memcpy(image, &load.image, sizeof(*image)); + + if (!(ret = machine_kexec_load(load.type, base + !pos, image))) + { + /* Set image present bit */ + set_bit((base + !pos), &kexec_flags); + + /* Make new image the active one */ + change_bit(bit, &kexec_flags); + } + } + + /* Unload the old image if present and load successful */ + if (ret == 0 && !test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags)) + { + if (test_and_clear_bit((base + pos), &kexec_flags)) + { + image = &kexec_image[base + pos]; + machine_kexec_unload(load.type, base + pos, image); + } + } + + return ret; +} + +static int kexec_exec(XEN_GUEST_HANDLE(void) uarg) +{ + xen_kexec_exec_t exec; + xen_kexec_image_t *image; + int base, bit, pos; + + if (unlikely(copy_from_guest(&exec, uarg, 1))) + return -EFAULT; + + if (kexec_load_get_bits(exec.type, &base, &bit)) + return -EINVAL; + + pos = (test_bit(bit, &kexec_flags) != 0); + + /* Only allow kexec/kdump into loaded images */ + if (!test_bit(base + pos, &kexec_flags)) + return -ENOENT; + + switch (exec.type) + { + case KEXEC_TYPE_DEFAULT: + image = &kexec_image[base + pos]; + one_cpu_only(); + machine_shutdown(image); /* Does not return */ + break; + case KEXEC_TYPE_CRASH: + machine_crash_kexec(); /* Does not return */ + break; + } + + return -EINVAL; /* never reached */ +} + +long do_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg) +{ + unsigned long flags; + int ret = -EINVAL; + + if ( !IS_PRIV(current->domain) ) + return -EPERM; + + switch (op) + { + case KEXEC_CMD_kexec_get_range: + ret = kexec_get_range(uarg); + break; + case KEXEC_CMD_kexec_load: + case KEXEC_CMD_kexec_unload: + spin_lock_irqsave(&kexec_lock, flags); + if (!test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags)) + { + ret = kexec_load_unload(op, uarg); + } + spin_unlock_irqrestore(&kexec_lock, flags); + break; + case KEXEC_CMD_kexec: + ret = kexec_exec(uarg); + break; + } + + return ret; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- 0001/xen/common/page_alloc.c +++ work/xen/common/page_alloc.c @@ -216,24 +216,35 @@ void init_boot_pages(paddr_t ps, paddr_t } } +unsigned long alloc_boot_pages_at(unsigned long nr_pfns, unsigned long pfn_at) +{ + unsigned long i; + + for ( i = 0; i < nr_pfns; i++ ) + if ( allocated_in_map(pfn_at + i) ) + break; + + if ( i == nr_pfns ) + { + map_alloc(pfn_at, nr_pfns); + return pfn_at; + } + + return 0; +} + unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align) { - unsigned long pg, i; + unsigned long pg, i = 0; for ( pg = 0; (pg + nr_pfns) < max_page; pg += pfn_align ) { - for ( i = 0; i < nr_pfns; i++ ) - if ( allocated_in_map(pg + i) ) - break; - - if ( i == nr_pfns ) - { - map_alloc(pg, nr_pfns); - return pg; - } + i = alloc_boot_pages_at(nr_pfns, pg); + if (i != 0) + break; } - return 0; + return i; } --- 0001/xen/drivers/char/console.c +++ work/xen/drivers/char/console.c @@ -27,6 +27,7 @@ #include <xen/guest_access.h> #include <xen/shutdown.h> #include <xen/vga.h> +#include <xen/kexec.h> #include <asm/current.h> #include <asm/debugger.h> #include <asm/io.h> @@ -865,6 +866,8 @@ void panic(const char *fmt, ...) debugger_trap_immediate(); + machine_crash_kexec(); + if ( opt_noreboot ) { machine_halt(); --- /dev/null +++ work/xen/include/asm-ia64/elf.h @@ -0,0 +1,25 @@ +#ifndef __IA64_ELF_H__ +#define __IA64_ELF_H__ + +#include <xen/lib.h> /* for printk() used in stub */ + +typedef struct { + unsigned long dummy; +} ELF_Gregset; + +extern inline void elf_core_save_regs(ELF_Gregset *dst) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +#endif /* __IA64_ELF_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- /dev/null +++ work/xen/include/asm-ia64/kexec.h @@ -0,0 +1,25 @@ +#ifndef __IA64_KEXEC_H__ +#define __IA64_KEXEC_H__ + +#include <xen/lib.h> /* for printk() used in stub */ +#include <xen/types.h> +#include <public/xen.h> +#include <xen/kexec.h> + +static inline void machine_kexec(xen_kexec_image_t *image) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +#endif /* __IA64_KEXEC_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ + --- /dev/null +++ work/xen/include/asm-powerpc/elf.h @@ -0,0 +1,25 @@ +#ifndef _ASM_ELF_H__ +#define _ASM_ELF_H__ + +#include <xen/lib.h> /* for printk() used in stub */ + +typedef struct { + unsigned long dummy; +} ELF_Gregset; + +extern inline void elf_core_save_regs(ELF_Gregset *dst) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +#endif /* _ASM_ELF_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- /dev/null +++ work/xen/include/asm-powerpc/kexec.h @@ -0,0 +1,25 @@ +#ifndef _ASM_KEXEC_H__ +#define _ASM_KEXEC_H__ + +#include <xen/lib.h> /* for printk() used in stub */ +#include <xen/types.h> +#include <public/xen.h> +#include <xen/kexec.h> + +static inline void machine_kexec(xen_kexec_image_t *image) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +#endif /* _ASM_KEXEC_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ + --- /dev/null +++ work/xen/include/asm-x86/elf.h @@ -0,0 +1,25 @@ +#ifndef __X86_ELF_H__ +#define __X86_ELF_H__ + +#include <xen/lib.h> /* for printk() used in stub */ + +typedef struct { + unsigned long dummy; +} ELF_Gregset; + +extern inline void elf_core_save_regs(ELF_Gregset *dst) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +#endif /* __X86_ELF_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- /dev/null +++ work/xen/include/asm-x86/kexec.h @@ -0,0 +1,24 @@ +#ifndef __X86_KEXEC_H__ +#define __X86_KEXEC_H__ + +#include <xen/lib.h> /* for printk() used in stub */ +#include <xen/types.h> +#include <public/xen.h> +#include <xen/kexec.h> + +static inline void machine_kexec(xen_kexec_image_t *image) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +#endif /* __X86_KEXEC_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- /dev/null +++ work/xen/include/public/kexec.h @@ -0,0 +1,131 @@ +/****************************************************************************** + * kexec.h - Public portion + * + * Xen port written by: + * - Simon ''Horms'' Horman <horms@verge.net.au> + * - Magnus Damm <magnus@valinux.co.jp> + */ + +#ifndef _XEN_PUBLIC_KEXEC_H +#define _XEN_PUBLIC_KEXEC_H + + +/* This file describes the Kexec / Kdump hypercall interface for Xen. + * + * Kexec under vanilla Linux allows a user to reboot the physical machine + * into a new user-specified kernel. The Xen port extends this idea + * to allow rebooting of the machine from dom0. When kexec for dom0 + * is used to reboot, both the hypervisor and the domains get replaced + * with some other kernel. It is possible to kexec between vanilla + * Linux and Xen and back again. Xen to Xen works well too. + * + * The hypercall interface for kexec can be divided into three main + * types of hypercall operations: + * + * 1) Range information: + * This is used by the dom0 kernel to ask the hypervisor about various + * address information. This information is needed to allow kexec-tools + * to fill in the ELF headers for /proc/vmcore properly. + * + * 2) Load and unload of images: + * There are no big surprises here, the kexec binary from kexec-tools + * runs in userspace in dom0. The tool loads/unloads data into the + * dom0 kernel such as new kernel, initramfs and hypervisor. When + * loaded the dom0 kernel performs a load hypercall operation, and + * before releasing all page references the dom0 kernel calls unload. + * + * 3) Kexec operation: + * This is used to start a previously loaded kernel. + */ + +#include "xen.h" + +/* + * Prototype for this hypercall is: + * int kexec_op(int cmd, void *args) + * @cmd == KEXEC_CMD_... + * KEXEC operation to perform + * @args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Kexec supports two types of operation: + * - kexec into a regular kernel, very similar to a standard reboot + * - KEXEC_TYPE_DEFAULT is used to specify this type + * - kexec into a special "crash kernel", aka kexec-on-panic + * - KEXEC_TYPE_CRASH is used to specify this type + * - parts of our system may be broken at kexec-on-panic time + * - the code should be kept as simple and self-contained as possible + */ + +#define KEXEC_TYPE_DEFAULT 0 +#define KEXEC_TYPE_CRASH 1 + + +/* The kexec implementation for Xen allows the user to load two + * types of kernels, KEXEC_TYPE_DEFAULT and KEXEC_TYPE_CRASH. + * All data needed for a kexec reboot is kept in one xen_kexec_image_t + * per "instance". The data mainly consists of machine address lists to pages + * together with destination addresses. The data in xen_kexec_image_t + * is passed to the "code page" which is one page of code that performs + * the final relocations before jumping to the new kernel. + */ + +typedef struct xen_kexec_image { + unsigned long indirection_page; + unsigned long start_address; +} xen_kexec_image_t; + +/* + * Perform kexec having previously loaded a kexec or kdump kernel + * as appropriate. + * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in] + */ +#define KEXEC_CMD_kexec 0 +typedef struct xen_kexec_exec { + int type; +} xen_kexec_exec_t; + +/* + * Load/Unload kernel image for kexec or kdump. + * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in] + * image == relocation information for kexec (ignored for unload) [in] + */ +#define KEXEC_CMD_kexec_load 1 +#define KEXEC_CMD_kexec_unload 2 +typedef struct xen_kexec_load { + int type; + xen_kexec_image_t image; +} xen_kexec_load_t; + +#define KEXEC_RANGE_MA_CRASH 0 /* machine address and size of crash area */ +#define KEXEC_RANGE_MA_XEN 1 /* machine address and size of Xen itself */ +#define KEXEC_RANGE_VA_XEN 2 /* virtual adrress and size of Xen itself */ +#define KEXEC_RANGE_MA_CPU 3 /* machine address and size of a CPU note */ + +/* + * Find the address and size of certain memory areas + * range == KEXEC_RANGE_... [in] + * nr == physical CPU number (starting from 0) if KEXEC_RANGE_MA_CPU [in] + * size == number of bytes reserved in window [out] + * start == address of the first byte in the window [out] + */ +#define KEXEC_CMD_kexec_get_range 3 +typedef struct xen_kexec_range { + int range; + int nr; + unsigned long size; + unsigned long start; +} xen_kexec_range_t; + +#endif /* _XEN_PUBLIC_KEXEC_H */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- 0001/xen/include/xen/elf.h +++ work/xen/include/xen/elf.h @@ -452,18 +452,12 @@ unsigned int elf_hash(const unsigned cha /* * Note Definitions */ -typedef struct { - Elf32_Word namesz; - Elf32_Word descsz; - Elf32_Word type; -} Elf32_Note; typedef struct { - Elf64_Half namesz; - Elf64_Half descsz; - Elf64_Half type; -} Elf64_Note; - + u32 namesz; + u32 descsz; + u32 type; +} Elf_Note; /* same format for both 32-bit and 64-bit ELF */ #if defined(ELFSIZE) #define CONCAT(x,y) __CONCAT(x,y) @@ -486,7 +480,6 @@ typedef struct { #define Elf_Addr Elf32_Addr #define Elf_Off Elf32_Off #define Elf_Nhdr Elf32_Nhdr -#define Elf_Note Elf32_Note #define ELF_R_SYM ELF32_R_SYM #define ELF_R_TYPE ELF32_R_TYPE @@ -511,7 +504,6 @@ typedef struct { #define Elf_Addr Elf64_Addr #define Elf_Off Elf64_Off #define Elf_Nhdr Elf64_Nhdr -#define Elf_Note Elf64_Note #define ELF_R_SYM ELF64_R_SYM #define ELF_R_TYPE ELF64_R_TYPE --- /dev/null +++ work/xen/include/xen/elfcore.h @@ -0,0 +1,127 @@ +/****************************************************************************** + * elfcore.h + * + * Based heavily on include/linux/elfcore.h from Linux 2.6.16 + * Naming scheeme based on include/xen/elf.h (not include/linux/elfcore.h) + * + */ + +#ifndef __ELFCOREC_H__ +#define __ELFCOREC_H__ + +#include <xen/types.h> +#include <xen/elf.h> +#include <asm/elf.h> +#include <public/xen.h> + +#define NT_PRSTATUS 1 + +typedef struct +{ + int signo; /* signal number */ + int code; /* extra code */ + int errno; /* errno */ +} ELF_Signifo; + +/* These seem to be the same length on all architectures on Linux */ +typedef int ELF_Pid; +typedef struct { + long tv_sec; + long tv_usec; +} ELF_Timeval; + +/* + * Definitions to generate Intel SVR4-like core files. + * These mostly have the same names as the SVR4 types with "elf_" + * tacked on the front to prevent clashes with linux definitions, + * and the typedef forms have been avoided. This is mostly like + * the SVR4 structure, but more Linuxy, with things that Linux does + * not support and which gdb doesn''t really use excluded. + */ +typedef struct +{ + ELF_Signifo pr_info; /* Info associated with signal */ + short pr_cursig; /* Current signal */ + unsigned long pr_sigpend; /* Set of pending signals */ + unsigned long pr_sighold; /* Set of held signals */ + ELF_Pid pr_pid; + ELF_Pid pr_ppid; + ELF_Pid pr_pgrp; + ELF_Pid pr_sid; + ELF_Timeval pr_utime; /* User time */ + ELF_Timeval pr_stime; /* System time */ + ELF_Timeval pr_cutime; /* Cumulative user time */ + ELF_Timeval pr_cstime; /* Cumulative system time */ + ELF_Gregset pr_reg; /* GP registers - from asm header file */ + int pr_fpvalid; /* True if math co-processor being used. */ +} ELF_Prstatus; + +/* + * The following data structures provide 64-bit ELF notes. In theory it should + * be possible to support both 64-bit and 32-bit ELF files, but to keep it + * simple we only do 64-bit. + * + * Please note that the current code aligns the 64-bit notes in the same + * way as Linux does. We are not following the 64-bit ELF spec, no one does. + * + * We are avoiding two problems by restricting us to 64-bit notes only: + * - Alignment of notes change with the word size. Ick. + * - We would need to tell kexec-tools which format we are using in the + * hypervisor to make sure the right ELF format is generated. + * That requires infrastructure. Let''s not. + */ + +#define ALIGN(x, n) ((x + ((1 << n) - 1)) / (1 << n)) +#define PAD32(x) u32 pad_data[ALIGN(x, 2)] + +#define TYPEDEF_NOTE(type, strlen, desctype) \ + typedef struct { \ + union { \ + struct { \ + Elf_Note note; \ + unsigned char name[strlen]; \ + } note; \ + PAD32(sizeof(Elf_Note) + strlen); \ + } note; \ + union { \ + desctype desc; \ + PAD32(sizeof(desctype)); \ + } desc; \ + } type + +#define CORE_STR "CORE" +#define CORE_STR_LEN 5 /* including terminating zero */ + +TYPEDEF_NOTE(crash_note_core_t, CORE_STR_LEN, ELF_Prstatus); + +#define XEN_STR "XEN CORE" +#define XEN_STR_LEN 9 /* including terminating zero */ + +typedef struct { + unsigned long xen_major_version; + unsigned long xen_minor_version; + unsigned long xen_extra_version; + unsigned long xen_changeset; + unsigned long xen_compiler; + unsigned long xen_compile_date; + unsigned long xen_compile_time; +} xen_crash_xen_regs_t; + +TYPEDEF_NOTE(crash_note_xen_t, XEN_STR_LEN, xen_crash_xen_regs_t); + +typedef struct { + crash_note_core_t core; + crash_note_xen_t xen; +} crash_note_t; + +#endif /* __ELFCOREC_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- 0001/xen/include/xen/hypercall.h +++ work/xen/include/xen/hypercall.h @@ -102,4 +102,10 @@ do_hvm_op( unsigned long op, XEN_GUEST_HANDLE(void) arg); +extern long +do_kexec_op( + unsigned long op, + int arg1, + XEN_GUEST_HANDLE(void) arg); + #endif /* __XEN_HYPERCALL_H__ */ --- /dev/null +++ work/xen/include/xen/kexec.h @@ -0,0 +1,43 @@ +#ifndef __XEN_KEXEC_H__ +#define __XEN_KEXEC_H__ + +#include <public/kexec.h> +#include <asm/percpu.h> +#include <xen/elfcore.h> + +DECLARE_PER_CPU (crash_note_t, crash_notes); +extern int crashing_cpu; + +typedef struct xen_kexec_reserve { + unsigned long size; + unsigned long start; +} xen_kexec_reserve_t; + +/* We have space for 4 images to support atomic update + * of images. This is important for CRASH images since + * a panic can happen at any time... + */ + +#define KEXEC_IMAGE_DEFAULT_BASE 0 +#define KEXEC_IMAGE_CRASH_BASE 2 +#define KEXEC_IMAGE_NR 4 + +int machine_kexec_load(int type, int slot, xen_kexec_image_t *image); +void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image); +void machine_kexec_reserved(xen_kexec_reserve_t *reservation); +void machine_shutdown(xen_kexec_image_t *image); +void machine_crash_kexec(void); +void machine_crash_save_cpu(void); +void machine_crash_shutdown(void); + +#endif /* __XEN_KEXEC_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- 0001/xen/include/xen/mm.h +++ work/xen/include/xen/mm.h @@ -40,6 +40,7 @@ struct page_info; paddr_t init_boot_allocator(paddr_t bitmap_start); void init_boot_pages(paddr_t ps, paddr_t pe); unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align); +unsigned long alloc_boot_pages_at(unsigned long nr_pfns, unsigned long pfn_at); void end_boot_allocator(void); /* Generic allocator. These functions are *not* interrupt-safe. */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Magnus Damm
2006-Nov-08 13:49 UTC
[Xen-devel] [PATCH 02/04] Kexec / Kdump: Code shared between x86_32 and x86_64
[PATCH 02/04] Kexec / Kdump: Code shared between x86_32 and x86_64 This patch contains Kexec / Kdump code shared between x86_32 and x86_64. Signed-Off-By: Magnus Damm <magnus@valinux.co.jp> --- Applies on top of xen-unstable-12281. patches/linux-2.6.16.29/git-2a...f7.patch | 62 ++++ patches/linux-2.6.16.29/git-2e...11.patch | 93 +++++++ patches/linux-2.6.16.29/series | 2 xen/arch/x86/crash.c | 130 +++++++++- xen/arch/x86/machine_kexec.c | 83 +++++- xen/arch/x86/setup.c | 73 ++++- xen/arch/x86/traps.c | 2 xen/include/asm-x86/elf.h | 15 - xen/include/asm-x86/fixmap.h | 4 xen/include/asm-x86/hypercall.h | 5 xen/include/asm-x86/kexec.h | 14 - xen/include/asm-x86/x86_32/elf.h | 25 + xen/include/asm-x86/x86_32/kexec.h | 24 + xen/include/asm-x86/x86_64/elf.h | 25 + xen/include/asm-x86/x86_64/kexec.h | 24 + xen/include/public/kexec.h | 7 xen/include/xen/elfcore.h | 3 17 files changed, 550 insertions(+), 41 deletions(-) --- /dev/null +++ work/patches/linux-2.6.16.29/git-2a8a3d5b65e86ec1dfef7d268c64a909eab94af7.patch @@ -0,0 +1,62 @@ +From: Eric W. Biederman <ebiederm@xmission.com> +Date: Sun, 30 Jul 2006 10:03:20 +0000 (-0700) +Subject: [PATCH] machine_kexec.c: Fix the description of segment handling +X-Git-Tag: v2.6.18-rc4 +X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=2a8a3d5b65e86ec1dfef7d268c64a909eab94af7 + +[PATCH] machine_kexec.c: Fix the description of segment handling + +One of my original comments in machine_kexec was unclear +and this should fix it. + +Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> +Cc: Andi Kleen <ak@muc.de> +Acked-by: Horms <horms@verge.net.au> +Signed-off-by: Andrew Morton <akpm@osdl.org> +Signed-off-by: Linus Torvalds <torvalds@osdl.org> +--- + +--- a/arch/i386/kernel/machine_kexec.c ++++ b/arch/i386/kernel/machine_kexec.c +@@ -189,14 +189,11 @@ NORET_TYPE void machine_kexec(struct kim + memcpy((void *)reboot_code_buffer, relocate_new_kernel, + relocate_new_kernel_size); + +- /* The segment registers are funny things, they are +- * automatically loaded from a table, in memory wherever you +- * set them to a specific selector, but this table is never +- * accessed again you set the segment to a different selector. +- * +- * The more common model is are caches where the behide +- * the scenes work is done, but is also dropped at arbitrary +- * times. ++ /* The segment registers are funny things, they have both a ++ * visible and an invisible part. Whenever the visible part is ++ * set to a specific selector, the invisible part is loaded ++ * with from a table in memory. At no other time is the ++ * descriptor table in memory accessed. + * + * I take advantage of this here by force loading the + * segments, before I zap the gdt with an invalid value. +--- a/arch/x86_64/kernel/machine_kexec.c ++++ b/arch/x86_64/kernel/machine_kexec.c +@@ -207,14 +207,11 @@ NORET_TYPE void machine_kexec(struct kim + __flush_tlb(); + + +- /* The segment registers are funny things, they are +- * automatically loaded from a table, in memory wherever you +- * set them to a specific selector, but this table is never +- * accessed again unless you set the segment to a different selector. +- * +- * The more common model are caches where the behide +- * the scenes work is done, but is also dropped at arbitrary +- * times. ++ /* The segment registers are funny things, they have both a ++ * visible and an invisible part. Whenever the visible part is ++ * set to a specific selector, the invisible part is loaded ++ * with from a table in memory. At no other time is the ++ * descriptor table in memory accessed. + * + * I take advantage of this here by force loading the + * segments, before I zap the gdt with an invalid value. --- /dev/null +++ work/patches/linux-2.6.16.29/git-2efe55a9cec8418f0e0cde3dc3787a42fddc4411.patch @@ -0,0 +1,93 @@ +From: Tobias Klauser <tklauser@nuerscht.ch> +Date: Mon, 26 Jun 2006 16:57:34 +0000 (+0200) +Subject: Storage class should be first +X-Git-Tag: v2.6.18-rc1 +X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=2efe55a9cec8418f0e0cde3dc3787a42fddc4411 + +Storage class should be first + +Storage class should be before const + +Signed-off-by: Tobias Klauser <tklauser@nuerscht.ch> +Signed-off-by: Adrian Bunk <bunk@stusta.de> +--- + +--- a/arch/i386/kernel/machine_kexec.c ++++ b/arch/i386/kernel/machine_kexec.c +@@ -133,9 +133,9 @@ typedef asmlinkage NORET_TYPE void (*rel + unsigned long start_address, + unsigned int has_pae) ATTRIB_NORET; + +-const extern unsigned char relocate_new_kernel[]; ++extern const unsigned char relocate_new_kernel[]; + extern void relocate_new_kernel_end(void); +-const extern unsigned int relocate_new_kernel_size; ++extern const unsigned int relocate_new_kernel_size; + + /* + * A architecture hook called to validate the +--- a/arch/powerpc/kernel/machine_kexec_32.c ++++ b/arch/powerpc/kernel/machine_kexec_32.c +@@ -30,8 +30,8 @@ typedef NORET_TYPE void (*relocate_new_k + */ + void default_machine_kexec(struct kimage *image) + { +- const extern unsigned char relocate_new_kernel[]; +- const extern unsigned int relocate_new_kernel_size; ++ extern const unsigned char relocate_new_kernel[]; ++ extern const unsigned int relocate_new_kernel_size; + unsigned long page_list; + unsigned long reboot_code_buffer, reboot_code_buffer_phys; + relocate_new_kernel_t rnk; +--- a/arch/ppc/kernel/machine_kexec.c ++++ b/arch/ppc/kernel/machine_kexec.c +@@ -25,8 +25,8 @@ typedef NORET_TYPE void (*relocate_new_k + unsigned long reboot_code_buffer, + unsigned long start_address) ATTRIB_NORET; + +-const extern unsigned char relocate_new_kernel[]; +-const extern unsigned int relocate_new_kernel_size; ++extern const unsigned char relocate_new_kernel[]; ++extern const unsigned int relocate_new_kernel_size; + + void machine_shutdown(void) + { +--- a/arch/s390/kernel/machine_kexec.c ++++ b/arch/s390/kernel/machine_kexec.c +@@ -27,8 +27,8 @@ static void kexec_halt_all_cpus(void *); + + typedef void (*relocate_kernel_t) (kimage_entry_t *, unsigned long); + +-const extern unsigned char relocate_kernel[]; +-const extern unsigned long long relocate_kernel_len; ++extern const unsigned char relocate_kernel[]; ++extern const unsigned long long relocate_kernel_len; + + int + machine_kexec_prepare(struct kimage *image) +--- a/arch/sh/kernel/machine_kexec.c ++++ b/arch/sh/kernel/machine_kexec.c +@@ -25,8 +25,8 @@ typedef NORET_TYPE void (*relocate_new_k + unsigned long start_address, + unsigned long vbr_reg) ATTRIB_NORET; + +-const extern unsigned char relocate_new_kernel[]; +-const extern unsigned int relocate_new_kernel_size; ++extern const unsigned char relocate_new_kernel[]; ++extern const unsigned int relocate_new_kernel_size; + extern void *gdb_vbr_vector; + + /* +--- a/arch/x86_64/kernel/machine_kexec.c ++++ b/arch/x86_64/kernel/machine_kexec.c +@@ -149,8 +149,8 @@ typedef NORET_TYPE void (*relocate_new_k + unsigned long start_address, + unsigned long pgtable) ATTRIB_NORET; + +-const extern unsigned char relocate_new_kernel[]; +-const extern unsigned long relocate_new_kernel_size; ++extern const unsigned char relocate_new_kernel[]; ++extern const unsigned long relocate_new_kernel_size; + + int machine_kexec_prepare(struct kimage *image) + { --- 0003/patches/linux-2.6.16.29/series +++ work/patches/linux-2.6.16.29/series @@ -1,4 +1,6 @@ kexec-generic.patch +git-2efe55a9cec8418f0e0cde3dc3787a42fddc4411.patch +git-2a8a3d5b65e86ec1dfef7d268c64a909eab94af7.patch blktap-aio-16_03_06.patch device_bind.patch fix-hz-suspend.patch --- 0003/xen/arch/x86/crash.c +++ work/xen/arch/x86/crash.c @@ -1,10 +1,134 @@ -#include <xen/lib.h> /* for printk() used in stub */ +/****************************************************************************** + * crash.c + * + * Based heavily on arch/i386/kernel/crash.c from Linux 2.6.16 + * + * Xen port written by: + * - Simon ''Horms'' Horman <horms@verge.net.au> + * - Magnus Damm <magnus@valinux.co.jp> + */ + +#include <asm/atomic.h> +#include <asm/elf.h> +#include <asm/percpu.h> +#include <asm/kexec.h> #include <xen/types.h> -#include <public/kexec.h> +#include <xen/irq.h> +#include <asm/ipi.h> +#include <asm/nmi.h> +#include <xen/string.h> +#include <xen/elf.h> +#include <xen/elfcore.h> +#include <xen/smp.h> +#include <xen/delay.h> +#include <xen/perfc.h> +#include <xen/kexec.h> +#include <xen/sched.h> +#include <xen/version.h> +#include <public/xen.h> +#include <asm/hvm/hvm.h> + +#ifdef CONFIG_SMP +static atomic_t waiting_for_crash_ipi; + +static int crash_nmi_callback(struct cpu_user_regs *regs, int cpu) +{ + /* Don''t do anything if this handler is invoked on crashing cpu. + * Otherwise, system will completely hang. Crashing cpu can get + * an NMI if system was initially booted with nmi_watchdog parameter. + */ + if (cpu == crashing_cpu) + return 1; + local_irq_disable(); + + machine_crash_save_cpu(); + disable_local_APIC(); + atomic_dec(&waiting_for_crash_ipi); + hvm_disable(); + + for ( ; ; ) + __asm__ __volatile__ ( "hlt" ); + + return 1; +} + +/* + * By using the NMI code instead of a vector we just sneak thru the + * word generator coming out with just what we want. AND it does + * not matter if clustered_apic_mode is set or not. + */ +static void smp_send_nmi_allbutself(void) +{ + cpumask_t allbutself = cpu_online_map; + + cpu_clear(smp_processor_id(), allbutself); + send_IPI_mask(allbutself, APIC_DM_NMI); +} + +static void nmi_shootdown_cpus(void) +{ + unsigned long msecs; + + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + /* Would it be better to replace the trap vector here? */ + set_nmi_callback(crash_nmi_callback); + /* Ensure the new callback function is set before sending + * out the NMI + */ + wmb(); + + smp_send_nmi_allbutself(); + + msecs = 1000; /* Wait at most a second for the other cpus to stop */ + while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { + mdelay(1); + msecs--; + } + + /* Leave the nmi callback set */ + disable_local_APIC(); +} +#endif + +static void crash_save_xen_notes(void) +{ + int cpu = smp_processor_id(); + crash_note_t *cntp = &per_cpu(crash_notes, cpu); + + /* this code assumes that the first note has been written already */ + + cntp->xen.note.note.note.namesz = XEN_STR_LEN; + cntp->xen.note.note.note.descsz = sizeof(xen_crash_xen_regs_t); + cntp->xen.note.note.note.type = 0x10000001; /* NT_XEN_DOM0_CR3 */ + memcpy(cntp->xen.note.note.name, XEN_STR, XEN_STR_LEN); + + cntp->xen.desc.desc.xen_major_version = xen_major_version(); + cntp->xen.desc.desc.xen_minor_version = xen_minor_version(); + cntp->xen.desc.desc.xen_extra_version = __pa(xen_extra_version()); + cntp->xen.desc.desc.xen_changeset = __pa(xen_changeset()); + cntp->xen.desc.desc.xen_compiler = __pa(xen_compiler()); + cntp->xen.desc.desc.xen_compile_date = __pa(xen_compile_date()); + cntp->xen.desc.desc.xen_compile_time = __pa(xen_compile_time()); + + cntp->xen.desc.desc.dom0_pfn_to_mfn_frame_list_list = \ + dom0->shared_info->arch.pfn_to_mfn_frame_list_list; +} void machine_crash_shutdown(void) { - printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); + printk("machine_crash_shutdown: %d\n", smp_processor_id()); + local_irq_disable(); + +#ifdef CONFIG_SMP + nmi_shootdown_cpus(); +#endif + +#ifdef CONFIG_X86_IO_APIC + disable_IO_APIC(); +#endif + hvm_disable(); + + crash_save_xen_notes(); } /* --- 0003/xen/arch/x86/machine_kexec.c +++ work/xen/arch/x86/machine_kexec.c @@ -1,26 +1,89 @@ -#include <xen/lib.h> /* for printk() used in stubs */ +/****************************************************************************** + * machine_kexec.c + * + * Xen port written by: + * - Simon ''Horms'' Horman <horms@verge.net.au> + * - Magnus Damm <magnus@valinux.co.jp> + */ + +#include <xen/lib.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/flushtlb.h> +#include <xen/smp.h> +#include <xen/nmi.h> #include <xen/types.h> -#include <public/kexec.h> +#include <xen/console.h> +#include <xen/kexec.h> +#include <asm/kexec.h> +#include <xen/domain_page.h> +#include <asm/fixmap.h> +#include <asm/hvm/hvm.h> int machine_kexec_load(int type, int slot, xen_kexec_image_t *image) { - printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); - return -1; + unsigned long prev_ma = 0; + int fix_base = FIX_KEXEC_BASE_0 + (slot * (KEXEC_XEN_NO_PAGES >> 1)); + int k; + + /* setup fixmap to point to our pages and record the virtual address + * in every odd index in page_list[]. + */ + + for (k = 0; k < KEXEC_XEN_NO_PAGES; k++) { + if ((k & 1) == 0) { /* even pages: machine address */ + prev_ma = image->page_list[k]; + } + else { /* odd pages: va for previous ma */ + set_fixmap(fix_base + (k >> 1), prev_ma); + image->page_list[k] = fix_to_virt(fix_base + (k >> 1)); + } + } + + return 0; } void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image) { - printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); } - -void machine_kexec(xen_kexec_image_t *image) + +static void __machine_shutdown(void *data) { - printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); -} + xen_kexec_image_t *image = (xen_kexec_image_t *)data; + watchdog_disable(); + console_start_sync(); + + smp_send_stop(); + +#ifdef CONFIG_X86_IO_APIC + disable_IO_APIC(); +#endif + hvm_disable(); + + machine_kexec(image); +} + void machine_shutdown(xen_kexec_image_t *image) { - printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); + int reboot_cpu_id; + cpumask_t reboot_cpu; + + reboot_cpu_id = 0; + + if (!cpu_isset(reboot_cpu_id, cpu_online_map)) + reboot_cpu_id = smp_processor_id(); + + if (reboot_cpu_id != smp_processor_id()) { + cpus_clear(reboot_cpu); + cpu_set(reboot_cpu_id, reboot_cpu); + on_selected_cpus(reboot_cpu, __machine_shutdown, image, 1, 0); + for (;;) + ; /* nothing */ + } + else + __machine_shutdown(image); + BUG(); } /* --- 0001/xen/arch/x86/setup.c +++ work/xen/arch/x86/setup.c @@ -27,6 +27,7 @@ #include <asm/shadow.h> #include <asm/e820.h> #include <acm/acm_hooks.h> +#include <xen/kexec.h> extern void dmi_scan_machine(void); extern void generic_apic_probe(void); @@ -273,6 +274,20 @@ static void srat_detect_node(int cpu) printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node); } +void __init move_memory(unsigned long dst, + unsigned long src_start, unsigned long src_end) +{ +#if defined(CONFIG_X86_32) + memmove((void *)dst, /* use low mapping */ + (void *)src_start, /* use low mapping */ + src_end - src_start); +#elif defined(CONFIG_X86_64) + memmove(__va(dst), + __va(src_start), + src_end - src_start); +#endif +} + void __init __start_xen(multiboot_info_t *mbi) { char __cmdline[] = "", *cmdline = __cmdline; @@ -284,6 +299,7 @@ void __init __start_xen(multiboot_info_t unsigned long nr_pages, modules_length; paddr_t s, e; int i, e820_warn = 0, e820_raw_nr = 0, bytes = 0; + xen_kexec_reserve_t crash_area; struct ns16550_defaults ns16550 = { .data_bits = 8, .parity = ''n'', @@ -415,15 +431,8 @@ void __init __start_xen(multiboot_info_t initial_images_start = xenheap_phys_end; initial_images_end = initial_images_start + modules_length; -#if defined(CONFIG_X86_32) - memmove((void *)initial_images_start, /* use low mapping */ - (void *)mod[0].mod_start, /* use low mapping */ - mod[mbi->mods_count-1].mod_end - mod[0].mod_start); -#elif defined(CONFIG_X86_64) - memmove(__va(initial_images_start), - __va(mod[0].mod_start), - mod[mbi->mods_count-1].mod_end - mod[0].mod_start); -#endif + move_memory(initial_images_start, + mod[0].mod_start, mod[mbi->mods_count-1].mod_end); /* Initialise boot-time allocator with all RAM situated after modules. */ xenheap_phys_start = init_boot_allocator(__pa(&_end)); @@ -471,6 +480,52 @@ void __init __start_xen(multiboot_info_t #endif } + machine_kexec_reserved(&crash_area); + if (crash_area.size > 0) { + unsigned long kdump_start, kdump_size, k; + + /* mark images pages as free for now */ + + init_boot_pages(initial_images_start, initial_images_end); + + kdump_start = crash_area.start; + kdump_size = crash_area.size; + + printk("Kdump: %luMB (%lukB) at 0x%lx\n", + kdump_size >> 20, + kdump_size >> 10, + kdump_start); + + if ((kdump_start & ~PAGE_MASK) || (kdump_size & ~PAGE_MASK)) + panic("Kdump parameters not page aligned\n"); + + kdump_start >>= PAGE_SHIFT; + kdump_size >>= PAGE_SHIFT; + + /* allocate pages for Kdump memory area */ + + k = alloc_boot_pages_at(kdump_size, kdump_start); + + if (k != kdump_start) + panic("Unable to reserve Kdump memory\n"); + + /* allocate pages for relocated initial images */ + + k = ((initial_images_end - initial_images_start) & ~PAGE_MASK) ? 1 : 0; + k += (initial_images_end - initial_images_start) >> PAGE_SHIFT; + + k = alloc_boot_pages(k, 1); + + if (!k) + panic("Unable to allocate initial images memory\n"); + + move_memory(k << PAGE_SHIFT, initial_images_start, initial_images_end); + + initial_images_end -= initial_images_start; + initial_images_start = k << PAGE_SHIFT; + initial_images_end += initial_images_start; + } + memguard_init(); percpu_guard_areas(); --- 0001/xen/arch/x86/traps.c +++ work/xen/arch/x86/traps.c @@ -45,6 +45,7 @@ #include <xen/iocap.h> #include <xen/nmi.h> #include <xen/version.h> +#include <xen/kexec.h> #include <asm/shadow.h> #include <asm/system.h> #include <asm/io.h> @@ -1579,6 +1580,7 @@ static void unknown_nmi_error(unsigned c printk("Uhhuh. NMI received for unknown reason %02x.\n", reason); printk("Dazed and confused, but trying to continue\n"); printk("Do you have a strange power saving mode enabled?\n"); + machine_crash_kexec(); } } --- 0003/xen/include/asm-x86/elf.h +++ work/xen/include/asm-x86/elf.h @@ -1,16 +1,11 @@ #ifndef __X86_ELF_H__ #define __X86_ELF_H__ -#include <xen/lib.h> /* for printk() used in stub */ - -typedef struct { - unsigned long dummy; -} ELF_Gregset; - -extern inline void elf_core_save_regs(ELF_Gregset *dst) -{ - printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); -} +#ifdef __x86_64__ +#include <asm/x86_64/elf.h> +#else +#include <asm/x86_32/elf.h> +#endif #endif /* __X86_ELF_H__ */ --- 0001/xen/include/asm-x86/fixmap.h +++ work/xen/include/asm-x86/fixmap.h @@ -16,6 +16,7 @@ #include <asm/apicdef.h> #include <asm/acpi.h> #include <asm/page.h> +#include <xen/kexec.h> /* * Here we define all the compile-time ''special'' virtual @@ -36,6 +37,9 @@ enum fixed_addresses { FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, FIX_HPET_BASE, FIX_CYCLONE_TIMER, + FIX_KEXEC_BASE_0, + FIX_KEXEC_BASE_END = FIX_KEXEC_BASE_0 \ + + ((KEXEC_XEN_NO_PAGES >> 1) * KEXEC_IMAGE_NR) - 1, __end_of_fixed_addresses }; --- 0001/xen/include/asm-x86/hypercall.h +++ work/xen/include/asm-x86/hypercall.h @@ -6,6 +6,7 @@ #define __ASM_X86_HYPERCALL_H__ #include <public/physdev.h> +#include <xen/types.h> extern long do_event_channel_op_compat( @@ -87,6 +88,10 @@ extern long arch_do_vcpu_op( int cmd, struct vcpu *v, XEN_GUEST_HANDLE(void) arg); +extern int +do_kexec( + unsigned long op, unsigned arg1, XEN_GUEST_HANDLE(void) uarg); + #ifdef __x86_64__ extern long --- 0003/xen/include/asm-x86/kexec.h +++ work/xen/include/asm-x86/kexec.h @@ -1,15 +1,11 @@ #ifndef __X86_KEXEC_H__ #define __X86_KEXEC_H__ -#include <xen/lib.h> /* for printk() used in stub */ -#include <xen/types.h> -#include <public/xen.h> -#include <xen/kexec.h> - -static inline void machine_kexec(xen_kexec_image_t *image) -{ - printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); -} +#ifdef __x86_64__ +#include <asm/x86_64/kexec.h> +#else +#include <asm/x86_32/kexec.h> +#endif #endif /* __X86_KEXEC_H__ */ --- /dev/null +++ work/xen/include/asm-x86/x86_32/elf.h @@ -0,0 +1,25 @@ +#ifndef __X86_32_ELF_H__ +#define __X86_32_ELF_H__ + +#include <xen/lib.h> /* for printk() used in stub */ + +typedef struct { + unsigned long dummy; +} ELF_Gregset; + +extern inline void elf_core_save_regs(ELF_Gregset *dst) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +#endif /* __X86_32_ELF_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- /dev/null +++ work/xen/include/asm-x86/x86_32/kexec.h @@ -0,0 +1,24 @@ +#ifndef __X86_32_KEXEC_H__ +#define __X86_32_KEXEC_H__ + +#include <xen/lib.h> /* for printk() used in stub */ +#include <xen/types.h> +#include <public/xen.h> +#include <xen/kexec.h> + +static inline void machine_kexec(xen_kexec_image_t *image) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +#endif /* __X86_32_KEXEC_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- /dev/null +++ work/xen/include/asm-x86/x86_64/elf.h @@ -0,0 +1,25 @@ +#ifndef __X86_64_ELF_H__ +#define __X86_64_ELF_H__ + +#include <xen/lib.h> /* for printk() used in stub */ + +typedef struct { + unsigned long dummy; +} ELF_Gregset; + +extern inline void elf_core_save_regs(ELF_Gregset *dst) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +#endif /* __X86_64_ELF_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- /dev/null +++ work/xen/include/asm-x86/x86_64/kexec.h @@ -0,0 +1,24 @@ +#ifndef __X86_64_KEXEC_H__ +#define __X86_64_KEXEC_H__ + +#include <xen/lib.h> /* for printk() used in stub */ +#include <xen/types.h> +#include <public/xen.h> +#include <xen/kexec.h> + +static inline void machine_kexec(xen_kexec_image_t *image) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); +} + +#endif /* __X86_64_KEXEC_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- 0003/xen/include/public/kexec.h +++ work/xen/include/public/kexec.h @@ -40,6 +40,10 @@ #include "xen.h" +#if defined(__i386__) || defined(__x86_64__) +#define KEXEC_XEN_NO_PAGES 17 +#endif + /* * Prototype for this hypercall is: * int kexec_op(int cmd, void *args) @@ -72,6 +76,9 @@ */ typedef struct xen_kexec_image { +#if defined(__i386__) || defined(__x86_64__) + unsigned long page_list[KEXEC_XEN_NO_PAGES]; +#endif unsigned long indirection_page; unsigned long start_address; } xen_kexec_image_t; --- 0003/xen/include/xen/elfcore.h +++ work/xen/include/xen/elfcore.h @@ -102,6 +102,9 @@ typedef struct { unsigned long xen_compiler; unsigned long xen_compile_date; unsigned long xen_compile_time; +#ifdef CONFIG_X86 + unsigned long dom0_pfn_to_mfn_frame_list_list; +#endif } xen_crash_xen_regs_t; TYPEDEF_NOTE(crash_note_xen_t, XEN_STR_LEN, xen_crash_xen_regs_t); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Magnus Damm
2006-Nov-08 13:49 UTC
[Xen-devel] [PATCH 03/04] Kexec / Kdump: x86_32 specific code
[PATCH 03/04] Kexec / Kdump: x86_32 specific code This patch contains the x86_32 implementation of Kexec / Kdump for Xen. Signed-Off-By: Magnus Damm <magnus@valinux.co.jp> --- Applies on top of xen-unstable-12281. buildconfigs/linux-defconfig_xen_x86_32 | 2 linux-2.6-xen-sparse/arch/i386/Kconfig | 2 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c | 19 linux-2.6-xen-sparse/include/asm-i386/kexec-xen.h | 30 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h | 8 patches/linux-2.6.16.29/git-35...c9.patch | 401 ++++++ patches/linux-2.6.16.29/linux-2.6.16.29-crash-xen-i386.patch | 31 patches/linux-2.6.16.29/linux-2.6.19-rc1-kexec...code-i386.patch | 169 +++ patches/linux-2.6.16.29/linux-2.6.19-rc1-kexec-xen-i386.patch | 54 + patches/linux-2.6.16.29/series | 4 xen/arch/x86/x86_32/entry.S | 2 xen/include/asm-x86/x86_32/elf.h | 38 xen/include/asm-x86/x86_32/kexec.h | 28 13 files changed, 777 insertions(+), 11 deletions(-) --- 0002/buildconfigs/linux-defconfig_xen_x86_32 +++ work/buildconfigs/linux-defconfig_xen_x86_32 @@ -183,6 +183,7 @@ CONFIG_MTRR=y CONFIG_REGPARM=y CONFIG_SECCOMP=y CONFIG_HZ_100=y +CONFIG_KEXEC=y # CONFIG_HZ_250 is not set # CONFIG_HZ_1000 is not set CONFIG_HZ=100 @@ -1036,6 +1037,7 @@ CONFIG_DNOTIFY=y # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y +# CONFIG_PROC_VMCORE is not set CONFIG_SYSFS=y CONFIG_TMPFS=y # CONFIG_HUGETLB_PAGE is not set --- 0001/linux-2.6-xen-sparse/arch/i386/Kconfig +++ work/linux-2.6-xen-sparse/arch/i386/Kconfig @@ -726,7 +726,7 @@ source kernel/Kconfig.hz config KEXEC bool "kexec system call (EXPERIMENTAL)" - depends on EXPERIMENTAL && !X86_XEN + depends on EXPERIMENTAL && !XEN_UNPRIVILEGED_GUEST help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot --- 0001/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c +++ work/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c @@ -69,6 +69,10 @@ #include "setup_arch_pre.h" #include <bios_ebda.h> +#ifdef CONFIG_XEN +#include <xen/interface/kexec.h> +#endif + /* Forward Declaration. */ void __init find_max_pfn(void); @@ -943,6 +947,7 @@ static void __init parse_cmdline_early ( * after a kernel panic. */ else if (!memcmp(from, "crashkernel=", 12)) { +#ifndef CONFIG_XEN unsigned long size, base; size = memparse(from+12, &from); if (*from == ''@'') { @@ -953,6 +958,10 @@ static void __init parse_cmdline_early ( crashk_res.start = base; crashk_res.end = base + size - 1; } +#else + printk("Ignoring crashkernel command line, " + "parameter will be supplied by xen\n"); +#endif } #endif #ifdef CONFIG_PROC_VMCORE @@ -1322,10 +1331,14 @@ void __init setup_bootmem_allocator(void } #endif #ifdef CONFIG_KEXEC +#ifdef CONFIG_XEN + xen_machine_kexec_setup_resources(); +#else if (crashk_res.start != crashk_res.end) reserve_bootmem(crashk_res.start, crashk_res.end - crashk_res.start + 1); #endif +#endif if (!xen_feature(XENFEAT_auto_translated_physmap)) phys_to_machine_mapping @@ -1389,7 +1402,11 @@ legacy_init_iomem_resources(struct e820e request_resource(res, data_resource); #endif #ifdef CONFIG_KEXEC - request_resource(res, &crashk_res); + if (crashk_res.start != crashk_res.end) + request_resource(res, &crashk_res); +#ifdef CONFIG_XEN + xen_machine_kexec_register_resources(res); +#endif #endif } } --- /dev/null +++ work/linux-2.6-xen-sparse/include/asm-i386/kexec-xen.h @@ -0,0 +1,30 @@ +#ifndef _I386_KEXEC_XEN_H +#define _I386_KEXEC_XEN_H + +/* Kexec needs to know about the actual physical addresss. + * But in xen, on some architectures, a physical address is a + * pseudo-physical addresss. */ + +#ifdef CONFIG_XEN +#define kexec_page_to_pfn(page) pfn_to_mfn(page_to_pfn(page)) +#define kexec_pfn_to_page(pfn) pfn_to_page(mfn_to_pfn(pfn)) +#define kexec_virt_to_phys(addr) virt_to_machine(addr) +#define kexec_phys_to_virt(addr) phys_to_virt(machine_to_phys(addr)) +#else +#define kexec_page_to_pfn(page) page_to_pfn(page) +#define kexec_pfn_to_page(pfn) pfn_to_page(pfn) +#define kexec_virt_to_phys(addr) virt_to_phys(addr) +#define kexec_phys_to_virt(addr) phys_to_virt(addr) +#endif + +#endif /* _I386_KEXEC_XEN_H */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ --- 0001/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h +++ work/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h @@ -395,5 +395,13 @@ HYPERVISOR_xenoprof_op( return _hypercall2(int, xenoprof_op, op, arg); } +static inline int +HYPERVISOR_kexec_op( + unsigned long op, void *args) +{ + return _hypercall2(int, kexec_op, op, args); +} + + #endif /* __HYPERCALL_H__ */ --- /dev/null +++ work/patches/linux-2.6.16.29/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch @@ -0,0 +1,401 @@ +From: Magnus Damm <magnus@valinux.co.jp> +Date: Tue, 26 Sep 2006 08:52:38 +0000 (+0200) +Subject: [PATCH] i386: Avoid overwriting the current pgd (V4, i386) +X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=3566561bfadffcb5dbc85d576be80c0dbf2cccc9 + +[PATCH] i386: Avoid overwriting the current pgd (V4, i386) + +kexec: Avoid overwriting the current pgd (V4, i386) + +This patch upgrades the i386-specific kexec code to avoid overwriting the +current pgd. Overwriting the current pgd is bad when CONFIG_CRASH_DUMP is used +to start a secondary kernel that dumps the memory of the previous kernel. + +The code introduces a new set of page tables. These tables are used to provide +an executable identity mapping without overwriting the current pgd. + +Signed-off-by: Magnus Damm <magnus@valinux.co.jp> +Signed-off-by: Andi Kleen <ak@suse.de> +--- + +--- a/arch/i386/kernel/machine_kexec.c ++++ b/arch/i386/kernel/machine_kexec.c +@@ -21,70 +21,13 @@ + #include <asm/system.h> + + #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) +- +-#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +-#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +-#define L2_ATTR (_PAGE_PRESENT) +- +-#define LEVEL0_SIZE (1UL << 12UL) +- +-#ifndef CONFIG_X86_PAE +-#define LEVEL1_SIZE (1UL << 22UL) +-static u32 pgtable_level1[1024] PAGE_ALIGNED; +- +-static void identity_map_page(unsigned long address) +-{ +- unsigned long level1_index, level2_index; +- u32 *pgtable_level2; +- +- /* Find the current page table */ +- pgtable_level2 = __va(read_cr3()); +- +- /* Find the indexes of the physical address to identity map */ +- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE; +- level2_index = address / LEVEL1_SIZE; +- +- /* Identity map the page table entry */ +- pgtable_level1[level1_index] = address | L0_ATTR; +- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; +- +- /* Flush the tlb so the new mapping takes effect. +- * Global tlb entries are not flushed but that is not an issue. +- */ +- load_cr3(pgtable_level2); +-} +- +-#else +-#define LEVEL1_SIZE (1UL << 21UL) +-#define LEVEL2_SIZE (1UL << 30UL) +-static u64 pgtable_level1[512] PAGE_ALIGNED; +-static u64 pgtable_level2[512] PAGE_ALIGNED; +- +-static void identity_map_page(unsigned long address) +-{ +- unsigned long level1_index, level2_index, level3_index; +- u64 *pgtable_level3; +- +- /* Find the current page table */ +- pgtable_level3 = __va(read_cr3()); +- +- /* Find the indexes of the physical address to identity map */ +- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE; +- level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE; +- level3_index = address / LEVEL2_SIZE; +- +- /* Identity map the page table entry */ +- pgtable_level1[level1_index] = address | L0_ATTR; +- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; +- set_64bit(&pgtable_level3[level3_index], +- __pa(pgtable_level2) | L2_ATTR); +- +- /* Flush the tlb so the new mapping takes effect. +- * Global tlb entries are not flushed but that is not an issue. +- */ +- load_cr3(pgtable_level3); +-} ++static u32 kexec_pgd[1024] PAGE_ALIGNED; ++#ifdef CONFIG_X86_PAE ++static u32 kexec_pmd0[1024] PAGE_ALIGNED; ++static u32 kexec_pmd1[1024] PAGE_ALIGNED; + #endif ++static u32 kexec_pte0[1024] PAGE_ALIGNED; ++static u32 kexec_pte1[1024] PAGE_ALIGNED; + + static void set_idt(void *newidt, __u16 limit) + { +@@ -128,16 +71,6 @@ static void load_segments(void) + #undef __STR + } + +-typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)( +- unsigned long indirection_page, +- unsigned long reboot_code_buffer, +- unsigned long start_address, +- unsigned int has_pae) ATTRIB_NORET; +- +-extern const unsigned char relocate_new_kernel[]; +-extern void relocate_new_kernel_end(void); +-extern const unsigned int relocate_new_kernel_size; +- + /* + * A architecture hook called to validate the + * proposed image and prepare the control pages +@@ -170,25 +103,29 @@ void machine_kexec_cleanup(struct kimage + */ + NORET_TYPE void machine_kexec(struct kimage *image) + { +- unsigned long page_list; +- unsigned long reboot_code_buffer; +- +- relocate_new_kernel_t rnk; ++ unsigned long page_list[PAGES_NR]; ++ void *control_page; + + /* Interrupts aren''t acceptable while we reboot */ + local_irq_disable(); + +- /* Compute some offsets */ +- reboot_code_buffer = page_to_pfn(image->control_code_page) +- << PAGE_SHIFT; +- page_list = image->head; +- +- /* Set up an identity mapping for the reboot_code_buffer */ +- identity_map_page(reboot_code_buffer); +- +- /* copy it out */ +- memcpy((void *)reboot_code_buffer, relocate_new_kernel, +- relocate_new_kernel_size); ++ control_page = page_address(image->control_code_page); ++ memcpy(control_page, relocate_kernel, PAGE_SIZE); ++ ++ page_list[PA_CONTROL_PAGE] = __pa(control_page); ++ page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; ++ page_list[PA_PGD] = __pa(kexec_pgd); ++ page_list[VA_PGD] = (unsigned long)kexec_pgd; ++#ifdef CONFIG_X86_PAE ++ page_list[PA_PMD_0] = __pa(kexec_pmd0); ++ page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; ++ page_list[PA_PMD_1] = __pa(kexec_pmd1); ++ page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; ++#endif ++ page_list[PA_PTE_0] = __pa(kexec_pte0); ++ page_list[VA_PTE_0] = (unsigned long)kexec_pte0; ++ page_list[PA_PTE_1] = __pa(kexec_pte1); ++ page_list[VA_PTE_1] = (unsigned long)kexec_pte1; + + /* The segment registers are funny things, they have both a + * visible and an invisible part. Whenever the visible part is +@@ -207,8 +144,8 @@ NORET_TYPE void machine_kexec(struct kim + set_idt(phys_to_virt(0),0); + + /* now call it */ +- rnk = (relocate_new_kernel_t) reboot_code_buffer; +- (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae); ++ relocate_kernel((unsigned long)image->head, (unsigned long)page_list, ++ image->start, cpu_has_pae); + } + + /* crashkernel=size@addr specifies the location to reserve for +--- a/arch/i386/kernel/relocate_kernel.S ++++ b/arch/i386/kernel/relocate_kernel.S +@@ -7,16 +7,138 @@ + */ + + #include <linux/linkage.h> ++#include <asm/page.h> ++#include <asm/kexec.h> ++ ++/* ++ * Must be relocatable PIC code callable as a C function ++ */ ++ ++#define PTR(x) (x << 2) ++#define PAGE_ALIGNED (1 << PAGE_SHIFT) ++#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ ++#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */ ++ ++ .text ++ .align PAGE_ALIGNED ++ .globl relocate_kernel ++relocate_kernel: ++ movl 8(%esp), %ebp /* list of pages */ ++ ++#ifdef CONFIG_X86_PAE ++ /* map the control page at its virtual address */ ++ ++ movl PTR(VA_PGD)(%ebp), %edi ++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax ++ andl $0xc0000000, %eax ++ shrl $27, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_PMD_0)(%ebp), %edx ++ orl $PAE_PGD_ATTR, %edx ++ movl %edx, (%eax) ++ ++ movl PTR(VA_PMD_0)(%ebp), %edi ++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax ++ andl $0x3fe00000, %eax ++ shrl $18, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_PTE_0)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++ ++ movl PTR(VA_PTE_0)(%ebp), %edi ++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax ++ andl $0x001ff000, %eax ++ shrl $9, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++ ++ /* identity map the control page at its physical address */ ++ ++ movl PTR(VA_PGD)(%ebp), %edi ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax ++ andl $0xc0000000, %eax ++ shrl $27, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_PMD_1)(%ebp), %edx ++ orl $PAE_PGD_ATTR, %edx ++ movl %edx, (%eax) ++ ++ movl PTR(VA_PMD_1)(%ebp), %edi ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax ++ andl $0x3fe00000, %eax ++ shrl $18, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_PTE_1)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++ ++ movl PTR(VA_PTE_1)(%ebp), %edi ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax ++ andl $0x001ff000, %eax ++ shrl $9, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++#else ++ /* map the control page at its virtual address */ ++ ++ movl PTR(VA_PGD)(%ebp), %edi ++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax ++ andl $0xffc00000, %eax ++ shrl $20, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_PTE_0)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++ ++ movl PTR(VA_PTE_0)(%ebp), %edi ++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax ++ andl $0x003ff000, %eax ++ shrl $10, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++ ++ /* identity map the control page at its physical address */ ++ ++ movl PTR(VA_PGD)(%ebp), %edi ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax ++ andl $0xffc00000, %eax ++ shrl $20, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_PTE_1)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++ ++ movl PTR(VA_PTE_1)(%ebp), %edi ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax ++ andl $0x003ff000, %eax ++ shrl $10, %eax ++ addl %edi, %eax ++ ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx ++ orl $PAGE_ATTR, %edx ++ movl %edx, (%eax) ++#endif + +- /* +- * Must be relocatable PIC code callable as a C function, that once +- * it starts can not use the previous processes stack. +- */ +- .globl relocate_new_kernel + relocate_new_kernel: + /* read the arguments and say goodbye to the stack */ + movl 4(%esp), %ebx /* page_list */ +- movl 8(%esp), %ebp /* reboot_code_buffer */ ++ movl 8(%esp), %ebp /* list of pages */ + movl 12(%esp), %edx /* start address */ + movl 16(%esp), %ecx /* cpu_has_pae */ + +@@ -24,11 +146,26 @@ relocate_new_kernel: + pushl $0 + popfl + +- /* set a new stack at the bottom of our page... */ +- lea 4096(%ebp), %esp ++ /* get physical address of control page now */ ++ /* this is impossible after page table switch */ ++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edi ++ ++ /* switch to new set of page tables */ ++ movl PTR(PA_PGD)(%ebp), %eax ++ movl %eax, %cr3 ++ ++ /* setup a new stack at the end of the physical control page */ ++ lea 4096(%edi), %esp + +- /* store the parameters back on the stack */ +- pushl %edx /* store the start address */ ++ /* jump to identity mapped page */ ++ movl %edi, %eax ++ addl $(identity_mapped - relocate_kernel), %eax ++ pushl %eax ++ ret ++ ++identity_mapped: ++ /* store the start address on the stack */ ++ pushl %edx + + /* Set cr0 to a known state: + * 31 0 == Paging disabled +@@ -113,8 +250,3 @@ relocate_new_kernel: + xorl %edi, %edi + xorl %ebp, %ebp + ret +-relocate_new_kernel_end: +- +- .globl relocate_new_kernel_size +-relocate_new_kernel_size: +- .long relocate_new_kernel_end - relocate_new_kernel +--- a/include/asm-i386/kexec.h ++++ b/include/asm-i386/kexec.h +@@ -1,6 +1,26 @@ + #ifndef _I386_KEXEC_H + #define _I386_KEXEC_H + ++#define PA_CONTROL_PAGE 0 ++#define VA_CONTROL_PAGE 1 ++#define PA_PGD 2 ++#define VA_PGD 3 ++#define PA_PTE_0 4 ++#define VA_PTE_0 5 ++#define PA_PTE_1 6 ++#define VA_PTE_1 7 ++#ifdef CONFIG_X86_PAE ++#define PA_PMD_0 8 ++#define VA_PMD_0 9 ++#define PA_PMD_1 10 ++#define VA_PMD_1 11 ++#define PAGES_NR 12 ++#else ++#define PAGES_NR 8 ++#endif ++ ++#ifndef __ASSEMBLY__ ++ + #include <asm/fixmap.h> + #include <asm/ptrace.h> + #include <asm/string.h> +@@ -72,5 +92,12 @@ static inline void crash_setup_regs(stru + newregs->eip = (unsigned long)current_text_addr(); + } + } ++asmlinkage NORET_TYPE void ++relocate_kernel(unsigned long indirection_page, ++ unsigned long control_page, ++ unsigned long start_address, ++ unsigned int has_pae) ATTRIB_NORET; ++ ++#endif /* __ASSEMBLY__ */ + + #endif /* _I386_KEXEC_H */ --- /dev/null +++ work/patches/linux-2.6.16.29/linux-2.6.16.29-crash-xen-i386.patch @@ -0,0 +1,31 @@ +--- 0001/arch/i386/kernel/crash.c ++++ work/arch/i386/kernel/crash.c 2006-10-26 15:45:35.000000000 +0900 +@@ -90,6 +90,7 @@ static void crash_save_self(struct pt_re + crash_save_this_cpu(regs, cpu); + } + ++#ifndef CONFIG_XEN + #ifdef CONFIG_SMP + static atomic_t waiting_for_crash_ipi; + +@@ -158,6 +159,7 @@ static void nmi_shootdown_cpus(void) + /* There are no cpus to shootdown */ + } + #endif ++#endif /* CONFIG_XEN */ + + void machine_crash_shutdown(struct pt_regs *regs) + { +@@ -174,10 +176,12 @@ void machine_crash_shutdown(struct pt_re + + /* Make a note of crashing cpu. Will be used in NMI callback.*/ + crashing_cpu = smp_processor_id(); ++#ifndef CONFIG_XEN + nmi_shootdown_cpus(); + lapic_shutdown(); + #if defined(CONFIG_X86_IO_APIC) + disable_IO_APIC(); + #endif ++#endif /* CONFIG_XEN */ + crash_save_self(regs); + } --- /dev/null +++ work/patches/linux-2.6.16.29/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch @@ -0,0 +1,169 @@ +kexec: Move asm segment handling code to the assembly file (i386) + +This patch moves the idt, gdt, and segment handling code from machine_kexec.c +to relocate_kernel.S. The main reason behind this move is to avoid code +duplication in the Xen hypervisor. With this patch all code required to kexec +is put on the control page. + +On top of that this patch also counts as a cleanup - I think it is much +nicer to write assembly directly in assembly files than wrap inline assembly +in C functions for no apparent reason. + +Signed-off-by: Magnus Damm <magnus@valinux.co.jp> +--- + + Applies to 2.6.19-rc1. + + machine_kexec.c | 59 ----------------------------------------------------- + relocate_kernel.S | 58 +++++++++++++++++++++++++++++++++++++++++++++++----- + 2 files changed, 53 insertions(+), 64 deletions(-) + +--- 0002/arch/i386/kernel/machine_kexec.c ++++ work/arch/i386/kernel/machine_kexec.c 2006-10-05 15:49:08.000000000 +0900 +@@ -29,48 +29,6 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED + static u32 kexec_pte0[1024] PAGE_ALIGNED; + static u32 kexec_pte1[1024] PAGE_ALIGNED; + +-static void set_idt(void *newidt, __u16 limit) +-{ +- struct Xgt_desc_struct curidt; +- +- /* ia32 supports unaliged loads & stores */ +- curidt.size = limit; +- curidt.address = (unsigned long)newidt; +- +- load_idt(&curidt); +-}; +- +- +-static void set_gdt(void *newgdt, __u16 limit) +-{ +- struct Xgt_desc_struct curgdt; +- +- /* ia32 supports unaligned loads & stores */ +- curgdt.size = limit; +- curgdt.address = (unsigned long)newgdt; +- +- load_gdt(&curgdt); +-}; +- +-static void load_segments(void) +-{ +-#define __STR(X) #X +-#define STR(X) __STR(X) +- +- __asm__ __volatile__ ( +- "\tljmp $"STR(__KERNEL_CS)",$1f\n" +- "\t1:\n" +- "\tmovl $"STR(__KERNEL_DS)",%%eax\n" +- "\tmovl %%eax,%%ds\n" +- "\tmovl %%eax,%%es\n" +- "\tmovl %%eax,%%fs\n" +- "\tmovl %%eax,%%gs\n" +- "\tmovl %%eax,%%ss\n" +- ::: "eax", "memory"); +-#undef STR +-#undef __STR +-} +- + /* + * A architecture hook called to validate the + * proposed image and prepare the control pages +@@ -127,23 +85,6 @@ NORET_TYPE void machine_kexec(struct kim + page_list[PA_PTE_1] = __pa(kexec_pte1); + page_list[VA_PTE_1] = (unsigned long)kexec_pte1; + +- /* The segment registers are funny things, they have both a +- * visible and an invisible part. Whenever the visible part is +- * set to a specific selector, the invisible part is loaded +- * with from a table in memory. At no other time is the +- * descriptor table in memory accessed. +- * +- * I take advantage of this here by force loading the +- * segments, before I zap the gdt with an invalid value. +- */ +- load_segments(); +- /* The gdt & idt are now invalid. +- * If you want to load them you must set up your own idt & gdt. +- */ +- set_gdt(phys_to_virt(0),0); +- set_idt(phys_to_virt(0),0); +- +- /* now call it */ + relocate_kernel((unsigned long)image->head, (unsigned long)page_list, + image->start, cpu_has_pae); + } +--- 0002/arch/i386/kernel/relocate_kernel.S ++++ work/arch/i386/kernel/relocate_kernel.S 2006-10-05 16:03:21.000000000 +0900 +@@ -154,14 +154,45 @@ relocate_new_kernel: + movl PTR(PA_PGD)(%ebp), %eax + movl %eax, %cr3 + ++ /* setup idt */ ++ movl %edi, %eax ++ addl $(idt_48 - relocate_kernel), %eax ++ lidtl (%eax) ++ ++ /* setup gdt */ ++ movl %edi, %eax ++ addl $(gdt - relocate_kernel), %eax ++ movl %edi, %esi ++ addl $((gdt_48 - relocate_kernel) + 2), %esi ++ movl %eax, (%esi) ++ ++ movl %edi, %eax ++ addl $(gdt_48 - relocate_kernel), %eax ++ lgdtl (%eax) ++ ++ /* setup data segment registers */ ++ mov $(gdt_ds - gdt), %eax ++ mov %eax, %ds ++ mov %eax, %es ++ mov %eax, %fs ++ mov %eax, %gs ++ mov %eax, %ss ++ + /* setup a new stack at the end of the physical control page */ + lea 4096(%edi), %esp + +- /* jump to identity mapped page */ +- movl %edi, %eax +- addl $(identity_mapped - relocate_kernel), %eax +- pushl %eax +- ret ++ /* load new code segment and jump to identity mapped page */ ++ movl %edi, %esi ++ xorl %eax, %eax ++ pushl %eax ++ pushl %esi ++ pushl %eax ++ movl $(gdt_cs - gdt), %eax ++ pushl %eax ++ movl %edi, %eax ++ addl $(identity_mapped - relocate_kernel),%eax ++ pushl %eax ++ iretl + + identity_mapped: + /* store the start address on the stack */ +@@ -250,3 +281,20 @@ identity_mapped: + xorl %edi, %edi + xorl %ebp, %ebp + ret ++ ++ .align 16 ++gdt: ++ .quad 0x0000000000000000 /* NULL descriptor */ ++gdt_cs: ++ .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ ++gdt_ds: ++ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ ++gdt_end: ++ ++gdt_48: ++ .word gdt_end - gdt - 1 /* limit */ ++ .long 0 /* base - filled in by code above */ ++ ++idt_48: ++ .word 0 /* limit */ ++ .long 0 /* base */ --- /dev/null +++ work/patches/linux-2.6.16.29/linux-2.6.19-rc1-kexec-xen-i386.patch @@ -0,0 +1,54 @@ +--- 0004/arch/i386/kernel/machine_kexec.c ++++ work/arch/i386/kernel/machine_kexec.c 2006-10-11 18:34:06.000000000 +0900 +@@ -20,6 +20,10 @@ + #include <asm/desc.h> + #include <asm/system.h> + ++#ifdef CONFIG_XEN ++#include <xen/interface/kexec.h> ++#endif ++ + #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) + static u32 kexec_pgd[1024] PAGE_ALIGNED; + #ifdef CONFIG_X86_PAE +@@ -29,6 +33,40 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED + static u32 kexec_pte0[1024] PAGE_ALIGNED; + static u32 kexec_pte1[1024] PAGE_ALIGNED; + ++#ifdef CONFIG_XEN ++ ++#define __ma(x) (pfn_to_mfn(__pa((x)) >> PAGE_SHIFT) << PAGE_SHIFT) ++ ++#if PAGES_NR > KEXEC_XEN_NO_PAGES ++#error PAGES_NR is greater than KEXEC_XEN_NO_PAGES - Xen support will break ++#endif ++ ++#if PA_CONTROL_PAGE != 0 ++#error PA_CONTROL_PAGE is non zero - Xen support will break ++#endif ++ ++void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, struct kimage *image) ++{ ++ void *control_page; ++ ++ memset(xki->page_list, 0, sizeof(xki->page_list)); ++ ++ control_page = page_address(image->control_code_page); ++ memcpy(control_page, relocate_kernel, PAGE_SIZE); ++ ++ xki->page_list[PA_CONTROL_PAGE] = __ma(control_page); ++ xki->page_list[PA_PGD] = __ma(kexec_pgd); ++#ifdef CONFIG_X86_PAE ++ xki->page_list[PA_PMD_0] = __ma(kexec_pmd0); ++ xki->page_list[PA_PMD_1] = __ma(kexec_pmd1); ++#endif ++ xki->page_list[PA_PTE_0] = __ma(kexec_pte0); ++ xki->page_list[PA_PTE_1] = __ma(kexec_pte1); ++ ++} ++ ++#endif /* CONFIG_XEN */ ++ + /* + * A architecture hook called to validate the + * proposed image and prepare the control pages --- 0004/patches/linux-2.6.16.29/series +++ work/patches/linux-2.6.16.29/series @@ -1,6 +1,10 @@ kexec-generic.patch git-2efe55a9cec8418f0e0cde3dc3787a42fddc4411.patch git-2a8a3d5b65e86ec1dfef7d268c64a909eab94af7.patch +git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch +linux-2.6.19-rc1-kexec-move_segment_code-i386.patch +linux-2.6.19-rc1-kexec-xen-i386.patch +linux-2.6.16.29-crash-xen-i386.patch blktap-aio-16_03_06.patch device_bind.patch fix-hz-suspend.patch --- 0001/xen/arch/x86/x86_32/entry.S +++ work/xen/arch/x86/x86_32/entry.S @@ -672,6 +672,7 @@ ENTRY(hypercall_table) .long do_hvm_op .long do_sysctl /* 35 */ .long do_domctl + .long do_kexec_op .rept NR_hypercalls-((.-hypercall_table)/4) .long do_ni_hypercall .endr @@ -714,6 +715,7 @@ ENTRY(hypercall_args_table) .byte 2 /* do_hvm_op */ .byte 1 /* do_sysctl */ /* 35 */ .byte 1 /* do_domctl */ + .byte 2 /* do_kexec_op */ .rept NR_hypercalls-(.-hypercall_args_table) .byte 0 /* do_ni_hypercall */ .endr --- 0004/xen/include/asm-x86/x86_32/elf.h +++ work/xen/include/asm-x86/x86_32/elf.h @@ -1,15 +1,47 @@ #ifndef __X86_32_ELF_H__ #define __X86_32_ELF_H__ -#include <xen/lib.h> /* for printk() used in stub */ +#include <asm/processor.h> typedef struct { - unsigned long dummy; + unsigned long ebx; + unsigned long ecx; + unsigned long edx; + unsigned long esi; + unsigned long edi; + unsigned long ebp; + unsigned long eax; + unsigned long ds; + unsigned long es; + unsigned long fs; + unsigned long gs; + unsigned long orig_eax; + unsigned long eip; + unsigned long cs; + unsigned long eflags; + unsigned long esp; + unsigned long ss; } ELF_Gregset; extern inline void elf_core_save_regs(ELF_Gregset *dst) { - printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); + asm volatile("movl %%ebx,%0" : "=m"(dst->ebx)); + asm volatile("movl %%ecx,%0" : "=m"(dst->ecx)); + asm volatile("movl %%edx,%0" : "=m"(dst->edx)); + asm volatile("movl %%esi,%0" : "=m"(dst->esi)); + asm volatile("movl %%edi,%0" : "=m"(dst->edi)); + asm volatile("movl %%ebp,%0" : "=m"(dst->ebp)); + asm volatile("movl %%eax,%0" : "=m"(dst->eax)); + asm volatile("movw %%ds, %%ax;" :"=a"(dst->ds)); + asm volatile("movw %%es, %%ax;" :"=a"(dst->es)); + asm volatile("movw %%fs, %%ax;" :"=a"(dst->fs)); + asm volatile("movw %%gs, %%ax;" :"=a"(dst->gs)); + /* orig_eax not filled in for now */ + dst->eip = (unsigned long)current_text_addr(); + asm volatile("movw %%cs, %%ax;" :"=a"(dst->cs)); + asm volatile("pushfl; popl %0" :"=m"(dst->eflags)); + asm volatile("movl %%esp,%0" : "=m"(dst->esp)); + asm volatile("movw %%ss, %%ax;" :"=a"(dst->ss)); } #endif /* __X86_32_ELF_H__ */ --- 0004/xen/include/asm-x86/x86_32/kexec.h +++ work/xen/include/asm-x86/x86_32/kexec.h @@ -1,17 +1,33 @@ -#ifndef __X86_32_KEXEC_H__ -#define __X86_32_KEXEC_H__ +/****************************************************************************** + * kexec.h + * + * Based heavily on machine_kexec.c and kexec.h from Linux 2.6.19-rc1 + * + */ + +#ifndef __X86_KEXEC_X86_32_H__ +#define __X86_KEXEC_X86_32_H__ -#include <xen/lib.h> /* for printk() used in stub */ #include <xen/types.h> -#include <public/xen.h> #include <xen/kexec.h> +#include <asm/fixmap.h> + +typedef asmlinkage void (*relocate_new_kernel_t)( + unsigned long indirection_page, + unsigned long page_list, + unsigned long start_address, + unsigned int has_pae); static inline void machine_kexec(xen_kexec_image_t *image) { - printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); + relocate_new_kernel_t rnk; + + rnk = (relocate_new_kernel_t) image->page_list[1]; + (*rnk)(image->indirection_page, (unsigned long)image->page_list, + image->start_address, (unsigned long)cpu_has_pae); } -#endif /* __X86_32_KEXEC_H__ */ +#endif /* __X86_KEXEC_X86_32_H__ */ /* * Local variables: _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Magnus Damm
2006-Nov-08 13:49 UTC
[Xen-devel] [PATCH 04/04] Kexec / Kdump: x86_64 specific code
[PATCH 04/04] Kexec / Kdump: x86_64 specific code This patch contains the x86_64 implementation of Kexec / Kdump for Xen. Signed-Off-By: Magnus Damm <magnus@valinux.co.jp> --- Applies on top of xen-unstable-12281. buildconfigs/linux-defconfig_xen_x86_64 | 1 linux-2.6-xen-sparse/arch/x86_64/Kconfig | 2 linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c | 6 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c | 13 linux-2.6-xen-sparse/include/asm-x86_64/kexec-xen.h | 30 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h | 7 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/ptrace.h | 2 patches/linux-2.6.16.29/git-4b...1f.patch | 375 +++ patches/linux-2.6.16.29/linux-2.6.16.29-crash-xen-x86_64.patch | 35 patches/linux-2.6.16.29/linux-2.6.19-rc1-kexec...code-x86_64.patch | 161 + patches/linux-2.6.16.29/linux-2.6.19-rc1-kexec-xen-x86_64.patch | 162 + patches/linux-2.6.16.29/series | 4 xen/arch/x86/x86_64/entry.S | 2 xen/include/asm-x86/x86_64/elf.h | 58 + xen/include/asm-x86/x86_64/kexec.h | 23 15 files changed, 872 insertions(+), 9 deletions(-) --- 0002/buildconfigs/linux-defconfig_xen_x86_64 +++ work/buildconfigs/linux-defconfig_xen_x86_64 @@ -138,6 +138,7 @@ CONFIG_SWIOTLB=y CONFIG_PHYSICAL_START=0x100000 CONFIG_SECCOMP=y CONFIG_HZ_100=y +CONFIG_KEXEC=y # CONFIG_HZ_250 is not set # CONFIG_HZ_1000 is not set CONFIG_HZ=100 --- 0001/linux-2.6-xen-sparse/arch/x86_64/Kconfig +++ work/linux-2.6-xen-sparse/arch/x86_64/Kconfig @@ -435,7 +435,7 @@ config X86_MCE_AMD config KEXEC bool "kexec system call (EXPERIMENTAL)" - depends on EXPERIMENTAL && !X86_64_XEN + depends on EXPERIMENTAL && !XEN_UNPRIVILEGED_GUEST help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot --- 0001/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c +++ work/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c @@ -260,7 +260,11 @@ void __init e820_reserve_resources(struc request_resource(res, &data_resource); #endif #ifdef CONFIG_KEXEC - request_resource(res, &crashk_res); + if (crashk_res.start != crashk_res.end) + request_resource(res, &crashk_res); +#ifdef CONFIG_XEN + xen_machine_kexec_register_resources(res); +#endif #endif } } --- 0001/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c +++ work/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c @@ -80,6 +80,10 @@ #include <asm/mach-xen/setup_arch_post.h> #include <xen/interface/memory.h> +#ifdef CONFIG_XEN +#include <xen/interface/kexec.h> +#endif + extern unsigned long start_pfn; extern struct edid_info edid_info; @@ -450,6 +454,7 @@ static __init void parse_cmdline_early ( * after a kernel panic. */ else if (!memcmp(from, "crashkernel=", 12)) { +#ifndef CONFIG_XEN unsigned long size, base; size = memparse(from+12, &from); if (*from == ''@'') { @@ -460,6 +465,10 @@ static __init void parse_cmdline_early ( crashk_res.start = base; crashk_res.end = base + size - 1; } +#else + printk("Ignoring crashkernel command line, " + "parameter will be supplied by xen\n"); +#endif } #endif @@ -812,11 +821,15 @@ void __init setup_arch(char **cmdline_p) #endif #endif /* !CONFIG_XEN */ #ifdef CONFIG_KEXEC +#ifdef CONFIG_XEN + xen_machine_kexec_setup_resources(); +#else if (crashk_res.start != crashk_res.end) { reserve_bootmem(crashk_res.start, crashk_res.end - crashk_res.start + 1); } #endif +#endif paging_init(); #ifdef CONFIG_X86_LOCAL_APIC --- /dev/null +++ work/linux-2.6-xen-sparse/include/asm-x86_64/kexec-xen.h @@ -0,0 +1,30 @@ +#ifndef _X86_64_KEXEC_XEN_H +#define _X86_64_KEXEC_XEN_H + +/* Kexec needs to know about the actual physical addresss. + * But in xen, on some architectures, a physical address is a + * pseudo-physical addresss. */ + +#ifdef CONFIG_XEN +#define kexec_page_to_pfn(page) pfn_to_mfn(page_to_pfn(page)) +#define kexec_pfn_to_page(pfn) pfn_to_page(mfn_to_pfn(pfn)) +#define kexec_virt_to_phys(addr) virt_to_machine(addr) +#define kexec_phys_to_virt(addr) phys_to_virt(machine_to_phys(addr)) +#else +#define kexec_page_to_pfn(page) page_to_pfn(page) +#define kexec_pfn_to_page(pfn) pfn_to_page(pfn) +#define kexec_virt_to_phys(addr) virt_to_phys(addr) +#define kexec_phys_to_virt(addr) phys_to_virt(addr) +#endif + +#endif /* _X86_64_KEXEC_XEN_H */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ --- 0001/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h +++ work/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h @@ -396,4 +396,11 @@ HYPERVISOR_xenoprof_op( return _hypercall2(int, xenoprof_op, op, arg); } +static inline int +HYPERVISOR_kexec_op( + unsigned long op, void *args) +{ + return _hypercall2(int, kexec_op, op, args); +} + #endif /* __HYPERCALL_H__ */ --- 0001/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/ptrace.h +++ work/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/ptrace.h @@ -90,6 +90,8 @@ extern unsigned long profile_pc(struct p #define profile_pc(regs) instruction_pointer(regs) #endif +#include <linux/compiler.h> + void signal_fault(struct pt_regs *regs, void __user *frame, char *where); struct task_struct; --- /dev/null +++ work/patches/linux-2.6.16.29/git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch @@ -0,0 +1,375 @@ +From: Magnus Damm <magnus@valinux.co.jp> +Date: Tue, 26 Sep 2006 08:52:38 +0000 (+0200) +Subject: [PATCH] Avoid overwriting the current pgd (V4, x86_64) +X-Git-Tag: v2.6.19-rc1 +X-Git-Url: http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f + +[PATCH] Avoid overwriting the current pgd (V4, x86_64) + +kexec: Avoid overwriting the current pgd (V4, x86_64) + +This patch upgrades the x86_64-specific kexec code to avoid overwriting the +current pgd. Overwriting the current pgd is bad when CONFIG_CRASH_DUMP is used +to start a secondary kernel that dumps the memory of the previous kernel. + +The code introduces a new set of page tables. These tables are used to provide +an executable identity mapping without overwriting the current pgd. + +Signed-off-by: Magnus Damm <magnus@valinux.co.jp> +Signed-off-by: Andi Kleen <ak@suse.de> +--- + +--- a/arch/x86_64/kernel/machine_kexec.c ++++ b/arch/x86_64/kernel/machine_kexec.c +@@ -15,6 +15,15 @@ + #include <asm/mmu_context.h> + #include <asm/io.h> + ++#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) ++static u64 kexec_pgd[512] PAGE_ALIGNED; ++static u64 kexec_pud0[512] PAGE_ALIGNED; ++static u64 kexec_pmd0[512] PAGE_ALIGNED; ++static u64 kexec_pte0[512] PAGE_ALIGNED; ++static u64 kexec_pud1[512] PAGE_ALIGNED; ++static u64 kexec_pmd1[512] PAGE_ALIGNED; ++static u64 kexec_pte1[512] PAGE_ALIGNED; ++ + static void init_level2_page(pmd_t *level2p, unsigned long addr) + { + unsigned long end_addr; +@@ -144,32 +153,19 @@ static void load_segments(void) + ); + } + +-typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page, +- unsigned long control_code_buffer, +- unsigned long start_address, +- unsigned long pgtable) ATTRIB_NORET; +- +-extern const unsigned char relocate_new_kernel[]; +-extern const unsigned long relocate_new_kernel_size; +- + int machine_kexec_prepare(struct kimage *image) + { +- unsigned long start_pgtable, control_code_buffer; ++ unsigned long start_pgtable; + int result; + + /* Calculate the offsets */ + start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; +- control_code_buffer = start_pgtable + PAGE_SIZE; + + /* Setup the identity mapped 64bit page table */ + result = init_pgtable(image, start_pgtable); + if (result) + return result; + +- /* Place the code in the reboot code buffer */ +- memcpy(__va(control_code_buffer), relocate_new_kernel, +- relocate_new_kernel_size); +- + return 0; + } + +@@ -184,28 +180,34 @@ void machine_kexec_cleanup(struct kimage + */ + NORET_TYPE void machine_kexec(struct kimage *image) + { +- unsigned long page_list; +- unsigned long control_code_buffer; +- unsigned long start_pgtable; +- relocate_new_kernel_t rnk; ++ unsigned long page_list[PAGES_NR]; ++ void *control_page; + + /* Interrupts aren''t acceptable while we reboot */ + local_irq_disable(); + +- /* Calculate the offsets */ +- page_list = image->head; +- start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; +- control_code_buffer = start_pgtable + PAGE_SIZE; ++ control_page = page_address(image->control_code_page) + PAGE_SIZE; ++ memcpy(control_page, relocate_kernel, PAGE_SIZE); + +- /* Set the low half of the page table to my identity mapped +- * page table for kexec. Leave the high half pointing at the +- * kernel pages. Don''t bother to flush the global pages +- * as that will happen when I fully switch to my identity mapped +- * page table anyway. +- */ +- memcpy(__va(read_cr3()), __va(start_pgtable), PAGE_SIZE/2); +- __flush_tlb(); ++ page_list[PA_CONTROL_PAGE] = __pa(control_page); ++ page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; ++ page_list[PA_PGD] = __pa(kexec_pgd); ++ page_list[VA_PGD] = (unsigned long)kexec_pgd; ++ page_list[PA_PUD_0] = __pa(kexec_pud0); ++ page_list[VA_PUD_0] = (unsigned long)kexec_pud0; ++ page_list[PA_PMD_0] = __pa(kexec_pmd0); ++ page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; ++ page_list[PA_PTE_0] = __pa(kexec_pte0); ++ page_list[VA_PTE_0] = (unsigned long)kexec_pte0; ++ page_list[PA_PUD_1] = __pa(kexec_pud1); ++ page_list[VA_PUD_1] = (unsigned long)kexec_pud1; ++ page_list[PA_PMD_1] = __pa(kexec_pmd1); ++ page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; ++ page_list[PA_PTE_1] = __pa(kexec_pte1); ++ page_list[VA_PTE_1] = (unsigned long)kexec_pte1; + ++ page_list[PA_TABLE_PAGE] ++ (unsigned long)__pa(page_address(image->control_code_page)); + + /* The segment registers are funny things, they have both a + * visible and an invisible part. Whenever the visible part is +@@ -222,9 +224,10 @@ NORET_TYPE void machine_kexec(struct kim + */ + set_gdt(phys_to_virt(0),0); + set_idt(phys_to_virt(0),0); ++ + /* now call it */ +- rnk = (relocate_new_kernel_t) control_code_buffer; +- (*rnk)(page_list, control_code_buffer, image->start, start_pgtable); ++ relocate_kernel((unsigned long)image->head, (unsigned long)page_list, ++ image->start); + } + + /* crashkernel=size@addr specifies the location to reserve for +--- a/arch/x86_64/kernel/relocate_kernel.S ++++ b/arch/x86_64/kernel/relocate_kernel.S +@@ -7,31 +7,169 @@ + */ + + #include <linux/linkage.h> ++#include <asm/page.h> ++#include <asm/kexec.h> + +- /* +- * Must be relocatable PIC code callable as a C function, that once +- * it starts can not use the previous processes stack. +- */ +- .globl relocate_new_kernel ++/* ++ * Must be relocatable PIC code callable as a C function ++ */ ++ ++#define PTR(x) (x << 3) ++#define PAGE_ALIGNED (1 << PAGE_SHIFT) ++#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ ++ ++ .text ++ .align PAGE_ALIGNED + .code64 ++ .globl relocate_kernel ++relocate_kernel: ++ /* %rdi indirection_page ++ * %rsi page_list ++ * %rdx start address ++ */ ++ ++ /* map the control page at its virtual address */ ++ ++ movq $0x0000ff8000000000, %r10 /* mask */ ++ mov $(39 - 3), %cl /* bits to shift */ ++ movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ ++ ++ movq %r11, %r9 ++ andq %r10, %r9 ++ shrq %cl, %r9 ++ ++ movq PTR(VA_PGD)(%rsi), %r8 ++ addq %r8, %r9 ++ movq PTR(PA_PUD_0)(%rsi), %r8 ++ orq $PAGE_ATTR, %r8 ++ movq %r8, (%r9) ++ ++ shrq $9, %r10 ++ sub $9, %cl ++ ++ movq %r11, %r9 ++ andq %r10, %r9 ++ shrq %cl, %r9 ++ ++ movq PTR(VA_PUD_0)(%rsi), %r8 ++ addq %r8, %r9 ++ movq PTR(PA_PMD_0)(%rsi), %r8 ++ orq $PAGE_ATTR, %r8 ++ movq %r8, (%r9) ++ ++ shrq $9, %r10 ++ sub $9, %cl ++ ++ movq %r11, %r9 ++ andq %r10, %r9 ++ shrq %cl, %r9 ++ ++ movq PTR(VA_PMD_0)(%rsi), %r8 ++ addq %r8, %r9 ++ movq PTR(PA_PTE_0)(%rsi), %r8 ++ orq $PAGE_ATTR, %r8 ++ movq %r8, (%r9) ++ ++ shrq $9, %r10 ++ sub $9, %cl ++ ++ movq %r11, %r9 ++ andq %r10, %r9 ++ shrq %cl, %r9 ++ ++ movq PTR(VA_PTE_0)(%rsi), %r8 ++ addq %r8, %r9 ++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 ++ orq $PAGE_ATTR, %r8 ++ movq %r8, (%r9) ++ ++ /* identity map the control page at its physical address */ ++ ++ movq $0x0000ff8000000000, %r10 /* mask */ ++ mov $(39 - 3), %cl /* bits to shift */ ++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ ++ ++ movq %r11, %r9 ++ andq %r10, %r9 ++ shrq %cl, %r9 ++ ++ movq PTR(VA_PGD)(%rsi), %r8 ++ addq %r8, %r9 ++ movq PTR(PA_PUD_1)(%rsi), %r8 ++ orq $PAGE_ATTR, %r8 ++ movq %r8, (%r9) ++ ++ shrq $9, %r10 ++ sub $9, %cl ++ ++ movq %r11, %r9 ++ andq %r10, %r9 ++ shrq %cl, %r9 ++ ++ movq PTR(VA_PUD_1)(%rsi), %r8 ++ addq %r8, %r9 ++ movq PTR(PA_PMD_1)(%rsi), %r8 ++ orq $PAGE_ATTR, %r8 ++ movq %r8, (%r9) ++ ++ shrq $9, %r10 ++ sub $9, %cl ++ ++ movq %r11, %r9 ++ andq %r10, %r9 ++ shrq %cl, %r9 ++ ++ movq PTR(VA_PMD_1)(%rsi), %r8 ++ addq %r8, %r9 ++ movq PTR(PA_PTE_1)(%rsi), %r8 ++ orq $PAGE_ATTR, %r8 ++ movq %r8, (%r9) ++ ++ shrq $9, %r10 ++ sub $9, %cl ++ ++ movq %r11, %r9 ++ andq %r10, %r9 ++ shrq %cl, %r9 ++ ++ movq PTR(VA_PTE_1)(%rsi), %r8 ++ addq %r8, %r9 ++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 ++ orq $PAGE_ATTR, %r8 ++ movq %r8, (%r9) ++ + relocate_new_kernel: +- /* %rdi page_list +- * %rsi reboot_code_buffer ++ /* %rdi indirection_page ++ * %rsi page_list + * %rdx start address +- * %rcx page_table +- * %r8 arg5 +- * %r9 arg6 + */ + + /* zero out flags, and disable interrupts */ + pushq $0 + popfq + +- /* set a new stack at the bottom of our page... */ +- lea 4096(%rsi), %rsp ++ /* get physical address of control page now */ ++ /* this is impossible after page table switch */ ++ movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 ++ ++ /* get physical address of page table now too */ ++ movq PTR(PA_TABLE_PAGE)(%rsi), %rcx ++ ++ /* switch to new set of page tables */ ++ movq PTR(PA_PGD)(%rsi), %r9 ++ movq %r9, %cr3 ++ ++ /* setup a new stack at the end of the physical control page */ ++ lea 4096(%r8), %rsp ++ ++ /* jump to identity mapped page */ ++ addq $(identity_mapped - relocate_kernel), %r8 ++ pushq %r8 ++ ret + +- /* store the parameters back on the stack */ +- pushq %rdx /* store the start address */ ++identity_mapped: ++ /* store the start address on the stack */ ++ pushq %rdx + + /* Set cr0 to a known state: + * 31 1 == Paging enabled +@@ -136,8 +274,3 @@ relocate_new_kernel: + xorq %r15, %r15 + + ret +-relocate_new_kernel_end: +- +- .globl relocate_new_kernel_size +-relocate_new_kernel_size: +- .quad relocate_new_kernel_end - relocate_new_kernel +--- a/include/asm-x86_64/kexec.h ++++ b/include/asm-x86_64/kexec.h +@@ -1,6 +1,27 @@ + #ifndef _X86_64_KEXEC_H + #define _X86_64_KEXEC_H + ++#define PA_CONTROL_PAGE 0 ++#define VA_CONTROL_PAGE 1 ++#define PA_PGD 2 ++#define VA_PGD 3 ++#define PA_PUD_0 4 ++#define VA_PUD_0 5 ++#define PA_PMD_0 6 ++#define VA_PMD_0 7 ++#define PA_PTE_0 8 ++#define VA_PTE_0 9 ++#define PA_PUD_1 10 ++#define VA_PUD_1 11 ++#define PA_PMD_1 12 ++#define VA_PMD_1 13 ++#define PA_PTE_1 14 ++#define VA_PTE_1 15 ++#define PA_TABLE_PAGE 16 ++#define PAGES_NR 17 ++ ++#ifndef __ASSEMBLY__ ++ + #include <linux/string.h> + + #include <asm/page.h> +@@ -64,4 +85,12 @@ static inline void crash_setup_regs(stru + newregs->rip = (unsigned long)current_text_addr(); + } + } ++ ++NORET_TYPE void ++relocate_kernel(unsigned long indirection_page, ++ unsigned long page_list, ++ unsigned long start_address) ATTRIB_NORET; ++ ++#endif /* __ASSEMBLY__ */ ++ + #endif /* _X86_64_KEXEC_H */ --- /dev/null +++ work/patches/linux-2.6.16.29/linux-2.6.16.29-crash-xen-x86_64.patch @@ -0,0 +1,35 @@ +--- 0001/arch/x86_64/kernel/crash.c ++++ work/arch/x86_64/kernel/crash.c 2006-10-26 15:44:27.000000000 +0900 +@@ -92,6 +92,7 @@ static void crash_save_self(struct pt_re + crash_save_this_cpu(regs, cpu); + } + ++#ifndef CONFIG_XEN + #ifdef CONFIG_SMP + static atomic_t waiting_for_crash_ipi; + +@@ -156,6 +157,7 @@ static void nmi_shootdown_cpus(void) + /* There are no cpus to shootdown */ + } + #endif ++#endif /* CONFIG_XEN */ + + void machine_crash_shutdown(struct pt_regs *regs) + { +@@ -173,6 +175,8 @@ void machine_crash_shutdown(struct pt_re + + /* Make a note of crashing cpu. Will be used in NMI callback.*/ + crashing_cpu = smp_processor_id(); ++ ++#ifndef CONFIG_XEN + nmi_shootdown_cpus(); + + if(cpu_has_apic) +@@ -181,6 +185,6 @@ void machine_crash_shutdown(struct pt_re + #if defined(CONFIG_X86_IO_APIC) + disable_IO_APIC(); + #endif +- ++#endif /* CONFIG_XEN */ + crash_save_self(regs); + } --- /dev/null +++ work/patches/linux-2.6.16.29/linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch @@ -0,0 +1,161 @@ +kexec: Move asm segment handling code to the assembly file (x86_64) + +This patch moves the idt, gdt, and segment handling code from machine_kexec.c +to relocate_kernel.S. The main reason behind this move is to avoid code +duplication in the Xen hypervisor. With this patch all code required to kexec +is put on the control page. + +On top of that this patch also counts as a cleanup - I think it is much +nicer to write assembly directly in assembly files than wrap inline assembly +in C functions for no apparent reason. + +Signed-off-by: Magnus Damm <magnus@valinux.co.jp> +--- + + Applies to 2.6.19-rc1. + + machine_kexec.c | 58 ----------------------------------------------------- + relocate_kernel.S | 50 +++++++++++++++++++++++++++++++++++++++++---- + 2 files changed, 45 insertions(+), 63 deletions(-) + +--- 0002/arch/x86_64/kernel/machine_kexec.c ++++ work/arch/x86_64/kernel/machine_kexec.c 2006-10-05 16:15:49.000000000 +0900 +@@ -112,47 +112,6 @@ static int init_pgtable(struct kimage *i + return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT); + } + +-static void set_idt(void *newidt, u16 limit) +-{ +- struct desc_ptr curidt; +- +- /* x86-64 supports unaliged loads & stores */ +- curidt.size = limit; +- curidt.address = (unsigned long)newidt; +- +- __asm__ __volatile__ ( +- "lidtq %0\n" +- : : "m" (curidt) +- ); +-}; +- +- +-static void set_gdt(void *newgdt, u16 limit) +-{ +- struct desc_ptr curgdt; +- +- /* x86-64 supports unaligned loads & stores */ +- curgdt.size = limit; +- curgdt.address = (unsigned long)newgdt; +- +- __asm__ __volatile__ ( +- "lgdtq %0\n" +- : : "m" (curgdt) +- ); +-}; +- +-static void load_segments(void) +-{ +- __asm__ __volatile__ ( +- "\tmovl %0,%%ds\n" +- "\tmovl %0,%%es\n" +- "\tmovl %0,%%ss\n" +- "\tmovl %0,%%fs\n" +- "\tmovl %0,%%gs\n" +- : : "a" (__KERNEL_DS) : "memory" +- ); +-} +- + int machine_kexec_prepare(struct kimage *image) + { + unsigned long start_pgtable; +@@ -209,23 +168,6 @@ NORET_TYPE void machine_kexec(struct kim + page_list[PA_TABLE_PAGE] + (unsigned long)__pa(page_address(image->control_code_page)); + +- /* The segment registers are funny things, they have both a +- * visible and an invisible part. Whenever the visible part is +- * set to a specific selector, the invisible part is loaded +- * with from a table in memory. At no other time is the +- * descriptor table in memory accessed. +- * +- * I take advantage of this here by force loading the +- * segments, before I zap the gdt with an invalid value. +- */ +- load_segments(); +- /* The gdt & idt are now invalid. +- * If you want to load them you must set up your own idt & gdt. +- */ +- set_gdt(phys_to_virt(0),0); +- set_idt(phys_to_virt(0),0); +- +- /* now call it */ + relocate_kernel((unsigned long)image->head, (unsigned long)page_list, + image->start); + } +--- 0002/arch/x86_64/kernel/relocate_kernel.S ++++ work/arch/x86_64/kernel/relocate_kernel.S 2006-10-05 16:18:07.000000000 +0900 +@@ -159,13 +159,39 @@ relocate_new_kernel: + movq PTR(PA_PGD)(%rsi), %r9 + movq %r9, %cr3 + ++ /* setup idt */ ++ movq %r8, %rax ++ addq $(idt_80 - relocate_kernel), %rax ++ lidtq (%rax) ++ ++ /* setup gdt */ ++ movq %r8, %rax ++ addq $(gdt - relocate_kernel), %rax ++ movq %r8, %r9 ++ addq $((gdt_80 - relocate_kernel) + 2), %r9 ++ movq %rax, (%r9) ++ ++ movq %r8, %rax ++ addq $(gdt_80 - relocate_kernel), %rax ++ lgdtq (%rax) ++ ++ /* setup data segment registers */ ++ xorl %eax, %eax ++ movl %eax, %ds ++ movl %eax, %es ++ movl %eax, %fs ++ movl %eax, %gs ++ movl %eax, %ss ++ + /* setup a new stack at the end of the physical control page */ + lea 4096(%r8), %rsp + +- /* jump to identity mapped page */ +- addq $(identity_mapped - relocate_kernel), %r8 +- pushq %r8 +- ret ++ /* load new code segment and jump to identity mapped page */ ++ movq %r8, %rax ++ addq $(identity_mapped - relocate_kernel), %rax ++ pushq $(gdt_cs - gdt) ++ pushq %rax ++ lretq + + identity_mapped: + /* store the start address on the stack */ +@@ -272,5 +298,19 @@ identity_mapped: + xorq %r13, %r13 + xorq %r14, %r14 + xorq %r15, %r15 +- + ret ++ ++ .align 16 ++gdt: ++ .quad 0x0000000000000000 /* NULL descriptor */ ++gdt_cs: ++ .quad 0x00af9a000000ffff ++gdt_end: ++ ++gdt_80: ++ .word gdt_end - gdt - 1 /* limit */ ++ .quad 0 /* base - filled in by code above */ ++ ++idt_80: ++ .word 0 /* limit */ ++ .quad 0 /* base */ --- /dev/null +++ work/patches/linux-2.6.16.29/linux-2.6.19-rc1-kexec-xen-x86_64.patch @@ -0,0 +1,162 @@ +--- 0006/arch/x86_64/kernel/machine_kexec.c ++++ work/arch/x86_64/kernel/machine_kexec.c 2006-10-06 15:36:16.000000000 +0900 +@@ -24,6 +24,104 @@ static u64 kexec_pud1[512] PAGE_ALIGNED; + static u64 kexec_pmd1[512] PAGE_ALIGNED; + static u64 kexec_pte1[512] PAGE_ALIGNED; + ++#ifdef CONFIG_XEN ++ ++/* In the case of Xen, override hypervisor functions to be able to create ++ * a regular identity mapping page table... ++ */ ++ ++#include <xen/interface/kexec.h> ++#include <xen/interface/memory.h> ++ ++#define x__pmd(x) ((pmd_t) { (x) } ) ++#define x__pud(x) ((pud_t) { (x) } ) ++#define x__pgd(x) ((pgd_t) { (x) } ) ++ ++#define x_pmd_val(x) ((x).pmd) ++#define x_pud_val(x) ((x).pud) ++#define x_pgd_val(x) ((x).pgd) ++ ++static inline void x_set_pmd(pmd_t *dst, pmd_t val) ++{ ++ x_pmd_val(*dst) = x_pmd_val(val); ++} ++ ++static inline void x_set_pud(pud_t *dst, pud_t val) ++{ ++ x_pud_val(*dst) = phys_to_machine(x_pud_val(val)); ++} ++ ++static inline void x_pud_clear (pud_t *pud) ++{ ++ x_pud_val(*pud) = 0; ++} ++ ++static inline void x_set_pgd(pgd_t *dst, pgd_t val) ++{ ++ x_pgd_val(*dst) = phys_to_machine(x_pgd_val(val)); ++} ++ ++static inline void x_pgd_clear (pgd_t * pgd) ++{ ++ x_pgd_val(*pgd) = 0; ++} ++ ++#define X__PAGE_KERNEL_LARGE_EXEC \ ++ _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_PSE ++#define X_KERNPG_TABLE _PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY ++ ++#define __ma(x) (pfn_to_mfn(__pa((x)) >> PAGE_SHIFT) << PAGE_SHIFT) ++ ++#if PAGES_NR > KEXEC_XEN_NO_PAGES ++#error PAGES_NR is greater than KEXEC_XEN_NO_PAGES - Xen support will break ++#endif ++ ++#if PA_CONTROL_PAGE != 0 ++#error PA_CONTROL_PAGE is non zero - Xen support will break ++#endif ++ ++void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, struct kimage *image) ++{ ++ void *control_page; ++ void *table_page; ++ ++ memset(xki->page_list, 0, sizeof(xki->page_list)); ++ ++ control_page = page_address(image->control_code_page) + PAGE_SIZE; ++ memcpy(control_page, relocate_kernel, PAGE_SIZE); ++ ++ table_page = page_address(image->control_code_page); ++ ++ xki->page_list[PA_CONTROL_PAGE] = __ma(control_page); ++ xki->page_list[PA_TABLE_PAGE] = __ma(table_page); ++ ++ xki->page_list[PA_PGD] = __ma(kexec_pgd); ++ xki->page_list[PA_PUD_0] = __ma(kexec_pud0); ++ xki->page_list[PA_PUD_1] = __ma(kexec_pud1); ++ xki->page_list[PA_PMD_0] = __ma(kexec_pmd0); ++ xki->page_list[PA_PMD_1] = __ma(kexec_pmd1); ++ xki->page_list[PA_PTE_0] = __ma(kexec_pte0); ++ xki->page_list[PA_PTE_1] = __ma(kexec_pte1); ++} ++ ++#else /* CONFIG_XEN */ ++ ++#define x__pmd(x) __pmd(x) ++#define x__pud(x) __pud(x) ++#define x__pgd(x) __pgd(x) ++ ++#define x_set_pmd(x, y) set_pmd(x, y) ++#define x_set_pud(x, y) set_pud(x, y) ++#define x_set_pgd(x, y) set_pgd(x, y) ++ ++#define x_pud_clear(x) pud_clear(x) ++#define x_pgd_clear(x) pgd_clear(x) ++ ++#define X__PAGE_KERNEL_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC ++#define X_KERNPG_TABLE _KERNPG_TABLE ++ ++#endif /* CONFIG_XEN */ ++ + static void init_level2_page(pmd_t *level2p, unsigned long addr) + { + unsigned long end_addr; +@@ -31,7 +129,7 @@ static void init_level2_page(pmd_t *leve + addr &= PAGE_MASK; + end_addr = addr + PUD_SIZE; + while (addr < end_addr) { +- set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); ++ x_set_pmd(level2p++, x__pmd(addr | X__PAGE_KERNEL_LARGE_EXEC)); + addr += PMD_SIZE; + } + } +@@ -56,12 +154,12 @@ static int init_level3_page(struct kimag + } + level2p = (pmd_t *)page_address(page); + init_level2_page(level2p, addr); +- set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE)); ++ x_set_pud(level3p++, x__pud(__pa(level2p) | X_KERNPG_TABLE)); + addr += PUD_SIZE; + } + /* clear the unused entries */ + while (addr < end_addr) { +- pud_clear(level3p++); ++ x_pud_clear(level3p++); + addr += PUD_SIZE; + } + out: +@@ -92,12 +190,12 @@ static int init_level4_page(struct kimag + if (result) { + goto out; + } +- set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); ++ x_set_pgd(level4p++, x__pgd(__pa(level3p) | X_KERNPG_TABLE)); + addr += PGDIR_SIZE; + } + /* clear the unused entries */ + while (addr < end_addr) { +- pgd_clear(level4p++); ++ x_pgd_clear(level4p++); + addr += PGDIR_SIZE; + } + out: +@@ -108,8 +206,14 @@ out: + static int init_pgtable(struct kimage *image, unsigned long start_pgtable) + { + pgd_t *level4p; ++ unsigned long x_end_pfn = end_pfn; ++ ++#ifdef CONFIG_XEN ++ x_end_pfn = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL); ++#endif ++ + level4p = (pgd_t *)__va(start_pgtable); +- return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT); ++ return init_level4_page(image, level4p, 0, x_end_pfn << PAGE_SHIFT); + } + + int machine_kexec_prepare(struct kimage *image) --- 0005/patches/linux-2.6.16.29/series +++ work/patches/linux-2.6.16.29/series @@ -5,6 +5,10 @@ git-3566561bfadffcb5dbc85d576be80c0dbf2c linux-2.6.19-rc1-kexec-move_segment_code-i386.patch linux-2.6.19-rc1-kexec-xen-i386.patch linux-2.6.16.29-crash-xen-i386.patch +git-4bfaaef01a1badb9e8ffb0c0a37cd2379008d21f.patch +linux-2.6.19-rc1-kexec-move_segment_code-x86_64.patch +linux-2.6.19-rc1-kexec-xen-x86_64.patch +linux-2.6.16.29-crash-xen-x86_64.patch blktap-aio-16_03_06.patch device_bind.patch fix-hz-suspend.patch --- 0001/xen/arch/x86/x86_64/entry.S +++ work/xen/arch/x86/x86_64/entry.S @@ -573,6 +573,7 @@ ENTRY(hypercall_table) .quad do_hvm_op .quad do_sysctl /* 35 */ .quad do_domctl + .quad do_kexec_op .rept NR_hypercalls-((.-hypercall_table)/8) .quad do_ni_hypercall .endr @@ -615,6 +616,7 @@ ENTRY(hypercall_args_table) .byte 2 /* do_hvm_op */ .byte 1 /* do_sysctl */ /* 35 */ .byte 1 /* do_domctl */ + .byte 2 /* do_kexec */ .rept NR_hypercalls-(.-hypercall_args_table) .byte 0 /* do_ni_hypercall */ .endr --- 0004/xen/include/asm-x86/x86_64/elf.h +++ work/xen/include/asm-x86/x86_64/elf.h @@ -1,15 +1,67 @@ #ifndef __X86_64_ELF_H__ #define __X86_64_ELF_H__ -#include <xen/lib.h> /* for printk() used in stub */ +#include <asm/processor.h> typedef struct { - unsigned long dummy; + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; + unsigned long rbp; + unsigned long rbx; + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long rax; + unsigned long rcx; + unsigned long rdx; + unsigned long rsi; + unsigned long rdi; + unsigned long orig_rax; + unsigned long rip; + unsigned long cs; + unsigned long eflags; + unsigned long rsp; + unsigned long ss; + unsigned long thread_fs; + unsigned long thread_gs; + unsigned long ds; + unsigned long es; + unsigned long fs; + unsigned long gs; } ELF_Gregset; extern inline void elf_core_save_regs(ELF_Gregset *dst) { - printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); + asm volatile("movq %%r15,%0" : "=m"(dst->r15)); + asm volatile("movq %%r14,%0" : "=m"(dst->r14)); + asm volatile("movq %%r13,%0" : "=m"(dst->r13)); + asm volatile("movq %%r12,%0" : "=m"(dst->r12)); + asm volatile("movq %%rbp,%0" : "=m"(dst->rbp)); + asm volatile("movq %%rbx,%0" : "=m"(dst->rbx)); + asm volatile("movq %%r11,%0" : "=m"(dst->r11)); + asm volatile("movq %%r10,%0" : "=m"(dst->r10)); + asm volatile("movq %%r9,%0" : "=m"(dst->r9)); + asm volatile("movq %%r8,%0" : "=m"(dst->r8)); + asm volatile("movq %%rax,%0" : "=m"(dst->rax)); + asm volatile("movq %%rcx,%0" : "=m"(dst->rcx)); + asm volatile("movq %%rdx,%0" : "=m"(dst->rdx)); + asm volatile("movq %%rsi,%0" : "=m"(dst->rsi)); + asm volatile("movq %%rdi,%0" : "=m"(dst->rdi)); + /* orig_rax not filled in for now */ + dst->rip = (unsigned long)current_text_addr(); + asm volatile("movl %%cs, %%eax;" :"=a"(dst->cs)); + asm volatile("pushfq; popq %0" :"=m"(dst->eflags)); + asm volatile("movq %%rsp,%0" : "=m"(dst->rsp)); + asm volatile("movl %%ss, %%eax;" :"=a"(dst->ss)); + /* thread_fs not filled in for now */ + /* thread_gs not filled in for now */ + asm volatile("movl %%ds, %%eax;" :"=a"(dst->ds)); + asm volatile("movl %%es, %%eax;" :"=a"(dst->es)); + asm volatile("movl %%fs, %%eax;" :"=a"(dst->fs)); + asm volatile("movl %%gs, %%eax;" :"=a"(dst->gs)); } #endif /* __X86_64_ELF_H__ */ --- 0004/xen/include/asm-x86/x86_64/kexec.h +++ work/xen/include/asm-x86/x86_64/kexec.h @@ -1,14 +1,29 @@ +/****************************************************************************** + * kexec.h + * + * Based heavily on machine_kexec.c and kexec.h from Linux 2.6.19-rc1 + * + */ + #ifndef __X86_64_KEXEC_H__ #define __X86_64_KEXEC_H__ - -#include <xen/lib.h> /* for printk() used in stub */ + #include <xen/types.h> -#include <public/xen.h> #include <xen/kexec.h> +#include <asm/fixmap.h> + +typedef void (*relocate_new_kernel_t)( + unsigned long indirection_page, + unsigned long page_list, + unsigned long start_address); static inline void machine_kexec(xen_kexec_image_t *image) { - printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); + relocate_new_kernel_t rnk; + + rnk = (relocate_new_kernel_t) image->page_list[1]; + (*rnk)(image->indirection_page, (unsigned long)image->page_list, + image->start_address); } #endif /* __X86_64_KEXEC_H__ */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel