Sheng Yang
2010-Mar-05 06:07 UTC
[Xen-devel] [PATCH 0/7][v6] PV extension of HVM (Hybrid) for Xen
Hi, Jeremy. Here is the sixth version of the patchset to enable PV extension of HVM support in the Linux kernel for Xen. The PV extension of HVM is started from real mode like an HVM guest, but also with a range of PV features (e.g. PV timer, event channel, as well as PV drivers). So a guest with this feature can take advantage of both H/W virtualization and Para-Virtualization. The first two patches of the patchset import several header files from Jeremy's tree and the Xen tree, with respect to Jeremy's and Keir's work. The whole patchset is based on Linux upstream. You need a line like: cpuid = [ '0x40000002:edx=0x3' ] in the HVM configuration file to expose the hybrid feature to the guest, and CONFIG_XEN in the guest kernel configuration file to enable the hybrid support. The compiled image can then be used as a native/pv domU/hvm guest/pv feature hvm kernel. Currently the patchset supports x86_64 only. The current base is Linux 2.6.33. Change from v5: Update the comments from Jeremy. Change from v4: 1. Add a new CONFIG_XEN_HVM_PV to enable the feature in the kernel. 2. Separate the related code from enlighten.c into hvmpv.c. 3. Separate the feature "PV clocksource" from evtchn. Now we can support an HVM guest with PV clocksource. This would be enabled by default. 4. Drop PV halt and pv drivers in this edition. We can work on that later. 5. Update the patchset following Jeremy's comments. Change from v3: 1. Rebase to the Linux 2.6.33 release. 2. Change the name to "PV extension of HVM". 3. Some minor coding polishing. Change from v2: 1. Change the name "hybrid" to "PV featured HVM". 2. Unified the PV driver's judgement of xen_domain() to xen_evtchn_enabled(). 3. Move the function (evtchn) initialization hypercall near the real enabling place, rather than a unified place before the function is enabled. 4. Remove the reserved E820 region for the grant table. Use the QEmu Xen platform device's MMIO instead. The major changes from v1: 1. SMP support. 2. Modify the entrance point to avoid most of the generic kernel modification. 3. 
Binding PV timer with event channel mechanism. -- regards Yang, Sheng arch/x86/include/asm/xen/cpuid.h | 73 +++++++++++ arch/x86/include/asm/xen/hypercall.h | 6 + arch/x86/include/asm/xen/hypervisor.h | 6 + arch/x86/kernel/setup.c | 4 + arch/x86/xen/Kconfig | 4 + arch/x86/xen/Makefile | 1 + arch/x86/xen/enlighten.c | 6 +- arch/x86/xen/hvmpv.c | 214 +++++++++++++++++++++++++++++++++ arch/x86/xen/irq.c | 28 +++++ arch/x86/xen/smp.c | 76 +++++++++++- arch/x86/xen/time.c | 12 ++- arch/x86/xen/xen-ops.h | 17 +++ drivers/block/xen-blkfront.c | 2 +- drivers/input/xen-kbdfront.c | 2 +- drivers/net/xen-netfront.c | 2 +- drivers/video/xen-fbfront.c | 2 +- drivers/xen/events.c | 74 +++++++++++- drivers/xen/grant-table.c | 2 +- drivers/xen/xenbus/xenbus_probe.c | 4 +- include/xen/events.h | 4 + include/xen/hvm.h | 28 +++++ include/xen/interface/hvm/hvm_op.h | 80 ++++++++++++ include/xen/interface/hvm/params.h | 111 +++++++++++++++++ include/xen/interface/xen.h | 6 +- include/xen/xen.h | 11 ++ 25 files changed, 753 insertions(+), 22 deletions(-) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Add support for hvm_op hypercall. Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Signed-off-by: Sheng Yang <sheng@linux.intel.com> --- arch/x86/include/asm/xen/hypercall.h | 6 ++ include/xen/hvm.h | 23 +++++++ include/xen/interface/hvm/hvm_op.h | 72 ++++++++++++++++++++++ include/xen/interface/hvm/params.h | 111 ++++++++++++++++++++++++++++++++++ 4 files changed, 212 insertions(+), 0 deletions(-) create mode 100644 include/xen/hvm.h create mode 100644 include/xen/interface/hvm/hvm_op.h create mode 100644 include/xen/interface/hvm/params.h diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 9c371e4..47c2ebb 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -417,6 +417,12 @@ HYPERVISOR_nmi_op(unsigned long op, unsigned long arg) return _hypercall2(int, nmi_op, op, arg); } +static inline unsigned long __must_check +HYPERVISOR_hvm_op(int op, void *arg) +{ + return _hypercall2(unsigned long, hvm_op, op, arg); +} + static inline void MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) { diff --git a/include/xen/hvm.h b/include/xen/hvm.h new file mode 100644 index 0000000..4ea8887 --- /dev/null +++ b/include/xen/hvm.h @@ -0,0 +1,23 @@ +/* Simple wrappers around HVM functions */ +#ifndef XEN_HVM_H__ +#define XEN_HVM_H__ + +#include <xen/interface/hvm/params.h> + +static inline unsigned long hvm_get_parameter(int idx) +{ + struct xen_hvm_param xhv; + int r; + + xhv.domid = DOMID_SELF; + xhv.index = idx; + r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); + if (r < 0) { + printk(KERN_ERR "cannot get hvm parameter %d: %d.\n", + idx, r); + return 0; + } + return xhv.value; +} + +#endif /* XEN_HVM_H__ */ diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h new file mode 100644 index 0000000..7c74ba4 --- /dev/null +++ b/include/xen/interface/hvm/hvm_op.h @@ -0,0 +1,72 @@ +/* + * 
Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ +#define __XEN_PUBLIC_HVM_HVM_OP_H__ + +/* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */ +#define HVMOP_set_param 0 +#define HVMOP_get_param 1 +struct xen_hvm_param { + domid_t domid; /* IN */ + uint32_t index; /* IN */ + uint64_t value; /* IN/OUT */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param); + +/* Set the logical level of one of a domain''s PCI INTx wires. */ +#define HVMOP_set_pci_intx_level 2 +struct xen_hvm_set_pci_intx_level { + /* Domain to be updated. */ + domid_t domid; + /* PCI INTx identification in PCI topology (domain:bus:device:intx). */ + uint8_t domain, bus, device, intx; + /* Assertion level (0 = unasserted, 1 = asserted). */ + uint8_t level; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_pci_intx_level); + +/* Set the logical level of one of a domain''s ISA IRQ wires. 
*/ +#define HVMOP_set_isa_irq_level 3 +struct xen_hvm_set_isa_irq_level { + /* Domain to be updated. */ + domid_t domid; + /* ISA device identification, by ISA IRQ (0-15). */ + uint8_t isa_irq; + /* Assertion level (0 = unasserted, 1 = asserted). */ + uint8_t level; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_isa_irq_level); + +#define HVMOP_set_pci_link_route 4 +struct xen_hvm_set_pci_link_route { + /* Domain to be updated. */ + domid_t domid; + /* PCI link identifier (0-3). */ + uint8_t link; + /* ISA IRQ (1-15), or 0 (disable link). */ + uint8_t isa_irq; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_pci_link_route); + +/* Flushes all VCPU TLBs: @arg must be NULL. */ +#define HVMOP_flush_tlbs 5 + +#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h new file mode 100644 index 0000000..15d828f --- /dev/null +++ b/include/xen/interface/hvm/params.h @@ -0,0 +1,111 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_PARAMS_H__ +#define __XEN_PUBLIC_HVM_PARAMS_H__ + +#include "hvm_op.h" + +/* + * Parameter space for HVMOP_{set,get}_param. + */ + +/* + * How should CPU0 event-channel notifications be delivered? + * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt). + * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows: + * Domain = val[47:32], Bus = val[31:16], + * DevFn = val[15: 8], IntX = val[ 1: 0] + * If val == 0 then CPU0 event-channel notifications are not delivered. + */ +#define HVM_PARAM_CALLBACK_IRQ 0 + +/* + * These are not used by Xen. They are here for convenience of HVM-guest + * xenbus implementations. + */ +#define HVM_PARAM_STORE_PFN 1 +#define HVM_PARAM_STORE_EVTCHN 2 + +#define HVM_PARAM_PAE_ENABLED 4 + +#define HVM_PARAM_IOREQ_PFN 5 + +#define HVM_PARAM_BUFIOREQ_PFN 6 + +#ifdef __ia64__ + +#define HVM_PARAM_NVRAM_FD 7 +#define HVM_PARAM_VHPT_SIZE 8 +#define HVM_PARAM_BUFPIOREQ_PFN 9 + +#elif defined(__i386__) || defined(__x86_64__) + +/* Expose Viridian interfaces to this HVM guest? */ +#define HVM_PARAM_VIRIDIAN 9 + +#endif + +/* + * Set mode for virtual timers (currently x86 only): + * delay_for_missed_ticks (default): + * Do not advance a vcpu''s time beyond the correct delivery time for + * interrupts that have been missed due to preemption. Deliver missed + * interrupts when the vcpu is rescheduled and advance the vcpu''s virtual + * time stepwise for each one. + * no_delay_for_missed_ticks: + * As above, missed interrupts are delivered, but guest time always tracks + * wallclock (i.e., real) time while doing so. + * no_missed_ticks_pending: + * No missed interrupts are held pending. 
Instead, to ensure ticks are + * delivered at some non-zero rate, if we detect missed ticks then the + * internal tick alarm is not disabled if the VCPU is preempted during the + * next tick period. + * one_missed_tick_pending: + * Missed interrupts are collapsed together and delivered as one ''late tick''. + * Guest time always tracks wallclock (i.e., real) time. + */ +#define HVM_PARAM_TIMER_MODE 10 +#define HVMPTM_delay_for_missed_ticks 0 +#define HVMPTM_no_delay_for_missed_ticks 1 +#define HVMPTM_no_missed_ticks_pending 2 +#define HVMPTM_one_missed_tick_pending 3 + +/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */ +#define HVM_PARAM_HPET_ENABLED 11 + +/* Identity-map page directory used by Intel EPT when CR0.PG=0. */ +#define HVM_PARAM_IDENT_PT 12 + +/* Device Model domain, defaults to 0. */ +#define HVM_PARAM_DM_DOMAIN 13 + +/* ACPI S state: currently support S0 and S3 on x86. */ +#define HVM_PARAM_ACPI_S_STATE 14 + +/* TSS used on Intel when CR0.PE=0. */ +#define HVM_PARAM_VM86_TSS 15 + +/* Boolean: Enable aligning all periodic vpts to reduce interrupts */ +#define HVM_PARAM_VPT_ALIGN 16 + +#define HVM_NR_PARAMS 17 + +#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ -- 1.5.4.5 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
From: Keir Fraser <keir.fraser@citrix.com> Which would be used by CPUID detection later Signed-off-by: Keir Fraser <keir.fraser@citrix.com> Signed-off-by: Sheng Yang <sheng@linux.intel.com> --- arch/x86/include/asm/xen/cpuid.h | 68 ++++++++++++++++++++++++++++++++++++++ 1 files changed, 68 insertions(+), 0 deletions(-) create mode 100644 arch/x86/include/asm/xen/cpuid.h diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h new file mode 100644 index 0000000..8787f03 --- /dev/null +++ b/arch/x86/include/asm/xen/cpuid.h @@ -0,0 +1,68 @@ +/****************************************************************************** + * arch/include/asm/xen/cpuid.h + * + * CPUID interface to Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2007 Citrix Systems, Inc. 
+ * + * Authors: + * Keir Fraser <keir.fraser@citrix.com> + */ + +#ifndef __ASM_X86_XEN_CPUID_H__ +#define __ASM_X86_XEN_CPUID_H__ + +/* Xen identification leaves start at 0x40000000. */ +#define XEN_CPUID_FIRST_LEAF 0x40000000 +#define XEN_CPUID_LEAF(i) (XEN_CPUID_FIRST_LEAF + (i)) + +/* + * Leaf 1 (0x40000000) + * EAX: Largest Xen-information leaf. All leaves up to an including @EAX + * are supported by the Xen host. + * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification + * of a Xen host. + */ +#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */ +#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */ +#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */ + +/* + * Leaf 2 (0x40000001) + * EAX[31:16]: Xen major version. + * EAX[15: 0]: Xen minor version. + * EBX-EDX: Reserved (currently all zeroes). + */ + +/* + * Leaf 3 (0x40000002) + * EAX: Number of hypercall transfer pages. This register is always guaranteed + * to specify one hypercall page. + * EBX: Base address of Xen-specific MSRs. + * ECX: Features 1. Unused bits are set to zero. + * EDX: Features 2. Unused bits are set to zero. + */ + +/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */ +#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0 +#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0) + +#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */ -- 1.5.4.5 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Sheng Yang
2010-Mar-05 06:07 UTC
[Xen-devel] [PATCH 3/7] xen: Make pv drivers only work with xen_pv_domain()
Otherwise they would still try to enable with HVM domain type. Signed-off-by: Sheng Yang <sheng@linux.intel.com> --- drivers/block/xen-blkfront.c | 2 +- drivers/input/xen-kbdfront.c | 2 +- drivers/net/xen-netfront.c | 2 +- drivers/video/xen-fbfront.c | 2 +- drivers/xen/grant-table.c | 2 +- drivers/xen/xenbus/xenbus_probe.c | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 05a31e5..32e28bd 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1068,7 +1068,7 @@ static struct xenbus_driver blkfront = { static int __init xlblk_init(void) { - if (!xen_domain()) + if (!xen_pv_domain()) return -ENODEV; if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c index c721c0a..0c3f4ca 100644 --- a/drivers/input/xen-kbdfront.c +++ b/drivers/input/xen-kbdfront.c @@ -338,7 +338,7 @@ static struct xenbus_driver xenkbd_driver = { static int __init xenkbd_init(void) { - if (!xen_domain()) + if (!xen_pv_domain()) return -ENODEV; /* Nothing to do if running in dom0. */ diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index a869b45..e3ae126 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1804,7 +1804,7 @@ static struct xenbus_driver netfront_driver = { static int __init netif_init(void) { - if (!xen_domain()) + if (!xen_pv_domain()) return -ENODEV; if (xen_initial_domain()) diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c index 603598f..3360470 100644 --- a/drivers/video/xen-fbfront.c +++ b/drivers/video/xen-fbfront.c @@ -683,7 +683,7 @@ static struct xenbus_driver xenfb_driver = { static int __init xenfb_init(void) { - if (!xen_domain()) + if (!xen_pv_domain()) return -ENODEV; /* Nothing to do if running in dom0. 
*/ diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 4c6c0bd..ade7f3d 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -510,7 +510,7 @@ static int __devinit gnttab_init(void) unsigned int max_nr_glist_frames, nr_glist_frames; unsigned int nr_init_grefs; - if (!xen_domain()) + if (!xen_pv_domain()) return -ENODEV; nr_grant_frames = 1; diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 2f7aaa9..a5712cd 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -786,7 +786,7 @@ static int __init xenbus_probe_init(void) DPRINTK(""); err = -ENODEV; - if (!xen_domain()) + if (!xen_pv_domain()) goto out_error; /* Register ourselves with the kernel bus subsystem */ @@ -922,7 +922,7 @@ static void wait_for_devices(struct xenbus_driver *xendrv) struct device_driver *drv = xendrv ? &xendrv->driver : NULL; unsigned int seconds_waited = 0; - if (!ready_to_wait_for_devices || !xen_domain()) + if (!ready_to_wait_for_devices || !xen_pv_domain()) return; while (exists_connecting_device(drv)) { -- 1.5.4.5 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Sheng Yang
2010-Mar-05 06:07 UTC
[Xen-devel] [PATCH 4/7] xen/hvm: Xen PV extension of HVM initialization
The PV extended HVM (once known as Hybrid) is started from real mode like an HVM guest, but also with a component-based PV feature selection (e.g. PV halt, PV timer, event channel, then PV drivers). So the guest can take advantage of both H/W virtualization and Para-Virtualization. This patch introduces the PV extension of HVM guest initialization. The guest would detect the capability using CPUID 0x40000002.edx, then call the HVMOP_enable_pv hypercall to enable pv support in the hypervisor. Signed-off-by: Sheng Yang <sheng@linux.intel.com> Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com> --- arch/x86/include/asm/xen/cpuid.h | 5 ++ arch/x86/xen/Kconfig | 4 + arch/x86/xen/Makefile | 1 + arch/x86/xen/hvmpv.c | 137 ++++++++++++++++++++++++++++++++++++ include/xen/interface/hvm/hvm_op.h | 8 ++ include/xen/xen.h | 11 +++ 6 files changed, 166 insertions(+), 0 deletions(-) create mode 100644 arch/x86/xen/hvmpv.c diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h index 8787f03..a93c851 100644 --- a/arch/x86/include/asm/xen/cpuid.h +++ b/arch/x86/include/asm/xen/cpuid.h @@ -65,4 +65,9 @@ #define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0 #define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0) +#define _XEN_CPUID_FEAT2_HVM_PV 0 +#define XEN_CPUID_FEAT2_HVM_PV (1u<<0) +#define _XEN_CPUID_FEAT2_HVM_PV_EVTCHN 1 +#define XEN_CPUID_FEAT2_HVM_PV_EVTCHN (1u<<1) + #endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */ diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index b83e119..74fc233 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -36,3 +36,7 @@ config XEN_DEBUG_FS help Enable statistics output and various tuning options in debugfs. Enabling this option may incur a significant performance overhead. 
+ +config XEN_HVM_PV + def_bool y + depends on XEN && X86_64 diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 3bb4fc2..73bd5db 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -17,4 +17,5 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o +obj-$(CONFIG_XEN_HVM_PV) += hvmpv.o diff --git a/arch/x86/xen/hvmpv.c b/arch/x86/xen/hvmpv.c new file mode 100644 index 0000000..540eef4 --- /dev/null +++ b/arch/x86/xen/hvmpv.c @@ -0,0 +1,137 @@ +/* + * PV extension of HVM implementation. + * + * Sheng Yang <sheng@linux.intel.com>, Intel Corporation, 2010 + * + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/module.h> + +#include <xen/xen.h> +#include <xen/features.h> +#include <xen/events.h> +#include <xen/hvm.h> +#include <xen/interface/xen.h> +#include <xen/interface/version.h> +#include <xen/interface/memory.h> + +#include <asm/xen/cpuid.h> +#include <asm/xen/hypercall.h> +#include <asm/xen/hypervisor.h> + +#include "xen-ops.h" + +u32 xen_hvm_pv_features; +EXPORT_SYMBOL_GPL(xen_hvm_pv_features); + +static const struct pv_info xen_hvm_pv_info __initdata = { + .paravirt_enabled = 1, + .shared_kernel_pmd = 0, + .kernel_rpl = 0, + .name = "Xen", +}; + +static void __init xen_hvm_pv_banner(void) +{ + unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL); + struct xen_extraversion extra; + HYPERVISOR_xen_version(XENVER_extraversion, &extra); + + printk(KERN_INFO "Booting PV extended HVM kernel on %s\n", + pv_info.name); + printk(KERN_INFO "Xen version: %d.%d%s\n", + version >> 16, version & 0xffff, extra.extraversion); +} + +static int __init xen_para_available(void) +{ + uint32_t eax, ebx, ecx, edx; + cpuid(XEN_CPUID_LEAF(0), &eax, &ebx, &ecx, &edx); + + if (ebx == XEN_CPUID_SIGNATURE_EBX && + ecx == XEN_CPUID_SIGNATURE_ECX && + edx == XEN_CPUID_SIGNATURE_EDX && + 
((eax - XEN_CPUID_LEAF(0)) >= 2)) + return 1; + + return 0; +} + +static int __init enable_hvm_pv(u64 flags) +{ + struct xen_hvm_pv_type a; + + a.domid = DOMID_SELF; + a.flags = flags; + return HYPERVISOR_hvm_op(HVMOP_enable_pv, &a); +} + +static int __init init_hvm_pv_info(void) +{ + uint32_t ecx, edx, pages, msr; + u64 pfn; + + if (!xen_para_available()) + return -EINVAL; + + cpuid(XEN_CPUID_LEAF(2), &pages, &msr, &ecx, &edx); + + /* Check if hvm_pv mode is supported */ + if (!(edx & XEN_CPUID_FEAT2_HVM_PV)) + return -ENODEV; + + if (pages < 1) + return -ENODEV; + + pfn = __pa(hypercall_page); + if (wrmsr_safe(msr, (u32)pfn, ((u64)pfn) >> 32)) + return -ENODEV; + + return 0; +} + +static struct shared_info shared_info_page __page_aligned_bss; + +static int __init init_shared_info(void) +{ + struct xen_add_to_physmap xatp; + + xatp.domid = DOMID_SELF; + xatp.idx = 0; + xatp.space = XENMAPSPACE_shared_info; + xatp.gpfn = __pa(&shared_info_page) >> PAGE_SHIFT; + if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) + return -EINVAL; + + HYPERVISOR_shared_info = (struct shared_info *)&shared_info_page; + + per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; + + return 0; +} + +void __init xen_guest_init(void) +{ + int r; + + /* Ensure the we won''t confused with others */ + if (xen_domain()) + return; + + r = init_hvm_pv_info(); + if (r < 0) + return; + + r = init_shared_info(); + if (r < 0) + return; + + xen_setup_features(); + + x86_init.oem.banner = xen_hvm_pv_banner; + pv_info = xen_hvm_pv_info; + + xen_domain_type = XEN_HVM_DOMAIN; +} diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h index 7c74ba4..3f0b118 100644 --- a/include/xen/interface/hvm/hvm_op.h +++ b/include/xen/interface/hvm/hvm_op.h @@ -69,4 +69,12 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_pci_link_route); /* Flushes all VCPU TLBs: @arg must be NULL. 
*/ #define HVMOP_flush_tlbs 5 +#define HVMOP_enable_pv 9 +struct xen_hvm_pv_type { + domid_t domid; + uint32_t flags; +#define HVM_PV_CLOCK (1ull<<0) +#define HVM_PV_EVTCHN (1ull<<1) +}; + #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ diff --git a/include/xen/xen.h b/include/xen/xen.h index a164024..ce2a256 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -19,6 +19,17 @@ extern enum xen_domain_type xen_domain_type; #define xen_hvm_domain() (xen_domain() && \ xen_domain_type == XEN_HVM_DOMAIN) +#ifdef CONFIG_XEN_HVM_PV + +#define XEN_HVM_PV_EVTCHN_ENABLED (1u << 1) +extern u32 xen_hvm_pv_features; + +#define xen_hvm_pv_evtchn_enabled() \ + (xen_hvm_pv_features & XEN_HVM_PV_EVTCHN_ENABLED) +#else +#define xen_hvm_pv_evtchn_enabled() 0 +#endif /* CONFIG_XEN_HVM_PV */ + #ifdef CONFIG_XEN_DOM0 #include <xen/interface/xen.h> #include <asm/xen/hypervisor.h> -- 1.5.4.5 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Sheng Yang
2010-Mar-05 06:07 UTC
[Xen-devel] [PATCH 5/7] x86/xen: The entrance for PV extension of HVM
xen_guest_init() would setup the environment. Signed-off-by: Sheng Yang <sheng@linux.intel.com> --- arch/x86/include/asm/xen/hypervisor.h | 6 ++++++ arch/x86/kernel/setup.c | 4 ++++ 2 files changed, 10 insertions(+), 0 deletions(-) diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 396ff4c..13e089a 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -37,4 +37,10 @@ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; +#ifdef CONFIG_XEN_HVM_PV +void __init xen_guest_init(void); +#else +static inline void xen_guest_init(void) {}; +#endif + #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 5d9e40c..1cc4786 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -113,6 +113,8 @@ #endif #include <asm/mce.h> +#include <asm/xen/hypervisor.h> + /* * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. * The direct mapping extends to max_pfn_mapped, so that we can directly access @@ -740,6 +742,8 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.arch_setup(); + xen_guest_init(); + setup_memory_map(); parse_setup_data(); /* update the e820_saved too */ -- 1.5.4.5 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Sheng Yang
2010-Mar-05 06:07 UTC
[Xen-devel] [PATCH 6/7] xen: Enable PV clocksource for HVM
And enable it by default in PV extended HVM guest. Signed-off-by: Sheng Yang <sheng@linux.intel.com> --- arch/x86/xen/hvmpv.c | 18 ++++++++++++++++++ arch/x86/xen/time.c | 12 +++++++++++- arch/x86/xen/xen-ops.h | 1 + 3 files changed, 30 insertions(+), 1 deletions(-) diff --git a/arch/x86/xen/hvmpv.c b/arch/x86/xen/hvmpv.c index 540eef4..305dcca 100644 --- a/arch/x86/xen/hvmpv.c +++ b/arch/x86/xen/hvmpv.c @@ -43,6 +43,7 @@ static void __init xen_hvm_pv_banner(void) pv_info.name); printk(KERN_INFO "Xen version: %d.%d%s\n", version >> 16, version & 0xffff, extra.extraversion); + printk(KERN_INFO "PV feature: PV clocksource enabled\n"); } static int __init xen_para_available(void) @@ -112,6 +113,20 @@ static int __init init_shared_info(void) return 0; } +static void __init init_pv_clocksource(void) +{ + if (enable_hvm_pv(HVM_PV_CLOCK)) + BUG(); + + pv_time_ops.sched_clock = xen_sched_clock; + + x86_platform.calibrate_tsc = xen_tsc_khz; + x86_platform.get_wallclock = xen_get_wallclock; + x86_platform.set_wallclock = xen_set_wallclock; + + xen_register_clocksource(); +} + void __init xen_guest_init(void) { int r; @@ -133,5 +148,8 @@ void __init xen_guest_init(void) x86_init.oem.banner = xen_hvm_pv_banner; pv_info = xen_hvm_pv_info; + /* PV clocksource would be enabled by default */ + init_pv_clocksource(); + xen_domain_type = XEN_HVM_DOMAIN; } diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 0d3f07c..06b3e72 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -472,11 +472,21 @@ void xen_timer_resume(void) } } +static bool xen_clocksource_enabled; + +void xen_register_clocksource(void) +{ + if (!xen_clocksource_enabled) { + clocksource_register(&xen_clocksource); + xen_clocksource_enabled = 1; + } +} + __init void xen_time_init(void) { int cpu = smp_processor_id(); - clocksource_register(&xen_clocksource); + xen_register_clocksource(); if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { /* Successfully turned off 100Hz tick, so we 
have the diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f9153a3..d56b660 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -48,6 +48,7 @@ cycle_t xen_clocksource_read(void); void xen_setup_cpu_clockevents(void); unsigned long xen_tsc_khz(void); void __init xen_time_init(void); +void xen_register_clocksource(void); unsigned long xen_get_wallclock(void); int xen_set_wallclock(unsigned long time); unsigned long long xen_sched_clock(void); -- 1.5.4.5 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Sheng Yang
2010-Mar-05 06:07 UTC
[Xen-devel] [PATCH 7/7] xen: Enable event channel of PV extension of HVM
We map each IOAPIC pin to a VIRQ, so that we can deliver interrupts through these VIRQs. We use X86_PLATFORM_IPI_VECTOR as the notification vector for the hypervisor to notify the guest about the event. The patch also enables SMP support, so we can support IPI through evtchn as well. When this feature is enabled, we rely on the Xen PV timer for clockevent, rather than other hardware-emulated ones. Then we don't use IOAPIC/LAPIC, eliminating the overhead brought by unnecessary VMExits caused by the LAPIC. Signed-off-by: Sheng Yang <sheng@linux.intel.com> --- arch/x86/xen/enlighten.c | 6 ++-- arch/x86/xen/hvmpv.c | 59 +++++++++++++++++++++++++++++++++ arch/x86/xen/irq.c | 28 ++++++++++++++++ arch/x86/xen/smp.c | 76 ++++++++++++++++++++++++++++++++++++++++-- arch/x86/xen/xen-ops.h | 16 +++++++++ drivers/xen/events.c | 74 ++++++++++++++++++++++++++++++++++++++--- include/xen/events.h | 4 ++ include/xen/hvm.h | 5 +++ include/xen/interface/xen.h | 6 +++- 9 files changed, 260 insertions(+), 14 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 36daccb..2d60e70 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -717,7 +717,7 @@ static u32 xen_safe_apic_wait_icr_idle(void) return 0; } -static void set_xen_basic_apic_ops(void) +void xen_set_basic_apic_ops(void) { apic->read = xen_apic_read; apic->write = xen_apic_write; @@ -1026,7 +1026,7 @@ static void xen_crash_shutdown(struct pt_regs *regs) xen_reboot(SHUTDOWN_crash); } -static const struct machine_ops __initdata xen_machine_ops = { +const struct machine_ops __initdata xen_machine_ops = { .restart = xen_restart, .halt = xen_machine_halt, .power_off = xen_machine_halt, @@ -1116,7 +1116,7 @@ asmlinkage void __init xen_start_kernel(void) /* * set up the basic apic ops. 
*/ - set_xen_basic_apic_ops(); + xen_set_basic_apic_ops(); #endif if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { diff --git a/arch/x86/xen/hvmpv.c b/arch/x86/xen/hvmpv.c index 305dcca..673b036 100644 --- a/arch/x86/xen/hvmpv.c +++ b/arch/x86/xen/hvmpv.c @@ -17,6 +17,7 @@ #include <xen/interface/version.h> #include <xen/interface/memory.h> +#include <asm/reboot.h> #include <asm/xen/cpuid.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> @@ -44,6 +45,8 @@ static void __init xen_hvm_pv_banner(void) printk(KERN_INFO "Xen version: %d.%d%s\n", version >> 16, version & 0xffff, extra.extraversion); printk(KERN_INFO "PV feature: PV clocksource enabled\n"); + if (xen_hvm_pv_evtchn_enabled()) + printk(KERN_INFO "PV feature: Event channel enabled\n"); } static int __init xen_para_available(void) @@ -83,6 +86,9 @@ static int __init init_hvm_pv_info(void) if (!(edx & XEN_CPUID_FEAT2_HVM_PV)) return -ENODEV; + if (edx & XEN_CPUID_FEAT2_HVM_PV_EVTCHN) + xen_hvm_pv_features |= XEN_HVM_PV_EVTCHN_ENABLED; + if (pages < 1) return -ENODEV; @@ -127,9 +133,35 @@ static void __init init_pv_clocksource(void) xen_register_clocksource(); } +static int set_callback_via(uint64_t via) +{ + struct xen_hvm_param a; + + a.domid = DOMID_SELF; + a.index = HVM_PARAM_CALLBACK_IRQ; + a.value = via; + return HYPERVISOR_hvm_op(HVMOP_set_param, &a); +} + +void do_hvm_pv_evtchn_intr(void) +{ + per_cpu(irq_count, smp_processor_id())++; + xen_hvm_evtchn_do_upcall(get_irq_regs()); + per_cpu(irq_count, smp_processor_id())--; +} + +#ifdef CONFIG_X86_LOCAL_APIC +static void xen_hvm_pv_evtchn_apic_write(u32 reg, u32 val) +{ + /* The only one reached here should be EOI */ + WARN_ON(reg != APIC_EOI); +} +#endif + void __init xen_guest_init(void) { int r; + uint64_t callback_via; /* Ensure the we won''t confused with others */ if (xen_domain()) @@ -152,4 +184,31 @@ void __init xen_guest_init(void) init_pv_clocksource(); xen_domain_type = XEN_HVM_DOMAIN; + + if (xen_hvm_pv_evtchn_enabled()) { + 
xen_hvm_pv_init_irq_ops(); + + x86_init.timers.timer_init = xen_time_init; + x86_init.timers.setup_percpu_clockev = x86_init_noop; + x86_cpuinit.setup_percpu_clockev = x86_init_noop; + + pv_apic_ops.startup_ipi_hook = paravirt_nop; +#ifdef CONFIG_X86_LOCAL_APIC + /* + * set up the basic apic ops. + */ + xen_set_basic_apic_ops(); + apic->write = xen_hvm_pv_evtchn_apic_write; +#endif + + callback_via = HVM_CALLBACK_VECTOR(X86_PLATFORM_IPI_VECTOR); + set_callback_via(callback_via); + + x86_platform_ipi_callback = do_hvm_pv_evtchn_intr; + + disable_acpi(); + + xen_hvm_pv_smp_init(); + machine_ops = xen_machine_ops; + } } diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 9d30105..e325640 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -2,6 +2,7 @@ #include <asm/x86_init.h> +#include <xen/xen.h> #include <xen/interface/xen.h> #include <xen/interface/sched.h> #include <xen/interface/vcpu.h> @@ -131,3 +132,30 @@ void __init xen_init_irq_ops() pv_irq_ops = xen_irq_ops; x86_init.irqs.intr_init = xen_init_IRQ; } + +#ifdef CONFIG_XEN_HVM_PV +static void xen_hvm_pv_evtchn_disable(void) +{ + native_irq_disable(); + xen_irq_disable(); +} +PV_CALLEE_SAVE_REGS_THUNK(xen_hvm_pv_evtchn_disable); + +static void xen_hvm_pv_evtchn_enable(void) +{ + native_irq_enable(); + xen_irq_enable(); +} +PV_CALLEE_SAVE_REGS_THUNK(xen_hvm_pv_evtchn_enable); + +void __init xen_hvm_pv_init_irq_ops(void) +{ + if (xen_hvm_pv_evtchn_enabled()) { + pv_irq_ops.irq_disable + PV_CALLEE_SAVE(xen_hvm_pv_evtchn_disable); + pv_irq_ops.irq_enable + PV_CALLEE_SAVE(xen_hvm_pv_evtchn_enable); + x86_init.irqs.intr_init = xen_hvm_pv_evtchn_init_IRQ; + } +} +#endif diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 563d205..a9fd12d 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -15,18 +15,24 @@ #include <linux/sched.h> #include <linux/err.h> #include <linux/smp.h> +#include <linux/nmi.h> #include <asm/paravirt.h> #include <asm/desc.h> #include <asm/pgtable.h> #include 
<asm/cpu.h> +#include <asm/trampoline.h> +#include <asm/tlbflush.h> +#include <asm/mtrr.h> #include <xen/interface/xen.h> #include <xen/interface/vcpu.h> #include <asm/xen/interface.h> #include <asm/xen/hypercall.h> +#include <asm/xen/hypervisor.h> +#include <xen/xen.h> #include <xen/page.h> #include <xen/events.h> @@ -63,8 +69,12 @@ static __cpuinit void cpu_bringup(void) touch_softlockup_watchdog(); preempt_disable(); - xen_enable_sysenter(); - xen_enable_syscall(); + if (xen_pv_domain()) { + xen_enable_sysenter(); + xen_enable_syscall(); + } + + set_mtrr_aps_delayed_init(); cpu = smp_processor_id(); smp_store_cpu_info(cpu); @@ -171,7 +181,8 @@ static void __init xen_smp_prepare_boot_cpu(void) /* We''ve switched to the "real" per-cpu gdt, so make sure the old memory can be recycled */ - make_lowmem_page_readwrite(xen_initial_gdt); + if (xen_feature(XENFEAT_writable_descriptor_tables)) + make_lowmem_page_readwrite(xen_initial_gdt); xen_setup_vcpu_info_placement(); } @@ -282,6 +293,39 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) return 0; } +static __cpuinit int +hvm_pv_cpu_initialize_context(unsigned int cpu, struct task_struct *idle) +{ + struct vcpu_guest_context *ctxt; + unsigned long start_ip; + + if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) + return 0; + + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); + if (ctxt == NULL) + return -ENOMEM; + + early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); + initial_code = (unsigned long)cpu_bringup_and_idle; + stack_start.sp = (void *) idle->thread.sp; + + /* start_ip had better be page-aligned! 
*/ + start_ip = setup_trampoline(); + + /* only start_ip is what we want */ + ctxt->flags = VGCF_HVM_GUEST; + ctxt->user_regs.eip = start_ip; + + printk(KERN_INFO "Booting processor %d ip 0x%lx\n", cpu, start_ip); + + if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) + BUG(); + + kfree(ctxt); + return 0; +} + static int __cpuinit xen_cpu_up(unsigned int cpu) { struct task_struct *idle = idle_task(cpu); @@ -292,6 +336,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) irq_ctx_init(cpu); #else clear_tsk_thread_flag(idle, TIF_FORK); + initial_gs = per_cpu_offset(cpu); per_cpu(kernel_stack, cpu) (unsigned long)task_stack_page(idle) - KERNEL_STACK_OFFSET + THREAD_SIZE; @@ -305,7 +350,13 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) /* make sure interrupts start blocked */ per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; - rc = cpu_initialize_context(cpu, idle); + if (xen_pv_domain()) + rc = cpu_initialize_context(cpu, idle); + else if (xen_hvm_pv_evtchn_enabled()) + rc = hvm_pv_cpu_initialize_context(cpu, idle); + else + BUG(); + if (rc) return rc; @@ -480,3 +531,20 @@ void __init xen_smp_init(void) xen_fill_possible_map(); xen_init_spinlocks(); } + +#ifdef CONFIG_XEN_HVM_PV +static void xen_hvm_pv_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) +{ + /* TODO Make it more specific */ + flush_tlb_all(); +} + +void __init xen_hvm_pv_smp_init(void) +{ + if (xen_hvm_pv_evtchn_enabled()) { + smp_ops = xen_smp_ops; + pv_mmu_ops.flush_tlb_others = xen_hvm_pv_flush_tlb_others; + } +} +#endif diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index d56b660..4de9874 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -52,6 +52,12 @@ void xen_register_clocksource(void); unsigned long xen_get_wallclock(void); int xen_set_wallclock(unsigned long time); unsigned long long xen_sched_clock(void); +void xen_set_basic_apic_ops(void); + +#ifdef CONFIG_XEN_HVM_PV +void __init xen_hvm_pv_init_irq_ops(void); 
+void __init xen_hvm_pv_evtchn_init_IRQ(void); +#endif /* CONFIG_XEN_HVM_PV */ irqreturn_t xen_debug_interrupt(int irq, void *dev_id); @@ -62,9 +68,17 @@ void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP void xen_smp_init(void); +#ifdef CONFIG_XEN_HVM_PV +void xen_hvm_pv_smp_init(void); +#endif /* CONFIG_XEN_HVM_PV */ + extern cpumask_var_t xen_cpu_initialized_map; #else static inline void xen_smp_init(void) {} +#ifdef CONFIG_XEN_HVM_PV +static inline void xen_hvm_pv_smp_init(void) {} +#endif /* CONFIG_XEN_HVM_PV */ + #endif #ifdef CONFIG_PARAVIRT_SPINLOCKS @@ -102,4 +116,6 @@ void xen_sysret32(void); void xen_sysret64(void); void xen_adjust_exception_frame(void); +extern const struct machine_ops xen_machine_ops; + #endif /* XEN_OPS_H */ diff --git a/drivers/xen/events.c b/drivers/xen/events.c index ce602dd..e4b9de6 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -32,14 +32,17 @@ #include <asm/irq.h> #include <asm/idle.h> #include <asm/sync_bitops.h> +#include <asm/desc.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> +#include <xen/xen.h> #include <xen/xen-ops.h> #include <xen/events.h> #include <xen/interface/xen.h> #include <xen/interface/event_channel.h> + /* * This lock protects updates to the following mapping and reference-count * arrays. The lock does not need to be acquired to read the mapping tables. @@ -616,17 +619,13 @@ static DEFINE_PER_CPU(unsigned, xed_nesting_count); * a bitset of words which contain pending event bits. The second * level is a bitset of pending events themselves. 
*/ -void xen_evtchn_do_upcall(struct pt_regs *regs) +void __xen_evtchn_do_upcall(struct pt_regs *regs) { int cpu = get_cpu(); - struct pt_regs *old_regs = set_irq_regs(regs); struct shared_info *s = HYPERVISOR_shared_info; struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); unsigned count; - exit_idle(); - irq_enter(); - do { unsigned long pending_words; @@ -662,10 +661,25 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) } while(count != 1); out: + put_cpu(); +} + +void xen_evtchn_do_upcall(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + exit_idle(); + irq_enter(); + + __xen_evtchn_do_upcall(regs); + irq_exit(); set_irq_regs(old_regs); +} - put_cpu(); +void xen_hvm_evtchn_do_upcall(struct pt_regs *regs) +{ + __xen_evtchn_do_upcall(regs); } /* Rebind a new event channel to an existing irq. */ @@ -944,3 +958,51 @@ void __init xen_init_IRQ(void) irq_ctx_init(smp_processor_id()); } + +void __init xen_hvm_pv_evtchn_init_IRQ(void) +{ + int i; + + xen_init_IRQ(); + for (i = 0; i < NR_IRQS_LEGACY; i++) { + struct evtchn_bind_virq bind_virq; + struct irq_desc *desc = irq_to_desc(i); + int virq, evtchn; + + virq = i + VIRQ_EMUL_PIN_START; + bind_virq.virq = virq; + bind_virq.vcpu = 0; + + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq) != 0) + BUG(); + + evtchn = bind_virq.port; + evtchn_to_irq[evtchn] = i; + irq_info[i] = mk_virq_info(evtchn, virq); + + desc->status = IRQ_DISABLED; + desc->action = NULL; + desc->depth = 1; + + /* + * 16 old-style INTA-cycle interrupts: + */ + set_irq_chip_and_handler_name(i, &xen_dynamic_chip, + handle_level_irq, "event"); + } + + /* + * Cover the whole vector space, no vector can escape + * us. 
(some of these will be overridden and become + * ''special'' SMP interrupts) + */ + for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; + if (vector != IA32_SYSCALL_VECTOR) + set_intr_gate(vector, interrupt[i]); + } + + /* generic IPI for platform specific use, now used for HVM evtchn */ + alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); +} diff --git a/include/xen/events.h b/include/xen/events.h index e68d59a..b9fbb3b 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -56,4 +56,8 @@ void xen_poll_irq(int irq); /* Determine the IRQ which is bound to an event channel */ unsigned irq_from_evtchn(unsigned int evtchn); +#ifdef CONFIG_XEN_HVM_PV +void xen_hvm_evtchn_do_upcall(struct pt_regs *regs); +#endif + #endif /* _XEN_EVENTS_H */ diff --git a/include/xen/hvm.h b/include/xen/hvm.h index 4ea8887..c66d788 100644 --- a/include/xen/hvm.h +++ b/include/xen/hvm.h @@ -20,4 +20,9 @@ static inline unsigned long hvm_get_parameter(int idx) return xhv.value; } +#define HVM_CALLBACK_VIA_TYPE_VECTOR 0x2 +#define HVM_CALLBACK_VIA_TYPE_SHIFT 56 +#define HVM_CALLBACK_VECTOR(x) (((uint64_t)HVM_CALLBACK_VIA_TYPE_VECTOR)<<\ + HVM_CALLBACK_VIA_TYPE_SHIFT | (x)) + #endif /* XEN_HVM_H__ */ diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 2befa3e..70a6c6e 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -90,7 +90,11 @@ #define VIRQ_ARCH_6 22 #define VIRQ_ARCH_7 23 -#define NR_VIRQS 24 +#define VIRQ_EMUL_PIN_START 24 +#define VIRQ_EMUL_PIN_NUM 16 + +#define NR_VIRQS (VIRQ_EMUL_PIN_START + VIRQ_EMUL_PIN_NUM) + /* * MMU-UPDATE REQUESTS * -- 1.5.4.5 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2010-Mar-08 22:16 UTC
Re: [Xen-devel] [PATCH 7/7] xen: Enable event channel of PV extension of HVM
On 03/04/2010 10:07 PM, Sheng Yang wrote:> We mapped each IOAPIC pin to a VIRQ, so that we can deliver interrupt through > these VIRQs. > > We used X86_PLATFORM_IPI_VECTOR as the notification vector for hypervisor > to notify guest about the event. > > The patch also enabled SMP support, then we can support IPI through evtchn as well. > > When this feature is enabled, we would relay on Xen PV timer for clockevent, > rather than other hardware emulated ones. > > Then we don''t use IOAPIC/LAPIC, eliminated the overhead brought by > unnecessary VMExit caused by LAPIC. > > Signed-off-by: Sheng Yang<sheng@linux.intel.com> > --- > arch/x86/xen/enlighten.c | 6 ++-- > arch/x86/xen/hvmpv.c | 59 +++++++++++++++++++++++++++++++++ > arch/x86/xen/irq.c | 28 ++++++++++++++++ > arch/x86/xen/smp.c | 76 ++++++++++++++++++++++++++++++++++++++++-- > arch/x86/xen/xen-ops.h | 16 +++++++++ > drivers/xen/events.c | 74 ++++++++++++++++++++++++++++++++++++++--- > include/xen/events.h | 4 ++ > include/xen/hvm.h | 5 +++ > include/xen/interface/xen.h | 6 +++- > 9 files changed, 260 insertions(+), 14 deletions(-) > > diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c > index 36daccb..2d60e70 100644 > --- a/arch/x86/xen/enlighten.c > +++ b/arch/x86/xen/enlighten.c > @@ -717,7 +717,7 @@ static u32 xen_safe_apic_wait_icr_idle(void) > return 0; > } > > -static void set_xen_basic_apic_ops(void) > +void xen_set_basic_apic_ops(void) > { > apic->read = xen_apic_read; > apic->write = xen_apic_write; > @@ -1026,7 +1026,7 @@ static void xen_crash_shutdown(struct pt_regs *regs) > xen_reboot(SHUTDOWN_crash); > } > > -static const struct machine_ops __initdata xen_machine_ops = { > +const struct machine_ops __initdata xen_machine_ops = { > .restart = xen_restart, > .halt = xen_machine_halt, > .power_off = xen_machine_halt, > @@ -1116,7 +1116,7 @@ asmlinkage void __init xen_start_kernel(void) > /* > * set up the basic apic ops. 
> */ > - set_xen_basic_apic_ops(); > + xen_set_basic_apic_ops(); > #endif > > if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { > diff --git a/arch/x86/xen/hvmpv.c b/arch/x86/xen/hvmpv.c > index 305dcca..673b036 100644 > --- a/arch/x86/xen/hvmpv.c > +++ b/arch/x86/xen/hvmpv.c > @@ -17,6 +17,7 @@ > #include<xen/interface/version.h> > #include<xen/interface/memory.h> > > +#include<asm/reboot.h> > #include<asm/xen/cpuid.h> > #include<asm/xen/hypercall.h> > #include<asm/xen/hypervisor.h> > @@ -44,6 +45,8 @@ static void __init xen_hvm_pv_banner(void) > printk(KERN_INFO "Xen version: %d.%d%s\n", > version>> 16, version& 0xffff, extra.extraversion); > printk(KERN_INFO "PV feature: PV clocksource enabled\n"); > + if (xen_hvm_pv_evtchn_enabled()) > + printk(KERN_INFO "PV feature: Event channel enabled\n"); > } > > static int __init xen_para_available(void) > @@ -83,6 +86,9 @@ static int __init init_hvm_pv_info(void) > if (!(edx& XEN_CPUID_FEAT2_HVM_PV)) > return -ENODEV; > > + if (edx& XEN_CPUID_FEAT2_HVM_PV_EVTCHN) > + xen_hvm_pv_features |= XEN_HVM_PV_EVTCHN_ENABLED; > + > if (pages< 1) > return -ENODEV; > > @@ -127,9 +133,35 @@ static void __init init_pv_clocksource(void) > xen_register_clocksource(); > } > > +static int set_callback_via(uint64_t via) > +{ > + struct xen_hvm_param a; > + > + a.domid = DOMID_SELF; > + a.index = HVM_PARAM_CALLBACK_IRQ; > + a.value = via; > + return HYPERVISOR_hvm_op(HVMOP_set_param,&a); > +} > + > +void do_hvm_pv_evtchn_intr(void) > +{ > + per_cpu(irq_count, smp_processor_id())++; > + xen_hvm_evtchn_do_upcall(get_irq_regs()); > + per_cpu(irq_count, smp_processor_id())--; > +} > + > +#ifdef CONFIG_X86_LOCAL_APIC > +static void xen_hvm_pv_evtchn_apic_write(u32 reg, u32 val) > +{ > + /* The only one reached here should be EOI */ > + WARN_ON(reg != APIC_EOI); > +} > +#endif > + > void __init xen_guest_init(void) > { > int r; > + uint64_t callback_via; > > /* Ensure the we won''t confused with others */ > if (xen_domain()) > @@ -152,4 
+184,31 @@ void __init xen_guest_init(void) > init_pv_clocksource(); > > xen_domain_type = XEN_HVM_DOMAIN; > + > + if (xen_hvm_pv_evtchn_enabled()) { > + xen_hvm_pv_init_irq_ops(); > + > + x86_init.timers.timer_init = xen_time_init; > + x86_init.timers.setup_percpu_clockev = x86_init_noop; > + x86_cpuinit.setup_percpu_clockev = x86_init_noop; > + > + pv_apic_ops.startup_ipi_hook = paravirt_nop; > +#ifdef CONFIG_X86_LOCAL_APIC > + /* > + * set up the basic apic ops. > + */ > + xen_set_basic_apic_ops(); > + apic->write = xen_hvm_pv_evtchn_apic_write; > +#endif > + > + callback_via = HVM_CALLBACK_VECTOR(X86_PLATFORM_IPI_VECTOR); > + set_callback_via(callback_via); > + > + x86_platform_ipi_callback = do_hvm_pv_evtchn_intr; > + > + disable_acpi(); > + > + xen_hvm_pv_smp_init(); > + machine_ops = xen_machine_ops; > + } > } > diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c > index 9d30105..e325640 100644 > --- a/arch/x86/xen/irq.c > +++ b/arch/x86/xen/irq.c > @@ -2,6 +2,7 @@ > > #include<asm/x86_init.h> > > +#include<xen/xen.h> > #include<xen/interface/xen.h> > #include<xen/interface/sched.h> > #include<xen/interface/vcpu.h> > @@ -131,3 +132,30 @@ void __init xen_init_irq_ops() > pv_irq_ops = xen_irq_ops; > x86_init.irqs.intr_init = xen_init_IRQ; > } > + > +#ifdef CONFIG_XEN_HVM_PV > +static void xen_hvm_pv_evtchn_disable(void) > +{ > + native_irq_disable(); > + xen_irq_disable(); > +} > +PV_CALLEE_SAVE_REGS_THUNK(xen_hvm_pv_evtchn_disable); > + > +static void xen_hvm_pv_evtchn_enable(void) > +{ > + native_irq_enable(); > + xen_irq_enable(); > +} >Why is it necessary to disable both the event mask and eflags.IF? Surely IF is enough? If it is necessary here, why not for save/restore flags? J _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Sheng Yang
2010-Mar-09 05:55 UTC
Re: [Xen-devel] [PATCH 7/7] xen: Enable event channel of PV extension of HVM
On Tuesday 09 March 2010 06:16:30 Jeremy Fitzhardinge wrote:> On 03/04/2010 10:07 PM, Sheng Yang wrote: > > We mapped each IOAPIC pin to a VIRQ, so that we can deliver interrupt > > through these VIRQs. > > > > We used X86_PLATFORM_IPI_VECTOR as the notification vector for hypervisor > > to notify guest about the event. > > > > The patch also enabled SMP support, then we can support IPI through > > evtchn as well. > > > > When this feature is enabled, we would relay on Xen PV timer for > > clockevent, rather than other hardware emulated ones. > > > > Then we don''t use IOAPIC/LAPIC, eliminated the overhead brought by > > unnecessary VMExit caused by LAPIC. > > > > Signed-off-by: Sheng Yang<sheng@linux.intel.com> > > --- > > arch/x86/xen/enlighten.c | 6 ++-- > > arch/x86/xen/hvmpv.c | 59 +++++++++++++++++++++++++++++++++ > > arch/x86/xen/irq.c | 28 ++++++++++++++++ > > arch/x86/xen/smp.c | 76 > > ++++++++++++++++++++++++++++++++++++++++-- arch/x86/xen/xen-ops.h | > > 16 +++++++++ > > drivers/xen/events.c | 74 > > ++++++++++++++++++++++++++++++++++++++--- include/xen/events.h | > > 4 ++ > > include/xen/hvm.h | 5 +++ > > include/xen/interface/xen.h | 6 +++- > > 9 files changed, 260 insertions(+), 14 deletions(-) > > > > diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c > > index 36daccb..2d60e70 100644 > > --- a/arch/x86/xen/enlighten.c > > +++ b/arch/x86/xen/enlighten.c > > @@ -717,7 +717,7 @@ static u32 xen_safe_apic_wait_icr_idle(void) > > return 0; > > } > > > > -static void set_xen_basic_apic_ops(void) > > +void xen_set_basic_apic_ops(void) > > { > > apic->read = xen_apic_read; > > apic->write = xen_apic_write; > > @@ -1026,7 +1026,7 @@ static void xen_crash_shutdown(struct pt_regs > > *regs) xen_reboot(SHUTDOWN_crash); > > } > > > > -static const struct machine_ops __initdata xen_machine_ops = { > > +const struct machine_ops __initdata xen_machine_ops = { > > .restart = xen_restart, > > .halt = xen_machine_halt, > > .power_off = xen_machine_halt, 
> > @@ -1116,7 +1116,7 @@ asmlinkage void __init xen_start_kernel(void) > > /* > > * set up the basic apic ops. > > */ > > - set_xen_basic_apic_ops(); > > + xen_set_basic_apic_ops(); > > #endif > > > > if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { > > diff --git a/arch/x86/xen/hvmpv.c b/arch/x86/xen/hvmpv.c > > index 305dcca..673b036 100644 > > --- a/arch/x86/xen/hvmpv.c > > +++ b/arch/x86/xen/hvmpv.c > > @@ -17,6 +17,7 @@ > > #include<xen/interface/version.h> > > #include<xen/interface/memory.h> > > > > +#include<asm/reboot.h> > > #include<asm/xen/cpuid.h> > > #include<asm/xen/hypercall.h> > > #include<asm/xen/hypervisor.h> > > @@ -44,6 +45,8 @@ static void __init xen_hvm_pv_banner(void) > > printk(KERN_INFO "Xen version: %d.%d%s\n", > > version>> 16, version& 0xffff, extra.extraversion); > > printk(KERN_INFO "PV feature: PV clocksource enabled\n"); > > + if (xen_hvm_pv_evtchn_enabled()) > > + printk(KERN_INFO "PV feature: Event channel enabled\n"); > > } > > > > static int __init xen_para_available(void) > > @@ -83,6 +86,9 @@ static int __init init_hvm_pv_info(void) > > if (!(edx& XEN_CPUID_FEAT2_HVM_PV)) > > return -ENODEV; > > > > + if (edx& XEN_CPUID_FEAT2_HVM_PV_EVTCHN) > > + xen_hvm_pv_features |= XEN_HVM_PV_EVTCHN_ENABLED; > > + > > if (pages< 1) > > return -ENODEV; > > > > @@ -127,9 +133,35 @@ static void __init init_pv_clocksource(void) > > xen_register_clocksource(); > > } > > > > +static int set_callback_via(uint64_t via) > > +{ > > + struct xen_hvm_param a; > > + > > + a.domid = DOMID_SELF; > > + a.index = HVM_PARAM_CALLBACK_IRQ; > > + a.value = via; > > + return HYPERVISOR_hvm_op(HVMOP_set_param,&a); > > +} > > + > > +void do_hvm_pv_evtchn_intr(void) > > +{ > > + per_cpu(irq_count, smp_processor_id())++; > > + xen_hvm_evtchn_do_upcall(get_irq_regs()); > > + per_cpu(irq_count, smp_processor_id())--; > > +} > > + > > +#ifdef CONFIG_X86_LOCAL_APIC > > +static void xen_hvm_pv_evtchn_apic_write(u32 reg, u32 val) > > +{ > > + /* The only one reached 
here should be EOI */ > > + WARN_ON(reg != APIC_EOI); > > +} > > +#endif > > + > > void __init xen_guest_init(void) > > { > > int r; > > + uint64_t callback_via; > > > > /* Ensure the we won''t confused with others */ > > if (xen_domain()) > > @@ -152,4 +184,31 @@ void __init xen_guest_init(void) > > init_pv_clocksource(); > > > > xen_domain_type = XEN_HVM_DOMAIN; > > + > > + if (xen_hvm_pv_evtchn_enabled()) { > > + xen_hvm_pv_init_irq_ops(); > > + > > + x86_init.timers.timer_init = xen_time_init; > > + x86_init.timers.setup_percpu_clockev = x86_init_noop; > > + x86_cpuinit.setup_percpu_clockev = x86_init_noop; > > + > > + pv_apic_ops.startup_ipi_hook = paravirt_nop; > > +#ifdef CONFIG_X86_LOCAL_APIC > > + /* > > + * set up the basic apic ops. > > + */ > > + xen_set_basic_apic_ops(); > > + apic->write = xen_hvm_pv_evtchn_apic_write; > > +#endif > > + > > + callback_via = HVM_CALLBACK_VECTOR(X86_PLATFORM_IPI_VECTOR); > > + set_callback_via(callback_via); > > + > > + x86_platform_ipi_callback = do_hvm_pv_evtchn_intr; > > + > > + disable_acpi(); > > + > > + xen_hvm_pv_smp_init(); > > + machine_ops = xen_machine_ops; > > + } > > } > > diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c > > index 9d30105..e325640 100644 > > --- a/arch/x86/xen/irq.c > > +++ b/arch/x86/xen/irq.c > > @@ -2,6 +2,7 @@ > > > > #include<asm/x86_init.h> > > > > +#include<xen/xen.h> > > #include<xen/interface/xen.h> > > #include<xen/interface/sched.h> > > #include<xen/interface/vcpu.h> > > @@ -131,3 +132,30 @@ void __init xen_init_irq_ops() > > pv_irq_ops = xen_irq_ops; > > x86_init.irqs.intr_init = xen_init_IRQ; > > } > > + > > +#ifdef CONFIG_XEN_HVM_PV > > +static void xen_hvm_pv_evtchn_disable(void) > > +{ > > + native_irq_disable(); > > + xen_irq_disable(); > > +} > > +PV_CALLEE_SAVE_REGS_THUNK(xen_hvm_pv_evtchn_disable); > > + > > +static void xen_hvm_pv_evtchn_enable(void) > > +{ > > + native_irq_enable(); > > + xen_irq_enable(); > > +} > > Why is it necessary to disable both the event 
mask and eflags.IF? > Surely IF is enough? > > If it is necessary here, why not for save/restore flags?
Yes, you are right. IF should be enough. Should I update this patch alone, or resend the whole series? (Note that you were replying to the old version...) -- regards Yang, Sheng _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel