From ba9abf6ee7e5fe0515e2d51b14743c8d5416285c Mon Sep 17 00:00:00 2001 From: Liu, Jinsong <jinsong.liu@intel.com> Date: Fri, 24 Feb 2012 02:18:02 +0800 Subject: [PATCH 2/2] Xen pad logic This patch implement Xen pad logic, and when getting pad device notification, it hypercalls to Xen hypervisor for core parking. Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> --- arch/x86/xen/enlighten.c | 2 + arch/x86/xen/xen-ops.h | 1 + drivers/xen/Makefile | 1 + drivers/xen/xen_acpi_pad.c | 190 ++++++++++++++++++++++++++++++++++++++ include/xen/interface/platform.h | 14 +++ 5 files changed, 208 insertions(+), 0 deletions(-) create mode 100644 drivers/xen/xen_acpi_pad.c diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 12eb07b..3cce71f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1133,6 +1133,8 @@ asmlinkage void __init xen_start_kernel(void) xen_init_time_ops(); + xen_init_pad_ops(); + /* * Set up some pagetable state before starting to set any ptes. 
*/ diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index b095739..7eee651 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -54,6 +54,7 @@ void xen_teardown_timer(int cpu); cycle_t xen_clocksource_read(void); void xen_setup_cpu_clockevents(void); void __init xen_init_time_ops(void); +void __init xen_init_pad_ops(void); void __init xen_hvm_init_time_ops(void); irqreturn_t xen_debug_interrupt(int irq, void *dev_id); diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index aa31337..c0268c9 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o obj-$(CONFIG_XEN_DOM0) += pci.o obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o +obj-$(CONFIG_XEN_DOM0) += xen_acpi_pad.o xen-evtchn-y := evtchn.o xen-gntdev-y := gntdev.o diff --git a/drivers/xen/xen_acpi_pad.c b/drivers/xen/xen_acpi_pad.c new file mode 100644 index 0000000..4a1f2f7 --- /dev/null +++ b/drivers/xen/xen_acpi_pad.c @@ -0,0 +1,190 @@ +/* + * xen_acpi_pad.c - Xen pad interface + * + * Copyright (c) 2012, Intel Corporation. + * Author: Liu, Jinsong <jinsong.liu@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <acpi/acpi_bus.h> +#include <acpi/acpi_drivers.h> + +#include <asm/xen/hypercall.h> + +static DEFINE_MUTEX(xen_cpu_lock); + +static int xen_acpi_pad_idle_cpus(int *num_cpus) +{ + int ret; + + struct xen_platform_op op = { + .cmd = XENPF_core_parking, + .interface_version = XENPF_INTERFACE_VERSION, + }; + + /* set cpu nums expected to be idled */ + op.u.core_parking.type = XEN_CORE_PARKING_SET; + op.u.core_parking.idle_nums = (uint32_t)*num_cpus; + ret = HYPERVISOR_dom0_op(&op); + if (ret) + return ret; + + /* + * get cpu nums actually be idled + * cannot get it by using hypercall once (shared with _SET) + * because of the characteristic of Xen continue_hypercall_on_cpu + */ + op.u.core_parking.type = XEN_CORE_PARKING_GET; + ret = HYPERVISOR_dom0_op(&op); + if (ret) + return ret; + + *num_cpus = op.u.core_parking.idle_nums; + return 0; +} + +/* + * Query firmware how many CPUs should be idle + * return -1 on failure + */ +static int xen_acpi_pad_pur(acpi_handle handle) +{ + struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; + union acpi_object *package; + int num = -1; + + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PUR", NULL, &buffer))) + return num; + + if (!buffer.length || !buffer.pointer) + return num; + + package = buffer.pointer; + + if (package->type == ACPI_TYPE_PACKAGE && + package->package.count == 2 && + package->package.elements[0].integer.value == 1) /* rev 1 */ + + num = package->package.elements[1].integer.value; + + kfree(buffer.pointer); + return num; +} + +/* Notify firmware how many CPUs are idle */ +static void xen_acpi_pad_ost(acpi_handle handle, int stat, + uint32_t idle_cpus) +{ + union acpi_object params[3] = { + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_BUFFER,}, + }; + struct acpi_object_list arg_list = {3, params}; + + params[0].integer.value = ACPI_PROCESSOR_AGGREGATOR_NOTIFY; + params[1].integer.value = stat; + 
params[2].buffer.length = 4; + params[2].buffer.pointer = (void *)&idle_cpus; + acpi_evaluate_object(handle, "_OST", &arg_list, NULL); +} + +static void xen_acpi_pad_handle_notify(acpi_handle handle) +{ + int ret, num_cpus; + + mutex_lock(&xen_cpu_lock); + num_cpus = xen_acpi_pad_pur(handle); + if (num_cpus < 0) { + mutex_unlock(&xen_cpu_lock); + return; + } + + ret = xen_acpi_pad_idle_cpus(&num_cpus); + if (ret) { + mutex_unlock(&xen_cpu_lock); + return; + } + + xen_acpi_pad_ost(handle, 0, num_cpus); + mutex_unlock(&xen_cpu_lock); +} + +static void xen_acpi_pad_notify(acpi_handle handle, u32 event, + void *data) +{ + switch (event) { + case ACPI_PROCESSOR_AGGREGATOR_NOTIFY: + xen_acpi_pad_handle_notify(handle); + break; + default: + printk(KERN_WARNING "Unsupported event [0x%x]\n", event); + break; + } +} + +static int xen_acpi_pad_add(struct acpi_device *device) +{ + acpi_status status; + + strcpy(acpi_device_name(device), ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME); + strcpy(acpi_device_class(device), ACPI_PROCESSOR_AGGREGATOR_CLASS); + + status = acpi_install_notify_handler(device->handle, + ACPI_DEVICE_NOTIFY, xen_acpi_pad_notify, device); + if (ACPI_FAILURE(status)) + return -ENODEV; + + return 0; +} + +static int xen_acpi_pad_remove(struct acpi_device *device, + int type) +{ + int num_cpus = 0; + + mutex_lock(&xen_cpu_lock); + xen_acpi_pad_idle_cpus(&num_cpus); + mutex_unlock(&xen_cpu_lock); + + acpi_remove_notify_handler(device->handle, + ACPI_DEVICE_NOTIFY, xen_acpi_pad_notify); + return 0; +} + +static const struct acpi_device_id xen_pad_device_ids[] = { + {"ACPI000C", 0}, + {"", 0}, +}; + +static struct acpi_driver xen_acpi_pad_driver = { + .name = "processor_aggregator", + .class = ACPI_PROCESSOR_AGGREGATOR_CLASS, + .ids = xen_pad_device_ids, + .ops = { + .add = xen_acpi_pad_add, + .remove = xen_acpi_pad_remove, + }, +}; + +static int __init xen_acpi_pad_init(void) +{ + return acpi_bus_register_driver(&xen_acpi_pad_driver); +} + +void __init 
xen_init_pad_ops(void) +{ +#ifdef CONFIG_ACPI_PROCESSOR_AGGREGATOR + acpi_pad_ops.init = xen_acpi_pad_init; +#endif +} diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h index c168468..56ec72a 100644 --- a/include/xen/interface/platform.h +++ b/include/xen/interface/platform.h @@ -297,6 +297,19 @@ struct xenpf_set_processor_pminfo { }; DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo); +#define XENPF_core_parking 60 + +#define XEN_CORE_PARKING_SET 1 +#define XEN_CORE_PARKING_GET 2 +struct xenpf_core_parking { + /* IN variables */ + uint32_t type; + /* IN variables: set cpu nums expected to be idled */ + /* OUT variables: get cpu nums actually be idled */ + uint32_t idle_nums; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_core_parking); + struct xen_platform_op { uint32_t cmd; uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ @@ -312,6 +325,7 @@ struct xen_platform_op { struct xenpf_change_freq change_freq; struct xenpf_getidletime getidletime; struct xenpf_set_processor_pminfo set_pminfo; + struct xenpf_core_parking core_parking; uint8_t pad[128]; } u; }; -- 1.7.1
On Thu, Feb 23, 2012 at 01:31:25PM +0000, Liu, Jinsong wrote:
> >From ba9abf6ee7e5fe0515e2d51b14743c8d5416285c Mon Sep 17 00:00:00 2001
> From: Liu, Jinsong <jinsong.liu@intel.com>
> Date: Fri, 24 Feb 2012 02:18:02 +0800
> Subject: [PATCH 2/2] Xen pad logic
>
> This patch implement Xen pad logic, and when getting pad device
> notification, it hypercalls to Xen hypervisor for core parking.

Can you explain to me what a pad device is and how it works? And how does it function right now on bare metal? And what kind of hardware do you need to use this? And what happens if you do not use it? Can one ignore the "pad" support? Please assume that I have a basic understanding of ACPI.

Also, what happens now if this patch is not implemented? What will/is dom0 doing without these patches (so 3.2, for example, on this machine)? Is it just idling using mwait on idle CPUs and ending up trapping in the hypervisor? Or is it not mwaiting, since cstate.c doesn't get executed because we have:

boot_option_idle_override = IDLE_HALT;

in arch/x86/xen/setup.c ?
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> +static struct acpi_driver xen_acpi_pad_driver = {
> + .name = "processor_aggregator",
> + .class = ACPI_PROCESSOR_AGGREGATOR_CLASS,
> + .ids = xen_pad_device_ids,
> + .ops = {
> + .add = xen_acpi_pad_add,
> + .remove = xen_acpi_pad_remove,
> + },
> +};
> +
> +static int __init xen_acpi_pad_init(void)
> +{
> + return acpi_bus_register_driver(&xen_acpi_pad_driver);

If the acpi_bus_register_driver function could work with multiple acpi_drivers that define the same class (and have some priority), would this be easier?
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Konrad Rzeszutek Wilk wrote:
> On Thu, Feb 23, 2012 at 01:31:25PM +0000, Liu, Jinsong wrote:
>>> From ba9abf6ee7e5fe0515e2d51b14743c8d5416285c Mon Sep 17 00:00:00
>>> 2001
>> From: Liu, Jinsong <jinsong.liu@intel.com>
>> Date: Fri, 24 Feb 2012 02:18:02 +0800
>> Subject: [PATCH 2/2] Xen pad logic
>>
>> This patch implement Xen pad logic, and when getting pad device
>> notification, it hypercalls to Xen hypervisor for core parking.
>
> Can you explain to me how and what pad device is? And how it functions
> right now in baremetal? And what kind of hardware do you need to use
> this?
> And what happens if you do not use it? Can one ignore the "pad"
> support?
> Please assume that I've a basic understanding of ACPI.
>
>
> Also, what happens now, if the this patch is not implemented? What
> will/is dom0 doing without these patches (so 3.2 for example on this
> machine)?
> Is it just idling using mwait on idle CPUs and ending up trapping in
> the hypervisor? Or is not mwaiting since the cstate.c doesn't get
> executed since we have:
>
> boot_option_idle_override = IDLE_HALT;
>
> in arch/x86/xen/setup.c ?

PAD (the Processor Aggregator Device) is an ACPI device used to direct the OS to take some action (which action depends on the OS itself) for the sake of power consumption. Two objects (_PUR and _OST) can be declared under the PAD and serve as the BIOS/OS interface.

PAD by itself is pointless unless it works together with some external power-control software (like NPTM). For example, currently on bare metal, the NPTM engine triggers an SCI to OSPM, which then evaluates and calls the SCI handler, through which the BIOS notifies the OS with the _PUR value; the OS can then take the corresponding action and report back to the BIOS through _OST. I don't think it's a problem if the user doesn't use PAD or ignores it; it only makes some external power-control software unusable.

How the OS handles the PAD notification is the OS's business. The native kernel uses round robin and mwait, taking some application workload affinity into account (so I was told). For Xen we don't need to care about that, since it is a question at the vcpu level.
For Xen acpi_pad, the dom0 patches are used to parse _PUR and make a hypercall to the hypervisor, which in turn idles pcpus using its own algorithm.

Thanks,
Jinsong
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Konrad Rzeszutek Wilk wrote:
>> +static struct acpi_driver xen_acpi_pad_driver = {
>> + .name = "processor_aggregator",
>> + .class = ACPI_PROCESSOR_AGGREGATOR_CLASS,
>> + .ids = xen_pad_device_ids,
>> + .ops = {
>> + .add = xen_acpi_pad_add,
>> + .remove = xen_acpi_pad_remove,
>> + },
>> +};
>> +
>> +static int __init xen_acpi_pad_init(void)
>> +{
>> + return acpi_bus_register_driver(&xen_acpi_pad_driver);
>
> If the acpi_bus_register_driver function could work with multiple
> acpi_drivers that define the same class (and have some priority) would
> this be easier?

Your point is not quite clear to me; could you elaborate?

Thanks,
Jinsong
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Mar 26, 2012 at 06:18:36AM +0000, Liu, Jinsong wrote:
> Konrad Rzeszutek Wilk wrote:
> > On Thu, Feb 23, 2012 at 01:31:25PM +0000, Liu, Jinsong wrote:
> >>> From ba9abf6ee7e5fe0515e2d51b14743c8d5416285c Mon Sep 17 00:00:00
> >>> 2001
> >> From: Liu, Jinsong <jinsong.liu@intel.com>
> >> Date: Fri, 24 Feb 2012 02:18:02 +0800
> >> Subject: [PATCH 2/2] Xen pad logic
> >>
> >> This patch implement Xen pad logic, and when getting pad device
> >> notification, it hypercalls to Xen hypervisor for core parking.
> >
> > Can you explain to me how and what pad device is? And how it functions
> > right now in baremetal? And what kind of hardware do you need to use
> > this?
> > And what happens if you do not use it? Can one ignore the "pad"
> > support?
> > Please assume that I've a basic understanding of ACPI.
> >
> >
> > Also, what happens now, if the this patch is not implemented? What
> > will/is dom0 doing without these patches (so 3.2 for example on this
> > machine)?
> > Is it just idling using mwait on idle CPUs and ending up trapping in
> > the hypervisor? Or is not mwaiting since the cstate.c doesn't get
> > executed since we have:
> >
> > boot_option_idle_override = IDLE_HALT;
> >
> > in arch/x86/xen/setup.c ?
>
>
> Pad is an ACPI device used to direct os taking some action (depend on os itsef) for the sake of power consumption. 2 objs (PUR and OST) could be declared under PAD and as bios/os interface.
>
> PAD itself is pointless, unless it co-work with some external power control s/w (like NPTM). For example currently in baremetal, NPTM engine trigger sci to ospm, then evaluate and call sci handler, through which bios notify os with PUR value by which os could take corresponding action and feedback bios through OST. I don't think it's a problem if user don't use PAD or ignore it, it only make some external power control s/w unuseable.

What is NPTM? It sounds like SMI firmware?

>
> As for how os handle pad notify is os business. Native kernel use round robin and mwait, considering some app workload affinity (I was told so). For xen we don't need care it since it's a question of vcpu level. For xen acpi_pad, dom0 patches used to parse PUR and hypercall to hypervisor, which in turn idle pcpu by its own algorithm.

I presume you have some of this hardware - if you launch the latest linus/master (along with the patches from https://bugzilla.redhat.com/show_bug.cgi?id=804347), compile with CONFIG_ACPI_PROCESSOR_AGGREGATOR=y, and run under Xen 4.1, what happens when the _PUR notification takes effect?
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Mar 26, 2012 at 07:29:09AM +0000, Liu, Jinsong wrote:
> Konrad Rzeszutek Wilk wrote:
> >> +static struct acpi_driver xen_acpi_pad_driver = {
> >> + .name = "processor_aggregator",
> >> + .class = ACPI_PROCESSOR_AGGREGATOR_CLASS,
> >> + .ids = xen_pad_device_ids,
> >> + .ops = {
> >> + .add = xen_acpi_pad_add,
> >> + .remove = xen_acpi_pad_remove,
> >> + },
> >> +};
> >> +
> >> +static int __init xen_acpi_pad_init(void)
> >> +{
> >> + return acpi_bus_register_driver(&xen_acpi_pad_driver);
> >
> > If the acpi_bus_register_driver function could work with multiple
> > acpi_drivers that define the same class (and have some priority) would
> > this be easier?
>
> Not quite clear your point, could you elaborate more?

I was thinking it could allow multiple drivers servicing the same PNPxxx ID to be registered.
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Mar 26, 2012 at 12:38:20PM -0400, Konrad Rzeszutek Wilk wrote:
> On Mon, Mar 26, 2012 at 06:18:36AM +0000, Liu, Jinsong wrote:
> > Konrad Rzeszutek Wilk wrote:
> > > On Thu, Feb 23, 2012 at 01:31:25PM +0000, Liu, Jinsong wrote:
> > >>> From ba9abf6ee7e5fe0515e2d51b14743c8d5416285c Mon Sep 17 00:00:00
> > >>> 2001
> > >> From: Liu, Jinsong <jinsong.liu@intel.com>
> > >> Date: Fri, 24 Feb 2012 02:18:02 +0800
> > >> Subject: [PATCH 2/2] Xen pad logic
> > >>
> > >> This patch implement Xen pad logic, and when getting pad device
> > >> notification, it hypercalls to Xen hypervisor for core parking.
> > >
> > > Can you explain to me how and what pad device is? And how it functions
> > > right now in baremetal? And what kind of hardware do you need to use
> > > this?
> > > And what happens if you do not use it? Can one ignore the "pad"
> > > support?
> > > Please assume that I've a basic understanding of ACPI.
> > >
> > >
> > > Also, what happens now, if the this patch is not implemented? What
> > > will/is dom0 doing without these patches (so 3.2 for example on this
> > > machine)?
> > > Is it just idling using mwait on idle CPUs and ending up trapping in
> > > the hypervisor? Or is not mwaiting since the cstate.c doesn't get
> > > executed since we have:
> > >
> > > boot_option_idle_override = IDLE_HALT;
> > >
> > > in arch/x86/xen/setup.c ?
> >
> >
> > Pad is an ACPI device used to direct os taking some action (depend on os itsef) for the sake of power consumption. 2 objs (PUR and OST) could be declared under PAD and as bios/os interface.
> >
> > PAD itself is pointless, unless it co-work with some external power control s/w (like NPTM). For example currently in baremetal, NPTM engine trigger sci to ospm, then evaluate and call sci handler, through which bios notify os with PUR value by which os could take corresponding action and feedback bios through OST. I don't think it's a problem if user don't use PAD or ignore it, it only make some external power control s/w unuseable.
>
> What is NPTM? Sounds like a SMI firmware?
>
> >
> > As for how os handle pad notify is os business. Native kernel use round robin and mwait, considering some app workload affinity (I was told so). For xen we don't need care it since it's a question of vcpu level. For xen acpi_pad, dom0 patches used to parse PUR and hypercall to hypervisor, which in turn idle pcpu by its own algorithm.
>
> I presume you have some of this hardware - if you launch the latest linus/master (along with these
> patches https://bugzilla.redhat.com/show_bug.cgi?id=804347 patches) and compile CONFIG_ACPI_PAD_PROCESSOR=y
> and are running under Xen 4.1, what happens when the _PUR notifcation takes affect?

Err, I meant Xen-unstable rather than Xen 4.1, as the MWAIT_LEAF expose patch depends on patches that are only in Xen-unstable, not Xen 4.1.