On Wed, Jan 09, 2013 at 07:42:52AM +0000, Liu, Jinsong wrote:
> This patch implements real Xen acpi memory hotplug driver as module.
> When loaded, it replaces Xen stub driver.
>
> When an acpi memory device hotadd event occurs, it notifies OS and
> invokes notification callback, adding related memory device and parsing
> memory information, finally hypercall to xen hypervisor to add memory.
>
> Signed-off-by: Liu Jinsong <jinsong.liu@intel.com>
> ---
> drivers/xen/Kconfig | 11 +
> drivers/xen/Makefile | 1 +
> drivers/xen/xen-acpi-memhotplug.c | 487 +++++++++++++++++++++++++++++++++++++
> include/xen/interface/platform.h | 13 +-
> 4 files changed, 508 insertions(+), 4 deletions(-)
> create mode 100644 drivers/xen/xen-acpi-memhotplug.c
>
> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
> index 2986de9..b8cf899 100644
> --- a/drivers/xen/Kconfig
> +++ b/drivers/xen/Kconfig
> @@ -191,6 +191,17 @@ config XEN_STUB
>
> To enable Xen features like cpu and memory hotplug, select Y here.
>
> +config XEN_ACPI_HOTPLUG_MEMORY
> + tristate "Xen ACPI memory hotplug"
> + depends on XEN_STUB && ACPI
> + default n
> + help
> + This is Xen ACPI memory hotplug.
> +
> + Currently Xen only support ACPI memory hot-add. If you want
> + to hot-add memory at runtime (the hot-added memory cannot be
> + removed until machine stop), select Y/M here, otherwise select N.
> +
> config XEN_ACPI_PROCESSOR
> tristate "Xen ACPI processor"
> depends on XEN && X86 && ACPI_PROCESSOR && CPU_FREQ
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index b63edd8..1605f59 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -31,6 +31,7 @@ obj-$(CONFIG_XEN_MCE_LOG) += mcelog.o
> obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
> obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o
> obj-$(CONFIG_XEN_STUB) += xen-stub.o
> +obj-$(CONFIG_XEN_ACPI_HOTPLUG_MEMORY) += xen-acpi-memhotplug.o
> obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o
> xen-evtchn-y := evtchn.o
> xen-gntdev-y := gntdev.o
> diff --git a/drivers/xen/xen-acpi-memhotplug.c b/drivers/xen/xen-acpi-memhotplug.c
> new file mode 100644
> index 0000000..d207fec
> --- /dev/null
> +++ b/drivers/xen/xen-acpi-memhotplug.c
> @@ -0,0 +1,487 @@
> +/*
> + * Copyright (C) 2012 Intel Corporation
> + * Author: Liu Jinsong <jinsong.liu@intel.com>
> + * Author: Jiang Yunhong <yunhong.jiang@intel.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or (at
> + * your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
> + * NON INFRINGEMENT. See the GNU General Public License for more
> + * details.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/types.h>
> +#include <linux/acpi.h>
> +#include <acpi/acpi_drivers.h>
> +#include <xen/acpi.h>
> +#include <xen/interface/platform.h>
> +#include <asm/xen/hypercall.h>
> +
> +#define PREFIX "ACPI:xen_memory_hotplug:"
> +
> +struct acpi_memory_info {
> + struct list_head list;
> + u64 start_addr; /* Memory Range start physical addr */
> + u64 length; /* Memory Range length */
> + unsigned short caching; /* memory cache attribute */
> + unsigned short write_protect; /* memory read/write attribute */
> + /* copied from buffer getting from _CRS */
> + unsigned int enabled:1;
> +};
> +
> +struct acpi_memory_device {
> + struct acpi_device *device;
> + struct list_head res_list;
> +};
> +
> +static bool acpi_hotmem_initialized __read_mostly;
> +
> +static int xen_hotadd_memory(int pxm, struct acpi_memory_info *info)
> +{
> + struct xen_platform_op op;
> +
> + op.cmd = XENPF_mem_hotadd;
> + op.u.mem_add.spfn = info->start_addr >> PAGE_SHIFT;
> + op.u.mem_add.epfn = (info->start_addr + info->length) >> PAGE_SHIFT;
> + op.u.mem_add.pxm = pxm;
> +
> + return HYPERVISOR_dom0_op(&op);
Don't you want to print out the errors if it failed?
Say do:
	int rc;
	..
	rc = HYPERVISOR_dom0_op(&op);
	if (rc)
		pr_err(PREFIX "Hotplug Memory Add failed on %lx->%lx, _PXM: %d, error: %d\n",
			...
?
> +}
> +
> +static int xen_acpi_get_pxm(acpi_handle h)
> +{
> + unsigned long long pxm;
> + acpi_status status;
> + acpi_handle handle;
> + acpi_handle phandle = h;
> +
> + do {
> + handle = phandle;
> + status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
> + if (ACPI_SUCCESS(status))
> + return pxm;
> + status = acpi_get_parent(handle, &phandle);
> + } while (ACPI_SUCCESS(status));
> + return -1;
Ugh. Why not a normal -Exxx type error? Say -ENXIO?
> +}
> +
> +static int xen_acpi_memory_enable_device(struct acpi_memory_device *mem_device)
> +{
> + int pxm, result;
> + int num_enabled = 0;
> + struct acpi_memory_info *info;
> +
> + if (!mem_device)
> + return -EINVAL;
> +
> + pxm = xen_acpi_get_pxm(mem_device->device->handle);
> + if (pxm < 0)
> + return -EINVAL;
> +
> + list_for_each_entry(info, &mem_device->res_list, list) {
> + if (info->enabled) { /* just sanity check...*/
> + num_enabled++;
> + continue;
> + }
> +
> + if (!info->length)
> + continue;
> +
> + result = xen_hotadd_memory(pxm, info);
> + if (result)
> + continue;
> + info->enabled = 1;
> + num_enabled++;
> + }
> +
> + if (!num_enabled)
> + return -EINVAL;
Is that the correct error to be returned? I thought
-ENODEV would be more appropriate.
> +
> + return 0;
> +}
> +
> +static acpi_status
> +acpi_memory_get_resource(struct acpi_resource *resource, void *context)
> +{
> + struct acpi_memory_device *mem_device = context;
> + struct acpi_resource_address64 address64;
> + struct acpi_memory_info *info, *new;
> + acpi_status status;
> +
> + status = acpi_resource_to_address64(resource, &address64);
> + if (ACPI_FAILURE(status) ||
> + (address64.resource_type != ACPI_MEMORY_RANGE))
> + return AE_OK;
> +
> + list_for_each_entry(info, &mem_device->res_list, list) {
> + if ((info->caching == address64.info.mem.caching) &&
> + (info->write_protect == address64.info.mem.write_protect) &&
> + (info->start_addr + info->length == address64.minimum)) {
> + info->length += address64.address_length;
> + return AE_OK;
> + }
> + }
> +
> + new = kzalloc(sizeof(struct acpi_memory_info), GFP_KERNEL);
> + if (!new)
> + return AE_ERROR;
> +
> + INIT_LIST_HEAD(&new->list);
> + new->caching = address64.info.mem.caching;
> + new->write_protect = address64.info.mem.write_protect;
> + new->start_addr = address64.minimum;
> + new->length = address64.address_length;
> + list_add_tail(&new->list, &mem_device->res_list);
> +
> + return AE_OK;
> +}
> +
> +static int
> +acpi_memory_get_device_resources(struct acpi_memory_device *mem_device)
> +{
> + acpi_status status;
> + struct acpi_memory_info *info, *n;
> +
> + if (!list_empty(&mem_device->res_list))
> + return 0;
> +
> + status = acpi_walk_resources(mem_device->device->handle,
> + METHOD_NAME__CRS, acpi_memory_get_resource, mem_device);
> +
> + if (ACPI_FAILURE(status)) {
> + list_for_each_entry_safe(info, n, &mem_device->res_list, list)
> + kfree(info);
> + INIT_LIST_HEAD(&mem_device->res_list);
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +static int
> +acpi_memory_get_device(acpi_handle handle,
> + struct acpi_memory_device **mem_device)
> +{
> + acpi_status status;
> + acpi_handle phandle;
> + struct acpi_device *device = NULL;
> + struct acpi_device *pdevice = NULL;
> + int result;
> +
> + if (!acpi_bus_get_device(handle, &device) && device)
> + goto end;
> +
> + status = acpi_get_parent(handle, &phandle);
> + if (ACPI_FAILURE(status)) {
> + pr_warn(PREFIX "Cannot find acpi parent\n");
> + return -EINVAL;
> + }
> +
> + /* Get the parent device */
> + result = acpi_bus_get_device(phandle, &pdevice);
> + if (result) {
> + pr_warn(PREFIX "Cannot get acpi bus device\n");
> + return -EINVAL;
> + }
> +
> + /*
> + * Now add the notified device. This creates the acpi_device
> + * and invokes .add function
> + */
> + result = acpi_bus_add(&device, pdevice, handle, ACPI_BUS_TYPE_DEVICE);
> + if (result) {
> + pr_warn(PREFIX "Cannot add acpi bus\n");
> + return -EINVAL;
> + }
> +
> +end:
> + *mem_device = acpi_driver_data(device);
> + if (!(*mem_device)) {
> + pr_err(PREFIX "Driver data not found\n");
> + return -ENODEV;
> + }
> +
> + return 0;
> +}
> +
> +static int acpi_memory_check_device(struct acpi_memory_device *mem_device)
> +{
> + unsigned long long current_status;
> +
> + /* Get device present/absent information from the _STA */
> + if (ACPI_FAILURE(acpi_evaluate_integer(mem_device->device->handle,
> + "_STA", NULL, &current_status)))
> + return -ENODEV;
> + /*
> + * Check for device status. Device should be
> + * present/enabled/functioning.
> + */
> + if (!((current_status & ACPI_STA_DEVICE_PRESENT)
> + && (current_status & ACPI_STA_DEVICE_ENABLED)
> + && (current_status & ACPI_STA_DEVICE_FUNCTIONING)))
> + return -ENODEV;
> +
> + return 0;
> +}
> +
> +static int acpi_memory_disable_device(struct acpi_memory_device *mem_device)
> +{
> + pr_warn(PREFIX "Xen does not support memory hotremove\n");
So is this going to show in the dmesg if the user supplies the '0' in the
SysFS?
Hmm, perhaps that should be pr_debug, as the -ENOSYS is enough to tell
the user that we don't support it.
> +
> + return -ENOSYS;
> +}
> +
> +static void acpi_memory_device_notify(acpi_handle handle, u32 event, void *data)
> +{
> + struct acpi_memory_device *mem_device;
> + struct acpi_device *device;
> + u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; /* default */
> +
> + switch (event) {
> + case ACPI_NOTIFY_BUS_CHECK:
> + ACPI_DEBUG_PRINT((ACPI_DB_INFO,
> + "\nReceived BUS CHECK notification for device\n"));
> + /* Fall Through */
> + case ACPI_NOTIFY_DEVICE_CHECK:
> + if (event == ACPI_NOTIFY_DEVICE_CHECK)
> + ACPI_DEBUG_PRINT((ACPI_DB_INFO,
> + "\nReceived DEVICE CHECK notification for device\n"));
> +
> + if (acpi_memory_get_device(handle, &mem_device)) {
> + pr_err(PREFIX "Cannot find driver data\n");
> + break;
> + }
> +
> + ost_code = ACPI_OST_SC_SUCCESS;
> + break;
> +
> + case ACPI_NOTIFY_EJECT_REQUEST:
> + ACPI_DEBUG_PRINT((ACPI_DB_INFO,
> + "\nReceived EJECT REQUEST notification for device\n"));
> +
> + if (acpi_bus_get_device(handle, &device)) {
> + pr_err(PREFIX "Device doesn't exist\n");
> + break;
> + }
> + mem_device = acpi_driver_data(device);
> + if (!mem_device) {
> + pr_err(PREFIX "Driver Data is NULL\n");
> + break;
> + }
> +
> + /*
> + * TBD: implement acpi_memory_disable_device and invoke
> + * acpi_bus_remove if Xen support hotremove in the future
> + */
> + acpi_memory_disable_device(mem_device);
> + break;
> +
> + default:
> + ACPI_DEBUG_PRINT((ACPI_DB_INFO,
> + "Unsupported event [0x%x]\n", event));
> + /* non-hotplug event; possibly handled by other handler */
> + return;
> + }
> +
> + (void) acpi_evaluate_hotplug_ost(handle, event, ost_code, NULL);
> + return;
> +}
> +
> +static int xen_acpi_memory_device_add(struct acpi_device *device)
> +{
> + int result;
> + struct acpi_memory_device *mem_device = NULL;
> +
> +
> + if (!device)
> + return -EINVAL;
> +
> + mem_device = kzalloc(sizeof(struct acpi_memory_device), GFP_KERNEL);
> + if (!mem_device)
> + return -ENOMEM;
> +
> + INIT_LIST_HEAD(&mem_device->res_list);
> + mem_device->device = device;
> + sprintf(acpi_device_name(device), "%s", ACPI_MEMORY_DEVICE_NAME);
> + sprintf(acpi_device_class(device), "%s", ACPI_MEMORY_DEVICE_CLASS);
> + device->driver_data = mem_device;
> +
> + /* Get the range from the _CRS */
> + result = acpi_memory_get_device_resources(mem_device);
> + if (result) {
> + kfree(mem_device);
> + return result;
> + }
> +
> + /*
> + * Early boot code has recognized memory area by EFI/E820.
> + * If DSDT shows these memory devices on boot, hotplug is not necessary
> + * for them. So, it just returns until completion of this driver's
> + * start up.
"So, it just returns until completion of this driver's start up."
Can you change that to be:
"Return OK until this driver starts up."
But then... how can this function be called with acpi_hotmem_initialized=false?
Is it because of the acpi_walk_namespace call? How about you state that:
"This can be done via the acpi_walk_namespace which is called during
early boot, and acpi_hotmem_initialized is set _after_ that call
has completed."
> + */
> + if (!acpi_hotmem_initialized)
> + return 0;
> +
> + if (!acpi_memory_check_device(mem_device))
> + result = xen_acpi_memory_enable_device(mem_device);
> +
> + return result;
> +}
> +
> +static int xen_acpi_memory_device_remove(struct acpi_device *device, int type)
> +{
> + struct acpi_memory_device *mem_device = NULL;
> +
> + if (!device || !acpi_driver_data(device))
> + return -EINVAL;
> +
> + mem_device = acpi_driver_data(device);
> + kfree(mem_device);
> +
> + return 0;
> +}
> +
> +/*
> + * Helper function to check for memory device
> + */
> +static acpi_status is_memory_device(acpi_handle handle)
> +{
> + char *hardware_id;
> + acpi_status status;
> + struct acpi_device_info *info;
> +
> + status = acpi_get_object_info(handle, &info);
> + if (ACPI_FAILURE(status))
> + return status;
> +
> + if (!(info->valid & ACPI_VALID_HID)) {
> + kfree(info);
> + return AE_ERROR;
> + }
> +
> + hardware_id = info->hardware_id.string;
> + if ((hardware_id == NULL) ||
> + (strcmp(hardware_id, ACPI_MEMORY_DEVICE_HID)))
> + status = AE_ERROR;
> +
> + kfree(info);
> + return status;
> +}
> +
> +static acpi_status
> +acpi_memory_register_notify_handler(acpi_handle handle,
> + u32 level, void *ctxt, void **retv)
> +{
> + acpi_status status;
> +
> + status = is_memory_device(handle);
> + if (ACPI_FAILURE(status))
> + return AE_OK; /* continue */
> +
> + status = acpi_install_notify_handler(handle, ACPI_SYSTEM_NOTIFY,
> + acpi_memory_device_notify, NULL);
> + /* continue */
> + return AE_OK;
> +}
> +
> +static acpi_status
> +acpi_memory_deregister_notify_handler(acpi_handle handle,
> + u32 level, void *ctxt, void **retv)
> +{
> + acpi_status status;
> +
> + status = is_memory_device(handle);
> + if (ACPI_FAILURE(status))
> + return AE_OK; /* continue */
> +
> + status = acpi_remove_notify_handler(handle,
> + ACPI_SYSTEM_NOTIFY,
> + acpi_memory_device_notify);
> +
> + return AE_OK; /* continue */
> +}
> +
> +static const struct acpi_device_id memory_device_ids[] = {
> + {ACPI_MEMORY_DEVICE_HID, 0},
> + {"", 0},
> +};
> +MODULE_DEVICE_TABLE(acpi, memory_device_ids);
> +
> +static struct acpi_driver xen_acpi_memory_device_driver = {
> + .name = "acpi_memhotplug",
> + .class = ACPI_MEMORY_DEVICE_CLASS,
> + .ids = memory_device_ids,
> + .ops = {
> + .add = xen_acpi_memory_device_add,
> + .remove = xen_acpi_memory_device_remove,
> + },
> +};
> +
> +static int __init xen_acpi_memory_device_init(void)
> +{
> + int result;
> + acpi_status status;
> +
> + if (!xen_initial_domain())
> + return -ENODEV;
> +
> + /* unregister the stub which only used to reserve driver space */
> + acpi_bus_unregister_driver(&xen_stub_memory_device_driver);
> +
> + result = acpi_bus_register_driver(&xen_acpi_memory_device_driver);
> + if (result < 0)
> + return -ENODEV;
Shouldn't we then try to re-register the stub driver?
> +
> + status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
> + ACPI_UINT32_MAX,
> + acpi_memory_register_notify_handler,
> + NULL, NULL, NULL);
> +
> + if (ACPI_FAILURE(status)) {
> + pr_warn(PREFIX "walk_namespace failed\n");
> + acpi_bus_unregister_driver(&xen_acpi_memory_device_driver);
Ditto here.
> + return -ENODEV;
> + }
> +
> + acpi_hotmem_initialized = 1;
s/1/true/
> + return 0;
> +}
> +
> +static void __exit xen_acpi_memory_device_exit(void)
> +{
> + acpi_status status;
> +
> + if (!xen_initial_domain())
> + return;
> +
> + status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
> + ACPI_UINT32_MAX,
> + acpi_memory_deregister_notify_handler,
> + NULL, NULL, NULL);
> + if (ACPI_FAILURE(status))
> + pr_warn(PREFIX "walk_namespace failed\n");
> +
> + acpi_bus_unregister_driver(&xen_acpi_memory_device_driver);
> +
> + /*
> + * stub reserve space again to prevent any chance of native
> + * driver loading, though not much meaning in real life
"not much meaning in real life"? What does that mean?
> + */
> + acpi_bus_register_driver(&xen_stub_memory_device_driver);
> + return;
> +}
> +
> +module_init(xen_acpi_memory_device_init);
> +module_exit(xen_acpi_memory_device_exit);
> +ACPI_MODULE_NAME("xen-acpi-memhotplug");
> +MODULE_AUTHOR("Liu Jinsong <jinsong.liu@intel.com>");
> +MODULE_DESCRIPTION("Xen Hotplug Mem Driver");
> +MODULE_LICENSE("GPL");
> diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
> index 5e36932..2c4fb4b 100644
> --- a/include/xen/interface/platform.h
> +++ b/include/xen/interface/platform.h
> @@ -324,10 +324,14 @@ struct xenpf_cpu_ol {
> };
> DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol);
>
> -/*
> - * CMD 58 and 59 are reserved for cpu hotadd and memory hotadd,
> - * which are already occupied at Xen hypervisor side.
> - */
> +#define XENPF_mem_hotadd 59
> +struct xenpf_mem_hotadd {
> + uint64_t spfn;
> + uint64_t epfn;
> + uint32_t pxm;
> + uint32_t flags;
> +};
> +
> #define XENPF_core_parking 60
> struct xenpf_core_parking {
> /* IN variables */
> @@ -357,6 +361,7 @@ struct xen_platform_op {
> struct xenpf_set_processor_pminfo set_pminfo;
> struct xenpf_pcpuinfo pcpu_info;
> struct xenpf_cpu_ol cpu_ol;
> + struct xenpf_mem_hotadd mem_add;
> struct xenpf_core_parking core_parking;
> uint8_t pad[128];
> } u;
> --
> 1.7.1