thr3ads.net - Xen devel - [PATCH 3/3] Xen physical cpus interface [Apr 2012]

If this information is useful, please help other people find it:
Share via:

Liu, Jinsong

2012-Apr-19 13:33 UTC

[PATCH 3/3] Xen physical cpus interface

From afdd30a7769e706e9be64f80d64136e2e267ecb9 Mon Sep 17 00:00:00 2001
From: Liu, Jinsong <jinsong.liu@intel.com>
Date: Fri, 20 Apr 2012 05:11:58 +0800
Subject: [PATCH 3/3] Xen physical cpus interface

Manage physical cpus in dom0, get physical cpus info and provide sys interface.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
---
 drivers/xen/Makefile             |    1 +
 drivers/xen/pcpu.c               |  393 ++++++++++++++++++++++++++++++++++++++
 include/xen/interface/platform.h |   10 +-
 include/xen/interface/xen.h      |    1 +
 4 files changed, 404 insertions(+), 1 deletions(-)
 create mode 100644 drivers/xen/pcpu.c

diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 1d3e763..d12d14d 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_XEN_PVHVM)			+= platform-pci.o
 obj-$(CONFIG_XEN_TMEM)			+= tmem.o
 obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
 obj-$(CONFIG_XEN_DOM0)			+= pci.o
+obj-$(CONFIG_XEN_DOM0)			+= pcpu.o
 obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= xen-pciback/
 obj-$(CONFIG_XEN_PRIVCMD)		+= xen-privcmd.o
 obj-$(CONFIG_XEN_ACPI_PROCESSOR)	+= xen-acpi-processor.o
diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c
new file mode 100644
index 0000000..9295def
--- /dev/null
+++ b/drivers/xen/pcpu.c
@@ -0,0 +1,393 @@
+/******************************************************************************
+ * pcpu.c
+ * Management physical cpu in dom0, get pcpu info and provide sys interface
+ *
+ * Copyright (c) 2012 Intel Corporation
+ * Author: Liu, Jinsong <jinsong.liu@intel.com>
+ * Author: Jiang, Yunhong <yunhong.jiang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software
without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/cpu.h>
+#include <linux/stat.h>
+#include <xen/xen.h>
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#include <xen/interface/platform.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+struct pcpu {
+	struct list_head pcpu_list;
+	struct device dev;
+	uint32_t xen_id;
+	uint32_t apic_id;
+	uint32_t acpi_id;
+	uint32_t flags;
+};
+
+static struct bus_type xen_pcpu_subsys = {
+	.name = "xen_cpu",
+	.dev_name = "xen_cpu",
+};
+
+static DEFINE_MUTEX(xen_pcpu_lock);
+#define get_pcpu_lock() mutex_lock(&xen_pcpu_lock);
+#define put_pcpu_lock() mutex_unlock(&xen_pcpu_lock);
+
+#define XEN_PCPU "xen_cpu: "
+
+struct xen_pcpus {
+	struct list_head list;
+};
+static struct xen_pcpus xen_pcpus;
+
+static int xen_pcpu_down(uint32_t xen_id)
+{
+	int ret;
+	struct xen_platform_op op = {
+		.cmd			= XENPF_cpu_offline,
+		.interface_version	= XENPF_INTERFACE_VERSION,
+		.u.cpu_ol.cpuid		= xen_id,
+	};
+
+	ret = HYPERVISOR_dom0_op(&op);
+	return ret;
+}
+
+static int xen_pcpu_up(uint32_t xen_id)
+{
+	int ret;
+	struct xen_platform_op op = {
+		.cmd			= XENPF_cpu_online,
+		.interface_version	= XENPF_INTERFACE_VERSION,
+		.u.cpu_ol.cpuid		= xen_id,
+	};
+
+	ret = HYPERVISOR_dom0_op(&op);
+	return ret;
+}
+
+static ssize_t show_online(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
+
+	return sprintf(buf, "%u\n", !!(cpu->flags &
XEN_PCPU_FLAGS_ONLINE));
+}
+
+static ssize_t __ref store_online(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
+	ssize_t ret;
+
+	switch (buf[0]) {
+	case ''0'':
+		ret = xen_pcpu_down(cpu->xen_id);
+		break;
+	case ''1'':
+		ret = xen_pcpu_up(cpu->xen_id);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (ret >= 0)
+		ret = count;
+	return ret;
+}
+static DEVICE_ATTR(online, S_IRUGO | S_IWUSR, show_online, store_online);
+
+static ssize_t show_apicid(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
+
+	return sprintf(buf, "%u\n", cpu->apic_id);
+}
+
+static ssize_t show_acpiid(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
+
+	return sprintf(buf, "%u\n", cpu->acpi_id);
+}
+static DEVICE_ATTR(apic_id, S_IRUGO, show_apicid, NULL);
+static DEVICE_ATTR(acpi_id, S_IRUGO, show_acpiid, NULL);
+
+static bool xen_pcpu_online(uint32_t flags)
+{
+	return !!(flags & XEN_PCPU_FLAGS_ONLINE);
+}
+
+static void pcpu_online_status(struct xenpf_pcpuinfo *info,
+			       struct pcpu *pcpu)
+{
+	if (xen_pcpu_online(info->flags) &&
+	   !xen_pcpu_online(pcpu->flags)) {
+		/* the pcpu is onlined */
+		pcpu->flags |= XEN_PCPU_FLAGS_ONLINE;
+		kobject_uevent(&pcpu->dev.kobj, KOBJ_ONLINE);
+	} else if (!xen_pcpu_online(info->flags) &&
+		    xen_pcpu_online(pcpu->flags)) {
+		/* The pcpu is offlined */
+		pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE;
+		kobject_uevent(&pcpu->dev.kobj, KOBJ_OFFLINE);
+	}
+}
+
+static struct pcpu *get_pcpu(uint32_t xen_id)
+{
+	struct pcpu *pcpu = NULL;
+
+	list_for_each_entry(pcpu, &xen_pcpus.list, pcpu_list) {
+		if (pcpu->xen_id == xen_id)
+			return pcpu;
+	}
+	return NULL;
+}
+
+static void pcpu_sys_remove(struct pcpu *pcpu)
+{
+	struct device *dev;
+
+	if (!pcpu)
+		return;
+
+	dev = &pcpu->dev;
+	if (dev->id)
+		device_remove_file(dev, &dev_attr_online);
+	device_remove_file(dev, &dev_attr_acpi_id);
+	device_remove_file(dev, &dev_attr_apic_id);
+	device_unregister(dev);
+}
+
+static void free_pcpu(struct pcpu *pcpu)
+{
+	if (!pcpu)
+		return;
+
+	pcpu_sys_remove(pcpu);
+	list_del(&pcpu->pcpu_list);
+	kfree(pcpu);
+}
+
+static int pcpu_sys_create(struct pcpu *pcpu)
+{
+	struct device *dev;
+	int err = -EINVAL;
+
+	if (!pcpu)
+		return err;
+
+	dev = &pcpu->dev;
+	dev->bus = &xen_pcpu_subsys;
+	dev->id = pcpu->xen_id;
+
+	err = device_register(dev);
+	if (err)
+		goto err1;
+
+	err = device_create_file(dev, &dev_attr_apic_id);
+	if (err)
+		goto err2;
+	err = device_create_file(dev, &dev_attr_acpi_id);
+	if (err)
+		goto err3;
+	/* Not open pcpu0 online access to user */
+	if (dev->id) {
+		err = device_create_file(dev, &dev_attr_online);
+		if (err)
+			goto err4;
+	}
+
+	return 0;
+
+err4:
+	device_remove_file(dev, &dev_attr_acpi_id);
+err3:
+	device_remove_file(dev, &dev_attr_apic_id);
+err2:
+	device_unregister(dev);
+err1:
+	return err;
+}
+
+static struct pcpu *init_pcpu(struct xenpf_pcpuinfo *info)
+{
+	struct pcpu *pcpu;
+	int err;
+
+	if (info->flags & XEN_PCPU_FLAGS_INVALID)
+		return ERR_PTR(-ENODEV);
+
+	pcpu = kzalloc(sizeof(struct pcpu), GFP_KERNEL);
+	if (!pcpu)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&pcpu->pcpu_list);
+	pcpu->xen_id = info->xen_cpuid;
+	pcpu->apic_id = info->apic_id;
+	pcpu->acpi_id = info->acpi_id;
+	pcpu->flags = info->flags;
+
+	err = pcpu_sys_create(pcpu);
+	if (err) {
+		pr_warning(XEN_PCPU "Failed to register pcpu%d\n",
+			   info->xen_cpuid);
+		kfree(pcpu);
+		return ERR_PTR(-ENOENT);
+	}
+
+	list_add_tail(&pcpu->pcpu_list, &xen_pcpus.list);
+	return pcpu;
+}
+
+/*
+ * Caller should hold the pcpu lock
+ */
+static int _sync_pcpu(uint32_t cpu_num, uint32_t *ptr_max_cpu_num)
+{
+	int ret;
+	struct pcpu *pcpu = NULL;
+	struct xenpf_pcpuinfo *info;
+	struct xen_platform_op op = {
+		.cmd                   = XENPF_get_cpuinfo,
+		.interface_version     = XENPF_INTERFACE_VERSION,
+		.u.pcpu_info.xen_cpuid = cpu_num,
+	};
+
+	ret = HYPERVISOR_dom0_op(&op);
+	if (ret)
+		return ret;
+
+	info = &op.u.pcpu_info;
+	if (ptr_max_cpu_num)
+		*ptr_max_cpu_num = info->max_present;
+
+	pcpu = get_pcpu(cpu_num);
+
+	if (info->flags & XEN_PCPU_FLAGS_INVALID) {
+		/* The pcpu has been removed */
+		if (pcpu)
+			free_pcpu(pcpu);
+		return 0;
+	}
+
+	if (!pcpu) {
+		pcpu = init_pcpu(info);
+		if (IS_ERR_OR_NULL(pcpu))
+			return -ENODEV;
+	} else
+		pcpu_online_status(info, pcpu);
+
+	return 0;
+}
+
+/*
+ * Sync dom0''s pcpu information with xen hypervisor''s
+ */
+static int xen_sync_pcpus(void)
+{
+	/*
+	 * Boot cpu always have cpu_id 0 in xen
+	 */
+	uint32_t cpu_num = 0, max_cpu_num = 0;
+	int err = 0;
+	struct list_head *elem, *tmp;
+	struct pcpu *pcpu;
+
+	get_pcpu_lock();
+
+	while (!err && (cpu_num <= max_cpu_num)) {
+		err = _sync_pcpu(cpu_num, &max_cpu_num);
+		cpu_num++;
+	}
+
+	if (err) {
+		list_for_each_safe(elem, tmp, &xen_pcpus.list) {
+			pcpu = list_entry(elem, struct pcpu, pcpu_list);
+			free_pcpu(pcpu);
+		}
+	}
+
+	put_pcpu_lock();
+
+	return err;
+}
+
+static void xen_pcpu_dpc(struct work_struct *work)
+{
+	xen_sync_pcpus();
+}
+static DECLARE_WORK(xen_pcpu_work, xen_pcpu_dpc);
+
+static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id)
+{
+	schedule_work(&xen_pcpu_work);
+	return IRQ_HANDLED;
+}
+
+static int __init xen_pcpu_init(void)
+{
+	int ret;
+
+	if (!xen_initial_domain())
+		return -ENODEV;
+
+	ret = subsys_system_register(&xen_pcpu_subsys, NULL);
+	if (ret) {
+		pr_warning(XEN_PCPU "Failed to register pcpu subsys\n");
+		return ret;
+	}
+
+	INIT_LIST_HEAD(&xen_pcpus.list);
+
+	ret = xen_sync_pcpus();
+	if (ret) {
+		pr_warning(XEN_PCPU "Failed to sync pcpu info\n");
+		return ret;
+	}
+
+	ret = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, 0,
+				      xen_pcpu_interrupt, 0,
+				      "pcpu", NULL);
+	if (ret < 0) {
+		pr_warning(XEN_PCPU "Failed to bind pcpu virq\n");
+		return ret;
+	}
+
+	return 0;
+}
+arch_initcall(xen_pcpu_init);
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
index 486653f..2c56d4f 100644
--- a/include/xen/interface/platform.h
+++ b/include/xen/interface/platform.h
@@ -298,7 +298,7 @@ struct xenpf_set_processor_pminfo {
 };
 DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo);
 
-#define XENPF_get_cpuinfo 55
+#define XENPF_get_cpuinfo	55
 struct xenpf_pcpuinfo {
 	/* IN */
 	uint32_t xen_cpuid;
@@ -314,6 +314,13 @@ struct xenpf_pcpuinfo {
 };
 DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo);
 
+#define XENPF_cpu_online	56
+#define XENPF_cpu_offline	57
+struct xenpf_cpu_ol {
+	uint32_t cpuid;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol);
+
 struct xen_platform_op {
 	uint32_t cmd;
 	uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -330,6 +337,7 @@ struct xen_platform_op {
 		struct xenpf_getidletime       getidletime;
 		struct xenpf_set_processor_pminfo set_pminfo;
 		struct xenpf_pcpuinfo          pcpu_info;
+		struct xenpf_cpu_ol            cpu_ol;
 		uint8_t                        pad[128];
 	} u;
 };
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index a890804..0801468 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -80,6 +80,7 @@
 #define VIRQ_CONSOLE    2  /* (DOM0) Bytes received on emergency console. */
 #define VIRQ_DOM_EXC    3  /* (DOM0) Exceptional event for some domain.   */
 #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
+#define VIRQ_PCPU_STATE 9  /* (DOM0) PCPU state changed                   */
 
 /* Architecture-specific VIRQ definitions. */
 #define VIRQ_ARCH_0    16
-- 
1.7.1

Konrad Rzeszutek Wilk

2012-Apr-20 19:24 UTC

head link

Re: [PATCH 3/3] Xen physical cpus interface

On Thu, Apr 19, 2012 at 01:33:03PM +0000, Liu, Jinsong
wrote:> >From afdd30a7769e706e9be64f80d64136e2e267ecb9 Mon Sep 17 00:00:00 2001
> From: Liu, Jinsong <jinsong.liu@intel.com>
> Date: Fri, 20 Apr 2012 05:11:58 +0800
> Subject: [PATCH 3/3] Xen physical cpus interface
> 
> Manage physical cpus in dom0, get physical cpus info and provide sys
interface.
Anything that exposes SysFS attributes needs documentation in 
Documentation/ABI

Can you explain what this solves? And if there are any
userland applications that use this?

> 
> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
> Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
> ---
>  drivers/xen/Makefile             |    1 +
>  drivers/xen/pcpu.c               |  393
++++++++++++++++++++++++++++++++++++++
>  include/xen/interface/platform.h |   10 +-
>  include/xen/interface/xen.h      |    1 +
>  4 files changed, 404 insertions(+), 1 deletions(-)
>  create mode 100644 drivers/xen/pcpu.c
> 
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index 1d3e763..d12d14d 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -19,6 +19,7 @@ obj-$(CONFIG_XEN_PVHVM)			+= platform-pci.o
>  obj-$(CONFIG_XEN_TMEM)			+= tmem.o
>  obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
>  obj-$(CONFIG_XEN_DOM0)			+= pci.o
> +obj-$(CONFIG_XEN_DOM0)			+= pcpu.o
>  obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= xen-pciback/
>  obj-$(CONFIG_XEN_PRIVCMD)		+= xen-privcmd.o
>  obj-$(CONFIG_XEN_ACPI_PROCESSOR)	+= xen-acpi-processor.o
> diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c
> new file mode 100644
> index 0000000..9295def
> --- /dev/null
> +++ b/drivers/xen/pcpu.c
> @@ -0,0 +1,393 @@
>
+/******************************************************************************
> + * pcpu.c
> + * Management physical cpu in dom0, get pcpu info and provide sys
interface
> + *
> + * Copyright (c) 2012 Intel Corporation
> + * Author: Liu, Jinsong <jinsong.liu@intel.com>
> + * Author: Jiang, Yunhong <yunhong.jiang@intel.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version 2
> + * as published by the Free Software Foundation; or, when distributed
> + * separately from the Linux kernel or incorporated into other
> + * software packages, subject to the following license:
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
copy
> + * of this source file (the "Software"), to deal in the Software
without
> + * restriction, including without limitation the rights to use, copy,
modify,
> + * merge, publish, distribute, sublicense, and/or sell copies of the
Software,
> + * and to permit persons to whom the Software is furnished to do so,
subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included
in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include <linux/interrupt.h>
> +#include <linux/spinlock.h>
> +#include <linux/cpu.h>
> +#include <linux/stat.h>
> +#include <xen/xen.h>
> +#include <xen/xenbus.h>
> +#include <xen/events.h>
> +#include <xen/interface/platform.h>
> +#include <asm/xen/hypervisor.h>
> +#include <asm/xen/hypercall.h>
> +
> +struct pcpu {
> +	struct list_head pcpu_list;
> +	struct device dev;
> +	uint32_t xen_id;
What is a ''xen_id''? Can you put a provide a comment
explaining the relationship between that and apic_id acpi_id
and flags?

Or better yet, just at the start of the structure have:

/*
 * @xen_id The ...
 * @apic_id ... so on.

> +	uint32_t apic_id;
> +	uint32_t acpi_id;
> +	uint32_t flags;
> +};
> +
> +static struct bus_type xen_pcpu_subsys = {
> +	.name = "xen_cpu",
> +	.dev_name = "xen_cpu",
> +};
> +
> +static DEFINE_MUTEX(xen_pcpu_lock);
> +#define get_pcpu_lock() mutex_lock(&xen_pcpu_lock);
Just use the mutex - no point of using the #define

> +#define put_pcpu_lock() mutex_unlock(&xen_pcpu_lock);
> +
> +#define XEN_PCPU "xen_cpu: "
> +
> +struct xen_pcpus {
> +	struct list_head list;
Uh, so it is just a list? > +};
> +static struct xen_pcpus xen_pcpus;
Can''t you just do

static struct pcpu * xen_pcpus;

And then use that to add/remove items?
> +
> +static int xen_pcpu_down(uint32_t xen_id)
> +{
> +	int ret;
> +	struct xen_platform_op op = {
> +		.cmd			= XENPF_cpu_offline,
> +		.interface_version	= XENPF_INTERFACE_VERSION,
> +		.u.cpu_ol.cpuid		= xen_id,
> +	};
> +
> +	ret = HYPERVISOR_dom0_op(&op);
> +	return ret;
Just collapse it in:
	return HYPERVISOR_dom0_op..
> +}
> +
> +static int xen_pcpu_up(uint32_t xen_id)
> +{
> +	int ret;
> +	struct xen_platform_op op = {
> +		.cmd			= XENPF_cpu_online,
> +		.interface_version	= XENPF_INTERFACE_VERSION,
> +		.u.cpu_ol.cpuid		= xen_id,
> +	};
> +
> +	ret = HYPERVISOR_dom0_op(&op);
> +	return ret;
Ditto.
> +}
> +
> +static ssize_t show_online(struct device *dev,
> +			   struct device_attribute *attr,
> +			   char *buf)
> +{
> +	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
> +
> +	return sprintf(buf, "%u\n", !!(cpu->flags &
XEN_PCPU_FLAGS_ONLINE));
> +}
> +
> +static ssize_t __ref store_online(struct device *dev,
> +				  struct device_attribute *attr,
> +				  const char *buf, size_t count)
> +{
> +	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
> +	ssize_t ret;
> +
Add:
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

> +	switch (buf[0]) {
Use strict_strtoull pls.
> +	case ''0'':
> +		ret = xen_pcpu_down(cpu->xen_id);
> +		break;
> +	case ''1'':
> +		ret = xen_pcpu_up(cpu->xen_id);
> +		break;
> +	default:
> +		ret = -EINVAL;
> +	}
> +
> +	if (ret >= 0)
> +		ret = count;
> +	return ret;
> +}
> +static DEVICE_ATTR(online, S_IRUGO | S_IWUSR, show_online, store_online);
> +
> +static ssize_t show_apicid(struct device *dev,
> +			   struct device_attribute *attr,
> +			   char *buf)
> +{
> +	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
> +
> +	return sprintf(buf, "%u\n", cpu->apic_id);
> +}
> +
> +static ssize_t show_acpiid(struct device *dev,
> +			   struct device_attribute *attr,
> +			   char *buf)
> +{
> +	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
> +
> +	return sprintf(buf, "%u\n", cpu->acpi_id);
> +}
> +static DEVICE_ATTR(apic_id, S_IRUGO, show_apicid, NULL);
> +static DEVICE_ATTR(acpi_id, S_IRUGO, show_acpiid, NULL);
What benefit is there in exposing these values to the user?
> +
> +static bool xen_pcpu_online(uint32_t flags)
> +{
> +	return !!(flags & XEN_PCPU_FLAGS_ONLINE);
> +}
> +
> +static void pcpu_online_status(struct xenpf_pcpuinfo *info,
> +			       struct pcpu *pcpu)
> +{
> +	if (xen_pcpu_online(info->flags) &&
> +	   !xen_pcpu_online(pcpu->flags)) {
> +		/* the pcpu is onlined */
> +		pcpu->flags |= XEN_PCPU_FLAGS_ONLINE;
> +		kobject_uevent(&pcpu->dev.kobj, KOBJ_ONLINE);
> +	} else if (!xen_pcpu_online(info->flags) &&
> +		    xen_pcpu_online(pcpu->flags)) {
> +		/* The pcpu is offlined */
> +		pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE;
> +		kobject_uevent(&pcpu->dev.kobj, KOBJ_OFFLINE);
> +	}
> +}
> +
> +static struct pcpu *get_pcpu(uint32_t xen_id)
> +{
> +	struct pcpu *pcpu = NULL;
> +
> +	list_for_each_entry(pcpu, &xen_pcpus.list, pcpu_list) {
> +		if (pcpu->xen_id == xen_id)
> +			return pcpu;
> +	}
> +	return NULL;
> +}
> +
> +static void pcpu_sys_remove(struct pcpu *pcpu)
> +{
> +	struct device *dev;
> +
> +	if (!pcpu)
> +		return;
> +
> +	dev = &pcpu->dev;
> +	if (dev->id)
> +		device_remove_file(dev, &dev_attr_online);
> +	device_remove_file(dev, &dev_attr_acpi_id);
> +	device_remove_file(dev, &dev_attr_apic_id);
> +	device_unregister(dev);
So .. if you are using that, can''t you use the .release
and define something like:

static void pcpu_release(struct device *dev)
{
	struct pcpu *pcpu = container_of(dev, struct pcpu, dev);

	list_del(&pcpu->pcpu_list);
	kfree(pcpu);
}> +}
> +
> +static void free_pcpu(struct pcpu *pcpu)
> +{
> +	if (!pcpu)
> +		return;
> +
> +	pcpu_sys_remove(pcpu);
> +	list_del(&pcpu->pcpu_list);
> +	kfree(pcpu);
Those two shouldn''t be there. They sould be part of the
release function.> +}
> +
> +static int pcpu_sys_create(struct pcpu *pcpu)
> +{
> +	struct device *dev;
> +	int err = -EINVAL;
> +
> +	if (!pcpu)
> +		return err;
> +
> +	dev = &pcpu->dev;
> +	dev->bus = &xen_pcpu_subsys;
> +	dev->id = pcpu->xen_id;
And then here dev->release = &pcpu_release;
> +
> +	err = device_register(dev);
> +	if (err)
> +		goto err1;
> +
> +	err = device_create_file(dev, &dev_attr_apic_id);
> +	if (err)
> +		goto err2;
> +	err = device_create_file(dev, &dev_attr_acpi_id);
> +	if (err)
> +		goto err3;
Usually this is done with an array, like the drivers/xen/xen-balloon.c
..
But I am still not sure why these two parameters are needed?
> +	/* Not open pcpu0 online access to user */
Huh? You mean "Nobody can touch PCPU0" ?

Why? Why can they touch the other ones? And better yet,
what happens if one boots without "dom0_max_vcpus=X"
and powers of some of the CPUs?
> +	if (dev->id) {
> +		err = device_create_file(dev, &dev_attr_online);
> +		if (err)
> +			goto err4;
> +	}
> +
> +	return 0;
> +
> +err4:
> +	device_remove_file(dev, &dev_attr_acpi_id);
> +err3:
> +	device_remove_file(dev, &dev_attr_apic_id);
> +err2:
> +	device_unregister(dev);
> +err1:
> +	return err;
> +}
> +
> +static struct pcpu *init_pcpu(struct xenpf_pcpuinfo *info)
> +{
> +	struct pcpu *pcpu;
> +	int err;
> +
> +	if (info->flags & XEN_PCPU_FLAGS_INVALID)
> +		return ERR_PTR(-ENODEV);
> +
> +	pcpu = kzalloc(sizeof(struct pcpu), GFP_KERNEL);
> +	if (!pcpu)
> +		return ERR_PTR(-ENOMEM);
> +
> +	INIT_LIST_HEAD(&pcpu->pcpu_list);
> +	pcpu->xen_id = info->xen_cpuid;
> +	pcpu->apic_id = info->apic_id;
> +	pcpu->acpi_id = info->acpi_id;
> +	pcpu->flags = info->flags;
> +
> +	err = pcpu_sys_create(pcpu);
> +	if (err) {
> +		pr_warning(XEN_PCPU "Failed to register pcpu%d\n",
Not %u?> +			   info->xen_cpuid);
> +		kfree(pcpu);
> +		return ERR_PTR(-ENOENT);
> +	}
> +
Should this be protected by the mutex? [edit: I see now
that the calleer is suppose to hold on the lock.
Mention that please right before you do any list manipulations]

> +	list_add_tail(&pcpu->pcpu_list, &xen_pcpus.list);
> +	return pcpu;
> +}
> +
> +/*
> + * Caller should hold the pcpu lock
Provide full name of the mutex lock.> + */
> +static int _sync_pcpu(uint32_t cpu_num, uint32_t *ptr_max_cpu_num)
Why the ''_'' in front of the name?

For parameters do:

uint32 cpu, uint32 *max_cpu
> +{
> +	int ret;
> +	struct pcpu *pcpu = NULL;
> +	struct xenpf_pcpuinfo *info;
> +	struct xen_platform_op op = {
> +		.cmd                   = XENPF_get_cpuinfo,
> +		.interface_version     = XENPF_INTERFACE_VERSION,
> +		.u.pcpu_info.xen_cpuid = cpu_num,
> +	};
> +
> +	ret = HYPERVISOR_dom0_op(&op);
> +	if (ret)
> +		return ret;
> +
> +	info = &op.u.pcpu_info;
> +	if (ptr_max_cpu_num)
> +		*ptr_max_cpu_num = info->max_present;
> +
> +	pcpu = get_pcpu(cpu_num);
> +
> +	if (info->flags & XEN_PCPU_FLAGS_INVALID) {
> +		/* The pcpu has been removed */
> +		if (pcpu)
> +			free_pcpu(pcpu);
> +		return 0;
> +	}
> +
> +	if (!pcpu) {
> +		pcpu = init_pcpu(info);
> +		if (IS_ERR_OR_NULL(pcpu))
> +			return -ENODEV;
> +	} else
> +		pcpu_online_status(info, pcpu);
> +
> +	return 0;
> +}
> +
> +/*
> + * Sync dom0''s pcpu information with xen hypervisor''s
> + */
> +static int xen_sync_pcpus(void)
> +{
> +	/*
> +	 * Boot cpu always have cpu_id 0 in xen
> +	 */
> +	uint32_t cpu_num = 0, max_cpu_num = 0;
Use ''cpu'' and ''max_cpu''
> +	int err = 0;
> +	struct list_head *elem, *tmp;
> +	struct pcpu *pcpu;
> +
> +	get_pcpu_lock();
> +
> +	while (!err && (cpu_num <= max_cpu_num)) {
> +		err = _sync_pcpu(cpu_num, &max_cpu_num);
> +		cpu_num++;
> +	}
> +
> +	if (err) {
> +		list_for_each_safe(elem, tmp, &xen_pcpus.list) {
s/elem/pcpu/
> +			pcpu = list_entry(elem, struct pcpu, pcpu_list);
That you can remove once you make the xen_pcpus structure as I mentioned
above.
> +			free_pcpu(pcpu);
> +		}
> +	}
> +
> +	put_pcpu_lock();
Ah, so you are locking around here.> +
> +	return err;
> +}
> +
> +static void xen_pcpu_dpc(struct work_struct *work)
s/dpc/workqueue/
> +{
> +	xen_sync_pcpus();
> +}
> +static DECLARE_WORK(xen_pcpu_work, xen_pcpu_dpc);
> +
> +static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id)
> +{
> +	schedule_work(&xen_pcpu_work);
> +	return IRQ_HANDLED;
> +}
> +
> +static int __init xen_pcpu_init(void)
> +{
> +	int ret;
> +
> +	if (!xen_initial_domain())
> +		return -ENODEV;
> +
> +	ret = subsys_system_register(&xen_pcpu_subsys, NULL);
> +	if (ret) {
> +		pr_warning(XEN_PCPU "Failed to register pcpu subsys\n");
> +		return ret;
> +	}
> +
> +	INIT_LIST_HEAD(&xen_pcpus.list);
> +
> +	ret = xen_sync_pcpus();
> +	if (ret) {
> +		pr_warning(XEN_PCPU "Failed to sync pcpu info\n");
> +		return ret;
> +	}
> +
> +	ret = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, 0,
> +				      xen_pcpu_interrupt, 0,
> +				      "pcpu", NULL);
"xen-pcpu"
> +	if (ret < 0) {
> +		pr_warning(XEN_PCPU "Failed to bind pcpu virq\n");
Shouldn''t you delete what ''xen_sync_pcpus'' did?
Or is it OK to still work without the interrupts? What is the purpose
of that interrupt? How does it actually work - the hypervisor
decides when/where to turn off CPUs?
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +arch_initcall(xen_pcpu_init);
> diff --git a/include/xen/interface/platform.h
b/include/xen/interface/platform.h
> index 486653f..2c56d4f 100644
> --- a/include/xen/interface/platform.h
> +++ b/include/xen/interface/platform.h
> @@ -298,7 +298,7 @@ struct xenpf_set_processor_pminfo {
>  };
>  DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo);
>  
> -#define XENPF_get_cpuinfo 55
> +#define XENPF_get_cpuinfo	55
Why?
>  struct xenpf_pcpuinfo {
>  	/* IN */
>  	uint32_t xen_cpuid;
> @@ -314,6 +314,13 @@ struct xenpf_pcpuinfo {
>  };
>  DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo);
>  
> +#define XENPF_cpu_online	56
> +#define XENPF_cpu_offline	57
> +struct xenpf_cpu_ol {
> +	uint32_t cpuid;
> +};
> +DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol);
> +
>  struct xen_platform_op {
>  	uint32_t cmd;
>  	uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
> @@ -330,6 +337,7 @@ struct xen_platform_op {
>  		struct xenpf_getidletime       getidletime;
>  		struct xenpf_set_processor_pminfo set_pminfo;
>  		struct xenpf_pcpuinfo          pcpu_info;
> +		struct xenpf_cpu_ol            cpu_ol;
>  		uint8_t                        pad[128];
>  	} u;
>  };
> diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
> index a890804..0801468 100644
> --- a/include/xen/interface/xen.h
> +++ b/include/xen/interface/xen.h
> @@ -80,6 +80,7 @@
>  #define VIRQ_CONSOLE    2  /* (DOM0) Bytes received on emergency console.
*/
>  #define VIRQ_DOM_EXC    3  /* (DOM0) Exceptional event for some domain.  
*/
>  #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.  
*/
> +#define VIRQ_PCPU_STATE 9  /* (DOM0) PCPU state changed                  
*/
>  
>  /* Architecture-specific VIRQ definitions. */
>  #define VIRQ_ARCH_0    16
> -- 
> 1.7.1

Liu, Jinsong

2012-May-10 14:54 UTC

head link

RE: [PATCH 3/3] Xen physical cpus interface

Konrad,

Thanks for help me review!
Update according to your suggestion. 
Add some comments below.
>> 
>> Manage physical cpus in dom0, get physical cpus info and provide sys
>> interface. 
> 
> Anything that exposes SysFS attributes needs documentation in
> Documentation/ABI
Yes, added.
> 
> Can you explain what this solves? And if there are any
> userland applications that use this?
> 
It provide cpu online/offline interface to user. User can use it for their own
purpose, like power saving - by offlining some cpus when light workload it save
power greatly.
> 
> 
>> +	switch (buf[0]) {
> 
> Use strict_strtoull pls.
kernel suggest:
WARNING: strict_strtoull is obsolete, use kstrtoull instead :)
> 
>> +	case ''0'':
>> +		ret = xen_pcpu_down(cpu->xen_id);
>> +		break;
>> +	case ''1'':
>> +		ret = xen_pcpu_up(cpu->xen_id);
>> +		break;
>> +	default:
>> +		ret = -EINVAL;
>> +	}
>> +
>> +	if (ret >= 0)
>> +		ret = count;
>> +	return ret;
>> +}
>> +static DEVICE_ATTR(online, S_IRUGO | S_IWUSR, show_online,
>> store_online); 
> 
>> +
>> +static ssize_t show_apicid(struct device *dev,
>> +			   struct device_attribute *attr,
>> +			   char *buf)
>> +{
>> +	struct pcpu *cpu = container_of(dev, struct pcpu, dev); +
>> +	return sprintf(buf, "%u\n", cpu->apic_id);
>> +}
>> +
>> +static ssize_t show_acpiid(struct device *dev,
>> +			   struct device_attribute *attr,
>> +			   char *buf)
>> +{
>> +	struct pcpu *cpu = container_of(dev, struct pcpu, dev); +
>> +	return sprintf(buf, "%u\n", cpu->acpi_id);
>> +}
>> +static DEVICE_ATTR(apic_id, S_IRUGO, show_apicid, NULL);
>> +static DEVICE_ATTR(acpi_id, S_IRUGO, show_acpiid, NULL);
> 
> What benefit is there in exposing these values to the user?
Yes, no benefit so let''s cancel exposing acpi_id and apic_id.
>> +
>> +static void pcpu_sys_remove(struct pcpu *pcpu)
>> +{
>> +	struct device *dev;
>> +
>> +	if (!pcpu)
>> +		return;
>> +
>> +	dev = &pcpu->dev;
>> +	if (dev->id)
>> +		device_remove_file(dev, &dev_attr_online);
>> +	device_remove_file(dev, &dev_attr_acpi_id);
>> +	device_remove_file(dev, &dev_attr_apic_id);
>> +	device_unregister(dev);
> 
> So .. if you are using that, can''t you use the .release
> and define something like:
> 
> static void pcpu_release(struct device *dev)
> {
> 	struct pcpu *pcpu = container_of(dev, struct pcpu, dev);
> 
> 	list_del(&pcpu->pcpu_list);
> 	kfree(pcpu);
> }
>> +}
>> +
>> +static void free_pcpu(struct pcpu *pcpu)
>> +{
>> +	if (!pcpu)
>> +		return;
>> +
>> +	pcpu_sys_remove(pcpu);
> 
>> +	list_del(&pcpu->pcpu_list);
>> +	kfree(pcpu);
> 
> Those two shouldn''t be there. They sould be part of the
> release function.
>> +}
>> +
>> +static int pcpu_sys_create(struct pcpu *pcpu)
>> +{
>> +	struct device *dev;
>> +	int err = -EINVAL;
>> +
>> +	if (!pcpu)
>> +		return err;
>> +
>> +	dev = &pcpu->dev;
>> +	dev->bus = &xen_pcpu_subsys;
>> +	dev->id = pcpu->xen_id;
> 
> And then here dev->release = &pcpu_release;
> 
Hmm, it''s good if it''s convenient to do it automatically via
dev->release.
However, dev container (pcpu) would be free at some other error cases, so I
prefer do it ''manually''.
> 
>> +	/* Not open pcpu0 online access to user */
> 
> Huh? You mean "Nobody can touch PCPU0" ?
Add comments:
        /*
         * Xen never offline cpu0 due to several restrictions
         * and assumptions. This basically doesn''t add a sys control
         * to user, one cannot attempt to offline BSP.
         */
> 
> Why? Why can they touch the other ones? And better yet,
> what happens if one boots without "dom0_max_vcpus=X"
> and powers of some of the CPUs?
> 
Only those at cpu present map has its sys interface.
>> +static int __init xen_pcpu_init(void)
>> +{
>> +	int ret;
>> +
>> +	if (!xen_initial_domain())
>> +		return -ENODEV;
>> +
>> +	ret = subsys_system_register(&xen_pcpu_subsys, NULL); +	if (ret)
{
>> +		pr_warning(XEN_PCPU "Failed to register pcpu subsys\n");
>> +		return ret; +	}
>> +
>> +	INIT_LIST_HEAD(&xen_pcpus.list);
>> +
>> +	ret = xen_sync_pcpus();
>> +	if (ret) {
>> +		pr_warning(XEN_PCPU "Failed to sync pcpu info\n"); +	
return ret;
>> +	}
>> +
>> +	ret = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, 0,
>> +				      xen_pcpu_interrupt, 0,
>> +				      "pcpu", NULL);
> 
> "xen-pcpu"
> 
>> +	if (ret < 0) {
>> +		pr_warning(XEN_PCPU "Failed to bind pcpu virq\n");
> 
> Shouldn''t you delete what ''xen_sync_pcpus'' did?
yes, add error handling.
> Or is it OK to still work without the interrupts? What is the purpose
> of that interrupt? How does it actually work - the hypervisor
> decides when/where to turn off CPUs?
> 
user online/offline cpu via sys interface --> xen implement --> inject
virq back to dom0 --> sync cpu status.

Thanks,
Jinsong--
To unsubscribe from this list: send the line "unsubscribe
linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Konrad Rzeszutek Wilk

2012-May-10 14:57 UTC

head link

Re: [PATCH 3/3] Xen physical cpus interface

On Thu, May 10, 2012 at 02:54:07PM +0000, Liu, Jinsong
wrote:> Konrad,
> 
> Thanks for help me review!
Sure thing.> Update according to your suggestion. 
> Add some comments below.
> 
> >> 
> >> Manage physical cpus in dom0, get physical cpus info and provide
sys
> >> interface. 
> > 
> > Anything that exposes SysFS attributes needs documentation in
> > Documentation/ABI
> 
> Yes, added.
> 
> > 
> > Can you explain what this solves? And if there are any
> > userland applications that use this?
> > 
> 
> It provide cpu online/offline interface to user. User can use it for their
own purpose, like power saving - by offlining some cpus when light workload it
save power greatly.
OK, please include that in the descritpion.
> 
> > 
> > 
> >> +	switch (buf[0]) {
> > 
> > Use strict_strtoull pls.
> 
> kernel suggest:
> WARNING: strict_strtoull is obsolete, use kstrtoull instead :)
Ah yes.
.. snip..> > And then here dev->release = &pcpu_release;
> > 
> 
> Hmm, it''s good if it''s convenient to do it automatically
via dev->release.
> However, dev container (pcpu) would be free at some other error cases, so I
prefer do it ''manually''.
You could also call pcpu_release(..) to do it manually.
> 
> > 
> >> +	/* Not open pcpu0 online access to user */
> > 
> > Huh? You mean "Nobody can touch PCPU0" ?
> 
> Add comments:
>         /*
>          * Xen never offline cpu0 due to several restrictions
>          * and assumptions. This basically doesn''t add a sys
control
>          * to user, one cannot attempt to offline BSP.
>          */
> 
> > 
> > Why? Why can they touch the other ones? And better yet,
> > what happens if one boots without "dom0_max_vcpus=X"
> > and powers of some of the CPUs?
> > 
> 
> Only those at cpu present map has its sys interface.
OK, put that in the file so folks are aware of the limitations.
> 
> >> +static int __init xen_pcpu_init(void)
> >> +{
> >> +	int ret;
> >> +
> >> +	if (!xen_initial_domain())
> >> +		return -ENODEV;
> >> +
> >> +	ret = subsys_system_register(&xen_pcpu_subsys, NULL); +	if
(ret) {
> >> +		pr_warning(XEN_PCPU "Failed to register pcpu
subsys\n");
> >> +		return ret; +	}
> >> +
> >> +	INIT_LIST_HEAD(&xen_pcpus.list);
> >> +
> >> +	ret = xen_sync_pcpus();
> >> +	if (ret) {
> >> +		pr_warning(XEN_PCPU "Failed to sync pcpu info\n"); +	
return ret;
> >> +	}
> >> +
> >> +	ret = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, 0,
> >> +				      xen_pcpu_interrupt, 0,
> >> +				      "pcpu", NULL);
> > 
> > "xen-pcpu"
> > 
> >> +	if (ret < 0) {
> >> +		pr_warning(XEN_PCPU "Failed to bind pcpu virq\n");
> > 
> > Shouldn''t you delete what ''xen_sync_pcpus''
did?
> 
> yes, add error handling.
> 
> > Or is it OK to still work without the interrupts? What is the purpose
> > of that interrupt? How does it actually work - the hypervisor
> > decides when/where to turn off CPUs?
> > 
> 
> user online/offline cpu via sys interface --> xen implement -->
inject virq back to dom0 --> sync cpu status.
Add that in the file so the workflow is explained.> 
> Thanks,
> Jinsong

Liu, Jinsong

2012-May-10 15:20 UTC

head link

RE: [PATCH 3/3] Xen physical cpus interface

Just notice your reply (so quick :)

Agree and will update later, except 1 concern below.

Konrad Rzeszutek Wilk wrote:>> 
>> Hmm, it''s good if it''s convenient to do it
automatically via
>> dev->release. 
>> However, dev container (pcpu) would be free at some other error
>> cases, so I prefer do it ''manually''. 
> 
> You could also call pcpu_release(..) to do it manually.
> 
that means kfree(pcpu) would be done twice at some error cases, do you think it
really good?

Thanks,
Jinsong--
To unsubscribe from this list: send the line "unsubscribe
linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Liu, Jinsong

2012-May-11 13:12 UTC

head link

RE: [PATCH 3/3] Xen physical cpus interface

Liu, Jinsong wrote:> Just notice your reply (so quick :)
> 
> Agree and will update later, except 1 concern below.
> 
> Konrad Rzeszutek Wilk wrote:
>>> 
>>> Hmm, it''s good if it''s convenient to do it
automatically via
>>> dev->release. However, dev container (pcpu) would be free at
some
>>> other error cases, so I prefer do it ''manually''.
>> 
>> You could also call pcpu_release(..) to do it manually.
>> 
> 
> that means kfree(pcpu) would be done twice at some error cases, do
> you think it really good? 
> 
Ping.

I think error recovery should be kept inside error logic level itself, if try to
recover upper level error would bring trouble.

In our example, there are 2 logic levels:
pcpu level (as container), and dev level (subfield used for sys)
dev->release should only recover error occurred at dev/sys level, and the
pcpu error should be recovered at pcpu level.

If dev->release try to recover its container pcpu level error, like
list_del/kfree(pcpu), it would make confusing. i.e., considering
pcpu_sys_create(), 2 error cases:
device_register fail, and device_create_file fail --> how can the caller
decide kfree(pcpu) or not?

So how about recover pcpu error manually and explicitly?

Thanks,
Jinsong--
To unsubscribe from this list: send the line "unsubscribe
linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Konrad Rzeszutek Wilk

2012-May-11 14:27 UTC

head link

Re: [PATCH 3/3] Xen physical cpus interface

On Fri, May 11, 2012 at 01:12:13PM +0000, Liu, Jinsong
wrote:> Liu, Jinsong wrote:
> > Just notice your reply (so quick :)
> > 
> > Agree and will update later, except 1 concern below.
> > 
> > Konrad Rzeszutek Wilk wrote:
> >>> 
> >>> Hmm, it''s good if it''s convenient to do it
automatically via
> >>> dev->release. However, dev container (pcpu) would be free
at some
> >>> other error cases, so I prefer do it
''manually''.
> >> 
> >> You could also call pcpu_release(..) to do it manually.
> >> 
> > 
> > that means kfree(pcpu) would be done twice at some error cases, do
> > you think it really good? 
> > 
> 
> Ping.
> 
> I think error recovery should be kept inside error logic level itself, if
try to recover upper level error would bring trouble.
> 
> In our example, there are 2 logic levels:
> pcpu level (as container), and dev level (subfield used for sys)
So you need to untangle free_pcpu from doing both. Meaning one does the
SysFS and the other deals with free-ing the structure and removing itself
from the list.

> dev->release should only recover error occurred at dev/sys level, and
the pcpu error should be recovered at pcpu level.
> 
> If dev->release try to recover its container pcpu level error, like
list_del/kfree(pcpu), it would make confusing. i.e., considering
pcpu_sys_create(), 2 error cases:
> device_register fail, and device_create_file fail --> how can the caller
decide kfree(pcpu) or not?
Then you should free it manually. But you can do this by a wrapper
function:

__pcpu_release(..) {
	..
	/* Does the removing itself from the list and kfree the pcpu */
}
pcpu_release(..) {
	struct pcpcu *p= container_of(..)
	__pcpu_release(p);
}

dev->release = &pcpu_release;
> 
> So how about recover pcpu error manually and explicitly?
> 
> Thanks,
> Jinsong

Liu, Jinsong

2012-May-11 18:04 UTC

head link

RE: [PATCH 3/3] Xen physical cpus interface

Konrad Rzeszutek Wilk wrote:> On Fri, May 11, 2012 at 01:12:13PM +0000, Liu, Jinsong wrote:
>> Liu, Jinsong wrote:
>>> Just notice your reply (so quick :)
>>> 
>>> Agree and will update later, except 1 concern below.
>>> 
>>> Konrad Rzeszutek Wilk wrote:
>>>>> 
>>>>> Hmm, it''s good if it''s convenient to do
it automatically via
>>>>> dev->release. However, dev container (pcpu) would be
free at some
>>>>> other error cases, so I prefer do it
''manually''.
>>>> 
>>>> You could also call pcpu_release(..) to do it manually.
>>>> 
>>> 
>>> that means kfree(pcpu) would be done twice at some error cases, do
>>> you think it really good? 
>>> 
>> 
>> Ping.
>> 
>> I think error recovery should be kept inside error logic level
>> itself, if try to recover upper level error would bring trouble. 
>> 
>> In our example, there are 2 logic levels:
>> pcpu level (as container), and dev level (subfield used for sys)
> 
> So you need to untangle free_pcpu from doing both. Meaning one does
> the SysFS and the other deals with free-ing the structure and
> removing itself from the list.
> 
free_cpu is very samll, just consist of the 2 parts your said:
* pcpu_sys_remove() deal with sysfs
* list_del/kfree(pcpu) deal with pcpu
> 
>> dev->release should only recover error occurred at dev/sys level,
>> and the pcpu error should be recovered at pcpu level. 
>> 
>> If dev->release try to recover its container pcpu level error, like
>> list_del/kfree(pcpu), it would make confusing. i.e., considering
>> pcpu_sys_create(), 2 error cases: device_register fail, and
>> device_create_file fail --> how can the caller decide kfree(pcpu) or
>> not?   
> 
> Then you should free it manually. But you can do this by a wrapper
> function:
> 
> __pcpu_release(..) {
> 	..
> 	/* Does the removing itself from the list and kfree the pcpu */
> }
> pcpu_release(..) {
> 	struct pcpcu *p= container_of(..)
> 	__pcpu_release(p);
> }
> 
> dev->release = &pcpu_release;
> 
Too weird way. If we want to release dev itself it''s good to use
dev->release, but for pcpu I doubt it.
(consider the example I gave --> why we create issue (it maybe solved in
weird method I agree), just for using dev->release?)

In kernel many dev->release keep NULL.
An example of using dev->release is cpu/mcheck/mce.c -->
mce_device_release(), it *just* deal dev itself.

Thanks,
Jinsong

Konrad Rzeszutek Wilk

2012-May-11 19:00 UTC

head link

Re: [Xen-devel] [PATCH 3/3] Xen physical cpus interface

On Fri, May 11, 2012 at 06:04:21PM +0000, Liu, Jinsong
wrote:> Konrad Rzeszutek Wilk wrote:
> > On Fri, May 11, 2012 at 01:12:13PM +0000, Liu, Jinsong wrote:
> >> Liu, Jinsong wrote:
> >>> Just notice your reply (so quick :)
> >>> 
> >>> Agree and will update later, except 1 concern below.
> >>> 
> >>> Konrad Rzeszutek Wilk wrote:
> >>>>> 
> >>>>> Hmm, it''s good if it''s convenient to
do it automatically via
> >>>>> dev->release. However, dev container (pcpu) would
be free at some
> >>>>> other error cases, so I prefer do it
''manually''.
> >>>> 
> >>>> You could also call pcpu_release(..) to do it manually.
> >>>> 
> >>> 
> >>> that means kfree(pcpu) would be done twice at some error
cases, do
> >>> you think it really good? 
> >>> 
> >> 
> >> Ping.
> >> 
> >> I think error recovery should be kept inside error logic level
> >> itself, if try to recover upper level error would bring trouble. 
> >> 
> >> In our example, there are 2 logic levels:
> >> pcpu level (as container), and dev level (subfield used for sys)
> > 
> > So you need to untangle free_pcpu from doing both. Meaning one does
> > the SysFS and the other deals with free-ing the structure and
> > removing itself from the list.
> > 
> 
> free_cpu is very samll, just consist of the 2 parts your said:
> * pcpu_sys_remove() deal with sysfs
> * list_del/kfree(pcpu) deal with pcpu
> 
> > 
> >> dev->release should only recover error occurred at dev/sys
level,
> >> and the pcpu error should be recovered at pcpu level. 
> >> 
> >> If dev->release try to recover its container pcpu level error,
like
> >> list_del/kfree(pcpu), it would make confusing. i.e., considering
> >> pcpu_sys_create(), 2 error cases: device_register fail, and
> >> device_create_file fail --> how can the caller decide
kfree(pcpu) or
> >> not?   
> > 
> > Then you should free it manually. But you can do this by a wrapper
> > function:
> > 
> > __pcpu_release(..) {
> > 	..
> > 	/* Does the removing itself from the list and kfree the pcpu */
> > }
> > pcpu_release(..) {
> > 	struct pcpcu *p= container_of(..)
> > 	__pcpu_release(p);
> > }
> > 
> > dev->release = &pcpu_release;
> > 
> 
> Too weird way. If we want to release dev itself it''s good to use
dev->release, but for pcpu I doubt it.
> (consider the example I gave --> why we create issue (it maybe solved in
weird method I agree), just for using dev->release?)
> 
> In kernel many dev->release keep NULL.
> An example of using dev->release is cpu/mcheck/mce.c -->
mce_device_release(), it *just* deal dev itself.
OK? I am not sure what are we arguing here anymore?
I think using ''kfree(pcpu)'' on the error paths (as long as it
is
done before device_register) is OK. I think that seperating
the SysFS deletion from the pcpu deletion should be done to
avoid races. Perhaps the SysFS deletion function should also
remove the pcpu from the list.

Liu, Jinsong

2012-May-11 20:31 UTC

head link

RE: [Xen-devel] [PATCH 3/3] Xen physical cpus interface

Konrad Rzeszutek Wilk wrote:> On Fri, May 11, 2012 at 06:04:21PM +0000, Liu, Jinsong wrote:
>> Konrad Rzeszutek Wilk wrote:
>>> On Fri, May 11, 2012 at 01:12:13PM +0000, Liu, Jinsong wrote:
>>>> Liu, Jinsong wrote:
>>>>> Just notice your reply (so quick :)
>>>>> 
>>>>> Agree and will update later, except 1 concern below.
>>>>> 
>>>>> Konrad Rzeszutek Wilk wrote:
>>>>>>> 
>>>>>>> Hmm, it''s good if it''s convenient
to do it automatically via
>>>>>>> dev->release. However, dev container (pcpu)
would be free at
>>>>>>> some other error cases, so I prefer do it
''manually''.
>>>>>> 
>>>>>> You could also call pcpu_release(..) to do it manually.
>>>>>> 
>>>>> 
>>>>> that means kfree(pcpu) would be done twice at some error
cases,
>>>>> do you think it really good? 
>>>>> 
>>>> 
>>>> Ping.
>>>> 
>>>> I think error recovery should be kept inside error logic level
>>>> itself, if try to recover upper level error would bring
trouble.
>>>> 
>>>> In our example, there are 2 logic levels:
>>>> pcpu level (as container), and dev level (subfield used for
sys)
>>> 
>>> So you need to untangle free_pcpu from doing both. Meaning one does
>>> the SysFS and the other deals with free-ing the structure and
>>> removing itself from the list.
>>> 
>> 
>> free_cpu is very samll, just consist of the 2 parts your said:
>> * pcpu_sys_remove() deal with sysfs
>> * list_del/kfree(pcpu) deal with pcpu
>> 
>>> 
>>>> dev->release should only recover error occurred at dev/sys
level,
>>>> and the pcpu error should be recovered at pcpu level.
>>>> 
>>>> If dev->release try to recover its container pcpu level
error, like
>>>> list_del/kfree(pcpu), it would make confusing. i.e.,
considering
>>>> pcpu_sys_create(), 2 error cases: device_register fail, and
>>>> device_create_file fail --> how can the caller decide
kfree(pcpu)
>>>> or not?
>>> 
>>> Then you should free it manually. But you can do this by a wrapper
>>> function: 
>>> 
>>> __pcpu_release(..) {
>>> 	..
>>> 	/* Does the removing itself from the list and kfree the pcpu */ }
>>> pcpu_release(..) {
>>> 	struct pcpcu *p= container_of(..)
>>> 	__pcpu_release(p);
>>> }
>>> 
>>> dev->release = &pcpu_release;
>>> 
>> 
>> Too weird way. If we want to release dev itself it''s good to
use
>> dev->release, but for pcpu I doubt it. (consider the example I gave
>> --> why we create issue (it maybe solved in weird method I agree),
>> just for using dev->release?)  
>> 
>> In kernel many dev->release keep NULL.
>> An example of using dev->release is cpu/mcheck/mce.c -->
>> mce_device_release(), it *just* deal dev itself. 
> 
> OK? I am not sure what are we arguing here anymore?
> I think using ''kfree(pcpu)'' on the error paths (as long
as it is
> done before device_register) is OK. I think that seperating
> the SysFS deletion from the pcpu deletion should be done to
> avoid races. Perhaps the SysFS deletion function should also
> remove the pcpu from the list.
How about static array pcpu[NR_CPUS]?
It seems solve all issues we argued :)

Thanks,
Jinsong--
To unsubscribe from this list: send the line "unsubscribe
linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Konrad Rzeszutek Wilk

2012-May-11 20:48 UTC

head link

Re: [Xen-devel] [PATCH 3/3] Xen physical cpus interface

On Fri, May 11, 2012 at 08:31:15PM +0000, Liu, Jinsong
wrote:> Konrad Rzeszutek Wilk wrote:
> > On Fri, May 11, 2012 at 06:04:21PM +0000, Liu, Jinsong wrote:
> >> Konrad Rzeszutek Wilk wrote:
> >>> On Fri, May 11, 2012 at 01:12:13PM +0000, Liu, Jinsong wrote:
> >>>> Liu, Jinsong wrote:
> >>>>> Just notice your reply (so quick :)
> >>>>> 
> >>>>> Agree and will update later, except 1 concern below.
> >>>>> 
> >>>>> Konrad Rzeszutek Wilk wrote:
> >>>>>>> 
> >>>>>>> Hmm, it''s good if it''s
convenient to do it automatically via
> >>>>>>> dev->release. However, dev container (pcpu)
would be free at
> >>>>>>> some other error cases, so I prefer do it
''manually''.
> >>>>>> 
> >>>>>> You could also call pcpu_release(..) to do it
manually.
> >>>>>> 
> >>>>> 
> >>>>> that means kfree(pcpu) would be done twice at some
error cases,
> >>>>> do you think it really good? 
> >>>>> 
> >>>> 
> >>>> Ping.
> >>>> 
> >>>> I think error recovery should be kept inside error logic
level
> >>>> itself, if try to recover upper level error would bring
trouble.
> >>>> 
> >>>> In our example, there are 2 logic levels:
> >>>> pcpu level (as container), and dev level (subfield used
for sys)
> >>> 
> >>> So you need to untangle free_pcpu from doing both. Meaning one
does
> >>> the SysFS and the other deals with free-ing the structure and
> >>> removing itself from the list.
> >>> 
> >> 
> >> free_cpu is very samll, just consist of the 2 parts your said:
> >> * pcpu_sys_remove() deal with sysfs
> >> * list_del/kfree(pcpu) deal with pcpu
> >> 
> >>> 
> >>>> dev->release should only recover error occurred at
dev/sys level,
> >>>> and the pcpu error should be recovered at pcpu level.
> >>>> 
> >>>> If dev->release try to recover its container pcpu level
error, like
> >>>> list_del/kfree(pcpu), it would make confusing. i.e.,
considering
> >>>> pcpu_sys_create(), 2 error cases: device_register fail,
and
> >>>> device_create_file fail --> how can the caller decide
kfree(pcpu)
> >>>> or not?
> >>> 
> >>> Then you should free it manually. But you can do this by a
wrapper
> >>> function: 
> >>> 
> >>> __pcpu_release(..) {
> >>> 	..
> >>> 	/* Does the removing itself from the list and kfree the pcpu
*/ }
> >>> pcpu_release(..) {
> >>> 	struct pcpcu *p= container_of(..)
> >>> 	__pcpu_release(p);
> >>> }
> >>> 
> >>> dev->release = &pcpu_release;
> >>> 
> >> 
> >> Too weird way. If we want to release dev itself it''s good
to use
> >> dev->release, but for pcpu I doubt it. (consider the example I
gave
> >> --> why we create issue (it maybe solved in weird method I
agree),
> >> just for using dev->release?)  
> >> 
> >> In kernel many dev->release keep NULL.
> >> An example of using dev->release is cpu/mcheck/mce.c -->
> >> mce_device_release(), it *just* deal dev itself. 
> > 
> > OK? I am not sure what are we arguing here anymore?
> > I think using ''kfree(pcpu)'' on the error paths (as
long as it is
> > done before device_register) is OK. I think that seperating
> > the SysFS deletion from the pcpu deletion should be done to
> > avoid races. Perhaps the SysFS deletion function should also
> > remove the pcpu from the list.
> 
> How about static array pcpu[NR_CPUS]?
> It seems solve all issues we argued :)
Ugh. That could mean a structure of more than 4K of items.
Lets stick with making it dynamic.

Xen devel - Apr 2012 - [PATCH 3/3] Xen physical cpus interface

[PATCH 3/3] Xen physical cpus interface

Re: [PATCH 3/3] Xen physical cpus interface

RE: [PATCH 3/3] Xen physical cpus interface

Re: [PATCH 3/3] Xen physical cpus interface

RE: [PATCH 3/3] Xen physical cpus interface

RE: [PATCH 3/3] Xen physical cpus interface

Re: [PATCH 3/3] Xen physical cpus interface

RE: [PATCH 3/3] Xen physical cpus interface

Re: [Xen-devel] [PATCH 3/3] Xen physical cpus interface

RE: [Xen-devel] [PATCH 3/3] Xen physical cpus interface

Re: [Xen-devel] [PATCH 3/3] Xen physical cpus interface