Liu, Jinsong
2012-Jun-12 07:51 UTC
[PATCH] xen/mce: schedule a workqueue to avoid sleep in atomic context
From aa2ce7440f16002266dc8464f749992d0c8ac0e5 Mon Sep 17 00:00:00 2001 From: Liu, Jinsong <jinsong.liu@intel.com> Date: Tue, 12 Jun 2012 23:11:16 +0800 Subject: [PATCH] xen/mce: schedule a workqueue to avoid sleep in atomic context copy_to_user might sleep and print a stack trace if it is executed in an atomic spinlock context. This patch schedule a workqueue for IRQ handler to poll the data, and use mutex instead of spinlock, so copy_to_user sleep in atomic context would not occur. Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Suggested-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> --- drivers/xen/mcelog.c | 18 +++++++++++------- 1 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c index 72e87d2..804aa3c 100644 --- a/drivers/xen/mcelog.c +++ b/drivers/xen/mcelog.c @@ -55,7 +55,7 @@ static struct mc_info g_mi; static struct mcinfo_logical_cpu *g_physinfo; static uint32_t ncpus; -static DEFINE_SPINLOCK(mcelog_lock); +static DEFINE_MUTEX(mcelog_lock); static struct xen_mce_log xen_mcelog = { .signature = XEN_MCE_LOG_SIGNATURE, @@ -106,7 +106,7 @@ static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf, unsigned num; int i, err; - spin_lock(&mcelog_lock); + mutex_lock(&mcelog_lock); num = xen_mcelog.next; @@ -130,7 +130,7 @@ static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf, err = -EFAULT; out: - spin_unlock(&mcelog_lock); + mutex_unlock(&mcelog_lock); return err ? 
err : buf - ubuf; } @@ -310,12 +310,11 @@ static int mc_queue_handle(uint32_t flags) } /* virq handler for machine check error info*/ -static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) +static void xen_mce_work_fn(struct work_struct *work) { int err; - unsigned long tmp; - spin_lock_irqsave(&mcelog_lock, tmp); + mutex_lock(&mcelog_lock); /* urgent mc_info */ err = mc_queue_handle(XEN_MC_URGENT); @@ -330,8 +329,13 @@ static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) pr_err(XEN_MCELOG "Failed to handle nonurgent mc_info queue.\n"); - spin_unlock_irqrestore(&mcelog_lock, tmp); + mutex_unlock(&mcelog_lock); +} +static DECLARE_WORK(xen_mce_work, xen_mce_work_fn); +static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) +{ + schedule_work(&xen_mce_work); return IRQ_HANDLED; } -- 1.7.1
Konrad Rzeszutek Wilk
2012-Jun-12 12:40 UTC
Re: [Xen-devel] [PATCH] xen/mce: schedule a workqueue to avoid sleep in atomic context
On Tue, Jun 12, 2012 at 07:51:03AM +0000, Liu, Jinsong wrote:> >From aa2ce7440f16002266dc8464f749992d0c8ac0e5 Mon Sep 17 00:00:00 2001 > From: Liu, Jinsong <jinsong.liu@intel.com> > Date: Tue, 12 Jun 2012 23:11:16 +0800 > Subject: [PATCH] xen/mce: schedule a workqueue to avoid sleep in atomic context > > copy_to_user might sleep and print a stack trace if it is executed > in an atomic spinlock context. > > This patch schedule a workqueue for IRQ handler to poll the data, > and use mutex instead of spinlock, so copy_to_user sleep in atomic > context would not occur. Ah much better. Usually one also includes the report of what the stack trace was. So I've added that in.> > Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > Suggested-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> > --- > drivers/xen/mcelog.c | 18 +++++++++++------- > 1 files changed, 11 insertions(+), 7 deletions(-) > > diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c > index 72e87d2..804aa3c 100644 > --- a/drivers/xen/mcelog.c > +++ b/drivers/xen/mcelog.c > @@ -55,7 +55,7 @@ static struct mc_info g_mi; > static struct mcinfo_logical_cpu *g_physinfo; > static uint32_t ncpus; > > -static DEFINE_SPINLOCK(mcelog_lock); > +static DEFINE_MUTEX(mcelog_lock); > > static struct xen_mce_log xen_mcelog = { > .signature = XEN_MCE_LOG_SIGNATURE, > @@ -106,7 +106,7 @@ static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf, > unsigned num; > int i, err; > > - spin_lock(&mcelog_lock); > + mutex_lock(&mcelog_lock); > > num = xen_mcelog.next; > > @@ -130,7 +130,7 @@ static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf, > err = -EFAULT; > > out: > - spin_unlock(&mcelog_lock); > + mutex_unlock(&mcelog_lock); > > return err ? 
err : buf - ubuf; > } > @@ -310,12 +310,11 @@ static int mc_queue_handle(uint32_t flags) > } > > /* virq handler for machine check error info*/ > -static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) > +static void xen_mce_work_fn(struct work_struct *work) > { > int err; > - unsigned long tmp; > > - spin_lock_irqsave(&mcelog_lock, tmp); > + mutex_lock(&mcelog_lock); > > /* urgent mc_info */ > err = mc_queue_handle(XEN_MC_URGENT); > @@ -330,8 +329,13 @@ static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) > pr_err(XEN_MCELOG > "Failed to handle nonurgent mc_info queue.\n"); > > - spin_unlock_irqrestore(&mcelog_lock, tmp); > + mutex_unlock(&mcelog_lock); > +} > +static DECLARE_WORK(xen_mce_work, xen_mce_work_fn); > > +static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) > +{ > + schedule_work(&xen_mce_work); > return IRQ_HANDLED; > } > > -- > 1.7.1> _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
On Tue, Jun 12, 2012 at 08:40:15AM -0400, Konrad Rzeszutek Wilk wrote:> On Tue, Jun 12, 2012 at 07:51:03AM +0000, Liu, Jinsong wrote: > > >From aa2ce7440f16002266dc8464f749992d0c8ac0e5 Mon Sep 17 00:00:00 2001 > > From: Liu, Jinsong <jinsong.liu@intel.com> > > Date: Tue, 12 Jun 2012 23:11:16 +0800 > > Subject: [PATCH] xen/mce: schedule a workqueue to avoid sleep in atomic context > > > > copy_to_user might sleep and print a stack trace if it is executed > > in an atomic spinlock context. > > > > This patch schedule a workqueue for IRQ handler to poll the data, > > and use mutex instead of spinlock, so copy_to_user sleep in atomic > > context would not occur. > > Ah much better. Usually one also includes the report of what the > stack trace was. So I've added that in. So another bug which is that mcelog is spinning at 100% CPU (and only under Xen). It seems to be doing: ppoll([{fd=4, events=POLLIN}, {fd=3, events=POLLIN}], 2, NULL, [], 8) = 1 ([{fd=3, revents=POLLIN}]) read(3, "", 2816) = 0 ppoll([{fd=4, events=POLLIN}, {fd=3, events=POLLIN}], 2, NULL, [], 8) = 1 ([{fd=3, revents=POLLIN}]) read(3, "", 2816) constantly.
Konrad Rzeszutek Wilk wrote:> On Tue, Jun 12, 2012 at 08:40:15AM -0400, Konrad Rzeszutek Wilk wrote: >> On Tue, Jun 12, 2012 at 07:51:03AM +0000, Liu, Jinsong wrote: >>>> From aa2ce7440f16002266dc8464f749992d0c8ac0e5 Mon Sep 17 00:00:00 >>>> 2001 >>> From: Liu, Jinsong <jinsong.liu@intel.com> >>> Date: Tue, 12 Jun 2012 23:11:16 +0800 >>> Subject: [PATCH] xen/mce: schedule a workqueue to avoid sleep in >>> atomic context >>> >>> copy_to_user might sleep and print a stack trace if it is executed >>> in an atomic spinlock context. >>> >>> This patch schedule a workqueue for IRQ handler to poll the data, >>> and use mutex instead of spinlock, so copy_to_user sleep in atomic >>> context would not occur. >> >> Ah much better. Usually one also includes the report of what the >> stack trace was. So I''ve added that in. > > So another bug which is that mcelog is spinning at 100% CPU (and only > under Xen). > > It seems to be doing: > > ppoll([{fd=4, events=POLLIN}, {fd=3, events=POLLIN}], 2, NULL, [], 8) > = 1 ([{fd=3, revents=POLLIN}]) read(3, "", 2816) > = 0 > ppoll([{fd=4, events=POLLIN}, {fd=3, events=POLLIN}], 2, NULL, [], 8) > = 1 ([{fd=3, revents=POLLIN}]) read(3, "", 2816) > > constantly.I will debug it. I have try at my platform, but fail to reproduce it. (You still use the config you send me last time, right?) Would you tell me your step? Thanks, Jinsong-- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Liu, Jinsong
2012-Jun-14 12:55 UTC
[PATCH] xen/mce: add .poll method for mcelog device driver
>> >> So another bug which is that mcelog is spinning at 100% CPU (and >> only under Xen). >> >> It seems to be doing: >> >> ppoll([{fd=4, events=POLLIN}, {fd=3, events=POLLIN}], 2, NULL, [], 8) >> = 1 ([{fd=3, revents=POLLIN}]) read(3, "", 2816) >> = 0 >> ppoll([{fd=4, events=POLLIN}, {fd=3, events=POLLIN}], 2, NULL, [], 8) >> = 1 ([{fd=3, revents=POLLIN}]) read(3, "", 2816) >> >> constantly. > > I will debug it. I have try at my platform, but fail to reproduce it. > (You still use the config you send me last time, right?) Would you > tell me your step? > > Thanks, > JinsongHave a look at it, it caused by NULL .poll method. Attached is the patch to fix this bug, but I cannot reproduce the bug at my platform, so would you please help me to test it at your side? Thanks, Jinsong ============From fb664590ce4198539d96b6bc245c5d70cc079129 Mon Sep 17 00:00:00 2001 From: Liu, Jinsong <jinsong.liu@intel.com> Date: Fri, 15 Jun 2012 04:16:52 +0800 Subject: [PATCH] xen/mce: add .poll method for mcelog device driver If a driver leaves its poll method NULL, the device is assumed to be both readable and writable without blocking. This patch add .poll method to xen mcelog device driver, so that when mcelog use system calls like ppoll or select, it would be blocked when no data available, and avoid spinning at CPU. Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> --- drivers/xen/mcelog.c | 10 ++++++++++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c index 804aa3c..c0b39c9 100644 --- a/drivers/xen/mcelog.c +++ b/drivers/xen/mcelog.c @@ -41,6 +41,7 @@ #include <linux/miscdevice.h> #include <linux/uaccess.h> #include <linux/capability.h> +#include <linux/poll.h> #include <xen/interface/xen.h> #include <xen/events.h> @@ -135,6 +136,14 @@ out: return err ? 
err : buf - ubuf; } +static unsigned int xen_mce_chrdev_poll(struct file *file, poll_table *wait) +{ + if (xen_mcelog.next) + return POLLIN | POLLRDNORM; + + return 0; +} + static long xen_mce_chrdev_ioctl(struct file *f, unsigned int cmd, unsigned long arg) { @@ -166,6 +175,7 @@ static const struct file_operations xen_mce_chrdev_ops = { .open = xen_mce_chrdev_open, .release = xen_mce_chrdev_release, .read = xen_mce_chrdev_read, + .poll = xen_mce_chrdev_poll, .unlocked_ioctl = xen_mce_chrdev_ioctl, .llseek = no_llseek, }; -- 1.7.1
Liu, Jinsong
2012-Jun-14 17:19 UTC
RE: [PATCH] xen/mce: add .poll method for mcelog device driver
Liu, Jinsong wrote:>>> So another bug which is that mcelog is spinning at 100% CPU (and >>> only under Xen). >>> >>> It seems to be doing: >>> >>> ppoll([{fd=4, events=POLLIN}, {fd=3, events=POLLIN}], 2, NULL, [], >>> 8) = 1 ([{fd=3, revents=POLLIN}]) read(3, "", 2816) >>> = 0 >>> ppoll([{fd=4, events=POLLIN}, {fd=3, events=POLLIN}], 2, NULL, [], >>> 8) = 1 ([{fd=3, revents=POLLIN}]) read(3, "", 2816) >>> >>> constantly. >> >> I will debug it. I have try at my platform, but fail to reproduce it. >> (You still use the config you send me last time, right?) Would you >> tell me your step? >> >> Thanks, >> Jinsong > > Have a look at it, it caused by NULL .poll method. > Attached is the patch to fix this bug, but I cannot reproduce the bug > at my platform, so would you please help me to test it at your side? >Ah I know how you trigger the bug - you run mcelog as daemon ... then spinning at CPU. I update my patch as attached, and test at my platform OK now. Thanks, Jinsong =======================From 771bf5835a1ed9e439c7da289cb3a72ee8c9bd02 Mon Sep 17 00:00:00 2001 From: Liu, Jinsong <jinsong.liu@intel.com> Date: Fri, 15 Jun 2012 09:03:39 +0800 Subject: [PATCH] xen/mce: add .poll method for mcelog device driver If a driver leaves its poll method NULL, the device is assumed to be both readable and writable without blocking. This patch add .poll method to xen mcelog device driver, so that when mcelog use system calls like ppoll or select, it would be blocked when no data available, and avoid spinning at CPU. 
Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> --- drivers/xen/mcelog.c | 18 ++++++++++++++++++ 1 files changed, 18 insertions(+), 0 deletions(-) diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c index 804aa3c..8feee08 100644 --- a/drivers/xen/mcelog.c +++ b/drivers/xen/mcelog.c @@ -41,6 +41,8 @@ #include <linux/miscdevice.h> #include <linux/uaccess.h> #include <linux/capability.h> +#include <linux/poll.h> +#include <linux/sched.h> #include <xen/interface/xen.h> #include <xen/events.h> @@ -67,6 +69,8 @@ static DEFINE_SPINLOCK(xen_mce_chrdev_state_lock); static int xen_mce_chrdev_open_count; /* #times opened */ static int xen_mce_chrdev_open_exclu; /* already open exclusive? */ +static DECLARE_WAIT_QUEUE_HEAD(xen_mce_chrdev_wait); + static int xen_mce_chrdev_open(struct inode *inode, struct file *file) { spin_lock(&xen_mce_chrdev_state_lock); @@ -135,6 +139,16 @@ out: return err ? err : buf - ubuf; } +static unsigned int xen_mce_chrdev_poll(struct file *file, poll_table *wait) +{ + poll_wait(file, &xen_mce_chrdev_wait, wait); + + if (xen_mcelog.next) + return POLLIN | POLLRDNORM; + + return 0; +} + static long xen_mce_chrdev_ioctl(struct file *f, unsigned int cmd, unsigned long arg) { @@ -166,6 +180,7 @@ static const struct file_operations xen_mce_chrdev_ops = { .open = xen_mce_chrdev_open, .release = xen_mce_chrdev_release, .read = xen_mce_chrdev_read, + .poll = xen_mce_chrdev_poll, .unlocked_ioctl = xen_mce_chrdev_ioctl, .llseek = no_llseek, }; @@ -329,6 +344,9 @@ static void xen_mce_work_fn(struct work_struct *work) pr_err(XEN_MCELOG "Failed to handle nonurgent mc_info queue.\n"); + /* wake processes polling /dev/mcelog */ + wake_up_interruptible(&xen_mce_chrdev_wait); + mutex_unlock(&mcelog_lock); } static DECLARE_WORK(xen_mce_work, xen_mce_work_fn); -- 1.7.1