Liu, Jinsong
2012-Jun-12 07:51 UTC
[PATCH] xen/mce: schedule a workqueue to avoid sleep in atomic context
From aa2ce7440f16002266dc8464f749992d0c8ac0e5 Mon Sep 17 00:00:00 2001 From: Liu, Jinsong <jinsong.liu@intel.com> Date: Tue, 12 Jun 2012 23:11:16 +0800 Subject: [PATCH] xen/mce: schedule a workqueue to avoid sleep in atomic context copy_to_user might sleep and print a stack trace if it is executed in an atomic spinlock context. This patch schedule a workqueue for IRQ handler to poll the data, and use mutex instead of spinlock, so copy_to_user sleep in atomic context would not occur. Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Suggested-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> --- drivers/xen/mcelog.c | 18 +++++++++++------- 1 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c index 72e87d2..804aa3c 100644 --- a/drivers/xen/mcelog.c +++ b/drivers/xen/mcelog.c @@ -55,7 +55,7 @@ static struct mc_info g_mi; static struct mcinfo_logical_cpu *g_physinfo; static uint32_t ncpus; -static DEFINE_SPINLOCK(mcelog_lock); +static DEFINE_MUTEX(mcelog_lock); static struct xen_mce_log xen_mcelog = { .signature = XEN_MCE_LOG_SIGNATURE, @@ -106,7 +106,7 @@ static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf, unsigned num; int i, err; - spin_lock(&mcelog_lock); + mutex_lock(&mcelog_lock); num = xen_mcelog.next; @@ -130,7 +130,7 @@ static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf, err = -EFAULT; out: - spin_unlock(&mcelog_lock); + mutex_unlock(&mcelog_lock); return err ? 
err : buf - ubuf; } @@ -310,12 +310,11 @@ static int mc_queue_handle(uint32_t flags) } /* virq handler for machine check error info*/ -static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) +static void xen_mce_work_fn(struct work_struct *work) { int err; - unsigned long tmp; - spin_lock_irqsave(&mcelog_lock, tmp); + mutex_lock(&mcelog_lock); /* urgent mc_info */ err = mc_queue_handle(XEN_MC_URGENT); @@ -330,8 +329,13 @@ static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) pr_err(XEN_MCELOG "Failed to handle nonurgent mc_info queue.\n"); - spin_unlock_irqrestore(&mcelog_lock, tmp); + mutex_unlock(&mcelog_lock); +} +static DECLARE_WORK(xen_mce_work, xen_mce_work_fn); +static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) +{ + schedule_work(&xen_mce_work); return IRQ_HANDLED; } -- 1.7.1
Konrad Rzeszutek Wilk
2012-Jun-12 12:40 UTC
Re: [Xen-devel] [PATCH] xen/mce: schedule a workqueue to avoid sleep in atomic context
On Tue, Jun 12, 2012 at 07:51:03AM +0000, Liu, Jinsong wrote:> >From aa2ce7440f16002266dc8464f749992d0c8ac0e5 Mon Sep 17 00:00:00 2001 > From: Liu, Jinsong <jinsong.liu@intel.com> > Date: Tue, 12 Jun 2012 23:11:16 +0800 > Subject: [PATCH] xen/mce: schedule a workqueue to avoid sleep in atomic context > > copy_to_user might sleep and print a stack trace if it is executed > in an atomic spinlock context. > > This patch schedule a workqueue for IRQ handler to poll the data, > and use mutex instead of spinlock, so copy_to_user sleep in atomic > context would not occur. Ah much better. Usually one also includes the report of what the stack trace was. So I've added that in.> > Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > Suggested-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> > --- > drivers/xen/mcelog.c | 18 +++++++++++------- > 1 files changed, 11 insertions(+), 7 deletions(-) > > diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c > index 72e87d2..804aa3c 100644 > --- a/drivers/xen/mcelog.c > +++ b/drivers/xen/mcelog.c > @@ -55,7 +55,7 @@ static struct mc_info g_mi; > static struct mcinfo_logical_cpu *g_physinfo; > static uint32_t ncpus; > > -static DEFINE_SPINLOCK(mcelog_lock); > +static DEFINE_MUTEX(mcelog_lock); > > static struct xen_mce_log xen_mcelog = { > .signature = XEN_MCE_LOG_SIGNATURE, > @@ -106,7 +106,7 @@ static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf, > unsigned num; > int i, err; > > - spin_lock(&mcelog_lock); > + mutex_lock(&mcelog_lock); > > num = xen_mcelog.next; > > @@ -130,7 +130,7 @@ static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf, > err = -EFAULT; > > out: > - spin_unlock(&mcelog_lock); > + mutex_unlock(&mcelog_lock); > > return err ? 
err : buf - ubuf; > } > @@ -310,12 +310,11 @@ static int mc_queue_handle(uint32_t flags) > } > > /* virq handler for machine check error info*/ > -static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) > +static void xen_mce_work_fn(struct work_struct *work) > { > int err; > - unsigned long tmp; > > - spin_lock_irqsave(&mcelog_lock, tmp); > + mutex_lock(&mcelog_lock); > > /* urgent mc_info */ > err = mc_queue_handle(XEN_MC_URGENT); > @@ -330,8 +329,13 @@ static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) > pr_err(XEN_MCELOG > "Failed to handle nonurgent mc_info queue.\n"); > > - spin_unlock_irqrestore(&mcelog_lock, tmp); > + mutex_unlock(&mcelog_lock); > +} > +static DECLARE_WORK(xen_mce_work, xen_mce_work_fn); > > +static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) > +{ > + schedule_work(&xen_mce_work); > return IRQ_HANDLED; > } > > -- > 1.7.1> _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
On Tue, Jun 12, 2012 at 08:40:15AM -0400, Konrad Rzeszutek Wilk wrote:> On Tue, Jun 12, 2012 at 07:51:03AM +0000, Liu, Jinsong wrote: > > >From aa2ce7440f16002266dc8464f749992d0c8ac0e5 Mon Sep 17 00:00:00 2001 > > From: Liu, Jinsong <jinsong.liu@intel.com> > > Date: Tue, 12 Jun 2012 23:11:16 +0800 > > Subject: [PATCH] xen/mce: schedule a workqueue to avoid sleep in atomic context > > > > copy_to_user might sleep and print a stack trace if it is executed > > in an atomic spinlock context. > > > > This patch schedule a workqueue for IRQ handler to poll the data, > > and use mutex instead of spinlock, so copy_to_user sleep in atomic > > context would not occur. > > Ah much better. Usually one also includes the report of what the > stack trace was. So I've added that in. So another bug which is that mcelog is spinning at 100% CPU (and only under Xen). It seems to be doing: ppoll([{fd=4, events=POLLIN}, {fd=3, events=POLLIN}], 2, NULL, [], 8) = 1 ([{fd=3, revents=POLLIN}]) read(3, "", 2816) = 0 ppoll([{fd=4, events=POLLIN}, {fd=3, events=POLLIN}], 2, NULL, [], 8) = 1 ([{fd=3, revents=POLLIN}]) read(3, "", 2816) constantly.
Konrad Rzeszutek Wilk wrote:> On Tue, Jun 12, 2012 at 08:40:15AM -0400, Konrad Rzeszutek Wilk wrote: >> On Tue, Jun 12, 2012 at 07:51:03AM +0000, Liu, Jinsong wrote: >>>> From aa2ce7440f16002266dc8464f749992d0c8ac0e5 Mon Sep 17 00:00:00 >>>> 2001 >>> From: Liu, Jinsong <jinsong.liu@intel.com> >>> Date: Tue, 12 Jun 2012 23:11:16 +0800 >>> Subject: [PATCH] xen/mce: schedule a workqueue to avoid sleep in >>> atomic context >>> >>> copy_to_user might sleep and print a stack trace if it is executed >>> in an atomic spinlock context. >>> >>> This patch schedule a workqueue for IRQ handler to poll the data, >>> and use mutex instead of spinlock, so copy_to_user sleep in atomic >>> context would not occur. >> >> Ah much better. Usually one also includes the report of what the >> stack trace was. So I''ve added that in. > > So another bug which is that mcelog is spinning at 100% CPU (and only > under Xen). > > It seems to be doing: > > ppoll([{fd=4, events=POLLIN}, {fd=3, events=POLLIN}], 2, NULL, [], 8) > = 1 ([{fd=3, revents=POLLIN}]) read(3, "", 2816) > = 0 > ppoll([{fd=4, events=POLLIN}, {fd=3, events=POLLIN}], 2, NULL, [], 8) > = 1 ([{fd=3, revents=POLLIN}]) read(3, "", 2816) > > constantly.I will debug it. I have try at my platform, but fail to reproduce it. (You still use the config you send me last time, right?) Would you tell me your step? Thanks, Jinsong-- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Liu, Jinsong
2012-Jun-14 12:55 UTC
[PATCH] xen/mce: add .poll method for mcelog device driver
>> >> So another bug which is that mcelog is spinning at 100% CPU (and >> only under Xen). >> >> It seems to be doing: >> >> ppoll([{fd=4, events=POLLIN}, {fd=3, events=POLLIN}], 2, NULL, [], 8) >> = 1 ([{fd=3, revents=POLLIN}]) read(3, "", 2816) >> = 0 >> ppoll([{fd=4, events=POLLIN}, {fd=3, events=POLLIN}], 2, NULL, [], 8) >> = 1 ([{fd=3, revents=POLLIN}]) read(3, "", 2816) >> >> constantly. > > I will debug it. I have try at my platform, but fail to reproduce it. > (You still use the config you send me last time, right?) Would you > tell me your step? > > Thanks, > JinsongHave a look at it, it caused by NULL .poll method. Attached is the patch to fix this bug, but I cannot reproduce the bug at my platform, so would you please help me to test it at your side? Thanks, Jinsong ============From fb664590ce4198539d96b6bc245c5d70cc079129 Mon Sep 17 00:00:00 2001 From: Liu, Jinsong <jinsong.liu@intel.com> Date: Fri, 15 Jun 2012 04:16:52 +0800 Subject: [PATCH] xen/mce: add .poll method for mcelog device driver If a driver leaves its poll method NULL, the device is assumed to be both readable and writable without blocking. This patch add .poll method to xen mcelog device driver, so that when mcelog use system calls like ppoll or select, it would be blocked when no data available, and avoid spinning at CPU. Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> --- drivers/xen/mcelog.c | 10 ++++++++++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c index 804aa3c..c0b39c9 100644 --- a/drivers/xen/mcelog.c +++ b/drivers/xen/mcelog.c @@ -41,6 +41,7 @@ #include <linux/miscdevice.h> #include <linux/uaccess.h> #include <linux/capability.h> +#include <linux/poll.h> #include <xen/interface/xen.h> #include <xen/events.h> @@ -135,6 +136,14 @@ out: return err ? 
err : buf - ubuf; } +static unsigned int xen_mce_chrdev_poll(struct file *file, poll_table *wait) +{ + if (xen_mcelog.next) + return POLLIN | POLLRDNORM; + + return 0; +} + static long xen_mce_chrdev_ioctl(struct file *f, unsigned int cmd, unsigned long arg) { @@ -166,6 +175,7 @@ static const struct file_operations xen_mce_chrdev_ops = { .open = xen_mce_chrdev_open, .release = xen_mce_chrdev_release, .read = xen_mce_chrdev_read, + .poll = xen_mce_chrdev_poll, .unlocked_ioctl = xen_mce_chrdev_ioctl, .llseek = no_llseek, }; -- 1.7.1
Liu, Jinsong
2012-Jun-14 17:19 UTC
RE: [PATCH] xen/mce: add .poll method for mcelog device driver
Liu, Jinsong wrote:>>> So another bug which is that mcelog is spinning at 100% CPU (and >>> only under Xen). >>> >>> It seems to be doing: >>> >>> ppoll([{fd=4, events=POLLIN}, {fd=3, events=POLLIN}], 2, NULL, [], >>> 8) = 1 ([{fd=3, revents=POLLIN}]) read(3, "", 2816) >>> = 0 >>> ppoll([{fd=4, events=POLLIN}, {fd=3, events=POLLIN}], 2, NULL, [], >>> 8) = 1 ([{fd=3, revents=POLLIN}]) read(3, "", 2816) >>> >>> constantly. >> >> I will debug it. I have try at my platform, but fail to reproduce it. >> (You still use the config you send me last time, right?) Would you >> tell me your step? >> >> Thanks, >> Jinsong > > Have a look at it, it caused by NULL .poll method. > Attached is the patch to fix this bug, but I cannot reproduce the bug > at my platform, so would you please help me to test it at your side? >Ah I know how you trigger the bug - you run mcelog as daemon ... then spinning at CPU. I update my patch as attached, and test at my platform OK now. Thanks, Jinsong =======================From 771bf5835a1ed9e439c7da289cb3a72ee8c9bd02 Mon Sep 17 00:00:00 2001 From: Liu, Jinsong <jinsong.liu@intel.com> Date: Fri, 15 Jun 2012 09:03:39 +0800 Subject: [PATCH] xen/mce: add .poll method for mcelog device driver If a driver leaves its poll method NULL, the device is assumed to be both readable and writable without blocking. This patch add .poll method to xen mcelog device driver, so that when mcelog use system calls like ppoll or select, it would be blocked when no data available, and avoid spinning at CPU. 
Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> --- drivers/xen/mcelog.c | 18 ++++++++++++++++++ 1 files changed, 18 insertions(+), 0 deletions(-) diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c index 804aa3c..8feee08 100644 --- a/drivers/xen/mcelog.c +++ b/drivers/xen/mcelog.c @@ -41,6 +41,8 @@ #include <linux/miscdevice.h> #include <linux/uaccess.h> #include <linux/capability.h> +#include <linux/poll.h> +#include <linux/sched.h> #include <xen/interface/xen.h> #include <xen/events.h> @@ -67,6 +69,8 @@ static DEFINE_SPINLOCK(xen_mce_chrdev_state_lock); static int xen_mce_chrdev_open_count; /* #times opened */ static int xen_mce_chrdev_open_exclu; /* already open exclusive? */ +static DECLARE_WAIT_QUEUE_HEAD(xen_mce_chrdev_wait); + static int xen_mce_chrdev_open(struct inode *inode, struct file *file) { spin_lock(&xen_mce_chrdev_state_lock); @@ -135,6 +139,16 @@ out: return err ? err : buf - ubuf; } +static unsigned int xen_mce_chrdev_poll(struct file *file, poll_table *wait) +{ + poll_wait(file, &xen_mce_chrdev_wait, wait); + + if (xen_mcelog.next) + return POLLIN | POLLRDNORM; + + return 0; +} + static long xen_mce_chrdev_ioctl(struct file *f, unsigned int cmd, unsigned long arg) { @@ -166,6 +180,7 @@ static const struct file_operations xen_mce_chrdev_ops = { .open = xen_mce_chrdev_open, .release = xen_mce_chrdev_release, .read = xen_mce_chrdev_read, + .poll = xen_mce_chrdev_poll, .unlocked_ioctl = xen_mce_chrdev_ioctl, .llseek = no_llseek, }; @@ -329,6 +344,9 @@ static void xen_mce_work_fn(struct work_struct *work) pr_err(XEN_MCELOG "Failed to handle nonurgent mc_info queue.\n"); + /* wake processes polling /dev/mcelog */ + wake_up_interruptible(&xen_mce_chrdev_wait); + mutex_unlock(&mcelog_lock); } static DECLARE_WORK(xen_mce_work, xen_mce_work_fn); -- 1.7.1