When a user hot-unplugs a disk that is busy serving I/O, __blk_run_queue
might be unable to drain all the requests. As a result, blk_drain_queue()
would loop forever and blk_cleanup_queue() would never return, so the
hot-unplug fails.

This patch adds a callback in blk_drain_queue() for the low-level driver
to abort requests.

Currently, this is useful for virtio-blk to do cleanup in hot-unplug.
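
For context, here is a simplified sketch of where the new hook lands in
the 3.4-era drain loop; this is an illustration only (the request
counters and flush-queue checks of the real function are elided):

	void blk_drain_queue(struct request_queue *q, bool drain_all)
	{
		while (true) {
			bool drain = false;

			spin_lock_irq(q->queue_lock);
			elv_drain_elevator(q);
			if (drain_all)
				blk_throtl_drain(q);

			/* New hook: let the low-level driver abort
			 * requests it can never complete. */
			if (q->abort_queue_fn)
				q->abort_queue_fn(q);

			if (!list_empty(&q->queue_head))
				__blk_run_queue(q);

			/* Keep looping while anything is still queued
			 * or holds elevator-private data. */
			drain |= q->rq.elvpriv;
			if (drain_all)
				drain |= !list_empty(&q->queue_head);

			spin_unlock_irq(q->queue_lock);
			if (!drain)
				break;
			msleep(10);
		}
	}

A driver opts in by calling blk_queue_abort_queue(q, its_abort_fn) once
the queue exists, as patch 4/5 does for virtio-blk.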
Cc: Jens Axboe <axboe at kernel.dk>
Cc: Tejun Heo <tj at kernel.org>
Cc: linux-fsdevel at vger.kernel.org
Cc: Rusty Russell <rusty at rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst at redhat.com>
Cc: virtualization at lists.linux-foundation.org
Cc: kvm at vger.kernel.org
Signed-off-by: Asias He <asias at redhat.com>
---
block/blk-core.c | 3 +++
block/blk-settings.c | 12 ++++++++++++
include/linux/blkdev.h | 3 +++
3 files changed, 18 insertions(+)
diff --git a/block/blk-core.c b/block/blk-core.c
index 1f61b74..ca42fd7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -369,6 +369,9 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
if (drain_all)
blk_throtl_drain(q);
+ if (q->abort_queue_fn)
+ q->abort_queue_fn(q);
+
/*
* This function might be called on a queue which failed
* driver init after queue creation. Some drivers
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d3234fc..83ccb48 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -100,6 +100,18 @@ void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn)
EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
/**
+ * blk_queue_abort_queue - set driver specific abort function
+ * @q: queue
+ * @afn: abort_queue_fn
+ */
+void blk_queue_abort_queue(struct request_queue *q, abort_queue_fn *afn)
+{
+ q->abort_queue_fn = afn;
+}
+EXPORT_SYMBOL(blk_queue_abort_queue);
+
+
+/**
* blk_set_default_limits - reset limits to default values
* @lim: the queue_limits structure to reset
*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2aa2466..e2d58bd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -200,6 +200,7 @@ struct request_pm_state
typedef void (request_fn_proc) (struct request_queue *q);
typedef void (make_request_fn) (struct request_queue *q, struct bio *bio);
+typedef void (abort_queue_fn) (struct request_queue *q);
typedef int (prep_rq_fn) (struct request_queue *, struct request *);
typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
@@ -282,6 +283,7 @@ struct request_queue {
request_fn_proc *request_fn;
make_request_fn *make_request_fn;
+ abort_queue_fn *abort_queue_fn;
prep_rq_fn *prep_rq_fn;
unprep_rq_fn *unprep_rq_fn;
merge_bvec_fn *merge_bvec_fn;
@@ -821,6 +823,7 @@ extern struct request_queue *blk_init_allocated_queue(struct request_queue *,
request_fn_proc *, spinlock_t *);
extern void blk_cleanup_queue(struct request_queue *);
extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
+extern void blk_queue_abort_queue(struct request_queue *, abort_queue_fn *);
extern void blk_queue_bounce_limit(struct request_queue *, u64);
extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int);
extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
--
1.7.10.1
Asias He
2012-May-21 09:08 UTC
[RFC PATCH 3/5] virtio-blk: Call del_gendisk() before disable guest kick
del_gendisk() might not return, because it cannot remove the
/sys/block/vda/serial sysfs entry while another thread (e.g. udev) is
reading it:
virtblk_remove()
  vdev->config->reset() : guest will not kick us through interrupt
  del_gendisk()
    device_del()
      kobject_del(): got stuck, sysfs entry ref count non zero

sysfs_open_file(): user space process reads /sys/block/vda/serial
  sysfs_get_active() : got sysfs entry ref count
  dev_attr_show()
    virtblk_serial_show()
      blk_execute_rq() : got stuck, interrupt is disabled,
                         request cannot be finished
This patch fixes it by calling del_gendisk() before we disable the
guest's interrupt, so that the request issued by virtblk_serial_show()
can finish and del_gendisk() will succeed.

This fixes another race in the hot-unplug process.

It is safe to call del_gendisk(vblk->disk) before
flush_work(&vblk->config_work), which might access vblk->disk, because
vblk->disk is not freed until put_disk(vblk->disk).
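
To see why the sysfs reader blocks: the serial attribute issues a real
request to the device and sleeps until it completes. A simplified sketch
of the driver's id path from that era (error paths trimmed; an
illustration, not the verbatim source):

	static int virtblk_get_id(struct gendisk *disk, char *id_str)
	{
		struct request_queue *q = disk->queue;
		struct request *req;
		struct bio *bio;
		int err;

		bio = bio_map_kern(q, id_str, VIRTIO_BLK_ID_BYTES,
				   GFP_KERNEL);
		if (IS_ERR(bio))
			return PTR_ERR(bio);

		req = blk_make_request(q, bio, GFP_KERNEL);
		if (IS_ERR(req)) {
			bio_put(bio);
			return PTR_ERR(req);
		}
		req->cmd_type = REQ_TYPE_SPECIAL;

		/* Sleeps until the host completes the request; after the
		 * device has been reset, that completion never arrives. */
		err = blk_execute_rq(q, disk, req, false);
		blk_put_request(req);
		return err;
	}

With del_gendisk() moved before the reset, this request is still
serviced normally, the sysfs reference is dropped, and device_del()
can proceed.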
Cc: Rusty Russell <rusty at rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst at redhat.com>
Cc: virtualization at lists.linux-foundation.org
Cc: kvm at vger.kernel.org
Signed-off-by: Asias He <asias at redhat.com>
---
drivers/block/virtio_blk.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 693187d..7d5f5b0 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -584,12 +584,13 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
vblk->config_enable = false;
mutex_unlock(&vblk->config_lock);
+ del_gendisk(vblk->disk);
+
/* Stop all the virtqueues. */
vdev->config->reset(vdev);
flush_work(&vblk->config_work);
- del_gendisk(vblk->disk);
/* Abort requests dispatched to driver. */
spin_lock_irqsave(&vblk->lock, flags);
--
1.7.10.1
Asias He
2012-May-21 09:08 UTC
[RFC PATCH 4/5] virtio-blk: Use q->abort_queue_fn() to abort requests
virtblk_abort_queue() will be called by the block layer when it cleans
up the queue:

	blk_cleanup_queue() -> blk_drain_queue() -> q->abort_queue_fn(q)

virtblk_abort_queue() then:
1) aborts requests in the block layer that have not yet been dispatched
   to the driver;
2) aborts requests already dispatched to the driver;
3) wakes up processes that are sleeping in get_request_wait().

This makes hot-unplugging a disk that is busy serving I/O succeed.
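
For step 1, elv_abort_queue() ends every request still sitting on the
dispatch list with -EIO. Roughly what this era's block core does (a
trimmed sketch; the flush-machinery handling is elided):

	void elv_abort_queue(struct request_queue *q)
	{
		struct request *rq;

		while (!list_empty(&q->queue_head)) {
			rq = list_entry_rq(q->queue_head.next);
			rq->cmd_flags |= REQ_QUIET;
			/* Mark the request started so the completion
			 * path's debug checks stay quiet. */
			blk_start_request(rq);
			__blk_end_request_all(rq, -EIO);
		}
	}

Step 2 then reclaims the buffers the device will never complete via
virtqueue_detach_unused_buf(), and step 3 kicks get_request_wait()
sleepers so they notice the dying queue instead of waiting forever for
a request to be freed.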
Cc: Rusty Russell <rusty at rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst at redhat.com>
Cc: virtualization at lists.linux-foundation.org
Cc: kvm at vger.kernel.org
Cc: Jens Axboe <axboe at kernel.dk>
Cc: Tejun Heo <tj at kernel.org>
Cc: linux-fsdevel at vger.kernel.org
Signed-off-by: Asias He <asias at redhat.com>
---
drivers/block/virtio_blk.c | 38 +++++++++++++++++++++++++++-----------
1 file changed, 27 insertions(+), 11 deletions(-)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 7d5f5b0..ba35509 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -182,6 +182,31 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
return true;
}
+void virtblk_abort_queue(struct request_queue *q)
+{
+ struct virtio_blk *vblk = q->queuedata;
+ struct virtblk_req *vbr;
+ int i;
+
+ /* Abort requests in block layer. */
+ elv_abort_queue(q);
+
+ /* Abort requests dispatched to driver. */
+ while ((vbr = virtqueue_detach_unused_buf(vblk->vq))) {
+ vbr->req->cmd_flags |= REQ_QUIET;
+ __blk_end_request_all(vbr->req, -EIO);
+ mempool_free(vbr, vblk->pool);
+ }
+
+ /* Wake up threads sleeping on get_request_wait() */
+ for (i = 0; i < ARRAY_SIZE(q->rq.wait); i++) {
+ if (waitqueue_active(&q->rq.wait[i]))
+ wake_up_all(&q->rq.wait[i]);
+ }
+
+ return;
+}
+
static void do_virtblk_request(struct request_queue *q)
{
struct virtio_blk *vblk = q->queuedata;
@@ -462,6 +487,8 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
goto out_put_disk;
}
+ blk_queue_abort_queue(q, virtblk_abort_queue);
+
q->queuedata = vblk;
virtblk_name_format("vd", index, vblk->disk->disk_name,
DISK_NAME_LEN);
@@ -576,8 +603,6 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
{
struct virtio_blk *vblk = vdev->priv;
int index = vblk->index;
- struct virtblk_req *vbr;
- unsigned long flags;
/* Prevent config work handler from accessing the device. */
mutex_lock(&vblk->config_lock);
@@ -591,15 +616,6 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
flush_work(&vblk->config_work);
-
- /* Abort requests dispatched to driver. */
- spin_lock_irqsave(&vblk->lock, flags);
- while ((vbr = virtqueue_detach_unused_buf(vblk->vq))) {
- __blk_end_request_all(vbr->req, -EIO);
- mempool_free(vbr, vblk->pool);
- }
- spin_unlock_irqrestore(&vblk->lock, flags);
-
blk_cleanup_queue(vblk->disk->queue);
put_disk(vblk->disk);
mempool_destroy(vblk->pool);
--
1.7.10.1
Asias He
2012-May-21 09:08 UTC
[RFC PATCH 5/5] virtio-blk: Use block layer provided spinlock
The block layer will allocate a spinlock for the queue if the driver
does not provide one in blk_init_queue().

The reason to use the internal spinlock is that blk_cleanup_queue()
switches to the internal spinlock in the cleanup code path:

	if (q->queue_lock != &q->__queue_lock)
		q->queue_lock = &q->__queue_lock;

However, processes in D state might have taken the driver-provided
spinlock; when those processes wake up, they would release the
block-layer-provided spinlock instead:
=====================================
[ BUG: bad unlock balance detected! ]
3.4.0-rc7+ #238 Not tainted
-------------------------------------
fio/3587 is trying to release lock (&(&q->__queue_lock)->rlock) at:
[<ffffffff813274d2>] blk_queue_bio+0x2a2/0x380
but there are no more locks to release!

other info that might help us debug this:
1 lock held by fio/3587:
 #0:  (&(&vblk->lock)->rlock){......}, at:
[<ffffffff8132661a>] get_request_wait+0x19a/0x250
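
The fallback itself is a one-liner in queue setup; roughly what
blk_init_allocated_queue() does (sketch):

	/* Use the queue's embedded lock unless the driver supplies one. */
	q->queue_lock = &q->__queue_lock;
	if (lock)
		q->queue_lock = lock;

Passing NULL to blk_init_queue() therefore makes the driver and the
block core agree on a single lock for the queue's whole lifetime, so
the cleanup-time switch above becomes a no-op.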
Cc: Rusty Russell <rusty at rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst at redhat.com>
Cc: virtualization at lists.linux-foundation.org
Cc: kvm at vger.kernel.org
Signed-off-by: Asias He <asias at redhat.com>
---
drivers/block/virtio_blk.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index ba35509..0c2f0e8 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -21,8 +21,6 @@ struct workqueue_struct *virtblk_wq;
struct virtio_blk
{
- spinlock_t lock;
-
struct virtio_device *vdev;
struct virtqueue *vq;
@@ -65,7 +63,7 @@ static void blk_done(struct virtqueue *vq)
unsigned int len;
unsigned long flags;
- spin_lock_irqsave(&vblk->lock, flags);
+ spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
int error;
@@ -99,7 +97,7 @@ static void blk_done(struct virtqueue *vq)
}
/* In case queue is stopped waiting for more buffers. */
blk_start_queue(vblk->disk->queue);
- spin_unlock_irqrestore(&vblk->lock, flags);
+ spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
}
static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
@@ -456,7 +454,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
goto out_free_index;
}
- spin_lock_init(&vblk->lock);
vblk->vdev = vdev;
vblk->sg_elems = sg_elems;
sg_init_table(vblk->sg, vblk->sg_elems);
@@ -481,7 +478,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
goto out_mempool;
}
- q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
+ q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL);
if (!q) {
err = -ENOMEM;
goto out_put_disk;
--
1.7.10.1
On Mon, May 21, 2012 at 05:08:29PM +0800, Asias He wrote:
> When user hot-unplug a disk which is busy serving I/O, __blk_run_queue
> might be unable to drain all the requests. As a result, the
> blk_drain_queue() would loop forever and blk_cleanup_queue would not
> return. So hot-unplug will fail.
>
> This patch adds a callback in blk_drain_queue() for low lever driver to
> abort requests.
>
> Currently, this is useful for virtio-blk to do cleanup in hot-unplug.

Why is this necessary? virtio-blk should know that the device is gone
and fail in-flight / new commands. That's what other drivers do. What
makes virtio-blk different?

Thanks.

--
tejun
Michael S. Tsirkin
2012-May-21 20:59 UTC
[RFC PATCH 5/5] virtio-blk: Use block layer provided spinlock
On Mon, May 21, 2012 at 05:08:33PM +0800, Asias He wrote:
> Block layer will allocate a spinlock for the queue if the driver does
> not provide one in blk_init_queue().
>
> The reason to use the internal spinlock is that blk_cleanup_queue() will
> switch to use the internal spinlock in the cleanup code path.
> 	if (q->queue_lock != &q->__queue_lock)
> 		q->queue_lock = &q->__queue_lock;
>
> However, processes which are in D state might have taken the driver
> provided spinlock, when the processes wake up, they would release the
> block provided spinlock.

Are you saying any driver with its own spinlock is
broken if hotunplugged under stress?

> ====================================
> [ BUG: bad unlock balance detected! ]
> 3.4.0-rc7+ #238 Not tainted
> -------------------------------------
> fio/3587 is trying to release lock (&(&q->__queue_lock)->rlock) at:
> [<ffffffff813274d2>] blk_queue_bio+0x2a2/0x380
> but there are no more locks to release!
>
> other info that might help us debug this:
> 1 lock held by fio/3587:
>  #0:  (&(&vblk->lock)->rlock){......}, at:
> [<ffffffff8132661a>] get_request_wait+0x19a/0x250
>
> Cc: Rusty Russell <rusty at rustcorp.com.au>
> Cc: "Michael S. Tsirkin" <mst at redhat.com>
> Cc: virtualization at lists.linux-foundation.org
> Cc: kvm at vger.kernel.org
> Signed-off-by: Asias He <asias at redhat.com>
> ---
>  drivers/block/virtio_blk.c | 9 +++------
>  1 file changed, 3 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index ba35509..0c2f0e8 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -21,8 +21,6 @@ struct workqueue_struct *virtblk_wq;
>
>  struct virtio_blk
>  {
> -	spinlock_t lock;
> -
>  	struct virtio_device *vdev;
>  	struct virtqueue *vq;
>
> @@ -65,7 +63,7 @@ static void blk_done(struct virtqueue *vq)
>  	unsigned int len;
>  	unsigned long flags;
>
> -	spin_lock_irqsave(&vblk->lock, flags);
> +	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
>  	while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
>  		int error;
>
> @@ -99,7 +97,7 @@ static void blk_done(struct virtqueue *vq)
>  	}
>  	/* In case queue is stopped waiting for more buffers. */
>  	blk_start_queue(vblk->disk->queue);
> -	spin_unlock_irqrestore(&vblk->lock, flags);
> +	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
>  }
>
>  static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
> @@ -456,7 +454,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
>  		goto out_free_index;
>  	}
>
> -	spin_lock_init(&vblk->lock);
>  	vblk->vdev = vdev;
>  	vblk->sg_elems = sg_elems;
>  	sg_init_table(vblk->sg, vblk->sg_elems);
> @@ -481,7 +478,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
>  		goto out_mempool;
>  	}
>
> -	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
> +	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL);
>  	if (!q) {
>  		err = -ENOMEM;
>  		goto out_put_disk;
> --
> 1.7.10.1
Asias He
2012-May-22 08:22 UTC
[RFC PATCH 5/5] virtio-blk: Use block layer provided spinlock
On 05/22/2012 04:59 AM, Michael S. Tsirkin wrote:
> On Mon, May 21, 2012 at 05:08:33PM +0800, Asias He wrote:
>> Block layer will allocate a spinlock for the queue if the driver does
>> not provide one in blk_init_queue().
>>
>> The reason to use the internal spinlock is that blk_cleanup_queue() will
>> switch to use the internal spinlock in the cleanup code path.
>> 	if (q->queue_lock != &q->__queue_lock)
>> 		q->queue_lock = &q->__queue_lock;
>>
>> However, processes which are in D state might have taken the driver
>> provided spinlock, when the processes wake up, they would release the
>> block provided spinlock.
>
> Are you saying any driver with its own spinlock is
> broken if hotunplugged under stress?

Hi, Michael

I cannot say that. It is very hard to find a real hardware device to
try this. I tried on qemu with a LSI Logic / Symbios Logic 53c895a scsi
disk with hot-unplug. It is completely broken. And IDE does not support
hotplug at all.

Do you see any downside of using the block provided spinlock?

>> ====================================
>> [ BUG: bad unlock balance detected! ]
>> 3.4.0-rc7+ #238 Not tainted
>> -------------------------------------
>> fio/3587 is trying to release lock (&(&q->__queue_lock)->rlock) at:
>> [<ffffffff813274d2>] blk_queue_bio+0x2a2/0x380
>> but there are no more locks to release!
>>
>> other info that might help us debug this:
>> 1 lock held by fio/3587:
>>  #0:  (&(&vblk->lock)->rlock){......}, at:
>> [<ffffffff8132661a>] get_request_wait+0x19a/0x250
>>
>> Cc: Rusty Russell <rusty at rustcorp.com.au>
>> Cc: "Michael S. Tsirkin" <mst at redhat.com>
>> Cc: virtualization at lists.linux-foundation.org
>> Cc: kvm at vger.kernel.org
>> Signed-off-by: Asias He <asias at redhat.com>
>> ---
>>  drivers/block/virtio_blk.c | 9 +++------
>>  1 file changed, 3 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
>> index ba35509..0c2f0e8 100644
>> --- a/drivers/block/virtio_blk.c
>> +++ b/drivers/block/virtio_blk.c
>> @@ -21,8 +21,6 @@ struct workqueue_struct *virtblk_wq;
>>
>>  struct virtio_blk
>>  {
>> -	spinlock_t lock;
>> -
>>  	struct virtio_device *vdev;
>>  	struct virtqueue *vq;
>>
>> @@ -65,7 +63,7 @@ static void blk_done(struct virtqueue *vq)
>>  	unsigned int len;
>>  	unsigned long flags;
>>
>> -	spin_lock_irqsave(&vblk->lock, flags);
>> +	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
>>  	while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
>>  		int error;
>>
>> @@ -99,7 +97,7 @@ static void blk_done(struct virtqueue *vq)
>>  	}
>>  	/* In case queue is stopped waiting for more buffers. */
>>  	blk_start_queue(vblk->disk->queue);
>> -	spin_unlock_irqrestore(&vblk->lock, flags);
>> +	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
>>  }
>>
>>  static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
>> @@ -456,7 +454,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
>>  		goto out_free_index;
>>  	}
>>
>> -	spin_lock_init(&vblk->lock);
>>  	vblk->vdev = vdev;
>>  	vblk->sg_elems = sg_elems;
>>  	sg_init_table(vblk->sg, vblk->sg_elems);
>> @@ -481,7 +478,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
>>  		goto out_mempool;
>>  	}
>>
>> -	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
>> +	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL);
>>  	if (!q) {
>>  		err = -ENOMEM;
>>  		goto out_put_disk;
>> --
>> 1.7.10.1

--
Asias