When a user hot-unplugs a disk which is busy serving I/O, __blk_run_queue
might be unable to drain all the requests. As a result, blk_drain_queue()
would loop forever and blk_cleanup_queue() would not return, so hot-unplug
will fail.

This patch adds a callback in blk_drain_queue() for the low-level driver
to abort requests.

Currently, this is useful for virtio-blk to do cleanup in hot-unplug.

Cc: Jens Axboe <axboe at kernel.dk>
Cc: Tejun Heo <tj at kernel.org>
Cc: linux-fsdevel at vger.kernel.org
Cc: Rusty Russell <rusty at rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst at redhat.com>
Cc: virtualization at lists.linux-foundation.org
Cc: kvm at vger.kernel.org
Signed-off-by: Asias He <asias at redhat.com>
---
 block/blk-core.c       |  3 +++
 block/blk-settings.c   | 12 ++++++++++++
 include/linux/blkdev.h |  3 +++
 3 files changed, 18 insertions(+)

diff --git a/block/blk-core.c b/block/blk-core.c
index 1f61b74..ca42fd7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -369,6 +369,9 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
 		if (drain_all)
 			blk_throtl_drain(q);
 
+		if (q->abort_queue_fn)
+			q->abort_queue_fn(q);
+
 		/*
 		 * This function might be called on a queue which failed
 		 * driver init after queue creation. Some drivers
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d3234fc..83ccb48 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -100,6 +100,18 @@ void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn)
 EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
 
 /**
+ * blk_queue_abort_queue - set driver specific abort function
+ * @q: queue
+ * @afn: abort_queue_fn
+ */
+void blk_queue_abort_queue(struct request_queue *q, abort_queue_fn *afn)
+{
+	q->abort_queue_fn = afn;
+}
+EXPORT_SYMBOL(blk_queue_abort_queue);
+
+
+/**
  * blk_set_default_limits - reset limits to default values
  * @lim: the queue_limits structure to reset
  *
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2aa2466..e2d58bd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -200,6 +200,7 @@ struct request_pm_state
 
 typedef void (request_fn_proc) (struct request_queue *q);
 typedef void (make_request_fn) (struct request_queue *q, struct bio *bio);
+typedef void (abort_queue_fn) (struct request_queue *q);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
 
@@ -282,6 +283,7 @@ struct request_queue {
 
 	request_fn_proc		*request_fn;
 	make_request_fn		*make_request_fn;
+	abort_queue_fn		*abort_queue_fn;
 	prep_rq_fn		*prep_rq_fn;
 	unprep_rq_fn		*unprep_rq_fn;
 	merge_bvec_fn		*merge_bvec_fn;
@@ -821,6 +823,7 @@ extern struct request_queue *blk_init_allocated_queue(struct request_queue *,
 						      request_fn_proc *, spinlock_t *);
 extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
+extern void blk_queue_abort_queue(struct request_queue *, abort_queue_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
 extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
-- 
1.7.10.1
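As an illustration, a driver would register the callback once at probe time
and fail its in-flight requests when called back. A minimal sketch follows;
the mydrv_* names, struct mydrv_dev, mydrv_request_fn() and
mydrv_take_inflight() are hypothetical, while blk_init_queue(),
blk_queue_abort_queue() and __blk_end_request_all() are the real interfaces
touched by this patch:

	/* Sketch only: the mydrv_* names are hypothetical. */
	static void mydrv_abort_queue(struct request_queue *q)
	{
		struct mydrv_dev *dev = q->queuedata;	/* set in probe */
		struct request *rq;

		/* Fail every request the hardware still owns so the drain can finish. */
		while ((rq = mydrv_take_inflight(dev)) != NULL)
			__blk_end_request_all(rq, -EIO);
	}

	static int mydrv_init_queue(struct mydrv_dev *dev)
	{
		struct request_queue *q;

		q = blk_init_queue(mydrv_request_fn, NULL);
		if (!q)
			return -ENOMEM;
		q->queuedata = dev;
		/* blk_drain_queue() invokes this while draining in blk_cleanup_queue(). */
		blk_queue_abort_queue(q, mydrv_abort_queue);
		dev->queue = q;
		return 0;
	}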
Asias He
2012-May-21 09:08 UTC
[RFC PATCH 3/5] virtio-blk: Call del_gendisk() before disable guest kick
del_gendisk() might not return due to failing to remove the
/sys/block/vda/serial sysfs entry when another thread (udev) is trying
to read it.

virtblk_remove()
  vdev->config->reset() : guest will not kick us through interrupt
    del_gendisk()
      device_del()
        kobject_del(): got stuck, sysfs entry ref count non zero

sysfs_open_file(): user space process read /sys/block/vda/serial
  sysfs_get_active() : got sysfs entry ref count
    dev_attr_show()
      virtblk_serial_show()
        blk_execute_rq() : got stuck, interrupt is disabled,
                           request cannot be finished

This patch fixes it by calling del_gendisk() before we disable the
guest's interrupt, so that the request sent in virtblk_serial_show()
will be finished and del_gendisk() will succeed.

This fixes another race in the hot-unplug process.

It is safe to call del_gendisk(vblk->disk) before
flush_work(&vblk->config_work), which might access vblk->disk, because
vblk->disk is not freed until put_disk(vblk->disk).

Cc: Rusty Russell <rusty at rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst at redhat.com>
Cc: virtualization at lists.linux-foundation.org
Cc: kvm at vger.kernel.org
Signed-off-by: Asias He <asias at redhat.com>
---
 drivers/block/virtio_blk.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 693187d..7d5f5b0 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -584,12 +584,13 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
 	vblk->config_enable = false;
 	mutex_unlock(&vblk->config_lock);
 
+	del_gendisk(vblk->disk);
+
 	/* Stop all the virtqueues. */
 	vdev->config->reset(vdev);
 
 	flush_work(&vblk->config_work);
 
-	del_gendisk(vblk->disk);
 
 	/* Abort requests dispatched to driver. */
 	spin_lock_irqsave(&vblk->lock, flags);
-- 
1.7.10.1
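Condensed, the teardown order after this patch looks as follows (a sketch
abridged from the diff above, with unrelated steps omitted):

	/* Abridged view of virtblk_remove() after this patch; not a literal copy. */
	static void virtblk_remove_sketch(struct virtio_device *vdev)
	{
		struct virtio_blk *vblk = vdev->priv;

		/*
		 * 1. Remove sysfs entries while the device can still complete
		 *    requests: a concurrent reader of /sys/block/vda/serial goes
		 *    through blk_execute_rq(), which only finishes if completion
		 *    interrupts are still delivered.
		 */
		del_gendisk(vblk->disk);

		/* 2. Only now silence the device; no request can get stuck. */
		vdev->config->reset(vdev);

		/*
		 * 3. Safe even though the work handler may touch vblk->disk:
		 *    the disk is not freed until put_disk() runs later.
		 */
		flush_work(&vblk->config_work);
	}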
Asias He
2012-May-21 09:08 UTC
[RFC PATCH 4/5] virtio-blk: Use q->abort_queue_fn() to abort requests
virtblk_abort_queue() will be called by the block layer when it cleans
up the queue:

   blk_cleanup_queue
     -> blk_drain_queue()
       -> q->abort_queue_fn(q)
            virtblk_abort_queue()

virtblk_abort_queue():
1) Abort requests in the block layer which are not yet dispatched to
   the driver
2) Abort requests already dispatched to the driver
3) Wake up processes which are sleeping on get_request_wait()

This makes hot-unplugging a disk which is busy serving I/O succeed.

Cc: Rusty Russell <rusty at rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst at redhat.com>
Cc: virtualization at lists.linux-foundation.org
Cc: kvm at vger.kernel.org
Cc: Jens Axboe <axboe at kernel.dk>
Cc: Tejun Heo <tj at kernel.org>
Cc: linux-fsdevel at vger.kernel.org
Signed-off-by: Asias He <asias at redhat.com>
---
 drivers/block/virtio_blk.c | 38 +++++++++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 7d5f5b0..ba35509 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -182,6 +182,31 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 	return true;
 }
 
+void virtblk_abort_queue(struct request_queue *q)
+{
+	struct virtio_blk *vblk = q->queuedata;
+	struct virtblk_req *vbr;
+	int i;
+
+	/* Abort requests in block layer. */
+	elv_abort_queue(q);
+
+	/* Abort requests dispatched to driver. */
+	while ((vbr = virtqueue_detach_unused_buf(vblk->vq))) {
+		vbr->req->cmd_flags |= REQ_QUIET;
+		__blk_end_request_all(vbr->req, -EIO);
+		mempool_free(vbr, vblk->pool);
+	}
+
+	/* Wake up threads sleeping on get_request_wait() */
+	for (i = 0; i < ARRAY_SIZE(q->rq.wait); i++) {
+		if (waitqueue_active(&q->rq.wait[i]))
+			wake_up_all(&q->rq.wait[i]);
+	}
+
+	return;
+}
+
 static void do_virtblk_request(struct request_queue *q)
 {
 	struct virtio_blk *vblk = q->queuedata;
@@ -462,6 +487,8 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 		goto out_put_disk;
 	}
 
+	blk_queue_abort_queue(q, virtblk_abort_queue);
+
 	q->queuedata = vblk;
 
 	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
@@ -576,8 +603,6 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk = vdev->priv;
 	int index = vblk->index;
-	struct virtblk_req *vbr;
-	unsigned long flags;
 
 	/* Prevent config work handler from accessing the device. */
 	mutex_lock(&vblk->config_lock);
@@ -591,15 +616,6 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
 
 	flush_work(&vblk->config_work);
 
-
-	/* Abort requests dispatched to driver. */
-	spin_lock_irqsave(&vblk->lock, flags);
-	while ((vbr = virtqueue_detach_unused_buf(vblk->vq))) {
-		__blk_end_request_all(vbr->req, -EIO);
-		mempool_free(vbr, vblk->pool);
-	}
-	spin_unlock_irqrestore(&vblk->lock, flags);
-
 	blk_cleanup_queue(vblk->disk->queue);
 	put_disk(vblk->disk);
 	mempool_destroy(vblk->pool);
-- 
1.7.10.1
Asias He
2012-May-21 09:08 UTC
[RFC PATCH 5/5] virtio-blk: Use block layer provided spinlock
Block layer will allocate a spinlock for the queue if the driver does
not provide one in blk_init_queue().

The reason to use the internal spinlock is that blk_cleanup_queue()
switches to the internal spinlock in the cleanup code path:

	if (q->queue_lock != &q->__queue_lock)
		q->queue_lock = &q->__queue_lock;

However, processes which are in D state might have taken the driver
provided spinlock; when these processes wake up, they would release the
block layer provided spinlock, triggering an unlock imbalance:

=====================================
[ BUG: bad unlock balance detected! ]
3.4.0-rc7+ #238 Not tainted
-------------------------------------
fio/3587 is trying to release lock (&(&q->__queue_lock)->rlock) at:
[<ffffffff813274d2>] blk_queue_bio+0x2a2/0x380
but there are no more locks to release!

other info that might help us debug this:
1 lock held by fio/3587:
 #0:  (&(&vblk->lock)->rlock){......}, at:
[<ffffffff8132661a>] get_request_wait+0x19a/0x250

Cc: Rusty Russell <rusty at rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst at redhat.com>
Cc: virtualization at lists.linux-foundation.org
Cc: kvm at vger.kernel.org
Signed-off-by: Asias He <asias at redhat.com>
---
 drivers/block/virtio_blk.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index ba35509..0c2f0e8 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -21,8 +21,6 @@ struct workqueue_struct *virtblk_wq;
 
 struct virtio_blk
 {
-	spinlock_t lock;
-
 	struct virtio_device *vdev;
 	struct virtqueue *vq;
 
@@ -65,7 +63,7 @@ static void blk_done(struct virtqueue *vq)
 	unsigned int len;
 	unsigned long flags;
 
-	spin_lock_irqsave(&vblk->lock, flags);
+	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
 	while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
 		int error;
 
@@ -99,7 +97,7 @@ static void blk_done(struct virtqueue *vq)
 	}
 	/* In case queue is stopped waiting for more buffers. */
 	blk_start_queue(vblk->disk->queue);
-	spin_unlock_irqrestore(&vblk->lock, flags);
+	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
 }
 
 static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
@@ -456,7 +454,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 		goto out_free_index;
 	}
 
-	spin_lock_init(&vblk->lock);
 	vblk->vdev = vdev;
 	vblk->sg_elems = sg_elems;
 	sg_init_table(vblk->sg, vblk->sg_elems);
@@ -481,7 +478,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 		goto out_mempool;
 	}
 
-	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
+	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL);
 	if (!q) {
 		err = -ENOMEM;
 		goto out_put_disk;
-- 
1.7.10.1
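The sequence behind the splat can be sketched as follows (an illustrative
fragment, not code from the patch):

	/* Task A (fio): blocked waiting for a free request. */
	spin_lock_irq(q->queue_lock);	/* queue_lock still points at &vblk->lock */
	/* get_request_wait() drops the lock and sleeps in D state ... */

	/* Task B (hot-unplug): blk_cleanup_queue() runs meanwhile. */
	if (q->queue_lock != &q->__queue_lock)
		q->queue_lock = &q->__queue_lock;	/* pointer switched to internal lock */

	/* Task A: wakes up and re-takes &vblk->lock on the wakeup path, but the
	 * final unlock goes through the pointer, which now names the internal
	 * lock -- hence "bad unlock balance" on &q->__queue_lock. */
	spin_unlock_irq(q->queue_lock);

Letting blk_init_queue() allocate the lock makes q->queue_lock equal to
&q->__queue_lock from the start, so the switch in blk_cleanup_queue() is
a no-op.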
On Mon, May 21, 2012 at 05:08:29PM +0800, Asias He wrote:
> When a user hot-unplugs a disk which is busy serving I/O, __blk_run_queue
> might be unable to drain all the requests. As a result, blk_drain_queue()
> would loop forever and blk_cleanup_queue() would not return, so hot-unplug
> will fail.
>
> This patch adds a callback in blk_drain_queue() for the low-level driver
> to abort requests.
>
> Currently, this is useful for virtio-blk to do cleanup in hot-unplug.

Why is this necessary? virtio-blk should know that the device is gone
and fail in-flight / new commands. That's what other drivers do. What
makes virtio-blk different?

Thanks.

-- 
tejun
Michael S. Tsirkin
2012-May-21 20:59 UTC
[RFC PATCH 5/5] virtio-blk: Use block layer provided spinlock
On Mon, May 21, 2012 at 05:08:33PM +0800, Asias He wrote:
> Block layer will allocate a spinlock for the queue if the driver does
> not provide one in blk_init_queue().
>
> The reason to use the internal spinlock is that blk_cleanup_queue()
> switches to the internal spinlock in the cleanup code path:
>
> 	if (q->queue_lock != &q->__queue_lock)
> 		q->queue_lock = &q->__queue_lock;
>
> However, processes which are in D state might have taken the driver
> provided spinlock; when these processes wake up, they would release the
> block layer provided spinlock, triggering an unlock imbalance.

Are you saying any driver with its own spinlock is
broken if hotunplugged under stress?

> =====================================
> [ BUG: bad unlock balance detected! ]
> 3.4.0-rc7+ #238 Not tainted
> -------------------------------------
> fio/3587 is trying to release lock (&(&q->__queue_lock)->rlock) at:
> [<ffffffff813274d2>] blk_queue_bio+0x2a2/0x380
> but there are no more locks to release!
>
> other info that might help us debug this:
> 1 lock held by fio/3587:
>  #0:  (&(&vblk->lock)->rlock){......}, at:
> [<ffffffff8132661a>] get_request_wait+0x19a/0x250
>
> Cc: Rusty Russell <rusty at rustcorp.com.au>
> Cc: "Michael S. Tsirkin" <mst at redhat.com>
> Cc: virtualization at lists.linux-foundation.org
> Cc: kvm at vger.kernel.org
> Signed-off-by: Asias He <asias at redhat.com>
> ---
>  drivers/block/virtio_blk.c | 9 +++------
>  1 file changed, 3 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index ba35509..0c2f0e8 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -21,8 +21,6 @@ struct workqueue_struct *virtblk_wq;
>  
>  struct virtio_blk
>  {
> -	spinlock_t lock;
> -
>  	struct virtio_device *vdev;
>  	struct virtqueue *vq;
>  
> @@ -65,7 +63,7 @@ static void blk_done(struct virtqueue *vq)
>  	unsigned int len;
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&vblk->lock, flags);
> +	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
>  	while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
>  		int error;
>  
> @@ -99,7 +97,7 @@ static void blk_done(struct virtqueue *vq)
>  	}
>  	/* In case queue is stopped waiting for more buffers. */
>  	blk_start_queue(vblk->disk->queue);
> -	spin_unlock_irqrestore(&vblk->lock, flags);
> +	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
>  }
>  
>  static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
> @@ -456,7 +454,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
>  		goto out_free_index;
>  	}
>  
> -	spin_lock_init(&vblk->lock);
>  	vblk->vdev = vdev;
>  	vblk->sg_elems = sg_elems;
>  	sg_init_table(vblk->sg, vblk->sg_elems);
> @@ -481,7 +478,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
>  		goto out_mempool;
>  	}
>  
> -	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
> +	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL);
>  	if (!q) {
>  		err = -ENOMEM;
>  		goto out_put_disk;
> -- 
> 1.7.10.1
Asias He
2012-May-22 08:22 UTC
[RFC PATCH 5/5] virtio-blk: Use block layer provided spinlock
On 05/22/2012 04:59 AM, Michael S. Tsirkin wrote:
> On Mon, May 21, 2012 at 05:08:33PM +0800, Asias He wrote:
>> Block layer will allocate a spinlock for the queue if the driver does
>> not provide one in blk_init_queue().
>>
>> The reason to use the internal spinlock is that blk_cleanup_queue()
>> switches to the internal spinlock in the cleanup code path:
>>
>> 	if (q->queue_lock != &q->__queue_lock)
>> 		q->queue_lock = &q->__queue_lock;
>>
>> However, processes which are in D state might have taken the driver
>> provided spinlock; when these processes wake up, they would release the
>> block layer provided spinlock, triggering an unlock imbalance.
>
> Are you saying any driver with its own spinlock is
> broken if hotunplugged under stress?

Hi, Michael

I can not say that. It is very hard to find a real hardware device to
try this with. I tried on qemu with an LSI Logic / Symbios Logic 53c895a
SCSI disk, and hot-unplug there is completely broken. And IDE does not
support hotplug at all.

Do you see any downside of using the block layer provided spinlock?

>> =====================================
>> [ BUG: bad unlock balance detected! ]
>> 3.4.0-rc7+ #238 Not tainted
>> -------------------------------------
>> fio/3587 is trying to release lock (&(&q->__queue_lock)->rlock) at:
>> [<ffffffff813274d2>] blk_queue_bio+0x2a2/0x380
>> but there are no more locks to release!
>>
>> other info that might help us debug this:
>> 1 lock held by fio/3587:
>>  #0:  (&(&vblk->lock)->rlock){......}, at:
>> [<ffffffff8132661a>] get_request_wait+0x19a/0x250
>>
>> Cc: Rusty Russell <rusty at rustcorp.com.au>
>> Cc: "Michael S. Tsirkin" <mst at redhat.com>
>> Cc: virtualization at lists.linux-foundation.org
>> Cc: kvm at vger.kernel.org
>> Signed-off-by: Asias He <asias at redhat.com>
>> ---
>>  drivers/block/virtio_blk.c | 9 +++------
>>  1 file changed, 3 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
>> index ba35509..0c2f0e8 100644
>> --- a/drivers/block/virtio_blk.c
>> +++ b/drivers/block/virtio_blk.c
>> @@ -21,8 +21,6 @@ struct workqueue_struct *virtblk_wq;
>>  
>>  struct virtio_blk
>>  {
>> -	spinlock_t lock;
>> -
>>  	struct virtio_device *vdev;
>>  	struct virtqueue *vq;
>>  
>> @@ -65,7 +63,7 @@ static void blk_done(struct virtqueue *vq)
>>  	unsigned int len;
>>  	unsigned long flags;
>>  
>> -	spin_lock_irqsave(&vblk->lock, flags);
>> +	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
>>  	while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
>>  		int error;
>>  
>> @@ -99,7 +97,7 @@ static void blk_done(struct virtqueue *vq)
>>  	}
>>  	/* In case queue is stopped waiting for more buffers. */
>>  	blk_start_queue(vblk->disk->queue);
>> -	spin_unlock_irqrestore(&vblk->lock, flags);
>> +	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
>>  }
>>  
>>  static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
>> @@ -456,7 +454,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
>>  		goto out_free_index;
>>  	}
>>  
>> -	spin_lock_init(&vblk->lock);
>>  	vblk->vdev = vdev;
>>  	vblk->sg_elems = sg_elems;
>>  	sg_init_table(vblk->sg, vblk->sg_elems);
>> @@ -481,7 +478,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
>>  		goto out_mempool;
>>  	}
>>  
>> -	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
>> +	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL);
>>  	if (!q) {
>>  		err = -ENOMEM;
>>  		goto out_put_disk;
>> -- 
>> 1.7.10.1

-- 
Asias