Mike Christie
2023-Feb-23 00:19 UTC
[PATCH 0/5] vhost-scsi: Fix management operation hangs
The following patches were made over Linus tree and also apply over mst tree's vhost branch. The patches fix an issue where management operations like LUN mapping/unmapping and device addition hang for 30 seconds or up to N minutes depending on the device. The problem is that we use a global mutex to protect the list of tpgs but we hold that mutex during those management operations. So if you are just trying to add another device, it will have to wait on another device if we are in the middle of clearing it's endpoint and it's waiting on hung IO. This patchset fixes up the ordering of how we flush IO and release refcounts so we don't need to always hold the mutex.
Mike Christie
2023-Feb-23 00:19 UTC
[PATCH 1/5] vhost-scsi: Hold tv_tpg_mutex when decrementing tv_tpg_vhost_count
This has us hold the tv_tpg_mutex when decrementing the tv_tpg_vhost_count in vhost_scsi_set_endpoint's failure path. We currently don't need to because we are holding the vhost_scsi_mutex and the dev.mutex when incrementing/decrementing in the normal paths, and we can't hit the tv_tpg_port_count check in vhost_scsi_drop_nexus during this time because there is a nexus and we have a refcount to the tpg. In the next patch we will change when we hold the vhost_scsi_mutex, so it's not always held in set/clear endpoint and in the future we might change the dev.mutex use, so this future proofs us. Signed-off-by: Mike Christie <michael.christie at oracle.com> --- drivers/vhost/scsi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index d5ecb8876fc9..c31659aa5466 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1658,7 +1658,9 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) { tpg = vs_tpg[i]; if (tpg) { + mutex_lock(&tpg->tv_tpg_mutex); tpg->tv_tpg_vhost_count--; + mutex_unlock(&tpg->tv_tpg_mutex); target_undepend_item(&tpg->se_tpg.tpg_group.cg_item); } } -- 2.25.1
Mike Christie
2023-Feb-23 00:19 UTC
[PATCH 2/5] vhost-scsi: Drop vhost_scsi_flush during endpoint clearing
We don't need to run vhost_scsi_flush at the end of vhost_scsi_clear_endpoint, because a couple lines before this code if there are any tpgs in vs_tpg we take every vq mutex and clear the vq backend so know there will be no new IOs accessing the vs_tpg. And after we clear the backend we run vhost_scsi_flush already so we know there are no running cmds accessing the vs_tpg. Signed-off-by: Mike Christie <michael.christie at oracle.com> --- drivers/vhost/scsi.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index c31659aa5466..502d64b53d9c 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1746,11 +1746,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, vhost_scsi_destroy_vq_cmds(vq); } } - /* - * Act as synchronize_rcu to make sure access to - * old vs->vs_tpg is finished. - */ - vhost_scsi_flush(vs); + kfree(vs->vs_tpg); vs->vs_tpg = NULL; WARN_ON(vs->vs_events_nr); -- 2.25.1
Mike Christie
2023-Feb-23 00:19 UTC
[PATCH 3/5] vhost-scsi: Remove vhost_scsi_mutex from port link/unlink
We don't need the vhost_scsi_mutex in vhost_scsi_port_link and vhost_scsi_port_unlink because LIO has a refcount on the se_tpg for us, so it can't be removed while these functions are called. This removes the vhost_scsi_mutex from those functions to avoid cases where we are adding or removing LUNs to vhost-deviceA but are stuck waiting on the vhost_scsi_mutex because we are running vhost_scsi_clear_endpoint on vhost-deviceB and it's stuck in vhost_scsi_flush waiting for a flakey physical device. Signed-off-by: Mike Christie <michael.christie at oracle.com> --- drivers/vhost/scsi.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 502d64b53d9c..9e154e568438 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -232,7 +232,7 @@ struct vhost_scsi_ctx { struct iov_iter out_iter; }; -/* Global spinlock to protect vhost_scsi TPG list for vhost IOCTL access */ +/* Global mutex to protect vhost_scsi TPG list for vhost IOCTL access */ static DEFINE_MUTEX(vhost_scsi_mutex); static LIST_HEAD(vhost_scsi_list); @@ -2038,17 +2038,12 @@ static int vhost_scsi_port_link(struct se_portal_group *se_tpg, INIT_LIST_HEAD(&tmf->queue_entry); vhost_work_init(&tmf->vwork, vhost_scsi_tmf_resp_work); - mutex_lock(&vhost_scsi_mutex); - mutex_lock(&tpg->tv_tpg_mutex); tpg->tv_tpg_port_count++; list_add_tail(&tmf->queue_entry, &tpg->tmf_queue); mutex_unlock(&tpg->tv_tpg_mutex); vhost_scsi_hotplug(tpg, lun); - - mutex_unlock(&vhost_scsi_mutex); - return 0; } @@ -2059,8 +2054,6 @@ static void vhost_scsi_port_unlink(struct se_portal_group *se_tpg, struct vhost_scsi_tpg, se_tpg); struct vhost_scsi_tmf *tmf; - mutex_lock(&vhost_scsi_mutex); - mutex_lock(&tpg->tv_tpg_mutex); tpg->tv_tpg_port_count--; tmf = list_first_entry(&tpg->tmf_queue, struct vhost_scsi_tmf, @@ -2070,8 +2063,6 @@ static void vhost_scsi_port_unlink(struct se_portal_group *se_tpg, mutex_unlock(&tpg->tv_tpg_mutex); vhost_scsi_hotunplug(tpg, lun); - - mutex_unlock(&vhost_scsi_mutex); } static ssize_t vhost_scsi_tpg_attrib_fabric_prot_type_store( -- 2.25.1
Mike Christie
2023-Feb-23 00:19 UTC
[PATCH 4/5] vhost-scsi: Delay releasing our refcount on the tpg
We currently hold the vhost_scsi_mutex the entire time we are running vhost_scsi_clear_endpoint. This prevents userspace from being able to free the se_tpg from under us after we have called target_undepend_item. However, it forces vhost_scsi_set_endpoint calls for other devices to have to wait on a flakey device's: vhost_scsi_clear_endpoint -> vhost_scsi_flush() call which can which can take a long time. This moves the target_undepend_item call and tv_tpg_vhost_count-- to after we have stopped new IO from starting up and after we have waited on running IO. We can then release our refcount on the tpg and session knowing our device is no longer accessing them. This also moves the tv_tpg_vhost_count-- to prevent a similar possible use after free with the session. Signed-off-by: Mike Christie <michael.christie at oracle.com> --- drivers/vhost/scsi.c | 62 ++++++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 9e154e568438..c405ab5c232a 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1704,11 +1704,10 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, if (!tpg) continue; - mutex_lock(&tpg->tv_tpg_mutex); tv_tport = tpg->tport; if (!tv_tport) { ret = -ENODEV; - goto err_tpg; + goto err_dev; } if (strcmp(tv_tport->tport_name, t->vhost_wwpn)) { @@ -1717,36 +1716,51 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, tv_tport->tport_name, tpg->tport_tpgt, t->vhost_wwpn, t->vhost_tpgt); ret = -EINVAL; - goto err_tpg; + goto err_dev; } + match = true; + } + if (!match) + goto free_vs_tpg; + + /* Prevent new cmds from starting and accessing the tpgs/sessions */ + for (i = 0; i < vs->dev.nvqs; i++) { + vq = &vs->vqs[i].vq; + mutex_lock(&vq->mutex); + vhost_vq_set_backend(vq, NULL); + mutex_unlock(&vq->mutex); + } + /* Make sure cmds are not running before tearing them down. */ + vhost_scsi_flush(vs); + + for (i = 0; i < vs->dev.nvqs; i++) { + vq = &vs->vqs[i].vq; + vhost_scsi_destroy_vq_cmds(vq); + } + + /* + * We can now release our hold on the tpg and sessions and userspace + * can free them after this point. + */ + for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) { + target = i; + tpg = vs->vs_tpg[target]; + if (!tpg) + continue; + + mutex_lock(&tpg->tv_tpg_mutex); + tpg->tv_tpg_vhost_count--; tpg->vhost_scsi = NULL; vs->vs_tpg[target] = NULL; - match = true; + mutex_unlock(&tpg->tv_tpg_mutex); - /* - * Release se_tpg->tpg_group.cg_item configfs dependency now - * to allow vhost-scsi WWPN se_tpg->tpg_group shutdown to occur. - */ + se_tpg = &tpg->se_tpg; target_undepend_item(&se_tpg->tpg_group.cg_item); } - if (match) { - for (i = 0; i < vs->dev.nvqs; i++) { - vq = &vs->vqs[i].vq; - mutex_lock(&vq->mutex); - vhost_vq_set_backend(vq, NULL); - mutex_unlock(&vq->mutex); - } - /* Make sure cmds are not running before tearing them down. */ - vhost_scsi_flush(vs); - - for (i = 0; i < vs->dev.nvqs; i++) { - vq = &vs->vqs[i].vq; - vhost_scsi_destroy_vq_cmds(vq); - } - } +free_vs_tpg: kfree(vs->vs_tpg); vs->vs_tpg = NULL; WARN_ON(vs->vs_events_nr); @@ -1754,8 +1768,6 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, mutex_unlock(&vhost_scsi_mutex); return 0; -err_tpg: - mutex_unlock(&tpg->tv_tpg_mutex); err_dev: mutex_unlock(&vs->dev.mutex); mutex_unlock(&vhost_scsi_mutex); -- 2.25.1
Now that vhost_scsi_clear_endpoint will prevent new IO and flush running IO before dropping it's refcounts on the tpg, we only need to hold the vhost_scsi_mutex while looping over tpgs and taking a refcount, and when manipulating the tpg list. This removes the vhost_scsi_mutex from vhost_scsi_clear_endpoint and moves the vhost_scsi_mutex use in vhost_scsi_set_endpoint so it's only taken while looping over the tpgs and taking a refcount. We can then now avoid issues where vhost_scsi_set_endpoint has to wait for another device's vhost_scsi_clear_endpoint call. Signed-off-by: Mike Christie <michael.christie at oracle.com> --- drivers/vhost/scsi.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index c405ab5c232a..75ea24f1c571 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1540,7 +1540,7 @@ static int vhost_scsi_setup_vq_cmds(struct vhost_virtqueue *vq, int max_cmds) * vhost_scsi_tpg with an active struct vhost_scsi_nexus * * The lock nesting rule is: - * vhost_scsi_mutex -> vs->dev.mutex -> tpg->tv_tpg_mutex -> vq->mutex + * vs->dev.mutex -> vhost_scsi_mutex -> tpg->tv_tpg_mutex -> vq->mutex */ static int vhost_scsi_set_endpoint(struct vhost_scsi *vs, @@ -1554,7 +1554,6 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, int index, ret, i, len; bool match = false; - mutex_lock(&vhost_scsi_mutex); mutex_lock(&vs->dev.mutex); /* Verify that ring has been setup correctly. */ @@ -1575,6 +1574,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, if (vs->vs_tpg) memcpy(vs_tpg, vs->vs_tpg, len); + mutex_lock(&vhost_scsi_mutex); list_for_each_entry(tpg, &vhost_scsi_list, tv_tpg_list) { mutex_lock(&tpg->tv_tpg_mutex); if (!tpg->tpg_nexus) { @@ -1590,6 +1590,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, if (!strcmp(tv_tport->tport_name, t->vhost_wwpn)) { if (vs->vs_tpg && vs->vs_tpg[tpg->tport_tpgt]) { mutex_unlock(&tpg->tv_tpg_mutex); + mutex_unlock(&vhost_scsi_mutex); ret = -EEXIST; goto undepend; } @@ -1604,6 +1605,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, if (ret) { pr_warn("target_depend_item() failed: %d\n", ret); mutex_unlock(&tpg->tv_tpg_mutex); + mutex_unlock(&vhost_scsi_mutex); goto undepend; } tpg->tv_tpg_vhost_count++; @@ -1613,6 +1615,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, } mutex_unlock(&tpg->tv_tpg_mutex); } + mutex_unlock(&vhost_scsi_mutex); if (match) { memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn, @@ -1667,7 +1670,6 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, kfree(vs_tpg); out: mutex_unlock(&vs->dev.mutex); - mutex_unlock(&vhost_scsi_mutex); return ret; } @@ -1683,7 +1685,6 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, int index, ret, i; u8 target; - mutex_lock(&vhost_scsi_mutex); mutex_lock(&vs->dev.mutex); /* Verify that ring has been setup correctly. */ for (index = 0; index < vs->dev.nvqs; ++index) { @@ -1765,12 +1766,10 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, vs->vs_tpg = NULL; WARN_ON(vs->vs_events_nr); mutex_unlock(&vs->dev.mutex); - mutex_unlock(&vhost_scsi_mutex); return 0; err_dev: mutex_unlock(&vs->dev.mutex); - mutex_unlock(&vhost_scsi_mutex); return ret; } -- 2.25.1