The following patches were made over Linus's tree which contains a vhost change missing in mst's vhost branch. These patches fix two issues caused by the vhost_task patches: 1. I was setting dev->worker too early and this caused crashes when vsock would queue work before VHOST_SET_OWNER. 2. The patch that Linus's tree contains which vhost does not yet have converted vhost_tasks to use CLONE_THREAD. That required a change to how we set the task state, but I completely messed up and moved when we set ourself to interruptible too late.
Mike Christie
2023-Jun-07 19:23 UTC
[PATCH 1/2] vhost: Fix crash during early vhost_transport_send_pkt calls
If userspace does VHOST_VSOCK_SET_GUEST_CID before VHOST_SET_OWNER we can race where: 1. thread0 calls vhost_transport_send_pkt -> vhost_work_queue 2. thread1 does VHOST_SET_OWNER which calls vhost_worker_create. 3. vhost_worker_create will set the dev->worker pointer before setting the worker->vtsk pointer. 4. thread0's vhost_work_queue will see the dev->worker pointer is set and try to call vhost_task_wake using not yet set worker->vtsk pointer. 5. We then crash since vtsk is NULL. Before commit 6e890c5d5021 ("vhost: use vhost_tasks for worker threads"), we only had the worker pointer so we could just check it to see if VHOST_SET_OWNER has been done. After that commit we have the vhost_worker and vhost_task pointer, so we can now hit the bug above. This patch embeds the vhost_worker in the vhost_dev and moves the work list initialization back to vhost_dev_init, so we can just check the worker.vtsk pointer to check if VHOST_SET_OWNER has been done like before. Fixes: 6e890c5d5021 ("vhost: use vhost_tasks for worker threads") Signed-off-by: Mike Christie <michael.christie at oracle.com> --- drivers/vhost/vhost.c | 50 +++++++++++++++---------------------------- drivers/vhost/vhost.h | 2 +- 2 files changed, 18 insertions(+), 34 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 074273020849..7a9f93eae225 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -235,7 +235,7 @@ void vhost_dev_flush(struct vhost_dev *dev) { struct vhost_flush_struct flush; - if (dev->worker) { + if (dev->worker.vtsk) { init_completion(&flush.wait_event); vhost_work_init(&flush.work, vhost_flush_work); @@ -247,7 +247,7 @@ EXPORT_SYMBOL_GPL(vhost_dev_flush); void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) { - if (!dev->worker) + if (!dev->worker.vtsk) return; if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) { @@ -255,8 +255,8 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) * sure it was not in the list. * test_and_set_bit() implies a memory barrier. */ - llist_add(&work->node, &dev->worker->work_list); - vhost_task_wake(dev->worker->vtsk); + llist_add(&work->node, &dev->worker.work_list); + vhost_task_wake(dev->worker.vtsk); } } EXPORT_SYMBOL_GPL(vhost_work_queue); @@ -264,7 +264,7 @@ EXPORT_SYMBOL_GPL(vhost_work_queue); /* A lockless hint for busy polling code to exit the loop */ bool vhost_has_work(struct vhost_dev *dev) { - return dev->worker && !llist_empty(&dev->worker->work_list); + return !llist_empty(&dev->worker.work_list); } EXPORT_SYMBOL_GPL(vhost_has_work); @@ -456,7 +456,8 @@ void vhost_dev_init(struct vhost_dev *dev, dev->umem = NULL; dev->iotlb = NULL; dev->mm = NULL; - dev->worker = NULL; + memset(&dev->worker, 0, sizeof(dev->worker)); + init_llist_head(&dev->worker.work_list); dev->iov_limit = iov_limit; dev->weight = weight; dev->byte_weight = byte_weight; @@ -530,47 +531,30 @@ static void vhost_detach_mm(struct vhost_dev *dev) static void vhost_worker_free(struct vhost_dev *dev) { - struct vhost_worker *worker = dev->worker; - - if (!worker) + if (!dev->worker.vtsk) return; - dev->worker = NULL; - WARN_ON(!llist_empty(&worker->work_list)); - vhost_task_stop(worker->vtsk); - kfree(worker); + WARN_ON(!llist_empty(&dev->worker.work_list)); + vhost_task_stop(dev->worker.vtsk); + dev->worker.kcov_handle = 0; + dev->worker.vtsk = NULL; } static int vhost_worker_create(struct vhost_dev *dev) { - struct vhost_worker *worker; struct vhost_task *vtsk; char name[TASK_COMM_LEN]; - int ret; - - worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT); - if (!worker) - return -ENOMEM; - dev->worker = worker; - worker->kcov_handle = kcov_common_handle(); - init_llist_head(&worker->work_list); snprintf(name, sizeof(name), "vhost-%d", current->pid); - vtsk = vhost_task_create(vhost_worker, worker, name); - if (!vtsk) { - ret = -ENOMEM; - goto free_worker; - } + vtsk = vhost_task_create(vhost_worker, &dev->worker, name); + if (!vtsk) + return -ENOMEM; - worker->vtsk = vtsk; + dev->worker.kcov_handle = kcov_common_handle(); + dev->worker.vtsk = vtsk; vhost_task_start(vtsk); return 0; - -free_worker: - kfree(worker); - dev->worker = NULL; - return ret; } /* Caller should have device mutex */ diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 0308638cdeee..305ec8593d46 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -154,7 +154,7 @@ struct vhost_dev { struct vhost_virtqueue **vqs; int nvqs; struct eventfd_ctx *log_ctx; - struct vhost_worker *worker; + struct vhost_worker worker; struct vhost_iotlb *umem; struct vhost_iotlb *iotlb; spinlock_t iotlb_lock; -- 2.25.1
Mike Christie
2023-Jun-07 19:23 UTC
[PATCH 2/2] vhost: Fix worker hangs due to missed wake up calls
We can race where we have added work to the work_list, but vhost_task_fn has passed that check but not yet set us into TASK_INTERRUPTIBLE. wake_up_process will see us in TASK_RUNNING and just return. This bug was intoduced in commit f9010dbdce91 ("fork, vhost: Use CLONE_THREAD to fix freezer/ps regression") when I moved the setting of TASK_INTERRUPTIBLE to simplfy the code and avoid get_signal from logging warnings about being in the wrong state. This moves the setting of TASK_INTERRUPTIBLE back to before we test if we need to stop the task to avoid a possible race there as well. We then have vhost_worker set TASK_RUNNING if it finds work similar to before. Fixes: f9010dbdce91 ("fork, vhost: Use CLONE_THREAD to fix freezer/ps regression") Signed-off-by: Mike Christie <michael.christie at oracle.com> --- drivers/vhost/vhost.c | 2 ++ kernel/vhost_task.c | 16 +++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 7a9f93eae225..b2722d29e069 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -341,6 +341,8 @@ static bool vhost_worker(void *data) node = llist_del_all(&worker->work_list); if (node) { + __set_current_state(TASK_RUNNING); + node = llist_reverse_order(node); /* make sure flag is seen after deletion */ smp_wmb(); diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c index f80d5c51ae67..da35e5b7f047 100644 --- a/kernel/vhost_task.c +++ b/kernel/vhost_task.c @@ -28,10 +28,6 @@ static int vhost_task_fn(void *data) for (;;) { bool did_work; - /* mb paired w/ vhost_task_stop */ - if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) - break; - if (!dead && signal_pending(current)) { struct ksignal ksig; /* @@ -48,11 +44,17 @@ static int vhost_task_fn(void *data) clear_thread_flag(TIF_SIGPENDING); } + /* mb paired w/ vhost_task_stop */ + set_current_state(TASK_INTERRUPTIBLE); + + if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) { + __set_current_state(TASK_RUNNING); + break; + } + did_work = vtsk->fn(vtsk->data); - if (!did_work) { - set_current_state(TASK_INTERRUPTIBLE); + if (!did_work) schedule(); - } } complete(&vtsk->exited); -- 2.25.1
On Wed, Jun 07, 2023 at 02:23:36PM -0500, Mike Christie wrote:> The following patches were made over Linus's tree which contains a > vhost change missing in mst's vhost branch. These patches fix two > issues caused by the vhost_task patches: > 1. I was setting dev->worker too early and this caused crashes when > vsock would queue work before VHOST_SET_OWNER. > > 2. The patch that Linus's tree contains which vhost does not yet > have converted vhost_tasks to use CLONE_THREAD. That required a > change to how we set the task state, but I completely messed up > and moved when we set ourself to interruptible too late. >Right. and that's in rc5 yes? -- MST