Mike Christie
2023-Feb-02 23:25 UTC
[PATCH v11 8/8] vhost: use vhost_tasks for worker threads
For vhost workers we use the kthread API, which inherits its values from
and checks against the kthreadd thread. This results in the wrong RLIMITs
being checked, so while tools like libvirt try to control the number of
threads based on the nproc rlimit setting, we can end up creating more
threads than the user wanted.

This patch has us use the vhost_task helpers, which will inherit their
values/checks from the thread that owns the device, similar to if we did
a clone in userspace. The vhost threads will now be counted in the nproc
rlimits. And we get features like cgroups and mm sharing automatically,
so we can remove those calls.

Signed-off-by: Mike Christie <michael.christie at oracle.com>
Acked-by: Michael S. Tsirkin <mst at redhat.com>
---
 drivers/vhost/vhost.c | 58 ++++++++-----------------------------------
 drivers/vhost/vhost.h |  4 +--
 2 files changed, 13 insertions(+), 49 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 74378d241f8d..d3c7c37b69a7 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -22,11 +22,11 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/kthread.h>
-#include <linux/cgroup.h>
 #include <linux/module.h>
 #include <linux/sort.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/signal.h>
+#include <linux/sched/vhost_task.h>
 #include <linux/interval_tree_generic.h>
 #include <linux/nospec.h>
 #include <linux/kcov.h>
@@ -256,7 +256,7 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
          * test_and_set_bit() implies a memory barrier.
          */
         llist_add(&work->node, &dev->worker->work_list);
-        wake_up_process(dev->worker->task);
+        wake_up_process(dev->worker->vtsk->task);
         }
 }
 EXPORT_SYMBOL_GPL(vhost_work_queue);
@@ -336,17 +336,14 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 static int vhost_worker(void *data)
 {
         struct vhost_worker *worker = data;
-        struct vhost_dev *dev = worker->dev;
         struct vhost_work *work, *work_next;
         struct llist_node *node;

-        kthread_use_mm(dev->mm);
-
         for (;;) {
                 /* mb paired w/ kthread_stop */
                 set_current_state(TASK_INTERRUPTIBLE);

-                if (kthread_should_stop()) {
+                if (vhost_task_should_stop(worker->vtsk)) {
                         __set_current_state(TASK_RUNNING);
                         break;
                 }
@@ -368,7 +365,7 @@ static int vhost_worker(void *data)
                         schedule();
                 }
         }
-        kthread_unuse_mm(dev->mm);
+
         return 0;
 }

@@ -509,31 +506,6 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
 }
 EXPORT_SYMBOL_GPL(vhost_dev_check_owner);

-struct vhost_attach_cgroups_struct {
-        struct vhost_work work;
-        struct task_struct *owner;
-        int ret;
-};
-
-static void vhost_attach_cgroups_work(struct vhost_work *work)
-{
-        struct vhost_attach_cgroups_struct *s;
-
-        s = container_of(work, struct vhost_attach_cgroups_struct, work);
-        s->ret = cgroup_attach_task_all(s->owner, current);
-}
-
-static int vhost_attach_cgroups(struct vhost_dev *dev)
-{
-        struct vhost_attach_cgroups_struct attach;
-
-        attach.owner = current;
-        vhost_work_init(&attach.work, vhost_attach_cgroups_work);
-        vhost_work_queue(dev, &attach.work);
-        vhost_dev_flush(dev);
-        return attach.ret;
-}
-
 /* Caller should have device mutex */
 bool vhost_dev_has_owner(struct vhost_dev *dev)
 {
@@ -580,14 +552,14 @@ static void vhost_worker_free(struct vhost_dev *dev)

         dev->worker = NULL;
         WARN_ON(!llist_empty(&worker->work_list));
-        kthread_stop(worker->task);
+        vhost_task_stop(worker->vtsk);
         kfree(worker);
 }

 static int vhost_worker_create(struct vhost_dev *dev)
 {
         struct vhost_worker *worker;
-        struct task_struct *task;
+        struct vhost_task *vtsk;
         int ret;

         worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
@@ -595,27 +567,19 @@ static int vhost_worker_create(struct vhost_dev *dev)
                 return -ENOMEM;

         dev->worker = worker;
-        worker->dev = dev;
         worker->kcov_handle = kcov_common_handle();
         init_llist_head(&worker->work_list);

-        task = kthread_create(vhost_worker, worker, "vhost-%d", current->pid);
-        if (IS_ERR(task)) {
-                ret = PTR_ERR(task);
+        vtsk = vhost_task_create(vhost_worker, worker, NUMA_NO_NODE);
+        if (!vtsk) {
+                ret = -ENOMEM;
                 goto free_worker;
         }

-        worker->task = task;
-        wake_up_process(task); /* avoid contributing to loadavg */
-
-        ret = vhost_attach_cgroups(dev);
-        if (ret)
-                goto stop_worker;
-
+        worker->vtsk = vtsk;
+        vhost_task_start(vtsk, "vhost-%d", current->pid);
         return 0;

-stop_worker:
-        kthread_stop(worker->task);
 free_worker:
         kfree(worker);
         dev->worker = NULL;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 2f6beab93784..3af59c65025e 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -16,6 +16,7 @@
 #include <linux/irqbypass.h>

 struct vhost_work;
+struct vhost_task;
 typedef void (*vhost_work_fn_t)(struct vhost_work *work);

 #define VHOST_WORK_QUEUED 1
@@ -26,9 +27,8 @@ struct vhost_work {
 };

 struct vhost_worker {
-        struct task_struct *task;
+        struct vhost_task *vtsk;
         struct llist_head work_list;
-        struct vhost_dev *dev;
         u64 kcov_handle;
 };

-- 
2.25.1
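The "clone in userspace" comparison in the commit message can be made
concrete with a short userspace sketch. This is illustrative only and not
part of the patch; worker_fn and STACK_SZ are invented names. The point it
shows: a task created with clone() is charged against its creator's
RLIMIT_NPROC (failing with EAGAIN for unprivileged users once the limit is
hit), which is the accounting vhost_task restores for vhost workers, and
CLONE_VM gives the mm sharing that previously needed kthread_use_mm().

/* Illustrative userspace analogy, not kernel code. */
#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/wait.h>

#define STACK_SZ (256 * 1024)

static int worker_fn(void *arg)
{
        /* a vhost worker's work loop would run here */
        return 0;
}

int main(void)
{
        /* What a manager like libvirt does: cap the VM user's task count.
         * Before this patch, vhost kthreads were checked against kthreadd
         * and escaped this limit; vhost_task workers no longer do. */
        struct rlimit rl = { .rlim_cur = 64, .rlim_max = 64 };
        if (setrlimit(RLIMIT_NPROC, &rl))
                perror("setrlimit");

        /* CLONE_VM shares the address space, much as the vhost worker now
         * shares the owner's mm instead of calling kthread_use_mm(). The
         * stack grows down, so pass the top of the buffer. */
        char *stack = malloc(STACK_SZ);
        if (!stack)
                return 1;
        int pid = clone(worker_fn, stack + STACK_SZ,
                        CLONE_VM | CLONE_FS | CLONE_FILES | SIGCHLD, NULL);
        if (pid < 0)
                fprintf(stderr, "clone: %s\n", strerror(errno)); /* EAGAIN at the limit */
        else
                waitpid(pid, NULL, 0);
        free(stack);
        return 0;
}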
Michael S. Tsirkin
2023-Jul-20 13:06 UTC
[PATCH v11 8/8] vhost: use vhost_tasks for worker threads
On Thu, Feb 02, 2023 at 05:25:17PM -0600, Mike Christie wrote:
> For vhost workers we use the kthread API, which inherits its values from
> and checks against the kthreadd thread. This results in the wrong RLIMITs
> being checked, so while tools like libvirt try to control the number of
> threads based on the nproc rlimit setting, we can end up creating more
> threads than the user wanted.
>
> This patch has us use the vhost_task helpers, which will inherit their
> values/checks from the thread that owns the device, similar to if we did
> a clone in userspace. The vhost threads will now be counted in the nproc
> rlimits. And we get features like cgroups and mm sharing automatically,
> so we can remove those calls.
>
> Signed-off-by: Mike Christie <michael.christie at oracle.com>
> Acked-by: Michael S. Tsirkin <mst at redhat.com>

Hi Mike,
So this seems to have caused a measurable regression in networking
performance (about 30%). Take a look here; there's a zip file with
detailed measurements attached:

https://bugzilla.redhat.com/show_bug.cgi?id=2222603

Could you take a look please? You can also ask the reporter questions
there, assuming you have or can create a (free) account.