Michael S. Tsirkin
2023-Jul-20 13:06 UTC
[PATCH v11 8/8] vhost: use vhost_tasks for worker threads
On Thu, Feb 02, 2023 at 05:25:17PM -0600, Mike Christie wrote:
> For vhost workers we use the kthread API which inherits its values from
> and checks against the kthreadd thread. This results in the wrong RLIMITs
> being checked, so while tools like libvirt try to control the number of
> threads based on the nproc rlimit setting we can end up creating more
> threads than the user wanted.
>
> This patch has us use the vhost_task helpers, which will inherit their
> values/checks from the thread that owns the device, similar to if we did
> a clone in userspace. The vhost threads will now be counted in the nproc
> rlimits. And we get features like cgroups and mm sharing automatically,
> so we can remove those calls.
>
> Signed-off-by: Mike Christie <michael.christie at oracle.com>
> Acked-by: Michael S. Tsirkin <mst at redhat.com>

Hi Mike,
So this seems to have caused a measurable regression in networking
performance (about 30%). Take a look here, and there's a zip file
with detailed measurements attached:

https://bugzilla.redhat.com/show_bug.cgi?id=2222603

Could you take a look please?
You can also ask reporter questions there, assuming you
have or can create a (free) account.

> ---
>  drivers/vhost/vhost.c | 58 ++++++++-----------------------------------
>  drivers/vhost/vhost.h |  4 +--
>  2 files changed, 13 insertions(+), 49 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 74378d241f8d..d3c7c37b69a7 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -22,11 +22,11 @@
>  #include <linux/slab.h>
>  #include <linux/vmalloc.h>
>  #include <linux/kthread.h>
> -#include <linux/cgroup.h>
>  #include <linux/module.h>
>  #include <linux/sort.h>
>  #include <linux/sched/mm.h>
>  #include <linux/sched/signal.h>
> +#include <linux/sched/vhost_task.h>
>  #include <linux/interval_tree_generic.h>
>  #include <linux/nospec.h>
>  #include <linux/kcov.h>
> @@ -256,7 +256,7 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
>                  * test_and_set_bit() implies a memory barrier.
>                  */
>                 llist_add(&work->node, &dev->worker->work_list);
> -               wake_up_process(dev->worker->task);
> +               wake_up_process(dev->worker->vtsk->task);
>         }
>  }
>  EXPORT_SYMBOL_GPL(vhost_work_queue);
> @@ -336,17 +336,14 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>  static int vhost_worker(void *data)
>  {
>         struct vhost_worker *worker = data;
> -       struct vhost_dev *dev = worker->dev;
>         struct vhost_work *work, *work_next;
>         struct llist_node *node;
>
> -       kthread_use_mm(dev->mm);
> -
>         for (;;) {
>                 /* mb paired w/ kthread_stop */
>                 set_current_state(TASK_INTERRUPTIBLE);
>
> -               if (kthread_should_stop()) {
> +               if (vhost_task_should_stop(worker->vtsk)) {
>                         __set_current_state(TASK_RUNNING);
>                         break;
>                 }
> @@ -368,7 +365,7 @@ static int vhost_worker(void *data)
>                         schedule();
>                 }
>         }
> -       kthread_unuse_mm(dev->mm);
> +
>         return 0;
>  }
>
> @@ -509,31 +506,6 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
>  }
>  EXPORT_SYMBOL_GPL(vhost_dev_check_owner);
>
> -struct vhost_attach_cgroups_struct {
> -       struct vhost_work work;
> -       struct task_struct *owner;
> -       int ret;
> -};
> -
> -static void vhost_attach_cgroups_work(struct vhost_work *work)
> -{
> -       struct vhost_attach_cgroups_struct *s;
> -
> -       s = container_of(work, struct vhost_attach_cgroups_struct, work);
> -       s->ret = cgroup_attach_task_all(s->owner, current);
> -}
> -
> -static int vhost_attach_cgroups(struct vhost_dev *dev)
> -{
> -       struct vhost_attach_cgroups_struct attach;
> -
> -       attach.owner = current;
> -       vhost_work_init(&attach.work, vhost_attach_cgroups_work);
> -       vhost_work_queue(dev, &attach.work);
> -       vhost_dev_flush(dev);
> -       return attach.ret;
> -}
> -
>  /* Caller should have device mutex */
>  bool vhost_dev_has_owner(struct vhost_dev *dev)
>  {
> @@ -580,14 +552,14 @@ static void vhost_worker_free(struct vhost_dev *dev)
>
>         dev->worker = NULL;
>         WARN_ON(!llist_empty(&worker->work_list));
> -       kthread_stop(worker->task);
> +       vhost_task_stop(worker->vtsk);
>         kfree(worker);
>  }
>
>  static int vhost_worker_create(struct vhost_dev *dev)
>  {
>         struct vhost_worker *worker;
> -       struct task_struct *task;
> +       struct vhost_task *vtsk;
>         int ret;
>
>         worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
> @@ -595,27 +567,19 @@ static int vhost_worker_create(struct vhost_dev *dev)
>                 return -ENOMEM;
>
>         dev->worker = worker;
> -       worker->dev = dev;
>         worker->kcov_handle = kcov_common_handle();
>         init_llist_head(&worker->work_list);
>
> -       task = kthread_create(vhost_worker, worker, "vhost-%d", current->pid);
> -       if (IS_ERR(task)) {
> -               ret = PTR_ERR(task);
> +       vtsk = vhost_task_create(vhost_worker, worker, NUMA_NO_NODE);
> +       if (!vtsk) {
> +               ret = -ENOMEM;
>                 goto free_worker;
>         }
>
> -       worker->task = task;
> -       wake_up_process(task); /* avoid contributing to loadavg */
> -
> -       ret = vhost_attach_cgroups(dev);
> -       if (ret)
> -               goto stop_worker;
> -
> +       worker->vtsk = vtsk;
> +       vhost_task_start(vtsk, "vhost-%d", current->pid);
>         return 0;
>
> -stop_worker:
> -       kthread_stop(worker->task);
>  free_worker:
>         kfree(worker);
>         dev->worker = NULL;
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 2f6beab93784..3af59c65025e 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -16,6 +16,7 @@
>  #include <linux/irqbypass.h>
>
>  struct vhost_work;
> +struct vhost_task;
>  typedef void (*vhost_work_fn_t)(struct vhost_work *work);
>
>  #define VHOST_WORK_QUEUED      1
> @@ -26,9 +27,8 @@ struct vhost_work {
>  };
>
>  struct vhost_worker {
> -       struct task_struct      *task;
> +       struct vhost_task       *vtsk;
>         struct llist_head       work_list;
> -       struct vhost_dev        *dev;
>         u64                     kcov_handle;
>  };
>
> --
> 2.25.1
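For readers skimming the thread, the hunks quoted above reduce to the worker
lifecycle below. This is a condensed sketch based only on the diff as posted
(the function names and signatures follow this version of the patch, not
necessarily the final mainline API); the work-list draining and error paths
are elided, and the "sketch_" names are illustrative, not real driver symbols.

        #include <linux/sched.h>
        #include <linux/sched/vhost_task.h>
        #include "vhost.h"

        static int sketch_vhost_worker(void *data)
        {
                struct vhost_worker *worker = data;

                for (;;) {
                        set_current_state(TASK_INTERRUPTIBLE);

                        /* vhost_task_should_stop() takes over from kthread_should_stop(). */
                        if (vhost_task_should_stop(worker->vtsk)) {
                                __set_current_state(TASK_RUNNING);
                                break;
                        }
                        /* ... drain worker->work_list here, otherwise ... */
                        schedule();
                }
                return 0;
        }

        static int sketch_worker_create(struct vhost_worker *worker)
        {
                struct vhost_task *vtsk;

                /*
                 * The vhost_task is cloned from current (the device owner), so the
                 * mm, cgroups and nproc rlimit are inherited automatically; no
                 * kthread_use_mm() or cgroup-attach work item is needed.
                 */
                vtsk = vhost_task_create(sketch_vhost_worker, worker, NUMA_NO_NODE);
                if (!vtsk)
                        return -ENOMEM;

                worker->vtsk = vtsk;
                vhost_task_start(vtsk, "vhost-%d", current->pid);
                return 0;
        }

        static void sketch_worker_destroy(struct vhost_worker *worker)
        {
                vhost_task_stop(worker->vtsk);  /* replaces kthread_stop() */
        }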
michael.christie at oracle.com
2023-Jul-23 04:03 UTC
[PATCH v11 8/8] vhost: use vhost_tasks for worker threads
On 7/20/23 8:06 AM, Michael S. Tsirkin wrote:
> On Thu, Feb 02, 2023 at 05:25:17PM -0600, Mike Christie wrote:
>> For vhost workers we use the kthread API which inherits its values from
>> and checks against the kthreadd thread. This results in the wrong RLIMITs
>> being checked, so while tools like libvirt try to control the number of
>> threads based on the nproc rlimit setting we can end up creating more
>> threads than the user wanted.
>>
>> This patch has us use the vhost_task helpers, which will inherit their
>> values/checks from the thread that owns the device, similar to if we did
>> a clone in userspace. The vhost threads will now be counted in the nproc
>> rlimits. And we get features like cgroups and mm sharing automatically,
>> so we can remove those calls.
>>
>> Signed-off-by: Mike Christie <michael.christie at oracle.com>
>> Acked-by: Michael S. Tsirkin <mst at redhat.com>
>
>
> Hi Mike,
> So this seems to have caused a measurable regression in networking
> performance (about 30%). Take a look here, and there's a zip file
> with detailed measurements attached:
>
> https://bugzilla.redhat.com/show_bug.cgi?id=2222603
>
>
> Could you take a look please?
> You can also ask reporter questions there, assuming you
> have or can create a (free) account.
>

Sorry for the late reply. I just got home from vacation.

The account creation link seems to be down. I keep getting an
"unable to establish SMTP connection to bz-exim-prod port 25" error.
Can you give me Quan's email?

I think I can replicate the problem. I just need some extra info from Quan:

1. Just double check that they are using RHEL 9 on the host running the VMs.

2. The kernel config.

3. Any tuning that was done. Is tuned running in the guest and/or on the
   host running the VMs, and what profile is being used in each?

4. The number of vCPUs and virtqueues being used.

5. Can they dump the contents of /sys/kernel/debug/sched and the output of
   "sysctl -a" on the host running the VMs?

6. With the 6.4 kernel, can they also run a quick test where they set the
   vhost threads' scheduler to batch? List the threads with:

   ps -T -o comm,pid,tid $QEMU_THREAD

   then for each vhost thread do:

   chrt -b -p 0 $VHOST_THREAD

   Does that end up increasing perf? When I do this I see throughput go up
   by around 50% vs 6.3 when the number of sessions was 16 or more (16 was
   the number of vCPUs and virtqueues per net device in the VM). Note that
   I'm not saying that is a fix. It's just a difference I noticed when
   running some other tests.
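In case it helps with reproducing step 6, here is one way the chrt loop could
be scripted on the host. It is only a sketch: the qemu-kvm process name and
the vhost-<pid> thread naming are assumptions about the reporter's setup
(e.g. the binary may be qemu-system-x86_64 elsewhere), so adjust as needed.

        #!/bin/sh
        # QEMU process of the VM under test (name is an assumption).
        QEMU_PID=$(pgrep -o qemu-kvm)

        # Switch every vhost worker thread of that process to SCHED_BATCH, prio 0.
        for tid in $(ps -T -p "$QEMU_PID" -o comm,tid | awk '/^vhost-/ {print $2}'); do
                chrt -b -p 0 "$tid"
        done

        # Confirm the new policy before re-running the benchmark.
        for tid in $(ps -T -p "$QEMU_PID" -o comm,tid | awk '/^vhost-/ {print $2}'); do
                chrt -p "$tid"
        done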