Mike Christie
2023-Mar-28 02:17 UTC
[PATCH v6 01/11] vhost: add vhost_worker pointer to vhost_virtqueue
This patchset allows userspace to map vqs to different workers. This patch adds a worker pointer to the vq so we can store that info. Signed-off-by: Mike Christie <michael.christie at oracle.com> --- drivers/vhost/vhost.c | 24 +++++++++++++----------- drivers/vhost/vhost.h | 1 + 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 4368ee9b999c..e041e116afee 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -486,6 +486,7 @@ void vhost_dev_init(struct vhost_dev *dev, vq->log = NULL; vq->indirect = NULL; vq->heads = NULL; + vq->worker = NULL; vq->dev = dev; mutex_init(&vq->mutex); vhost_vq_reset(dev, vq); @@ -554,16 +555,15 @@ static void vhost_worker_free(struct vhost_dev *dev) kfree(worker); } -static int vhost_worker_create(struct vhost_dev *dev) +static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev) { struct vhost_worker *worker; struct vhost_task *vtsk; char name[TASK_COMM_LEN]; - int ret; worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT); if (!worker) - return -ENOMEM; + return NULL; dev->worker = worker; worker->kcov_handle = kcov_common_handle(); @@ -571,25 +571,24 @@ static int vhost_worker_create(struct vhost_dev *dev) snprintf(name, sizeof(name), "vhost-%d", current->pid); vtsk = vhost_task_create(vhost_worker, worker, name); - if (!vtsk) { - ret = -ENOMEM; + if (!vtsk) goto free_worker; - } worker->vtsk = vtsk; vhost_task_start(vtsk); - return 0; + return worker; free_worker: kfree(worker); dev->worker = NULL; - return ret; + return NULL; } /* Caller should have device mutex */ long vhost_dev_set_owner(struct vhost_dev *dev) { - int err; + struct vhost_worker *worker; + int err, i; /* Is there an owner already? */ if (vhost_dev_has_owner(dev)) { @@ -600,9 +599,12 @@ long vhost_dev_set_owner(struct vhost_dev *dev) vhost_attach_mm(dev); if (dev->use_worker) { - err = vhost_worker_create(dev); - if (err) + worker = vhost_worker_create(dev); + if (!worker) goto err_worker; + + for (i = 0; i < dev->nvqs; i++) + dev->vqs[i]->worker = worker; } err = vhost_dev_alloc_iovecs(dev); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 0308638cdeee..e72b665ba3a5 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -74,6 +74,7 @@ struct vhost_vring_call { /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; + struct vhost_worker *worker; /* The actual ring of buffers. */ struct mutex mutex; -- 2.25.1
Jason Wang
2023-Apr-04 07:04 UTC
[PATCH v6 01/11] vhost: add vhost_worker pointer to vhost_virtqueue
On Tue, Mar 28, 2023 at 10:17?AM Mike Christie <michael.christie at oracle.com> wrote:> > This patchset allows userspace to map vqs to different workers. This > patch adds a worker pointer to the vq so we can store that info. > > Signed-off-by: Mike Christie <michael.christie at oracle.com>Acked-by: Jason Wang <jasowang at redhat.com> Thanks> --- > drivers/vhost/vhost.c | 24 +++++++++++++----------- > drivers/vhost/vhost.h | 1 + > 2 files changed, 14 insertions(+), 11 deletions(-) > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index 4368ee9b999c..e041e116afee 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -486,6 +486,7 @@ void vhost_dev_init(struct vhost_dev *dev, > vq->log = NULL; > vq->indirect = NULL; > vq->heads = NULL; > + vq->worker = NULL; > vq->dev = dev; > mutex_init(&vq->mutex); > vhost_vq_reset(dev, vq); > @@ -554,16 +555,15 @@ static void vhost_worker_free(struct vhost_dev *dev) > kfree(worker); > } > > -static int vhost_worker_create(struct vhost_dev *dev) > +static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev) > { > struct vhost_worker *worker; > struct vhost_task *vtsk; > char name[TASK_COMM_LEN]; > - int ret; > > worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT); > if (!worker) > - return -ENOMEM; > + return NULL; > > dev->worker = worker; > worker->kcov_handle = kcov_common_handle(); > @@ -571,25 +571,24 @@ static int vhost_worker_create(struct vhost_dev *dev) > snprintf(name, sizeof(name), "vhost-%d", current->pid); > > vtsk = vhost_task_create(vhost_worker, worker, name); > - if (!vtsk) { > - ret = -ENOMEM; > + if (!vtsk) > goto free_worker; > - } > > worker->vtsk = vtsk; > vhost_task_start(vtsk); > - return 0; > + return worker; > > free_worker: > kfree(worker); > dev->worker = NULL; > - return ret; > + return NULL; > } > > /* Caller should have device mutex */ > long vhost_dev_set_owner(struct vhost_dev *dev) > { > - int err; > + struct vhost_worker *worker; > + int err, i; > > /* Is there an owner already? */ > if (vhost_dev_has_owner(dev)) { > @@ -600,9 +599,12 @@ long vhost_dev_set_owner(struct vhost_dev *dev) > vhost_attach_mm(dev); > > if (dev->use_worker) { > - err = vhost_worker_create(dev); > - if (err) > + worker = vhost_worker_create(dev); > + if (!worker) > goto err_worker; > + > + for (i = 0; i < dev->nvqs; i++) > + dev->vqs[i]->worker = worker; > } > > err = vhost_dev_alloc_iovecs(dev); > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h > index 0308638cdeee..e72b665ba3a5 100644 > --- a/drivers/vhost/vhost.h > +++ b/drivers/vhost/vhost.h > @@ -74,6 +74,7 @@ struct vhost_vring_call { > /* The virtqueue structure describes a queue attached to a device. */ > struct vhost_virtqueue { > struct vhost_dev *dev; > + struct vhost_worker *worker; > > /* The actual ring of buffers. */ > struct mutex mutex; > -- > 2.25.1 >
Michael S. Tsirkin
2023-Apr-04 18:38 UTC
[PATCH v6 01/11] vhost: add vhost_worker pointer to vhost_virtqueue
On Mon, Mar 27, 2023 at 09:17:07PM -0500, Mike Christie wrote:> This patchset allows userspace to map vqs to different workers. This > patch adds a worker pointer to the vq so we can store that info. > > Signed-off-by: Mike Christie <michael.christie at oracle.com>Thanks! Conflicts with a bunch of refactorings upstream: could you rebase this on my tree and repost? I need to queue this soon so it gets time in -next.> --- > drivers/vhost/vhost.c | 24 +++++++++++++----------- > drivers/vhost/vhost.h | 1 + > 2 files changed, 14 insertions(+), 11 deletions(-) > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index 4368ee9b999c..e041e116afee 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -486,6 +486,7 @@ void vhost_dev_init(struct vhost_dev *dev, > vq->log = NULL; > vq->indirect = NULL; > vq->heads = NULL; > + vq->worker = NULL; > vq->dev = dev; > mutex_init(&vq->mutex); > vhost_vq_reset(dev, vq); > @@ -554,16 +555,15 @@ static void vhost_worker_free(struct vhost_dev *dev) > kfree(worker); > } > > -static int vhost_worker_create(struct vhost_dev *dev) > +static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev) > { > struct vhost_worker *worker; > struct vhost_task *vtsk; > char name[TASK_COMM_LEN]; > - int ret; > > worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT); > if (!worker) > - return -ENOMEM; > + return NULL; > > dev->worker = worker; > worker->kcov_handle = kcov_common_handle(); > @@ -571,25 +571,24 @@ static int vhost_worker_create(struct vhost_dev *dev) > snprintf(name, sizeof(name), "vhost-%d", current->pid); > > vtsk = vhost_task_create(vhost_worker, worker, name); > - if (!vtsk) { > - ret = -ENOMEM; > + if (!vtsk) > goto free_worker; > - } > > worker->vtsk = vtsk; > vhost_task_start(vtsk); > - return 0; > + return worker; > > free_worker: > kfree(worker); > dev->worker = NULL; > - return ret; > + return NULL; > } > > /* Caller should have device mutex */ > long vhost_dev_set_owner(struct vhost_dev *dev) > { > - int err; > + struct vhost_worker *worker; > + int err, i; > > /* Is there an owner already? */ > if (vhost_dev_has_owner(dev)) { > @@ -600,9 +599,12 @@ long vhost_dev_set_owner(struct vhost_dev *dev) > vhost_attach_mm(dev); > > if (dev->use_worker) { > - err = vhost_worker_create(dev); > - if (err) > + worker = vhost_worker_create(dev); > + if (!worker) > goto err_worker; > + > + for (i = 0; i < dev->nvqs; i++) > + dev->vqs[i]->worker = worker; > } > > err = vhost_dev_alloc_iovecs(dev); > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h > index 0308638cdeee..e72b665ba3a5 100644 > --- a/drivers/vhost/vhost.h > +++ b/drivers/vhost/vhost.h > @@ -74,6 +74,7 @@ struct vhost_vring_call { > /* The virtqueue structure describes a queue attached to a device. */ > struct vhost_virtqueue { > struct vhost_dev *dev; > + struct vhost_worker *worker; > > /* The actual ring of buffers. */ > struct mutex mutex; > -- > 2.25.1On Mon, Mar 27, 2023 at 09:17:08PM -0500, Mike Christie wrote:> In the next patches each vq might have different workers so one could > have work but others do not. For net, we only want to check specific vqs, > so this adds a helper to check if a vq has work pending and converts > vhost-net to use it. > > Signed-off-by: Mike Christie <michael.christie at oracle.com> > --- > drivers/vhost/net.c | 2 +- > drivers/vhost/vhost.c | 6 +++--- > drivers/vhost/vhost.h | 2 +- > 3 files changed, 5 insertions(+), 5 deletions(-) > > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index 07181cd8d52e..8ed63651b9eb 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -546,7 +546,7 @@ static void vhost_net_busy_poll(struct vhost_net *net, > endtime = busy_clock() + busyloop_timeout; > > while (vhost_can_busy_poll(endtime)) { > - if (vhost_has_work(&net->dev)) { > + if (vhost_vq_has_work(vq)) { > *busyloop_intr = true; > break; > } > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index e041e116afee..6567aed69ebb 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -262,11 +262,11 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) > EXPORT_SYMBOL_GPL(vhost_work_queue); > > /* A lockless hint for busy polling code to exit the loop */ > -bool vhost_has_work(struct vhost_dev *dev) > +bool vhost_vq_has_work(struct vhost_virtqueue *vq) > { > - return dev->worker && !llist_empty(&dev->worker->work_list); > + return vq->worker && !llist_empty(&vq->worker->work_list); > } > -EXPORT_SYMBOL_GPL(vhost_has_work); > +EXPORT_SYMBOL_GPL(vhost_vq_has_work); > > void vhost_poll_queue(struct vhost_poll *poll) > { > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h > index e72b665ba3a5..0dde119fb0ee 100644 > --- a/drivers/vhost/vhost.h > +++ b/drivers/vhost/vhost.h > @@ -45,7 +45,6 @@ struct vhost_poll { > > void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn); > void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work); > -bool vhost_has_work(struct vhost_dev *dev); > > void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, > __poll_t mask, struct vhost_dev *dev); > @@ -195,6 +194,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *, > struct vhost_log *log, unsigned int *log_num); > void vhost_discard_vq_desc(struct vhost_virtqueue *, int n); > > +bool vhost_vq_has_work(struct vhost_virtqueue *vq); > bool vhost_vq_is_setup(struct vhost_virtqueue *vq); > int vhost_vq_init_access(struct vhost_virtqueue *); > int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len); > -- > 2.25.1On Mon, Mar 27, 2023 at 09:17:09PM -0500, Mike Christie wrote:> This patch has the core work queueing function take a worker for when we > support multiple workers. It also adds a helper that takes a vq during > queueing so modules can control which vq/worker to queue work on. > > This temp leaves vhost_work_queue. It will be removed when the drivers > are converted in the next patches. > > Signed-off-by: Mike Christie <michael.christie at oracle.com> > --- > drivers/vhost/vhost.c | 44 +++++++++++++++++++++++++++---------------- > drivers/vhost/vhost.h | 1 + > 2 files changed, 29 insertions(+), 16 deletions(-) > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index 6567aed69ebb..cc2628ba9a77 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -231,6 +231,34 @@ void vhost_poll_stop(struct vhost_poll *poll) > } > EXPORT_SYMBOL_GPL(vhost_poll_stop); > > +static void vhost_work_queue_on(struct vhost_worker *worker, > + struct vhost_work *work) > +{ > + if (!worker) > + return; > + > + if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) { > + /* We can only add the work to the list after we're > + * sure it was not in the list. > + * test_and_set_bit() implies a memory barrier. > + */ > + llist_add(&work->node, &worker->work_list); > + wake_up_process(worker->vtsk->task); > + } > +} > + > +void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) > +{ > + vhost_work_queue_on(dev->worker, work); > +} > +EXPORT_SYMBOL_GPL(vhost_work_queue); > + > +void vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work) > +{ > + vhost_work_queue_on(vq->worker, work); > +} > +EXPORT_SYMBOL_GPL(vhost_vq_work_queue); > + > void vhost_dev_flush(struct vhost_dev *dev) > { > struct vhost_flush_struct flush; > @@ -245,22 +273,6 @@ void vhost_dev_flush(struct vhost_dev *dev) > } > EXPORT_SYMBOL_GPL(vhost_dev_flush); > > -void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) > -{ > - if (!dev->worker) > - return; > - > - if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) { > - /* We can only add the work to the list after we're > - * sure it was not in the list. > - * test_and_set_bit() implies a memory barrier. > - */ > - llist_add(&work->node, &dev->worker->work_list); > - wake_up_process(dev->worker->vtsk->task); > - } > -} > -EXPORT_SYMBOL_GPL(vhost_work_queue); > - > /* A lockless hint for busy polling code to exit the loop */ > bool vhost_vq_has_work(struct vhost_virtqueue *vq) > { > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h > index 0dde119fb0ee..b64ee4ef387d 100644 > --- a/drivers/vhost/vhost.h > +++ b/drivers/vhost/vhost.h > @@ -194,6 +194,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *, > struct vhost_log *log, unsigned int *log_num); > void vhost_discard_vq_desc(struct vhost_virtqueue *, int n); > > +void vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work); > bool vhost_vq_has_work(struct vhost_virtqueue *vq); > bool vhost_vq_is_setup(struct vhost_virtqueue *vq); > int vhost_vq_init_access(struct vhost_virtqueue *); > -- > 2.25.1On Mon, Mar 27, 2023 at 09:17:10PM -0500, Mike Christie wrote:> This patch has the core work flush function take a worker. When we > support multiple workers we can then flush each worker during device > removal, stoppage, etc. > > Signed-off-by: Mike Christie <michael.christie at oracle.com> > --- > drivers/vhost/vhost.c | 24 +++++++++++++++--------- > 1 file changed, 15 insertions(+), 9 deletions(-) > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index cc2628ba9a77..6160aa1cc922 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -247,6 +247,20 @@ static void vhost_work_queue_on(struct vhost_worker *worker, > } > } > > +static void vhost_work_flush_on(struct vhost_worker *worker) > +{ > + struct vhost_flush_struct flush; > + > + if (!worker) > + return; > + > + init_completion(&flush.wait_event); > + vhost_work_init(&flush.work, vhost_flush_work); > + > + vhost_work_queue_on(worker, &flush.work); > + wait_for_completion(&flush.wait_event); > +} > + > void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) > { > vhost_work_queue_on(dev->worker, work); > @@ -261,15 +275,7 @@ EXPORT_SYMBOL_GPL(vhost_vq_work_queue); > > void vhost_dev_flush(struct vhost_dev *dev) > { > - struct vhost_flush_struct flush; > - > - if (dev->worker) { > - init_completion(&flush.wait_event); > - vhost_work_init(&flush.work, vhost_flush_work); > - > - vhost_work_queue(dev, &flush.work); > - wait_for_completion(&flush.wait_event); > - } > + vhost_work_flush_on(dev->worker); > } > EXPORT_SYMBOL_GPL(vhost_dev_flush); > > -- > 2.25.1On Mon, Mar 27, 2023 at 09:17:11PM -0500, Mike Christie wrote:> This has the drivers pass in their poll to vq mapping and then converts > the core poll code to use the vq based helpers. In the next patches we > will allow vqs to be handled by different workers, so to allow drivers > to execute operations like queue, stop, flush, etc on specific polls/vqs > we need to know the mappings. > > Signed-off-by: Mike Christie <michael.christie at oracle.com> > --- > drivers/vhost/net.c | 6 ++++-- > drivers/vhost/vhost.c | 8 +++++--- > drivers/vhost/vhost.h | 4 +++- > 3 files changed, 12 insertions(+), 6 deletions(-) > > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index 8ed63651b9eb..4a9b757071a2 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -1342,8 +1342,10 @@ static int vhost_net_open(struct inode *inode, struct file *f) > VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true, > NULL); > > - vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev); > - vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev); > + vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev, > + vqs[VHOST_NET_VQ_TX]); > + vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev, > + vqs[VHOST_NET_VQ_RX]); > > f->private_data = n; > n->page_frag.page = NULL; > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index 6160aa1cc922..6968f8fc17e8 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -187,13 +187,15 @@ EXPORT_SYMBOL_GPL(vhost_work_init); > > /* Init poll structure */ > void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, > - __poll_t mask, struct vhost_dev *dev) > + __poll_t mask, struct vhost_dev *dev, > + struct vhost_virtqueue *vq) > { > init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup); > init_poll_funcptr(&poll->table, vhost_poll_func); > poll->mask = mask; > poll->dev = dev; > poll->wqh = NULL; > + poll->vq = vq; > > vhost_work_init(&poll->work, fn); > } > @@ -288,7 +290,7 @@ EXPORT_SYMBOL_GPL(vhost_vq_has_work); > > void vhost_poll_queue(struct vhost_poll *poll) > { > - vhost_work_queue(poll->dev, &poll->work); > + vhost_vq_work_queue(poll->vq, &poll->work); > } > EXPORT_SYMBOL_GPL(vhost_poll_queue); > > @@ -510,7 +512,7 @@ void vhost_dev_init(struct vhost_dev *dev, > vhost_vq_reset(dev, vq); > if (vq->handle_kick) > vhost_poll_init(&vq->poll, vq->handle_kick, > - EPOLLIN, dev); > + EPOLLIN, dev, vq); > } > } > EXPORT_SYMBOL_GPL(vhost_dev_init); > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h > index b64ee4ef387d..d9b8abbe3a26 100644 > --- a/drivers/vhost/vhost.h > +++ b/drivers/vhost/vhost.h > @@ -41,13 +41,15 @@ struct vhost_poll { > struct vhost_work work; > __poll_t mask; > struct vhost_dev *dev; > + struct vhost_virtqueue *vq; > }; > > void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn); > void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work); > > void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, > - __poll_t mask, struct vhost_dev *dev); > + __poll_t mask, struct vhost_dev *dev, > + struct vhost_virtqueue *vq); > int vhost_poll_start(struct vhost_poll *poll, struct file *file); > void vhost_poll_stop(struct vhost_poll *poll); > void vhost_poll_queue(struct vhost_poll *poll); > -- > 2.25.1On Mon, Mar 27, 2023 at 09:17:12PM -0500, Mike Christie wrote:> Convert from vhost_work_queue to vhost_vq_work_queue. > > Signed-off-by: Mike Christie <michael.christie at oracle.com> > --- > drivers/vhost/vsock.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c > index c8e6087769a1..1dcbc8669f95 100644 > --- a/drivers/vhost/vsock.c > +++ b/drivers/vhost/vsock.c > @@ -285,7 +285,7 @@ vhost_transport_send_pkt(struct sk_buff *skb) > atomic_inc(&vsock->queued_replies); > > virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb); > - vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); > + vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work); > > rcu_read_unlock(); > return len; > @@ -582,7 +582,7 @@ static int vhost_vsock_start(struct vhost_vsock *vsock) > /* Some packets may have been queued before the device was started, > * let's kick the send worker to send them. > */ > - vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); > + vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work); > > mutex_unlock(&vsock->dev.mutex); > return 0; > -- > 2.25.1On Mon, Mar 27, 2023 at 09:17:13PM -0500, Mike Christie wrote:> This patch separates the scsi cmd completion code paths so we can complete > cmds based on their vq instead of having all cmds complete on the same > worker/CPU. This will be useful with the next patches that allow us to > create mulitple worker threads and bind them to different vqs, so we can > have completions running on different threads/CPUs. > > Signed-off-by: Mike Christie <michael.christie at oracle.com> > Reviewed-by: Stefan Hajnoczi <stefanha at redhat.com> > --- > drivers/vhost/scsi.c | 56 ++++++++++++++++++++------------------------ > 1 file changed, 26 insertions(+), 30 deletions(-) > > diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c > index 3b0b556c57ef..ecb5cd7450b8 100644 > --- a/drivers/vhost/scsi.c > +++ b/drivers/vhost/scsi.c > @@ -167,6 +167,7 @@ MODULE_PARM_DESC(max_io_vqs, "Set the max number of IO virtqueues a vhost scsi d > > struct vhost_scsi_virtqueue { > struct vhost_virtqueue vq; > + struct vhost_scsi *vs; > /* > * Reference counting for inflight reqs, used for flush operation. At > * each time, one reference tracks new commands submitted, while we > @@ -181,6 +182,9 @@ struct vhost_scsi_virtqueue { > struct vhost_scsi_cmd *scsi_cmds; > struct sbitmap scsi_tags; > int max_cmds; > + > + struct vhost_work completion_work; > + struct llist_head completion_list; > }; > > struct vhost_scsi { > @@ -190,12 +194,8 @@ struct vhost_scsi { > > struct vhost_dev dev; > struct vhost_scsi_virtqueue *vqs; > - unsigned long *compl_bitmap; > struct vhost_scsi_inflight **old_inflight; > > - struct vhost_work vs_completion_work; /* cmd completion work item */ > - struct llist_head vs_completion_list; /* cmd completion queue */ > - > struct vhost_work vs_event_work; /* evt injection work item */ > struct llist_head vs_event_list; /* evt injection queue */ > > @@ -368,10 +368,11 @@ static void vhost_scsi_release_cmd(struct se_cmd *se_cmd) > } else { > struct vhost_scsi_cmd *cmd = container_of(se_cmd, > struct vhost_scsi_cmd, tvc_se_cmd); > - struct vhost_scsi *vs = cmd->tvc_vhost; > + struct vhost_scsi_virtqueue *svq = container_of(cmd->tvc_vq, > + struct vhost_scsi_virtqueue, vq); > > - llist_add(&cmd->tvc_completion_list, &vs->vs_completion_list); > - vhost_work_queue(&vs->dev, &vs->vs_completion_work); > + llist_add(&cmd->tvc_completion_list, &svq->completion_list); > + vhost_vq_work_queue(&svq->vq, &svq->completion_work); > } > } > > @@ -534,17 +535,17 @@ static void vhost_scsi_evt_work(struct vhost_work *work) > */ > static void vhost_scsi_complete_cmd_work(struct vhost_work *work) > { > - struct vhost_scsi *vs = container_of(work, struct vhost_scsi, > - vs_completion_work); > + struct vhost_scsi_virtqueue *svq = container_of(work, > + struct vhost_scsi_virtqueue, completion_work); > struct virtio_scsi_cmd_resp v_rsp; > struct vhost_scsi_cmd *cmd, *t; > struct llist_node *llnode; > struct se_cmd *se_cmd; > struct iov_iter iov_iter; > - int ret, vq; > + bool signal = false; > + int ret; > > - bitmap_zero(vs->compl_bitmap, vs->dev.nvqs); > - llnode = llist_del_all(&vs->vs_completion_list); > + llnode = llist_del_all(&svq->completion_list); > llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) { > se_cmd = &cmd->tvc_se_cmd; > > @@ -564,21 +565,17 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) > cmd->tvc_in_iovs, sizeof(v_rsp)); > ret = copy_to_iter(&v_rsp, sizeof(v_rsp), &iov_iter); > if (likely(ret == sizeof(v_rsp))) { > - struct vhost_scsi_virtqueue *q; > + signal = true; > + > vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0); > - q = container_of(cmd->tvc_vq, struct vhost_scsi_virtqueue, vq); > - vq = q - vs->vqs; > - __set_bit(vq, vs->compl_bitmap); > } else > pr_err("Faulted on virtio_scsi_cmd_resp\n"); > > vhost_scsi_release_cmd_res(se_cmd); > } > > - vq = -1; > - while ((vq = find_next_bit(vs->compl_bitmap, vs->dev.nvqs, vq + 1)) > - < vs->dev.nvqs) > - vhost_signal(&vs->dev, &vs->vqs[vq].vq); > + if (signal) > + vhost_signal(&svq->vs->dev, &svq->vq); > } > > static struct vhost_scsi_cmd * > @@ -1795,6 +1792,7 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features) > > static int vhost_scsi_open(struct inode *inode, struct file *f) > { > + struct vhost_scsi_virtqueue *svq; > struct vhost_scsi *vs; > struct vhost_virtqueue **vqs; > int r = -ENOMEM, i, nvqs = vhost_scsi_max_io_vqs; > @@ -1813,10 +1811,6 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) > } > nvqs += VHOST_SCSI_VQ_IO; > > - vs->compl_bitmap = bitmap_alloc(nvqs, GFP_KERNEL); > - if (!vs->compl_bitmap) > - goto err_compl_bitmap; > - > vs->old_inflight = kmalloc_array(nvqs, sizeof(*vs->old_inflight), > GFP_KERNEL | __GFP_ZERO); > if (!vs->old_inflight) > @@ -1831,7 +1825,6 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) > if (!vqs) > goto err_local_vqs; > > - vhost_work_init(&vs->vs_completion_work, vhost_scsi_complete_cmd_work); > vhost_work_init(&vs->vs_event_work, vhost_scsi_evt_work); > > vs->vs_events_nr = 0; > @@ -1842,8 +1835,14 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) > vs->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick; > vs->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick; > for (i = VHOST_SCSI_VQ_IO; i < nvqs; i++) { > - vqs[i] = &vs->vqs[i].vq; > - vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; > + svq = &vs->vqs[i]; > + > + vqs[i] = &svq->vq; > + svq->vs = vs; > + init_llist_head(&svq->completion_list); > + vhost_work_init(&svq->completion_work, > + vhost_scsi_complete_cmd_work); > + svq->vq.handle_kick = vhost_scsi_handle_kick; > } > vhost_dev_init(&vs->dev, vqs, nvqs, UIO_MAXIOV, > VHOST_SCSI_WEIGHT, 0, true, NULL); > @@ -1858,8 +1857,6 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) > err_vqs: > kfree(vs->old_inflight); > err_inflight: > - bitmap_free(vs->compl_bitmap); > -err_compl_bitmap: > kvfree(vs); > err_vs: > return r; > @@ -1879,7 +1876,6 @@ static int vhost_scsi_release(struct inode *inode, struct file *f) > kfree(vs->dev.vqs); > kfree(vs->vqs); > kfree(vs->old_inflight); > - bitmap_free(vs->compl_bitmap); > kvfree(vs); > return 0; > } > -- > 2.25.1On Mon, Mar 27, 2023 at 09:17:14PM -0500, Mike Christie wrote:> Convert from vhost_work_queue to vhost_vq_work_queue. > > Signed-off-by: Mike Christie <michael.christie at oracle.com> > --- > drivers/vhost/scsi.c | 18 +++++++++--------- > 1 file changed, 9 insertions(+), 9 deletions(-) > > diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c > index ecb5cd7450b8..3e86b5fbeca6 100644 > --- a/drivers/vhost/scsi.c > +++ b/drivers/vhost/scsi.c > @@ -363,8 +363,9 @@ static void vhost_scsi_release_cmd(struct se_cmd *se_cmd) > if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) { > struct vhost_scsi_tmf *tmf = container_of(se_cmd, > struct vhost_scsi_tmf, se_cmd); > + struct vhost_virtqueue *vq = &tmf->svq->vq; > > - vhost_work_queue(&tmf->vhost->dev, &tmf->vwork); > + vhost_vq_work_queue(vq, &tmf->vwork); > } else { > struct vhost_scsi_cmd *cmd = container_of(se_cmd, > struct vhost_scsi_cmd, tvc_se_cmd); > @@ -1357,11 +1358,9 @@ static void vhost_scsi_ctl_handle_kick(struct vhost_work *work) > } > > static void > -vhost_scsi_send_evt(struct vhost_scsi *vs, > - struct vhost_scsi_tpg *tpg, > - struct se_lun *lun, > - u32 event, > - u32 reason) > +vhost_scsi_send_evt(struct vhost_scsi *vs, struct vhost_virtqueue *vq, > + struct vhost_scsi_tpg *tpg, struct se_lun *lun, > + u32 event, u32 reason) > { > struct vhost_scsi_evt *evt; > > @@ -1383,7 +1382,7 @@ vhost_scsi_send_evt(struct vhost_scsi *vs, > } > > llist_add(&evt->list, &vs->vs_event_list); > - vhost_work_queue(&vs->dev, &vs->vs_event_work); > + vhost_vq_work_queue(vq, &vs->vs_event_work); > } > > static void vhost_scsi_evt_handle_kick(struct vhost_work *work) > @@ -1397,7 +1396,8 @@ static void vhost_scsi_evt_handle_kick(struct vhost_work *work) > goto out; > > if (vs->vs_events_missed) > - vhost_scsi_send_evt(vs, NULL, NULL, VIRTIO_SCSI_T_NO_EVENT, 0); > + vhost_scsi_send_evt(vs, vq, NULL, NULL, VIRTIO_SCSI_T_NO_EVENT, > + 0); > out: > mutex_unlock(&vq->mutex); > } > @@ -2016,7 +2016,7 @@ vhost_scsi_do_plug(struct vhost_scsi_tpg *tpg, > goto unlock; > > if (vhost_has_feature(vq, VIRTIO_SCSI_F_HOTPLUG)) > - vhost_scsi_send_evt(vs, tpg, lun, > + vhost_scsi_send_evt(vs, vq, tpg, lun, > VIRTIO_SCSI_T_TRANSPORT_RESET, reason); > unlock: > mutex_unlock(&vq->mutex); > -- > 2.25.1On Mon, Mar 27, 2023 at 09:17:15PM -0500, Mike Christie wrote:> vhost_work_queue is no longer used. Each driver is using the poll or vq > based queueing, so remove vhost_work_queue. > > Signed-off-by: Mike Christie <michael.christie at oracle.com> > --- > drivers/vhost/vhost.c | 6 ------ > drivers/vhost/vhost.h | 1 - > 2 files changed, 7 deletions(-) > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index 6968f8fc17e8..f812daf25648 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -263,12 +263,6 @@ static void vhost_work_flush_on(struct vhost_worker *worker) > wait_for_completion(&flush.wait_event); > } > > -void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) > -{ > - vhost_work_queue_on(dev->worker, work); > -} > -EXPORT_SYMBOL_GPL(vhost_work_queue); > - > void vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work) > { > vhost_work_queue_on(vq->worker, work); > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h > index d9b8abbe3a26..ef55fae2517c 100644 > --- a/drivers/vhost/vhost.h > +++ b/drivers/vhost/vhost.h > @@ -45,7 +45,6 @@ struct vhost_poll { > }; > > void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn); > -void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work); > > void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, > __poll_t mask, struct vhost_dev *dev, > -- > 2.25.1On Mon, Mar 27, 2023 at 09:17:16PM -0500, Mike Christie wrote:> With one worker we will always send the scsi cmd responses then send the > TMF rsp, because LIO will always complete the scsi cmds first then call > into us to send the TMF response. > > With multiple workers, the IO vq workers could be running while the > TMF/ctl vq worker is so this has us do a flush before completing the TMF > to make sure cmds are completed when it's work is later queued and run. > > Signed-off-by: Mike Christie <michael.christie at oracle.com> > --- > drivers/vhost/scsi.c | 22 +++++++++++++++++++--- > drivers/vhost/vhost.c | 6 ++++++ > drivers/vhost/vhost.h | 1 + > 3 files changed, 26 insertions(+), 3 deletions(-) > > diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c > index 3e86b5fbeca6..48dba4fe2dac 100644 > --- a/drivers/vhost/scsi.c > +++ b/drivers/vhost/scsi.c > @@ -1158,12 +1158,28 @@ static void vhost_scsi_tmf_resp_work(struct vhost_work *work) > { > struct vhost_scsi_tmf *tmf = container_of(work, struct vhost_scsi_tmf, > vwork); > - int resp_code; > + struct vhost_virtqueue *ctl_vq, *vq; > + int resp_code, i; > + > + if (tmf->scsi_resp == TMR_FUNCTION_COMPLETE) { > + /* > + * Flush IO vqs that don't share a worker with the ctl to make > + * sure they have sent their responses before us. > + */ > + ctl_vq = &tmf->vhost->vqs[VHOST_SCSI_VQ_CTL].vq; > + for (i = VHOST_SCSI_VQ_IO; i < tmf->vhost->dev.nvqs; i++) { > + vq = &tmf->vhost->vqs[i].vq; > + > + if (vhost_vq_is_setup(vq) && > + vq->worker != ctl_vq->worker) { > + vhost_vq_flush(vq); > + } > + } > > - if (tmf->scsi_resp == TMR_FUNCTION_COMPLETE) > resp_code = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED; > - else > + } else { > resp_code = VIRTIO_SCSI_S_FUNCTION_REJECTED; > + } > > vhost_scsi_send_tmf_resp(tmf->vhost, &tmf->svq->vq, tmf->in_iovs, > tmf->vq_desc, &tmf->resp_iov, resp_code); > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index f812daf25648..1fa5e9a49092 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -275,6 +275,12 @@ void vhost_dev_flush(struct vhost_dev *dev) > } > EXPORT_SYMBOL_GPL(vhost_dev_flush); > > +void vhost_vq_flush(struct vhost_virtqueue *vq) > +{ > + vhost_work_flush_on(vq->worker); > +} > +EXPORT_SYMBOL_GPL(vhost_vq_flush); > + > /* A lockless hint for busy polling code to exit the loop */ > bool vhost_vq_has_work(struct vhost_virtqueue *vq) > { > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h > index ef55fae2517c..395707c680e5 100644 > --- a/drivers/vhost/vhost.h > +++ b/drivers/vhost/vhost.h > @@ -53,6 +53,7 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file); > void vhost_poll_stop(struct vhost_poll *poll); > void vhost_poll_queue(struct vhost_poll *poll); > void vhost_dev_flush(struct vhost_dev *dev); > +void vhost_vq_flush(struct vhost_virtqueue *vq); > > struct vhost_log { > u64 addr; > -- > 2.25.1On Mon, Mar 27, 2023 at 09:17:17PM -0500, Mike Christie wrote:> For vhost-scsi with 3 vqs and a workload like that tries to use those vqs > like: > > fio --filename=/dev/sdb --direct=1 --rw=randrw --bs=4k \ > --ioengine=libaio --iodepth=128 --numjobs=3 > > the single vhost worker thread will become a bottlneck and we are stuck > at around 500K IOPs no matter how many jobs, virtqueues, and CPUs are > used. > > To better utilize virtqueues and available CPUs, this patch allows > userspace to create workers and bind them to vqs. You can have N workers > per dev and also share N workers with M vqs. > > With the patches and doing a worker per vq, we can scale to at least > 16 vCPUs/vqs (that's my system limit) with the same command fio command > above with numjobs=16: > > fio --filename=/dev/sdb --direct=1 --rw=randrw --bs=4k \ > --ioengine=libaio --iodepth=64 --numjobs=16 > > which gives around 2326K IOPs. > > Note that for testing I dropped depth to 64 above because the vhost/virt > layer supports only 1024 total commands per device. And the only tuning I > did was set LIO's emulate_pr to 0 to avoid LIO's PR lock in the main IO > path which becomes an issue at around 12 jobs/virtqueues. > > Signed-off-by: Mike Christie <michael.christie at oracle.com> > --- > drivers/vhost/vhost.c | 177 ++++++++++++++++++++++++++++--- > drivers/vhost/vhost.h | 4 +- > include/uapi/linux/vhost.h | 22 ++++ > include/uapi/linux/vhost_types.h | 15 +++ > 4 files changed, 204 insertions(+), 14 deletions(-) > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index 1fa5e9a49092..e40699e83c6d 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -271,7 +271,11 @@ EXPORT_SYMBOL_GPL(vhost_vq_work_queue); > > void vhost_dev_flush(struct vhost_dev *dev) > { > - vhost_work_flush_on(dev->worker); > + struct vhost_worker *worker; > + unsigned long i; > + > + xa_for_each(&dev->worker_xa, i, worker) > + vhost_work_flush_on(worker); > } > EXPORT_SYMBOL_GPL(vhost_dev_flush); > > @@ -489,7 +493,6 @@ void vhost_dev_init(struct vhost_dev *dev, > dev->umem = NULL; > dev->iotlb = NULL; > dev->mm = NULL; > - dev->worker = NULL; > dev->iov_limit = iov_limit; > dev->weight = weight; > dev->byte_weight = byte_weight; > @@ -499,7 +502,7 @@ void vhost_dev_init(struct vhost_dev *dev, > INIT_LIST_HEAD(&dev->read_list); > INIT_LIST_HEAD(&dev->pending_list); > spin_lock_init(&dev->iotlb_lock); > - > + xa_init_flags(&dev->worker_xa, XA_FLAGS_ALLOC); > > for (i = 0; i < dev->nvqs; ++i) { > vq = dev->vqs[i]; > @@ -562,32 +565,67 @@ static void vhost_detach_mm(struct vhost_dev *dev) > dev->mm = NULL; > } > > -static void vhost_worker_free(struct vhost_dev *dev) > +static void vhost_worker_put(struct vhost_dev *dev, struct vhost_worker *worker) > { > - struct vhost_worker *worker = dev->worker; > - > if (!worker) > return; > > - dev->worker = NULL; > + if (!refcount_dec_and_test(&worker->refcount)) > + return; > + > WARN_ON(!llist_empty(&worker->work_list)); > vhost_task_stop(worker->vtsk); > kfree(worker); > } > > +static void vhost_vq_detach_worker(struct vhost_virtqueue *vq) > +{ > + if (vq->worker) > + vhost_worker_put(vq->dev, vq->worker); > + vq->worker = NULL; > +} > + > +static void vhost_workers_free(struct vhost_dev *dev) > +{ > + struct vhost_worker *worker; > + unsigned long i; > + > + if (!dev->use_worker) > + return; > + > + for (i = 0; i < dev->nvqs; i++) > + vhost_vq_detach_worker(dev->vqs[i]); > + /* > + * Drop the refcount taken during allocation, and handle the default > + * worker and the cases where userspace might have crashed or was lazy > + * and did a VHOST_NEW_WORKER but not a VHOST_FREE_WORKER. > + */ > + xa_for_each(&dev->worker_xa, i, worker) { > + xa_erase(&dev->worker_xa, worker->id); > + vhost_worker_put(dev, worker); > + } > + xa_destroy(&dev->worker_xa); > +} > + > static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev) > { > struct vhost_worker *worker; > struct vhost_task *vtsk; > char name[TASK_COMM_LEN]; > + int ret; > + u32 id; > > worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT); > if (!worker) > return NULL; > > - dev->worker = worker; > worker->kcov_handle = kcov_common_handle(); > init_llist_head(&worker->work_list); > + /* > + * We increase the refcount for the initial creation and then > + * later each time it's attached to a virtqueue. > + */ > + refcount_set(&worker->refcount, 1); > snprintf(name, sizeof(name), "vhost-%d", current->pid); > > vtsk = vhost_task_create(vhost_worker, worker, name); > @@ -596,14 +634,104 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev) > > worker->vtsk = vtsk; > vhost_task_start(vtsk); > + > + ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL); > + if (ret < 0) > + goto stop_worker; > + worker->id = id; > + > return worker; > > +stop_worker: > + vhost_task_stop(vtsk); > free_worker: > kfree(worker); > - dev->worker = NULL; > return NULL; > } > > +/* Caller must have device and virtqueue mutex */ > +static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq, > + struct vhost_worker *worker) > +{ > + refcount_inc(&worker->refcount); > + vhost_vq_detach_worker(vq); > + vq->worker = worker; > +} > + > +/* Caller must have device and virtqueue mutex */ > +static int vhost_vq_attach_worker(struct vhost_virtqueue *vq, > + struct vhost_vring_worker *info) > +{ > + unsigned long index = info->worker_id; > + struct vhost_dev *dev = vq->dev; > + struct vhost_worker *worker; > + > + if (!dev->use_worker) > + return -EINVAL; > + > + /* > + * We don't support setting a worker on an active vq to make flushing > + * and removal simple. > + */ > + if (vhost_vq_get_backend(vq)) > + return -EBUSY; > + > + worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT); > + if (!worker || worker->id != info->worker_id) > + return -ENODEV; > + > + __vhost_vq_attach_worker(vq, worker); > + return 0; > +} > + > +/* Caller must have device mutex */ > +static int vhost_new_worker(struct vhost_dev *dev, > + struct vhost_worker_state *info) > +{ > + struct vhost_worker *worker; > + > + if (!dev->use_worker) > + return -EINVAL; > + > + worker = vhost_worker_create(dev); > + if (!worker) > + return -ENOMEM; > + > + info->worker_id = worker->id; > + return 0; > +} > + > +/* Caller must have device mutex */ > +static int vhost_free_worker(struct vhost_dev *dev, > + struct vhost_worker_state *info) > +{ > + unsigned long index = info->worker_id; > + struct vhost_worker *worker; > + > + if (!dev->use_worker) > + return -EINVAL; > + > + worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT); > + if (!worker || worker->id != info->worker_id) > + return -ENODEV; > + > + /* > + * We can free the worker if it's not attached to any virtqueues. > + */ > + if (refcount_read(&worker->refcount) != 1) > + return -EBUSY; > + > + xa_erase(&dev->worker_xa, worker->id); > + /* > + * Make sure if there was a flush that saw the worker in the XA that > + * it has completed. > + */ > + vhost_work_flush_on(worker); > + > + vhost_worker_put(dev, worker); > + return 0; > +} > + > /* Caller should have device mutex */ > long vhost_dev_set_owner(struct vhost_dev *dev) > { > @@ -624,7 +752,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev) > goto err_worker; > > for (i = 0; i < dev->nvqs; i++) > - dev->vqs[i]->worker = worker; > + __vhost_vq_attach_worker(dev->vqs[i], worker); > } > > err = vhost_dev_alloc_iovecs(dev); > @@ -633,7 +761,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev) > > return 0; > err_iovecs: > - vhost_worker_free(dev); > + vhost_workers_free(dev); > err_worker: > vhost_detach_mm(dev); > err_mm: > @@ -726,7 +854,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) > dev->iotlb = NULL; > vhost_clear_msg(dev); > wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM); > - vhost_worker_free(dev); > + vhost_workers_free(dev); > vhost_detach_mm(dev); > } > EXPORT_SYMBOL_GPL(vhost_dev_cleanup); > @@ -1616,6 +1744,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg > struct eventfd_ctx *ctx = NULL; > u32 __user *idxp = argp; > struct vhost_virtqueue *vq; > + struct vhost_vring_worker w; > struct vhost_vring_state s; > struct vhost_vring_file f; > u32 idx; > @@ -1723,7 +1852,16 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg > if (copy_to_user(argp, &s, sizeof(s))) > r = -EFAULT; > break; > - default: > + case VHOST_ATTACH_VRING_WORKER: > + if (copy_from_user(&w, argp, sizeof(w))) { > + r = -EFAULT; > + break; > + } > + r = vhost_vq_attach_worker(vq, &w); > + if (!r && copy_to_user(argp, &w, sizeof(w))) > + r = -EFAULT; > + break; > +default: > r = -ENOIOCTLCMD; > } > > @@ -1776,6 +1914,7 @@ EXPORT_SYMBOL_GPL(vhost_init_device_iotlb); > /* Caller must have device mutex */ > long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) > { > + struct vhost_worker_state w; > struct eventfd_ctx *ctx; > u64 p; > long r; > @@ -1836,6 +1975,18 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) > if (ctx) > eventfd_ctx_put(ctx); > break; > + case VHOST_NEW_WORKER: > + r = vhost_new_worker(d, &w); > + if (!r && copy_to_user(argp, &w, sizeof(w))) > + r = -EFAULT; > + break; > + case VHOST_FREE_WORKER: > + if (copy_from_user(&w, argp, sizeof(w))) { > + r = -EFAULT; > + break; > + } > + r = vhost_free_worker(d, &w); > + break; > default: > r = -ENOIOCTLCMD; > break; > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h > index 395707c680e5..a67ae8293c38 100644 > --- a/drivers/vhost/vhost.h > +++ b/drivers/vhost/vhost.h > @@ -30,6 +30,8 @@ struct vhost_worker { > struct vhost_task *vtsk; > struct llist_head work_list; > u64 kcov_handle; > + refcount_t refcount; > + u32 id; > }; > > /* Poll a file (eventfd or socket) */ > @@ -156,7 +158,6 @@ struct vhost_dev { > struct vhost_virtqueue **vqs; > int nvqs; > struct eventfd_ctx *log_ctx; > - struct vhost_worker *worker; > struct vhost_iotlb *umem; > struct vhost_iotlb *iotlb; > spinlock_t iotlb_lock; > @@ -166,6 +167,7 @@ struct vhost_dev { > int iov_limit; > int weight; > int byte_weight; > + struct xarray worker_xa; > bool use_worker; > int (*msg_handler)(struct vhost_dev *dev, u32 asid, > struct vhost_iotlb_msg *msg); > diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h > index 92e1b700b51c..7329e7f349dd 100644 > --- a/include/uapi/linux/vhost.h > +++ b/include/uapi/linux/vhost.h > @@ -45,6 +45,23 @@ > #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) > /* Specify an eventfd file descriptor to signal on log write. */ > #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) > +/* By default, a device gets one vhost_worker that its virtqueues share. This > + * command allows the owner of the device to create an additional vhost_worker > + * for the device. It can later be bound to 1 or more of its virtqueues using > + * the VHOST_ATTACH_VRING_WORKER command. > + * > + * This must be called after VHOST_SET_OWNER and the caller must be the owner > + * of the device. The new thread will inherit caller's cgroups and namespaces, > + * and will share the caller's memory space. The new thread will also be > + * counted against the caller's RLIMIT_NPROC value. > + */ > +#define VHOST_NEW_WORKER _IOW(VHOST_VIRTIO, 0x8, struct vhost_worker_state) > +/* Free a worker created with VHOST_NEW_WORKER if it's not attached to any > + * virtqueue. If userspace is not able to call this for workers its created, > + * the kernel will free all the device's workers when the device is closed and > + * the last reference to the device has been released. > + */ > +#define VHOST_FREE_WORKER _IOR(VHOST_VIRTIO, 0x9, struct vhost_worker_state) > > /* Ring setup. */ > /* Set number of descriptors in ring. This parameter can not > @@ -70,6 +87,11 @@ > #define VHOST_VRING_BIG_ENDIAN 1 > #define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state) > #define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state) > +/* Attach a vhost_worker created with VHOST_NEW_WORKER to one of the device's > + * virtqueues. This must be done before the virtqueue is active. > + */ > +#define VHOST_ATTACH_VRING_WORKER _IOR(VHOST_VIRTIO, 0x15, \ > + struct vhost_vring_worker) > > /* The following ioctls use eventfd file descriptors to signal and poll > * for events. */ > diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h > index c5690a8992d8..ad0fe2e721be 100644 > --- a/include/uapi/linux/vhost_types.h > +++ b/include/uapi/linux/vhost_types.h > @@ -47,6 +47,21 @@ struct vhost_vring_addr { > __u64 log_guest_addr; > }; > > +struct vhost_worker_state { > + /* > + * For VHOST_NEW_WORKER the kernel will return the new vhost_worker id. > + * For VHOST_FREE_WORKER this must be set to the id of the vhost_worker > + * to free. > + */ > + int worker_id; > +}; > + > +struct vhost_vring_worker { > + unsigned int index; > + /* The id of the vhost_worker returned from VHOST_NEW_WORKER */ > + int worker_id; > +}; > + > /* no alignment requirement */ > struct vhost_iotlb_msg { > __u64 iova; > -- > 2.25.1