xiangxia.m.yue at gmail.com
2018-Aug-01 03:00 UTC
[PATCH net-next v7 0/4] net: vhost: improve performance when enable busyloop
From: Tonghao Zhang <xiangxia.m.yue at gmail.com> This patches improve the guest receive performance. On the handle_tx side, we poll the sock receive queue at the same time. handle_rx do that in the same way. For more performance report, see patch 4. v6->v7: fix issue and rebase codes: 1. on tx, busypoll will vhost_net_disable/enable_vq rx vq. [This is suggested by Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>] 2. introduce common helper vhost_net_busy_poll_try_queue(). v5->v6: rebase the codes. Tonghao Zhang (4): net: vhost: lock the vqs one by one net: vhost: replace magic number of lock annotation net: vhost: factor out busy polling logic to vhost_net_busy_poll() net: vhost: add rx busy polling in tx path drivers/vhost/net.c | 168 +++++++++++++++++++++++++++++++------------------- drivers/vhost/vhost.c | 24 +++----- 2 files changed, 113 insertions(+), 79 deletions(-) -- 1.8.3.1
xiangxia.m.yue at gmail.com
2018-Aug-01 03:00 UTC
[PATCH net-next v7 1/4] net: vhost: lock the vqs one by one
From: Tonghao Zhang <xiangxia.m.yue at gmail.com> This patch changes the way that lock all vqs at the same, to lock them one by one. It will be used for next patch to avoid the deadlock. Signed-off-by: Tonghao Zhang <xiangxia.m.yue at gmail.com> Acked-by: Jason Wang <jasowang at redhat.com> Signed-off-by: Jason Wang <jasowang at redhat.com> --- drivers/vhost/vhost.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index a502f1a..a1c06e7 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -294,8 +294,11 @@ static void vhost_vq_meta_reset(struct vhost_dev *d) { int i; - for (i = 0; i < d->nvqs; ++i) + for (i = 0; i < d->nvqs; ++i) { + mutex_lock(&d->vqs[i]->mutex); __vhost_vq_meta_reset(d->vqs[i]); + mutex_unlock(&d->vqs[i]->mutex); + } } static void vhost_vq_reset(struct vhost_dev *dev, @@ -890,20 +893,6 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, #define vhost_get_used(vq, x, ptr) \ vhost_get_user(vq, x, ptr, VHOST_ADDR_USED) -static void vhost_dev_lock_vqs(struct vhost_dev *d) -{ - int i = 0; - for (i = 0; i < d->nvqs; ++i) - mutex_lock_nested(&d->vqs[i]->mutex, i); -} - -static void vhost_dev_unlock_vqs(struct vhost_dev *d) -{ - int i = 0; - for (i = 0; i < d->nvqs; ++i) - mutex_unlock(&d->vqs[i]->mutex); -} - static int vhost_new_umem_range(struct vhost_umem *umem, u64 start, u64 size, u64 end, u64 userspace_addr, int perm) @@ -953,7 +942,10 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d, if (msg->iova <= vq_msg->iova && msg->iova + msg->size - 1 > vq_msg->iova && vq_msg->type == VHOST_IOTLB_MISS) { + mutex_lock(&node->vq->mutex); vhost_poll_queue(&node->vq->poll); + mutex_unlock(&node->vq->mutex); + list_del(&node->node); kfree(node); } @@ -985,7 +977,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, int ret = 0; mutex_lock(&dev->mutex); - vhost_dev_lock_vqs(dev); switch (msg->type) { case VHOST_IOTLB_UPDATE: if (!dev->iotlb) { @@ -1019,7 +1010,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, break; } - vhost_dev_unlock_vqs(dev); mutex_unlock(&dev->mutex); return ret; -- 1.8.3.1
xiangxia.m.yue at gmail.com
2018-Aug-01 03:00 UTC
[PATCH net-next v7 2/4] net: vhost: replace magic number of lock annotation
From: Tonghao Zhang <xiangxia.m.yue at gmail.com> Use the VHOST_NET_VQ_XXX as a subclass for mutex_lock_nested. Signed-off-by: Tonghao Zhang <xiangxia.m.yue at gmail.com> Acked-by: Jason Wang <jasowang at redhat.com> --- drivers/vhost/net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 367d802..32c1b52 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -712,7 +712,7 @@ static void handle_tx(struct vhost_net *net) struct vhost_virtqueue *vq = &nvq->vq; struct socket *sock; - mutex_lock(&vq->mutex); + mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX); sock = vq->private_data; if (!sock) goto out; @@ -777,7 +777,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk, /* Flush batched heads first */ vhost_net_signal_used(rnvq); /* Both tx vq and rx socket were polled here */ - mutex_lock_nested(&tvq->mutex, 1); + mutex_lock_nested(&tvq->mutex, VHOST_NET_VQ_TX); vhost_disable_notify(&net->dev, tvq); preempt_disable(); @@ -919,7 +919,7 @@ static void handle_rx(struct vhost_net *net) __virtio16 num_buffers; int recv_pkts = 0; - mutex_lock_nested(&vq->mutex, 0); + mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX); sock = vq->private_data; if (!sock) goto out; -- 1.8.3.1
xiangxia.m.yue at gmail.com
2018-Aug-01 03:00 UTC
[PATCH net-next v7 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll()
From: Tonghao Zhang <xiangxia.m.yue at gmail.com> Factor out generic busy polling logic and will be used for in tx path in the next patch. And with the patch, qemu can set differently the busyloop_timeout for rx queue. In the handle_tx, the busypoll will vhost_net_disable/enable_vq because we have poll the sock. This can improve performance. [This is suggested by Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>] And when the sock receive skb, we should queue the poll if necessary. Signed-off-by: Tonghao Zhang <xiangxia.m.yue at gmail.com> --- drivers/vhost/net.c | 131 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 91 insertions(+), 40 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 32c1b52..5b45463 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -440,6 +440,95 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq) nvq->done_idx = 0; } +static int sk_has_rx_data(struct sock *sk) +{ + struct socket *sock = sk->sk_socket; + + if (sock->ops->peek_len) + return sock->ops->peek_len(sock); + + return skb_queue_empty(&sk->sk_receive_queue); +} + +static void vhost_net_busy_poll_try_queue(struct vhost_net *net, + struct vhost_virtqueue *vq) +{ + if (!vhost_vq_avail_empty(&net->dev, vq)) { + vhost_poll_queue(&vq->poll); + } else if (unlikely(vhost_enable_notify(&net->dev, vq))) { + vhost_disable_notify(&net->dev, vq); + vhost_poll_queue(&vq->poll); + } +} + +static void vhost_net_busy_poll_check(struct vhost_net *net, + struct vhost_virtqueue *rvq, + struct vhost_virtqueue *tvq, + bool rx) +{ + struct socket *sock = rvq->private_data; + + if (rx) + vhost_net_busy_poll_try_queue(net, tvq); + else if (sock && sk_has_rx_data(sock->sk)) + vhost_net_busy_poll_try_queue(net, rvq); + else { + /* On tx here, sock has no rx data, so we + * will wait for sock wakeup for rx, and + * vhost_enable_notify() is not needed. */ + } +} + +static void vhost_net_busy_poll(struct vhost_net *net, + struct vhost_virtqueue *rvq, + struct vhost_virtqueue *tvq, + bool *busyloop_intr, + bool rx) +{ + unsigned long busyloop_timeout; + unsigned long endtime; + struct socket *sock; + struct vhost_virtqueue *vq = rx ? tvq : rvq; + + mutex_lock_nested(&vq->mutex, rx ? VHOST_NET_VQ_TX: VHOST_NET_VQ_RX); + vhost_disable_notify(&net->dev, vq); + sock = rvq->private_data; + + busyloop_timeout = rx ? rvq->busyloop_timeout: + tvq->busyloop_timeout; + + + /* Busypoll the sock, so don't need rx wakeups during it. */ + if (!rx) + vhost_net_disable_vq(net, vq); + + preempt_disable(); + endtime = busy_clock() + busyloop_timeout; + + while (vhost_can_busy_poll(endtime)) { + if (vhost_has_work(&net->dev)) { + *busyloop_intr = true; + break; + } + + if ((sock && sk_has_rx_data(sock->sk) && + !vhost_vq_avail_empty(&net->dev, rvq)) || + !vhost_vq_avail_empty(&net->dev, tvq)) + break; + + cpu_relax(); + } + + preempt_enable(); + + if (!rx) + vhost_net_enable_vq(net, vq); + + vhost_net_busy_poll_check(net, rvq, tvq, rx); + + mutex_unlock(&vq->mutex); +} + static int vhost_net_tx_get_vq_desc(struct vhost_net *net, struct vhost_net_virtqueue *nvq, unsigned int *out_num, unsigned int *in_num, @@ -753,16 +842,6 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk) return len; } -static int sk_has_rx_data(struct sock *sk) -{ - struct socket *sock = sk->sk_socket; - - if (sock->ops->peek_len) - return sock->ops->peek_len(sock); - - return skb_queue_empty(&sk->sk_receive_queue); -} - static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk, bool *busyloop_intr) { @@ -770,41 +849,13 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk, struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX]; struct vhost_virtqueue *rvq = &rnvq->vq; struct vhost_virtqueue *tvq = &tnvq->vq; - unsigned long uninitialized_var(endtime); int len = peek_head_len(rnvq, sk); - if (!len && tvq->busyloop_timeout) { + if (!len && rvq->busyloop_timeout) { /* Flush batched heads first */ vhost_net_signal_used(rnvq); /* Both tx vq and rx socket were polled here */ - mutex_lock_nested(&tvq->mutex, VHOST_NET_VQ_TX); - vhost_disable_notify(&net->dev, tvq); - - preempt_disable(); - endtime = busy_clock() + tvq->busyloop_timeout; - - while (vhost_can_busy_poll(endtime)) { - if (vhost_has_work(&net->dev)) { - *busyloop_intr = true; - break; - } - if ((sk_has_rx_data(sk) && - !vhost_vq_avail_empty(&net->dev, rvq)) || - !vhost_vq_avail_empty(&net->dev, tvq)) - break; - cpu_relax(); - } - - preempt_enable(); - - if (!vhost_vq_avail_empty(&net->dev, tvq)) { - vhost_poll_queue(&tvq->poll); - } else if (unlikely(vhost_enable_notify(&net->dev, tvq))) { - vhost_disable_notify(&net->dev, tvq); - vhost_poll_queue(&tvq->poll); - } - - mutex_unlock(&tvq->mutex); + vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true); len = peek_head_len(rnvq, sk); } -- 1.8.3.1
xiangxia.m.yue at gmail.com
2018-Aug-01 03:00 UTC
[PATCH net-next v7 4/4] net: vhost: add rx busy polling in tx path
From: Tonghao Zhang <xiangxia.m.yue at gmail.com> This patch improves the guest receive performance. On the handle_tx side, we poll the sock receive queue at the same time. handle_rx do that in the same way. We set the poll-us=100us and use the netperf to test throughput and mean latency. When running the tests, the vhost-net kthread of that VM, is alway 100% CPU. The commands are shown as below. Rx performance is greatly improved by this patch. There is not notable performance change on tx with this series though. This patch is useful for bi-directional traffic. netperf -H IP -t TCP_STREAM -l 20 -- -O "THROUGHPUT, THROUGHPUT_UNITS, MEAN_LATENCY" Topology: [Host] ->linux bridge -> tap vhost-net ->[Guest] TCP_STREAM: * Without the patch: 19842.95 Mbps, 6.50 us mean latency * With the patch: 38570.00 Mbps, 3.43 us mean latency Signed-off-by: Tonghao Zhang <xiangxia.m.yue at gmail.com> --- drivers/vhost/net.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 5b45463..b56db65 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -530,31 +530,24 @@ static void vhost_net_busy_poll(struct vhost_net *net, } static int vhost_net_tx_get_vq_desc(struct vhost_net *net, - struct vhost_net_virtqueue *nvq, + struct vhost_net_virtqueue *tnvq, unsigned int *out_num, unsigned int *in_num, bool *busyloop_intr) { - struct vhost_virtqueue *vq = &nvq->vq; - unsigned long uninitialized_var(endtime); - int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX]; + struct vhost_virtqueue *rvq = &rnvq->vq; + struct vhost_virtqueue *tvq = &tnvq->vq; + + int r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov), out_num, in_num, NULL, NULL); - if (r == vq->num && vq->busyloop_timeout) { - if (!vhost_sock_zcopy(vq->private_data)) - vhost_net_signal_used(nvq); - preempt_disable(); - endtime = busy_clock() + vq->busyloop_timeout; - while (vhost_can_busy_poll(endtime)) { - if (vhost_has_work(vq->dev)) { - *busyloop_intr = true; - break; - } - if (!vhost_vq_avail_empty(vq->dev, vq)) - break; - cpu_relax(); - } - preempt_enable(); - r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + if (r == tvq->num && tvq->busyloop_timeout) { + if (!vhost_sock_zcopy(tvq->private_data)) + vhost_net_signal_used(tnvq); + + vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, false); + + r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov), out_num, in_num, NULL, NULL); } -- 1.8.3.1
Jason Wang
2018-Aug-01 06:01 UTC
[PATCH net-next v7 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll()
On 2018?08?01? 11:00, xiangxia.m.yue at gmail.com wrote:> From: Tonghao Zhang <xiangxia.m.yue at gmail.com> > > Factor out generic busy polling logic and will be > used for in tx path in the next patch. And with the patch, > qemu can set differently the busyloop_timeout for rx queue. > > In the handle_tx, the busypoll will vhost_net_disable/enable_vq > because we have poll the sock. This can improve performance. > [This is suggested by Toshiaki Makita <makita.toshiaki at lab.ntt.co.jp>] > > And when the sock receive skb, we should queue the poll if necessary. > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue at gmail.com> > --- > drivers/vhost/net.c | 131 ++++++++++++++++++++++++++++++++++++---------------- > 1 file changed, 91 insertions(+), 40 deletions(-) > > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index 32c1b52..5b45463 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -440,6 +440,95 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq) > nvq->done_idx = 0; > } > > +static int sk_has_rx_data(struct sock *sk) > +{ > + struct socket *sock = sk->sk_socket; > + > + if (sock->ops->peek_len) > + return sock->ops->peek_len(sock); > + > + return skb_queue_empty(&sk->sk_receive_queue); > +} > + > +static void vhost_net_busy_poll_try_queue(struct vhost_net *net, > + struct vhost_virtqueue *vq) > +{ > + if (!vhost_vq_avail_empty(&net->dev, vq)) { > + vhost_poll_queue(&vq->poll); > + } else if (unlikely(vhost_enable_notify(&net->dev, vq))) { > + vhost_disable_notify(&net->dev, vq); > + vhost_poll_queue(&vq->poll); > + } > +} > + > +static void vhost_net_busy_poll_check(struct vhost_net *net, > + struct vhost_virtqueue *rvq, > + struct vhost_virtqueue *tvq, > + bool rx) > +{ > + struct socket *sock = rvq->private_data; > + > + if (rx) > + vhost_net_busy_poll_try_queue(net, tvq); > + else if (sock && sk_has_rx_data(sock->sk)) > + vhost_net_busy_poll_try_queue(net, rvq); > + else { > + /* On tx here, sock has no rx data, so we > + * will wait for sock wakeup for rx, and > + * vhost_enable_notify() is not needed. */A possible case is we do have rx data but guest does not refill the rx queue. In this case we may lose notifications from guest.> + } > +} > + > +static void vhost_net_busy_poll(struct vhost_net *net, > + struct vhost_virtqueue *rvq, > + struct vhost_virtqueue *tvq, > + bool *busyloop_intr, > + bool rx) > +{ > + unsigned long busyloop_timeout; > + unsigned long endtime; > + struct socket *sock; > + struct vhost_virtqueue *vq = rx ? tvq : rvq; > + > + mutex_lock_nested(&vq->mutex, rx ? VHOST_NET_VQ_TX: VHOST_NET_VQ_RX); > + vhost_disable_notify(&net->dev, vq); > + sock = rvq->private_data; > + > + busyloop_timeout = rx ? rvq->busyloop_timeout: > + tvq->busyloop_timeout; > + > + > + /* Busypoll the sock, so don't need rx wakeups during it. */ > + if (!rx) > + vhost_net_disable_vq(net, vq);Actually this piece of code is not a factoring out. I would suggest to add this in another patch, or on top of this series.> + > + preempt_disable(); > + endtime = busy_clock() + busyloop_timeout; > + > + while (vhost_can_busy_poll(endtime)) { > + if (vhost_has_work(&net->dev)) { > + *busyloop_intr = true; > + break; > + } > + > + if ((sock && sk_has_rx_data(sock->sk) && > + !vhost_vq_avail_empty(&net->dev, rvq)) || > + !vhost_vq_avail_empty(&net->dev, tvq)) > + break;Some checks were duplicated in vhost_net_busy_poll_check(). Need consider to unify them.> + > + cpu_relax(); > + } > + > + preempt_enable(); > + > + if (!rx) > + vhost_net_enable_vq(net, vq);No need to enable rx virtqueue, if we are sure handle_rx() will be called soon.> + > + vhost_net_busy_poll_check(net, rvq, tvq, rx);It looks to me just open code all check here is better and easier to be reviewed. Thanks> + > + mutex_unlock(&vq->mutex); > +} > + > static int vhost_net_tx_get_vq_desc(struct vhost_net *net, > struct vhost_net_virtqueue *nvq, > unsigned int *out_num, unsigned int *in_num, > @@ -753,16 +842,6 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk) > return len; > } > > -static int sk_has_rx_data(struct sock *sk) > -{ > - struct socket *sock = sk->sk_socket; > - > - if (sock->ops->peek_len) > - return sock->ops->peek_len(sock); > - > - return skb_queue_empty(&sk->sk_receive_queue); > -} > - > static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk, > bool *busyloop_intr) > { > @@ -770,41 +849,13 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk, > struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX]; > struct vhost_virtqueue *rvq = &rnvq->vq; > struct vhost_virtqueue *tvq = &tnvq->vq; > - unsigned long uninitialized_var(endtime); > int len = peek_head_len(rnvq, sk); > > - if (!len && tvq->busyloop_timeout) { > + if (!len && rvq->busyloop_timeout) { > /* Flush batched heads first */ > vhost_net_signal_used(rnvq); > /* Both tx vq and rx socket were polled here */ > - mutex_lock_nested(&tvq->mutex, VHOST_NET_VQ_TX); > - vhost_disable_notify(&net->dev, tvq); > - > - preempt_disable(); > - endtime = busy_clock() + tvq->busyloop_timeout; > - > - while (vhost_can_busy_poll(endtime)) { > - if (vhost_has_work(&net->dev)) { > - *busyloop_intr = true; > - break; > - } > - if ((sk_has_rx_data(sk) && > - !vhost_vq_avail_empty(&net->dev, rvq)) || > - !vhost_vq_avail_empty(&net->dev, tvq)) > - break; > - cpu_relax(); > - } > - > - preempt_enable(); > - > - if (!vhost_vq_avail_empty(&net->dev, tvq)) { > - vhost_poll_queue(&tvq->poll); > - } else if (unlikely(vhost_enable_notify(&net->dev, tvq))) { > - vhost_disable_notify(&net->dev, tvq); > - vhost_poll_queue(&tvq->poll); > - } > - > - mutex_unlock(&tvq->mutex); > + vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true); > > len = peek_head_len(rnvq, sk); > }