xiangxia.m.yue at gmail.com
2018-Jul-02 12:57 UTC
[PATCH net-next v4 0/4] net: vhost: improve performance when enable busyloop
From: Tonghao Zhang <xiangxia.m.yue at gmail.com> These patches improve the guest receive and transmit performance. On the handle_tx side, we poll the sock receive queue at the same time. handle_rx does the same. For the full performance report, see patch 4. v3 -> v4: fix some issues v2 -> v3: These patches are split from the previous big patch: http://patchwork.ozlabs.org/patch/934673/ Tonghao Zhang (4): vhost: lock the vqs one by one net: vhost: replace magic number of lock annotation net: vhost: factor out busy polling logic to vhost_net_busy_poll() net: vhost: add rx busy polling in tx path drivers/vhost/net.c | 108 ++++++++++++++++++++++++++++---------------------- drivers/vhost/vhost.c | 24 ++++------- 2 files changed, 67 insertions(+), 65 deletions(-) -- 1.8.3.1
xiangxia.m.yue at gmail.com
2018-Jul-02 12:57 UTC
[PATCH net-next v4 1/4] vhost: lock the vqs one by one
From: Tonghao Zhang <xiangxia.m.yue at gmail.com> This patch changes the locking scheme from locking all vqs at the same time to locking them one by one. It will be used by the next patch to avoid a deadlock. Signed-off-by: Tonghao Zhang <zhangtonghao at didichuxing.com> Acked-by: Jason Wang <jasowang at redhat.com> Signed-off-by: Jason Wang <jasowang at redhat.com> --- drivers/vhost/vhost.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 895eaa2..4ca9383 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -294,8 +294,11 @@ static void vhost_vq_meta_reset(struct vhost_dev *d) { int i; - for (i = 0; i < d->nvqs; ++i) + for (i = 0; i < d->nvqs; ++i) { + mutex_lock(&d->vqs[i]->mutex); __vhost_vq_meta_reset(d->vqs[i]); + mutex_unlock(&d->vqs[i]->mutex); + } } static void vhost_vq_reset(struct vhost_dev *dev, @@ -887,20 +890,6 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, #define vhost_get_used(vq, x, ptr) \ vhost_get_user(vq, x, ptr, VHOST_ADDR_USED) -static void vhost_dev_lock_vqs(struct vhost_dev *d) -{ - int i = 0; - for (i = 0; i < d->nvqs; ++i) - mutex_lock_nested(&d->vqs[i]->mutex, i); -} - -static void vhost_dev_unlock_vqs(struct vhost_dev *d) -{ - int i = 0; - for (i = 0; i < d->nvqs; ++i) - mutex_unlock(&d->vqs[i]->mutex); -} - static int vhost_new_umem_range(struct vhost_umem *umem, u64 start, u64 size, u64 end, u64 userspace_addr, int perm) @@ -950,7 +939,10 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d, if (msg->iova <= vq_msg->iova && msg->iova + msg->size - 1 > vq_msg->iova && vq_msg->type == VHOST_IOTLB_MISS) { + mutex_lock(&node->vq->mutex); vhost_poll_queue(&node->vq->poll); + mutex_unlock(&node->vq->mutex); + list_del(&node->node); kfree(node); } @@ -982,7 +974,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, int ret = 0; mutex_lock(&dev->mutex); - vhost_dev_lock_vqs(dev); switch (msg->type) { case 
VHOST_IOTLB_UPDATE: if (!dev->iotlb) { @@ -1016,7 +1007,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, break; } - vhost_dev_unlock_vqs(dev); mutex_unlock(&dev->mutex); return ret; -- 1.8.3.1
xiangxia.m.yue at gmail.com
2018-Jul-02 12:57 UTC
[PATCH net-next v4 2/4] net: vhost: replace magic number of lock annotation
From: Tonghao Zhang <xiangxia.m.yue at gmail.com> Use the VHOST_NET_VQ_XXX as a subclass for mutex_lock_nested. Signed-off-by: Tonghao Zhang <zhangtonghao at didichuxing.com> Acked-by: Jason Wang <jasowang at redhat.com> --- drivers/vhost/net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index e7cf7d2..62bb8e8 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -484,7 +484,7 @@ static void handle_tx(struct vhost_net *net) bool zcopy, zcopy_used; int sent_pkts = 0; - mutex_lock(&vq->mutex); + mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX); sock = vq->private_data; if (!sock) goto out; @@ -655,7 +655,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) /* Flush batched heads first */ vhost_rx_signal_used(rvq); /* Both tx vq and rx socket were polled here */ - mutex_lock_nested(&vq->mutex, 1); + mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX); vhost_disable_notify(&net->dev, vq); preempt_disable(); @@ -789,7 +789,7 @@ static void handle_rx(struct vhost_net *net) __virtio16 num_buffers; int recv_pkts = 0; - mutex_lock_nested(&vq->mutex, 0); + mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX); sock = vq->private_data; if (!sock) goto out; -- 1.8.3.1
xiangxia.m.yue at gmail.com
2018-Jul-02 12:57 UTC
[PATCH net-next v4 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll()
From: Tonghao Zhang <xiangxia.m.yue at gmail.com> Factor out the generic busy polling logic so that it can be used in the tx path in the next patch. With this patch, qemu can also set a different busyloop_timeout for the rx queue. Signed-off-by: Tonghao Zhang <zhangtonghao at didichuxing.com> --- drivers/vhost/net.c | 94 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 55 insertions(+), 39 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 62bb8e8..2790959 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -429,6 +429,52 @@ static int vhost_net_enable_vq(struct vhost_net *n, return vhost_poll_start(poll, sock->file); } +static int sk_has_rx_data(struct sock *sk) +{ + struct socket *sock = sk->sk_socket; + + if (sock->ops->peek_len) + return sock->ops->peek_len(sock); + + return skb_queue_empty(&sk->sk_receive_queue); +} + +static void vhost_net_busy_poll(struct vhost_net *net, + struct vhost_virtqueue *rvq, + struct vhost_virtqueue *tvq, + bool rx) +{ + unsigned long uninitialized_var(endtime); + unsigned long busyloop_timeout; + struct socket *sock; + struct vhost_virtqueue *vq = rx ? tvq : rvq; + + mutex_lock_nested(&vq->mutex, rx ? VHOST_NET_VQ_TX: VHOST_NET_VQ_RX); + + vhost_disable_notify(&net->dev, vq); + sock = rvq->private_data; + busyloop_timeout = rx ? 
rvq->busyloop_timeout : tvq->busyloop_timeout; + + preempt_disable(); + endtime = busy_clock() + busyloop_timeout; + while (vhost_can_busy_poll(tvq->dev, endtime) && + !(sock && sk_has_rx_data(sock->sk)) && + vhost_vq_avail_empty(tvq->dev, tvq)) + cpu_relax(); + preempt_enable(); + + if ((rx && !vhost_vq_avail_empty(&net->dev, vq)) || + (!rx && (sock && sk_has_rx_data(sock->sk)))) { + vhost_poll_queue(&vq->poll); + } else if (unlikely(vhost_enable_notify(&net->dev, vq))) { + vhost_disable_notify(&net->dev, vq); + vhost_poll_queue(&vq->poll); + } + + mutex_unlock(&vq->mutex); +} + + static int vhost_net_tx_get_vq_desc(struct vhost_net *net, struct vhost_virtqueue *vq, struct iovec iov[], unsigned int iov_size, @@ -621,16 +667,6 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk) return len; } -static int sk_has_rx_data(struct sock *sk) -{ - struct socket *sock = sk->sk_socket; - - if (sock->ops->peek_len) - return sock->ops->peek_len(sock); - - return skb_queue_empty(&sk->sk_receive_queue); -} - static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq) { struct vhost_virtqueue *vq = &nvq->vq; @@ -645,39 +681,19 @@ static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq) static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) { - struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX]; - struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; - struct vhost_virtqueue *vq = &nvq->vq; - unsigned long uninitialized_var(endtime); - int len = peek_head_len(rvq, sk); + struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX]; + struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX]; - if (!len && vq->busyloop_timeout) { - /* Flush batched heads first */ - vhost_rx_signal_used(rvq); - /* Both tx vq and rx socket were polled here */ - mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX); - vhost_disable_notify(&net->dev, vq); + int len = peek_head_len(rnvq, sk); - preempt_disable(); - endtime = 
busy_clock() + vq->busyloop_timeout; - - while (vhost_can_busy_poll(&net->dev, endtime) && - !sk_has_rx_data(sk) && - vhost_vq_avail_empty(&net->dev, vq)) - cpu_relax(); - - preempt_enable(); - - if (!vhost_vq_avail_empty(&net->dev, vq)) - vhost_poll_queue(&vq->poll); - else if (unlikely(vhost_enable_notify(&net->dev, vq))) { - vhost_disable_notify(&net->dev, vq); - vhost_poll_queue(&vq->poll); - } + if (!len && rnvq->vq.busyloop_timeout) { + /* Flush batched heads first */ + vhost_rx_signal_used(rnvq); - mutex_unlock(&vq->mutex); + /* Both tx vq and rx socket were polled here */ + vhost_net_busy_poll(net, &rnvq->vq, &tnvq->vq, true); - len = peek_head_len(rvq, sk); + len = peek_head_len(rnvq, sk); } return len; -- 1.8.3.1
xiangxia.m.yue at gmail.com
2018-Jul-02 12:57 UTC
[PATCH net-next v4 4/4] net: vhost: add rx busy polling in tx path
From: Tonghao Zhang <xiangxia.m.yue at gmail.com> This patch improves the guest receive and transmit performance. On the handle_tx side, we poll the sock receive queue at the same time. handle_rx does the same. We set poll-us=100us and use iperf3 to test bandwidth and netperf to test throughput and mean latency. When running the tests, the vhost-net kthread of that VM is always at 100% CPU. The commands are shown below. iperf3 -s -D iperf3 -c IP -i 1 -P 1 -t 20 -M 1400 or netserver netperf -H IP -t TCP_RR -l 20 -- -O "THROUGHPUT,MEAN_LATENCY" host -> guest: iperf3: * With the patch: 27.0 Gbits/sec * Without the patch: 14.4 Gbits/sec netperf (TCP_RR): * With the patch: 48039.56 trans/s, 20.64us mean latency * Without the patch: 46027.07 trans/s, 21.58us mean latency This patch also improves the guest transmit performance. guest -> host: iperf3: * With the patch: 27.2 Gbits/sec * Without the patch: 24.4 Gbits/sec netperf (TCP_RR): * With the patch: 47963.25 trans/s, 20.71us mean latency * Without the patch: 45796.70 trans/s, 21.68us mean latency Signed-off-by: Tonghao Zhang <zhangtonghao at didichuxing.com> --- drivers/vhost/net.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 2790959..3f26547 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -480,17 +480,13 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net, struct iovec iov[], unsigned int iov_size, unsigned int *out_num, unsigned int *in_num) { - unsigned long uninitialized_var(endtime); + struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX]; int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), out_num, in_num, NULL, NULL); if (r == vq->num && vq->busyloop_timeout) { - preempt_disable(); - endtime = busy_clock() + vq->busyloop_timeout; - while (vhost_can_busy_poll(vq->dev, endtime) && - vhost_vq_avail_empty(vq->dev, vq)) - cpu_relax(); - preempt_enable(); + 
vhost_net_busy_poll(net, &rnvq->vq, vq, false); + r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), out_num, in_num, NULL, NULL); } -- 1.8.3.1
Jason Wang
2018-Jul-03 02:12 UTC
[PATCH net-next v4 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll()
On 2018年07月02日 20:57, xiangxia.m.yue at gmail.com wrote: > From: Tonghao Zhang <xiangxia.m.yue at gmail.com> > > Factor out generic busy polling logic and will be > used for in tx path in the next patch. And with the patch, > qemu can set differently the busyloop_timeout for rx queue. > > Signed-off-by: Tonghao Zhang <zhangtonghao at didichuxing.com> > --- > drivers/vhost/net.c | 94 +++++++++++++++++++++++++++++++---------------------- > 1 file changed, 55 insertions(+), 39 deletions(-) > > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index 62bb8e8..2790959 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -429,6 +429,52 @@ static int vhost_net_enable_vq(struct vhost_net *n, > return vhost_poll_start(poll, sock->file); > } > > +static int sk_has_rx_data(struct sock *sk) > +{ > + struct socket *sock = sk->sk_socket; > + > + if (sock->ops->peek_len) > + return sock->ops->peek_len(sock); > + > + return skb_queue_empty(&sk->sk_receive_queue); > +} > + > +static void vhost_net_busy_poll(struct vhost_net *net, > + struct vhost_virtqueue *rvq, > + struct vhost_virtqueue *tvq, > + bool rx) > +{ > + unsigned long uninitialized_var(endtime); > + unsigned long busyloop_timeout; > + struct socket *sock; > + struct vhost_virtqueue *vq = rx ? tvq : rvq; > + > + mutex_lock_nested(&vq->mutex, rx ? VHOST_NET_VQ_TX: VHOST_NET_VQ_RX); > + > + vhost_disable_notify(&net->dev, vq); > + sock = rvq->private_data; > + busyloop_timeout = rx ? 
rvq->busyloop_timeout : tvq->busyloop_timeout; > + > + preempt_disable(); > + endtime = busy_clock() + busyloop_timeout; > + while (vhost_can_busy_poll(tvq->dev, endtime) && > + !(sock && sk_has_rx_data(sock->sk)) && > + vhost_vq_avail_empty(tvq->dev, tvq)) > + cpu_relax(); > + preempt_enable(); > + > + if ((rx && !vhost_vq_avail_empty(&net->dev, vq)) || > + (!rx && (sock && sk_has_rx_data(sock->sk)))) { > + vhost_poll_queue(&vq->poll); > + } else if (unlikely(vhost_enable_notify(&net->dev, vq))) { One last question: do we need this for rx? This check will always be true under light or medium load. Thanks > + vhost_disable_notify(&net->dev, vq); > + vhost_poll_queue(&vq->poll); > + } > + > + mutex_unlock(&vq->mutex); > +} > + > + > static int vhost_net_tx_get_vq_desc(struct vhost_net *net, > struct vhost_virtqueue *vq, > struct iovec iov[], unsigned int iov_size, > @@ -621,16 +667,6 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk) > return len; > } > > -static int sk_has_rx_data(struct sock *sk) > -{ > - struct socket *sock = sk->sk_socket; > - > - if (sock->ops->peek_len) > - return sock->ops->peek_len(sock); > - > - return skb_queue_empty(&sk->sk_receive_queue); > -} > - > static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq) > { > struct vhost_virtqueue *vq = &nvq->vq; > @@ -645,39 +681,19 @@ static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq) > > static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) > { > - struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX]; > - struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; > - struct vhost_virtqueue *vq = &nvq->vq; > - unsigned long uninitialized_var(endtime); > - int len = peek_head_len(rvq, sk); > + struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX]; > + struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX]; > > - if (!len && vq->busyloop_timeout) { > - /* Flush batched heads first */ > - 
vhost_rx_signal_used(rvq); > - /* Both tx vq and rx socket were polled here */ > - mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX); > - vhost_disable_notify(&net->dev, vq); > + int len = peek_head_len(rnvq, sk); > > - preempt_disable(); > - endtime = busy_clock() + vq->busyloop_timeout; > - > - while (vhost_can_busy_poll(&net->dev, endtime) && > - !sk_has_rx_data(sk) && > - vhost_vq_avail_empty(&net->dev, vq)) > - cpu_relax(); > - > - preempt_enable(); > - > - if (!vhost_vq_avail_empty(&net->dev, vq)) > - vhost_poll_queue(&vq->poll); > - else if (unlikely(vhost_enable_notify(&net->dev, vq))) { > - vhost_disable_notify(&net->dev, vq); > - vhost_poll_queue(&vq->poll); > - } > + if (!len && rnvq->vq.busyloop_timeout) { > + /* Flush batched heads first */ > + vhost_rx_signal_used(rnvq); > > - mutex_unlock(&vq->mutex); > + /* Both tx vq and rx socket were polled here */ > + vhost_net_busy_poll(net, &rnvq->vq, &tnvq->vq, true); > > - len = peek_head_len(rvq, sk); > + len = peek_head_len(rnvq, sk); > } > > return len;
Possibly Parallel Threads
- [PATCH net-next v4 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll()
- [PATCH net-next v4 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll()
- [PATCH net-next v4 0/4] net: vhost: improve performance when enable busyloop
- [PATCH net-next v3 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll()
- [PATCH net-next v5 0/4] net: vhost: improve performance when enable busyloop