thr3ads.net - Linux Virtualization - [PATCH net-next v4 3/4] net: vhost: factor out busy polling logic to vhost_net_busy

If this information is useful, please help other people find it:
Share via:

xiangxia.m.yue at gmail.com

2018-Jul-02 12:57 UTC

[PATCH net-next v4 0/4] net: vhost: improve performance when enable busyloop

From: Tonghao Zhang <xiangxia.m.yue at gmail.com>

This patches improve the guest receive and transmit performance.
On the handle_tx side, we poll the sock receive queue at the same time.
handle_rx do that in the same way.

For more performance report, see patch 4.

v3 -> v4:
fix some issues

v2 -> v3:
This patches are splited from previous big patch:
http://patchwork.ozlabs.org/patch/934673/

Tonghao Zhang (4):
  vhost: lock the vqs one by one
  net: vhost: replace magic number of lock annotation
  net: vhost: factor out busy polling logic to vhost_net_busy_poll()
  net: vhost: add rx busy polling in tx path

 drivers/vhost/net.c   | 108 ++++++++++++++++++++++++++++----------------------
 drivers/vhost/vhost.c |  24 ++++-------
 2 files changed, 67 insertions(+), 65 deletions(-)

-- 
1.8.3.1

xiangxia.m.yue at gmail.com

2018-Jul-02 12:57 UTC

head link

[PATCH net-next v4 1/4] vhost: lock the vqs one by one

From: Tonghao Zhang <xiangxia.m.yue at gmail.com>

This patch changes the way that lock all vqs
at the same, to lock them one by one. It will
be used for next patch to avoid the deadlock.

Signed-off-by: Tonghao Zhang <zhangtonghao at didichuxing.com>
Acked-by: Jason Wang <jasowang at redhat.com>
Signed-off-by: Jason Wang <jasowang at redhat.com>
---
 drivers/vhost/vhost.c | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 895eaa2..4ca9383 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -294,8 +294,11 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
 {
 	int i;
 
-	for (i = 0; i < d->nvqs; ++i)
+	for (i = 0; i < d->nvqs; ++i) {
+		mutex_lock(&d->vqs[i]->mutex);
 		__vhost_vq_meta_reset(d->vqs[i]);
+		mutex_unlock(&d->vqs[i]->mutex);
+	}
 }
 
 static void vhost_vq_reset(struct vhost_dev *dev,
@@ -887,20 +890,6 @@ static inline void __user *__vhost_get_user(struct
vhost_virtqueue *vq,
 #define vhost_get_used(vq, x, ptr) \
 	vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
 
-static void vhost_dev_lock_vqs(struct vhost_dev *d)
-{
-	int i = 0;
-	for (i = 0; i < d->nvqs; ++i)
-		mutex_lock_nested(&d->vqs[i]->mutex, i);
-}
-
-static void vhost_dev_unlock_vqs(struct vhost_dev *d)
-{
-	int i = 0;
-	for (i = 0; i < d->nvqs; ++i)
-		mutex_unlock(&d->vqs[i]->mutex);
-}
-
 static int vhost_new_umem_range(struct vhost_umem *umem,
 				u64 start, u64 size, u64 end,
 				u64 userspace_addr, int perm)
@@ -950,7 +939,10 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d,
 		if (msg->iova <= vq_msg->iova &&
 		    msg->iova + msg->size - 1 > vq_msg->iova &&
 		    vq_msg->type == VHOST_IOTLB_MISS) {
+			mutex_lock(&node->vq->mutex);
 			vhost_poll_queue(&node->vq->poll);
+			mutex_unlock(&node->vq->mutex);
+
 			list_del(&node->node);
 			kfree(node);
 		}
@@ -982,7 +974,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
 	int ret = 0;
 
 	mutex_lock(&dev->mutex);
-	vhost_dev_lock_vqs(dev);
 	switch (msg->type) {
 	case VHOST_IOTLB_UPDATE:
 		if (!dev->iotlb) {
@@ -1016,7 +1007,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
 		break;
 	}
 
-	vhost_dev_unlock_vqs(dev);
 	mutex_unlock(&dev->mutex);
 
 	return ret;
-- 
1.8.3.1

xiangxia.m.yue at gmail.com

2018-Jul-02 12:57 UTC

head link

[PATCH net-next v4 2/4] net: vhost: replace magic number of lock annotation

From: Tonghao Zhang <xiangxia.m.yue at gmail.com>

Use the VHOST_NET_VQ_XXX as a subclass for mutex_lock_nested.

Signed-off-by: Tonghao Zhang <zhangtonghao at didichuxing.com>
Acked-by: Jason Wang <jasowang at redhat.com>
---
 drivers/vhost/net.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index e7cf7d2..62bb8e8 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -484,7 +484,7 @@ static void handle_tx(struct vhost_net *net)
 	bool zcopy, zcopy_used;
 	int sent_pkts = 0;
 
-	mutex_lock(&vq->mutex);
+	mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
 	sock = vq->private_data;
 	if (!sock)
 		goto out;
@@ -655,7 +655,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net,
struct sock *sk)
 		/* Flush batched heads first */
 		vhost_rx_signal_used(rvq);
 		/* Both tx vq and rx socket were polled here */
-		mutex_lock_nested(&vq->mutex, 1);
+		mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
 		vhost_disable_notify(&net->dev, vq);
 
 		preempt_disable();
@@ -789,7 +789,7 @@ static void handle_rx(struct vhost_net *net)
 	__virtio16 num_buffers;
 	int recv_pkts = 0;
 
-	mutex_lock_nested(&vq->mutex, 0);
+	mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX);
 	sock = vq->private_data;
 	if (!sock)
 		goto out;
-- 
1.8.3.1

xiangxia.m.yue at gmail.com

2018-Jul-02 12:57 UTC

head link

[PATCH net-next v4 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll()

From: Tonghao Zhang <xiangxia.m.yue at gmail.com>

Factor out generic busy polling logic and will be
used for in tx path in the next patch. And with the patch,
qemu can set differently the busyloop_timeout for rx queue.

Signed-off-by: Tonghao Zhang <zhangtonghao at didichuxing.com>
---
 drivers/vhost/net.c | 94 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 55 insertions(+), 39 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 62bb8e8..2790959 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -429,6 +429,52 @@ static int vhost_net_enable_vq(struct vhost_net *n,
 	return vhost_poll_start(poll, sock->file);
 }
 
+static int sk_has_rx_data(struct sock *sk)
+{
+	struct socket *sock = sk->sk_socket;
+
+	if (sock->ops->peek_len)
+		return sock->ops->peek_len(sock);
+
+	return skb_queue_empty(&sk->sk_receive_queue);
+}
+
+static void vhost_net_busy_poll(struct vhost_net *net,
+				struct vhost_virtqueue *rvq,
+				struct vhost_virtqueue *tvq,
+				bool rx)
+{
+	unsigned long uninitialized_var(endtime);
+	unsigned long busyloop_timeout;
+	struct socket *sock;
+	struct vhost_virtqueue *vq = rx ? tvq : rvq;
+
+	mutex_lock_nested(&vq->mutex, rx ? VHOST_NET_VQ_TX: VHOST_NET_VQ_RX);
+
+	vhost_disable_notify(&net->dev, vq);
+	sock = rvq->private_data;
+	busyloop_timeout = rx ? rvq->busyloop_timeout : tvq->busyloop_timeout;
+
+	preempt_disable();
+	endtime = busy_clock() + busyloop_timeout;
+	while (vhost_can_busy_poll(tvq->dev, endtime) &&
+	       !(sock && sk_has_rx_data(sock->sk)) &&
+	       vhost_vq_avail_empty(tvq->dev, tvq))
+		cpu_relax();
+	preempt_enable();
+
+	if ((rx && !vhost_vq_avail_empty(&net->dev, vq)) ||
+	    (!rx && (sock && sk_has_rx_data(sock->sk)))) {
+		vhost_poll_queue(&vq->poll);
+	} else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+		vhost_disable_notify(&net->dev, vq);
+		vhost_poll_queue(&vq->poll);
+	}
+
+	mutex_unlock(&vq->mutex);
+}
+
+
 static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
 				    struct vhost_virtqueue *vq,
 				    struct iovec iov[], unsigned int iov_size,
@@ -621,16 +667,6 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq,
struct sock *sk)
 	return len;
 }
 
-static int sk_has_rx_data(struct sock *sk)
-{
-	struct socket *sock = sk->sk_socket;
-
-	if (sock->ops->peek_len)
-		return sock->ops->peek_len(sock);
-
-	return skb_queue_empty(&sk->sk_receive_queue);
-}
-
 static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq)
 {
 	struct vhost_virtqueue *vq = &nvq->vq;
@@ -645,39 +681,19 @@ static void vhost_rx_signal_used(struct
vhost_net_virtqueue *nvq)
 
 static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
 {
-	struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX];
-	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
-	struct vhost_virtqueue *vq = &nvq->vq;
-	unsigned long uninitialized_var(endtime);
-	int len = peek_head_len(rvq, sk);
+	struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
+	struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
 
-	if (!len && vq->busyloop_timeout) {
-		/* Flush batched heads first */
-		vhost_rx_signal_used(rvq);
-		/* Both tx vq and rx socket were polled here */
-		mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
-		vhost_disable_notify(&net->dev, vq);
+	int len = peek_head_len(rnvq, sk);
 
-		preempt_disable();
-		endtime = busy_clock() + vq->busyloop_timeout;
-
-		while (vhost_can_busy_poll(&net->dev, endtime) &&
-		       !sk_has_rx_data(sk) &&
-		       vhost_vq_avail_empty(&net->dev, vq))
-			cpu_relax();
-
-		preempt_enable();
-
-		if (!vhost_vq_avail_empty(&net->dev, vq))
-			vhost_poll_queue(&vq->poll);
-		else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
-			vhost_disable_notify(&net->dev, vq);
-			vhost_poll_queue(&vq->poll);
-		}
+	if (!len && rnvq->vq.busyloop_timeout) {
+		/* Flush batched heads first */
+		vhost_rx_signal_used(rnvq);
 
-		mutex_unlock(&vq->mutex);
+		/* Both tx vq and rx socket were polled here */
+		vhost_net_busy_poll(net, &rnvq->vq, &tnvq->vq, true);
 
-		len = peek_head_len(rvq, sk);
+		len = peek_head_len(rnvq, sk);
 	}
 
 	return len;
-- 
1.8.3.1

xiangxia.m.yue at gmail.com

2018-Jul-02 12:57 UTC

head link

[PATCH net-next v4 4/4] net: vhost: add rx busy polling in tx path

From: Tonghao Zhang <xiangxia.m.yue at gmail.com>

This patch improves the guest receive and transmit performance.
On the handle_tx side, we poll the sock receive queue at the
same time. handle_rx do that in the same way.

We set the poll-us=100us and use the iperf3 to test
its bandwidth, use the netperf to test throughput and mean
latency. When running the tests, the vhost-net kthread of
that VM, is alway 100% CPU. The commands are shown as below.

iperf3  -s -D
iperf3  -c IP -i 1 -P 1 -t 20 -M 1400

or
netserver
netperf -H IP -t TCP_RR -l 20 -- -O "THROUGHPUT,MEAN_LATENCY"

host -> guest:
iperf3:
* With the patch:     27.0 Gbits/sec
* Without the patch:  14.4 Gbits/sec

netperf (TCP_RR):
* With the patch:     48039.56 trans/s, 20.64us mean latency
* Without the patch:  46027.07 trans/s, 21.58us mean latency

This patch also improves the guest transmit performance.

guest -> host:
iperf3:
* With the patch:     27.2 Gbits/sec
* Without the patch:  24.4 Gbits/sec

netperf (TCP_RR):
* With the patch:     47963.25 trans/s, 20.71us mean latency
* Without the patch:  45796.70 trans/s, 21.68us mean latency

Signed-off-by: Tonghao Zhang <zhangtonghao at didichuxing.com>
---
 drivers/vhost/net.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 2790959..3f26547 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -480,17 +480,13 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
 				    struct iovec iov[], unsigned int iov_size,
 				    unsigned int *out_num, unsigned int *in_num)
 {
-	unsigned long uninitialized_var(endtime);
+	struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
 	int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
 				  out_num, in_num, NULL, NULL);
 
 	if (r == vq->num && vq->busyloop_timeout) {
-		preempt_disable();
-		endtime = busy_clock() + vq->busyloop_timeout;
-		while (vhost_can_busy_poll(vq->dev, endtime) &&
-		       vhost_vq_avail_empty(vq->dev, vq))
-			cpu_relax();
-		preempt_enable();
+		vhost_net_busy_poll(net, &rnvq->vq, vq, false);
+
 		r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
 				      out_num, in_num, NULL, NULL);
 	}
-- 
1.8.3.1

Jason Wang

2018-Jul-03 02:12 UTC

head link

[PATCH net-next v4 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll()

On 2018?07?02? 20:57, xiangxia.m.yue at gmail.com wrote:> From: Tonghao Zhang <xiangxia.m.yue at gmail.com>
>
> Factor out generic busy polling logic and will be
> used for in tx path in the next patch. And with the patch,
> qemu can set differently the busyloop_timeout for rx queue.
>
> Signed-off-by: Tonghao Zhang <zhangtonghao at didichuxing.com>
> ---
>   drivers/vhost/net.c | 94
+++++++++++++++++++++++++++++++----------------------
>   1 file changed, 55 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 62bb8e8..2790959 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -429,6 +429,52 @@ static int vhost_net_enable_vq(struct vhost_net *n,
>   	return vhost_poll_start(poll, sock->file);
>   }
>   
> +static int sk_has_rx_data(struct sock *sk)
> +{
> +	struct socket *sock = sk->sk_socket;
> +
> +	if (sock->ops->peek_len)
> +		return sock->ops->peek_len(sock);
> +
> +	return skb_queue_empty(&sk->sk_receive_queue);
> +}
> +
> +static void vhost_net_busy_poll(struct vhost_net *net,
> +				struct vhost_virtqueue *rvq,
> +				struct vhost_virtqueue *tvq,
> +				bool rx)
> +{
> +	unsigned long uninitialized_var(endtime);
> +	unsigned long busyloop_timeout;
> +	struct socket *sock;
> +	struct vhost_virtqueue *vq = rx ? tvq : rvq;
> +
> +	mutex_lock_nested(&vq->mutex, rx ? VHOST_NET_VQ_TX:
VHOST_NET_VQ_RX);
> +
> +	vhost_disable_notify(&net->dev, vq);
> +	sock = rvq->private_data;
> +	busyloop_timeout = rx ? rvq->busyloop_timeout :
tvq->busyloop_timeout;
> +
> +	preempt_disable();
> +	endtime = busy_clock() + busyloop_timeout;
> +	while (vhost_can_busy_poll(tvq->dev, endtime) &&
> +	       !(sock && sk_has_rx_data(sock->sk)) &&
> +	       vhost_vq_avail_empty(tvq->dev, tvq))
> +		cpu_relax();
> +	preempt_enable();
> +
> +	if ((rx && !vhost_vq_avail_empty(&net->dev, vq)) ||
> +	    (!rx && (sock && sk_has_rx_data(sock->sk)))) {
> +		vhost_poll_queue(&vq->poll);
> +	} else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
One last question, do we need this for rx? This check will be always 
true under light or medium load.

Thanks
> +		vhost_disable_notify(&net->dev, vq);
> +		vhost_poll_queue(&vq->poll);
> +	}
> +
> +	mutex_unlock(&vq->mutex);
> +}
> +
> +
>   static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
>   				    struct vhost_virtqueue *vq,
>   				    struct iovec iov[], unsigned int iov_size,
> @@ -621,16 +667,6 @@ static int peek_head_len(struct vhost_net_virtqueue
*rvq, struct sock *sk)
>   	return len;
>   }
>   
> -static int sk_has_rx_data(struct sock *sk)
> -{
> -	struct socket *sock = sk->sk_socket;
> -
> -	if (sock->ops->peek_len)
> -		return sock->ops->peek_len(sock);
> -
> -	return skb_queue_empty(&sk->sk_receive_queue);
> -}
> -
>   static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq)
>   {
>   	struct vhost_virtqueue *vq = &nvq->vq;
> @@ -645,39 +681,19 @@ static void vhost_rx_signal_used(struct
vhost_net_virtqueue *nvq)
>   
>   static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock
*sk)
>   {
> -	struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX];
> -	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
> -	struct vhost_virtqueue *vq = &nvq->vq;
> -	unsigned long uninitialized_var(endtime);
> -	int len = peek_head_len(rvq, sk);
> +	struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
> +	struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
>   
> -	if (!len && vq->busyloop_timeout) {
> -		/* Flush batched heads first */
> -		vhost_rx_signal_used(rvq);
> -		/* Both tx vq and rx socket were polled here */
> -		mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
> -		vhost_disable_notify(&net->dev, vq);
> +	int len = peek_head_len(rnvq, sk);
>   
> -		preempt_disable();
> -		endtime = busy_clock() + vq->busyloop_timeout;
> -
> -		while (vhost_can_busy_poll(&net->dev, endtime) &&
> -		       !sk_has_rx_data(sk) &&
> -		       vhost_vq_avail_empty(&net->dev, vq))
> -			cpu_relax();
> -
> -		preempt_enable();
> -
> -		if (!vhost_vq_avail_empty(&net->dev, vq))
> -			vhost_poll_queue(&vq->poll);
> -		else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
> -			vhost_disable_notify(&net->dev, vq);
> -			vhost_poll_queue(&vq->poll);
> -		}
> +	if (!len && rnvq->vq.busyloop_timeout) {
> +		/* Flush batched heads first */
> +		vhost_rx_signal_used(rnvq);
>   
> -		mutex_unlock(&vq->mutex);
> +		/* Both tx vq and rx socket were polled here */
> +		vhost_net_busy_poll(net, &rnvq->vq, &tnvq->vq, true);
>   
> -		len = peek_head_len(rvq, sk);
> +		len = peek_head_len(rnvq, sk);
>   	}
>   
>   	return len;

Possibly Parallel Threads

Search for more seemingly similar threads

Linux Virtualization - Jul 2018 - [PATCH net-next v4 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll()

[PATCH net-next v4 0/4] net: vhost: improve performance when enable busyloop

[PATCH net-next v4 1/4] vhost: lock the vqs one by one

[PATCH net-next v4 2/4] net: vhost: replace magic number of lock annotation

[PATCH net-next v4 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll()

[PATCH net-next v4 4/4] net: vhost: add rx busy polling in tx path

[PATCH net-next v4 3/4] net: vhost: factor out busy polling logic to vhost_net_busy_poll()

Possibly Parallel Threads