Hi:

This series tries to optimize vhost_net polling at two points:

- Stop rx polling for reducing the unnecessary wakeups during
  handle_rx().
- Conditionally enable tx polling for reducing the unnecessary
  traversing and spinlock touching.

Test shows about 17% improvement on rx pps.

Please review

Changes from V1:
- use vhost_net_disable_vq()/vhost_net_enable_vq() instead of open
  coding.
- Add a new patch for conditionally enabling tx polling.

Jason Wang (2):
  vhost_net: stop polling socket during rx processing
  vhost_net: conditionally enable tx polling

 drivers/vhost/net.c | 59 +++++++++++++++++++++++++++++------------------------
 1 file changed, 32 insertions(+), 27 deletions(-)

-- 
1.8.3.1
Jason Wang
2016-May-30  06:47 UTC
[PATCH V2 1/2] vhost_net: stop polling socket during rx processing
We don't stop polling the socket during rx processing, which leads to
unnecessary wakeups from the underlying net devices (e.g.
sock_def_readable() from tun). Rx is slowed down as a
result. This patch avoids this by stopping socket polling during rx
processing. A small drawback is that this introduces some overhead in
the light load case because of the extra start/stop polling, but a single
netperf TCP_RR does not notice any change. In a super heavy load case,
e.g. using pktgen to inject packets into the guest, we get about 8.8%
improvement on pps:
before: ~1240000 pkt/s
after:  ~1350000 pkt/s
Signed-off-by: Jason Wang <jasowang at redhat.com>
---
 drivers/vhost/net.c | 56 +++++++++++++++++++++++++++--------------------------
 1 file changed, 29 insertions(+), 27 deletions(-)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 10ff494..e91603b 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -301,6 +301,32 @@ static bool vhost_can_busy_poll(struct vhost_dev *dev,
 	       !vhost_has_work(dev);
 }
 
+static void vhost_net_disable_vq(struct vhost_net *n,
+				 struct vhost_virtqueue *vq)
+{
+	struct vhost_net_virtqueue *nvq =
+		container_of(vq, struct vhost_net_virtqueue, vq);
+	struct vhost_poll *poll = n->poll + (nvq - n->vqs);
+	if (!vq->private_data)
+		return;
+	vhost_poll_stop(poll);
+}
+
+static int vhost_net_enable_vq(struct vhost_net *n,
+				struct vhost_virtqueue *vq)
+{
+	struct vhost_net_virtqueue *nvq =
+		container_of(vq, struct vhost_net_virtqueue, vq);
+	struct vhost_poll *poll = n->poll + (nvq - n->vqs);
+	struct socket *sock;
+
+	sock = vq->private_data;
+	if (!sock)
+		return 0;
+
+	return vhost_poll_start(poll, sock->file);
+}
+
 static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
 				    struct vhost_virtqueue *vq,
 				    struct iovec iov[], unsigned int iov_size,
@@ -627,6 +653,7 @@ static void handle_rx(struct vhost_net *net)
 	if (!sock)
 		goto out;
 	vhost_disable_notify(&net->dev, vq);
+	vhost_net_disable_vq(net, vq);
 
 	vhost_hlen = nvq->vhost_hlen;
 	sock_hlen = nvq->sock_hlen;
@@ -715,9 +742,10 @@ static void handle_rx(struct vhost_net *net)
 		total_len += vhost_len;
 		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
 			vhost_poll_queue(&vq->poll);
-			break;
+			goto out;
 		}
 	}
+	vhost_net_enable_vq(net, vq);
 out:
 	mutex_unlock(&vq->mutex);
 }
@@ -796,32 +824,6 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 	return 0;
 }
 
-static void vhost_net_disable_vq(struct vhost_net *n,
-				 struct vhost_virtqueue *vq)
-{
-	struct vhost_net_virtqueue *nvq =
-		container_of(vq, struct vhost_net_virtqueue, vq);
-	struct vhost_poll *poll = n->poll + (nvq - n->vqs);
-	if (!vq->private_data)
-		return;
-	vhost_poll_stop(poll);
-}
-
-static int vhost_net_enable_vq(struct vhost_net *n,
-				struct vhost_virtqueue *vq)
-{
-	struct vhost_net_virtqueue *nvq =
-		container_of(vq, struct vhost_net_virtqueue, vq);
-	struct vhost_poll *poll = n->poll + (nvq - n->vqs);
-	struct socket *sock;
-
-	sock = vq->private_data;
-	if (!sock)
-		return 0;
-
-	return vhost_poll_start(poll, sock->file);
-}
-
 static struct socket *vhost_net_stop_vq(struct vhost_net *n,
 					struct vhost_virtqueue *vq)
 {
-- 
1.8.3.1
Jason Wang
2016-May-30  06:47 UTC
[PATCH V2 2/2] vhost_net: conditionally enable tx polling
We always poll tx for the socket; this is suboptimal since:
- it will only be used when we exceed the sndbuf of the socket.
- since we use two independent polls for tx and vq, this will slightly
  increase the waitqueue traversing time and, more importantly, vhost
  could not benefit from commit
  9e641bdcfa4ef4d6e2fbaa59c1be0ad5d1551fd5 ("net-tun: restructure
  tun_do_read for better sleep/wakeup efficiency") even if we've
  stopped rx polling during handle_rx, since the tx poll was still left in
  the waitqueue.
Fix this by conditionally enabling tx polling only when -EAGAIN is
met.
Test shows about 8% improvement on guest rx pps.
Before: ~1350000
After:  ~1460000
Signed-off-by: Jason Wang <jasowang at redhat.com>
---
 drivers/vhost/net.c | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index e91603b..5a05fa0 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -378,6 +378,7 @@ static void handle_tx(struct vhost_net *net)
 		goto out;
 
 	vhost_disable_notify(&net->dev, vq);
+	vhost_net_disable_vq(net, vq);
 
 	hdr_size = nvq->vhost_hlen;
 	zcopy = nvq->ubufs;
@@ -459,6 +460,8 @@ static void handle_tx(struct vhost_net *net)
 					% UIO_MAXIOV;
 			}
 			vhost_discard_vq_desc(vq, 1);
+			if (err == -EAGAIN)
+				vhost_net_enable_vq(net, vq);
 			break;
 		}
 		if (err != len)
-- 
1.8.3.1
Michael S. Tsirkin
2016-May-30  15:47 UTC
[PATCH V2 1/2] vhost_net: stop polling socket during rx processing
On Mon, May 30, 2016 at 02:47:53AM -0400, Jason Wang wrote:
> We don't stop rx polling socket during rx processing, this will lead
> unnecessary wakeups from under layer net devices (E.g
> sock_def_readable() form tun). Rx will be slowed down in this
> way. This patch avoids this by stop polling socket during rx
> processing. A small drawback is that this introduces some overheads in
> light load case because of the extra start/stop polling, but single
> netperf TCP_RR does not notice any change. In a super heavy load case,
> e.g using pktgen to inject packet to guest, we get about ~8.8%
> improvement on pps:
>
> before: ~1240000 pkt/s
> after:  ~1350000 pkt/s
>
> Signed-off-by: Jason Wang <jasowang at redhat.com>
> ---
>  drivers/vhost/net.c | 56 +++++++++++++++++++++++++++--------------------------
>  1 file changed, 29 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 10ff494..e91603b 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -301,6 +301,32 @@ static bool vhost_can_busy_poll(struct vhost_dev *dev,
>  	       !vhost_has_work(dev);
>  }
>
> +static void vhost_net_disable_vq(struct vhost_net *n,
> +				 struct vhost_virtqueue *vq)
> +{
> +	struct vhost_net_virtqueue *nvq =
> +		container_of(vq, struct vhost_net_virtqueue, vq);
> +	struct vhost_poll *poll = n->poll + (nvq - n->vqs);
> +	if (!vq->private_data)
> +		return;
> +	vhost_poll_stop(poll);
> +}
> +
> +static int vhost_net_enable_vq(struct vhost_net *n,
> +				struct vhost_virtqueue *vq)
> +{
> +	struct vhost_net_virtqueue *nvq =
> +		container_of(vq, struct vhost_net_virtqueue, vq);
> +	struct vhost_poll *poll = n->poll + (nvq - n->vqs);
> +	struct socket *sock;
> +
> +	sock = vq->private_data;
> +	if (!sock)
> +		return 0;
> +
> +	return vhost_poll_start(poll, sock->file);
> +}
> +
>  static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
>  				    struct vhost_virtqueue *vq,
>  				    struct iovec iov[], unsigned int iov_size,

BTW we might want to rename these functions,
name no longer reflects function ...

> @@ -627,6 +653,7 @@ static void handle_rx(struct vhost_net *net)
>  	if (!sock)
>  		goto out;
>  	vhost_disable_notify(&net->dev, vq);
> +	vhost_net_disable_vq(net, vq);
>
>  	vhost_hlen = nvq->vhost_hlen;
>  	sock_hlen = nvq->sock_hlen;
> @@ -715,9 +742,10 @@ static void handle_rx(struct vhost_net *net)
>  		total_len += vhost_len;
>  		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
>  			vhost_poll_queue(&vq->poll);
> -			break;
> +			goto out;
>  		}
>  	}
> +	vhost_net_enable_vq(net, vq);

OK so if sock is readable but RX VQ is empty, this will
immediately schedule another round of handle_rx and so ad infinitum.
Looks like a bug.

>  out:
>  	mutex_unlock(&vq->mutex);
>  }
> @@ -796,32 +824,6 @@ static int vhost_net_open(struct inode *inode, struct file *f)
>  	return 0;
>  }
>
> -static void vhost_net_disable_vq(struct vhost_net *n,
> -				 struct vhost_virtqueue *vq)
> -{
> -	struct vhost_net_virtqueue *nvq =
> -		container_of(vq, struct vhost_net_virtqueue, vq);
> -	struct vhost_poll *poll = n->poll + (nvq - n->vqs);
> -	if (!vq->private_data)
> -		return;
> -	vhost_poll_stop(poll);
> -}
> -
> -static int vhost_net_enable_vq(struct vhost_net *n,
> -				struct vhost_virtqueue *vq)
> -{
> -	struct vhost_net_virtqueue *nvq =
> -		container_of(vq, struct vhost_net_virtqueue, vq);
> -	struct vhost_poll *poll = n->poll + (nvq - n->vqs);
> -	struct socket *sock;
> -
> -	sock = vq->private_data;
> -	if (!sock)
> -		return 0;
> -
> -	return vhost_poll_start(poll, sock->file);
> -}
> -
>  static struct socket *vhost_net_stop_vq(struct vhost_net *n,
>  					struct vhost_virtqueue *vq)
>  {
> -- 
> 1.8.3.1
Michael S. Tsirkin
2016-May-30  15:55 UTC
[PATCH V2 2/2] vhost_net: conditionally enable tx polling
On Mon, May 30, 2016 at 02:47:54AM -0400, Jason Wang wrote:
> We always poll tx for socket, this is sub optimal since:
>
> - it will be only used when we exceed the sndbuf of the socket.
> - since we use two independent polls for tx and vq, this will slightly
>   increase the waitqueue traversing time and more important, vhost
>   could not benefit from commit
>   9e641bdcfa4ef4d6e2fbaa59c1be0ad5d1551fd5 ("net-tun: restructure
>   tun_do_read for better sleep/wakeup efficiency") even if we've
>   stopped rx polling during handle_rx since tx poll were still left in
>   the waitqueue.

Why is this an issue?
sock_def_write_space only wakes up when queue is half empty,
not on each packet.

	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)

I suspect the issue is with your previous patch,
it now pokes at the spinlock on data path
where it used not to.

Is that right?

>
> Fix this by conditionally enable tx polling only when -EAGAIN were
> met.
>
> Test shows about 8% improvement on guest rx pps.
>
> Before: ~1350000
> After:  ~1460000
>
> Signed-off-by: Jason Wang <jasowang at redhat.com>
> ---
>  drivers/vhost/net.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index e91603b..5a05fa0 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -378,6 +378,7 @@ static void handle_tx(struct vhost_net *net)
>  		goto out;
>
>  	vhost_disable_notify(&net->dev, vq);
> +	vhost_net_disable_vq(net, vq);
>
>  	hdr_size = nvq->vhost_hlen;
>  	zcopy = nvq->ubufs;
> @@ -459,6 +460,8 @@ static void handle_tx(struct vhost_net *net)
>  					% UIO_MAXIOV;
>  			}
>  			vhost_discard_vq_desc(vq, 1);
> +			if (err == -EAGAIN)
> +				vhost_net_enable_vq(net, vq);
>  			break;
>  		}
>  		if (err != len)
> -- 
> 1.8.3.1
Possibly Parallel Threads
- [PATCH V2 0/2] vhost_net polling optimization
- [PATCH V2 1/2] vhost_net: stop polling socket during rx processing
- [PATCH V2 1/2] vhost_net: stop polling socket during rx processing
- [PATCH V3 0/2] vhost_net polling optimization
- [PATCH V3 0/2] vhost_net polling optimization