Hi: This series tries to optimize vhost_net polling at two points: - Stop rx polling to reduce the unnecessary wakeups during handle_rx(). - Conditionally enable tx polling to reduce the unnecessary traversing and spinlock touching. Test shows about 17% improvement on rx pps. Please review. Changes from V2: - Don't enable rx vq if we meet an error or the rx vq is empty. Changes from V1: - Use vhost_net_disable_vq()/vhost_net_enable_vq() instead of open coding. - Add a new patch for conditionally enabling tx polling. Jason Wang (2): vhost_net: stop polling socket during rx processing vhost_net: conditionally enable tx polling drivers/vhost/net.c | 67 ++++++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 31 deletions(-) -- 1.8.3.1
Jason Wang
2016-Jun-01 05:56 UTC
[PATCH V3 1/2] vhost_net: stop polling socket during rx processing
We don't stop polling the rx socket during rx processing; this leads to unnecessary wakeups from underlying net devices (e.g. sock_def_readable() from tun). Rx will be slowed down in this way. This patch avoids this by stopping polling the socket during rx processing. A small drawback is that this introduces some overhead in the light load case because of the extra start/stop polling, but single netperf TCP_RR does not notice any change. In a super heavy load case, e.g. using pktgen to inject packets to the guest, we get about ~8.8% improvement on pps: before: ~1240000 pkt/s after: ~1350000 pkt/s Signed-off-by: Jason Wang <jasowang at redhat.com> --- drivers/vhost/net.c | 64 +++++++++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index f744eeb..1d3e45f 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -301,6 +301,32 @@ static bool vhost_can_busy_poll(struct vhost_dev *dev, !vhost_has_work(dev); } +static void vhost_net_disable_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + struct vhost_net_virtqueue *nvq = + container_of(vq, struct vhost_net_virtqueue, vq); + struct vhost_poll *poll = n->poll + (nvq - n->vqs); + if (!vq->private_data) + return; + vhost_poll_stop(poll); +} + +static int vhost_net_enable_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + struct vhost_net_virtqueue *nvq = + container_of(vq, struct vhost_net_virtqueue, vq); + struct vhost_poll *poll = n->poll + (nvq - n->vqs); + struct socket *sock; + + sock = vq->private_data; + if (!sock) + return 0; + + return vhost_poll_start(poll, sock->file); +} + static int vhost_net_tx_get_vq_desc(struct vhost_net *net, struct vhost_virtqueue *vq, struct iovec iov[], unsigned int iov_size, @@ -613,6 +639,7 @@ static void handle_rx(struct vhost_net *net) if (!sock) goto out; vhost_disable_notify(&net->dev, vq); + vhost_net_disable_vq(net, vq); vhost_hlen = nvq->vhost_hlen; sock_hlen = nvq->sock_hlen; @@ 
-629,7 +656,7 @@ static void handle_rx(struct vhost_net *net) likely(mergeable) ? UIO_MAXIOV : 1); /* On error, stop handling until the next kick. */ if (unlikely(headcount < 0)) - break; + goto out; /* On overrun, truncate and discard */ if (unlikely(headcount > UIO_MAXIOV)) { iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1); @@ -648,7 +675,7 @@ static void handle_rx(struct vhost_net *net) } /* Nothing new? Wait for eventfd to tell us * they refilled. */ - break; + goto out; } /* We don't need to be notified again. */ iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len); @@ -676,7 +703,7 @@ static void handle_rx(struct vhost_net *net) &fixup) != sizeof(hdr)) { vq_err(vq, "Unable to write vnet_hdr " "at addr %p\n", vq->iov->iov_base); - break; + goto out; } } else { /* Header came from socket; we'll need to patch @@ -692,7 +719,7 @@ static void handle_rx(struct vhost_net *net) &fixup) != sizeof num_buffers) { vq_err(vq, "Failed num_buffers write"); vhost_discard_vq_desc(vq, headcount); - break; + goto out; } vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, headcount); @@ -701,9 +728,10 @@ static void handle_rx(struct vhost_net *net) total_len += vhost_len; if (unlikely(total_len >= VHOST_NET_WEIGHT)) { vhost_poll_queue(&vq->poll); - break; + goto out; } } + vhost_net_enable_vq(net, vq); out: mutex_unlock(&vq->mutex); } @@ -782,32 +810,6 @@ static int vhost_net_open(struct inode *inode, struct file *f) return 0; } -static void vhost_net_disable_vq(struct vhost_net *n, - struct vhost_virtqueue *vq) -{ - struct vhost_net_virtqueue *nvq - container_of(vq, struct vhost_net_virtqueue, vq); - struct vhost_poll *poll = n->poll + (nvq - n->vqs); - if (!vq->private_data) - return; - vhost_poll_stop(poll); -} - -static int vhost_net_enable_vq(struct vhost_net *n, - struct vhost_virtqueue *vq) -{ - struct vhost_net_virtqueue *nvq - container_of(vq, struct vhost_net_virtqueue, vq); - struct vhost_poll *poll = n->poll + (nvq - n->vqs); - struct socket *sock; - - sock 
= vq->private_data; - if (!sock) - return 0; - - return vhost_poll_start(poll, sock->file); -} - static struct socket *vhost_net_stop_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { -- 1.8.3.1
Jason Wang
2016-Jun-01 05:56 UTC
[PATCH V3 2/2] vhost_net: conditionally enable tx polling
We always poll tx for the socket, which is suboptimal since: - it will only be used when we exceed the sndbuf of the socket. - since we use two independent polls for tx and vq, this will slightly increase the waitqueue traversing time and, more importantly, vhost could not benefit from commit 9e641bdcfa4ef4d6e2fbaa59c1be0ad5d1551fd5 ("net-tun: restructure tun_do_read for better sleep/wakeup efficiency") even if we've stopped rx polling during handle_rx, since the tx poll was still left in the waitqueue. Fix this by conditionally enabling tx polling only when -EAGAIN is met. Test shows about 8% improvement on guest rx pps. Before: ~1350000 After: ~1460000 Signed-off-by: Jason Wang <jasowang at redhat.com> --- drivers/vhost/net.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 1d3e45f..e75ffcc 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -378,6 +378,7 @@ static void handle_tx(struct vhost_net *net) goto out; vhost_disable_notify(&net->dev, vq); + vhost_net_disable_vq(net, vq); hdr_size = nvq->vhost_hlen; zcopy = nvq->ubufs; @@ -459,6 +460,8 @@ static void handle_tx(struct vhost_net *net) % UIO_MAXIOV; } vhost_discard_vq_desc(vq, 1); + if (err == -EAGAIN) + vhost_net_enable_vq(net, vq); break; } if (err != len) -- 1.8.3.1
From: Jason Wang <jasowang at redhat.com> Date: Wed, 1 Jun 2016 01:56:32 -0400 > This series tries to optimize vhost_net polling at two points: > > - Stop rx polling for reduicng the unnecessary wakeups during > handle_rx(). > - Conditonally enable tx polling for reducing the unnecessary > traversing and spinlock touching. > > Test shows about 17% improvement on rx pps. > > Please review > > Changes from V2: > - Don't enable rx vq if we meet an err or rx vq is empty > Changes from V1: > - use vhost_net_disable_vq()/vhost_net_enable_vq() instead of open > coding. > - Add a new patch for conditionally enable tx polling. Michael, please review this patch series. Thanks.
On Thu, Jun 02, 2016 at 12:08:06PM -0700, David Miller wrote: > From: Jason Wang <jasowang at redhat.com> > Date: Wed, 1 Jun 2016 01:56:32 -0400 > > > This series tries to optimize vhost_net polling at two points: > > > > - Stop rx polling for reduicng the unnecessary wakeups during > > handle_rx(). > > - Conditonally enable tx polling for reducing the unnecessary > > traversing and spinlock touching. > > > > Test shows about 17% improvement on rx pps. > > > > Please review > > > > Changes from V2: > > - Don't enable rx vq if we meet an err or rx vq is empty > > Changes from V1: > > - use vhost_net_disable_vq()/vhost_net_enable_vq() instead of open > > coding. > > - Add a new patch for conditionally enable tx polling. > > Michael, please review this patch series. > > Thanks. Thanks for the reminder; I plan to review next week.
Michael S. Tsirkin
2016-Jun-07 12:23 UTC
[PATCH V3 1/2] vhost_net: stop polling socket during rx processing
On Wed, Jun 01, 2016 at 01:56:33AM -0400, Jason Wang wrote:> We don't stop rx polling socket during rx processing, this will lead > unnecessary wakeups from under layer net devices (E.g > sock_def_readable() form tun). Rx will be slowed down in this > way. This patch avoids this by stop polling socket during rx > processing. A small drawback is that this introduces some overheads in > light load case because of the extra start/stop polling, but single > netperf TCP_RR does not notice any change. In a super heavy load case, > e.g using pktgen to inject packet to guest, we get about ~8.8% > improvement on pps: > > before: ~1240000 pkt/s > after: ~1350000 pkt/s > > Signed-off-by: Jason Wang <jasowang at redhat.com>I guess this works though I suspect it's even faster to maintain some state in the vq structure so we don't need to play with waitq all the time. For now Acked-by: Michael S. Tsirkin <mst at redhat.com>> --- > drivers/vhost/net.c | 64 +++++++++++++++++++++++++++-------------------------- > 1 file changed, 33 insertions(+), 31 deletions(-) > > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index f744eeb..1d3e45f 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -301,6 +301,32 @@ static bool vhost_can_busy_poll(struct vhost_dev *dev, > !vhost_has_work(dev); > } > > +static void vhost_net_disable_vq(struct vhost_net *n, > + struct vhost_virtqueue *vq) > +{ > + struct vhost_net_virtqueue *nvq > + container_of(vq, struct vhost_net_virtqueue, vq); > + struct vhost_poll *poll = n->poll + (nvq - n->vqs); > + if (!vq->private_data) > + return; > + vhost_poll_stop(poll); > +} > + > +static int vhost_net_enable_vq(struct vhost_net *n, > + struct vhost_virtqueue *vq) > +{ > + struct vhost_net_virtqueue *nvq > + container_of(vq, struct vhost_net_virtqueue, vq); > + struct vhost_poll *poll = n->poll + (nvq - n->vqs); > + struct socket *sock; > + > + sock = vq->private_data; > + if (!sock) > + return 0; > + > + return vhost_poll_start(poll, 
sock->file); > +} > + > static int vhost_net_tx_get_vq_desc(struct vhost_net *net, > struct vhost_virtqueue *vq, > struct iovec iov[], unsigned int iov_size, > @@ -613,6 +639,7 @@ static void handle_rx(struct vhost_net *net) > if (!sock) > goto out; > vhost_disable_notify(&net->dev, vq); > + vhost_net_disable_vq(net, vq); > > vhost_hlen = nvq->vhost_hlen; > sock_hlen = nvq->sock_hlen; > @@ -629,7 +656,7 @@ static void handle_rx(struct vhost_net *net) > likely(mergeable) ? UIO_MAXIOV : 1); > /* On error, stop handling until the next kick. */ > if (unlikely(headcount < 0)) > - break; > + goto out; > /* On overrun, truncate and discard */ > if (unlikely(headcount > UIO_MAXIOV)) { > iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1); > @@ -648,7 +675,7 @@ static void handle_rx(struct vhost_net *net) > } > /* Nothing new? Wait for eventfd to tell us > * they refilled. */ > - break; > + goto out; > } > /* We don't need to be notified again. */ > iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len); > @@ -676,7 +703,7 @@ static void handle_rx(struct vhost_net *net) > &fixup) != sizeof(hdr)) { > vq_err(vq, "Unable to write vnet_hdr " > "at addr %p\n", vq->iov->iov_base); > - break; > + goto out; > } > } else { > /* Header came from socket; we'll need to patch > @@ -692,7 +719,7 @@ static void handle_rx(struct vhost_net *net) > &fixup) != sizeof num_buffers) { > vq_err(vq, "Failed num_buffers write"); > vhost_discard_vq_desc(vq, headcount); > - break; > + goto out; > } > vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, > headcount); > @@ -701,9 +728,10 @@ static void handle_rx(struct vhost_net *net) > total_len += vhost_len; > if (unlikely(total_len >= VHOST_NET_WEIGHT)) { > vhost_poll_queue(&vq->poll); > - break; > + goto out; > } > } > + vhost_net_enable_vq(net, vq); > out: > mutex_unlock(&vq->mutex); > } > @@ -782,32 +810,6 @@ static int vhost_net_open(struct inode *inode, struct file *f) > return 0; > } > > -static void vhost_net_disable_vq(struct vhost_net 
*n, > - struct vhost_virtqueue *vq) > -{ > - struct vhost_net_virtqueue *nvq > - container_of(vq, struct vhost_net_virtqueue, vq); > - struct vhost_poll *poll = n->poll + (nvq - n->vqs); > - if (!vq->private_data) > - return; > - vhost_poll_stop(poll); > -} > - > -static int vhost_net_enable_vq(struct vhost_net *n, > - struct vhost_virtqueue *vq) > -{ > - struct vhost_net_virtqueue *nvq > - container_of(vq, struct vhost_net_virtqueue, vq); > - struct vhost_poll *poll = n->poll + (nvq - n->vqs); > - struct socket *sock; > - > - sock = vq->private_data; > - if (!sock) > - return 0; > - > - return vhost_poll_start(poll, sock->file); > -} > - > static struct socket *vhost_net_stop_vq(struct vhost_net *n, > struct vhost_virtqueue *vq) > { > -- > 1.8.3.1
Michael S. Tsirkin
2016-Jun-07 12:26 UTC
[PATCH V3 2/2] vhost_net: conditionally enable tx polling
On Wed, Jun 01, 2016 at 01:56:34AM -0400, Jason Wang wrote: > We always poll tx for socket, this is sub optimal since: > > - it will be only used when we exceed the sndbuf of the socket. > - since we use two independent polls for tx and vq, this will slightly > increase the waitqueue traversing time and more important, vhost > could not benefit from commit > 9e641bdcfa4ef4d6e2fbaa59c1be0ad5d1551fd5 ("net-tun: restructure > tun_do_read for better sleep/wakeup efficiency") even if we've > stopped rx polling during handle_rx since tx poll were still left in > the waitqueue. > > Fix this by conditionally enable tx polling only when -EAGAIN were > met. > > Test shows about 8% improvement on guest rx pps. > > Before: ~1350000 > After: ~1460000 > > Signed-off-by: Jason Wang <jasowang at redhat.com> > --- > drivers/vhost/net.c | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index 1d3e45f..e75ffcc 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -378,6 +378,7 @@ static void handle_tx(struct vhost_net *net) > goto out; > > vhost_disable_notify(&net->dev, vq); > + vhost_net_disable_vq(net, vq); > > hdr_size = nvq->vhost_hlen; > zcopy = nvq->ubufs; > @@ -459,6 +460,8 @@ static void handle_tx(struct vhost_net *net) > % UIO_MAXIOV; > } > vhost_discard_vq_desc(vq, 1); > + if (err == -EAGAIN) > + vhost_net_enable_vq(net, vq); > break; > } > if (err != len) This seems rather risky. What if TX failed for some other reason? Polling won't ever be re-enabled ... > -- > 1.8.3.1
David Miller
2016-Jun-07 21:46 UTC
[PATCH V3 1/2] vhost_net: stop polling socket during rx processing
From: Jason Wang <jasowang at redhat.com> Date: Wed, 1 Jun 2016 01:56:33 -0400 > We don't stop rx polling socket during rx processing, this will lead > unnecessary wakeups from under layer net devices (E.g > sock_def_readable() form tun). Rx will be slowed down in this > way. This patch avoids this by stop polling socket during rx > processing. A small drawback is that this introduces some overheads in > light load case because of the extra start/stop polling, but single > netperf TCP_RR does not notice any change. In a super heavy load case, > e.g using pktgen to inject packet to guest, we get about ~8.8% > improvement on pps: > > before: ~1240000 pkt/s > after: ~1350000 pkt/s > > Signed-off-by: Jason Wang <jasowang at redhat.com> Applied.
Possibly Parallel Threads
- [PATCH V3 2/2] vhost_net: conditionally enable tx polling
- [PATCH V3 2/2] vhost_net: conditionally enable tx polling
- [PATCH V2 2/2] vhost_net: conditionally enable tx polling
- [PATCH V2 2/2] vhost_net: conditionally enable tx polling
- [PATCH V3 0/2] vhost_net polling optimization