Tonghao Zhang
2018-Jun-20 13:28 UTC
[PATCH] net: vhost: improve performance when enable busyloop
This patch improves the guest receive performance from the
host. On the handle_tx side, we poll the sock receive
queue at the same time. handle_rx does that in the same way.
We set poll-us=100 us and use iperf3 to test
the throughput. The iperf3 commands are shown below.
iperf3 -s -D
iperf3 -c 192.168.1.100 -i 1 -P 10 -t 10 -M 1400 --bandwidth 100000M
* With the patch: 21.1 Gbits/sec
* Without the patch: 12.7 Gbits/sec
Signed-off-by: Tonghao Zhang <zhangtonghao at didichuxing.com>
---
drivers/vhost/net.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index e7cf7d2..9364ede 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -429,22 +429,43 @@ static int vhost_net_enable_vq(struct vhost_net *n,
return vhost_poll_start(poll, sock->file);
}
+static int sk_has_rx_data(struct sock *sk);
+
static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
struct vhost_virtqueue *vq,
struct iovec iov[], unsigned int iov_size,
unsigned int *out_num, unsigned int *in_num)
{
unsigned long uninitialized_var(endtime);
+ struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
+ struct vhost_virtqueue *rvq = &nvq->vq;
+ struct socket *sock = rvq->private_data;
+
int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
out_num, in_num, NULL, NULL);
if (r == vq->num && vq->busyloop_timeout) {
+ mutex_lock_nested(&rvq->mutex, 1);
+
+ vhost_disable_notify(&net->dev, rvq);
+
preempt_disable();
endtime = busy_clock() + vq->busyloop_timeout;
while (vhost_can_busy_poll(vq->dev, endtime) &&
+ !(sock && sk_has_rx_data(sock->sk)) &&
vhost_vq_avail_empty(vq->dev, vq))
cpu_relax();
preempt_enable();
+
+ if (sock && sk_has_rx_data(sock->sk))
+ vhost_poll_queue(&rvq->poll);
+ else if (unlikely(vhost_enable_notify(&net->dev, rvq))) {
+ vhost_disable_notify(&net->dev, rvq);
+ vhost_poll_queue(&rvq->poll);
+ }
+
+ mutex_unlock(&rvq->mutex);
+
r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
out_num, in_num, NULL, NULL);
}
--
1.8.3.1
Jason Wang
2018-Jun-21 05:59 UTC
[PATCH] net: vhost: improve performance when enable busyloop
On 2018年06月20日 21:28, Tonghao Zhang wrote: > This patch improves the guest receive performance from > host. On the handle_tx side, we poll the sock receive > queue at the same time. handle_rx do that in the same way. > > we set the poll-us=100 us and use the iperf3 to test > its throughput. The iperf3 command is shown as below. > > iperf3 -s -D > iperf3 -c 192.168.1.100 -i 1 -P 10 -t 10 -M 1400 --bandwidth 100000M > > * With the patch: 21.1 Gbits/sec > * Without the patch: 12.7 Gbits/sec Thanks a lot for the patch. But it looks like it needs some work to avoid e.g. deadlock. E.g. in vhost_process_iotlb_msg() we call vhost_dev_lock_vqs() which does: for (i = 0; i < d->nvqs; ++i) mutex_lock_nested(&d->vqs[i]->mutex, i); I believe we need to change the code to lock the vq one by one like the attached patch (only compile-tested). > Signed-off-by: Tonghao Zhang <zhangtonghao at didichuxing.com> > --- > drivers/vhost/net.c | 21 +++++++++++++++++++++ > 1 file changed, 21 insertions(+) > > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index e7cf7d2..9364ede 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -429,22 +429,43 @@ static int vhost_net_enable_vq(struct vhost_net *n, > return vhost_poll_start(poll, sock->file); > } > > +static int sk_has_rx_data(struct sock *sk); > + How about moving sk_has_rx_data() here? > static int vhost_net_tx_get_vq_desc(struct vhost_net *net, > struct vhost_virtqueue *vq, > struct iovec iov[], unsigned int iov_size, > unsigned int *out_num, unsigned int *in_num) > { > unsigned long uninitialized_var(endtime); > + struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX]; > + struct vhost_virtqueue *rvq = &nvq->vq; > + struct socket *sock = rvq->private_data; > + > int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), > out_num, in_num, NULL, NULL); > > if (r == vq->num && vq->busyloop_timeout) { > + mutex_lock_nested(&rvq->mutex, 1); > + > + vhost_disable_notify(&net->dev, rvq); > + > preempt_disable(); >
endtime = busy_clock() + vq->busyloop_timeout; > while (vhost_can_busy_poll(vq->dev, endtime) && > + !(sock && sk_has_rx_data(sock->sk)) && > vhost_vq_avail_empty(vq->dev, vq)) > cpu_relax(); > preempt_enable(); > + > + if (sock && sk_has_rx_data(sock->sk)) > + vhost_poll_queue(&rvq->poll); > + else if (unlikely(vhost_enable_notify(&net->dev, rvq))) { > + vhost_disable_notify(&net->dev, rvq); > + vhost_poll_queue(&rvq->poll); > + } > + > + mutex_unlock(&rvq->mutex); There is some code duplication here; can we try to unify it? Btw, net-next is closed, so you need to resubmit after it opens and use "net-next" as the prefix of the patch. Thanks > + > r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), > out_num, in_num, NULL, NULL); > } -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-vhost-lock-vqs-one-by-one.patch Type: text/x-patch Size: 2179 bytes Desc: not available URL: <http://lists.linuxfoundation.org/pipermail/virtualization/attachments/20180621/ddd79f2a/attachment-0001.bin>
Apparently Analogous Threads
- [PATCH net-next v2] net: vhost: improve performance when enable busyloop
- [PATCH net-next v2] net: vhost: improve performance when enable busyloop
- [PATCH net-next v3 0/4] net: vhost: improve performance when enable busyloop
- [PATCH net-next v4 0/4] net: vhost: improve performance when enable busyloop
- [PATCH net-next v5 0/4] net: vhost: improve performance when enable busyloop