thr3ads.net - Linux Virtualization - [PATCH net-next] vhost_net: stop rx net polling when possible [Aug 2014]

If this information is useful, please help other people find it:
Share via:

Jason Wang

2014-Aug-15 03:40 UTC

[PATCH net-next] vhost_net: stop rx net polling when possible

After rx vq was enabled, we never stop polling its socket. This is sub optimal
when may lead unnecessary wake-ups after the rx net work has already been
queued. This could be optimized by stopping polling the rx net sock when
processing both rx and tx and restart it afterward. This could save unnecessary
wake-ups and even unnecessary spin locks acquiring with the help of commit
9e641bdcfa4ef4d6e2fbaa59c1be0ad5d1551fd5 "net-tun: restructure tun_do_read
for
better sleep/wakeup efficiency".

Test shows significant CPU% savings during almost all the cases:

Guest rx stream:
size(B)/sessions/throughput/cpu/normalized thru/
64/1/+0.7773%   -8.6224%     +10.2866%
64/2/+0.6335%   -13.9109%    +16.8946%
64/4/-0.8182%   -14.8336%    +16.4565%
64/8/+0.4830%   -13.7675%    +16.5256%
256/1/-7.0963%  -12.6880%    +6.4043%
256/2/-1.3982%  -11.5424%    +11.4678%
256/4/-0.0350%  -11.8323%    +13.3806%
256/8/-1.5830%  -12.7693%    +12.8238%
1024/1/-7.4895% -19.1449%       +14.4152%
1024/2/-7.4575% -19.4018%       +14.8195%
1024/4/-0.3881% -9.1183%        +9.6061%
1024/8/+0.4713% -11.0155%       +12.9087%
4096/1/+0.8786%  -8.4050%        +10.1355%
4096/2/+0.0098%  -15.3094%       +18.0885%
4096/4/+0.0445%  -10.8247%       +12.1886%
4096/8/-2.1317%  -12.5111%       +11.8637%
16384/1/-0.0008% -6.1891%        +6.5966%
16384/2/-0.0117% -16.2716%       +19.4198%
16384/4/+0.0001% -5.9197%        +6.2923%
16384/8/+0.0173% -7.6681%        +8.3236%
65535/1/+0.0011% -10.3594%       +11.5578%
65535/2/-0.4108%  -14.4304%       +16.3838%
65535/4/+0.0011%  -10.3594%       +11.5578%
65535/8/-0.4108%  -14.4304%       +16.3838%

Guest tx stream:
size(B)/sessions/throughput/cpu/normalized thru/
64/1/-0.6228%     -2.1936%     +1.6060%
64/2/+0.8646%     -3.5063%     +4.5297%
64/4/+0.8733%     -3.2495%     +4.2613%
64/8/+1.4290%     -3.5593%     +5.1724%
256/1/+7.2098%    -3.1122%     +10.6535%
256/2/-10.1408%   -6.8230%     -3.5607%
256/4/-11.3531%   -6.7085%     -4.9785%
256/8/-10.2723%   -6.5628%     -3.9701%
1024/1/-18.9329%  -13.6162%    -6.1547%
1024/2/-0.3728%   -1.3181%     +0.9580%
1024/4/+0.0125%   -3.6338%     +3.7838%
1024/8/-0.0030%   -2.7282%     +2.8017%
4096/1/+16.9367%  -1.9435%     +19.2543%
4096/2/+0.0121%   -6.1682%     +6.5866%
4096/4/+0.0019%   -3.8510%     +4.0072%
4096/8/-0.0222%   -4.1368%     +4.2922%
16384/1/-0.0026%  -8.6892%     +9.5132%
16384/2/-0.0012%  -10.1676%    +11.3171%
16384/4/+0.0196%  -1.2551%     +1.2908%
16384/8/+0.1303%  -3.2634%     +3.5082%
65535/1/+0.0019%  -3.4694%     +3.5961%
65535/2/-0.0003%  -0.7635%     +0.7690%
65535/4/-0.0219%  -2.7875%     +2.8448%
65535/8/+0.1137%  -2.7922%     +2.9894%

TCP_RR:
size(B)/sessions/throughput/cpu/normalized thru/
256/1/+1.9004%    -4.7985%     +7.0366%
256/25/-4.7366%   -11.0809%    +7.1349%
256/50/+3.9808%   -5.2037%     +9.6887%
4096/1/+2.1619%   -0.7303%     +2.9134%
4096/25/-13.1836% -14.7298%    +1.8134%
4096/50/-11.1990% -15.4763%    +5.0605%

Signed-off-by: Jason Wang <jasowang at redhat.com>
---
 drivers/vhost/net.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 8dae2f7..d4a9742 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -334,6 +334,8 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf,
bool success)
 static void handle_tx(struct vhost_net *net)
 {
 	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
+	struct vhost_virtqueue *rx_vq = &net->vqs[VHOST_NET_VQ_RX].vq;
+	struct vhost_poll *rx_poll = &net->poll[VHOST_NET_VQ_RX];
 	struct vhost_virtqueue *vq = &nvq->vq;
 	unsigned out, in, s;
 	int head;
@@ -348,15 +350,18 @@ static void handle_tx(struct vhost_net *net)
 	size_t len, total_len = 0;
 	int err;
 	size_t hdr_size;
-	struct socket *sock;
+	struct socket *sock, *rxsock;
 	struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
-	bool zcopy, zcopy_used;
+	bool zcopy, zcopy_used, poll = false;
 
 	mutex_lock(&vq->mutex);
+	mutex_lock(&rx_vq->mutex);
 	sock = vq->private_data;
+	rxsock = rx_vq->private_data;
 	if (!sock)
 		goto out;
 
+	vhost_poll_stop(rx_poll);
 	vhost_disable_notify(&net->dev, vq);
 
 	hdr_size = nvq->vhost_hlen;
@@ -451,11 +456,17 @@ static void handle_tx(struct vhost_net *net)
 		total_len += len;
 		vhost_net_tx_packet(net);
 		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
-			vhost_poll_queue(&vq->poll);
+			poll = true;
 			break;
 		}
 	}
+
+	if (rxsock)
+		vhost_poll_start(rx_poll, rxsock->file);
+	if (poll)
+		vhost_poll_queue(&vq->poll);
 out:
+	mutex_unlock(&rx_vq->mutex);
 	mutex_unlock(&vq->mutex);
 }
 
@@ -554,6 +565,7 @@ err:
 static void handle_rx(struct vhost_net *net)
 {
 	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
+	struct vhost_poll *poll = &net->poll[VHOST_NET_VQ_RX];
 	struct vhost_virtqueue *vq = &nvq->vq;
 	unsigned uninitialized_var(in), log;
 	struct vhost_log *vq_log;
@@ -580,6 +592,8 @@ static void handle_rx(struct vhost_net *net)
 	sock = vq->private_data;
 	if (!sock)
 		goto out;
+
+	vhost_poll_stop(poll);
 	vhost_disable_notify(&net->dev, vq);
 
 	vhost_hlen = nvq->vhost_hlen;
@@ -660,10 +674,12 @@ static void handle_rx(struct vhost_net *net)
 			vhost_log_write(vq, vq_log, log, vhost_len);
 		total_len += vhost_len;
 		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
-			vhost_poll_queue(&vq->poll);
-			break;
+			vhost_poll_queue(poll);
+			goto out;
 		}
 	}
+
+	vhost_poll_start(poll, sock->file);
 out:
 	mutex_unlock(&vq->mutex);
 }
-- 
1.9.1

Michael S. Tsirkin

2014-Aug-17 10:20 UTC

head link

[PATCH net-next] vhost_net: stop rx net polling when possible

On Fri, Aug 15, 2014 at 11:40:08AM +0800, Jason Wang
wrote:> After rx vq was enabled, we never stop polling its socket. This is sub
optimal
> when may lead unnecessary wake-ups after the rx net work has already been
> queued. This could be optimized by stopping polling the rx net sock when
> processing both rx and tx and restart it afterward. This could save
unnecessary
> wake-ups and even unnecessary spin locks acquiring with the help of commit
> 9e641bdcfa4ef4d6e2fbaa59c1be0ad5d1551fd5 "net-tun: restructure
tun_do_read for
> better sleep/wakeup efficiency".
OK so the point is to avoid expensive wake_up_process calls?
It's a bit unfortunate that we are adding/removing things from wait
queue which certainly does take extra spin-locks.



> Test shows significant CPU% savings during almost all the cases:
> 
> Guest rx stream:
> size(B)/sessions/throughput/cpu/normalized thru/
> 64/1/+0.7773%   -8.6224%     +10.2866%
> 64/2/+0.6335%   -13.9109%    +16.8946%
> 64/4/-0.8182%   -14.8336%    +16.4565%
> 64/8/+0.4830%   -13.7675%    +16.5256%
> 256/1/-7.0963%  -12.6880%    +6.4043%
> 256/2/-1.3982%  -11.5424%    +11.4678%
> 256/4/-0.0350%  -11.8323%    +13.3806%
> 256/8/-1.5830%  -12.7693%    +12.8238%
> 1024/1/-7.4895% -19.1449%       +14.4152%
> 1024/2/-7.4575% -19.4018%       +14.8195%
> 1024/4/-0.3881% -9.1183%        +9.6061%
> 1024/8/+0.4713% -11.0155%       +12.9087%
> 4096/1/+0.8786%  -8.4050%        +10.1355%
> 4096/2/+0.0098%  -15.3094%       +18.0885%
> 4096/4/+0.0445%  -10.8247%       +12.1886%
> 4096/8/-2.1317%  -12.5111%       +11.8637%
> 16384/1/-0.0008% -6.1891%        +6.5966%
> 16384/2/-0.0117% -16.2716%       +19.4198%
> 16384/4/+0.0001% -5.9197%        +6.2923%
> 16384/8/+0.0173% -7.6681%        +8.3236%
> 65535/1/+0.0011% -10.3594%       +11.5578%
> 65535/2/-0.4108%  -14.4304%       +16.3838%
> 65535/4/+0.0011%  -10.3594%       +11.5578%
> 65535/8/-0.4108%  -14.4304%       +16.3838%
> 
> Guest tx stream:
> size(B)/sessions/throughput/cpu/normalized thru/
> 64/1/-0.6228%     -2.1936%     +1.6060%
> 64/2/+0.8646%     -3.5063%     +4.5297%
> 64/4/+0.8733%     -3.2495%     +4.2613%
> 64/8/+1.4290%     -3.5593%     +5.1724%
> 256/1/+7.2098%    -3.1122%     +10.6535%
> 256/2/-10.1408%   -6.8230%     -3.5607%
> 256/4/-11.3531%   -6.7085%     -4.9785%
> 256/8/-10.2723%   -6.5628%     -3.9701%
> 1024/1/-18.9329%  -13.6162%    -6.1547%
> 1024/2/-0.3728%   -1.3181%     +0.9580%
> 1024/4/+0.0125%   -3.6338%     +3.7838%
> 1024/8/-0.0030%   -2.7282%     +2.8017%
> 4096/1/+16.9367%  -1.9435%     +19.2543%
> 4096/2/+0.0121%   -6.1682%     +6.5866%
> 4096/4/+0.0019%   -3.8510%     +4.0072%
> 4096/8/-0.0222%   -4.1368%     +4.2922%
> 16384/1/-0.0026%  -8.6892%     +9.5132%
> 16384/2/-0.0012%  -10.1676%    +11.3171%
> 16384/4/+0.0196%  -1.2551%     +1.2908%
> 16384/8/+0.1303%  -3.2634%     +3.5082%
> 65535/1/+0.0019%  -3.4694%     +3.5961%
> 65535/2/-0.0003%  -0.7635%     +0.7690%
> 65535/4/-0.0219%  -2.7875%     +2.8448%
> 65535/8/+0.1137%  -2.7922%     +2.9894%
> 
> TCP_RR:
> size(B)/sessions/throughput/cpu/normalized thru/
> 256/1/+1.9004%    -4.7985%     +7.0366%
> 256/25/-4.7366%   -11.0809%    +7.1349%
> 256/50/+3.9808%   -5.2037%     +9.6887%
> 4096/1/+2.1619%   -0.7303%     +2.9134%
> 4096/25/-13.1836% -14.7298%    +1.8134%
> 4096/50/-11.1990% -15.4763%    +5.0605%
> 
> Signed-off-by: Jason Wang <jasowang at redhat.com>

Could you split RX/TX parts out please, and benchmark separately?

They are really independent.



> ---
>  drivers/vhost/net.c | 26 +++++++++++++++++++++-----
>  1 file changed, 21 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 8dae2f7..d4a9742 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -334,6 +334,8 @@ static void vhost_zerocopy_callback(struct ubuf_info
*ubuf, bool success)
>  static void handle_tx(struct vhost_net *net)
>  {
>  	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
> +	struct vhost_virtqueue *rx_vq = &net->vqs[VHOST_NET_VQ_RX].vq;
> +	struct vhost_poll *rx_poll = &net->poll[VHOST_NET_VQ_RX];
>  	struct vhost_virtqueue *vq = &nvq->vq;
>  	unsigned out, in, s;
>  	int head;
> @@ -348,15 +350,18 @@ static void handle_tx(struct vhost_net *net)
>  	size_t len, total_len = 0;
>  	int err;
>  	size_t hdr_size;
> -	struct socket *sock;
> +	struct socket *sock, *rxsock;
>  	struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
> -	bool zcopy, zcopy_used;
> +	bool zcopy, zcopy_used, poll = false;
>  
>  	mutex_lock(&vq->mutex);
> +	mutex_lock(&rx_vq->mutex);
>  	sock = vq->private_data;
> +	rxsock = rx_vq->private_data;
>  	if (!sock)
>  		goto out;
>  
> +	vhost_poll_stop(rx_poll);
>  	vhost_disable_notify(&net->dev, vq);
>  
>  	hdr_size = nvq->vhost_hlen;
> @@ -451,11 +456,17 @@ static void handle_tx(struct vhost_net *net)
>  		total_len += len;
>  		vhost_net_tx_packet(net);
>  		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
> -			vhost_poll_queue(&vq->poll);
> +			poll = true;
>  			break;
>  		}
>  	}
> +
> +	if (rxsock)
> +		vhost_poll_start(rx_poll, rxsock->file);
> +	if (poll)
> +		vhost_poll_queue(&vq->poll);
>  out:
> +	mutex_unlock(&rx_vq->mutex);
>  	mutex_unlock(&vq->mutex);
>  }
>  
> @@ -554,6 +565,7 @@ err:
>  static void handle_rx(struct vhost_net *net)
>  {
>  	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
> +	struct vhost_poll *poll = &net->poll[VHOST_NET_VQ_RX];
>  	struct vhost_virtqueue *vq = &nvq->vq;
>  	unsigned uninitialized_var(in), log;
>  	struct vhost_log *vq_log;
> @@ -580,6 +592,8 @@ static void handle_rx(struct vhost_net *net)
>  	sock = vq->private_data;
>  	if (!sock)
>  		goto out;
> +
> +	vhost_poll_stop(poll);
>  	vhost_disable_notify(&net->dev, vq);
>  
>  	vhost_hlen = nvq->vhost_hlen;
> @@ -660,10 +674,12 @@ static void handle_rx(struct vhost_net *net)
>  			vhost_log_write(vq, vq_log, log, vhost_len);
>  		total_len += vhost_len;
>  		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
> -			vhost_poll_queue(&vq->poll);
> -			break;
> +			vhost_poll_queue(poll);
> +			goto out;
>  		}
>  	}
> +
> +	vhost_poll_start(poll, sock->file);
>  out:
>  	mutex_unlock(&vq->mutex);
>  }
> -- 
> 1.9.1

Jason Wang

2014-Aug-18 03:11 UTC

head link

[PATCH net-next] vhost_net: stop rx net polling when possible

On 08/17/2014 06:20 PM, Michael S. Tsirkin wrote:> On Fri, Aug 15, 2014 at 11:40:08AM +0800, Jason Wang wrote:
>> After rx vq was enabled, we never stop polling its socket. This is sub
optimal
>> when may lead unnecessary wake-ups after the rx net work has already
been
>> queued. This could be optimized by stopping polling the rx net sock
when
>> processing both rx and tx and restart it afterward. This could save
unnecessary
>> wake-ups and even unnecessary spin locks acquiring with the help of
commit
>> 9e641bdcfa4ef4d6e2fbaa59c1be0ad5d1551fd5 "net-tun: restructure
tun_do_read for
>> better sleep/wakeup efficiency".
> OK so the point is to avoid expensive wake_up_process calls?
> It's a bit unfortunate that we are adding/removing things from wait
> queue which certainly does take extra spin-locks.
When nothing new were queued during vhost thread is running. This change
may add two more spin-locks which may not but optimal. But if several
packets were queued by tun during vhost thread is running, it may save
lots of unnecessary wake ups. So the patch helps the performance in the
heavy load case for sure. In light load case, it may hurt some
throughput but cpu and thru/cpu is still saved.
>
>
>
>> Test shows significant CPU% savings during almost all the cases:
>>
>> Guest rx stream:
>> size(B)/sessions/throughput/cpu/normalized thru/
>> 64/1/+0.7773%   -8.6224%     +10.2866%
>> 64/2/+0.6335%   -13.9109%    +16.8946%
>> 64/4/-0.8182%   -14.8336%    +16.4565%
>> 64/8/+0.4830%   -13.7675%    +16.5256%
>> 256/1/-7.0963%  -12.6880%    +6.4043%
>> 256/2/-1.3982%  -11.5424%    +11.4678%
>> 256/4/-0.0350%  -11.8323%    +13.3806%
>> 256/8/-1.5830%  -12.7693%    +12.8238%
>> 1024/1/-7.4895% -19.1449%       +14.4152%
>> 1024/2/-7.4575% -19.4018%       +14.8195%
>> 1024/4/-0.3881% -9.1183%        +9.6061%
>> 1024/8/+0.4713% -11.0155%       +12.9087%
>> 4096/1/+0.8786%  -8.4050%        +10.1355%
>> 4096/2/+0.0098%  -15.3094%       +18.0885%
>> 4096/4/+0.0445%  -10.8247%       +12.1886%
>> 4096/8/-2.1317%  -12.5111%       +11.8637%
>> 16384/1/-0.0008% -6.1891%        +6.5966%
>> 16384/2/-0.0117% -16.2716%       +19.4198%
>> 16384/4/+0.0001% -5.9197%        +6.2923%
>> 16384/8/+0.0173% -7.6681%        +8.3236%
>> 65535/1/+0.0011% -10.3594%       +11.5578%
>> 65535/2/-0.4108%  -14.4304%       +16.3838%
>> 65535/4/+0.0011%  -10.3594%       +11.5578%
>> 65535/8/-0.4108%  -14.4304%       +16.3838%
>>
>> Guest tx stream:
>> size(B)/sessions/throughput/cpu/normalized thru/
>> 64/1/-0.6228%     -2.1936%     +1.6060%
>> 64/2/+0.8646%     -3.5063%     +4.5297%
>> 64/4/+0.8733%     -3.2495%     +4.2613%
>> 64/8/+1.4290%     -3.5593%     +5.1724%
>> 256/1/+7.2098%    -3.1122%     +10.6535%
>> 256/2/-10.1408%   -6.8230%     -3.5607%
>> 256/4/-11.3531%   -6.7085%     -4.9785%
>> 256/8/-10.2723%   -6.5628%     -3.9701%
>> 1024/1/-18.9329%  -13.6162%    -6.1547%
>> 1024/2/-0.3728%   -1.3181%     +0.9580%
>> 1024/4/+0.0125%   -3.6338%     +3.7838%
>> 1024/8/-0.0030%   -2.7282%     +2.8017%
>> 4096/1/+16.9367%  -1.9435%     +19.2543%
>> 4096/2/+0.0121%   -6.1682%     +6.5866%
>> 4096/4/+0.0019%   -3.8510%     +4.0072%
>> 4096/8/-0.0222%   -4.1368%     +4.2922%
>> 16384/1/-0.0026%  -8.6892%     +9.5132%
>> 16384/2/-0.0012%  -10.1676%    +11.3171%
>> 16384/4/+0.0196%  -1.2551%     +1.2908%
>> 16384/8/+0.1303%  -3.2634%     +3.5082%
>> 65535/1/+0.0019%  -3.4694%     +3.5961%
>> 65535/2/-0.0003%  -0.7635%     +0.7690%
>> 65535/4/-0.0219%  -2.7875%     +2.8448%
>> 65535/8/+0.1137%  -2.7922%     +2.9894%
>>
>> TCP_RR:
>> size(B)/sessions/throughput/cpu/normalized thru/
>> 256/1/+1.9004%    -4.7985%     +7.0366%
>> 256/25/-4.7366%   -11.0809%    +7.1349%
>> 256/50/+3.9808%   -5.2037%     +9.6887%
>> 4096/1/+2.1619%   -0.7303%     +2.9134%
>> 4096/25/-13.1836% -14.7298%    +1.8134%
>> 4096/50/-11.1990% -15.4763%    +5.0605%
>>
>> Signed-off-by: Jason Wang <jasowang at redhat.com>
>
> Could you split RX/TX parts out please, and benchmark separately?
>
> They are really independent.
Ok.

Possibly Parallel Threads

Search for more seemingly similar threads

Linux Virtualization - Aug 2014 - [PATCH net-next] vhost_net: stop rx net polling when possible

[PATCH net-next] vhost_net: stop rx net polling when possible

[PATCH net-next] vhost_net: stop rx net polling when possible

[PATCH net-next] vhost_net: stop rx net polling when possible

Possibly Parallel Threads