thr3ads.net - Virtualization - [PATCH 1/3] vhost-net: check the support of mergeable buffer outside the receive loop [Jan 2011]

If this information is useful, please help other people find it:
Share via:

Jason Wang

2011-Jan-17 08:10 UTC

[PATCH 1/3] vhost-net: check the support of mergeable buffer outside the receive loop

No need to check the support of mergeable buffers inside the receive
loop, as the whole of handle_rx_xx() is in the read critical region.  So
this patch moves the check ahead of the receiving loop.

Signed-off-by: Jason Wang <jasowang at redhat.com>
---
 drivers/vhost/net.c |    5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 14fc189..95e49de 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -411,7 +411,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
 	};
 
 	size_t total_len = 0;
-	int err, headcount;
+	int err, headcount, mergeable;
 	size_t vhost_hlen, sock_hlen;
 	size_t vhost_len, sock_len;
 	struct socket *sock = rcu_dereference(vq->private_data);
@@ -425,6 +425,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
 
 	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
 		vq->log : NULL;
+	mergeable = vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF);
 
 	while ((sock_len = peek_head_len(sock->sk))) {
 		sock_len += sock_hlen;
@@ -474,7 +475,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
 			break;
 		}
 		/* TODO: Should check and handle checksum. */
-		if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) &&
+		if (likely(mergeable) &&
 		    memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount,
 				      offsetof(typeof(hdr), num_buffers),
 				      sizeof hdr.num_buffers)) {

Jason Wang

2011-Jan-17 08:11 UTC

head link

[PATCH 2/3] vhost-net: Unify the code of mergeable and big buffer handling

Code duplication was found between the handling of mergeable and big
buffers, so this patch tries to unify them. This can easily be done
by adding a quota to get_rx_bufs() that limits the
number of buffers it returns (for mergeable buffers the quota is
simply UIO_MAXIOV; for big buffers the quota is just 1), and then the
previous handle_rx_mergeable() can be reused for big buffers as well.

Signed-off-by: Jason Wang <jasowang at redhat.com>
---
 drivers/vhost/net.c |  128 +++------------------------------------------------
 1 files changed, 7 insertions(+), 121 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 95e49de..c32a2e4 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -227,6 +227,7 @@ static int peek_head_len(struct sock *sk)
  * @iovcount	- returned count of io vectors we fill
  * @log		- vhost log
  * @log_num	- log offset
+ * @quota       - headcount quota, 1 for big buffer
  *	returns number of buffer heads allocated, negative on error
  */
 static int get_rx_bufs(struct vhost_virtqueue *vq,
@@ -234,7 +235,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 		       int datalen,
 		       unsigned *iovcount,
 		       struct vhost_log *log,
-		       unsigned *log_num)
+		       unsigned *log_num,
+		       unsigned int quota)
 {
 	unsigned int out, in;
 	int seg = 0;
@@ -242,7 +244,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 	unsigned d;
 	int r, nlogs = 0;
 
-	while (datalen > 0) {
+	while (datalen > 0 && headcount < quota) {
 		if (unlikely(seg >= UIO_MAXIOV)) {
 			r = -ENOBUFS;
 			goto err;
@@ -282,116 +284,7 @@ err:
 
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
-static void handle_rx_big(struct vhost_net *net)
-{
-	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
-	unsigned out, in, log, s;
-	int head;
-	struct vhost_log *vq_log;
-	struct msghdr msg = {
-		.msg_name = NULL,
-		.msg_namelen = 0,
-		.msg_control = NULL, /* FIXME: get and handle RX aux data. */
-		.msg_controllen = 0,
-		.msg_iov = vq->iov,
-		.msg_flags = MSG_DONTWAIT,
-	};
-
-	struct virtio_net_hdr hdr = {
-		.flags = 0,
-		.gso_type = VIRTIO_NET_HDR_GSO_NONE
-	};
-
-	size_t len, total_len = 0;
-	int err;
-	size_t hdr_size;
-	struct socket *sock = rcu_dereference(vq->private_data);
-	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
-		return;
-
-	mutex_lock(&vq->mutex);
-	vhost_disable_notify(vq);
-	hdr_size = vq->vhost_hlen;
-
-	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
-		vq->log : NULL;
-
-	for (;;) {
-		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
-					 ARRAY_SIZE(vq->iov),
-					 &out, &in,
-					 vq_log, &log);
-		/* On error, stop handling until the next kick. */
-		if (unlikely(head < 0))
-			break;
-		/* OK, now we need to know about added descriptors. */
-		if (head == vq->num) {
-			if (unlikely(vhost_enable_notify(vq))) {
-				/* They have slipped one in as we were
-				 * doing that: check again. */
-				vhost_disable_notify(vq);
-				continue;
-			}
-			/* Nothing new?  Wait for eventfd to tell us
-			 * they refilled. */
-			break;
-		}
-		/* We don't need to be notified again. */
-		if (out) {
-			vq_err(vq, "Unexpected descriptor format for RX: "
-			       "out %d, int %d\n",
-			       out, in);
-			break;
-		}
-		/* Skip header. TODO: support TSO/mergeable rx buffers. */
-		s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in);
-		msg.msg_iovlen = in;
-		len = iov_length(vq->iov, in);
-		/* Sanity check */
-		if (!len) {
-			vq_err(vq, "Unexpected header len for RX: "
-			       "%zd expected %zd\n",
-			       iov_length(vq->hdr, s), hdr_size);
-			break;
-		}
-		err = sock->ops->recvmsg(NULL, sock, &msg,
-					 len, MSG_DONTWAIT | MSG_TRUNC);
-		/* TODO: Check specific error and bomb out unless EAGAIN? */
-		if (err < 0) {
-			vhost_discard_vq_desc(vq, 1);
-			break;
-		}
-		/* TODO: Should check and handle checksum. */
-		if (err > len) {
-			pr_debug("Discarded truncated rx packet: "
-				 " len %d > %zd\n", err, len);
-			vhost_discard_vq_desc(vq, 1);
-			continue;
-		}
-		len = err;
-		err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size);
-		if (err) {
-			vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n",
-			       vq->iov->iov_base, err);
-			break;
-		}
-		len += hdr_size;
-		vhost_add_used_and_signal(&net->dev, vq, head, len);
-		if (unlikely(vq_log))
-			vhost_log_write(vq, vq_log, log, len);
-		total_len += len;
-		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
-			vhost_poll_queue(&vq->poll);
-			break;
-		}
-	}
-
-	mutex_unlock(&vq->mutex);
-}
-
-/* Expects to be always run from workqueue - which acts as
- * read-size critical section for our kind of RCU. */
-static void handle_rx_mergeable(struct vhost_net *net)
+static void handle_rx(struct vhost_net *net)
 {
 	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
 	unsigned uninitialized_var(in), log;
@@ -431,7 +324,8 @@ static void handle_rx_mergeable(struct vhost_net *net)
 		sock_len += sock_hlen;
 		vhost_len = sock_len + vhost_hlen;
 		headcount = get_rx_bufs(vq, vq->heads, vhost_len,
-					&in, vq_log, &log);
+					&in, vq_log, &log,
+					likely(mergeable) ? UIO_MAXIOV : 1);
 		/* On error, stop handling until the next kick. */
 		if (unlikely(headcount < 0))
 			break;
@@ -497,14 +391,6 @@ static void handle_rx_mergeable(struct vhost_net *net)
 	mutex_unlock(&vq->mutex);
 }
 
-static void handle_rx(struct vhost_net *net)
-{
-	if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF))
-		handle_rx_mergeable(net);
-	else
-		handle_rx_big(net);
-}
-
 static void handle_tx_kick(struct vhost_work *work)
 {
 	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,

Jason Wang

2011-Jan-17 08:11 UTC

head link

[PATCH 3/3] vhost-net: use lock_sock_fast() in peek_head_len()

We can use lock_sock_fast() instead of lock_sock() in order to get
speedup in peek_head_len().

Signed-off-by: Jason Wang <jasowang at redhat.com>
---
 drivers/vhost/net.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index c32a2e4..50b622a 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -211,12 +211,12 @@ static int peek_head_len(struct sock *sk)
 {
 	struct sk_buff *head;
 	int len = 0;
+	bool slow = lock_sock_fast(sk);
 
-	lock_sock(sk);
 	head = skb_peek(&sk->sk_receive_queue);
 	if (head)
 		len = head->len;
-	release_sock(sk);
+	unlock_sock_fast(sk, slow);
 	return len;
 }

Michael S. Tsirkin

2011-Jan-17 08:46 UTC

head link

[PATCH 1/3] vhost-net: check the support of mergeable buffer outside the receive loop

On Mon, Jan 17, 2011 at 04:10:59PM +0800, Jason Wang wrote:
> No need to check the support of mergeable buffer inside the recevie
> loop as the whole handle_rx()_xx is in the read critical region.  So
> this patch move it ahead of the receiving loop.
> 
> Signed-off-by: Jason Wang <jasowang at redhat.com>
Well, the feature check is mostly just features & bit,
so why would it be slower? Because of the RCU
adding memory barriers? Do you observe a
measurable speedup with this patch?

Apropos, I noticed that the check in vhost_has_feature
is wrong: it uses the same kind of RCU as the
private pointer. So we'll have to fix that properly
by adding more lockdep classes, but for now
we'll need to make
the check 1 || lockdep_is_held(&dev->mutex);
and add a TODO.
> ---
>  drivers/vhost/net.c |    5 +++--
>  1 files changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 14fc189..95e49de 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -411,7 +411,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
>  	};
>  
>  	size_t total_len = 0;
> -	int err, headcount;
> +	int err, headcount, mergeable;
>  	size_t vhost_hlen, sock_hlen;
>  	size_t vhost_len, sock_len;
>  	struct socket *sock = rcu_dereference(vq->private_data);
> @@ -425,6 +425,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
>  
>  	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
>  		vq->log : NULL;
> +	mergeable = vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF);
>  
>  	while ((sock_len = peek_head_len(sock->sk))) {
>  		sock_len += sock_hlen;
> @@ -474,7 +475,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
>  			break;
>  		}
>  		/* TODO: Should check and handle checksum. */
> -		if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) &&
> +		if (likely(mergeable) &&
>  		    memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount,
>  				      offsetof(typeof(hdr), num_buffers),
>  				      sizeof hdr.num_buffers)) {

Eric Dumazet

2011-Jan-17 09:33 UTC

head link

[PATCH 3/3] vhost-net: use lock_sock_fast() in peek_head_len()

Le lundi 17 janvier 2011 à 16:11 +0800, Jason Wang a écrit :
> We can use lock_sock_fast() instead of lock_sock() in order to get
> speedup in peek_head_len().
> 
> Signed-off-by: Jason Wang <jasowang at redhat.com>
> ---
>  drivers/vhost/net.c |    4 ++--
>  1 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index c32a2e4..50b622a 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -211,12 +211,12 @@ static int peek_head_len(struct sock *sk)
>  {
>  	struct sk_buff *head;
>  	int len = 0;
> +	bool slow = lock_sock_fast(sk);
>  
> -	lock_sock(sk);
>  	head = skb_peek(&sk->sk_receive_queue);
>  	if (head)
>  		len = head->len;
> -	release_sock(sk);
> +	unlock_sock_fast(sk, slow);
>  	return len;
>  }
>
Acked-by: Eric Dumazet <eric.dumazet at gmail.com>

Michael S. Tsirkin

2011-Jan-17 09:57 UTC

head link

[PATCH 3/3] vhost-net: use lock_sock_fast() in peek_head_len()

On Mon, Jan 17, 2011 at 04:11:17PM +0800, Jason Wang wrote:
> We can use lock_sock_fast() instead of lock_sock() in order to get
> speedup in peek_head_len().
> 
> Signed-off-by: Jason Wang <jasowang at redhat.com>
Queued for 2.6.39, thanks everyone.
> ---
>  drivers/vhost/net.c |    4 ++--
>  1 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index c32a2e4..50b622a 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -211,12 +211,12 @@ static int peek_head_len(struct sock *sk)
>  {
>  	struct sk_buff *head;
>  	int len = 0;
> +	bool slow = lock_sock_fast(sk);
>  
> -	lock_sock(sk);
>  	head = skb_peek(&sk->sk_receive_queue);
>  	if (head)
>  		len = head->len;
> -	release_sock(sk);
> +	unlock_sock_fast(sk, slow);
>  	return len;
>  }
>

Michael S. Tsirkin

2011-Mar-13 15:06 UTC

head link

[PATCH 3/3] vhost-net: use lock_sock_fast() in peek_head_len()

On Mon, Jan 17, 2011 at 04:11:17PM +0800, Jason Wang wrote:
> We can use lock_sock_fast() instead of lock_sock() in order to get
> speedup in peek_head_len().
> 
> Signed-off-by: Jason Wang <jasowang at redhat.com>
> ---
>  drivers/vhost/net.c |    4 ++--
>  1 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index c32a2e4..50b622a 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -211,12 +211,12 @@ static int peek_head_len(struct sock *sk)
>  {
>  	struct sk_buff *head;
>  	int len = 0;
> +	bool slow = lock_sock_fast(sk);
>  
> -	lock_sock(sk);
>  	head = skb_peek(&sk->sk_receive_queue);
>  	if (head)
>  		len = head->len;
> -	release_sock(sk);
> +	unlock_sock_fast(sk, slow);
>  	return len;
>  }
>  
Wanted to apply this, but looking at the code I think the lock_sock here
is wrong. What we really need is to handle the case where the skb is
pulled from the receive queue after skb_peek.  However this is not the
right lock to use for that, sk_receive_queue.lock is.
So I expect the following is the right way to handle this.
Comments?

Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 0329c41..5720301 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -213,12 +213,13 @@ static int peek_head_len(struct sock *sk)
 {
 	struct sk_buff *head;
 	int len = 0;
+	unsigned long flags;
 
-	lock_sock(sk);
+	spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
 	head = skb_peek(&sk->sk_receive_queue);
-	if (head)
+	if (likely(head))
 		len = head->len;
-	release_sock(sk);
+	spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags);
 	return len;
 }

Eric Dumazet

2011-Mar-13 15:52 UTC

head link

[PATCH 3/3] vhost-net: use lock_sock_fast() in peek_head_len()

Le dimanche 13 mars 2011 à 17:06 +0200, Michael S. Tsirkin a écrit :
> On Mon, Jan 17, 2011 at 04:11:17PM +0800, Jason Wang wrote:
> > We can use lock_sock_fast() instead of lock_sock() in order to get
> > speedup in peek_head_len().
> > 
> > Signed-off-by: Jason Wang <jasowang at redhat.com>
> > ---
> >  drivers/vhost/net.c |    4 ++--
> >  1 files changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> > index c32a2e4..50b622a 100644
> > --- a/drivers/vhost/net.c
> > +++ b/drivers/vhost/net.c
> > @@ -211,12 +211,12 @@ static int peek_head_len(struct sock *sk)
> >  {
> >  	struct sk_buff *head;
> >  	int len = 0;
> > +	bool slow = lock_sock_fast(sk);
> >  
> > -	lock_sock(sk);
> >  	head = skb_peek(&sk->sk_receive_queue);
> >  	if (head)
> >  		len = head->len;
> > -	release_sock(sk);
> > +	unlock_sock_fast(sk, slow);
> >  	return len;
> >  }
> >  
> 
> Wanted to apply this, but looking at the code I think the lock_sock here
> is wrong. What we really need is to handle the case where the skb is
> pulled from the receive queue after skb_peek.  However this is not the
> right lock to use for that, sk_receive_queue.lock is.
> So I expect the following is the right way to handle this.
> Comments?
> 
> Signed-off-by: Michael S. Tsirkin <mst at redhat.com>
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 0329c41..5720301 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -213,12 +213,13 @@ static int peek_head_len(struct sock *sk)
>  {
>  	struct sk_buff *head;
>  	int len = 0;
> +	unsigned long flags;
>  
> -	lock_sock(sk);
> +	spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
>  	head = skb_peek(&sk->sk_receive_queue);
> -	if (head)
> +	if (likely(head))
>  		len = head->len;
> -	release_sock(sk);
> +	spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags);
>  	return len;
>  }
>  
You may be right, only way to be sure is to check the other side.

If it uses skb_queue_tail(), then yes, your patch is fine.

If other side did not lock socket, then your patch is a bug fix.

Michael S. Tsirkin

2011-Mar-13 16:19 UTC

head link

[PATCH 3/3] vhost-net: use lock_sock_fast() in peek_head_len()

On Sun, Mar 13, 2011 at 04:52:50PM +0100, Eric Dumazet wrote:
> Le dimanche 13 mars 2011 à 17:06 +0200, Michael S. Tsirkin a écrit :
> > On Mon, Jan 17, 2011 at 04:11:17PM +0800, Jason Wang wrote:
> > > We can use lock_sock_fast() instead of lock_sock() in order to get
> > > speedup in peek_head_len().
> > > 
> > > Signed-off-by: Jason Wang <jasowang at redhat.com>
> > > ---
> > >  drivers/vhost/net.c |    4 ++--
> > >  1 files changed, 2 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> > > index c32a2e4..50b622a 100644
> > > --- a/drivers/vhost/net.c
> > > +++ b/drivers/vhost/net.c
> > > @@ -211,12 +211,12 @@ static int peek_head_len(struct sock *sk)
> > >  {
> > >  	struct sk_buff *head;
> > >  	int len = 0;
> > > +	bool slow = lock_sock_fast(sk);
> > >  
> > > -	lock_sock(sk);
> > >  	head = skb_peek(&sk->sk_receive_queue);
> > >  	if (head)
> > >  		len = head->len;
> > > -	release_sock(sk);
> > > +	unlock_sock_fast(sk, slow);
> > >  	return len;
> > >  }
> > >  
> > 
> > Wanted to apply this, but looking at the code I think the lock_sock here
> > is wrong. What we really need is to handle the case where the skb is
> > pulled from the receive queue after skb_peek.  However this is not the
> > right lock to use for that, sk_receive_queue.lock is.
> > So I expect the following is the right way to handle this.
> > Comments?
> > 
> > Signed-off-by: Michael S. Tsirkin <mst at redhat.com>
> > 
> > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> > index 0329c41..5720301 100644
> > --- a/drivers/vhost/net.c
> > +++ b/drivers/vhost/net.c
> > @@ -213,12 +213,13 @@ static int peek_head_len(struct sock *sk)
> >  {
> >  	struct sk_buff *head;
> >  	int len = 0;
> > +	unsigned long flags;
> >  
> > -	lock_sock(sk);
> > +	spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
> >  	head = skb_peek(&sk->sk_receive_queue);
> > -	if (head)
> > +	if (likely(head))
> >  		len = head->len;
> > -	release_sock(sk);
> > +	spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags);
> >  	return len;
> >  }
> >  
> 
> You may be right, only way to be sure is to check the other side.
> 
> If it uses skb_queue_tail(), then yes, your patch is fine.
> 
> If other side did not lock socket, then your patch is a bug fix.
> 
> 
Other side is in drivers/net/tun.c and net/packet/af_packet.c
At least wrt tun it seems clear socket is not locked.
Besides queue, dequeue seems to be done without socket locked.

-- 
MST

Eric Dumazet

2011-Mar-13 16:32 UTC

head link

[PATCH 3/3] vhost-net: use lock_sock_fast() in peek_head_len()

Le dimanche 13 mars 2011 à 18:19 +0200, Michael S. Tsirkin a écrit :
> Other side is in drivers/net/tun.c and net/packet/af_packet.c
> At least wrt tun it seems clear socket is not locked.
Yes (assuming you refer to tun_net_xmit())
> Besides queue, dequeue seems to be done without socket locked.
> 
It seems this code (assuming you speak of drivers/vhost/net.c ?) has
some races indeed.

Michael S. Tsirkin

2011-Mar-13 16:43 UTC

head link

[PATCH 3/3] vhost-net: use lock_sock_fast() in peek_head_len()

On Sun, Mar 13, 2011 at 05:32:07PM +0100, Eric Dumazet wrote:
> Le dimanche 13 mars 2011 à 18:19 +0200, Michael S. Tsirkin a écrit :
> 
> > Other side is in drivers/net/tun.c and net/packet/af_packet.c
> > At least wrt tun it seems clear socket is not locked.
> 
> Yes (assuming you refer to tun_net_xmit())
> 
> > Besides queue, dequeue seems to be done without socket locked.
> > 
> 
> It seems this code (assuming you speak of drivers/vhost/net.c ?) has
> some races indeed.
> 
Hmm. Any more besides the one fixed here?

-- 
MST

Eric Dumazet

2011-Mar-13 17:41 UTC

head link

[PATCH 3/3] vhost-net: use lock_sock_fast() in peek_head_len()

Le dimanche 13 mars 2011 à 18:43 +0200, Michael S. Tsirkin a écrit :
> On Sun, Mar 13, 2011 at 05:32:07PM +0100, Eric Dumazet wrote:
> > Le dimanche 13 mars 2011 à 18:19 +0200, Michael S. Tsirkin a écrit :
> > 
> > > Other side is in drivers/net/tun.c and net/packet/af_packet.c
> > > At least wrt tun it seems clear socket is not locked.
> > 
> > Yes (assuming you refer to tun_net_xmit())
> > 
> > > Besides queue, dequeue seems to be done without socket locked.
> > > 
> > 
> > It seems this code (assuming you speak of drivers/vhost/net.c ?) has
> > some races indeed.
> > 
> 
> Hmm. Any more besides the one fixed here?
> 
If writers and readers don't share a common lock, how can they reliably
synchronize state?

For example, the check at line 420 seems unsafe or useless.

skb_queue_empty(&sock->sk->sk_receive_queue)

Reasonably Related Threads

Search for more seemingly similar threads

Virtualization - Jan 2011 - [PATCH 1/3] vhost-net: check the support of mergeable buffer outside the receive loop

[PATCH 1/3] vhost-net: check the support of mergeable buffer outside the receive loop

[PATCH 2/3] vhost-net: Unify the code of mergeable and big buffer handling

[PATCH 3/3] vhost-net: use lock_sock_fast() in peek_head_len()

[PATCH 1/3] vhost-net: check the support of mergeable buffer outside the receive loop

[PATCH 3/3] vhost-net: use lock_sock_fast() in peek_head_len()

[PATCH 3/3] vhost-net: use lock_sock_fast() in peek_head_len()

[PATCH 3/3] vhost-net: use lock_sock_fast() in peek_head_len()

[PATCH 3/3] vhost-net: use lock_sock_fast() in peek_head_len()

[PATCH 3/3] vhost-net: use lock_sock_fast() in peek_head_len()

[PATCH 3/3] vhost-net: use lock_sock_fast() in peek_head_len()

[PATCH 3/3] vhost-net: use lock_sock_fast() in peek_head_len()

[PATCH 3/3] vhost-net: use lock_sock_fast() in peek_head_len()

Reasonably Related Threads