netback seemed to be somewhat confused about the napi budget parameter. The
parameter is supposed to limit the number of skgs process in each poll, but
netback had this confused with grant operations.

This patch fixes that, properly limiting the work done in each poll. Note
that this limit makes sure we do not process any more data from the shared
ring than we intend to pass back from the poll. This is important to
prevent tx_queue potentially growing without bound.

This patch also changes the RING_FINAL_CHECK_FOR_REQUESTS in
xenvif_build_tx_gops to a check for RING_HAS_UNCONSUMED_REQUESTS as the
former call has the side effect of advancing the ring event pointer and
therefore inviting another interrupt from the frontend before the napi
poll has actually finished, thereby defeating the point of napi.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
---
v2:
 - More elaborate description of the problem and the fix in the comment.
 - Modified the check in xenvif_build_tx_gops.

 drivers/net/xen-netback/netback.c |   17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 43341b8..0c8ca76 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1351,14 +1351,15 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
 	return false;
 }
 
-static unsigned xenvif_tx_build_gops(struct xenvif *vif)
+static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
 {
 	struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop;
 	struct sk_buff *skb;
 	int ret;
 
 	while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
-		< MAX_PENDING_REQS)) {
+		< MAX_PENDING_REQS) &&
+	       (skb_queue_len(&vif->tx_queue) < budget)) {
 		struct xen_netif_tx_request txreq;
 		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
 		struct page *page;
@@ -1380,8 +1381,7 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif)
 			continue;
 		}
 
-		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
-		if (!work_to_do)
+		if (!RING_HAS_UNCONSUMED_REQUESTS(&vif->tx))
 			break;
 
 		idx = vif->tx.req_cons;
@@ -1520,14 +1520,13 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif)
 }
 
 
-static int xenvif_tx_submit(struct xenvif *vif, int budget)
+static int xenvif_tx_submit(struct xenvif *vif)
 {
 	struct gnttab_copy *gop = vif->tx_copy_ops;
 	struct sk_buff *skb;
 	int work_done = 0;
 
-	while (work_done < budget &&
-	       (skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
+	while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
 		struct xen_netif_tx_request *txp;
 		u16 pending_idx;
 		unsigned data_len;
@@ -1602,14 +1601,14 @@ int xenvif_tx_action(struct xenvif *vif, int budget)
 	if (unlikely(!tx_work_todo(vif)))
 		return 0;
 
-	nr_gops = xenvif_tx_build_gops(vif);
+	nr_gops = xenvif_tx_build_gops(vif, budget);
 
 	if (nr_gops == 0)
 		return 0;
 
 	gnttab_batch_copy(vif->tx_copy_ops, nr_gops);
 
-	work_done = xenvif_tx_submit(vif, nr_gops);
+	work_done = xenvif_tx_submit(vif);
 
 	return work_done;
 }
-- 
1.7.10.4
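
For reference, the "advancing the ring event pointer" side effect described
above comes from the final-check macro itself. The following is a paraphrased
sketch of RING_FINAL_CHECK_FOR_REQUESTS from include/xen/interface/io/ring.h
(reconstructed from memory, so the exact body may differ slightly between
versions):

/* Paraphrased sketch of the ring macro. If no unconsumed requests are
 * found, it re-arms frontend notifications by writing req_event and then
 * re-checks; that write is what invites another interrupt from the
 * frontend mid-poll, which the plain RING_HAS_UNCONSUMED_REQUESTS check
 * avoids. */
#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do {		\
	(_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);		\
	if (_work_to_do) break;						\
	(_r)->sring->req_event = (_r)->req_cons + 1;			\
	mb();								\
	(_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);		\
} while (0)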
Paul Durrant
2013-Dec-10 11:32 UTC
Re: [PATCH net v2] xen-netback: fix abuse of napi budget
> -----Original Message-----
> From: Paul Durrant [mailto:paul.durrant@citrix.com]
> Sent: 10 December 2013 11:28
> To: xen-devel@lists.xen.org; netdev@vger.kernel.org
> Cc: Paul Durrant; Wei Liu; Ian Campbell; David Vrabel
> Subject: [PATCH net v2] xen-netback: fix abuse of napi budget
>
> netback seemed to be somewhat confused about the napi budget parameter. The
> parameter is supposed to limit the number of skgs process in each poll, but
> netback had this confused with grant operations.
>
> This patch fixes that, properly limiting the work done in each poll. Note
> that this limit makes sure we do not process any more data from the shared
> ring than we intend to pass back from the poll. This is important to
> prevent tx_queue potentially growing without bound.
>
> This patch also changes the RING_FINAL_CHECK_FOR_REQUESTS in
> xenvif_build_tx_gops to a check for RING_HAS_UNCONSUMED_REQUESTS as the
> former call has the side effect of advancing the ring event pointer and
> therefore inviting another interrupt from the frontend before the napi
> poll has actually finished, thereby defeating the point of napi.
>
> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>

Sorry, this patch is incomplete. I need to remove the decrement of the
work_to_do variable in xenvif_tx_build_gops too.

  Paul

> Cc: Wei Liu <wei.liu2@citrix.com>
> Cc: Ian Campbell <ian.campbell@citrix.com>
> Cc: David Vrabel <david.vrabel@citrix.com>
> ---
> v2:
>  - More elaborate description of the problem and the fix in the comment.
>  - Modified the check in xenvif_build_tx_gops.
>
>  drivers/net/xen-netback/netback.c |   17 ++++++++---------
>  1 file changed, 8 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
> index 43341b8..0c8ca76 100644
> --- a/drivers/net/xen-netback/netback.c
> +++ b/drivers/net/xen-netback/netback.c
> @@ -1351,14 +1351,15 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
>  	return false;
>  }
>
> -static unsigned xenvif_tx_build_gops(struct xenvif *vif)
> +static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
>  {
>  	struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop;
>  	struct sk_buff *skb;
>  	int ret;
>
>  	while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
> -		< MAX_PENDING_REQS)) {
> +		< MAX_PENDING_REQS) &&
> +	       (skb_queue_len(&vif->tx_queue) < budget)) {
>  		struct xen_netif_tx_request txreq;
>  		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
>  		struct page *page;
> @@ -1380,8 +1381,7 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif)
>  			continue;
>  		}
>
> -		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
> -		if (!work_to_do)
> +		if (!RING_HAS_UNCONSUMED_REQUESTS(&vif->tx))
>  			break;
>
>  		idx = vif->tx.req_cons;
> @@ -1520,14 +1520,13 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif)
>  }
>
>
> -static int xenvif_tx_submit(struct xenvif *vif, int budget)
> +static int xenvif_tx_submit(struct xenvif *vif)
>  {
>  	struct gnttab_copy *gop = vif->tx_copy_ops;
>  	struct sk_buff *skb;
>  	int work_done = 0;
>
> -	while (work_done < budget &&
> -	       (skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
> +	while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
>  		struct xen_netif_tx_request *txp;
>  		u16 pending_idx;
>  		unsigned data_len;
> @@ -1602,14 +1601,14 @@ int xenvif_tx_action(struct xenvif *vif, int budget)
>  	if (unlikely(!tx_work_todo(vif)))
>  		return 0;
>
> -	nr_gops = xenvif_tx_build_gops(vif);
> +	nr_gops = xenvif_tx_build_gops(vif, budget);
>
>  	if (nr_gops == 0)
>  		return 0;
>
>  	gnttab_batch_copy(vif->tx_copy_ops, nr_gops);
>
> -	work_done = xenvif_tx_submit(vif, nr_gops);
> +	work_done = xenvif_tx_submit(vif);
>
>  	return work_done;
> }
> --
> 1.7.10.4
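
For context, the leftover work_to_do usage Paul refers to sits a little
further down the same loop. The excerpt below is an approximate
reconstruction from memory of the 3.13-era xenvif_tx_build_gops(), not the
exact driver source; the variable and function names are real, but the
surrounding lines are paraphrased:

		/* With RING_FINAL_CHECK_FOR_REQUESTS() removed, work_to_do
		 * is no longer initialised before these uses. */
		work_to_do--;			/* the decrement Paul mentions */
		vif->tx.req_cons = ++idx;

		memset(extras, 0, sizeof(extras));
		if (txreq.flags & XEN_NETTXF_extra_info) {
			work_to_do = xenvif_get_extras(vif, extras,
						       work_to_do);
			idx = vif->tx.req_cons;
		}

		ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do);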
Ian Campbell
2013-Dec-10 11:44 UTC
Re: [PATCH net v2] xen-netback: fix abuse of napi budget
On Tue, 2013-12-10 at 11:27 +0000, Paul Durrant wrote:
> netback seemed to be somewhat confused about the napi budget parameter. The
> parameter is supposed to limit the number of skgs process in each poll, but

skbs

> netback had this confused with grant operations.
>
> This patch fixes that, properly limiting the work done in each poll. Note
> that this limit makes sure we do not process any more data from the shared
> ring than we intend to pass back from the poll. This is important to
> prevent tx_queue potentially growing without bound.
>
> This patch also changes the RING_FINAL_CHECK_FOR_REQUESTS in
> xenvif_build_tx_gops to a check for RING_HAS_UNCONSUMED_REQUESTS as the
> former call has the side effect of advancing the ring event pointer and
> therefore inviting another interrupt from the frontend before the napi
> poll has actually finished, thereby defeating the point of napi.

Can you add a reminder of when/where the ring event pointer is
eventually advanced now please.

This seems like a slightly subtle change, especially when mixed in with
the budget handling changes. Please can we do it in a separate patch.

>
> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> Cc: Wei Liu <wei.liu2@citrix.com>
> Cc: Ian Campbell <ian.campbell@citrix.com>
> Cc: David Vrabel <david.vrabel@citrix.com>
> ---
> v2:
>  - More elaborate description of the problem and the fix in the comment.
>  - Modified the check in xenvif_build_tx_gops.
>
>  drivers/net/xen-netback/netback.c |   17 ++++++++---------
>  1 file changed, 8 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
> index 43341b8..0c8ca76 100644
> --- a/drivers/net/xen-netback/netback.c
> +++ b/drivers/net/xen-netback/netback.c
> @@ -1351,14 +1351,15 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
>  	return false;
>  }
>
> -static unsigned xenvif_tx_build_gops(struct xenvif *vif)
> +static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
>  {
>  	struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop;
>  	struct sk_buff *skb;
>  	int ret;
>
>  	while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
> -		< MAX_PENDING_REQS)) {
> +		< MAX_PENDING_REQS) &&
> +	       (skb_queue_len(&vif->tx_queue) < budget)) {
>  		struct xen_netif_tx_request txreq;
>  		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
>  		struct page *page;
> @@ -1380,8 +1381,7 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif)
>  			continue;
>  		}
>
> -		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);

Is work_to_do now used uninitialised at various subsequent points? (in
your next reply you mentioned the decrease, but what about e.g. the call
to xenvif_get_extras?)

> -		if (!work_to_do)
> +		if (!RING_HAS_UNCONSUMED_REQUESTS(&vif->tx))
>  			break;
>
>  		idx = vif->tx.req_cons;
> @@ -1520,14 +1520,13 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif)
>  }
>
>
> -static int xenvif_tx_submit(struct xenvif *vif, int budget)
> +static int xenvif_tx_submit(struct xenvif *vif)
>  {
>  	struct gnttab_copy *gop = vif->tx_copy_ops;
>  	struct sk_buff *skb;
>  	int work_done = 0;
>
> -	while (work_done < budget &&
> -	       (skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
> +	while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
>  		struct xen_netif_tx_request *txp;
>  		u16 pending_idx;
>  		unsigned data_len;
> @@ -1602,14 +1601,14 @@ int xenvif_tx_action(struct xenvif *vif, int budget)
>  	if (unlikely(!tx_work_todo(vif)))
>  		return 0;
>
> -	nr_gops = xenvif_tx_build_gops(vif);
> +	nr_gops = xenvif_tx_build_gops(vif, budget);
>
>  	if (nr_gops == 0)
>  		return 0;
>
>  	gnttab_batch_copy(vif->tx_copy_ops, nr_gops);
>
> -	work_done = xenvif_tx_submit(vif, nr_gops);
> +	work_done = xenvif_tx_submit(vif);
>
>  	return work_done;
> }
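
As background for the budget discussion, the sketch below shows the generic
driver-side napi contract that the patch is trying to honour. It is an
illustrative example only, not the actual xen-netback poll handler:
example_poll, example_tx_action and struct example_vif are made-up names;
container_of() and napi_complete() are the real kernel APIs.

#include <linux/netdevice.h>

struct example_vif {
	struct napi_struct napi;
	/* ... device state ... */
};

/* Stand-in for the driver's tx processing; returns the number of skbs
 * actually completed in this poll, never more than budget. */
static int example_tx_action(struct example_vif *vif, int budget)
{
	return 0;
}

static int example_poll(struct napi_struct *napi, int budget)
{
	struct example_vif *vif = container_of(napi, struct example_vif, napi);
	int work_done;

	/* The poll handler is called with a budget of skbs and must return
	 * how many it completed; processing more ring work than is reported
	 * back as work_done is the abuse the patch above fixes. */
	work_done = example_tx_action(vif, budget);

	if (work_done < budget) {
		/* Only when under budget: stop polling and re-enable the
		 * device (or event-channel) interrupt. */
		napi_complete(napi);
	}

	return work_done;
}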
Paul Durrant
2013-Dec-10 11:50 UTC
Re: [PATCH net v2] xen-netback: fix abuse of napi budget
> -----Original Message-----
> From: Ian Campbell
> Sent: 10 December 2013 11:44
> To: Paul Durrant
> Cc: xen-devel@lists.xen.org; netdev@vger.kernel.org; Wei Liu; David Vrabel
> Subject: Re: [PATCH net v2] xen-netback: fix abuse of napi budget
>
> On Tue, 2013-12-10 at 11:27 +0000, Paul Durrant wrote:
> > netback seemed to be somewhat confused about the napi budget parameter. The
> > parameter is supposed to limit the number of skgs process in each poll, but
>
> skbs
>
> > netback had this confused with grant operations.
> >
> > This patch fixes that, properly limiting the work done in each poll. Note
> > that this limit makes sure we do not process any more data from the shared
> > ring than we intend to pass back from the poll. This is important to
> > prevent tx_queue potentially growing without bound.
> >
> > This patch also changes the RING_FINAL_CHECK_FOR_REQUESTS in
> > xenvif_build_tx_gops to a check for RING_HAS_UNCONSUMED_REQUESTS as the
> > former call has the side effect of advancing the ring event pointer and
> > therefore inviting another interrupt from the frontend before the napi
> > poll has actually finished, thereby defeating the point of napi.
>
> Can you add a reminder of when/where the ring event pointer is
> eventually advanced now please.
>
> This seems like a slightly subtle change, especially when mixed in with
> the budget handling changes. Please can we do it in a separate patch.
>

Ok. Given I also spotted the mistake with work_to_do, I'll make this a
2 patch series.

  Paul

> >
> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > Cc: Wei Liu <wei.liu2@citrix.com>
> > Cc: Ian Campbell <ian.campbell@citrix.com>
> > Cc: David Vrabel <david.vrabel@citrix.com>
> > ---
> > v2:
> >  - More elaborate description of the problem and the fix in the comment.
> >  - Modified the check in xenvif_build_tx_gops.
> >
> >  drivers/net/xen-netback/netback.c |   17 ++++++++---------
> >  1 file changed, 8 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
> > index 43341b8..0c8ca76 100644
> > --- a/drivers/net/xen-netback/netback.c
> > +++ b/drivers/net/xen-netback/netback.c
> > @@ -1351,14 +1351,15 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
> >  	return false;
> >  }
> >
> > -static unsigned xenvif_tx_build_gops(struct xenvif *vif)
> > +static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
> >  {
> >  	struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop;
> >  	struct sk_buff *skb;
> >  	int ret;
> >
> >  	while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
> > -		< MAX_PENDING_REQS)) {
> > +		< MAX_PENDING_REQS) &&
> > +	       (skb_queue_len(&vif->tx_queue) < budget)) {
> >  		struct xen_netif_tx_request txreq;
> >  		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
> >  		struct page *page;
> > @@ -1380,8 +1381,7 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif)
> >  			continue;
> >  		}
> >
> > -		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
>
> Is work_to_do now used uninitialised at various subsequent points? (in
> your next reply you mentioned the decrease, but what about e.g. the call
> to xenvif_get_extras?)
>
> > -		if (!work_to_do)
> > +		if (!RING_HAS_UNCONSUMED_REQUESTS(&vif->tx))
> >  			break;
> >
> >  		idx = vif->tx.req_cons;
> > @@ -1520,14 +1520,13 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif)
> >  }
> >
> >
> > -static int xenvif_tx_submit(struct xenvif *vif, int budget)
> > +static int xenvif_tx_submit(struct xenvif *vif)
> >  {
> >  	struct gnttab_copy *gop = vif->tx_copy_ops;
> >  	struct sk_buff *skb;
> >  	int work_done = 0;
> >
> > -	while (work_done < budget &&
> > -	       (skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
> > +	while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
> >  		struct xen_netif_tx_request *txp;
> >  		u16 pending_idx;
> >  		unsigned data_len;
> > @@ -1602,14 +1601,14 @@ int xenvif_tx_action(struct xenvif *vif, int budget)
> >  	if (unlikely(!tx_work_todo(vif)))
> >  		return 0;
> >
> > -	nr_gops = xenvif_tx_build_gops(vif);
> > +	nr_gops = xenvif_tx_build_gops(vif, budget);
> >
> >  	if (nr_gops == 0)
> >  		return 0;
> >
> >  	gnttab_batch_copy(vif->tx_copy_ops, nr_gops);
> >
> > -	work_done = xenvif_tx_submit(vif, nr_gops);
> > +	work_done = xenvif_tx_submit(vif);
> >
> >  	return work_done;
> > }
>