thr3ads.net - Xen devel - [PATCH V4 6/7] xen-netback: coalesce slots in TX path and fix regressions [Apr 2013]

If this information is useful, please help other people find it:
Share via:

Wei Liu

2013-Apr-12 14:24 UTC

[PATCH V4 6/7] xen-netback: coalesce slots in TX path and fix regressions

This patch tries to coalesce tx requests when constructing grant copy
structures. It enables netback to deal with situation when frontend''s
MAX_SKB_FRAGS is larger than backend''s MAX_SKB_FRAGS.

With the help of coalescing, this patch tries to address two regressions and
avoid reopening the security hole in XSA-39.

Regression 1. The reduction of the number of supported ring entries (slots)
per packet (from 18 to 17). This regression has been around for some time but
remains unnoticed until XSA-39 security fix. This is fixed by coalescing
slots.

Regression 2. The XSA-39 security fix turning "too many frags" errors
from
just dropping the packet to a fatal error and disabling the VIF. This is fixed
by coalescing slots (handling 18 slots when backend''s MAX_SKB_FRAGS is
17)
which rules out false positive (using 18 slots is legit) and dropping packets
using 19 to `max_skb_slots` slots.

To avoid reopening security hole in XSA-39, frontend sending packet using more
than max_skb_slots is considered malicious.

The behavior of netback for packet is thus:

    1-18            slots: valid
   19-max_skb_slots slots: drop and respond with an error
   max_skb_slots+   slots: fatal error

max_skb_slots is configurable by admin, default value is 20.

Also change variable name from "frags" to "slots" in
netbk_count_requests.

Please note that RX path still has dependency on MAX_SKB_FRAGS. This will be
fixed with separate patch.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 drivers/net/xen-netback/netback.c |  277 ++++++++++++++++++++++++++++++-------
 include/xen/interface/io/netif.h  |   18 +++
 2 files changed, 246 insertions(+), 49 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c
b/drivers/net/xen-netback/netback.c
index 6e8e51a..00ae841 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -47,11 +47,47 @@
 #include <asm/xen/hypercall.h>
 #include <asm/xen/page.h>
 
+/*
+ * This is the maximum slots a skb can have. If a guest sends a skb
+ * which exceeds this limit it is considered malicious.
+ */
+#define MAX_SKB_SLOTS_DEFAULT 20
+static unsigned int max_skb_slots = MAX_SKB_SLOTS_DEFAULT;
+
+static int max_skb_slots_set(const char *val, const struct kernel_param *kp)
+{
+	int ret;
+	unsigned int param = 0;
+
+	ret = kstrtouint(val, 10, &param);
+
+	if (ret < 0 || param < XEN_NETIF_NR_SLOTS_MIN)
+		return -EINVAL;
+
+	max_skb_slots = param;
+
+	return 0;
+}
+
+static __moduleparam_const struct kernel_param_ops max_skb_slots_param_ops = {
+	.set = max_skb_slots_set,
+	.get = param_get_uint,
+};
+
+module_param_cb(max_skb_slots, &max_skb_slots_param_ops,
+		&max_skb_slots, 0444);
+
+typedef unsigned int pending_ring_idx_t;
+#define INVALID_PENDING_RING_IDX (~0U)
+
 struct pending_tx_info {
-	struct xen_netif_tx_request req;
+	struct xen_netif_tx_request req; /* coalesced tx request  */
 	struct xenvif *vif;
+	pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX
+				  * if it is head of one or more tx
+				  * reqs
+				  */
 };
-typedef unsigned int pending_ring_idx_t;
 
 struct netbk_rx_meta {
 	int id;
@@ -102,7 +138,11 @@ struct xen_netbk {
 	atomic_t netfront_count;
 
 	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
-	struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
+	/* Coalescing tx requests before copying makes number of grant
+	 * copy ops greater of equal to number of slots required. In
+	 * worst case a tx request consumes 2 gnttab_copy.
+	 */
+	struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
 
 	u16 pending_ring[MAX_PENDING_REQS];
 
@@ -250,6 +290,7 @@ static int max_required_rx_slots(struct xenvif *vif)
 {
 	int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
 
+	/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
 	if (vif->can_sg || vif->gso || vif->gso_prefix)
 		max += MAX_SKB_FRAGS + 1; /* extra_info + frags */
 
@@ -657,6 +698,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
 		__skb_queue_tail(&rxq, skb);
 
 		/* Filled the batch queue? */
+		/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
 		if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
 			break;
 	}
@@ -904,47 +946,78 @@ static void netbk_fatal_tx_err(struct xenvif *vif)
 
 static int netbk_count_requests(struct xenvif *vif,
 				struct xen_netif_tx_request *first,
+				RING_IDX first_idx,
 				struct xen_netif_tx_request *txp,
 				int work_to_do)
 {
 	RING_IDX cons = vif->tx.req_cons;
-	int frags = 0;
+	int slots = 0;
+	int drop_err = 0;
 
 	if (!(first->flags & XEN_NETTXF_more_data))
 		return 0;
 
 	do {
-		if (frags >= work_to_do) {
-			netdev_err(vif->dev, "Need more frags\n");
+		if (slots >= work_to_do) {
+			netdev_err(vif->dev,
+				   "Asked for %d slots but exceeds this limit\n",
+				   work_to_do);
 			netbk_fatal_tx_err(vif);
 			return -ENODATA;
 		}
 
-		if (unlikely(frags >= MAX_SKB_FRAGS)) {
-			netdev_err(vif->dev, "Too many frags\n");
+		/* This guest is really using too many slots and
+		 * considered malicious.
+		 */
+		if (unlikely(slots >= max_skb_slots)) {
+			netdev_err(vif->dev,
+				   "Malicious frontend using %d slots, threshold %u\n",
+				   slots, max_skb_slots);
 			netbk_fatal_tx_err(vif);
 			return -E2BIG;
 		}
 
-		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags),
+		/* Xen network protocol had implicit dependency on
+		 * MAX_SKB_FRAGS. XEN_NETIF_NR_SLOTS_MIN is set to the
+		 * historical MAX_SKB_FRAGS value 18 to honor the same
+		 * behavior as before. Any packet using more than 18
+		 * slots but less than max_skb_slots slots is dropped
+		 */
+		if (!drop_err && slots >= XEN_NETIF_NR_SLOTS_MIN) {
+			if (net_ratelimit())
+				netdev_dbg(vif->dev,
+					   "Too many slots (%d), dropping packet\n",
+					   slots);
+			drop_err = -E2BIG;
+		}
+
+		memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
 		       sizeof(*txp));
 		if (txp->size > first->size) {
-			netdev_err(vif->dev, "Frag is bigger than frame.\n");
+			netdev_err(vif->dev,
+				   "Invalid tx request, slot size %u > remaining size %u\n",
+				   txp->size, first->size);
 			netbk_fatal_tx_err(vif);
 			return -EIO;
 		}
 
 		first->size -= txp->size;
-		frags++;
+		slots++;
 
 		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
-			netdev_err(vif->dev, "txp->offset: %x, size: %u\n",
+			netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size:
%u\n",
 				 txp->offset, txp->size);
 			netbk_fatal_tx_err(vif);
 			return -EINVAL;
 		}
 	} while ((txp++)->flags & XEN_NETTXF_more_data);
-	return frags;
+
+	if (drop_err) {
+		netbk_tx_err(vif, first, first_idx + slots);
+		return drop_err;
+	}
+
+	return slots;
 }
 
 static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
@@ -968,48 +1041,114 @@ static struct gnttab_copy *xen_netbk_get_requests(struct
xen_netbk *netbk,
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	skb_frag_t *frags = shinfo->frags;
 	u16 pending_idx = *((u16 *)skb->data);
-	int i, start;
+	u16 head_idx = 0;
+	int slot, start;
+	struct page *page;
+	pending_ring_idx_t index, start_idx = 0;
+	uint16_t dst_offset;
+	unsigned int nr_slots;
+	struct pending_tx_info *first = NULL;
+
+	/* At this point shinfo->nr_frags is in fact the number of
+	 * slots, which can be as large as XEN_NETIF_NR_SLOTS_MIN.
+	 */
+	nr_slots = shinfo->nr_frags;
 
 	/* Skip first skb fragment if it is on same page as header fragment. */
 	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
 
-	for (i = start; i < shinfo->nr_frags; i++, txp++) {
-		struct page *page;
-		pending_ring_idx_t index;
+	/* Coalesce tx requests, at this point the packet passed in
+	 * should be <= 64K. Any packets larger than 64K have been
+	 * handled in netbk_count_requests().
+	 */
+	for (shinfo->nr_frags = slot = start; slot < nr_slots;
+	     shinfo->nr_frags++) {
 		struct pending_tx_info *pending_tx_info  			netbk->pending_tx_info;
 
-		index = pending_index(netbk->pending_cons++);
-		pending_idx = netbk->pending_ring[index];
-		page = xen_netbk_alloc_page(netbk, pending_idx);
+		page = alloc_page(GFP_KERNEL|__GFP_COLD);
 		if (!page)
 			goto err;
 
-		gop->source.u.ref = txp->gref;
-		gop->source.domid = vif->domid;
-		gop->source.offset = txp->offset;
-
-		gop->dest.u.gmfn = virt_to_mfn(page_address(page));
-		gop->dest.domid = DOMID_SELF;
-		gop->dest.offset = txp->offset;
-
-		gop->len = txp->size;
-		gop->flags = GNTCOPY_source_gref;
+		dst_offset = 0;
+		first = NULL;
+		while (dst_offset < PAGE_SIZE && slot < nr_slots) {
+			gop->flags = GNTCOPY_source_gref;
+
+			gop->source.u.ref = txp->gref;
+			gop->source.domid = vif->domid;
+			gop->source.offset = txp->offset;
+
+			gop->dest.domid = DOMID_SELF;
+
+			gop->dest.offset = dst_offset;
+			gop->dest.u.gmfn = virt_to_mfn(page_address(page));
+
+			if (dst_offset + txp->size > PAGE_SIZE) {
+				/* This page can only merge a portion
+				 * of tx request. Do not increment any
+				 * pointer / counter here. The txp
+				 * will be dealt with in future
+				 * rounds, eventually hitting the
+				 * `else` branch.
+				 */
+				gop->len = PAGE_SIZE - dst_offset;
+				txp->offset += gop->len;
+				txp->size -= gop->len;
+				dst_offset += gop->len; /* quit loop */
+			} else {
+				/* This tx request can be merged in the page */
+				gop->len = txp->size;
+				dst_offset += gop->len;
+
+				index = pending_index(netbk->pending_cons++);
+
+				pending_idx = netbk->pending_ring[index];
+
+				memcpy(&pending_tx_info[pending_idx].req, txp,
+				       sizeof(*txp));
+				xenvif_get(vif);
+
+				pending_tx_info[pending_idx].vif = vif;
+
+				/* Poison these fields, corresponding
+				 * fields for head tx req will be set
+				 * to correct values after the loop.
+				 */
+				netbk->mmap_pages[pending_idx] = (void *)(~0UL);
+				pending_tx_info[pending_idx].head +					INVALID_PENDING_RING_IDX;
+
+				if (!first) {
+					first = &pending_tx_info[pending_idx];
+					start_idx = index;
+					head_idx = pending_idx;
+				}
+
+				txp++;
+				slot++;
+			}
 
-		gop++;
+			gop++;
+		}
 
-		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
-		xenvif_get(vif);
-		pending_tx_info[pending_idx].vif = vif;
-		frag_set_pending_idx(&frags[i], pending_idx);
+		first->req.offset = 0;
+		first->req.size = dst_offset;
+		first->head = start_idx;
+		set_page_ext(page, netbk, head_idx);
+		netbk->mmap_pages[head_idx] = page;
+		frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
 	}
 
+	BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS);
+
 	return gop;
 err:
 	/* Unwind, freeing all pages and sending error responses. */
-	while (i-- > start) {
-		xen_netbk_idx_release(netbk, frag_get_pending_idx(&frags[i]),
-				      XEN_NETIF_RSP_ERROR);
+	while (shinfo->nr_frags-- > start) {
+		xen_netbk_idx_release(netbk,
+				frag_get_pending_idx(&frags[shinfo->nr_frags]),
+				XEN_NETIF_RSP_ERROR);
 	}
 	/* The head too, if necessary. */
 	if (start)
@@ -1025,8 +1164,10 @@ static int xen_netbk_tx_check_gop(struct xen_netbk
*netbk,
 	struct gnttab_copy *gop = *gopp;
 	u16 pending_idx = *((u16 *)skb->data);
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
+	struct pending_tx_info *tx_info;
 	int nr_frags = shinfo->nr_frags;
 	int i, err, start;
+	u16 peek; /* peek into next tx request */
 
 	/* Check status of header. */
 	err = gop->status;
@@ -1038,11 +1179,21 @@ static int xen_netbk_tx_check_gop(struct xen_netbk
*netbk,
 
 	for (i = start; i < nr_frags; i++) {
 		int j, newerr;
+		pending_ring_idx_t head;
 
 		pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
+		tx_info = &netbk->pending_tx_info[pending_idx];
+		head = tx_info->head;
 
 		/* Check error status: if okay then remember grant handle. */
-		newerr = (++gop)->status;
+		do {
+			newerr = (++gop)->status;
+			if (newerr)
+				break;
+			peek = netbk->pending_ring[pending_index(++head)];
+		} while (netbk->pending_tx_info[peek].head
+			 == INVALID_PENDING_RING_IDX);
+
 		if (likely(!newerr)) {
 			/* Had a previous error? Invalidate this fragment. */
 			if (unlikely(err))
@@ -1267,11 +1418,12 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk
*netbk)
 	struct sk_buff *skb;
 	int ret;
 
-	while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS)
&&
+	while ((nr_pending_reqs(netbk) + XEN_NETIF_NR_SLOTS_MIN
+		 < MAX_PENDING_REQS) &&
 		!list_empty(&netbk->net_schedule_list)) {
 		struct xenvif *vif;
 		struct xen_netif_tx_request txreq;
-		struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+		struct xen_netif_tx_request txfrags[max_skb_slots];
 		struct page *page;
 		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
 		u16 pending_idx;
@@ -1332,7 +1484,8 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk
*netbk)
 				continue;
 		}
 
-		ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);
+		ret = netbk_count_requests(vif, &txreq, idx,
+					   txfrags, work_to_do);
 		if (unlikely(ret < 0))
 			continue;
 
@@ -1359,7 +1512,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk
*netbk)
 		pending_idx = netbk->pending_ring[index];
 
 		data_len = (txreq.size > PKT_PROT_LEN &&
-			    ret < MAX_SKB_FRAGS) ?
+			    ret < XEN_NETIF_NR_SLOTS_MIN) ?
 			PKT_PROT_LEN : txreq.size;
 
 		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
@@ -1409,6 +1562,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk
*netbk)
 		memcpy(&netbk->pending_tx_info[pending_idx].req,
 		       &txreq, sizeof(txreq));
 		netbk->pending_tx_info[pending_idx].vif = vif;
+		netbk->pending_tx_info[pending_idx].head = index;
 		*((u16 *)skb->data) = pending_idx;
 
 		__skb_put(skb, data_len);
@@ -1539,7 +1693,10 @@ static void xen_netbk_idx_release(struct xen_netbk
*netbk, u16 pending_idx,
 {
 	struct xenvif *vif;
 	struct pending_tx_info *pending_tx_info;
-	pending_ring_idx_t index;
+	pending_ring_idx_t head;
+	u16 peek; /* peek into next tx request */
+
+	BUG_ON(netbk->mmap_pages[pending_idx] == (void *)(~0UL));
 
 	/* Already complete? */
 	if (netbk->mmap_pages[pending_idx] == NULL)
@@ -1548,13 +1705,34 @@ static void xen_netbk_idx_release(struct xen_netbk
*netbk, u16 pending_idx,
 	pending_tx_info = &netbk->pending_tx_info[pending_idx];
 
 	vif = pending_tx_info->vif;
+	head = pending_tx_info->head;
 
-	make_tx_response(vif, &pending_tx_info->req, status);
+	BUG_ON(head == INVALID_PENDING_RING_IDX);
+	BUG_ON(netbk->pending_ring[pending_index(head)] != pending_idx);
 
-	index = pending_index(netbk->pending_prod++);
-	netbk->pending_ring[index] = pending_idx;
+	do {
+		pending_ring_idx_t index;
+		pending_ring_idx_t idx = pending_index(head);
+		u16 info_idx = netbk->pending_ring[idx];
 
-	xenvif_put(vif);
+		pending_tx_info = &netbk->pending_tx_info[info_idx];
+		make_tx_response(vif, &pending_tx_info->req, status);
+
+		/* Setting any number other than
+		 * INVALID_PENDING_RING_IDX indicates this slot is
+		 * starting a new packet / ending a previous packet.
+		 */
+		pending_tx_info->head = 0;
+
+		index = pending_index(netbk->pending_prod++);
+		netbk->pending_ring[index] = netbk->pending_ring[info_idx];
+
+		xenvif_put(vif);
+
+		peek = netbk->pending_ring[pending_index(++head)];
+
+	} while (netbk->pending_tx_info[peek].head
+		 == INVALID_PENDING_RING_IDX);
 
 	netbk->mmap_pages[pending_idx]->mapping = 0;
 	put_page(netbk->mmap_pages[pending_idx]);
@@ -1613,7 +1791,8 @@ static inline int rx_work_todo(struct xen_netbk *netbk)
 static inline int tx_work_todo(struct xen_netbk *netbk)
 {
 
-	if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS)
&&
+	if (((nr_pending_reqs(netbk) + XEN_NETIF_NR_SLOTS_MIN)
+	     < MAX_PENDING_REQS) &&
 			!list_empty(&netbk->net_schedule_list))
 		return 1;
 
diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
index 9dfc120..e829a09 100644
--- a/include/xen/interface/io/netif.h
+++ b/include/xen/interface/io/netif.h
@@ -13,6 +13,24 @@
 #include <xen/interface/grant_table.h>
 
 /*
+ * Older implementation of Xen network frontend / backend has an
+ * implicit dependency on the MAX_SKB_FRAGS as the maximum number of
+ * ring slots a skb can use. Netfront / netback may not work as
+ * expected when frontend and backend have different MAX_SKB_FRAGS.
+ *
+ * A better approach is to add mechanism for netfront / netback to
+ * negotiate this value. However we cannot fix all possible
+ * frontends, so we need to define a value which states the minimum
+ * slots backend must support.
+ *
+ * The minimum value derives from older Linux kernel''s MAX_SKB_FRAGS
+ * (18), which is proved to work with most frontends. Any new backend
+ * which doesn''t negotiate with frontend should expect frontend to
+ * send a valid packet using slots up to this value.
+ */
+#define XEN_NETIF_NR_SLOTS_MIN 18
+
+/*
  * Notifications after enqueuing any type of message should be conditional on
  * the appropriate req_event or rsp_event field in the shared ring.
  * If the client sends notification for rx requests then it should specify
-- 
1.7.10.4

Ian Campbell

2013-Apr-12 15:35 UTC

head link

Re: [PATCH V4 6/7] xen-netback: coalesce slots in TX path and fix regressions

On Fri, 2013-04-12 at 15:24 +0100, Wei Liu wrote:> +/*
> + * This is the maximum slots a skb can have. If a guest sends a skb
> + * which exceeds this limit it is considered malicious.
> + */
> +#define MAX_SKB_SLOTS_DEFAULT 20
> +static unsigned int max_skb_slots = MAX_SKB_SLOTS_DEFAULT;
> +
> +static int max_skb_slots_set(const char *val, const struct kernel_param
*kp)
> +{
> +       int ret;
> +       unsigned int param = 0;
> +
> +       ret = kstrtouint(val, 10, &param);
> +
> +       if (ret < 0 || param < XEN_NETIF_NR_SLOTS_MIN)
> +               return -EINVAL;
> +
> +       max_skb_slots = param;
> +
> +       return 0;
> +}
> +
> +static __moduleparam_const struct kernel_param_ops max_skb_slots_param_ops
= {
> +       .set = max_skb_slots_set,
> +       .get = param_get_uint,
> +};
> +
> +module_param_cb(max_skb_slots, &max_skb_slots_param_ops,
> +               &max_skb_slots, 0444);
Is all this infrastructure instead of module_param_int just so we can
check XEN_NETIF_NR_SLOTS_MIN? I''m inclined to suggest that if an admin
wants to set a smaller slot limit then they get to keep the pieces.

Or if you really want to check it then you could check+log/reject in the
module init function.
> +
> +typedef unsigned int pending_ring_idx_t;
> +#define INVALID_PENDING_RING_IDX (~0U)
> +
>  struct pending_tx_info {
> -       struct xen_netif_tx_request req;
> +       struct xen_netif_tx_request req; /* coalesced tx request  */
>         struct xenvif *vif;
> +       pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX
> +                                 * if it is head of one or more tx
> +                                 * reqs
> +                                 */
>  };
> -typedef unsigned int pending_ring_idx_t;
> 
>  struct netbk_rx_meta {
>         int id;
> @@ -102,7 +138,11 @@ struct xen_netbk {
>         atomic_t netfront_count;
> 
>         struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
> -       struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
> +       /* Coalescing tx requests before copying makes number of grant
> +        * copy ops greater of equal to number of slots required. In                              ^or
> +        * worst case a tx request consumes 2 gnttab_copy.
I''m happy with this as an upper bound but can it be made smaller?

For example there are at most MAX_PENDING_REQS on the ring, but we are
filling MAX_SKB_FRAGS with that data, therefore only MAX_SKB_FRAGS (-1?)
or those requests can cross a frag boundary and therefore the actual max
is MAX_PENDING_REQS+MAX_SKB_FRAGS.

Is that logic right? Perhaps need to account for data going into the
head too with another +N?
> +        */
> +       struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
> 
>         u16 pending_ring[MAX_PENDING_REQS];
> [...]
> 
> -               memcpy(txp, RING_GET_REQUEST(&vif->tx, cons +
frags),
> +               /* Xen network protocol had implicit dependency on
> +                * MAX_SKB_FRAGS. XEN_NETIF_NR_SLOTS_MIN is set to the
> +                * historical MAX_SKB_FRAGS value 18 to honor the same
> +                * behavior as before. Any packet using more than 18
> +                * slots but less than max_skb_slots slots is dropped
> +                */
It seems a bit odd not to accept such a thing if the local network stack
can cope with it but I suppose the intention here is to maintain the
historical status quo to reduce the problem space when we imminently
implement proper negotiation between front- and backend about the number
of slots they can handle?
> +               if (!drop_err && slots >=
XEN_NETIF_NR_SLOTS_MIN) {
> +                       if (net_ratelimit())
> +                               netdev_dbg(vif->dev,
> +                                          "Too many slots (%d),
dropping packet\n",
> +                                          slots);
Could log the limits here?
> +                       drop_err = -E2BIG;
> +               }
> +
> +               memcpy(txp, RING_GET_REQUEST(&vif->tx, cons +
slots),
> @@ -1038,11 +1179,21 @@ static int xen_netbk_tx_check_gop(struct xen_netbk
*netbk,
> 
>         for (i = start; i < nr_frags; i++) {
>                 int j, newerr;
> +               pending_ring_idx_t head;
> 
>                 pending_idx =
frag_get_pending_idx(&shinfo->frags[i]);
> +               tx_info = &netbk->pending_tx_info[pending_idx];
> +               head = tx_info->head;
> 
>                 /* Check error status: if okay then remember grant handle.
*/
> -               newerr = (++gop)->status;
> +               do {
> +                       newerr = (++gop)->status;
> +                       if (newerr)
> +                               break;
> +                       peek =
netbk->pending_ring[pending_index(++head)];
> +               } while (netbk->pending_tx_info[peek].head
> +                        == INVALID_PENDING_RING_IDX);
The 80 column limit is a soft one (and I think its greater nowadays
anyhow) and in cases like this the "cure" is worse than the disease,
at
least in IMHO...

You are using INVALID_PENDING_RING_IDX as an indication of further
chaining, so the naming is a little counter intuitive. I can''t think of
a name I like (something with "continuation" in it?) but perhaps a
helper function pending_tx_is_head(netbk, peek) or something would make
it read more clearly?
> +
>                 if (likely(!newerr)) {
>                         /* Had a previous error? Invalidate this fragment.
*/
>                         if (unlikely(err))

Wei Liu

2013-Apr-14 16:15 UTC

head link

Re: [PATCH V4 6/7] xen-netback: coalesce slots in TX path and fix regressions

On Fri, Apr 12, 2013 at 04:35:44PM +0100, Ian Campbell wrote:
[...]> > +module_param_cb(max_skb_slots, &max_skb_slots_param_ops,
> > +               &max_skb_slots, 0444);
> 
> Is all this infrastructure instead of module_param_int just so we can
> check XEN_NETIF_NR_SLOTS_MIN? I''m inclined to suggest that if an
admin
> wants to set a smaller slot limit then they get to keep the pieces.
> 
> Or if you really want to check it then you could check+log/reject in the
> module init function.
> 
I will go for the latter one. :-)
> > 
[...]> >         struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
> > -       struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
> > +       /* Coalescing tx requests before copying makes number of grant
> > +        * copy ops greater of equal to number of slots required. In
>                               ^or
> 
> > +        * worst case a tx request consumes 2 gnttab_copy.
> 
> I''m happy with this as an upper bound but can it be made smaller?
> 
> For example there are at most MAX_PENDING_REQS on the ring, but we are
> filling MAX_SKB_FRAGS with that data, therefore only MAX_SKB_FRAGS (-1?)
> or those requests can cross a frag boundary and therefore the actual max
> is MAX_PENDING_REQS+MAX_SKB_FRAGS.
> 
> Is that logic right? Perhaps need to account for data going into the
> head too with another +N?
> 
I''m afraid this is not the case. Vif has a ring of size
MAX_PENDING_REQS, but that ring might contain multiple skbs, so the
statement "we are filling MAX_SKB_FRAGS with that data"
doesn''t stand.
> > +        */
> > +       struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
> > 
> >         u16 pending_ring[MAX_PENDING_REQS];
> > 
> [...]
> 
> > 
> > -               memcpy(txp, RING_GET_REQUEST(&vif->tx, cons +
frags),
> > +               /* Xen network protocol had implicit dependency on
> > +                * MAX_SKB_FRAGS. XEN_NETIF_NR_SLOTS_MIN is set to the
> > +                * historical MAX_SKB_FRAGS value 18 to honor the same
> > +                * behavior as before. Any packet using more than 18
> > +                * slots but less than max_skb_slots slots is dropped
> > +                */
> 
> It seems a bit odd not to accept such a thing if the local network stack
> can cope with it but I suppose the intention here is to maintain the
> historical status quo to reduce the problem space when we imminently
> implement proper negotiation between front- and backend about the number
> of slots they can handle?
> 
Yes, this behavior will be altered once we have mechanism to negotiate.


Wei.

Xen devel - Apr 2013 - [PATCH V4 6/7] xen-netback: coalesce slots in TX path and fix regressions

[PATCH V4 6/7] xen-netback: coalesce slots in TX path and fix regressions

Re: [PATCH V4 6/7] xen-netback: coalesce slots in TX path and fix regressions

Re: [PATCH V4 6/7] xen-netback: coalesce slots in TX path and fix regressions