Jason Wang
2018-Mar-02  09:29 UTC
[PATCH net V2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
XDP_REDIRECT support for mergeable buffers was removed by commit
7324f5399b06 ("virtio_net: disable XDP_REDIRECT in receive_mergeable()
case"). This was because we did not reserve enough tailroom for struct
skb_shared_info, which breaks XDP's assumption. This patch fixes that
by reserving enough tailroom and using a fixed-size rx buffer.
Signed-off-by: Jason Wang <jasowang at redhat.com>
---
Changes from V1:
- do not add duplicated tracepoint when redirection fails
---
 drivers/net/virtio_net.c | 54 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 42 insertions(+), 12 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9bb9e56..426dcf7 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -504,6 +504,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 	page_off += *len;
 
 	while (--*num_buf) {
+		int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 		unsigned int buflen;
 		void *buf;
 		int off;
@@ -518,7 +519,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 		/* guard against a misconfigured or uncooperative backend that
 		 * is sending packet larger than the MTU.
 		 */
-		if ((page_off + buflen) > PAGE_SIZE) {
+		if ((page_off + buflen + tailroom) > PAGE_SIZE) {
 			put_page(p);
 			goto err_buf;
 		}
@@ -690,6 +691,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	unsigned int truesize;
 	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
 	bool sent;
+	int err;
 
 	head_skb = NULL;
 
@@ -701,7 +703,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		void *data;
 		u32 act;
 
-		/* This happens when rx buffer size is underestimated */
+		/* This happens when rx buffer size is underestimated
+		 * or headroom is not enough because of the buffer
+		 * was refilled before XDP is set. This should only
+		 * happen for the first several packets, so we don't
+		 * care much about its performance.
+		 */
 		if (unlikely(num_buf > 1 ||
 			     headroom < virtnet_get_headroom(vi))) {
 			/* linearize data for XDP */
@@ -736,9 +743,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
-		if (act != XDP_PASS)
-			ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
-
 		switch (act) {
 		case XDP_PASS:
 			/* recalculate offset to account for any header
@@ -770,6 +774,18 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 				goto err_xdp;
 			rcu_read_unlock();
 			goto xdp_xmit;
+		case XDP_REDIRECT:
+			err = xdp_do_redirect(dev, &xdp, xdp_prog);
+			if (err) {
+				if (unlikely(xdp_page != page))
+					put_page(xdp_page);
+				goto err_xdp;
+			}
+			*xdp_xmit = true;
+			if (unlikely(xdp_page != page))
+				goto err_xdp;
+			rcu_read_unlock();
+			goto xdp_xmit;
 		default:
 			bpf_warn_invalid_xdp_action(act);
 		case XDP_ABORTED:
@@ -1013,13 +1029,18 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
 }
 
 static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
-					  struct ewma_pkt_len *avg_pkt_len)
+					  struct ewma_pkt_len *avg_pkt_len,
+					  unsigned int room)
 {
 	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 	unsigned int len;
 
-	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
+	if (room)
+		return PAGE_SIZE - room;
+
+	len = hdr_len +	clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
 				rq->min_buf_len, PAGE_SIZE - hdr_len);
+
 	return ALIGN(len, L1_CACHE_BYTES);
 }
 
@@ -1028,21 +1049,27 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 {
 	struct page_frag *alloc_frag = &rq->alloc_frag;
 	unsigned int headroom = virtnet_get_headroom(vi);
+	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+	unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
 	char *buf;
 	void *ctx;
 	int err;
 	unsigned int len, hole;
 
-	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
-	if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
+	/* Extra tailroom is needed to satisfy XDP's assumption. This
+	 * means rx frags coalescing won't work, but consider we've
+	 * disabled GSO for XDP, it won't be a big issue.
+	 */
+	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
+	if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
 		return -ENOMEM;
 
 	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
 	buf += headroom; /* advance address leaving hole at front of pkt */
 	get_page(alloc_frag->page);
-	alloc_frag->offset += len + headroom;
+	alloc_frag->offset += len + room;
 	hole = alloc_frag->size - alloc_frag->offset;
-	if (hole < len + headroom) {
+	if (hole < len + room) {
 		/* To avoid internal fragmentation, if there is very likely not
 		 * enough space for another buffer, add the remaining space to
 		 * the current buffer.
@@ -2576,12 +2603,15 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
 {
 	struct virtnet_info *vi = netdev_priv(queue->dev);
 	unsigned int queue_index = get_netdev_rx_queue_index(queue);
+	unsigned int headroom = virtnet_get_headroom(vi);
+	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
 	struct ewma_pkt_len *avg;
 
 	BUG_ON(queue_index >= vi->max_queue_pairs);
 	avg = &vi->rq[queue_index].mrg_avg_pkt_len;
 	return sprintf(buf, "%u\n",
-		       get_mergeable_buf_len(&vi->rq[queue_index], avg));
+		       get_mergeable_buf_len(&vi->rq[queue_index], avg,
+				       SKB_DATA_ALIGN(headroom + tailroom)));
 }
 
 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute
-- 
2.7.4
Jesper Dangaard Brouer
2018-Mar-02  16:07 UTC
[PATCH net V2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
On Fri, 2 Mar 2018 17:29:14 +0800 Jason Wang <jasowang at redhat.com> wrote:

> XDP_REDIRECT support for mergeable buffer was removed since commit
> 7324f5399b06 ("virtio_net: disable XDP_REDIRECT in receive_mergeable()
> case"). This is because we don't reserve enough tailroom for struct
> skb_shared_info which breaks XDP assumption. So this patch fixes this
> by reserving enough tailroom and using fixed size of rx buffer.
>
> Signed-off-by: Jason Wang <jasowang at redhat.com>
> ---
> Changes from V1:
> - do not add duplicated tracepoint when redirection fails

Acked-by: Jesper Dangaard Brouer <brouer at redhat.com>

I gave it a quick spin on my testlab, and cpumap seems to
work/not-crash now (if I managed to turn back config to
receive_mergeable() correctly ;-)).

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer
Michael S. Tsirkin
2018-Mar-02  17:36 UTC
[PATCH net V2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
On Fri, Mar 02, 2018 at 05:29:14PM +0800, Jason Wang wrote:
> XDP_REDIRECT support for mergeable buffer was removed since commit
> 7324f5399b06 ("virtio_net: disable XDP_REDIRECT in receive_mergeable()
> case"). This is because we don't reserve enough tailroom for struct
> skb_shared_info which breaks XDP assumption. So this patch fixes this
> by reserving enough tailroom and using fixed size of rx buffer.
>
> Signed-off-by: Jason Wang <jasowang at redhat.com>

Acked-by: Michael S. Tsirkin <mst at redhat.com>

I think the next incremental step is to look at splitting
out fast path XDP processing to a separate set of functions.

> ---
> Changes from V1:
> - do not add duplicated tracepoint when redirection fails
> ---
>  drivers/net/virtio_net.c | 54 +++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 42 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 9bb9e56..426dcf7 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -504,6 +504,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
>  	page_off += *len;
>
>  	while (--*num_buf) {
> +		int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
>  		unsigned int buflen;
>  		void *buf;
>  		int off;
> @@ -518,7 +519,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
>  		/* guard against a misconfigured or uncooperative backend that
>  		 * is sending packet larger than the MTU.
>  		 */
> -		if ((page_off + buflen) > PAGE_SIZE) {
> +		if ((page_off + buflen + tailroom) > PAGE_SIZE) {
>  			put_page(p);
>  			goto err_buf;
>  		}
> @@ -690,6 +691,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  	unsigned int truesize;
>  	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
>  	bool sent;
> +	int err;
>
>  	head_skb = NULL;
>
> @@ -701,7 +703,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  		void *data;
>  		u32 act;
>
> -		/* This happens when rx buffer size is underestimated */
> +		/* This happens when rx buffer size is underestimated
> +		 * or headroom is not enough because of the buffer
> +		 * was refilled before XDP is set. This should only
> +		 * happen for the first several packets, so we don't
> +		 * care much about its performance.
> +		 */
>  		if (unlikely(num_buf > 1 ||
>  			     headroom < virtnet_get_headroom(vi))) {
>  			/* linearize data for XDP */
> @@ -736,9 +743,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>
>  		act = bpf_prog_run_xdp(xdp_prog, &xdp);
>
> -		if (act != XDP_PASS)
> -			ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
> -
>  		switch (act) {
>  		case XDP_PASS:
>  			/* recalculate offset to account for any header
> @@ -770,6 +774,18 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  				goto err_xdp;
>  			rcu_read_unlock();
>  			goto xdp_xmit;
> +		case XDP_REDIRECT:
> +			err = xdp_do_redirect(dev, &xdp, xdp_prog);
> +			if (err) {
> +				if (unlikely(xdp_page != page))
> +					put_page(xdp_page);
> +				goto err_xdp;
> +			}
> +			*xdp_xmit = true;
> +			if (unlikely(xdp_page != page))
> +				goto err_xdp;
> +			rcu_read_unlock();
> +			goto xdp_xmit;
>  		default:
>  			bpf_warn_invalid_xdp_action(act);
>  		case XDP_ABORTED:
> @@ -1013,13 +1029,18 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
>  }
>
>  static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
> -					  struct ewma_pkt_len *avg_pkt_len)
> +					  struct ewma_pkt_len *avg_pkt_len,
> +					  unsigned int room)
>  {
>  	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
>  	unsigned int len;
>
> -	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
> +	if (room)
> +		return PAGE_SIZE - room;
> +
> +	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
>  				rq->min_buf_len, PAGE_SIZE - hdr_len);
> +
>  	return ALIGN(len, L1_CACHE_BYTES);
>  }
>
> @@ -1028,21 +1049,27 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
>  {
>  	struct page_frag *alloc_frag = &rq->alloc_frag;
>  	unsigned int headroom = virtnet_get_headroom(vi);
> +	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
> +	unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
>  	char *buf;
>  	void *ctx;
>  	int err;
>  	unsigned int len, hole;
>
> -	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
> -	if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
> +	/* Extra tailroom is needed to satisfy XDP's assumption. This
> +	 * means rx frags coalescing won't work, but consider we've
> +	 * disabled GSO for XDP, it won't be a big issue.
> +	 */
> +	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
> +	if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
>  		return -ENOMEM;
>
>  	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
>  	buf += headroom; /* advance address leaving hole at front of pkt */
>  	get_page(alloc_frag->page);
> -	alloc_frag->offset += len + headroom;
> +	alloc_frag->offset += len + room;
>  	hole = alloc_frag->size - alloc_frag->offset;
> -	if (hole < len + headroom) {
> +	if (hole < len + room) {
>  		/* To avoid internal fragmentation, if there is very likely not
>  		 * enough space for another buffer, add the remaining space to
>  		 * the current buffer.
> @@ -2576,12 +2603,15 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
>  {
>  	struct virtnet_info *vi = netdev_priv(queue->dev);
>  	unsigned int queue_index = get_netdev_rx_queue_index(queue);
> +	unsigned int headroom = virtnet_get_headroom(vi);
> +	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
>  	struct ewma_pkt_len *avg;
>
>  	BUG_ON(queue_index >= vi->max_queue_pairs);
>  	avg = &vi->rq[queue_index].mrg_avg_pkt_len;
>  	return sprintf(buf, "%u\n",
> -		       get_mergeable_buf_len(&vi->rq[queue_index], avg));
> +		       get_mergeable_buf_len(&vi->rq[queue_index], avg,
> +				       SKB_DATA_ALIGN(headroom + tailroom)));
>  }
>
>  static struct rx_queue_attribute mergeable_rx_buffer_size_attribute
> --
> 2.7.4
David Miller
2018-Mar-04  23:38 UTC
[PATCH net V2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
From: Jason Wang <jasowang at redhat.com>
Date: Fri, 2 Mar 2018 17:29:14 +0800

> XDP_REDIRECT support for mergeable buffer was removed since commit
> 7324f5399b06 ("virtio_net: disable XDP_REDIRECT in receive_mergeable()
> case"). This is because we don't reserve enough tailroom for struct
> skb_shared_info which breaks XDP assumption. So this patch fixes this
> by reserving enough tailroom and using fixed size of rx buffer.
>
> Signed-off-by: Jason Wang <jasowang at redhat.com>
> ---
> Changes from V1:
> - do not add duplicated tracepoint when redirection fails

Applied to net-next, thanks Jason.
Jason Wang
2018-Mar-05  02:39 UTC
[PATCH net V2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
On 2018年03月03日 00:07, Jesper Dangaard Brouer wrote:
> On Fri, 2 Mar 2018 17:29:14 +0800
> Jason Wang <jasowang at redhat.com> wrote:
>
>> XDP_REDIRECT support for mergeable buffer was removed since commit
>> 7324f5399b06 ("virtio_net: disable XDP_REDIRECT in receive_mergeable()
>> case"). This is because we don't reserve enough tailroom for struct
>> skb_shared_info which breaks XDP assumption. So this patch fixes this
>> by reserving enough tailroom and using fixed size of rx buffer.
>>
>> Signed-off-by: Jason Wang <jasowang at redhat.com>
>> ---
>> Changes from V1:
>> - do not add duplicated tracepoint when redirection fails
> Acked-by: Jesper Dangaard Brouer <brouer at redhat.com>
>
> I gave it a quick spin on my testlab, and cpumap seems to
> work/not-crash now (if I managed to turn back config to
> receive_mergeable() correctly ;-)).
>

Thanks for the testing and reviewing.
Jason Wang
2018-Mar-05  02:41 UTC
[PATCH net V2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
On 2018年03月03日 01:36, Michael S. Tsirkin wrote:
> On Fri, Mar 02, 2018 at 05:29:14PM +0800, Jason Wang wrote:
>> XDP_REDIRECT support for mergeable buffer was removed since commit
>> 7324f5399b06 ("virtio_net: disable XDP_REDIRECT in receive_mergeable()
>> case"). This is because we don't reserve enough tailroom for struct
>> skb_shared_info which breaks XDP assumption. So this patch fixes this
>> by reserving enough tailroom and using fixed size of rx buffer.
>>
>> Signed-off-by: Jason Wang <jasowang at redhat.com>
> Acked-by: Michael S. Tsirkin <mst at redhat.com>
>
> I think the next incremental step is to look at splitting
> out fast path XDP processing to a separate set of functions.
>

Let me try (probably after 1.1 stuffs).

Thanks
Jason Wang
2018-Mar-05  02:43 UTC
[PATCH net V2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
On 2018年03月05日 07:38, David Miller wrote:
> From: Jason Wang <jasowang at redhat.com>
> Date: Fri, 2 Mar 2018 17:29:14 +0800
>
>> XDP_REDIRECT support for mergeable buffer was removed since commit
>> 7324f5399b06 ("virtio_net: disable XDP_REDIRECT in receive_mergeable()
>> case"). This is because we don't reserve enough tailroom for struct
>> skb_shared_info which breaks XDP assumption. So this patch fixes this
>> by reserving enough tailroom and using fixed size of rx buffer.
>>
>> Signed-off-by: Jason Wang <jasowang at redhat.com>
>> ---
>> Changes from V1:
>> - do not add duplicated tracepoint when redirection fails
> Applied to net-next, thanks Jason.

Hi David,

Consider the change is not large, any chance to make it for -net to
keep XDP redirection work?

Thanks
Apparently Analogous Threads
- [PATCH net V2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
- [PATCH net-next 0/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
- [PATCH net-next 0/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
- [PATCH net] virtio-net: re enable XDP_REDIRECT for mergeable buffer
- [PATCH net] virtio-net: re enable XDP_REDIRECT for mergeable buffer