Jason Wang
2018-Mar-01 03:19 UTC
[PATCH net-next 0/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
Hi: This series tries to re-enable XDP_REDIRECT for mergeable buffer which was removed since commit 7324f5399b06 ("virtio_net: disable XDP_REDIRECT in receive_mergeable() case"). Main concerns are: - not enough tailroom was reserved which breaks cpumap - complex logic like EWMA and linearizing during XDP processing Fix those by: - reserve enough tailroom during refill - disable EWMA and use fixed size of rx buffer - drop linearizing logic and offload it to generic XDP routine, this could happen only when the buffer were refilled before XDP set, so we could simply ignore the negative performance impact. Please review. Thanks Jason Wang (2): virtio-net: re enable XDP_REDIRECT for mergeable buffer virtio-net: simplify XDP handling in small buffer drivers/net/virtio_net.c | 186 ++++++++++++++++++----------------------------- 1 file changed, 70 insertions(+), 116 deletions(-) -- 2.7.4
Jason Wang
2018-Mar-01 03:19 UTC
[PATCH net-next 1/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
XDP_REDIRECT support for mergeable buffer was removed since commit 7324f5399b06 ("virtio_net: disable XDP_REDIRECT in receive_mergeable() case"). This is because we don't reserve enough tailroom for struct skb_shared_info which breaks XDP assumption. Other complaints are, the complex linearize logic and EWMA estimation may increase the possibility of linearizing. Signed-off-by: Jason Wang <jasowang at redhat.com> --- drivers/net/virtio_net.c | 107 +++++++++++++++++++++++++++++------------------ 1 file changed, 67 insertions(+), 40 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9bb9e56..81190ba 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -537,6 +537,26 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, return NULL; } +static struct sk_buff *virtnet_skb_xdp(struct receive_queue *rq, + struct sk_buff *skb) +{ + struct bpf_prog *xdp_prog; + int ret; + + rcu_read_lock(); + xdp_prog = rcu_dereference(rq->xdp_prog); + if (xdp_prog) { + ret = do_xdp_generic(xdp_prog, skb); + if (ret != XDP_PASS) { + rcu_read_unlock(); + return NULL; + } + } + rcu_read_unlock(); + + return skb; +} + static struct sk_buff *receive_small(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, @@ -689,31 +709,30 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct bpf_prog *xdp_prog; unsigned int truesize; unsigned int headroom = mergeable_ctx_to_headroom(ctx); - bool sent; + bool sent, skb_xdp = false; + int err; head_skb = NULL; rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); if (xdp_prog) { - struct page *xdp_page; struct xdp_buff xdp; void *data; u32 act; - /* This happens when rx buffer size is underestimated */ + /* This happens when rx buffer size is underestimated + * or headroom is not enough because of the buffer + * was refilled before XDP is set. In both cases, + * for simplicity, we will offload them to generic + * XDP routine. This should only happen for the first + * several packets, so we don't care much about its + * performance. + */ if (unlikely(num_buf > 1 || headroom < virtnet_get_headroom(vi))) { - /* linearize data for XDP */ - xdp_page = xdp_linearize_page(rq, &num_buf, - page, offset, - VIRTIO_XDP_HEADROOM, - &len); - if (!xdp_page) - goto err_xdp; - offset = VIRTIO_XDP_HEADROOM; - } else { - xdp_page = page; + skb_xdp = true; + goto skb_xdp; } /* Transient failure which in theory could occur if @@ -727,7 +746,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, /* Allow consuming headroom but reserve enough space to push * the descriptor on if we get an XDP_TX return code. */ - data = page_address(xdp_page) + offset; + data = page_address(page) + offset; xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len; xdp.data = data + vi->hdr_len; xdp_set_data_meta_invalid(&xdp); @@ -736,9 +755,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, act = bpf_prog_run_xdp(xdp_prog, &xdp); - if (act != XDP_PASS) - ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len); - switch (act) { case XDP_PASS: /* recalculate offset to account for any header @@ -746,28 +762,22 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, * skb and avoid using offset */ offset = xdp.data - - page_address(xdp_page) - vi->hdr_len; - - /* We can only create skb based on xdp_page. */ - if (unlikely(xdp_page != page)) { - rcu_read_unlock(); - put_page(page); - head_skb = page_to_skb(vi, rq, xdp_page, - offset, len, PAGE_SIZE); - return head_skb; - } + page_address(page) - vi->hdr_len; break; case XDP_TX: sent = __virtnet_xdp_xmit(vi, &xdp); if (unlikely(!sent)) { trace_xdp_exception(vi->dev, xdp_prog, act); - if (unlikely(xdp_page != page)) - put_page(xdp_page); goto err_xdp; } *xdp_xmit = true; - if (unlikely(xdp_page != page)) + rcu_read_unlock(); + goto xdp_xmit; + case XDP_REDIRECT: + err = xdp_do_redirect(dev, &xdp, xdp_prog); + if (err) goto err_xdp; + *xdp_xmit = true; rcu_read_unlock(); goto xdp_xmit; default: @@ -775,13 +785,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, case XDP_ABORTED: trace_xdp_exception(vi->dev, xdp_prog, act); case XDP_DROP: - if (unlikely(xdp_page != page)) - __free_pages(xdp_page, 0); goto err_xdp; } } rcu_read_unlock(); +skb_xdp: truesize = mergeable_ctx_to_truesize(ctx); if (unlikely(len > truesize)) { pr_debug("%s: rx error: len %u exceeds truesize %lu\n", @@ -848,7 +857,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } } - ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); + if (skb_xdp) + head_skb = virtnet_skb_xdp(rq, head_skb); + else + ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); + return head_skb; err_xdp: @@ -1013,13 +1026,18 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, } static unsigned int get_mergeable_buf_len(struct receive_queue *rq, - struct ewma_pkt_len *avg_pkt_len) + struct ewma_pkt_len *avg_pkt_len, + unsigned int room) { const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); unsigned int len; - len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), + if (room) + return PAGE_SIZE - room; + + len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), rq->min_buf_len, PAGE_SIZE - hdr_len); + return ALIGN(len, L1_CACHE_BYTES); } @@ -1028,21 +1046,27 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, { struct page_frag *alloc_frag = &rq->alloc_frag; unsigned int headroom = virtnet_get_headroom(vi); + unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; + unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); char *buf; void *ctx; int err; unsigned int len, hole; - len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len); - if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp))) + /* Extra tailroom is needed to satisfy XDP's assumption. This + * means rx frags coalescing won't work, but consider we've + * disabled GSO for XDP, it won't be a big issue. + */ + len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); + if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) return -ENOMEM; buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; buf += headroom; /* advance address leaving hole at front of pkt */ get_page(alloc_frag->page); - alloc_frag->offset += len + headroom; + alloc_frag->offset += len + room; hole = alloc_frag->size - alloc_frag->offset; - if (hole < len + headroom) { + if (hole < len + room) { /* To avoid internal fragmentation, if there is very likely not * enough space for another buffer, add the remaining space to * the current buffer. @@ -2576,12 +2600,15 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, { struct virtnet_info *vi = netdev_priv(queue->dev); unsigned int queue_index = get_netdev_rx_queue_index(queue); + unsigned int headroom = virtnet_get_headroom(vi); + unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; struct ewma_pkt_len *avg; BUG_ON(queue_index >= vi->max_queue_pairs); avg = &vi->rq[queue_index].mrg_avg_pkt_len; return sprintf(buf, "%u\n", - get_mergeable_buf_len(&vi->rq[queue_index], avg)); + get_mergeable_buf_len(&vi->rq[queue_index], avg, + SKB_DATA_ALIGN(headroom + tailroom))); } static struct rx_queue_attribute mergeable_rx_buffer_size_attribute -- 2.7.4
Jason Wang
2018-Mar-01 03:19 UTC
[PATCH net-next 2/2] virtio-net: simplify XDP handling in small buffer
We used to do data copy through xdp_linearize_page() for the buffer without sufficient headroom, it brings extra complexity without helping for the performance. So this patch remove it and switch to use generic XDP routine to handle this case. Signed-off-by: Jason Wang <jasowang at redhat.com> --- drivers/net/virtio_net.c | 93 ++++++------------------------------------------ 1 file changed, 10 insertions(+), 83 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 81190ba..3f14948 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -474,69 +474,6 @@ static unsigned int virtnet_get_headroom(struct virtnet_info *vi) return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0; } -/* We copy the packet for XDP in the following cases: - * - * 1) Packet is scattered across multiple rx buffers. - * 2) Headroom space is insufficient. - * - * This is inefficient but it's a temporary condition that - * we hit right after XDP is enabled and until queue is refilled - * with large buffers with sufficient headroom - so it should affect - * at most queue size packets. - * Afterwards, the conditions to enable - * XDP should preclude the underlying device from sending packets - * across multiple buffers (num_buf > 1), and we make sure buffers - * have enough headroom. - */ -static struct page *xdp_linearize_page(struct receive_queue *rq, - u16 *num_buf, - struct page *p, - int offset, - int page_off, - unsigned int *len) -{ - struct page *page = alloc_page(GFP_ATOMIC); - - if (!page) - return NULL; - - memcpy(page_address(page) + page_off, page_address(p) + offset, *len); - page_off += *len; - - while (--*num_buf) { - unsigned int buflen; - void *buf; - int off; - - buf = virtqueue_get_buf(rq->vq, &buflen); - if (unlikely(!buf)) - goto err_buf; - - p = virt_to_head_page(buf); - off = buf - page_address(p); - - /* guard against a misconfigured or uncooperative backend that - * is sending packet larger than the MTU. - */ - if ((page_off + buflen) > PAGE_SIZE) { - put_page(p); - goto err_buf; - } - - memcpy(page_address(page) + page_off, - page_address(p) + off, buflen); - page_off += buflen; - put_page(p); - } - - /* Headroom does not contribute to packet length */ - *len = page_off - VIRTIO_XDP_HEADROOM; - return page; -err_buf: - __free_pages(page, 0); - return NULL; -} - static struct sk_buff *virtnet_skb_xdp(struct receive_queue *rq, struct sk_buff *skb) { @@ -573,8 +510,7 @@ static struct sk_buff *receive_small(struct net_device *dev, SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); struct page *page = virt_to_head_page(buf); unsigned int delta = 0; - struct page *xdp_page; - bool sent; + bool sent, skb_xdp = false; int err; len -= vi->hdr_len; @@ -590,25 +526,14 @@ static struct sk_buff *receive_small(struct net_device *dev, if (unlikely(hdr->hdr.gso_type)) goto err_xdp; + /* This happnes when headroom is not enough because + * the buffer was refilled before XDP is set. This + * only happen for several packets, for simplicity, + * offload them to generic XDP routine. + */ if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { - int offset = buf - page_address(page) + header_offset; - unsigned int tlen = len + vi->hdr_len; - u16 num_buf = 1; - - xdp_headroom = virtnet_get_headroom(vi); - header_offset = VIRTNET_RX_PAD + xdp_headroom; - headroom = vi->hdr_len + header_offset; - buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - xdp_page = xdp_linearize_page(rq, &num_buf, page, - offset, header_offset, - &tlen); - if (!xdp_page) - goto err_xdp; - - buf = page_address(xdp_page); - put_page(page); - page = xdp_page; + skb_xdp = true; + goto skb_xdp; } xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; @@ -650,6 +575,7 @@ static struct sk_buff *receive_small(struct net_device *dev, } rcu_read_unlock(); +skb_xdp: skb = build_skb(buf, buflen); if (!skb) { put_page(page); @@ -662,6 +588,7 @@ static struct sk_buff *receive_small(struct net_device *dev, memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); } /* keep zeroed vnet hdr since packet was changed by bpf */ + skb = virtnet_skb_xdp(rq, skb); err: return skb; -- 2.7.4
Jesper Dangaard Brouer
2018-Mar-01 08:02 UTC
[PATCH net-next 2/2] virtio-net: simplify XDP handling in small buffer
On Thu, 1 Mar 2018 11:19:05 +0800 Jason Wang <jasowang at redhat.com> wrote:> We used to do data copy through xdp_linearize_page() for the buffer > without sufficient headroom, it brings extra complexity without > helping for the performance. So this patch remove it and switch to use > generic XDP routine to handle this case.I don't like where this is going. I don't like intermixing the native XDP and generic XDP in this way, for several reasons: 1. XDP generic is not feature complete, e.g. cpumap will drop these packets. It might not be possible to implement some features, think of (AF_XDP) zero-copy. 2. This can easily cause out-of-order packets. 3. It makes it harder to troubleshoot, when diagnosing issues around #1, we have a hard time determining what path an XDP packet took (the xdp tracepoints doesn't know). [...]> @@ -590,25 +526,14 @@ static struct sk_buff *receive_small(struct net_device *dev, > if (unlikely(hdr->hdr.gso_type)) > goto err_xdp; > > + /* This happnes when headroom is not enough because > + * the buffer was refilled before XDP is set. This > + * only happen for several packets, for simplicity, > + * offload them to generic XDP routine.In my practical tests, I also saw that sometime my ping packets were traveling this code-path, even after a long time when XDP were attached. This worries me a bit, for troubleshooting purposes... as this can give a strange user experience given point #1.> + */ > if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { > - int offset = buf - page_address(page) + header_offset; > - unsigned int tlen = len + vi->hdr_len; > - u16 num_buf = 1;-- Best regards, Jesper Dangaard Brouer MSc.CS, Principal Kernel Engineer at Red Hat LinkedIn: http://www.linkedin.com/in/brouer
Jesper Dangaard Brouer
2018-Mar-01 08:41 UTC
[PATCH net-next 1/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
On Thu, 1 Mar 2018 11:19:04 +0800 Jason Wang <jasowang at redhat.com> wrote:> XDP_REDIRECT support for mergeable buffer was removed since commit > 7324f5399b06 ("virtio_net: disable XDP_REDIRECT in receive_mergeable() > case"). This is because we don't reserve enough tailroom for struct > skb_shared_info which breaks XDP assumption. Other complaints are, the > complex linearize logic and EWMA estimation may increase the > possibility of linearizing.This patch also have the intermixing issues, I mentioned for patch 2/2. On Thu, 1 Mar 2018 09:02:06 +0100 Jesper Dangaard Brouer <brouer at redhat.com> wrote:> 1. XDP generic is not feature complete, e.g. cpumap will drop these > packets. It might not be possible to implement some features, think > of (AF_XDP) zero-copy. > > 2. This can easily cause out-of-order packets. > > 3. It makes it harder to troubleshoot, when diagnosing issues > around #1, we have a hard time determining what path an XDP packet > took (the xdp tracepoints doesn't know).It is slightly better, as it is consistent in calling XDP-generic in the XDP_REDIRECT action, which an action under heavy development, here we want the freedom to develop in different code tempi. And some features might never be available in XDP-generic. Thus, when a feature is missing/broken it will be consistent for the user. The remaining question is how will a user know that XDP "mode" she is using? The user clearly loaded an XDP-native program, and expect the associated performance, but XDP_REDIRECT will be using the slow XDP-generic code path...> Signed-off-by: Jason Wang <jasowang at redhat.com> > --- > drivers/net/virtio_net.c | 107 +++++++++++++++++++++++++++++------------------ > 1 file changed, 67 insertions(+), 40 deletions(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 9bb9e56..81190ba 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c[...]> @@ -689,31 +709,30 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, > struct bpf_prog *xdp_prog; > unsigned int truesize; > unsigned int headroom = mergeable_ctx_to_headroom(ctx); > - bool sent; > + bool sent, skb_xdp = false; > + int err; > > head_skb = NULL; > > rcu_read_lock(); > xdp_prog = rcu_dereference(rq->xdp_prog); > if (xdp_prog) { > - struct page *xdp_page; > struct xdp_buff xdp; > void *data; > u32 act; > > - /* This happens when rx buffer size is underestimated */ > + /* This happens when rx buffer size is underestimated > + * or headroom is not enough because of the buffer > + * was refilled before XDP is set. In both cases, > + * for simplicity, we will offload them to generic > + * XDP routine. This should only happen for the first > + * several packets, so we don't care much about its > + * performance. > + */ > if (unlikely(num_buf > 1 || > headroom < virtnet_get_headroom(vi))) {I think you also need to check the tailroom here? (AFAIK this is hidden in the len_to_ctx as the "truesize").> - /* linearize data for XDP */ > - xdp_page = xdp_linearize_page(rq, &num_buf, > - page, offset, > - VIRTIO_XDP_HEADROOM, > - &len); > - if (!xdp_page) > - goto err_xdp; > - offset = VIRTIO_XDP_HEADROOM; > - } else { > - xdp_page = page; > + skb_xdp = true; > + goto skb_xdp; > } > > /* Transient failure which in theory could occur if-- Best regards, Jesper Dangaard Brouer MSc.CS, Principal Kernel Engineer at Red Hat LinkedIn: http://www.linkedin.com/in/brouer
Jesper Dangaard Brouer
2018-Mar-01 09:10 UTC
[PATCH net-next 0/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
On Thu, 1 Mar 2018 11:19:03 +0800 Jason Wang <jasowang at redhat.com> wrote:> This series tries to re-enable XDP_REDIRECT for mergeable buffer which > was removed since commit 7324f5399b06 ("virtio_net: disable > XDP_REDIRECT in receive_mergeable() case"). Main concerns are: > > - not enough tailroom was reserved which breaks cpumapTo address this at a more fundamental level, I would suggest that we/you instead extend XDP to know it's buffers "frame" size/end. (The assumption use to be, xdp_buff->data_hard_start + PAGE_SIZE, but ixgbe+virtio_net broke that assumption). It should actually be fairly easy to implement: * Simply extend xdp_buff with a "data_hard_end" pointer. Now cpumap is more safe... instead of crashing, it can now know/see when it is safe to create an SKB using build_skb (could fallback to dev_alloc_skb). -- Best regards, Jesper Dangaard Brouer MSc.CS, Principal Kernel Engineer at Red Hat LinkedIn: http://www.linkedin.com/in/brouer
Jason Wang
2018-Mar-01 09:23 UTC
[PATCH net-next 0/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
On 2018?03?01? 17:10, Jesper Dangaard Brouer wrote:> On Thu, 1 Mar 2018 11:19:03 +0800 > Jason Wang <jasowang at redhat.com> wrote: > >> This series tries to re-enable XDP_REDIRECT for mergeable buffer which >> was removed since commit 7324f5399b06 ("virtio_net: disable >> XDP_REDIRECT in receive_mergeable() case"). Main concerns are: >> >> - not enough tailroom was reserved which breaks cpumap > To address this at a more fundamental level, I would suggest that we/you > instead extend XDP to know it's buffers "frame" size/end. (The > assumption use to be, xdp_buff->data_hard_start + PAGE_SIZE, but > ixgbe+virtio_net broke that assumption). > > It should actually be fairly easy to implement: > * Simply extend xdp_buff with a "data_hard_end" pointer.Right, and then cpumap can warn and drop packets with insufficient tailroom. But it should be a patch on top of this I think. Thanks> > Now cpumap is more safe... instead of crashing, it can now know/see when > it is safe to create an SKB using build_skb (could fallback to > dev_alloc_skb). >
Michael S. Tsirkin
2018-Mar-01 13:36 UTC
[PATCH net-next 1/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
On Thu, Mar 01, 2018 at 11:19:04AM +0800, Jason Wang wrote:> XDP_REDIRECT support for mergeable buffer was removed since commit > 7324f5399b06 ("virtio_net: disable XDP_REDIRECT in receive_mergeable() > case"). This is because we don't reserve enough tailroom for struct > skb_shared_info which breaks XDP assumption. Other complaints are, the > complex linearize logic and EWMA estimation may increase the > possibility of linearizing. > > Signed-off-by: Jason Wang <jasowang at redhat.com>How about reposting just this patch for net? It's pretty small and this way we don't have broken redirect there. Probably keeping the linearize in here to reduce the amount of changes.> --- > drivers/net/virtio_net.c | 107 +++++++++++++++++++++++++++++------------------ > 1 file changed, 67 insertions(+), 40 deletions(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 9bb9e56..81190ba 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -537,6 +537,26 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, > return NULL; > } > > +static struct sk_buff *virtnet_skb_xdp(struct receive_queue *rq, > + struct sk_buff *skb) > +{ > + struct bpf_prog *xdp_prog; > + int ret; > + > + rcu_read_lock(); > + xdp_prog = rcu_dereference(rq->xdp_prog); > + if (xdp_prog) { > + ret = do_xdp_generic(xdp_prog, skb); > + if (ret != XDP_PASS) { > + rcu_read_unlock(); > + return NULL; > + } > + } > + rcu_read_unlock(); > + > + return skb; > +} > + > static struct sk_buff *receive_small(struct net_device *dev, > struct virtnet_info *vi, > struct receive_queue *rq, > @@ -689,31 +709,30 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, > struct bpf_prog *xdp_prog; > unsigned int truesize; > unsigned int headroom = mergeable_ctx_to_headroom(ctx); > - bool sent; > + bool sent, skb_xdp = false; > + int err; > > head_skb = NULL; > > rcu_read_lock(); > xdp_prog = rcu_dereference(rq->xdp_prog); > if (xdp_prog) { > - struct page *xdp_page; > struct xdp_buff xdp; > void *data; > u32 act; > > - /* This happens when rx buffer size is underestimated */ > + /* This happens when rx buffer size is underestimated > + * or headroom is not enough because of the buffer > + * was refilled before XDP is set. In both cases, > + * for simplicity, we will offload them to generic > + * XDP routine. This should only happen for the first > + * several packets, so we don't care much about its > + * performance. > + */ > if (unlikely(num_buf > 1 || > headroom < virtnet_get_headroom(vi))) { > - /* linearize data for XDP */ > - xdp_page = xdp_linearize_page(rq, &num_buf, > - page, offset, > - VIRTIO_XDP_HEADROOM, > - &len); > - if (!xdp_page) > - goto err_xdp; > - offset = VIRTIO_XDP_HEADROOM; > - } else { > - xdp_page = page; > + skb_xdp = true; > + goto skb_xdp; > } > > /* Transient failure which in theory could occur if > @@ -727,7 +746,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, > /* Allow consuming headroom but reserve enough space to push > * the descriptor on if we get an XDP_TX return code. > */ > - data = page_address(xdp_page) + offset; > + data = page_address(page) + offset; > xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len; > xdp.data = data + vi->hdr_len; > xdp_set_data_meta_invalid(&xdp); > @@ -736,9 +755,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, > > act = bpf_prog_run_xdp(xdp_prog, &xdp); > > - if (act != XDP_PASS) > - ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len); > - > switch (act) { > case XDP_PASS: > /* recalculate offset to account for any header > @@ -746,28 +762,22 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, > * skb and avoid using offset > */ > offset = xdp.data - > - page_address(xdp_page) - vi->hdr_len; > - > - /* We can only create skb based on xdp_page. */ > - if (unlikely(xdp_page != page)) { > - rcu_read_unlock(); > - put_page(page); > - head_skb = page_to_skb(vi, rq, xdp_page, > - offset, len, PAGE_SIZE); > - return head_skb; > - } > + page_address(page) - vi->hdr_len; > break; > case XDP_TX: > sent = __virtnet_xdp_xmit(vi, &xdp); > if (unlikely(!sent)) { > trace_xdp_exception(vi->dev, xdp_prog, act); > - if (unlikely(xdp_page != page)) > - put_page(xdp_page); > goto err_xdp; > } > *xdp_xmit = true; > - if (unlikely(xdp_page != page)) > + rcu_read_unlock(); > + goto xdp_xmit; > + case XDP_REDIRECT: > + err = xdp_do_redirect(dev, &xdp, xdp_prog); > + if (err) > goto err_xdp; > + *xdp_xmit = true; > rcu_read_unlock(); > goto xdp_xmit; > default: > @@ -775,13 +785,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, > case XDP_ABORTED: > trace_xdp_exception(vi->dev, xdp_prog, act); > case XDP_DROP: > - if (unlikely(xdp_page != page)) > - __free_pages(xdp_page, 0); > goto err_xdp; > } > } > rcu_read_unlock(); > > +skb_xdp: > truesize = mergeable_ctx_to_truesize(ctx); > if (unlikely(len > truesize)) { > pr_debug("%s: rx error: len %u exceeds truesize %lu\n", > @@ -848,7 +857,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, > } > } > > - ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); > + if (skb_xdp) > + head_skb = virtnet_skb_xdp(rq, head_skb); > + else > + ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len); > + > return head_skb; > > err_xdp: > @@ -1013,13 +1026,18 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq, > } > > static unsigned int get_mergeable_buf_len(struct receive_queue *rq, > - struct ewma_pkt_len *avg_pkt_len) > + struct ewma_pkt_len *avg_pkt_len, > + unsigned int room) > { > const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); > unsigned int len; > > - len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), > + if (room) > + return PAGE_SIZE - room; > + > + len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len), > rq->min_buf_len, PAGE_SIZE - hdr_len); > + > return ALIGN(len, L1_CACHE_BYTES); > } > > @@ -1028,21 +1046,27 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, > { > struct page_frag *alloc_frag = &rq->alloc_frag; > unsigned int headroom = virtnet_get_headroom(vi); > + unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; > + unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); > char *buf; > void *ctx; > int err; > unsigned int len, hole; > > - len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len); > - if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp))) > + /* Extra tailroom is needed to satisfy XDP's assumption. This > + * means rx frags coalescing won't work, but consider we've > + * disabled GSO for XDP, it won't be a big issue. > + */ > + len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room); > + if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp))) > return -ENOMEM; > > buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; > buf += headroom; /* advance address leaving hole at front of pkt */ > get_page(alloc_frag->page); > - alloc_frag->offset += len + headroom; > + alloc_frag->offset += len + room; > hole = alloc_frag->size - alloc_frag->offset; > - if (hole < len + headroom) { > + if (hole < len + room) { > /* To avoid internal fragmentation, if there is very likely not > * enough space for another buffer, add the remaining space to > * the current buffer. > @@ -2576,12 +2600,15 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue, > { > struct virtnet_info *vi = netdev_priv(queue->dev); > unsigned int queue_index = get_netdev_rx_queue_index(queue); > + unsigned int headroom = virtnet_get_headroom(vi); > + unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; > struct ewma_pkt_len *avg; > > BUG_ON(queue_index >= vi->max_queue_pairs); > avg = &vi->rq[queue_index].mrg_avg_pkt_len; > return sprintf(buf, "%u\n", > - get_mergeable_buf_len(&vi->rq[queue_index], avg)); > + get_mergeable_buf_len(&vi->rq[queue_index], avg, > + SKB_DATA_ALIGN(headroom + tailroom))); > } > > static struct rx_queue_attribute mergeable_rx_buffer_size_attribute > -- > 2.7.4
Reasonably Related Threads
- [PATCH net-next 2/2] virtio-net: simplify XDP handling in small buffer
- [PATCH net-next 0/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
- [PATCH net-next 0/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
- [PATCH net-next 2/2] virtio-net: simplify XDP handling in small buffer
- [PATCH net-next 1/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer