Add support for XDP adjust head by allocating a 256B header region
that XDP programs can grow into. This is only enabled when an XDP
program is loaded.
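For context, an XDP program consumes this headroom via the
bpf_xdp_adjust_head() helper. A minimal sketch of such a program
(illustrative only, not part of this patch; program name and the
16-byte delta are made up, and it is written against current libbpf
conventions):

/* Illustrative sketch, not part of this patch: grow the packet
 * front by 16 bytes into driver-provided headroom, e.g. to push
 * an outer encapsulation header.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_grow_head(struct xdp_md *ctx)
{
	/* A negative delta moves data toward data_hard_start; the
	 * helper fails if the driver reserved too little headroom.
	 */
	if (bpf_xdp_adjust_head(ctx, -16))
		return XDP_DROP;

	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;

	/* The verifier requires a bounds check before writing. */
	if (data + 16 > data_end)
		return XDP_DROP;

	__builtin_memset(data, 0, 16);	/* fill in new header bytes */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";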
In order to ensure that we do not have to unwind queue headroom, push
queue setup below bpf_prog_add(): on failure, unwinding a prog
reference reads better than issuing another queue setup call.
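Condensed from the diff below, the resulting control flow in
virtnet_xdp_set() becomes:

	old_hr = vi->headroom;
	if (prog) {
		/* take refs for the extra queues up front ... */
		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
		if (IS_ERR(prog))
			return PTR_ERR(prog);
		vi->headroom = VIRTIO_XDP_HEADROOM;
	} else {
		vi->headroom = 0;
	}

	err = virtnet_set_queues(vi, curr_qp + xdp_qp);
	if (err) {
		/* ... so a failure here only needs a ref unwind,
		 * not a second virtnet_set_queues() call.
		 */
		vi->headroom = old_hr;
		if (prog)
			bpf_prog_sub(prog, vi->max_queue_pairs - 1);
		return err;
	}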
TBD: merge with Jason Wang's fixes and do a bit more testing; note
that big_packets support is removed by Jason as well.
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
drivers/net/virtio_net.c | 53 +++++++++++++++++++++++++++++++---------------
1 file changed, 36 insertions(+), 17 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 08327e0..1a93158 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -159,6 +159,9 @@ struct virtnet_info {
/* Ethtool settings */
u8 duplex;
u32 speed;
+
+ /* Headroom allocated in RX Queue */
+ unsigned int headroom;
};
struct padded_vnet_hdr {
@@ -392,6 +395,7 @@ static u32 do_xdp_prog(struct virtnet_info *vi,
else
hdr_padded_len = sizeof(struct padded_vnet_hdr);
+ xdp.data_hard_start = buf - vi->headroom;
xdp.data = buf + hdr_padded_len;
xdp.data_end = xdp.data + (len - vi->hdr_len);
@@ -430,9 +434,12 @@ static struct sk_buff *receive_big(struct net_device *dev,
void *buf,
unsigned int len)
{
- struct bpf_prog *xdp_prog;
struct page *page = buf;
+ struct bpf_prog *xdp_prog;
struct sk_buff *skb;
+ int offset;
+
+ offset = vi->headroom;
rcu_read_lock();
xdp_prog = rcu_dereference(rq->xdp_prog);
@@ -442,7 +449,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
goto err_xdp;
- act = do_xdp_prog(vi, rq, xdp_prog, page, 0, len);
+ act = do_xdp_prog(vi, rq, xdp_prog, page, offset, len);
switch (act) {
case XDP_PASS:
break;
@@ -456,7 +463,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
}
rcu_read_unlock();
- skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
+ skb = page_to_skb(vi, rq, page, offset, len, PAGE_SIZE);
if (unlikely(!skb))
goto err;
@@ -797,10 +804,10 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
/* rq->sg[0], rq->sg[1] share the same page */
/* a separated rq->sg[0] for header - required in case !any_header_sg */
- sg_set_buf(&rq->sg[0], p, vi->hdr_len);
+ sg_set_buf(&rq->sg[0], p, vi->hdr_len + vi->headroom);
/* rq->sg[1] for data packet, from offset */
- offset = sizeof(struct padded_vnet_hdr);
+ offset = vi->headroom + sizeof(struct padded_vnet_hdr);
sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
/* chain first in list head */
@@ -823,24 +830,27 @@ static unsigned int get_mergeable_buf_len(struct ewma_pkt_len *avg_pkt_len)
return ALIGN(len, MERGEABLE_BUFFER_ALIGN);
}
-static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
+static int add_recvbuf_mergeable(struct virtnet_info *vi,
+ struct receive_queue *rq, gfp_t gfp)
{
struct page_frag *alloc_frag = &rq->alloc_frag;
+ unsigned int headroom = vi->headroom;
char *buf;
unsigned long ctx;
int err;
unsigned int len, hole;
len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len);
- if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
+ if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
return -ENOMEM;
buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+ buf += headroom; /* advance address leaving hole at front of pkt */
ctx = mergeable_buf_to_ctx(buf, len);
get_page(alloc_frag->page);
- alloc_frag->offset += len;
+ alloc_frag->offset += len + headroom;
hole = alloc_frag->size - alloc_frag->offset;
- if (hole < len) {
+ if (hole < len + headroom) {
/* To avoid internal fragmentation, if there is very likely not
* enough space for another buffer, add the remaining space to
* the current buffer. This extra space is not included in
@@ -874,7 +884,7 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
gfp |= __GFP_COLD;
do {
if (vi->mergeable_rx_bufs)
- err = add_recvbuf_mergeable(rq, gfp);
+ err = add_recvbuf_mergeable(vi, rq, gfp);
else if (vi->big_packets)
err = add_recvbuf_big(vi, rq, gfp);
else
@@ -1669,12 +1679,15 @@ static void virtnet_init_settings(struct net_device *dev)
.set_settings = virtnet_set_settings,
};
+#define VIRTIO_XDP_HEADROOM 256
+
static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
{
unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
struct virtnet_info *vi = netdev_priv(dev);
struct bpf_prog *old_prog;
u16 xdp_qp = 0, curr_qp;
+ unsigned int old_hr;
int i, err;
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
@@ -1704,19 +1717,25 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
return -ENOMEM;
}
+ old_hr = vi->headroom;
+ if (prog) {
+ prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+ vi->headroom = VIRTIO_XDP_HEADROOM;
+ } else {
+ vi->headroom = 0;
+ }
+
err = virtnet_set_queues(vi, curr_qp + xdp_qp);
if (err) {
dev_warn(&dev->dev, "XDP Device queue allocation failure.\n");
+ vi->headroom = old_hr;
+ if (prog)
+ bpf_prog_sub(prog, vi->max_queue_pairs - 1);
return err;
}
- if (prog) {
- prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
- if (IS_ERR(prog)) {
- virtnet_set_queues(vi, curr_qp);
- return PTR_ERR(prog);
- }
- }
vi->xdp_queue_pairs = xdp_qp;
netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);