thr3ads.net - Virtualization - [PATCH net-next 2/5] virtio_net: Add page_pool support to improve performance [Jun 2023]

If this information is useful, please help other people find it:
Share via:

Xuan Zhuo

2023-May-31 03:10 UTC

[PATCH net-next 2/5] virtio_net: Add page_pool support to improve performance

On Mon, 29 May 2023 15:28:17 +0800, Liang Chen <liangchen.linux at gmail.com> wrote:
> On Sun, May 28, 2023 at 2:40 PM Michael S. Tsirkin <mst at redhat.com> wrote:
> >
> > On Sat, May 27, 2023 at 08:35:01PM +0800, Liang Chen wrote:
> > > On Fri, May 26, 2023 at 2:51 PM Jason Wang <jasowang at redhat.com> wrote:
> > > >
> > > > On Fri, May 26, 2023 at 1:46 PM Liang Chen <liangchen.linux at gmail.com> wrote:
> > > > >
> > > > > The implementation at the moment uses one page per
packet in both the
> > > > > normal and XDP path.
> > > >
> > > > It's better to explain why we need a page pool and how
it can help the
> > > > performance.
> > > >
> > >
> > > Sure, I will include that on v2.
> > > > > In addition, introducing a module parameter to enable
> > > > > or disable the usage of page pool (disabled by
default).
> > > >
> > > > If page pool wins for most of the cases, any reason to
disable it by default?
> > > >
> > >
> > > Thank you for raising the point. It does make sense to enable it
by default.
> >
> > I'd like to see more benchmarks pls then, with a variety of packet
> > sizes, udp and tcp.
> >
>
> Sure, more benchmarks will be provided. Thanks.

I think so.

I did this, but I did not find any improvement, so I gave up on it.

Thanks.

>
>
> > > > >
> > > > > In single-core vm testing environments, it gives a
modest performance gain
> > > > > in the normal path.
> > > > >   Upstream codebase: 47.5 Gbits/sec
> > > > >   Upstream codebase + page_pool support: 50.2 Gbits/sec
> > > > >
> > > > > In multi-core vm testing environments, The most
significant performance
> > > > > gain is observed in XDP cpumap:
> > > > >   Upstream codebase: 1.38 Gbits/sec
> > > > >   Upstream codebase + page_pool support: 9.74 Gbits/sec
> > > >
> > > > Please show more details on the test. E.g which kinds of
tests have
> > > > you measured?
> > > >
> > > > Btw, it would be better to measure PPS as well.
> > > >
> > >
> > > Sure. It will be added on v2.
> > > > >
> > > > > With this foundation, we can further integrate page
pool fragmentation and
> > > > > DMA map/unmap support.
> > > > >
> > > > > Signed-off-by: Liang Chen <liangchen.linux at
gmail.com>
> > > > > ---
> > > > >  drivers/net/virtio_net.c | 188
++++++++++++++++++++++++++++++---------
> > > >
> > > > I believe we should make virtio-net to select
CONFIG_PAGE_POOL or do
> > > > the ifdef tricks at least.
> > > >
> > >
> > > Sure. it will be done on v2.
> > > > >  1 file changed, 146 insertions(+), 42 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/virtio_net.c
b/drivers/net/virtio_net.c
> > > > > index c5dca0d92e64..99c0ca0c1781 100644
> > > > > --- a/drivers/net/virtio_net.c
> > > > > +++ b/drivers/net/virtio_net.c
> > > > > @@ -31,6 +31,9 @@ module_param(csum, bool, 0444);
> > > > >  module_param(gso, bool, 0444);
> > > > >  module_param(napi_tx, bool, 0644);
> > > > >
> > > > > +static bool page_pool_enabled;
> > > > > +module_param(page_pool_enabled, bool, 0400);
> > > > > +
> > > > >  /* FIXME: MTU in config. */
> > > > >  #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN +
ETH_DATA_LEN)
> > > > >  #define GOOD_COPY_LEN  128
> > > > > @@ -159,6 +162,9 @@ struct receive_queue {
> > > > >         /* Chain pages by the private ptr. */
> > > > >         struct page *pages;
> > > > >
> > > > > +       /* Page pool */
> > > > > +       struct page_pool *page_pool;
> > > > > +
> > > > >         /* Average packet length for mergeable receive
buffers. */
> > > > >         struct ewma_pkt_len mrg_avg_pkt_len;
> > > > >
> > > > > @@ -459,6 +465,14 @@ static struct sk_buff
*virtnet_build_skb(void *buf, unsigned int buflen,
> > > > >         return skb;
> > > > >  }
> > > > >
> > > > > +static void virtnet_put_page(struct receive_queue *rq,
struct page *page)
> > > > > +{
> > > > > +       if (rq->page_pool)
> > > > > +              
page_pool_put_full_page(rq->page_pool, page, true);
> > > > > +       else
> > > > > +               put_page(page);
> > > > > +}
> > > > > +
> > > > >  /* Called from bottom half context */
> > > > >  static struct sk_buff *page_to_skb(struct virtnet_info
*vi,
> > > > >                                    struct receive_queue
*rq,
> > > > > @@ -555,7 +569,7 @@ static struct sk_buff
*page_to_skb(struct virtnet_info *vi,
> > > > >         hdr = skb_vnet_hdr(skb);
> > > > >         memcpy(hdr, hdr_p, hdr_len);
> > > > >         if (page_to_free)
> > > > > -               put_page(page_to_free);
> > > > > +               virtnet_put_page(rq, page_to_free);
> > > > >
> > > > >         return skb;
> > > > >  }
> > > > > @@ -802,7 +816,7 @@ static int virtnet_xdp_xmit(struct
net_device *dev,
> > > > >         return ret;
> > > > >  }
> > > > >
> > > > > -static void put_xdp_frags(struct xdp_buff *xdp)
> > > > > +static void put_xdp_frags(struct xdp_buff *xdp, struct
receive_queue *rq)
> > > > >  {
> > > >
> > > > rq could be fetched from xdp_rxq_info?
> > >
> > > Yeah, it has the queue_index there.
> > > >
> > > > >         struct skb_shared_info *shinfo;
> > > > >         struct page *xdp_page;
> > > > > @@ -812,7 +826,7 @@ static void put_xdp_frags(struct
xdp_buff *xdp)
> > > > >                 shinfo =
xdp_get_shared_info_from_buff(xdp);
> > > > >                 for (i = 0; i < shinfo->nr_frags;
i++) {
> > > > >                         xdp_page =
skb_frag_page(&shinfo->frags[i]);
> > > > > -                       put_page(xdp_page);
> > > > > +                       virtnet_put_page(rq, xdp_page);
> > > > >                 }
> > > > >         }
> > > > >  }
> > > > > @@ -903,7 +917,11 @@ static struct page
*xdp_linearize_page(struct receive_queue *rq,
> > > > >         if (page_off + *len + tailroom > PAGE_SIZE)
> > > > >                 return NULL;
> > > > >
> > > > > -       page = alloc_page(GFP_ATOMIC);
> > > > > +       if (rq->page_pool)
> > > > > +               page =
page_pool_dev_alloc_pages(rq->page_pool);
> > > > > +       else
> > > > > +               page = alloc_page(GFP_ATOMIC);
> > > > > +
> > > > >         if (!page)
> > > > >                 return NULL;
> > > > >
> > > > > @@ -926,21 +944,24 @@ static struct page
*xdp_linearize_page(struct receive_queue *rq,
> > > > >                  * is sending packet larger than the
MTU.
> > > > >                  */
> > > > >                 if ((page_off + buflen + tailroom) >
PAGE_SIZE) {
> > > > > -                       put_page(p);
> > > > > +                       virtnet_put_page(rq, p);
> > > > >                         goto err_buf;
> > > > >                 }
> > > > >
> > > > >                 memcpy(page_address(page) + page_off,
> > > > >                        page_address(p) + off, buflen);
> > > > >                 page_off += buflen;
> > > > > -               put_page(p);
> > > > > +               virtnet_put_page(rq, p);
> > > > >         }
> > > > >
> > > > >         /* Headroom does not contribute to packet
length */
> > > > >         *len = page_off - VIRTIO_XDP_HEADROOM;
> > > > >         return page;
> > > > >  err_buf:
> > > > > -       __free_pages(page, 0);
> > > > > +       if (rq->page_pool)
> > > > > +              
page_pool_put_full_page(rq->page_pool, page, true);
> > > > > +       else
> > > > > +               __free_pages(page, 0);
> > > > >         return NULL;
> > > > >  }
> > > > >
> > > > > @@ -1144,7 +1165,7 @@ static void
mergeable_buf_free(struct receive_queue *rq, int num_buf,
> > > > >                 }
> > > > >                 stats->bytes += len;
> > > > >                 page = virt_to_head_page(buf);
> > > > > -               put_page(page);
> > > > > +               virtnet_put_page(rq, page);
> > > > >         }
> > > > >  }
> > > > >
> > > > > @@ -1264,7 +1285,7 @@ static int
virtnet_build_xdp_buff_mrg(struct net_device *dev,
> > > > >                 cur_frag_size = truesize;
> > > > >                 xdp_frags_truesz += cur_frag_size;
> > > > >                 if (unlikely(len > truesize - room
|| cur_frag_size > PAGE_SIZE)) {
> > > > > -                       put_page(page);
> > > > > +                       virtnet_put_page(rq, page);
> > > > >                         pr_debug("%s: rx error:
len %u exceeds truesize %lu\n",
> > > > >                                  dev->name, len,
(unsigned long)(truesize - room));
> > > > >                        
dev->stats.rx_length_errors++;
> > > > > @@ -1283,7 +1304,7 @@ static int
virtnet_build_xdp_buff_mrg(struct net_device *dev,
> > > > >         return 0;
> > > > >
> > > > >  err:
> > > > > -       put_xdp_frags(xdp);
> > > > > +       put_xdp_frags(xdp, rq);
> > > > >         return -EINVAL;
> > > > >  }
> > > > >
> > > > > @@ -1344,7 +1365,10 @@ static void
*mergeable_xdp_get_buf(struct virtnet_info *vi,
> > > > >                 if (*len + xdp_room > PAGE_SIZE)
> > > > >                         return NULL;
> > > > >
> > > > > -               xdp_page = alloc_page(GFP_ATOMIC);
> > > > > +               if (rq->page_pool)
> > > > > +                       xdp_page =
page_pool_dev_alloc_pages(rq->page_pool);
> > > > > +               else
> > > > > +                       xdp_page =
alloc_page(GFP_ATOMIC);
> > > > >                 if (!xdp_page)
> > > > >                         return NULL;
> > > > >
> > > > > @@ -1354,7 +1378,7 @@ static void
*mergeable_xdp_get_buf(struct virtnet_info *vi,
> > > > >
> > > > >         *frame_sz = PAGE_SIZE;
> > > > >
> > > > > -       put_page(*page);
> > > > > +       virtnet_put_page(rq, *page);
> > > > >
> > > > >         *page = xdp_page;
> > > > >
> > > > > @@ -1400,6 +1424,8 @@ static struct sk_buff
*receive_mergeable_xdp(struct net_device *dev,
> > > > >                 head_skb = build_skb_from_xdp_buff(dev,
vi, &xdp, xdp_frags_truesz);
> > > > >                 if (unlikely(!head_skb))
> > > > >                         break;
> > > > > +               if (rq->page_pool)
> > > > > +                       skb_mark_for_recycle(head_skb);
> > > > >                 return head_skb;
> > > > >
> > > > >         case XDP_TX:
> > > > > @@ -1410,10 +1436,10 @@ static struct sk_buff
*receive_mergeable_xdp(struct net_device *dev,
> > > > >                 break;
> > > > >         }
> > > > >
> > > > > -       put_xdp_frags(&xdp);
> > > > > +       put_xdp_frags(&xdp, rq);
> > > > >
> > > > >  err_xdp:
> > > > > -       put_page(page);
> > > > > +       virtnet_put_page(rq, page);
> > > > >         mergeable_buf_free(rq, num_buf, dev, stats);
> > > > >
> > > > >         stats->xdp_drops++;
> > > > > @@ -1467,6 +1493,9 @@ static struct sk_buff
*receive_mergeable(struct net_device *dev,
> > > > >         head_skb = page_to_skb(vi, rq, page, offset,
len, truesize, headroom);
> > > > >         curr_skb = head_skb;
> > > > >
> > > > > +       if (rq->page_pool)
> > > > > +               skb_mark_for_recycle(curr_skb);
> > > > > +
> > > > >         if (unlikely(!curr_skb))
> > > > >                 goto err_skb;
> > > > >         while (--num_buf) {
> > > > > @@ -1509,6 +1538,8 @@ static struct sk_buff
*receive_mergeable(struct net_device *dev,
> > > > >                         curr_skb = nskb;
> > > > >                         head_skb->truesize +=
nskb->truesize;
> > > > >                         num_skb_frags = 0;
> > > > > +                       if (rq->page_pool)
> > > > > +                              
skb_mark_for_recycle(curr_skb);
> > > > >                 }
> > > > >                 if (curr_skb != head_skb) {
> > > > >                         head_skb->data_len += len;
> > > > > @@ -1517,7 +1548,7 @@ static struct sk_buff
*receive_mergeable(struct net_device *dev,
> > > > >                 }
> > > > >                 offset = buf - page_address(page);
> > > > >                 if (skb_can_coalesce(curr_skb,
num_skb_frags, page, offset)) {
> > > > > -                       put_page(page);
> > > > > +                       virtnet_put_page(rq, page);
> > > >
> > > > I wonder why we can't do this during buffer allocation like other drivers?
> > > >
> > >
> > > Sorry, I don't quite understand the point here. Would you
please
> > > elaborate a bit more?
> > > > >                         skb_coalesce_rx_frag(curr_skb,
num_skb_frags - 1,
> > > > >                                              len,
truesize);
> > > > >                 } else {
> > > > > @@ -1530,7 +1561,7 @@ static struct sk_buff
*receive_mergeable(struct net_device *dev,
> > > > >         return head_skb;
> > > > >
> > > > >  err_skb:
> > > > > -       put_page(page);
> > > > > +       virtnet_put_page(rq, page);
> > > > >         mergeable_buf_free(rq, num_buf, dev, stats);
> > > > >
> > > > >  err_buf:
> > > > > @@ -1737,31 +1768,40 @@ static int
add_recvbuf_mergeable(struct virtnet_info *vi,
> > > > >          * disabled GSO for XDP, it won't be a big
issue.
> > > > >          */
> > > > >         len = get_mergeable_buf_len(rq,
&rq->mrg_avg_pkt_len, room);
> > > > > -       if (unlikely(!skb_page_frag_refill(len + room,
alloc_frag, gfp)))
> > > > > -               return -ENOMEM;
> > > > > +       if (rq->page_pool) {
> > > > > +               struct page *page;
> > > > >
> > > > > -       buf = (char *)page_address(alloc_frag->page)
+ alloc_frag->offset;
> > > > > -       buf += headroom; /* advance address leaving
hole at front of pkt */
> > > > > -       get_page(alloc_frag->page);
> > > > > -       alloc_frag->offset += len + room;
> > > > > -       hole = alloc_frag->size -
alloc_frag->offset;
> > > > > -       if (hole < len + room) {
> > > > > -               /* To avoid internal fragmentation, if
there is very likely not
> > > > > -                * enough space for another buffer, add
the remaining space to
> > > > > -                * the current buffer.
> > > > > -                * XDP core assumes that frame_size of
xdp_buff and the length
> > > > > -                * of the frag are PAGE_SIZE, so we
disable the hole mechanism.
> > > > > -                */
> > > > > -               if (!headroom)
> > > > > -                       len += hole;
> > > > > -               alloc_frag->offset += hole;
> > > > > -       }
> > > > > +               page =
page_pool_dev_alloc_pages(rq->page_pool);
> > > > > +               if (unlikely(!page))
> > > > > +                       return -ENOMEM;
> > > > > +               buf = (char *)page_address(page);
> > > > > +               buf += headroom; /* advance address
leaving hole at front of pkt */
> > > > > +       } else {
> > > > > +               if (unlikely(!skb_page_frag_refill(len
+ room, alloc_frag, gfp)))
> > > >
> > > > Why not simply use a helper like virtnet_page_frag_refill()
and add
> > > > the page_pool allocation logic there? It helps to reduce the
> > > > changeset.
> > > >
> > >
> > > Sure. Will do that on v2.
> > > > > +                       return -ENOMEM;
> > > > >
> > > > > +               buf = (char
*)page_address(alloc_frag->page) + alloc_frag->offset;
> > > > > +               buf += headroom; /* advance address
leaving hole at front of pkt */
> > > > > +               get_page(alloc_frag->page);
> > > > > +               alloc_frag->offset += len + room;
> > > > > +               hole = alloc_frag->size -
alloc_frag->offset;
> > > > > +               if (hole < len + room) {
> > > > > +                       /* To avoid internal
fragmentation, if there is very likely not
> > > > > +                        * enough space for another
buffer, add the remaining space to
> > > > > +                        * the current buffer.
> > > > > +                        * XDP core assumes that
frame_size of xdp_buff and the length
> > > > > +                        * of the frag are PAGE_SIZE,
so we disable the hole mechanism.
> > > > > +                        */
> > > > > +                       if (!headroom)
> > > > > +                               len += hole;
> > > > > +                       alloc_frag->offset += hole;
> > > > > +               }
> > > > > +       }
> > > > >         sg_init_one(rq->sg, buf, len);
> > > > >         ctx = mergeable_len_to_ctx(len + room,
headroom);
> > > > >         err = virtqueue_add_inbuf_ctx(rq->vq,
rq->sg, 1, buf, ctx, gfp);
> > > > >         if (err < 0)
> > > > > -               put_page(virt_to_head_page(buf));
> > > > > +               virtnet_put_page(rq,
virt_to_head_page(buf));
> > > > >
> > > > >         return err;
> > > > >  }
> > > > > @@ -1994,8 +2034,15 @@ static int
virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
> > > > >         if (err < 0)
> > > > >                 return err;
> > > > >
> > > > > -       err =
xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq,
> > > > > -                                       
MEM_TYPE_PAGE_SHARED, NULL);
> > > > > +       if (vi->rq[qp_index].page_pool)
> > > > > +               err =
xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq,
> > > > > +                                               
MEM_TYPE_PAGE_POOL,
> > > > > +                                               
vi->rq[qp_index].page_pool);
> > > > > +       else
> > > > > +               err =
xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq,
> > > > > +                                               
MEM_TYPE_PAGE_SHARED,
> > > > > +                                                NULL);
> > > > > +
> > > > >         if (err < 0)
> > > > >                 goto err_xdp_reg_mem_model;
> > > > >
> > > > > @@ -2951,6 +2998,7 @@ static void
virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
> > > > >                                 ethtool_sprintf(&p,
"tx_queue_%u_%s", i,
> > > > >                                                
virtnet_sq_stats_desc[j].desc);
> > > > >                 }
> > > > > +               page_pool_ethtool_stats_get_strings(p);
> > > > >                 break;
> > > > >         }
> > > > >  }
> > > > > @@ -2962,12 +3010,30 @@ static int
virtnet_get_sset_count(struct net_device *dev, int sset)
> > > > >         switch (sset) {
> > > > >         case ETH_SS_STATS:
> > > > >                 return vi->curr_queue_pairs *
(VIRTNET_RQ_STATS_LEN +
> > > > > -                                             
VIRTNET_SQ_STATS_LEN);
> > > > > +                                             
VIRTNET_SQ_STATS_LEN +
> > > > > +                                              
(page_pool_enabled && vi->mergeable_rx_bufs ?
> > > > > +                                               
page_pool_ethtool_stats_get_count() : 0));
> > > > >         default:
> > > > >                 return -EOPNOTSUPP;
> > > > >         }
> > > > >  }
> > > > >
> > > > > +static void virtnet_get_page_pool_stats(struct
net_device *dev, u64 *data)
> > > > > +{
> > > > > +#ifdef CONFIG_PAGE_POOL_STATS
> > > > > +       struct virtnet_info *vi = netdev_priv(dev);
> > > > > +       struct page_pool_stats pp_stats = {};
> > > > > +       int i;
> > > > > +
> > > > > +       for (i = 0; i < vi->curr_queue_pairs;
i++) {
> > > > > +               if (!vi->rq[i].page_pool)
> > > > > +                       continue;
> > > > > +              
page_pool_get_stats(vi->rq[i].page_pool, &pp_stats);
> > > > > +       }
> > > > > +       page_pool_ethtool_stats_get(data,
&pp_stats);
> > > > > +#endif /* CONFIG_PAGE_POOL_STATS */
> > > > > +}
> > > > > +
> > > > >  static void virtnet_get_ethtool_stats(struct
net_device *dev,
> > > > >                                       struct
ethtool_stats *stats, u64 *data)
> > > > >  {
> > > > > @@ -3003,6 +3069,8 @@ static void
virtnet_get_ethtool_stats(struct net_device *dev,
> > > > >                 } while
(u64_stats_fetch_retry(&sq->stats.syncp, start));
> > > > >                 idx += VIRTNET_SQ_STATS_LEN;
> > > > >         }
> > > > > +
> > > > > +       virtnet_get_page_pool_stats(dev,
&data[idx]);
> > > > >  }
> > > > >
> > > > >  static void virtnet_get_channels(struct net_device
*dev,
> > > > > @@ -3623,6 +3691,8 @@ static void
virtnet_free_queues(struct virtnet_info *vi)
> > > > >         for (i = 0; i < vi->max_queue_pairs; i++)
{
> > > > >                
__netif_napi_del(&vi->rq[i].napi);
> > > > >                
__netif_napi_del(&vi->sq[i].napi);
> > > > > +               if (vi->rq[i].page_pool)
> > > > > +                      
page_pool_destroy(vi->rq[i].page_pool);
> > > > >         }
> > > > >
> > > > >         /* We called __netif_napi_del(),
> > > > > @@ -3679,12 +3749,19 @@ static void
virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf)
> > > > >         struct virtnet_info *vi = vq->vdev->priv;
> > > > >         int i = vq2rxq(vq);
> > > > >
> > > > > -       if (vi->mergeable_rx_bufs)
> > > > > -               put_page(virt_to_head_page(buf));
> > > > > -       else if (vi->big_packets)
> > > > > +       if (vi->mergeable_rx_bufs) {
> > > > > +               if (vi->rq[i].page_pool) {
> > > > > +                      
page_pool_put_full_page(vi->rq[i].page_pool,
> > > > > +                                              
virt_to_head_page(buf),
> > > > > +                                               true);
> > > > > +               } else {
> > > > > +                      
put_page(virt_to_head_page(buf));
> > > > > +               }
> > > > > +       } else if (vi->big_packets) {
> > > > >                 give_pages(&vi->rq[i], buf);
> > > >
> > > > Any reason only mergeable were modified but not for small
and big?
> > > >
> > > > Thanks
> > > >
> > >
> > > Big mode uses the page chain to recycle pages, thus the using of
> > > "private" of the buffer page. I will take further look
into that to
> > > see if it is better to use page pool in these cases. Thanks!
> > >
> > >
> > >
> > > > > -       else
> > > > > +       } else {
> > > > >                 put_page(virt_to_head_page(buf));
> > > > > +       }
> > > > >  }
> > > > >
> > > > >  static void free_unused_bufs(struct virtnet_info *vi)
> > > > > @@ -3718,6 +3795,26 @@ static void
virtnet_del_vqs(struct virtnet_info *vi)
> > > > >         virtnet_free_queues(vi);
> > > > >  }
> > > > >
> > > > > +static void virtnet_alloc_page_pool(struct
receive_queue *rq)
> > > > > +{
> > > > > +       struct virtio_device *vdev =
rq->vq->vdev;
> > > > > +
> > > > > +       struct page_pool_params pp_params = {
> > > > > +               .order = 0,
> > > > > +               .pool_size = rq->vq->num_max,
> > > > > +               .nid =
dev_to_node(vdev->dev.parent),
> > > > > +               .dev = vdev->dev.parent,
> > > > > +               .offset = 0,
> > > > > +       };
> > > > > +
> > > > > +       rq->page_pool =
page_pool_create(&pp_params);
> > > > > +       if (IS_ERR(rq->page_pool)) {
> > > > > +               dev_warn(&vdev->dev, "page
pool creation failed: %ld\n",
> > > > > +                        PTR_ERR(rq->page_pool));
> > > > > +               rq->page_pool = NULL;
> > > > > +       }
> > > > > +}
> > > > > +
> > > > >  /* How large should a single buffer be so a queue full
of these can fit at
> > > > >   * least one full packet?
> > > > >   * Logic below assumes the mergeable buffer header is
used.
> > > > > @@ -3801,6 +3898,13 @@ static int
virtnet_find_vqs(struct virtnet_info *vi)
> > > > >                 vi->rq[i].vq = vqs[rxq2vq(i)];
> > > > >                 vi->rq[i].min_buf_len =
mergeable_min_buf_len(vi, vi->rq[i].vq);
> > > > >                 vi->sq[i].vq = vqs[txq2vq(i)];
> > > > > +
> > > > > +               if (page_pool_enabled &&
vi->mergeable_rx_bufs)
> > > > > +                      
virtnet_alloc_page_pool(&vi->rq[i]);
> > > > > +               else
> > > > > +                      
dev_warn(&vi->vdev->dev,
> > > > > +                                "page pool only
support mergeable mode\n");
> > > > > +
> > > > >         }
> > > > >
> > > > >         /* run here: ret == 0. */
> > > > > --
> > > > > 2.31.1
> > > > >
> > > >
> >

Liang Chen

2023-Jun-07 09:11 UTC

head link

[PATCH net-next 2/5] virtio_net: Add page_pool support to improve performance

On Wed, May 31, 2023 at 11:12 AM Xuan Zhuo <xuanzhuo at linux.alibaba.com> wrote:
>
> On Mon, 29 May 2023 15:28:17 +0800, Liang Chen <liangchen.linux at gmail.com> wrote:
> > On Sun, May 28, 2023 at 2:40 PM Michael S. Tsirkin <mst at redhat.com> wrote:
> > >
> > > On Sat, May 27, 2023 at 08:35:01PM +0800, Liang Chen wrote:
> > > > On Fri, May 26, 2023 at 2:51?PM Jason Wang <jasowang at
redhat.com> wrote:
> > > > >
> > > > > On Fri, May 26, 2023 at 1:46 PM Liang Chen <liangchen.linux at gmail.com> wrote:
> > > > > >
> > > > > > The implementation at the moment uses one page per
packet in both the
> > > > > > normal and XDP path.
> > > > >
> > > > > It's better to explain why we need a page pool and
how it can help the
> > > > > performance.
> > > > >
> > > >
> > > > Sure, I will include that on v2.
> > > > > > In addition, introducing a module parameter to
enable
> > > > > > or disable the usage of page pool (disabled by
default).
> > > > >
> > > > > If page pool wins for most of the cases, any reason to
disable it by default?
> > > > >
> > > >
> > > > Thank you for raising the point. It does make sense to
enable it by default.
> > >
> > > I'd like to see more benchmarks pls then, with a variety of
packet
> > > sizes, udp and tcp.
> > >
> >
> > Sure, more benchmarks will be provided. Thanks.
>
>
> I think so.
>
> I did this, but I did not find any improvement, so I gave up on it.
>
> Thanks.
>
>
Our UDP benchmark shows a steady 0.8 percent change in PPS
measurement. However, when conducting iperf TCP stream performance
testing, the results vary depending on the packet size and testing
setup. With small packet sizes, the performance actually drops
slightly due to the reasons I explained in the previous email. On the
other hand, with large packets, we need to ensure that the sender side
doesn't become the bottleneck. To achieve this, our setup uses a
single-core vm to keep the receiver busy, which allows us to identify
performance differences in the receiving path.


Thanks,
Liang



> >
> >
> > > > > >
> > > > > > In single-core vm testing environments, it gives a
modest performance gain
> > > > > > in the normal path.
> > > > > >   Upstream codebase: 47.5 Gbits/sec
> > > > > >   Upstream codebase + page_pool support: 50.2
Gbits/sec
> > > > > >
> > > > > > In multi-core vm testing environments, The most
significant performance
> > > > > > gain is observed in XDP cpumap:
> > > > > >   Upstream codebase: 1.38 Gbits/sec
> > > > > >   Upstream codebase + page_pool support: 9.74
Gbits/sec
> > > > >
> > > > > Please show more details on the test. E.g which kinds
of tests have
> > > > > you measured?
> > > > >
> > > > > Btw, it would be better to measure PPS as well.
> > > > >
> > > >
> > > > Sure. It will be added on v2.
> > > > > >
> > > > > > With this foundation, we can further integrate
page pool fragmentation and
> > > > > > DMA map/unmap support.
> > > > > >
> > > > > > Signed-off-by: Liang Chen <liangchen.linux at
gmail.com>
> > > > > > ---
> > > > > >  drivers/net/virtio_net.c | 188
++++++++++++++++++++++++++++++---------
> > > > >
> > > > > I believe we should make virtio-net to select
CONFIG_PAGE_POOL or do
> > > > > the ifdef tricks at least.
> > > > >
> > > >
> > > > Sure. it will be done on v2.
> > > > > >  1 file changed, 146 insertions(+), 42
deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/net/virtio_net.c
b/drivers/net/virtio_net.c
> > > > > > index c5dca0d92e64..99c0ca0c1781 100644
> > > > > > --- a/drivers/net/virtio_net.c
> > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > @@ -31,6 +31,9 @@ module_param(csum, bool, 0444);
> > > > > >  module_param(gso, bool, 0444);
> > > > > >  module_param(napi_tx, bool, 0644);
> > > > > >
> > > > > > +static bool page_pool_enabled;
> > > > > > +module_param(page_pool_enabled, bool, 0400);
> > > > > > +
> > > > > >  /* FIXME: MTU in config. */
> > > > > >  #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN +
ETH_DATA_LEN)
> > > > > >  #define GOOD_COPY_LEN  128
> > > > > > @@ -159,6 +162,9 @@ struct receive_queue {
> > > > > >         /* Chain pages by the private ptr. */
> > > > > >         struct page *pages;
> > > > > >
> > > > > > +       /* Page pool */
> > > > > > +       struct page_pool *page_pool;
> > > > > > +
> > > > > >         /* Average packet length for mergeable
receive buffers. */
> > > > > >         struct ewma_pkt_len mrg_avg_pkt_len;
> > > > > >
> > > > > > @@ -459,6 +465,14 @@ static struct sk_buff
*virtnet_build_skb(void *buf, unsigned int buflen,
> > > > > >         return skb;
> > > > > >  }
> > > > > >
> > > > > > +static void virtnet_put_page(struct receive_queue
*rq, struct page *page)
> > > > > > +{
> > > > > > +       if (rq->page_pool)
> > > > > > +              
page_pool_put_full_page(rq->page_pool, page, true);
> > > > > > +       else
> > > > > > +               put_page(page);
> > > > > > +}
> > > > > > +
> > > > > >  /* Called from bottom half context */
> > > > > >  static struct sk_buff *page_to_skb(struct
virtnet_info *vi,
> > > > > >                                    struct
receive_queue *rq,
> > > > > > @@ -555,7 +569,7 @@ static struct sk_buff
*page_to_skb(struct virtnet_info *vi,
> > > > > >         hdr = skb_vnet_hdr(skb);
> > > > > >         memcpy(hdr, hdr_p, hdr_len);
> > > > > >         if (page_to_free)
> > > > > > -               put_page(page_to_free);
> > > > > > +               virtnet_put_page(rq,
page_to_free);
> > > > > >
> > > > > >         return skb;
> > > > > >  }
> > > > > > @@ -802,7 +816,7 @@ static int
virtnet_xdp_xmit(struct net_device *dev,
> > > > > >         return ret;
> > > > > >  }
> > > > > >
> > > > > > -static void put_xdp_frags(struct xdp_buff *xdp)
> > > > > > +static void put_xdp_frags(struct xdp_buff *xdp,
struct receive_queue *rq)
> > > > > >  {
> > > > >
> > > > > rq could be fetched from xdp_rxq_info?
> > > >
> > > > Yeah, it has the queue_index there.
> > > > >
> > > > > >         struct skb_shared_info *shinfo;
> > > > > >         struct page *xdp_page;
> > > > > > @@ -812,7 +826,7 @@ static void
put_xdp_frags(struct xdp_buff *xdp)
> > > > > >                 shinfo =
xdp_get_shared_info_from_buff(xdp);
> > > > > >                 for (i = 0; i <
shinfo->nr_frags; i++) {
> > > > > >                         xdp_page =
skb_frag_page(&shinfo->frags[i]);
> > > > > > -                       put_page(xdp_page);
> > > > > > +                       virtnet_put_page(rq,
xdp_page);
> > > > > >                 }
> > > > > >         }
> > > > > >  }
> > > > > > @@ -903,7 +917,11 @@ static struct page
*xdp_linearize_page(struct receive_queue *rq,
> > > > > >         if (page_off + *len + tailroom >
PAGE_SIZE)
> > > > > >                 return NULL;
> > > > > >
> > > > > > -       page = alloc_page(GFP_ATOMIC);
> > > > > > +       if (rq->page_pool)
> > > > > > +               page =
page_pool_dev_alloc_pages(rq->page_pool);
> > > > > > +       else
> > > > > > +               page = alloc_page(GFP_ATOMIC);
> > > > > > +
> > > > > >         if (!page)
> > > > > >                 return NULL;
> > > > > >
> > > > > > @@ -926,21 +944,24 @@ static struct page
*xdp_linearize_page(struct receive_queue *rq,
> > > > > >                  * is sending packet larger than
the MTU.
> > > > > >                  */
> > > > > >                 if ((page_off + buflen + tailroom)
> PAGE_SIZE) {
> > > > > > -                       put_page(p);
> > > > > > +                       virtnet_put_page(rq, p);
> > > > > >                         goto err_buf;
> > > > > >                 }
> > > > > >
> > > > > >                 memcpy(page_address(page) +
page_off,
> > > > > >                        page_address(p) + off,
buflen);
> > > > > >                 page_off += buflen;
> > > > > > -               put_page(p);
> > > > > > +               virtnet_put_page(rq, p);
> > > > > >         }
> > > > > >
> > > > > >         /* Headroom does not contribute to packet
length */
> > > > > >         *len = page_off - VIRTIO_XDP_HEADROOM;
> > > > > >         return page;
> > > > > >  err_buf:
> > > > > > -       __free_pages(page, 0);
> > > > > > +       if (rq->page_pool)
> > > > > > +              
page_pool_put_full_page(rq->page_pool, page, true);
> > > > > > +       else
> > > > > > +               __free_pages(page, 0);
> > > > > >         return NULL;
> > > > > >  }
> > > > > >
> > > > > > @@ -1144,7 +1165,7 @@ static void
mergeable_buf_free(struct receive_queue *rq, int num_buf,
> > > > > >                 }
> > > > > >                 stats->bytes += len;
> > > > > >                 page = virt_to_head_page(buf);
> > > > > > -               put_page(page);
> > > > > > +               virtnet_put_page(rq, page);
> > > > > >         }
> > > > > >  }
> > > > > >
> > > > > > @@ -1264,7 +1285,7 @@ static int
virtnet_build_xdp_buff_mrg(struct net_device *dev,
> > > > > >                 cur_frag_size = truesize;
> > > > > >                 xdp_frags_truesz += cur_frag_size;
> > > > > >                 if (unlikely(len > truesize -
room || cur_frag_size > PAGE_SIZE)) {
> > > > > > -                       put_page(page);
> > > > > > +                       virtnet_put_page(rq,
page);
> > > > > >                         pr_debug("%s: rx
error: len %u exceeds truesize %lu\n",
> > > > > >                                  dev->name,
len, (unsigned long)(truesize - room));
> > > > > >                        
dev->stats.rx_length_errors++;
> > > > > > @@ -1283,7 +1304,7 @@ static int
virtnet_build_xdp_buff_mrg(struct net_device *dev,
> > > > > >         return 0;
> > > > > >
> > > > > >  err:
> > > > > > -       put_xdp_frags(xdp);
> > > > > > +       put_xdp_frags(xdp, rq);
> > > > > >         return -EINVAL;
> > > > > >  }
> > > > > >
> > > > > > @@ -1344,7 +1365,10 @@ static void
*mergeable_xdp_get_buf(struct virtnet_info *vi,
> > > > > >                 if (*len + xdp_room >
PAGE_SIZE)
> > > > > >                         return NULL;
> > > > > >
> > > > > > -               xdp_page = alloc_page(GFP_ATOMIC);
> > > > > > +               if (rq->page_pool)
> > > > > > +                       xdp_page =
page_pool_dev_alloc_pages(rq->page_pool);
> > > > > > +               else
> > > > > > +                       xdp_page =
alloc_page(GFP_ATOMIC);
> > > > > >                 if (!xdp_page)
> > > > > >                         return NULL;
> > > > > >
> > > > > > @@ -1354,7 +1378,7 @@ static void
*mergeable_xdp_get_buf(struct virtnet_info *vi,
> > > > > >
> > > > > >         *frame_sz = PAGE_SIZE;
> > > > > >
> > > > > > -       put_page(*page);
> > > > > > +       virtnet_put_page(rq, *page);
> > > > > >
> > > > > >         *page = xdp_page;
> > > > > >
> > > > > > @@ -1400,6 +1424,8 @@ static struct sk_buff
*receive_mergeable_xdp(struct net_device *dev,
> > > > > >                 head_skb =
build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
> > > > > >                 if (unlikely(!head_skb))
> > > > > >                         break;
> > > > > > +               if (rq->page_pool)
> > > > > > +                      
skb_mark_for_recycle(head_skb);
> > > > > >                 return head_skb;
> > > > > >
> > > > > >         case XDP_TX:
> > > > > > @@ -1410,10 +1436,10 @@ static struct sk_buff
*receive_mergeable_xdp(struct net_device *dev,
> > > > > >                 break;
> > > > > >         }
> > > > > >
> > > > > > -       put_xdp_frags(&xdp);
> > > > > > +       put_xdp_frags(&xdp, rq);
> > > > > >
> > > > > >  err_xdp:
> > > > > > -       put_page(page);
> > > > > > +       virtnet_put_page(rq, page);
> > > > > >         mergeable_buf_free(rq, num_buf, dev,
stats);
> > > > > >
> > > > > >         stats->xdp_drops++;
> > > > > > @@ -1467,6 +1493,9 @@ static struct sk_buff
*receive_mergeable(struct net_device *dev,
> > > > > >         head_skb = page_to_skb(vi, rq, page,
offset, len, truesize, headroom);
> > > > > >         curr_skb = head_skb;
> > > > > >
> > > > > > +       if (rq->page_pool)
> > > > > > +               skb_mark_for_recycle(curr_skb);
> > > > > > +
> > > > > >         if (unlikely(!curr_skb))
> > > > > >                 goto err_skb;
> > > > > >         while (--num_buf) {
> > > > > > @@ -1509,6 +1538,8 @@ static struct sk_buff
*receive_mergeable(struct net_device *dev,
> > > > > >                         curr_skb = nskb;
> > > > > >                         head_skb->truesize +=
nskb->truesize;
> > > > > >                         num_skb_frags = 0;
> > > > > > +                       if (rq->page_pool)
> > > > > > +                              
skb_mark_for_recycle(curr_skb);
> > > > > >                 }
> > > > > >                 if (curr_skb != head_skb) {
> > > > > >                         head_skb->data_len +=
len;
> > > > > > @@ -1517,7 +1548,7 @@ static struct sk_buff
*receive_mergeable(struct net_device *dev,
> > > > > >                 }
> > > > > >                 offset = buf - page_address(page);
> > > > > >                 if (skb_can_coalesce(curr_skb,
num_skb_frags, page, offset)) {
> > > > > > -                       put_page(page);
> > > > > > +                       virtnet_put_page(rq,
page);
> > > > >
> > > > > > I wonder why we can't do this during buffer allocation like other drivers?
> > > > >
> > > >
> > > > Sorry, I don't quite understand the point here. Would
you please
> > > > elaborate a bit more?
> > > > > >                        
skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
> > > > > >                                              len,
truesize);
> > > > > >                 } else {
> > > > > > @@ -1530,7 +1561,7 @@ static struct sk_buff
*receive_mergeable(struct net_device *dev,
> > > > > >         return head_skb;
> > > > > >
> > > > > >  err_skb:
> > > > > > -       put_page(page);
> > > > > > +       virtnet_put_page(rq, page);
> > > > > >         mergeable_buf_free(rq, num_buf, dev,
stats);
> > > > > >
> > > > > >  err_buf:
> > > > > > @@ -1737,31 +1768,40 @@ static int
add_recvbuf_mergeable(struct virtnet_info *vi,
> > > > > >          * disabled GSO for XDP, it won't be a
big issue.
> > > > > >          */
> > > > > >         len = get_mergeable_buf_len(rq,
&rq->mrg_avg_pkt_len, room);
> > > > > > -       if (unlikely(!skb_page_frag_refill(len +
room, alloc_frag, gfp)))
> > > > > > -               return -ENOMEM;
> > > > > > +       if (rq->page_pool) {
> > > > > > +               struct page *page;
> > > > > >
> > > > > > -       buf = (char
*)page_address(alloc_frag->page) + alloc_frag->offset;
> > > > > > -       buf += headroom; /* advance address
leaving hole at front of pkt */
> > > > > > -       get_page(alloc_frag->page);
> > > > > > -       alloc_frag->offset += len + room;
> > > > > > -       hole = alloc_frag->size -
alloc_frag->offset;
> > > > > > -       if (hole < len + room) {
> > > > > > -               /* To avoid internal
fragmentation, if there is very likely not
> > > > > > -                * enough space for another
buffer, add the remaining space to
> > > > > > -                * the current buffer.
> > > > > > -                * XDP core assumes that
frame_size of xdp_buff and the length
> > > > > > -                * of the frag are PAGE_SIZE, so
we disable the hole mechanism.
> > > > > > -                */
> > > > > > -               if (!headroom)
> > > > > > -                       len += hole;
> > > > > > -               alloc_frag->offset += hole;
> > > > > > -       }
> > > > > > +               page =
page_pool_dev_alloc_pages(rq->page_pool);
> > > > > > +               if (unlikely(!page))
> > > > > > +                       return -ENOMEM;
> > > > > > +               buf = (char *)page_address(page);
> > > > > > +               buf += headroom; /* advance
address leaving hole at front of pkt */
> > > > > > +       } else {
> > > > > > +               if
(unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
> > > > >
> > > > > Why not simply use a helper like
virtnet_page_frag_refill() and add
> > > > > the page_pool allocation logic there? It helps to
reduce the
> > > > > changeset.
> > > > >
> > > >
> > > > Sure. Will do that on v2.
> > > > > > +                       return -ENOMEM;
> > > > > >
> > > > > > +               buf = (char
*)page_address(alloc_frag->page) + alloc_frag->offset;
> > > > > > +               buf += headroom; /* advance
address leaving hole at front of pkt */
> > > > > > +               get_page(alloc_frag->page);
> > > > > > +               alloc_frag->offset += len +
room;
> > > > > > +               hole = alloc_frag->size -
alloc_frag->offset;
> > > > > > +               if (hole < len + room) {
> > > > > > +                       /* To avoid internal
fragmentation, if there is very likely not
> > > > > > +                        * enough space for
another buffer, add the remaining space to
> > > > > > +                        * the current buffer.
> > > > > > +                        * XDP core assumes that
frame_size of xdp_buff and the length
> > > > > > +                        * of the frag are
PAGE_SIZE, so we disable the hole mechanism.
> > > > > > +                        */
> > > > > > +                       if (!headroom)
> > > > > > +                               len += hole;
> > > > > > +                       alloc_frag->offset +=
hole;
> > > > > > +               }
> > > > > > +       }
> > > > > >         sg_init_one(rq->sg, buf, len);
> > > > > >         ctx = mergeable_len_to_ctx(len + room,
headroom);
> > > > > >         err = virtqueue_add_inbuf_ctx(rq->vq,
rq->sg, 1, buf, ctx, gfp);
> > > > > >         if (err < 0)
> > > > > > -               put_page(virt_to_head_page(buf));
> > > > > > +               virtnet_put_page(rq,
virt_to_head_page(buf));
> > > > > >
> > > > > >         return err;
> > > > > >  }
> > > > > > @@ -1994,8 +2034,15 @@ static int
virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
> > > > > >         if (err < 0)
> > > > > >                 return err;
> > > > > >
> > > > > > -       err =
xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq,
> > > > > > -                                       
MEM_TYPE_PAGE_SHARED, NULL);
> > > > > > +       if (vi->rq[qp_index].page_pool)
> > > > > > +               err =
xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq,
> > > > > > +                                               
MEM_TYPE_PAGE_POOL,
> > > > > > +                                               
vi->rq[qp_index].page_pool);
> > > > > > +       else
> > > > > > +               err =
xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq,
> > > > > > +                                               
MEM_TYPE_PAGE_SHARED,
> > > > > > +                                               
NULL);
> > > > > > +
> > > > > >         if (err < 0)
> > > > > >                 goto err_xdp_reg_mem_model;
> > > > > >
> > > > > > @@ -2951,6 +2998,7 @@ static void
virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
> > > > > >                                
ethtool_sprintf(&p, "tx_queue_%u_%s", i,
> > > > > >                                                
virtnet_sq_stats_desc[j].desc);
> > > > > >                 }
> > > > > > +              
page_pool_ethtool_stats_get_strings(p);
> > > > > >                 break;
> > > > > >         }
> > > > > >  }
> > > > > > @@ -2962,12 +3010,30 @@ static int
virtnet_get_sset_count(struct net_device *dev, int sset)
> > > > > >         switch (sset) {
> > > > > >         case ETH_SS_STATS:
> > > > > >                 return vi->curr_queue_pairs *
(VIRTNET_RQ_STATS_LEN +
> > > > > > -                                             
VIRTNET_SQ_STATS_LEN);
> > > > > > +                                             
VIRTNET_SQ_STATS_LEN +
> > > > > > +                                              
(page_pool_enabled && vi->mergeable_rx_bufs ?
> > > > > > +                                               
page_pool_ethtool_stats_get_count() : 0));
> > > > > >         default:
> > > > > >                 return -EOPNOTSUPP;
> > > > > >         }
> > > > > >  }
> > > > > >
> > > > > > +static void virtnet_get_page_pool_stats(struct
net_device *dev, u64 *data)
> > > > > > +{
> > > > > > +#ifdef CONFIG_PAGE_POOL_STATS
> > > > > > +       struct virtnet_info *vi =
netdev_priv(dev);
> > > > > > +       struct page_pool_stats pp_stats = {};
> > > > > > +       int i;
> > > > > > +
> > > > > > +       for (i = 0; i <
vi->curr_queue_pairs; i++) {
> > > > > > +               if (!vi->rq[i].page_pool)
> > > > > > +                       continue;
> > > > > > +              
page_pool_get_stats(vi->rq[i].page_pool, &pp_stats);
> > > > > > +       }
> > > > > > +       page_pool_ethtool_stats_get(data,
&pp_stats);
> > > > > > +#endif /* CONFIG_PAGE_POOL_STATS */
> > > > > > +}
> > > > > > +
> > > > > >  static void virtnet_get_ethtool_stats(struct
net_device *dev,
> > > > > >                                       struct
ethtool_stats *stats, u64 *data)
> > > > > >  {
> > > > > > @@ -3003,6 +3069,8 @@ static void
virtnet_get_ethtool_stats(struct net_device *dev,
> > > > > >                 } while
(u64_stats_fetch_retry(&sq->stats.syncp, start));
> > > > > >                 idx += VIRTNET_SQ_STATS_LEN;
> > > > > >         }
> > > > > > +
> > > > > > +       virtnet_get_page_pool_stats(dev,
&data[idx]);
> > > > > >  }
> > > > > >
> > > > > >  static void virtnet_get_channels(struct
net_device *dev,
> > > > > > @@ -3623,6 +3691,8 @@ static void
virtnet_free_queues(struct virtnet_info *vi)
> > > > > >         for (i = 0; i < vi->max_queue_pairs;
i++) {
> > > > > >                
__netif_napi_del(&vi->rq[i].napi);
> > > > > >                
__netif_napi_del(&vi->sq[i].napi);
> > > > > > +               if (vi->rq[i].page_pool)
> > > > > > +                      
page_pool_destroy(vi->rq[i].page_pool);
> > > > > >         }
> > > > > >
> > > > > >         /* We called __netif_napi_del(),
> > > > > > @@ -3679,12 +3749,19 @@ static void
virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf)
> > > > > >         struct virtnet_info *vi =
vq->vdev->priv;
> > > > > >         int i = vq2rxq(vq);
> > > > > >
> > > > > > -       if (vi->mergeable_rx_bufs)
> > > > > > -               put_page(virt_to_head_page(buf));
> > > > > > -       else if (vi->big_packets)
> > > > > > +       if (vi->mergeable_rx_bufs) {
> > > > > > +               if (vi->rq[i].page_pool) {
> > > > > > +                      
page_pool_put_full_page(vi->rq[i].page_pool,
> > > > > > +                                              
virt_to_head_page(buf),
> > > > > > +                                              
true);
> > > > > > +               } else {
> > > > > > +                      
put_page(virt_to_head_page(buf));
> > > > > > +               }
> > > > > > +       } else if (vi->big_packets) {
> > > > > >                 give_pages(&vi->rq[i],
buf);
> > > > >
> > > > > > Any reason only mergeable mode was modified, but not small and big?
> > > > >
> > > > > Thanks
> > > > >
> > > >
> > > > > Big mode uses the page chain to recycle pages, hence the use of the
> > > > > "private" field of the buffer page. I will take a further look into that to
> > > > > see if it is better to use page pool in these cases. Thanks!
> > > >
> > > >
> > > >
> > > > > > -       else
> > > > > > +       } else {
> > > > > >                 put_page(virt_to_head_page(buf));
> > > > > > +       }
> > > > > >  }
> > > > > >
> > > > > >  static void free_unused_bufs(struct virtnet_info
*vi)
> > > > > > @@ -3718,6 +3795,26 @@ static void
virtnet_del_vqs(struct virtnet_info *vi)
> > > > > >         virtnet_free_queues(vi);
> > > > > >  }
> > > > > >
> > > > > > +static void virtnet_alloc_page_pool(struct
receive_queue *rq)
> > > > > > +{
> > > > > > +       struct virtio_device *vdev =
rq->vq->vdev;
> > > > > > +
> > > > > > +       struct page_pool_params pp_params = {
> > > > > > +               .order = 0,
> > > > > > +               .pool_size =
rq->vq->num_max,
> > > > > > +               .nid =
dev_to_node(vdev->dev.parent),
> > > > > > +               .dev = vdev->dev.parent,
> > > > > > +               .offset = 0,
> > > > > > +       };
> > > > > > +
> > > > > > +       rq->page_pool =
page_pool_create(&pp_params);
> > > > > > +       if (IS_ERR(rq->page_pool)) {
> > > > > > +               dev_warn(&vdev->dev,
"page pool creation failed: %ld\n",
> > > > > > +                       
PTR_ERR(rq->page_pool));
> > > > > > +               rq->page_pool = NULL;
> > > > > > +       }
> > > > > > +}
> > > > > > +
> > > > > >  /* How large should a single buffer be so a queue
full of these can fit at
> > > > > >   * least one full packet?
> > > > > >   * Logic below assumes the mergeable buffer
header is used.
> > > > > > @@ -3801,6 +3898,13 @@ static int
virtnet_find_vqs(struct virtnet_info *vi)
> > > > > >                 vi->rq[i].vq = vqs[rxq2vq(i)];
> > > > > >                 vi->rq[i].min_buf_len =
mergeable_min_buf_len(vi, vi->rq[i].vq);
> > > > > >                 vi->sq[i].vq = vqs[txq2vq(i)];
> > > > > > +
> > > > > > +               if (page_pool_enabled &&
vi->mergeable_rx_bufs)
> > > > > > +                      
virtnet_alloc_page_pool(&vi->rq[i]);
> > > > > > +               else
> > > > > > +                      
dev_warn(&vi->vdev->dev,
> > > > > > +                                "page pool
only support mergeable mode\n");
> > > > > > +
> > > > > >         }
> > > > > >
> > > > > >         /* run here: ret == 0. */
> > > > > > --
> > > > > > 2.31.1
> > > > > >
> > > > >
> > >

Virtualization - Jun 2023 - [PATCH net-next 2/5] virtio_net: Add page_pool support to improve performance

[PATCH net-next 2/5] virtio_net: Add page_pool support to improve performance

[PATCH net-next 2/5] virtio_net: Add page_pool support to improve performance