Jason Wang
2018-Nov-15 09:43 UTC
[PATCH net-next 1/2] vhost_net: mitigate page reference counting during page frag refill
We do a get_page() which involves an atomic operation. This patch tries to mitigate a per packet atomic operation by maintaining a reference bias which is initially USHRT_MAX. Each time a page is got, instead of calling get_page() we decrease the bias and when we find it's time to use a new page we will drop the remaining bias all at once through __page_frag_cache_drain(). Testpmd(virtio_user + vhost_net) + XDP_DROP on TAP shows about 1.6% improvement. Before: 4.63Mpps After: 4.71Mpps Signed-off-by: Jason Wang <jasowang at redhat.com> --- drivers/vhost/net.c | 54 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index ab11b2bee273..d919284f103b 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -141,6 +141,10 @@ struct vhost_net { unsigned tx_zcopy_err; /* Flush in progress. Protected by tx vq lock. */ bool tx_flush; + /* Private page frag */ + struct page_frag page_frag; + /* Refcount bias of page frag */ + int refcnt_bias; }; static unsigned vhost_net_zcopy_mask __read_mostly; @@ -637,14 +641,53 @@ static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len) !vhost_vq_avail_empty(vq->dev, vq); } +#define SKB_FRAG_PAGE_ORDER get_order(32768) + +static bool vhost_net_page_frag_refill(struct vhost_net *net, unsigned int sz, + struct page_frag *pfrag, gfp_t gfp) +{ + if (pfrag->page) { + if (pfrag->offset + sz <= pfrag->size) + return true; + __page_frag_cache_drain(pfrag->page, net->refcnt_bias); + } + + pfrag->offset = 0; + net->refcnt_bias = 0; + if (SKB_FRAG_PAGE_ORDER) { + /* Avoid direct reclaim but allow kswapd to wake */ + pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | + __GFP_COMP | __GFP_NOWARN | + __GFP_NORETRY, + SKB_FRAG_PAGE_ORDER); + if (likely(pfrag->page)) { + pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; + goto done; + } + } + pfrag->page = alloc_page(gfp); + if (likely(pfrag->page)) { + pfrag->size = PAGE_SIZE; + goto 
done; + } + return false; + +done: + net->refcnt_bias = USHRT_MAX; + page_ref_add(pfrag->page, USHRT_MAX - 1); + return true; +} + #define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, struct iov_iter *from) { struct vhost_virtqueue *vq = &nvq->vq; + struct vhost_net *net = container_of(vq->dev, struct vhost_net, + dev); struct socket *sock = vq->private_data; - struct page_frag *alloc_frag = &current->task_frag; + struct page_frag *alloc_frag = &net->page_frag; struct virtio_net_hdr *gso; struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp]; struct tun_xdp_hdr *hdr; @@ -665,7 +708,8 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, buflen += SKB_DATA_ALIGN(len + pad); alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES); - if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL))) + if (unlikely(!vhost_net_page_frag_refill(net, buflen, + alloc_frag, GFP_KERNEL))) return -ENOMEM; buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; @@ -703,7 +747,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, xdp->data_end = xdp->data + len; hdr->buflen = buflen; - get_page(alloc_frag->page); + --net->refcnt_bias; alloc_frag->offset += buflen; ++nvq->batched_xdp; @@ -1292,6 +1336,8 @@ static int vhost_net_open(struct inode *inode, struct file *f) vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev); f->private_data = n; + n->page_frag.page = NULL; + n->refcnt_bias = 0; return 0; } @@ -1366,6 +1412,8 @@ static int vhost_net_release(struct inode *inode, struct file *f) kfree(n->vqs[VHOST_NET_VQ_RX].rxq.queue); kfree(n->vqs[VHOST_NET_VQ_TX].xdp); kfree(n->dev.vqs); + if (n->page_frag.page) + __page_frag_cache_drain(n->page_frag.page, n->refcnt_bias); kvfree(n); return 0; } -- 2.17.1
Jason Wang
2018-Nov-15 09:43 UTC
[PATCH net-next 2/2] tuntap: free XDP dropped packets in a batch
Thanks to the batched XDP buffs through msg_control, instead of calling put_page() for each page, which involves an atomic operation, let's batch them by recording the last page that needs to be freed and its reference count, and free them in a batch. Testpmd(virtio-user + vhost_net) + XDP_DROP shows 3.8% improvement. Before: 4.71Mpps After : 4.89Mpps Signed-off-by: Jason Wang <jasowang at redhat.com> --- drivers/net/tun.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index a65779c6d72f..e90a7923a5f6 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -188,6 +188,11 @@ struct tun_file { struct xdp_rxq_info xdp_rxq; }; +struct tun_page { + struct page *page; + int count; +}; + struct tun_flow_entry { struct hlist_node hash_link; struct rcu_head rcu; @@ -2377,9 +2382,16 @@ static void tun_sock_write_space(struct sock *sk) kill_fasync(&tfile->fasync, SIGIO, POLL_OUT); } +static void tun_put_page(struct tun_page *tpage) +{ + if (tpage->page) + __page_frag_cache_drain(tpage->page, tpage->count); +} + static int tun_xdp_one(struct tun_struct *tun, struct tun_file *tfile, - struct xdp_buff *xdp, int *flush) + struct xdp_buff *xdp, int *flush, + struct tun_page *tpage) { struct tun_xdp_hdr *hdr = xdp->data_hard_start; struct virtio_net_hdr *gso = &hdr->gso; @@ -2390,6 +2402,7 @@ static int tun_xdp_one(struct tun_struct *tun, int buflen = hdr->buflen; int err = 0; bool skb_xdp = false; + struct page *page; xdp_prog = rcu_dereference(tun->xdp_prog); if (xdp_prog) { @@ -2416,7 +2429,14 @@ static int tun_xdp_one(struct tun_struct *tun, case XDP_PASS: break; default: - put_page(virt_to_head_page(xdp->data)); + page = virt_to_head_page(xdp->data); + if (tpage->page == page) { + ++tpage->count; + } else { + tun_put_page(tpage); + tpage->page = page; + tpage->count = 1; + } return 0; } } @@ -2480,6 +2500,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) return 
-EBADFD; if (ctl && (ctl->type == TUN_MSG_PTR)) { + struct tun_page tpage = {0}; int n = ctl->num; int flush = 0; @@ -2488,7 +2509,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) for (i = 0; i < n; i++) { xdp = &((struct xdp_buff *)ctl->ptr)[i]; - tun_xdp_one(tun, tfile, xdp, &flush); + tun_xdp_one(tun, tfile, xdp, &flush, &tpage); } if (flush) @@ -2497,6 +2518,8 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) rcu_read_unlock(); local_bh_enable(); + tun_put_page(&tpage); + ret = total_len; goto out; } -- 2.17.1
David Miller
2018-Nov-17 20:01 UTC
[PATCH net-next 1/2] vhost_net: mitigate page reference counting during page frag refill
From: Jason Wang <jasowang at redhat.com> Date: Thu, 15 Nov 2018 17:43:09 +0800 > We do a get_page() which involves an atomic operation. This patch tries > to mitigate a per packet atomic operation by maintaining a reference > bias which is initially USHRT_MAX. Each time a page is got, instead of > calling get_page() we decrease the bias and when we find it's time to > use a new page we will drop the remaining bias all at once through > __page_frag_cache_drain(). > > Testpmd(virtio_user + vhost_net) + XDP_DROP on TAP shows about 1.6% > improvement. > > Before: 4.63Mpps > After: 4.71Mpps > > Signed-off-by: Jason Wang <jasowang at redhat.com> Applied.
David Miller
2018-Nov-17 20:01 UTC
[PATCH net-next 2/2] tuntap: free XDP dropped packets in a batch
From: Jason Wang <jasowang at redhat.com> Date: Thu, 15 Nov 2018 17:43:10 +0800 > Thanks to the batched XDP buffs through msg_control, instead of > calling put_page() for each page, which involves an atomic operation, > let's batch them by recording the last page that needs to be freed and > its reference count, and free them in a batch. > > Testpmd(virtio-user + vhost_net) + XDP_DROP shows 3.8% improvement. > > Before: 4.71Mpps > After : 4.89Mpps > > Signed-off-by: Jason Wang <jasowang at redhat.com> Applied.
Apparently Analogous Threads
- [PATCH net-next 1/2] vhost_net: mitigate page reference counting during page frag refill
- [PATCH net-next 09/11] tuntap: accept an array of XDP buffs through sendmsg()
- [PATCH net-next 09/11] tuntap: accept an array of XDP buffs through sendmsg()
- [RFC PATCH net-next 12/12] vhost_net: batch submitting XDP buffers to underlayer sockets
- [PATCH 15/16] mm/hmm/test: add self tests for THP migration