Ian Campbell
2011-Jul-22 13:18 UTC
[Ocfs2-devel] [PATCH 12/13] net: add paged frag destructor support to kernel_sendpage.
Signed-off-by: Ian Campbell <ian.campbell at citrix.com> Cc: "David S. Miller" <davem at davemloft.net> Cc: Alexey Kuznetsov <kuznet at ms2.inr.ac.ru> Cc: "Pekka Savola (ipv6)" <pekkas at netcore.fi> Cc: James Morris <jmorris at namei.org> Cc: Hideaki YOSHIFUJI <yoshfuji at linux-ipv6.org> Cc: Patrick McHardy <kaber at trash.net> Cc: Trond Myklebust <Trond.Myklebust at netapp.com> Cc: Greg Kroah-Hartman <gregkh at suse.de> Cc: drbd-user at lists.linbit.com Cc: devel at driverdev.osuosl.org Cc: cluster-devel at redhat.com Cc: ocfs2-devel at oss.oracle.com Cc: netdev at vger.kernel.org Cc: ceph-devel at vger.kernel.org Cc: rds-devel at oss.oracle.com Cc: linux-nfs at vger.kernel.org [since v1: Drop sendpage_destructor and just add an argument to sendpage protocol hooks ] --- drivers/block/drbd/drbd_main.c | 1 + drivers/staging/pohmelfs/trans.c | 2 +- fs/dlm/lowcomms.c | 2 +- fs/ocfs2/cluster/tcp.c | 1 + include/linux/net.h | 6 +++++- include/net/inet_common.h | 4 +++- include/net/ip.h | 4 +++- include/net/sock.h | 2 ++ include/net/tcp.h | 4 +++- net/ceph/messenger.c | 2 +- net/core/sock.c | 6 +++++- net/ipv4/af_inet.c | 9 ++++++--- net/ipv4/ip_output.c | 7 ++++--- net/ipv4/tcp.c | 25 ++++++++++++++++--------- net/ipv4/udp.c | 11 ++++++----- net/ipv4/udp_impl.h | 5 +++-- net/rds/tcp_send.c | 1 + net/socket.c | 11 +++++++---- net/sunrpc/svcsock.c | 6 +++--- net/sunrpc/xprtsock.c | 2 +- 20 files changed, 73 insertions(+), 38 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 0358e55..49c7346 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2584,6 +2584,7 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, set_fs(KERNEL_DS); do { sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page, + NULL, offset, len, msg_flags); if (sent == -EAGAIN) { diff --git a/drivers/staging/pohmelfs/trans.c b/drivers/staging/pohmelfs/trans.c index 36a2535..b5d8411 100644 --- 
a/drivers/staging/pohmelfs/trans.c +++ b/drivers/staging/pohmelfs/trans.c @@ -104,7 +104,7 @@ static int netfs_trans_send_pages(struct netfs_trans *t, struct netfs_state *st) msg.msg_flags = MSG_WAITALL | (attached_pages == 1 ? 0 : MSG_MORE); - err = kernel_sendpage(st->socket, page, 0, size, msg.msg_flags); + err = kernel_sendpage(st->socket, page, NULL, 0, size, msg.msg_flags); if (err <= 0) { printk("%s: %d/%d failed to send transaction page: t: %p, gen: %u, size: %u, err: %d.\n", __func__, i, t->page_num, t, t->gen, size, err); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 5e2c71f..64933ff 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1341,7 +1341,7 @@ static void send_to_sock(struct connection *con) ret = 0; if (len) { - ret = kernel_sendpage(con->sock, e->page, offset, len, + ret = kernel_sendpage(con->sock, e->page, NULL, offset, len, msg_flags); if (ret == -EAGAIN || ret == 0) { if (ret == -EAGAIN && diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index db5ee4b..81366a0 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -982,6 +982,7 @@ static void o2net_sendpage(struct o2net_sock_container *sc, mutex_lock(&sc->sc_send_lock); ret = sc->sc_sock->ops->sendpage(sc->sc_sock, virt_to_page(kmalloced_virt), + NULL, (long)kmalloced_virt & ~PAGE_MASK, size, MSG_DONTWAIT); mutex_unlock(&sc->sc_send_lock); diff --git a/include/linux/net.h b/include/linux/net.h index b299230..db562ba 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -157,6 +157,7 @@ struct kiocb; struct sockaddr; struct msghdr; struct module; +struct skb_frag_destructor; struct proto_ops { int family; @@ -203,6 +204,7 @@ struct proto_ops { int (*mmap) (struct file *file, struct socket *sock, struct vm_area_struct * vma); ssize_t (*sendpage) (struct socket *sock, struct page *page, + struct skb_frag_destructor *destroy, int offset, size_t size, int flags); ssize_t (*splice_read)(struct socket *sock, loff_t *ppos, struct pipe_inode_info 
*pipe, size_t len, unsigned int flags); @@ -273,7 +275,9 @@ extern int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen); extern int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, unsigned int optlen); -extern int kernel_sendpage(struct socket *sock, struct page *page, int offset, +extern int kernel_sendpage(struct socket *sock, struct page *page, + struct skb_frag_destructor *destroy, + int offset, size_t size, int flags); extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); extern int kernel_sock_shutdown(struct socket *sock, diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 22fac98..91cd8d0 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -21,7 +21,9 @@ extern int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, extern int inet_accept(struct socket *sock, struct socket *newsock, int flags); extern int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size); -extern ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, +extern ssize_t inet_sendpage(struct socket *sock, struct page *page, + struct skb_frag_destructor *frag, + int offset, size_t size, int flags); extern int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags); diff --git a/include/net/ip.h b/include/net/ip.h index 66dd491..887a834 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -114,7 +114,9 @@ extern int ip_append_data(struct sock *sk, struct flowi4 *fl4, struct rtable **rt, unsigned int flags); extern int ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb); -extern ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, +extern ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, + struct page *page, + struct skb_frag_destructor *destroy, int offset, size_t size, int 
flags); extern struct sk_buff *__ip_make_skb(struct sock *sk, struct flowi4 *fl4, diff --git a/include/net/sock.h b/include/net/sock.h index c0b938c..c1ab674 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -763,6 +763,7 @@ struct proto { size_t len, int noblock, int flags, int *addr_len); int (*sendpage)(struct sock *sk, struct page *page, + struct skb_frag_destructor *destroy, int offset, size_t size, int flags); int (*bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len); @@ -1152,6 +1153,7 @@ extern int sock_no_mmap(struct file *file, struct vm_area_struct *vma); extern ssize_t sock_no_sendpage(struct socket *sock, struct page *page, + struct skb_frag_destructor *destroy, int offset, size_t size, int flags); diff --git a/include/net/tcp.h b/include/net/tcp.h index cda30ea..1f43c0d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -317,7 +317,9 @@ extern void *tcp_v4_tw_get_peer(struct sock *sk); extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size); -extern int tcp_sendpage(struct sock *sk, struct page *page, int offset, +extern int tcp_sendpage(struct sock *sk, struct page *page, + struct skb_frag_destructor *destroy, + int offset, size_t size, int flags); extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 78b55f4..ec7955b 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -852,7 +852,7 @@ static int write_partial_msg_pages(struct ceph_connection *con) cpu_to_le32(crc32c(tmpcrc, base, len)); con->out_msg_pos.did_page_crc = 1; } - ret = kernel_sendpage(con->sock, page, + ret = kernel_sendpage(con->sock, page, NULL, con->out_msg_pos.page_pos + page_shift, len, MSG_DONTWAIT | MSG_NOSIGNAL | diff --git a/net/core/sock.c b/net/core/sock.c index be55676..87d04db 100644 --- 
a/net/core/sock.c +++ b/net/core/sock.c @@ -1858,7 +1858,9 @@ int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct * } EXPORT_SYMBOL(sock_no_mmap); -ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) +ssize_t sock_no_sendpage(struct socket *sock, struct page *page, + struct skb_frag_destructor *destroy, + int offset, size_t size, int flags) { ssize_t res; struct msghdr msg = {.msg_flags = flags}; @@ -1868,6 +1870,8 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, siz iov.iov_len = size; res = kernel_sendmsg(sock, &msg, &iov, 1, size); kunmap(page); + /* kernel_sendmsg copies so we can destroy immediately */ + skb_frag_destructor_unref(destroy); return res; } EXPORT_SYMBOL(sock_no_sendpage); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ef1528a..45c0876 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -740,7 +740,9 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, } EXPORT_SYMBOL(inet_sendmsg); -ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, +ssize_t inet_sendpage(struct socket *sock, struct page *page, + struct skb_frag_destructor *destroy, + int offset, size_t size, int flags) { struct sock *sk = sock->sk; @@ -753,8 +755,9 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, return -EAGAIN; if (sk->sk_prot->sendpage) - return sk->sk_prot->sendpage(sk, page, offset, size, flags); - return sock_no_sendpage(sock, page, offset, size, flags); + return sk->sk_prot->sendpage(sk, page, destroy, + offset, size, flags); + return sock_no_sendpage(sock, page, destroy, offset, size, flags); } EXPORT_SYMBOL(inet_sendpage); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c4326fb..b35b728 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1111,6 +1111,7 @@ int ip_append_data(struct sock *sk, struct flowi4 *fl4, } ssize_t ip_append_page(struct 
sock *sk, struct flowi4 *fl4, struct page *page, + struct skb_frag_destructor *destroy, int offset, size_t size, int flags) { struct inet_sock *inet = inet_sk(sk); @@ -1224,11 +1225,11 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, i = skb_shinfo(skb)->nr_frags; if (len > size) len = size; - if (skb_can_coalesce(skb, i, page, NULL, offset)) { + if (skb_can_coalesce(skb, i, page, destroy, offset)) { skb_shinfo(skb)->frags[i-1].size += len; } else if (i < MAX_SKB_FRAGS) { - get_page(page); - skb_fill_page_desc(skb, i, page, NULL, offset, len); + skb_fill_page_desc(skb, i, page, destroy, offset, len); + skb_frag_ref(skb, i); } else { err = -EMSGSIZE; goto error; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a1a0ccd..2f590e5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -757,7 +757,10 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) return mss_now; } -static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, +static ssize_t do_tcp_sendpages(struct sock *sk, + struct page **pages, + struct skb_frag_destructor **destructors, + int poffset, size_t psize, int flags) { struct tcp_sock *tp = tcp_sk(sk); @@ -783,6 +786,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse while (psize > 0) { struct sk_buff *skb = tcp_write_queue_tail(sk); struct page *page = pages[poffset / PAGE_SIZE]; + struct skb_frag_destructor *destroy = + destructors ? 
destructors[poffset / PAGE_SIZE] : NULL; int copy, i, can_coalesce; int offset = poffset % PAGE_SIZE; int size = min_t(size_t, psize, PAGE_SIZE - offset); @@ -804,7 +809,7 @@ new_segment: copy = size; i = skb_shinfo(skb)->nr_frags; - can_coalesce = skb_can_coalesce(skb, i, page, NULL, offset); + can_coalesce = skb_can_coalesce(skb, i, page, destroy, offset); if (!can_coalesce && i >= MAX_SKB_FRAGS) { tcp_mark_push(tp, skb); goto new_segment; @@ -815,8 +820,8 @@ new_segment: if (can_coalesce) { skb_shinfo(skb)->frags[i - 1].size += copy; } else { - get_page(page); - skb_fill_page_desc(skb, i, page, NULL, offset, copy); + skb_fill_page_desc(skb, i, page, destroy, offset, copy); + skb_frag_ref(skb, i); } skb->len += copy; @@ -871,18 +876,20 @@ out_err: return sk_stream_error(sk, flags, err); } -int tcp_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags) +int tcp_sendpage(struct sock *sk, struct page *page, + struct skb_frag_destructor *destroy, + int offset, size_t size, int flags) { ssize_t res; if (!(sk->sk_route_caps & NETIF_F_SG) || !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) - return sock_no_sendpage(sk->sk_socket, page, offset, size, - flags); + return sock_no_sendpage(sk->sk_socket, page, destroy, + offset, size, flags); lock_sock(sk); - res = do_tcp_sendpages(sk, &page, offset, size, flags); + res = do_tcp_sendpages(sk, &page, &destroy, + offset, size, flags); release_sock(sk); return res; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 198f75b..ebdc8ea 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1027,8 +1027,9 @@ do_confirm: } EXPORT_SYMBOL(udp_sendmsg); -int udp_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags) +int udp_sendpage(struct sock *sk, struct page *page, + struct skb_frag_destructor *destroy, + int offset, size_t size, int flags) { struct inet_sock *inet = inet_sk(sk); struct udp_sock *up = udp_sk(sk); @@ -1056,11 +1057,11 @@ int udp_sendpage(struct sock *sk, struct page 
*page, int offset, } ret = ip_append_page(sk, &inet->cork.fl.u.ip4, - page, offset, size, flags); + page, destroy, offset, size, flags); if (ret == -EOPNOTSUPP) { release_sock(sk); - return sock_no_sendpage(sk->sk_socket, page, offset, - size, flags); + return sock_no_sendpage(sk->sk_socket, page, destroy, + offset, size, flags); } if (ret < 0) { udp_flush_pending_frames(sk); diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index aaad650..4923d82 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -23,8 +23,9 @@ extern int compat_udp_getsockopt(struct sock *sk, int level, int optname, #endif extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); -extern int udp_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags); +extern int udp_sendpage(struct sock *sk, struct page *page, + struct skb_frag_destructor *destroy, + int offset, size_t size, int flags); extern int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); extern void udp_destroy_sock(struct sock *sk); diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 1b4fd68..e0f03be 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -121,6 +121,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, sg_page(&rm->data.op_sg[sg]), + NULL, rm->data.op_sg[sg].offset + off, rm->data.op_sg[sg].length - off, MSG_DONTWAIT|MSG_NOSIGNAL); rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->data.op_sg[sg]), rm->data.op_sg[sg].offset + off, rm->data.op_sg[sg].length - off, diff --git a/net/socket.c b/net/socket.c index 02dc82d..4b77658 100644 --- a/net/socket.c +++ b/net/socket.c @@ -795,7 +795,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, if (more) flags |= MSG_MORE; - return kernel_sendpage(sock, page, NULL, offset, size, flags); + return kernel_sendpage(sock, page, NULL, offset, size, flags); } static ssize_t sock_splice_read(struct file 
*file, loff_t *ppos, @@ -3343,15 +3343,18 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(kernel_setsockopt); -int kernel_sendpage(struct socket *sock, struct page *page, int offset, +int kernel_sendpage(struct socket *sock, struct page *page, + struct skb_frag_destructor *destroy, + int offset, size_t size, int flags) { sock_update_classid(sock->sk); if (sock->ops->sendpage) - return sock->ops->sendpage(sock, page, offset, size, flags); + return sock->ops->sendpage(sock, page, destroy, + offset, size, flags); - return sock_no_sendpage(sock, page, offset, size, flags); + return sock_no_sendpage(sock, page, destroy, offset, size, flags); } EXPORT_SYMBOL(kernel_sendpage); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index af04f77..a80b1d3 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -181,7 +181,7 @@ int svc_send_common(struct socket *sock, struct xdr_buf *xdr, /* send head */ if (slen == xdr->head[0].iov_len) flags = 0; - len = kernel_sendpage(sock, headpage, headoffset, + len = kernel_sendpage(sock, headpage, NULL, headoffset, xdr->head[0].iov_len, flags); if (len != xdr->head[0].iov_len) goto out; @@ -194,7 +194,7 @@ int svc_send_common(struct socket *sock, struct xdr_buf *xdr, while (pglen > 0) { if (slen == size) flags = 0; - result = kernel_sendpage(sock, *ppage, base, size, flags); + result = kernel_sendpage(sock, *ppage, NULL, base, size, flags); if (result > 0) len += result; if (result != size) @@ -208,7 +208,7 @@ int svc_send_common(struct socket *sock, struct xdr_buf *xdr, /* send tail */ if (xdr->tail[0].iov_len) { - result = kernel_sendpage(sock, tailpage, tailoffset, + result = kernel_sendpage(sock, tailpage, NULL, tailoffset, xdr->tail[0].iov_len, 0); if (result > 0) len += result; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 72abb73..d027621 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -397,7 +397,7 @@ static int xs_send_pagedata(struct socket 
*sock, struct xdr_buf *xdr, unsigned i remainder -= len; if (remainder != 0 || more) flags |= MSG_MORE; - err = sock->ops->sendpage(sock, *ppage, base, len, flags); + err = sock->ops->sendpage(sock, *ppage, NULL, base, len, flags); if (remainder == 0 || err != len) break; sent += err; -- 1.7.2.5