Paul Durrant
2013-Oct-11 15:06 UTC
[PATCH net-next v4 2/5] xen-netback: add support for IPv6 checksum offload from guest
For performance of VM to VM traffic on a single host it is better to avoid calculation of TCP/UDP checksum in the sending frontend. To allow this this patch adds the code necessary to set up partial checksum for IPv6 packets and xenstore flag feature-ipv6-csum-offload to advertise that fact to frontends. Signed-off-by: Paul Durrant <paul.durrant@citrix.com> Cc: Wei Liu <wei.liu2@citrix.com> Cc: David Vrabel <david.vrabel@citrix.com> Cc: Ian Campbell <ian.campbell@citrix.com> --- drivers/net/xen-netback/netback.c | 233 ++++++++++++++++++++++++++++++------- drivers/net/xen-netback/xenbus.c | 9 ++ 2 files changed, 203 insertions(+), 39 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index f3e591c..74c13b9 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -109,15 +109,10 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif, return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx)); } -/* - * This is the amount of packet we copy rather than map, so that the - * guest can''t fiddle with the contents of the headers while we do - * packet processing on them (netfilter, routing, etc). +/* This is a miniumum size for the linear area to avoid lots of + * calls to __pskb_pull_tail() as we set up checksum offsets. */ -#define PKT_PROT_LEN (ETH_HLEN + \ - VLAN_HLEN + \ - sizeof(struct iphdr) + MAX_IPOPTLEN + \ - sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE) +#define PKT_PROT_LEN 128 static u16 frag_get_pending_idx(skb_frag_t *frag) { @@ -1118,61 +1113,74 @@ static int xenvif_set_skb_gso(struct xenvif *vif, return 0; } -static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) +static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len) +{ + if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) { + /* If we need to pullup then pullup to the max, so we + * won''t need to do it again. + */ + int target = min_t(int, skb->len, MAX_TCP_HEADER); + __pskb_pull_tail(skb, target - skb_headlen(skb)); + } +} + +static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb, + int recalculate_partial_csum) { - struct iphdr *iph; + struct iphdr *iph = (void *)skb->data; + unsigned int header_size; + unsigned int off; int err = -EPROTO; - int recalculate_partial_csum = 0; - /* - * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy - * peers can fail to set NETRXF_csum_blank when sending a GSO - * frame. In this case force the SKB to CHECKSUM_PARTIAL and - * recalculate the partial checksum. - */ - if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { - vif->rx_gso_checksum_fixup++; - skb->ip_summed = CHECKSUM_PARTIAL; - recalculate_partial_csum = 1; - } + off = sizeof(struct iphdr); - /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ - if (skb->ip_summed != CHECKSUM_PARTIAL) - return 0; + header_size = skb->network_header + off + MAX_IPOPTLEN; + maybe_pull_tail(skb, header_size); - if (skb->protocol != htons(ETH_P_IP)) - goto out; + off = iph->ihl * 4; - iph = (void *)skb->data; switch (iph->protocol) { case IPPROTO_TCP: - if (!skb_partial_csum_set(skb, 4 * iph->ihl, + if (!skb_partial_csum_set(skb, off, offsetof(struct tcphdr, check))) goto out; if (recalculate_partial_csum) { struct tcphdr *tcph = tcp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct tcphdr); + maybe_pull_tail(skb, header_size); + tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - iph->ihl*4, + skb->len - off, IPPROTO_TCP, 0); } break; case IPPROTO_UDP: - if (!skb_partial_csum_set(skb, 4 * iph->ihl, + if (!skb_partial_csum_set(skb, off, offsetof(struct udphdr, check))) goto out; if (recalculate_partial_csum) { struct udphdr *udph = udp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct udphdr); + maybe_pull_tail(skb, header_size); + udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - iph->ihl*4, + skb->len - off, IPPROTO_UDP, 0); } break; default: if (net_ratelimit()) netdev_err(vif->dev, - "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n", + "Attempting to checksum a non-TCP/UDP packet, " + "dropping a protocol %d packet\n", iph->protocol); goto out; } @@ -1183,6 +1191,158 @@ out: return err; } +static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, + int recalculate_partial_csum) +{ + int err = -EPROTO; + struct ipv6hdr *ipv6h = (void *)skb->data; + u8 nexthdr; + unsigned int header_size; + unsigned int off; + bool fragment; + bool done; + + done = false; + + off = sizeof(struct ipv6hdr); + + header_size = skb->network_header + off; + maybe_pull_tail(skb, header_size); + + nexthdr = ipv6h->nexthdr; + + while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) && + !done) { + switch (nexthdr) { + case IPPROTO_DSTOPTS: + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: { + struct ipv6_opt_hdr *hp = (void *)(skb->data + off); + + header_size = skb->network_header + + off + + sizeof(struct ipv6_opt_hdr); + maybe_pull_tail(skb, header_size); + + nexthdr = hp->nexthdr; + off += ipv6_optlen(hp); + break; + } + case IPPROTO_AH: { + struct ip_auth_hdr *hp = (void *)(skb->data + off); + + header_size = skb->network_header + + off + + sizeof(struct ip_auth_hdr); + maybe_pull_tail(skb, header_size); + + nexthdr = hp->nexthdr; + off += (hp->hdrlen+2)<<2; + break; + } + case IPPROTO_FRAGMENT: + fragment = true; + /* fall through */ + default: + done = true; + break; + } + } + + if (!done) { + if (net_ratelimit()) + netdev_err(vif->dev, "Failed to parse packet header\n"); + goto out; + } + + if (fragment) { + if (net_ratelimit()) + netdev_err(vif->dev, "Packet is a fragment!\n"); + goto out; + } + + switch (nexthdr) { + case IPPROTO_TCP: + if (!skb_partial_csum_set(skb, off, + offsetof(struct tcphdr, check))) + goto out; + + if (recalculate_partial_csum) { + struct tcphdr *tcph = tcp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct tcphdr); + maybe_pull_tail(skb, header_size); + + tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, + &ipv6h->daddr, + skb->len - off, + IPPROTO_TCP, 0); + } + break; + case IPPROTO_UDP: + if (!skb_partial_csum_set(skb, off, + offsetof(struct udphdr, check))) + goto out; + + if (recalculate_partial_csum) { + struct udphdr *udph = udp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct udphdr); + maybe_pull_tail(skb, header_size); + + udph->check = ~csum_ipv6_magic(&ipv6h->saddr, + &ipv6h->daddr, + skb->len - off, + IPPROTO_UDP, 0); + } + break; + default: + if (net_ratelimit()) + netdev_err(vif->dev, + "Attempting to checksum a non-TCP/UDP packet, " + "dropping a protocol %d packet\n", + nexthdr); + goto out; + } + + err = 0; + +out: + return err; +} + +static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) +{ + int err = -EPROTO; + int recalculate_partial_csum = 0; + + /* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy + * peers can fail to set NETRXF_csum_blank when sending a GSO + * frame. In this case force the SKB to CHECKSUM_PARTIAL and + * recalculate the partial checksum. + */ + if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { + vif->rx_gso_checksum_fixup++; + skb->ip_summed = CHECKSUM_PARTIAL; + recalculate_partial_csum = 1; + } + + /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + if (skb->protocol == htons(ETH_P_IP)) + err = checksum_setup_ip(vif, skb, recalculate_partial_csum); + else if (skb->protocol == htons(ETH_P_IPV6)) + err = checksum_setup_ipv6(vif, skb, recalculate_partial_csum); + + return err; +} + static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) { unsigned long now = jiffies; @@ -1428,12 +1588,7 @@ static int xenvif_tx_submit(struct xenvif *vif, int budget) xenvif_fill_frags(vif, skb); - /* - * If the initial fragment was < PKT_PROT_LEN then - * pull through some bytes from the other fragments to - * increase the linear region to PKT_PROT_LEN bytes. - */ - if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) { + if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) { int target = min_t(int, skb->len, PKT_PROT_LEN); __pskb_pull_tail(skb, target - skb_headlen(skb)); } diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 99343ca..9c9b37d 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -105,6 +105,15 @@ static int netback_probe(struct xenbus_device *dev, goto abort_transaction; } + /* We support partial checksum setup for IPv6 packets */ + err = xenbus_printf(xbt, dev->nodename, + "feature-ipv6-csum-offload", + "%d", 1); + if (err) { + message = "writing feature-ipv6-csum-offload"; + goto abort_transaction; + } + /* We support rx-copy path. */ err = xenbus_printf(xbt, dev->nodename, "feature-rx-copy", "%d", 1); -- 1.7.10.4
Wei Liu
2013-Oct-14 10:42 UTC
Re: [PATCH net-next v4 2/5] xen-netback: add support for IPv6 checksum offload from guest
On Fri, Oct 11, 2013 at 04:06:19PM +0100, Paul Durrant wrote: [...]> -/* > - * This is the amount of packet we copy rather than map, so that the > - * guest can''t fiddle with the contents of the headers while we do > - * packet processing on them (netfilter, routing, etc). > +/* This is a miniumum size for the linear area to avoid lots of > + * calls to __pskb_pull_tail() as we set up checksum offsets. > */You seem to forget to explain why 128 is chosen. :-) Wei.
Paul Durrant
2013-Oct-14 10:49 UTC
Re: [PATCH net-next v4 2/5] xen-netback: add support for IPv6 checksum offload from guest
> -----Original Message----- > From: Wei Liu [mailto:wei.liu2@citrix.com] > Sent: 14 October 2013 11:43 > To: Paul Durrant > Cc: xen-devel@lists.xen.org; netdev@vger.kernel.org; Wei Liu; David Vrabel; > Ian Campbell > Subject: Re: [PATCH net-next v4 2/5] xen-netback: add support for IPv6 > checksum offload from guest > > On Fri, Oct 11, 2013 at 04:06:19PM +0100, Paul Durrant wrote: > [...] > > -/* > > - * This is the amount of packet we copy rather than map, so that the > > - * guest can''t fiddle with the contents of the headers while we do > > - * packet processing on them (netfilter, routing, etc). > > +/* This is a miniumum size for the linear area to avoid lots of > > + * calls to __pskb_pull_tail() as we set up checksum offsets. > > */ > > You seem to forget to explain why 128 is chosen. :-)Is that not sufficient explanation? What sort of thing are you looking for? Paul
Wei Liu
2013-Oct-14 10:55 UTC
Re: [PATCH net-next v4 2/5] xen-netback: add support for IPv6 checksum offload from guest
On Mon, Oct 14, 2013 at 11:49:20AM +0100, Paul Durrant wrote:> > -----Original Message----- > > From: Wei Liu [mailto:wei.liu2@citrix.com] > > Sent: 14 October 2013 11:43 > > To: Paul Durrant > > Cc: xen-devel@lists.xen.org; netdev@vger.kernel.org; Wei Liu; David Vrabel; > > Ian Campbell > > Subject: Re: [PATCH net-next v4 2/5] xen-netback: add support for IPv6 > > checksum offload from guest > > > > On Fri, Oct 11, 2013 at 04:06:19PM +0100, Paul Durrant wrote: > > [...] > > > -/* > > > - * This is the amount of packet we copy rather than map, so that the > > > - * guest can''t fiddle with the contents of the headers while we do > > > - * packet processing on them (netfilter, routing, etc). > > > +/* This is a miniumum size for the linear area to avoid lots of > > > + * calls to __pskb_pull_tail() as we set up checksum offsets. > > > */ > > > > You seem to forget to explain why 128 is chosen. :-) > > Is that not sufficient explanation? What sort of thing are you looking for? >From the second version of this patch, we had a conversation.> Where does 128 come from? >"It''s just an arbitrary power of 2 that was chosen because it seems to cover most likely v6 headers and all v4 headers." So something like: "We choose 128 which is likely to cover most V6 headers and all V4 headers" would be sufficeint. Wei.
Paul Durrant
2013-Oct-14 10:57 UTC
Re: [PATCH net-next v4 2/5] xen-netback: add support for IPv6 checksum offload from guest
> -----Original Message----- > From: Wei Liu [mailto:wei.liu2@citrix.com] > Sent: 14 October 2013 11:55 > To: Paul Durrant > Cc: Wei Liu; xen-devel@lists.xen.org; netdev@vger.kernel.org; David Vrabel; > Ian Campbell > Subject: Re: [PATCH net-next v4 2/5] xen-netback: add support for IPv6 > checksum offload from guest > > On Mon, Oct 14, 2013 at 11:49:20AM +0100, Paul Durrant wrote: > > > -----Original Message----- > > > From: Wei Liu [mailto:wei.liu2@citrix.com] > > > Sent: 14 October 2013 11:43 > > > To: Paul Durrant > > > Cc: xen-devel@lists.xen.org; netdev@vger.kernel.org; Wei Liu; David > Vrabel; > > > Ian Campbell > > > Subject: Re: [PATCH net-next v4 2/5] xen-netback: add support for IPv6 > > > checksum offload from guest > > > > > > On Fri, Oct 11, 2013 at 04:06:19PM +0100, Paul Durrant wrote: > > > [...] > > > > -/* > > > > - * This is the amount of packet we copy rather than map, so that the > > > > - * guest can''t fiddle with the contents of the headers while we do > > > > - * packet processing on them (netfilter, routing, etc). > > > > +/* This is a miniumum size for the linear area to avoid lots of > > > > + * calls to __pskb_pull_tail() as we set up checksum offsets. > > > > */ > > > > > > You seem to forget to explain why 128 is chosen. :-) > > > > Is that not sufficient explanation? What sort of thing are you looking for? > > > > From the second version of this patch, we had a conversation. > > > Where does 128 come from? > > > > "It''s just an arbitrary power of 2 that was chosen because it seems to > cover most likely v6 headers and all v4 headers." > > So something like: "We choose 128 which is likely to cover most V6 > headers and all V4 headers" would be sufficeint. >Ok. I figured that was implied by "set up checksum offsets" but I can be more explicit. Paul
David Vrabel
2013-Oct-14 12:19 UTC
Re: [PATCH net-next v4 2/5] xen-netback: add support for IPv6 checksum offload from guest
On 14/10/13 11:55, Wei Liu wrote:> On Mon, Oct 14, 2013 at 11:49:20AM +0100, Paul Durrant wrote: >>> -----Original Message----- >>> From: Wei Liu [mailto:wei.liu2@citrix.com] >>> Sent: 14 October 2013 11:43 >>> To: Paul Durrant >>> Cc: xen-devel@lists.xen.org; netdev@vger.kernel.org; Wei Liu; David Vrabel; >>> Ian Campbell >>> Subject: Re: [PATCH net-next v4 2/5] xen-netback: add support for IPv6 >>> checksum offload from guest >>> >>> On Fri, Oct 11, 2013 at 04:06:19PM +0100, Paul Durrant wrote: >>> [...] >>>> -/* >>>> - * This is the amount of packet we copy rather than map, so that the >>>> - * guest can''t fiddle with the contents of the headers while we do >>>> - * packet processing on them (netfilter, routing, etc). >>>> +/* This is a miniumum size for the linear area to avoid lots of >>>> + * calls to __pskb_pull_tail() as we set up checksum offsets. >>>> */ >>> >>> You seem to forget to explain why 128 is chosen. :-) >> >> Is that not sufficient explanation? What sort of thing are you looking for? >> > >>From the second version of this patch, we had a conversation. > >> Where does 128 come from? >> > > "It''s just an arbitrary power of 2 that was chosen because it seems to > cover most likely v6 headers and all v4 headers." > > So something like: "We choose 128 which is likely to cover most V6 > headers and all V4 headers" would be sufficeint.Is "most IPv6 headers" actually good enough? Don''t we need to ensure netback copies all IP headers? David
Paul Durrant
2013-Oct-14 12:34 UTC
Re: [PATCH net-next v4 2/5] xen-netback: add support for IPv6 checksum offload from guest
> -----Original Message----- > From: David Vrabel > Sent: 14 October 2013 13:19 > To: Wei Liu > Cc: Paul Durrant; netdev@vger.kernel.org; Ian Campbell; David Vrabel; xen- > devel@lists.xen.org > Subject: Re: [Xen-devel] [PATCH net-next v4 2/5] xen-netback: add support > for IPv6 checksum offload from guest > > On 14/10/13 11:55, Wei Liu wrote: > > On Mon, Oct 14, 2013 at 11:49:20AM +0100, Paul Durrant wrote: > >>> -----Original Message----- > >>> From: Wei Liu [mailto:wei.liu2@citrix.com] > >>> Sent: 14 October 2013 11:43 > >>> To: Paul Durrant > >>> Cc: xen-devel@lists.xen.org; netdev@vger.kernel.org; Wei Liu; David > Vrabel; > >>> Ian Campbell > >>> Subject: Re: [PATCH net-next v4 2/5] xen-netback: add support for IPv6 > >>> checksum offload from guest > >>> > >>> On Fri, Oct 11, 2013 at 04:06:19PM +0100, Paul Durrant wrote: > >>> [...] > >>>> -/* > >>>> - * This is the amount of packet we copy rather than map, so that the > >>>> - * guest can''t fiddle with the contents of the headers while we do > >>>> - * packet processing on them (netfilter, routing, etc). > >>>> +/* This is a miniumum size for the linear area to avoid lots of > >>>> + * calls to __pskb_pull_tail() as we set up checksum offsets. > >>>> */ > >>> > >>> You seem to forget to explain why 128 is chosen. :-) > >> > >> Is that not sufficient explanation? What sort of thing are you looking for? > >> > > > >>From the second version of this patch, we had a conversation. > > > >> Where does 128 come from? > >> > > > > "It''s just an arbitrary power of 2 that was chosen because it seems to > > cover most likely v6 headers and all v4 headers." > > > > So something like: "We choose 128 which is likely to cover most V6 > > headers and all V4 headers" would be sufficeint. > > Is "most IPv6 headers" actually good enough? Don''t we need to ensure > netback copies all IP headers? >It will do if checksum offload is in use, but perhaps the pull as far as the transport header needs to be done anyway? I''m unsure of the expectations of other code. Paul
Ian Campbell
2013-Oct-16 16:11 UTC
Re: [PATCH net-next v4 2/5] xen-netback: add support for IPv6 checksum offload from guest
On Mon, 2013-10-14 at 13:34 +0100, Paul Durrant wrote:> > -----Original Message----- > > From: David Vrabel > > Sent: 14 October 2013 13:19 > > To: Wei Liu > > Cc: Paul Durrant; netdev@vger.kernel.org; Ian Campbell; David Vrabel; xen- > > devel@lists.xen.org > > Subject: Re: [Xen-devel] [PATCH net-next v4 2/5] xen-netback: add support > > for IPv6 checksum offload from guest > > > > On 14/10/13 11:55, Wei Liu wrote: > > > On Mon, Oct 14, 2013 at 11:49:20AM +0100, Paul Durrant wrote: > > >>> -----Original Message----- > > >>> From: Wei Liu [mailto:wei.liu2@citrix.com] > > >>> Sent: 14 October 2013 11:43 > > >>> To: Paul Durrant > > >>> Cc: xen-devel@lists.xen.org; netdev@vger.kernel.org; Wei Liu; David > > Vrabel; > > >>> Ian Campbell > > >>> Subject: Re: [PATCH net-next v4 2/5] xen-netback: add support for IPv6 > > >>> checksum offload from guest > > >>> > > >>> On Fri, Oct 11, 2013 at 04:06:19PM +0100, Paul Durrant wrote: > > >>> [...] > > >>>> -/* > > >>>> - * This is the amount of packet we copy rather than map, so that the > > >>>> - * guest can''t fiddle with the contents of the headers while we do > > >>>> - * packet processing on them (netfilter, routing, etc). > > >>>> +/* This is a miniumum size for the linear area to avoid lots of > > >>>> + * calls to __pskb_pull_tail() as we set up checksum offsets. > > >>>> */ > > >>> > > >>> You seem to forget to explain why 128 is chosen. :-) > > >> > > >> Is that not sufficient explanation? What sort of thing are you looking for? > > >> > > > > > >>From the second version of this patch, we had a conversation. > > > > > >> Where does 128 come from? > > >> > > > > > > "It''s just an arbitrary power of 2 that was chosen because it seems to > > > cover most likely v6 headers and all v4 headers." > > > > > > So something like: "We choose 128 which is likely to cover most V6 > > > headers and all V4 headers" would be sufficeint. > > > > Is "most IPv6 headers" actually good enough? Don''t we need to ensure > > netback copies all IP headers? > > > > It will do if checksum offload is in use, but perhaps the pull as far > as the transport header needs to be done anyway? I''m unsure of the > expectations of other code.I''ve always been under the impression that transport headers needed pulling up too, for the benefit of netfilter perhaps? AIUI the frags should be pure "payload". I may be wrong about that though... Ian.
Paul Durrant
2013-Oct-16 16:57 UTC
Re: [PATCH net-next v4 2/5] xen-netback: add support for IPv6 checksum offload from guest
> -----Original Message----- > From: Ian Campbell > Sent: 16 October 2013 17:12 > To: Paul Durrant > Cc: David Vrabel; Wei Liu; netdev@vger.kernel.org; xen-devel@lists.xen.org > Subject: Re: [Xen-devel] [PATCH net-next v4 2/5] xen-netback: add support > for IPv6 checksum offload from guest > > On Mon, 2013-10-14 at 13:34 +0100, Paul Durrant wrote: > > > -----Original Message----- > > > From: David Vrabel > > > Sent: 14 October 2013 13:19 > > > To: Wei Liu > > > Cc: Paul Durrant; netdev@vger.kernel.org; Ian Campbell; David Vrabel; > xen- > > > devel@lists.xen.org > > > Subject: Re: [Xen-devel] [PATCH net-next v4 2/5] xen-netback: add > support > > > for IPv6 checksum offload from guest > > > > > > On 14/10/13 11:55, Wei Liu wrote: > > > > On Mon, Oct 14, 2013 at 11:49:20AM +0100, Paul Durrant wrote: > > > >>> -----Original Message----- > > > >>> From: Wei Liu [mailto:wei.liu2@citrix.com] > > > >>> Sent: 14 October 2013 11:43 > > > >>> To: Paul Durrant > > > >>> Cc: xen-devel@lists.xen.org; netdev@vger.kernel.org; Wei Liu; David > > > Vrabel; > > > >>> Ian Campbell > > > >>> Subject: Re: [PATCH net-next v4 2/5] xen-netback: add support for > IPv6 > > > >>> checksum offload from guest > > > >>> > > > >>> On Fri, Oct 11, 2013 at 04:06:19PM +0100, Paul Durrant wrote: > > > >>> [...] > > > >>>> -/* > > > >>>> - * This is the amount of packet we copy rather than map, so that > the > > > >>>> - * guest can''t fiddle with the contents of the headers while we do > > > >>>> - * packet processing on them (netfilter, routing, etc). > > > >>>> +/* This is a miniumum size for the linear area to avoid lots of > > > >>>> + * calls to __pskb_pull_tail() as we set up checksum offsets. > > > >>>> */ > > > >>> > > > >>> You seem to forget to explain why 128 is chosen. :-) > > > >> > > > >> Is that not sufficient explanation? What sort of thing are you looking > for? > > > >> > > > > > > > >>From the second version of this patch, we had a conversation. > > > > > > > >> Where does 128 come from? > > > >> > > > > > > > > "It''s just an arbitrary power of 2 that was chosen because it seems to > > > > cover most likely v6 headers and all v4 headers." > > > > > > > > So something like: "We choose 128 which is likely to cover most V6 > > > > headers and all V4 headers" would be sufficeint. > > > > > > Is "most IPv6 headers" actually good enough? Don''t we need to ensure > > > netback copies all IP headers? > > > > > > > It will do if checksum offload is in use, but perhaps the pull as far > > as the transport header needs to be done anyway? I''m unsure of the > > expectations of other code. > > I''ve always been under the impression that transport headers needed > pulling up too, for the benefit of netfilter perhaps? > > AIUI the frags should be pure "payload". I may be wrong about that > though... >I don''t believe there is actually any upper bound on IPv6 header size so coming up with a static value would be hard. Do all h/w drivers really have to parse headers (assuming they''re driving dumb h/w that doesn''t do header payload split)? Paul