Paul Durrant
2013-Oct-08 10:58 UTC
[PATCH net-next v2 2/5] xen-netback: Add support for IPv6 checksum offload from guest
For performance of VM to VM traffic on a single host it is better to avoid calculation of TCP/UDP checksum in the sending frontend. To allow this this patch adds the code necessary to set up partial checksum for IPv6 packets and xenstore flag feature-ipv6-csum-offload to advertise that fact to frontends. Signed-off-by: Paul Durrant <paul.durrant@citrix.com> Cc: Wei Liu <wei.liu2@citrix.com> Cc: David Vrabel <david.vrabel@citrix.com> Cc: Ian Campbell <ian.campbell@citrix.com> --- drivers/net/xen-netback/netback.c | 224 ++++++++++++++++++++++++++++++------- drivers/net/xen-netback/xenbus.c | 9 ++ 2 files changed, 191 insertions(+), 42 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index f3e591c..35b1fa6 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -109,15 +109,10 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif, return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx)); } -/* - * This is the amount of packet we copy rather than map, so that the - * guest can''t fiddle with the contents of the headers while we do - * packet processing on them (netfilter, routing, etc). +/* This is a miniumum size for the linear area to avoid lots of + * calls to __pskb_pull_tail() as we set up checksum offsets. */ -#define PKT_PROT_LEN (ETH_HLEN + \ - VLAN_HLEN + \ - sizeof(struct iphdr) + MAX_IPOPTLEN + \ - sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE) +#define PKT_PROT_LEN 128 static u16 frag_get_pending_idx(skb_frag_t *frag) { @@ -1118,61 +1113,72 @@ static int xenvif_set_skb_gso(struct xenvif *vif, return 0; } -static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) +static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len) { - struct iphdr *iph; + if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) { + int target = min_t(int, skb->len, len); + __pskb_pull_tail(skb, target - skb_headlen(skb)); + } +} + +static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb, + int recalculate_partial_csum) +{ + struct iphdr *iph = (void *)skb->data; + unsigned int header_size; + unsigned int off; int err = -EPROTO; - int recalculate_partial_csum = 0; - /* - * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy - * peers can fail to set NETRXF_csum_blank when sending a GSO - * frame. In this case force the SKB to CHECKSUM_PARTIAL and - * recalculate the partial checksum. - */ - if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { - vif->rx_gso_checksum_fixup++; - skb->ip_summed = CHECKSUM_PARTIAL; - recalculate_partial_csum = 1; - } + off = sizeof(struct iphdr); - /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ - if (skb->ip_summed != CHECKSUM_PARTIAL) - return 0; + header_size = skb->network_header + off + MAX_IPOPTLEN; + maybe_pull_tail(skb, header_size); - if (skb->protocol != htons(ETH_P_IP)) - goto out; + off = iph->ihl * 4; - iph = (void *)skb->data; switch (iph->protocol) { case IPPROTO_TCP: - if (!skb_partial_csum_set(skb, 4 * iph->ihl, + if (!skb_partial_csum_set(skb, off, offsetof(struct tcphdr, check))) goto out; if (recalculate_partial_csum) { struct tcphdr *tcph = tcp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct tcphdr) + + MAX_TCP_OPTION_SPACE; + maybe_pull_tail(skb, header_size); + tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - iph->ihl*4, + skb->len - off, IPPROTO_TCP, 0); } break; case IPPROTO_UDP: - if (!skb_partial_csum_set(skb, 4 * iph->ihl, + if (!skb_partial_csum_set(skb, off, offsetof(struct udphdr, check))) goto out; if (recalculate_partial_csum) { struct udphdr *udph = udp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct udphdr); + maybe_pull_tail(skb, header_size); + udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - iph->ihl*4, + skb->len - off, IPPROTO_UDP, 0); } break; default: if (net_ratelimit()) netdev_err(vif->dev, - "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n", + "Attempting to checksum a non-TCP/UDP packet, " + "dropping a protocol %d packet\n", iph->protocol); goto out; } @@ -1183,6 +1189,148 @@ out: return err; } +static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, + int recalculate_partial_csum) +{ + int err = -EPROTO; + struct ipv6hdr *ipv6h = (void *)skb->data; + u8 nexthdr; + unsigned int header_size; + unsigned int off; + bool done; + + done = false; + off = sizeof(struct ipv6hdr); + + header_size = skb->network_header + off; + maybe_pull_tail(skb, header_size); + + nexthdr = ipv6h->nexthdr; + + while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) && + !done) { + /* We only access at most the first 2 bytes of any option header + * to determine the next one. + */ + header_size = skb->network_header + off + 2; + maybe_pull_tail(skb, header_size); + + switch (nexthdr) { + case IPPROTO_FRAGMENT: { + struct frag_hdr *hp = (void *)(skb->data + off); + + nexthdr = hp->nexthdr; + off += 8; + break; + } + case IPPROTO_DSTOPTS: + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: { + struct ipv6_opt_hdr *hp = (void *)(skb->data + off); + + nexthdr = hp->nexthdr; + off += ipv6_optlen(hp); + break; + } + case IPPROTO_AH: { + struct ip_auth_hdr *hp = (void *)(skb->data + off); + + nexthdr = hp->nexthdr; + off += (hp->hdrlen+2)<<2; + break; + } + default: + done = true; + break; + } + } + + if (!done) + goto out; + + switch (nexthdr) { + case IPPROTO_TCP: + if (!skb_partial_csum_set(skb, off, + offsetof(struct tcphdr, check))) + goto out; + + if (recalculate_partial_csum) { + struct tcphdr *tcph = tcp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct tcphdr) + + MAX_TCP_OPTION_SPACE; + maybe_pull_tail(skb, header_size); + + tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, + &ipv6h->daddr, + skb->len - off, + IPPROTO_TCP, 0); + } + break; + case IPPROTO_UDP: + if (!skb_partial_csum_set(skb, off, + offsetof(struct udphdr, check))) + goto out; + + if (recalculate_partial_csum) { + struct udphdr *udph = udp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct udphdr); + maybe_pull_tail(skb, header_size); + + udph->check = ~csum_ipv6_magic(&ipv6h->saddr, + &ipv6h->daddr, + skb->len - off, + IPPROTO_UDP, 0); + } + break; + default: + if (net_ratelimit()) + netdev_err(vif->dev, + "Attempting to checksum a non-TCP/UDP packet, " + "dropping a protocol %d packet\n", + nexthdr); + goto out; + } + + err = 0; + +out: + return err; +} + +static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) +{ + int err = -EPROTO; + int recalculate_partial_csum = 0; + + /* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy + * peers can fail to set NETRXF_csum_blank when sending a GSO + * frame. In this case force the SKB to CHECKSUM_PARTIAL and + * recalculate the partial checksum. + */ + if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { + vif->rx_gso_checksum_fixup++; + skb->ip_summed = CHECKSUM_PARTIAL; + recalculate_partial_csum = 1; + } + + /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + if (skb->protocol == htons(ETH_P_IP)) + err = checksum_setup_ip(vif, skb, recalculate_partial_csum); + else if (skb->protocol == htons(ETH_P_IPV6)) + err = checksum_setup_ipv6(vif, skb, recalculate_partial_csum); + + return err; +} + static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) { unsigned long now = jiffies; @@ -1428,15 +1576,7 @@ static int xenvif_tx_submit(struct xenvif *vif, int budget) xenvif_fill_frags(vif, skb); - /* - * If the initial fragment was < PKT_PROT_LEN then - * pull through some bytes from the other fragments to - * increase the linear region to PKT_PROT_LEN bytes. - */ - if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) { - int target = min_t(int, skb->len, PKT_PROT_LEN); - __pskb_pull_tail(skb, target - skb_headlen(skb)); - } + maybe_pull_tail(skb, PKT_PROT_LEN); skb->dev = vif->dev; skb->protocol = eth_type_trans(skb, skb->dev); diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 5e45b00..dff423b 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -101,6 +101,15 @@ static int netback_probe(struct xenbus_device *dev, goto abort_transaction; } + /* We support partial checksum setup for IPv6 packets */ + err = xenbus_printf(xbt, dev->nodename, + "feature-ipv6-csum-offload", + "%d", 1); + if (err) { + message = "writing feature-ipv6-csum-offload"; + goto abort_transaction; + } + /* We support rx-copy path. */ err = xenbus_printf(xbt, dev->nodename, "feature-rx-copy", "%d", 1); -- 1.7.10.4
Wei Liu
2013-Oct-08 13:30 UTC
Re: [PATCH net-next v2 2/5] xen-netback: Add support for IPv6 checksum offload from guest
On Tue, Oct 08, 2013 at 11:58:13AM +0100, Paul Durrant wrote: [...]> -/* > - * This is the amount of packet we copy rather than map, so that the > - * guest can''t fiddle with the contents of the headers while we do > - * packet processing on them (netfilter, routing, etc). > +/* This is a miniumum size for the linear area to avoid lots of > + * calls to __pskb_pull_tail() as we set up checksum offsets. > */ > -#define PKT_PROT_LEN (ETH_HLEN + \ > - VLAN_HLEN + \ > - sizeof(struct iphdr) + MAX_IPOPTLEN + \ > - sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE) > +#define PKT_PROT_LEN 128 >Where does 128 come from?> static u16 frag_get_pending_idx(skb_frag_t *frag) > { > @@ -1118,61 +1113,72 @@ static int xenvif_set_skb_gso(struct xenvif *vif, > return 0; > } > > -static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) > +static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len) > { > - struct iphdr *iph; > + if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) { > + int target = min_t(int, skb->len, len); > + __pskb_pull_tail(skb, target - skb_headlen(skb)); > + } > +} > + > +static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb, > + int recalculate_partial_csum) > +{[...]> > if (recalculate_partial_csum) { > struct tcphdr *tcph = tcp_hdr(skb); > + > + header_size = skb->network_header + > + off + > + sizeof(struct tcphdr) + > + MAX_TCP_OPTION_SPACE; > + maybe_pull_tail(skb, header_size); > +I presume this function is checksum_setup stripped down to handle IPv4 packet. What''s the purpose of changing its behaviour? Why the pull_tail here?> +static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, > + int recalculate_partial_csum) > +{ > + int err = -EPROTO; > + struct ipv6hdr *ipv6h = (void *)skb->data; > + u8 nexthdr; > + unsigned int header_size; > + unsigned int off; > + bool done; > + > + done = false; > + off = sizeof(struct ipv6hdr); > + > + header_size = skb->network_header + off; > + maybe_pull_tail(skb, header_size); > + > + nexthdr = ipv6h->nexthdr; > + > + while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) && > + !done) { > + /* We only access at most the first 2 bytes of any option header > + * to determine the next one. > + */ > + header_size = skb->network_header + off + 2; > + maybe_pull_tail(skb, header_size); > +Will this cause performance problem? Is it possible that you pull too many times?> + switch (nexthdr) { > + case IPPROTO_FRAGMENT: { > + struct frag_hdr *hp = (void *)(skb->data + off); > + > + nexthdr = hp->nexthdr; > + off += 8; > + break; > + } > + case IPPROTO_DSTOPTS: > + case IPPROTO_HOPOPTS: > + case IPPROTO_ROUTING: { > + struct ipv6_opt_hdr *hp = (void *)(skb->data + off); > + > + nexthdr = hp->nexthdr; > + off += ipv6_optlen(hp); > + break; > + } > + case IPPROTO_AH: { > + struct ip_auth_hdr *hp = (void *)(skb->data + off); > + > + nexthdr = hp->nexthdr; > + off += (hp->hdrlen+2)<<2; > + break; > + } > + default: > + done = true; > + break;Can you make the logic explicit here? case IPPROTO_TCP: case IPPROTO_UDP: done = true; break; default: break; Another minor suggestion is that change "done" to "found" because you''re trying to find the two type of headers.> + switch (nexthdr) { > + case IPPROTO_TCP: > + if (!skb_partial_csum_set(skb, off, > + offsetof(struct tcphdr, check))) > + goto out; > + > + if (recalculate_partial_csum) { > + struct tcphdr *tcph = tcp_hdr(skb); > + > + header_size = skb->network_header + > + off + > + sizeof(struct tcphdr) + > + MAX_TCP_OPTION_SPACE; > + maybe_pull_tail(skb, header_size); > +Same question as IPv4 counterpart, why do you need to pull here? Wei.
Paul Durrant
2013-Oct-08 13:50 UTC
Re: [PATCH net-next v2 2/5] xen-netback: Add support for IPv6 checksum offload from guest
> -----Original Message----- > From: Wei Liu [mailto:wei.liu2@citrix.com] > Sent: 08 October 2013 14:31 > To: Paul Durrant > Cc: xen-devel@lists.xen.org; netdev@vger.kernel.org; Wei Liu; David Vrabel; > Ian Campbell > Subject: Re: [PATCH net-next v2 2/5] xen-netback: Add support for IPv6 > checksum offload from guest > > On Tue, Oct 08, 2013 at 11:58:13AM +0100, Paul Durrant wrote: > [...] > > -/* > > - * This is the amount of packet we copy rather than map, so that the > > - * guest can''t fiddle with the contents of the headers while we do > > - * packet processing on them (netfilter, routing, etc). > > +/* This is a miniumum size for the linear area to avoid lots of > > + * calls to __pskb_pull_tail() as we set up checksum offsets. > > */ > > -#define PKT_PROT_LEN (ETH_HLEN + \ > > - VLAN_HLEN + \ > > - sizeof(struct iphdr) + MAX_IPOPTLEN + \ > > - sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE) > > +#define PKT_PROT_LEN 128 > > > > Where does 128 come from? >It''s just an arbitrary power of 2 that was chosen because it seems to cover most likely v6 headers and all v4 headers.> > static u16 frag_get_pending_idx(skb_frag_t *frag) > > { > > @@ -1118,61 +1113,72 @@ static int xenvif_set_skb_gso(struct xenvif *vif, > > return 0; > > } > > > > -static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) > > +static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len) > > { > > - struct iphdr *iph; > > + if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) { > > + int target = min_t(int, skb->len, len); > > + __pskb_pull_tail(skb, target - skb_headlen(skb)); > > + } > > +} > > + > > +static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb, > > + int recalculate_partial_csum) > > +{ > [...] > > > > if (recalculate_partial_csum) { > > struct tcphdr *tcph = tcp_hdr(skb); > > + > > + header_size = skb->network_header + > > + off + > > + sizeof(struct tcphdr) + > > + MAX_TCP_OPTION_SPACE; > > + maybe_pull_tail(skb, header_size); > > + > > I presume this function is checksum_setup stripped down to handle IPv4 > packet. What''s the purpose of changing its behaviour? Why the pull_tail > here? >We have to make sure that the TCP header is in the linear area as we are about to write to the checksum field. In practice, the 128 byte pull should guarantee this but in case that is varied later I wanted to make sure things did not start to fail in an add way.> > +static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, > > + int recalculate_partial_csum) > > +{ > > + int err = -EPROTO; > > + struct ipv6hdr *ipv6h = (void *)skb->data; > > + u8 nexthdr; > > + unsigned int header_size; > > + unsigned int off; > > + bool done; > > + > > + done = false; > > + off = sizeof(struct ipv6hdr); > > + > > + header_size = skb->network_header + off; > > + maybe_pull_tail(skb, header_size); > > + > > + nexthdr = ipv6h->nexthdr; > > + > > + while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) > && > > + !done) { > > + /* We only access at most the first 2 bytes of any option > header > > + * to determine the next one. > > + */ > > + header_size = skb->network_header + off + 2; > > + maybe_pull_tail(skb, header_size); > > + > > Will this cause performance problem? Is it possible that you pull too > many times? >I guess it means we may get two pulls for the TCP/UDP headers rather than one so could push the pulls into the individual cases if you think it will affect performance that badly.> > + switch (nexthdr) { > > + case IPPROTO_FRAGMENT: { > > + struct frag_hdr *hp = (void *)(skb->data + off); > > + > > + nexthdr = hp->nexthdr; > > + off += 8; > > + break; > > + } > > + case IPPROTO_DSTOPTS: > > + case IPPROTO_HOPOPTS: > > + case IPPROTO_ROUTING: { > > + struct ipv6_opt_hdr *hp = (void *)(skb->data + off); > > + > > + nexthdr = hp->nexthdr; > > + off += ipv6_optlen(hp); > > + break; > > + } > > + case IPPROTO_AH: { > > + struct ip_auth_hdr *hp = (void *)(skb->data + off); > > + > > + nexthdr = hp->nexthdr; > > + off += (hp->hdrlen+2)<<2; > > + break; > > + } > > + default: > > + done = true; > > + break; > > Can you make the logic explicit here? > > case IPPROTO_TCP: > case IPPROTO_UDP: > done = true; > break; > default: > break; > > Another minor suggestion is that change "done" to "found" because you''re > trying to find the two type of headers. >Yes, I could code it that way if you prefer.> > + switch (nexthdr) { > > + case IPPROTO_TCP: > > + if (!skb_partial_csum_set(skb, off, > > + offsetof(struct tcphdr, check))) > > + goto out; > > + > > + if (recalculate_partial_csum) { > > + struct tcphdr *tcph = tcp_hdr(skb); > > + > > + header_size = skb->network_header + > > + off + > > + sizeof(struct tcphdr) + > > + MAX_TCP_OPTION_SPACE; > > + maybe_pull_tail(skb, header_size); > > + > > Same question as IPv4 counterpart, why do you need to pull here? >Same answer as before ;-) Paul
Wei Liu
2013-Oct-08 16:19 UTC
Re: [PATCH net-next v2 2/5] xen-netback: Add support for IPv6 checksum offload from guest
On Tue, Oct 08, 2013 at 02:50:27PM +0100, Paul Durrant wrote: [...]> > > -#define PKT_PROT_LEN (ETH_HLEN + \ > > > - VLAN_HLEN + \ > > > - sizeof(struct iphdr) + MAX_IPOPTLEN + \ > > > - sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE) > > > +#define PKT_PROT_LEN 128 > > > > > > > Where does 128 come from? > > > > It''s just an arbitrary power of 2 that was chosen because it seems to > cover most likely v6 headers and all v4 headers. >Hmm... How about using the value of MAX_TCP_HEADER? I guess that can cover all V4 / V6 headers. MAX_TCP_HEADER varies, depending on configuration. To make sure we can accommodate all guests packet we might need to use its maximum value which can be as big as 128 + 128 + 48.> > > if (recalculate_partial_csum) { > > > struct tcphdr *tcph = tcp_hdr(skb); > > > + > > > + header_size = skb->network_header + > > > + off + > > > + sizeof(struct tcphdr) + > > > + MAX_TCP_OPTION_SPACE; > > > + maybe_pull_tail(skb, header_size); > > > + > > > > I presume this function is checksum_setup stripped down to handle IPv4 > > packet. What''s the purpose of changing its behaviour? Why the pull_tail > > here? > > > > We have to make sure that the TCP header is in the linear area as we > are about to write to the checksum field. In practice, the 128 byte > pull should guarantee this but in case that is varied later I wanted > to make sure things did not start to fail in an add way. >If you already set the pull size to maximum possible value then this will not be necessary anymore, right?> > > + while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) > > && > > > + !done) { > > > + /* We only access at most the first 2 bytes of any option > > header > > > + * to determine the next one. > > > + */ > > > + header_size = skb->network_header + off + 2; > > > + maybe_pull_tail(skb, header_size); > > > + > > > > Will this cause performance problem? Is it possible that you pull too > > many times? > > > > I guess it means we may get two pulls for the TCP/UDP headers rather > than one so could push the pulls into the individual cases if you > think it will affect performance that badly.Hmm... Not sure I get what you mean here. The main problem I''m seeing is that maybe_pull_tail is called in every loop. I would like to see as few pulls as possible because __pskb_pull_tail can be expensive and only expected to use in "exceptional cases" (quoted from the comment above that function). Is it possible to pull TCP_MAX_HEADER bytes once to eliminate all other pulls in checksum_setup{,_ipv4,_ipv6}?>[...]> > > > Can you make the logic explicit here? > > > > case IPPROTO_TCP: > > case IPPROTO_UDP: > > done = true; > > break; > > default: > > break; > > > > Another minor suggestion is that change "done" to "found" because you''re > > trying to find the two type of headers. > > > > Yes, I could code it that way if you prefer. >Explicity is better than implicity IMHO. After this change could you also move the default branch (netdev_err) in the following "switch" to the first "switch". Wei.
Paul Durrant
2013-Oct-10 09:37 UTC
Re: [PATCH net-next v2 2/5] xen-netback: Add support for IPv6 checksum offload from guest
> -----Original Message----- > From: Wei Liu [mailto:wei.liu2@citrix.com] > Sent: 08 October 2013 17:19 > To: Paul Durrant > Cc: Wei Liu; xen-devel@lists.xen.org; netdev@vger.kernel.org; David Vrabel; > Ian Campbell > Subject: Re: [PATCH net-next v2 2/5] xen-netback: Add support for IPv6 > checksum offload from guest > > On Tue, Oct 08, 2013 at 02:50:27PM +0100, Paul Durrant wrote: > [...] > > > > -#define PKT_PROT_LEN (ETH_HLEN + \ > > > > - VLAN_HLEN + \ > > > > - sizeof(struct iphdr) + MAX_IPOPTLEN + \ > > > > - sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE) > > > > +#define PKT_PROT_LEN 128 > > > > > > > > > > Where does 128 come from? > > > > > > > It''s just an arbitrary power of 2 that was chosen because it seems to > > cover most likely v6 headers and all v4 headers. > > > > Hmm... How about using the value of MAX_TCP_HEADER? I guess that can > cover all V4 / V6 headers. > > MAX_TCP_HEADER varies, depending on configuration. To make sure we can > accommodate all guests packet we might need to use its maximum value > which can be as big as 128 + 128 + 48. >Because we always double-copy (as the grant copy doesn''t copy direct to the linear area) I was concerned about making the pullup too big. I''d rather optimize around a smaller header so how about we stick with 128 but if maybe_pull_tail() finds it needs to pullup then it just pulls up to MAX_TCP_HEADER, so we limit to pullups to maximum of 2?> > > > if (recalculate_partial_csum) { > > > > struct tcphdr *tcph = tcp_hdr(skb); > > > > + > > > > + header_size = skb->network_header + > > > > + off + > > > > + sizeof(struct tcphdr) + > > > > + MAX_TCP_OPTION_SPACE; > > > > + maybe_pull_tail(skb, header_size); > > > > + > > > > > > I presume this function is checksum_setup stripped down to handle IPv4 > > > packet. What''s the purpose of changing its behaviour? Why the pull_tail > > > here? > > > > > > > We have to make sure that the TCP header is in the linear area as we > > are about to write to the checksum field. In practice, the 128 byte > > pull should guarantee this but in case that is varied later I wanted > > to make sure things did not start to fail in an add way. > > > > If you already set the pull size to maximum possible value then this > will not be necessary anymore, right? > > > > > + while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) > > > && > > > > + !done) { > > > > + /* We only access at most the first 2 bytes of any option > > > header > > > > + * to determine the next one. > > > > + */ > > > > + header_size = skb->network_header + off + 2; > > > > + maybe_pull_tail(skb, header_size); > > > > + > > > > > > Will this cause performance problem? Is it possible that you pull too > > > many times? > > > > > > > I guess it means we may get two pulls for the TCP/UDP headers rather > > than one so could push the pulls into the individual cases if you > > think it will affect performance that badly. > > Hmm... Not sure I get what you mean here. The main problem I''m seeing is > that maybe_pull_tail is called in every loop. > > I would like to see as few pulls as possible because __pskb_pull_tail > can be expensive and only expected to use in "exceptional cases" (quoted > from the comment above that function). > > Is it possible to pull TCP_MAX_HEADER bytes once to eliminate all other > pulls in checksum_setup{,_ipv4,_ipv6}? >Note that the function is called *maybe*_pull_tail(). It only pulls if it needs to :-) Paul