Paul Durrant
2013-Nov-05 11:59 UTC
[PATCH net-next] xen-netfront: Add support for IPv6 offloads
This patch adds support for IPv6 checksum offload and GSO when those fetaures are available in the backend. Signed-off-by: Paul Durrant <paul.durrant@citrix.com> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: David Vrabel <david.vrabel@citrix.com> --- drivers/net/xen-netfront.c | 263 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 231 insertions(+), 32 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index dd1011e..27212d8 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -616,7 +616,9 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) tx->flags |= XEN_NETTXF_extra_info; gso->u.gso.size = skb_shinfo(skb)->gso_size; - gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; + gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ? + XEN_NETIF_GSO_TYPE_TCPV6 : + XEN_NETIF_GSO_TYPE_TCPV4; gso->u.gso.pad = 0; gso->u.gso.features = 0; @@ -808,15 +810,18 @@ static int xennet_set_skb_gso(struct sk_buff *skb, return -EINVAL; } - /* Currently only TCPv4 S.O. is supported. */ - if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { + if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 && + gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) { if (net_ratelimit()) pr_warn("Bad GSO type %d\n", gso->u.gso.type); return -EINVAL; } skb_shinfo(skb)->gso_size = gso->u.gso.size; - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + skb_shinfo(skb)->gso_type + (gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ? + SKB_GSO_TCPV4 : + SKB_GSO_TCPV6; /* Header must be checked, and gso_segs computed. */ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; @@ -856,62 +861,72 @@ static RING_IDX xennet_fill_frags(struct netfront_info *np, return cons; } -static int checksum_setup(struct net_device *dev, struct sk_buff *skb) +static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len) { - struct iphdr *iph; - int err = -EPROTO; - int recalculate_partial_csum = 0; - - /* - * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy - * peers can fail to set NETRXF_csum_blank when sending a GSO - * frame. In this case force the SKB to CHECKSUM_PARTIAL and - * recalculate the partial checksum. - */ - if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { - struct netfront_info *np = netdev_priv(dev); - np->rx_gso_checksum_fixup++; - skb->ip_summed = CHECKSUM_PARTIAL; - recalculate_partial_csum = 1; + if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) { + /* If we need to pullup then pullup to the max, so we + * won''t need to do it again. + */ + int target = min_t(int, skb->len, MAX_TCP_HEADER); + __pskb_pull_tail(skb, target - skb_headlen(skb)); } +} - /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ - if (skb->ip_summed != CHECKSUM_PARTIAL) - return 0; +static int checksum_setup_ip(struct sk_buff *skb, int recalculate_partial_csum) +{ + struct iphdr *iph = (void *)skb->data; + unsigned int header_size; + unsigned int off; + int err = -EPROTO; - if (skb->protocol != htons(ETH_P_IP)) - goto out; + off = sizeof(struct iphdr); - iph = (void *)skb->data; + header_size = skb->network_header + off + MAX_IPOPTLEN; + maybe_pull_tail(skb, header_size); + + off = iph->ihl * 4; switch (iph->protocol) { case IPPROTO_TCP: - if (!skb_partial_csum_set(skb, 4 * iph->ihl, + if (!skb_partial_csum_set(skb, off, offsetof(struct tcphdr, check))) goto out; if (recalculate_partial_csum) { struct tcphdr *tcph = tcp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct tcphdr); + maybe_pull_tail(skb, header_size); + tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - iph->ihl*4, + skb->len - off, IPPROTO_TCP, 0); } break; case IPPROTO_UDP: - if (!skb_partial_csum_set(skb, 4 * iph->ihl, + if (!skb_partial_csum_set(skb, off, offsetof(struct udphdr, check))) goto out; if (recalculate_partial_csum) { struct udphdr *udph = udp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct udphdr); + maybe_pull_tail(skb, header_size); + udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - iph->ihl*4, + skb->len - off, IPPROTO_UDP, 0); } break; default: if (net_ratelimit()) - pr_err("Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n", + pr_err("Attempting to checksum a non-TCP/UDP packet, " + "dropping a protocol %d packet\n", iph->protocol); goto out; } @@ -922,6 +937,158 @@ out: return err; } +static int checksum_setup_ipv6(struct sk_buff *skb, int recalculate_partial_csum) +{ + int err = -EPROTO; + struct ipv6hdr *ipv6h = (void *)skb->data; + u8 nexthdr; + unsigned int header_size; + unsigned int off; + bool fragment; + bool done; + + done = false; + + off = sizeof(struct ipv6hdr); + + header_size = skb->network_header + off; + maybe_pull_tail(skb, header_size); + + nexthdr = ipv6h->nexthdr; + + while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) && + !done) { + switch (nexthdr) { + case IPPROTO_DSTOPTS: + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: { + struct ipv6_opt_hdr *hp = (void *)(skb->data + off); + + header_size = skb->network_header + + off + + sizeof(struct ipv6_opt_hdr); + maybe_pull_tail(skb, header_size); + + nexthdr = hp->nexthdr; + off += ipv6_optlen(hp); + break; + } + case IPPROTO_AH: { + struct ip_auth_hdr *hp = (void *)(skb->data + off); + + header_size = skb->network_header + + off + + sizeof(struct ip_auth_hdr); + maybe_pull_tail(skb, header_size); + + nexthdr = hp->nexthdr; + off += (hp->hdrlen+2)<<2; + break; + } + case IPPROTO_FRAGMENT: + fragment = true; + /* fall through */ + default: + done = true; + break; + } + } + + if (!done) { + if (net_ratelimit()) + pr_err("Failed to parse packet header\n"); + goto out; + } + + if (fragment) { + if (net_ratelimit()) + pr_err("Packet is a fragment!\n"); + goto out; + } + + switch (nexthdr) { + case IPPROTO_TCP: + if (!skb_partial_csum_set(skb, off, + offsetof(struct tcphdr, check))) + goto out; + + if (recalculate_partial_csum) { + struct tcphdr *tcph = tcp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct tcphdr); + maybe_pull_tail(skb, header_size); + + tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, + &ipv6h->daddr, + skb->len - off, + IPPROTO_TCP, 0); + } + break; + case IPPROTO_UDP: + if (!skb_partial_csum_set(skb, off, + offsetof(struct udphdr, check))) + goto out; + + if (recalculate_partial_csum) { + struct udphdr *udph = udp_hdr(skb); + + header_size = skb->network_header + + off + + sizeof(struct udphdr); + maybe_pull_tail(skb, header_size); + + udph->check = ~csum_ipv6_magic(&ipv6h->saddr, + &ipv6h->daddr, + skb->len - off, + IPPROTO_UDP, 0); + } + break; + default: + if (net_ratelimit()) + pr_err("Attempting to checksum a non-TCP/UDP packet, " + "dropping a protocol %d packet\n", + nexthdr); + goto out; + } + + err = 0; + +out: + return err; +} + +static int checksum_setup(struct net_device *dev, struct sk_buff *skb) +{ + int err = -EPROTO; + int recalculate_partial_csum = 0; + + /* + * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy + * peers can fail to set NETRXF_csum_blank when sending a GSO + * frame. In this case force the SKB to CHECKSUM_PARTIAL and + * recalculate the partial checksum. + */ + if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { + struct netfront_info *np = netdev_priv(dev); + np->rx_gso_checksum_fixup++; + skb->ip_summed = CHECKSUM_PARTIAL; + recalculate_partial_csum = 1; + } + + /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + if (skb->protocol == htons(ETH_P_IP)) + err = checksum_setup_ip(skb, recalculate_partial_csum); + else if (skb->protocol == htons(ETH_P_IPV6)) + err = checksum_setup_ipv6(skb, recalculate_partial_csum); + + return err; +} + static int handle_incoming_queue(struct net_device *dev, struct sk_buff_head *rxq) { @@ -1232,6 +1399,15 @@ static netdev_features_t xennet_fix_features(struct net_device *dev, features &= ~NETIF_F_SG; } + if (features & NETIF_F_IPV6_CSUM) { + if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, + "feature-ipv6-csum-offload", "%d", &val) < 0) + val = 0; + + if (!val) + features &= ~NETIF_F_IPV6_CSUM; + } + if (features & NETIF_F_TSO) { if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-gso-tcpv4", "%d", &val) < 0) @@ -1241,6 +1417,15 @@ static netdev_features_t xennet_fix_features(struct net_device *dev, features &= ~NETIF_F_TSO; } + if (features & NETIF_F_TSO6) { + if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, + "feature-gso-tcpv6", "%d", &val) < 0) + val = 0; + + if (!val) + features &= ~NETIF_F_TSO6; + } + return features; } @@ -1373,7 +1558,9 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) netif_napi_add(netdev, &np->napi, xennet_poll, 64); netdev->features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_GSO_ROBUST; - netdev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO; + netdev->hw_features = NETIF_F_SG | + NETIF_F_IPV6_CSUM | + NETIF_F_TSO | NETIF_F_TSO6; /* * Assume that all hw features are available for now. This set @@ -1751,6 +1938,18 @@ again: goto abort_transaction; } + err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6", "%d", 1); + if (err) { + message = "writing feature-gso-tcpv6"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-ipv6-csum-offload", "%d", 1); + if (err) { + message = "writing feature-gso-tcpv6"; + goto abort_transaction; + } + err = xenbus_transaction_end(xbt, 0); if (err) { if (err == -EAGAIN) -- 1.7.10.4
Ian Campbell
2013-Nov-07 10:48 UTC
Re: [PATCH net-next] xen-netfront: Add support for IPv6 offloads
On Tue, 2013-11-05 at 11:59 +0000, Paul Durrant wrote:> This patch adds support for IPv6 checksum offload and GSO when those > fetaures are available in the backend. > > Signed-off-by: Paul Durrant <paul.durrant@citrix.com> > Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> > Cc: David Vrabel <david.vrabel@citrix.com> > --- > drivers/net/xen-netfront.c | 263 ++++++++++++++++++++++++++++++++++++++------ > 1 file changed, 231 insertions(+), 32 deletions(-) > > diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c > index dd1011e..27212d8 100644 > --- a/drivers/net/xen-netfront.c > +++ b/drivers/net/xen-netfront.c > @@ -616,7 +616,9 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) > tx->flags |= XEN_NETTXF_extra_info; > > gso->u.gso.size = skb_shinfo(skb)->gso_size; > - gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; > + gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ? > + XEN_NETIF_GSO_TYPE_TCPV6 : > + XEN_NETIF_GSO_TYPE_TCPV4; > gso->u.gso.pad = 0; > gso->u.gso.features = 0; > > @@ -808,15 +810,18 @@ static int xennet_set_skb_gso(struct sk_buff *skb, > return -EINVAL; > } > > - /* Currently only TCPv4 S.O. is supported. */ > - if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { > + if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 && > + gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) { > if (net_ratelimit()) > pr_warn("Bad GSO type %d\n", gso->u.gso.type); > return -EINVAL; > } > > skb_shinfo(skb)->gso_size = gso->u.gso.size; > - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; > + skb_shinfo(skb)->gso_type > + (gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ? > + SKB_GSO_TCPV4 : > + SKB_GSO_TCPV6; > > /* Header must be checked, and gso_segs computed. */ > skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; > @@ -856,62 +861,72 @@ static RING_IDX xennet_fill_frags(struct netfront_info *np, > return cons; > } > > -static int checksum_setup(struct net_device *dev, struct sk_buff *skb) > +static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len) > { > - struct iphdr *iph; > - int err = -EPROTO; > - int recalculate_partial_csum = 0; > - > - /* > - * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy > - * peers can fail to set NETRXF_csum_blank when sending a GSO > - * frame. In this case force the SKB to CHECKSUM_PARTIAL and > - * recalculate the partial checksum. > - */ > - if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { > - struct netfront_info *np = netdev_priv(dev); > - np->rx_gso_checksum_fixup++; > - skb->ip_summed = CHECKSUM_PARTIAL; > - recalculate_partial_csum = 1; > + if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) { > + /* If we need to pullup then pullup to the max, so we > + * won''t need to do it again. > + */ > + int target = min_t(int, skb->len, MAX_TCP_HEADER); > + __pskb_pull_tail(skb, target - skb_headlen(skb));Should the functions "len" argument not be involved somewhere in this? What if it is bigger than MAX_TCP_HEADER for example?> } > +} > > - /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ > - if (skb->ip_summed != CHECKSUM_PARTIAL) > - return 0; > +static int checksum_setup_ip(struct sk_buff *skb, int recalculate_partial_csum) > +{> + struct iphdr *iph = (void *)skb->data; > + unsigned int header_size; > + unsigned int off; > + int err = -EPROTO; > > - if (skb->protocol != htons(ETH_P_IP)) > - goto out; > + off = sizeof(struct iphdr); > > - iph = (void *)skb->data; > + header_size = skb->network_header + off + MAX_IPOPTLEN; > + maybe_pull_tail(skb, header_size);You never reuse header_size other than setting it and immediately passing it to maybe_pull_tail, it could be inlined into the function call? (I found it confusing because you reset it in the recalculate_partial rather than extending it which is what I expected) maybe_pull_tail doesn''t guarantee to pull up to the length of the given parameter, e.g. if it is more then skb->len. Which I think means you need some other explicit skb len checks around the place, don''t you? Otherwise you risk running past the end of the skb for a malformed frame.> + > + off = iph->ihl * 4; > > switch (iph->protocol) { > case IPPROTO_TCP: > - if (!skb_partial_csum_set(skb, 4 * iph->ihl, > + if (!skb_partial_csum_set(skb, off, > offsetof(struct tcphdr, check))) > goto out; > > if (recalculate_partial_csum) { > struct tcphdr *tcph = tcp_hdr(skb); > + > + header_size = skb->network_header + > + off + > + sizeof(struct tcphdr); > + maybe_pull_tail(skb, header_size); > + > tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, > - skb->len - iph->ihl*4, > + skb->len - off, > IPPROTO_TCP, 0); > } > break; > case IPPROTO_UDP: > - if (!skb_partial_csum_set(skb, 4 * iph->ihl, > + if (!skb_partial_csum_set(skb, off, > offsetof(struct udphdr, check))) > goto out; > > if (recalculate_partial_csum) { > struct udphdr *udph = udp_hdr(skb); > + > + header_size = skb->network_header + > + off + > + sizeof(struct udphdr); > + maybe_pull_tail(skb, header_size); > + > udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, > - skb->len - iph->ihl*4, > + skb->len - off, > IPPROTO_UDP, 0); > } > break; > default: > if (net_ratelimit()) > - pr_err("Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n", > + pr_err("Attempting to checksum a non-TCP/UDP packet, " > + "dropping a protocol %d packet\n",I think Linux''s coding style explicitly relaxes the 80-column limit for string literals.> iph->protocol); > goto out; > } > @@ -922,6 +937,158 @@ out: > return err; > } > > +static int checksum_setup_ipv6(struct sk_buff *skb, int recalculate_partial_csum) > +{ > + int err = -EPROTO; > + struct ipv6hdr *ipv6h = (void *)skb->data; > + u8 nexthdr; > + unsigned int header_size; > + unsigned int off; > + bool fragment; > + bool done; > + > + done = false; > + > + off = sizeof(struct ipv6hdr); > + > + header_size = skb->network_header + off; > + maybe_pull_tail(skb, header_size);Same comment about skb lengths?> + > + nexthdr = ipv6h->nexthdr; > + > + while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) && > + !done) { > + switch (nexthdr) { > + case IPPROTO_DSTOPTS: > + case IPPROTO_HOPOPTS: > + case IPPROTO_ROUTING: { > + struct ipv6_opt_hdr *hp = (void *)(skb->data + off); > + > + header_size = skb->network_header + > + off + > + sizeof(struct ipv6_opt_hdr); > + maybe_pull_tail(skb, header_size); > + > + nexthdr = hp->nexthdr; > + off += ipv6_optlen(hp); > + break; > + } > + case IPPROTO_AH: { > + struct ip_auth_hdr *hp = (void *)(skb->data + off); > + > + header_size = skb->network_header + > + off + > + sizeof(struct ip_auth_hdr); > + maybe_pull_tail(skb, header_size); > + > + nexthdr = hp->nexthdr; > + off += (hp->hdrlen+2)<<2;I wonder if the core header would welcome a ipv6_ahlen cf optlen?> + break; > + } > + case IPPROTO_FRAGMENT: > + fragment = true; > + /* fall through */ > + default: > + done = true; > + break; > + } > + } > + > + if (!done) { > + if (net_ratelimit()) > + pr_err("Failed to parse packet header\n");Is it a requirement of the protocol that the list of IPPROTO_* we handle must be followed by some other header? Do we really care or is it more the upper stacks problem? Presumably it can get similarly malformed packets off the wire? Also you can use net_err_ratelimited.> + goto out; > + } > + > + if (fragment) { > + if (net_ratelimit()) > + pr_err("Packet is a fragment!\n");Is that a bad thing?> + goto out; > + } > + > + switch (nexthdr) { > + case IPPROTO_TCP: > + if (!skb_partial_csum_set(skb, off, > + offsetof(struct tcphdr, check))) > + goto out; > + > + if (recalculate_partial_csum) { > + struct tcphdr *tcph = tcp_hdr(skb); > + > + header_size = skb->network_header + > + off + > + sizeof(struct tcphdr); > + maybe_pull_tail(skb, header_size); > + > + tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, > + &ipv6h->daddr, > + skb->len - off, > + IPPROTO_TCP, 0); > + } > + break; > + case IPPROTO_UDP: > + if (!skb_partial_csum_set(skb, off, > + offsetof(struct udphdr, check))) > + goto out; > + > + if (recalculate_partial_csum) { > + struct udphdr *udph = udp_hdr(skb); > + > + header_size = skb->network_header + > + off + > + sizeof(struct udphdr); > + maybe_pull_tail(skb, header_size); > + > + udph->check = ~csum_ipv6_magic(&ipv6h->saddr, > + &ipv6h->daddr, > + skb->len - off, > + IPPROTO_UDP, 0); > + } > + break; > + default: > + if (net_ratelimit()) > + pr_err("Attempting to checksum a non-TCP/UDP packet, " > + "dropping a protocol %d packet\n", > + nexthdr); > + goto out; > + } > + > + err = 0; > + > +out: > + return err; > +} > + > +static int checksum_setup(struct net_device *dev, struct sk_buff *skb) > +{ > + int err = -EPROTO; > + int recalculate_partial_csum = 0; > + > + /* > + * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy > + * peers can fail to set NETRXF_csum_blank when sending a GSO > + * frame. In this case force the SKB to CHECKSUM_PARTIAL and > + * recalculate the partial checksum. > + */ > + if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {Is it worth making this ipv4 only. Assuming that anything which adds v6 support has already fixed this bug? That would save time in checksum_setup_ipv6. Speaking of which can checksum_setup_* be shared with netfront somehow? Or maybe even put in common code? (Perhaps as a future cleanup)> + struct netfront_info *np = netdev_priv(dev); > + np->rx_gso_checksum_fixup++; > + skb->ip_summed = CHECKSUM_PARTIAL; > + recalculate_partial_csum = 1; > + } > + > + /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ > + if (skb->ip_summed != CHECKSUM_PARTIAL) > + return 0; > + > + if (skb->protocol == htons(ETH_P_IP)) > + err = checksum_setup_ip(skb, recalculate_partial_csum); > + else if (skb->protocol == htons(ETH_P_IPV6)) > + err = checksum_setup_ipv6(skb, recalculate_partial_csum); > + > + return err; > +} > + > static int handle_incoming_queue(struct net_device *dev,[...]
Paul Durrant
2013-Nov-07 11:02 UTC
Re: [PATCH net-next] xen-netfront: Add support for IPv6 offloads
> -----Original Message----- > From: Ian Campbell > Sent: 07 November 2013 10:49 > To: Paul Durrant > Cc: xen-devel@lists.xen.org; Boris Ostrovsky; David Vrabel > Subject: Re: [Xen-devel] [PATCH net-next] xen-netfront: Add support for > IPv6 offloads > > On Tue, 2013-11-05 at 11:59 +0000, Paul Durrant wrote: > > This patch adds support for IPv6 checksum offload and GSO when those > > fetaures are available in the backend. > > > > Signed-off-by: Paul Durrant <paul.durrant@citrix.com> > > Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > > Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> > > Cc: David Vrabel <david.vrabel@citrix.com> > > --- > > drivers/net/xen-netfront.c | 263 > ++++++++++++++++++++++++++++++++++++++------ > > 1 file changed, 231 insertions(+), 32 deletions(-) > > > > diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c > > index dd1011e..27212d8 100644 > > --- a/drivers/net/xen-netfront.c > > +++ b/drivers/net/xen-netfront.c > > @@ -616,7 +616,9 @@ static int xennet_start_xmit(struct sk_buff *skb, > struct net_device *dev) > > tx->flags |= XEN_NETTXF_extra_info; > > > > gso->u.gso.size = skb_shinfo(skb)->gso_size; > > - gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; > > + gso->u.gso.type = (skb_shinfo(skb)->gso_type & > SKB_GSO_TCPV6) ? > > + XEN_NETIF_GSO_TYPE_TCPV6 : > > + XEN_NETIF_GSO_TYPE_TCPV4; > > gso->u.gso.pad = 0; > > gso->u.gso.features = 0; > > > > @@ -808,15 +810,18 @@ static int xennet_set_skb_gso(struct sk_buff > *skb, > > return -EINVAL; > > } > > > > - /* Currently only TCPv4 S.O. is supported. */ > > - if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { > > + if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 && > > + gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) { > > if (net_ratelimit()) > > pr_warn("Bad GSO type %d\n", gso->u.gso.type); > > return -EINVAL; > > } > > > > skb_shinfo(skb)->gso_size = gso->u.gso.size; > > - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; > > + skb_shinfo(skb)->gso_type > > + (gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ? > > + SKB_GSO_TCPV4 : > > + SKB_GSO_TCPV6; > > > > /* Header must be checked, and gso_segs computed. */ > > skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; > > @@ -856,62 +861,72 @@ static RING_IDX xennet_fill_frags(struct > netfront_info *np, > > return cons; > > } > > > > -static int checksum_setup(struct net_device *dev, struct sk_buff *skb) > > +static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len) > > { > > - struct iphdr *iph; > > - int err = -EPROTO; > > - int recalculate_partial_csum = 0; > > - > > - /* > > - * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy > > - * peers can fail to set NETRXF_csum_blank when sending a GSO > > - * frame. In this case force the SKB to CHECKSUM_PARTIAL and > > - * recalculate the partial checksum. > > - */ > > - if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { > > - struct netfront_info *np = netdev_priv(dev); > > - np->rx_gso_checksum_fixup++; > > - skb->ip_summed = CHECKSUM_PARTIAL; > > - recalculate_partial_csum = 1; > > + if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) { > > + /* If we need to pullup then pullup to the max, so we > > + * won''t need to do it again. > > + */ > > + int target = min_t(int, skb->len, MAX_TCP_HEADER); > > + __pskb_pull_tail(skb, target - skb_headlen(skb)); > > Should the functions "len" argument not be involved somewhere in this? > What if it is bigger than MAX_TCP_HEADER for example?Hmm, good point.> > > } > > +} > > > > - /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ > > - if (skb->ip_summed != CHECKSUM_PARTIAL) > > - return 0; > > +static int checksum_setup_ip(struct sk_buff *skb, int > recalculate_partial_csum) > > +{ > > > + struct iphdr *iph = (void *)skb->data; > > + unsigned int header_size; > > + unsigned int off; > > + int err = -EPROTO; > > > > - if (skb->protocol != htons(ETH_P_IP)) > > - goto out; > > + off = sizeof(struct iphdr); > > > > - iph = (void *)skb->data; > > + header_size = skb->network_header + off + MAX_IPOPTLEN; > > + maybe_pull_tail(skb, header_size); > > You never reuse header_size other than setting it and immediately > passing it to maybe_pull_tail, it could be inlined into the function > call? >Yes, it could but I thought this was tidier. It''s also the same as the code in netback.> (I found it confusing because you reset it in the recalculate_partial > rather than extending it which is what I expected) > > maybe_pull_tail doesn''t guarantee to pull up to the length of the given > parameter, e.g. if it is more then skb->len. Which I think means you > need some other explicit skb len checks around the place, don''t you? > Otherwise you risk running past the end of the skb for a malformed > frame. >Yes, that''s true. Probably best to have maybe_pull_tail() return a failure if it doesn''t managed to pullup the requested amount then.> > + > > + off = iph->ihl * 4; > > > > switch (iph->protocol) { > > case IPPROTO_TCP: > > - if (!skb_partial_csum_set(skb, 4 * iph->ihl, > > + if (!skb_partial_csum_set(skb, off, > > offsetof(struct tcphdr, check))) > > goto out; > > > > if (recalculate_partial_csum) { > > struct tcphdr *tcph = tcp_hdr(skb); > > + > > + header_size = skb->network_header + > > + off + > > + sizeof(struct tcphdr); > > + maybe_pull_tail(skb, header_size); > > + > > tcph->check = ~csum_tcpudp_magic(iph->saddr, iph- > >daddr, > > - skb->len - iph->ihl*4, > > + skb->len - off, > > IPPROTO_TCP, 0); > > } > > break; > > case IPPROTO_UDP: > > - if (!skb_partial_csum_set(skb, 4 * iph->ihl, > > + if (!skb_partial_csum_set(skb, off, > > offsetof(struct udphdr, check))) > > goto out; > > > > if (recalculate_partial_csum) { > > struct udphdr *udph = udp_hdr(skb); > > + > > + header_size = skb->network_header + > > + off + > > + sizeof(struct udphdr); > > + maybe_pull_tail(skb, header_size); > > + > > udph->check = ~csum_tcpudp_magic(iph->saddr, > iph->daddr, > > - skb->len - iph->ihl*4, > > + skb->len - off, > > IPPROTO_UDP, 0); > > } > > break; > > default: > > if (net_ratelimit()) > > - pr_err("Attempting to checksum a non-TCP/UDP > packet, dropping a protocol %d packet\n", > > + pr_err("Attempting to checksum a non-TCP/UDP > packet, " > > + "dropping a protocol %d packet\n", > > I think Linux''s coding style explicitly relaxes the 80-column limit for > string literals. >Ok. Good to know.> > iph->protocol); > > goto out; > > } > > @@ -922,6 +937,158 @@ out: > > return err; > > } > > > > +static int checksum_setup_ipv6(struct sk_buff *skb, int > recalculate_partial_csum) > > +{ > > + int err = -EPROTO; > > + struct ipv6hdr *ipv6h = (void *)skb->data; > > + u8 nexthdr; > > + unsigned int header_size; > > + unsigned int off; > > + bool fragment; > > + bool done; > > + > > + done = false; > > + > > + off = sizeof(struct ipv6hdr); > > + > > + header_size = skb->network_header + off; > > + maybe_pull_tail(skb, header_size); > > Same comment about skb lengths? > > > + > > + nexthdr = ipv6h->nexthdr; > > + > > + while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) > && > > + !done) { > > + switch (nexthdr) { > > + case IPPROTO_DSTOPTS: > > + case IPPROTO_HOPOPTS: > > + case IPPROTO_ROUTING: { > > + struct ipv6_opt_hdr *hp = (void *)(skb->data + off); > > + > > + header_size = skb->network_header + > > + off + > > + sizeof(struct ipv6_opt_hdr); > > + maybe_pull_tail(skb, header_size); > > + > > + nexthdr = hp->nexthdr; > > + off += ipv6_optlen(hp); > > + break; > > + } > > + case IPPROTO_AH: { > > + struct ip_auth_hdr *hp = (void *)(skb->data + off); > > + > > + header_size = skb->network_header + > > + off + > > + sizeof(struct ip_auth_hdr); > > + maybe_pull_tail(skb, header_size); > > + > > + nexthdr = hp->nexthdr; > > + off += (hp->hdrlen+2)<<2; > > I wonder if the core header would welcome a ipv6_ahlen cf optlen? >Yes, it would be nicer wouldn''t it.> > + break; > > + } > > + case IPPROTO_FRAGMENT: > > + fragment = true; > > + /* fall through */ > > + default: > > + done = true; > > + break; > > + } > > + } > > + > > + if (!done) { > > + if (net_ratelimit()) > > + pr_err("Failed to parse packet header\n"); > > Is it a requirement of the protocol that the list of IPPROTO_* we handle > must be followed by some other header? Do we really care or is it more > the upper stacks problem? Presumably it can get similarly malformed > packets off the wire?I''d have to check but I''m pretty sure none of the protos we handle can be terminating so if done is not set then the packet is malformed.> > Also you can use net_err_ratelimited.Ok.> > > + goto out; > > + } > > + > > + if (fragment) { > > + if (net_ratelimit()) > > + pr_err("Packet is a fragment!\n"); > > Is that a bad thing? >Well, you can''t do TCP or UDP checksum offload on a fragment, can you?> > + goto out; > > + } > > + > > + switch (nexthdr) { > > + case IPPROTO_TCP: > > + if (!skb_partial_csum_set(skb, off, > > + offsetof(struct tcphdr, check))) > > + goto out; > > + > > + if (recalculate_partial_csum) { > > + struct tcphdr *tcph = tcp_hdr(skb); > > + > > + header_size = skb->network_header + > > + off + > > + sizeof(struct tcphdr); > > + maybe_pull_tail(skb, header_size); > > + > > + tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, > > + &ipv6h->daddr, > > + skb->len - off, > > + IPPROTO_TCP, 0); > > + } > > + break; > > + case IPPROTO_UDP: > > + if (!skb_partial_csum_set(skb, off, > > + offsetof(struct udphdr, check))) > > + goto out; > > + > > + if (recalculate_partial_csum) { > > + struct udphdr *udph = udp_hdr(skb); > > + > > + header_size = skb->network_header + > > + off + > > + sizeof(struct udphdr); > > + maybe_pull_tail(skb, header_size); > > + > > + udph->check = ~csum_ipv6_magic(&ipv6h->saddr, > > + &ipv6h->daddr, > > + skb->len - off, > > + IPPROTO_UDP, 0); > > + } > > + break; > > + default: > > + if (net_ratelimit()) > > + pr_err("Attempting to checksum a non-TCP/UDP > packet, " > > + "dropping a protocol %d packet\n", > > + nexthdr); > > + goto out; > > + } > > + > > + err = 0; > > + > > +out: > > + return err; > > +} > > + > > +static int checksum_setup(struct net_device *dev, struct sk_buff *skb) > > +{ > > + int err = -EPROTO; > > + int recalculate_partial_csum = 0; > > + > > + /* > > + * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy > > + * peers can fail to set NETRXF_csum_blank when sending a GSO > > + * frame. In this case force the SKB to CHECKSUM_PARTIAL and > > + * recalculate the partial checksum. > > + */ > > + if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { > > Is it worth making this ipv4 only. Assuming that anything which adds v6 > support has already fixed this bug? That would save time in > checksum_setup_ipv6. >That''s true.> Speaking of which can checksum_setup_* be shared with netfront > somehow? > Or maybe even put in common code? (Perhaps as a future cleanup) >I think it really should be in common code, but I think that would be better handled as a separate patch to remove the duplication. Thanks for the comprehensive review! Paul> > + struct netfront_info *np = netdev_priv(dev); > > + np->rx_gso_checksum_fixup++; > > + skb->ip_summed = CHECKSUM_PARTIAL; > > + recalculate_partial_csum = 1; > > + } > > + > > + /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ > > + if (skb->ip_summed != CHECKSUM_PARTIAL) > > + return 0; > > + > > + if (skb->protocol == htons(ETH_P_IP)) > > + err = checksum_setup_ip(skb, recalculate_partial_csum); > > + else if (skb->protocol == htons(ETH_P_IPV6)) > > + err = checksum_setup_ipv6(skb, recalculate_partial_csum); > > + > > + return err; > > +} > > + > > static int handle_incoming_queue(struct net_device *dev, > [...]
Ian Campbell
2013-Nov-07 11:49 UTC
Re: [PATCH net-next] xen-netfront: Add support for IPv6 offloads
On Thu, 2013-11-07 at 11:02 +0000, Paul Durrant wrote:> > (I found it confusing because you reset it in the recalculate_partial > > rather than extending it which is what I expected) > > > > maybe_pull_tail doesn''t guarantee to pull up to the length of the given > > parameter, e.g. if it is more then skb->len. Which I think means you > > need some other explicit skb len checks around the place, don''t you? > > Otherwise you risk running past the end of the skb for a malformed > > frame. > > > > Yes, that''s true. Probably best to have maybe_pull_tail() return a > failure if it doesn''t managed to pullup the requested amount then.That would work.> > Is it a requirement of the protocol that the list of IPPROTO_* we handle > > must be followed by some other header? Do we really care or is it more > > the upper stacks problem? Presumably it can get similarly malformed > > packets off the wire? > > I''d have to check but I''m pretty sure none of the protos we handle can > be terminating so if done is not set then the packet is malformed.OK. I''m not sure this is "our" problem though. we could just silently throw this corrupt frame up at the stack without trying to redo the checksum. Isn''t that what would happen with real hardware?> > > + goto out; > > > + } > > > + > > > + if (fragment) { > > > + if (net_ratelimit()) > > > + pr_err("Packet is a fragment!\n"); > > > > Is that a bad thing? > > > > Well, you can''t do TCP or UDP checksum offload on a fragment, can you?I guess I meant (as above), why do we particularly care? We could just throw it at the stack, who will account it as a RX error and carry on> > Speaking of which can checksum_setup_* be shared with netfront > > somehow? > > Or maybe even put in common code? (Perhaps as a future cleanup) > > > > I think it really should be in common code, but I think that would be > better handled as a separate patch to remove the duplication.I''m ok with that. (I''m not netfront maintainer though...)> Thanks for the comprehensive review!No problem!
Paul Durrant
2013-Nov-15 11:19 UTC
Re: [PATCH net-next] xen-netfront: Add support for IPv6 offloads
> -----Original Message----- > From: xen-devel-bounces@lists.xen.org [mailto:xen-devel- > bounces@lists.xen.org] On Behalf Of Paul Durrant > Sent: 07 November 2013 11:02 > To: Ian Campbell > Cc: Boris Ostrovsky; David Vrabel; xen-devel@lists.xen.org > Subject: Re: [Xen-devel] [PATCH net-next] xen-netfront: Add support for > IPv6 offloads > > > -----Original Message----- > > From: Ian Campbell > > Sent: 07 November 2013 10:49 > > To: Paul Durrant > > Cc: xen-devel@lists.xen.org; Boris Ostrovsky; David Vrabel > > Subject: Re: [Xen-devel] [PATCH net-next] xen-netfront: Add support for > > IPv6 offloads > > > > On Tue, 2013-11-05 at 11:59 +0000, Paul Durrant wrote: > > > This patch adds support for IPv6 checksum offload and GSO when those > > > fetaures are available in the backend. > > > > > > Signed-off-by: Paul Durrant <paul.durrant@citrix.com> > > > Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > > > Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> > > > Cc: David Vrabel <david.vrabel@citrix.com> > > > --- > > > drivers/net/xen-netfront.c | 263 > > ++++++++++++++++++++++++++++++++++++++------ > > > 1 file changed, 231 insertions(+), 32 deletions(-) > > > > > > diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c > > > index dd1011e..27212d8 100644 > > > --- a/drivers/net/xen-netfront.c > > > +++ b/drivers/net/xen-netfront.c > > > @@ -616,7 +616,9 @@ static int xennet_start_xmit(struct sk_buff *skb, > > struct net_device *dev) > > > tx->flags |= XEN_NETTXF_extra_info; > > > > > > gso->u.gso.size = skb_shinfo(skb)->gso_size; > > > - gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; > > > + gso->u.gso.type = (skb_shinfo(skb)->gso_type & > > SKB_GSO_TCPV6) ? > > > + XEN_NETIF_GSO_TYPE_TCPV6 : > > > + XEN_NETIF_GSO_TYPE_TCPV4; > > > gso->u.gso.pad = 0; > > > gso->u.gso.features = 0; > > > > > > @@ -808,15 +810,18 @@ static int xennet_set_skb_gso(struct sk_buff > > *skb, > > > return -EINVAL; > > > } > > > > > > - /* Currently only TCPv4 S.O. is supported. */ > > > - if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { > > > + if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 && > > > + gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) { > > > if (net_ratelimit()) > > > pr_warn("Bad GSO type %d\n", gso->u.gso.type); > > > return -EINVAL; > > > } > > > > > > skb_shinfo(skb)->gso_size = gso->u.gso.size; > > > - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; > > > + skb_shinfo(skb)->gso_type > > > + (gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ? > > > + SKB_GSO_TCPV4 : > > > + SKB_GSO_TCPV6; > > > > > > /* Header must be checked, and gso_segs computed. */ > > > skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; > > > @@ -856,62 +861,72 @@ static RING_IDX xennet_fill_frags(struct > > netfront_info *np, > > > return cons; > > > } > > > > > > -static int checksum_setup(struct net_device *dev, struct sk_buff *skb) > > > +static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len) > > > { > > > - struct iphdr *iph; > > > - int err = -EPROTO; > > > - int recalculate_partial_csum = 0; > > > - > > > - /* > > > - * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy > > > - * peers can fail to set NETRXF_csum_blank when sending a GSO > > > - * frame. In this case force the SKB to CHECKSUM_PARTIAL and > > > - * recalculate the partial checksum. > > > - */ > > > - if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { > > > - struct netfront_info *np = netdev_priv(dev); > > > - np->rx_gso_checksum_fixup++; > > > - skb->ip_summed = CHECKSUM_PARTIAL; > > > - recalculate_partial_csum = 1; > > > + if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) { > > > + /* If we need to pullup then pullup to the max, so we > > > + * won''t need to do it again. > > > + */ > > > + int target = min_t(int, skb->len, MAX_TCP_HEADER); > > > + __pskb_pull_tail(skb, target - skb_headlen(skb)); > > > > Should the functions "len" argument not be involved somewhere in this? > > What if it is bigger than MAX_TCP_HEADER for example? > > Hmm, good point. >Actually, now I look again, this is correct. The if statement checks len to see if a pullup is necessary, but if one is necessary then it always pulls up to the max. I still need to add a failure return to indicate the pullup did not achieve the desired length though. Paul> > > > > } > > > +} > > > > > > - /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ > > > - if (skb->ip_summed != CHECKSUM_PARTIAL) > > > - return 0; > > > +static int checksum_setup_ip(struct sk_buff *skb, int > > recalculate_partial_csum) > > > +{ > > > > > + struct iphdr *iph = (void *)skb->data; > > > + unsigned int header_size; > > > + unsigned int off; > > > + int err = -EPROTO; > > > > > > - if (skb->protocol != htons(ETH_P_IP)) > > > - goto out; > > > + off = sizeof(struct iphdr); > > > > > > - iph = (void *)skb->data; > > > + header_size = skb->network_header + off + MAX_IPOPTLEN; > > > + maybe_pull_tail(skb, header_size); > > > > You never reuse header_size other than setting it and immediately > > passing it to maybe_pull_tail, it could be inlined into the function > > call? > > > > Yes, it could but I thought this was tidier. It''s also the same as the code in > netback. > > > (I found it confusing because you reset it in the recalculate_partial > > rather than extending it which is what I expected) > > > > maybe_pull_tail doesn''t guarantee to pull up to the length of the given > > parameter, e.g. if it is more then skb->len. Which I think means you > > need some other explicit skb len checks around the place, don''t you? > > Otherwise you risk running past the end of the skb for a malformed > > frame. > > > > Yes, that''s true. Probably best to have maybe_pull_tail() return a failure if it > doesn''t managed to pullup the requested amount then. > > > > + > > > + off = iph->ihl * 4; > > > > > > switch (iph->protocol) { > > > case IPPROTO_TCP: > > > - if (!skb_partial_csum_set(skb, 4 * iph->ihl, > > > + if (!skb_partial_csum_set(skb, off, > > > offsetof(struct tcphdr, check))) > > > goto out; > > > > > > if (recalculate_partial_csum) { > > > struct tcphdr *tcph = tcp_hdr(skb); > > > + > > > + header_size = skb->network_header + > > > + off + > > > + sizeof(struct tcphdr); > > > + maybe_pull_tail(skb, header_size); > > > + > > > tcph->check = ~csum_tcpudp_magic(iph->saddr, iph- > > >daddr, > > > - skb->len - iph->ihl*4, > > > + skb->len - off, > > > IPPROTO_TCP, 0); > > > } > > > break; > > > case IPPROTO_UDP: > > > - if (!skb_partial_csum_set(skb, 4 * iph->ihl, > > > + if (!skb_partial_csum_set(skb, off, > > > offsetof(struct udphdr, check))) > > > goto out; > > > > > > if (recalculate_partial_csum) { > > > struct udphdr *udph = udp_hdr(skb); > > > + > > > + header_size = skb->network_header + > > > + off + > > > + sizeof(struct udphdr); > > > + maybe_pull_tail(skb, header_size); > > > + > > > udph->check = ~csum_tcpudp_magic(iph->saddr, > > iph->daddr, > > > - skb->len - iph->ihl*4, > > > + skb->len - off, > > > IPPROTO_UDP, 0); > > > } > > > break; > > > default: > > > if (net_ratelimit()) > > > - pr_err("Attempting to checksum a non-TCP/UDP > > packet, dropping a protocol %d packet\n", > > > + pr_err("Attempting to checksum a non-TCP/UDP > > packet, " > > > + "dropping a protocol %d packet\n", > > > > I think Linux''s coding style explicitly relaxes the 80-column limit for > > string literals. > > > > Ok. Good to know. > > > > iph->protocol); > > > goto out; > > > } > > > @@ -922,6 +937,158 @@ out: > > > return err; > > > } > > > > > > +static int checksum_setup_ipv6(struct sk_buff *skb, int > > recalculate_partial_csum) > > > +{ > > > + int err = -EPROTO; > > > + struct ipv6hdr *ipv6h = (void *)skb->data; > > > + u8 nexthdr; > > > + unsigned int header_size; > > > + unsigned int off; > > > + bool fragment; > > > + bool done; > > > + > > > + done = false; > > > + > > > + off = sizeof(struct ipv6hdr); > > > + > > > + header_size = skb->network_header + off; > > > + maybe_pull_tail(skb, header_size); > > > > Same comment about skb lengths? > > > > > + > > > + nexthdr = ipv6h->nexthdr; > > > + > > > + while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) > > && > > > + !done) { > > > + switch (nexthdr) { > > > + case IPPROTO_DSTOPTS: > > > + case IPPROTO_HOPOPTS: > > > + case IPPROTO_ROUTING: { > > > + struct ipv6_opt_hdr *hp = (void *)(skb->data + off); > > > + > > > + header_size = skb->network_header + > > > + off + > > > + sizeof(struct ipv6_opt_hdr); > > > + maybe_pull_tail(skb, header_size); > > > + > > > + nexthdr = hp->nexthdr; > > > + off += ipv6_optlen(hp); > > > + break; > > > + } > > > + case IPPROTO_AH: { > > > + struct ip_auth_hdr *hp = (void *)(skb->data + off); > > > + > > > + header_size = skb->network_header + > > > + off + > > > + sizeof(struct ip_auth_hdr); > > > + maybe_pull_tail(skb, header_size); > > > + > > > + nexthdr = hp->nexthdr; > > > + off += (hp->hdrlen+2)<<2; > > > > I wonder if the core header would welcome a ipv6_ahlen cf optlen? > > > > Yes, it would be nicer wouldn''t it. > > > > + break; > > > + } > > > + case IPPROTO_FRAGMENT: > > > + fragment = true; > > > + /* fall through */ > > > + default: > > > + done = true; > > > + break; > > > + } > > > + } > > > + > > > + if (!done) { > > > + if (net_ratelimit()) > > > + pr_err("Failed to parse packet header\n"); > > > > Is it a requirement of the protocol that the list of IPPROTO_* we handle > > must be followed by some other header? Do we really care or is it more > > the upper stacks problem? Presumably it can get similarly malformed > > packets off the wire? > > I''d have to check but I''m pretty sure none of the protos we handle can be > terminating so if done is not set then the packet is malformed. > > > > > Also you can use net_err_ratelimited. > > Ok. > > > > > > + goto out; > > > + } > > > + > > > + if (fragment) { > > > + if (net_ratelimit()) > > > + pr_err("Packet is a fragment!\n"); > > > > Is that a bad thing? > > > > Well, you can''t do TCP or UDP checksum offload on a fragment, can you? > > > > + goto out; > > > + } > > > + > > > + switch (nexthdr) { > > > + case IPPROTO_TCP: > > > + if (!skb_partial_csum_set(skb, off, > > > + offsetof(struct tcphdr, check))) > > > + goto out; > > > + > > > + if (recalculate_partial_csum) { > > > + struct tcphdr *tcph = tcp_hdr(skb); > > > + > > > + header_size = skb->network_header + > > > + off + > > > + sizeof(struct tcphdr); > > > + maybe_pull_tail(skb, header_size); > > > + > > > + tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, > > > + &ipv6h->daddr, > > > + skb->len - off, > > > + IPPROTO_TCP, 0); > > > + } > > > + break; > > > + case IPPROTO_UDP: > > > + if (!skb_partial_csum_set(skb, off, > > > + offsetof(struct udphdr, check))) > > > + goto out; > > > + > > > + if (recalculate_partial_csum) { > > > + struct udphdr *udph = udp_hdr(skb); > > > + > > > + header_size = skb->network_header + > > > + off + > > > + sizeof(struct udphdr); > > > + maybe_pull_tail(skb, header_size); > > > + > > > + udph->check = ~csum_ipv6_magic(&ipv6h->saddr, > > > + &ipv6h->daddr, > > > + skb->len - off, > > > + IPPROTO_UDP, 0); > > > + } > > > + break; > > > + default: > > > + if (net_ratelimit()) > > > + pr_err("Attempting to checksum a non-TCP/UDP > > packet, " > > > + "dropping a protocol %d packet\n", > > > + nexthdr); > > > + goto out; > > > + } > > > + > > > + err = 0; > > > + > > > +out: > > > + return err; > > > +} > > > + > > > +static int checksum_setup(struct net_device *dev, struct sk_buff *skb) > > > +{ > > > + int err = -EPROTO; > > > + int recalculate_partial_csum = 0; > > > + > > > + /* > > > + * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy > > > + * peers can fail to set NETRXF_csum_blank when sending a GSO > > > + * frame. In this case force the SKB to CHECKSUM_PARTIAL and > > > + * recalculate the partial checksum. > > > + */ > > > + if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { > > > > Is it worth making this ipv4 only. Assuming that anything which adds v6 > > support has already fixed this bug? That would save time in > > checksum_setup_ipv6. > > > > That''s true. > > > Speaking of which can checksum_setup_* be shared with netfront > > somehow? > > Or maybe even put in common code? (Perhaps as a future cleanup) > > > > I think it really should be in common code, but I think that would be better > handled as a separate patch to remove the duplication. > > Thanks for the comprehensive review! > > Paul > > > > + struct netfront_info *np = netdev_priv(dev); > > > + np->rx_gso_checksum_fixup++; > > > + skb->ip_summed = CHECKSUM_PARTIAL; > > > + recalculate_partial_csum = 1; > > > + } > > > + > > > + /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ > > > + if (skb->ip_summed != CHECKSUM_PARTIAL) > > > + return 0; > > > + > > > + if (skb->protocol == htons(ETH_P_IP)) > > > + err = checksum_setup_ip(skb, recalculate_partial_csum); > > > + else if (skb->protocol == htons(ETH_P_IPV6)) > > > + err = checksum_setup_ipv6(skb, recalculate_partial_csum); > > > + > > > + return err; > > > +} > > > + > > > static int handle_incoming_queue(struct net_device *dev, > > [...] > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
Ian Campbell
2013-Nov-19 10:28 UTC
Re: [PATCH net-next] xen-netfront: Add support for IPv6 offloads
On Fri, 2013-11-15 at 11:19 +0000, Paul Durrant wrote:> > -----Original Message----- > > From: xen-devel-bounces@lists.xen.org [mailto:xen-devel- > > bounces@lists.xen.org] On Behalf Of Paul Durrant > > Sent: 07 November 2013 11:02 > > To: Ian Campbell > > Cc: Boris Ostrovsky; David Vrabel; xen-devel@lists.xen.org > > Subject: Re: [Xen-devel] [PATCH net-next] xen-netfront: Add support for > > IPv6 offloads > > > > > -----Original Message----- > > > From: Ian Campbell > > > Sent: 07 November 2013 10:49 > > > To: Paul Durrant > > > Cc: xen-devel@lists.xen.org; Boris Ostrovsky; David Vrabel > > > Subject: Re: [Xen-devel] [PATCH net-next] xen-netfront: Add support for > > > IPv6 offloads > > > > > > On Tue, 2013-11-05 at 11:59 +0000, Paul Durrant wrote: > > > > This patch adds support for IPv6 checksum offload and GSO when those > > > > fetaures are available in the backend. > > > > > > > > Signed-off-by: Paul Durrant <paul.durrant@citrix.com> > > > > Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > > > > Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> > > > > Cc: David Vrabel <david.vrabel@citrix.com> > > > > --- > > > > drivers/net/xen-netfront.c | 263 > > > ++++++++++++++++++++++++++++++++++++++------ > > > > 1 file changed, 231 insertions(+), 32 deletions(-) > > > > > > > > diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c > > > > index dd1011e..27212d8 100644 > > > > --- a/drivers/net/xen-netfront.c > > > > +++ b/drivers/net/xen-netfront.c > > > > @@ -616,7 +616,9 @@ static int xennet_start_xmit(struct sk_buff *skb, > > > struct net_device *dev) > > > > tx->flags |= XEN_NETTXF_extra_info; > > > > > > > > gso->u.gso.size = skb_shinfo(skb)->gso_size; > > > > - gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; > > > > + gso->u.gso.type = (skb_shinfo(skb)->gso_type & > > > SKB_GSO_TCPV6) ? > > > > + XEN_NETIF_GSO_TYPE_TCPV6 : > > > > + XEN_NETIF_GSO_TYPE_TCPV4; > > > > gso->u.gso.pad = 0; > > > > gso->u.gso.features = 0; > > > > > > > > @@ -808,15 +810,18 @@ static int xennet_set_skb_gso(struct sk_buff > > > *skb, > > > > return -EINVAL; > > > > } > > > > > > > > - /* Currently only TCPv4 S.O. is supported. */ > > > > - if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { > > > > + if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 && > > > > + gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) { > > > > if (net_ratelimit()) > > > > pr_warn("Bad GSO type %d\n", gso->u.gso.type); > > > > return -EINVAL; > > > > } > > > > > > > > skb_shinfo(skb)->gso_size = gso->u.gso.size; > > > > - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; > > > > + skb_shinfo(skb)->gso_type > > > > + (gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ? > > > > + SKB_GSO_TCPV4 : > > > > + SKB_GSO_TCPV6; > > > > > > > > /* Header must be checked, and gso_segs computed. */ > > > > skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; > > > > @@ -856,62 +861,72 @@ static RING_IDX xennet_fill_frags(struct > > > netfront_info *np, > > > > return cons; > > > > } > > > > > > > > -static int checksum_setup(struct net_device *dev, struct sk_buff *skb) > > > > +static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len) > > > > { > > > > - struct iphdr *iph; > > > > - int err = -EPROTO; > > > > - int recalculate_partial_csum = 0; > > > > - > > > > - /* > > > > - * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy > > > > - * peers can fail to set NETRXF_csum_blank when sending a GSO > > > > - * frame. In this case force the SKB to CHECKSUM_PARTIAL and > > > > - * recalculate the partial checksum. > > > > - */ > > > > - if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { > > > > - struct netfront_info *np = netdev_priv(dev); > > > > - np->rx_gso_checksum_fixup++; > > > > - skb->ip_summed = CHECKSUM_PARTIAL; > > > > - recalculate_partial_csum = 1; > > > > + if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) { > > > > + /* If we need to pullup then pullup to the max, so we > > > > + * won''t need to do it again. > > > > + */ > > > > + int target = min_t(int, skb->len, MAX_TCP_HEADER); > > > > + __pskb_pull_tail(skb, target - skb_headlen(skb)); > > > > > > Should the functions "len" argument not be involved somewhere in this? > > > What if it is bigger than MAX_TCP_HEADER for example? > > > > Hmm, good point. > > > > Actually, now I look again, this is correct. The if statement checks > len to see if a pullup is necessary, but if one is necessary then it > always pulls up to the max.So it is guaranteed that len <= MAX_TCP_HEADER in all circumstances? That might surprise some callers, since the name doesn''t really indicate this has anything to do with TCP. How about passing max in as an argument, where all callers would currently pass MAX_TCP_HEADER?> I still need to add a failure return to indicate the pullup did not > achieve the desired length though. > > Paul > > > > > > > > } > > > > +} > > > > > > > > - /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ > > > > - if (skb->ip_summed != CHECKSUM_PARTIAL) > > > > - return 0; > > > > +static int checksum_setup_ip(struct sk_buff *skb, int > > > recalculate_partial_csum) > > > > +{ > > > > > > > + struct iphdr *iph = (void *)skb->data; > > > > + unsigned int header_size; > > > > + unsigned int off; > > > > + int err = -EPROTO; > > > > > > > > - if (skb->protocol != htons(ETH_P_IP)) > > > > - goto out; > > > > + off = sizeof(struct iphdr); > > > > > > > > - iph = (void *)skb->data; > > > > + header_size = skb->network_header + off + MAX_IPOPTLEN; > > > > + maybe_pull_tail(skb, header_size); > > > > > > You never reuse header_size other than setting it and immediately > > > passing it to maybe_pull_tail, it could be inlined into the function > > > call? > > > > > > > Yes, it could but I thought this was tidier. It''s also the same as the code in > > netback. > > > > > (I found it confusing because you reset it in the recalculate_partial > > > rather than extending it which is what I expected) > > > > > > maybe_pull_tail doesn''t guarantee to pull up to the length of the given > > > parameter, e.g. if it is more then skb->len. Which I think means you > > > need some other explicit skb len checks around the place, don''t you? > > > Otherwise you risk running past the end of the skb for a malformed > > > frame. > > > > > > > Yes, that''s true. Probably best to have maybe_pull_tail() return a failure if it > > doesn''t managed to pullup the requested amount then. > > > > > > + > > > > + off = iph->ihl * 4; > > > > > > > > switch (iph->protocol) { > > > > case IPPROTO_TCP: > > > > - if (!skb_partial_csum_set(skb, 4 * iph->ihl, > > > > + if (!skb_partial_csum_set(skb, off, > > > > offsetof(struct tcphdr, check))) > > > > goto out; > > > > > > > > if (recalculate_partial_csum) { > > > > struct tcphdr *tcph = tcp_hdr(skb); > > > > + > > > > + header_size = skb->network_header + > > > > + off + > > > > + sizeof(struct tcphdr); > > > > + maybe_pull_tail(skb, header_size); > > > > + > > > > tcph->check = ~csum_tcpudp_magic(iph->saddr, iph- > > > >daddr, > > > > - skb->len - iph->ihl*4, > > > > + skb->len - off, > > > > IPPROTO_TCP, 0); > > > > } > > > > break; > > > > case IPPROTO_UDP: > > > > - if (!skb_partial_csum_set(skb, 4 * iph->ihl, > > > > + if (!skb_partial_csum_set(skb, off, > > > > offsetof(struct udphdr, check))) > > > > goto out; > > > > > > > > if (recalculate_partial_csum) { > > > > struct udphdr *udph = udp_hdr(skb); > > > > + > > > > + header_size = skb->network_header + > > > > + off + > > > > + sizeof(struct udphdr); > > > > + maybe_pull_tail(skb, header_size); > > > > + > > > > udph->check = ~csum_tcpudp_magic(iph->saddr, > > > iph->daddr, > > > > - skb->len - iph->ihl*4, > > > > + skb->len - off, > > > > IPPROTO_UDP, 0); > > > > } > > > > break; > > > > default: > > > > if (net_ratelimit()) > > > > - pr_err("Attempting to checksum a non-TCP/UDP > > > packet, dropping a protocol %d packet\n", > > > > + pr_err("Attempting to checksum a non-TCP/UDP > > > packet, " > > > > + "dropping a protocol %d packet\n", > > > > > > I think Linux''s coding style explicitly relaxes the 80-column limit for > > > string literals. > > > > > > > Ok. Good to know. > > > > > > iph->protocol); > > > > goto out; > > > > } > > > > @@ -922,6 +937,158 @@ out: > > > > return err; > > > > } > > > > > > > > +static int checksum_setup_ipv6(struct sk_buff *skb, int > > > recalculate_partial_csum) > > > > +{ > > > > + int err = -EPROTO; > > > > + struct ipv6hdr *ipv6h = (void *)skb->data; > > > > + u8 nexthdr; > > > > + unsigned int header_size; > > > > + unsigned int off; > > > > + bool fragment; > > > > + bool done; > > > > + > > > > + done = false; > > > > + > > > > + off = sizeof(struct ipv6hdr); > > > > + > > > > + header_size = skb->network_header + off; > > > > + maybe_pull_tail(skb, header_size); > > > > > > Same comment about skb lengths? > > > > > > > + > > > > + nexthdr = ipv6h->nexthdr; > > > > + > > > > + while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) > > > && > > > > + !done) { > > > > + switch (nexthdr) { > > > > + case IPPROTO_DSTOPTS: > > > > + case IPPROTO_HOPOPTS: > > > > + case IPPROTO_ROUTING: { > > > > + struct ipv6_opt_hdr *hp = (void *)(skb->data + off); > > > > + > > > > + header_size = skb->network_header + > > > > + off + > > > > + sizeof(struct ipv6_opt_hdr); > > > > + maybe_pull_tail(skb, header_size); > > > > + > > > > + nexthdr = hp->nexthdr; > > > > + off += ipv6_optlen(hp); > > > > + break; > > > > + } > > > > + case IPPROTO_AH: { > > > > + struct ip_auth_hdr *hp = (void *)(skb->data + off); > > > > + > > > > + header_size = skb->network_header + > > > > + off + > > > > + sizeof(struct ip_auth_hdr); > > > > + maybe_pull_tail(skb, header_size); > > > > + > > > > + nexthdr = hp->nexthdr; > > > > + off += (hp->hdrlen+2)<<2; > > > > > > I wonder if the core header would welcome a ipv6_ahlen cf optlen? > > > > > > > Yes, it would be nicer wouldn''t it. > > > > > > + break; > > > > + } > > > > + case IPPROTO_FRAGMENT: > > > > + fragment = true; > > > > + /* fall through */ > > > > + default: > > > > + done = true; > > > > + break; > > > > + } > > > > + } > > > > + > > > > + if (!done) { > > > > + if (net_ratelimit()) > > > > + pr_err("Failed to parse packet header\n"); > > > > > > Is it a requirement of the protocol that the list of IPPROTO_* we handle > > > must be followed by some other header? Do we really care or is it more > > > the upper stacks problem? Presumably it can get similarly malformed > > > packets off the wire? > > > > I''d have to check but I''m pretty sure none of the protos we handle can be > > terminating so if done is not set then the packet is malformed. > > > > > > > > Also you can use net_err_ratelimited. > > > > Ok. > > > > > > > > > + goto out; > > > > + } > > > > + > > > > + if (fragment) { > > > > + if (net_ratelimit()) > > > > + pr_err("Packet is a fragment!\n"); > > > > > > Is that a bad thing? > > > > > > > Well, you can''t do TCP or UDP checksum offload on a fragment, can you? > > > > > > + goto out; > > > > + } > > > > + > > > > + switch (nexthdr) { > > > > + case IPPROTO_TCP: > > > > + if (!skb_partial_csum_set(skb, off, > > > > + offsetof(struct tcphdr, check))) > > > > + goto out; > > > > + > > > > + if (recalculate_partial_csum) { > > > > + struct tcphdr *tcph = tcp_hdr(skb); > > > > + > > > > + header_size = skb->network_header + > > > > + off + > > > > + sizeof(struct tcphdr); > > > > + maybe_pull_tail(skb, header_size); > > > > + > > > > + tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, > > > > + &ipv6h->daddr, > > > > + skb->len - off, > > > > + IPPROTO_TCP, 0); > > > > + } > > > > + break; > > > > + case IPPROTO_UDP: > > > > + if (!skb_partial_csum_set(skb, off, > > > > + offsetof(struct udphdr, check))) > > > > + goto out; > > > > + > > > > + if (recalculate_partial_csum) { > > > > + struct udphdr *udph = udp_hdr(skb); > > > > + > > > > + header_size = skb->network_header + > > > > + off + > > > > + sizeof(struct udphdr); > > > > + maybe_pull_tail(skb, header_size); > > > > + > > > > + udph->check = ~csum_ipv6_magic(&ipv6h->saddr, > > > > + &ipv6h->daddr, > > > > + skb->len - off, > > > > + IPPROTO_UDP, 0); > > > > + } > > > > + break; > > > > + default: > > > > + if (net_ratelimit()) > > > > + pr_err("Attempting to checksum a non-TCP/UDP > > > packet, " > > > > + "dropping a protocol %d packet\n", > > > > + nexthdr); > > > > + goto out; > > > > + } > > > > + > > > > + err = 0; > > > > + > > > > +out: > > > > + return err; > > > > +} > > > > + > > > > +static int checksum_setup(struct net_device *dev, struct sk_buff *skb) > > > > +{ > > > > + int err = -EPROTO; > > > > + int recalculate_partial_csum = 0; > > > > + > > > > + /* > > > > + * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy > > > > + * peers can fail to set NETRXF_csum_blank when sending a GSO > > > > + * frame. In this case force the SKB to CHECKSUM_PARTIAL and > > > > + * recalculate the partial checksum. > > > > + */ > > > > + if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { > > > > > > Is it worth making this ipv4 only. Assuming that anything which adds v6 > > > support has already fixed this bug? That would save time in > > > checksum_setup_ipv6. > > > > > > > That''s true. > > > > > Speaking of which can checksum_setup_* be shared with netfront > > > somehow? > > > Or maybe even put in common code? (Perhaps as a future cleanup) > > > > > > > I think it really should be in common code, but I think that would be better > > handled as a separate patch to remove the duplication. > > > > Thanks for the comprehensive review! > > > > Paul > > > > > > + struct netfront_info *np = netdev_priv(dev); > > > > + np->rx_gso_checksum_fixup++; > > > > + skb->ip_summed = CHECKSUM_PARTIAL; > > > > + recalculate_partial_csum = 1; > > > > + } > > > > + > > > > + /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ > > > > + if (skb->ip_summed != CHECKSUM_PARTIAL) > > > > + return 0; > > > > + > > > > + if (skb->protocol == htons(ETH_P_IP)) > > > > + err = checksum_setup_ip(skb, recalculate_partial_csum); > > > > + else if (skb->protocol == htons(ETH_P_IPV6)) > > > > + err = checksum_setup_ipv6(skb, recalculate_partial_csum); > > > > + > > > > + return err; > > > > +} > > > > + > > > > static int handle_incoming_queue(struct net_device *dev, > > > [...] > > > > _______________________________________________ > > Xen-devel mailing list > > Xen-devel@lists.xen.org > > http://lists.xen.org/xen-devel
Paul Durrant
2013-Nov-19 11:24 UTC
Re: [PATCH net-next] xen-netfront: Add support for IPv6 offloads
> -----Original Message----- > From: Ian Campbell > Sent: 19 November 2013 10:29 > To: Paul Durrant > Cc: Boris Ostrovsky; David Vrabel; xen-devel@lists.xen.org > Subject: Re: [Xen-devel] [PATCH net-next] xen-netfront: Add support for > IPv6 offloads > > On Fri, 2013-11-15 at 11:19 +0000, Paul Durrant wrote: > > > -----Original Message----- > > > From: xen-devel-bounces@lists.xen.org [mailto:xen-devel- > > > bounces@lists.xen.org] On Behalf Of Paul Durrant > > > Sent: 07 November 2013 11:02 > > > To: Ian Campbell > > > Cc: Boris Ostrovsky; David Vrabel; xen-devel@lists.xen.org > > > Subject: Re: [Xen-devel] [PATCH net-next] xen-netfront: Add support for > > > IPv6 offloads > > > > > > > -----Original Message----- > > > > From: Ian Campbell > > > > Sent: 07 November 2013 10:49 > > > > To: Paul Durrant > > > > Cc: xen-devel@lists.xen.org; Boris Ostrovsky; David Vrabel > > > > Subject: Re: [Xen-devel] [PATCH net-next] xen-netfront: Add support > for > > > > IPv6 offloads > > > > > > > > On Tue, 2013-11-05 at 11:59 +0000, Paul Durrant wrote: > > > > > This patch adds support for IPv6 checksum offload and GSO when > those > > > > > fetaures are available in the backend. > > > > > > > > > > Signed-off-by: Paul Durrant <paul.durrant@citrix.com> > > > > > Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > > > > > Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> > > > > > Cc: David Vrabel <david.vrabel@citrix.com> > > > > > --- > > > > > drivers/net/xen-netfront.c | 263 > > > > ++++++++++++++++++++++++++++++++++++++------ > > > > > 1 file changed, 231 insertions(+), 32 deletions(-) > > > > > > > > > > diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c > > > > > index dd1011e..27212d8 100644 > > > > > --- a/drivers/net/xen-netfront.c > > > > > +++ b/drivers/net/xen-netfront.c > > > > > @@ -616,7 +616,9 @@ static int xennet_start_xmit(struct sk_buff > *skb, > > > > struct net_device *dev) > > > > > tx->flags |= XEN_NETTXF_extra_info; > > > > > > > > > > gso->u.gso.size = skb_shinfo(skb)->gso_size; > > > > > - gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; > > > > > + gso->u.gso.type = (skb_shinfo(skb)->gso_type & > > > > SKB_GSO_TCPV6) ? > > > > > + XEN_NETIF_GSO_TYPE_TCPV6 : > > > > > + XEN_NETIF_GSO_TYPE_TCPV4; > > > > > gso->u.gso.pad = 0; > > > > > gso->u.gso.features = 0; > > > > > > > > > > @@ -808,15 +810,18 @@ static int xennet_set_skb_gso(struct > sk_buff > > > > *skb, > > > > > return -EINVAL; > > > > > } > > > > > > > > > > - /* Currently only TCPv4 S.O. is supported. */ > > > > > - if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { > > > > > + if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 && > > > > > + gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) { > > > > > if (net_ratelimit()) > > > > > pr_warn("Bad GSO type %d\n", gso- > >u.gso.type); > > > > > return -EINVAL; > > > > > } > > > > > > > > > > skb_shinfo(skb)->gso_size = gso->u.gso.size; > > > > > - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; > > > > > + skb_shinfo(skb)->gso_type > > > > > + (gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) > ? > > > > > + SKB_GSO_TCPV4 : > > > > > + SKB_GSO_TCPV6; > > > > > > > > > > /* Header must be checked, and gso_segs computed. */ > > > > > skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; > > > > > @@ -856,62 +861,72 @@ static RING_IDX xennet_fill_frags(struct > > > > netfront_info *np, > > > > > return cons; > > > > > } > > > > > > > > > > -static int checksum_setup(struct net_device *dev, struct sk_buff > *skb) > > > > > +static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int > len) > > > > > { > > > > > - struct iphdr *iph; > > > > > - int err = -EPROTO; > > > > > - int recalculate_partial_csum = 0; > > > > > - > > > > > - /* > > > > > - * A GSO SKB must be CHECKSUM_PARTIAL. However some > buggy > > > > > - * peers can fail to set NETRXF_csum_blank when sending a > GSO > > > > > - * frame. In this case force the SKB to CHECKSUM_PARTIAL > and > > > > > - * recalculate the partial checksum. > > > > > - */ > > > > > - if (skb->ip_summed != CHECKSUM_PARTIAL && > skb_is_gso(skb)) { > > > > > - struct netfront_info *np = netdev_priv(dev); > > > > > - np->rx_gso_checksum_fixup++; > > > > > - skb->ip_summed = CHECKSUM_PARTIAL; > > > > > - recalculate_partial_csum = 1; > > > > > + if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) { > > > > > + /* If we need to pullup then pullup to the max, so we > > > > > + * won''t need to do it again. > > > > > + */ > > > > > + int target = min_t(int, skb->len, MAX_TCP_HEADER); > > > > > + __pskb_pull_tail(skb, target - skb_headlen(skb)); > > > > > > > > Should the functions "len" argument not be involved somewhere in > this? > > > > What if it is bigger than MAX_TCP_HEADER for example? > > > > > > Hmm, good point. > > > > > > > Actually, now I look again, this is correct. The if statement checks > > len to see if a pullup is necessary, but if one is necessary then it > > always pulls up to the max. > > So it is guaranteed that len <= MAX_TCP_HEADER in all circumstances? > That might surprise some callers, since the name doesn''t really indicate > this has anything to do with TCP. How about passing max in as an > argument, where all callers would currently pass MAX_TCP_HEADER? >Ok, that sounds reasonable. It''s unclear to me whether there is any upper bound on the size of an IPv6 header, but google hits a recent thread in the IETF mail archives (starting at http://www.ietf.org/mail-archive/web/ipv6/current/msg18199.html ) suggesting that 256 might be a good number to choose but I agree it''s not directly related to 256, so maybe an argument and a local #define IPV6_MAX_HEADER of 256 would be better. Paul> > I still need to add a failure return to indicate the pullup did not > > achieve the desired length though. > > > > Paul > > > > > > > > > > > } > > > > > +} > > > > > > > > > > - /* A non-CHECKSUM_PARTIAL SKB does not require setup. > */ > > > > > - if (skb->ip_summed != CHECKSUM_PARTIAL) > > > > > - return 0; > > > > > +static int checksum_setup_ip(struct sk_buff *skb, int > > > > recalculate_partial_csum) > > > > > +{ > > > > > > > > > + struct iphdr *iph = (void *)skb->data; > > > > > + unsigned int header_size; > > > > > + unsigned int off; > > > > > + int err = -EPROTO; > > > > > > > > > > - if (skb->protocol != htons(ETH_P_IP)) > > > > > - goto out; > > > > > + off = sizeof(struct iphdr); > > > > > > > > > > - iph = (void *)skb->data; > > > > > + header_size = skb->network_header + off + > MAX_IPOPTLEN; > > > > > + maybe_pull_tail(skb, header_size); > > > > > > > > You never reuse header_size other than setting it and immediately > > > > passing it to maybe_pull_tail, it could be inlined into the function > > > > call? > > > > > > > > > > Yes, it could but I thought this was tidier. It''s also the same as the code in > > > netback. > > > > > > > (I found it confusing because you reset it in the recalculate_partial > > > > rather than extending it which is what I expected) > > > > > > > > maybe_pull_tail doesn''t guarantee to pull up to the length of the given > > > > parameter, e.g. if it is more then skb->len. Which I think means you > > > > need some other explicit skb len checks around the place, don''t you? > > > > Otherwise you risk running past the end of the skb for a malformed > > > > frame. > > > > > > > > > > Yes, that''s true. Probably best to have maybe_pull_tail() return a failure if > it > > > doesn''t managed to pullup the requested amount then. > > > > > > > > + > > > > > + off = iph->ihl * 4; > > > > > > > > > > switch (iph->protocol) { > > > > > case IPPROTO_TCP: > > > > > - if (!skb_partial_csum_set(skb, 4 * iph->ihl, > > > > > + if (!skb_partial_csum_set(skb, off, > > > > > offsetof(struct tcphdr, > check))) > > > > > goto out; > > > > > > > > > > if (recalculate_partial_csum) { > > > > > struct tcphdr *tcph = tcp_hdr(skb); > > > > > + > > > > > + header_size = skb->network_header + > > > > > + off + > > > > > + sizeof(struct tcphdr); > > > > > + maybe_pull_tail(skb, header_size); > > > > > + > > > > > tcph->check = ~csum_tcpudp_magic(iph- > >saddr, iph- > > > > >daddr, > > > > > - skb->len - > iph->ihl*4, > > > > > + skb->len - > off, > > > > > > IPPROTO_TCP, 0); > > > > > } > > > > > break; > > > > > case IPPROTO_UDP: > > > > > - if (!skb_partial_csum_set(skb, 4 * iph->ihl, > > > > > + if (!skb_partial_csum_set(skb, off, > > > > > offsetof(struct udphdr, > check))) > > > > > goto out; > > > > > > > > > > if (recalculate_partial_csum) { > > > > > struct udphdr *udph = udp_hdr(skb); > > > > > + > > > > > + header_size = skb->network_header + > > > > > + off + > > > > > + sizeof(struct udphdr); > > > > > + maybe_pull_tail(skb, header_size); > > > > > + > > > > > udph->check = ~csum_tcpudp_magic(iph- > >saddr, > > > > iph->daddr, > > > > > - skb->len - > iph->ihl*4, > > > > > + skb->len - > off, > > > > > > IPPROTO_UDP, 0); > > > > > } > > > > > break; > > > > > default: > > > > > if (net_ratelimit()) > > > > > - pr_err("Attempting to checksum a non- > TCP/UDP > > > > packet, dropping a protocol %d packet\n", > > > > > + pr_err("Attempting to checksum a non- > TCP/UDP > > > > packet, " > > > > > + "dropping a protocol %d packet\n", > > > > > > > > I think Linux''s coding style explicitly relaxes the 80-column limit for > > > > string literals. > > > > > > > > > > Ok. Good to know. > > > > > > > > iph->protocol); > > > > > goto out; > > > > > } > > > > > @@ -922,6 +937,158 @@ out: > > > > > return err; > > > > > } > > > > > > > > > > +static int checksum_setup_ipv6(struct sk_buff *skb, int > > > > recalculate_partial_csum) > > > > > +{ > > > > > + int err = -EPROTO; > > > > > + struct ipv6hdr *ipv6h = (void *)skb->data; > > > > > + u8 nexthdr; > > > > > + unsigned int header_size; > > > > > + unsigned int off; > > > > > + bool fragment; > > > > > + bool done; > > > > > + > > > > > + done = false; > > > > > + > > > > > + off = sizeof(struct ipv6hdr); > > > > > + > > > > > + header_size = skb->network_header + off; > > > > > + maybe_pull_tail(skb, header_size); > > > > > > > > Same comment about skb lengths? > > > > > > > > > + > > > > > + nexthdr = ipv6h->nexthdr; > > > > > + > > > > > + while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h- > >payload_len)) > > > > && > > > > > + !done) { > > > > > + switch (nexthdr) { > > > > > + case IPPROTO_DSTOPTS: > > > > > + case IPPROTO_HOPOPTS: > > > > > + case IPPROTO_ROUTING: { > > > > > + struct ipv6_opt_hdr *hp = (void *)(skb->data > + off); > > > > > + > > > > > + header_size = skb->network_header + > > > > > + off + > > > > > + sizeof(struct ipv6_opt_hdr); > > > > > + maybe_pull_tail(skb, header_size); > > > > > + > > > > > + nexthdr = hp->nexthdr; > > > > > + off += ipv6_optlen(hp); > > > > > + break; > > > > > + } > > > > > + case IPPROTO_AH: { > > > > > + struct ip_auth_hdr *hp = (void *)(skb->data > + off); > > > > > + > > > > > + header_size = skb->network_header + > > > > > + off + > > > > > + sizeof(struct ip_auth_hdr); > > > > > + maybe_pull_tail(skb, header_size); > > > > > + > > > > > + nexthdr = hp->nexthdr; > > > > > + off += (hp->hdrlen+2)<<2; > > > > > > > > I wonder if the core header would welcome a ipv6_ahlen cf optlen? > > > > > > > > > > Yes, it would be nicer wouldn''t it. > > > > > > > > + break; > > > > > + } > > > > > + case IPPROTO_FRAGMENT: > > > > > + fragment = true; > > > > > + /* fall through */ > > > > > + default: > > > > > + done = true; > > > > > + break; > > > > > + } > > > > > + } > > > > > + > > > > > + if (!done) { > > > > > + if (net_ratelimit()) > > > > > + pr_err("Failed to parse packet header\n"); > > > > > > > > Is it a requirement of the protocol that the list of IPPROTO_* we handle > > > > must be followed by some other header? Do we really care or is it more > > > > the upper stacks problem? Presumably it can get similarly malformed > > > > packets off the wire? > > > > > > I''d have to check but I''m pretty sure none of the protos we handle can be > > > terminating so if done is not set then the packet is malformed. > > > > > > > > > > > Also you can use net_err_ratelimited. > > > > > > Ok. > > > > > > > > > > > > + goto out; > > > > > + } > > > > > + > > > > > + if (fragment) { > > > > > + if (net_ratelimit()) > > > > > + pr_err("Packet is a fragment!\n"); > > > > > > > > Is that a bad thing? > > > > > > > > > > Well, you can''t do TCP or UDP checksum offload on a fragment, can you? > > > > > > > > + goto out; > > > > > + } > > > > > + > > > > > + switch (nexthdr) { > > > > > + case IPPROTO_TCP: > > > > > + if (!skb_partial_csum_set(skb, off, > > > > > + offsetof(struct tcphdr, > check))) > > > > > + goto out; > > > > > + > > > > > + if (recalculate_partial_csum) { > > > > > + struct tcphdr *tcph = tcp_hdr(skb); > > > > > + > > > > > + header_size = skb->network_header + > > > > > + off + > > > > > + sizeof(struct tcphdr); > > > > > + maybe_pull_tail(skb, header_size); > > > > > + > > > > > + tcph->check = ~csum_ipv6_magic(&ipv6h- > >saddr, > > > > > + &ipv6h->daddr, > > > > > + skb->len - off, > > > > > + IPPROTO_TCP, 0); > > > > > + } > > > > > + break; > > > > > + case IPPROTO_UDP: > > > > > + if (!skb_partial_csum_set(skb, off, > > > > > + offsetof(struct udphdr, > check))) > > > > > + goto out; > > > > > + > > > > > + if (recalculate_partial_csum) { > > > > > + struct udphdr *udph = udp_hdr(skb); > > > > > + > > > > > + header_size = skb->network_header + > > > > > + off + > > > > > + sizeof(struct udphdr); > > > > > + maybe_pull_tail(skb, header_size); > > > > > + > > > > > + udph->check = ~csum_ipv6_magic(&ipv6h- > >saddr, > > > > > + &ipv6h->daddr, > > > > > + skb->len - off, > > > > > + IPPROTO_UDP, 0); > > > > > + } > > > > > + break; > > > > > + default: > > > > > + if (net_ratelimit()) > > > > > + pr_err("Attempting to checksum a non- > TCP/UDP > > > > packet, " > > > > > + "dropping a protocol %d packet\n", > > > > > + nexthdr); > > > > > + goto out; > > > > > + } > > > > > + > > > > > + err = 0; > > > > > + > > > > > +out: > > > > > + return err; > > > > > +} > > > > > + > > > > > +static int checksum_setup(struct net_device *dev, struct sk_buff > *skb) > > > > > +{ > > > > > + int err = -EPROTO; > > > > > + int recalculate_partial_csum = 0; > > > > > + > > > > > + /* > > > > > + * A GSO SKB must be CHECKSUM_PARTIAL. However some > buggy > > > > > + * peers can fail to set NETRXF_csum_blank when sending a > GSO > > > > > + * frame. In this case force the SKB to CHECKSUM_PARTIAL > and > > > > > + * recalculate the partial checksum. > > > > > + */ > > > > > + if (skb->ip_summed != CHECKSUM_PARTIAL && > skb_is_gso(skb)) { > > > > > > > > Is it worth making this ipv4 only. Assuming that anything which adds v6 > > > > support has already fixed this bug? That would save time in > > > > checksum_setup_ipv6. > > > > > > > > > > That''s true. > > > > > > > Speaking of which can checksum_setup_* be shared with netfront > > > > somehow? > > > > Or maybe even put in common code? (Perhaps as a future cleanup) > > > > > > > > > > I think it really should be in common code, but I think that would be > better > > > handled as a separate patch to remove the duplication. > > > > > > Thanks for the comprehensive review! > > > > > > Paul > > > > > > > > + struct netfront_info *np = netdev_priv(dev); > > > > > + np->rx_gso_checksum_fixup++; > > > > > + skb->ip_summed = CHECKSUM_PARTIAL; > > > > > + recalculate_partial_csum = 1; > > > > > + } > > > > > + > > > > > + /* A non-CHECKSUM_PARTIAL SKB does not require setup. > */ > > > > > + if (skb->ip_summed != CHECKSUM_PARTIAL) > > > > > + return 0; > > > > > + > > > > > + if (skb->protocol == htons(ETH_P_IP)) > > > > > + err = checksum_setup_ip(skb, > recalculate_partial_csum); > > > > > + else if (skb->protocol == htons(ETH_P_IPV6)) > > > > > + err = checksum_setup_ipv6(skb, > recalculate_partial_csum); > > > > > + > > > > > + return err; > > > > > +} > > > > > + > > > > > static int handle_incoming_queue(struct net_device *dev, > > > > [...] > > > > > > _______________________________________________ > > > Xen-devel mailing list > > > Xen-devel@lists.xen.org > > > http://lists.xen.org/xen-devel >