Jon Mason
2006-Feb-06 19:43 UTC
[Xen-devel] [PATCH] Fix IPSec for Xen checksum offload packets
This patch fixes bug 143 (verified by Jim Dykman). Thanks, Jon Signed-off-by: James Dykman <dykman@us.ibm.com> Signed-off-by: Jon Mason <jdmason@us.ibm.com> # HG changeset patch # User jdmason@us.ibm.com # Node ID 079135b0d58fa0f8f4dc1b9b8ae4baab7d413f47 # Parent 57e6d721842703c08bf7dafbfb5efe3c9a44725d The Xen checksum offload feature attempts to insert a TCP/UDP checksums into already encrypted packets (esp4) in dom0. Obviously, it is not possible to insert a checksum into an already encrypted packet, so this patch inserts the checksum prior to encrypting packets. To do this cleanly, the TCP/UDP header pointers need to be pointed to the correct spot, so this functionality has been abstracted into a new function. diff -r 57e6d7218427 -r 079135b0d58f linux-2.6-xen-sparse/net/core/dev.c --- a/linux-2.6-xen-sparse/net/core/dev.c Fri Feb 3 18:45:14 2006 +++ b/linux-2.6-xen-sparse/net/core/dev.c Mon Feb 6 19:34:52 2006 @@ -1206,76 +1206,16 @@ return 0; } -#define HARD_TX_LOCK(dev, cpu) { \ - if ((dev->features & NETIF_F_LLTX) == 0) { \ - spin_lock(&dev->xmit_lock); \ - dev->xmit_lock_owner = cpu; \ - } \ -} - -#define HARD_TX_UNLOCK(dev) { \ - if ((dev->features & NETIF_F_LLTX) == 0) { \ - dev->xmit_lock_owner = -1; \ - spin_unlock(&dev->xmit_lock); \ - } \ -} - -/** - * dev_queue_xmit - transmit a buffer - * @skb: buffer to transmit - * - * Queue a buffer for transmission to a network device. The caller must - * have set the device and priority and built the buffer before calling - * this function. The function can be called from an interrupt. - * - * A negative errno code is returned on a failure. A success does not - * guarantee the frame will be transmitted as it may be dropped due - * to congestion or traffic shaping. - * - * ----------------------------------------------------------------------------------- - * I notice this method can also return errors from the queue disciplines, - * including NET_XMIT_DROP, which is a positive value. So, errors can also - * be positive. - * - * Regardless of the return value, the skb is consumed, so it is currently - * difficult to retry a send to this method. (You can bump the ref count - * before sending to hold a reference for retry if you are careful.) - * - * When calling this method, interrupts MUST be enabled. This is because - * the BH enable code must have IRQs enabled so that it will not deadlock. - * --BLG - */ - -int dev_queue_xmit(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - struct Qdisc *q; - int rc = -ENOMEM; - - if (skb_shinfo(skb)->frag_list && - !(dev->features & NETIF_F_FRAGLIST) && - __skb_linearize(skb, GFP_ATOMIC)) - goto out_kfree_skb; - - /* Fragmented skb is linearized if device does not support SG, - * or if at least one of fragments is in highmem and device - * does not support DMA from it. - */ - if (skb_shinfo(skb)->nr_frags && - (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && - __skb_linearize(skb, GFP_ATOMIC)) - goto out_kfree_skb; - #ifdef CONFIG_XEN - /* If a checksum-deferred packet is forwarded to a device that needs a - * checksum, correct the pointers and force checksumming. - */ +int xen_checksum_setup(struct sk_buff *skb) +{ if (skb->proto_csum_blank) { + skb->proto_csum_blank = 0; if (skb->protocol != htons(ETH_P_IP)) - goto out_kfree_skb; + goto out; skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl; if (skb->h.raw >= skb->tail) - goto out_kfree_skb; + goto out; switch (skb->nh.iph->protocol) { case IPPROTO_TCP: skb->csum = offsetof(struct tcphdr, check); @@ -1284,18 +1224,89 @@ skb->csum = offsetof(struct udphdr, check); break; default: - if (net_ratelimit()) - printk(KERN_ERR "Attempting to checksum a non-" - "TCP/UDP packet, dropping a protocol" - " %d packet", skb->nh.iph->protocol); - rc = -EPROTO; - goto out_kfree_skb; + printk(KERN_ERR "Attempting to checksum a non-" + "TCP/UDP packet, dropping a protocol" + " %d packet", skb->nh.iph->protocol); + goto out; } if ((skb->h.raw + skb->csum + 2) > skb->tail) - goto out_kfree_skb; + goto out; skb->ip_summed = CHECKSUM_HW; } + + return 0; +out: + return -EPROTO; +} +#else +static inline int xen_checksum_setup(struct sk_buff *skb) {} #endif + +#define HARD_TX_LOCK(dev, cpu) { \ + if ((dev->features & NETIF_F_LLTX) == 0) { \ + spin_lock(&dev->xmit_lock); \ + dev->xmit_lock_owner = cpu; \ + } \ +} + +#define HARD_TX_UNLOCK(dev) { \ + if ((dev->features & NETIF_F_LLTX) == 0) { \ + dev->xmit_lock_owner = -1; \ + spin_unlock(&dev->xmit_lock); \ + } \ +} + +/** + * dev_queue_xmit - transmit a buffer + * @skb: buffer to transmit + * + * Queue a buffer for transmission to a network device. The caller must + * have set the device and priority and built the buffer before calling + * this function. The function can be called from an interrupt. + * + * A negative errno code is returned on a failure. A success does not + * guarantee the frame will be transmitted as it may be dropped due + * to congestion or traffic shaping. + * + * ----------------------------------------------------------------------------------- + * I notice this method can also return errors from the queue disciplines, + * including NET_XMIT_DROP, which is a positive value. So, errors can also + * be positive. + * + * Regardless of the return value, the skb is consumed, so it is currently + * difficult to retry a send to this method. (You can bump the ref count + * before sending to hold a reference for retry if you are careful.) + * + * When calling this method, interrupts MUST be enabled. This is because + * the BH enable code must have IRQs enabled so that it will not deadlock. + * --BLG + */ + +int dev_queue_xmit(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct Qdisc *q; + int rc = -ENOMEM; + + if (skb_shinfo(skb)->frag_list && + !(dev->features & NETIF_F_FRAGLIST) && + __skb_linearize(skb, GFP_ATOMIC)) + goto out_kfree_skb; + + /* Fragmented skb is linearized if device does not support SG, + * or if at least one of fragments is in highmem and device + * does not support DMA from it. + */ + if (skb_shinfo(skb)->nr_frags && + (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && + __skb_linearize(skb, GFP_ATOMIC)) + goto out_kfree_skb; + + /* If a checksum-deferred packet is forwarded to a device that needs a + * checksum, correct the pointers and force checksumming. + */ + if (xen_checksum_setup(skb)) + goto out_kfree_skb; /* If packet is not checksummed and device does not support * checksumming for this protocol, complete checksumming here. @@ -3350,6 +3361,7 @@ EXPORT_SYMBOL(net_enable_timestamp); EXPORT_SYMBOL(net_disable_timestamp); EXPORT_SYMBOL(dev_get_flags); +EXPORT_SYMBOL(xen_checksum_setup); #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) EXPORT_SYMBOL(br_handle_frame_hook); diff -r 57e6d7218427 -r 079135b0d58f patches/linux-2.6.16-rc2/net-csum.patch --- a/patches/linux-2.6.16-rc2/net-csum.patch Fri Feb 3 18:45:14 2006 +++ b/patches/linux-2.6.16-rc2/net-csum.patch Mon Feb 6 19:34:52 2006 @@ -44,3 +44,27 @@ *portptr = newport; return 1; } +--- linux-2.6.16-rc2-xen0/net/ipv4/xfrm4_output.c.orig 2006-02-03 22:36:32.000000000 -0600 ++++ linux-2.6.16-rc2-xen0/net/ipv4/xfrm4_output.c 2006-02-03 22:41:39.000000000 -0600 +@@ -17,6 +17,8 @@ + #include <net/xfrm.h> + #include <net/icmp.h> + ++extern int xen_checksum_setup(struct sk_buff *skb); ++ + /* Add encapsulation header. + * + * In transport mode, the IP header will be moved forward to make space +@@ -102,7 +104,11 @@ static int xfrm4_output_one(struct sk_bu + struct dst_entry *dst = skb->dst; + struct xfrm_state *x = dst->xfrm; + int err; +- ++ ++ err = xen_checksum_setup(skb); ++ if (err) ++ goto error_nolock; ++ + if (skb->ip_summed == CHECKSUM_HW) { + err = skb_checksum_help(skb, 0); + if (err) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel