Amerigo Wang
2010-May-05 08:11 UTC
[Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
V5: Fix coding style problems pointed out by David. V4: Use "unlikely" to mark netpoll call path, suggested by Stephen. Handle NETDEV_GOING_DOWN case. V3: Update to latest Linus' tree. Fix deadlocks when releasing slaves of bonding devices. Thanks to Andy. V2: Fix some bugs of previous version. Remove ->netpoll_setup and ->netpoll_xmit, they are not necessary. Don't poll all underlying devices, poll ->real_dev in struct netpoll. Thanks to David for suggesting the above. ------------> This whole patchset is for adding netpoll support to bridge and bonding devices. I already tested it for bridge, bonding, bridge over bonding, and bonding over bridge. It looks fine now. To make bridge and bonding support netpoll, we need to adjust some netpoll generic code. This patch does the following things: 1) introduce two new priv_flags for struct net_device: IFF_IN_NETPOLL which identifies we are processing a netpoll; IFF_DISABLE_NETPOLL is used to disable netpoll support for a device at run-time; 2) introduce one new method for netdev_ops: ->ndo_netpoll_cleanup() is used to clean up netpoll when a device is removed. 3) introduce netpoll_poll_dev() which takes a struct net_device * parameter; export netpoll_send_skb() and netpoll_poll_dev() which will be used later; 4) hide a pointer to struct netpoll in struct netpoll_info, ditto. 5) introduce ->real_dev for struct netpoll. 6) introduce a new status NETDEV_BONDING_DESLAVE, which is used to disable netconsole before releasing a slave, to avoid deadlocks. 
Cc: David Miller <davem at davemloft.net> Cc: Neil Horman <nhorman at tuxdriver.com> Signed-off-by: WANG Cong <amwang at redhat.com> --- Index: linux-2.6/include/linux/if.h ==================================================================--- linux-2.6.orig/include/linux/if.h +++ linux-2.6/include/linux/if.h @@ -71,6 +71,8 @@ * release skb->dst */ #define IFF_DONT_BRIDGE 0x800 /* disallow bridging this ether dev */ +#define IFF_IN_NETPOLL 0x1000 /* whether we are processing netpoll */ +#define IFF_DISABLE_NETPOLL 0x2000 /* disable netpoll at run-time */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 Index: linux-2.6/include/linux/netdevice.h ==================================================================--- linux-2.6.orig/include/linux/netdevice.h +++ linux-2.6/include/linux/netdevice.h @@ -667,6 +667,7 @@ struct net_device_ops { unsigned short vid); #ifdef CONFIG_NET_POLL_CONTROLLER void (*ndo_poll_controller)(struct net_device *dev); + void (*ndo_netpoll_cleanup)(struct net_device *dev); #endif int (*ndo_set_vf_mac)(struct net_device *dev, int queue, u8 *mac); Index: linux-2.6/include/linux/netpoll.h ==================================================================--- linux-2.6.orig/include/linux/netpoll.h +++ linux-2.6/include/linux/netpoll.h @@ -14,6 +14,7 @@ struct netpoll { struct net_device *dev; + struct net_device *real_dev; char dev_name[IFNAMSIZ]; const char *name; void (*rx_hook)(struct netpoll *, int, char *, int); @@ -36,8 +37,11 @@ struct netpoll_info { struct sk_buff_head txq; struct delayed_work tx_work; + + struct netpoll *netpoll; }; +void netpoll_poll_dev(struct net_device *dev); void netpoll_poll(struct netpoll *np); void netpoll_send_udp(struct netpoll *np, const char *msg, int len); void netpoll_print_options(struct netpoll *np); @@ -47,6 +51,7 @@ int netpoll_trap(void); void netpoll_set_trap(int trap); void netpoll_cleanup(struct netpoll *np); int __netpoll_rx(struct sk_buff *skb); +void 
netpoll_send_skb(struct netpoll *np, struct sk_buff *skb); #ifdef CONFIG_NETPOLL Index: linux-2.6/net/core/netpoll.c ==================================================================--- linux-2.6.orig/net/core/netpoll.c +++ linux-2.6/net/core/netpoll.c @@ -179,9 +179,8 @@ static void service_arp_queue(struct net } } -void netpoll_poll(struct netpoll *np) +void netpoll_poll_dev(struct net_device *dev) { - struct net_device *dev = np->dev; const struct net_device_ops *ops; if (!dev || !netif_running(dev)) @@ -201,6 +200,11 @@ void netpoll_poll(struct netpoll *np) zap_completion_queue(); } +void netpoll_poll(struct netpoll *np) +{ + netpoll_poll_dev(np->dev); +} + static void refill_skbs(void) { struct sk_buff *skb; @@ -282,7 +286,7 @@ static int netpoll_owner_active(struct n return 0; } -static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) +void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { int status = NETDEV_TX_BUSY; unsigned long tries; @@ -308,7 +312,9 @@ static void netpoll_send_skb(struct netp tries > 0; --tries) { if (__netif_tx_trylock(txq)) { if (!netif_tx_queue_stopped(txq)) { + dev->priv_flags |= IFF_IN_NETPOLL; status = ops->ndo_start_xmit(skb, dev); + dev->priv_flags &= ~IFF_IN_NETPOLL; if (status == NETDEV_TX_OK) txq_trans_update(txq); } @@ -756,7 +762,10 @@ int netpoll_setup(struct netpoll *np) atomic_inc(&npinfo->refcnt); } - if (!ndev->netdev_ops->ndo_poll_controller) { + npinfo->netpoll = np; + + if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || + !ndev->netdev_ops->ndo_poll_controller) { printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", np->name, np->dev_name); err = -ENOTSUPP; @@ -878,6 +887,7 @@ void netpoll_cleanup(struct netpoll *np) } if (atomic_dec_and_test(&npinfo->refcnt)) { + const struct net_device_ops *ops; skb_queue_purge(&npinfo->arp_tx); skb_queue_purge(&npinfo->txq); cancel_rearming_delayed_work(&npinfo->tx_work); @@ -885,7 +895,11 @@ void netpoll_cleanup(struct netpoll *np) /* clean after 
last, unfinished work */ __skb_queue_purge(&npinfo->txq); kfree(npinfo); - np->dev->npinfo = NULL; + ops = np->dev->netdev_ops; + if (ops->ndo_netpoll_cleanup) + ops->ndo_netpoll_cleanup(np->dev); + else + np->dev->npinfo = NULL; } } @@ -908,6 +922,7 @@ void netpoll_set_trap(int trap) atomic_dec(&trapped); } +EXPORT_SYMBOL(netpoll_send_skb); EXPORT_SYMBOL(netpoll_set_trap); EXPORT_SYMBOL(netpoll_trap); EXPORT_SYMBOL(netpoll_print_options); @@ -915,4 +930,5 @@ EXPORT_SYMBOL(netpoll_parse_options); EXPORT_SYMBOL(netpoll_setup); EXPORT_SYMBOL(netpoll_cleanup); EXPORT_SYMBOL(netpoll_send_udp); +EXPORT_SYMBOL(netpoll_poll_dev); EXPORT_SYMBOL(netpoll_poll); Index: linux-2.6/drivers/net/netconsole.c ==================================================================--- linux-2.6.orig/drivers/net/netconsole.c +++ linux-2.6/drivers/net/netconsole.c @@ -665,7 +665,8 @@ static int netconsole_netdev_event(struc struct netconsole_target *nt; struct net_device *dev = ptr; - if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER)) + if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER || + event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN)) goto done; spin_lock_irqsave(&target_list_lock, flags); @@ -677,19 +678,21 @@ static int netconsole_netdev_event(struc strlcpy(nt->np.dev_name, dev->name, IFNAMSIZ); break; case NETDEV_UNREGISTER: - if (!nt->enabled) - break; netpoll_cleanup(&nt->np); + /* Fall through */ + case NETDEV_GOING_DOWN: + case NETDEV_BONDING_DESLAVE: nt->enabled = 0; - printk(KERN_INFO "netconsole: network logging stopped" - ", interface %s unregistered\n", - dev->name); break; } } netconsole_target_put(nt); } spin_unlock_irqrestore(&target_list_lock, flags); + if (event == NETDEV_UNREGISTER || event == NETDEV_BONDING_DESLAVE) + printk(KERN_INFO "netconsole: network logging stopped, " + "interface %s %s\n", dev->name, + event == NETDEV_UNREGISTER ? 
"unregistered" : "released slaves"); done: return NOTIFY_DONE; Index: linux-2.6/include/linux/notifier.h ==================================================================--- linux-2.6.orig/include/linux/notifier.h +++ linux-2.6/include/linux/notifier.h @@ -203,6 +203,7 @@ static inline int notifier_to_errno(int #define NETDEV_BONDING_NEWTYPE 0x000F #define NETDEV_POST_INIT 0x0010 #define NETDEV_UNREGISTER_BATCH 0x0011 +#define NETDEV_BONDING_DESLAVE 0x0012 #define SYS_DOWN 0x0001 /* Notify of system down */ #define SYS_RESTART SYS_DOWN
Amerigo Wang
2010-May-05 08:11 UTC
[Bridge] [v5 Patch 2/3] bridge: make bridge support netpoll
Based on the previous patch, make bridge support netpoll by: 1) implement the 2 methods to support netpoll for bridge; 2) modify netpoll during forwarding packets via bridge; 3) disable netpoll support of bridge when a netpoll-unabled device is added to bridge; 4) enable netpoll support when all underlying devices support netpoll. Cc: David Miller <davem at davemloft.net> Cc: Neil Horman <nhorman at tuxdriver.com> Cc: Stephen Hemminger <shemminger at linux-foundation.org> Cc: Matt Mackall <mpm at selenic.com> Signed-off-by: WANG Cong <amwang at redhat.com> --- Index: linux-2.6/net/bridge/br_device.c ==================================================================--- linux-2.6.orig/net/bridge/br_device.c +++ linux-2.6/net/bridge/br_device.c @@ -13,8 +13,10 @@ #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/netpoll.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> +#include <linux/list.h> #include <asm/uaccess.h> #include "br_private.h" @@ -162,6 +164,59 @@ static int br_set_tx_csum(struct net_dev return 0; } +#ifdef CONFIG_NET_POLL_CONTROLLER +bool br_devices_support_netpoll(struct net_bridge *br) +{ + struct net_bridge_port *p; + bool ret = true; + int count = 0; + unsigned long flags; + + spin_lock_irqsave(&br->lock, flags); + list_for_each_entry(p, &br->port_list, list) { + count++; + if ((p->dev->priv_flags & IFF_DISABLE_NETPOLL) || + !p->dev->netdev_ops->ndo_poll_controller) + ret = false; + } + spin_unlock_irqrestore(&br->lock, flags); + return count != 0 && ret; +} + +static void br_poll_controller(struct net_device *br_dev) +{ + struct netpoll *np = br_dev->npinfo->netpoll; + + if (np->real_dev != br_dev) + netpoll_poll_dev(np->real_dev); +} + +void br_netpoll_cleanup(struct net_device *br_dev) +{ + struct net_bridge *br = netdev_priv(br_dev); + struct net_bridge_port *p, *n; + const struct net_device_ops *ops; + + br->dev->npinfo = NULL; + list_for_each_entry_safe(p, n, &br->port_list, list) { + if (p->dev) { + ops = 
p->dev->netdev_ops; + if (ops->ndo_netpoll_cleanup) + ops->ndo_netpoll_cleanup(p->dev); + else + p->dev->npinfo = NULL; + } + } +} + +#else + +void br_netpoll_cleanup(struct net_device *br_dev) +{ +} + +#endif + static const struct ethtool_ops br_ethtool_ops = { .get_drvinfo = br_getinfo, .get_link = ethtool_op_get_link, @@ -184,6 +239,10 @@ static const struct net_device_ops br_ne .ndo_set_multicast_list = br_dev_set_multicast_list, .ndo_change_mtu = br_change_mtu, .ndo_do_ioctl = br_dev_ioctl, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_netpoll_cleanup = br_netpoll_cleanup, + .ndo_poll_controller = br_poll_controller, +#endif }; void br_dev_setup(struct net_device *dev) Index: linux-2.6/net/bridge/br_forward.c ==================================================================--- linux-2.6.orig/net/bridge/br_forward.c +++ linux-2.6/net/bridge/br_forward.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/netpoll.h> #include <linux/skbuff.h> #include <linux/if_vlan.h> #include <linux/netfilter_bridge.h> @@ -50,7 +51,13 @@ int br_dev_queue_push_xmit(struct sk_buf else { skb_push(skb, ETH_HLEN); - dev_queue_xmit(skb); +#ifdef CONFIG_NET_POLL_CONTROLLER + if (unlikely(skb->dev->priv_flags & IFF_IN_NETPOLL)) { + netpoll_send_skb(skb->dev->npinfo->netpoll, skb); + skb->dev->priv_flags &= ~IFF_IN_NETPOLL; + } else +#endif + dev_queue_xmit(skb); } } @@ -66,9 +73,23 @@ int br_forward_finish(struct sk_buff *sk static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { +#ifdef CONFIG_NET_POLL_CONTROLLER + struct net_bridge *br = to->br; + if (unlikely(br->dev->priv_flags & IFF_IN_NETPOLL)) { + struct netpoll *np; + to->dev->npinfo = skb->dev->npinfo; + np = skb->dev->npinfo->netpoll; + np->real_dev = np->dev = to->dev; + to->dev->priv_flags |= IFF_IN_NETPOLL; + } +#endif skb->dev = to->dev; NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, br_forward_finish); +#ifdef 
CONFIG_NET_POLL_CONTROLLER + if (skb->dev->npinfo) + skb->dev->npinfo->netpoll->dev = br->dev; +#endif } static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) Index: linux-2.6/net/bridge/br_if.c ==================================================================--- linux-2.6.orig/net/bridge/br_if.c +++ linux-2.6/net/bridge/br_if.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/netpoll.h> #include <linux/ethtool.h> #include <linux/if_arp.h> #include <linux/module.h> @@ -153,6 +154,14 @@ static void del_nbp(struct net_bridge_po kobject_uevent(&p->kobj, KOBJ_REMOVE); kobject_del(&p->kobj); +#ifdef CONFIG_NET_POLL_CONTROLLER + if (br_devices_support_netpoll(br)) + br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL; + if (dev->netdev_ops->ndo_netpoll_cleanup) + dev->netdev_ops->ndo_netpoll_cleanup(dev); + else + dev->npinfo = NULL; +#endif call_rcu(&p->rcu, destroy_nbp_rcu); } @@ -165,6 +174,8 @@ static void del_br(struct net_bridge *br del_nbp(p); } + br_netpoll_cleanup(br->dev); + del_timer_sync(&br->gc_timer); br_sysfs_delbr(br->dev); @@ -438,6 +449,20 @@ int br_add_if(struct net_bridge *br, str kobject_uevent(&p->kobj, KOBJ_ADD); +#ifdef CONFIG_NET_POLL_CONTROLLER + if (br_devices_support_netpoll(br)) { + br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL; + if (br->dev->npinfo) + dev->npinfo = br->dev->npinfo; + } else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) { + br->dev->priv_flags |= IFF_DISABLE_NETPOLL; + printk(KERN_INFO "New device %s does not support netpoll\n", + dev->name); + printk(KERN_INFO "Disabling netpoll for %s\n", + br->dev->name); + } +#endif + return 0; err2: br_fdb_delete_by_port(br, p, 1); Index: linux-2.6/net/bridge/br_private.h ==================================================================--- linux-2.6.orig/net/bridge/br_private.h +++ linux-2.6/net/bridge/br_private.h @@ -233,6 +233,8 @@ static inline int br_is_root_bridge(cons extern void br_dev_setup(struct net_device *dev); 
extern netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev); +extern bool br_devices_support_netpoll(struct net_bridge *br); +extern void br_netpoll_cleanup(struct net_device *br_dev); /* br_fdb.c */ extern int br_fdb_init(void);
Amerigo Wang
2010-May-05 08:11 UTC
[Bridge] [v5 Patch 3/3] bonding: make bonding support netpoll
Based on Andy's work, but I modified a lot. Similar to the patch for bridge, this patch does: 1) implement the 2 methods to support netpoll for bonding; 2) modify netpoll during forwarding packets via bonding; 3) disable netpoll support of bonding when a netpoll-unabled device is added to bonding; 4) enable netpoll support when all underlying devices support netpoll. Cc: Andy Gospodarek <gospo at redhat.com> Cc: Jeff Moyer <jmoyer at redhat.com> Cc: Matt Mackall <mpm at selenic.com> Cc: Neil Horman <nhorman at tuxdriver.com> Cc: Jay Vosburgh <fubar at us.ibm.com> Cc: David Miller <davem at davemloft.net> Signed-off-by: WANG Cong <amwang at redhat.com> --- Index: linux-2.6/drivers/net/bonding/bond_main.c ==================================================================--- linux-2.6.orig/drivers/net/bonding/bond_main.c +++ linux-2.6/drivers/net/bonding/bond_main.c @@ -59,6 +59,7 @@ #include <linux/uaccess.h> #include <linux/errno.h> #include <linux/netdevice.h> +#include <linux/netpoll.h> #include <linux/inetdevice.h> #include <linux/igmp.h> #include <linux/etherdevice.h> @@ -430,7 +431,18 @@ int bond_dev_queue_xmit(struct bonding * } skb->priority = 1; - dev_queue_xmit(skb); +#ifdef CONFIG_NET_POLL_CONTROLLER + if (unlikely(bond->dev->priv_flags & IFF_IN_NETPOLL)) { + struct netpoll *np = bond->dev->npinfo->netpoll; + slave_dev->npinfo = bond->dev->npinfo; + np->real_dev = np->dev = skb->dev; + slave_dev->priv_flags |= IFF_IN_NETPOLL; + netpoll_send_skb(np, skb); + slave_dev->priv_flags &= ~IFF_IN_NETPOLL; + np->dev = bond->dev; + } else +#endif + dev_queue_xmit(skb); return 0; } @@ -1329,6 +1341,61 @@ static void bond_detach_slave(struct bon bond->slave_cnt--; } +#ifdef CONFIG_NET_POLL_CONTROLLER +/* + * You must hold read lock on bond->lock before calling this. 
+ */ +static bool slaves_support_netpoll(struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *slave; + int i = 0; + bool ret = true; + + bond_for_each_slave(bond, slave, i) { + if ((slave->dev->priv_flags & IFF_DISABLE_NETPOLL) || + !slave->dev->netdev_ops->ndo_poll_controller) + ret = false; + } + return i != 0 && ret; +} + +static void bond_poll_controller(struct net_device *bond_dev) +{ + struct net_device *dev = bond_dev->npinfo->netpoll->real_dev; + if (dev != bond_dev) + netpoll_poll_dev(dev); +} + +static void bond_netpoll_cleanup(struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *slave; + const struct net_device_ops *ops; + int i; + + read_lock(&bond->lock); + bond_dev->npinfo = NULL; + bond_for_each_slave(bond, slave, i) { + if (slave->dev) { + ops = slave->dev->netdev_ops; + if (ops->ndo_netpoll_cleanup) + ops->ndo_netpoll_cleanup(slave->dev); + else + slave->dev->npinfo = NULL; + } + } + read_unlock(&bond->lock); +} + +#else + +static void bond_netpoll_cleanup(struct net_device *bond_dev) +{ +} + +#endif + /*---------------------------------- IOCTL ----------------------------------*/ static int bond_sethwaddr(struct net_device *bond_dev, @@ -1735,6 +1802,18 @@ int bond_enslave(struct net_device *bond bond_set_carrier(bond); +#ifdef CONFIG_NET_POLL_CONTROLLER + if (slaves_support_netpoll(bond_dev)) { + bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL; + if (bond_dev->npinfo) + slave_dev->npinfo = bond_dev->npinfo; + } else if (!(bond_dev->priv_flags & IFF_DISABLE_NETPOLL)) { + bond_dev->priv_flags |= IFF_DISABLE_NETPOLL; + pr_info("New slave device %s does not support netpoll\n", + slave_dev->name); + pr_info("Disabling netpoll support for %s\n", bond_dev->name); + } +#endif read_unlock(&bond->lock); res = bond_create_slave_symlinks(bond_dev, slave_dev); @@ -1801,6 +1880,7 @@ int bond_release(struct net_device *bond return -EINVAL; } + netdev_bonding_change(bond_dev, 
NETDEV_BONDING_DESLAVE); write_lock_bh(&bond->lock); slave = bond_get_slave_by_dev(bond, slave_dev); @@ -1929,6 +2009,17 @@ int bond_release(struct net_device *bond netdev_set_master(slave_dev, NULL); +#ifdef CONFIG_NET_POLL_CONTROLLER + read_lock_bh(&bond->lock); + if (slaves_support_netpoll(bond_dev)) + bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL; + read_unlock_bh(&bond->lock); + if (slave_dev->netdev_ops->ndo_netpoll_cleanup) + slave_dev->netdev_ops->ndo_netpoll_cleanup(slave_dev); + else + slave_dev->npinfo = NULL; +#endif + /* close slave before restoring its mac address */ dev_close(slave_dev); @@ -4448,6 +4539,10 @@ static const struct net_device_ops bond_ .ndo_vlan_rx_register = bond_vlan_rx_register, .ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = bond_vlan_rx_kill_vid, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_netpoll_cleanup = bond_netpoll_cleanup, + .ndo_poll_controller = bond_poll_controller, +#endif }; static void bond_destructor(struct net_device *bond_dev) @@ -4541,6 +4636,8 @@ static void bond_uninit(struct net_devic { struct bonding *bond = netdev_priv(bond_dev); + bond_netpoll_cleanup(bond_dev); + /* Release the bonded slaves */ bond_release_all(bond_dev);
Matt Mackall
2010-May-06 02:05 UTC
[Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
On Wed, 2010-05-05 at 04:11 -0400, Amerigo Wang wrote:
> V5:
> Fix coding style problems pointed by David.

Aside from my concern about the policy of disabling netpoll on bridges/bonds with only partial netpoll support, I don't have any remaining issues with this. But I'll leave it to other folks to ack the underlying driver bits for this series.

-- 
Mathematics is the supreme nostalgia of our time.
Flavio Leitner
2010-May-27 18:05 UTC
[Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
Hi guys! I finally could test this to see if an old problem reported on bugzilla[1] was fixed now, but unfortunately it is still there. The ticket is private I guess, but basically the problem happens when bonding driver tries to print something after it had taken the write_lock (monitor functions, enslave/de-enslave), so the printk() will pass through netpoll, then on bonding again which no matter what mode you use, it will try to read_lock() the lock again. The result is a deadlock and the entire system hangs. I manage to get a fresh backtrace with mode 1, see below: [ 93.167079] Call Trace: [ 93.167079] [<ffffffff81034cf9>] warn_slowpath_common+0x77/0x8f [ 93.167079] [<ffffffff81034d5e>] warn_slowpath_fmt+0x3c/0x3e [ 93.167079] [<ffffffff81366aef>] ? _raw_read_trylock+0x11/0x4b [ 93.167079] [<ffffffffa02a2c42>] ? bond_start_xmit+0x12b/0x401 [bonding] -> read_lock fails [ 93.167079] [<ffffffffa02a2c9f>] bond_start_xmit+0x188/0x401 [bonding] [ 93.167079] [<ffffffff81055b37>] ? trace_hardirqs_off+0xd/0xf [ 93.167079] [<ffffffff812dfdb9>] netpoll_send_skb+0xbd/0x1f3 [ 93.167079] [<ffffffff812e00ed>] netpoll_send_udp+0x1fe/0x20d [ 93.167079] [<ffffffffa02c017c>] write_msg+0x89/0xcd [netconsole] [ 93.167079] [<ffffffff81034e65>] __call_console_drivers+0x67/0x79 [ 93.167079] [<ffffffff81034ed0>] _call_console_drivers+0x59/0x5d [ 93.167079] [<ffffffff810352d3>] release_console_sem+0x121/0x1d7 [ 93.167079] [<ffffffff8103590a>] vprintk+0x35d/0x393 [ 93.167079] [<ffffffff8103f947>] ? add_timer+0x17/0x19 [ 93.167079] [<ffffffff81046ddf>] ? 
queue_delayed_work_on+0xa2/0xa9 [ 93.167079] [<ffffffff81363bb8>] printk+0x3c/0x44 [ 93.167079] [<ffffffffa02a3b17>] bond_select_active_slave+0x105/0x109 [bonding] -> write_locked [ 93.167079] [<ffffffffa02a4798>] bond_mii_monitor+0x479/0x4ed [bonding] [ 93.167079] [<ffffffff81046009>] worker_thread+0x1ef/0x2e2 In this case, the message should be "bonding: bond0: making interface eth0 the new active one" I did the following patch to discard the packet if it was IN_NETPOLL and the read_lock() fails, so I could go ahead testing it: diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5e12462..a3b8bad 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4258,8 +4258,19 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d struct bonding *bond = netdev_priv(bond_dev); int res = 1; - read_lock(&bond->lock); - read_lock(&bond->curr_slave_lock); + if (read_trylock(&bond->lock) == 0 && + (bond_dev->flags & IFF_IN_NETPOLL)) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + if (read_trylock(&bond->curr_slave_lock) == 0 && + (bond_dev->flags & IFF_IN_NETPOLL)) { + read_unlock(&bond->lock); + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + if (!BOND_IS_OK(bond)) goto out; and I found another problem. The function netpoll_send_skb() checks if the npinfo's queue length is zero and if it's not, it will queue the packet to make sure it's in order and then schedule the thread to run. Later, the thread wakes up running queue_process() which disables interrupts before calling ndo_start_xmit(). 
However, dev_queue_xmit() uses rcu_*_bh() and before return, it will enable the interrupts again, spitting this: ------------[ cut here ]------------ WARNING: at kernel/softirq.c:143 local_bh_enable+0x3c/0x86() Hardware name: Precision WorkStation 490 Modules linked in: netconsole bonding sunrpc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6] Pid: 17, comm: events/2 Not tainted 2.6.34-04700-gd938a70 #21 Call Trace: [<ffffffff810381d6>] warn_slowpath_common+0x77/0x8f [<ffffffff810381fd>] warn_slowpath_null+0xf/0x11 [<ffffffff8103d691>] local_bh_enable+0x3c/0x86 [<ffffffff812e4d85>] dev_queue_xmit+0x462/0x493 [<ffffffffa018805f>] bond_dev_queue_xmit+0x1bd/0x1e3 [bonding] [<ffffffffa01881dd>] bond_start_xmit+0x158/0x37b [bonding] -> interrupts disabled [<ffffffff812f3fca>] queue_process+0x9d/0xf9 [<ffffffff8104d022>] worker_thread+0x19d/0x224 [<ffffffff812f3f2d>] ? queue_process+0x0/0xf9 [<ffffffff81050819>] ? autoremove_wake_function+0x0/0x34 [<ffffffff8104ce85>] ? worker_thread+0x0/0x224 [<ffffffff8105040b>] kthread+0x7a/0x82 [<ffffffff810036d4>] kernel_thread_helper+0x4/0x10 [<ffffffff81050391>] ? kthread+0x0/0x82 [<ffffffff810036d0>] ? kernel_thread_helper+0x0/0x10 ---[ end trace 74e3904503fdb632 ]--- kernel/softirq.c: 141 static inline void _local_bh_enable_ip(unsigned long ip) 142 { 143 WARN_ON_ONCE(in_irq() || irqs_disabled()); 144 #ifdef CONFIG_TRACE_IRQFLAGS 145 local_irq_disable(); 146 #endif 147 /* 148 * Are softirqs going to be turned on now: 149 */ The git is updated up to: d938a70 be2net: increase POST timeout for EEH recovery Two slave interfaces, bonding mode 1, netconsole over bond0. [1] https://bugzilla.redhat.com/show_bug.cgi?id=248374#c5 regards, fbl On Wed, May 05, 2010 at 04:11:15AM -0400, Amerigo Wang wrote:> V5: > Fix coding style problems pointed by David. > > V4: > Use "unlikely" to mark netpoll call path, suggested by Stephen. > Handle NETDEV_GOING_DOWN case. > > V3: > Update to latest Linus' tree. 
> Fix deadlocks when releasing slaves of bonding devices. > Thanks to Andy. > > V2: > Fix some bugs of previous version. > Remove ->netpoll_setup and ->netpoll_xmit, they are not necessary. > Don't poll all underlying devices, poll ->real_dev in struct netpoll. > Thanks to David for suggesting above. > > ------------> > > This whole patchset is for adding netpoll support to bridge and bonding > devices. I already tested it for bridge, bonding, bridge over bonding, > and bonding over bridge. It looks fine now. > > > To make bridge and bonding support netpoll, we need to adjust > some netpoll generic code. This patch does the following things: > > 1) introduce two new priv_flags for struct net_device: > IFF_IN_NETPOLL which identifies we are processing a netpoll; > IFF_DISABLE_NETPOLL is used to disable netpoll support for a device > at run-time; > > 2) introduce one new method for netdev_ops: > ->ndo_netpoll_cleanup() is used to clean up netpoll when a device is > removed. > > 3) introduce netpoll_poll_dev() which takes a struct net_device * parameter; > export netpoll_send_skb() and netpoll_poll_dev() which will be used later; > > 4) hide a pointer to struct netpoll in struct netpoll_info, ditto. > > 5) introduce ->real_dev for struct netpoll. > > 6) introduce a new status NETDEV_BONDING_DESLAE, which is used to disable > netconsole before releasing a slave, to avoid deadlocks. 
> > Cc: David Miller <davem at davemloft.net> > Cc: Neil Horman <nhorman at tuxdriver.com> > Signed-off-by: WANG Cong <amwang at redhat.com> > > --- > > Index: linux-2.6/include/linux/if.h > ==================================================================> --- linux-2.6.orig/include/linux/if.h > +++ linux-2.6/include/linux/if.h > @@ -71,6 +71,8 @@ > * release skb->dst > */ > #define IFF_DONT_BRIDGE 0x800 /* disallow bridging this ether dev */ > +#define IFF_IN_NETPOLL 0x1000 /* whether we are processing netpoll */ > +#define IFF_DISABLE_NETPOLL 0x2000 /* disable netpoll at run-time */ > > #define IF_GET_IFACE 0x0001 /* for querying only */ > #define IF_GET_PROTO 0x0002 > Index: linux-2.6/include/linux/netdevice.h > ==================================================================> --- linux-2.6.orig/include/linux/netdevice.h > +++ linux-2.6/include/linux/netdevice.h > @@ -667,6 +667,7 @@ struct net_device_ops { > unsigned short vid); > #ifdef CONFIG_NET_POLL_CONTROLLER > void (*ndo_poll_controller)(struct net_device *dev); > + void (*ndo_netpoll_cleanup)(struct net_device *dev); > #endif > int (*ndo_set_vf_mac)(struct net_device *dev, > int queue, u8 *mac); > Index: linux-2.6/include/linux/netpoll.h > ==================================================================> --- linux-2.6.orig/include/linux/netpoll.h > +++ linux-2.6/include/linux/netpoll.h > @@ -14,6 +14,7 @@ > > struct netpoll { > struct net_device *dev; > + struct net_device *real_dev; > char dev_name[IFNAMSIZ]; > const char *name; > void (*rx_hook)(struct netpoll *, int, char *, int); > @@ -36,8 +37,11 @@ struct netpoll_info { > struct sk_buff_head txq; > > struct delayed_work tx_work; > + > + struct netpoll *netpoll; > }; > > +void netpoll_poll_dev(struct net_device *dev); > void netpoll_poll(struct netpoll *np); > void netpoll_send_udp(struct netpoll *np, const char *msg, int len); > void netpoll_print_options(struct netpoll *np); > @@ -47,6 +51,7 @@ int netpoll_trap(void); > void 
netpoll_set_trap(int trap); > void netpoll_cleanup(struct netpoll *np); > int __netpoll_rx(struct sk_buff *skb); > +void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb); > > > #ifdef CONFIG_NETPOLL > Index: linux-2.6/net/core/netpoll.c > ==================================================================> --- linux-2.6.orig/net/core/netpoll.c > +++ linux-2.6/net/core/netpoll.c > @@ -179,9 +179,8 @@ static void service_arp_queue(struct net > } > } > > -void netpoll_poll(struct netpoll *np) > +void netpoll_poll_dev(struct net_device *dev) > { > - struct net_device *dev = np->dev; > const struct net_device_ops *ops; > > if (!dev || !netif_running(dev)) > @@ -201,6 +200,11 @@ void netpoll_poll(struct netpoll *np) > zap_completion_queue(); > } > > +void netpoll_poll(struct netpoll *np) > +{ > + netpoll_poll_dev(np->dev); > +} > + > static void refill_skbs(void) > { > struct sk_buff *skb; > @@ -282,7 +286,7 @@ static int netpoll_owner_active(struct n > return 0; > } > > -static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) > +void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) > { > int status = NETDEV_TX_BUSY; > unsigned long tries; > @@ -308,7 +312,9 @@ static void netpoll_send_skb(struct netp > tries > 0; --tries) { > if (__netif_tx_trylock(txq)) { > if (!netif_tx_queue_stopped(txq)) { > + dev->priv_flags |= IFF_IN_NETPOLL; > status = ops->ndo_start_xmit(skb, dev); > + dev->priv_flags &= ~IFF_IN_NETPOLL; > if (status == NETDEV_TX_OK) > txq_trans_update(txq); > } > @@ -756,7 +762,10 @@ int netpoll_setup(struct netpoll *np) > atomic_inc(&npinfo->refcnt); > } > > - if (!ndev->netdev_ops->ndo_poll_controller) { > + npinfo->netpoll = np; > + > + if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || > + !ndev->netdev_ops->ndo_poll_controller) { > printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", > np->name, np->dev_name); > err = -ENOTSUPP; > @@ -878,6 +887,7 @@ void netpoll_cleanup(struct netpoll *np) > } > > if 
(atomic_dec_and_test(&npinfo->refcnt)) { > + const struct net_device_ops *ops; > skb_queue_purge(&npinfo->arp_tx); > skb_queue_purge(&npinfo->txq); > cancel_rearming_delayed_work(&npinfo->tx_work); > @@ -885,7 +895,11 @@ void netpoll_cleanup(struct netpoll *np) > /* clean after last, unfinished work */ > __skb_queue_purge(&npinfo->txq); > kfree(npinfo); > - np->dev->npinfo = NULL; > + ops = np->dev->netdev_ops; > + if (ops->ndo_netpoll_cleanup) > + ops->ndo_netpoll_cleanup(np->dev); > + else > + np->dev->npinfo = NULL; > } > } > > @@ -908,6 +922,7 @@ void netpoll_set_trap(int trap) > atomic_dec(&trapped); > } > > +EXPORT_SYMBOL(netpoll_send_skb); > EXPORT_SYMBOL(netpoll_set_trap); > EXPORT_SYMBOL(netpoll_trap); > EXPORT_SYMBOL(netpoll_print_options); > @@ -915,4 +930,5 @@ EXPORT_SYMBOL(netpoll_parse_options); > EXPORT_SYMBOL(netpoll_setup); > EXPORT_SYMBOL(netpoll_cleanup); > EXPORT_SYMBOL(netpoll_send_udp); > +EXPORT_SYMBOL(netpoll_poll_dev); > EXPORT_SYMBOL(netpoll_poll); > Index: linux-2.6/drivers/net/netconsole.c > ==================================================================> --- linux-2.6.orig/drivers/net/netconsole.c > +++ linux-2.6/drivers/net/netconsole.c > @@ -665,7 +665,8 @@ static int netconsole_netdev_event(struc > struct netconsole_target *nt; > struct net_device *dev = ptr; > > - if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER)) > + if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER || > + event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN)) > goto done; > > spin_lock_irqsave(&target_list_lock, flags); > @@ -677,19 +678,21 @@ static int netconsole_netdev_event(struc > strlcpy(nt->np.dev_name, dev->name, IFNAMSIZ); > break; > case NETDEV_UNREGISTER: > - if (!nt->enabled) > - break; > netpoll_cleanup(&nt->np); > + /* Fall through */ > + case NETDEV_GOING_DOWN: > + case NETDEV_BONDING_DESLAVE: > nt->enabled = 0; > - printk(KERN_INFO "netconsole: network logging stopped" > - ", interface %s unregistered\n", > - 
dev->name); > break; > } > } > netconsole_target_put(nt); > } > spin_unlock_irqrestore(&target_list_lock, flags); > + if (event == NETDEV_UNREGISTER || event == NETDEV_BONDING_DESLAVE) > + printk(KERN_INFO "netconsole: network logging stopped, " > + "interface %s %s\n", dev->name, > + event == NETDEV_UNREGISTER ? "unregistered" : "released slaves"); > > done: > return NOTIFY_DONE; > Index: linux-2.6/include/linux/notifier.h > ==================================================================> --- linux-2.6.orig/include/linux/notifier.h > +++ linux-2.6/include/linux/notifier.h > @@ -203,6 +203,7 @@ static inline int notifier_to_errno(int > #define NETDEV_BONDING_NEWTYPE 0x000F > #define NETDEV_POST_INIT 0x0010 > #define NETDEV_UNREGISTER_BATCH 0x0011 > +#define NETDEV_BONDING_DESLAVE 0x0012 > > #define SYS_DOWN 0x0001 /* Notify of system down */ > #define SYS_RESTART SYS_DOWN > -- > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majordomo at vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html-- Flavio
David Miller
2010-May-27 20:35 UTC
[Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
From: Flavio Leitner <fbl at sysclose.org>
Date: Thu, 27 May 2010 15:05:45 -0300

> I did the following patch to discard the packet if it was IN_NETPOLL
> and the read_lock() fails, so I could go ahead testing it:

This is disgusting, let's just disallow console output from such locations. Defer them to a workqueue if their output is so critical.
Cong Wang
2010-May-28 02:47 UTC
[Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
On 05/28/10 02:05, Flavio Leitner wrote:> > Hi guys! > > I finally could test this to see if an old problem reported on bugzilla[1] was > fixed now, but unfortunately it is still there. > > The ticket is private I guess, but basically the problem happens when bonding > driver tries to print something after it had taken the write_lock (monitor > functions, enslave/de-enslave), so the printk() will pass through netpoll, then > on bonding again which no matter what mode you use, it will try to read_lock() > the lock again. The result is a deadlock and the entire system hangs.This is true, I already fixed some similar issues.> > I manage to get a fresh backtrace with mode 1, see below: > > > [ 93.167079] Call Trace: > [ 93.167079] [<ffffffff81034cf9>] warn_slowpath_common+0x77/0x8f > [ 93.167079] [<ffffffff81034d5e>] warn_slowpath_fmt+0x3c/0x3e > [ 93.167079] [<ffffffff81366aef>] ? _raw_read_trylock+0x11/0x4b > [ 93.167079] [<ffffffffa02a2c42>] ? bond_start_xmit+0x12b/0x401 [bonding] > -> read_lock fails > [ 93.167079] [<ffffffffa02a2c9f>] bond_start_xmit+0x188/0x401 [bonding] > [ 93.167079] [<ffffffff81055b37>] ? trace_hardirqs_off+0xd/0xf > [ 93.167079] [<ffffffff812dfdb9>] netpoll_send_skb+0xbd/0x1f3 > [ 93.167079] [<ffffffff812e00ed>] netpoll_send_udp+0x1fe/0x20d > [ 93.167079] [<ffffffffa02c017c>] write_msg+0x89/0xcd [netconsole] > [ 93.167079] [<ffffffff81034e65>] __call_console_drivers+0x67/0x79 > [ 93.167079] [<ffffffff81034ed0>] _call_console_drivers+0x59/0x5d > [ 93.167079] [<ffffffff810352d3>] release_console_sem+0x121/0x1d7 > [ 93.167079] [<ffffffff8103590a>] vprintk+0x35d/0x393 > [ 93.167079] [<ffffffff8103f947>] ? add_timer+0x17/0x19 > [ 93.167079] [<ffffffff81046ddf>] ? 
queue_delayed_work_on+0xa2/0xa9 > [ 93.167079] [<ffffffff81363bb8>] printk+0x3c/0x44 > [ 93.167079] [<ffffffffa02a3b17>] bond_select_active_slave+0x105/0x109 [bonding] > -> write_locked > [ 93.167079] [<ffffffffa02a4798>] bond_mii_monitor+0x479/0x4ed [bonding] > [ 93.167079] [<ffffffff81046009>] worker_thread+0x1ef/0x2e2 > > In this case, the message should be > "bonding: bond0: making interface eth0 the new active one"Hmm, you triggered a warning here, let me check the source code and try to reproduce it here.> > I did the following patch to discard the packet if it was IN_NETPOLL > and the read_lock() fails, so I could go ahead testing it: > > diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c > index 5e12462..a3b8bad 100644 > --- a/drivers/net/bonding/bond_main.c > +++ b/drivers/net/bonding/bond_main.c > @@ -4258,8 +4258,19 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d > struct bonding *bond = netdev_priv(bond_dev); > int res = 1; > > - read_lock(&bond->lock); > - read_lock(&bond->curr_slave_lock); > + if (read_trylock(&bond->lock) == 0&& > + (bond_dev->flags& IFF_IN_NETPOLL)) { > + dev_kfree_skb(skb); > + return NETDEV_TX_OK; > + } > + > + if (read_trylock(&bond->curr_slave_lock) == 0&& > + (bond_dev->flags& IFF_IN_NETPOLL)) { > + read_unlock(&bond->lock); > + dev_kfree_skb(skb); > + return NETDEV_TX_OK; > + } > + > > if (!BOND_IS_OK(bond)) > goto out; >This looks like a workaround, not a fix. :)> > and I found another problem. The function netpoll_send_skb() checks > if the npinfo's queue length is zero and if it's not, it will queue > the packet to make sure it's in order and then schedule the thread > to run. Later, the thread wakes up running queue_process() which disables > interrupts before calling ndo_start_xmit(). 
However, dev_queue_xmit() > uses rcu_*_bh() and before return, it will enable the interrupts again, > spitting this: > > ------------[ cut here ]------------ > WARNING: at kernel/softirq.c:143 local_bh_enable+0x3c/0x86() > Hardware name: Precision WorkStation 490 > Modules linked in: netconsole bonding sunrpc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6] > Pid: 17, comm: events/2 Not tainted 2.6.34-04700-gd938a70 #21 > Call Trace: > [<ffffffff810381d6>] warn_slowpath_common+0x77/0x8f > [<ffffffff810381fd>] warn_slowpath_null+0xf/0x11 > [<ffffffff8103d691>] local_bh_enable+0x3c/0x86 > [<ffffffff812e4d85>] dev_queue_xmit+0x462/0x493 > [<ffffffffa018805f>] bond_dev_queue_xmit+0x1bd/0x1e3 [bonding] > [<ffffffffa01881dd>] bond_start_xmit+0x158/0x37b [bonding] > -> interrupts disabled > [<ffffffff812f3fca>] queue_process+0x9d/0xf9 > [<ffffffff8104d022>] worker_thread+0x19d/0x224 > [<ffffffff812f3f2d>] ? queue_process+0x0/0xf9 > [<ffffffff81050819>] ? autoremove_wake_function+0x0/0x34 > [<ffffffff8104ce85>] ? worker_thread+0x0/0x224 > [<ffffffff8105040b>] kthread+0x7a/0x82 > [<ffffffff810036d4>] kernel_thread_helper+0x4/0x10 > [<ffffffff81050391>] ? kthread+0x0/0x82 > [<ffffffff810036d0>] ? kernel_thread_helper+0x0/0x10 > ---[ end trace 74e3904503fdb632 ]--- > > kernel/softirq.c: > 141 static inline void _local_bh_enable_ip(unsigned long ip) > 142 { > 143 WARN_ON_ONCE(in_irq() || irqs_disabled()); > 144 #ifdef CONFIG_TRACE_IRQFLAGS > 145 local_irq_disable(); > 146 #endif > 147 /* > 148 * Are softirqs going to be turned on now: > 149 */ > >I am wondering if this was caused by the previous issue.> The git is updated up to: > d938a70 be2net: increase POST timeout for EEH recovery > > Two slave interfaces, bonding mode 1, netconsole over bond0. > > [1] https://bugzilla.redhat.com/show_bug.cgi?id=248374#c5How did you reproduce this? I will check that BZ to see if I can find how to reproduce this. Thanks.
Cong Wang
2010-May-28 08:16 UTC
[Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
On 05/28/10 02:05, Flavio Leitner wrote:
>
> Hi guys!
>
> I finally could test this to see if an old problem reported on bugzilla[1] was
> fixed now, but unfortunately it is still there.
>
> The ticket is private I guess, but basically the problem happens when bonding
> driver tries to print something after it had taken the write_lock (monitor
> functions, enslave/de-enslave), so the printk() will pass through netpoll, then
> on bonding again which no matter what mode you use, it will try to read_lock()
> the lock again. The result is a deadlock and the entire system hangs.
>

Does the attached patch fix this hang? Thanks!

----------------------->

We should notify netconsole that bond is changing its slaves when we use active-backup mode.

Signed-off-by: WANG Cong <amwang at redhat.com>

----
-------------- next part --------------
A non-text attachment was scrubbed...
Name: drivers-net-bonding-fix-activebackup-deadlock.diff
Type: text/x-patch
Size: 898 bytes
Desc: not available
Url : http://lists.linux-foundation.org/pipermail/bridge/attachments/20100528/ebdf38eb/attachment.bin
Flavio Leitner
2010-May-28 20:42 UTC
[Bridge] [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
On Fri, May 28, 2010 at 04:16:34PM +0800, Cong Wang wrote:> On 05/28/10 02:05, Flavio Leitner wrote: > > > >Hi guys! > > > >I finally could test this to see if an old problem reported on bugzilla[1] was > >fixed now, but unfortunately it is still there. > > > >The ticket is private I guess, but basically the problem happens when bonding > >driver tries to print something after it had taken the write_lock (monitor > >functions, enslave/de-enslave), so the printk() will pass through netpoll, then > >on bonding again which no matter what mode you use, it will try to read_lock() > >the lock again. The result is a deadlock and the entire system hangs. > > > > Does the attached patch fix this hang?I got another issue now: [ 89.523062] bonding: bond0: enslaving eth0 as a backup interface with a down link. [ 89.580746] bonding: bond0: enslaving eth2 as a backup interface with a down link. [ 91.198527] e1000: eth2 NIC Link is Up 100 Mbps Half Duplex, Flow Control: None [ 91.238245] bonding: bond0: link status definitely up for interface eth2. 
[ 91.245381] BUG: scheduling while atomic: bond0/2716/0x10000100 [ 91.251565] 5 locks held by bond0/2716: [ 91.255663] #0: ((bond_dev->name)){+.+.+.}, at: [<ffffffff81045fb4>] worker_thread+0x19a/0x2e2 [ 91.265179] #1: ((&(&bond->mii_work)->work)){+.+.+.}, at: [<ffffffff81045fb4>] worker_thread+0x19a/0x2e2 [ 91.275554] #2: (rtnl_mutex){+.+.+.}, at: [<ffffffff812daf38>] rtnl_lock+0x12/0x14 [ 91.284018] #3: (&bond->lock){++.+.+}, at: [<ffffffffa029e06a>] bond_mii_monitor+0x2a2/0x4ed [bonding] [ 91.294230] #4: (&bond->curr_slave_lock){+...+.}, at: [<ffffffffa029e239>] bond_mii_monitor+0x471/0x4ed [bonding] [ 91.305387] Modules linked in: bonding sunrpc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables x_tables ipv6 dm_mirror dm_region_hash dm_log dm_multipath uinput snd_hda_codec_idt snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm ppdev parport_pc parport rtc_cmos snd_timer tg3 snd ide_cd_mod i5000_edac i2c_i801 libphy rtc_core rtc_lib edac_core pcspkr e1000 dcdbas uhci_hcd tulip shpchp i2c_core cdrom serio_raw soundcore sg snd_page_alloc raid0 sd_mod button [last unloaded: mperf] [ 91.357735] Pid: 2716, comm: bond0 Not tainted 2.6.34-04700-gd938a70-dirty #36 [ 91.371112] Call Trace: [ 91.373825] [<ffffffff81056002>] ? 
__debug_show_held_locks+0x22/0x24 [ 91.380530] [<ffffffff8102e4a2>] __schedule_bug+0x6d/0x72 [ 91.386284] [<ffffffff81363f6e>] schedule+0xc9/0x791 [ 91.391600] [<ffffffff81032540>] __cond_resched+0x25/0x30 [ 91.397350] [<ffffffff81364757>] _cond_resched+0x27/0x32 [ 91.403013] [<ffffffff810ab243>] kmem_cache_alloc+0x2b/0xac [ 91.408936] [<ffffffff812c61fd>] skb_clone+0x42/0x5d [ 91.414253] [<ffffffff812ec696>] netlink_broadcast+0x192/0x369 [ 91.420436] [<ffffffff812ecdc3>] nlmsg_notify+0x43/0x89 [ 91.426012] [<ffffffff812dabc7>] rtnl_notify+0x2b/0x2d [ 91.431501] [<ffffffff812dacbc>] rtmsg_ifinfo+0xf3/0x118 [ 91.437165] [<ffffffff812dad0c>] rtnetlink_event+0x2b/0x2f [ 91.443003] [<ffffffff81369fe4>] notifier_call_chain+0x32/0x5e [ 91.449188] [<ffffffff8104d618>] raw_notifier_call_chain+0xf/0x11 [ 91.455634] [<ffffffff812cfc73>] call_netdevice_notifiers+0x45/0x4a [ 91.462253] [<ffffffff812d04f7>] netdev_bonding_change+0x12/0x14 [ 91.468614] [<ffffffffa029d589>] bond_select_active_slave+0xe8/0x123 [bonding] [ 91.476408] [<ffffffffa029e241>] bond_mii_monitor+0x479/0x4ed [bonding] [ 91.483375] [<ffffffff81046009>] worker_thread+0x1ef/0x2e2 [ 91.489212] [<ffffffff81045fb4>] ? worker_thread+0x19a/0x2e2 [ 91.495227] [<ffffffffa029ddc8>] ? bond_mii_monitor+0x0/0x4ed [bonding] [ 91.502192] [<ffffffff81049c71>] ? autoremove_wake_function+0x0/0x34 [ 91.508897] [<ffffffff81045e1a>] ? worker_thread+0x0/0x2e2 [ 91.514734] [<ffffffff810498bb>] kthread+0x7a/0x82 [ 91.519878] [<ffffffff81003714>] kernel_thread_helper+0x4/0x10 [ 91.526060] [<ffffffff81366ffc>] ? restore_args+0x0/0x30 [ 91.531723] [<ffffffff81049841>] ? kthread+0x0/0x82 [ 91.536953] [<ffffffff81003710>] ? kernel_thread_helper+0x0/0x10 [ 91.543343] bonding: bond0: making interface eth2 the new active one. [ 91.550554] bonding: bond0: first active interface up! [ 91.556859] ADDRCONF(NETDEV_CHANGE): bond0: link becomes ready No other patch applied. 
Just started netconsole over bonding, so no need to pull the cable from slaves. Reproduced twice, one I got the backtrace above, and on the other one the system hangs completely after the BUG: scheduling message. fbl> > Thanks! > > -----------------------> > > We should notify netconsole that bond is changing its slaves > when we use active-backup mode. > > Signed-off-by: WANG Cong <amwang at redhat.com> > > ---- >> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c > index 5e12462..9494c02 100644 > --- a/drivers/net/bonding/bond_main.c > +++ b/drivers/net/bonding/bond_main.c > @@ -1199,6 +1199,7 @@ void bond_select_active_slave(struct bonding *bond) > > best_slave = bond_find_best_slave(bond); > if (best_slave != bond->curr_active_slave) { > + netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE); > bond_change_active_slave(bond, best_slave); > rv = bond_set_carrier(bond); > if (!rv) > @@ -2154,6 +2155,7 @@ static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_devi > (old_active) && > (new_active->link == BOND_LINK_UP) && > IS_UP(new_active->dev)) { > + netdev_bonding_change(bond->dev, NETDEV_BONDING_DESLAVE); > write_lock_bh(&bond->curr_slave_lock); > bond_change_active_slave(bond, new_active); > write_unlock_bh(&bond->curr_slave_lock);-- Flavio