We''ve been hitting the following issue on a variety of hosts and recent Xen/dom0 version combinations. Here''s an excerpt from our latest: Xen: 4.1.4 (xenbits @ 23432) Dom0: 3.7.1-x86_64 BUG: unable to handle kernel NULL pointer dereference at 000000000000001c IP: [<ffffffff8141a301>] evtchn_from_irq+0x11/0x40 PGD 0 Oops: 0000 [#1] SMP Modules linked in: ebt_comment ebt_arp ebt_set ebt_limit ebt_ip6 ebt_ip ip_set_hash_net ip_set ebtable_nat xen_gntdev bonding ebtable_filter igb CPU 0 Pid: 1636, comm: netback/0 Not tainted 3.7.1-x86_64 #1 Supermicro X9DRi-LN4+/X9DR3-LN4+/X9DRi-LN4+/X9DR3-LN4+ RIP: e030:[<ffffffff8141a301>] [<ffffffff8141a301>] evtchn_from_irq+0x11/0x40 RSP: e02b:ffff88004334fc98 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff880004964700 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 00000000000001dc RDI: 000000000000001c RBP: ffff88004334fc98 R08: ffffea00010bf818 R09: 0000000000000000 R10: 0000000000000001 R11: ffff880000000000 R12: ffff880004964720 R13: ffff88002d34d700 R14: 00000000ffffffff R15: ffff88004334fd84 FS: 00007f8939347700(0000) GS:ffff880101e00000(0000) knlGS:0000000000000000 CS: e033 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 000000000000001c CR3: 0000000001c0b000 CR4: 0000000000002660 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process netback/0 (pid: 1636, threadinfo ffff88004334e000, task ffff880043fd5fe0) Stack: ffff88004334fcb8 ffffffff8141b06d ffff880000000218 ffff880042fe1200 ffff88004334fdb8 ffffffff81543b9b ffff88004334fd84 ffff880042c59040 ffff88004334fd68 ffff88004334fd48 ffff880000000cc0 ffffc900106c7ac0 Call Trace: [<ffffffff8141b06d>] notify_remote_via_irq+0xd/0x40 [<ffffffff81543b9b>] xen_netbk_rx_action+0x73b/0x800 [<ffffffff81544c25>] xen_netbk_kthread+0xb5/0xa60 [<ffffffff81080050>] ? finish_task_switch+0x60/0xd0 [<ffffffff81071fe0>] ? wake_up_bit+0x40/0x40 [<ffffffff81544b70>] ? xen_netbk_tx_build_gops+0xa10/0xa10 [<ffffffff81071926>] kthread+0xc6/0xd0 [<ffffffff810037b9>] ? xen_end_context_switch+0x19/0x20 [<ffffffff81071860>] ? kthread_freezable_should_stop+0x70/0x70 [<ffffffff81767c7c>] ret_from_fork+0x7c/0xb0 [<ffffffff81071860>] ? kthread_freezable_should_stop+0x70/0x70 Code: be f5 01 00 00 48 c7 c7 12 e2 99 81 e8 d9 4c c3 ff eb cd 0f 1f 80 00 00 00 00 55 48 89 e5 39 3d c6 fd 80 00 76 0b e8 df fa ff ff <0f> b7 40 1c c9 c3 89 f9 31 c0 48 c7 c2 27 e2 99 81 be db 00 00 RIP [<ffffffff8141a301>] evtchn_from_irq+0x11/0x40 RSP <ffff88004334fc98> CR2: 000000000000001c ---[ end trace 1b5f6b359343fcfe ]--- Which leads to xenwatch being stuck in D state, which then requires us to reboot the host. SysRq : Show Blocked State task PC stack pid father xenwatch D ffff880101f938c0 5056 49 2 0x00000000 ffff880101305cb8 0000000000000246 ffff8801012a0760 00000000000138c0 ffff880101305fd8 ffff880101304010 00000000000138c0 00000000000138c0 ffff880101305fd8 00000000000138c0 ffff8800349224e0 ffff8801012a0760 Call Trace: [<ffffffff8175f444>] schedule+0x24/0x70 [<ffffffff8154698d>] xenvif_disconnect+0x7d/0x130 [<ffffffff81071fe0>] ? wake_up_bit+0x40/0x40 [<ffffffff81545ac4>] frontend_changed+0x214/0x660 [<ffffffff81080050>] ? finish_task_switch+0x60/0xd0 [<ffffffff8141fb22>] xenbus_otherend_changed+0xb2/0xc0 [<ffffffff8175fe39>] ? _raw_spin_unlock_irqrestore+0x19/0x20 [<ffffffff8141fd3b>] frontend_changed+0xb/0x10 [<ffffffff8141da3a>] xenwatch_thread+0xba/0x180 [<ffffffff81071fe0>] ? wake_up_bit+0x40/0x40 [<ffffffff8141d980>] ? xs_watch+0x60/0x60 [<ffffffff81071926>] kthread+0xc6/0xd0 [<ffffffff810037b9>] ? xen_end_context_switch+0x19/0x20 [<ffffffff81071860>] ? kthread_freezable_should_stop+0x70/0x70 [<ffffffff81767c7c>] ret_from_fork+0x7c/0xb0 [<ffffffff81071860>] ? kthread_freezable_should_stop+0x70/0x70 I''ll give building an updated dom0 kernel a shot, but was hoping this rang a bell or two. Thanks, -Chris
On 01/02/13 21:58, Christopher S. Aker wrote:> We''ve been hitting the following issue on a variety of hosts and recent > Xen/dom0 version combinations. Here''s an excerpt from our latest: > > Xen: 4.1.4 (xenbits @ 23432) > Dom0: 3.7.1-x86_64 > > BUG: unable to handle kernel NULL pointer dereference at 000000000000001c > IP: [<ffffffff8141a301>] evtchn_from_irq+0x11/0x40 > PGD 0 > Oops: 0000 [#1] SMP > Modules linked in: ebt_comment ebt_arp ebt_set ebt_limit ebt_ip6 ebt_ip > ip_set_hash_net ip_set ebtable_nat xen_gntdev bonding ebtable_filter igb > CPU 0 > Pid: 1636, comm: netback/0 Not tainted 3.7.1-x86_64 #1 Supermicro > X9DRi-LN4+/X9DR3-LN4+/X9DRi-LN4+/X9DR3-LN4+ > RIP: e030:[<ffffffff8141a301>] [<ffffffff8141a301>] > evtchn_from_irq+0x11/0x40 > RSP: e02b:ffff88004334fc98 EFLAGS: 00010246 > RAX: 0000000000000000 RBX: ffff880004964700 RCX: 0000000000000000 > RDX: 0000000000000000 RSI: 00000000000001dc RDI: 000000000000001c > RBP: ffff88004334fc98 R08: ffffea00010bf818 R09: 0000000000000000 > R10: 0000000000000001 R11: ffff880000000000 R12: ffff880004964720 > R13: ffff88002d34d700 R14: 00000000ffffffff R15: ffff88004334fd84 > FS: 00007f8939347700(0000) GS:ffff880101e00000(0000) > knlGS:0000000000000000 > CS: e033 DS: 0000 ES: 0000 CR0: 000000008005003b > CR2: 000000000000001c CR3: 0000000001c0b000 CR4: 0000000000002660 > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > Process netback/0 (pid: 1636, threadinfo ffff88004334e000, task > ffff880043fd5fe0) > Stack: > ffff88004334fcb8 ffffffff8141b06d ffff880000000218 ffff880042fe1200 > ffff88004334fdb8 ffffffff81543b9b ffff88004334fd84 ffff880042c59040 > ffff88004334fd68 ffff88004334fd48 ffff880000000cc0 ffffc900106c7ac0 > Call Trace: > [<ffffffff8141b06d>] notify_remote_via_irq+0xd/0x40 > [<ffffffff81543b9b>] xen_netbk_rx_action+0x73b/0x800 > [<ffffffff81544c25>] xen_netbk_kthread+0xb5/0xa60 > [<ffffffff81080050>] ? finish_task_switch+0x60/0xd0 > [<ffffffff81071fe0>] ? wake_up_bit+0x40/0x40 > [<ffffffff81544b70>] ? xen_netbk_tx_build_gops+0xa10/0xa10 > [<ffffffff81071926>] kthread+0xc6/0xd0 > [<ffffffff810037b9>] ? xen_end_context_switch+0x19/0x20 > [<ffffffff81071860>] ? kthread_freezable_should_stop+0x70/0x70 > [<ffffffff81767c7c>] ret_from_fork+0x7c/0xb0 > [<ffffffff81071860>] ? kthread_freezable_should_stop+0x70/0x70 > Code: be f5 01 00 00 48 c7 c7 12 e2 99 81 e8 d9 4c c3 ff eb cd 0f 1f 80 > 00 00 00 00 55 48 89 e5 39 3d c6 fd 80 00 76 0b e8 df fa ff ff <0f> b7 > 40 1c c9 c3 89 f9 31 c0 48 c7 c2 27 e2 99 81 be db 00 00 > RIP [<ffffffff8141a301>] evtchn_from_irq+0x11/0x40 > RSP <ffff88004334fc98> > CR2: 000000000000001c > ---[ end trace 1b5f6b359343fcfe ]--- > > > Which leads to xenwatch being stuck in D state, which then requires us > to reboot the host. > > SysRq : Show Blocked State > task PC stack pid father > xenwatch D ffff880101f938c0 5056 49 2 0x00000000 > ffff880101305cb8 0000000000000246 ffff8801012a0760 00000000000138c0 > ffff880101305fd8 ffff880101304010 00000000000138c0 00000000000138c0 > ffff880101305fd8 00000000000138c0 ffff8800349224e0 ffff8801012a0760 > Call Trace: > [<ffffffff8175f444>] schedule+0x24/0x70 > [<ffffffff8154698d>] xenvif_disconnect+0x7d/0x130 > [<ffffffff81071fe0>] ? wake_up_bit+0x40/0x40 > [<ffffffff81545ac4>] frontend_changed+0x214/0x660 > [<ffffffff81080050>] ? finish_task_switch+0x60/0xd0 > [<ffffffff8141fb22>] xenbus_otherend_changed+0xb2/0xc0 > [<ffffffff8175fe39>] ? _raw_spin_unlock_irqrestore+0x19/0x20 > [<ffffffff8141fd3b>] frontend_changed+0xb/0x10 > [<ffffffff8141da3a>] xenwatch_thread+0xba/0x180 > [<ffffffff81071fe0>] ? wake_up_bit+0x40/0x40 > [<ffffffff8141d980>] ? xs_watch+0x60/0x60 > [<ffffffff81071926>] kthread+0xc6/0xd0 > [<ffffffff810037b9>] ? xen_end_context_switch+0x19/0x20 > [<ffffffff81071860>] ? kthread_freezable_should_stop+0x70/0x70 > [<ffffffff81767c7c>] ret_from_fork+0x7c/0xb0 > [<ffffffff81071860>] ? kthread_freezable_should_stop+0x70/0x70 > > I''ll give building an updated dom0 kernel a shot, but was hoping this > rang a bell or two. > > Thanks, > -ChrisWell - it looks like info_for_irq(irq) returns a null pointer, and evtchn_from_irq blindly dereferences it trying to find the event channel. As for why the info is NULL, I can''t help you, but perhaps there should be a NULL check, returning 0 in the case of an error? ~Andrew> > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
Dose your Dom0 has very limited RAM? Just happened to fix a bug related to OOM not getting handled correctly. http://lists.xen.org/archives/html/xen-devel/2013-01/msg02549.html Wei. On Fri, Feb 1, 2013 at 9:58 PM, Christopher S. Aker <caker@theshore.net>wrote:> We''ve been hitting the following issue on a variety of hosts and recent > Xen/dom0 version combinations. Here''s an excerpt from our latest: > > Xen: 4.1.4 (xenbits @ 23432) > Dom0: 3.7.1-x86_64 > > BUG: unable to handle kernel NULL pointer dereference at 000000000000001c > IP: [<ffffffff8141a301>] evtchn_from_irq+0x11/0x40 > PGD 0 > Oops: 0000 [#1] SMP > Modules linked in: ebt_comment ebt_arp ebt_set ebt_limit ebt_ip6 ebt_ip > ip_set_hash_net ip_set ebtable_nat xen_gntdev bonding ebtable_filter igb > CPU 0 > Pid: 1636, comm: netback/0 Not tainted 3.7.1-x86_64 #1 Supermicro > X9DRi-LN4+/X9DR3-LN4+/X9DRi-**LN4+/X9DR3-LN4+ > RIP: e030:[<ffffffff8141a301>] [<ffffffff8141a301>] > evtchn_from_irq+0x11/0x40 > RSP: e02b:ffff88004334fc98 EFLAGS: 00010246 > RAX: 0000000000000000 RBX: ffff880004964700 RCX: 0000000000000000 > RDX: 0000000000000000 RSI: 00000000000001dc RDI: 000000000000001c > RBP: ffff88004334fc98 R08: ffffea00010bf818 R09: 0000000000000000 > R10: 0000000000000001 R11: ffff880000000000 R12: ffff880004964720 > R13: ffff88002d34d700 R14: 00000000ffffffff R15: ffff88004334fd84 > FS: 00007f8939347700(0000) GS:ffff880101e00000(0000) > knlGS:0000000000000000 > CS: e033 DS: 0000 ES: 0000 CR0: 000000008005003b > CR2: 000000000000001c CR3: 0000000001c0b000 CR4: 0000000000002660 > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > Process netback/0 (pid: 1636, threadinfo ffff88004334e000, task > ffff880043fd5fe0) > Stack: > ffff88004334fcb8 ffffffff8141b06d ffff880000000218 ffff880042fe1200 > ffff88004334fdb8 ffffffff81543b9b ffff88004334fd84 ffff880042c59040 > ffff88004334fd68 ffff88004334fd48 ffff880000000cc0 ffffc900106c7ac0 > Call Trace: > [<ffffffff8141b06d>] notify_remote_via_irq+0xd/0x40 > [<ffffffff81543b9b>] xen_netbk_rx_action+0x73b/**0x800 > [<ffffffff81544c25>] xen_netbk_kthread+0xb5/0xa60 > [<ffffffff81080050>] ? finish_task_switch+0x60/0xd0 > [<ffffffff81071fe0>] ? wake_up_bit+0x40/0x40 > [<ffffffff81544b70>] ? xen_netbk_tx_build_gops+0xa10/**0xa10 > [<ffffffff81071926>] kthread+0xc6/0xd0 > [<ffffffff810037b9>] ? xen_end_context_switch+0x19/**0x20 > [<ffffffff81071860>] ? kthread_freezable_should_stop+**0x70/0x70 > [<ffffffff81767c7c>] ret_from_fork+0x7c/0xb0 > [<ffffffff81071860>] ? kthread_freezable_should_stop+**0x70/0x70 > Code: be f5 01 00 00 48 c7 c7 12 e2 99 81 e8 d9 4c c3 ff eb cd 0f 1f 80 00 > 00 00 00 55 48 89 e5 39 3d c6 fd 80 00 76 0b e8 df fa ff ff <0f> b7 40 1c > c9 c3 89 f9 31 c0 48 c7 c2 27 e2 99 81 be db 00 00 > RIP [<ffffffff8141a301>] evtchn_from_irq+0x11/0x40 > RSP <ffff88004334fc98> > CR2: 000000000000001c > ---[ end trace 1b5f6b359343fcfe ]--- > > > Which leads to xenwatch being stuck in D state, which then requires us to > reboot the host. > > SysRq : Show Blocked State > task PC stack pid father > xenwatch D ffff880101f938c0 5056 49 2 0x00000000 > ffff880101305cb8 0000000000000246 ffff8801012a0760 00000000000138c0 > ffff880101305fd8 ffff880101304010 00000000000138c0 00000000000138c0 > ffff880101305fd8 00000000000138c0 ffff8800349224e0 ffff8801012a0760 > Call Trace: > [<ffffffff8175f444>] schedule+0x24/0x70 > [<ffffffff8154698d>] xenvif_disconnect+0x7d/0x130 > [<ffffffff81071fe0>] ? wake_up_bit+0x40/0x40 > [<ffffffff81545ac4>] frontend_changed+0x214/0x660 > [<ffffffff81080050>] ? finish_task_switch+0x60/0xd0 > [<ffffffff8141fb22>] xenbus_otherend_changed+0xb2/**0xc0 > [<ffffffff8175fe39>] ? _raw_spin_unlock_irqrestore+**0x19/0x20 > [<ffffffff8141fd3b>] frontend_changed+0xb/0x10 > [<ffffffff8141da3a>] xenwatch_thread+0xba/0x180 > [<ffffffff81071fe0>] ? wake_up_bit+0x40/0x40 > [<ffffffff8141d980>] ? xs_watch+0x60/0x60 > [<ffffffff81071926>] kthread+0xc6/0xd0 > [<ffffffff810037b9>] ? xen_end_context_switch+0x19/**0x20 > [<ffffffff81071860>] ? kthread_freezable_should_stop+**0x70/0x70 > [<ffffffff81767c7c>] ret_from_fork+0x7c/0xb0 > [<ffffffff81071860>] ? kthread_freezable_should_stop+**0x70/0x70 > > I''ll give building an updated dom0 kernel a shot, but was hoping this rang > a bell or two. > > Thanks, > -Chris > > ______________________________**_________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
On 2/1/13 8:01 PM, Wei Liu wrote:> Dose your Dom0 has very limited RAM? > > Just happened to fix a bug related to OOM not getting handled correctly. > > http://lists.xen.org/archives/html/xen-devel/2013-01/msg02549.htmlI''m not sure if it was under abnormal memory pressure. We''ve tuned things quite a bit to not swap thrash under worst-case conditions. But things happen. If this is a likely culprit then we''ll rebuild with this patch and we''ll give it a go. Thanks! -Chris
On Mon, 2013-02-04 at 16:12 +0000, Christopher S. Aker wrote:> On 2/1/13 8:01 PM, Wei Liu wrote: > > Dose your Dom0 has very limited RAM? > > > > Just happened to fix a bug related to OOM not getting handled correctly. > > > > http://lists.xen.org/archives/html/xen-devel/2013-01/msg02549.html > > I''m not sure if it was under abnormal memory pressure. We''ve tuned > things quite a bit to not swap thrash under worst-case conditions. But > things happen. > > If this is a likely culprit then we''ll rebuild with this patch and we''ll > give it a go. Thanks! >You might also need this one if you use user space event channel driver http://lists.xen.org/archives/html/xen-devel/2013-02/msg00191.html Wei.> -Chris > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
On Feb 1, 2013, at 8:01 PM, Wei Liu wrote:> Just happened to fix a bug related to OOM not getting handled correctly. > > http://lists.xen.org/archives/html/xen-devel/2013-01/msg02549.htmlRebuilt with the above patch, and have now hit this: BUG: unable to handle kernel NULL pointer dereference at 00000000000008b8 IP: [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80 PGD 82c63067 PUD 82640067 PMD 0 Oops: 0002 [#1] SMP Modules linked in: ebt_comment ebt_arp ebt_set ebt_limit ebt_ip6 ebt_ip ip_set_hash_net ip_set ebtable_nat xen_gntdev bonding ebtable_filter e1000e CPU 0 Pid: 1545, comm: netback/0 Not tainted 3.7.6-1-x86_64 #1 Supermicro X8DT6/X8DT6 RIP: e030:[<ffffffff81011dda>] [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80 RSP: e02b:ffff880083693b58 EFLAGS: 00010006 RAX: 0000000000000400 RBX: 00000000000008b8 RCX: 0000000000dabda3 RDX: 0000000000000001 RSI: 0000000000000210 RDI: 00000000000008b8 RBP: ffff880083693b78 R08: 000000000000005f R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000001 R13: 0000000000000200 R14: 0000000000000400 R15: 0000000000dabda3 FS: 00007f19e7f69700(0000) GS:ffff880100600000(0000) knlGS:0000000000000000 CS: e033 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000000000008b8 CR3: 0000000082413000 CR4: 0000000000002660 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process netback/0 (pid: 1545, threadinfo ffff880083692000, task ffff880083d2e740) Stack: 0000000000000210 00000000000008b8 ffff88005f807700 ffff88005f8077d8 ffff880083693b98 ffffffff817605da 0000000000000000 00000000000008b8 ffff880083693bd8 ffffffff8154446f ffff88005f807000 0000000000000000 Call Trace: [<ffffffff817605da>] _raw_spin_lock_irqsave+0x2a/0x40 [<ffffffff8154446f>] xen_netbk_schedule_xenvif+0x8f/0x100 [<ffffffff81544505>] xen_netbk_check_rx_xenvif+0x25/0x60 [<ffffffff815445eb>] netbk_tx_err+0x5b/0x70 [<ffffffff8154518c>] xen_netbk_tx_build_gops+0xb8c/0xbc0 [<ffffffff816357b9>] ? enqueue_to_backlog+0xb9/0x190 [<ffffffff8100140a>] ? xen_hypercall_event_channel_op+0xa/0x20 [<ffffffff81639284>] ? netif_rx+0x44/0xe0 [<ffffffff81015410>] ? do_softirq+0x50/0xa0 [<ffffffff815452af>] xen_netbk_kthread+0xef/0x9d0 [<ffffffff81080150>] ? finish_task_switch+0x60/0xd0 [<ffffffff810720c0>] ? wake_up_bit+0x40/0x40 [<ffffffff815451c0>] ? xen_netbk_tx_build_gops+0xbc0/0xbc0 [<ffffffff81071a06>] kthread+0xc6/0xd0 [<ffffffff810037b9>] ? xen_end_context_switch+0x19/0x20 [<ffffffff81071940>] ? kthread_freezable_should_stop+0x70/0x70 [<ffffffff8176847c>] ret_from_fork+0x7c/0xb0 [<ffffffff81071940>] ? kthread_freezable_should_stop+0x70/0x70 Code: 24 08 4c 89 6c 24 10 4c 89 74 24 18 49 89 f5 48 89 fb 41 81 e5 00 02 00 00 41 bc 01 00 00 00 41 be 00 04 00 00 44 89 f0 44 89 e2 <86> 13 84 d2 74 0b f3 90 80 3b 00 74 f3 ff c8 75 f5 84 d2 75 15 RIP [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80 RSP <ffff880083693b58> CR2: 00000000000008b8 ---[ end trace db6edd7c15bd6503 ]--- Plenty of RAM free (over 900M). Same symptoms: now xenwatch is in D state and hotplug stuff is timing out. -Chris
And another this afternoon on a different machine: BUG: unable to handle kernel NULL pointer dereference at 00000000000008b8 IP: [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80 PGD 0 Oops: 0002 [#1] SMP Modules linked in: ebt_comment ebt_arp ebt_set ebt_limit ebt_ip6 ebt_ip ip_set_hash_net ip_set ebtable_nat xen_gntdev bonding ebtable_filter e1000e CPU 5 Pid: 1550, comm: netback/5 Not tainted 3.7.6-1-x86_64 #1 Supermicro X8DT6/X8DT6 RIP: e030:[<ffffffff81011dda>] [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80 RSP: e02b:ffff8800836e7b58 EFLAGS: 00010006 RAX: 0000000000000400 RBX: 00000000000008b8 RCX: 000000000045de5d RDX: 0000000000000001 RSI: 0000000000000211 RDI: 00000000000008b8 RBP: ffff8800836e7b78 R08: 0000000000000068 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000001 R13: 0000000000000200 R14: 0000000000000400 R15: 000000000045de5d FS: 00007f474a465700(0000) GS:ffff880100740000(0000) knlGS:0000000000000000 CS: e033 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000000000008b8 CR3: 0000000001c0b000 CR4: 0000000000002660 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process netback/5 (pid: 1550, threadinfo ffff8800836e6000, task ffff880084510000) Stack: 0000000000000211 00000000000008b8 ffff8800771e5700 ffff8800771e57d8 ffff8800836e7b98 ffffffff817605da 0000000000000000 00000000000008b8 ffff8800836e7bd8 ffffffff8154446f ffff8800771e5000 0000000000000000 Call Trace: [<ffffffff817605da>] _raw_spin_lock_irqsave+0x2a/0x40 [<ffffffff8154446f>] xen_netbk_schedule_xenvif+0x8f/0x100 [<ffffffff81544505>] xen_netbk_check_rx_xenvif+0x25/0x60 [<ffffffff815445eb>] netbk_tx_err+0x5b/0x70 [<ffffffff8154518c>] xen_netbk_tx_build_gops+0xb8c/0xbc0 [<ffffffff81012880>] ? __switch_to+0x160/0x4f0 [<ffffffff810891b8>] ? idle_balance+0xf8/0x150 [<ffffffff81080150>] ? finish_task_switch+0x60/0xd0 [<ffffffff8175f7b4>] ? __schedule+0x394/0x750 [<ffffffff815452af>] xen_netbk_kthread+0xef/0x9d0 [<ffffffff81080150>] ? finish_task_switch+0x60/0xd0 [<ffffffff810720c0>] ? wake_up_bit+0x40/0x40 [<ffffffff815451c0>] ? xen_netbk_tx_build_gops+0xbc0/0xbc0 [<ffffffff81071a06>] kthread+0xc6/0xd0 [<ffffffff810037b9>] ? xen_end_context_switch+0x19/0x20 [<ffffffff81071940>] ? kthread_freezable_should_stop+0x70/0x70 [<ffffffff8176847c>] ret_from_fork+0x7c/0xb0 [<ffffffff81071940>] ? kthread_freezable_should_stop+0x70/0x70 Code: 24 08 4c 89 6c 24 10 4c 89 74 24 18 49 89 f5 48 89 fb 41 81 e5 00 02 00 00 41 bc 01 00 00 00 41 be 00 04 00 00 44 89 f0 44 89 e2 <86> 13 84 d2 74 0b f3 90 80 3b 00 74 f3 ff c8 75 f5 84 d2 75 15 RIP [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80 RSP <ffff8800836e7b58> CR2: 00000000000008b8 ---[ end trace 62de4ce454b1699e ]--- Code: 24 08 4c 89 6c 24 10 4c 89 74 24 18 49 89 f5 48 89 fb 41 81 e5 00 02 00 00 41 bc 01 00 00 00 41 be 00 04 00 00 44 89 f0 44 89 e2 <86> 13 84 d2 74 0b f3 90 80 3b 00 74 f3 ff c8 75 f5 84 d2 75 15 All code ======= 0: 24 08 and $0x8,%al 2: 4c 89 6c 24 10 mov %r13,0x10(%rsp) 7: 4c 89 74 24 18 mov %r14,0x18(%rsp) c: 49 89 f5 mov %rsi,%r13 f: 48 89 fb mov %rdi,%rbx 12: 41 81 e5 00 02 00 00 and $0x200,%r13d 19: 41 bc 01 00 00 00 mov $0x1,%r12d 1f: 41 be 00 04 00 00 mov $0x400,%r14d 25: 44 89 f0 mov %r14d,%eax 28: 44 89 e2 mov %r12d,%edx 2b:* 86 13 xchg %dl,(%rbx) <-- trapping instruction 2d: 84 d2 test %dl,%dl 2f: 74 0b je 0x3c 31: f3 90 pause 33: 80 3b 00 cmpb $0x0,(%rbx) 36: 74 f3 je 0x2b 38: ff c8 dec %eax 3a: 75 f5 jne 0x31 3c: 84 d2 test %dl,%dl 3e: 75 15 jne 0x55 Code starting with the faulting instruction ========================================== 0: 86 13 xchg %dl,(%rbx) 2: 84 d2 test %dl,%dl 4: 74 0b je 0x11 6: f3 90 pause 8: 80 3b 00 cmpb $0x0,(%rbx) b: 74 f3 je 0x0 d: ff c8 dec %eax f: 75 f5 jne 0x6 11: 84 d2 test %dl,%dl 13: 75 15 jne 0x2a static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable) { struct xen_spinlock *xl = (struct xen_spinlock *)lock; unsigned timeout; u8 oldval; u64 start_spin; ADD_STATS(taken, 1); start_spin = spin_time_start(); do { u64 start_spin_fast = spin_time_start(); timeout = TIMEOUT; asm("1: xchgb %1,%0\n" " testb %1,%1\n" " jz 3f\n" "2: rep;nop\n" " cmpb $0,%0\n" " je 1b\n" " dec %2\n" " jnz 2b\n" "3:\n" : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) : "1" (1) : "memory"); spin_time_accum_spinning(start_spin_fast); } while (unlikely(oldval != 0 && (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable)))); spin_time_accum_total(start_spin); } static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags) { __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags)); } (gdb) disassemble xen_spin_lock_flags Dump of assembler code for function xen_spin_lock_flags: 0xffffffff81011da0 <xen_spin_lock_flags+0>: push %rbp 0xffffffff81011da1 <xen_spin_lock_flags+1>: mov %rsp,%rbp 0xffffffff81011da4 <xen_spin_lock_flags+4>: sub $0x20,%rsp 0xffffffff81011da8 <xen_spin_lock_flags+8>: mov %rbx,(%rsp) 0xffffffff81011dac <xen_spin_lock_flags+12>: mov %r12,0x8(%rsp) 0xffffffff81011db1 <xen_spin_lock_flags+17>: mov %r13,0x10(%rsp) 0xffffffff81011db6 <xen_spin_lock_flags+22>: mov %r14,ffff81011dda0x18(%rsp) 0xffffffff81011dbb <xen_spin_lock_flags+27>: mov %rsi,%r13 0xffffffff81011dbe <xen_spin_lock_flags+30>: mov %rdi,%rbx 0xffffffff81011dc1 <xen_spin_lock_flags+33>: and $0x200,%r13d 0xffffffff81011dc8 <xen_spin_lock_flags+40>: mov $0x1,%r12d 0xffffffff81011dce <xen_spin_lock_flags+46>: mov $0x400,%r14d 0xffffffff81011dd4 <xen_spin_lock_flags+52>: mov %r14d,%eax 0xffffffff81011dd7 <xen_spin_lock_flags+55>: mov %r12d,%edx 0xffffffff81011dda <xen_spin_lock_flags+58>: xchg %dl,(%rbx) <-- 0xffffffff81011ddc <xen_spin_lock_flags+60>: test %dl,%dl 0xffffffff81011dde <xen_spin_lock_flags+62>: je 0xffffffff81011deb <xen_spin_lock_flags+75> 0xffffffff81011de0 <xen_spin_lock_flags+64>: pause 0xffffffff81011de2 <xen_spin_lock_flags+66>: cmpb $0x0,(%rbx) 0xffffffff81011de5 <xen_spin_lock_flags+69>: je 0xffffffff81011dda <xen_spin_lock_flags+58> 0xffffffff81011de7 <xen_spin_lock_flags+71>: dec %eax 0xffffffff81011de9 <xen_spin_lock_flags+73>: jne 0xffffffff81011de0 <xen_spin_lock_flags+64> 0xffffffff81011deb <xen_spin_lock_flags+75>: test %dl,%dl 0xffffffff81011ded <xen_spin_lock_flags+77>: jne 0xffffffff81011e04 <xen_spin_lock_flags+100> 0xffffffff81011def <xen_spin_lock_flags+79>: mov (%rsp),%rbx 0xffffffff81011df3 <xen_spin_lock_flags+83>: mov 0x8(%rsp),%r12 0xffffffff81011df8 <xen_spin_lock_flags+88>: mov 0x10(%rsp),%r13 0xffffffff81011dfd <xen_spin_lock_flags+93>: mov 0x18(%rsp),%r14 0xffffffff81011e02 <xen_spin_lock_flags+98>: leaveq 0xffffffff81011e03 <xen_spin_lock_flags+99>: retq 0xffffffff81011e04 <xen_spin_lock_flags+100>: xor %esi,%esi 0xffffffff81011e06 <xen_spin_lock_flags+102>: mov %rbx,%rdi 0xffffffff81011e09 <xen_spin_lock_flags+105>: test %r13,%r13 0xffffffff81011e0c <xen_spin_lock_flags+108>: setne %sil 0xffffffff81011e10 <xen_spin_lock_flags+112>: callq 0xffffffff81011ca0 <xen_spin_lock_slow> 0xffffffff81011e15 <xen_spin_lock_flags+117>: test %eax,%eax 0xffffffff81011e17 <xen_spin_lock_flags+119>: jne 0xffffffff81011def <xen_spin_lock_flags+79> 0xffffffff81011e19 <xen_spin_lock_flags+121>: jmp 0xffffffff81011dd4 <xen_spin_lock_flags+52> End of assembler dump. We''re not so good at this, but it looks like xl->lock deref is what we hit? The lock was gone? -Chris
On Sun, 2013-02-10 at 22:03 +0000, Christopher S. Aker wrote:> And another this afternoon on a different machine: > > BUG: unable to handle kernel NULL pointer dereference at 00000000000008b8OK, so the guest is faulting at different offset now. It is very likely that there is OOM / race condition in other places. And judging from your two emails, I presume this bug can be reproduce steadily.> IP: [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80 > PGD 0 > Oops: 0002 [#1] SMP > Modules linked in: ebt_comment ebt_arp ebt_set ebt_limit ebt_ip6 ebt_ip > ip_set_hash_net ip_set ebtable_nat xen_gntdev bonding ebtable_filter e1000e > CPU 5 > Pid: 1550, comm: netback/5 Not tainted 3.7.6-1-x86_64 #1 Supermicro > X8DT6/X8DT6 > RIP: e030:[<ffffffff81011dda>] [<ffffffff81011dda>] > xen_spin_lock_flags+0x3a/0x80 > RSP: e02b:ffff8800836e7b58 EFLAGS: 00010006 > RAX: 0000000000000400 RBX: 00000000000008b8 RCX: 000000000045de5d > RDX: 0000000000000001 RSI: 0000000000000211 RDI: 00000000000008b8 > RBP: ffff8800836e7b78 R08: 0000000000000068 R09: 0000000000000000 > R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000001 > R13: 0000000000000200 R14: 0000000000000400 R15: 000000000045de5d > FS: 00007f474a465700(0000) GS:ffff880100740000(0000) knlGS:0000000000000000 > CS: e033 DS: 0000 ES: 0000 CR0: 000000008005003b > CR2: 00000000000008b8 CR3: 0000000001c0b000 CR4: 0000000000002660 > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > Process netback/5 (pid: 1550, threadinfo ffff8800836e6000, task > ffff880084510000) > Stack: > 0000000000000211 00000000000008b8 ffff8800771e5700 ffff8800771e57d8 > ffff8800836e7b98 ffffffff817605da 0000000000000000 00000000000008b8 > ffff8800836e7bd8 ffffffff8154446f ffff8800771e5000 0000000000000000 > Call Trace: > [<ffffffff817605da>] _raw_spin_lock_irqsave+0x2a/0x40 > [<ffffffff8154446f>] xen_netbk_schedule_xenvif+0x8f/0x100 > [<ffffffff81544505>] xen_netbk_check_rx_xenvif+0x25/0x60 > [<ffffffff815445eb>] netbk_tx_err+0x5b/0x70 > [<ffffffff8154518c>] xen_netbk_tx_build_gops+0xb8c/0xbc0 > [<ffffffff81012880>] ? __switch_to+0x160/0x4f0 > [<ffffffff810891b8>] ? idle_balance+0xf8/0x150 > [<ffffffff81080150>] ? finish_task_switch+0x60/0xd0 > [<ffffffff8175f7b4>] ? __schedule+0x394/0x750 > [<ffffffff815452af>] xen_netbk_kthread+0xef/0x9d0 > [<ffffffff81080150>] ? finish_task_switch+0x60/0xd0 > [<ffffffff810720c0>] ? wake_up_bit+0x40/0x40 > [<ffffffff815451c0>] ? xen_netbk_tx_build_gops+0xbc0/0xbc0 > [<ffffffff81071a06>] kthread+0xc6/0xd0 > [<ffffffff810037b9>] ? xen_end_context_switch+0x19/0x20 > [<ffffffff81071940>] ? kthread_freezable_should_stop+0x70/0x70 > [<ffffffff8176847c>] ret_from_fork+0x7c/0xb0 > [<ffffffff81071940>] ? kthread_freezable_should_stop+0x70/0x70[snip]> > We''re not so good at this, but it looks like xl->lock deref is what we > hit? The lock was gone? >A quick check on the xen_spinlock struct, its offset should not be 0x8b8. Reading the backtrace suggests that it is the netbk struct is gone. Do you manipulate the number of vcpus Dom0 has after it''s up? Wei.> -Chris > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
On 2/11/13 6:45 AM, Wei Liu wrote:> OK, so the guest is faulting at different offset now. It is very likely > that there is OOM / race condition in other places. And judging from > your two emails, I presume this bug can be reproduce steadily.It is frequent enough to be a problem, yes. We''re constantly deploying new servers and upgrading the stack on existing ones, so testing changes won''t be a problem.> A quick check on the xen_spinlock struct, its offset should not be > 0x8b8. Reading the backtrace suggests that it is the netbk struct is > gone.Great, we''ve narrowed it down some.> Do you manipulate the number of vcpus Dom0 has after it''s up?No, we do not. Thanks, -Chris
On Mon, 2013-02-11 at 16:17 +0000, Christopher S. Aker wrote:> On 2/11/13 6:45 AM, Wei Liu wrote: > > OK, so the guest is faulting at different offset now. It is very likely > > that there is OOM / race condition in other places. And judging from > > your two emails, I presume this bug can be reproduce steadily. > > It is frequent enough to be a problem, yes. We''re constantly deploying > new servers and upgrading the stack on existing ones, so testing changes > won''t be a problem. > > > A quick check on the xen_spinlock struct, its offset should not be > > 0x8b8. Reading the backtrace suggests that it is the netbk struct is > > gone. > > Great, we''ve narrowed it down some. > > > Do you manipulate the number of vcpus Dom0 has after it''s up? > > No, we do not. >Ha, strange. But a second thought come to me that even if you manipulate number of cpus of Dom0, it should not cause problem like this. Could you post your kernel config file. Wei.> Thanks, > -Chris >
On 2/11/13 11:57 AM, Wei Liu wrote:> Could you post your kernel config file.Config, syms and binaries are here: http://www.theshore.net/~caker/xen/BUGS/netback/ Thank you for looking into this. -Chris
On Mon, 2013-02-11 at 11:45 +0000, Wei Liu wrote:> On Sun, 2013-02-10 at 22:03 +0000, Christopher S. Aker wrote: > > And another this afternoon on a different machine: > > > > BUG: unable to handle kernel NULL pointer dereference at 00000000000008b8 > > OK, so the guest is faulting at different offset now. It is very likely > that there is OOM / race condition in other places. And judging from > your two emails, I presume this bug can be reproduce steadily. > > > IP: [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80 > > PGD 0 > > Oops: 0002 [#1] SMP > > Modules linked in: ebt_comment ebt_arp ebt_set ebt_limit ebt_ip6 ebt_ip > > ip_set_hash_net ip_set ebtable_nat xen_gntdev bonding ebtable_filter e1000e > > CPU 5 > > Pid: 1550, comm: netback/5 Not tainted 3.7.6-1-x86_64 #1 Supermicro > > X8DT6/X8DT6 > > RIP: e030:[<ffffffff81011dda>] [<ffffffff81011dda>] > > xen_spin_lock_flags+0x3a/0x80 > > RSP: e02b:ffff8800836e7b58 EFLAGS: 00010006 > > RAX: 0000000000000400 RBX: 00000000000008b8 RCX: 000000000045de5d > > RDX: 0000000000000001 RSI: 0000000000000211 RDI: 00000000000008b8 > > RBP: ffff8800836e7b78 R08: 0000000000000068 R09: 0000000000000000 > > R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000001 > > R13: 0000000000000200 R14: 0000000000000400 R15: 000000000045de5d > > FS: 00007f474a465700(0000) GS:ffff880100740000(0000) knlGS:0000000000000000 > > CS: e033 DS: 0000 ES: 0000 CR0: 000000008005003b > > CR2: 00000000000008b8 CR3: 0000000001c0b000 CR4: 0000000000002660 > > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > > DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > > Process netback/5 (pid: 1550, threadinfo ffff8800836e6000, task > > ffff880084510000) > > Stack: > > 0000000000000211 00000000000008b8 ffff8800771e5700 ffff8800771e57d8 > > ffff8800836e7b98 ffffffff817605da 0000000000000000 00000000000008b8 > > ffff8800836e7bd8 ffffffff8154446f ffff8800771e5000 0000000000000000 > > Call Trace: > > [<ffffffff817605da>] _raw_spin_lock_irqsave+0x2a/0x40 > > [<ffffffff8154446f>] xen_netbk_schedule_xenvif+0x8f/0x100 > > [<ffffffff81544505>] xen_netbk_check_rx_xenvif+0x25/0x60 > > [<ffffffff815445eb>] netbk_tx_err+0x5b/0x70 > > [<ffffffff8154518c>] xen_netbk_tx_build_gops+0xb8c/0xbc0 > > [<ffffffff81012880>] ? __switch_to+0x160/0x4f0 > > [<ffffffff810891b8>] ? idle_balance+0xf8/0x150 > > [<ffffffff81080150>] ? finish_task_switch+0x60/0xd0 > > [<ffffffff8175f7b4>] ? __schedule+0x394/0x750 > > [<ffffffff815452af>] xen_netbk_kthread+0xef/0x9d0 > > [<ffffffff81080150>] ? finish_task_switch+0x60/0xd0 > > [<ffffffff810720c0>] ? wake_up_bit+0x40/0x40 > > [<ffffffff815451c0>] ? xen_netbk_tx_build_gops+0xbc0/0xbc0 > > [<ffffffff81071a06>] kthread+0xc6/0xd0 > > [<ffffffff810037b9>] ? xen_end_context_switch+0x19/0x20 > > [<ffffffff81071940>] ? kthread_freezable_should_stop+0x70/0x70 > > [<ffffffff8176847c>] ret_from_fork+0x7c/0xb0 > > [<ffffffff81071940>] ? kthread_freezable_should_stop+0x70/0x70 > [snip] > > > > We''re not so good at this, but it looks like xl->lock deref is what we > > hit? The lock was gone? > > > > A quick check on the xen_spinlock struct, its offset should not be > 0x8b8.This originally came from "&netbk->net_schedule_list_lock" in xen_netbk_schedule_xenvif so I guess most of the 0x8b8 came from the offset of net_schedule_list_lock.> Reading the backtrace suggests that it is the netbk struct is > gone.Yes. It would be interesting to add if (!netbk) netdev_err(vif->dev, "vif has no associated netbk!"); and also to add prints to xen_netbk_add_xenvif() and xen_netbk_remove_xenvif() to track to setup and teardown of the vif<->netbk relationships (these are infrequent, only when a vif is opened/closed, so it might be that dumping a stack trace is plausible/useful especially on the teardown). It would also be useful to confirm that the netbk selected in xen_netbk_add_xenvif is non-NULL and that its index relates sanely to xen_netbk_group_nr. There should be no way for a vif to get on the schedule list without being associated with a non-NULL netbk. Here the call chain is through xen_netbk_tx_build_gops -> netbk_tx_err -> xen_netbk_check_rx_xenvif. However the netback variable in xen_netbk_tx_build_gops has been used several times before we even get near any call to netbk_tx_err. I suppose adding a check if (vif->netbk != netbk) netdev_err(vif->dev, "has netbk %p should be %p!"); right after the !vif check at the top of the loop would also be interesting. Have you applied the XSA-39 fixes to this kernel? Every invocation of netbk_tx_err *should* have an associated error print, I think, at least after that change, if you are before it would be worth just checking. Either way you''ll need to turn on debugging (or s/pr_dbg/pr_err/ in netback.c). Knowing which call to tx_err occurred might yield a clue. Ian.
On Mon, 2013-02-11 at 18:44 +0000, Christopher S. Aker wrote:> On 2/11/13 11:57 AM, Wei Liu wrote: > > Could you post your kernel config file. > > Config, syms and binaries are here: > > http://www.theshore.net/~caker/xen/BUGS/netback/ > > Thank you for looking into this. >Judging from the statements I assume this is mostly a kernel issue. It would be better if you provide uncompressed vmlinux and symbols. Also please add some debug printk as Ian suggested. We cannot duplicate your setup entirely so it might be very hard to reproduce this problem. Wei.> -Chris >
On 2/12/13 4:58 AM, Ian Campbell wrote:> Have you applied the XSA-39 fixes to this kernel?Yes! When I rebuilt with Wei''s suggested patch for my original netback/xenwatch problem I also brought us up to date with XSA patches. We just hit the same (new) problem on another machine, and looking at the BUG with more kernel output context gives a giant clue: Feb 12 20:30:54: IPv6: ADDRCONF(NETDEV_UP): vif21.0: link is not ready Feb 12 20:30:54: device vif21.0 entered promiscuous mode Feb 12 20:30:56: xen-blkback:ring-ref 8, event-channel 31, protocol 2 (x86_32-abi) Feb 12 20:30:56: xen-blkback:ring-ref 9, event-channel 32, protocol 2 (x86_32-abi) Feb 12 20:30:56: IPv6: ADDRCONF(NETDEV_CHANGE): vif21.0: link becomes ready Feb 12 20:30:56: br0: port 5(vif21.0) entered forwarding state Feb 12 20:30:56: br0: port 5(vif21.0) entered forwarding state Feb 12 20:30:58: br0: port 5(vif21.0) entered forwarding state Feb 12 20:34:12: vif vif-21-0 vif21.0: Frag is bigger than frame. Feb 12 20:34:12: vif vif-21-0 vif21.0: fatal error; disabling device <-------------- Feb 12 20:34:12: BUG: unable to handle kernel NULL pointer dereference at 00000000000008b8 Feb 12 20:34:12: IP: [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80 Feb 12 20:34:12: PGD 0 Feb 12 20:34:12: Oops: 0002 [#1] SMP Feb 12 20:34:12: Modules linked in: ebt_comment ebt_arp ebt_set ebt_limit ebt_ip6 ebt_ip ip_set_hash_net ip_set ebtable_nat xen_gntdev bonding ebtable_filter e1000e Feb 12 20:34:12: CPU 3 Feb 12 20:34:12: Pid: 1548, comm: netback/3 Not tainted 3.7.6-1-x86_64 #1 Supermicro X8DT6/X8DT6 Feb 12 20:34:12: RIP: e030:[<ffffffff81011dda>] [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80 Feb 12 20:34:12: RSP: e02b:ffff880083681b58 EFLAGS: 00010006 Feb 12 20:34:12: RAX: 0000000000000400 RBX: 00000000000008b8 RCX: 0000000000000663 Feb 12 20:34:12: RDX: 0000000000000001 RSI: 0000000000000210 RDI: 00000000000008b8 Feb 12 20:34:12: RBP: ffff880083681b78 R08: 000000000000000d R09: 0000000000000000 Feb 12 20:34:12: R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000001 Feb 12 20:34:12: R13: 0000000000000200 R14: 0000000000000400 R15: 0000000000000663 Feb 12 20:34:12: FS: 00007f2bc1fb2700(0000) GS:ffff8801006c0000(0000) knlGS:0000000000000000 Feb 12 20:34:12: CS: e033 DS: 0000 ES: 0000 CR0: 000000008005003b Feb 12 20:34:12: CR2: 00000000000008b8 CR3: 0000000001c0b000 CR4: 0000000000002660 Feb 12 20:34:12: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 Feb 12 20:34:12: DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Feb 12 20:34:12: Process netback/3 (pid: 1548, threadinfo ffff880083680000, task ffff8800837ec9c0) Feb 12 20:34:12: Stack: Feb 12 20:34:12: 0000000000000210 00000000000008b8 ffff880003baa700 ffff880003baa7d8 Feb 12 20:34:12: ffff880083681b98 ffffffff817605da 0000000000000000 00000000000008b8 Feb 12 20:34:12: ffff880083681bd8 ffffffff8154446f ffff880003baa000 0000000000000000 Feb 12 20:34:12: Call Trace: Feb 12 20:34:12: [<ffffffff817605da>] _raw_spin_lock_irqsave+0x2a/0x40 Feb 12 20:34:12: [<ffffffff8154446f>] xen_netbk_schedule_xenvif+0x8f/0x100 Feb 12 20:34:12: [<ffffffff81544505>] xen_netbk_check_rx_xenvif+0x25/0x60 Feb 12 20:34:12: [<ffffffff815445eb>] netbk_tx_err+0x5b/0x70 Feb 12 20:34:12: [<ffffffff8154518c>] xen_netbk_tx_build_gops+0xb8c/0xbc0 Feb 12 20:34:12: [<ffffffff81012880>] ? __switch_to+0x160/0x4f0 Feb 12 20:34:12: [<ffffffff810891b8>] ? idle_balance+0xf8/0x150 Feb 12 20:34:12: [<ffffffff81080150>] ? finish_task_switch+0x60/0xd0 Feb 12 20:34:12: [<ffffffff8175f7b4>] ? __schedule+0x394/0x750 Feb 12 20:34:12: [<ffffffff815452af>] xen_netbk_kthread+0xef/0x9d0 Feb 12 20:34:12: [<ffffffff81080150>] ? finish_task_switch+0x60/0xd0 Feb 12 20:34:12: [<ffffffff810720c0>] ? wake_up_bit+0x40/0x40 Feb 12 20:34:12: [<ffffffff815451c0>] ? xen_netbk_tx_build_gops+0xbc0/0xbc0 Feb 12 20:34:12: [<ffffffff81071a06>] kthread+0xc6/0xd0 Feb 12 20:34:12: [<ffffffff810037b9>] ? xen_end_context_switch+0x19/0x20 Feb 12 20:34:12: [<ffffffff81071940>] ? kthread_freezable_should_stop+0x70/0x70 Feb 12 20:34:12: [<ffffffff8176847c>] ret_from_fork+0x7c/0xb0 Feb 12 20:34:12: [<ffffffff81071940>] ? kthread_freezable_should_stop+0x70/0x70 Feb 12 20:34:12: Code: 24 08 4c 89 6c 24 10 4c 89 74 24 18 49 89 f5 48 89 fb 41 81 e5 00 02 00 00 41 bc 01 00 00 00 41 be 00 04 00 00 44 89 f0 44 89 e2 <86> 13 84 d2 74 0b f3 90 80 3b 00 74 f3 ff c8 75 f5 84 d2 75 15 Feb 12 20:34:12: RIP [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80 Feb 12 20:34:12: RSP <ffff880083681b58> Feb 12 20:34:12: CR2: 00000000000008b8 Feb 12 20:34:12: ---[ end trace ae243211c8c8cba5 ]--- https://lkml.org/lkml/2013/2/12/575 - "xen/netback: shut down the ring if it contains garbage" -Chris
On Wed, 2013-02-13 at 02:51 +0000, Christopher S. Aker wrote:> On 2/12/13 4:58 AM, Ian Campbell wrote: > > Have you applied the XSA-39 fixes to this kernel? > > Yes! When I rebuilt with Wei''s suggested patch for my original > netback/xenwatch problem I also brought us up to date with XSA patches. > > We just hit the same (new) problem on another machine, and looking at > the BUG with more kernel output context gives a giant clue: > > Feb 12 20:30:54: IPv6: ADDRCONF(NETDEV_UP): vif21.0: link is not ready > Feb 12 20:30:54: device vif21.0 entered promiscuous mode > Feb 12 20:30:56: xen-blkback:ring-ref 8, event-channel 31, protocol 2 > (x86_32-abi) > Feb 12 20:30:56: xen-blkback:ring-ref 9, event-channel 32, protocol 2 > (x86_32-abi) > Feb 12 20:30:56: IPv6: ADDRCONF(NETDEV_CHANGE): vif21.0: link becomes ready > Feb 12 20:30:56: br0: port 5(vif21.0) entered forwarding state > Feb 12 20:30:56: br0: port 5(vif21.0) entered forwarding state > Feb 12 20:30:58: br0: port 5(vif21.0) entered forwarding state > Feb 12 20:34:12: vif vif-21-0 vif21.0: Frag is bigger than frame. > Feb 12 20:34:12: vif vif-21-0 vif21.0: fatal error; disabling device > <-------------- > Feb 12 20:34:12: BUG: unable to handle kernel NULL pointer dereference > at 00000000000008b8Good to have more context. So the vif in question is disabled and unlinked from netbk but it gets rescheduled? Could you try something in xen_netbk_schedule_xenvif if (vif->netbk == NULL) netdev_err(vif->dev, "OOPS\n"); You should be able to get the device name. If it is the same name as the disabled device, we might get a bug somewhere alone the scheduling path. Wei.> Feb 12 20:34:12: IP: [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80 > Feb 12 20:34:12: PGD 0 > Feb 12 20:34:12: Oops: 0002 [#1] SMP > Feb 12 20:34:12: Modules linked in: ebt_comment ebt_arp ebt_set > ebt_limit ebt_ip6 ebt_ip ip_set_hash_net ip_set ebtable_nat xen_gntdev > bonding ebtable_filter e1000e > Feb 12 20:34:12: CPU 3 > Feb 12 20:34:12: Pid: 1548, comm: netback/3 Not tainted 3.7.6-1-x86_64 > #1 Supermicro X8DT6/X8DT6 > Feb 12 20:34:12: RIP: e030:[<ffffffff81011dda>] [<ffffffff81011dda>] > xen_spin_lock_flags+0x3a/0x80 > Feb 12 20:34:12: RSP: e02b:ffff880083681b58 EFLAGS: 00010006 > Feb 12 20:34:12: RAX: 0000000000000400 RBX: 00000000000008b8 RCX: > 0000000000000663 > Feb 12 20:34:12: RDX: 0000000000000001 RSI: 0000000000000210 RDI: > 00000000000008b8 > Feb 12 20:34:12: RBP: ffff880083681b78 R08: 000000000000000d R09: > 0000000000000000 > Feb 12 20:34:12: R10: 0000000000000001 R11: 0000000000000001 R12: > 0000000000000001 > Feb 12 20:34:12: R13: 0000000000000200 R14: 0000000000000400 R15: > 0000000000000663 > Feb 12 20:34:12: FS: 00007f2bc1fb2700(0000) GS:ffff8801006c0000(0000) > knlGS:0000000000000000 > Feb 12 20:34:12: CS: e033 DS: 0000 ES: 0000 CR0: 000000008005003b > Feb 12 20:34:12: CR2: 00000000000008b8 CR3: 0000000001c0b000 CR4: > 0000000000002660 > Feb 12 20:34:12: DR0: 0000000000000000 DR1: 0000000000000000 DR2: > 0000000000000000 > Feb 12 20:34:12: DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: > 0000000000000400 > Feb 12 20:34:12: Process netback/3 (pid: 1548, threadinfo > ffff880083680000, task ffff8800837ec9c0) > Feb 12 20:34:12: Stack: > Feb 12 20:34:12: 0000000000000210 00000000000008b8 ffff880003baa700 > ffff880003baa7d8 > Feb 12 20:34:12: ffff880083681b98 ffffffff817605da 0000000000000000 > 00000000000008b8 > Feb 12 20:34:12: ffff880083681bd8 ffffffff8154446f ffff880003baa000 > 0000000000000000 > Feb 12 20:34:12: Call Trace: > Feb 12 20:34:12: [<ffffffff817605da>] _raw_spin_lock_irqsave+0x2a/0x40 > Feb 12 20:34:12: [<ffffffff8154446f>] xen_netbk_schedule_xenvif+0x8f/0x100 > Feb 12 20:34:12: [<ffffffff81544505>] xen_netbk_check_rx_xenvif+0x25/0x60 > Feb 12 20:34:12: [<ffffffff815445eb>] netbk_tx_err+0x5b/0x70 > Feb 12 20:34:12: [<ffffffff8154518c>] xen_netbk_tx_build_gops+0xb8c/0xbc0 > Feb 12 20:34:12: [<ffffffff81012880>] ? __switch_to+0x160/0x4f0 > Feb 12 20:34:12: [<ffffffff810891b8>] ? idle_balance+0xf8/0x150 > Feb 12 20:34:12: [<ffffffff81080150>] ? finish_task_switch+0x60/0xd0 > Feb 12 20:34:12: [<ffffffff8175f7b4>] ? __schedule+0x394/0x750 > Feb 12 20:34:12: [<ffffffff815452af>] xen_netbk_kthread+0xef/0x9d0 > Feb 12 20:34:12: [<ffffffff81080150>] ? finish_task_switch+0x60/0xd0 > Feb 12 20:34:12: [<ffffffff810720c0>] ? wake_up_bit+0x40/0x40 > Feb 12 20:34:12: [<ffffffff815451c0>] ? xen_netbk_tx_build_gops+0xbc0/0xbc0 > Feb 12 20:34:12: [<ffffffff81071a06>] kthread+0xc6/0xd0 > Feb 12 20:34:12: [<ffffffff810037b9>] ? xen_end_context_switch+0x19/0x20 > Feb 12 20:34:12: [<ffffffff81071940>] ? > kthread_freezable_should_stop+0x70/0x70 > Feb 12 20:34:12: [<ffffffff8176847c>] ret_from_fork+0x7c/0xb0 > Feb 12 20:34:12: [<ffffffff81071940>] ? > kthread_freezable_should_stop+0x70/0x70 > Feb 12 20:34:12: Code: 24 08 4c 89 6c 24 10 4c 89 74 24 18 49 89 f5 48 > 89 fb 41 81 e5 00 02 00 00 41 bc 01 00 00 00 41 be 00 04 00 00 44 89 f0 > 44 89 e2 <86> 13 84 d2 74 0b f3 90 80 3b 00 74 f3 ff c8 75 f5 84 d2 75 15 > Feb 12 20:34:12: RIP [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80 > Feb 12 20:34:12: RSP <ffff880083681b58> > Feb 12 20:34:12: CR2: 00000000000008b8 > Feb 12 20:34:12: ---[ end trace ae243211c8c8cba5 ]--- > > https://lkml.org/lkml/2013/2/12/575 - "xen/netback: shut down the ring > if it contains garbage" > > -Chris > > >
>>> On 13.02.13 at 03:51, "Christopher S. Aker" <caker@theshore.net> wrote: > Feb 12 20:34:12: vif vif-21-0 vif21.0: Frag is bigger than frame. > Feb 12 20:34:12: vif vif-21-0 vif21.0: fatal error; disabling device > <-------------- > Feb 12 20:34:12: BUG: unable to handle kernel NULL pointer dereference at 00000000000008b8 >... > Feb 12 20:34:12: Call Trace: > Feb 12 20:34:12: [<ffffffff817605da>] _raw_spin_lock_irqsave+0x2a/0x40 > Feb 12 20:34:12: [<ffffffff8154446f>] xen_netbk_schedule_xenvif+0x8f/0x100 > Feb 12 20:34:12: [<ffffffff81544505>] xen_netbk_check_rx_xenvif+0x25/0x60 > Feb 12 20:34:12: [<ffffffff815445eb>] netbk_tx_err+0x5b/0x70 > Feb 12 20:34:12: [<ffffffff8154518c>] xen_netbk_tx_build_gops+0xb8c/0xbc0 > Feb 12 20:34:12: [<ffffffff81012880>] ? __switch_to+0x160/0x4f0 > Feb 12 20:34:12: [<ffffffff810891b8>] ? idle_balance+0xf8/0x150 > Feb 12 20:34:12: [<ffffffff81080150>] ? finish_task_switch+0x60/0xd0 > Feb 12 20:34:12: [<ffffffff8175f7b4>] ? __schedule+0x394/0x750 > Feb 12 20:34:12: [<ffffffff815452af>] xen_netbk_kthread+0xef/0x9d0 > Feb 12 20:34:12: [<ffffffff81080150>] ? finish_task_switch+0x60/0xd0 > Feb 12 20:34:12: [<ffffffff810720c0>] ? wake_up_bit+0x40/0x40 > Feb 12 20:34:12: [<ffffffff815451c0>] ? xen_netbk_tx_build_gops+0xbc0/0xbc0 > Feb 12 20:34:12: [<ffffffff81071a06>] kthread+0xc6/0xd0 > Feb 12 20:34:12: [<ffffffff810037b9>] ? xen_end_context_switch+0x19/0x20 > Feb 12 20:34:12: [<ffffffff81071940>] ? kthread_freezable_should_stop+0x70/0x70 > Feb 12 20:34:12: [<ffffffff8176847c>] ret_from_fork+0x7c/0xb0 > Feb 12 20:34:12: [<ffffffff81071940>] ? kthread_freezable_should_stop+0x70/0x70I think the root cause is the same as for the problem reported on the !classic" kernels - we should not blindly shut down everything on a fatal error. Instead I think we ought to set a flag on the xenvif and disassociate the two in a more controlled manner. On the pv-ops tree, that would likely be just at the bottom of the main loop in xen_netbk_kthread(), with the caveat that there needs to be a way to identify the busted xenvif(s). On the classic tree, this apparently could be done directly in net_tx_action() (and hence can be done in netbk_fatal_tx_err() in place of the call to xenvif_carrier_off()), but the scheduled piece of code would then need to sync with both tasklets. Of course there''s nothing preventing the pv-ops solution to be similar to this (allowing easier adding back of tasklet support, which - as I already told you elsewhere - appears to address throughput and/or CPU utilization problems people reported to us with the kthreads approach). Jan
On Wed, 2013-02-13 at 02:51 +0000, Christopher S. Aker wrote:> On 2/12/13 4:58 AM, Ian Campbell wrote: > > Have you applied the XSA-39 fixes to this kernel? > > Yes! When I rebuilt with Wei''s suggested patch for my original > netback/xenwatch problem I also brought us up to date with XSA patches. > > We just hit the same (new) problem on another machine, and looking at > the BUG with more kernel output context gives a giant clue: > > Feb 12 20:30:54: IPv6: ADDRCONF(NETDEV_UP): vif21.0: link is not ready > Feb 12 20:30:54: device vif21.0 entered promiscuous mode > Feb 12 20:30:56: xen-blkback:ring-ref 8, event-channel 31, protocol 2 > (x86_32-abi) > Feb 12 20:30:56: xen-blkback:ring-ref 9, event-channel 32, protocol 2 > (x86_32-abi) > Feb 12 20:30:56: IPv6: ADDRCONF(NETDEV_CHANGE): vif21.0: link becomes ready > Feb 12 20:30:56: br0: port 5(vif21.0) entered forwarding state > Feb 12 20:30:56: br0: port 5(vif21.0) entered forwarding state > Feb 12 20:30:58: br0: port 5(vif21.0) entered forwarding state > Feb 12 20:34:12: vif vif-21-0 vif21.0: Frag is bigger than frame. > Feb 12 20:34:12: vif vif-21-0 vif21.0: fatal error; disabling device > <-------------- > Feb 12 20:34:12: BUG: unable to handle kernel NULL pointer dereference > at 00000000000008b8 > Feb 12 20:34:12: IP: [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80 > Feb 12 20:34:12: PGD 0 > Feb 12 20:34:12: Oops: 0002 [#1] SMP > Feb 12 20:34:12: Modules linked in: ebt_comment ebt_arp ebt_set > ebt_limit ebt_ip6 ebt_ip ip_set_hash_net ip_set ebtable_nat xen_gntdev > bonding ebtable_filter e1000e > Feb 12 20:34:12: CPU 3 > Feb 12 20:34:12: Pid: 1548, comm: netback/3 Not tainted 3.7.6-1-x86_64 > #1 Supermicro X8DT6/X8DT6 > Feb 12 20:34:12: RIP: e030:[<ffffffff81011dda>] [<ffffffff81011dda>] > xen_spin_lock_flags+0x3a/0x80 > Feb 12 20:34:12: RSP: e02b:ffff880083681b58 EFLAGS: 00010006 > Feb 12 20:34:12: RAX: 0000000000000400 RBX: 00000000000008b8 RCX: > 0000000000000663 > Feb 12 20:34:12: RDX: 0000000000000001 RSI: 0000000000000210 RDI: > 00000000000008b8 > Feb 12 20:34:12: RBP: ffff880083681b78 R08: 000000000000000d R09: > 0000000000000000 > Feb 12 20:34:12: R10: 0000000000000001 R11: 0000000000000001 R12: > 0000000000000001 > Feb 12 20:34:12: R13: 0000000000000200 R14: 0000000000000400 R15: > 0000000000000663 > Feb 12 20:34:12: FS: 00007f2bc1fb2700(0000) GS:ffff8801006c0000(0000) > knlGS:0000000000000000 > Feb 12 20:34:12: CS: e033 DS: 0000 ES: 0000 CR0: 000000008005003b > Feb 12 20:34:12: CR2: 00000000000008b8 CR3: 0000000001c0b000 CR4: > 0000000000002660 > Feb 12 20:34:12: DR0: 0000000000000000 DR1: 0000000000000000 DR2: > 0000000000000000 > Feb 12 20:34:12: DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: > 0000000000000400 > Feb 12 20:34:12: Process netback/3 (pid: 1548, threadinfo > ffff880083680000, task ffff8800837ec9c0) > Feb 12 20:34:12: Stack: > Feb 12 20:34:12: 0000000000000210 00000000000008b8 ffff880003baa700 > ffff880003baa7d8 > Feb 12 20:34:12: ffff880083681b98 ffffffff817605da 0000000000000000 > 00000000000008b8 > Feb 12 20:34:12: ffff880083681bd8 ffffffff8154446f ffff880003baa000 > 0000000000000000Fancy trying this *UNTESTED* patch a bit? I look through the code and there seems to be an error. Jan''s advice is worth consideration, but I think we should fix this XSA bug first. Wei. -----8<---- commit 6a06b51edd7124193322fd62244171eb099aff52 Author: Wei Liu <wei.liu2@citrix.com> Date: Wed Feb 13 18:17:01 2013 +0000 netback: fix netbk_count_requests Signed-off-by: Wei Liu <wei.liu2@citrix.com> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 103294d..61aaeb0 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -927,7 +927,11 @@ static int netbk_count_requests(struct xenvif *vif, if (txp->size > first->size) { netdev_err(vif->dev, "Frag is bigger than frame.\n"); netbk_fatal_tx_err(vif); - return -frags; + /* frag could be zero at this point if it + * fails during first iteration, so we need to + * set it to negative number if there is + * error */ + return frags ? -frags : -1; } first->size -= txp->size;
A slightly upgraded version of the *UNTESTED* patch. Wei. ----8<---- commit df4c929d034cec7043fbd96ba89833eb639c336e Author: Wei Liu <wei.liu2@citrix.com> Date: Wed Feb 13 18:17:01 2013 +0000 netback: fix netbk_count_requests There are two paths in the original code, a) test against work_to_do, b) test against first->size, could return 0 even when error happens. Simply return -1 in error paths should work. Modify all error paths to return -1 to be consistent. Signed-off-by: Wei Liu <wei.liu2@citrix.com> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 103294d..0e0162e 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -913,13 +913,13 @@ static int netbk_count_requests(struct xenvif *vif, if (frags >= work_to_do) { netdev_err(vif->dev, "Need more frags\n"); netbk_fatal_tx_err(vif); - return -frags; + return -1; } if (unlikely(frags >= MAX_SKB_FRAGS)) { netdev_err(vif->dev, "Too many frags\n"); netbk_fatal_tx_err(vif); - return -frags; + return -1; } memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags), @@ -927,7 +927,7 @@ static int netbk_count_requests(struct xenvif *vif, if (txp->size > first->size) { netdev_err(vif->dev, "Frag is bigger than frame.\n"); netbk_fatal_tx_err(vif); - return -frags; + return -1; } first->size -= txp->size; @@ -937,7 +937,7 @@ static int netbk_count_requests(struct xenvif *vif, netdev_err(vif->dev, "txp->offset: %x, size: %u\n", txp->offset, txp->size); netbk_fatal_tx_err(vif); - return -frags; + return -1; } } while ((txp++)->flags & XEN_NETTXF_more_data); return frags;
On 13/02/13 18:37, Wei Liu wrote:> A slightly upgraded version of the *UNTESTED* patch. > > > Wei. > > ----8<---- > commit df4c929d034cec7043fbd96ba89833eb639c336e > Author: Wei Liu <wei.liu2@citrix.com> > Date: Wed Feb 13 18:17:01 2013 +0000 > > netback: fix netbk_count_requests > > There are two paths in the original code, a) test against work_to_do, b) test > against first->size, could return 0 even when error happens. > > Simply return -1 in error paths should work. Modify all error paths to return > -1 to be consistent.You also need to remove the netbk_tx_err() after checking the result of netbk_count_requests(). Otherwise you will have a double xenvif_put(), which will screw up ref counting. I would also suggest returning -EINVAL from netbk_count_requests(). It not clear to me how this will fix the original oops though. David> > Signed-off-by: Wei Liu <wei.liu2@citrix.com> > > diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c > index 103294d..0e0162e 100644 > --- a/drivers/net/xen-netback/netback.c > +++ b/drivers/net/xen-netback/netback.c > @@ -913,13 +913,13 @@ static int netbk_count_requests(struct xenvif *vif, > if (frags >= work_to_do) { > netdev_err(vif->dev, "Need more frags\n"); > netbk_fatal_tx_err(vif); > - return -frags; > + return -1; > } > > if (unlikely(frags >= MAX_SKB_FRAGS)) { > netdev_err(vif->dev, "Too many frags\n"); > netbk_fatal_tx_err(vif); > - return -frags; > + return -1; > } > > memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags), > @@ -927,7 +927,7 @@ static int netbk_count_requests(struct xenvif *vif, > if (txp->size > first->size) { > netdev_err(vif->dev, "Frag is bigger than frame.\n"); > netbk_fatal_tx_err(vif); > - return -frags; > + return -1; > } > > first->size -= txp->size; > @@ -937,7 +937,7 @@ static int netbk_count_requests(struct xenvif *vif, > netdev_err(vif->dev, "txp->offset: %x, size: %u\n", > txp->offset, txp->size); > netbk_fatal_tx_err(vif); > - return -frags; > + return -1; > } > } while ((txp++)->flags & XEN_NETTXF_more_data); > return frags; > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
On Wed, 2013-02-13 at 19:20 +0000, David Vrabel wrote:> On 13/02/13 18:37, Wei Liu wrote: > > A slightly upgraded version of the *UNTESTED* patch. > > > > > > Wei. > > > > ----8<---- > > commit df4c929d034cec7043fbd96ba89833eb639c336e > > Author: Wei Liu <wei.liu2@citrix.com> > > Date: Wed Feb 13 18:17:01 2013 +0000 > > > > netback: fix netbk_count_requests > > > > There are two paths in the original code, a) test against work_to_do, b) test > > against first->size, could return 0 even when error happens. > > > > Simply return -1 in error paths should work. Modify all error paths to return > > -1 to be consistent. > > You also need to remove the netbk_tx_err() after checking the result of > netbk_count_requests(). Otherwise you will have a double xenvif_put(), > which will screw up ref counting. >Yes I saw that as well. I was suspecting it was done on purpose. I didn''t write this patch anyway. I thought that Ian at least smoke-tested it with creation / teardown vif so I just left it like that.> I would also suggest returning -EINVAL from netbk_count_requests(). > It not clear to me how this will fix the original oops though. >My analysis: netbk_count_requests returns 0 when an error occurs in the first iteration (frag = 0, -frag = 0), the caller gets 0 and doesn''t notice this vif has been disconnected. The subsequent call comparison txreq.size < ETH_HLEN is true for some reason - frontend messes up the txreq (this could also be the same reason that netbk_count_requests fails in first iteration), and a subsequent call to netbk_tx_err triggers the bug. Wei.> David > > > > > Signed-off-by: Wei Liu <wei.liu2@citrix.com> > > > > diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c > > index 103294d..0e0162e 100644 > > --- a/drivers/net/xen-netback/netback.c > > +++ b/drivers/net/xen-netback/netback.c > > @@ -913,13 +913,13 @@ static int netbk_count_requests(struct xenvif *vif, > > if (frags >= work_to_do) { > > netdev_err(vif->dev, "Need more frags\n"); > > netbk_fatal_tx_err(vif); > > - return -frags; > > + return -1; > > } > > > > if (unlikely(frags >= MAX_SKB_FRAGS)) { > > netdev_err(vif->dev, "Too many frags\n"); > > netbk_fatal_tx_err(vif); > > - return -frags; > > + return -1; > > } > > > > memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags), > > @@ -927,7 +927,7 @@ static int netbk_count_requests(struct xenvif *vif, > > if (txp->size > first->size) { > > netdev_err(vif->dev, "Frag is bigger than frame.\n"); > > netbk_fatal_tx_err(vif); > > - return -frags; > > + return -1; > > } > > > > first->size -= txp->size; > > @@ -937,7 +937,7 @@ static int netbk_count_requests(struct xenvif *vif, > > netdev_err(vif->dev, "txp->offset: %x, size: %u\n", > > txp->offset, txp->size); > > netbk_fatal_tx_err(vif); > > - return -frags; > > + return -1; > > } > > } while ((txp++)->flags & XEN_NETTXF_more_data); > > return frags; > > > > > > > > _______________________________________________ > > Xen-devel mailing list > > Xen-devel@lists.xen.org > > http://lists.xen.org/xen-devel >
On 2/13/13 7:47 AM, Wei Liu wrote:> On Wed, 2013-02-13 at 02:51 +0000, Christopher S. Aker wrote: >> On 2/12/13 4:58 AM, Ian Campbell wrote: >>> Have you applied the XSA-39 fixes to this kernel? >> >> Yes! When I rebuilt with Wei''s suggested patch for my original >> netback/xenwatch problem I also brought us up to date with XSA patches. > Good to have more context.We have found a way to reproduce a very similar BUG by keeping a guest''s network IO busy and then from the host "ifconfig down" the vif. It results in the following dump. This only works if the guest is doing network I/O. We can reproduce regardless if dom0 is patched with XSA-39, and is trigger-able at least as far back as 3.2.6 dom0. Procedure: Launch a guest and configure iperf [in TCP mode] between it and another box on the network then bring down its vif on the host. root@dom0:~# ifconfig vif14.0 down <-- insta-boom br0: port 3(vif14.0) entered disabled state unable to handle kernel NULL pointer dereference at 00000000000008b8 IP: [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80 PGD 0 Oops: 0002 [#1] SMP Modules linked in: ebt_comment ebt_arp ebt_set ebt_limit ebt_ip6 ebt_ip ip_set_hash_net ip_set xt_physdev iptable_filter ip_tables ebtable_nat xen_gntdev bonding ebtable_filter igb CPU 1 Pid: 0, comm: swapper/1 Not tainted 3.7.6-1-x86_64 #1 Supermicro X9SRE/X9SRE-3F/X9SRi/X9SRi-3F/X9SRE/X9SRE-3F/X9SRi/X9SRi-3F RIP: e030:[<ffffffff81011dda>] [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80 RSP: e02b:ffff880141843d60 EFLAGS: 00010006 RAX: 0000000000000400 RBX: 00000000000008b8 RCX: 0000000000012739 RDX: 0000000000000001 RSI: 0000000000000222 RDI: 00000000000008b8 RBP: ffff880141843d80 R08: 0000000000000144 R09: 0000000000000003 R10: 0000000000000003 R11: dead000000200200 R12: 0000000000000001 R13: 0000000000000200 R14: 0000000000000400 R15: ffff8800216ba700 FS: 00007f03fa88a700(0000) GS:ffff880141840000(0000) knlGS:0000000000000000 CS: e033 DS: 002b ES: 002b CR0: 000000008005003b CR2: 00000000000008b8 CR3: 0000000001c0b000 CR4: 0000000000002660 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper/1 (pid: 0, threadinfo ffff880101138000, task ffff8801011049c0) Stack: 0000000000000222 00000000000008b8 ffff8800216ba700 ffff8800216ba7d8 ffff880141843da0 ffffffff817605da 0000000000000000 00000000000008b8 ffff880141843de0 ffffffff8154446f ffff88014184e5b8 ffff88014184e578 Call Trace: <IRQ> [<ffffffff817605da>] _raw_spin_lock_irqsave+0x2a/0x40 [<ffffffff8154446f>] xen_netbk_schedule_xenvif+0x8f/0x100 [<ffffffff81544540>] ? xen_netbk_check_rx_xenvif+0x60/0x60 [<ffffffff81544505>] xen_netbk_check_rx_xenvif+0x25/0x60 [<ffffffff81544589>] tx_credit_callback+0x49/0x50 [<ffffffff8105ee04>] call_timer_fn+0x44/0x120 [<ffffffff8105f411>] run_timer_softirq+0x241/0x2b0 [<ffffffff81544540>] ? xen_netbk_check_rx_xenvif+0x60/0x60 [<ffffffff8105731f>] __do_softirq+0xcf/0x250 [<ffffffff810c1253>] ? handle_percpu_irq+0x43/0x60 [<ffffffff8176971c>] call_softirq+0x1c/0x30 [<ffffffff81015425>] do_softirq+0x65/0xa0 [<ffffffff8105710d>] irq_exit+0xbd/0xe0 [<ffffffff8141a73f>] xen_evtchn_do_upcall+0x2f/0x40 [<ffffffff8176977e>] xen_do_hypervisor_callback+0x1e/0x30 <EOI> [<ffffffff810013aa>] ? xen_hypercall_sched_op+0xa/0x20 [<ffffffff810013aa>] ? xen_hypercall_sched_op+0xa/0x20 [<ffffffff81009ae0>] ? xen_safe_halt+0x10/0x20 [<ffffffff8101c168>] ? default_idle+0x58/0x1b0 [<ffffffff8101b8a8>] ? cpu_idle+0x88/0xd0 [<ffffffff817541de>] ? cpu_bringup_and_idle+0xe/0x10 Code: 24 08 4c 89 6c 24 10 4c 89 74 24 18 49 89 f5 48 89 fb 41 81 e5 00 02 00 00 41 bc 01 00 00 00 41 be 00 04 00 00 44 89 f0 44 89 e2 <86> 13 84 d2 74 0b f3 90 80 3b 00 74 f3 ff c8 75 f5 84 d2 75 15 RIP [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80 RSP <ffff880141843d60> CR2: 00000000000008b8 ---[ end trace 337eb85a44e2f0ef ]--- Kernel panic - not syncing: Fatal exception in interrupt (XEN) Domain 0 crashed: rebooting machine in 5 seconds. (XEN) Resetting with ACPI MEMORY or I/O RESET_REG. -Chris
On Wed, Feb 13, 2013 at 07:20:32PM +0000, David Vrabel wrote:> On 13/02/13 18:37, Wei Liu wrote: > > A slightly upgraded version of the *UNTESTED* patch. > > > > > > Wei. > > > > ----8<---- > > commit df4c929d034cec7043fbd96ba89833eb639c336e > > Author: Wei Liu <wei.liu2@citrix.com> > > Date: Wed Feb 13 18:17:01 2013 +0000 > > > > netback: fix netbk_count_requests > > > > There are two paths in the original code, a) test against work_to_do, b) test > > against first->size, could return 0 even when error happens. > > > > Simply return -1 in error paths should work. Modify all error paths to return > > -1 to be consistent. > > You also need to remove the netbk_tx_err() after checking the result of > netbk_count_requests(). Otherwise you will have a double xenvif_put(), > which will screw up ref counting.I just realized that we were talking about different code path when I walked home. The path you mentioned is correct. As excution flow should never reach there if there is error in netbk_count_requests. The path I''m not sure is that in the netbk_fatal_tx_err, it calls xenvif_carrier_off which calls xenvif_put, and then it calls xenvif_put, which is likely to mess up the refcount. Wei.> > I would also suggest returning -EINVAL from netbk_count_requests(). > > It not clear to me how this will fix the original oops though. > > David > > > > > Signed-off-by: Wei Liu <wei.liu2@citrix.com> > > > > diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c > > index 103294d..0e0162e 100644 > > --- a/drivers/net/xen-netback/netback.c > > +++ b/drivers/net/xen-netback/netback.c > > @@ -913,13 +913,13 @@ static int netbk_count_requests(struct xenvif *vif, > > if (frags >= work_to_do) { > > netdev_err(vif->dev, "Need more frags\n"); > > netbk_fatal_tx_err(vif); > > - return -frags; > > + return -1; > > } > > > > if (unlikely(frags >= MAX_SKB_FRAGS)) { > > netdev_err(vif->dev, "Too many frags\n"); > > netbk_fatal_tx_err(vif); > > - return -frags; > > + return -1; > > } > > > > memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags), > > @@ -927,7 +927,7 @@ static int netbk_count_requests(struct xenvif *vif, > > if (txp->size > first->size) { > > netdev_err(vif->dev, "Frag is bigger than frame.\n"); > > netbk_fatal_tx_err(vif); > > - return -frags; > > + return -1; > > } > > > > first->size -= txp->size; > > @@ -937,7 +937,7 @@ static int netbk_count_requests(struct xenvif *vif, > > netdev_err(vif->dev, "txp->offset: %x, size: %u\n", > > txp->offset, txp->size); > > netbk_fatal_tx_err(vif); > > - return -frags; > > + return -1; > > } > > } while ((txp++)->flags & XEN_NETTXF_more_data); > > return frags; > > > > > > > > _______________________________________________ > > Xen-devel mailing list > > Xen-devel@lists.xen.org > > http://lists.xen.org/xen-devel >
On Wed, Feb 13, 2013 at 08:12:44PM +0000, Christopher S. Aker wrote:> On 2/13/13 7:47 AM, Wei Liu wrote: > > On Wed, 2013-02-13 at 02:51 +0000, Christopher S. Aker wrote: > >> On 2/12/13 4:58 AM, Ian Campbell wrote: > >>> Have you applied the XSA-39 fixes to this kernel? > >> > >> Yes! When I rebuilt with Wei''s suggested patch for my original > >> netback/xenwatch problem I also brought us up to date with XSA patches. > > Good to have more context. > > We have found a way to reproduce a very similar BUG by keeping a guest''s > network IO busy and then from the host "ifconfig down" the vif. It > results in the following dump. This only works if the guest is doing > network I/O. > > We can reproduce regardless if dom0 is patched with XSA-39, and is > trigger-able at least as far back as 3.2.6 dom0. > > Procedure: > > Launch a guest and configure iperf [in TCP mode] between it and another > box on the network then bring down its vif on the host. > > root@dom0:~# ifconfig vif14.0 down <-- insta-boom > br0: port 3(vif14.0) entered disabled state > unable to handle kernel NULL pointer dereference at 00000000000008b8 > IP: [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80 > PGD 0 > Oops: 0002 [#1] SMP > Modules linked in: ebt_comment ebt_arp ebt_set ebt_limit ebt_ip6 ebt_ip > ip_set_hash_net ip_set xt_physdev iptable_filter ip_tables ebtable_nat > xen_gntdev bonding ebtable_filter igb > CPU 1 > Pid: 0, comm: swapper/1 Not tainted 3.7.6-1-x86_64 #1 Supermicro > X9SRE/X9SRE-3F/X9SRi/X9SRi-3F/X9SRE/X9SRE-3F/X9SRi/X9SRi-3F > RIP: e030:[<ffffffff81011dda>] [<ffffffff81011dda>] > xen_spin_lock_flags+0x3a/0x80 > RSP: e02b:ffff880141843d60 EFLAGS: 00010006 > RAX: 0000000000000400 RBX: 00000000000008b8 RCX: 0000000000012739 > RDX: 0000000000000001 RSI: 0000000000000222 RDI: 00000000000008b8 > RBP: ffff880141843d80 R08: 0000000000000144 R09: 0000000000000003 > R10: 0000000000000003 R11: dead000000200200 R12: 0000000000000001 > R13: 0000000000000200 R14: 0000000000000400 R15: ffff8800216ba700 > FS: 00007f03fa88a700(0000) GS:ffff880141840000(0000) > knlGS:0000000000000000 > CS: e033 DS: 002b ES: 002b CR0: 000000008005003b > CR2: 00000000000008b8 CR3: 0000000001c0b000 CR4: 0000000000002660 > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > Process swapper/1 (pid: 0, threadinfo ffff880101138000, task > ffff8801011049c0) > Stack: > 0000000000000222 00000000000008b8 ffff8800216ba700 ffff8800216ba7d8 > ffff880141843da0 ffffffff817605da 0000000000000000 00000000000008b8 > ffff880141843de0 ffffffff8154446f ffff88014184e5b8 ffff88014184e578 > Call Trace: > <IRQ> > [<ffffffff817605da>] _raw_spin_lock_irqsave+0x2a/0x40 > [<ffffffff8154446f>] xen_netbk_schedule_xenvif+0x8f/0x100 > [<ffffffff81544540>] ? xen_netbk_check_rx_xenvif+0x60/0x60 > [<ffffffff81544505>] xen_netbk_check_rx_xenvif+0x25/0x60 > [<ffffffff81544589>] tx_credit_callback+0x49/0x50 > [<ffffffff8105ee04>] call_timer_fn+0x44/0x120 > [<ffffffff8105f411>] run_timer_softirq+0x241/0x2b0 > [<ffffffff81544540>] ? xen_netbk_check_rx_xenvif+0x60/0x60 > [<ffffffff8105731f>] __do_softirq+0xcf/0x250 > [<ffffffff810c1253>] ? handle_percpu_irq+0x43/0x60 > [<ffffffff8176971c>] call_softirq+0x1c/0x30 > [<ffffffff81015425>] do_softirq+0x65/0xa0 > [<ffffffff8105710d>] irq_exit+0xbd/0xe0 > [<ffffffff8141a73f>] xen_evtchn_do_upcall+0x2f/0x40 > [<ffffffff8176977e>] xen_do_hypervisor_callback+0x1e/0x30Notice the tracelog is different here, this looks like a vallina bug to me. It is the timer callback that triggers the oops. This one should be simple to fix - we should also shut down the timer when shutting down vif. Will get to this tomorrow. Need to have rest now. :-) Wei.
>>> On 13.02.13 at 21:17, Wei Liu <wei.liu2@citrix.com> wrote: > On Wed, Feb 13, 2013 at 07:20:32PM +0000, David Vrabel wrote: >> On 13/02/13 18:37, Wei Liu wrote: >> > A slightly upgraded version of the *UNTESTED* patch. >> > >> > >> > Wei. >> > >> > ----8<---- >> > commit df4c929d034cec7043fbd96ba89833eb639c336e >> > Author: Wei Liu <wei.liu2@citrix.com> >> > Date: Wed Feb 13 18:17:01 2013 +0000 >> > >> > netback: fix netbk_count_requests >> > >> > There are two paths in the original code, a) test against work_to_do, > b) test >> > against first->size, could return 0 even when error happens. >> > >> > Simply return -1 in error paths should work. Modify all error paths to > return >> > -1 to be consistent. >> >> You also need to remove the netbk_tx_err() after checking the result of >> netbk_count_requests(). Otherwise you will have a double xenvif_put(), >> which will screw up ref counting. > > I just realized that we were talking about different code path when I > walked home. > > The path you mentioned is correct. As excution flow should never reach > there if there is error in netbk_count_requests. > > The path I''m not sure is that in the netbk_fatal_tx_err, it calls > xenvif_carrier_off which calls xenvif_put, and then it calls xenvif_put, > which is likely to mess up the refcount.I first thought so too when looking at this again yesterday, but then convinced myself that this double put _here_ is correct. And Ian''s patch specifically removed to call to netbk_tx_err() after the caller netbk_count_requests() recognized the error, knowing that the latter function now itself issues a call to netbk_fatal_tx_err() (whether that wouldn''t better have replaced the caller''s call to netbk_tx_err() is a different question). What I do support is that fact that the third error path in netbk_count_requests() has the potential of returning zero instead of a negative value. As the caller doesn''t really do anything with the specific negative value (it only did so before Ian''s patch), returning -1 or -EINVAL on all four error paths would seem the right change to me. Jan
>>> On 13.02.13 at 19:37, Wei Liu <wei.liu2@citrix.com> wrote: > A slightly upgraded version of the *UNTESTED* patch.Ack (albeit I''d slightly prefer -EINVAL or, for eventual debugging purposes, four distinct -Exxx values to be returned). Jan> ----8<---- > commit df4c929d034cec7043fbd96ba89833eb639c336e > Author: Wei Liu <wei.liu2@citrix.com> > Date: Wed Feb 13 18:17:01 2013 +0000 > > netback: fix netbk_count_requests > > There are two paths in the original code, a) test against work_to_do, b) > test > against first->size, could return 0 even when error happens. > > Simply return -1 in error paths should work. Modify all error paths to > return > -1 to be consistent. > > Signed-off-by: Wei Liu <wei.liu2@citrix.com> > > diff --git a/drivers/net/xen-netback/netback.c > b/drivers/net/xen-netback/netback.c > index 103294d..0e0162e 100644 > --- a/drivers/net/xen-netback/netback.c > +++ b/drivers/net/xen-netback/netback.c > @@ -913,13 +913,13 @@ static int netbk_count_requests(struct xenvif *vif, > if (frags >= work_to_do) { > netdev_err(vif->dev, "Need more frags\n"); > netbk_fatal_tx_err(vif); > - return -frags; > + return -1; > } > > if (unlikely(frags >= MAX_SKB_FRAGS)) { > netdev_err(vif->dev, "Too many frags\n"); > netbk_fatal_tx_err(vif); > - return -frags; > + return -1; > } > > memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags), > @@ -927,7 +927,7 @@ static int netbk_count_requests(struct xenvif *vif, > if (txp->size > first->size) { > netdev_err(vif->dev, "Frag is bigger than frame.\n"); > netbk_fatal_tx_err(vif); > - return -frags; > + return -1; > } > > first->size -= txp->size; > @@ -937,7 +937,7 @@ static int netbk_count_requests(struct xenvif *vif, > netdev_err(vif->dev, "txp->offset: %x, size: %u\n", > txp->offset, txp->size); > netbk_fatal_tx_err(vif); > - return -frags; > + return -1; > } > } while ((txp++)->flags & XEN_NETTXF_more_data); > return frags; > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
>>> On 14.02.13 at 10:14, "Jan Beulich" <JBeulich@suse.com> wrote: >>>> On 13.02.13 at 19:37, Wei Liu <wei.liu2@citrix.com> wrote: >> A slightly upgraded version of the *UNTESTED* patch. > > Ack (albeit I''d slightly prefer -EINVAL or, for eventual debugging > purposes, four distinct -Exxx values to be returned).Like this (for the 2.6.18-xen tree). Jan netback: fix netbk_count_requests() There are two paths in the original code, a) test against work_to_do, b) test against first->size, could return 0 even when error happens. Simply return -1 in error paths should work. Modify all error paths to return -1 to be consistent. Signed-off-by: Wei Liu <wei.liu2@citrix.com> Use distinct -E... values instead. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/drivers/xen/netback/netback.c +++ b/drivers/xen/netback/netback.c @@ -1042,14 +1042,14 @@ static int netbk_count_requests(netif_t printk(KERN_ERR "%s: Need more frags\n", netif->dev->name); netbk_fatal_tx_err(netif); - return -frags; + return -ENODATA; } if (unlikely(frags >= MAX_SKB_FRAGS)) { printk(KERN_ERR "%s: Too many frags\n", netif->dev->name); netbk_fatal_tx_err(netif); - return -frags; + return -E2BIG; } memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags), @@ -1058,7 +1058,7 @@ static int netbk_count_requests(netif_t printk(KERN_ERR "%s: Frag is bigger than frame.\n", netif->dev->name); netbk_fatal_tx_err(netif); - return -frags; + return -EIO; } first->size -= txp->size; @@ -1068,7 +1068,7 @@ static int netbk_count_requests(netif_t printk(KERN_ERR "%s: txp->offset: %x, size: %u\n", netif->dev->name, txp->offset, txp->size); netbk_fatal_tx_err(netif); - return -frags; + return -EINVAL; } } while ((txp++)->flags & NETTXF_more_data); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
On Thu, 2013-02-14 at 09:14 +0000, Jan Beulich wrote:> >>> On 13.02.13 at 19:37, Wei Liu <wei.liu2@citrix.com> wrote: > > A slightly upgraded version of the *UNTESTED* patch. > > Ack (albeit I''d slightly prefer -EINVAL or, for eventual debugging > purposes, four distinct -Exxx values to be returned).Yes, please, 4 values would be good. I''m a little bit curious what the guest, which is generating these invalid packets. I guess previously they were being silently dropped?> > Jan > > > ----8<---- > > commit df4c929d034cec7043fbd96ba89833eb639c336e > > Author: Wei Liu <wei.liu2@citrix.com> > > Date: Wed Feb 13 18:17:01 2013 +0000 > > > > netback: fix netbk_count_requests > > > > There are two paths in the original code, a) test against work_to_do, b) > > test > > against first->size, could return 0 even when error happens. > > > > Simply return -1 in error paths should work. Modify all error paths to > > return > > -1 to be consistent. > > > > Signed-off-by: Wei Liu <wei.liu2@citrix.com> > > > > diff --git a/drivers/net/xen-netback/netback.c > > b/drivers/net/xen-netback/netback.c > > index 103294d..0e0162e 100644 > > --- a/drivers/net/xen-netback/netback.c > > +++ b/drivers/net/xen-netback/netback.c > > @@ -913,13 +913,13 @@ static int netbk_count_requests(struct xenvif *vif, > > if (frags >= work_to_do) { > > netdev_err(vif->dev, "Need more frags\n"); > > netbk_fatal_tx_err(vif); > > - return -frags; > > + return -1; > > } > > > > if (unlikely(frags >= MAX_SKB_FRAGS)) { > > netdev_err(vif->dev, "Too many frags\n"); > > netbk_fatal_tx_err(vif); > > - return -frags; > > + return -1; > > } > > > > memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags), > > @@ -927,7 +927,7 @@ static int netbk_count_requests(struct xenvif *vif, > > if (txp->size > first->size) { > > netdev_err(vif->dev, "Frag is bigger than frame.\n"); > > netbk_fatal_tx_err(vif); > > - return -frags; > > + return -1; > > } > > > > first->size -= txp->size; > > @@ -937,7 +937,7 @@ static int netbk_count_requests(struct xenvif *vif, > > netdev_err(vif->dev, "txp->offset: %x, size: %u\n", > > txp->offset, txp->size); > > netbk_fatal_tx_err(vif); > > - return -frags; > > + return -1; > > } > > } while ((txp++)->flags & XEN_NETTXF_more_data); > > return frags; > > > > > > > > _______________________________________________ > > Xen-devel mailing list > > Xen-devel@lists.xen.org > > http://lists.xen.org/xen-devel > > >
On Thu, 2013-02-14 at 09:11 +0000, Jan Beulich wrote:> >>> On 13.02.13 at 21:17, Wei Liu <wei.liu2@citrix.com> wrote: > > On Wed, Feb 13, 2013 at 07:20:32PM +0000, David Vrabel wrote: > >> On 13/02/13 18:37, Wei Liu wrote: > >> > A slightly upgraded version of the *UNTESTED* patch. > >> > > >> > > >> > Wei. > >> > > >> > ----8<---- > >> > commit df4c929d034cec7043fbd96ba89833eb639c336e > >> > Author: Wei Liu <wei.liu2@citrix.com> > >> > Date: Wed Feb 13 18:17:01 2013 +0000 > >> > > >> > netback: fix netbk_count_requests > >> > > >> > There are two paths in the original code, a) test against work_to_do, > > b) test > >> > against first->size, could return 0 even when error happens. > >> > > >> > Simply return -1 in error paths should work. Modify all error paths to > > return > >> > -1 to be consistent. > >> > >> You also need to remove the netbk_tx_err() after checking the result of > >> netbk_count_requests(). Otherwise you will have a double xenvif_put(), > >> which will screw up ref counting. > > > > I just realized that we were talking about different code path when I > > walked home. > > > > The path you mentioned is correct. As excution flow should never reach > > there if there is error in netbk_count_requests. > > > > The path I''m not sure is that in the netbk_fatal_tx_err, it calls > > xenvif_carrier_off which calls xenvif_put, and then it calls xenvif_put, > > which is likely to mess up the refcount. > > I first thought so too when looking at this again yesterday, but > then convinced myself that this double put _here_ is correct. And > Ian''s patch specifically removed to call to netbk_tx_err() after the > caller netbk_count_requests() recognized the error, knowing that > the latter function now itself issues a call to netbk_fatal_tx_err() > (whether that wouldn''t better have replaced the caller''s call to > netbk_tx_err() is a different question). >I''m not convinced. netbk_tx_err only has one xenvif_put, however netbk_fatal_tx_err has two puts. If this is a bug, and, if my previous patch fixes Christopher''s OOPS, he will hit this bug soon when shutting down DomU. Wei.
On 14/02/13 09:11, Jan Beulich wrote:> > I first thought so too when looking at this again yesterday, but > then convinced myself that this double put _here_ is correct. And > Ian''s patch specifically removed to call to netbk_tx_err() after the > caller netbk_count_requests() recognized the error, knowing that > the latter function now itself issues a call to netbk_fatal_tx_err() > (whether that wouldn''t better have replaced the caller''s call to > netbk_tx_err() is a different question).Yes, I agree. I had too many different versions of netback open and got confused. David
On Thu, 2013-02-14 at 11:20 +0000, Wei Liu wrote:> I''m not convinced. netbk_tx_err only has one xenvif_put, however > netbk_fatal_tx_err has two puts.One balances the get in poll_net_schedule_list (i.e. at the top of the loop in xen_netbk_tx_build_gops. The other one I guess you mean the one in xenvif_carrier_off? This balances the refcount taken in xenvif_connect, when the carrier is brought up. In my testing I found that both were required or else things deadlock in xenvif_disconnect with the refcnt stuck at 1. The eventual put in xenvif_disconnect is balanced by the initial count of 1 in xenvif_alloc() Ian.> If this is a bug, and, if my previous patch fixes Christopher''s OOPS, he > will hit this bug soon when shutting down DomU. > > > Wei. >
On Thu, 2013-02-14 at 11:34 +0000, Ian Campbell wrote:> On Thu, 2013-02-14 at 11:20 +0000, Wei Liu wrote: > > > I''m not convinced. netbk_tx_err only has one xenvif_put, however > > netbk_fatal_tx_err has two puts. > > One balances the get in poll_net_schedule_list (i.e. at the top of the > loop in xen_netbk_tx_build_gops. > > The other one I guess you mean the one in xenvif_carrier_off? This > balances the refcount taken in xenvif_connect, when the carrier is > brought up. > > In my testing I found that both were required or else things deadlock in > xenvif_disconnect with the refcnt stuck at 1. > > The eventual put in xenvif_disconnect is balanced by the initial count > of 1 in xenvif_alloc()Oh, I get what you mean now. Because the vif is down so xenvif_carrier_off is not invoked in disconnect path. But I think a better place to balance refcount taken in xenvif_connect is xenvif_disconnect, so I would rather move that xenvif_put in fatal_tx_err to xenvif_disconnect. Wei.> Ian. > > > If this is a bug, and, if my previous patch fixes Christopher''s OOPS, he > > will hit this bug soon when shutting down DomU. > > > > > > Wei. > > > >
>>> On 14.02.13 at 12:20, Wei Liu <wei.liu2@citrix.com> wrote: > On Thu, 2013-02-14 at 09:11 +0000, Jan Beulich wrote: >> >>> On 13.02.13 at 21:17, Wei Liu <wei.liu2@citrix.com> wrote: >> > On Wed, Feb 13, 2013 at 07:20:32PM +0000, David Vrabel wrote: >> >> On 13/02/13 18:37, Wei Liu wrote: >> >> > A slightly upgraded version of the *UNTESTED* patch. >> >> > >> >> > >> >> > Wei. >> >> > >> >> > ----8<---- >> >> > commit df4c929d034cec7043fbd96ba89833eb639c336e >> >> > Author: Wei Liu <wei.liu2@citrix.com> >> >> > Date: Wed Feb 13 18:17:01 2013 +0000 >> >> > >> >> > netback: fix netbk_count_requests >> >> > >> >> > There are two paths in the original code, a) test against work_to_do, >> > b) test >> >> > against first->size, could return 0 even when error happens. >> >> > >> >> > Simply return -1 in error paths should work. Modify all error paths to > >> > return >> >> > -1 to be consistent. >> >> >> >> You also need to remove the netbk_tx_err() after checking the result of >> >> netbk_count_requests(). Otherwise you will have a double xenvif_put(), >> >> which will screw up ref counting. >> > >> > I just realized that we were talking about different code path when I >> > walked home. >> > >> > The path you mentioned is correct. As excution flow should never reach >> > there if there is error in netbk_count_requests. >> > >> > The path I''m not sure is that in the netbk_fatal_tx_err, it calls >> > xenvif_carrier_off which calls xenvif_put, and then it calls xenvif_put, >> > which is likely to mess up the refcount. >> >> I first thought so too when looking at this again yesterday, but >> then convinced myself that this double put _here_ is correct. And >> Ian''s patch specifically removed to call to netbk_tx_err() after the >> caller netbk_count_requests() recognized the error, knowing that >> the latter function now itself issues a call to netbk_fatal_tx_err() >> (whether that wouldn''t better have replaced the caller''s call to >> netbk_tx_err() is a different question). > > I''m not convinced. netbk_tx_err only has one xenvif_put, however > netbk_fatal_tx_err has two puts.Sure - one for the current transfer, and one for the carrier being on. That second one would otherwise be put when the interface gets brought down "normally".> If this is a bug, and, if my previous patch fixes Christopher''s OOPS, he > will hit this bug soon when shutting down DomU.I don''t think this patch will fix his problems, which - as described yesterday - I''m relatively certain result from the harsh action netbk_fatal_tx_err() does. Jan
>>> On 14.02.13 at 12:38, Wei Liu <wei.liu2@citrix.com> wrote: > On Thu, 2013-02-14 at 11:34 +0000, Ian Campbell wrote: >> On Thu, 2013-02-14 at 11:20 +0000, Wei Liu wrote: >> >> > I''m not convinced. netbk_tx_err only has one xenvif_put, however >> > netbk_fatal_tx_err has two puts. >> >> One balances the get in poll_net_schedule_list (i.e. at the top of the >> loop in xen_netbk_tx_build_gops. >> >> The other one I guess you mean the one in xenvif_carrier_off? This >> balances the refcount taken in xenvif_connect, when the carrier is >> brought up. >> >> In my testing I found that both were required or else things deadlock in >> xenvif_disconnect with the refcnt stuck at 1. >> >> The eventual put in xenvif_disconnect is balanced by the initial count >> of 1 in xenvif_alloc() > > Oh, I get what you mean now. Because the vif is down so > xenvif_carrier_off is not invoked in disconnect path. > > But I think a better place to balance refcount taken in xenvif_connect > is xenvif_disconnect, so I would rather move that xenvif_put in > fatal_tx_err to xenvif_disconnect.I relatively certain this would make things worse, not better. Jan
On Thu, 2013-02-14 at 11:50 +0000, Jan Beulich wrote:> >>> On 14.02.13 at 12:38, Wei Liu <wei.liu2@citrix.com> wrote: > > On Thu, 2013-02-14 at 11:34 +0000, Ian Campbell wrote: > >> On Thu, 2013-02-14 at 11:20 +0000, Wei Liu wrote: > >> > >> > I''m not convinced. netbk_tx_err only has one xenvif_put, however > >> > netbk_fatal_tx_err has two puts. > >> > >> One balances the get in poll_net_schedule_list (i.e. at the top of the > >> loop in xen_netbk_tx_build_gops. > >> > >> The other one I guess you mean the one in xenvif_carrier_off? This > >> balances the refcount taken in xenvif_connect, when the carrier is > >> brought up. > >> > >> In my testing I found that both were required or else things deadlock in > >> xenvif_disconnect with the refcnt stuck at 1. > >> > >> The eventual put in xenvif_disconnect is balanced by the initial count > >> of 1 in xenvif_alloc() > > > > Oh, I get what you mean now. Because the vif is down so > > xenvif_carrier_off is not invoked in disconnect path. > > > > But I think a better place to balance refcount taken in xenvif_connect > > is xenvif_disconnect, so I would rather move that xenvif_put in > > fatal_tx_err to xenvif_disconnect. > > I relatively certain this would make things worse, not better. >Oops, my mistake. Let it stay like this is just fine. Wei.
On Thu, 2013-02-14 at 11:48 +0000, Jan Beulich wrote:> > I don''t think this patch will fix his problems, which - as described > yesterday - I''m relatively certain result from the harsh action > netbk_fatal_tx_err() does. >Yes, having this one only is not enough. It will need to either disable the vif timer or check vif->netbk != NULL inside timer callback. Wei.> Jan >
On 14/02/13 11:48, Jan Beulich wrote:>>>> On 14.02.13 at 12:20, Wei Liu <wei.liu2@citrix.com> wrote: > >> If this is a bug, and, if my previous patch fixes Christopher''s OOPS, he >> will hit this bug soon when shutting down DomU. > > I don''t think this patch will fix his problems, which - as described > yesterday - I''m relatively certain result from the harsh action > netbk_fatal_tx_err() does.I can''t see anything broken in netbk_fatal_tx_err(). However, a call to netbk_fatal_tx_err() may result in the vif''s ref count going to 1 which means a simutaneous attempt to shutdown the vif will free the net device. Netback thread Xenwatch thread netbk_fatal_tx_err() netback_remove() xenvif_disconnect() ... free_netdev() netbk_tx_err() Oops! David
On Thu, 2013-02-14 at 12:20 +0000, David Vrabel wrote:> On 14/02/13 11:48, Jan Beulich wrote: > >>>> On 14.02.13 at 12:20, Wei Liu <wei.liu2@citrix.com> wrote: > > > >> If this is a bug, and, if my previous patch fixes Christopher''s OOPS, he > >> will hit this bug soon when shutting down DomU. > > > > I don''t think this patch will fix his problems, which - as described > > yesterday - I''m relatively certain result from the harsh action > > netbk_fatal_tx_err() does. > > I can''t see anything broken in netbk_fatal_tx_err(). > > However, a call to netbk_fatal_tx_err() may result in the vif''s ref > count going to 1 which means a simutaneous attempt to shutdown the vif > will free the net device.> Netback thread Xenwatch thread > > netbk_fatal_tx_err() netback_remove() > xenvif_disconnect() > ... > free_netdev() > netbk_tx_err() Oops! >This is not a problem. Reading comments and code of the commit, netbk_fatal_tx_err shuts down the vif entirely (at the moment the timer is not handled though) which should make sure it will never get scheduled again, so in practice it will never hit netbk_tx_err. Wei.> David
>>> On 14.02.13 at 13:13, Wei Liu <wei.liu2@citrix.com> wrote: > On Thu, 2013-02-14 at 11:48 +0000, Jan Beulich wrote: >> I don''t think this patch will fix his problems, which - as described >> yesterday - I''m relatively certain result from the harsh action >> netbk_fatal_tx_err() does. > > Yes, having this one only is not enough. It will need to either disable > the vif timer or check vif->netbk != NULL inside timer callback.I continue to be unclear what timer you refer to. If we keep the carrier-off in fatal_tx_err, then _all_ asynchronously callable interfaces need to check whether the vif -> netbk association went away. But, especially in the context of using tasklets instead of kthreads, I meanwhile think that simply setting a flag (along with turning off the IRQ) would be better (and keep the turning off of the carrier the way it had been done before. The flag would basically need checking anywhere we look at the shared ring (which ought to be very few places), and perhaps should also cause xenvif_schedulable() to return false. Jan
On Thu, 2013-02-14 at 12:29 +0000, Wei Liu wrote:> On Thu, 2013-02-14 at 12:20 +0000, David Vrabel wrote: > > On 14/02/13 11:48, Jan Beulich wrote: > > >>>> On 14.02.13 at 12:20, Wei Liu <wei.liu2@citrix.com> wrote: > > > > > >> If this is a bug, and, if my previous patch fixes Christopher''s OOPS, he > > >> will hit this bug soon when shutting down DomU. > > > > > > I don''t think this patch will fix his problems, which - as described > > > yesterday - I''m relatively certain result from the harsh action > > > netbk_fatal_tx_err() does. > > > > I can''t see anything broken in netbk_fatal_tx_err(). > > > > However, a call to netbk_fatal_tx_err() may result in the vif''s ref > > count going to 1 which means a simutaneous attempt to shutdown the vif > > will free the net device. > > > Netback thread Xenwatch thread > > > > netbk_fatal_tx_err() netback_remove() > > xenvif_disconnect() > > ... > > free_netdev() > > netbk_tx_err() Oops! > > > > This is not a problem. Reading comments and code of the commit, > netbk_fatal_tx_err shuts down the vif entirely (at the moment the timer > is not handled though) which should make sure it will never get > scheduled again, so in practice it will never hit netbk_tx_err.That was certainly the intention! Ian.
>>> On 14.02.13 at 13:20, David Vrabel <david.vrabel@citrix.com> wrote: > On 14/02/13 11:48, Jan Beulich wrote: >>>>> On 14.02.13 at 12:20, Wei Liu <wei.liu2@citrix.com> wrote: >> >>> If this is a bug, and, if my previous patch fixes Christopher''s OOPS, he >>> will hit this bug soon when shutting down DomU. >> >> I don''t think this patch will fix his problems, which - as described >> yesterday - I''m relatively certain result from the harsh action >> netbk_fatal_tx_err() does. > > I can''t see anything broken in netbk_fatal_tx_err().The main problem I''m seeing is the unexpected removal of the vif->netbk association. There are some checks for this, but not everywhere.> However, a call to netbk_fatal_tx_err() may result in the vif''s ref > count going to 1 which means a simutaneous attempt to shutdown the vif > will free the net device. > > Netback thread Xenwatch thread > > netbk_fatal_tx_err() netback_remove() > xenvif_disconnect() > ... > free_netdev() > netbk_tx_err() Oops!Right. One more argument for not doing it there. Jan
On Thu, 2013-02-14 at 12:33 +0000, Jan Beulich wrote:> >>> On 14.02.13 at 13:13, Wei Liu <wei.liu2@citrix.com> wrote: > > On Thu, 2013-02-14 at 11:48 +0000, Jan Beulich wrote: > >> I don''t think this patch will fix his problems, which - as described > >> yesterday - I''m relatively certain result from the harsh action > >> netbk_fatal_tx_err() does. > > > > Yes, having this one only is not enough. It will need to either disable > > the vif timer or check vif->netbk != NULL inside timer callback. > > I continue to be unclear what timer you refer to. >Oh, it is the tx credit timer callback, which will try to schedule vif regardless of whether it is linked to netbk or not. See tx_credit_callback. It is also the root cause of Christopher''s latest oops report last night, when he `ifconfig vifX.X down` in Dom0.> If we keep the carrier-off in fatal_tx_err, then _all_ > asynchronously callable interfaces need to check whether the > vif -> netbk association went away. But, especially in the > context of using tasklets instead of kthreads, I meanwhile > think that simply setting a flag (along with turning off the IRQ) > would be better (and keep the turning off of the carrier the way > it had been done before. The flag would basically need checking > anywhere we look at the shared ring (which ought to be very > few places), and perhaps should also cause xenvif_schedulable() > to return false. >Is this equivalent to checking vif->netbk? Well, at least in practice. Wei.> Jan >
>>> On 14.02.13 at 13:33, "Jan Beulich" <JBeulich@suse.com> wrote: >>>> On 14.02.13 at 13:13, Wei Liu <wei.liu2@citrix.com> wrote: >> On Thu, 2013-02-14 at 11:48 +0000, Jan Beulich wrote: >>> I don''t think this patch will fix his problems, which - as described >>> yesterday - I''m relatively certain result from the harsh action >>> netbk_fatal_tx_err() does. >> >> Yes, having this one only is not enough. It will need to either disable >> the vif timer or check vif->netbk != NULL inside timer callback. > > I continue to be unclear what timer you refer to. > > If we keep the carrier-off in fatal_tx_err, then _all_ > asynchronously callable interfaces need to check whether the > vif -> netbk association went away. But, especially in the > context of using tasklets instead of kthreads, I meanwhile > think that simply setting a flag (along with turning off the IRQ) > would be better (and keep the turning off of the carrier the way > it had been done before. The flag would basically need checking > anywhere we look at the shared ring (which ought to be very > few places), and perhaps should also cause xenvif_schedulable() > to return false.Or a "light" version of xenvif_down(), i.e. simply disable_irq(vif->irq); xen_netbk_deschedule_xenvif(vif); If I''m not mistaken, doing this instead of xenvif_carrier_off() could be all that''s needed. Jan
On 14/02/13 12:29, Wei Liu wrote:> On Thu, 2013-02-14 at 12:20 +0000, David Vrabel wrote: >> On 14/02/13 11:48, Jan Beulich wrote: >>>>>> On 14.02.13 at 12:20, Wei Liu <wei.liu2@citrix.com> wrote: >>> >>>> If this is a bug, and, if my previous patch fixes Christopher''s OOPS, he >>>> will hit this bug soon when shutting down DomU. >>> >>> I don''t think this patch will fix his problems, which - as described >>> yesterday - I''m relatively certain result from the harsh action >>> netbk_fatal_tx_err() does. >> >> I can''t see anything broken in netbk_fatal_tx_err(). >> >> However, a call to netbk_fatal_tx_err() may result in the vif''s ref >> count going to 1 which means a simutaneous attempt to shutdown the vif >> will free the net device. > >> Netback thread Xenwatch thread >> >> netbk_fatal_tx_err() netback_remove() >> xenvif_disconnect() >> ... >> free_netdev() >> netbk_tx_err() Oops! >> > > This is not a problem. Reading comments and code of the commit, > netbk_fatal_tx_err shuts down the vif entirely (at the moment the timer > is not handled though) which should make sure it will never get > scheduled again, so in practice it will never hit netbk_tx_err.Without the fix to the error paths of netbk_count_requests(), then if it returned 0 netbk_tx_err() may be called. e.g., if txreq.size < ETH_HLEN. netbk_fatal_tx_err() should call del_timer_sync() on the credit timer (vif->credit_timeout) as well, otherwise it may fire and attempt to reschedule the vif, which will then oops as vif->netbk == NULL. David
On Thu, 2013-02-14 at 12:47 +0000, David Vrabel wrote:> On 14/02/13 12:29, Wei Liu wrote: > > On Thu, 2013-02-14 at 12:20 +0000, David Vrabel wrote: > >> On 14/02/13 11:48, Jan Beulich wrote: > >>>>>> On 14.02.13 at 12:20, Wei Liu <wei.liu2@citrix.com> wrote: > >>> > >>>> If this is a bug, and, if my previous patch fixes Christopher''s OOPS, he > >>>> will hit this bug soon when shutting down DomU. > >>> > >>> I don''t think this patch will fix his problems, which - as described > >>> yesterday - I''m relatively certain result from the harsh action > >>> netbk_fatal_tx_err() does. > >> > >> I can''t see anything broken in netbk_fatal_tx_err(). > >> > >> However, a call to netbk_fatal_tx_err() may result in the vif''s ref > >> count going to 1 which means a simutaneous attempt to shutdown the vif > >> will free the net device. > > > >> Netback thread Xenwatch thread > >> > >> netbk_fatal_tx_err() netback_remove() > >> xenvif_disconnect() > >> ... > >> free_netdev() > >> netbk_tx_err() Oops! > >> > > > > This is not a problem. Reading comments and code of the commit, > > netbk_fatal_tx_err shuts down the vif entirely (at the moment the timer > > is not handled though) which should make sure it will never get > > scheduled again, so in practice it will never hit netbk_tx_err. > > Without the fix to the error paths of netbk_count_requests(), then if it > returned 0 netbk_tx_err() may be called. e.g., if txreq.size < ETH_HLEN. >Yes returning 0 in error case is wrong. I thought we talked about this problem on the basis that we fix this one, and the latter timer bug.> netbk_fatal_tx_err() should call del_timer_sync() on the credit timer > (vif->credit_timeout) as well, otherwise it may fire and attempt to > reschedule the vif, which will then oops as vif->netbk == NULL. >Yeah, I have patches to fix these bugs, will post them today. :-) Wei.> David
>>> On 14.02.13 at 13:45, "Jan Beulich" <JBeulich@suse.com> wrote: >>>> On 14.02.13 at 13:33, "Jan Beulich" <JBeulich@suse.com> wrote: >>>>> On 14.02.13 at 13:13, Wei Liu <wei.liu2@citrix.com> wrote: >>> On Thu, 2013-02-14 at 11:48 +0000, Jan Beulich wrote: >>>> I don''t think this patch will fix his problems, which - as described >>>> yesterday - I''m relatively certain result from the harsh action >>>> netbk_fatal_tx_err() does. >>> >>> Yes, having this one only is not enough. It will need to either disable >>> the vif timer or check vif->netbk != NULL inside timer callback. >> >> I continue to be unclear what timer you refer to. >> >> If we keep the carrier-off in fatal_tx_err, then _all_ >> asynchronously callable interfaces need to check whether the >> vif -> netbk association went away. But, especially in the >> context of using tasklets instead of kthreads, I meanwhile >> think that simply setting a flag (along with turning off the IRQ) >> would be better (and keep the turning off of the carrier the way >> it had been done before. The flag would basically need checking >> anywhere we look at the shared ring (which ought to be very >> few places), and perhaps should also cause xenvif_schedulable() >> to return false. > > Or a "light" version of xenvif_down(), i.e. simply > > disable_irq(vif->irq); > xen_netbk_deschedule_xenvif(vif); > > If I''m not mistaken, doing this instead of xenvif_carrier_off() > could be all that''s needed.Like the below/attached (compile tested only so far). Jan netback: fix shutting down the ring if it contains garbage Using rtnl_lock() in tasklet context is not permitted. This undoes the part of 1219:5108c6901b30 that split off xenvif_carrier_off() from netif_disconnect(). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/drivers/xen/netback/common.h +++ b/drivers/xen/netback/common.h @@ -78,6 +78,8 @@ typedef struct netif_st { u8 can_queue:1; /* can queue packets for receiver? */ u8 copying_receiver:1; /* copy packets to receiver? */ + u8 busted:1; + /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */ RING_IDX rx_req_cons_peek; @@ -195,7 +197,8 @@ int netif_map(netif_t *netif, unsigned l void netif_xenbus_init(void); #define netif_schedulable(netif) \ - (netif_running((netif)->dev) && netback_carrier_ok(netif)) + (likely(!(netif)->busted) && \ + netif_running((netif)->dev) && netback_carrier_ok(netif)) void netif_schedule_work(netif_t *netif); void netif_deschedule_work(netif_t *netif); @@ -204,9 +207,6 @@ int netif_be_start_xmit(struct sk_buff * struct net_device_stats *netif_be_get_stats(struct net_device *dev); irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs); -/* Prevent the device from generating any further traffic. */ -void xenvif_carrier_off(netif_t *netif); - static inline int netbk_can_queue(struct net_device *dev) { netif_t *netif = netdev_priv(dev); --- a/drivers/xen/netback/interface.c +++ b/drivers/xen/netback/interface.c @@ -56,6 +56,7 @@ module_param_named(queue_length, netbk_q static void __netif_up(netif_t *netif) { + netif->busted = 0; enable_irq(netif->irq); netif_schedule_work(netif); } @@ -347,23 +348,19 @@ err_rx: return err; } -void xenvif_carrier_off(netif_t *netif) -{ - rtnl_lock(); - netback_carrier_off(netif); - netif_carrier_off(netif->dev); /* discard queued packets */ - if (netif_running(netif->dev)) - __netif_down(netif); - rtnl_unlock(); - netif_put(netif); -} - void netif_disconnect(struct backend_info *be) { netif_t *netif = be->netif; - if (netback_carrier_ok(netif)) - xenvif_carrier_off(netif); + if (netback_carrier_ok(netif)) { + rtnl_lock(); + netback_carrier_off(netif); + netif_carrier_off(netif->dev); /* discard queued packets */ + if (netif_running(netif->dev)) + __netif_down(netif); + rtnl_unlock(); + netif_put(netif); + } atomic_dec(&netif->refcnt); wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0); --- a/drivers/xen/netback/netback.c +++ b/drivers/xen/netback/netback.c @@ -845,7 +845,7 @@ void netif_schedule_work(netif_t *netif) RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); #endif - if (more_to_do) { + if (more_to_do && likely(!netif->busted)) { add_to_net_schedule_list_tail(netif); maybe_schedule_tx_action(); } @@ -1024,7 +1024,9 @@ static void netbk_fatal_tx_err(netif_t * { printk(KERN_ERR "%s: fatal error; disabling device\n", netif->dev->name); - xenvif_carrier_off(netif); + netif->busted = 1; + disable_irq(netif->irq); + netif_deschedule_work(netif); netif_put(netif); } @@ -1292,6 +1294,11 @@ static void net_tx_action(unsigned long if (!netif) continue; + if (unlikely(netif->busted)) { + netif_put(netif); + continue; + } + if (netif->tx.sring->req_prod - netif->tx.req_cons > NET_TX_RING_SIZE) { printk(KERN_ERR "%s: Impossible number of requests. " _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
On Thu, 2013-02-14 at 15:29 +0000, Jan Beulich wrote:> >>> On 14.02.13 at 13:45, "Jan Beulich" <JBeulich@suse.com> wrote: > >>>> On 14.02.13 at 13:33, "Jan Beulich" <JBeulich@suse.com> wrote: > >>>>> On 14.02.13 at 13:13, Wei Liu <wei.liu2@citrix.com> wrote: > >>> On Thu, 2013-02-14 at 11:48 +0000, Jan Beulich wrote: > >>>> I don''t think this patch will fix his problems, which - as described > >>>> yesterday - I''m relatively certain result from the harsh action > >>>> netbk_fatal_tx_err() does. > >>> > >>> Yes, having this one only is not enough. It will need to either disable > >>> the vif timer or check vif->netbk != NULL inside timer callback. > >> > >> I continue to be unclear what timer you refer to. > >> > >> If we keep the carrier-off in fatal_tx_err, then _all_ > >> asynchronously callable interfaces need to check whether the > >> vif -> netbk association went away. But, especially in the > >> context of using tasklets instead of kthreads, I meanwhile > >> think that simply setting a flag (along with turning off the IRQ) > >> would be better (and keep the turning off of the carrier the way > >> it had been done before. The flag would basically need checking > >> anywhere we look at the shared ring (which ought to be very > >> few places), and perhaps should also cause xenvif_schedulable() > >> to return false. > > > > Or a "light" version of xenvif_down(), i.e. simply > > > > disable_irq(vif->irq); > > xen_netbk_deschedule_xenvif(vif); > > > > If I''m not mistaken, doing this instead of xenvif_carrier_off() > > could be all that''s needed. >Just a side note, I probably would go along with this busted flag in the future. If we are to move to 1:1 model, there will be no vif <-> netbk connection anymore, so the testing against vif->netbk won''t exist anymore. If this approach serves both tasklet and thread model well, this is the way to go. Wei.> Like the below/attached (compile tested only so far). > > Jan > > netback: fix shutting down the ring if it contains garbage > > Using rtnl_lock() in tasklet context is not permitted. > > This undoes the part of 1219:5108c6901b30 that split off > xenvif_carrier_off() from netif_disconnect(). > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > > --- a/drivers/xen/netback/common.h > +++ b/drivers/xen/netback/common.h > @@ -78,6 +78,8 @@ typedef struct netif_st { > u8 can_queue:1; /* can queue packets for receiver? */ > u8 copying_receiver:1; /* copy packets to receiver? */ > > + u8 busted:1; > + > /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */ > RING_IDX rx_req_cons_peek; > > @@ -195,7 +197,8 @@ int netif_map(netif_t *netif, unsigned l > void netif_xenbus_init(void); > > #define netif_schedulable(netif) \ > - (netif_running((netif)->dev) && netback_carrier_ok(netif)) > + (likely(!(netif)->busted) && \ > + netif_running((netif)->dev) && netback_carrier_ok(netif)) > > void netif_schedule_work(netif_t *netif); > void netif_deschedule_work(netif_t *netif); > @@ -204,9 +207,6 @@ int netif_be_start_xmit(struct sk_buff * > struct net_device_stats *netif_be_get_stats(struct net_device *dev); > irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs); > > -/* Prevent the device from generating any further traffic. */ > -void xenvif_carrier_off(netif_t *netif); > - > static inline int netbk_can_queue(struct net_device *dev) > { > netif_t *netif = netdev_priv(dev); > --- a/drivers/xen/netback/interface.c > +++ b/drivers/xen/netback/interface.c > @@ -56,6 +56,7 @@ module_param_named(queue_length, netbk_q > > static void __netif_up(netif_t *netif) > { > + netif->busted = 0; > enable_irq(netif->irq); > netif_schedule_work(netif); > } > @@ -347,23 +348,19 @@ err_rx: > return err; > } > > -void xenvif_carrier_off(netif_t *netif) > -{ > - rtnl_lock(); > - netback_carrier_off(netif); > - netif_carrier_off(netif->dev); /* discard queued packets */ > - if (netif_running(netif->dev)) > - __netif_down(netif); > - rtnl_unlock(); > - netif_put(netif); > -} > - > void netif_disconnect(struct backend_info *be) > { > netif_t *netif = be->netif; > > - if (netback_carrier_ok(netif)) > - xenvif_carrier_off(netif); > + if (netback_carrier_ok(netif)) { > + rtnl_lock(); > + netback_carrier_off(netif); > + netif_carrier_off(netif->dev); /* discard queued packets */ > + if (netif_running(netif->dev)) > + __netif_down(netif); > + rtnl_unlock(); > + netif_put(netif); > + } > > atomic_dec(&netif->refcnt); > wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0); > --- a/drivers/xen/netback/netback.c > +++ b/drivers/xen/netback/netback.c > @@ -845,7 +845,7 @@ void netif_schedule_work(netif_t *netif) > RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); > #endif > > - if (more_to_do) { > + if (more_to_do && likely(!netif->busted)) { > add_to_net_schedule_list_tail(netif); > maybe_schedule_tx_action(); > } > @@ -1024,7 +1024,9 @@ static void netbk_fatal_tx_err(netif_t * > { > printk(KERN_ERR "%s: fatal error; disabling device\n", > netif->dev->name); > - xenvif_carrier_off(netif); > + netif->busted = 1; > + disable_irq(netif->irq); > + netif_deschedule_work(netif); > netif_put(netif); > } > > @@ -1292,6 +1294,11 @@ static void net_tx_action(unsigned long > if (!netif) > continue; > > + if (unlikely(netif->busted)) { > + netif_put(netif); > + continue; > + } > + > if (netif->tx.sring->req_prod - netif->tx.req_cons > > NET_TX_RING_SIZE) { > printk(KERN_ERR "%s: Impossible number of requests. " > >
Seemingly Similar Threads
- [PATCH net-next 2/2] xen-netback: avoid allocating variable size array on stack
- [PATCH 1/4] xen/netback: shutdown the ring if it contains garbage.
- Question on sg in netback tx path
- xennet: skb rides the rocket: 20 slots
- xenpaging fixes for kernel and hypervisor