See the crash below, which happened on one of my test machines which still aren''t in the test pool due to these kind of failures. The bug happened immediately after localhost migration of a pv guest. The kernel was 2.6.32.46 (as built in flight 10515). Ian. Dec 16 19:02:13.085286 [ 391.370426] ------------[ cut here ]------------^M Dec 16 19:02:13.097267 [ 391.370440] kernel BUG at arch/x86/mm/fault.c:210!^M Dec 16 19:02:13.097303 [ 391.370460] invalid opcode: 0000 [#1] SMP ^M Dec 16 19:02:13.109253 [ 391.370491] last sysfs file: /sys/devices/vif-9-0/uevent^M Dec 16 19:02:13.109291 [ 391.370503] Modules linked in: e1000e [last unloaded: scsi_wait_scan]^M Dec 16 19:02:13.117292 [ 391.370575] ^M Dec 16 19:02:13.117330 [ 391.370597] Pid: 40, comm: xenwatch Not tainted (2.6.32.46 #1) X9SCL/X9SCM^M Dec 16 19:02:13.129252 [ 391.370613] EIP: 0061:[<c104a052>] EFLAGS: 00010082 CPU: 5^M Dec 16 19:02:13.129289 [ 391.370633] EIP is at vmalloc_sync_one+0x112/0x121^M Dec 16 19:02:13.137260 [ 391.370646] EAX: 000050a0 EBX: c1810fd0 ECX: fffff2e3 EDX: 00000000^M Dec 16 19:02:13.149240 [ 391.370661] ESI: ffffffe0 EDI: d9248fff EBP: dfda7ea8 ESP: dfda7e94^M Dec 16 19:02:13.149279 [ 391.370677] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0069^M Dec 16 19:02:13.157316 [ 391.370690] Process xenwatch (pid: 40, ti=dfda6000 task=dfce3cc0 task.ti=dfda6000)^M Dec 16 19:02:13.169240 [ 391.370705] Stack:^M Dec 16 19:02:13.169267 [ 391.370716] 00285067 00000000 c5c68640 ff400000 c1810fc8 dfda7ec4 c104aa52 c1760204^M Dec 16 19:02:13.177285 [ 391.370783] <0> c4be6f84 d7411320 df68bb40 dbc2b600 dfda7ed0 c10da2f9 fffffff4 dfda7f28^M Dec 16 19:02:13.177352 [ 391.370858] <0> c122c1f3 0000032a 00000300 df68bb40 0200006b dce9d3a8 400002c0 c102c23b^M Dec 16 19:02:13.189265 [ 391.370940] Call Trace:^M Dec 16 19:02:13.189298 [ 391.370960] [<c104aa52>] ? vmalloc_sync_all+0x5f/0xc1^M Dec 16 19:02:13.197260 [ 391.370982] [<c10da2f9>] ? 
alloc_vm_area+0x44/0x4b^M Dec 16 19:02:13.197296 [ 391.371004] [<c122c1f3>] ? netif_map+0x45/0x24d^M Dec 16 19:02:13.209317 [ 391.371033] [<c102c23b>] ? xen_restore_fl_direct_end+0x0/0x1^M Dec 16 19:02:13.209352 [ 391.371065] [<c10e40b8>] ? kfree+0xff/0x107^M Dec 16 19:02:13.217261 [ 391.371097] [<c121e974>] ? xenbus_scanf+0x35/0x48^M Dec 16 19:02:13.217297 [ 391.371118] [<c122b8b8>] ? frontend_changed+0x2a2/0x4f0^M Dec 16 19:02:13.229254 [ 391.371137] [<c121f331>] ? xenbus_otherend_changed+0x5c/0x61^M Dec 16 19:02:13.237246 [ 391.371158] [<c121f6ff>] ? frontend_changed+0xa/0xd^M Dec 16 19:02:13.237283 [ 391.371178] [<c121e00c>] ? xenwatch_thread+0xfe/0x126^M Dec 16 19:02:13.245274 [ 391.371199] [<c1076589>] ? autoremove_wake_function+0x0/0x2f^M Dec 16 19:02:13.245317 [ 391.371223] [<c121df0e>] ? xenwatch_thread+0x0/0x126^M Dec 16 19:02:13.257249 [ 391.371244] [<c107626f>] ? kthread+0x5f/0x64^M Dec 16 19:02:13.257283 [ 391.371261] [<c1076210>] ? kthread+0x0/0x64^M Dec 16 19:02:13.265277 [ 391.371283] [<c102f497>] ? kernel_thread_helper+0x7/0x10^M Dec 16 19:02:13.265318 [ 391.371297] Code: eb fe ff 15 08 dd 66 c1 8b 7d ec 89 ce 89 45 ec 8b 45 ec 89 55 f0 8b 55 f0 0f ac fe 0c c1 e6 05 0f ac d0 0c c1 e0 05 39 c6 74 06 <0f> 0b eb fe 31 db 5a 89 d8 59 5b 5e 5f 5d c3 55 89 e5 53 89 c3 ^M Dec 16 19:02:13.285270 [ 391.371792] EIP: [<c104a052>] vmalloc_sync_one+0x112/0x121 SS:ESP 0069:dfda7e94^M Dec 16 19:02:13.293255 [ 391.371833] ---[ end trace 27f82c4d856810c0 ]---^M 10520/test-amd64-i386-xl
Ian Campbell
2011-Dec-20 16:28 UTC
Re: vmalloc_sync_all crash still happening on some machines
On Tue, 2011-12-20 at 16:17 +0000, Ian Jackson wrote:> See the crash below, which happened on one of my test machines which > still aren''t in the test pool due to these kind of failures. > > The bug happened immediately after localhost migration of a pv guest. > > The kernel was 2.6.32.46 (as built in flight 10515).More specifically it seems to be 6bec8b4a4c14095d0b7ce424db9d583c3decae6c from Jeremy''s xen/next-2.6.32 branch. That''s actually a commit from September has the test system got stuck testing that for some reason? There''s quite a few new upstream stable releases in Jeremy''s branch. Does it relate somehow to the kernel.org outage? I suspect that''s a red-herring though since I bet the vmalloc fixes simply never got backported to this tree. Ian.> > Ian. > > Dec 16 19:02:13.085286 [ 391.370426] ------------[ cut here ]------------^M > Dec 16 19:02:13.097267 [ 391.370440] kernel BUG at arch/x86/mm/fault.c:210!^M > Dec 16 19:02:13.097303 [ 391.370460] invalid opcode: 0000 [#1] SMP ^M > Dec 16 19:02:13.109253 [ 391.370491] last sysfs file: /sys/devices/vif-9-0/uevent^M > Dec 16 19:02:13.109291 [ 391.370503] Modules linked in: e1000e [last unloaded: scsi_wait_scan]^M > Dec 16 19:02:13.117292 [ 391.370575] ^M > Dec 16 19:02:13.117330 [ 391.370597] Pid: 40, comm: xenwatch Not tainted (2.6.32.46 #1) X9SCL/X9SCM^M > Dec 16 19:02:13.129252 [ 391.370613] EIP: 0061:[<c104a052>] EFLAGS: 00010082 CPU: 5^M > Dec 16 19:02:13.129289 [ 391.370633] EIP is at vmalloc_sync_one+0x112/0x121^M > Dec 16 19:02:13.137260 [ 391.370646] EAX: 000050a0 EBX: c1810fd0 ECX: fffff2e3 EDX: 00000000^M > Dec 16 19:02:13.149240 [ 391.370661] ESI: ffffffe0 EDI: d9248fff EBP: dfda7ea8 ESP: dfda7e94^M > Dec 16 19:02:13.149279 [ 391.370677] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0069^M > Dec 16 19:02:13.157316 [ 391.370690] Process xenwatch (pid: 40, ti=dfda6000 task=dfce3cc0 task.ti=dfda6000)^M > Dec 16 19:02:13.169240 [ 391.370705] Stack:^M > Dec 16 19:02:13.169267 [ 391.370716] 00285067 
00000000 c5c68640 ff400000 c1810fc8 dfda7ec4 c104aa52 c1760204^M > Dec 16 19:02:13.177285 [ 391.370783] <0> c4be6f84 d7411320 df68bb40 dbc2b600 dfda7ed0 c10da2f9 fffffff4 dfda7f28^M > Dec 16 19:02:13.177352 [ 391.370858] <0> c122c1f3 0000032a 00000300 df68bb40 0200006b dce9d3a8 400002c0 c102c23b^M > Dec 16 19:02:13.189265 [ 391.370940] Call Trace:^M > Dec 16 19:02:13.189298 [ 391.370960] [<c104aa52>] ? vmalloc_sync_all+0x5f/0xc1^M > Dec 16 19:02:13.197260 [ 391.370982] [<c10da2f9>] ? alloc_vm_area+0x44/0x4b^M > Dec 16 19:02:13.197296 [ 391.371004] [<c122c1f3>] ? netif_map+0x45/0x24d^M > Dec 16 19:02:13.209317 [ 391.371033] [<c102c23b>] ? xen_restore_fl_direct_end+0x0/0x1^M > Dec 16 19:02:13.209352 [ 391.371065] [<c10e40b8>] ? kfree+0xff/0x107^M > Dec 16 19:02:13.217261 [ 391.371097] [<c121e974>] ? xenbus_scanf+0x35/0x48^M > Dec 16 19:02:13.217297 [ 391.371118] [<c122b8b8>] ? frontend_changed+0x2a2/0x4f0^M > Dec 16 19:02:13.229254 [ 391.371137] [<c121f331>] ? xenbus_otherend_changed+0x5c/0x61^M > Dec 16 19:02:13.237246 [ 391.371158] [<c121f6ff>] ? frontend_changed+0xa/0xd^M > Dec 16 19:02:13.237283 [ 391.371178] [<c121e00c>] ? xenwatch_thread+0xfe/0x126^M > Dec 16 19:02:13.245274 [ 391.371199] [<c1076589>] ? autoremove_wake_function+0x0/0x2f^M > Dec 16 19:02:13.245317 [ 391.371223] [<c121df0e>] ? xenwatch_thread+0x0/0x126^M > Dec 16 19:02:13.257249 [ 391.371244] [<c107626f>] ? kthread+0x5f/0x64^M > Dec 16 19:02:13.257283 [ 391.371261] [<c1076210>] ? kthread+0x0/0x64^M > Dec 16 19:02:13.265277 [ 391.371283] [<c102f497>] ? 
kernel_thread_helper+0x7/0x10^M > Dec 16 19:02:13.265318 [ 391.371297] Code: eb fe ff 15 08 dd 66 c1 8b 7d ec 89 ce 89 45 ec 8b 45 ec 89 55 f0 8b 55 f0 0f ac fe 0c c1 e6 05 0f ac d0 0c c1 e0 05 39 c6 74 06 <0f> 0b eb fe 31 db 5a 89 d8 59 5b 5e 5f 5d c3 55 89 e5 53 89 c3 ^M > Dec 16 19:02:13.285270 [ 391.371792] EIP: [<c104a052>] vmalloc_sync_one+0x112/0x121 SS:ESP 0069:dfda7e94^M > Dec 16 19:02:13.293255 [ 391.371833] ---[ end trace 27f82c4d856810c0 ]---^M > > 10520/test-amd64-i386-xl > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel
Ian Jackson
2011-Dec-20 16:51 UTC
Re: vmalloc_sync_all crash still happening on some machines
Ian Campbell writes ("Re: [Xen-devel] vmalloc_sync_all crash still happening on some machines"):> More specifically it seems to be > 6bec8b4a4c14095d0b7ce424db9d583c3decae6c from Jeremy's xen/next-2.6.32 > branch.My script is still pulling from Jeremy's github. I see that the tree on git.kernel.org has many new changesets so I'll switch back to that. I guess I naively assumed that having established two trees Jeremy might push to both of them...> I suspect that's a red-herring though since I bet the vmalloc fixes > simply never got backported to this tree.Yes, it looks like they didn't. Ian.
Ian Campbell
2011-Dec-20 17:15 UTC
Re: vmalloc_sync_all crash still happening on some machines
On Tue, 2011-12-20 at 16:51 +0000, Ian Jackson wrote:> Ian Campbell writes ("Re: [Xen-devel] vmalloc_sync_all crash still happening on some machines"): > > More specifically it seems to be > > 6bec8b4a4c14095d0b7ce424db9d583c3decae6c from Jeremy's xen/next-2.6.32 > > branch. > > My script is still pulling from Jeremy's github. I see that the tree > on git.kernel.org has many new changesets so I'll switch back to that. > I guess I naively assumed that having established two trees Jeremy > might push to both of them... > > > I suspect that's a red-herring though since I bet the vmalloc fixes > > simply never got backported to this tree. > > Yes, it looks like they didn't.Looking at the tree the vmalloc_sync_all() in alloc_vm_area() is present and has been for ages. The upstream stuff I was thinking of was David Vrabel's work to make the sync unnecessary. The only difference in vmalloc_sync_all seems to be that 2.6.32 uses spin_lock_irqsave() while upstream uses just spin_lock() which doesn't seem likely to make a difference. The actual bug you are seeing is hitting the BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)) towards the end of vmalloc_sync_one(). David, do you remember from when you were digging into this stuff what it actually means? I suppose we could backport the stuff to make vmalloc_sync_all unnecessary, but I guess there'd be no guarantee it would help since the root cause isn't really understood. Ian.
Jan Beulich
2011-Dec-20 17:27 UTC
Re: vmalloc_sync_all crash still happening on some machines
>>> On 20.12.11 at 18:15, Ian Campbell <Ian.Campbell@citrix.com> wrote: > The actual bug you are seeing is hitting the > BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)) > towards the end of vmalloc_sync_one(). David, do you remember from when > you were digging into this stuff what it actually means?This means that the pmd entry in the reference page tables (init_mm's) is referencing a different page than the (non-blank) one in the page table being updated, i.e. some race occurred that allowed two distinct page tables to be allocated for the same kernel address (range). If this is reproducible in some way, printing the entries (or really just their PFNs/MFNs) might help understand what is going on here (assuming that such a race can be expected to not really exist in this old and mature a kernel). Jan
Ian Jackson
2011-Dec-20 17:29 UTC
Re: vmalloc_sync_all crash still happening on some machines
Jan Beulich writes ("Re: [Xen-devel] vmalloc_sync_all crash still happening on some machines"):> If this is reproducible in some way, printing the entries (or really just > their PFNs/MFNs) might help understand what is going on here > (assuming that such a race can be expected to not really exist in > this old and mature a kernel).It does seem quite reproducible. I'd be happy to test patches etc. Ian.
Ian Campbell
2012-Jan-13 10:35 UTC
Re: vmalloc_sync_all crash still happening on some machines
On Tue, 2011-12-20 at 17:29 +0000, Ian Jackson wrote:> Jan Beulich writes ("Re: [Xen-devel] vmalloc_sync_all crash still happening on some machines"): > > If this is reproducible in some way, printing the entries (or really just > > their PFNs/MFNs) might help understand what is going on here > > (assuming that such a race can be expected to not really exist in > > this old and mature a kernel). > > It does seem quite reproducible. I''d be happy to test patches etc.(trawling my backlog). How about this for starters (the ret change is incidental and fixes an existing warning): 8<-------------------------------------------------------------------- From 823c4eb30f08e2f35170e4d98c9477dd6d24a387 Mon Sep 17 00:00:00 2001 From: Ian Campbell <ian.campbell@citrix.com> Date: Fri, 13 Jan 2012 10:35:14 +0000 Subject: [PATCH] Debug vmalloc_sync_all crash --- arch/x86/mm/fault.c | 18 ++++++++++++++---- 1 files changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 7e7dbd1..5df9335 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -206,8 +206,18 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) if (!pmd_present(*pmd)) set_pmd(pmd, *pmd_k); - else + else { + printk(KERN_CRIT "vmalloc sync one failure for %#lx\n", address); + printk(KERN_CRIT " pgd %p = %#010llx\n", pgd, pgd_val(*pgd)); + printk(KERN_CRIT "pgd_k %p = %#010llx\n", pgd_k, pgd_val(*pgd_k)); + printk(KERN_CRIT " pud %p = %#010llx\n", pud, pud_val(*pud)); + printk(KERN_CRIT "pud_k %p = %#010llx\n", pud_k, pud_val(*pud_k)); + printk(KERN_CRIT " pmd %p = %#010llx\n", pmd, pmd_val(*pmd)); + printk(KERN_CRIT "pmd_k %p = %#010llx\n", pmd_k, pmd_val(*pmd_k)); + printk(KERN_CRIT "pmd page %p\n", pmd_page(*pmd)); + printk(KERN_CRIT "pmd_k page %p\n", pmd_page(*pmd_k)); BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); + } return pmd_k; } @@ -229,15 +239,15 @@ void vmalloc_sync_all(void) spin_lock_irqsave(&pgd_lock, flags); list_for_each_entry(page, 
&pgd_list, lru) { spinlock_t *pgt_lock; - int ret; + pmd_t *pmd; pgt_lock = &pgd_page_get_mm(page)->page_table_lock; spin_lock(pgt_lock); - ret = vmalloc_sync_one(page_address(page), address); + pmd = vmalloc_sync_one(page_address(page), address); spin_unlock(pgt_lock); - if (!ret) + if (!pmd) break; } spin_unlock_irqrestore(&pgd_lock, flags); -- 1.7.2.5