Guy Helmer
2016-Jul-08 16:36 UTC
9-STABLE: Panic when destroying gmirror that is synchronizing
Hi, I'm able to replicate a problem where destroying a gmirror that is synchronizing causes a panic in 9-STABLE (^/stable/9 rev 302430) on amd64. I've forced a gmirror into an inconsistent state by forcing a reset, and then issued "gmirror destroy -f mirror1" after the reboot, while the system is synchronizing the two disks in the mirror. As a workaround, I can use "geom mirror clear" on the providers before geom_mirror.ko is loaded. It's a rare issue but I wanted to document it in case it can be fixed. Kernel stack trace and code snippets follow. If any other information would be useful, please let me know. Guy Unread portion of the kernel message buffer: GEOM_MIRROR: Device mirror1: provider mirror/mirror1 destroyed. GEOM_MIRROR: Device mirror1: rebuilding provider gptid/48c59e09-3c94-11e6-8928-000c29ce3c97 stopped. Fatal trap 12: page fault while in kernel mode cpuid = 0; apic id = 00 fault virtual address = 0x98 fault code = supervisor write data, page not present instruction pointer = 0x20:0xffffffff806df6ff stack pointer = 0x28:0xffffff800024eb30 frame pointer = 0x28:0xffffff800024eb40 code segment = base 0x0, limit 0xfffff, type 0x1b = DPL 0, pres 1, long 1, def32 0, gran 1 processor eflags = interrupt enabled, resume, IOPL = 0 current process = 13 (g_up) trap number = 12 panic: page fault cpuid = 0 KDB: stack backtrace: #0 0xffffffff8072e336 at kdb_backtrace+0x66 #1 0xffffffff806f3d5e at panic+0x1ce #2 0xffffffff80910ae7 at trap_fatal+0x277 #3 0xffffffff80910e31 at trap_pfault+0x211 #4 0xffffffff809113f9 at trap+0x329 #5 0xffffffff808fa311 at calltrap+0x8 #6 0xffffffff81412593 at g_mirror_sync_done+0x53 #7 0xffffffff8077439e at biodone+0xae #8 0xffffffff806576ac at g_io_schedule_up+0xac #9 0xffffffff80657e0c at g_up_procbody+0x5c #10 0xffffffff806c0b4f at fork_exit+0x11f #11 0xffffffff808fa83e at fork_trampoline+0xe Uptime: 1m54s GEOM_MIRROR: Device mirror0: rebuilding provider gptid/48ac38bf-3c94-11e6-8928-000c29ce3c97 stopped. 
Dumping 87 out of 238 MB:..19%..37%..55%..74%..92% Reading symbols from /boot/kernel/geom_mirror.ko...Reading symbols from /boot/kernel/geom_mirror.ko.symbols...done. done. Loaded symbols for /boot/kernel/geom_mirror.ko #0 doadump (textdump=<value optimized out>) at pcpu.h:235 235 pcpu.h: No such file or directory. in pcpu.h (kgdb) #0 doadump (textdump=<value optimized out>) at pcpu.h:235 #1 0xffffffff806f3836 in kern_reboot (howto=260) at ../../../kern/kern_shutdown.c:454 #2 0xffffffff806f3d37 in panic (fmt=0x1 <Address 0x1 out of bounds>) at ../../../kern/kern_shutdown.c:642 #3 0xffffffff80910ae7 in trap_fatal (frame=0xc, eva=<value optimized out>) at ../../../amd64/amd64/trap.c:876 #4 0xffffffff80910e31 in trap_pfault (frame=0xffffff800024ea80, usermode=0) at ../../../amd64/amd64/trap.c:798 #5 0xffffffff809113f9 in trap (frame=0xffffff800024ea80) at ../../../amd64/amd64/trap.c:462 #6 0xffffffff808fa311 in calltrap () at ../../../amd64/amd64/exception.S:238 #7 0xffffffff806df6ff in _mtx_lock_flags (m=0x80, opts=0, file=0xffffffff8141d0d8 "/usr/src/sys/modules/geom/geom_mirror/../../../geom/mirror/g_mirror.c", line=990) at atomic.h:164 #8 0xffffffff81412593 in g_mirror_sync_done (bp=0xfffffe0004c963e0) at /usr/src/sys/modules/geom/geom_mirror/../../../geom/mirror/g_mirror.c:990 #9 0xffffffff8077439e in biodone (bp=0xfffffe0004c963e0) at ../../../kern/vfs_bio.c:3667 #10 0xffffffff806576ac in g_io_schedule_up (tp=<value optimized out>) at ../../../geom/geom_io.c:808 #11 0xffffffff80657e0c in g_up_procbody (arg=<value optimized out>) at ../../../geom/geom_kern.c:97 #12 0xffffffff806c0b4f in fork_exit ( callout=0xffffffff80657db0 <g_up_procbody>, arg=0x0, frame=0xffffff800024ec40) at ../../../kern/kern_fork.c:1000 #13 0xffffffff808fa83e in fork_trampoline () at ../../../amd64/amd64/exception.S:613 #14 0x0000000000000000 in ?? 
() (kgdb) geom/mirror/g_mirror.c:990: static void g_mirror_sync_done(struct bio *bp) { struct g_mirror_softc *sc; G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered."); sc = bp->bio_from->geom->softc; bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC; mtx_lock(&sc->sc_queue_mtx); <--- bioq_insert_tail(&sc->sc_queue, bp); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); } kern/vfs_bio.c:3667: done = bp->bio_done; if (done == NULL) wakeup(bp); mtx_unlock(mtxp); if (done != NULL) done(bp); <--- if (transient) { pmap_qremove(start, OFF_TO_IDX(end - start)); vm_map_remove(bio_transient_map, start, end); atomic_add_int(&inflight_transient_maps, -1); } } geom/geom_io.c:808: bp = g_bioq_first(&g_bio_run_up); if (bp != NULL) { g_bioq_unlock(&g_bio_run_up); THREAD_NO_SLEEPING(); CTR4(KTR_GEOM, "g_up biodone bp %p provider %s off " "%jd len %ld", bp, bp->bio_to->name, bp->bio_offset, bp->bio_length); biodone(bp); <--- THREAD_SLEEPING_OK(); continue; } CTR0(KTR_GEOM, "g_up going to sleep"); geom/geom_kern.c:97: static void g_up_procbody(void *arg) { mtx_assert(&Giant, MA_NOTOWNED); thread_lock(g_up_td); sched_prio(g_up_td, PRIBIO); thread_unlock(g_up_td); for(;;) { g_io_schedule_up(g_up_td); <--- } }