thr3ads.net - freebsd stable - Shutdown hangs on unmount of a gjournaled file system in 8-Stable [Jul 2013]

If this information is useful, please help other people find it:
Share via:

Andreas Longwitz

2013-Jul-07 22:26 UTC

Shutdown hangs on unmount of a gjournaled file system in 8-Stable

The problem occurs after an update of 8-stable from r248120 to r252111.
Sometimes shutdown hangs:

Waiting (max 60 seconds) for system process `vnlru' to stop...done
Waiting (max 60 seconds) for system process `bufdaemon' to stop...done
Waiting (max 60 seconds) for system process `syncer' to stop...
Syncing disks, vnodes remaining...0 0 done
All buffers synced.
From the kernel dump I see that the deadlock occurs on unmount of a gjournaled file system. Two processes are involved:

db> ps
pid ppid pgrp uid state wmesg  wchan              cmd
  1   0   1   0  SLs  mount dr 0xffffff007f7e559c [init]
 18   0   0   0  SL   suspwt   0xffffff007f7e5364 [g_journal switcher]

(kgdb) info threads
 158 Thread 100002 (PID=1: init)  sched_switch (td=0xffffff000235e8e0,
                                  newtd=<value optimized out>,
    flags=<value optimized out>) at /usr/src/sys/kern/sched_ule.c:1932
 ....
 217 Thread 100076 (PID=18: g_journal switcher)  sched_switch
                            (td=0xffffff0002bd6000,
    newtd=<value optimized out>, flags=<value optimized out>) at
                            /usr/src/sys/kern/sched_ule.c:1932


(kgdb) thread 158
[Switching to thread 158 (Thread 100002)]#0
sched_switch (td=0xffffff000235e8e0,
    newtd=<value optimized out>, flags=<value optimized out>) at
/usr/src/sys/kern/sched_ule.c:1932
1932                    cpuid = PCPU_GET(cpuid);

(kgdb) bt
#0  sched_switch (td=0xffffff000235e8e0, newtd=<value optimized out>,
              flags=<value optimized out>)
              at /usr/src/sys/kern/sched_ule.c:1932
#1  0xffffffff80407836 in mi_switch (flags=260, newtd=0x0) at
              /usr/src/sys/kern/kern_synch.c:466
#2  0xffffffff8043e0e2 in sleepq_wait (wchan=0xffffff007f7e559c, pri=80)
              at /usr/src/sys/kern/subr_sleepqueue.c:613
#3  0xffffffff80407fc6 in _sleep (ident=0xffffff007f7e559c,
              lock=0xffffff007f7e52f0,
              priority=<value optimized out>,
              wmesg=0xffffffff8069f595 "mount drain", timo=0)
              at /usr/src/sys/kern/kern_synch.c:250
#4  0xffffffff8048ee42 in dounmount (mp=0xffffff007f7e52f0,
              flags=524288, td=<value optimized out>)
              at /usr/src/sys/kern/vfs_mount.c:1266
#5  0xffffffff80493202 in vfs_unmountall () at
              /usr/src/sys/kern/vfs_subr.c:3321
#6  0xffffffff803fec69 in boot (howto=<value optimized out>) at
              /usr/src/sys/kern/kern_shutdown.c:428
#7  0xffffffff803fef86 in reboot (td=<value optimized out>,
              uap=0xffffff8000238bb0)
              at /usr/src/sys/kern/kern_shutdown.c:191
#8  0xffffffff805db1b4 in amd64_syscall (td=0xffffff000235e8e0,
              traced=0) at subr_syscall.c:114
#9  0xffffffff805c282c in Xfast_syscall () at
             /usr/src/sys/amd64/amd64/exception.S:387

(kgdb) f 5
#5  0xffffffff80493202 in vfs_unmountall () at
              /usr/src/sys/kern/vfs_subr.c:3321
3321                    error = dounmount(mp, MNT_FORCE, td);

(kgdb) p mp->mnt_lockref
$1=1

(kgdb) f 4
#4  0xffffffff8048ee42 in dounmount (mp=0xffffff007f7e52f0,
             flags=524288, td=<value optimized out>)
             at /usr/src/sys/kern/vfs_mount.c:1266
1266                error = msleep(&mp->mnt_lockref, MNT_MTX(mp), PVFS,

(kgdb) list
1261            if (flags & MNT_FORCE)
1262                 mp->mnt_kern_flag |= MNTK_UNMOUNTF;
1263            error = 0;
1264            if (mp->mnt_lockref) {
1265                 mp->mnt_kern_flag |= MNTK_DRAINING;
1266                 error = msleep(&mp->mnt_lockref, MNT_MTX(mp), PVFS,
1267                        "mount drain", 0);
1268            }
1269            MNT_IUNLOCK(mp);
1270            KASSERT(mp->mnt_lockref == 0,

(kgdb) thread 217
[Switching to thread 217 (Thread 100076)]#0  sched_switch
              (td=0xffffff0002bd6000,
               newtd=<value optimized out>,
               flags=<value optimized out>) at
               /usr/src/sys/kern/sched_ule.c:1932
1932                    cpuid = PCPU_GET(cpuid);

(kgdb) bt
#0  sched_switch (td=0xffffff0002bd6000, newtd=<value optimized out>,
               flags=<value optimized out>)
               at /usr/src/sys/kern/sched_ule.c:1932
#1  0xffffffff80407836 in mi_switch (flags=260, newtd=0x0) at
               /usr/src/sys/kern/kern_synch.c:466
#2  0xffffffff8043e0e2 in sleepq_wait
               (wchan=0xffffff007f7e5364, pri=159)
               at /usr/src/sys/kern/subr_sleepqueue.c:613
#3  0xffffffff80407fc6 in _sleep (ident=0xffffff007f7e5364,
               lock=0xffffff007f7e52f0,
               priority=<value optimized out>,
               wmesg=0xffffffff806a0813 "suspwt", timo=0)
               at /usr/src/sys/kern/kern_synch.c:250
#4  0xffffffff804a25f0 in vfs_write_suspend (mp=0xffffff007f7e52f0) at
               /usr/src/sys/kern/vfs_vnops.c:1277
#5  0xffffffff80c843bd in g_journal_switcher
               (arg=<value optimized out>) at
               /usr/src/sys/modules/geom/geom_journal/../
                    ../../geom/journal/g_journal.c:2968
#6  0xffffffff803d326f in fork_exit (callout=0xffffffff80c838e0
               <g_journal_switcher>, arg=0xffffffff80c8b140,
               frame=0xffffff8242e68c40) at
               /usr/src/sys/kern/kern_fork.c:872
#7  0xffffffff805c2a0e in fork_trampoline () at
               /usr/src/sys/amd64/amd64/exception.S:602

(kgdb) f 4
#4  0xffffffff804a25f0 in vfs_write_suspend (mp=0xffffff007f7e52f0) at
               /usr/src/sys/kern/vfs_vnops.c:1277
1277           (void) msleep(&mp->mnt_writeopcount,

(kgdb) list
1272            while (mp->mnt_kern_flag & MNTK_SUSPEND)
1273                 msleep(&mp->mnt_flag, MNT_MTX(mp), PUSER - 1,
                            "wsuspfs", 0);
1274            mp->mnt_kern_flag |= MNTK_SUSPEND;
1275            mp->mnt_susp_owner = curthread;
1276            if (mp->mnt_writeopcount > 0)
1277                  (void) msleep(&mp->mnt_writeopcount,
1278                      MNT_MTX(mp), (PUSER - 1)|PDROP, "suspwt", 0);
1279            else
1280                    MNT_IUNLOCK(mp);
1281            if ((error = VFS_SYNC(mp, MNT_SUSPEND)) != 0)

(kgdb) p mp->mnt_writeopcount
$2 = 1

The deadlock can be explained now: pid 1 (init) sleeps on "mount drain"
because mp->mnt_lockref was 1. This setting was done by pid 18 (gjournal
switcher) by calling vfs_busy(). pid 18 now sleeps on "suspwt" because
mp->mnt_writeopcount was 1. This setting was done by pid 1 before going
to sleep by calling vn_start_write() in dounmount().

I think the reason for this deadlock is the commit r249055 which seems
not to be compatible with gjournal.


Andreas Longwitz

Konstantin Belousov

2013-Jul-08 05:43 UTC

head link

Shutdown hangs on unmount of a gjournaled file system in 8-Stable

On Mon, Jul 08, 2013 at 12:26:43AM +0200, Andreas Longwitz wrote:
> The deadlock can be explained now: pid 1 (init) sleeps on "mount drain"
> because mp->mnt_lockref was 1. This setting was done by pid 18 (gjournal
> switcher) by calling vfs_busy(). pid 18 now sleeps on "suspwt" because
> mp->mnt_writeopcount was 1. This setting was done by pid 1 before going
> to sleep by calling vn_start_write() in dounmount().
>
> I think the reason for this deadlock is the commit r249055 which seems
> not to be compatible with gjournal.

Thank you for the analysis. I think 'not compatible' is some
understatement. The situation clearly causes a deadlock, you are right.

The vfs_busy(); vfs_write_suspend(); call sequence is somewhat dubious,
in fact, exactly because unmount could start in between. I think that
vfs_write_suspend() must avoid setting MNTK_SUSPEND if unmount was
started. Patch below, for HEAD, should fix the problem, by marking the
callers of vfs_write_suspend(), which are not protected by the covered
vnode lock, with the VS_SKIP_UNMOUNT flag.

I believe that the conflicts on stable/8 should be trivial, if any.

diff --git a/sys/geom/journal/g_journal.c b/sys/geom/journal/g_journal.c
index a3c996c..3ce2785 100644
--- a/sys/geom/journal/g_journal.c
+++ b/sys/geom/journal/g_journal.c
@@ -2960,7 +2960,7 @@ g_journal_do_switch(struct g_class *classp)
 		GJ_TIMER_STOP(1, &bt, "BIO_FLUSH time of %s", sc->sc_name);
 
 		GJ_TIMER_START(1, &bt);
-		error = vfs_write_suspend(mp);
+		error = vfs_write_suspend(mp, VS_SKIP_UNMOUNT);
 		GJ_TIMER_STOP(1, &bt, "Suspend time of %s", mountpoint);
 		if (error != 0) {
 			GJ_DEBUG(0, "Cannot suspend file system %s (error=%d).",
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 7eac0ef..06e59f9 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -1668,8 +1668,7 @@ vn_finished_secondary_write(mp)
  * Request a filesystem to suspend write operations.
  */
 int
-vfs_write_suspend(mp)
-	struct mount *mp;
+vfs_write_suspend(struct mount *mp, int flags)
 {
 	int error;
 
@@ -1680,6 +1679,21 @@ vfs_write_suspend(mp)
 	}
 	while (mp->mnt_kern_flag & MNTK_SUSPEND)
 		msleep(&mp->mnt_flag, MNT_MTX(mp), PUSER - 1, "wsuspfs", 0);
+
+	/*
+	 * Unmount holds a write reference on the mount point.  If we
+	 * own busy reference and drain for writers, we deadlock with
+	 * the reference draining in the unmount path.  Callers of
+	 * vfs_write_suspend() must specify VS_SKIP_UNMOUNT if
+	 * vfs_busy() reference is owned and caller is not in the
+	 * unmount context.
+	 */
+	if ((flags & VS_SKIP_UNMOUNT) != 0 &&
+	    (mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
+		MNT_IUNLOCK(mp);
+		return (EBUSY);
+	}
+
 	mp->mnt_kern_flag |= MNTK_SUSPEND;
 	mp->mnt_susp_owner = curthread;
 	if (mp->mnt_writeopcount > 0)
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 42bfb65..b0cbcc0 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -398,6 +398,9 @@ extern int		vttoif_tab[];
 #define	VR_START_WRITE	0x0001	/* vfs_write_resume: start write atomically */
 #define	VR_NO_SUSPCLR	0x0002	/* vfs_write_resume: do not clear suspension */
 
+#define	VS_SKIP_UNMOUNT	0x0001	/* vfs_write_suspend: fail if the
+				   filesystem is being unmounted */
+
 #define	VREF(vp)	vref(vp)
 
 #ifdef DIAGNOSTIC
@@ -711,7 +714,7 @@ int	vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize,
 int	vfs_cache_lookup(struct vop_lookup_args *ap);
 void	vfs_timestamp(struct timespec *);
 void	vfs_write_resume(struct mount *mp, int flags);
-int	vfs_write_suspend(struct mount *mp);
+int	vfs_write_suspend(struct mount *mp, int flags);
 int	vop_stdbmap(struct vop_bmap_args *);
 int	vop_stdfsync(struct vop_fsync_args *);
 int	vop_stdgetwritemount(struct vop_getwritemount_args *);
diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c
index 9a9c88a..ad157aa 100644
--- a/sys/ufs/ffs/ffs_snapshot.c
+++ b/sys/ufs/ffs/ffs_snapshot.c
@@ -423,7 +423,7 @@ restart:
 	 */
 	for (;;) {
 		vn_finished_write(wrtmp);
-		if ((error = vfs_write_suspend(vp->v_mount)) != 0) {
+		if ((error = vfs_write_suspend(vp->v_mount, 0)) != 0) {
 			vn_start_write(NULL, &wrtmp, V_WAIT);
 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 			goto out;
diff --git a/sys/ufs/ffs/ffs_suspend.c b/sys/ufs/ffs/ffs_suspend.c
index 3198c1a..a8c4578 100644
--- a/sys/ufs/ffs/ffs_suspend.c
+++ b/sys/ufs/ffs/ffs_suspend.c
@@ -206,7 +206,7 @@ ffs_susp_suspend(struct mount *mp)
 		return (EPERM);
 #endif
 
-	if ((error = vfs_write_suspend(mp)) != 0)
+	if ((error = vfs_write_suspend(mp, VS_SKIP_UNMOUNT)) != 0)
 		return (error);
 
 	ump->um_writesuspended = 1;
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index 57f092c..a87fdfa 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -257,7 +257,7 @@ ffs_mount(struct mount *mp)
 				return (error);
 			for (;;) {
 				vn_finished_write(mp);
-				if ((error = vfs_write_suspend(mp)) != 0)
+				if ((error = vfs_write_suspend(mp, 0)) != 0)
 					return (error);
 				MNT_ILOCK(mp);
 				if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
@@ -1255,7 +1255,7 @@ ffs_unmount(mp, mntflags)
 		 */
 		for (;;) {
 			vn_finished_write(mp);
-			if ((error = vfs_write_suspend(mp)) != 0)
+			if ((error = vfs_write_suspend(mp, 0)) != 0)
 				return (error);
 			MNT_ILOCK(mp);
 			if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 834 bytes
Desc: not available
URL:
<http://lists.freebsd.org/pipermail/freebsd-stable/attachments/20130708/15fc3a2b/attachment.sig>

Seemingly Similar Threads

Search for more possibly parallel threads

freebsd stable - Jul 2013 - Shutdown hangs on unmount of a gjournaled file system in 8-Stable

Shutdown hangs on unmount of a gjournaled file system in 8-Stable

Shutdown hangs on unmount of a gjournaled file system in 8-Stable

Seemingly Similar Threads