thr3ads.net - Nouveau - [Nouveau] [PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP

If this information is useful, please help other people find it:
Share via:

Peter Wu

2016-Jul-12 16:49 UTC

[Nouveau] [PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP

The FBIOPUT_CON2FBMAP ioctl takes a console_lock(). When this is called
while nouveau was runtime suspended, a deadlock would occur due to
nouveau_fbcon_set_suspend also trying to obtain console_lock().

Fix this by delaying the drm_fb_helper_set_suspend call. Based on the
i915 code (which was done for performance reasons though).

Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
Signed-off-by: Peter Wu <peter at lekensteyn.nl>
---
Tested on top of v4.7-rc5, the deadlock is gone.
---
 drivers/gpu/drm/nouveau/nouveau_drm.c   |  4 +--
 drivers/gpu/drm/nouveau/nouveau_drv.h   |  1 +
 drivers/gpu/drm/nouveau/nouveau_fbcon.c | 54 ++++++++++++++++++++++++++++-----
 drivers/gpu/drm/nouveau/nouveau_fbcon.h |  2 +-
 4 files changed, 50 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c
b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 11f8dd9..f9a2c10 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -552,7 +552,7 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime)
 
 	if (dev->mode_config.num_crtc) {
 		NV_INFO(drm, "suspending console...\n");
-		nouveau_fbcon_set_suspend(dev, 1);
+		nouveau_fbcon_set_suspend(dev, FBINFO_STATE_SUSPENDED, true);
 		NV_INFO(drm, "suspending display...\n");
 		ret = nouveau_display_suspend(dev, runtime);
 		if (ret)
@@ -635,7 +635,7 @@ nouveau_do_resume(struct drm_device *dev, bool runtime)
 		NV_INFO(drm, "resuming display...\n");
 		nouveau_display_resume(dev, runtime);
 		NV_INFO(drm, "resuming console...\n");
-		nouveau_fbcon_set_suspend(dev, 0);
+		nouveau_fbcon_set_suspend(dev, FBINFO_STATE_RUNNING, false);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h
b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 822a021..a743d19 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -147,6 +147,7 @@ struct nouveau_drm {
 	struct nouveau_channel *channel;
 	struct nvkm_gpuobj *notify;
 	struct nouveau_fbdev *fbcon;
+	struct work_struct fbdev_suspend_work;
 	struct nvif_object nvsw;
 	struct nvif_object ntfy;
 	struct nvif_notify flip;
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index d1f248f..089156a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -492,19 +492,53 @@ static const struct drm_fb_helper_funcs
nouveau_fbcon_helper_funcs = {
 	.fb_probe = nouveau_fbcon_create,
 };
 
+static void nouveau_fbcon_suspend_worker(struct work_struct *work)
+{
+	nouveau_fbcon_set_suspend(container_of(work,
+					       struct nouveau_drm,
+					       fbdev_suspend_work)->dev,
+				  FBINFO_STATE_RUNNING,
+				  true);
+}
+
 void
-nouveau_fbcon_set_suspend(struct drm_device *dev, int state)
+nouveau_fbcon_set_suspend(struct drm_device *dev, int state, bool synchronous)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	if (drm->fbcon) {
-		console_lock();
-		if (state == FBINFO_STATE_RUNNING)
-			nouveau_fbcon_accel_restore(dev);
-		drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
+	if (!drm->fbcon)
+		return;
+
+	if (synchronous) {
+		/* Flush any pending work to turn the console on, and then
+		 * wait to turn it off. It must be synchronous as we are
+		 * about to suspend or unload the driver.
+		 *
+		 * Note that from within the work-handler, we cannot flush
+		 * ourselves, so only flush outstanding work upon suspend!
+		 */
 		if (state != FBINFO_STATE_RUNNING)
-			nouveau_fbcon_accel_save_disable(dev);
-		console_unlock();
+			flush_work(&drm->fbdev_suspend_work);
+		console_lock();
+	} else {
+		/*
+		 * The console lock can be pretty contented on resume due
+		 * to all the printk activity.  Try to keep it out of the hot
+		 * path of resume if possible.  This also prevents a deadlock
+		 * with FBIOPUT_CON2FBMAP.
+		 */
+		WARN_ON(state != FBINFO_STATE_RUNNING);
+		if (!console_trylock()) {
+			schedule_work(&drm->fbdev_suspend_work);
+			return;
+		}
 	}
+
+	if (state == FBINFO_STATE_RUNNING)
+		nouveau_fbcon_accel_restore(dev);
+	drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
+	if (state != FBINFO_STATE_RUNNING)
+		nouveau_fbcon_accel_save_disable(dev);
+	console_unlock();
 }
 
 int
@@ -526,6 +560,8 @@ nouveau_fbcon_init(struct drm_device *dev)
 	fbcon->dev = dev;
 	drm->fbcon = fbcon;
 
+	INIT_WORK(&drm->fbdev_suspend_work, nouveau_fbcon_suspend_worker);
+
 	drm_fb_helper_prepare(dev, &fbcon->helper,
&nouveau_fbcon_helper_funcs);
 
 	ret = drm_fb_helper_init(dev, &fbcon->helper,
@@ -571,6 +607,8 @@ nouveau_fbcon_fini(struct drm_device *dev)
 	if (!drm->fbcon)
 		return;
 
+	flush_work(&drm->fbdev_suspend_work);
+
 	nouveau_fbcon_accel_fini(dev);
 	nouveau_fbcon_destroy(dev, drm->fbcon);
 	kfree(drm->fbcon);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
index ca77ad0..34b2504 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
@@ -66,7 +66,7 @@ void nouveau_fbcon_gpu_lockup(struct fb_info *info);
 
 int nouveau_fbcon_init(struct drm_device *dev);
 void nouveau_fbcon_fini(struct drm_device *dev);
-void nouveau_fbcon_set_suspend(struct drm_device *dev, int state);
+void nouveau_fbcon_set_suspend(struct drm_device *dev, int state, bool
synchronous);
 void nouveau_fbcon_accel_save_disable(struct drm_device *dev);
 void nouveau_fbcon_accel_restore(struct drm_device *dev);
 
-- 
2.8.3

Lukas Wunner

2016-Jul-12 19:16 UTC

head link

[Nouveau] [PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP

On Tue, Jul 12, 2016 at 06:49:34PM +0200, Peter Wu wrote:
> The FBIOPUT_CON2FBMAP ioctl takes a console_lock(). When this is called
> while nouveau was runtime suspended, a deadlock would occur due to
> nouveau_fbcon_set_suspend also trying to obtain console_lock().
> 
> Fix this by delaying the drm_fb_helper_set_suspend call. Based on the
> i915 code (which was done for performance reasons though).
> 
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> Signed-off-by: Peter Wu <peter at lekensteyn.nl>
> ---
> Tested on top of v4.7-rc5, the deadlock is gone.
Hm, how did you trigger this deadlock?

Thanks,

Lukas
> ---
>  drivers/gpu/drm/nouveau/nouveau_drm.c   |  4 +--
>  drivers/gpu/drm/nouveau/nouveau_drv.h   |  1 +
>  drivers/gpu/drm/nouveau/nouveau_fbcon.c | 54
++++++++++++++++++++++++++++-----
>  drivers/gpu/drm/nouveau/nouveau_fbcon.h |  2 +-
>  4 files changed, 50 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c
b/drivers/gpu/drm/nouveau/nouveau_drm.c
> index 11f8dd9..f9a2c10 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drm.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
> @@ -552,7 +552,7 @@ nouveau_do_suspend(struct drm_device *dev, bool
runtime)
>  
>  	if (dev->mode_config.num_crtc) {
>  		NV_INFO(drm, "suspending console...\n");
> -		nouveau_fbcon_set_suspend(dev, 1);
> +		nouveau_fbcon_set_suspend(dev, FBINFO_STATE_SUSPENDED, true);
>  		NV_INFO(drm, "suspending display...\n");
>  		ret = nouveau_display_suspend(dev, runtime);
>  		if (ret)
> @@ -635,7 +635,7 @@ nouveau_do_resume(struct drm_device *dev, bool runtime)
>  		NV_INFO(drm, "resuming display...\n");
>  		nouveau_display_resume(dev, runtime);
>  		NV_INFO(drm, "resuming console...\n");
> -		nouveau_fbcon_set_suspend(dev, 0);
> +		nouveau_fbcon_set_suspend(dev, FBINFO_STATE_RUNNING, false);
>  	}
>  
>  	return 0;
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h
b/drivers/gpu/drm/nouveau/nouveau_drv.h
> index 822a021..a743d19 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drv.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
> @@ -147,6 +147,7 @@ struct nouveau_drm {
>  	struct nouveau_channel *channel;
>  	struct nvkm_gpuobj *notify;
>  	struct nouveau_fbdev *fbcon;
> +	struct work_struct fbdev_suspend_work;
>  	struct nvif_object nvsw;
>  	struct nvif_object ntfy;
>  	struct nvif_notify flip;
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> index d1f248f..089156a 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> @@ -492,19 +492,53 @@ static const struct drm_fb_helper_funcs
nouveau_fbcon_helper_funcs = {
>  	.fb_probe = nouveau_fbcon_create,
>  };
>  
> +static void nouveau_fbcon_suspend_worker(struct work_struct *work)
> +{
> +	nouveau_fbcon_set_suspend(container_of(work,
> +					       struct nouveau_drm,
> +					       fbdev_suspend_work)->dev,
> +				  FBINFO_STATE_RUNNING,
> +				  true);
> +}
> +
>  void
> -nouveau_fbcon_set_suspend(struct drm_device *dev, int state)
> +nouveau_fbcon_set_suspend(struct drm_device *dev, int state, bool
synchronous)
>  {
>  	struct nouveau_drm *drm = nouveau_drm(dev);
> -	if (drm->fbcon) {
> -		console_lock();
> -		if (state == FBINFO_STATE_RUNNING)
> -			nouveau_fbcon_accel_restore(dev);
> -		drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
> +	if (!drm->fbcon)
> +		return;
> +
> +	if (synchronous) {
> +		/* Flush any pending work to turn the console on, and then
> +		 * wait to turn it off. It must be synchronous as we are
> +		 * about to suspend or unload the driver.
> +		 *
> +		 * Note that from within the work-handler, we cannot flush
> +		 * ourselves, so only flush outstanding work upon suspend!
> +		 */
>  		if (state != FBINFO_STATE_RUNNING)
> -			nouveau_fbcon_accel_save_disable(dev);
> -		console_unlock();
> +			flush_work(&drm->fbdev_suspend_work);
> +		console_lock();
> +	} else {
> +		/*
> +		 * The console lock can be pretty contented on resume due
> +		 * to all the printk activity.  Try to keep it out of the hot
> +		 * path of resume if possible.  This also prevents a deadlock
> +		 * with FBIOPUT_CON2FBMAP.
> +		 */
> +		WARN_ON(state != FBINFO_STATE_RUNNING);
> +		if (!console_trylock()) {
> +			schedule_work(&drm->fbdev_suspend_work);
> +			return;
> +		}
>  	}
> +
> +	if (state == FBINFO_STATE_RUNNING)
> +		nouveau_fbcon_accel_restore(dev);
> +	drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
> +	if (state != FBINFO_STATE_RUNNING)
> +		nouveau_fbcon_accel_save_disable(dev);
> +	console_unlock();
>  }
>  
>  int
> @@ -526,6 +560,8 @@ nouveau_fbcon_init(struct drm_device *dev)
>  	fbcon->dev = dev;
>  	drm->fbcon = fbcon;
>  
> +	INIT_WORK(&drm->fbdev_suspend_work, nouveau_fbcon_suspend_worker);
> +
>  	drm_fb_helper_prepare(dev, &fbcon->helper,
&nouveau_fbcon_helper_funcs);
>  
>  	ret = drm_fb_helper_init(dev, &fbcon->helper,
> @@ -571,6 +607,8 @@ nouveau_fbcon_fini(struct drm_device *dev)
>  	if (!drm->fbcon)
>  		return;
>  
> +	flush_work(&drm->fbdev_suspend_work);
> +
>  	nouveau_fbcon_accel_fini(dev);
>  	nouveau_fbcon_destroy(dev, drm->fbcon);
>  	kfree(drm->fbcon);
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
> index ca77ad0..34b2504 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
> @@ -66,7 +66,7 @@ void nouveau_fbcon_gpu_lockup(struct fb_info *info);
>  
>  int nouveau_fbcon_init(struct drm_device *dev);
>  void nouveau_fbcon_fini(struct drm_device *dev);
> -void nouveau_fbcon_set_suspend(struct drm_device *dev, int state);
> +void nouveau_fbcon_set_suspend(struct drm_device *dev, int state, bool
synchronous);
>  void nouveau_fbcon_accel_save_disable(struct drm_device *dev);
>  void nouveau_fbcon_accel_restore(struct drm_device *dev);
>  
> -- 
> 2.8.3
> 
> _______________________________________________
> Nouveau mailing list
> Nouveau at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/nouveau

Peter Wu

2016-Jul-12 20:18 UTC

head link

[Nouveau] [PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP

On Tue, Jul 12, 2016 at 09:16:22PM +0200, Lukas Wunner wrote:
> On Tue, Jul 12, 2016 at 06:49:34PM +0200, Peter Wu wrote:
> > The FBIOPUT_CON2FBMAP ioctl takes a console_lock(). When this is called
> > while nouveau was runtime suspended, a deadlock would occur due to
> > nouveau_fbcon_set_suspend also trying to obtain console_lock().
> > 
> > Fix this by delaying the drm_fb_helper_set_suspend call. Based on the
> > i915 code (which was done for performance reasons though).
> > 
> > Cc: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> > Signed-off-by: Peter Wu <peter at lekensteyn.nl>
> > ---
> > Tested on top of v4.7-rc5, the deadlock is gone.
> 
> Hm, how did you trigger this deadlock?
> 
> Thanks,
> Lukas
Here is a small Python script with hardcoded values:

    #!/usr/bin/env python3
    # see drivers/video/fbdev/core/fbmem.c ->
    # drivers/video/console/fbcon.c for FB_EVENT_SET_CONSOLE_MAP
    import array, fcntl
    FBIOPUT_CON2FBMAP = 0x4610
    console, framebuffer = 6, 1
    with open("/dev/fb0") as f:
        info = array.array('I', [console, framebuffer])
        fcntl.ioctl(f, FBIOPUT_CON2FBMAP, info)

Ensure that the nouveau card is sleeping, then invoke:

    python3 con2fbmap.py

If you check /proc/`pidof python3`/stack or the dmesg spew 120 seconds
later, you will see a trace like this on a kernel without this patch:

[   60.738089] snd_hda_intel 0000:01:00.1: Disabling via vga_switcheroo
[   60.739810] nouveau 0000:01:00.0: DRM: suspending console...
[   60.740090] nouveau 0000:01:00.0: DRM: suspending display...
[   60.740581] nouveau 0000:01:00.0: DRM: evicting buffers...
[   60.740718] nouveau 0000:01:00.0: DRM: waiting for kernel channels to go
idle...
[   60.741096] nouveau 0000:01:00.0: DRM: suspending client object trees...
[   60.748015] nouveau 0000:01:00.0: DRM: suspending kernel object tree...
[   62.598156] nouveau 0000:01:00.0: power state changed by ACPI to D3cold
[   66.883880] nouveau 0000:01:00.0: power state changed by ACPI to D0
[   66.883987] nouveau 0000:01:00.0: restoring config space at offset 0x4 (was
0x100403, writing 0x100407)
[   66.884017] nouveau 0000:01:00.0: calling nv_msi_ht_cap_quirk_leaf+0x0/0x30
[   66.884032] nouveau 0000:01:00.0: DRM: resuming kernel object tree...
[   66.995505] nouveau 0000:01:00.0: priv: GPC0: 419df4 00000000 (1f40820e)
[   66.995512] nouveau 0000:01:00.0: priv: GPC1: 419df4 00000000 (1f40820e)
[   67.014829] nouveau 0000:01:00.0: DRM: resuming client object trees...
[   67.014905] nouveau 0000:01:00.0: DRM: resuming display...
[   67.014962] nouveau 0000:01:00.0: DRM: resuming console...
[  240.619840] INFO: task con2fb:482 blocked for more than 120 seconds.
[  240.619844]       Not tainted 4.7.0-rc1kasan-00011-g5c72d90 #2
[  240.619845] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[  240.619858] con2fb          D ffff880769467378 25464   482    447 0x00000000
[  240.619864]  ffff880769467378 ffffffff845eb340 ffffffff83ed1708
00ff880769467330
[  240.619868]  ffff8807762a06e0 ffff8807762a0708 ffff88077629fdd8
ffff88077629fdc0
[  240.619872]  ffff880772bc6200 ffff88076f221880 ffff880769460000
ffffed00ed28c001
[  240.619874] Call Trace:
[  240.619881]  [<ffffffff82e904c5>] schedule+0x95/0x1b0
[  240.619885]  [<ffffffff82e9b740>] schedule_timeout+0x3d0/0x8b0
[  240.619889]  [<ffffffff82e9b370>] ? usleep_range+0xe0/0xe0
[  240.619894]  [<ffffffff812266c7>] ? debug_lockdep_rcu_enabled+0x77/0x90
[  240.619897]  [<ffffffff811f0918>] ? mark_held_locks+0xc8/0x120
[  240.619901]  [<ffffffff82e9d0cc>] ? _raw_spin_unlock_irq+0x2c/0x30
[  240.619904]  [<ffffffff811f0d69>] ?
trace_hardirqs_on_caller+0x3f9/0x580
[  240.619907]  [<ffffffff82e98bdf>] __down+0xff/0x1d0
[  240.619911]  [<ffffffff82e98ae0>] ? ww_mutex_unlock+0x270/0x270
[  240.619925]  [<ffffffff82225c32>] ? _dev_info+0xc2/0xf0
[  240.619929]  [<ffffffff811e56b3>] down+0x63/0x80
[  240.619933]  [<ffffffff8120796e>] console_lock+0x1e/0x70
[  240.620012]  [<ffffffffa1842d61>] nouveau_fbcon_set_suspend+0x71/0x390
[nouveau]
[  240.620085]  [<ffffffffa17f8a22>] nouveau_do_resume+0x2e2/0x380
[nouveau]
[  240.620157]  [<ffffffffa17f92de>]
nouveau_pmops_runtime_resume+0xce/0x210 [nouveau]
[  240.620163]  [<ffffffff81c2be80>] ?
pci_restore_standard_config+0x70/0x70
[  240.620167]  [<ffffffff81c2bfb0>] pci_pm_runtime_resume+0x130/0x220
[  240.620171]  [<ffffffff81c2be80>] ?
pci_restore_standard_config+0x70/0x70
[  240.620175]  [<ffffffff82249d12>] __rpm_callback+0x62/0xe0
[  240.620179]  [<ffffffff81c2be80>] ?
pci_restore_standard_config+0x70/0x70
[  240.620182]  [<ffffffff82249ef8>] rpm_callback+0x168/0x210
[  240.620186]  [<ffffffff81c2be80>] ?
pci_restore_standard_config+0x70/0x70
[  240.620189]  [<ffffffff8224b6b3>] rpm_resume+0xbc3/0x1880
[  240.620193]  [<ffffffff8224aaf0>] ?
pm_runtime_autosuspend_expiration+0x60/0x60
[  240.620196]  [<ffffffff8224f11a>] ? __pm_runtime_resume+0x6a/0xa0
[  240.620200]  [<ffffffff8224f128>] __pm_runtime_resume+0x78/0xa0
[  240.620270]  [<ffffffffa1840ad0>] nouveau_fbcon_open+0xd0/0x120
[nouveau]
[  240.620274]  [<ffffffff81c93577>] con2fb_acquire_newinfo+0xc7/0x2c0
[  240.620277]  [<ffffffff81c95e18>] set_con2fb_map+0x728/0xcb0
[  240.620281]  [<ffffffff81c96e4c>] fbcon_event_notify+0xaac/0x1f90
[  240.620285]  [<ffffffff81162cc9>] notifier_call_chain+0xc9/0x130
[  240.620288]  [<ffffffff81163110>]
__blocking_notifier_call_chain+0x70/0xb0
[  240.620292]  [<ffffffff81163166>]
blocking_notifier_call_chain+0x16/0x20
[  240.620295]  [<ffffffff81ca0c8b>] fb_notifier_call_chain+0x1b/0x20
[  240.620298]  [<ffffffff81ca939a>] do_fb_ioctl+0x93a/0xa80
[  240.620301]  [<ffffffff81513377>] ? mntput+0x57/0x70
[  240.620305]  [<ffffffff81ca8a60>] ? fb_read+0x5f0/0x5f0
[  240.620309]  [<ffffffff812266c7>] ? debug_lockdep_rcu_enabled+0x77/0x90
[  240.620312]  [<ffffffff811f2475>] ? __lock_acquire+0x1055/0x2ed0
[  240.620316]  [<ffffffff812266c7>] ? debug_lockdep_rcu_enabled+0x77/0x90
[  240.620319]  [<ffffffff811f2475>] ? __lock_acquire+0x1055/0x2ed0
[  240.620323]  [<ffffffff812266c7>] ? debug_lockdep_rcu_enabled+0x77/0x90
[  240.620327]  [<ffffffff811f1420>] ?
debug_check_no_locks_freed+0x280/0x280
[  240.620331]  [<ffffffff812266c7>] ? debug_lockdep_rcu_enabled+0x77/0x90
[  240.620335]  [<ffffffff812266c7>] ? debug_lockdep_rcu_enabled+0x77/0x90
[  240.620340]  [<ffffffff8149714d>] ?
cmpxchg_double_slab.isra.54+0x10d/0x130
[  240.620344]  [<ffffffff82e9d106>] ?
_raw_spin_unlock_irqrestore+0x36/0x50
[  240.620347]  [<ffffffff811f0d69>] ?
trace_hardirqs_on_caller+0x3f9/0x580
[  240.620351]  [<ffffffff81ca95ac>] fb_ioctl+0xcc/0x140
[  240.620355]  [<ffffffff814e9372>] do_vfs_ioctl+0x192/0x1000
[  240.620359]  [<ffffffff814e0191>] ? putname+0xc1/0xf0
[  240.620362]  [<ffffffff814e91e0>] ? ioctl_preallocate+0x1e0/0x1e0
[  240.620365]  [<ffffffff814e0191>] ? putname+0xc1/0xf0
[  240.620369]  [<ffffffff81226899>] ? rcu_read_lock_sched_held+0xe9/0x110
[  240.620373]  [<ffffffff81498ffe>] ? kmem_cache_free+0x1fe/0x280
[  240.620376]  [<ffffffff814e0191>] ? putname+0xc1/0xf0
[  240.620380]  [<ffffffff814abbbd>] ? do_sys_open+0x25d/0x340
[  240.620384]  [<ffffffff82e9d692>] ? entry_SYSCALL_64_fastpath+0x5/0xa8
[  240.620387]  [<ffffffff812266c7>] ? debug_lockdep_rcu_enabled+0x77/0x90
[  240.620390]  [<ffffffff81509839>] ? __fget_light+0x139/0x200
[  240.620393]  [<ffffffff814ea259>] SyS_ioctl+0x79/0x90
[  240.620397]  [<ffffffff82e9d6a5>] entry_SYSCALL_64_fastpath+0x18/0xa8
[  240.620401] 3 locks held by con2fb/482:
[  240.620409]  #0:  (console_lock){+.+.+.}, at: [<ffffffff81ca9376>]
do_fb_ioctl+0x916/0xa80
[  240.620416]  #1:  (&fb_info->lock){+.+.+.}, at:
[<ffffffff81ca1d5d>] lock_fb_info+0x1d/0x70
[  240.620423]  #2:  ((fb_notifier_list).rwsem){.+.+.+}, at:
[<ffffffff811630fb>] __blocking_notifier_call_chain+0x5b/0xb0

Peter
> > ---
> >  drivers/gpu/drm/nouveau/nouveau_drm.c   |  4 +--
> >  drivers/gpu/drm/nouveau/nouveau_drv.h   |  1 +
> >  drivers/gpu/drm/nouveau/nouveau_fbcon.c | 54
++++++++++++++++++++++++++++-----
> >  drivers/gpu/drm/nouveau/nouveau_fbcon.h |  2 +-
> >  4 files changed, 50 insertions(+), 11 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c
b/drivers/gpu/drm/nouveau/nouveau_drm.c
> > index 11f8dd9..f9a2c10 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_drm.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
> > @@ -552,7 +552,7 @@ nouveau_do_suspend(struct drm_device *dev, bool
runtime)
> >  
> >  	if (dev->mode_config.num_crtc) {
> >  		NV_INFO(drm, "suspending console...\n");
> > -		nouveau_fbcon_set_suspend(dev, 1);
> > +		nouveau_fbcon_set_suspend(dev, FBINFO_STATE_SUSPENDED, true);
> >  		NV_INFO(drm, "suspending display...\n");
> >  		ret = nouveau_display_suspend(dev, runtime);
> >  		if (ret)
> > @@ -635,7 +635,7 @@ nouveau_do_resume(struct drm_device *dev, bool
runtime)
> >  		NV_INFO(drm, "resuming display...\n");
> >  		nouveau_display_resume(dev, runtime);
> >  		NV_INFO(drm, "resuming console...\n");
> > -		nouveau_fbcon_set_suspend(dev, 0);
> > +		nouveau_fbcon_set_suspend(dev, FBINFO_STATE_RUNNING, false);
> >  	}
> >  
> >  	return 0;
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h
b/drivers/gpu/drm/nouveau/nouveau_drv.h
> > index 822a021..a743d19 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_drv.h
> > +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
> > @@ -147,6 +147,7 @@ struct nouveau_drm {
> >  	struct nouveau_channel *channel;
> >  	struct nvkm_gpuobj *notify;
> >  	struct nouveau_fbdev *fbcon;
> > +	struct work_struct fbdev_suspend_work;
> >  	struct nvif_object nvsw;
> >  	struct nvif_object ntfy;
> >  	struct nvif_notify flip;
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> > index d1f248f..089156a 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> > @@ -492,19 +492,53 @@ static const struct drm_fb_helper_funcs
nouveau_fbcon_helper_funcs = {
> >  	.fb_probe = nouveau_fbcon_create,
> >  };
> >  
> > +static void nouveau_fbcon_suspend_worker(struct work_struct *work)
> > +{
> > +	nouveau_fbcon_set_suspend(container_of(work,
> > +					       struct nouveau_drm,
> > +					       fbdev_suspend_work)->dev,
> > +				  FBINFO_STATE_RUNNING,
> > +				  true);
> > +}
> > +
> >  void
> > -nouveau_fbcon_set_suspend(struct drm_device *dev, int state)
> > +nouveau_fbcon_set_suspend(struct drm_device *dev, int state, bool
synchronous)
> >  {
> >  	struct nouveau_drm *drm = nouveau_drm(dev);
> > -	if (drm->fbcon) {
> > -		console_lock();
> > -		if (state == FBINFO_STATE_RUNNING)
> > -			nouveau_fbcon_accel_restore(dev);
> > -		drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
> > +	if (!drm->fbcon)
> > +		return;
> > +
> > +	if (synchronous) {
> > +		/* Flush any pending work to turn the console on, and then
> > +		 * wait to turn it off. It must be synchronous as we are
> > +		 * about to suspend or unload the driver.
> > +		 *
> > +		 * Note that from within the work-handler, we cannot flush
> > +		 * ourselves, so only flush outstanding work upon suspend!
> > +		 */
> >  		if (state != FBINFO_STATE_RUNNING)
> > -			nouveau_fbcon_accel_save_disable(dev);
> > -		console_unlock();
> > +			flush_work(&drm->fbdev_suspend_work);
> > +		console_lock();
> > +	} else {
> > +		/*
> > +		 * The console lock can be pretty contented on resume due
> > +		 * to all the printk activity.  Try to keep it out of the hot
> > +		 * path of resume if possible.  This also prevents a deadlock
> > +		 * with FBIOPUT_CON2FBMAP.
> > +		 */
> > +		WARN_ON(state != FBINFO_STATE_RUNNING);
> > +		if (!console_trylock()) {
> > +			schedule_work(&drm->fbdev_suspend_work);
> > +			return;
> > +		}
> >  	}
> > +
> > +	if (state == FBINFO_STATE_RUNNING)
> > +		nouveau_fbcon_accel_restore(dev);
> > +	drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
> > +	if (state != FBINFO_STATE_RUNNING)
> > +		nouveau_fbcon_accel_save_disable(dev);
> > +	console_unlock();
> >  }
> >  
> >  int
> > @@ -526,6 +560,8 @@ nouveau_fbcon_init(struct drm_device *dev)
> >  	fbcon->dev = dev;
> >  	drm->fbcon = fbcon;
> >  
> > +	INIT_WORK(&drm->fbdev_suspend_work,
nouveau_fbcon_suspend_worker);
> > +
> >  	drm_fb_helper_prepare(dev, &fbcon->helper,
&nouveau_fbcon_helper_funcs);
> >  
> >  	ret = drm_fb_helper_init(dev, &fbcon->helper,
> > @@ -571,6 +607,8 @@ nouveau_fbcon_fini(struct drm_device *dev)
> >  	if (!drm->fbcon)
> >  		return;
> >  
> > +	flush_work(&drm->fbdev_suspend_work);
> > +
> >  	nouveau_fbcon_accel_fini(dev);
> >  	nouveau_fbcon_destroy(dev, drm->fbcon);
> >  	kfree(drm->fbcon);
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
> > index ca77ad0..34b2504 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
> > +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
> > @@ -66,7 +66,7 @@ void nouveau_fbcon_gpu_lockup(struct fb_info *info);
> >  
> >  int nouveau_fbcon_init(struct drm_device *dev);
> >  void nouveau_fbcon_fini(struct drm_device *dev);
> > -void nouveau_fbcon_set_suspend(struct drm_device *dev, int state);
> > +void nouveau_fbcon_set_suspend(struct drm_device *dev, int state,
bool synchronous);
> >  void nouveau_fbcon_accel_save_disable(struct drm_device *dev);
> >  void nouveau_fbcon_accel_restore(struct drm_device *dev);
> >  
> > -- 
> > 2.8.3
> > 
> > _______________________________________________
> > Nouveau mailing list
> > Nouveau at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/nouveau

Daniel Vetter

2016-Jul-13 09:54 UTC

head link

[Nouveau] [PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP

On Tue, Jul 12, 2016 at 06:49:34PM +0200, Peter Wu wrote:
> The FBIOPUT_CON2FBMAP ioctl takes a console_lock(). When this is called
> while nouveau was runtime suspended, a deadlock would occur due to
> nouveau_fbcon_set_suspend also trying to obtain console_lock().
> 
> Fix this by delaying the drm_fb_helper_set_suspend call. Based on the
> i915 code (which was done for performance reasons though).
> 
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> Signed-off-by: Peter Wu <peter at lekensteyn.nl>
> ---
> Tested on top of v4.7-rc5, the deadlock is gone.
If we bother with this, it should imo be moved into the drm_fb_helper.c
function drm_fb_helper_set_suspend(). But this also smells like some kind
of bad duct-tape. I think Lukas is working on some other rpm vs. fbdev
deadlocks, maybe we could fix them all with one proper fix? I've made some
comments on Lukas' last patch series.

Besides this, when fixing a deadlock pls provide more details about the
precise callchain and the locks involved in the deadlock. If you
discovered this using lockdep, then just add the entire lockdep splat to
the commit message. Otherwise there's lots of guesswork involved here.
-Daniel
> ---
>  drivers/gpu/drm/nouveau/nouveau_drm.c   |  4 +--
>  drivers/gpu/drm/nouveau/nouveau_drv.h   |  1 +
>  drivers/gpu/drm/nouveau/nouveau_fbcon.c | 54
++++++++++++++++++++++++++++-----
>  drivers/gpu/drm/nouveau/nouveau_fbcon.h |  2 +-
>  4 files changed, 50 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c
b/drivers/gpu/drm/nouveau/nouveau_drm.c
> index 11f8dd9..f9a2c10 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drm.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
> @@ -552,7 +552,7 @@ nouveau_do_suspend(struct drm_device *dev, bool
runtime)
>  
>  	if (dev->mode_config.num_crtc) {
>  		NV_INFO(drm, "suspending console...\n");
> -		nouveau_fbcon_set_suspend(dev, 1);
> +		nouveau_fbcon_set_suspend(dev, FBINFO_STATE_SUSPENDED, true);
>  		NV_INFO(drm, "suspending display...\n");
>  		ret = nouveau_display_suspend(dev, runtime);
>  		if (ret)
> @@ -635,7 +635,7 @@ nouveau_do_resume(struct drm_device *dev, bool runtime)
>  		NV_INFO(drm, "resuming display...\n");
>  		nouveau_display_resume(dev, runtime);
>  		NV_INFO(drm, "resuming console...\n");
> -		nouveau_fbcon_set_suspend(dev, 0);
> +		nouveau_fbcon_set_suspend(dev, FBINFO_STATE_RUNNING, false);
>  	}
>  
>  	return 0;
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h
b/drivers/gpu/drm/nouveau/nouveau_drv.h
> index 822a021..a743d19 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drv.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
> @@ -147,6 +147,7 @@ struct nouveau_drm {
>  	struct nouveau_channel *channel;
>  	struct nvkm_gpuobj *notify;
>  	struct nouveau_fbdev *fbcon;
> +	struct work_struct fbdev_suspend_work;
>  	struct nvif_object nvsw;
>  	struct nvif_object ntfy;
>  	struct nvif_notify flip;
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> index d1f248f..089156a 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> @@ -492,19 +492,53 @@ static const struct drm_fb_helper_funcs
nouveau_fbcon_helper_funcs = {
>  	.fb_probe = nouveau_fbcon_create,
>  };
>  
> +static void nouveau_fbcon_suspend_worker(struct work_struct *work)
> +{
> +	nouveau_fbcon_set_suspend(container_of(work,
> +					       struct nouveau_drm,
> +					       fbdev_suspend_work)->dev,
> +				  FBINFO_STATE_RUNNING,
> +				  true);
> +}
> +
>  void
> -nouveau_fbcon_set_suspend(struct drm_device *dev, int state)
> +nouveau_fbcon_set_suspend(struct drm_device *dev, int state, bool
synchronous)
>  {
>  	struct nouveau_drm *drm = nouveau_drm(dev);
> -	if (drm->fbcon) {
> -		console_lock();
> -		if (state == FBINFO_STATE_RUNNING)
> -			nouveau_fbcon_accel_restore(dev);
> -		drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
> +	if (!drm->fbcon)
> +		return;
> +
> +	if (synchronous) {
> +		/* Flush any pending work to turn the console on, and then
> +		 * wait to turn it off. It must be synchronous as we are
> +		 * about to suspend or unload the driver.
> +		 *
> +		 * Note that from within the work-handler, we cannot flush
> +		 * ourselves, so only flush outstanding work upon suspend!
> +		 */
>  		if (state != FBINFO_STATE_RUNNING)
> -			nouveau_fbcon_accel_save_disable(dev);
> -		console_unlock();
> +			flush_work(&drm->fbdev_suspend_work);
> +		console_lock();
> +	} else {
> +		/*
> +		 * The console lock can be pretty contented on resume due
> +		 * to all the printk activity.  Try to keep it out of the hot
> +		 * path of resume if possible.  This also prevents a deadlock
> +		 * with FBIOPUT_CON2FBMAP.
> +		 */
> +		WARN_ON(state != FBINFO_STATE_RUNNING);
> +		if (!console_trylock()) {
> +			schedule_work(&drm->fbdev_suspend_work);
> +			return;
> +		}
>  	}
> +
> +	if (state == FBINFO_STATE_RUNNING)
> +		nouveau_fbcon_accel_restore(dev);
> +	drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
> +	if (state != FBINFO_STATE_RUNNING)
> +		nouveau_fbcon_accel_save_disable(dev);
> +	console_unlock();
>  }
>  
>  int
> @@ -526,6 +560,8 @@ nouveau_fbcon_init(struct drm_device *dev)
>  	fbcon->dev = dev;
>  	drm->fbcon = fbcon;
>  
> +	INIT_WORK(&drm->fbdev_suspend_work, nouveau_fbcon_suspend_worker);
> +
>  	drm_fb_helper_prepare(dev, &fbcon->helper,
&nouveau_fbcon_helper_funcs);
>  
>  	ret = drm_fb_helper_init(dev, &fbcon->helper,
> @@ -571,6 +607,8 @@ nouveau_fbcon_fini(struct drm_device *dev)
>  	if (!drm->fbcon)
>  		return;
>  
> +	flush_work(&drm->fbdev_suspend_work);
> +
>  	nouveau_fbcon_accel_fini(dev);
>  	nouveau_fbcon_destroy(dev, drm->fbcon);
>  	kfree(drm->fbcon);
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
> index ca77ad0..34b2504 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
> @@ -66,7 +66,7 @@ void nouveau_fbcon_gpu_lockup(struct fb_info *info);
>  
>  int nouveau_fbcon_init(struct drm_device *dev);
>  void nouveau_fbcon_fini(struct drm_device *dev);
> -void nouveau_fbcon_set_suspend(struct drm_device *dev, int state);
> +void nouveau_fbcon_set_suspend(struct drm_device *dev, int state, bool
synchronous);
>  void nouveau_fbcon_accel_save_disable(struct drm_device *dev);
>  void nouveau_fbcon_accel_restore(struct drm_device *dev);
>  
> -- 
> 2.8.3
> 
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

Peter Wu

2016-Jul-13 12:40 UTC

head link

[Nouveau] [PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP

On Wed, Jul 13, 2016 at 11:54:49AM +0200, Daniel Vetter wrote:
> On Tue, Jul 12, 2016 at 06:49:34PM +0200, Peter Wu wrote:
> > The FBIOPUT_CON2FBMAP ioctl takes a console_lock(). When this is called
> > while nouveau was runtime suspended, a deadlock would occur due to
> > nouveau_fbcon_set_suspend also trying to obtain console_lock().
> > 
> > Fix this by delaying the drm_fb_helper_set_suspend call. Based on the
> > i915 code (which was done for performance reasons though).
> > 
> > Cc: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> > Signed-off-by: Peter Wu <peter at lekensteyn.nl>
> > ---
> > Tested on top of v4.7-rc5, the deadlock is gone.
> 
> If we bother with this, it should imo be moved into the drm_fb_helper.c
> function drm_fb_helper_set_suspend(). But this also smells like some kind
> of bad duct-tape. I think Lukas is working on some other rpm vs. fbdev
> deadlocks, maybe we could fix them all with one proper fix? I've made some
> comments on Lukas' last patch series.
This patch is only needed for drivers that use console_lock (for
drm_fb_helper_set_suspend) in their runtime resume functions.
Lukas posted fixes for runtime PM reference leaks, those are different
from this deadlock (see
https://lists.freedesktop.org/archives/dri-devel/2016-July/113005.html
for a backtrace for this issue).

The deadlock could also be avoided if the device backing the fbcon is
somehow runtime-resumed outside the lock, but that feels like a larger
hack that does not seem easy.

The i915 patch was done to reduce resume time (due to console_lock
contention), that feature seems useful to all other drivers too even if
the deadlock is fixed in a different way.

My current plan is to move stuff out of the lock and allow (just)
resuming the console to be delayed.  Some drivers (nouveau,
radeon/amdgpu, i915) do unnecessary stuff under the console lock:

 - nouveau: I *think* that clearing/setting FBINFO_HWACCEL_DISABLED
   (nouveau_fbcon_accel_restore) is safe outside the lock as the fb is
   already suspended before clearing/after setting the flag.
 - radeon: since the console is suspended, I don't think that all
   of the code in radeon_resume_kms is really needed.
 - amdgpu: same as radeon. Btw, console_lock is leaked on an error path.
 - i915: I think that clearing the fb memory can be done outside the
   lock too as the console is suspended.

Please correct me if my assumptions are flawed.
> Besides this, when fixing a deadlock pls provide more details about the
> precise callchain and the locks involved in the deadlock. If you
> discovered this using lockdep, then just add the entire lockdep splat to
> the commit message. Otherwise there's lots of guesswork involved here.
> -Daniel
There was no lockdep splat, it was triggered via the ioctl in the commit
message. I'll include the verbose trace from the previous mail in the
next proposed patch to reduce hunting though.

Peter
> > ---
> >  drivers/gpu/drm/nouveau/nouveau_drm.c   |  4 +--
> >  drivers/gpu/drm/nouveau/nouveau_drv.h   |  1 +
> >  drivers/gpu/drm/nouveau/nouveau_fbcon.c | 54
++++++++++++++++++++++++++++-----
> >  drivers/gpu/drm/nouveau/nouveau_fbcon.h |  2 +-
> >  4 files changed, 50 insertions(+), 11 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c
b/drivers/gpu/drm/nouveau/nouveau_drm.c
> > index 11f8dd9..f9a2c10 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_drm.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
> > @@ -552,7 +552,7 @@ nouveau_do_suspend(struct drm_device *dev, bool
runtime)
> >  
> >  	if (dev->mode_config.num_crtc) {
> >  		NV_INFO(drm, "suspending console...\n");
> > -		nouveau_fbcon_set_suspend(dev, 1);
> > +		nouveau_fbcon_set_suspend(dev, FBINFO_STATE_SUSPENDED, true);
> >  		NV_INFO(drm, "suspending display...\n");
> >  		ret = nouveau_display_suspend(dev, runtime);
> >  		if (ret)
> > @@ -635,7 +635,7 @@ nouveau_do_resume(struct drm_device *dev, bool
runtime)
> >  		NV_INFO(drm, "resuming display...\n");
> >  		nouveau_display_resume(dev, runtime);
> >  		NV_INFO(drm, "resuming console...\n");
> > -		nouveau_fbcon_set_suspend(dev, 0);
> > +		nouveau_fbcon_set_suspend(dev, FBINFO_STATE_RUNNING, false);
> >  	}
> >  
> >  	return 0;
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h
b/drivers/gpu/drm/nouveau/nouveau_drv.h
> > index 822a021..a743d19 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_drv.h
> > +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
> > @@ -147,6 +147,7 @@ struct nouveau_drm {
> >  	struct nouveau_channel *channel;
> >  	struct nvkm_gpuobj *notify;
> >  	struct nouveau_fbdev *fbcon;
> > +	struct work_struct fbdev_suspend_work;
> >  	struct nvif_object nvsw;
> >  	struct nvif_object ntfy;
> >  	struct nvif_notify flip;
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> > index d1f248f..089156a 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> > @@ -492,19 +492,53 @@ static const struct drm_fb_helper_funcs
nouveau_fbcon_helper_funcs = {
> >  	.fb_probe = nouveau_fbcon_create,
> >  };
> >  
> > +static void nouveau_fbcon_suspend_worker(struct work_struct *work)
> > +{
> > +	nouveau_fbcon_set_suspend(container_of(work,
> > +					       struct nouveau_drm,
> > +					       fbdev_suspend_work)->dev,
> > +				  FBINFO_STATE_RUNNING,
> > +				  true);
> > +}
> > +
> >  void
> > -nouveau_fbcon_set_suspend(struct drm_device *dev, int state)
> > +nouveau_fbcon_set_suspend(struct drm_device *dev, int state, bool
synchronous)
> >  {
> >  	struct nouveau_drm *drm = nouveau_drm(dev);
> > -	if (drm->fbcon) {
> > -		console_lock();
> > -		if (state == FBINFO_STATE_RUNNING)
> > -			nouveau_fbcon_accel_restore(dev);
> > -		drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
> > +	if (!drm->fbcon)
> > +		return;
> > +
> > +	if (synchronous) {
> > +		/* Flush any pending work to turn the console on, and then
> > +		 * wait to turn it off. It must be synchronous as we are
> > +		 * about to suspend or unload the driver.
> > +		 *
> > +		 * Note that from within the work-handler, we cannot flush
> > +		 * ourselves, so only flush outstanding work upon suspend!
> > +		 */
> >  		if (state != FBINFO_STATE_RUNNING)
> > -			nouveau_fbcon_accel_save_disable(dev);
> > -		console_unlock();
> > +			flush_work(&drm->fbdev_suspend_work);
> > +		console_lock();
> > +	} else {
> > +		/*
> > +		 * The console lock can be pretty contented on resume due
> > +		 * to all the printk activity.  Try to keep it out of the hot
> > +		 * path of resume if possible.  This also prevents a deadlock
> > +		 * with FBIOPUT_CON2FBMAP.
> > +		 */
> > +		WARN_ON(state != FBINFO_STATE_RUNNING);
> > +		if (!console_trylock()) {
> > +			schedule_work(&drm->fbdev_suspend_work);
> > +			return;
> > +		}
> >  	}
> > +
> > +	if (state == FBINFO_STATE_RUNNING)
> > +		nouveau_fbcon_accel_restore(dev);
> > +	drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
> > +	if (state != FBINFO_STATE_RUNNING)
> > +		nouveau_fbcon_accel_save_disable(dev);
> > +	console_unlock();
> >  }
> >  
> >  int
> > @@ -526,6 +560,8 @@ nouveau_fbcon_init(struct drm_device *dev)
> >  	fbcon->dev = dev;
> >  	drm->fbcon = fbcon;
> >  
> > +	INIT_WORK(&drm->fbdev_suspend_work,
nouveau_fbcon_suspend_worker);
> > +
> >  	drm_fb_helper_prepare(dev, &fbcon->helper,
&nouveau_fbcon_helper_funcs);
> >  
> >  	ret = drm_fb_helper_init(dev, &fbcon->helper,
> > @@ -571,6 +607,8 @@ nouveau_fbcon_fini(struct drm_device *dev)
> >  	if (!drm->fbcon)
> >  		return;
> >  
> > +	flush_work(&drm->fbdev_suspend_work);
> > +
> >  	nouveau_fbcon_accel_fini(dev);
> >  	nouveau_fbcon_destroy(dev, drm->fbcon);
> >  	kfree(drm->fbcon);
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
> > index ca77ad0..34b2504 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
> > +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
> > @@ -66,7 +66,7 @@ void nouveau_fbcon_gpu_lockup(struct fb_info *info);
> >  
> >  int nouveau_fbcon_init(struct drm_device *dev);
> >  void nouveau_fbcon_fini(struct drm_device *dev);
> > -void nouveau_fbcon_set_suspend(struct drm_device *dev, int state);
> > +void nouveau_fbcon_set_suspend(struct drm_device *dev, int state,
bool synchronous);
> >  void nouveau_fbcon_accel_save_disable(struct drm_device *dev);
> >  void nouveau_fbcon_accel_restore(struct drm_device *dev);
> >  
> > -- 
> > 2.8.3
> > 
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
-- 
Kind regards,
Peter Wu
https://lekensteyn.nl

Chris Wilson

2016-Jul-13 17:17 UTC

head link

[Nouveau] [PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP

On Tue, Jul 12, 2016 at 06:49:34PM +0200, Peter Wu wrote:
> The FBIOPUT_CON2FBMAP ioctl takes a console_lock(). When this is called
> while nouveau was runtime suspended, a deadlock would occur due to
> nouveau_fbcon_set_suspend also trying to obtain console_lock().
> 
> Fix this by delaying the drm_fb_helper_set_suspend call. Based on the
> i915 code (which was done for performance reasons though).
> 
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> Signed-off-by: Peter Wu <peter at lekensteyn.nl>
> ---
> Tested on top of v4.7-rc5, the deadlock is gone.
> ---
>  drivers/gpu/drm/nouveau/nouveau_drm.c   |  4 +--
>  drivers/gpu/drm/nouveau/nouveau_drv.h   |  1 +
>  drivers/gpu/drm/nouveau/nouveau_fbcon.c | 54
++++++++++++++++++++++++++++-----
>  drivers/gpu/drm/nouveau/nouveau_fbcon.h |  2 +-
>  4 files changed, 50 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c
b/drivers/gpu/drm/nouveau/nouveau_drm.c
> index 11f8dd9..f9a2c10 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drm.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
> @@ -552,7 +552,7 @@ nouveau_do_suspend(struct drm_device *dev, bool
runtime)
>  
>  	if (dev->mode_config.num_crtc) {
>  		NV_INFO(drm, "suspending console...\n");
> -		nouveau_fbcon_set_suspend(dev, 1);
> +		nouveau_fbcon_set_suspend(dev, FBINFO_STATE_SUSPENDED, true);
>  		NV_INFO(drm, "suspending display...\n");
>  		ret = nouveau_display_suspend(dev, runtime);
>  		if (ret)
> @@ -635,7 +635,7 @@ nouveau_do_resume(struct drm_device *dev, bool runtime)
>  		NV_INFO(drm, "resuming display...\n");
>  		nouveau_display_resume(dev, runtime);
>  		NV_INFO(drm, "resuming console...\n");
> -		nouveau_fbcon_set_suspend(dev, 0);
> +		nouveau_fbcon_set_suspend(dev, FBINFO_STATE_RUNNING, false);
>  	}
>  
>  	return 0;
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h
b/drivers/gpu/drm/nouveau/nouveau_drv.h
> index 822a021..a743d19 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drv.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
> @@ -147,6 +147,7 @@ struct nouveau_drm {
>  	struct nouveau_channel *channel;
>  	struct nvkm_gpuobj *notify;
>  	struct nouveau_fbdev *fbcon;
> +	struct work_struct fbdev_suspend_work;
>  	struct nvif_object nvsw;
>  	struct nvif_object ntfy;
>  	struct nvif_notify flip;
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> index d1f248f..089156a 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> @@ -492,19 +492,53 @@ static const struct drm_fb_helper_funcs
nouveau_fbcon_helper_funcs = {
>  	.fb_probe = nouveau_fbcon_create,
>  };
>  
> +static void nouveau_fbcon_suspend_worker(struct work_struct *work)
> +{
> +	nouveau_fbcon_set_suspend(container_of(work,
> +					       struct nouveau_drm,
> +					       fbdev_suspend_work)->dev,
> +				  FBINFO_STATE_RUNNING,
> +				  true);
> +}
> +
>  void
> -nouveau_fbcon_set_suspend(struct drm_device *dev, int state)
> +nouveau_fbcon_set_suspend(struct drm_device *dev, int state, bool
synchronous)
>  {
>  	struct nouveau_drm *drm = nouveau_drm(dev);
> -	if (drm->fbcon) {
> -		console_lock();
> -		if (state == FBINFO_STATE_RUNNING)
> -			nouveau_fbcon_accel_restore(dev);
> -		drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
> +	if (!drm->fbcon)
> +		return;
> +
> +	if (synchronous) {
> +		/* Flush any pending work to turn the console on, and then
> +		 * wait to turn it off. It must be synchronous as we are
> +		 * about to suspend or unload the driver.
> +		 *
> +		 * Note that from within the work-handler, we cannot flush
> +		 * ourselves, so only flush outstanding work upon suspend!
> +		 */
>  		if (state != FBINFO_STATE_RUNNING)
> -			nouveau_fbcon_accel_save_disable(dev);
> -		console_unlock();
> +			flush_work(&drm->fbdev_suspend_work);
> +		console_lock();
> +	} else {
> +		/*
> +		 * The console lock can be pretty contented on resume due
> +		 * to all the printk activity.  Try to keep it out of the hot
> +		 * path of resume if possible.  This also prevents a deadlock
> +		 * with FBIOPUT_CON2FBMAP.
> +		 */
> +		WARN_ON(state != FBINFO_STATE_RUNNING);
> +		if (!console_trylock()) {
> +			schedule_work(&drm->fbdev_suspend_work);
> +			return;
> +		}
>  	}
> +
> +	if (state == FBINFO_STATE_RUNNING)
> +		nouveau_fbcon_accel_restore(dev);
> +	drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
> +	if (state != FBINFO_STATE_RUNNING)
> +		nouveau_fbcon_accel_save_disable(dev);
> +	console_unlock();
>  }
>  
>  int
> @@ -526,6 +560,8 @@ nouveau_fbcon_init(struct drm_device *dev)
>  	fbcon->dev = dev;
>  	drm->fbcon = fbcon;
>  
> +	INIT_WORK(&drm->fbdev_suspend_work, nouveau_fbcon_suspend_worker);
> +
>  	drm_fb_helper_prepare(dev, &fbcon->helper,
&nouveau_fbcon_helper_funcs);
>  
>  	ret = drm_fb_helper_init(dev, &fbcon->helper,
> @@ -571,6 +607,8 @@ nouveau_fbcon_fini(struct drm_device *dev)
>  	if (!drm->fbcon)
>  		return;
>  
> +	flush_work(&drm->fbdev_suspend_work);
Hmm, since suspend_work can theoretically rearm itself, this should be
cancel_work_sync().

The copy'n'paste of the code looks fine, so (other than the bug copied
across):

Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk>

Whether you can convince the maintainers on the basis of this being a
deadlock fix is another matter...

I did test this patch, since I see a livelock on resume, but not the
same console deadlock. Just in case anyone is interested:

Jul 13 17:05:59 acer kernel: [24873.945839] NMI watchdog: BUG: soft lockup -
CPU#2 stuck for 22s! [kworker/2:1:8370]
Jul 13 17:05:59 acer kernel: [24873.946563] Modules linked in: rfcomm drbg
ansi_cprng ctr ccm arc4 bnep ath10k_pci ath10k_core snd_hda_codec_hdmi
snd_hda_codec_realtek ath snd_hda_co
dec_generic snd_hda_intel mac80211 snd_hda_codec binfmt_misc snd_hda_core
nls_iso8859_1 snd_hwdep btusb btrtl snd_pcm btbcm rtsx_usb_ms btintel
x86_pkg_temp_thermal uvcvideo acer_wmi
 intel_powerclamp memstick snd_seq_midi bluetooth sparse_keymap
snd_seq_midi_event coretemp videobuf2_vmalloc videobuf2_memops snd_rawmidi
kvm_intel videobuf2_v4l2 kvm cfg80211 video
buf2_core snd_seq videodev irqbypass snd_seq_device snd_timer media
crct10dif_pclmul snd crc32_pclmul hid_multitouch ghash_clmulni_intel aesni_intel
joydev aes_x86_64 lrw gf128mul gl
ue_helper ablk_helper cryptd soundcore ie31200_edac edac_core mei_me shpchp mei
input_leds acpi_als serio_raw kfifo_buf lpc_ich industrialio soc_button_array
mac_hid parport_pc ppdev
 lp parport autofs4 btrfs xor raid6_pq dm_mirror dm_region_hash dm_log nouveau
rtsx_usb_sdmmc rtsx_usb hid_generic usbhid hid i915 broadcom bcm_phy_lib mxm_wmi
ttm i2c_algo_bit drm_k
ms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm tg3 ahci ptp libahci
pps_core video wmi fjes
Jul 13 17:05:59 acer kernel: [24873.946598] CPU: 2 PID: 8370 Comm: kworker/2:1
Tainted: G             L  4.7.0-rc6+ #4
Jul 13 17:05:59 acer kernel: [24873.946599] Hardware name: Acer Aspire
VN7-791G/Aspire VN7-791G, BIOS V1.11 01/09/2015
Jul 13 17:05:59 acer kernel: [24873.946603] Workqueue: pm pm_runtime_work
Jul 13 17:05:59 acer kernel: [24873.946604] task: ffff880023c59c80 ti:
ffff880024058000 task.ti: ffff880024058000
Jul 13 17:05:59 acer kernel: [24873.946605] RIP: 0010:[<ffffffff8140a6c0>]
[<ffffffff8140a6c0>] ioread32+0x30/0x40
Jul 13 17:05:59 acer kernel: [24873.946608] RSP: 0018:ffff88002405baf0  EFLAGS:
00000296
Jul 13 17:05:59 acer kernel: [24873.946609] RAX: 00000000ffffffff RBX:
ffff88041734a000 RCX: 0000000000000018
Jul 13 17:05:59 acer kernel: [24873.946610] RDX: 0012230aadf99e58 RSI:
ffffc9000410a014 RDI: ffffc90004009410
Jul 13 17:05:59 acer kernel: [24873.946610] RBP: ffff88002405bb10 R08:
0000000000000009 R09: ffff880416ce0000
Jul 13 17:05:59 acer kernel: [24873.946611] R10: 000000000000000a R11:
0000000000000001 R12: 00000000ffffffff
Jul 13 17:05:59 acer kernel: [24873.946612] R13: 00000000ffffffff R14:
ffff880415de6600 R15: ffffffffffffffff
Jul 13 17:05:59 acer kernel: [24873.946613] FS:  0000000000000000(0000)
GS:ffff88045f280000(0000) knlGS:0000000000000000
Jul 13 17:05:59 acer kernel: [24873.946614] CS:  0010 DS: 0000 ES: 0000 CR0:
0000000080050033
Jul 13 17:05:59 acer kernel: [24873.946614] CR2: 00007f566a5ac010 CR3:
0000000002e06000 CR4: 00000000001406e0
Jul 13 17:05:59 acer kernel: [24873.946615] Stack:
Jul 13 17:05:59 acer kernel: [24873.946616]  ffffffffc03bee71 ffff88041734a000
0000000000000000 ffff880415de7908
Jul 13 17:05:59 acer kernel: [24873.946617]  ffff88002405bb20 ffffffffc03be9bf
ffff88002405bb58 ffffffffc03b7340
Jul 13 17:05:59 acer kernel: [24873.946618]  ffff880415de7908 ffff88041734a000
00000414ef5b0e40 0000000000000011
Jul 13 17:05:59 acer kernel: [24873.946620] Call Trace:
Jul 13 17:05:59 acer kernel: [24873.946645]  [<ffffffffc03bee71>] ?
nv04_timer_read+0x51/0x70 [nouveau]
Jul 13 17:05:59 acer kernel: [24873.946661]  [<ffffffffc03be9bf>]
nvkm_timer_read+0xf/0x20 [nouveau]
Jul 13 17:05:59 acer kernel: [24873.946676]  [<ffffffffc03b7340>]
nvkm_pmu_init+0x50/0x450 [nouveau]
Jul 13 17:05:59 acer kernel: [24873.946685]  [<ffffffffc0371ac1>]
nvkm_subdev_init+0x91/0x200 [nouveau]
Jul 13 17:05:59 acer kernel: [24873.946701]  [<ffffffffc03c2f26>]
nvkm_device_init+0x146/0x280 [nouveau]
Jul 13 17:05:59 acer kernel: [24873.946715]  [<ffffffffc03c6a18>]
nvkm_udevice_init+0x48/0x60 [nouveau]
Jul 13 17:05:59 acer kernel: [24873.946724]  [<ffffffffc0370440>]
nvkm_object_init+0x40/0x190 [nouveau]
Jul 13 17:05:59 acer kernel: [24873.946733]  [<ffffffffc03704b4>]
nvkm_object_init+0xb4/0x190 [nouveau]
Jul 13 17:05:59 acer kernel: [24873.946742]  [<ffffffffc036d56e>]
nvkm_client_init+0xe/0x10 [nouveau]
Jul 13 17:05:59 acer kernel: [24873.946758]  [<ffffffffc040ac0e>]
nvkm_client_resume+0xe/0x10 [nouveau]
Jul 13 17:05:59 acer kernel: [24873.946767]  [<ffffffffc036c7c7>]
nvif_client_resume+0x17/0x20 [nouveau]
Jul 13 17:05:59 acer kernel: [24873.946782]  [<ffffffffc04082fb>]
nouveau_do_resume+0x4b/0x130 [nouveau]
Jul 13 17:05:59 acer kernel: [24873.946797]  [<ffffffffc0408709>]
nouveau_pmops_runtime_resume+0x79/0x120 [nouveau]
Jul 13 17:05:59 acer kernel: [24873.946800]  [<ffffffff814448eb>]
pci_pm_runtime_resume+0x7b/0xa0
Jul 13 17:05:59 acer kernel: [24873.946801]  [<ffffffff815676d3>]
__rpm_callback+0x33/0x70
Jul 13 17:05:59 acer kernel: [24873.946803]  [<ffffffff81444870>] ?
pci_restore_standard_config+0x40/0x40
Jul 13 17:05:59 acer kernel: [24873.946804]  [<ffffffff81567734>]
rpm_callback+0x24/0x80
Jul 13 17:05:59 acer kernel: [24873.946806]  [<ffffffff81444870>] ?
pci_restore_standard_config+0x40/0x40
Jul 13 17:05:59 acer kernel: [24873.946807]  [<ffffffff81567ee1>]
rpm_resume+0x491/0x690
Jul 13 17:05:59 acer kernel: [24873.946808]  [<ffffffff81568f08>]
pm_runtime_work+0x58/0xa0
Jul 13 17:05:59 acer kernel: [24873.946811]  [<ffffffff8109adbb>]
process_one_work+0x16b/0x480
Jul 13 17:05:59 acer kernel: [24873.946812]  [<ffffffff8109b11b>]
worker_thread+0x4b/0x500
Jul 13 17:05:59 acer kernel: [24873.946814]  [<ffffffff8109b0d0>] ?
process_one_work+0x480/0x480
Jul 13 17:05:59 acer kernel: [24873.946815]  [<ffffffff8109b0d0>] ?
process_one_work+0x480/0x480
Jul 13 17:05:59 acer kernel: [24873.946816]  [<ffffffff810a1348>]
kthread+0xd8/0xf0
Jul 13 17:05:59 acer kernel: [24873.946818]  [<ffffffff81845fdf>]
ret_from_fork+0x1f/0x40
Jul 13 17:05:59 acer kernel: [24873.946819]  [<ffffffff810a1270>] ?
kthread_create_on_node+0x1a0/0x1a0
Jul 13 17:05:59 acer kernel: [24873.946820] Code: 03 00 77 25 48 81 ff 00 00 01
00 76 05 0f b7 d7 ed c3 55 48 c7 c6 e4 36 cc 81 48 89 e5 e8 19 ff ff ff b8 ff ff
ff ff 5d c3 8b 07 <c3> 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 48 81 fe ff ff -Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

Peter Wu

2016-Jul-15 11:26 UTC

head link

[Nouveau] [PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP

On Wed, Jul 13, 2016 at 06:17:47PM +0100, Chris Wilson wrote:
> On Tue, Jul 12, 2016 at 06:49:34PM +0200, Peter Wu wrote:
> > The FBIOPUT_CON2FBMAP ioctl takes a console_lock(). When this is called
> > while nouveau was runtime suspended, a deadlock would occur due to
> > nouveau_fbcon_set_suspend also trying to obtain console_lock().
> > 
> > Fix this by delaying the drm_fb_helper_set_suspend call. Based on the
> > i915 code (which was done for performance reasons though).
> > 
> > Cc: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> > Signed-off-by: Peter Wu <peter at lekensteyn.nl>
> > ---
> > Tested on top of v4.7-rc5, the deadlock is gone.
> > ---
> >  drivers/gpu/drm/nouveau/nouveau_drm.c   |  4 +--
> >  drivers/gpu/drm/nouveau/nouveau_drv.h   |  1 +
> >  drivers/gpu/drm/nouveau/nouveau_fbcon.c | 54
++++++++++++++++++++++++++++-----
> >  drivers/gpu/drm/nouveau/nouveau_fbcon.h |  2 +-
> >  4 files changed, 50 insertions(+), 11 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c
b/drivers/gpu/drm/nouveau/nouveau_drm.c
> > index 11f8dd9..f9a2c10 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_drm.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
> > @@ -552,7 +552,7 @@ nouveau_do_suspend(struct drm_device *dev, bool
runtime)
> >  
> >  	if (dev->mode_config.num_crtc) {
> >  		NV_INFO(drm, "suspending console...\n");
> > -		nouveau_fbcon_set_suspend(dev, 1);
> > +		nouveau_fbcon_set_suspend(dev, FBINFO_STATE_SUSPENDED, true);
> >  		NV_INFO(drm, "suspending display...\n");
> >  		ret = nouveau_display_suspend(dev, runtime);
> >  		if (ret)
> > @@ -635,7 +635,7 @@ nouveau_do_resume(struct drm_device *dev, bool
runtime)
> >  		NV_INFO(drm, "resuming display...\n");
> >  		nouveau_display_resume(dev, runtime);
> >  		NV_INFO(drm, "resuming console...\n");
> > -		nouveau_fbcon_set_suspend(dev, 0);
> > +		nouveau_fbcon_set_suspend(dev, FBINFO_STATE_RUNNING, false);
> >  	}
> >  
> >  	return 0;
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h
b/drivers/gpu/drm/nouveau/nouveau_drv.h
> > index 822a021..a743d19 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_drv.h
> > +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
> > @@ -147,6 +147,7 @@ struct nouveau_drm {
> >  	struct nouveau_channel *channel;
> >  	struct nvkm_gpuobj *notify;
> >  	struct nouveau_fbdev *fbcon;
> > +	struct work_struct fbdev_suspend_work;
> >  	struct nvif_object nvsw;
> >  	struct nvif_object ntfy;
> >  	struct nvif_notify flip;
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> > index d1f248f..089156a 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> > @@ -492,19 +492,53 @@ static const struct drm_fb_helper_funcs
nouveau_fbcon_helper_funcs = {
> >  	.fb_probe = nouveau_fbcon_create,
> >  };
> >  
> > +static void nouveau_fbcon_suspend_worker(struct work_struct *work)
> > +{
> > +	nouveau_fbcon_set_suspend(container_of(work,
> > +					       struct nouveau_drm,
> > +					       fbdev_suspend_work)->dev,
> > +				  FBINFO_STATE_RUNNING,
> > +				  true);
> > +}
> > +
> >  void
> > -nouveau_fbcon_set_suspend(struct drm_device *dev, int state)
> > +nouveau_fbcon_set_suspend(struct drm_device *dev, int state, bool
synchronous)
> >  {
> >  	struct nouveau_drm *drm = nouveau_drm(dev);
> > -	if (drm->fbcon) {
> > -		console_lock();
> > -		if (state == FBINFO_STATE_RUNNING)
> > -			nouveau_fbcon_accel_restore(dev);
> > -		drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
> > +	if (!drm->fbcon)
> > +		return;
> > +
> > +	if (synchronous) {
> > +		/* Flush any pending work to turn the console on, and then
> > +		 * wait to turn it off. It must be synchronous as we are
> > +		 * about to suspend or unload the driver.
> > +		 *
> > +		 * Note that from within the work-handler, we cannot flush
> > +		 * ourselves, so only flush outstanding work upon suspend!
> > +		 */
> >  		if (state != FBINFO_STATE_RUNNING)
> > -			nouveau_fbcon_accel_save_disable(dev);
> > -		console_unlock();
> > +			flush_work(&drm->fbdev_suspend_work);
> > +		console_lock();
> > +	} else {
> > +		/*
> > +		 * The console lock can be pretty contented on resume due
> > +		 * to all the printk activity.  Try to keep it out of the hot
> > +		 * path of resume if possible.  This also prevents a deadlock
> > +		 * with FBIOPUT_CON2FBMAP.
> > +		 */
> > +		WARN_ON(state != FBINFO_STATE_RUNNING);
> > +		if (!console_trylock()) {
> > +			schedule_work(&drm->fbdev_suspend_work);
> > +			return;
> > +		}
> >  	}
> > +
> > +	if (state == FBINFO_STATE_RUNNING)
> > +		nouveau_fbcon_accel_restore(dev);
> > +	drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
> > +	if (state != FBINFO_STATE_RUNNING)
> > +		nouveau_fbcon_accel_save_disable(dev);
> > +	console_unlock();
> >  }
> >  
> >  int
> > @@ -526,6 +560,8 @@ nouveau_fbcon_init(struct drm_device *dev)
> >  	fbcon->dev = dev;
> >  	drm->fbcon = fbcon;
> >  
> > +	INIT_WORK(&drm->fbdev_suspend_work,
nouveau_fbcon_suspend_worker);
> > +
> >  	drm_fb_helper_prepare(dev, &fbcon->helper,
&nouveau_fbcon_helper_funcs);
> >  
> >  	ret = drm_fb_helper_init(dev, &fbcon->helper,
> > @@ -571,6 +607,8 @@ nouveau_fbcon_fini(struct drm_device *dev)
> >  	if (!drm->fbcon)
> >  		return;
> >  
> > +	flush_work(&drm->fbdev_suspend_work);
> 
> Hmm, since suspend_work can theorectically rearm itself, this should be
> cancel_work_sync().
How so? The worker calls with state = FBINFO_STATE_RUNNING and
synchronous = true, so schedule_work() can never be called.
> The copy'n'paste of the code looks fine, so (other than the bug copied
> across):
> 
> Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk>
> 
> Whether you can convince the maintainers on the basis of this being a
> deadlock fix is another matter...
> 
> I did test this patch, since I see a livelock on resume, but not the
> same console deadlock. Just in case anyone is interested:
This sounds like a device that is somehow still sleeping, resulting in
failure to read the register. Can you always reproduce this somehow? Is
this the mainline kernel with just this patch?

I found an acpidump for your laptop on
https://bugzilla.kernel.org/show_bug.cgi?id=99381 and it looks like you
have a newer laptop designed for Win8 or newer. Were there any other
ACPI messages (like an infinite loop) preceding this dmesg?

Peter
> Jul 13 17:05:59 acer kernel: [24873.945839] NMI watchdog: BUG: soft lockup
- CPU#2 stuck for 22s! [kworker/2:1:8370]
> Jul 13 17:05:59 acer kernel: [24873.946563] Modules linked in: rfcomm drbg
ansi_cprng ctr ccm arc4 bnep ath10k_pci ath10k_core snd_hda_codec_hdmi
snd_hda_codec_realtek ath snd_hda_co
> dec_generic snd_hda_intel mac80211 snd_hda_codec binfmt_misc snd_hda_core
nls_iso8859_1 snd_hwdep btusb btrtl snd_pcm btbcm rtsx_usb_ms btintel
x86_pkg_temp_thermal uvcvideo acer_wmi
>  intel_powerclamp memstick snd_seq_midi bluetooth sparse_keymap
snd_seq_midi_event coretemp videobuf2_vmalloc videobuf2_memops snd_rawmidi
kvm_intel videobuf2_v4l2 kvm cfg80211 video
> buf2_core snd_seq videodev irqbypass snd_seq_device snd_timer media
crct10dif_pclmul snd crc32_pclmul hid_multitouch ghash_clmulni_intel aesni_intel
joydev aes_x86_64 lrw gf128mul gl
> ue_helper ablk_helper cryptd soundcore ie31200_edac edac_core mei_me shpchp
mei input_leds acpi_als serio_raw kfifo_buf lpc_ich industrialio
soc_button_array mac_hid parport_pc ppdev
>  lp parport autofs4 btrfs xor raid6_pq dm_mirror dm_region_hash dm_log
nouveau rtsx_usb_sdmmc rtsx_usb hid_generic usbhid hid i915 broadcom bcm_phy_lib
mxm_wmi ttm i2c_algo_bit drm_k
> ms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm tg3 ahci ptp
libahci pps_core video wmi fjes
> Jul 13 17:05:59 acer kernel: [24873.946598] CPU: 2 PID: 8370 Comm:
kworker/2:1 Tainted: G             L  4.7.0-rc6+ #4
> Jul 13 17:05:59 acer kernel: [24873.946599] Hardware name: Acer Aspire
VN7-791G/Aspire VN7-791G, BIOS V1.11 01/09/2015
> Jul 13 17:05:59 acer kernel: [24873.946603] Workqueue: pm pm_runtime_work
> Jul 13 17:05:59 acer kernel: [24873.946604] task: ffff880023c59c80 ti:
ffff880024058000 task.ti: ffff880024058000
> Jul 13 17:05:59 acer kernel: [24873.946605] RIP:
0010:[<ffffffff8140a6c0>]  [<ffffffff8140a6c0>] ioread32+0x30/0x40
> Jul 13 17:05:59 acer kernel: [24873.946608] RSP: 0018:ffff88002405baf0 
EFLAGS: 00000296
> Jul 13 17:05:59 acer kernel: [24873.946609] RAX: 00000000ffffffff RBX:
ffff88041734a000 RCX: 0000000000000018
> Jul 13 17:05:59 acer kernel: [24873.946610] RDX: 0012230aadf99e58 RSI:
ffffc9000410a014 RDI: ffffc90004009410
> Jul 13 17:05:59 acer kernel: [24873.946610] RBP: ffff88002405bb10 R08:
0000000000000009 R09: ffff880416ce0000
> Jul 13 17:05:59 acer kernel: [24873.946611] R10: 000000000000000a R11:
0000000000000001 R12: 00000000ffffffff
> Jul 13 17:05:59 acer kernel: [24873.946612] R13: 00000000ffffffff R14:
ffff880415de6600 R15: ffffffffffffffff
> Jul 13 17:05:59 acer kernel: [24873.946613] FS:  0000000000000000(0000)
GS:ffff88045f280000(0000) knlGS:0000000000000000
> Jul 13 17:05:59 acer kernel: [24873.946614] CS:  0010 DS: 0000 ES: 0000
CR0: 0000000080050033
> Jul 13 17:05:59 acer kernel: [24873.946614] CR2: 00007f566a5ac010 CR3:
0000000002e06000 CR4: 00000000001406e0
> Jul 13 17:05:59 acer kernel: [24873.946615] Stack:
> Jul 13 17:05:59 acer kernel: [24873.946616]  ffffffffc03bee71
ffff88041734a000 0000000000000000 ffff880415de7908
> Jul 13 17:05:59 acer kernel: [24873.946617]  ffff88002405bb20
ffffffffc03be9bf ffff88002405bb58 ffffffffc03b7340
> Jul 13 17:05:59 acer kernel: [24873.946618]  ffff880415de7908
ffff88041734a000 00000414ef5b0e40 0000000000000011
> Jul 13 17:05:59 acer kernel: [24873.946620] Call Trace:
> Jul 13 17:05:59 acer kernel: [24873.946645]  [<ffffffffc03bee71>] ?
nv04_timer_read+0x51/0x70 [nouveau]
> Jul 13 17:05:59 acer kernel: [24873.946661]  [<ffffffffc03be9bf>]
nvkm_timer_read+0xf/0x20 [nouveau]
> Jul 13 17:05:59 acer kernel: [24873.946676]  [<ffffffffc03b7340>]
nvkm_pmu_init+0x50/0x450 [nouveau]
> Jul 13 17:05:59 acer kernel: [24873.946685]  [<ffffffffc0371ac1>]
nvkm_subdev_init+0x91/0x200 [nouveau]
> Jul 13 17:05:59 acer kernel: [24873.946701]  [<ffffffffc03c2f26>]
nvkm_device_init+0x146/0x280 [nouveau]
> Jul 13 17:05:59 acer kernel: [24873.946715]  [<ffffffffc03c6a18>]
nvkm_udevice_init+0x48/0x60 [nouveau]
> Jul 13 17:05:59 acer kernel: [24873.946724]  [<ffffffffc0370440>]
nvkm_object_init+0x40/0x190 [nouveau]
> Jul 13 17:05:59 acer kernel: [24873.946733]  [<ffffffffc03704b4>]
nvkm_object_init+0xb4/0x190 [nouveau]
> Jul 13 17:05:59 acer kernel: [24873.946742]  [<ffffffffc036d56e>]
nvkm_client_init+0xe/0x10 [nouveau]
> Jul 13 17:05:59 acer kernel: [24873.946758]  [<ffffffffc040ac0e>]
nvkm_client_resume+0xe/0x10 [nouveau]
> Jul 13 17:05:59 acer kernel: [24873.946767]  [<ffffffffc036c7c7>]
nvif_client_resume+0x17/0x20 [nouveau]
> Jul 13 17:05:59 acer kernel: [24873.946782]  [<ffffffffc04082fb>]
nouveau_do_resume+0x4b/0x130 [nouveau]
> Jul 13 17:05:59 acer kernel: [24873.946797]  [<ffffffffc0408709>]
nouveau_pmops_runtime_resume+0x79/0x120 [nouveau]
> Jul 13 17:05:59 acer kernel: [24873.946800]  [<ffffffff814448eb>]
pci_pm_runtime_resume+0x7b/0xa0
> Jul 13 17:05:59 acer kernel: [24873.946801]  [<ffffffff815676d3>]
__rpm_callback+0x33/0x70
> Jul 13 17:05:59 acer kernel: [24873.946803]  [<ffffffff81444870>] ?
pci_restore_standard_config+0x40/0x40
> Jul 13 17:05:59 acer kernel: [24873.946804]  [<ffffffff81567734>]
rpm_callback+0x24/0x80
> Jul 13 17:05:59 acer kernel: [24873.946806]  [<ffffffff81444870>] ?
pci_restore_standard_config+0x40/0x40
> Jul 13 17:05:59 acer kernel: [24873.946807]  [<ffffffff81567ee1>]
rpm_resume+0x491/0x690
> Jul 13 17:05:59 acer kernel: [24873.946808]  [<ffffffff81568f08>]
pm_runtime_work+0x58/0xa0
> Jul 13 17:05:59 acer kernel: [24873.946811]  [<ffffffff8109adbb>]
process_one_work+0x16b/0x480
> Jul 13 17:05:59 acer kernel: [24873.946812]  [<ffffffff8109b11b>]
worker_thread+0x4b/0x500
> Jul 13 17:05:59 acer kernel: [24873.946814]  [<ffffffff8109b0d0>] ?
process_one_work+0x480/0x480
> Jul 13 17:05:59 acer kernel: [24873.946815]  [<ffffffff8109b0d0>] ?
process_one_work+0x480/0x480
> Jul 13 17:05:59 acer kernel: [24873.946816]  [<ffffffff810a1348>]
kthread+0xd8/0xf0
> Jul 13 17:05:59 acer kernel: [24873.946818]  [<ffffffff81845fdf>]
ret_from_fork+0x1f/0x40
> Jul 13 17:05:59 acer kernel: [24873.946819]  [<ffffffff810a1270>] ?
kthread_create_on_node+0x1a0/0x1a0
> Jul 13 17:05:59 acer kernel: [24873.946820] Code: 03 00 77 25 48 81 ff 00
00 01 00 76 05 0f b7 d7 ed c3 55 48 c7 c6 e4 36 cc 81 48 89 e5 e8 19 ff ff ff b8
ff ff ff ff 5d c3 8b 07 <c3> 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 48 81 fe ff ff
> -Chris
> 
> -- 
> Chris Wilson, Intel Open Source Technology Centre

Apparently Analogous Threads

Search for more apparently analogous threads

Nouveau - Jul 2016 - [PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP

[Nouveau] [PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP

[Nouveau] [PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP

[Nouveau] [PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP

[Nouveau] [PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP

[Nouveau] [PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP

[Nouveau] [PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP

[Nouveau] [PATCH] drm/nouveau/fbcon: fix deadlock with FBIOPUT_CON2FBMAP

Apparently Analogous Threads