John Ogness
2023-Apr-15  01:23 UTC
[Nouveau] [PATCH v2] drm/nouveau: fix incorrect conversion to dma_resv_wait_timeout()
Commit 41d351f29528 ("drm/nouveau: stop using ttm_bo_wait")
converted from ttm_bo_wait_ctx() to dma_resv_wait_timeout().
However, dma_resv_wait_timeout() returns greater than zero on
success as opposed to ttm_bo_wait_ctx(). As a result, relocs
will fail and log errors even when it was a success.
Change the return code handling to match that of
nouveau_gem_ioctl_cpu_prep(), which was already using
dma_resv_wait_timeout() correctly.
Fixes: 41d351f29528 ("drm/nouveau: stop using ttm_bo_wait")
Reported-by: Tanmay Bhushan <007047221b at gmail.com>
Link: https://lore.kernel.org/lkml/20230119225351.71657-1-007047221b@gmail.com
Signed-off-by: John Ogness <john.ogness at linutronix.de>
---
 The original report was actually a patch that needed fixing.
 Since nobody has stepped up to fix this regression correctly,
 I'm posting the v2.
 This is a real regression introduced in 6.3-rc1.
 drivers/gpu/drm/nouveau/nouveau_gem.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index f77e44958037..346839c24273 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -645,8 +645,9 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
 				struct drm_nouveau_gem_pushbuf_reloc *reloc,
 				struct drm_nouveau_gem_pushbuf_bo *bo)
 {
-	long ret = 0;
+	int ret = 0;
 	unsigned i;
+	long lret;
 
 	for (i = 0; i < req->nr_relocs; i++) {
 		struct drm_nouveau_gem_pushbuf_reloc *r = &reloc[i];
@@ -703,13 +704,18 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
 				data |= r->vor;
 		}
 
-		ret = dma_resv_wait_timeout(nvbo->bo.base.resv,
-					    DMA_RESV_USAGE_BOOKKEEP,
-					    false, 15 * HZ);
-		if (ret == 0)
+		lret = dma_resv_wait_timeout(nvbo->bo.base.resv,
+					     DMA_RESV_USAGE_BOOKKEEP,
+					     false, 15 * HZ);
+		if (!lret)
 			ret = -EBUSY;
+		else if (lret > 0)
+			ret = 0;
+		else
+			ret = lret;
+
 		if (ret) {
-			NV_PRINTK(err, cli, "reloc wait_idle failed: %ld\n",
+			NV_PRINTK(err, cli, "reloc wait_idle failed: %d\n",
 				  ret);
 			break;
 		}
base-commit: 09a9639e56c01c7a00d6c0ca63f4c7c41abe075d
-- 
2.30.2
John Ogness
2023-Apr-15  02:04 UTC
[Nouveau] [PATCH v3] drm/nouveau: fix incorrect conversion to dma_resv_wait_timeout()
Commit 41d351f29528 ("drm/nouveau: stop using ttm_bo_wait")
converted from ttm_bo_wait_ctx() to dma_resv_wait_timeout().
However, dma_resv_wait_timeout() returns greater than zero on
success as opposed to ttm_bo_wait_ctx(). As a result, relocs
will fail and log errors even when it was a success.
Change the return code handling to match that of
nouveau_gem_ioctl_cpu_prep(), which was already using
dma_resv_wait_timeout() correctly.
Fixes: 41d351f29528 ("drm/nouveau: stop using ttm_bo_wait")
Reported-by: Tanmay Bhushan <007047221b at gmail.com>
Link: https://lore.kernel.org/lkml/20230119225351.71657-1-007047221b@gmail.com
Signed-off-by: John Ogness <john.ogness at linutronix.de>
---
 I just realized that the nouveau driver style prefers to scope
 variables used only in loops.
 v3: Define @lret within the for-loop.
 drivers/gpu/drm/nouveau/nouveau_gem.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index f77e44958037..ab9062e50977 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -645,7 +645,7 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
 				struct drm_nouveau_gem_pushbuf_reloc *reloc,
 				struct drm_nouveau_gem_pushbuf_bo *bo)
 {
-	long ret = 0;
+	int ret = 0;
 	unsigned i;
 
 	for (i = 0; i < req->nr_relocs; i++) {
@@ -653,6 +653,7 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
 		struct drm_nouveau_gem_pushbuf_bo *b;
 		struct nouveau_bo *nvbo;
 		uint32_t data;
+		long lret;
 
 		if (unlikely(r->bo_index >= req->nr_buffers)) {
 			NV_PRINTK(err, cli, "reloc bo index invalid\n");
@@ -703,13 +704,18 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
 				data |= r->vor;
 		}
 
-		ret = dma_resv_wait_timeout(nvbo->bo.base.resv,
-					    DMA_RESV_USAGE_BOOKKEEP,
-					    false, 15 * HZ);
-		if (ret == 0)
+		lret = dma_resv_wait_timeout(nvbo->bo.base.resv,
+					     DMA_RESV_USAGE_BOOKKEEP,
+					     false, 15 * HZ);
+		if (!lret)
 			ret = -EBUSY;
+		else if (lret > 0)
+			ret = 0;
+		else
+			ret = lret;
+
 		if (ret) {
-			NV_PRINTK(err, cli, "reloc wait_idle failed: %ld\n",
+			NV_PRINTK(err, cli, "reloc wait_idle failed: %d\n",
 				  ret);
 			break;
 		}
base-commit: 09a9639e56c01c7a00d6c0ca63f4c7c41abe075d
-- 
2.30.2
Christian König
2023-Apr-17  05:55 UTC
[Nouveau] [PATCH v3] drm/nouveau: fix incorrect conversion to dma_resv_wait_timeout()
Am 15.04.23 um 04:02 schrieb John Ogness:
> Commit 41d351f29528 ("drm/nouveau: stop using ttm_bo_wait")
> converted from ttm_bo_wait_ctx() to dma_resv_wait_timeout().
> However, dma_resv_wait_timeout() returns greater than zero on
> success as opposed to ttm_bo_wait_ctx(). As a result, relocs
> will fail and log errors even when it was a success.
>
> Change the return code handling to match that of
> nouveau_gem_ioctl_cpu_prep(), which was already using
> dma_resv_wait_timeout() correctly.
>
> Fixes: 41d351f29528 ("drm/nouveau: stop using ttm_bo_wait")
> Reported-by: Tanmay Bhushan <007047221b at gmail.com>
> Link: https://lore.kernel.org/lkml/20230119225351.71657-1-007047221b@gmail.com
> Signed-off-by: John Ogness <john.ogness at linutronix.de>

Reviewed-by: Christian König <christian.koenig at amd.com>

> ---
>  I just realized that the nouveau driver style prefers to scope
>  variables used only in loops.
>
>  v3: Define @lret within the for-loop.
>
>  drivers/gpu/drm/nouveau/nouveau_gem.c | 18 ++++++++++++------
>  1 file changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
> index f77e44958037..ab9062e50977 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_gem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
> @@ -645,7 +645,7 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
>  				struct drm_nouveau_gem_pushbuf_reloc *reloc,
>  				struct drm_nouveau_gem_pushbuf_bo *bo)
>  {
> -	long ret = 0;
> +	int ret = 0;
>  	unsigned i;
>  
>  	for (i = 0; i < req->nr_relocs; i++) {
> @@ -653,6 +653,7 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
>  		struct drm_nouveau_gem_pushbuf_bo *b;
>  		struct nouveau_bo *nvbo;
>  		uint32_t data;
> +		long lret;
>  
>  		if (unlikely(r->bo_index >= req->nr_buffers)) {
>  			NV_PRINTK(err, cli, "reloc bo index invalid\n");
> @@ -703,13 +704,18 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
>  				data |= r->vor;
>  		}
>  
> -		ret = dma_resv_wait_timeout(nvbo->bo.base.resv,
> -					    DMA_RESV_USAGE_BOOKKEEP,
> -					    false, 15 * HZ);
> -		if (ret == 0)
> +		lret = dma_resv_wait_timeout(nvbo->bo.base.resv,
> +					     DMA_RESV_USAGE_BOOKKEEP,
> +					     false, 15 * HZ);
> +		if (!lret)
>  			ret = -EBUSY;
> +		else if (lret > 0)
> +			ret = 0;
> +		else
> +			ret = lret;
> +
>  		if (ret) {
> -			NV_PRINTK(err, cli, "reloc wait_idle failed: %ld\n",
> +			NV_PRINTK(err, cli, "reloc wait_idle failed: %d\n",
>  				  ret);
>  			break;
>  		}
>
> base-commit: 09a9639e56c01c7a00d6c0ca63f4c7c41abe075d
Possibly Parallel Threads
- [PATCH v3] drm/nouveau: fix incorrect conversion to dma_resv_wait_timeout()
- [PATCH 2/3] drm/nouveau: slowpath for pushbuf ioctl
- [PATCH 2/3] drm/nouveau: slowpath for pushbuf ioctl
- [PATCH 2/3] drm/nouveau: slowpath for pushbuf ioctl
- [PATCH] drm/nouveau: fix missing spin_unlock in failure path