Danilo Krummrich
2024-Feb-02 00:05 UTC
[PATCH 1/2] drm/nouveau: don't fini scheduler if not initialized
nouveau_abi16_ioctl_channel_alloc() and nouveau_cli_init() simply call
their corresponding *_fini() counterpart. This can lead to
nouveau_sched_fini() being called without struct nouveau_sched ever
being initialized in the first place.
Instead of embedding struct nouveau_sched into struct nouveau_cli and
struct nouveau_chan_abi16, allocate struct nouveau_sched separately,
such that we can check for the corresponding pointer to be NULL in the
particular *_fini() functions.
It makes sense to allocate struct nouveau_sched separately anyway, since
in a subsequent commit we can also avoid to allocate a struct
nouveau_sched in nouveau_abi16_ioctl_channel_alloc() at all, if the
VM_BIND uAPI has been disabled due to the legacy uAPI being used.
Fixes: 5f03a507b29e ("drm/nouveau: implement 1:1 scheduler - entity
relationship")
Reported-by: Timur Tabi <ttabi at nvidia.com>
Closes: https://lore.kernel.org/nouveau/20240131213917.1545604-1-ttabi at
nvidia.com/
Signed-off-by: Danilo Krummrich <dakr at redhat.com>
---
drivers/gpu/drm/nouveau/nouveau_abi16.c | 10 ++++---
drivers/gpu/drm/nouveau/nouveau_abi16.h | 2 +-
drivers/gpu/drm/nouveau/nouveau_drm.c | 7 +++--
drivers/gpu/drm/nouveau/nouveau_drv.h | 2 +-
drivers/gpu/drm/nouveau/nouveau_exec.c | 2 +-
drivers/gpu/drm/nouveau/nouveau_sched.c | 38 +++++++++++++++++++++++--
drivers/gpu/drm/nouveau/nouveau_sched.h | 6 ++--
drivers/gpu/drm/nouveau/nouveau_uvmm.c | 2 +-
8 files changed, 53 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c
b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index a04156ca8390..ca4b5ab3e59e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -128,12 +128,14 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16,
struct nouveau_abi16_ntfy *ntfy, *temp;
/* Cancel all jobs from the entity's queue. */
- drm_sched_entity_fini(&chan->sched.entity);
+ if (chan->sched)
+ drm_sched_entity_fini(&chan->sched->entity);
if (chan->chan)
nouveau_channel_idle(chan->chan);
- nouveau_sched_fini(&chan->sched);
+ if (chan->sched)
+ nouveau_sched_destroy(&chan->sched);
/* cleanup notifier state */
list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) {
@@ -337,8 +339,8 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
if (ret)
goto done;
- ret = nouveau_sched_init(&chan->sched, drm, drm->sched_wq,
- chan->chan->dma.ib_max);
+ ret = nouveau_sched_create(&chan->sched, drm, drm->sched_wq,
+ chan->chan->dma.ib_max);
if (ret)
goto done;
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h
b/drivers/gpu/drm/nouveau/nouveau_abi16.h
index 1f5e243c0c75..11c8c4a80079 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.h
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h
@@ -26,7 +26,7 @@ struct nouveau_abi16_chan {
struct nouveau_bo *ntfy;
struct nouveau_vma *ntfy_vma;
struct nvkm_mm heap;
- struct nouveau_sched sched;
+ struct nouveau_sched *sched;
};
struct nouveau_abi16 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c
b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 6f6c31a9937b..a947e1d5f309 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -201,7 +201,8 @@ nouveau_cli_fini(struct nouveau_cli *cli)
WARN_ON(!list_empty(&cli->worker));
usif_client_fini(cli);
- nouveau_sched_fini(&cli->sched);
+ if (cli->sched)
+ nouveau_sched_destroy(&cli->sched);
if (uvmm)
nouveau_uvmm_fini(uvmm);
nouveau_vmm_fini(&cli->svm);
@@ -311,7 +312,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
cli->mem = &mems[ret];
/* Don't pass in the (shared) sched_wq in order to let
- * nouveau_sched_init() create a dedicated one for VM_BIND jobs.
+ * nouveau_sched_create() create a dedicated one for VM_BIND jobs.
*
* This is required to ensure that for VM_BIND jobs free_job() work and
* run_job() work can always run concurrently and hence, free_job() work
@@ -320,7 +321,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
* locks which indirectly or directly are held for allocations
* elsewhere.
*/
- ret = nouveau_sched_init(&cli->sched, drm, NULL, 1);
+ ret = nouveau_sched_create(&cli->sched, drm, NULL, 1);
if (ret)
goto done;
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h
b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 8a6d94c8b163..e239c6bf4afa 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -98,7 +98,7 @@ struct nouveau_cli {
bool disabled;
} uvmm;
- struct nouveau_sched sched;
+ struct nouveau_sched *sched;
const struct nvif_mclass *mem;
diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c
b/drivers/gpu/drm/nouveau/nouveau_exec.c
index bc5d71b79ab2..e65c0ef23bc7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_exec.c
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
@@ -389,7 +389,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev,
if (ret)
goto out;
- args.sched = &chan16->sched;
+ args.sched = chan16->sched;
args.file_priv = file_priv;
args.chan = chan;
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c
b/drivers/gpu/drm/nouveau/nouveau_sched.c
index dd98f6910f9c..32fa2e273965 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -398,7 +398,7 @@ static const struct drm_sched_backend_ops nouveau_sched_ops
= {
.free_job = nouveau_sched_free_job,
};
-int
+static int
nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
struct workqueue_struct *wq, u32 credit_limit)
{
@@ -453,7 +453,30 @@ nouveau_sched_init(struct nouveau_sched *sched, struct
nouveau_drm *drm,
return ret;
}
-void
+int
+nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
+ struct workqueue_struct *wq, u32 credit_limit)
+{
+ struct nouveau_sched *sched;
+ int ret;
+
+ sched = kzalloc(sizeof(*sched), GFP_KERNEL);
+ if (!sched)
+ return -ENOMEM;
+
+ ret = nouveau_sched_init(sched, drm, wq, credit_limit);
+ if (ret) {
+ kfree(sched);
+ return ret;
+ }
+
+ *psched = sched;
+
+ return 0;
+}
+
+
+static void
nouveau_sched_fini(struct nouveau_sched *sched)
{
struct drm_gpu_scheduler *drm_sched = &sched->base;
@@ -471,3 +494,14 @@ nouveau_sched_fini(struct nouveau_sched *sched)
if (sched->wq)
destroy_workqueue(sched->wq);
}
+
+void
+nouveau_sched_destroy(struct nouveau_sched **psched)
+{
+ struct nouveau_sched *sched = *psched;
+
+ nouveau_sched_fini(sched);
+ kfree(sched);
+
+ *psched = NULL;
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h
b/drivers/gpu/drm/nouveau/nouveau_sched.h
index a6528f5981e6..e1f01a23e6f6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.h
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.h
@@ -111,8 +111,8 @@ struct nouveau_sched {
} job;
};
-int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
- struct workqueue_struct *wq, u32 credit_limit);
-void nouveau_sched_fini(struct nouveau_sched *sched);
+int nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm
*drm,
+ struct workqueue_struct *wq, u32 credit_limit);
+void nouveau_sched_destroy(struct nouveau_sched **psched);
#endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index 4f223c972c6a..0a0a11dc9ec0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1740,7 +1740,7 @@ nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev,
if (ret)
return ret;
- args.sched = &cli->sched;
+ args.sched = cli->sched;
args.file_priv = file_priv;
ret = nouveau_uvmm_vm_bind(&args);
base-commit: 041261ac4c365e03b07427569d6735f8adfd21c8
--
2.43.0
Danilo Krummrich
2024-Feb-02 00:05 UTC
[PATCH 2/2] drm/nouveau: omit to create schedulers using the legacy uAPI
Omit to create scheduler instances when using the legacy uAPI. When
using the legacy NOUVEAU_GEM_PUSHBUF ioctl no scheduler instance is
required, hence omit creating scheduler instances in
nouveau_abi16_ioctl_channel_alloc().
Signed-off-by: Danilo Krummrich <dakr at redhat.com>
---
drivers/gpu/drm/nouveau/nouveau_abi16.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c
b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index ca4b5ab3e59e..d1bb8151a1df 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -339,10 +339,16 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
if (ret)
goto done;
- ret = nouveau_sched_create(&chan->sched, drm, drm->sched_wq,
- chan->chan->dma.ib_max);
- if (ret)
- goto done;
+ /* If we're not using the VM_BIND uAPI, we don't need a scheduler.
+ *
+ * The client lock is already acquired by nouveau_abi16_get().
+ */
+ if (nouveau_cli_uvmm(cli)) {
+ ret = nouveau_sched_create(&chan->sched, drm, drm->sched_wq,
+ chan->chan->dma.ib_max);
+ if (ret)
+ goto done;
+ }
init->channel = chan->chan->chid;
--
2.43.0
Timur Tabi
2024-Feb-02 17:14 UTC
[PATCH 1/2] drm/nouveau: don't fini scheduler if not initialized
On Fri, 2024-02-02 at 01:05 +0100, Danilo Krummrich wrote:> nouveau_abi16_ioctl_channel_alloc() and nouveau_cli_init() simply call > their corresponding *_fini() counterpart. This can lead to > nouveau_sched_fini() being called without struct nouveau_sched ever > being initialized in the first place.Thanks, I've confirmed that these patches do fix the problem.
Dave Airlie
2024-Feb-09 19:39 UTC
[PATCH 1/2] drm/nouveau: don't fini scheduler if not initialized
On Fri, 2 Feb 2024 at 10:06, Danilo Krummrich <dakr at redhat.com> wrote:> > nouveau_abi16_ioctl_channel_alloc() and nouveau_cli_init() simply call > their corresponding *_fini() counterpart. This can lead to > nouveau_sched_fini() being called without struct nouveau_sched ever > being initialized in the first place. > > Instead of embedding struct nouveau_sched into struct nouveau_cli and > struct nouveau_chan_abi16, allocate struct nouveau_sched separately, > such that we can check for the corresponding pointer to be NULL in the > particular *_fini() functions. > > It makes sense to allocate struct nouveau_sched separately anyway, since > in a subsequent commit we can also avoid to allocate a struct > nouveau_sched in nouveau_abi16_ioctl_channel_alloc() at all, if the > VM_BIND uAPI has been disabled due to the legacy uAPI being used.Looks good, for the series Reviewed-by: Dave Airlie <airlied at redhat.com>> > Fixes: 5f03a507b29e ("drm/nouveau: implement 1:1 scheduler - entity relationship") > Reported-by: Timur Tabi <ttabi at nvidia.com> > Closes: https://lore.kernel.org/nouveau/20240131213917.1545604-1-ttabi at nvidia.com/ > Signed-off-by: Danilo Krummrich <dakr at redhat.com> > --- > drivers/gpu/drm/nouveau/nouveau_abi16.c | 10 ++++--- > drivers/gpu/drm/nouveau/nouveau_abi16.h | 2 +- > drivers/gpu/drm/nouveau/nouveau_drm.c | 7 +++-- > drivers/gpu/drm/nouveau/nouveau_drv.h | 2 +- > drivers/gpu/drm/nouveau/nouveau_exec.c | 2 +- > drivers/gpu/drm/nouveau/nouveau_sched.c | 38 +++++++++++++++++++++++-- > drivers/gpu/drm/nouveau/nouveau_sched.h | 6 ++-- > drivers/gpu/drm/nouveau/nouveau_uvmm.c | 2 +- > 8 files changed, 53 insertions(+), 16 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c > index a04156ca8390..ca4b5ab3e59e 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c > +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c > @@ -128,12 +128,14 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16, > struct nouveau_abi16_ntfy *ntfy, *temp; > > /* Cancel all jobs from the entity's queue. */ > - drm_sched_entity_fini(&chan->sched.entity); > + if (chan->sched) > + drm_sched_entity_fini(&chan->sched->entity); > > if (chan->chan) > nouveau_channel_idle(chan->chan); > > - nouveau_sched_fini(&chan->sched); > + if (chan->sched) > + nouveau_sched_destroy(&chan->sched); > > /* cleanup notifier state */ > list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) { > @@ -337,8 +339,8 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) > if (ret) > goto done; > > - ret = nouveau_sched_init(&chan->sched, drm, drm->sched_wq, > - chan->chan->dma.ib_max); > + ret = nouveau_sched_create(&chan->sched, drm, drm->sched_wq, > + chan->chan->dma.ib_max); > if (ret) > goto done; > > diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h > index 1f5e243c0c75..11c8c4a80079 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_abi16.h > +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h > @@ -26,7 +26,7 @@ struct nouveau_abi16_chan { > struct nouveau_bo *ntfy; > struct nouveau_vma *ntfy_vma; > struct nvkm_mm heap; > - struct nouveau_sched sched; > + struct nouveau_sched *sched; > }; > > struct nouveau_abi16 { > diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c > index 6f6c31a9937b..a947e1d5f309 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_drm.c > +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c > @@ -201,7 +201,8 @@ nouveau_cli_fini(struct nouveau_cli *cli) > WARN_ON(!list_empty(&cli->worker)); > > usif_client_fini(cli); > - nouveau_sched_fini(&cli->sched); > + if (cli->sched) > + nouveau_sched_destroy(&cli->sched); > if (uvmm) > nouveau_uvmm_fini(uvmm); > nouveau_vmm_fini(&cli->svm); > @@ -311,7 +312,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname, > cli->mem = &mems[ret]; > > /* Don't pass in the (shared) sched_wq in order to let > - * nouveau_sched_init() create a dedicated one for VM_BIND jobs. > + * nouveau_sched_create() create a dedicated one for VM_BIND jobs. > * > * This is required to ensure that for VM_BIND jobs free_job() work and > * run_job() work can always run concurrently and hence, free_job() work > @@ -320,7 +321,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname, > * locks which indirectly or directly are held for allocations > * elsewhere. > */ > - ret = nouveau_sched_init(&cli->sched, drm, NULL, 1); > + ret = nouveau_sched_create(&cli->sched, drm, NULL, 1); > if (ret) > goto done; > > diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h > index 8a6d94c8b163..e239c6bf4afa 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_drv.h > +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h > @@ -98,7 +98,7 @@ struct nouveau_cli { > bool disabled; > } uvmm; > > - struct nouveau_sched sched; > + struct nouveau_sched *sched; > > const struct nvif_mclass *mem; > > diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c > index bc5d71b79ab2..e65c0ef23bc7 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_exec.c > +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c > @@ -389,7 +389,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev, > if (ret) > goto out; > > - args.sched = &chan16->sched; > + args.sched = chan16->sched; > args.file_priv = file_priv; > args.chan = chan; > > diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c > index dd98f6910f9c..32fa2e273965 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_sched.c > +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c > @@ -398,7 +398,7 @@ static const struct drm_sched_backend_ops nouveau_sched_ops = { > .free_job = nouveau_sched_free_job, > }; > > -int > +static int > nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, > struct workqueue_struct *wq, u32 credit_limit) > { > @@ -453,7 +453,30 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, > return ret; > } > > -void > +int > +nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm, > + struct workqueue_struct *wq, u32 credit_limit) > +{ > + struct nouveau_sched *sched; > + int ret; > + > + sched = kzalloc(sizeof(*sched), GFP_KERNEL); > + if (!sched) > + return -ENOMEM; > + > + ret = nouveau_sched_init(sched, drm, wq, credit_limit); > + if (ret) { > + kfree(sched); > + return ret; > + } > + > + *psched = sched; > + > + return 0; > +} > + > + > +static void > nouveau_sched_fini(struct nouveau_sched *sched) > { > struct drm_gpu_scheduler *drm_sched = &sched->base; > @@ -471,3 +494,14 @@ nouveau_sched_fini(struct nouveau_sched *sched) > if (sched->wq) > destroy_workqueue(sched->wq); > } > + > +void > +nouveau_sched_destroy(struct nouveau_sched **psched) > +{ > + struct nouveau_sched *sched = *psched; > + > + nouveau_sched_fini(sched); > + kfree(sched); > + > + *psched = NULL; > +} > diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h > index a6528f5981e6..e1f01a23e6f6 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_sched.h > +++ b/drivers/gpu/drm/nouveau/nouveau_sched.h > @@ -111,8 +111,8 @@ struct nouveau_sched { > } job; > }; > > -int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, > - struct workqueue_struct *wq, u32 credit_limit); > -void nouveau_sched_fini(struct nouveau_sched *sched); > +int nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm, > + struct workqueue_struct *wq, u32 credit_limit); > +void nouveau_sched_destroy(struct nouveau_sched **psched); > > #endif > diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c > index 4f223c972c6a..0a0a11dc9ec0 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c > +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c > @@ -1740,7 +1740,7 @@ nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev, > if (ret) > return ret; > > - args.sched = &cli->sched; > + args.sched = cli->sched; > args.file_priv = file_priv; > > ret = nouveau_uvmm_vm_bind(&args); > > base-commit: 041261ac4c365e03b07427569d6735f8adfd21c8 > -- > 2.43.0 >
Seemingly Similar Threads
- [PATCH 1/2] drm/nouveau: don't fini scheduler if not initialized
- [PATCH v2] drm/nouveau: Improve variable names in nouveau_sched_init()
- [PATCH] drm/nouveau: Improve variable names in nouveau_sched_init()
- [PATCH] drm/sched: Use struct for drm_sched_init() params
- [PATCH] drm/nouveau: Constify struct nouveau_job_ops