Danilo Krummrich
2025-Jan-28 14:56 UTC
[PATCH v2] drm/sched: Use struct for drm_sched_init() params
On Tue, Jan 28, 2025 at 03:29:27PM +0100, Philipp Stanner wrote:
> drm_sched_init() has a great many parameters and upcoming new
> functionality for the scheduler might add even more. Generally, the
> great number of parameters reduces readability and has already caused
> one misnaming in:
> 
> commit 6f1cacf4eba7 ("drm/nouveau: Improve variable name in nouveau_sched_init()").
> 
> Introduce a new struct for the scheduler init parameters and port all
> users.
> 
> Signed-off-by: Philipp Stanner <phasta at kernel.org>
> ---
> Changes in v2:
>   - Point out that the hang-limit is deprecated. (Christian)
>   - Initialize the structs to 0 at declaration. (Planet Earth)
>   - Don't set stuff explicitly to 0 / NULL. (Tvrtko)
>   - Make the structs const where possible. (Boris)
>   - v3d: Use just 1, universal, function for sched-init. (Maíra)
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 18 ++++--
>  drivers/gpu/drm/etnaviv/etnaviv_sched.c    | 20 +++----
>  drivers/gpu/drm/imagination/pvr_queue.c    | 18 ++++--
>  drivers/gpu/drm/lima/lima_sched.c          | 16 +++--
>  drivers/gpu/drm/msm/msm_ringbuffer.c       | 17 +++---
>  drivers/gpu/drm/nouveau/nouveau_sched.c    | 15 +++--
>  drivers/gpu/drm/panfrost/panfrost_job.c    | 20 ++++---
>  drivers/gpu/drm/panthor/panthor_mmu.c      | 16 +++--
>  drivers/gpu/drm/panthor/panthor_sched.c    | 29 +++++----
>  drivers/gpu/drm/scheduler/sched_main.c     | 50 ++++++----------
>  drivers/gpu/drm/v3d/v3d_sched.c            | 68 +++++++++-------------
>  drivers/gpu/drm/xe/xe_execlist.c           | 16 +++--
>  drivers/gpu/drm/xe/xe_gpu_scheduler.c      | 17 +++++-
>  include/drm/gpu_scheduler.h                | 37 ++++++++++--
>  14 files changed, 206 insertions(+), 151 deletions(-)
> 
> diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
> index 4412f2711fb5..b5aac8eebfdd 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_sched.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
> @@ -404,7 +404,15 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
>  {
>  	struct drm_gpu_scheduler *drm_sched = &sched->base;
>  	struct drm_sched_entity *entity = &sched->entity;
> -	const long timeout = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS);
> +	const struct drm_sched_init_args args = {
> +		.ops = &nouveau_sched_ops,
> +		.submit_wq = wq,
> +		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
> +		.credit_limit = credit_limit,
> +		.timeout = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS),
> +		.name = "nouveau_sched",
> +		.dev = drm->dev->dev
> +	};
>  	int ret;
>  
>  	if (!wq) {
> @@ -416,10 +424,7 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
>  		sched->wq = wq;

This change breaks Nouveau, you need to set args.submit_wq here as well.

>  	}
>  
> -	ret = drm_sched_init(drm_sched, &nouveau_sched_ops, wq,
> -			     NOUVEAU_SCHED_PRIORITY_COUNT,
> -			     credit_limit, 0, timeout,
> -			     NULL, NULL, "nouveau_sched", drm->dev->dev);
> +	ret = drm_sched_init(drm_sched, &args);
>  	if (ret)
>  		goto fail_wq;
>  
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
> index 5844a7f639e0..44713cfdcd74 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.c
> +++ b/drivers/gpu/drm/panthor/panthor_sched.c
> @@ -3284,6 +3284,22 @@ static struct panthor_queue *
>  group_create_queue(struct panthor_group *group,
>  		   const struct drm_panthor_queue_create *args)
>  {
> +	const struct drm_sched_init_args sched_args = {
> +		.ops = &panthor_queue_sched_ops,
> +		.submit_wq = group->ptdev->scheduler->wq,
> +		.num_rqs = 1,
> +		/*
> +		 * The credit limit argument tells us the total number of
> +		 * instructions across all CS slots in the ringbuffer, with
> +		 * some jobs requiring twice as many as others, depending on
> +		 * their profiling status.
> +		 */
> +		.credit_limit = args->ringbuf_size / sizeof(u64),
> +		.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
> +		.timeout_wq = group->ptdev->reset.wq,
> +		.name = "panthor-queue",
> +		.dev = group->ptdev->base.dev
> +	};
>  	struct drm_gpu_scheduler *drm_sched;
>  	struct panthor_queue *queue;
>  	int ret;
> @@ -3354,17 +3370,8 @@ group_create_queue(struct panthor_group *group,
>  	if (ret)
>  		goto err_free_queue;
>  
> -	/*
> -	 * Credit limit argument tells us the total number of instructions
> -	 * across all CS slots in the ringbuffer, with some jobs requiring
> -	 * twice as many as others, depending on their profiling status.
> -	 */
> -	ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops,
> -			     group->ptdev->scheduler->wq, 1,
> -			     args->ringbuf_size / sizeof(u64),
> -			     0, msecs_to_jiffies(JOB_TIMEOUT_MS),
> -			     group->ptdev->reset.wq,
> -			     NULL, "panthor-queue", group->ptdev->base.dev);
> +

This creates two empty lines.

> +	ret = drm_sched_init(&queue->scheduler, &sched_args);
>  	if (ret)
>  		goto err_free_queue;
>  
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index a48be16ab84f..6295b2654a7c 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -1244,40 +1244,24 @@ static void drm_sched_run_job_work(struct work_struct *w)
>   * drm_sched_init - Init a gpu scheduler instance
>   *
>   * @sched: scheduler instance
> - * @ops: backend operations for this scheduler
> - * @submit_wq: workqueue to use for submission. If NULL, an ordered wq is
> - *	       allocated and used
> - * @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT
> - * @credit_limit: the number of credits this scheduler can hold from all jobs
> - * @hang_limit: number of times to allow a job to hang before dropping it
> - * @timeout: timeout value in jiffies for the scheduler
> - * @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is
> - *		used
> - * @score: optional score atomic shared with other schedulers
> - * @name: name used for debugging
> - * @dev: target &struct device
> + * @args: scheduler initialization arguments
>   *
>   * Return 0 on success, otherwise error code.
>   */
> -int drm_sched_init(struct drm_gpu_scheduler *sched,
> -		   const struct drm_sched_backend_ops *ops,
> -		   struct workqueue_struct *submit_wq,
> -		   u32 num_rqs, u32 credit_limit, unsigned int hang_limit,
> -		   long timeout, struct workqueue_struct *timeout_wq,
> -		   atomic_t *score, const char *name, struct device *dev)
> +int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_args *args)
>  {
>  	int i;
>  
> -	sched->ops = ops;
> -	sched->credit_limit = credit_limit;
> -	sched->name = name;
> -	sched->timeout = timeout;
> -	sched->timeout_wq = timeout_wq ? : system_wq;
> -	sched->hang_limit = hang_limit;
> -	sched->score = score ? score : &sched->_score;
> -	sched->dev = dev;
> +	sched->ops = args->ops;
> +	sched->credit_limit = args->credit_limit;
> +	sched->name = args->name;
> +	sched->timeout = args->timeout;
> +	sched->timeout_wq = args->timeout_wq ? : system_wq;
> +	sched->hang_limit = args->hang_limit;
> +	sched->score = args->score ? args->score : &sched->_score;
> +	sched->dev = args->dev;
>  
> -	if (num_rqs > DRM_SCHED_PRIORITY_COUNT) {
> +	if (args->num_rqs > DRM_SCHED_PRIORITY_COUNT) {
>  		/* This is a gross violation--tell drivers what the problem is.
>  		 */
>  		drm_err(sched, "%s: num_rqs cannot be greater than DRM_SCHED_PRIORITY_COUNT\n",
> @@ -1292,16 +1276,16 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>  		return 0;
>  	}
>  
> -	if (submit_wq) {
> -		sched->submit_wq = submit_wq;
> +	if (args->submit_wq) {
> +		sched->submit_wq = args->submit_wq;
>  		sched->own_submit_wq = false;
>  	} else {
>  #ifdef CONFIG_LOCKDEP
> -		sched->submit_wq = alloc_ordered_workqueue_lockdep_map(name,
> +		sched->submit_wq = alloc_ordered_workqueue_lockdep_map(args->name,
>  								       WQ_MEM_RECLAIM,
>  								       &drm_sched_lockdep_map);
>  #else
> -		sched->submit_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
> +		sched->submit_wq = alloc_ordered_workqueue(args->name, WQ_MEM_RECLAIM);
>  #endif
>  		if (!sched->submit_wq)
>  			return -ENOMEM;
> @@ -1309,11 +1293,11 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>  		sched->own_submit_wq = true;
>  	}
>  
> -	sched->sched_rq = kmalloc_array(num_rqs, sizeof(*sched->sched_rq),
> +	sched->sched_rq = kmalloc_array(args->num_rqs, sizeof(*sched->sched_rq),
>  					GFP_KERNEL | __GFP_ZERO);
>  	if (!sched->sched_rq)
>  		goto Out_check_own;
> -	sched->num_rqs = num_rqs;
> +	sched->num_rqs = args->num_rqs;
>  	for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
>  		sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL);
>  		if (!sched->sched_rq[i])
> diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
> index a8c416a48812..db1c52dcf1a6 100644
> --- a/drivers/gpu/drm/xe/xe_execlist.c
> +++ b/drivers/gpu/drm/xe/xe_execlist.c
> @@ -332,6 +332,15 @@ static const struct drm_sched_backend_ops drm_sched_ops = {
>  static int execlist_exec_queue_init(struct xe_exec_queue *q)
>  {
>  	struct drm_gpu_scheduler *sched;
> +	const struct drm_sched_init_args args = {
> +		.ops = &drm_sched_ops,
> +		.num_rqs = 1,
> +		.credit_limit = q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES,
> +		.hang_limit = XE_SCHED_HANG_LIMIT,
> +		.timeout = XE_SCHED_JOB_TIMEOUT,
> +		.name = q->hwe->name,
> +		.dev = gt_to_xe(q->gt)->drm.dev
> +	};
>  	struct xe_execlist_exec_queue *exl;
>  	struct xe_device *xe = gt_to_xe(q->gt);
>  	int err;
> @@ -346,11 +355,8 @@ static int execlist_exec_queue_init(struct xe_exec_queue *q)
>  
>  	exl->q = q;
>  
> -	err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1,
> -			     q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES,
> -			     XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT,
> -			     NULL, NULL, q->hwe->name,
> -			     gt_to_xe(q->gt)->drm.dev);
> +

Two empty lines here as well.

> +	err = drm_sched_init(&exl->sched, &args);
>  	if (err)
>  		goto err_free;
>  
> diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
> index 50361b4638f9..be95f4cca007 100644
> --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c
> +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
> @@ -63,13 +63,24 @@ int xe_sched_init(struct xe_gpu_scheduler *sched,
>  		  atomic_t *score, const char *name,
>  		  struct device *dev)
>  {
> +	const struct drm_sched_init_args args = {
> +		.ops = ops,
> +		.submit_wq = submit_wq,
> +		.num_rqs = 1,
> +		.credit_limit = hw_submission,
> +		.hang_limit = hang_limit,
> +		.timeout = timeout,
> +		.timeout_wq = timeout_wq,
> +		.score = score,
> +		.name = name,
> +		.dev = dev
> +	};
> +
>  	sched->ops = xe_ops;
>  	INIT_LIST_HEAD(&sched->msgs);
>  	INIT_WORK(&sched->work_process_msg, xe_sched_process_msg_work);
>  
> -	return drm_sched_init(&sched->base, ops, submit_wq, 1, hw_submission,
> -			      hang_limit, timeout, timeout_wq, score, name,
> -			      dev);
> +	return drm_sched_init(&sched->base, &args);
>  }
>  
>  void xe_sched_fini(struct xe_gpu_scheduler *sched)
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index a0ff08123f07..f0f5435598a8 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -540,12 +540,39 @@ struct drm_gpu_scheduler {
>  	struct device *dev;
>  };
>  
> +/**
> + * struct drm_sched_init_args - parameters for initializing a DRM GPU scheduler
> + *
> + * @ops: backend operations provided by the driver
> + * @submit_wq: workqueue to use for submission. May be NULL.
> + * If NULL, an ordered wq is allocated and used.
> + * @num_rqs: Number of run-queues. This may be at most DRM_SCHED_PRIORITY_COUNT,
> + * as there's usually one run-queue per priority, but may be less.
> + * @credit_limit: the number of credits this scheduler can hold from all jobs
> + * @hang_limit: number of times to allow a job to hang before dropping it.
> + * This mechanism is DEPRECATED. Set it to 0.
> + * @timeout: timeout value in jiffies for the scheduler
> + * @timeout_wq: workqueue to use for timeout work. May be NULL.
> + * If NULL, the system_wq is used.
> + * @score: score atomic shared with other schedulers. May be NULL.
> + * @name: name used for debugging
> + * @dev: associated device. Used for debugging
> + */

This is weirdly formatted. Please align line breaks with the corresponding
number of spaces.

> +struct drm_sched_init_args {
> +	const struct drm_sched_backend_ops *ops;
> +	struct workqueue_struct *submit_wq;
> +	struct workqueue_struct *timeout_wq;
> +	u32 num_rqs;
> +	u32 credit_limit;
> +	unsigned int hang_limit;
> +	long timeout;
> +	atomic_t *score;
> +	const char *name;
> +	struct device *dev;
> +};
> +
>  int drm_sched_init(struct drm_gpu_scheduler *sched,
> -		   const struct drm_sched_backend_ops *ops,
> -		   struct workqueue_struct *submit_wq,
> -		   u32 num_rqs, u32 credit_limit, unsigned int hang_limit,
> -		   long timeout, struct workqueue_struct *timeout_wq,
> -		   atomic_t *score, const char *name, struct device *dev);
> +		   const struct drm_sched_init_args *args);
>  
>  void drm_sched_fini(struct drm_gpu_scheduler *sched);
>  int drm_sched_job_init(struct drm_sched_job *job,
> -- 
> 2.47.1
> 
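For illustration, the alignment Danilo asks for in his last comment above means
indenting continuation lines under the description text of their own @param
entry. A possible re-flow of the quoted kernel-doc block (wording taken verbatim
from the patch, only the whitespace is a sketch and may differ from what lands
upstream) could look like this:

/**
 * struct drm_sched_init_args - parameters for initializing a DRM GPU scheduler
 *
 * @ops: backend operations provided by the driver
 * @submit_wq: workqueue to use for submission. May be NULL.
 *             If NULL, an ordered wq is allocated and used.
 * @num_rqs: Number of run-queues. This may be at most DRM_SCHED_PRIORITY_COUNT,
 *           as there's usually one run-queue per priority, but may be less.
 * @credit_limit: the number of credits this scheduler can hold from all jobs
 * @hang_limit: number of times to allow a job to hang before dropping it.
 *              This mechanism is DEPRECATED. Set it to 0.
 * @timeout: timeout value in jiffies for the scheduler
 * @timeout_wq: workqueue to use for timeout work. May be NULL.
 *              If NULL, the system_wq is used.
 * @score: score atomic shared with other schedulers. May be NULL.
 * @name: name used for debugging
 * @dev: associated device. Used for debugging
 */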
Philipp Stanner
2025-Jan-29 15:18 UTC
[PATCH v2] drm/sched: Use struct for drm_sched_init() params
On Tue, 2025-01-28 at 15:56 +0100, Danilo Krummrich wrote:
> On Tue, Jan 28, 2025 at 03:29:27PM +0100, Philipp Stanner wrote:
> > drm_sched_init() has a great many parameters and upcoming new
> > functionality for the scheduler might add even more. Generally, the
> > great number of parameters reduces readability and has already
> > caused one misnaming in:
> > 
> > commit 6f1cacf4eba7 ("drm/nouveau: Improve variable name in
> > nouveau_sched_init()").
> > 
> > Introduce a new struct for the scheduler init parameters and port
> > all users.
> > 
> > Signed-off-by: Philipp Stanner <phasta at kernel.org>

[...]

> > diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c
> > b/drivers/gpu/drm/nouveau/nouveau_sched.c
> > index 4412f2711fb5..b5aac8eebfdd 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_sched.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
> > @@ -404,7 +404,15 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
> >  {
> >  	struct drm_gpu_scheduler *drm_sched = &sched->base;
> >  	struct drm_sched_entity *entity = &sched->entity;
> > -	const long timeout = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS);
> > +	const struct drm_sched_init_args args = {
> > +		.ops = &nouveau_sched_ops,
> > +		.submit_wq = wq,
> > +		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
> > +		.credit_limit = credit_limit,
> > +		.timeout = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS),
> > +		.name = "nouveau_sched",
> > +		.dev = drm->dev->dev
> > +	};
> >  	int ret;
> >  
> >  	if (!wq) {
> > @@ -416,10 +424,7 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
> >  		sched->wq = wq;

Do you want it set here

args.submit_wq = wq;

or below, outside the if-block? Should be the same AFAICS

> 
> This change breaks Nouveau, you need to set args.submit_wq here as
> well.

I overlooked the allocation below.

P.

> 
> >  	}
> >  
> > -	ret = drm_sched_init(drm_sched, &nouveau_sched_ops, wq,
> > -			     NOUVEAU_SCHED_PRIORITY_COUNT,
> > -			     credit_limit, 0, timeout,
> > -			     NULL, NULL, "nouveau_sched", drm->dev->dev);
> > +	ret = drm_sched_init(drm_sched, &args);
> >  	if (ret)
> >  		goto fail_wq;

[...]
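A side note on the v2 changelog items about zero-initialization ("Initialize
the structs to 0 at declaration" / "Don't set stuff explicitly to 0 / NULL"):
with a C designated initializer, every member that is not named is implicitly
initialized to 0/NULL, which is why the per-driver args structs in the patch
can simply omit optional fields. A minimal sketch with a hypothetical driver
name and placeholder values (not taken from the patch):

/*
 * Illustrative only: example_sched_ops, the limits and the name are made up.
 * .submit_wq, .timeout_wq, .hang_limit and .score are left out and therefore
 * end up as 0/NULL without any explicit assignment.
 */
static const struct drm_sched_backend_ops example_sched_ops;

static int example_sched_init(struct drm_gpu_scheduler *sched,
			      struct device *dev)
{
	const struct drm_sched_init_args args = {
		.ops = &example_sched_ops,
		.num_rqs = 1,
		.credit_limit = 64,
		.timeout = msecs_to_jiffies(500),
		.name = "example-sched",
		.dev = dev,
	};

	return drm_sched_init(sched, &args);
}

This is also why dropping the explicit 0/NULL assignments (the Tvrtko item in
the changelog) does not change behaviour.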
Danilo Krummrich
2025-Jan-29 16:50 UTC
[PATCH v2] drm/sched: Use struct for drm_sched_init() params
On Wed, Jan 29, 2025 at 04:18:30PM +0100, Philipp Stanner wrote:
> On Tue, 2025-01-28 at 15:56 +0100, Danilo Krummrich wrote:
> > On Tue, Jan 28, 2025 at 03:29:27PM +0100, Philipp Stanner wrote:
> > > diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c
> > > b/drivers/gpu/drm/nouveau/nouveau_sched.c
> > > index 4412f2711fb5..b5aac8eebfdd 100644
> > > --- a/drivers/gpu/drm/nouveau/nouveau_sched.c
> > > +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
> > > @@ -404,7 +404,15 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
> > >  {
> > >  	struct drm_gpu_scheduler *drm_sched = &sched->base;
> > >  	struct drm_sched_entity *entity = &sched->entity;
> > > -	const long timeout = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS);
> > > +	const struct drm_sched_init_args args = {
> > > +		.ops = &nouveau_sched_ops,
> > > +		.submit_wq = wq,
> > > +		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
> > > +		.credit_limit = credit_limit,
> > > +		.timeout = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS),
> > > +		.name = "nouveau_sched",
> > > +		.dev = drm->dev->dev
> > > +	};
> > >  	int ret;
> > >  
> > >  	if (!wq) {
> > > @@ -416,10 +424,7 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
> > >  		sched->wq = wq;
> 
> Do you want it set here
> 
> args.submit_wq = wq;
> 
> or below, outside the if-block? Should be the same AFAICS

Yeah, shouldn't make a difference. Personally, I'd prefer to set it once
after the if-block.

> > 
> > This change breaks Nouveau, you need to set args.submit_wq here as
> > well.
> 
> I overlooked the allocation below.
> 
> P.
> 
> > >  	}
> > >  
> > > -	ret = drm_sched_init(drm_sched, &nouveau_sched_ops, wq,
> > > -			     NOUVEAU_SCHED_PRIORITY_COUNT,
> > > -			     credit_limit, 0, timeout,
> > > -			     NULL, NULL, "nouveau_sched", drm->dev->dev);
> > > +	ret = drm_sched_init(drm_sched, &args);
> > >  	if (ret)
> > >  		goto fail_wq;
> > > 
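To make the suggestion concrete, a rough sketch of how nouveau_sched_init()
might look with submit_wq assigned once after the if-block, as Danilo prefers
above. This is only an illustration based on the quoted hunk, not the final
patch: args would have to lose its const qualifier, and the workqueue
allocation inside the if-block is elided here.

	struct drm_sched_init_args args = {
		.ops = &nouveau_sched_ops,
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.credit_limit = credit_limit,
		.timeout = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS),
		.name = "nouveau_sched",
		.dev = drm->dev->dev
	};
	int ret;

	if (!wq) {
		/* ... allocate an ordered workqueue, bail out on failure ... */
		sched->wq = wq;
	}

	/* One assignment covers both the caller-provided and the allocated wq. */
	args.submit_wq = wq;

	ret = drm_sched_init(drm_sched, &args);

Whether the assignment lives inside the if-block or after it makes no
functional difference, as both participants note; doing it once after the
block just avoids touching the struct in two places.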