Christian König
2023-Aug-07 18:07 UTC
[Nouveau] [PATCH drm-misc-next v9 06/11] drm/nouveau: fence: separate fence alloc and emit
Am 03.08.23 um 18:52 schrieb Danilo Krummrich:> The new (VM_BIND) UAPI exports DMA fences through DRM syncobjs. Hence, > in order to emit fences within DMA fence signalling critical sections > (e.g. as typically done in the DRM GPU schedulers run_job() callback) we > need to separate fence allocation and fence emitting.At least from the description that sounds like it might be illegal. Daniel can you take a look as well. What exactly are you doing here? Regards, Christian.> > Signed-off-by: Danilo Krummrich <dakr at redhat.com> > --- > drivers/gpu/drm/nouveau/dispnv04/crtc.c | 9 ++++- > drivers/gpu/drm/nouveau/nouveau_bo.c | 52 +++++++++++++++---------- > drivers/gpu/drm/nouveau/nouveau_chan.c | 6 ++- > drivers/gpu/drm/nouveau/nouveau_dmem.c | 9 +++-- > drivers/gpu/drm/nouveau/nouveau_fence.c | 16 +++----- > drivers/gpu/drm/nouveau/nouveau_fence.h | 3 +- > drivers/gpu/drm/nouveau/nouveau_gem.c | 5 ++- > 7 files changed, 59 insertions(+), 41 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c > index a6f2e681bde9..a34924523133 100644 > --- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c > +++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c > @@ -1122,11 +1122,18 @@ nv04_page_flip_emit(struct nouveau_channel *chan, > PUSH_NVSQ(push, NV_SW, NV_SW_PAGE_FLIP, 0x00000000); > PUSH_KICK(push); > > - ret = nouveau_fence_new(chan, false, pfence); > + ret = nouveau_fence_new(pfence); > if (ret) > goto fail; > > + ret = nouveau_fence_emit(*pfence, chan); > + if (ret) > + goto fail_fence_unref; > + > return 0; > + > +fail_fence_unref: > + nouveau_fence_unref(pfence); > fail: > spin_lock_irqsave(&dev->event_lock, flags); > list_del(&s->head); > diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c > index 057bc995f19b..e9cbbf594e6f 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_bo.c > +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c > @@ -820,29 +820,39 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, > mutex_lock(&cli->mutex); > else > mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING); > + > ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, ctx->interruptible); > - if (ret == 0) { > - ret = drm->ttm.move(chan, bo, bo->resource, new_reg); > - if (ret == 0) { > - ret = nouveau_fence_new(chan, false, &fence); > - if (ret == 0) { > - /* TODO: figure out a better solution here > - * > - * wait on the fence here explicitly as going through > - * ttm_bo_move_accel_cleanup somehow doesn't seem to do it. > - * > - * Without this the operation can timeout and we'll fallback to a > - * software copy, which might take several minutes to finish. > - */ > - nouveau_fence_wait(fence, false, false); > - ret = ttm_bo_move_accel_cleanup(bo, > - &fence->base, > - evict, false, > - new_reg); > - nouveau_fence_unref(&fence); > - } > - } > + if (ret) > + goto out_unlock; > + > + ret = drm->ttm.move(chan, bo, bo->resource, new_reg); > + if (ret) > + goto out_unlock; > + > + ret = nouveau_fence_new(&fence); > + if (ret) > + goto out_unlock; > + > + ret = nouveau_fence_emit(fence, chan); > + if (ret) { > + nouveau_fence_unref(&fence); > + goto out_unlock; > } > + > + /* TODO: figure out a better solution here > + * > + * wait on the fence here explicitly as going through > + * ttm_bo_move_accel_cleanup somehow doesn't seem to do it. > + * > + * Without this the operation can timeout and we'll fallback to a > + * software copy, which might take several minutes to finish. > + */ > + nouveau_fence_wait(fence, false, false); > + ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false, > + new_reg); > + nouveau_fence_unref(&fence); > + > +out_unlock: > mutex_unlock(&cli->mutex); > return ret; > } > diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c > index 6d639314250a..f69be4c8f9f2 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_chan.c > +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c > @@ -62,9 +62,11 @@ nouveau_channel_idle(struct nouveau_channel *chan) > struct nouveau_fence *fence = NULL; > int ret; > > - ret = nouveau_fence_new(chan, false, &fence); > + ret = nouveau_fence_new(&fence); > if (!ret) { > - ret = nouveau_fence_wait(fence, false, false); > + ret = nouveau_fence_emit(fence, chan); > + if (!ret) > + ret = nouveau_fence_wait(fence, false, false); > nouveau_fence_unref(&fence); > } > > diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c > index 789857faa048..4ad40e42cae1 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c > +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c > @@ -209,7 +209,8 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) > goto done; > } > > - nouveau_fence_new(dmem->migrate.chan, false, &fence); > + if (!nouveau_fence_new(&fence)) > + nouveau_fence_emit(fence, dmem->migrate.chan); > migrate_vma_pages(&args); > nouveau_dmem_fence_done(&fence); > dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); > @@ -402,7 +403,8 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk) > } > } > > - nouveau_fence_new(chunk->drm->dmem->migrate.chan, false, &fence); > + if (!nouveau_fence_new(&fence)) > + nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan); > migrate_device_pages(src_pfns, dst_pfns, npages); > nouveau_dmem_fence_done(&fence); > migrate_device_finalize(src_pfns, dst_pfns, npages); > @@ -675,7 +677,8 @@ static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm, > addr += PAGE_SIZE; > } > > - nouveau_fence_new(drm->dmem->migrate.chan, false, &fence); > + if (!nouveau_fence_new(&fence)) > + nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan); > migrate_vma_pages(args); > nouveau_dmem_fence_done(&fence); > nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i); > diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c > index ee5e9d40c166..e946408f945b 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_fence.c > +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c > @@ -210,6 +210,9 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan) > struct nouveau_fence_priv *priv = (void*)chan->drm->fence; > int ret; > > + if (unlikely(!chan->fence)) > + return -ENODEV; > + > fence->channel = chan; > fence->timeout = jiffies + (15 * HZ); > > @@ -396,25 +399,16 @@ nouveau_fence_unref(struct nouveau_fence **pfence) > } > > int > -nouveau_fence_new(struct nouveau_channel *chan, bool sysmem, > - struct nouveau_fence **pfence) > +nouveau_fence_new(struct nouveau_fence **pfence) > { > struct nouveau_fence *fence; > - int ret = 0; > - > - if (unlikely(!chan->fence)) > - return -ENODEV; > > fence = kzalloc(sizeof(*fence), GFP_KERNEL); > if (!fence) > return -ENOMEM; > > - ret = nouveau_fence_emit(fence, chan); > - if (ret) > - nouveau_fence_unref(&fence); > - > *pfence = fence; > - return ret; > + return 0; > } > > static const char *nouveau_fence_get_get_driver_name(struct dma_fence *fence) > diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h > index 0ca2bc85adf6..7c73c7c9834a 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_fence.h > +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h > @@ -17,8 +17,7 @@ struct nouveau_fence { > unsigned long timeout; > }; > > -int nouveau_fence_new(struct nouveau_channel *, bool sysmem, > - struct nouveau_fence **); > +int nouveau_fence_new(struct nouveau_fence **); > void nouveau_fence_unref(struct nouveau_fence **); > > int nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *); > diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c > index a48f42aaeab9..9c8d1b911a01 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_gem.c > +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c > @@ -873,8 +873,11 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, > } > } > > - ret = nouveau_fence_new(chan, false, &fence); > + ret = nouveau_fence_new(&fence); > + if (!ret) > + ret = nouveau_fence_emit(fence, chan); > if (ret) { > + nouveau_fence_unref(&fence); > NV_PRINTK(err, cli, "error fencing pushbuf: %d\n", ret); > WIND_RING(chan); > goto out;
Danilo Krummrich
2023-Aug-07 18:54 UTC
[Nouveau] [PATCH drm-misc-next v9 06/11] drm/nouveau: fence: separate fence alloc and emit
Hi Christian, On 8/7/23 20:07, Christian K?nig wrote:> Am 03.08.23 um 18:52 schrieb Danilo Krummrich: >> The new (VM_BIND) UAPI exports DMA fences through DRM syncobjs. Hence, >> in order to emit fences within DMA fence signalling critical sections >> (e.g. as typically done in the DRM GPU schedulers run_job() callback) we >> need to separate fence allocation and fence emitting. > > At least from the description that sounds like it might be illegal. > Daniel can you take a look as well. > > What exactly are you doing here?I'm basically doing exactly the same as amdgpu_fence_emit() does in amdgpu_ib_schedule() called by amdgpu_job_run(). The difference - and this is what this patch is for - is that I separate the fence allocation from emitting the fence, such that the fence structure is allocated before the job is submitted to the GPU scheduler. amdgpu solves this with GFP_ATOMIC within amdgpu_fence_emit() to allocate the fence structure in this case. - Danilo> > Regards, > Christian. > >> >> Signed-off-by: Danilo Krummrich <dakr at redhat.com> >> --- >> ? drivers/gpu/drm/nouveau/dispnv04/crtc.c |? 9 ++++- >> ? drivers/gpu/drm/nouveau/nouveau_bo.c??? | 52 +++++++++++++++---------- >> ? drivers/gpu/drm/nouveau/nouveau_chan.c? |? 6 ++- >> ? drivers/gpu/drm/nouveau/nouveau_dmem.c? |? 9 +++-- >> ? drivers/gpu/drm/nouveau/nouveau_fence.c | 16 +++----- >> ? drivers/gpu/drm/nouveau/nouveau_fence.h |? 3 +- >> ? drivers/gpu/drm/nouveau/nouveau_gem.c?? |? 5 ++- >> ? 7 files changed, 59 insertions(+), 41 deletions(-) >> >> diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c >> b/drivers/gpu/drm/nouveau/dispnv04/crtc.c >> index a6f2e681bde9..a34924523133 100644 >> --- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c >> +++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c >> @@ -1122,11 +1122,18 @@ nv04_page_flip_emit(struct nouveau_channel *chan, >> ????? PUSH_NVSQ(push, NV_SW, NV_SW_PAGE_FLIP, 0x00000000); >> ????? PUSH_KICK(push); >> -??? ret = nouveau_fence_new(chan, false, pfence); >> +??? ret = nouveau_fence_new(pfence); >> ????? if (ret) >> ????????? goto fail; >> +??? ret = nouveau_fence_emit(*pfence, chan); >> +??? if (ret) >> +??????? goto fail_fence_unref; >> + >> ????? return 0; >> + >> +fail_fence_unref: >> +??? nouveau_fence_unref(pfence); >> ? fail: >> ????? spin_lock_irqsave(&dev->event_lock, flags); >> ????? list_del(&s->head); >> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c >> b/drivers/gpu/drm/nouveau/nouveau_bo.c >> index 057bc995f19b..e9cbbf594e6f 100644 >> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c >> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c >> @@ -820,29 +820,39 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object >> *bo, int evict, >> ????????? mutex_lock(&cli->mutex); >> ????? else >> ????????? mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING); >> + >> ????? ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, >> ctx->interruptible); >> -??? if (ret == 0) { >> -??????? ret = drm->ttm.move(chan, bo, bo->resource, new_reg); >> -??????? if (ret == 0) { >> -??????????? ret = nouveau_fence_new(chan, false, &fence); >> -??????????? if (ret == 0) { >> -??????????????? /* TODO: figure out a better solution here >> -???????????????? * >> -???????????????? * wait on the fence here explicitly as going through >> -???????????????? * ttm_bo_move_accel_cleanup somehow doesn't seem to >> do it. >> -???????????????? * >> -???????????????? * Without this the operation can timeout and we'll >> fallback to a >> -???????????????? * software copy, which might take several minutes to >> finish. >> -???????????????? */ >> -??????????????? nouveau_fence_wait(fence, false, false); >> -??????????????? ret = ttm_bo_move_accel_cleanup(bo, >> -??????????????????????????????? &fence->base, >> -??????????????????????????????? evict, false, >> -??????????????????????????????? new_reg); >> -??????????????? nouveau_fence_unref(&fence); >> -??????????? } >> -??????? } >> +??? if (ret) >> +??????? goto out_unlock; >> + >> +??? ret = drm->ttm.move(chan, bo, bo->resource, new_reg); >> +??? if (ret) >> +??????? goto out_unlock; >> + >> +??? ret = nouveau_fence_new(&fence); >> +??? if (ret) >> +??????? goto out_unlock; >> + >> +??? ret = nouveau_fence_emit(fence, chan); >> +??? if (ret) { >> +??????? nouveau_fence_unref(&fence); >> +??????? goto out_unlock; >> ????? } >> + >> +??? /* TODO: figure out a better solution here >> +???? * >> +???? * wait on the fence here explicitly as going through >> +???? * ttm_bo_move_accel_cleanup somehow doesn't seem to do it. >> +???? * >> +???? * Without this the operation can timeout and we'll fallback to a >> +???? * software copy, which might take several minutes to finish. >> +???? */ >> +??? nouveau_fence_wait(fence, false, false); >> +??? ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false, >> +??????????????????? new_reg); >> +??? nouveau_fence_unref(&fence); >> + >> +out_unlock: >> ????? mutex_unlock(&cli->mutex); >> ????? return ret; >> ? } >> diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c >> b/drivers/gpu/drm/nouveau/nouveau_chan.c >> index 6d639314250a..f69be4c8f9f2 100644 >> --- a/drivers/gpu/drm/nouveau/nouveau_chan.c >> +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c >> @@ -62,9 +62,11 @@ nouveau_channel_idle(struct nouveau_channel *chan) >> ????????? struct nouveau_fence *fence = NULL; >> ????????? int ret; >> -??????? ret = nouveau_fence_new(chan, false, &fence); >> +??????? ret = nouveau_fence_new(&fence); >> ????????? if (!ret) { >> -??????????? ret = nouveau_fence_wait(fence, false, false); >> +??????????? ret = nouveau_fence_emit(fence, chan); >> +??????????? if (!ret) >> +??????????????? ret = nouveau_fence_wait(fence, false, false); >> ????????????? nouveau_fence_unref(&fence); >> ????????? } >> diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c >> b/drivers/gpu/drm/nouveau/nouveau_dmem.c >> index 789857faa048..4ad40e42cae1 100644 >> --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c >> +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c >> @@ -209,7 +209,8 @@ static vm_fault_t >> nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) >> ????????? goto done; >> ????? } >> -??? nouveau_fence_new(dmem->migrate.chan, false, &fence); >> +??? if (!nouveau_fence_new(&fence)) >> +??????? nouveau_fence_emit(fence, dmem->migrate.chan); >> ????? migrate_vma_pages(&args); >> ????? nouveau_dmem_fence_done(&fence); >> ????? dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, >> DMA_BIDIRECTIONAL); >> @@ -402,7 +403,8 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk >> *chunk) >> ????????? } >> ????? } >> -??? nouveau_fence_new(chunk->drm->dmem->migrate.chan, false, &fence); >> +??? if (!nouveau_fence_new(&fence)) >> +??????? nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan); >> ????? migrate_device_pages(src_pfns, dst_pfns, npages); >> ????? nouveau_dmem_fence_done(&fence); >> ????? migrate_device_finalize(src_pfns, dst_pfns, npages); >> @@ -675,7 +677,8 @@ static void nouveau_dmem_migrate_chunk(struct >> nouveau_drm *drm, >> ????????? addr += PAGE_SIZE; >> ????? } >> -??? nouveau_fence_new(drm->dmem->migrate.chan, false, &fence); >> +??? if (!nouveau_fence_new(&fence)) >> +??????? nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan); >> ????? migrate_vma_pages(args); >> ????? nouveau_dmem_fence_done(&fence); >> ????? nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i); >> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c >> b/drivers/gpu/drm/nouveau/nouveau_fence.c >> index ee5e9d40c166..e946408f945b 100644 >> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c >> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c >> @@ -210,6 +210,9 @@ nouveau_fence_emit(struct nouveau_fence *fence, >> struct nouveau_channel *chan) >> ????? struct nouveau_fence_priv *priv = (void*)chan->drm->fence; >> ????? int ret; >> +??? if (unlikely(!chan->fence)) >> +??????? return -ENODEV; >> + >> ????? fence->channel? = chan; >> ????? fence->timeout? = jiffies + (15 * HZ); >> @@ -396,25 +399,16 @@ nouveau_fence_unref(struct nouveau_fence **pfence) >> ? } >> ? int >> -nouveau_fence_new(struct nouveau_channel *chan, bool sysmem, >> -????????? struct nouveau_fence **pfence) >> +nouveau_fence_new(struct nouveau_fence **pfence) >> ? { >> ????? struct nouveau_fence *fence; >> -??? int ret = 0; >> - >> -??? if (unlikely(!chan->fence)) >> -??????? return -ENODEV; >> ????? fence = kzalloc(sizeof(*fence), GFP_KERNEL); >> ????? if (!fence) >> ????????? return -ENOMEM; >> -??? ret = nouveau_fence_emit(fence, chan); >> -??? if (ret) >> -??????? nouveau_fence_unref(&fence); >> - >> ????? *pfence = fence; >> -??? return ret; >> +??? return 0; >> ? } >> ? static const char *nouveau_fence_get_get_driver_name(struct >> dma_fence *fence) >> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h >> b/drivers/gpu/drm/nouveau/nouveau_fence.h >> index 0ca2bc85adf6..7c73c7c9834a 100644 >> --- a/drivers/gpu/drm/nouveau/nouveau_fence.h >> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h >> @@ -17,8 +17,7 @@ struct nouveau_fence { >> ????? unsigned long timeout; >> ? }; >> -int? nouveau_fence_new(struct nouveau_channel *, bool sysmem, >> -?????????????? struct nouveau_fence **); >> +int? nouveau_fence_new(struct nouveau_fence **); >> ? void nouveau_fence_unref(struct nouveau_fence **); >> ? int? nouveau_fence_emit(struct nouveau_fence *, struct >> nouveau_channel *); >> diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c >> b/drivers/gpu/drm/nouveau/nouveau_gem.c >> index a48f42aaeab9..9c8d1b911a01 100644 >> --- a/drivers/gpu/drm/nouveau/nouveau_gem.c >> +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c >> @@ -873,8 +873,11 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, >> void *data, >> ????????? } >> ????? } >> -??? ret = nouveau_fence_new(chan, false, &fence); >> +??? ret = nouveau_fence_new(&fence); >> +??? if (!ret) >> +??????? ret = nouveau_fence_emit(fence, chan); >> ????? if (ret) { >> +??????? nouveau_fence_unref(&fence); >> ????????? NV_PRINTK(err, cli, "error fencing pushbuf: %d\n", ret); >> ????????? WIND_RING(chan); >> ????????? goto out; >