Thomas Hellström
2023-Nov-02 17:09 UTC
[Nouveau] [PATCH drm-misc-next v8 09/12] drm/gpuvm: reference count drm_gpuvm structures
On Thu, 2023-11-02 at 00:31 +0100, Danilo Krummrich wrote:> Implement reference counting for struct drm_gpuvm. > > Signed-off-by: Danilo Krummrich <dakr at redhat.com> > --- > ?drivers/gpu/drm/drm_gpuvm.c??????????? | 44 +++++++++++++++++++----- > -- > ?drivers/gpu/drm/nouveau/nouveau_uvmm.c | 20 +++++++++--- > ?include/drm/drm_gpuvm.h??????????????? | 31 +++++++++++++++++- > ?3 files changed, 78 insertions(+), 17 deletions(-) > > diff --git a/drivers/gpu/drm/drm_gpuvm.c > b/drivers/gpu/drm/drm_gpuvm.c > index 53e2c406fb04..6a88eafc5229 100644 > --- a/drivers/gpu/drm/drm_gpuvm.c > +++ b/drivers/gpu/drm/drm_gpuvm.c > @@ -746,6 +746,8 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, const > char *name, > ????????gpuvm->rb.tree = RB_ROOT_CACHED; > ????????INIT_LIST_HEAD(&gpuvm->rb.list); > ? > +???????kref_init(&gpuvm->kref); > + > ????????gpuvm->name = name ? name : "unknown"; > ????????gpuvm->flags = flags; > ????????gpuvm->ops = ops; > @@ -770,15 +772,8 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, const > char *name, > ?} > ?EXPORT_SYMBOL_GPL(drm_gpuvm_init); > ? > -/** > - * drm_gpuvm_destroy() - cleanup a &drm_gpuvm > - * @gpuvm: pointer to the &drm_gpuvm to clean up > - * > - * Note that it is a bug to call this function on a manager that > still > - * holds GPU VA mappings. > - */ > -void > -drm_gpuvm_destroy(struct drm_gpuvm *gpuvm) > +static void > +drm_gpuvm_fini(struct drm_gpuvm *gpuvm) > ?{ > ????????gpuvm->name = NULL; > ? > @@ -790,7 +785,33 @@ drm_gpuvm_destroy(struct drm_gpuvm *gpuvm) > ? > ????????drm_gem_object_put(gpuvm->r_obj); > ?} > -EXPORT_SYMBOL_GPL(drm_gpuvm_destroy); > + > +static void > +drm_gpuvm_free(struct kref *kref) > +{ > +???????struct drm_gpuvm *gpuvm = container_of(kref, struct > drm_gpuvm, kref); > + > +???????if (drm_WARN_ON(gpuvm->drm, !gpuvm->ops->vm_free)) > +???????????????return; > + > +???????drm_gpuvm_fini(gpuvm); > + > +???????gpuvm->ops->vm_free(gpuvm); > +} > + > +/** > + * drm_gpuvm_bo_put() - drop a struct drm_gpuvm referencecopy-paste error in function name. Also it appears like xe might put a vm from irq context so we should document the context where this function call is allowable, and if applicable add a might_sleep(). If this function needs to sleep we can work around that in Xe by keeping an xe-private refcount for the xe vm container, but I'd like to avoid that if possible and piggy-back on the refcount introduced here.> + * @gpuvm: the &drm_gpuvm to release the reference of > + * > + * This releases a reference to @gpuvm. > + */ > +void > +drm_gpuvm_put(struct drm_gpuvm *gpuvm) > +{ > +???????if (gpuvm) > +???????????????kref_put(&gpuvm->kref, drm_gpuvm_free); > +} > +EXPORT_SYMBOL_GPL(drm_gpuvm_put); > ? > ?static int > ?__drm_gpuva_insert(struct drm_gpuvm *gpuvm, > @@ -843,7 +864,7 @@ drm_gpuva_insert(struct drm_gpuvm *gpuvm, > ????????if (unlikely(!drm_gpuvm_range_valid(gpuvm, addr, range))) > ????????????????return -EINVAL; > ? > -???????return __drm_gpuva_insert(gpuvm, va); > +???????return __drm_gpuva_insert(drm_gpuvm_get(gpuvm), va);Here we leak a reference if __drm_gpuva_insert() fails, and IMO the reference should be taken where the pointer holding the reference is assigned (in this case in __drm_gpuva_insert()), or document the reference transfer from the argument close to the assignment. But since a va itself is not refcounted it clearly can't outlive the vm, so is a reference really needed here? I'd suggest using an accessor that instead of using va->vm uses va->vm_bo->vm, to avoid needing to worry about the vm->vm refcountaltoghether. Thanks, Thomas
Danilo Krummrich
2023-Nov-02 17:32 UTC
[Nouveau] [PATCH drm-misc-next v8 09/12] drm/gpuvm: reference count drm_gpuvm structures
Hi Thomas, thanks for your timely response on that! On 11/2/23 18:09, Thomas Hellstr?m wrote:> On Thu, 2023-11-02 at 00:31 +0100, Danilo Krummrich wrote: >> Implement reference counting for struct drm_gpuvm. >> >> Signed-off-by: Danilo Krummrich <dakr at redhat.com> >> --- >> ?drivers/gpu/drm/drm_gpuvm.c??????????? | 44 +++++++++++++++++++----- >> -- >> ?drivers/gpu/drm/nouveau/nouveau_uvmm.c | 20 +++++++++--- >> ?include/drm/drm_gpuvm.h??????????????? | 31 +++++++++++++++++- >> ?3 files changed, 78 insertions(+), 17 deletions(-) >> >> diff --git a/drivers/gpu/drm/drm_gpuvm.c >> b/drivers/gpu/drm/drm_gpuvm.c >> index 53e2c406fb04..6a88eafc5229 100644 >> --- a/drivers/gpu/drm/drm_gpuvm.c >> +++ b/drivers/gpu/drm/drm_gpuvm.c >> @@ -746,6 +746,8 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, const >> char *name, >> ????????gpuvm->rb.tree = RB_ROOT_CACHED; >> ????????INIT_LIST_HEAD(&gpuvm->rb.list); >> >> +???????kref_init(&gpuvm->kref); >> + >> ????????gpuvm->name = name ? name : "unknown"; >> ????????gpuvm->flags = flags; >> ????????gpuvm->ops = ops; >> @@ -770,15 +772,8 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, const >> char *name, >> ?} >> ?EXPORT_SYMBOL_GPL(drm_gpuvm_init); >> >> -/** >> - * drm_gpuvm_destroy() - cleanup a &drm_gpuvm >> - * @gpuvm: pointer to the &drm_gpuvm to clean up >> - * >> - * Note that it is a bug to call this function on a manager that >> still >> - * holds GPU VA mappings. >> - */ >> -void >> -drm_gpuvm_destroy(struct drm_gpuvm *gpuvm) >> +static void >> +drm_gpuvm_fini(struct drm_gpuvm *gpuvm) >> ?{ >> ????????gpuvm->name = NULL; >> >> @@ -790,7 +785,33 @@ drm_gpuvm_destroy(struct drm_gpuvm *gpuvm) >> >> ????????drm_gem_object_put(gpuvm->r_obj); >> ?} >> -EXPORT_SYMBOL_GPL(drm_gpuvm_destroy); >> + >> +static void >> +drm_gpuvm_free(struct kref *kref) >> +{ >> +???????struct drm_gpuvm *gpuvm = container_of(kref, struct >> drm_gpuvm, kref); >> + >> +???????if (drm_WARN_ON(gpuvm->drm, !gpuvm->ops->vm_free)) >> +???????????????return; >> + >> +???????drm_gpuvm_fini(gpuvm); >> + >> +???????gpuvm->ops->vm_free(gpuvm); >> +} >> + >> +/** >> + * drm_gpuvm_bo_put() - drop a struct drm_gpuvm reference > copy-paste error in function name. > > Also it appears like xe might put a vm from irq context so we should > document the context where this function call is allowable, and if > applicable add a might_sleep().From GPUVM PoV I don't see why we can't call this from an IRQ context. It depends on the driver callbacks of GPUVM (->vm_free) and the resv GEM's free callback. Both are controlled by the driver. Hence, I don't see the need for a restriction here.> > If this function needs to sleep we can work around that in Xe by > keeping an xe-private refcount for the xe vm container, but I'd like to > avoid that if possible and piggy-back on the refcount introduced here. > >> + * @gpuvm: the &drm_gpuvm to release the reference of >> + * >> + * This releases a reference to @gpuvm. >> + */ >> +void >> +drm_gpuvm_put(struct drm_gpuvm *gpuvm) >> +{ >> +???????if (gpuvm) >> +???????????????kref_put(&gpuvm->kref, drm_gpuvm_free); >> +} >> +EXPORT_SYMBOL_GPL(drm_gpuvm_put); >> >> ?static int >> ?__drm_gpuva_insert(struct drm_gpuvm *gpuvm, >> @@ -843,7 +864,7 @@ drm_gpuva_insert(struct drm_gpuvm *gpuvm, >> ????????if (unlikely(!drm_gpuvm_range_valid(gpuvm, addr, range))) >> ????????????????return -EINVAL; >> >> -???????return __drm_gpuva_insert(gpuvm, va); >> +???????return __drm_gpuva_insert(drm_gpuvm_get(gpuvm), va); > > Here we leak a reference if __drm_gpuva_insert() fails, and IMO the > reference should be taken where the pointer holding the reference is > assigned (in this case in __drm_gpuva_insert()), or document the > reference transfer from the argument close to the assignment.Ah, good catch. I had it in __drm_gpuva_insert() originally, but that doesn't work, because __drm_gpuva_insert() is used to insert the kernel_alloc_node. And we need to __drm_gpuva_remove() the kernel_alloc_node from drm_gpuvm_fini(), which is called when the reference count is at zero already. In fact, the __* variants are only there to handle the kernel_alloc_node and this one clearly doesn't need reference counting.> > But since a va itself is not refcounted it clearly can't outlive the > vm, so is a reference really needed here?Well, technically, it can. It just doesn't make any sense and would be considered to be a bug. The reference count comes in handy to prevent that in the first place. I'd like to keep the reference count and just fix up the code.> > I'd suggest using an accessor that instead of using va->vm uses va- >> vm_bo->vm, to avoid needing to worry about the vm->vm refcount > altoghether.No, I want to keep that optional. Drivers should be able to use GPUVM to track mappings without being required to implement everything else. I think PowerVR, for instance, currently uses GPUVM only to track mappings without everything else. - Danilo> > Thanks, > Thomas >