Caterina Shablia
2025-Jul-07 17:04 UTC
[PATCH v4 1/7] drm/panthor: Add support for atomic page table updates
From: Boris Brezillon <boris.brezillon at collabora.com>

Move the lock/flush_mem operations around the gpuvm_sm_map() calls so
we can implement true atomic page updates, where any access in the
locked range done by the GPU has to wait for the page table updates
to land before proceeding.

This is needed for vkQueueBindSparse(), so we can replace the dummy
page mapped over the entire object by actual BO-backed pages in an
atomic way.

Signed-off-by: Boris Brezillon <boris.brezillon at collabora.com>
Signed-off-by: Caterina Shablia <caterina.shablia at collabora.com>
---
 drivers/gpu/drm/panthor/panthor_mmu.c | 65 +++++++++++++++++++++++++--
 1 file changed, 62 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index b39ea6acc6a9..9caaba03c5eb 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -387,6 +387,15 @@ struct panthor_vm {
 	 * flagged as faulty as a result.
 	 */
 	bool unhandled_fault;
+
+	/** @locked_region: Information about the currently locked region. */
+	struct {
+		/** @locked_region.start: Start of the locked region. */
+		u64 start;
+
+		/** @locked_region.size: Size of the locked region. */
+		u64 size;
+	} locked_region;
 };
 
 /**
@@ -775,6 +784,10 @@ int panthor_vm_active(struct panthor_vm *vm)
 	}
 
 	ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr);
+	if (!ret && vm->locked_region.size) {
+		lock_region(ptdev, vm->as.id, vm->locked_region.start, vm->locked_region.size);
+		ret = wait_ready(ptdev, vm->as.id);
+	}
 
 out_make_active:
 	if (!ret) {
@@ -902,6 +915,9 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
 	struct io_pgtable_ops *ops = vm->pgtbl_ops;
 	u64 offset = 0;
 
+	drm_WARN_ON(&ptdev->base,
+		    (iova < vm->locked_region.start) ||
+		    (iova + size > vm->locked_region.start + vm->locked_region.size));
 	drm_dbg(&ptdev->base, "unmap: as=%d, iova=%llx, len=%llx", vm->as.id, iova, size);
 
 	while (offset < size) {
@@ -915,13 +931,12 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
 				iova + offset + unmapped_sz,
 				iova + offset + pgsize * pgcount,
 				iova, iova + size);
-			panthor_vm_flush_range(vm, iova, offset + unmapped_sz);
 			return -EINVAL;
 		}
 		offset += unmapped_sz;
 	}
 
-	return panthor_vm_flush_range(vm, iova, size);
+	return 0;
 }
 
 static int
@@ -938,6 +953,10 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
 	if (!size)
 		return 0;
 
+	drm_WARN_ON(&ptdev->base,
+		    (iova < vm->locked_region.start) ||
+		    (iova + size > vm->locked_region.start + vm->locked_region.size));
+
 	for_each_sgtable_dma_sg(sgt, sgl, count) {
 		dma_addr_t paddr = sg_dma_address(sgl);
 		size_t len = sg_dma_len(sgl);
@@ -985,7 +1004,7 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
 		offset = 0;
 	}
 
-	return panthor_vm_flush_range(vm, start_iova, iova - start_iova);
+	return 0;
 }
 
 static int flags_to_prot(u32 flags)
@@ -1654,6 +1673,38 @@ static const char *access_type_name(struct panthor_device *ptdev,
 	}
 }
 
+static int panthor_vm_lock_region(struct panthor_vm *vm, u64 start, u64 size)
+{
+	struct panthor_device *ptdev = vm->ptdev;
+	int ret = 0;
+
+	mutex_lock(&ptdev->mmu->as.slots_lock);
+	drm_WARN_ON(&ptdev->base, vm->locked_region.start || vm->locked_region.size);
+	vm->locked_region.start = start;
+	vm->locked_region.size = size;
+	if (vm->as.id >= 0) {
+		lock_region(ptdev, vm->as.id, start, size);
+		ret = wait_ready(ptdev, vm->as.id);
+	}
+	mutex_unlock(&ptdev->mmu->as.slots_lock);
+
+	return ret;
+}
+
+static void panthor_vm_unlock_region(struct panthor_vm *vm)
+{
+	struct panthor_device *ptdev = vm->ptdev;
+
+	mutex_lock(&ptdev->mmu->as.slots_lock);
+	if (vm->as.id >= 0) {
+		write_cmd(ptdev, vm->as.id, AS_COMMAND_FLUSH_MEM);
+		drm_WARN_ON(&ptdev->base, wait_ready(ptdev, vm->as.id));
+	}
+	vm->locked_region.start = 0;
+	vm->locked_region.size = 0;
+	mutex_unlock(&ptdev->mmu->as.slots_lock);
+}
+
 static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
 {
 	bool has_unhandled_faults = false;
@@ -2179,6 +2230,11 @@ panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op,
 
 	mutex_lock(&vm->op_lock);
 	vm->op_ctx = op;
+
+	ret = panthor_vm_lock_region(vm, op->va.addr, op->va.range);
+	if (ret)
+		goto out;
+
 	switch (op_type) {
 	case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP:
 		if (vm->unusable) {
@@ -2199,6 +2255,9 @@ panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op,
 		break;
 	}
 
+	panthor_vm_unlock_region(vm);
+
+out:
 	if (ret && flag_vm_unusable_on_failure)
 		vm->unusable = true;
 
-- 
2.47.2
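[Editor's note] For readers skimming the diff, here is a condensed, non-authoritative sketch of the flow the patch sets up around a VM bind operation: the VA range is locked first, the page table updates are applied without per-call flushes, and the single FLUSH_MEM in panthor_vm_unlock_region() publishes them and releases the lock in one step. The lock/unlock helper names come from the patch; the wrapper name panthor_vm_exec_map_locked() is hypothetical, the op_type switch and most error handling are trimmed, and the drm_gpuvm_sm_map() call is assumed to keep its current upstream signature.

/*
 * Illustrative sketch only, not driver code: a single MAP op showing
 * where the new lock/unlock helpers sit relative to the gpuvm call.
 */
static int panthor_vm_exec_map_locked(struct panthor_vm *vm,
				      struct panthor_vm_op_ctx *op)
{
	int ret;

	/* Stall GPU accesses to [va.addr, va.addr + va.range) until the
	 * region is flushed/unlocked again. */
	ret = panthor_vm_lock_region(vm, op->va.addr, op->va.range);
	if (ret)
		return ret;

	/* Page table updates inside the locked range no longer flush per
	 * call; they only have to land before the unlock below. */
	ret = drm_gpuvm_sm_map(&vm->base, vm, op->va.addr, op->va.range,
			       op->map.vm_bo->obj, op->map.bo_offset);

	/* FLUSH_MEM makes the new entries visible and releases the lock,
	 * so the GPU observes either the old or the new mapping, never a
	 * partially updated range. */
	panthor_vm_unlock_region(vm);

	return ret;
}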
Steven Price
2025-Jul-11 13:30 UTC
[PATCH v4 1/7] drm/panthor: Add support for atomic page table updates
On 07/07/2025 18:04, Caterina Shablia wrote:
> From: Boris Brezillon <boris.brezillon at collabora.com>
> 
> Move the lock/flush_mem operations around the gpuvm_sm_map() calls so
> we can implement true atomic page updates, where any access in the
> locked range done by the GPU has to wait for the page table updates
> to land before proceeding.
> 
> This is needed for vkQueueBindSparse(), so we can replace the dummy
> page mapped over the entire object by actual BO-backed pages in an
> atomic way.
> 
> Signed-off-by: Boris Brezillon <boris.brezillon at collabora.com>
> Signed-off-by: Caterina Shablia <caterina.shablia at collabora.com>
> ---
>  drivers/gpu/drm/panthor/panthor_mmu.c | 65 +++++++++++++++++++++++++--
>  1 file changed, 62 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> index b39ea6acc6a9..9caaba03c5eb 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> @@ -387,6 +387,15 @@ struct panthor_vm {
>  	 * flagged as faulty as a result.
>  	 */
>  	bool unhandled_fault;
> +
> +	/** @locked_region: Information about the currently locked region. */
> +	struct {
> +		/** @locked_region.start: Start of the locked region. */
> +		u64 start;
> +
> +		/** @locked_region.size: Size of the locked region. */
> +		u64 size;
> +	} locked_region;
>  };
>  
>  /**
> @@ -775,6 +784,10 @@ int panthor_vm_active(struct panthor_vm *vm)
>  	}
>  
>  	ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr);
> +	if (!ret && vm->locked_region.size) {
> +		lock_region(ptdev, vm->as.id, vm->locked_region.start, vm->locked_region.size);
> +		ret = wait_ready(ptdev, vm->as.id);
> +	}

Do we need to do this? It seems odd to restore an MMU context and
immediately set a lock region. Is there a good reason we can't just
WARN_ON if there's a lock region set in panthor_vm_idle()? I think we
need to briefly take vm->op_lock to ensure synchronisation, but that
doesn't seem a big issue.

Or perhaps there's a good reason that I'm missing?

>  
>  out_make_active:
>  	if (!ret) {
> @@ -902,6 +915,9 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
>  	struct io_pgtable_ops *ops = vm->pgtbl_ops;
>  	u64 offset = 0;
>  
> +	drm_WARN_ON(&ptdev->base,
> +		    (iova < vm->locked_region.start) ||
> +		    (iova + size > vm->locked_region.start + vm->locked_region.size));
>  	drm_dbg(&ptdev->base, "unmap: as=%d, iova=%llx, len=%llx", vm->as.id, iova, size);
>  
>  	while (offset < size) {
> @@ -915,13 +931,12 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
>  				iova + offset + unmapped_sz,
>  				iova + offset + pgsize * pgcount,
>  				iova, iova + size);
> -			panthor_vm_flush_range(vm, iova, offset + unmapped_sz);
>  			return -EINVAL;
>  		}
>  		offset += unmapped_sz;
>  	}
>  
> -	return panthor_vm_flush_range(vm, iova, size);
> +	return 0;
>  }
>  
>  static int
> @@ -938,6 +953,10 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
>  	if (!size)
>  		return 0;
>  
> +	drm_WARN_ON(&ptdev->base,
> +		    (iova < vm->locked_region.start) ||
> +		    (iova + size > vm->locked_region.start + vm->locked_region.size));
> +
>  	for_each_sgtable_dma_sg(sgt, sgl, count) {
>  		dma_addr_t paddr = sg_dma_address(sgl);
>  		size_t len = sg_dma_len(sgl);
> @@ -985,7 +1004,7 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
>  		offset = 0;
>  	}
>  
> -	return panthor_vm_flush_range(vm, start_iova, iova - start_iova);
> +	return 0;
>  }
>  
>  static int flags_to_prot(u32 flags)
> @@ -1654,6 +1673,38 @@ static const char *access_type_name(struct panthor_device *ptdev,
>  	}
>  }
>  
> +static int panthor_vm_lock_region(struct panthor_vm *vm, u64 start, u64 size)
> +{
> +	struct panthor_device *ptdev = vm->ptdev;
> +	int ret = 0;
> +
> +	mutex_lock(&ptdev->mmu->as.slots_lock);
> +	drm_WARN_ON(&ptdev->base, vm->locked_region.start || vm->locked_region.size);
> +	vm->locked_region.start = start;
> +	vm->locked_region.size = size;
> +	if (vm->as.id >= 0) {
> +		lock_region(ptdev, vm->as.id, start, size);
> +		ret = wait_ready(ptdev, vm->as.id);
> +	}
> +	mutex_unlock(&ptdev->mmu->as.slots_lock);
> +
> +	return ret;
> +}
> +
> +static void panthor_vm_unlock_region(struct panthor_vm *vm)
> +{
> +	struct panthor_device *ptdev = vm->ptdev;
> +
> +	mutex_lock(&ptdev->mmu->as.slots_lock);
> +	if (vm->as.id >= 0) {
> +		write_cmd(ptdev, vm->as.id, AS_COMMAND_FLUSH_MEM);
> +		drm_WARN_ON(&ptdev->base, wait_ready(ptdev, vm->as.id));
> +	}
> +	vm->locked_region.start = 0;
> +	vm->locked_region.size = 0;
> +	mutex_unlock(&ptdev->mmu->as.slots_lock);
> +}

Do we need to include a drm_dev_enter() somewhere here? I note that
panthor_vm_flush_range() has one and you've effectively replaced that
code with the above.

Thanks,
Steve

> +
>  static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
>  {
>  	bool has_unhandled_faults = false;
> @@ -2179,6 +2230,11 @@ panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op,
>  
>  	mutex_lock(&vm->op_lock);
>  	vm->op_ctx = op;
> +
> +	ret = panthor_vm_lock_region(vm, op->va.addr, op->va.range);
> +	if (ret)
> +		goto out;
> +
>  	switch (op_type) {
>  	case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP:
>  		if (vm->unusable) {
> @@ -2199,6 +2255,9 @@ panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op,
>  		break;
>  	}
>  
> +	panthor_vm_unlock_region(vm);
> +
> +out:
>  	if (ret && flag_vm_unusable_on_failure)
>  		vm->unusable = true;
> 
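[Editor's note] On the drm_dev_enter() question above, here is a minimal sketch of one way the unlock path could mirror what panthor_vm_flush_range() does today: skip the register accesses when the device has been unplugged, but still clear the software lock state. This only illustrates the review comment under that assumption; it is not the fix that was actually posted.

/*
 * Sketch of Steven's suggestion: guard the MMIO part of the unlock with
 * drm_dev_enter()/drm_dev_exit(), as panthor_vm_flush_range() does.
 */
static void panthor_vm_unlock_region(struct panthor_vm *vm)
{
	struct panthor_device *ptdev = vm->ptdev;
	int cookie;

	mutex_lock(&ptdev->mmu->as.slots_lock);
	if (vm->as.id >= 0 && drm_dev_enter(&ptdev->base, &cookie)) {
		/* Flush the locked range and release the hardware lock. */
		write_cmd(ptdev, vm->as.id, AS_COMMAND_FLUSH_MEM);
		drm_WARN_ON(&ptdev->base, wait_ready(ptdev, vm->as.id));
		drm_dev_exit(cookie);
	}
	/* Always drop the software-side lock state, even on unplug. */
	vm->locked_region.start = 0;
	vm->locked_region.size = 0;
	mutex_unlock(&ptdev->mmu->as.slots_lock);
}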