Christoph Hellwig
2020-Mar-16 17:52 UTC
[Nouveau] ensure device private pages have an owner
When acting on device private mappings a driver needs to know if the device (or other entity in case of kvmppc) actually owns this private mapping. This series adds an owner field and converts the migrate_vma code over to check it. I looked into doing the same for hmm_range_fault, but as far as I can tell that code has never been wired up to actually work for device private memory, so instead of trying to fix some unused code the second patch just remove the code. We can add it back once we have a working and fully tested code, and then should pass the expected owner in the hmm_range structure.
Christoph Hellwig
2020-Mar-16 17:52 UTC
[Nouveau] [PATCH 1/2] mm: handle multiple owners of device private pages in migrate_vma
Add a new opaque owner field to struct dev_pagemap, which will allow the hmm and migrate_vma code to identify who owns ZONE_DEVICE memory, and refuse to work on mappings not owned by the calling entity. Signed-off-by: Christoph Hellwig <hch at lst.de> --- arch/powerpc/kvm/book3s_hv_uvmem.c | 4 ++++ drivers/gpu/drm/nouveau/nouveau_dmem.c | 3 +++ include/linux/memremap.h | 4 ++++ include/linux/migrate.h | 2 ++ mm/migrate.c | 3 +++ 5 files changed, 16 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 79b1202b1c62..29ed52892d31 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -386,6 +386,7 @@ kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, mig.end = end; mig.src = &src_pfn; mig.dst = &dst_pfn; + mig.dev_private_owner = &kvmppc_uvmem_pgmap; /* * We come here with mmap_sem write lock held just for @@ -563,6 +564,7 @@ kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, mig.end = end; mig.src = &src_pfn; mig.dst = &dst_pfn; + mig.dev_private_owner = &kvmppc_uvmem_pgmap; mutex_lock(&kvm->arch.uvmem_lock); /* The requested page is already paged-out, nothing to do */ @@ -779,6 +781,8 @@ int kvmppc_uvmem_init(void) kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE; kvmppc_uvmem_pgmap.res = *res; kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops; + /* just one global instance: */ + kvmppc_uvmem_pgmap.owner = &kvmppc_uvmem_pgmap; addr = memremap_pages(&kvmppc_uvmem_pgmap, NUMA_NO_NODE); if (IS_ERR(addr)) { ret = PTR_ERR(addr); diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 0ad5d87b5a8e..7605c4c48985 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -176,6 +176,7 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) .end = vmf->address + PAGE_SIZE, .src = &src, .dst = &dst, + .dev_private_owner = drm->dev, }; /* @@ -526,6 +527,7 @@ nouveau_dmem_init(struct nouveau_drm *drm) drm->dmem->pagemap.type = MEMORY_DEVICE_PRIVATE; drm->dmem->pagemap.res = *res; drm->dmem->pagemap.ops = &nouveau_dmem_pagemap_ops; + drm->dmem->pagemap.owner = drm->dev; if (IS_ERR(devm_memremap_pages(device, &drm->dmem->pagemap))) goto out_free; @@ -631,6 +633,7 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm, struct migrate_vma args = { .vma = vma, .start = start, + .dev_private_owner = drm->dev, }; unsigned long c, i; int ret = -ENOMEM; diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 6fefb09af7c3..60d97e8fd3c0 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -103,6 +103,9 @@ struct dev_pagemap_ops { * @type: memory type: see MEMORY_* in memory_hotplug.h * @flags: PGMAP_* flags to specify defailed behavior * @ops: method table + * @owner: an opaque pointer identifying the entity that manages this + * instance. Used by various helpers to make sure that no + * foreign ZONE_DEVICE memory is accessed. */ struct dev_pagemap { struct vmem_altmap altmap; @@ -113,6 +116,7 @@ struct dev_pagemap { enum memory_type type; unsigned int flags; const struct dev_pagemap_ops *ops; + void *owner; }; static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 72120061b7d4..4bbd8d732e53 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -196,6 +196,8 @@ struct migrate_vma { unsigned long npages; unsigned long start; unsigned long end; + + void *dev_private_owner; }; int migrate_vma_setup(struct migrate_vma *args); diff --git a/mm/migrate.c b/mm/migrate.c index b1092876e537..201e8fa627e0 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2267,6 +2267,9 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, goto next; page = device_private_entry_to_page(entry); + if (page->pgmap->owner != migrate->dev_private_owner) + goto next; + mpfn = migrate_pfn(page_to_pfn(page)) | MIGRATE_PFN_MIGRATE; if (is_write_device_private_entry(entry)) -- 2.24.1
Christoph Hellwig
2020-Mar-16 17:52 UTC
[Nouveau] [PATCH 2/2] mm: remove device private page support from hmm_range_fault
No driver has actually used properly wire up and support this feature. There is various code related to it in nouveau, but as far as I can tell it never actually got turned on, and the only changes since the initial commit are global cleanups. Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 - drivers/gpu/drm/nouveau/nouveau_dmem.c | 37 ------------------------- drivers/gpu/drm/nouveau/nouveau_dmem.h | 2 -- drivers/gpu/drm/nouveau/nouveau_svm.c | 3 -- include/linux/hmm.h | 2 -- mm/hmm.c | 28 ------------------- 6 files changed, 73 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index dee446278417..90821ce5e6ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -776,7 +776,6 @@ struct amdgpu_ttm_tt { static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { (1 << 0), /* HMM_PFN_VALID */ (1 << 1), /* HMM_PFN_WRITE */ - 0 /* HMM_PFN_DEVICE_PRIVATE */ }; static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 7605c4c48985..42808efceaf2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -671,40 +671,3 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm, out: return ret; } - -static inline bool -nouveau_dmem_page(struct nouveau_drm *drm, struct page *page) -{ - return is_device_private_page(page) && drm->dmem == page_to_dmem(page); -} - -void -nouveau_dmem_convert_pfn(struct nouveau_drm *drm, - struct hmm_range *range) -{ - unsigned long i, npages; - - npages = (range->end - range->start) >> PAGE_SHIFT; - for (i = 0; i < npages; ++i) { - struct page *page; - uint64_t addr; - - page = hmm_device_entry_to_page(range, range->pfns[i]); - if (page == NULL) - continue; - - if (!(range->pfns[i] & range->flags[HMM_PFN_DEVICE_PRIVATE])) { - continue; - } - - if (!nouveau_dmem_page(drm, page)) { - WARN(1, "Some unknown device memory !\n"); - range->pfns[i] = 0; - continue; - } - - addr = nouveau_dmem_page_addr(page); - range->pfns[i] &= ((1UL << range->pfn_shift) - 1); - range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift; - } -} diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.h b/drivers/gpu/drm/nouveau/nouveau_dmem.h index 92394be5d649..1ac620b3d4fb 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.h +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.h @@ -38,8 +38,6 @@ int nouveau_dmem_migrate_vma(struct nouveau_drm *drm, unsigned long start, unsigned long end); -void nouveau_dmem_convert_pfn(struct nouveau_drm *drm, - struct hmm_range *range); #else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */ static inline void nouveau_dmem_init(struct nouveau_drm *drm) {} static inline void nouveau_dmem_fini(struct nouveau_drm *drm) {} diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index df9bf1fd1bc0..7e0376dca137 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -367,7 +367,6 @@ static const u64 nouveau_svm_pfn_flags[HMM_PFN_FLAG_MAX] = { [HMM_PFN_VALID ] = NVIF_VMM_PFNMAP_V0_V, [HMM_PFN_WRITE ] = NVIF_VMM_PFNMAP_V0_W, - [HMM_PFN_DEVICE_PRIVATE] = NVIF_VMM_PFNMAP_V0_VRAM, }; static const u64 @@ -558,8 +557,6 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm, break; } - nouveau_dmem_convert_pfn(drm, &range); - svmm->vmm->vmm.object.client->super = true; ret = nvif_object_ioctl(&svmm->vmm->vmm.object, data, size, NULL); svmm->vmm->vmm.object.client->super = false; diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 4bf8d6997b12..5e6034f105c3 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -74,7 +74,6 @@ * Flags: * HMM_PFN_VALID: pfn is valid. It has, at least, read permission. * HMM_PFN_WRITE: CPU page table has write permission set - * HMM_PFN_DEVICE_PRIVATE: private device memory (ZONE_DEVICE) * * The driver provides a flags array for mapping page protections to device * PTE bits. If the driver valid bit for an entry is bit 3, @@ -86,7 +85,6 @@ enum hmm_pfn_flag_e { HMM_PFN_VALID = 0, HMM_PFN_WRITE, - HMM_PFN_DEVICE_PRIVATE, HMM_PFN_FLAG_MAX }; diff --git a/mm/hmm.c b/mm/hmm.c index 180e398170b0..3d10485bf323 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -118,15 +118,6 @@ static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk, /* We aren't ask to do anything ... */ if (!(pfns & range->flags[HMM_PFN_VALID])) return; - /* If this is device memory then only fault if explicitly requested */ - if ((cpu_flags & range->flags[HMM_PFN_DEVICE_PRIVATE])) { - /* Do we fault on device memory ? */ - if (pfns & range->flags[HMM_PFN_DEVICE_PRIVATE]) { - *write_fault = pfns & range->flags[HMM_PFN_WRITE]; - *fault = true; - } - return; - } /* If CPU page table is not valid then we need to fault */ *fault = !(cpu_flags & range->flags[HMM_PFN_VALID]); @@ -259,25 +250,6 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, if (!pte_present(pte)) { swp_entry_t entry = pte_to_swp_entry(pte); - /* - * This is a special swap entry, ignore migration, use - * device and report anything else as error. - */ - if (is_device_private_entry(entry)) { - cpu_flags = range->flags[HMM_PFN_VALID] | - range->flags[HMM_PFN_DEVICE_PRIVATE]; - cpu_flags |= is_write_device_private_entry(entry) ? - range->flags[HMM_PFN_WRITE] : 0; - hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags, - &fault, &write_fault); - if (fault || write_fault) - goto fault; - *pfn = hmm_device_entry_from_pfn(range, - swp_offset(entry)); - *pfn |= cpu_flags; - return 0; - } - hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0, &fault, &write_fault); if (!fault && !write_fault) -- 2.24.1
Jason Gunthorpe
2020-Mar-16 18:17 UTC
[Nouveau] [PATCH 1/2] mm: handle multiple owners of device private pages in migrate_vma
On Mon, Mar 16, 2020 at 06:52:58PM +0100, Christoph Hellwig wrote:> Add a new opaque owner field to struct dev_pagemap, which will allow > the hmm and migrate_vma code to identify who owns ZONE_DEVICE memory, > and refuse to work on mappings not owned by the calling entity.Using a pointer seems like a good solution to me. Is this a bug fix? What is the downside for migrate on pages it doesn't work? I'm not up to speed on migrate.. Jason
Ralph Campbell
2020-Mar-16 18:42 UTC
[Nouveau] [PATCH 2/2] mm: remove device private page support from hmm_range_fault
On 3/16/20 10:52 AM, Christoph Hellwig wrote:> No driver has actually used properly wire up and support this feature. > There is various code related to it in nouveau, but as far as I can tell > it never actually got turned on, and the only changes since the initial > commit are global cleanups.This is not actually true. OpenCL 2.x does support SVM with nouveau and device private memory via clEnqueueSVMMigrateMem(). Also, Ben Skeggs has accepted a set of patches to map GPU memory after being migrated and this change would conflict with that.> Signed-off-by: Christoph Hellwig <hch at lst.de> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 - > drivers/gpu/drm/nouveau/nouveau_dmem.c | 37 ------------------------- > drivers/gpu/drm/nouveau/nouveau_dmem.h | 2 -- > drivers/gpu/drm/nouveau/nouveau_svm.c | 3 -- > include/linux/hmm.h | 2 -- > mm/hmm.c | 28 ------------------- > 6 files changed, 73 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index dee446278417..90821ce5e6ca 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -776,7 +776,6 @@ struct amdgpu_ttm_tt { > static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { > (1 << 0), /* HMM_PFN_VALID */ > (1 << 1), /* HMM_PFN_WRITE */ > - 0 /* HMM_PFN_DEVICE_PRIVATE */ > }; > > static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { > diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c > index 7605c4c48985..42808efceaf2 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c > +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c > @@ -671,40 +671,3 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm, > out: > return ret; > } > - > -static inline bool > -nouveau_dmem_page(struct nouveau_drm *drm, struct page *page) > -{ > - return is_device_private_page(page) && drm->dmem == page_to_dmem(page); > -} > - > -void > -nouveau_dmem_convert_pfn(struct nouveau_drm *drm, > - struct hmm_range *range) > -{ > - unsigned long i, npages; > - > - npages = (range->end - range->start) >> PAGE_SHIFT; > - for (i = 0; i < npages; ++i) { > - struct page *page; > - uint64_t addr; > - > - page = hmm_device_entry_to_page(range, range->pfns[i]); > - if (page == NULL) > - continue; > - > - if (!(range->pfns[i] & range->flags[HMM_PFN_DEVICE_PRIVATE])) { > - continue; > - } > - > - if (!nouveau_dmem_page(drm, page)) { > - WARN(1, "Some unknown device memory !\n"); > - range->pfns[i] = 0; > - continue; > - } > - > - addr = nouveau_dmem_page_addr(page); > - range->pfns[i] &= ((1UL << range->pfn_shift) - 1); > - range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift; > - } > -} > diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.h b/drivers/gpu/drm/nouveau/nouveau_dmem.h > index 92394be5d649..1ac620b3d4fb 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_dmem.h > +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.h > @@ -38,8 +38,6 @@ int nouveau_dmem_migrate_vma(struct nouveau_drm *drm, > unsigned long start, > unsigned long end); > > -void nouveau_dmem_convert_pfn(struct nouveau_drm *drm, > - struct hmm_range *range); > #else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */ > static inline void nouveau_dmem_init(struct nouveau_drm *drm) {} > static inline void nouveau_dmem_fini(struct nouveau_drm *drm) {} > diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c > index df9bf1fd1bc0..7e0376dca137 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_svm.c > +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c > @@ -367,7 +367,6 @@ static const u64 > nouveau_svm_pfn_flags[HMM_PFN_FLAG_MAX] = { > [HMM_PFN_VALID ] = NVIF_VMM_PFNMAP_V0_V, > [HMM_PFN_WRITE ] = NVIF_VMM_PFNMAP_V0_W, > - [HMM_PFN_DEVICE_PRIVATE] = NVIF_VMM_PFNMAP_V0_VRAM, > }; > > static const u64 > @@ -558,8 +557,6 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm, > break; > } > > - nouveau_dmem_convert_pfn(drm, &range); > - > svmm->vmm->vmm.object.client->super = true; > ret = nvif_object_ioctl(&svmm->vmm->vmm.object, data, size, NULL); > svmm->vmm->vmm.object.client->super = false; > diff --git a/include/linux/hmm.h b/include/linux/hmm.h > index 4bf8d6997b12..5e6034f105c3 100644 > --- a/include/linux/hmm.h > +++ b/include/linux/hmm.h > @@ -74,7 +74,6 @@ > * Flags: > * HMM_PFN_VALID: pfn is valid. It has, at least, read permission. > * HMM_PFN_WRITE: CPU page table has write permission set > - * HMM_PFN_DEVICE_PRIVATE: private device memory (ZONE_DEVICE) > * > * The driver provides a flags array for mapping page protections to device > * PTE bits. If the driver valid bit for an entry is bit 3, > @@ -86,7 +85,6 @@ > enum hmm_pfn_flag_e { > HMM_PFN_VALID = 0, > HMM_PFN_WRITE, > - HMM_PFN_DEVICE_PRIVATE, > HMM_PFN_FLAG_MAX > }; > > diff --git a/mm/hmm.c b/mm/hmm.c > index 180e398170b0..3d10485bf323 100644 > --- a/mm/hmm.c > +++ b/mm/hmm.c > @@ -118,15 +118,6 @@ static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk, > /* We aren't ask to do anything ... */ > if (!(pfns & range->flags[HMM_PFN_VALID])) > return; > - /* If this is device memory then only fault if explicitly requested */ > - if ((cpu_flags & range->flags[HMM_PFN_DEVICE_PRIVATE])) { > - /* Do we fault on device memory ? */ > - if (pfns & range->flags[HMM_PFN_DEVICE_PRIVATE]) { > - *write_fault = pfns & range->flags[HMM_PFN_WRITE]; > - *fault = true; > - } > - return; > - } > > /* If CPU page table is not valid then we need to fault */ > *fault = !(cpu_flags & range->flags[HMM_PFN_VALID]); > @@ -259,25 +250,6 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, > if (!pte_present(pte)) { > swp_entry_t entry = pte_to_swp_entry(pte); > > - /* > - * This is a special swap entry, ignore migration, use > - * device and report anything else as error. > - */ > - if (is_device_private_entry(entry)) { > - cpu_flags = range->flags[HMM_PFN_VALID] | > - range->flags[HMM_PFN_DEVICE_PRIVATE]; > - cpu_flags |= is_write_device_private_entry(entry) ? > - range->flags[HMM_PFN_WRITE] : 0; > - hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags, > - &fault, &write_fault); > - if (fault || write_fault) > - goto fault; > - *pfn = hmm_device_entry_from_pfn(range, > - swp_offset(entry)); > - *pfn |= cpu_flags; > - return 0; > - } > - > hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0, &fault, > &write_fault); > if (!fault && !write_fault) >
Possibly Parallel Threads
- ensure device private pages have an owner v2
- [PATCH hmm 0/5] Adjust hmm_range_fault() API
- [PATCH hmm v2 0/5] Adjust hmm_range_fault() API
- [PATCH 2/2] mm: remove device private page support from hmm_range_fault
- hmm_range_fault related fixes and legacy API removal v3