Ralph Campbell
2020-Mar-16 18:42 UTC
[Nouveau] [PATCH 2/2] mm: remove device private page support from hmm_range_fault
On 3/16/20 10:52 AM, Christoph Hellwig wrote:> No driver has actually used properly wire up and support this feature. > There is various code related to it in nouveau, but as far as I can tell > it never actually got turned on, and the only changes since the initial > commit are global cleanups.This is not actually true. OpenCL 2.x does support SVM with nouveau and device private memory via clEnqueueSVMMigrateMem(). Also, Ben Skeggs has accepted a set of patches to map GPU memory after being migrated and this change would conflict with that.> Signed-off-by: Christoph Hellwig <hch at lst.de> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 - > drivers/gpu/drm/nouveau/nouveau_dmem.c | 37 ------------------------- > drivers/gpu/drm/nouveau/nouveau_dmem.h | 2 -- > drivers/gpu/drm/nouveau/nouveau_svm.c | 3 -- > include/linux/hmm.h | 2 -- > mm/hmm.c | 28 ------------------- > 6 files changed, 73 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index dee446278417..90821ce5e6ca 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -776,7 +776,6 @@ struct amdgpu_ttm_tt { > static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { > (1 << 0), /* HMM_PFN_VALID */ > (1 << 1), /* HMM_PFN_WRITE */ > - 0 /* HMM_PFN_DEVICE_PRIVATE */ > }; > > static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { > diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c > index 7605c4c48985..42808efceaf2 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c > +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c > @@ -671,40 +671,3 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm, > out: > return ret; > } > - > -static inline bool > -nouveau_dmem_page(struct nouveau_drm *drm, struct page *page) > -{ > - return is_device_private_page(page) && drm->dmem == page_to_dmem(page); > -} > - > -void > -nouveau_dmem_convert_pfn(struct nouveau_drm 
*drm, > - struct hmm_range *range) > -{ > - unsigned long i, npages; > - > - npages = (range->end - range->start) >> PAGE_SHIFT; > - for (i = 0; i < npages; ++i) { > - struct page *page; > - uint64_t addr; > - > - page = hmm_device_entry_to_page(range, range->pfns[i]); > - if (page == NULL) > - continue; > - > - if (!(range->pfns[i] & range->flags[HMM_PFN_DEVICE_PRIVATE])) { > - continue; > - } > - > - if (!nouveau_dmem_page(drm, page)) { > - WARN(1, "Some unknown device memory !\n"); > - range->pfns[i] = 0; > - continue; > - } > - > - addr = nouveau_dmem_page_addr(page); > - range->pfns[i] &= ((1UL << range->pfn_shift) - 1); > - range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift; > - } > -} > diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.h b/drivers/gpu/drm/nouveau/nouveau_dmem.h > index 92394be5d649..1ac620b3d4fb 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_dmem.h > +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.h > @@ -38,8 +38,6 @@ int nouveau_dmem_migrate_vma(struct nouveau_drm *drm, > unsigned long start, > unsigned long end); > > -void nouveau_dmem_convert_pfn(struct nouveau_drm *drm, > - struct hmm_range *range); > #else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */ > static inline void nouveau_dmem_init(struct nouveau_drm *drm) {} > static inline void nouveau_dmem_fini(struct nouveau_drm *drm) {} > diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c > index df9bf1fd1bc0..7e0376dca137 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_svm.c > +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c > @@ -367,7 +367,6 @@ static const u64 > nouveau_svm_pfn_flags[HMM_PFN_FLAG_MAX] = { > [HMM_PFN_VALID ] = NVIF_VMM_PFNMAP_V0_V, > [HMM_PFN_WRITE ] = NVIF_VMM_PFNMAP_V0_W, > - [HMM_PFN_DEVICE_PRIVATE] = NVIF_VMM_PFNMAP_V0_VRAM, > }; > > static const u64 > @@ -558,8 +557,6 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm, > break; > } > > - nouveau_dmem_convert_pfn(drm, &range); > - > svmm->vmm->vmm.object.client->super = true; 
> ret = nvif_object_ioctl(&svmm->vmm->vmm.object, data, size, NULL); > svmm->vmm->vmm.object.client->super = false; > diff --git a/include/linux/hmm.h b/include/linux/hmm.h > index 4bf8d6997b12..5e6034f105c3 100644 > --- a/include/linux/hmm.h > +++ b/include/linux/hmm.h > @@ -74,7 +74,6 @@ > * Flags: > * HMM_PFN_VALID: pfn is valid. It has, at least, read permission. > * HMM_PFN_WRITE: CPU page table has write permission set > - * HMM_PFN_DEVICE_PRIVATE: private device memory (ZONE_DEVICE) > * > * The driver provides a flags array for mapping page protections to device > * PTE bits. If the driver valid bit for an entry is bit 3, > @@ -86,7 +85,6 @@ > enum hmm_pfn_flag_e { > HMM_PFN_VALID = 0, > HMM_PFN_WRITE, > - HMM_PFN_DEVICE_PRIVATE, > HMM_PFN_FLAG_MAX > }; > > diff --git a/mm/hmm.c b/mm/hmm.c > index 180e398170b0..3d10485bf323 100644 > --- a/mm/hmm.c > +++ b/mm/hmm.c > @@ -118,15 +118,6 @@ static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk, > /* We aren't ask to do anything ... */ > if (!(pfns & range->flags[HMM_PFN_VALID])) > return; > - /* If this is device memory then only fault if explicitly requested */ > - if ((cpu_flags & range->flags[HMM_PFN_DEVICE_PRIVATE])) { > - /* Do we fault on device memory ? */ > - if (pfns & range->flags[HMM_PFN_DEVICE_PRIVATE]) { > - *write_fault = pfns & range->flags[HMM_PFN_WRITE]; > - *fault = true; > - } > - return; > - } > > /* If CPU page table is not valid then we need to fault */ > *fault = !(cpu_flags & range->flags[HMM_PFN_VALID]); > @@ -259,25 +250,6 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, > if (!pte_present(pte)) { > swp_entry_t entry = pte_to_swp_entry(pte); > > - /* > - * This is a special swap entry, ignore migration, use > - * device and report anything else as error. 
> - */ > - if (is_device_private_entry(entry)) { > - cpu_flags = range->flags[HMM_PFN_VALID] | > - range->flags[HMM_PFN_DEVICE_PRIVATE]; > - cpu_flags |= is_write_device_private_entry(entry) ? > - range->flags[HMM_PFN_WRITE] : 0; > - hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags, > - &fault, &write_fault); > - if (fault || write_fault) > - goto fault; > - *pfn = hmm_device_entry_from_pfn(range, > - swp_offset(entry)); > - *pfn |= cpu_flags; > - return 0; > - } > - > hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0, &fault, > &write_fault); > if (!fault && !write_fault) >
Christoph Hellwig
2020-Mar-16 18:49 UTC
[Nouveau] [PATCH 2/2] mm: remove device private page support from hmm_range_fault
On Mon, Mar 16, 2020 at 11:42:19AM -0700, Ralph Campbell wrote:> > On 3/16/20 10:52 AM, Christoph Hellwig wrote: >> No driver has actually used properly wire up and support this feature. >> There is various code related to it in nouveau, but as far as I can tell >> it never actually got turned on, and the only changes since the initial >> commit are global cleanups. > > This is not actually true. OpenCL 2.x does support SVM with nouveau and > device private memory via clEnqueueSVMMigrateMem(). > Also, Ben Skeggs has accepted a set of patches to map GPU memory after being > migrated and this change would conflict with that.Can you explain me how we actually invoke this code? For that we'd need HMM_PFN_DEVICE_PRIVATE NVIF_VMM_PFNMAP_V0_VRAM set in ->pfns before calling hmm_range_fault, which isn't happening.
Christoph Hellwig
2020-Mar-16 18:58 UTC
[Nouveau] [PATCH 2/2] mm: remove device private page support from hmm_range_fault
On Mon, Mar 16, 2020 at 07:49:35PM +0100, Christoph Hellwig wrote:> On Mon, Mar 16, 2020 at 11:42:19AM -0700, Ralph Campbell wrote: > > > > On 3/16/20 10:52 AM, Christoph Hellwig wrote: > >> No driver has actually used properly wire up and support this feature. > >> There is various code related to it in nouveau, but as far as I can tell > >> it never actually got turned on, and the only changes since the initial > >> commit are global cleanups. > > > > This is not actually true. OpenCL 2.x does support SVM with nouveau and > > device private memory via clEnqueueSVMMigrateMem(). > > Also, Ben Skeggs has accepted a set of patches to map GPU memory after being > > migrated and this change would conflict with that. > > Can you explain me how we actually invoke this code? > > For that we'd need HMM_PFN_DEVICE_PRIVATE NVIF_VMM_PFNMAP_V0_VRAM > set in ->pfns before calling hmm_range_fault, which isn't happening.Ok, looks like the fault side is unused, but we need to keep the non-fault path in place. I'll fix up the patch.
Jason Gunthorpe
2020-Mar-16 19:04 UTC
[Nouveau] [PATCH 2/2] mm: remove device private page support from hmm_range_fault
On Mon, Mar 16, 2020 at 11:42:19AM -0700, Ralph Campbell wrote:> > On 3/16/20 10:52 AM, Christoph Hellwig wrote: > > No driver has actually used properly wire up and support this feature. > > There is various code related to it in nouveau, but as far as I can tell > > it never actually got turned on, and the only changes since the initial > > commit are global cleanups. > > This is not actually true. OpenCL 2.x does support SVM with nouveau and > device private memory via clEnqueueSVMMigrateMem(). > Also, Ben Skeggs has accepted a set of patches to map GPU memory after being > migrated and this change would conflict with that.Other than the page_to_dmem() possibly doing container_of on the wrong type pgmap, are there other bugs here to fix? Something like this is probably close to the right thing to fix that and work with Christoph's 1/2 patch - Ralph can you check, test, etc? diff --git a/mm/hmm.c b/mm/hmm.c index 9e8f68eb83287a..9fa4748da1b779 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -300,6 +300,20 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, range->flags[HMM_PFN_DEVICE_PRIVATE]; cpu_flags |= is_write_device_private_entry(entry) ? range->flags[HMM_PFN_WRITE] : 0; + + /* + * If the caller understands this kind of device_private + * page, then leave it as is, otherwise fault it. + */ + hmm_vma_walk->pgmap = get_dev_pagemap( + device_private_entry_to_pfn(entry), + hmm_vma_walk->pgmap); + if (!unlikely(!pgmap)) + return -EBUSY; + if (hmm_vma_walk->pgmap->owner != hmm_vma_walk->dev_private_owner) + cpu_flags = 0; + hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags, &fault, &write_fault); if (fault || write_fault) Jason
Christoph Hellwig
2020-Mar-16 19:07 UTC
[Nouveau] [PATCH 2/2] mm: remove device private page support from hmm_range_fault
On Mon, Mar 16, 2020 at 04:04:43PM -0300, Jason Gunthorpe wrote:> > This is not actually true. OpenCL 2.x does support SVM with nouveau and > > device private memory via clEnqueueSVMMigrateMem(). > > Also, Ben Skeggs has accepted a set of patches to map GPU memory after being > > migrated and this change would conflict with that. > > Other than the page_to_dmem() possibly doing container_of on the wrong > type pgmap, are there other bugs here to fix?It fixes that bug, as well as making sure everyone gets this check right by default.> Something like this is probably close to the right thing to fix that > and work with Christoph's 1/2 patch - Ralph can you check, test, etc?I don't think we need the get_dev_pagemap. For device private pages we can derive the page using device_private_entry_to_page and just use that. Let me resend a proper series.
Ralph Campbell
2020-Mar-16 19:56 UTC
[Nouveau] [PATCH 2/2] mm: remove device private page support from hmm_range_fault
On 3/16/20 11:49 AM, Christoph Hellwig wrote:> On Mon, Mar 16, 2020 at 11:42:19AM -0700, Ralph Campbell wrote: >> >> On 3/16/20 10:52 AM, Christoph Hellwig wrote: >>> No driver has actually used properly wire up and support this feature. >>> There is various code related to it in nouveau, but as far as I can tell >>> it never actually got turned on, and the only changes since the initial >>> commit are global cleanups. >> >> This is not actually true. OpenCL 2.x does support SVM with nouveau and >> device private memory via clEnqueueSVMMigrateMem(). >> Also, Ben Skeggs has accepted a set of patches to map GPU memory after being >> migrated and this change would conflict with that. > > Can you explain me how we actually invoke this code?GPU memory is allocated when the device private memory "struct page" is allocated. See where nouveau_dmem_chunk_alloc() calls nouveau_bo_new(). Then when a page is migrated to the GPU, the GPU memory physical address is just the offset into the "fake" PFN range allocated by devm_request_free_mem_region(). I'm looking into allocating GPU memory at the time of migration instead of when the device private memory struct pages are allocated but that is a future improvement. 
System memory is migrated to GPU memory: # mesa clEnqueueSVMMigrateMem() svm_migrate_op() q.svm_migrate() pipe->svm_migrate() // really nvc0_svm_migrate() drmCommandWrite() // in libdrm drmIoctl() ioctl() nouveau_drm_ioctl() // nouveau_drm.c drm_ioctl() nouveau_svmm_bind() nouveau_dmem_migrate_vma() migrate_vma_setup() nouveau_dmem_migrate_chunk() nouveau_dmem_migrate_copy_one() // allocate device private struct page dpage = nouveau_dmem_page_alloc_locked() dpage = nouveau_dmem_pages_alloc() // Get GPU VRAM physical address nouveau_dmem_page_addr(dpage) // This does the DMA to GPU memory drm->dmem->migrate.copy_func() migrate_vma_pages() migrate_vma_finalize() Without my recent patch set, there is no GPU page table entry created for this migrated memory so there will be a GPU fault which is handled in a worker thread: nouveau_svm_fault() // examine fault buffer entries and compute range of pages nouveau_range_fault() // This will fill in the pfns array with a device private entry PFN hmm_range_fault() // This sees the range->flags[HMM_PFN_DEVICE_PRIVATE] flag // and converts the HMM PFN to a GPU physical address nouveau_dmem_convert_pfn() // This sets up the GPU page tables nvif_object_ioctl()> For that we'd need HMM_PFN_DEVICE_PRIVATE NVIF_VMM_PFNMAP_V0_VRAM > set in ->pfns before calling hmm_range_fault, which isn't happening. >It is set by hmm_range_fault() via the range->flags[HMM_PFN_DEVICE_PRIVATE] entry when hmm_range_fault() sees a device private struct page. The call to nouveau_dmem_convert_pfn() is just replacing the "fake" PFN with the real PFN but not clearing/changing the read/write or VRAM/system memory PTE bits.
Jason Gunthorpe
2020-Mar-16 20:09 UTC
[Nouveau] [PATCH 2/2] mm: remove device private page support from hmm_range_fault
On Mon, Mar 16, 2020 at 07:49:35PM +0100, Christoph Hellwig wrote:> On Mon, Mar 16, 2020 at 11:42:19AM -0700, Ralph Campbell wrote: > > > > On 3/16/20 10:52 AM, Christoph Hellwig wrote: > >> No driver has actually used properly wire up and support this feature. > >> There is various code related to it in nouveau, but as far as I can tell > >> it never actually got turned on, and the only changes since the initial > >> commit are global cleanups. > > > > This is not actually true. OpenCL 2.x does support SVM with nouveau and > > device private memory via clEnqueueSVMMigrateMem(). > > Also, Ben Skeggs has accepted a set of patches to map GPU memory after being > > migrated and this change would conflict with that. > > Can you explain me how we actually invoke this code? > > For that we'd need HMM_PFN_DEVICE_PRIVATE NVIF_VMM_PFNMAP_V0_VRAM > set in ->pfns before calling hmm_range_fault, which isn't happening.Oh, I got tripped on this too The logic is backwards from what you'd think.. If you *set* HMM_PFN_DEVICE_PRIVATE then this triggers: hmm_pte_need_fault(): if ((cpu_flags & range->flags[HMM_PFN_DEVICE_PRIVATE])) { /* Do we fault on device memory ? */ if (pfns & range->flags[HMM_PFN_DEVICE_PRIVATE]) { *write_fault = pfns & range->flags[HMM_PFN_WRITE]; *fault = true; } return; } Ie if the cpu page is a DEVICE_PRIVATE and the caller sets HMM_PFN_DEVICE_PRIVATE in the input flags (pfns) then it always faults it and never sets HMM_PFN_DEVICE_PRIVATE in the output flags. So setting 0 enabled device_private support, and nouveau is Ok. AMDGPU is broken because it can't handle device private and can't set the flag to supress it. I was going to try and sort this out as part of getting rid of range->flags Jason
Possibly Parallel Threads
- [PATCH 2/2] mm: remove device private page support from hmm_range_fault
- [PATCH 3/4] mm: simplify device private page handling in hmm_range_fault
- [PATCH 2/2] mm: remove device private page support from hmm_range_fault
- [PATCH 3/4] mm: simplify device private page handling in hmm_range_fault
- ensure device private pages have an owner v2