Christoph Hellwig
2019-Jul-30 05:51 UTC
[Nouveau] hmm_range_fault related fixes and legacy API removal v3
Hi Jérôme, Ben, Felxi and Jason, below is a series against the hmm tree which cleans up various minor bits and allows HMM_MIRROR to be built on all architectures. Diffstat: 7 files changed, 81 insertions(+), 171 deletions(-) A git tree is also available at: git://git.infradead.org/users/hch/misc.git hmm-cleanups Gitweb: http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/hmm-cleanups
Christoph Hellwig
2019-Jul-30 05:51 UTC
[Nouveau] [PATCH 01/13] amdgpu: remove -EAGAIN handling for hmm_range_fault
hmm_range_fault can only return -EAGAIN if called with the block argument set to false, so remove the special handling for it. Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 12a59ac83f72..f0821638bbc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -778,7 +778,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) struct hmm_range *range; unsigned long i; uint64_t *pfns; - int retry = 0; int r = 0; if (!mm) /* Happens during process shutdown */ @@ -822,7 +821,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) hmm_range_register(range, mirror, start, start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT); -retry: /* * Just wait for range to be valid, safe to ignore return value as we * will use the return value of hmm_range_fault() below under the @@ -831,24 +829,12 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT); down_read(&mm->mmap_sem); - r = hmm_range_fault(range, 0); - if (unlikely(r < 0)) { - if (likely(r == -EAGAIN)) { - /* - * return -EAGAIN, mmap_sem is dropped - */ - if (retry++ < MAX_RETRY_HMM_RANGE_FAULT) - goto retry; - else - pr_err("Retry hmm fault too many times\n"); - } - - goto out_up_read; - } - up_read(&mm->mmap_sem); + if (unlikely(r < 0)) + goto out_free_pfns; + for (i = 0; i < ttm->num_pages; i++) { pages[i] = hmm_device_entry_to_page(range, pfns[i]); if (unlikely(!pages[i])) { @@ -864,9 +850,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) return 0; -out_up_read: - if (likely(r != -EAGAIN)) - up_read(&mm->mmap_sem); out_free_pfns: hmm_range_unregister(range); kvfree(pfns); -- 2.20.1
Christoph Hellwig
2019-Jul-30 05:51 UTC
[Nouveau] [PATCH 02/13] amdgpu: don't initialize range->list in amdgpu_hmm_init_range
The list is used to add the range to another list as an entry in the core hmm code, so there is no need to initialize it in a driver. Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index b698b423b25d..60b9fc9561d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -484,6 +484,5 @@ void amdgpu_hmm_init_range(struct hmm_range *range) range->flags = hmm_range_flags; range->values = hmm_range_values; range->pfn_shift = PAGE_SHIFT; - INIT_LIST_HEAD(&range->list); } } -- 2.20.1
Christoph Hellwig
2019-Jul-30 05:51 UTC
[Nouveau] [PATCH 03/13] nouveau: pass struct nouveau_svmm to nouveau_range_fault
This avoid having to abuse the vma field in struct hmm_range to unlock the mmap_sem. Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/gpu/drm/nouveau/nouveau_svm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index a74530b5a523..b889d5ec4c7e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -485,14 +485,14 @@ nouveau_range_done(struct hmm_range *range) } static int -nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range) +nouveau_range_fault(struct nouveau_svmm *svmm, struct hmm_range *range) { long ret; range->default_flags = 0; range->pfn_flags_mask = -1UL; - ret = hmm_range_register(range, mirror, + ret = hmm_range_register(range, &svmm->mirror, range->start, range->end, PAGE_SHIFT); if (ret) { @@ -689,7 +689,7 @@ nouveau_svm_fault(struct nvif_notify *notify) range.values = nouveau_svm_pfn_values; range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT; again: - ret = nouveau_range_fault(&svmm->mirror, &range); + ret = nouveau_range_fault(svmm, &range); if (ret == 0) { mutex_lock(&svmm->mutex); if (!nouveau_range_done(&range)) { -- 2.20.1
Christoph Hellwig
2019-Jul-30 05:51 UTC
[Nouveau] [PATCH 04/13] mm: remove the pgmap field from struct hmm_vma_walk
There is only a single place where the pgmap is passed over a function call, so replace it with local variables in the places where we deal with the pgmap. Signed-off-by: Christoph Hellwig <hch at lst.de> --- mm/hmm.c | 62 ++++++++++++++++++++++++-------------------------------- 1 file changed, 27 insertions(+), 35 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index 9a908902e4cc..d66fa29b42e0 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -278,7 +278,6 @@ EXPORT_SYMBOL(hmm_mirror_unregister); struct hmm_vma_walk { struct hmm_range *range; - struct dev_pagemap *pgmap; unsigned long last; unsigned int flags; }; @@ -475,6 +474,7 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk, #ifdef CONFIG_TRANSPARENT_HUGEPAGE struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_range *range = hmm_vma_walk->range; + struct dev_pagemap *pgmap = NULL; unsigned long pfn, npages, i; bool fault, write_fault; uint64_t cpu_flags; @@ -490,17 +490,14 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk, pfn = pmd_pfn(pmd) + pte_index(addr); for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++) { if (pmd_devmap(pmd)) { - hmm_vma_walk->pgmap = get_dev_pagemap(pfn, - hmm_vma_walk->pgmap); - if (unlikely(!hmm_vma_walk->pgmap)) + pgmap = get_dev_pagemap(pfn, pgmap); + if (unlikely(!pgmap)) return -EBUSY; } pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags; } - if (hmm_vma_walk->pgmap) { - put_dev_pagemap(hmm_vma_walk->pgmap); - hmm_vma_walk->pgmap = NULL; - } + if (pgmap) + put_dev_pagemap(pgmap); hmm_vma_walk->last = end; return 0; #else @@ -520,7 +517,7 @@ static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte) static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, unsigned long end, pmd_t *pmdp, pte_t *ptep, - uint64_t *pfn) + uint64_t *pfn, struct dev_pagemap **pgmap) { struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_range *range = hmm_vma_walk->range; @@ -591,9 +588,8 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, goto fault; if (pte_devmap(pte)) { - hmm_vma_walk->pgmap = get_dev_pagemap(pte_pfn(pte), - hmm_vma_walk->pgmap); - if (unlikely(!hmm_vma_walk->pgmap)) + *pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap); + if (unlikely(!*pgmap)) return -EBUSY; } else if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pte_special(pte)) { *pfn = range->values[HMM_PFN_SPECIAL]; @@ -604,10 +600,10 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, return 0; fault: - if (hmm_vma_walk->pgmap) { - put_dev_pagemap(hmm_vma_walk->pgmap); - hmm_vma_walk->pgmap = NULL; - } + if (*pgmap) + put_dev_pagemap(*pgmap); + *pgmap = NULL; + pte_unmap(ptep); /* Fault any virtual address we were asked to fault */ return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk); @@ -620,6 +616,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, { struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_range *range = hmm_vma_walk->range; + struct dev_pagemap *pgmap = NULL; uint64_t *pfns = range->pfns; unsigned long addr = start, i; pte_t *ptep; @@ -683,23 +680,21 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, for (; addr < end; addr += PAGE_SIZE, ptep++, i++) { int r; - r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]); + r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i], + &pgmap); if (r) { /* hmm_vma_handle_pte() did unmap pte directory */ hmm_vma_walk->last = addr; return r; } } - if (hmm_vma_walk->pgmap) { - /* - * We do put_dev_pagemap() here and not in hmm_vma_handle_pte() - * so that we can leverage get_dev_pagemap() optimization which - * will not re-take a reference on a pgmap if we already have - * one. - */ - put_dev_pagemap(hmm_vma_walk->pgmap); - hmm_vma_walk->pgmap = NULL; - } + /* + * We do put_dev_pagemap() here and not in hmm_vma_handle_pte() so that + * we can leverage the get_dev_pagemap() optimization which will not + * re-take a reference on a pgmap if we already have one. + */ + if (pgmap) + put_dev_pagemap(pgmap); pte_unmap(ptep - 1); hmm_vma_walk->last = addr; @@ -714,6 +709,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_range *range = hmm_vma_walk->range; unsigned long addr = start, next; + struct dev_pagemap *pgmap = NULL; pmd_t *pmdp; pud_t pud; int ret; @@ -744,17 +740,14 @@ static int hmm_vma_walk_pud(pud_t *pudp, pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); for (i = 0; i < npages; ++i, ++pfn) { - hmm_vma_walk->pgmap = get_dev_pagemap(pfn, - hmm_vma_walk->pgmap); - if (unlikely(!hmm_vma_walk->pgmap)) + pgmap = get_dev_pagemap(pfn, pgmap); + if (unlikely(!pgmap)) return -EBUSY; pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags; } - if (hmm_vma_walk->pgmap) { - put_dev_pagemap(hmm_vma_walk->pgmap); - hmm_vma_walk->pgmap = NULL; - } + if (pgmap) + put_dev_pagemap(pgmap); hmm_vma_walk->last = end; return 0; } @@ -1002,7 +995,6 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags) return -EPERM; } - hmm_vma_walk.pgmap = NULL; hmm_vma_walk.last = start; hmm_vma_walk.flags = flags; hmm_vma_walk.range = range; -- 2.20.1
Christoph Hellwig
2019-Jul-30 05:51 UTC
[Nouveau] [PATCH 05/13] mm: remove the unused vma argument to hmm_range_dma_unmap
Signed-off-by: Christoph Hellwig <hch at lst.de> --- include/linux/hmm.h | 1 - mm/hmm.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 82265118d94a..59be0aa2476d 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -422,7 +422,6 @@ long hmm_range_dma_map(struct hmm_range *range, dma_addr_t *daddrs, unsigned int flags); long hmm_range_dma_unmap(struct hmm_range *range, - struct vm_area_struct *vma, struct device *device, dma_addr_t *daddrs, bool dirty); diff --git a/mm/hmm.c b/mm/hmm.c index d66fa29b42e0..3a3852660757 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -1121,7 +1121,6 @@ EXPORT_SYMBOL(hmm_range_dma_map); /** * hmm_range_dma_unmap() - unmap range of that was map with hmm_range_dma_map() * @range: range being unmapped - * @vma: the vma against which the range (optional) * @device: device against which dma map was done * @daddrs: dma address of mapped pages * @dirty: dirty page if it had the write flag set @@ -1133,7 +1132,6 @@ EXPORT_SYMBOL(hmm_range_dma_map); * concurrent mmu notifier or sync_cpu_device_pagetables() to make progress. */ long hmm_range_dma_unmap(struct hmm_range *range, - struct vm_area_struct *vma, struct device *device, dma_addr_t *daddrs, bool dirty) -- 2.20.1
Christoph Hellwig
2019-Jul-30 05:51 UTC
[Nouveau] [PATCH 06/13] mm: remove superflous arguments from hmm_range_register
The start, end and page_shift values are all saved in the range structure, so we might as well use that for argument passing. Signed-off-by: Christoph Hellwig <hch at lst.de> --- Documentation/vm/hmm.rst | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 7 +++++-- drivers/gpu/drm/nouveau/nouveau_svm.c | 5 ++--- include/linux/hmm.h | 6 +----- mm/hmm.c | 20 +++++--------------- 5 files changed, 14 insertions(+), 26 deletions(-) diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst index ddcb5ca8b296..e63c11f7e0e0 100644 --- a/Documentation/vm/hmm.rst +++ b/Documentation/vm/hmm.rst @@ -222,7 +222,7 @@ The usage pattern is:: range.flags = ...; range.values = ...; range.pfn_shift = ...; - hmm_range_register(&range); + hmm_range_register(&range, mirror); /* * Just wait for range to be valid, safe to ignore return value as we diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index f0821638bbc6..71d6e7087b0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -818,8 +818,11 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) 0 : range->flags[HMM_PFN_WRITE]; range->pfn_flags_mask = 0; range->pfns = pfns; - hmm_range_register(range, mirror, start, - start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT); + range->page_shift = PAGE_SHIFT; + range->start = start; + range->end = start + ttm->num_pages * PAGE_SIZE; + + hmm_range_register(range, mirror); /* * Just wait for range to be valid, safe to ignore return value as we diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index b889d5ec4c7e..40e706234554 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -492,9 +492,7 @@ nouveau_range_fault(struct nouveau_svmm *svmm, struct hmm_range *range) range->default_flags = 0; range->pfn_flags_mask = -1UL; - ret = hmm_range_register(range, &svmm->mirror, - range->start, range->end, - PAGE_SHIFT); + ret = hmm_range_register(range, &svmm->mirror); if (ret) { up_read(&range->hmm->mm->mmap_sem); return (int)ret; @@ -682,6 +680,7 @@ nouveau_svm_fault(struct nvif_notify *notify) args.i.p.addr + args.i.p.size, fn - fi); /* Have HMM fault pages within the fault window to the GPU. */ + range.page_shift = PAGE_SHIFT; range.start = args.i.p.addr; range.end = args.i.p.addr + args.i.p.size; range.pfns = args.phys; diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 59be0aa2476d..c5b51376b453 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -400,11 +400,7 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror); /* * Please see Documentation/vm/hmm.rst for how to use the range API. */ -int hmm_range_register(struct hmm_range *range, - struct hmm_mirror *mirror, - unsigned long start, - unsigned long end, - unsigned page_shift); +int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror); void hmm_range_unregister(struct hmm_range *range); /* diff --git a/mm/hmm.c b/mm/hmm.c index 3a3852660757..926735a3aef9 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -843,35 +843,25 @@ static void hmm_pfns_clear(struct hmm_range *range, * hmm_range_register() - start tracking change to CPU page table over a range * @range: range * @mm: the mm struct for the range of virtual address - * @start: start virtual address (inclusive) - * @end: end virtual address (exclusive) - * @page_shift: expect page shift for the range + * * Return: 0 on success, -EFAULT if the address space is no longer valid * * Track updates to the CPU page table see include/linux/hmm.h */ -int hmm_range_register(struct hmm_range *range, - struct hmm_mirror *mirror, - unsigned long start, - unsigned long end, - unsigned page_shift) +int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror) { - unsigned long mask = ((1UL << page_shift) - 1UL); + unsigned long mask = ((1UL << range->page_shift) - 1UL); struct hmm *hmm = mirror->hmm; unsigned long flags; range->valid = false; range->hmm = NULL; - if ((start & mask) || (end & mask)) + if ((range->start & mask) || (range->end & mask)) return -EINVAL; - if (start >= end) + if (range->start >= range->end) return -EINVAL; - range->page_shift = page_shift; - range->start = start; - range->end = end; - /* Prevent hmm_release() from running while the range is valid */ if (!mmget_not_zero(hmm->mm)) return -EFAULT; -- 2.20.1
Christoph Hellwig
2019-Jul-30 05:51 UTC
[Nouveau] [PATCH 07/13] mm: remove the page_shift member from struct hmm_range
All users pass PAGE_SIZE here, and if we wanted to support single entries for huge pages we should really just add a HMM_FAULT_HUGEPAGE flag instead that uses the huge page size instead of having the caller calculate that size once, just for the hmm code to verify it. Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 - drivers/gpu/drm/nouveau/nouveau_svm.c | 1 - include/linux/hmm.h | 22 ------------- mm/hmm.c | 42 ++++++------------------- 4 files changed, 9 insertions(+), 57 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 71d6e7087b0b..8bf79288c4e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -818,7 +818,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) 0 : range->flags[HMM_PFN_WRITE]; range->pfn_flags_mask = 0; range->pfns = pfns; - range->page_shift = PAGE_SHIFT; range->start = start; range->end = start + ttm->num_pages * PAGE_SIZE; diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 40e706234554..e7068ce46949 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -680,7 +680,6 @@ nouveau_svm_fault(struct nvif_notify *notify) args.i.p.addr + args.i.p.size, fn - fi); /* Have HMM fault pages within the fault window to the GPU. */ - range.page_shift = PAGE_SHIFT; range.start = args.i.p.addr; range.end = args.i.p.addr + args.i.p.size; range.pfns = args.phys; diff --git a/include/linux/hmm.h b/include/linux/hmm.h index c5b51376b453..51e18fbb8953 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -158,7 +158,6 @@ enum hmm_pfn_value_e { * @values: pfn value for some special case (none, special, error, ...) * @default_flags: default flags for the range (write, read, ... see hmm doc) * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter - * @page_shift: device virtual address shift value (should be >= PAGE_SHIFT) * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT) * @valid: pfns array did not change since it has been fill by an HMM function */ @@ -172,31 +171,10 @@ struct hmm_range { const uint64_t *values; uint64_t default_flags; uint64_t pfn_flags_mask; - uint8_t page_shift; uint8_t pfn_shift; bool valid; }; -/* - * hmm_range_page_shift() - return the page shift for the range - * @range: range being queried - * Return: page shift (page size = 1 << page shift) for the range - */ -static inline unsigned hmm_range_page_shift(const struct hmm_range *range) -{ - return range->page_shift; -} - -/* - * hmm_range_page_size() - return the page size for the range - * @range: range being queried - * Return: page size for the range in bytes - */ -static inline unsigned long hmm_range_page_size(const struct hmm_range *range) -{ - return 1UL << hmm_range_page_shift(range); -} - /* * hmm_range_wait_until_valid() - wait for range to be valid * @range: range affected by invalidation to wait on diff --git a/mm/hmm.c b/mm/hmm.c index 926735a3aef9..f26d6abc4ed2 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -344,13 +344,12 @@ static int hmm_vma_walk_hole_(unsigned long addr, unsigned long end, struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_range *range = hmm_vma_walk->range; uint64_t *pfns = range->pfns; - unsigned long i, page_size; + unsigned long i; hmm_vma_walk->last = addr; - page_size = hmm_range_page_size(range); - i = (addr - range->start) >> range->page_shift; + i = (addr - range->start) >> PAGE_SHIFT; - for (; addr < end; addr += page_size, i++) { + for (; addr < end; addr += PAGE_SIZE, i++) { pfns[i] = range->values[HMM_PFN_NONE]; if (fault || write_fault) { int ret; @@ -772,7 +771,7 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, struct mm_walk *walk) { #ifdef CONFIG_HUGETLB_PAGE - unsigned long addr = start, i, pfn, mask, size, pfn_inc; + unsigned long addr = start, i, pfn, mask; struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_range *range = hmm_vma_walk->range; struct vm_area_struct *vma = walk->vma; @@ -783,24 +782,12 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, pte_t entry; int ret = 0; - size = huge_page_size(h); - mask = size - 1; - if (range->page_shift != PAGE_SHIFT) { - /* Make sure we are looking at a full page. */ - if (start & mask) - return -EINVAL; - if (end < (start + size)) - return -EINVAL; - pfn_inc = size >> PAGE_SHIFT; - } else { - pfn_inc = 1; - size = PAGE_SIZE; - } + mask = huge_page_size(h) - 1; ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte); entry = huge_ptep_get(pte); - i = (start - range->start) >> range->page_shift; + i = (start - range->start) >> PAGE_SHIFT; orig_pfn = range->pfns[i]; range->pfns[i] = range->values[HMM_PFN_NONE]; cpu_flags = pte_to_hmm_pfn_flags(range, entry); @@ -812,8 +799,8 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, goto unlock; } - pfn = pte_pfn(entry) + ((start & mask) >> range->page_shift); - for (; addr < end; addr += size, i++, pfn += pfn_inc) + pfn = pte_pfn(entry) + ((start & mask) >> PAGE_SHIFT); + for (; addr < end; addr += PAGE_SIZE, i++, pfn++) range->pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags; hmm_vma_walk->last = end; @@ -850,14 +837,13 @@ static void hmm_pfns_clear(struct hmm_range *range, */ int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror) { - unsigned long mask = ((1UL << range->page_shift) - 1UL); struct hmm *hmm = mirror->hmm; unsigned long flags; range->valid = false; range->hmm = NULL; - if ((range->start & mask) || (range->end & mask)) + if ((range->start & (PAGE_SIZE - 1)) || (range->end & (PAGE_SIZE - 1))) return -EINVAL; if (range->start >= range->end) return -EINVAL; @@ -964,16 +950,6 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags) if (vma == NULL || (vma->vm_flags & device_vma)) return -EFAULT; - if (is_vm_hugetlb_page(vma)) { - if (huge_page_shift(hstate_vma(vma)) !- range->page_shift && - range->page_shift != PAGE_SHIFT) - return -EINVAL; - } else { - if (range->page_shift != PAGE_SHIFT) - return -EINVAL; - } - if (!(vma->vm_flags & VM_READ)) { /* * If vma do not allow read access, then assume that it -- 2.20.1
Christoph Hellwig
2019-Jul-30 05:51 UTC
[Nouveau] [PATCH 08/13] mm: remove the mask variable in hmm_vma_walk_hugetlb_entry
The pagewalk code already passes the value as the hmask parameter. Signed-off-by: Christoph Hellwig <hch at lst.de> --- mm/hmm.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index f26d6abc4ed2..88b77a4a6a1e 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -771,19 +771,16 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, struct mm_walk *walk) { #ifdef CONFIG_HUGETLB_PAGE - unsigned long addr = start, i, pfn, mask; + unsigned long addr = start, i, pfn; struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_range *range = hmm_vma_walk->range; struct vm_area_struct *vma = walk->vma; - struct hstate *h = hstate_vma(vma); uint64_t orig_pfn, cpu_flags; bool fault, write_fault; spinlock_t *ptl; pte_t entry; int ret = 0; - mask = huge_page_size(h) - 1; - ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte); entry = huge_ptep_get(pte); @@ -799,7 +796,7 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, goto unlock; } - pfn = pte_pfn(entry) + ((start & mask) >> PAGE_SHIFT); + pfn = pte_pfn(entry) + ((start & hmask) >> PAGE_SHIFT); for (; addr < end; addr += PAGE_SIZE, i++, pfn++) range->pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags; -- 2.20.1
Christoph Hellwig
2019-Jul-30 05:51 UTC
[Nouveau] [PATCH 09/13] mm: don't abuse pte_index() in hmm_vma_handle_pmd
pte_index is an internal arch helper in various architectures, without consistent semantics. Open code that calculation of a PMD index based on the virtual address instead. Signed-off-by: Christoph Hellwig <hch at lst.de> --- mm/hmm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hmm.c b/mm/hmm.c index 88b77a4a6a1e..e63ab7f11334 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -486,7 +486,7 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk, if (pmd_protnone(pmd) || fault || write_fault) return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk); - pfn = pmd_pfn(pmd) + pte_index(addr); + pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++) { if (pmd_devmap(pmd)) { pgmap = get_dev_pagemap(pfn, pgmap); -- 2.20.1
Christoph Hellwig
2019-Jul-30 05:52 UTC
[Nouveau] [PATCH 10/13] mm: only define hmm_vma_walk_pud if needed
We only need the special pud_entry walker if PUD-sized hugepages and pte mappings are supported, else the common pagewalk code will take care of the iteration. Not implementing this callback reduced the amount of code compiled for non-x86 platforms, and also fixes compile failures with other architectures when helpers like pud_pfn are not implemented. Signed-off-by: Christoph Hellwig <hch at lst.de> --- mm/hmm.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index e63ab7f11334..4d3bd41b6522 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -455,15 +455,6 @@ static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd) range->flags[HMM_PFN_VALID]; } -static inline uint64_t pud_to_hmm_pfn_flags(struct hmm_range *range, pud_t pud) -{ - if (!pud_present(pud)) - return 0; - return pud_write(pud) ? range->flags[HMM_PFN_VALID] | - range->flags[HMM_PFN_WRITE] : - range->flags[HMM_PFN_VALID]; -} - static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr, unsigned long end, @@ -700,10 +691,19 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, return 0; } -static int hmm_vma_walk_pud(pud_t *pudp, - unsigned long start, - unsigned long end, - struct mm_walk *walk) +#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && \ + defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) +static inline uint64_t pud_to_hmm_pfn_flags(struct hmm_range *range, pud_t pud) +{ + if (!pud_present(pud)) + return 0; + return pud_write(pud) ? range->flags[HMM_PFN_VALID] | + range->flags[HMM_PFN_WRITE] : + range->flags[HMM_PFN_VALID]; +} + +static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end, + struct mm_walk *walk) { struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_range *range = hmm_vma_walk->range; @@ -765,6 +765,9 @@ static int hmm_vma_walk_pud(pud_t *pudp, return 0; } +#else +#define hmm_vma_walk_pud NULL +#endif static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, unsigned long start, unsigned long end, -- 2.20.1
Christoph Hellwig
2019-Jul-30 05:52 UTC
[Nouveau] [PATCH 11/13] mm: cleanup the hmm_vma_handle_pmd stub
Stub out the whole function when CONFIG_TRANSPARENT_HUGEPAGE is not set to make the function easier to read. Signed-off-by: Christoph Hellwig <hch at lst.de> --- mm/hmm.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index 4d3bd41b6522..f4e90ea5779f 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -455,13 +455,10 @@ static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd) range->flags[HMM_PFN_VALID]; } -static int hmm_vma_handle_pmd(struct mm_walk *walk, - unsigned long addr, - unsigned long end, - uint64_t *pfns, - pmd_t pmd) -{ #ifdef CONFIG_TRANSPARENT_HUGEPAGE +static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr, + unsigned long end, uint64_t *pfns, pmd_t pmd) +{ struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_range *range = hmm_vma_walk->range; struct dev_pagemap *pgmap = NULL; @@ -490,11 +487,14 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk, put_dev_pagemap(pgmap); hmm_vma_walk->last = end; return 0; -#else - /* If THP is not enabled then we should never reach this code ! */ +} +#else /* CONFIG_TRANSPARENT_HUGEPAGE */ +static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr, + unsigned long end, uint64_t *pfns, pmd_t pmd) +{ return -EINVAL; -#endif } +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte) { -- 2.20.1
Christoph Hellwig
2019-Jul-30 05:52 UTC
[Nouveau] [PATCH 12/13] mm: cleanup the hmm_vma_walk_hugetlb_entry stub
Stub out the whole function and assign NULL to the .hugetlb_entry method if CONFIG_HUGETLB_PAGE is not set, as the method won't ever be called in that case. Signed-off-by: Christoph Hellwig <hch at lst.de> --- mm/hmm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/hmm.c b/mm/hmm.c index f4e90ea5779f..2b56a4af1001 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -769,11 +769,11 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end, #define hmm_vma_walk_pud NULL #endif +#ifdef CONFIG_HUGETLB_PAGE static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, unsigned long start, unsigned long end, struct mm_walk *walk) { -#ifdef CONFIG_HUGETLB_PAGE unsigned long addr = start, i, pfn; struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_range *range = hmm_vma_walk->range; @@ -812,10 +812,10 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk); return ret; -#else /* CONFIG_HUGETLB_PAGE */ - return -EINVAL; -#endif } +#else +#define hmm_vma_walk_hugetlb_entry NULL +#endif /* CONFIG_HUGETLB_PAGE */ static void hmm_pfns_clear(struct hmm_range *range, uint64_t *pfns, -- 2.20.1
Christoph Hellwig
2019-Jul-30 05:52 UTC
[Nouveau] [PATCH 13/13] mm: allow HMM_MIRROR on all architectures with MMU
There isn't really any architecture specific code in this page table walk implementation, so drop the dependencies. Signed-off-by: Christoph Hellwig <hch at lst.de> --- mm/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/Kconfig b/mm/Kconfig index 56cec636a1fc..b18782be969c 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -677,8 +677,7 @@ config DEV_PAGEMAP_OPS config HMM_MIRROR bool "HMM mirror CPU page table into a device page table" - depends on (X86_64 || PPC64) - depends on MMU && 64BIT + depends on MMU select MMU_NOTIFIER help Select HMM_MIRROR if you want to mirror range of the CPU page table of a -- 2.20.1
Jason Gunthorpe
2019-Jul-30 12:33 UTC
[Nouveau] [PATCH 01/13] amdgpu: remove -EAGAIN handling for hmm_range_fault
On Tue, Jul 30, 2019 at 08:51:51AM +0300, Christoph Hellwig wrote:> hmm_range_fault can only return -EAGAIN if called with the block > argument set to false, so remove the special handling for it. > > Signed-off-by: Christoph Hellwig <hch at lst.de> > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 23 +++-------------------- > 1 file changed, 3 insertions(+), 20 deletions(-)Reviewed-by: Jason Gunthorpe <jgg at mellanox.com> Jason
Jason Gunthorpe
2019-Jul-30 12:33 UTC
[Nouveau] [PATCH 02/13] amdgpu: don't initialize range->list in amdgpu_hmm_init_range
On Tue, Jul 30, 2019 at 08:51:52AM +0300, Christoph Hellwig wrote:> The list is used to add the range to another list as an entry in the > core hmm code, so there is no need to initialize it in a driver. > > Signed-off-by: Christoph Hellwig <hch at lst.de> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 1 - > 1 file changed, 1 deletion(-)Reviewed-by: Jason Gunthorpe <jgg at mellanox.com> Jason
Jason Gunthorpe
2019-Jul-30 12:35 UTC
[Nouveau] [PATCH 03/13] nouveau: pass struct nouveau_svmm to nouveau_range_fault
On Tue, Jul 30, 2019 at 08:51:53AM +0300, Christoph Hellwig wrote:> This avoid having to abuse the vma field in struct hmm_range to unlock > the mmap_sem.I think the change inside hmm_range_fault got lost on rebase, it is now using: up_read(&range->hmm->mm->mmap_sem); But, yes, lets change it to use svmm->mm and try to keep struct hmm opaque to drivers Jason
Jason Gunthorpe
2019-Jul-30 12:45 UTC
[Nouveau] [PATCH 05/13] mm: remove the unused vma argument to hmm_range_dma_unmap
On Tue, Jul 30, 2019 at 08:51:55AM +0300, Christoph Hellwig wrote:> Signed-off-by: Christoph Hellwig <hch at lst.de> > include/linux/hmm.h | 1 - > mm/hmm.c | 2 -- > 2 files changed, 3 deletions(-)Reviewed-by: Jason Gunthorpe <jgg at mellanox.com> Unclear what this was intended for, but I think if we need to carry information from the dma_map to unmap it should be done in some opaque way. The driver should not have to care about VMAs, beyond perhaps using VMAs to guide what VA ranges to mirror. Jason
Jason Gunthorpe
2019-Jul-30 12:55 UTC
[Nouveau] [PATCH 07/13] mm: remove the page_shift member from struct hmm_range
On Tue, Jul 30, 2019 at 08:51:57AM +0300, Christoph Hellwig wrote:> All users pass PAGE_SIZE here, and if we wanted to support single > entries for huge pages we should really just add a HMM_FAULT_HUGEPAGE > flag instead that uses the huge page size instead of having the > caller calculate that size once, just for the hmm code to verify it.I suspect this was added for the ODP conversion that does use both page sizes. I think the ODP code for this is kind of broken, but I haven't delved into that.. The challenge is that the driver needs to know what page size to configure the hardware before it does any range stuff. The other challenge is that the HW is configured to do only one page size, and if the underlying CPU page side changes it goes south. What I would prefer is if the driver could somehow dynamically adjust the the page size after each dma map, but I don't know if ODP HW can do that. Since this is all driving toward making ODP use this maybe we should keep this API? I'm not sure I can loose the crappy huge page support in ODP. Jason
Jason Gunthorpe
2019-Jul-30 17:39 UTC
[Nouveau] [PATCH 08/13] mm: remove the mask variable in hmm_vma_walk_hugetlb_entry
On Tue, Jul 30, 2019 at 08:51:58AM +0300, Christoph Hellwig wrote:> The pagewalk code already passes the value as the hmask parameter. > > Signed-off-by: Christoph Hellwig <hch at lst.de> > mm/hmm.c | 7 ++----- > 1 file changed, 2 insertions(+), 5 deletions(-) > > diff --git a/mm/hmm.c b/mm/hmm.c > index f26d6abc4ed2..88b77a4a6a1e 100644 > +++ b/mm/hmm.c > @@ -771,19 +771,16 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, > struct mm_walk *walk) > { > #ifdef CONFIG_HUGETLB_PAGE > - unsigned long addr = start, i, pfn, mask; > + unsigned long addr = start, i, pfn; > struct hmm_vma_walk *hmm_vma_walk = walk->private; > struct hmm_range *range = hmm_vma_walk->range; > struct vm_area_struct *vma = walk->vma; > - struct hstate *h = hstate_vma(vma); > uint64_t orig_pfn, cpu_flags; > bool fault, write_fault; > spinlock_t *ptl; > pte_t entry; > int ret = 0; > > - mask = huge_page_size(h) - 1; > - > ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte); > entry = huge_ptep_get(pte); > > @@ -799,7 +796,7 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, > goto unlock; > } > > - pfn = pte_pfn(entry) + ((start & mask) >> PAGE_SHIFT); > + pfn = pte_pfn(entry) + ((start & hmask) >> PAGE_SHIFT);I don't know this hstate stuff, but this doesn't look the same? static int walk_hugetlb_range(unsigned long addr, unsigned long end, { struct hstate *h = hstate_vma(vma); unsigned long hmask = huge_page_mask(h); // aka h->mask err = walk->hugetlb_entry(pte, hmask, addr, next, walk); And the first place I found setting h->mask is: void __init hugetlb_add_hstate(unsigned int order) { h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1); Compared with mask = huge_page_size(h) - 1; = ((unsigned long)PAGE_SIZE << h->order) - 1 Looks like hmask == ~mask ? Jason
Jason Gunthorpe
2019-Jul-30 17:51 UTC
[Nouveau] [PATCH 06/13] mm: remove superflous arguments from hmm_range_register
On Tue, Jul 30, 2019 at 08:51:56AM +0300, Christoph Hellwig wrote:> The start, end and page_shift values are all saved in the range > structure, so we might as well use that for argument passing. > > Signed-off-by: Christoph Hellwig <hch at lst.de> > --- > Documentation/vm/hmm.rst | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 7 +++++-- > drivers/gpu/drm/nouveau/nouveau_svm.c | 5 ++--- > include/linux/hmm.h | 6 +----- > mm/hmm.c | 20 +++++--------------- > 5 files changed, 14 insertions(+), 26 deletions(-)I also don't really like how the API sets up some things in the struct and some things via arguments, so: Reviewed-by: Jason Gunthorpe <jgg at mellanox.com> Jason
Jason Gunthorpe
2019-Jul-30 17:53 UTC
[Nouveau] [PATCH 11/13] mm: cleanup the hmm_vma_handle_pmd stub
On Tue, Jul 30, 2019 at 08:52:01AM +0300, Christoph Hellwig wrote:> Stub out the whole function when CONFIG_TRANSPARENT_HUGEPAGE is not set > to make the function easier to read. > > Signed-off-by: Christoph Hellwig <hch at lst.de> > mm/hmm.c | 18 +++++++++--------- > 1 file changed, 9 insertions(+), 9 deletions(-) > > diff --git a/mm/hmm.c b/mm/hmm.c > index 4d3bd41b6522..f4e90ea5779f 100644 > +++ b/mm/hmm.c > @@ -455,13 +455,10 @@ static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd) > range->flags[HMM_PFN_VALID]; > } > > -static int hmm_vma_handle_pmd(struct mm_walk *walk, > - unsigned long addr, > - unsigned long end, > - uint64_t *pfns, > - pmd_t pmd) > -{ > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > +static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr, > + unsigned long end, uint64_t *pfns, pmd_t pmd) > +{ > struct hmm_vma_walk *hmm_vma_walk = walk->private; > struct hmm_range *range = hmm_vma_walk->range; > struct dev_pagemap *pgmap = NULL; > @@ -490,11 +487,14 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk, > put_dev_pagemap(pgmap); > hmm_vma_walk->last = end; > return 0; > -#else > - /* If THP is not enabled then we should never reach thisThis old comment says we should never get here> +} > +#else /* CONFIG_TRANSPARENT_HUGEPAGE */ > +static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr, > + unsigned long end, uint64_t *pfns, pmd_t pmd) > +{ > return -EINVAL;So could we just do #define hmm_vma_handle_pmd NULL ? At the very least this seems like a WARN_ON too? Jason
Jason Gunthorpe
2019-Jul-30 18:02 UTC
[Nouveau] [PATCH 10/13] mm: only define hmm_vma_walk_pud if needed
On Tue, Jul 30, 2019 at 08:52:00AM +0300, Christoph Hellwig wrote:> We only need the special pud_entry walker if PUD-sized hugepages and > pte mappings are supported, else the common pagewalk code will take > care of the iteration. Not implementing this callback reduced the > amount of code compiled for non-x86 platforms, and also fixes compile > failures with other architectures when helpers like pud_pfn are not > implemented. > > Signed-off-by: Christoph Hellwig <hch at lst.de> > --- > mm/hmm.c | 29 ++++++++++++++++------------- > 1 file changed, 16 insertions(+), 13 deletions(-)Reviewed-by: Jason Gunthorpe <jgg at mellanox.com> Jason
Jason Gunthorpe
2019-Jul-30 18:02 UTC
[Nouveau] [PATCH 12/13] mm: cleanup the hmm_vma_walk_hugetlb_entry stub
On Tue, Jul 30, 2019 at 08:52:02AM +0300, Christoph Hellwig wrote:> Stub out the whole function and assign NULL to the .hugetlb_entry method > if CONFIG_HUGETLB_PAGE is not set, as the method won't ever be called in > that case. > > Signed-off-by: Christoph Hellwig <hch at lst.de> > --- > mm/hmm.c | 8 ++++---- > 1 file changed, 4 insertions(+), 4 deletions(-)Reviewed-by: Jason Gunthorpe <jgg at mellanox.com> Jason
Jason Gunthorpe
2019-Jul-30 18:03 UTC
[Nouveau] [PATCH 13/13] mm: allow HMM_MIRROR on all architectures with MMU
On Tue, Jul 30, 2019 at 08:52:03AM +0300, Christoph Hellwig wrote:> There isn't really any architecture specific code in this page table > walk implementation, so drop the dependencies. > > Signed-off-by: Christoph Hellwig <hch at lst.de> > mm/Kconfig | 3 +-- > 1 file changed, 1 insertion(+), 2 deletions(-)Happy to see it, lets try to get this patch + the required parts of this series into linux-next to be really sure Reviewed-by: Jason Gunthorpe <jgg at mellanox.com> Thanks, Jason
Ralph Campbell
2019-Jul-31 01:01 UTC
[Nouveau] [PATCH 08/13] mm: remove the mask variable in hmm_vma_walk_hugetlb_entry
On 7/29/19 10:51 PM, Christoph Hellwig wrote:> The pagewalk code already passes the value as the hmask parameter. > > Signed-off-by: Christoph Hellwig <hch at lst.de> > --- > mm/hmm.c | 7 ++----- > 1 file changed, 2 insertions(+), 5 deletions(-) > > diff --git a/mm/hmm.c b/mm/hmm.c > index f26d6abc4ed2..88b77a4a6a1e 100644 > --- a/mm/hmm.c > +++ b/mm/hmm.c > @@ -771,19 +771,16 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, > struct mm_walk *walk) > { > #ifdef CONFIG_HUGETLB_PAGE > - unsigned long addr = start, i, pfn, mask; > + unsigned long addr = start, i, pfn; > struct hmm_vma_walk *hmm_vma_walk = walk->private; > struct hmm_range *range = hmm_vma_walk->range; > struct vm_area_struct *vma = walk->vma; > - struct hstate *h = hstate_vma(vma); > uint64_t orig_pfn, cpu_flags; > bool fault, write_fault; > spinlock_t *ptl; > pte_t entry; > int ret = 0; > > - mask = huge_page_size(h) - 1; > - > ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte); > entry = huge_ptep_get(pte); > > @@ -799,7 +796,7 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, > goto unlock; > } > > - pfn = pte_pfn(entry) + ((start & mask) >> PAGE_SHIFT); > + pfn = pte_pfn(entry) + ((start & hmask) >> PAGE_SHIFT);This needs to be "~hmask" so that the upper bits of the start address are not added to the pfn. It's the middle bits of the address that offset into the huge page that are needed.> for (; addr < end; addr += PAGE_SIZE, i++, pfn++) > range->pfns[i] = hmm_device_entry_from_pfn(range, pfn) | > cpu_flags; >
Kuehling, Felix
2019-Jul-31 13:13 UTC
[Nouveau] [PATCH 01/13] amdgpu: remove -EAGAIN handling for hmm_range_fault
On 2019-07-30 1:51 a.m., Christoph Hellwig wrote:> hmm_range_fault can only return -EAGAIN if called with the block > argument set to false, so remove the special handling for it.The block argument no longer exists. You replaced that with the HMM_FAULT_ALLOW_RETRY with opposite logic. So this should read "hmm_range_fault can only return -EAGAIN if called with the HMM_FAULT_ALLOW_RETRY flag set, so remove the special handling for it. With that fixed, this commit is Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>> > Signed-off-by: Christoph Hellwig <hch at lst.de> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 23 +++-------------------- > 1 file changed, 3 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index 12a59ac83f72..f0821638bbc6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -778,7 +778,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) > struct hmm_range *range; > unsigned long i; > uint64_t *pfns; > - int retry = 0; > int r = 0; > > if (!mm) /* Happens during process shutdown */ > @@ -822,7 +821,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) > hmm_range_register(range, mirror, start, > start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT); > > -retry: > /* > * Just wait for range to be valid, safe to ignore return value as we > * will use the return value of hmm_range_fault() below under the > @@ -831,24 +829,12 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) > hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT); > > down_read(&mm->mmap_sem); > - > r = hmm_range_fault(range, 0); > - if (unlikely(r < 0)) { > - if (likely(r == -EAGAIN)) { > - /* > - * return -EAGAIN, mmap_sem is dropped > - */ > - if (retry++ < MAX_RETRY_HMM_RANGE_FAULT) > - goto retry; > - else > - pr_err("Retry hmm fault too many times\n"); > - } > - > - goto out_up_read; > - } > - > up_read(&mm->mmap_sem); > > + if (unlikely(r < 0)) > + goto out_free_pfns; > + > for (i = 0; i < ttm->num_pages; i++) { > pages[i] = hmm_device_entry_to_page(range, pfns[i]); > if (unlikely(!pages[i])) { > @@ -864,9 +850,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) > > return 0; > > -out_up_read: > - if (likely(r != -EAGAIN)) > - up_read(&mm->mmap_sem); > out_free_pfns: > hmm_range_unregister(range); > kvfree(pfns);
Kuehling, Felix
2019-Jul-31 13:25 UTC
[Nouveau] [PATCH 02/13] amdgpu: don't initialize range->list in amdgpu_hmm_init_range
On 2019-07-30 1:51 a.m., Christoph Hellwig wrote:> The list is used to add the range to another list as an entry in the > core hmm code, so there is no need to initialize it in a driver.I've seen code that uses list_empty to check whether a list head has been added to a list or not. For that to work, the list head needs to be initialized, and it has to be removed with list_del_init. If HMM doesn't ever do that with range->list, then this patch is Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>> > Signed-off-by: Christoph Hellwig <hch at lst.de> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 1 - > 1 file changed, 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > index b698b423b25d..60b9fc9561d7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > @@ -484,6 +484,5 @@ void amdgpu_hmm_init_range(struct hmm_range *range) > range->flags = hmm_range_flags; > range->values = hmm_range_values; > range->pfn_shift = PAGE_SHIFT; > - INIT_LIST_HEAD(&range->list); > } > }
Kuehling, Felix
2019-Jul-31 13:31 UTC
[Nouveau] [PATCH 06/13] mm: remove superflous arguments from hmm_range_register
On 2019-07-30 1:51 a.m., Christoph Hellwig wrote:> The start, end and page_shift values are all saved in the range > structure, so we might as well use that for argument passing. > > Signed-off-by: Christoph Hellwig <hch at lst.de>Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>> --- > Documentation/vm/hmm.rst | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 7 +++++-- > drivers/gpu/drm/nouveau/nouveau_svm.c | 5 ++--- > include/linux/hmm.h | 6 +----- > mm/hmm.c | 20 +++++--------------- > 5 files changed, 14 insertions(+), 26 deletions(-) > > diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst > index ddcb5ca8b296..e63c11f7e0e0 100644 > --- a/Documentation/vm/hmm.rst > +++ b/Documentation/vm/hmm.rst > @@ -222,7 +222,7 @@ The usage pattern is:: > range.flags = ...; > range.values = ...; > range.pfn_shift = ...; > - hmm_range_register(&range); > + hmm_range_register(&range, mirror); > > /* > * Just wait for range to be valid, safe to ignore return value as we > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index f0821638bbc6..71d6e7087b0b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -818,8 +818,11 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) > 0 : range->flags[HMM_PFN_WRITE]; > range->pfn_flags_mask = 0; > range->pfns = pfns; > - hmm_range_register(range, mirror, start, > - start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT); > + range->page_shift = PAGE_SHIFT; > + range->start = start; > + range->end = start + ttm->num_pages * PAGE_SIZE; > + > + hmm_range_register(range, mirror); > > /* > * Just wait for range to be valid, safe to ignore return value as we > diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c > index b889d5ec4c7e..40e706234554 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_svm.c > +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c > @@ -492,9 +492,7 @@ nouveau_range_fault(struct nouveau_svmm *svmm, struct hmm_range *range) > range->default_flags = 0; > range->pfn_flags_mask = -1UL; > > - ret = hmm_range_register(range, &svmm->mirror, > - range->start, range->end, > - PAGE_SHIFT); > + ret = hmm_range_register(range, &svmm->mirror); > if (ret) { > up_read(&range->hmm->mm->mmap_sem); > return (int)ret; > @@ -682,6 +680,7 @@ nouveau_svm_fault(struct nvif_notify *notify) > args.i.p.addr + args.i.p.size, fn - fi); > > /* Have HMM fault pages within the fault window to the GPU. */ > + range.page_shift = PAGE_SHIFT; > range.start = args.i.p.addr; > range.end = args.i.p.addr + args.i.p.size; > range.pfns = args.phys; > diff --git a/include/linux/hmm.h b/include/linux/hmm.h > index 59be0aa2476d..c5b51376b453 100644 > --- a/include/linux/hmm.h > +++ b/include/linux/hmm.h > @@ -400,11 +400,7 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror); > /* > * Please see Documentation/vm/hmm.rst for how to use the range API. > */ > -int hmm_range_register(struct hmm_range *range, > - struct hmm_mirror *mirror, > - unsigned long start, > - unsigned long end, > - unsigned page_shift); > +int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror); > void hmm_range_unregister(struct hmm_range *range); > > /* > diff --git a/mm/hmm.c b/mm/hmm.c > index 3a3852660757..926735a3aef9 100644 > --- a/mm/hmm.c > +++ b/mm/hmm.c > @@ -843,35 +843,25 @@ static void hmm_pfns_clear(struct hmm_range *range, > * hmm_range_register() - start tracking change to CPU page table over a range > * @range: range > * @mm: the mm struct for the range of virtual address > - * @start: start virtual address (inclusive) > - * @end: end virtual address (exclusive) > - * @page_shift: expect page shift for the range > + * > * Return: 0 on success, -EFAULT if the address space is no longer valid > * > * Track updates to the CPU page table see include/linux/hmm.h > */ > -int hmm_range_register(struct hmm_range *range, > - struct hmm_mirror *mirror, > - unsigned long start, > - unsigned long end, > - unsigned page_shift) > +int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror) > { > - unsigned long mask = ((1UL << page_shift) - 1UL); > + unsigned long mask = ((1UL << range->page_shift) - 1UL); > struct hmm *hmm = mirror->hmm; > unsigned long flags; > > range->valid = false; > range->hmm = NULL; > > - if ((start & mask) || (end & mask)) > + if ((range->start & mask) || (range->end & mask)) > return -EINVAL; > - if (start >= end) > + if (range->start >= range->end) > return -EINVAL; > > - range->page_shift = page_shift; > - range->start = start; > - range->end = end; > - > /* Prevent hmm_release() from running while the range is valid */ > if (!mmget_not_zero(hmm->mm)) > return -EFAULT;
Kuehling, Felix
2019-Jul-31 13:38 UTC
[Nouveau] [PATCH 07/13] mm: remove the page_shift member from struct hmm_range
On 2019-07-30 1:51 a.m., Christoph Hellwig wrote:> All users pass PAGE_SIZE here, and if we wanted to support single > entries for huge pages we should really just add a HMM_FAULT_HUGEPAGE > flag instead that uses the huge page size instead of having the > caller calculate that size once, just for the hmm code to verify it.Maybe this was meant to support device page size != native page size? Anyway, looks like we didn't use it that way. Acked-by: Felix Kuehling <Felix.Kuehling at amd.com>> > Signed-off-by: Christoph Hellwig <hch at lst.de> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 - > drivers/gpu/drm/nouveau/nouveau_svm.c | 1 - > include/linux/hmm.h | 22 ------------- > mm/hmm.c | 42 ++++++------------------- > 4 files changed, 9 insertions(+), 57 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index 71d6e7087b0b..8bf79288c4e2 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -818,7 +818,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) > 0 : range->flags[HMM_PFN_WRITE]; > range->pfn_flags_mask = 0; > range->pfns = pfns; > - range->page_shift = PAGE_SHIFT; > range->start = start; > range->end = start + ttm->num_pages * PAGE_SIZE; > > diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c > index 40e706234554..e7068ce46949 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_svm.c > +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c > @@ -680,7 +680,6 @@ nouveau_svm_fault(struct nvif_notify *notify) > args.i.p.addr + args.i.p.size, fn - fi); > > /* Have HMM fault pages within the fault window to the GPU. */ > - range.page_shift = PAGE_SHIFT; > range.start = args.i.p.addr; > range.end = args.i.p.addr + args.i.p.size; > range.pfns = args.phys; > diff --git a/include/linux/hmm.h b/include/linux/hmm.h > index c5b51376b453..51e18fbb8953 100644 > --- a/include/linux/hmm.h > +++ b/include/linux/hmm.h > @@ -158,7 +158,6 @@ enum hmm_pfn_value_e { > * @values: pfn value for some special case (none, special, error, ...) > * @default_flags: default flags for the range (write, read, ... see hmm doc) > * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter > - * @page_shift: device virtual address shift value (should be >= PAGE_SHIFT) > * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT) > * @valid: pfns array did not change since it has been fill by an HMM function > */ > @@ -172,31 +171,10 @@ struct hmm_range { > const uint64_t *values; > uint64_t default_flags; > uint64_t pfn_flags_mask; > - uint8_t page_shift; > uint8_t pfn_shift; > bool valid; > }; > > -/* > - * hmm_range_page_shift() - return the page shift for the range > - * @range: range being queried > - * Return: page shift (page size = 1 << page shift) for the range > - */ > -static inline unsigned hmm_range_page_shift(const struct hmm_range *range) > -{ > - return range->page_shift; > -} > - > -/* > - * hmm_range_page_size() - return the page size for the range > - * @range: range being queried > - * Return: page size for the range in bytes > - */ > -static inline unsigned long hmm_range_page_size(const struct hmm_range *range) > -{ > - return 1UL << hmm_range_page_shift(range); > -} > - > /* > * hmm_range_wait_until_valid() - wait for range to be valid > * @range: range affected by invalidation to wait on > diff --git a/mm/hmm.c b/mm/hmm.c > index 926735a3aef9..f26d6abc4ed2 100644 > --- a/mm/hmm.c > +++ b/mm/hmm.c > @@ -344,13 +344,12 @@ static int hmm_vma_walk_hole_(unsigned long addr, unsigned long end, > struct hmm_vma_walk *hmm_vma_walk = walk->private; > struct hmm_range *range = hmm_vma_walk->range; > uint64_t *pfns = range->pfns; > - unsigned long i, page_size; > + unsigned long i; > > hmm_vma_walk->last = addr; > - page_size = hmm_range_page_size(range); > - i = (addr - range->start) >> range->page_shift; > + i = (addr - range->start) >> PAGE_SHIFT; > > - for (; addr < end; addr += page_size, i++) { > + for (; addr < end; addr += PAGE_SIZE, i++) { > pfns[i] = range->values[HMM_PFN_NONE]; > if (fault || write_fault) { > int ret; > @@ -772,7 +771,7 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, > struct mm_walk *walk) > { > #ifdef CONFIG_HUGETLB_PAGE > - unsigned long addr = start, i, pfn, mask, size, pfn_inc; > + unsigned long addr = start, i, pfn, mask; > struct hmm_vma_walk *hmm_vma_walk = walk->private; > struct hmm_range *range = hmm_vma_walk->range; > struct vm_area_struct *vma = walk->vma; > @@ -783,24 +782,12 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, > pte_t entry; > int ret = 0; > > - size = huge_page_size(h); > - mask = size - 1; > - if (range->page_shift != PAGE_SHIFT) { > - /* Make sure we are looking at a full page. */ > - if (start & mask) > - return -EINVAL; > - if (end < (start + size)) > - return -EINVAL; > - pfn_inc = size >> PAGE_SHIFT; > - } else { > - pfn_inc = 1; > - size = PAGE_SIZE; > - } > + mask = huge_page_size(h) - 1; > > ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte); > entry = huge_ptep_get(pte); > > - i = (start - range->start) >> range->page_shift; > + i = (start - range->start) >> PAGE_SHIFT; > orig_pfn = range->pfns[i]; > range->pfns[i] = range->values[HMM_PFN_NONE]; > cpu_flags = pte_to_hmm_pfn_flags(range, entry); > @@ -812,8 +799,8 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask, > goto unlock; > } > > - pfn = pte_pfn(entry) + ((start & mask) >> range->page_shift); > - for (; addr < end; addr += size, i++, pfn += pfn_inc) > + pfn = pte_pfn(entry) + ((start & mask) >> PAGE_SHIFT); > + for (; addr < end; addr += PAGE_SIZE, i++, pfn++) > range->pfns[i] = hmm_device_entry_from_pfn(range, pfn) | > cpu_flags; > hmm_vma_walk->last = end; > @@ -850,14 +837,13 @@ static void hmm_pfns_clear(struct hmm_range *range, > */ > int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror) > { > - unsigned long mask = ((1UL << range->page_shift) - 1UL); > struct hmm *hmm = mirror->hmm; > unsigned long flags; > > range->valid = false; > range->hmm = NULL; > > - if ((range->start & mask) || (range->end & mask)) > + if ((range->start & (PAGE_SIZE - 1)) || (range->end & (PAGE_SIZE - 1))) > return -EINVAL; > if (range->start >= range->end) > return -EINVAL; > @@ -964,16 +950,6 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags) > if (vma == NULL || (vma->vm_flags & device_vma)) > return -EFAULT; > > - if (is_vm_hugetlb_page(vma)) { > - if (huge_page_shift(hstate_vma(vma)) !> - range->page_shift && > - range->page_shift != PAGE_SHIFT) > - return -EINVAL; > - } else { > - if (range->page_shift != PAGE_SHIFT) > - return -EINVAL; > - } > - > if (!(vma->vm_flags & VM_READ)) { > /* > * If vma do not allow read access, then assume that it