Alexandre Courbot
2015-Nov-11 08:07 UTC
[Nouveau] [PATCH] instmem/gk20a: use DMA API CPU mapping
Commit 69c4938249fb ("drm/nouveau/instmem/gk20a: use direct CPU access")
tried to be smart when using the DMA-API by managing the CPU mappings of
buffers allocated with the DMA-API itself. In doing so, it relied on
dma_to_phys(), an architecture-private function that is not available
everywhere. This broke the build on several architectures.

Since there is no reliable and portable way to obtain the physical
address of a DMA-API buffer, stop trying to be smart and just use the
CPU mapping that the DMA-API can provide. This means that buffers will
be CPU-mapped for their whole life, as opposed to only when we need
them, but using the DMA-API here is a fallback for when no IOMMU is
available, so we should not expect optimal behavior anyway.

This makes the IOMMU and DMA-API implementations of instmem diverge
enough that we should maybe put them into separate files...

Signed-off-by: Alexandre Courbot <acourbot at nvidia.com>
---
Ben/Dave, since Dave's fix has reached mainline and builds are no
longer broken, we can proceed in one of two ways:

1) Ben merges this for 4.4 and lets it flow into -rc2
2) I send another fix against the kernel tree

Which one shall I do?

 drm/nouveau/nvkm/subdev/instmem/gk20a.c | 148 +++++++++++++-------------------
 lib/include/nvif/os.h                   |   6 --
 2 files changed, 62 insertions(+), 92 deletions(-)

diff --git a/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drm/nouveau/nvkm/subdev/instmem/gk20a.c
index 681b2541229a..4c20fec64d96 100644
--- a/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+++ b/drm/nouveau/nvkm/subdev/instmem/gk20a.c
@@ -56,9 +56,6 @@ struct gk20a_instobj {
 
         /* CPU mapping */
         u32 *vaddr;
-        struct list_head vaddr_node;
-        /* How many clients are using vaddr? */
-        u32 use_cpt;
 };
 #define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory)
 
@@ -68,7 +65,6 @@ struct gk20a_instobj {
 struct gk20a_instobj_dma {
         struct gk20a_instobj base;
 
-        u32 *cpuaddr;
         dma_addr_t handle;
         struct nvkm_mm_node r;
 };
@@ -81,6 +77,11 @@ struct gk20a_instobj_dma {
 struct gk20a_instobj_iommu {
         struct gk20a_instobj base;
 
+        /* to link into gk20a_instmem::vaddr_lru */
+        struct list_head vaddr_node;
+        /* how many clients are using vaddr? */
+        u32 use_cpt;
+
         /* will point to the higher half of pages */
         dma_addr_t *dma_addrs;
         /* array of base.mem->size pages (+ dma_addr_ts) */
@@ -109,8 +110,6 @@ struct gk20a_instmem {
 
         /* Only used by DMA API */
         struct dma_attrs attrs;
-
-        void __iomem * (*cpu_map)(struct nvkm_memory *);
 };
 #define gk20a_instmem(p) container_of((p), struct gk20a_instmem, base)
 
@@ -132,46 +131,19 @@ gk20a_instobj_size(struct nvkm_memory *memory)
         return (u64)gk20a_instobj(memory)->mem.size << 12;
 }
 
-static void __iomem *
-gk20a_instobj_cpu_map_dma(struct nvkm_memory *memory)
-{
-        struct gk20a_instobj_dma *node = gk20a_instobj_dma(memory);
-        struct device *dev = node->base.imem->base.subdev.device->dev;
-        int npages = nvkm_memory_size(memory) >> 12;
-        struct page *pages[npages];
-        int i;
-
-        /* phys_to_page does not exist on all platforms... */
-        pages[0] = pfn_to_page(dma_to_phys(dev, node->handle) >> PAGE_SHIFT);
-        for (i = 1; i < npages; i++)
-                pages[i] = pages[0] + i;
-
-        return vmap(pages, npages, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
-}
-
-static void __iomem *
-gk20a_instobj_cpu_map_iommu(struct nvkm_memory *memory)
-{
-        struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
-        int npages = nvkm_memory_size(memory) >> 12;
-
-        return vmap(node->pages, npages, VM_MAP,
-                    pgprot_writecombine(PAGE_KERNEL));
-}
-
 /*
  * Recycle the vaddr of obj. Must be called with gk20a_instmem::lock held.
  */
 static void
-gk20a_instobj_recycle_vaddr(struct gk20a_instobj *obj)
+gk20a_instobj_iommu_recycle_vaddr(struct gk20a_instobj_iommu *obj)
 {
-        struct gk20a_instmem *imem = obj->imem;
+        struct gk20a_instmem *imem = obj->base.imem;
         /* there should not be any user left... */
         WARN_ON(obj->use_cpt);
         list_del(&obj->vaddr_node);
-        vunmap(obj->vaddr);
-        obj->vaddr = NULL;
-        imem->vaddr_use -= nvkm_memory_size(&obj->memory);
+        vunmap(obj->base.vaddr);
+        obj->base.vaddr = NULL;
+        imem->vaddr_use -= nvkm_memory_size(&obj->base.memory);
         nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n", imem->vaddr_use,
                    imem->vaddr_max);
 }
@@ -187,17 +159,30 @@ gk20a_instmem_vaddr_gc(struct gk20a_instmem *imem, const u64 size)
                 if (list_empty(&imem->vaddr_lru))
                         break;
 
-                gk20a_instobj_recycle_vaddr(list_first_entry(&imem->vaddr_lru,
-                                struct gk20a_instobj, vaddr_node));
+                gk20a_instobj_iommu_recycle_vaddr(
+                                list_first_entry(&imem->vaddr_lru,
+                                struct gk20a_instobj_iommu, vaddr_node));
         }
 }
 
 static void __iomem *
-gk20a_instobj_acquire(struct nvkm_memory *memory)
+gk20a_instobj_acquire_dma(struct nvkm_memory *memory)
 {
         struct gk20a_instobj *node = gk20a_instobj(memory);
         struct gk20a_instmem *imem = node->imem;
         struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
+
+        nvkm_ltc_flush(ltc);
+
+        return node->vaddr;
+}
+
+static void __iomem *
+gk20a_instobj_acquire_iommu(struct nvkm_memory *memory)
+{
+        struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
+        struct gk20a_instmem *imem = node->base.imem;
+        struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
         const u64 size = nvkm_memory_size(memory);
         unsigned long flags;
 
@@ -205,7 +190,7 @@ gk20a_instobj_acquire(struct nvkm_memory *memory)
 
         spin_lock_irqsave(&imem->lock, flags);
 
-        if (node->vaddr) {
+        if (node->base.vaddr) {
                 if (!node->use_cpt) {
                         /* remove from LRU list since mapping in use again */
                         list_del(&node->vaddr_node);
@@ -216,9 +201,10 @@ gk20a_instobj_acquire(struct nvkm_memory *memory)
         /* try to free some address space if we reached the limit */
         gk20a_instmem_vaddr_gc(imem, size);
 
-        node->vaddr = imem->cpu_map(memory);
-
-        if (!node->vaddr) {
+        /* map the pages */
+        node->base.vaddr = vmap(node->pages, size >> PAGE_SHIFT, VM_MAP,
+                                pgprot_writecombine(PAGE_KERNEL));
+        if (!node->base.vaddr) {
                 nvkm_error(&imem->base.subdev, "cannot map instobj - "
                            "this is not going to end well...\n");
                 goto out;
@@ -232,15 +218,25 @@ out:
         node->use_cpt++;
         spin_unlock_irqrestore(&imem->lock, flags);
 
-        return node->vaddr;
+        return node->base.vaddr;
 }
 
 static void
-gk20a_instobj_release(struct nvkm_memory *memory)
+gk20a_instobj_release_dma(struct nvkm_memory *memory)
 {
         struct gk20a_instobj *node = gk20a_instobj(memory);
         struct gk20a_instmem *imem = node->imem;
         struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
+
+        nvkm_ltc_invalidate(ltc);
+}
+
+static void
+gk20a_instobj_release_iommu(struct nvkm_memory *memory)
+{
+        struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
+        struct gk20a_instmem *imem = node->base.imem;
+        struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
         unsigned long flags;
 
         spin_lock_irqsave(&imem->lock, flags);
@@ -284,27 +280,6 @@ gk20a_instobj_map(struct nvkm_memory *memory, struct nvkm_vma *vma, u64 offset)
         nvkm_vm_map_at(vma, offset, &node->mem);
 }
 
-/*
- * Clear the CPU mapping of an instobj if it exists
- */
-static void
-gk20a_instobj_dtor(struct gk20a_instobj *node)
-{
-        struct gk20a_instmem *imem = node->imem;
-        unsigned long flags;
-
-        spin_lock_irqsave(&imem->lock, flags);
-
-        /* vaddr has already been recycled */
-        if (!node->vaddr)
-                goto out;
-
-        gk20a_instobj_recycle_vaddr(node);
-
-out:
-        spin_unlock_irqrestore(&imem->lock, flags);
-}
-
 static void *
 gk20a_instobj_dtor_dma(struct nvkm_memory *memory)
 {
@@ -312,12 +287,10 @@ gk20a_instobj_dtor_dma(struct nvkm_memory *memory)
         struct gk20a_instmem *imem = node->base.imem;
         struct device *dev = imem->base.subdev.device->dev;
 
-        gk20a_instobj_dtor(&node->base);
-
-        if (unlikely(!node->cpuaddr))
+        if (unlikely(!node->base.vaddr))
                 goto out;
 
-        dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->cpuaddr,
+        dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->base.vaddr,
                        node->handle, &imem->attrs);
 
 out:
@@ -331,13 +304,20 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory)
         struct gk20a_instmem *imem = node->base.imem;
         struct device *dev = imem->base.subdev.device->dev;
         struct nvkm_mm_node *r;
+        unsigned long flags;
         int i;
 
-        gk20a_instobj_dtor(&node->base);
-
         if (unlikely(list_empty(&node->base.mem.regions)))
                 goto out;
 
+        spin_lock_irqsave(&imem->lock, flags);
+
+        /* vaddr has already been recycled */
+        if (node->base.vaddr)
+                gk20a_instobj_iommu_recycle_vaddr(node);
+
+        spin_unlock_irqrestore(&imem->lock, flags);
+
         r = list_first_entry(&node->base.mem.regions, struct nvkm_mm_node,
                              rl_entry);
 
@@ -368,8 +348,8 @@ gk20a_instobj_func_dma = {
         .target = gk20a_instobj_target,
         .addr = gk20a_instobj_addr,
         .size = gk20a_instobj_size,
-        .acquire = gk20a_instobj_acquire,
-        .release = gk20a_instobj_release,
+        .acquire = gk20a_instobj_acquire_dma,
+        .release = gk20a_instobj_release_dma,
         .rd32 = gk20a_instobj_rd32,
         .wr32 = gk20a_instobj_wr32,
         .map = gk20a_instobj_map,
@@ -381,8 +361,8 @@ gk20a_instobj_func_iommu = {
         .target = gk20a_instobj_target,
         .addr = gk20a_instobj_addr,
         .size = gk20a_instobj_size,
-        .acquire = gk20a_instobj_acquire,
-        .release = gk20a_instobj_release,
+        .acquire = gk20a_instobj_acquire_iommu,
+        .release = gk20a_instobj_release_iommu,
         .rd32 = gk20a_instobj_rd32,
         .wr32 = gk20a_instobj_wr32,
         .map = gk20a_instobj_map,
@@ -402,10 +382,10 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align,
 
         nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory);
 
-        node->cpuaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT,
-                                        &node->handle, GFP_KERNEL,
-                                        &imem->attrs);
-        if (!node->cpuaddr) {
+        node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT,
+                                           &node->handle, GFP_KERNEL,
+                                           &imem->attrs);
+        if (!node->base.vaddr) {
                 nvkm_error(subdev, "cannot allocate DMA memory\n");
                 return -ENOMEM;
         }
@@ -611,18 +591,14 @@ gk20a_instmem_new(struct nvkm_device *device, int index,
                 imem->mm = &tdev->iommu.mm;
                 imem->domain = tdev->iommu.domain;
                 imem->iommu_pgshift = tdev->iommu.pgshift;
-                imem->cpu_map = gk20a_instobj_cpu_map_iommu;
                 imem->iommu_bit = tdev->func->iommu_bit;
 
                 nvkm_info(&imem->base.subdev, "using IOMMU\n");
         } else {
                 init_dma_attrs(&imem->attrs);
-                /* We will access the memory through our own mapping */
                 dma_set_attr(DMA_ATTR_NON_CONSISTENT, &imem->attrs);
                 dma_set_attr(DMA_ATTR_WEAK_ORDERING, &imem->attrs);
                 dma_set_attr(DMA_ATTR_WRITE_COMBINE, &imem->attrs);
-                dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &imem->attrs);
-                imem->cpu_map = gk20a_instobj_cpu_map_dma;
 
                 nvkm_info(&imem->base.subdev, "using DMA API\n");
         }
diff --git a/lib/include/nvif/os.h b/lib/include/nvif/os.h
index 2df30489179a..e8c06cb254dc 100644
--- a/lib/include/nvif/os.h
+++ b/lib/include/nvif/os.h
@@ -943,12 +943,6 @@ dma_unmap_page(struct device *pdev, dma_addr_t addr, int size, unsigned flags)
 {
 }
 
-static inline phys_addr_t
-dma_to_phys(struct device *dev, dma_addr_t addr)
-{
-        return 0;
-}
-
 /******************************************************************************
  * PCI
  *****************************************************************************/
-- 
2.6.2
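To make the change concrete: on the DMA path, dma_alloc_attrs() itself now
provides the long-lived CPU mapping, where the removed code derived one via
dma_to_phys() and vmap(). A minimal sketch of the pattern the patch adopts
(the helper names here are hypothetical, not part of the driver):

#include <linux/dma-mapping.h>
#include <linux/gfp.h>

/*
 * Sketch: keep the CPU mapping that dma_alloc_attrs() returns for the
 * whole lifetime of the buffer, instead of reconstructing one from the
 * DMA address.
 */
static void *instobj_dma_alloc(struct device *dev, size_t size,
                               dma_addr_t *handle, struct dma_attrs *attrs)
{
        /* the returned pointer is both the allocation and its CPU mapping */
        return dma_alloc_attrs(dev, size, handle, GFP_KERNEL, attrs);
}

static void instobj_dma_free(struct device *dev, size_t size, void *vaddr,
                             dma_addr_t handle, struct dma_attrs *attrs)
{
        dma_free_attrs(dev, size, vaddr, handle, attrs);
}

Note that DMA_ATTR_NO_KERNEL_MAPPING must not be set for the returned
pointer to be usable from the CPU, which is why the patch also drops that
attribute.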
Ben Skeggs
[Nouveau] [PATCH] instmem/gk20a: use DMA API CPU mapping
On 11/11/2015 06:07 PM, Alexandre Courbot wrote:
> Commit 69c4938249fb ("drm/nouveau/instmem/gk20a: use direct CPU access")
> tried to be smart when using the DMA-API by managing the CPU mappings of
> buffers allocated with the DMA-API itself. In doing so, it relied on
> dma_to_phys(), an architecture-private function that is not available
> everywhere. This broke the build on several architectures.
>
> Since there is no reliable and portable way to obtain the physical
> address of a DMA-API buffer, stop trying to be smart and just use the
> CPU mapping that the DMA-API can provide. This means that buffers will
> be CPU-mapped for their whole life, as opposed to only when we need
> them, but using the DMA-API here is a fallback for when no IOMMU is
> available, so we should not expect optimal behavior anyway.
>
> This makes the IOMMU and DMA-API implementations of instmem diverge
> enough that we should maybe put them into separate files...
>
> Signed-off-by: Alexandre Courbot <acourbot at nvidia.com>
> ---
> Ben/Dave, since Dave's fix has reached mainline and builds are no
> longer broken, we can proceed in one of two ways:
>
> 1) Ben merges this for 4.4 and lets it flow into -rc2
> 2) I send another fix against the kernel tree
>
> Which one shall I do?

I just spoke to Dave, and I'll take this in my tree for 4.5 if
everything works fine with the temporary hack fix. Does that sound OK
to you?

Ben.
Alexandre Courbot
2015-Nov-12 02:07 UTC
[Nouveau] [PATCH] instmem/gk20a: use DMA API CPU mapping
On 11/12/2015 07:49 AM, Ben Skeggs wrote:
> On 11/11/2015 06:07 PM, Alexandre Courbot wrote:
>> Ben/Dave, since Dave's fix has reached mainline and builds are no
>> longer broken, we can proceed in one of two ways:
>>
>> 1) Ben merges this for 4.4 and lets it flow into -rc2
>> 2) I send another fix against the kernel tree
>>
>> Which one shall I do?
>
> I just spoke to Dave, and I'll take this in my tree for 4.5 if
> everything works fine with the temporary hack fix. Does that sound OK
> to you?

I would rather get rid of this illicit dma_to_phys() quickly so other
people don't start thinking it's ok to use in drivers/ (or start
throwing stones at me), but your call.
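When a driver genuinely needs struct page pointers for vmap(), the portable
route is the one the IOMMU path of this patch already takes: allocate the
pages yourself and remember them, so no DMA-to-physical translation is ever
needed. A rough sketch under that assumption (hypothetical helper, error
handling elided):

#include <linux/mm.h>
#include <linux/vmalloc.h>

/*
 * Because the caller allocated these pages itself (as the IOMMU path
 * does), it still holds the struct page pointers and can map them
 * without ever calling dma_to_phys().
 */
static void *map_own_pages(struct page **pages, int npages)
{
        return vmap(pages, npages, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
}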