Fix a very shameful memory leak and a compilation error caused by the use of
non-exported CMA functions. The workaround for the latter is not really elegant
(the CMA functions are replaced by a runtime failure when we are compiled as a
module), but it is temporary and still an improvement over the current
situation (a compile error).

Alexandre Courbot (2):
  drm/gk20a/fb: fix huge memory leak
  drm/gk20a/fb: fix compile error with CMA and module

 drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c | 95 ++++++++++++++++-------
 1 file changed, 67 insertions(+), 28 deletions(-)

-- 
1.9.2
Alexandre Courbot
2014-May-19 06:51 UTC
[Nouveau] [PATCH 1/2] drm/gk20a/fb: fix huge memory leak
CMA-allocated memory must be freed by an exact mirror call to
dma_release_from_contiguous(). It cannot be freed page-by-page as was
previously assumed; doing so leaks memory severely. This patch records
the address and size of every allocated memory chunk so they can be
properly freed when needed.

Signed-off-by: Alexandre Courbot <acourbot at nvidia.com>
---
 drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c | 74 ++++++++++++++---------
 1 file changed, 46 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c
index 7effd1a63458..5904af52e6d6 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c
@@ -28,28 +28,34 @@
 #include <linux/types.h>
 #include <linux/dma-contiguous.h>
 
+struct gk20a_mem_chunk {
+	struct list_head list;
+	struct page *pages;
+	u32 npages;
+};
+
+struct gk20a_mem {
+	struct nouveau_mem base;
+	struct list_head head;
+};
+
 static void
 gk20a_ram_put(struct nouveau_fb *pfb, struct nouveau_mem **pmem)
 {
 	struct device *dev = nv_device_base(nv_device(pfb));
-	struct nouveau_mem *mem = *pmem;
-	int i;
+	struct gk20a_mem *mem = container_of(*pmem, struct gk20a_mem, base);
+	struct gk20a_mem_chunk *chunk, *n;
 
 	*pmem = NULL;
 	if (unlikely(mem == NULL))
 		return;
 
-	for (i = 0; i < mem->size; i++) {
-		struct page *page;
-
-		if (mem->pages[i] == 0)
-			break;
-
-		page = pfn_to_page(mem->pages[i] >> PAGE_SHIFT);
-		dma_release_from_contiguous(dev, page, 1);
+	list_for_each_entry_safe(chunk, n, &mem->head, list) {
+		dma_release_from_contiguous(dev, chunk->pages, chunk->npages);
+		kfree(chunk);
 	}
 
-	kfree(mem->pages);
+	kfree(mem->base.pages);
 	kfree(mem);
 }
 
@@ -58,9 +64,8 @@ gk20a_ram_get(struct nouveau_fb *pfb, u64 size, u32 align, u32 ncmin,
 	      u32 memtype, struct nouveau_mem **pmem)
 {
 	struct device *dev = nv_device_base(nv_device(pfb));
-	struct nouveau_mem *mem;
+	struct gk20a_mem *mem;
 	int type = memtype & 0xff;
-	dma_addr_t dma_addr;
 	int npages;
 	int order;
 	int i;
@@ -95,44 +100,57 @@ gk20a_ram_get(struct nouveau_fb *pfb, u64 size, u32 align, u32 ncmin,
 	if (!mem)
 		return -ENOMEM;
 
-	mem->size = npages;
-	mem->memtype = type;
+	mem->base.size = npages;
+	mem->base.memtype = type;
 
-	mem->pages = kzalloc(sizeof(dma_addr_t) * npages, GFP_KERNEL);
-	if (!mem) {
+	mem->base.pages = kzalloc(sizeof(dma_addr_t) * npages, GFP_KERNEL);
+	if (!mem->base.pages) {
 		kfree(mem);
 		return -ENOMEM;
 	}
 
+	INIT_LIST_HEAD(&mem->head);
+
+	*pmem = &mem->base;
+
 	while (npages) {
-		struct page *pages;
+		struct gk20a_mem_chunk *chunk;
+		dma_addr_t addr;
 		int pos = 0;
 
 		/* don't overflow in case size is not a multiple of ncmin */
 		if (ncmin > npages)
 			ncmin = npages;
 
-		pages = dma_alloc_from_contiguous(dev, ncmin, order);
-		if (!pages) {
-			gk20a_ram_put(pfb, &mem);
+		chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
+		if (!chunk) {
+			gk20a_ram_put(pfb, pmem);
 			return -ENOMEM;
 		}
 
-		dma_addr = (dma_addr_t)(page_to_pfn(pages) << PAGE_SHIFT);
+		chunk->pages = dma_alloc_from_contiguous(dev, ncmin, order);
+		if (!chunk->pages) {
+			kfree(chunk);
+			gk20a_ram_put(pfb, pmem);
+			return -ENOMEM;
+		}
 
-		nv_debug(pfb, "  alloc count: %x, order: %x, addr: %pad\n", ncmin,
-			 order, &dma_addr);
+		chunk->npages = ncmin;
+		list_add_tail(&chunk->list, &mem->head);
+
+		addr = (dma_addr_t)(page_to_pfn(chunk->pages) << PAGE_SHIFT);
+
+		nv_debug(pfb, "  alloc count: %x, order: %x, addr: %pad\n",
			 ncmin, order, &addr);
 
 		for (i = 0; i < ncmin; i++)
-			mem->pages[pos + i] = dma_addr + (PAGE_SIZE * i);
+			mem->base.pages[pos + i] = addr + (PAGE_SIZE * i);
 
 		pos += ncmin;
 		npages -= ncmin;
 	}
 
-	mem->offset = (u64)mem->pages[0];
-
-	*pmem = mem;
+	mem->base.offset = (u64)mem->base.pages[0];
 
 	return 0;
 }
-- 
1.9.2
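The core of the fix is simple bookkeeping: every dma_alloc_from_contiguous() call is recorded as a chunk on a list, so it can later be undone by a dma_release_from_contiguous() of exactly the same size. Below is a minimal sketch of that pattern detached from the Nouveau structures; the demo_* names are illustrative placeholders, not part of the driver.

```c
#include <linux/device.h>
#include <linux/dma-contiguous.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/slab.h>

/* Illustrative only -- not the driver's types. */
struct demo_chunk {
	struct list_head list;
	struct page *pages;
	unsigned int npages;
};

/* Allocate 'total' pages in units of at most 'ncmin', recording every chunk. */
static int demo_alloc(struct device *dev, struct list_head *chunks,
		      unsigned int total, unsigned int ncmin,
		      unsigned int order)
{
	while (total) {
		struct demo_chunk *chunk;
		unsigned int count = min(ncmin, total);

		chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
		if (!chunk)
			return -ENOMEM;

		chunk->pages = dma_alloc_from_contiguous(dev, count, order);
		if (!chunk->pages) {
			kfree(chunk);
			return -ENOMEM;
		}

		chunk->npages = count;
		list_add_tail(&chunk->list, chunks);
		total -= count;
	}
	return 0;
}

/* Release every recorded chunk with a mirrored call of the same size. */
static void demo_free(struct device *dev, struct list_head *chunks)
{
	struct demo_chunk *chunk, *tmp;

	list_for_each_entry_safe(chunk, tmp, chunks, list) {
		dma_release_from_contiguous(dev, chunk->pages, chunk->npages);
		kfree(chunk);
	}
}
```

If an allocation fails partway through, the caller tears down whatever was already recorded through the same release path, which is what gk20a_ram_get() now does by calling gk20a_ram_put() on error.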
Alexandre Courbot
2014-May-19 06:51 UTC
[Nouveau] [PATCH 2/2] drm/gk20a/fb: fix compile error with CMA and module
CMA functions are not available to kernel modules, but the GK20A FB
driver currently (and temporarily) relies on them. This patch replaces
the calls to CMA functions in problematic cases (CMA enabled and
Nouveau compiled as a module) with dummy stubs that will make this
particular driver fail, but at least won't produce a compile error.
This is a temporary fix until a better memory allocation scheme is
devised.

Signed-off-by: Alexandre Courbot <acourbot at nvidia.com>
---
 drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c | 25 +++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c
index 5904af52e6d6..fa867ce5449e 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c
@@ -39,6 +39,27 @@ struct gk20a_mem {
 	struct list_head head;
 };
 
+/*
+ * CMA is not available to modules. Until we find a better solution, make
+ * memory allocations fail in that case.
+ */
+#if IS_ENABLED(CONFIG_CMA) && IS_MODULE(CONFIG_DRM_NOUVEAU)
+static inline struct page *
+alloc_contiguous_memory(struct device *dev, int count, unsigned int order)
+{
+	dev_err(dev, "cannot use CMA from a module - allocation failed\n");
+	return NULL;
+}
+
+static inline void
+release_contiguous_memory(struct device *dev, struct page *page, int count)
+{
+}
+#else
+#define alloc_contiguous_memory(d, c, o) dma_alloc_from_contiguous(d, c, o)
+#define release_contiguous_memory(d, p, c) dma_release_from_contiguous(d, p, c)
+#endif
+
 static void
 gk20a_ram_put(struct nouveau_fb *pfb, struct nouveau_mem **pmem)
 {
@@ -51,7 +72,7 @@ gk20a_ram_put(struct nouveau_fb *pfb, struct nouveau_mem **pmem)
 		return;
 
 	list_for_each_entry_safe(chunk, n, &mem->head, list) {
-		dma_release_from_contiguous(dev, chunk->pages, chunk->npages);
+		release_contiguous_memory(dev, chunk->pages, chunk->npages);
 		kfree(chunk);
 	}
 
@@ -128,7 +149,7 @@ gk20a_ram_get(struct nouveau_fb *pfb, u64 size, u32 align, u32 ncmin,
 			return -ENOMEM;
 		}
 
-		chunk->pages = dma_alloc_from_contiguous(dev, ncmin, order);
+		chunk->pages = alloc_contiguous_memory(dev, ncmin, order);
 		if (!chunk->pages) {
 			kfree(chunk);
 			gk20a_ram_put(pfb, pmem);
-- 
1.9.2
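The same guard can be reused whenever a driver optionally calls a symbol that is not exported to modules: keep the real call in built-in configurations and substitute a failing inline stub otherwise. A generic sketch of the pattern follows; CONFIG_FASTBUF, CONFIG_MY_DRIVER and fastbuf_reserve() are made-up placeholders, not real kernel symbols.

```c
#include <linux/device.h>
#include <linux/kconfig.h>
#include <linux/types.h>

/*
 * Hypothetical example: fastbuf_reserve() stands in for any symbol that is
 * not exported to modules. When the consumer is built as a module, replace
 * the call with a stub that fails at runtime instead of breaking the build.
 */
#if IS_ENABLED(CONFIG_FASTBUF) && IS_MODULE(CONFIG_MY_DRIVER)
static inline void *my_reserve_buffer(struct device *dev, size_t size)
{
	dev_err(dev, "fastbuf is unavailable to modules - reservation failed\n");
	return NULL;
}
#else
#define my_reserve_buffer(dev, size) fastbuf_reserve(dev, size)
#endif
```

Callers only ever see a NULL return and bail out with an error, exactly as gk20a_ram_get() does above, so the driver degrades to a runtime failure rather than a link or compile error.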
Ben, I am withdrawing this series as it will be superseded by a new version.
You probably did not intend to merge it, but just in case. :)

On Mon, May 19, 2014 at 3:51 PM, Alexandre Courbot <acourbot at nvidia.com> wrote:
> Fix a very shameful memory leak and a compilation error caused by the use of
> non-exported CMA functions. The workaround for the latter is not really elegant
> (the CMA functions are replaced by a runtime failure when we are compiled as a
> module), but it is temporary and still an improvement over the current
> situation (a compile error).
>
> Alexandre Courbot (2):
>   drm/gk20a/fb: fix huge memory leak
>   drm/gk20a/fb: fix compile error with CMA and module
>
>  drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c | 95 ++++++++++++++++-------
>  1 file changed, 67 insertions(+), 28 deletions(-)
>
> --
> 1.9.2
>