Timur Tabi
2024-Feb-02  23:06 UTC
[PATCH 1/2] [v3] drm/nouveau: fix several DMA buffer leaks
Nouveau manages GSP-RM DMA buffers with nvkm_gsp_mem objects.  Several of
these buffers are never deallocated.  Some of them can be deallocated
right after GSP-RM is initialized, but the rest need to stay until the
driver unloads.
Also further bullet-proof these objects by poisoning the buffer and
clearing the nvkm_gsp_mem object when it is deallocated.  Poisoning
the buffer should trigger an error (or crash) from GSP-RM if it tries
to access the buffer after we've deallocated it, because we were wrong
about when it is safe to deallocate.
Finally, change the mem->size field to a size_t because that's the same
type that dma_alloc_coherent expects.
Fixes: 176fdcbddfd2 ("drm/nouveau/gsp/r535: add support for booting GSP-RM")
Signed-off-by: Timur Tabi <ttabi at nvidia.com>
---
v3: use size_t
v2: add buffer poisoning
 .../gpu/drm/nouveau/include/nvkm/subdev/gsp.h |  2 +-
 .../gpu/drm/nouveau/nvkm/subdev/gsp/r535.c    | 59 ++++++++++++-------
 2 files changed, 39 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
index 5c6b8536e31c..3fbc57b16a05 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
@@ -9,7 +9,7 @@
 #define GSP_PAGE_SIZE  BIT(GSP_PAGE_SHIFT)
 
 struct nvkm_gsp_mem {
-	u32 size;
+	size_t size;
 	void *data;
 	dma_addr_t addr;
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index 17fc429ee50b..a9030eb83b4d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -999,6 +999,32 @@ r535_gsp_rpc_get_gsp_static_info(struct nvkm_gsp *gsp)
 	return 0;
 }
 
+static void
+nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem)
+{
+	if (mem->data) {
+		/*
+		 * Poison the buffer to catch any unexpected access from
+		 * GSP-RM if the buffer was prematurely freed.
+		 */
+		memset(mem->data, 0xFF, mem->size);
+
+		dma_free_coherent(gsp->subdev.device->dev, mem->size, mem->data, mem->addr);
+		memset(mem, 0, sizeof(*mem));
+	}
+}
+
+static int
+nvkm_gsp_mem_ctor(struct nvkm_gsp *gsp, size_t size, struct nvkm_gsp_mem *mem)
+{
+	mem->size = size;
+	mem->data = dma_alloc_coherent(gsp->subdev.device->dev, size, &mem->addr, GFP_KERNEL);
+	if (WARN_ON(!mem->data))
+		return -ENOMEM;
+
+	return 0;
+}
+
 static int
 r535_gsp_postinit(struct nvkm_gsp *gsp)
 {
@@ -1026,6 +1052,13 @@ r535_gsp_postinit(struct nvkm_gsp *gsp)
 
 	nvkm_inth_allow(&gsp->subdev.inth);
 	nvkm_wr32(device, 0x110004, 0x00000040);
+
+	/* Release the DMA buffers that were needed only for boot and init */
+	nvkm_gsp_mem_dtor(gsp, &gsp->boot.fw);
+	nvkm_gsp_mem_dtor(gsp, &gsp->libos);
+	nvkm_gsp_mem_dtor(gsp, &gsp->rmargs);
+	nvkm_gsp_mem_dtor(gsp, &gsp->wpr_meta);
+
 	return ret;
 }
 
@@ -1760,27 +1793,6 @@ r535_gsp_msg_run_cpu_sequencer(void *priv, u32 fn, void *repv, u32 repc)
 	return 0;
 }
 
-static void
-nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem)
-{
-	if (mem->data) {
-		dma_free_coherent(gsp->subdev.device->dev, mem->size, mem->data, mem->addr);
-		mem->data = NULL;
-	}
-}
-
-static int
-nvkm_gsp_mem_ctor(struct nvkm_gsp *gsp, u32 size, struct nvkm_gsp_mem *mem)
-{
-	mem->size = size;
-	mem->data = dma_alloc_coherent(gsp->subdev.device->dev, size, &mem->addr, GFP_KERNEL);
-	if (WARN_ON(!mem->data))
-		return -ENOMEM;
-
-	return 0;
-}
-
-
 static int
 r535_gsp_booter_unload(struct nvkm_gsp *gsp, u32 mbox0, u32 mbox1)
 {
@@ -2378,6 +2390,11 @@ r535_gsp_dtor(struct nvkm_gsp *gsp)
 	mutex_destroy(&gsp->cmdq.mutex);
 
 	r535_gsp_dtor_fws(gsp);
+
+	nvkm_gsp_mem_dtor(gsp, &gsp->shm.mem);
+	nvkm_gsp_mem_dtor(gsp, &gsp->loginit);
+	nvkm_gsp_mem_dtor(gsp, &gsp->logintr);
+	nvkm_gsp_mem_dtor(gsp, &gsp->logrm);
 }
 
 int
-- 
2.34.1
Timur Tabi
2024-Feb-02  23:06 UTC
[PATCH 2/2] drm/nouveau: nvkm_gsp_radix3_sg() should use nvkm_gsp_mem_ctor()
Function nvkm_gsp_radix3_sg() uses nvkm_gsp_mem objects to allocate the
radix3 tables, but it unnecessarily creates those objects manually
instead of using the standard nvkm_gsp_mem_ctor() function like the
rest of the code does.
Signed-off-by: Timur Tabi <ttabi at nvidia.com>
---
 drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index a9030eb83b4d..d065389e3618 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -2178,20 +2178,20 @@ nvkm_gsp_radix3_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_radix3 *rx3)
  * See kgspCreateRadix3_IMPL
  */
 static int
-nvkm_gsp_radix3_sg(struct nvkm_device *device, struct sg_table *sgt, u64 size,
+nvkm_gsp_radix3_sg(struct nvkm_gsp *gsp, struct sg_table *sgt, u64 size,
 		   struct nvkm_gsp_radix3 *rx3)
 {
 	u64 addr;
 
 	for (int i = ARRAY_SIZE(rx3->mem) - 1; i >= 0; i--) {
 		u64 *ptes;
-		int idx;
+		size_t bufsize;
+		int ret, idx;
 
-		rx3->mem[i].size = ALIGN((size / GSP_PAGE_SIZE) * sizeof(u64), GSP_PAGE_SIZE);
-		rx3->mem[i].data = dma_alloc_coherent(device->dev, rx3->mem[i].size,
-						      &rx3->mem[i].addr, GFP_KERNEL);
-		if (WARN_ON(!rx3->mem[i].data))
-			return -ENOMEM;
+		bufsize = ALIGN((size / GSP_PAGE_SIZE) * sizeof(u64), GSP_PAGE_SIZE);
+		ret = nvkm_gsp_mem_ctor(gsp, bufsize, &rx3->mem[i]);
+		if (ret)
+			return ret;
 
 		ptes = rx3->mem[i].data;
 		if (i == 2) {
@@ -2231,7 +2231,7 @@ r535_gsp_fini(struct nvkm_gsp *gsp, bool suspend)
 		if (ret)
 			return ret;
 
-		ret = nvkm_gsp_radix3_sg(gsp->subdev.device, &gsp->sr.sgt, len, &gsp->sr.radix3);
+		ret = nvkm_gsp_radix3_sg(gsp, &gsp->sr.sgt, len, &gsp->sr.radix3);
 		if (ret)
 			return ret;
 
@@ -2439,7 +2439,7 @@ r535_gsp_oneinit(struct nvkm_gsp *gsp)
 	memcpy(gsp->sig.data, data, size);
 
 	/* Build radix3 page table for ELF image. */
-	ret = nvkm_gsp_radix3_sg(device, &gsp->fw.mem.sgt, gsp->fw.len, &gsp->radix3);
+	ret = nvkm_gsp_radix3_sg(gsp, &gsp->fw.mem.sgt, gsp->fw.len, &gsp->radix3);
 	if (ret)
 		return ret;
 
-- 
2.34.1
Danilo Krummrich
2024-Feb-05  18:08 UTC
[PATCH 1/2] [v3] drm/nouveau: fix several DMA buffer leaks
On 2/3/24 00:06, Timur Tabi wrote:> Nouveau manages GSP-RM DMA buffers with nvkm_gsp_mem objects. Several of > these buffers are never dealloced. Some of them can be deallocated > right after GSP-RM is initialized, but the rest need to stay until the > driver unloads. > > Also futher bullet-proof these objects by poisoning the buffer and > clearing the nvkm_gsp_mem object when it is deallocated. Poisoning > the buffer should trigger an error (or crash) from GSP-RM if it tries > to access the buffer after we've deallocated it, because we were wrong > about when it is safe to deallocate. > > Finally, change the mem->size field to a size_t because that's the same > type that dma_alloc_coherent expects. > > Fixes: 176fdcbddfd2 ("drm/nouveau/gsp/r535: add support for booting GSP-RM") > Signed-off-by: Timur Tabi <ttabi at nvidia.com>Series applied to drm-misc-fixes, thanks!> --- > v3: use size_t > v2: add buffer poisoning > > .../gpu/drm/nouveau/include/nvkm/subdev/gsp.h | 2 +- > .../gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 59 ++++++++++++------- > 2 files changed, 39 insertions(+), 22 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h > index 5c6b8536e31c..3fbc57b16a05 100644 > --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h > +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h > @@ -9,7 +9,7 @@ > #define GSP_PAGE_SIZE BIT(GSP_PAGE_SHIFT) > > struct nvkm_gsp_mem { > - u32 size; > + size_t size; > void *data; > dma_addr_t addr; > }; > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c > index 17fc429ee50b..a9030eb83b4d 100644 > --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c > @@ -999,6 +999,32 @@ r535_gsp_rpc_get_gsp_static_info(struct nvkm_gsp *gsp) > return 0; > } > > +static void > +nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem) > +{ > + if 
(mem->data) { > + /* > + * Poison the buffer to catch any unexpected access from > + * GSP-RM if the buffer was prematurely freed. > + */ > + memset(mem->data, 0xFF, mem->size); > + > + dma_free_coherent(gsp->subdev.device->dev, mem->size, mem->data, mem->addr); > + memset(mem, 0, sizeof(*mem)); > + } > +} > + > +static int > +nvkm_gsp_mem_ctor(struct nvkm_gsp *gsp, size_t size, struct nvkm_gsp_mem *mem) > +{ > + mem->size = size; > + mem->data = dma_alloc_coherent(gsp->subdev.device->dev, size, &mem->addr, GFP_KERNEL); > + if (WARN_ON(!mem->data)) > + return -ENOMEM; > + > + return 0; > +} > + > static int > r535_gsp_postinit(struct nvkm_gsp *gsp) > { > @@ -1026,6 +1052,13 @@ r535_gsp_postinit(struct nvkm_gsp *gsp) > > nvkm_inth_allow(&gsp->subdev.inth); > nvkm_wr32(device, 0x110004, 0x00000040); > + > + /* Release the DMA buffers that were needed only for boot and init */ > + nvkm_gsp_mem_dtor(gsp, &gsp->boot.fw); > + nvkm_gsp_mem_dtor(gsp, &gsp->libos); > + nvkm_gsp_mem_dtor(gsp, &gsp->rmargs); > + nvkm_gsp_mem_dtor(gsp, &gsp->wpr_meta); > + > return ret; > } > > @@ -1760,27 +1793,6 @@ r535_gsp_msg_run_cpu_sequencer(void *priv, u32 fn, void *repv, u32 repc) > return 0; > } > > -static void > -nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem) > -{ > - if (mem->data) { > - dma_free_coherent(gsp->subdev.device->dev, mem->size, mem->data, mem->addr); > - mem->data = NULL; > - } > -} > - > -static int > -nvkm_gsp_mem_ctor(struct nvkm_gsp *gsp, u32 size, struct nvkm_gsp_mem *mem) > -{ > - mem->size = size; > - mem->data = dma_alloc_coherent(gsp->subdev.device->dev, size, &mem->addr, GFP_KERNEL); > - if (WARN_ON(!mem->data)) > - return -ENOMEM; > - > - return 0; > -} > - > - > static int > r535_gsp_booter_unload(struct nvkm_gsp *gsp, u32 mbox0, u32 mbox1) > { > @@ -2378,6 +2390,11 @@ r535_gsp_dtor(struct nvkm_gsp *gsp) > mutex_destroy(&gsp->cmdq.mutex); > > r535_gsp_dtor_fws(gsp); > + > + nvkm_gsp_mem_dtor(gsp, &gsp->shm.mem); > + nvkm_gsp_mem_dtor(gsp, 
&gsp->loginit); > + nvkm_gsp_mem_dtor(gsp, &gsp->logintr); > + nvkm_gsp_mem_dtor(gsp, &gsp->logrm); > } > > int