thr3ads.net - Nouveau - [Nouveau] [PATCH 0/4] tegra: DMA mask and IOMMU bit fixes [Sep 2015]

If this information is useful, please help other people find it:
Share via:

Alexandre Courbot

2015-Sep-04 10:59 UTC

[Nouveau] [PATCH 0/4] tegra: DMA mask and IOMMU bit fixes

These 4 patches fix two issues that existed on Tegra regarding DMA:

1) The bit indicating whether to use an IOMMU or not was hardcoded; make this
   a platform property and use it in instmem
2) The DMA mask was not set for platform devices. Fix this by converting
   more pci_dma* to the DMA API, and use that more generic code to set the
   DMA mask properly for all platforms.

Tested on both x86 and Tegra and confirmed that the DMA mask was set properly
in both cases. In case there is worry about the conversion to the DMA API,
here is evidence that this change is a no-op for PCI as of 4.2:

http://lxr.free-electrons.com/ident?i=pci_set_dma_mask

Alexandre Courbot (4):
  platform: allow to specify the IOMMU bit
  instmem/gk20a: make use of the IOMMU bit
  ttm: convert to DMA API
  ttm: set the DMA mask for platform devices

 drm/nouveau/include/nvkm/core/tegra.h   |  3 +++
 drm/nouveau/nouveau_platform.c          | 14 ++++++++++++--
 drm/nouveau/nouveau_platform.h          | 10 ++++++++++
 drm/nouveau/nouveau_ttm.c               | 31 +++++++++++++++++++++----------
 drm/nouveau/nvkm/engine/device/tegra.c  |  9 ++++++++-
 drm/nouveau/nvkm/subdev/instmem/gk20a.c | 10 ++++++----
 6 files changed, 60 insertions(+), 17 deletions(-)

-- 
2.5.1

Alexandre Courbot

2015-Sep-04 10:59 UTC

head link

[Nouveau] [PATCH 1/4] platform: allow to specify the IOMMU bit

Current Tegra code taking advantage of the IOMMU assumes a hardcoded
value for the IOMMU bit. Make it a platform property instead for
flexibility.

Signed-off-by: Alexandre Courbot <acourbot at nvidia.com>
---
 drm/nouveau/include/nvkm/core/tegra.h  |  3 +++
 drm/nouveau/nouveau_platform.c         | 14 ++++++++++++--
 drm/nouveau/nouveau_platform.h         | 10 ++++++++++
 drm/nouveau/nvkm/engine/device/tegra.c |  9 ++++++++-
 4 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/drm/nouveau/include/nvkm/core/tegra.h
b/drm/nouveau/include/nvkm/core/tegra.h
index 5aa2480da25f..3e354edbc580 100644
--- a/drm/nouveau/include/nvkm/core/tegra.h
+++ b/drm/nouveau/include/nvkm/core/tegra.h
@@ -3,10 +3,13 @@
 #include <core/device.h>
 #include <core/mm.h>
 
+#include "nouveau_platform.h"
+
 struct nvkm_device_tegra {
 	struct nvkm_device device;
 	struct platform_device *pdev;
 	int irq;
+	const struct nouveau_platform_data *pdata;
 
 	struct reset_control *rst;
 	struct clk *clk;
diff --git a/drm/nouveau/nouveau_platform.c b/drm/nouveau/nouveau_platform.c
index 3eb665453165..f1bf2983b993 100644
--- a/drm/nouveau/nouveau_platform.c
+++ b/drm/nouveau/nouveau_platform.c
@@ -48,9 +48,19 @@ static int nouveau_platform_remove(struct platform_device
*pdev)
 }
 
 #if IS_ENABLED(CONFIG_OF)
+static const struct nouveau_platform_data gk20a_platform_data = {
+	.iommu_bit = 34,
+};
+
 static const struct of_device_id nouveau_platform_match[] = {
-	{ .compatible = "nvidia,gk20a" },
-	{ .compatible = "nvidia,gm20b" },
+	{
+		.compatible = "nvidia,gk20a",
+		.data = &gk20a_platform_data,
+	},
+	{
+		.compatible = "nvidia,gm20b",
+		.data = &gk20a_platform_data,
+	},
 	{ }
 };
 
diff --git a/drm/nouveau/nouveau_platform.h b/drm/nouveau/nouveau_platform.h
index f41056d0f5f4..fbce6a4ceab9 100644
--- a/drm/nouveau/nouveau_platform.h
+++ b/drm/nouveau/nouveau_platform.h
@@ -23,5 +23,15 @@
 #define __NOUVEAU_PLATFORM_H__
 #include "nouveau_drm.h"
 
+struct nouveau_platform_data
+{
+	/*
+	 * If an IOMMU is used, indicates which address bit will trigger an
+	 * IOMMU translation when set (when this bit is not set, IOMMU is
+	 * bypassed). A value of 0 means an IOMMU is never used.
+	 */
+	u8 iommu_bit;
+};
+
 extern struct platform_driver nouveau_platform_driver;
 #endif
diff --git a/drm/nouveau/nvkm/engine/device/tegra.c
b/drm/nouveau/nvkm/engine/device/tegra.c
index da57c8a60608..8ec717f20808 100644
--- a/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drm/nouveau/nvkm/engine/device/tegra.c
@@ -23,6 +23,8 @@
 #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER
 #include "priv.h"
 
+#include <linux/of_device.h>
+
 static int
 nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev)
 {
@@ -85,6 +87,9 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
 	unsigned long pgsize_bitmap;
 	int ret;
 
+	if (!tdev->pdata->iommu_bit)
+		return;
+
 	mutex_init(&tdev->iommu.mutex);
 
 	if (iommu_present(&platform_bus_type)) {
@@ -114,7 +119,8 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra
*tdev)
 			goto free_domain;
 
 		ret = nvkm_mm_init(&tdev->iommu.mm, 0,
-				   (1ULL << 40) >> tdev->iommu.pgshift, 1);
+				   (1ULL << tdev->pdata->iommu_bit) >>
+					tdev->iommu.pgshift, 1);
 		if (ret)
 			goto detach_device;
 	}
@@ -250,6 +256,7 @@ nvkm_device_tegra_new(struct platform_device *pdev,
 	*pdevice = &tdev->device;
 	tdev->pdev = pdev;
 	tdev->irq = -1;
+	tdev->pdata = of_device_get_match_data(&pdev->dev);
 
 	tdev->vdd = devm_regulator_get(&pdev->dev, "vdd");
 	if (IS_ERR(tdev->vdd))
-- 
2.5.1

Alexandre Courbot

2015-Sep-04 10:59 UTC

head link

[Nouveau] [PATCH 2/4] instmem/gk20a: make use of the IOMMU bit

Use the IOMMU bit specified in platform data instead of hardcoding it to
the bit used by current Tegra GPUs.

Signed-off-by: Alexandre Courbot <acourbot at nvidia.com>
---
 drm/nouveau/nvkm/subdev/instmem/gk20a.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drm/nouveau/nvkm/subdev/instmem/gk20a.c
b/drm/nouveau/nvkm/subdev/instmem/gk20a.c
index a2921ace4045..6a1314723a53 100644
--- a/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+++ b/drm/nouveau/nvkm/subdev/instmem/gk20a.c
@@ -103,6 +103,7 @@ struct gk20a_instmem {
 	struct nvkm_mm *mm;
 	struct iommu_domain *domain;
 	unsigned long iommu_pgshift;
+	u16 iommu_bit;
 
 	/* Only used by DMA API */
 	struct dma_attrs attrs;
@@ -332,8 +333,8 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory)
 	r = list_first_entry(&node->base.mem.regions, struct nvkm_mm_node,
 			     rl_entry);
 
-	/* clear bit 34 to unmap pages */
-	r->offset &= ~BIT(34 - imem->iommu_pgshift);
+	/* clear IOMMU bit to unmap pages */
+	r->offset &= ~BIT(imem->iommu_bit - imem->iommu_pgshift);
 
 	/* Unmap pages from GPU address space and free them */
 	for (i = 0; i < node->base.mem.size; i++) {
@@ -489,8 +490,8 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32
npages, u32 align,
 		}
 	}
 
-	/* Bit 34 tells that an address is to be resolved through the IOMMU */
-	r->offset |= BIT(34 - imem->iommu_pgshift);
+	/* IOMMU bit tells that an address is to be resolved through the IOMMU */
+	r->offset |= BIT(imem->iommu_bit - imem->iommu_pgshift);
 
 	node->base.mem.offset = ((u64)r->offset) <<
imem->iommu_pgshift;
 
@@ -603,6 +604,7 @@ gk20a_instmem_new(struct nvkm_device *device, int index,
 		imem->domain = tdev->iommu.domain;
 		imem->iommu_pgshift = tdev->iommu.pgshift;
 		imem->cpu_map = gk20a_instobj_cpu_map_iommu;
+		imem->iommu_bit = tdev->pdata->iommu_bit;
 
 		nvkm_info(&imem->base.subdev, "using IOMMU\n");
 	} else {
-- 
2.5.1

Alexandre Courbot

2015-Sep-04 10:59 UTC

head link

[Nouveau] [PATCH 3/4] ttm: convert to DMA API

The pci_dma_* functions are now superseded in the kernel by the DMA
API. Make the conversion to this more generic API.

Signed-off-by: Alexandre Courbot <acourbot at nvidia.com>
---
 drm/nouveau/nouveau_ttm.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drm/nouveau/nouveau_ttm.c b/drm/nouveau/nouveau_ttm.c
index e88bd697d09b..8a476e77bf6c 100644
--- a/drm/nouveau/nouveau_ttm.c
+++ b/drm/nouveau/nouveau_ttm.c
@@ -338,7 +338,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 	struct nvkm_device *device = nvxx_device(&drm->device);
 	struct nvkm_pci *pci = device->pci;
 	struct drm_device *dev = drm->dev;
-	u32 bits;
+	u8 bits;
 	int ret;
 
 	if (pci && pci->agp.bridge) {
@@ -351,18 +351,16 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 	bits = nvxx_mmu(&drm->device)->dma_bits;
 	if (nvxx_device(&drm->device)->func->pci) {
 		if (drm->agp.bridge ||
-		     !pci_dma_supported(dev->pdev, DMA_BIT_MASK(bits)))
+		     !dma_supported(dev->dev, DMA_BIT_MASK(bits)))
 			bits = 32;
 
-		ret = pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(bits));
+		ret = dma_set_mask(dev->dev, DMA_BIT_MASK(bits));
 		if (ret)
 			return ret;
 
-		ret = pci_set_consistent_dma_mask(dev->pdev,
-						  DMA_BIT_MASK(bits));
+		ret = dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(bits));
 		if (ret)
-			pci_set_consistent_dma_mask(dev->pdev,
-						    DMA_BIT_MASK(32));
+			dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(32));
 	}
 
 	ret = nouveau_ttm_global_init(drm);
-- 
2.5.1

Alexandre Courbot

2015-Sep-04 10:59 UTC

head link

[Nouveau] [PATCH 4/4] ttm: set the DMA mask for platform devices

So far the DMA mask was not set for platform devices, which limited them
to a 32-bit physical space. Allow dma_set_mask() to be called for
non-PCI devices, and also take the IOMMU bit into account since it could
restrict the physically addressable space.

Signed-off-by: Alexandre Courbot <acourbot at nvidia.com>
---
 drm/nouveau/nouveau_ttm.c | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/drm/nouveau/nouveau_ttm.c b/drm/nouveau/nouveau_ttm.c
index 8a476e77bf6c..ce11310faf9c 100644
--- a/drm/nouveau/nouveau_ttm.c
+++ b/drm/nouveau/nouveau_ttm.c
@@ -29,6 +29,9 @@
 #include "nouveau_gem.h"
 
 #include "drm_legacy.h"
+
+#include <core/tegra.h>
+
 static int
 nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long
psize)
 {
@@ -353,16 +356,26 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 		if (drm->agp.bridge ||
 		     !dma_supported(dev->dev, DMA_BIT_MASK(bits)))
 			bits = 32;
+	} else if (device->func->tegra) {
+		struct nvkm_device_tegra *tegra = device->func->tegra(device);
 
-		ret = dma_set_mask(dev->dev, DMA_BIT_MASK(bits));
-		if (ret)
-			return ret;
+		/*
+		 * If the platform can use an IOMMU, then the addressable DMA
+		 * space is constrained by the IOMMU bit
+		 */
+		if (tegra->pdata->iommu_bit)
+			bits = min(bits, tegra->pdata->iommu_bit);
 
-		ret = dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(bits));
-		if (ret)
-			dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(32));
 	}
 
+	ret = dma_set_mask(dev->dev, DMA_BIT_MASK(bits));
+	if (ret)
+		return ret;
+
+	ret = dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(bits));
+	if (ret)
+		dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(32));
+
 	ret = nouveau_ttm_global_init(drm);
 	if (ret)
 		return ret;
-- 
2.5.1

Possibly Parallel Threads

Search for more apparently analogous threads

Nouveau - Sep 2015 - [PATCH 0/4] tegra: DMA mask and IOMMU bit fixes

[Nouveau] [PATCH 0/4] tegra: DMA mask and IOMMU bit fixes

[Nouveau] [PATCH 1/4] platform: allow to specify the IOMMU bit

[Nouveau] [PATCH 2/4] instmem/gk20a: make use of the IOMMU bit

[Nouveau] [PATCH 3/4] ttm: convert to DMA API

[Nouveau] [PATCH 4/4] ttm: set the DMA mask for platform devices

Possibly Parallel Threads