Alexandre Courbot
2015-Sep-04 10:59 UTC
[Nouveau] [PATCH 0/4] tegra: DMA mask and IOMMU bit fixes
These 4 patches fix two issues that existed on Tegra regarding DMA: 1) The bit indicating whether to use an IOMMU or not was hardcoded; make this a platform property and use it in instmem. 2) The DMA mask was not set for platform devices. Fix this by converting more pci_dma* to the DMA API, and use that more generic code to set the DMA mask properly for all platforms. Tested on both x86 and Tegra and confirmed that the DMA mask was set properly in both cases. In case there is worry about the conversion to the DMA API, here is evidence that this change is a no-op for PCI as of 4.2: http://lxr.free-electrons.com/ident?i=pci_set_dma_mask Alexandre Courbot (4): platform: allow to specify the IOMMU bit instmem/gk20a: make use of the IOMMU bit ttm: convert to DMA API ttm: set the DMA mask for platform devices drm/nouveau/include/nvkm/core/tegra.h | 3 +++ drm/nouveau/nouveau_platform.c | 14 ++++++++++++-- drm/nouveau/nouveau_platform.h | 10 ++++++++++ drm/nouveau/nouveau_ttm.c | 31 +++++++++++++++++++++---------- drm/nouveau/nvkm/engine/device/tegra.c | 9 ++++++++- drm/nouveau/nvkm/subdev/instmem/gk20a.c | 10 ++++++---- 6 files changed, 60 insertions(+), 17 deletions(-) -- 2.5.1
Alexandre Courbot
2015-Sep-04 10:59 UTC
[Nouveau] [PATCH 1/4] platform: allow to specify the IOMMU bit
Current Tegra code taking advantage of the IOMMU assumes a hardcoded value for the IOMMU bit. Make it a platform property instead for flexibility. Signed-off-by: Alexandre Courbot <acourbot at nvidia.com> --- drm/nouveau/include/nvkm/core/tegra.h | 3 +++ drm/nouveau/nouveau_platform.c | 14 ++++++++++++-- drm/nouveau/nouveau_platform.h | 10 ++++++++++ drm/nouveau/nvkm/engine/device/tegra.c | 9 ++++++++- 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/drm/nouveau/include/nvkm/core/tegra.h b/drm/nouveau/include/nvkm/core/tegra.h index 5aa2480da25f..3e354edbc580 100644 --- a/drm/nouveau/include/nvkm/core/tegra.h +++ b/drm/nouveau/include/nvkm/core/tegra.h @@ -3,10 +3,13 @@ #include <core/device.h> #include <core/mm.h> +#include "nouveau_platform.h" + struct nvkm_device_tegra { struct nvkm_device device; struct platform_device *pdev; int irq; + const struct nouveau_platform_data *pdata; struct reset_control *rst; struct clk *clk; diff --git a/drm/nouveau/nouveau_platform.c b/drm/nouveau/nouveau_platform.c index 3eb665453165..f1bf2983b993 100644 --- a/drm/nouveau/nouveau_platform.c +++ b/drm/nouveau/nouveau_platform.c @@ -48,9 +48,19 @@ static int nouveau_platform_remove(struct platform_device *pdev) } #if IS_ENABLED(CONFIG_OF) +static const struct nouveau_platform_data gk20a_platform_data = { + .iommu_bit = 34, +}; + static const struct of_device_id nouveau_platform_match[] = { - { .compatible = "nvidia,gk20a" }, - { .compatible = "nvidia,gm20b" }, + { + .compatible = "nvidia,gk20a", + .data = &gk20a_platform_data, + }, + { + .compatible = "nvidia,gm20b", + .data = &gk20a_platform_data, + }, { } }; diff --git a/drm/nouveau/nouveau_platform.h b/drm/nouveau/nouveau_platform.h index f41056d0f5f4..fbce6a4ceab9 100644 --- a/drm/nouveau/nouveau_platform.h +++ b/drm/nouveau/nouveau_platform.h @@ -23,5 +23,15 @@ #define __NOUVEAU_PLATFORM_H__ #include "nouveau_drm.h" +struct nouveau_platform_data +{ + /* + * If an IOMMU is used, indicates which address bit will 
trigger a + * IOMMU translation when set (when this bit is not set, IOMMU is + * bypassed). A value of 0 means an IOMMU is never used. + */ + u8 iommu_bit; +}; + extern struct platform_driver nouveau_platform_driver; #endif diff --git a/drm/nouveau/nvkm/engine/device/tegra.c b/drm/nouveau/nvkm/engine/device/tegra.c index da57c8a60608..8ec717f20808 100644 --- a/drm/nouveau/nvkm/engine/device/tegra.c +++ b/drm/nouveau/nvkm/engine/device/tegra.c @@ -23,6 +23,8 @@ #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER #include "priv.h" +#include <linux/of_device.h> + static int nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev) { @@ -85,6 +87,9 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) unsigned long pgsize_bitmap; int ret; + if (!tdev->pdata->iommu_bit) + return; + mutex_init(&tdev->iommu.mutex); if (iommu_present(&platform_bus_type)) { @@ -114,7 +119,8 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) goto free_domain; ret = nvkm_mm_init(&tdev->iommu.mm, 0, - (1ULL << 40) >> tdev->iommu.pgshift, 1); + (1ULL << tdev->pdata->iommu_bit) >> + tdev->iommu.pgshift, 1); if (ret) goto detach_device; } @@ -250,6 +256,7 @@ nvkm_device_tegra_new(struct platform_device *pdev, *pdevice = &tdev->device; tdev->pdev = pdev; tdev->irq = -1; + tdev->pdata = of_device_get_match_data(&pdev->dev); tdev->vdd = devm_regulator_get(&pdev->dev, "vdd"); if (IS_ERR(tdev->vdd)) -- 2.5.1
Alexandre Courbot
2015-Sep-04 10:59 UTC
[Nouveau] [PATCH 2/4] instmem/gk20a: make use of the IOMMU bit
Use the IOMMU bit specified in platform data instead of hardcoding it to the bit used by current Tegra GPUs. Signed-off-by: Alexandre Courbot <acourbot at nvidia.com> --- drm/nouveau/nvkm/subdev/instmem/gk20a.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drm/nouveau/nvkm/subdev/instmem/gk20a.c index a2921ace4045..6a1314723a53 100644 --- a/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -103,6 +103,7 @@ struct gk20a_instmem { struct nvkm_mm *mm; struct iommu_domain *domain; unsigned long iommu_pgshift; + u16 iommu_bit; /* Only used by DMA API */ struct dma_attrs attrs; @@ -332,8 +333,8 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory) r = list_first_entry(&node->base.mem.regions, struct nvkm_mm_node, rl_entry); - /* clear bit 34 to unmap pages */ - r->offset &= ~BIT(34 - imem->iommu_pgshift); + /* clear IOMMU bit to unmap pages */ + r->offset &= ~BIT(imem->iommu_bit - imem->iommu_pgshift); /* Unmap pages from GPU address space and free them */ for (i = 0; i < node->base.mem.size; i++) { @@ -489,8 +490,8 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, } } - /* Bit 34 tells that an address is to be resolved through the IOMMU */ - r->offset |= BIT(34 - imem->iommu_pgshift); + /* IOMMU bit tells that an address is to be resolved through the IOMMU */ + r->offset |= BIT(imem->iommu_bit - imem->iommu_pgshift); node->base.mem.offset = ((u64)r->offset) << imem->iommu_pgshift; @@ -603,6 +604,7 @@ gk20a_instmem_new(struct nvkm_device *device, int index, imem->domain = tdev->iommu.domain; imem->iommu_pgshift = tdev->iommu.pgshift; imem->cpu_map = gk20a_instobj_cpu_map_iommu; + imem->iommu_bit = tdev->pdata->iommu_bit; nvkm_info(&imem->base.subdev, "using IOMMU\n"); } else { -- 2.5.1
The pci_dma_* functions are now superseded in the kernel by the DMA API. Make the conversion to this more generic API. Signed-off-by: Alexandre Courbot <acourbot at nvidia.com> --- drm/nouveau/nouveau_ttm.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drm/nouveau/nouveau_ttm.c b/drm/nouveau/nouveau_ttm.c index e88bd697d09b..8a476e77bf6c 100644 --- a/drm/nouveau/nouveau_ttm.c +++ b/drm/nouveau/nouveau_ttm.c @@ -338,7 +338,7 @@ nouveau_ttm_init(struct nouveau_drm *drm) struct nvkm_device *device = nvxx_device(&drm->device); struct nvkm_pci *pci = device->pci; struct drm_device *dev = drm->dev; - u32 bits; + u8 bits; int ret; if (pci && pci->agp.bridge) { @@ -351,18 +351,16 @@ nouveau_ttm_init(struct nouveau_drm *drm) bits = nvxx_mmu(&drm->device)->dma_bits; if (nvxx_device(&drm->device)->func->pci) { if (drm->agp.bridge || - !pci_dma_supported(dev->pdev, DMA_BIT_MASK(bits))) + !dma_supported(dev->dev, DMA_BIT_MASK(bits))) bits = 32; - ret = pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(bits)); + ret = dma_set_mask(dev->dev, DMA_BIT_MASK(bits)); if (ret) return ret; - ret = pci_set_consistent_dma_mask(dev->pdev, - DMA_BIT_MASK(bits)); + ret = dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(bits)); if (ret) - pci_set_consistent_dma_mask(dev->pdev, - DMA_BIT_MASK(32)); + dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(32)); } ret = nouveau_ttm_global_init(drm); -- 2.5.1
Alexandre Courbot
2015-Sep-04 10:59 UTC
[Nouveau] [PATCH 4/4] ttm: set the DMA mask for platform devices
So far the DMA mask was not set for platform devices, which limited them to a 32-bit physical space. Allow dma_set_mask() to be called for non-PCI devices, and also take the IOMMU bit into account since it could restrict the physically addressable space. Signed-off-by: Alexandre Courbot <acourbot at nvidia.com> --- drm/nouveau/nouveau_ttm.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drm/nouveau/nouveau_ttm.c b/drm/nouveau/nouveau_ttm.c index 8a476e77bf6c..ce11310faf9c 100644 --- a/drm/nouveau/nouveau_ttm.c +++ b/drm/nouveau/nouveau_ttm.c @@ -29,6 +29,9 @@ #include "nouveau_gem.h" #include "drm_legacy.h" + +#include <core/tegra.h> + static int nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long psize) { @@ -353,16 +356,26 @@ nouveau_ttm_init(struct nouveau_drm *drm) if (drm->agp.bridge || !dma_supported(dev->dev, DMA_BIT_MASK(bits))) bits = 32; + } else if (device->func->tegra) { + struct nvkm_device_tegra *tegra = device->func->tegra(device); - ret = dma_set_mask(dev->dev, DMA_BIT_MASK(bits)); - if (ret) - return ret; + /* + * If the platform can use a IOMMU, then the addressable DMA + * space is constrained by the IOMMU bit + */ + if (tegra->pdata->iommu_bit) + bits = min(bits, tegra->pdata->iommu_bit); - ret = dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(bits)); - if (ret) - dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(32)); } + ret = dma_set_mask(dev->dev, DMA_BIT_MASK(bits)); + if (ret) + return ret; + + ret = dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(bits)); + if (ret) + dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(32)); + ret = nouveau_ttm_global_init(drm); if (ret) return ret; -- 2.5.1