thr3ads.net - Nouveau - [Nouveau] [PATCH 0/6] Nouveau on ARM fixes [Aug 2013]

If this information is useful, please help other people find it:
Share via:

Lucas Stach

2013-Aug-28 00:00 UTC

[Nouveau] [PATCH 0/6] Nouveau on ARM fixes

This is the first set of patches to make Nouveau work
on Tegra. Those are only the obvious correctness fixes,
a lot of optimization work remains to be done, but at least
it's enough to get accel working and let the machine survive
a piglit run.

A new BO flag is introduced to allow userspace to hint the
kernel about possible optimizations.

Lucas Stach (6):
  drm/ttm: recognize ARM arch in ioprot handler
  drm/ttm: introduce dma cache sync helpers
  drm/nouveau: hook up cache sync functions
  drm/nouveau: introduce NOUVEAU_GEM_TILE_WCUS
  drm/nouveau: map IB write-combined
  drm/nouveau: use MSI interrupts

 drivers/gpu/drm/nouveau/core/include/subdev/mc.h |  1 +
 drivers/gpu/drm/nouveau/core/subdev/mc/base.c    | 17 ++++++++++++++
 drivers/gpu/drm/nouveau/nouveau_bo.c             | 15 ++++++++++++-
 drivers/gpu/drm/nouveau/nouveau_bo.h             |  1 +
 drivers/gpu/drm/nouveau/nouveau_chan.c           |  3 ++-
 drivers/gpu/drm/nouveau/nouveau_gem.c            |  5 +++++
 drivers/gpu/drm/ttm/ttm_bo_util.c                |  2 +-
 drivers/gpu/drm/ttm/ttm_tt.c                     | 25 +++++++++++++++++++++
 include/drm/ttm/ttm_bo_driver.h                  | 28 ++++++++++++++++++++++++
 include/uapi/drm/nouveau_drm.h                   |  1 +
 10 files changed, 95 insertions(+), 3 deletions(-)

-- 
1.8.3.1

Lucas Stach

2013-Aug-28 00:00 UTC

head link

[Nouveau] [PATCH 1/6] drm/ttm: recognize ARM arch in ioprot handler

Signed-off-by: Lucas Stach <dev at lynxeye.de>
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 319cf41..db15687 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -487,7 +487,7 @@ pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp)
 			pgprot_val(tmp) |= _PAGE_GUARDED;
 	}
 #endif
-#if defined(__ia64__)
+#if defined(__ia64__) || defined(__arm__)
 	if (caching_flags & TTM_PL_FLAG_WC)
 		tmp = pgprot_writecombine(tmp);
 	else
-- 
1.8.3.1

Lucas Stach

2013-Aug-28 00:00 UTC

head link

[Nouveau] [PATCH 2/6] drm/ttm: introduce dma cache sync helpers

On arches with non-coherent PCI, we need to flush caches ourselves at
the appropriate places. Introduce two small helpers to make things easy
for TTM based drivers.

Signed-off-by: Lucas Stach <dev at lynxeye.de>
---
 drivers/gpu/drm/ttm/ttm_tt.c    | 25 +++++++++++++++++++++++++
 include/drm/ttm/ttm_bo_driver.h | 28 ++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 5e93a52..935e121 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -38,6 +38,7 @@
 #include <linux/swap.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/dma-mapping.h>
 #include <drm/drm_cache.h>
 #include <drm/drm_mem_util.h>
 #include <drm/ttm/ttm_module.h>
@@ -249,6 +250,30 @@ void ttm_dma_tt_fini(struct ttm_dma_tt *ttm_dma)
 }
 EXPORT_SYMBOL(ttm_dma_tt_fini);
 
+void ttm_dma_tt_cache_sync_for_device(struct ttm_dma_tt *ttm_dma,
+				      struct device *dev)
+{
+	int i;
+
+	for (i = 0; i < ttm_dma->ttm.num_pages; i++) {
+		dma_sync_single_for_device(dev, ttm_dma->dma_address[i],
+					   PAGE_SIZE, DMA_TO_DEVICE);
+	}
+}
+EXPORT_SYMBOL(ttm_dma_tt_cache_sync_for_device);
+
+void ttm_dma_tt_cache_sync_for_cpu(struct ttm_dma_tt *ttm_dma,
+				   struct device *dev)
+{
+	int i;
+
+	for (i = 0; i < ttm_dma->ttm.num_pages; i++) {
+		dma_sync_single_for_cpu(dev, ttm_dma->dma_address[i],
+					PAGE_SIZE, DMA_FROM_DEVICE);
+	}
+}
+EXPORT_SYMBOL(ttm_dma_tt_cache_sync_for_cpu);
+
 void ttm_tt_unbind(struct ttm_tt *ttm)
 {
 	int ret;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 984fc2d..db5f3b5 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -40,6 +40,7 @@
 #include <linux/fs.h>
 #include <linux/spinlock.h>
 #include <linux/reservation.h>
+#include <linux/device.h>
 
 struct ttm_backend_func {
 	/**
@@ -681,6 +682,33 @@ extern int ttm_tt_set_placement_caching(struct ttm_tt *ttm,
uint32_t placement);
 extern int ttm_tt_swapout(struct ttm_tt *ttm,
 			  struct file *persistent_swap_storage);
 
+/**
+ * ttm_dma_tt_cache_sync_for_device:
+ *
+ * @ttm A struct ttm_tt of the type returned by ttm_dma_tt_init.
+ * @dev A struct device representing the device to which to sync.
+ *
+ * This function will flush the CPU caches on arches where snooping in the
+ * TT is not available. On fully coherent arches this will turn into an
(almost)
+ * noop. This makes sure that data written by the CPU is visible to the device.
+ */
+extern void ttm_dma_tt_cache_sync_for_device(struct ttm_dma_tt *ttm_dma,
+					     struct device *dev);
+
+/**
+ * ttm_dma_tt_cache_sync_for_cpu:
+ *
+ * @ttm A struct ttm_tt of the type returned by ttm_dma_tt_init.
+ * @dev A struct device representing the device from which to sync.
+ *
+ * This function will invalidate the CPU caches on arches where snooping in the
+ * TT is not available. On fully coherent arches this will turn into an
(almost)
+ * noop. This makes sure that the CPU does not read any stale cached or
+ * prefetched data.
+ */
+extern void ttm_dma_tt_cache_sync_for_cpu(struct ttm_dma_tt *ttm_dma,
+					  struct device *dev);
+
 /*
  * ttm_bo.c
  */
-- 
1.8.3.1

Lucas Stach

2013-Aug-28 00:00 UTC

head link

[Nouveau] [PATCH 3/6] drm/nouveau: hook up cache sync functions

Signed-off-by: Lucas Stach <dev at lynxeye.de>
---
 drivers/gpu/drm/nouveau/nouveau_bo.c  | 4 ++++
 drivers/gpu/drm/nouveau/nouveau_gem.c | 5 +++++
 2 files changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index af20fba..f4a2eb9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -411,6 +411,10 @@ nouveau_bo_validate(struct nouveau_bo *nvbo, bool
interruptible,
 {
 	int ret;
 
+	if (nvbo->bo.ttm && nvbo->bo.ttm->caching_state == tt_cached)
+		ttm_dma_tt_cache_sync_for_device((struct ttm_dma_tt *)nvbo->bo.ttm,
+			&nouveau_bdev(nvbo->bo.ttm->bdev)->dev->pdev->dev);
+
 	ret = ttm_bo_validate(&nvbo->bo, &nvbo->placement,
 			      interruptible, no_wait_gpu);
 	if (ret)
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c
b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 830cb7b..f632b92 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -901,6 +901,11 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void
*data,
 	ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait);
 	spin_unlock(&nvbo->bo.bdev->fence_lock);
 	drm_gem_object_unreference_unlocked(gem);
+
+	if (!ret && nvbo->bo.ttm &&
nvbo->bo.ttm->caching_state == tt_cached)
+		ttm_dma_tt_cache_sync_for_cpu((struct ttm_dma_tt *)nvbo->bo.ttm,
+					      &dev->pdev->dev);
+
 	return ret;
 }
 
-- 
1.8.3.1

Lucas Stach

2013-Aug-28 00:00 UTC

head link

[Nouveau] [PATCH 4/6] drm/nouveau: introduce NOUVEAU_GEM_TILE_WCUS

This flag allows userspace to give the kernel a hint that it should use
a non-snooped resource. To guarantee coherency at all times mappings
into userspace are done write combined, so userspace should avoid
reading back from those resources.

Signed-off-by: Lucas Stach <dev at lynxeye.de>
---
On x86 an optimized userspace can save up on snoop traffic in the
system, on ARM the benefits are potentially much larger, as we can save
the manual cache flush/invalidate.
---
 drivers/gpu/drm/nouveau/nouveau_bo.c | 11 ++++++++++-
 drivers/gpu/drm/nouveau/nouveau_bo.h |  1 +
 include/uapi/drm/nouveau_drm.h       |  1 +
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index f4a2eb9..c5fcbcc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -231,6 +231,12 @@ nouveau_bo_new(struct drm_device *dev, int size, int align,
 
 	nouveau_bo_fixup_align(nvbo, flags, &align, &size);
 	nvbo->bo.mem.num_pages = size >> PAGE_SHIFT;
+
+	if (tile_flags & NOUVEAU_GEM_TILE_WCUS)
+		nvbo->valid_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
+	else
+		nvbo->valid_caching = TTM_PL_MASK_CACHING;
+
 	nouveau_bo_placement_set(nvbo, flags, 0);
 
 	acc_size = ttm_bo_dma_acc_size(&drm->ttm.bdev, size,
@@ -292,7 +298,7 @@ void
 nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t type, uint32_t busy)
 {
 	struct ttm_placement *pl = &nvbo->placement;
-	uint32_t flags = TTM_PL_MASK_CACHING |
+	uint32_t flags = nvbo->valid_caching |
 		(nvbo->pin_refcnt ? TTM_PL_FLAG_NO_EVICT : 0);
 
 	pl->placement = nvbo->placements;
@@ -1554,6 +1560,9 @@ nouveau_bo_vma_add(struct nouveau_bo *nvbo, struct
nouveau_vm *vm,
 	if (nvbo->bo.mem.mem_type == TTM_PL_VRAM)
 		nouveau_vm_map(vma, nvbo->bo.mem.mm_node);
 	else if (nvbo->bo.mem.mem_type == TTM_PL_TT) {
+		if (!(nvbo->valid_caching & TTM_PL_FLAG_CACHED))
+			vma->access |= NV_MEM_ACCESS_NOSNOOP;
+
 		if (node->sg)
 			nouveau_vm_map_sg_table(vma, 0, size, node);
 		else
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h
b/drivers/gpu/drm/nouveau/nouveau_bo.h
index 653dbbb..2ecf8b7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -9,6 +9,7 @@ struct nouveau_bo {
 	struct ttm_buffer_object bo;
 	struct ttm_placement placement;
 	u32 valid_domains;
+	u32 valid_caching;
 	u32 placements[3];
 	u32 busy_placements[3];
 	struct ttm_bo_kmap_obj kmap;
diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h
index 2a5769f..4948eee2 100644
--- a/include/uapi/drm/nouveau_drm.h
+++ b/include/uapi/drm/nouveau_drm.h
@@ -36,6 +36,7 @@
 #define NOUVEAU_GEM_TILE_32BPP       0x00000002
 #define NOUVEAU_GEM_TILE_ZETA        0x00000004
 #define NOUVEAU_GEM_TILE_NONCONTIG   0x00000008
+#define NOUVEAU_GEM_TILE_WCUS        0x00000010 /* write-combined, unsnooped */
 
 struct drm_nouveau_gem_info {
 	uint32_t handle;
-- 
1.8.3.1

Lucas Stach

2013-Aug-28 00:00 UTC

head link

[Nouveau] [PATCH 5/6] drm/nouveau: map IB write-combined

Signed-off-by: Lucas Stach <dev at lynxeye.de>
---
 drivers/gpu/drm/nouveau/nouveau_chan.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c
b/drivers/gpu/drm/nouveau/nouveau_chan.c
index e84f4c3..3b54e8f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -114,7 +114,8 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct
nouveau_cli *cli,
 	if (nouveau_vram_pushbuf)
 		target = TTM_PL_FLAG_VRAM;
 
-	ret = nouveau_bo_new(drm->dev, size, 0, target, 0, 0, NULL,
+	ret = nouveau_bo_new(drm->dev, size, 0, target, 0,
+			    NOUVEAU_GEM_TILE_WCUS, NULL,
 			    &chan->push.buffer);
 	if (ret == 0) {
 		ret = nouveau_bo_pin(chan->push.buffer, target);
-- 
1.8.3.1

Lucas Stach

2013-Aug-28 00:00 UTC

head link

[Nouveau] [PATCH 6/6] drm/nouveau: use MSI interrupts

MSIs were only problematic on some old, broken chipsets. But now that we
already see systems where PCI legacy interrupts are somewhat flaky, it's
really time to move to MSIs.

Signed-off-by: Lucas Stach <dev at lynxeye.de>
---
 drivers/gpu/drm/nouveau/core/include/subdev/mc.h |  1 +
 drivers/gpu/drm/nouveau/core/subdev/mc/base.c    | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/mc.h
b/drivers/gpu/drm/nouveau/core/include/subdev/mc.h
index 9d2cd20..ce6569f 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/mc.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/mc.h
@@ -12,6 +12,7 @@ struct nouveau_mc_intr {
 struct nouveau_mc {
 	struct nouveau_subdev base;
 	const struct nouveau_mc_intr *intr_map;
+	bool use_msi;
 };
 
 static inline struct nouveau_mc *
diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/base.c
b/drivers/gpu/drm/nouveau/core/subdev/mc/base.c
index ec9cd6f..02b337e 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mc/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mc/base.c
@@ -23,6 +23,7 @@
  */
 
 #include <subdev/mc.h>
+#include <core/option.h>
 
 static irqreturn_t
 nouveau_mc_intr(int irq, void *arg)
@@ -43,6 +44,9 @@ nouveau_mc_intr(int irq, void *arg)
 		map++;
 	}
 
+	if (pmc->use_msi)
+		nv_wr08(pmc->base.base.parent, 0x00088068, 0xff);
+
 	if (intr) {
 		nv_error(pmc, "unknown intr 0x%08x\n", stat);
 	}
@@ -75,6 +79,8 @@ _nouveau_mc_dtor(struct nouveau_object *object)
 	struct nouveau_device *device = nv_device(object);
 	struct nouveau_mc *pmc = (void *)object;
 	free_irq(device->pdev->irq, pmc);
+	if (pmc->use_msi)
+		pci_disable_msi(device->pdev);
 	nouveau_subdev_destroy(&pmc->base);
 }
 
@@ -96,6 +102,17 @@ nouveau_mc_create_(struct nouveau_object *parent, struct
nouveau_object *engine,
 
 	pmc->intr_map = intr_map;
 
+	pmc->use_msi = nouveau_boolopt(device->cfgopt, "NvMSI", true);
+	if (pmc->use_msi) {
+		ret = pci_enable_msi(device->pdev);
+		if (ret) {
+			pmc->use_msi = false;
+		} else {
+			nv_wr08(device, 0x00088068, 0xff);
+			nv_info(pmc, "MSI interrupts enabled\n");
+		}
+	}
+
 	ret = request_irq(device->pdev->irq, nouveau_mc_intr,
 			  IRQF_SHARED, "nouveau", pmc);
 	if (ret < 0)
-- 
1.8.3.1

Ben Skeggs

2013-Aug-28 07:09 UTC

head link

[Nouveau] [PATCH 6/6] drm/nouveau: use MSI interrupts

On Wed, Aug 28, 2013 at 10:00 AM, Lucas Stach <dev at lynxeye.de> wrote:
> MSIs were only problematic on some old, broken chipsets. But now that we
> already see systems where PCI legacy interrupts are somewhat flaky, it's
> really time to move to MSIs.
>
> Signed-off-by: Lucas Stach <dev at lynxeye.de>
> ---
>  drivers/gpu/drm/nouveau/core/include/subdev/mc.h |  1 +
>  drivers/gpu/drm/nouveau/core/subdev/mc/base.c    | 17 +++++++++++++++++
>  2 files changed, 18 insertions(+)
>
> diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/mc.h
b/drivers/gpu/drm/nouveau/core/include/subdev/mc.h
> index 9d2cd20..ce6569f 100644
> --- a/drivers/gpu/drm/nouveau/core/include/subdev/mc.h
> +++ b/drivers/gpu/drm/nouveau/core/include/subdev/mc.h
> @@ -12,6 +12,7 @@ struct nouveau_mc_intr {
>  struct nouveau_mc {
>         struct nouveau_subdev base;
>         const struct nouveau_mc_intr *intr_map;
> +       bool use_msi;
>  };
>
>  static inline struct nouveau_mc *
> diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/base.c
b/drivers/gpu/drm/nouveau/core/subdev/mc/base.c
> index ec9cd6f..02b337e 100644
> --- a/drivers/gpu/drm/nouveau/core/subdev/mc/base.c
> +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/base.c
> @@ -23,6 +23,7 @@
>   */
>
>  #include <subdev/mc.h>
> +#include <core/option.h>
>
>  static irqreturn_t
>  nouveau_mc_intr(int irq, void *arg)
> @@ -43,6 +44,9 @@ nouveau_mc_intr(int irq, void *arg)
>                 map++;
>         }
>
> +       if (pmc->use_msi)
> +               nv_wr08(pmc->base.base.parent, 0x00088068, 0xff);

Register not present everywhere.

At the very least, the enabling of MSI should be disallowed on the
earlier chipsets where it's not supported.  Though, it's perhaps
possible that the pci_enable_msi() call will fail in all of these
cases anyway.. I'm not certain.
> +
>         if (intr) {
>                 nv_error(pmc, "unknown intr 0x%08x\n", stat);
>         }
> @@ -75,6 +79,8 @@ _nouveau_mc_dtor(struct nouveau_object *object)
>         struct nouveau_device *device = nv_device(object);
>         struct nouveau_mc *pmc = (void *)object;
>         free_irq(device->pdev->irq, pmc);
> +       if (pmc->use_msi)
> +               pci_disable_msi(device->pdev);
>         nouveau_subdev_destroy(&pmc->base);
>  }
>
> @@ -96,6 +102,17 @@ nouveau_mc_create_(struct nouveau_object *parent,
struct nouveau_object *engine,
>
>         pmc->intr_map = intr_map;
>
> +       pmc->use_msi = nouveau_boolopt(device->cfgopt,
"NvMSI", true);
> +       if (pmc->use_msi) {
> +               ret = pci_enable_msi(device->pdev);
> +               if (ret) {
> +                       pmc->use_msi = false;
> +               } else {
> +                       nv_wr08(device, 0x00088068, 0xff);
> +                       nv_info(pmc, "MSI interrupts enabled\n");
> +               }
> +       }
> +
>         ret = request_irq(device->pdev->irq, nouveau_mc_intr,
>                           IRQF_SHARED, "nouveau", pmc);
>         if (ret < 0)
> --
> 1.8.3.1
>

Ben Skeggs

2013-Aug-28 07:11 UTC

head link

[Nouveau] [PATCH 4/6] drm/nouveau: introduce NOUVEAU_GEM_TILE_WCUS

On Wed, Aug 28, 2013 at 10:00 AM, Lucas Stach <dev at lynxeye.de> wrote:
> This flag allows userspace to give the kernel a hint that it should use
> a non-snooped resource. To guarantee coherency at all times mappings
> into userspace are done write combined, so userspace should avoid
> reading back from those resources.

Do any other combinations of cached/uncached and snooped/non-snooped
make any sense?  If so, perhaps we want to split the flags.

>
> Signed-off-by: Lucas Stach <dev at lynxeye.de>
> ---
> On x86 an optimized userspace can save up on snoop traffic in the
> system, on ARM the benefits are potentially much larger, as we can save
> the manual cache flush/invalidate.
> ---
>  drivers/gpu/drm/nouveau/nouveau_bo.c | 11 ++++++++++-
>  drivers/gpu/drm/nouveau/nouveau_bo.h |  1 +
>  include/uapi/drm/nouveau_drm.h       |  1 +
>  3 files changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c
b/drivers/gpu/drm/nouveau/nouveau_bo.c
> index f4a2eb9..c5fcbcc 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> @@ -231,6 +231,12 @@ nouveau_bo_new(struct drm_device *dev, int size, int
align,
>
>         nouveau_bo_fixup_align(nvbo, flags, &align, &size);
>         nvbo->bo.mem.num_pages = size >> PAGE_SHIFT;
> +
> +       if (tile_flags & NOUVEAU_GEM_TILE_WCUS)
> +               nvbo->valid_caching = TTM_PL_FLAG_UNCACHED |
TTM_PL_FLAG_WC;
> +       else
> +               nvbo->valid_caching = TTM_PL_MASK_CACHING;
> +
>         nouveau_bo_placement_set(nvbo, flags, 0);
>
>         acc_size = ttm_bo_dma_acc_size(&drm->ttm.bdev, size,
> @@ -292,7 +298,7 @@ void
>  nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t type, uint32_t
busy)
>  {
>         struct ttm_placement *pl = &nvbo->placement;
> -       uint32_t flags = TTM_PL_MASK_CACHING |
> +       uint32_t flags = nvbo->valid_caching |
>                 (nvbo->pin_refcnt ? TTM_PL_FLAG_NO_EVICT : 0);
>
>         pl->placement = nvbo->placements;
> @@ -1554,6 +1560,9 @@ nouveau_bo_vma_add(struct nouveau_bo *nvbo, struct
nouveau_vm *vm,
>         if (nvbo->bo.mem.mem_type == TTM_PL_VRAM)
>                 nouveau_vm_map(vma, nvbo->bo.mem.mm_node);
>         else if (nvbo->bo.mem.mem_type == TTM_PL_TT) {
> +               if (!(nvbo->valid_caching & TTM_PL_FLAG_CACHED))
> +                       vma->access |= NV_MEM_ACCESS_NOSNOOP;
> +
>                 if (node->sg)
>                         nouveau_vm_map_sg_table(vma, 0, size, node);
>                 else
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h
b/drivers/gpu/drm/nouveau/nouveau_bo.h
> index 653dbbb..2ecf8b7 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
> @@ -9,6 +9,7 @@ struct nouveau_bo {
>         struct ttm_buffer_object bo;
>         struct ttm_placement placement;
>         u32 valid_domains;
> +       u32 valid_caching;
>         u32 placements[3];
>         u32 busy_placements[3];
>         struct ttm_bo_kmap_obj kmap;
> diff --git a/include/uapi/drm/nouveau_drm.h
b/include/uapi/drm/nouveau_drm.h
> index 2a5769f..4948eee2 100644
> --- a/include/uapi/drm/nouveau_drm.h
> +++ b/include/uapi/drm/nouveau_drm.h
> @@ -36,6 +36,7 @@
>  #define NOUVEAU_GEM_TILE_32BPP       0x00000002
>  #define NOUVEAU_GEM_TILE_ZETA        0x00000004
>  #define NOUVEAU_GEM_TILE_NONCONTIG   0x00000008
> +#define NOUVEAU_GEM_TILE_WCUS        0x00000010 /* write-combined,
unsnooped */
>
>  struct drm_nouveau_gem_info {
>         uint32_t handle;
> --
> 1.8.3.1
>

Thierry Reding

2013-Aug-28 07:50 UTC

head link

[Nouveau] [PATCH 0/6] Nouveau on ARM fixes

On Wed, Aug 28, 2013 at 02:00:44AM +0200, Lucas Stach wrote:
> This is the first set of patches to make Nouveau work
> on Tegra.

Perhaps you should clarify that this patch series allows discrete GPUs
to be used via Tegra's PCIe interface.

People might misinterpret...

Thierry
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 836 bytes
Desc: not available
URL:
<http://lists.freedesktop.org/archives/nouveau/attachments/20130828/06f67182/attachment.pgp>

Ben Skeggs

2013-Aug-28 08:09 UTC

head link

[Nouveau] [PATCH 0/6] Nouveau on ARM fixes

On Wed, Aug 28, 2013 at 5:50 PM, Thierry Reding
<thierry.reding at gmail.com> wrote:
> On Wed, Aug 28, 2013 at 02:00:44AM +0200, Lucas Stach wrote:
>> This is the first set of patches to make Nouveau work
>> on Tegra.
>
> Perhaps you should clarify that this patch series allows discrete GPUs
> to be used via Tegra's PCIe interface.
>
> People might misinterpret...

Hah!  Too late, quality journalism already hard at work ;)

Ben.

>
> Thierry
>
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
>

Konrad Rzeszutek Wilk

2013-Aug-28 16:43 UTC

head link

[Nouveau] [PATCH 3/6] drm/nouveau: hook up cache sync functions

On Wed, Aug 28, 2013 at 02:00:47AM +0200, Lucas Stach wrote:
> Signed-off-by: Lucas Stach <dev at lynxeye.de>
> ---
>  drivers/gpu/drm/nouveau/nouveau_bo.c  | 4 ++++
>  drivers/gpu/drm/nouveau/nouveau_gem.c | 5 +++++
>  2 files changed, 9 insertions(+)
> 
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c
b/drivers/gpu/drm/nouveau/nouveau_bo.c
> index af20fba..f4a2eb9 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> @@ -411,6 +411,10 @@ nouveau_bo_validate(struct nouveau_bo *nvbo, bool
interruptible,
>  {
>  	int ret;
>  
> +	if (nvbo->bo.ttm && nvbo->bo.ttm->caching_state == tt_cached)

You don't want to do it also for tt_wc?

> +		ttm_dma_tt_cache_sync_for_device((struct ttm_dma_tt *)nvbo->bo.ttm,
> +			&nouveau_bdev(nvbo->bo.ttm->bdev)->dev->pdev->dev);
> +
>  	ret = ttm_bo_validate(&nvbo->bo, &nvbo->placement,
>  			      interruptible, no_wait_gpu);
>  	if (ret)
> diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c
b/drivers/gpu/drm/nouveau/nouveau_gem.c
> index 830cb7b..f632b92 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_gem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
> @@ -901,6 +901,11 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev,
void *data,
>  	ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait);
>  	spin_unlock(&nvbo->bo.bdev->fence_lock);
>  	drm_gem_object_unreference_unlocked(gem);
> +
> +	if (!ret && nvbo->bo.ttm && nvbo->bo.ttm->caching_state == tt_cached)

Ditto?

> +		ttm_dma_tt_cache_sync_for_cpu((struct ttm_dma_tt *)nvbo->bo.ttm,
> +					      &dev->pdev->dev);
> +
>  	return ret;
>  }
>  
> -- 
> 1.8.3.1
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel

Possibly Parallel Threads

Search for more possibly parallel threads

Nouveau - Aug 2013 - [PATCH 0/6] Nouveau on ARM fixes

[Nouveau] [PATCH 0/6] Nouveau on ARM fixes

[Nouveau] [PATCH 1/6] drm/ttm: recognize ARM arch in ioprot handler

[Nouveau] [PATCH 2/6] drm/ttm: introduce dma cache sync helpers

[Nouveau] [PATCH 3/6] drm/nouveau: hook up cache sync functions

[Nouveau] [PATCH 4/6] drm/nouveau: introduce NOUVEAU_GEM_TILE_WCUS

[Nouveau] [PATCH 5/6] drm/nouveau: map IB write-combined

[Nouveau] [PATCH 6/6] drm/nouveau: use MSI interrupts

[Nouveau] [PATCH 6/6] drm/nouveau: use MSI interrupts

[Nouveau] [PATCH 4/6] drm/nouveau: introduce NOUVEAU_GEM_TILE_WCUS

[Nouveau] [PATCH 0/6] Nouveau on ARM fixes

[Nouveau] [PATCH 0/6] Nouveau on ARM fixes

[Nouveau] [PATCH 3/6] drm/nouveau: hook up cache sync functions

Possibly Parallel Threads