Francisco Jerez
2009-Dec-11 18:33 UTC
[Nouveau] [PATCH 1/3] drm/nouveau: Pre-G80 tiling support.
Signed-off-by: Francisco Jerez <currojerez at riseup.net>
---
drivers/gpu/drm/nouveau/nouveau_drv.h | 23 +++++
drivers/gpu/drm/nouveau/nouveau_reg.h | 16 ++--
drivers/gpu/drm/nouveau/nouveau_state.c | 8 ++
drivers/gpu/drm/nouveau/nv10_fb.c | 32 ++++++--
drivers/gpu/drm/nouveau/nv10_graph.c | 47 ++++++++---
drivers/gpu/drm/nouveau/nv20_graph.c | 80 +++++++++++--------
drivers/gpu/drm/nouveau/nv40_fb.c | 53 ++++++++-----
drivers/gpu/drm/nouveau/nv40_graph.c | 135 +++++++++++++++----------------
8 files changed, 247 insertions(+), 147 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h
b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 88b4c7b..2730497 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -276,8 +276,13 @@ struct nouveau_timer_engine {
};
struct nouveau_fb_engine {
+ int num_tiles;
+
int (*init)(struct drm_device *dev);
void (*takedown)(struct drm_device *dev);
+
+ void (*set_region_tiling)(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch);
};
struct nouveau_fifo_engine {
@@ -328,6 +333,9 @@ struct nouveau_pgraph_engine {
void (*destroy_context)(struct nouveau_channel *);
int (*load_context)(struct nouveau_channel *);
int (*unload_context)(struct drm_device *);
+
+ void (*set_region_tiling)(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch);
};
struct nouveau_engine {
@@ -876,10 +884,16 @@ extern void nv04_fb_takedown(struct drm_device *);
/* nv10_fb.c */
extern int nv10_fb_init(struct drm_device *);
extern void nv10_fb_takedown(struct drm_device *);
+extern void nv10_fb_set_region_tiling(struct drm_device *dev, int i,
+ uint32_t addr, uint32_t size,
+ uint32_t pitch);
/* nv40_fb.c */
extern int nv40_fb_init(struct drm_device *);
extern void nv40_fb_takedown(struct drm_device *);
+extern void nv40_fb_set_region_tiling(struct drm_device *dev, int i,
+ uint32_t addr, uint32_t size,
+ uint32_t pitch);
/* nv04_fifo.c */
extern int nv04_fifo_init(struct drm_device *);
@@ -938,6 +952,9 @@ extern void nv10_graph_destroy_context(struct
nouveau_channel *);
extern int nv10_graph_load_context(struct nouveau_channel *);
extern int nv10_graph_unload_context(struct drm_device *);
extern void nv10_graph_context_switch(struct drm_device *);
+extern void nv10_graph_set_region_tiling(struct drm_device *dev, int i,
+ uint32_t addr, uint32_t size,
+ uint32_t pitch);
/* nv20_graph.c */
extern struct nouveau_pgraph_object_class nv20_graph_grclass[];
@@ -949,6 +966,9 @@ extern int nv20_graph_unload_context(struct drm_device *);
extern int nv20_graph_init(struct drm_device *);
extern void nv20_graph_takedown(struct drm_device *);
extern int nv30_graph_init(struct drm_device *);
+extern void nv20_graph_set_region_tiling(struct drm_device *dev, int i,
+ uint32_t addr, uint32_t size,
+ uint32_t pitch);
/* nv40_graph.c */
extern struct nouveau_pgraph_object_class nv40_graph_grclass[];
@@ -962,6 +982,9 @@ extern int nv40_graph_unload_context(struct drm_device *);
extern int nv40_grctx_init(struct drm_device *);
extern void nv40_grctx_fini(struct drm_device *);
extern void nv40_grctx_vals_load(struct drm_device *, struct nouveau_gpuobj *);
+extern void nv40_graph_set_region_tiling(struct drm_device *dev, int i,
+ uint32_t addr, uint32_t size,
+ uint32_t pitch);
/* nv50_graph.c */
extern struct nouveau_pgraph_object_class nv50_graph_grclass[];
diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h
b/drivers/gpu/drm/nouveau/nouveau_reg.h
index fa1b0e7..251f1b3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_reg.h
+++ b/drivers/gpu/drm/nouveau/nouveau_reg.h
@@ -349,19 +349,19 @@
#define NV04_PGRAPH_BLEND 0x00400824
#define NV04_PGRAPH_STORED_FMT 0x00400830
#define NV04_PGRAPH_PATT_COLORRAM 0x00400900
-#define NV40_PGRAPH_TILE0(i) (0x00400900 + (i*16))
-#define NV40_PGRAPH_TLIMIT0(i) (0x00400904 + (i*16))
-#define NV40_PGRAPH_TSIZE0(i) (0x00400908 + (i*16))
-#define NV40_PGRAPH_TSTATUS0(i) (0x0040090C + (i*16))
+#define NV20_PGRAPH_TILE(i) (0x00400900 + (i*16))
+#define NV20_PGRAPH_TLIMIT(i) (0x00400904 + (i*16))
+#define NV20_PGRAPH_TSIZE(i) (0x00400908 + (i*16))
+#define NV20_PGRAPH_TSTATUS(i) (0x0040090C + (i*16))
#define NV10_PGRAPH_TILE(i) (0x00400B00 + (i*16))
#define NV10_PGRAPH_TLIMIT(i) (0x00400B04 + (i*16))
#define NV10_PGRAPH_TSIZE(i) (0x00400B08 + (i*16))
#define NV10_PGRAPH_TSTATUS(i) (0x00400B0C + (i*16))
#define NV04_PGRAPH_U_RAM 0x00400D00
-#define NV47_PGRAPH_TILE0(i) (0x00400D00 + (i*16))
-#define NV47_PGRAPH_TLIMIT0(i) (0x00400D04 + (i*16))
-#define NV47_PGRAPH_TSIZE0(i) (0x00400D08 + (i*16))
-#define NV47_PGRAPH_TSTATUS0(i) (0x00400D0C + (i*16))
+#define NV47_PGRAPH_TILE(i) (0x00400D00 + (i*16))
+#define NV47_PGRAPH_TLIMIT(i) (0x00400D04 + (i*16))
+#define NV47_PGRAPH_TSIZE(i) (0x00400D08 + (i*16))
+#define NV47_PGRAPH_TSTATUS(i) (0x00400D0C + (i*16))
#define NV04_PGRAPH_V_RAM 0x00400D40
#define NV04_PGRAPH_W_RAM 0x00400D80
#define NV10_PGRAPH_COMBINER0_IN_ALPHA 0x00400E40
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c
b/drivers/gpu/drm/nouveau/nouveau_state.c
index 2ed41d3..4342867 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -100,6 +100,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->timer.takedown = nv04_timer_takedown;
engine->fb.init = nv10_fb_init;
engine->fb.takedown = nv10_fb_takedown;
+ engine->fb.set_region_tiling = nv10_fb_set_region_tiling;
engine->graph.grclass = nv10_graph_grclass;
engine->graph.init = nv10_graph_init;
engine->graph.takedown = nv10_graph_takedown;
@@ -109,6 +110,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->graph.fifo_access = nv04_graph_fifo_access;
engine->graph.load_context = nv10_graph_load_context;
engine->graph.unload_context = nv10_graph_unload_context;
+ engine->graph.set_region_tiling = nv10_graph_set_region_tiling;
engine->fifo.channels = 32;
engine->fifo.init = nv10_fifo_init;
engine->fifo.takedown = nouveau_stub_takedown;
@@ -139,6 +141,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->timer.takedown = nv04_timer_takedown;
engine->fb.init = nv10_fb_init;
engine->fb.takedown = nv10_fb_takedown;
+ engine->fb.set_region_tiling = nv10_fb_set_region_tiling;
engine->graph.grclass = nv20_graph_grclass;
engine->graph.init = nv20_graph_init;
engine->graph.takedown = nv20_graph_takedown;
@@ -148,6 +151,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->graph.fifo_access = nv04_graph_fifo_access;
engine->graph.load_context = nv20_graph_load_context;
engine->graph.unload_context = nv20_graph_unload_context;
+ engine->graph.set_region_tiling = nv20_graph_set_region_tiling;
engine->fifo.channels = 32;
engine->fifo.init = nv10_fifo_init;
engine->fifo.takedown = nouveau_stub_takedown;
@@ -178,6 +182,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->timer.takedown = nv04_timer_takedown;
engine->fb.init = nv10_fb_init;
engine->fb.takedown = nv10_fb_takedown;
+ engine->fb.set_region_tiling = nv10_fb_set_region_tiling;
engine->graph.grclass = nv30_graph_grclass;
engine->graph.init = nv30_graph_init;
engine->graph.takedown = nv20_graph_takedown;
@@ -187,6 +192,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->graph.destroy_context = nv20_graph_destroy_context;
engine->graph.load_context = nv20_graph_load_context;
engine->graph.unload_context = nv20_graph_unload_context;
+ engine->graph.set_region_tiling = nv20_graph_set_region_tiling;
engine->fifo.channels = 32;
engine->fifo.init = nv10_fifo_init;
engine->fifo.takedown = nouveau_stub_takedown;
@@ -218,6 +224,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->timer.takedown = nv04_timer_takedown;
engine->fb.init = nv40_fb_init;
engine->fb.takedown = nv40_fb_takedown;
+ engine->fb.set_region_tiling = nv40_fb_set_region_tiling;
engine->graph.grclass = nv40_graph_grclass;
engine->graph.init = nv40_graph_init;
engine->graph.takedown = nv40_graph_takedown;
@@ -227,6 +234,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->graph.destroy_context = nv40_graph_destroy_context;
engine->graph.load_context = nv40_graph_load_context;
engine->graph.unload_context = nv40_graph_unload_context;
+ engine->graph.set_region_tiling = nv40_graph_set_region_tiling;
engine->fifo.channels = 32;
engine->fifo.init = nv40_fifo_init;
engine->fifo.takedown = nouveau_stub_takedown;
diff --git a/drivers/gpu/drm/nouveau/nv10_fb.c
b/drivers/gpu/drm/nouveau/nv10_fb.c
index 79e2d10..cc5cda4 100644
--- a/drivers/gpu/drm/nouveau/nv10_fb.c
+++ b/drivers/gpu/drm/nouveau/nv10_fb.c
@@ -3,17 +3,37 @@
#include "nouveau_drv.h"
#include "nouveau_drm.h"
+void
+nv10_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch) {
+ if (dev_priv->card_type >= NV_20)
+ addr |= 1;
+ else
+ addr |= 1 << 31;
+ }
+
+ nv_wr32(dev, NV10_PFB_TLIMIT(i), limit);
+ nv_wr32(dev, NV10_PFB_TSIZE(i), pitch);
+ nv_wr32(dev, NV10_PFB_TILE(i), addr);
+}
+
int
nv10_fb_init(struct drm_device *dev)
{
- uint32_t fb_bar_size;
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
int i;
- fb_bar_size = drm_get_resource_len(dev, 0) - 1;
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- nv_wr32(dev, NV10_PFB_TILE(i), 0);
- nv_wr32(dev, NV10_PFB_TLIMIT(i), fb_bar_size);
- }
+ pfb->num_tiles = NV10_PFB_TILE__SIZE;
+
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < pfb->num_tiles; i++)
+ pfb->set_region_tiling(dev, i, 0, 0, 0);
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/nv10_graph.c
b/drivers/gpu/drm/nouveau/nv10_graph.c
index 6bf6804..2aeac8b 100644
--- a/drivers/gpu/drm/nouveau/nv10_graph.c
+++ b/drivers/gpu/drm/nouveau/nv10_graph.c
@@ -808,6 +808,39 @@ void nv10_graph_destroy_context(struct nouveau_channel
*chan)
chan->pgraph_ctx = NULL;
}
+static void
+nv10_graph_write_tile(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch)
+ addr |= 1 << 31;
+
+ nv_wr32(dev, NV10_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV10_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV10_PGRAPH_TILE(i), addr);
+}
+
+void
+nv10_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+ struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+
+ pfifo->reassign(dev, false);
+ pgraph->fifo_access(dev, false);
+
+ nouveau_wait_for_idle(dev);
+
+ nv10_graph_write_tile(dev, i, addr, size, pitch);
+
+ pgraph->fifo_access(dev, true);
+ pfifo->reassign(dev, true);
+}
+
int nv10_graph_init(struct drm_device *dev)
{
struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -836,17 +869,9 @@ int nv10_graph_init(struct drm_device *dev)
} else
nv_wr32(dev, NV10_PGRAPH_DEBUG_4, 0x00000000);
- /* copy tile info from PFB */
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- nv_wr32(dev, NV10_PGRAPH_TILE(i),
- nv_rd32(dev, NV10_PFB_TILE(i)));
- nv_wr32(dev, NV10_PGRAPH_TLIMIT(i),
- nv_rd32(dev, NV10_PFB_TLIMIT(i)));
- nv_wr32(dev, NV10_PGRAPH_TSIZE(i),
- nv_rd32(dev, NV10_PFB_TSIZE(i)));
- nv_wr32(dev, NV10_PGRAPH_TSTATUS(i),
- nv_rd32(dev, NV10_PFB_TSTATUS(i)));
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
+ nv10_graph_write_tile(dev, i, 0, 0, 0);
nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH1, 0x00000000);
nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH2, 0x00000000);
diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c
b/drivers/gpu/drm/nouveau/nv20_graph.c
index 18ba74f..55d1a8e 100644
--- a/drivers/gpu/drm/nouveau/nv20_graph.c
+++ b/drivers/gpu/drm/nouveau/nv20_graph.c
@@ -514,6 +514,46 @@ nv20_graph_rdi(struct drm_device *dev)
nouveau_wait_for_idle(dev);
}
+static void
+nv20_graph_write_tile(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch)
+ addr |= 1;
+
+ nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV20_PGRAPH_TILE(i), addr);
+
+ nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + 4 * i);
+ nv_wr32(dev, NV10_PGRAPH_RDI_DATA, limit);
+ nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + 4 * i);
+ nv_wr32(dev, NV10_PGRAPH_RDI_DATA, pitch);
+ nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i);
+ nv_wr32(dev, NV10_PGRAPH_RDI_DATA, addr);
+}
+
+void
+nv20_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+ struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+
+ pfifo->reassign(dev, false);
+ pgraph->fifo_access(dev, false);
+
+ nouveau_wait_for_idle(dev);
+
+ nv20_graph_write_tile(dev, i, addr, size, pitch);
+
+ pgraph->fifo_access(dev, true);
+ pfifo->reassign(dev, true);
+}
+
int
nv20_graph_init(struct drm_device *dev)
{
@@ -572,27 +612,10 @@ nv20_graph_init(struct drm_device *dev)
nv_wr32(dev, NV10_PGRAPH_RDI_DATA , 0x00000030);
}
- /* copy tile info from PFB */
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- nv_wr32(dev, 0x00400904 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TLIMIT(i)));
- /* which is NV40_PGRAPH_TLIMIT0(i) ?? */
- nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + i * 4);
- nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
- nv_rd32(dev, NV10_PFB_TLIMIT(i)));
- nv_wr32(dev, 0x00400908 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TSIZE(i)));
- /* which is NV40_PGRAPH_TSIZE0(i) ?? */
- nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + i * 4);
- nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
- nv_rd32(dev, NV10_PFB_TSIZE(i)));
- nv_wr32(dev, 0x00400900 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TILE(i)));
- /* which is NV40_PGRAPH_TILE0(i) ?? */
- nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + i * 4);
- nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
- nv_rd32(dev, NV10_PFB_TILE(i)));
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
+ nv20_graph_write_tile(dev, i, 0, 0, 0);
+
for (i = 0; i < 8; i++) {
nv_wr32(dev, 0x400980 + i * 4, nv_rd32(dev, 0x100300 + i * 4));
nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0090 + i * 4);
@@ -704,18 +727,9 @@ nv30_graph_init(struct drm_device *dev)
nv_wr32(dev, 0x4000c0, 0x00000016);
- /* copy tile info from PFB */
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- nv_wr32(dev, 0x00400904 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TLIMIT(i)));
- /* which is NV40_PGRAPH_TLIMIT0(i) ?? */
- nv_wr32(dev, 0x00400908 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TSIZE(i)));
- /* which is NV40_PGRAPH_TSIZE0(i) ?? */
- nv_wr32(dev, 0x00400900 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TILE(i)));
- /* which is NV40_PGRAPH_TILE0(i) ?? */
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
+ nv20_graph_write_tile(dev, i, 0, 0, 0);
nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10000100);
nv_wr32(dev, NV10_PGRAPH_STATE , 0xFFFFFFFF);
diff --git a/drivers/gpu/drm/nouveau/nv40_fb.c
b/drivers/gpu/drm/nouveau/nv40_fb.c
index ca1d271..3cd07d8 100644
--- a/drivers/gpu/drm/nouveau/nv40_fb.c
+++ b/drivers/gpu/drm/nouveau/nv40_fb.c
@@ -3,12 +3,37 @@
#include "nouveau_drv.h"
#include "nouveau_drm.h"
+void
+nv40_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch)
+ addr |= 1;
+
+ switch (dev_priv->chipset) {
+ case 0x40:
+ nv_wr32(dev, NV10_PFB_TLIMIT(i), limit);
+ nv_wr32(dev, NV10_PFB_TSIZE(i), pitch);
+ nv_wr32(dev, NV10_PFB_TILE(i), addr);
+ break;
+
+ default:
+ nv_wr32(dev, NV40_PFB_TLIMIT(i), limit);
+ nv_wr32(dev, NV40_PFB_TSIZE(i), pitch);
+ nv_wr32(dev, NV40_PFB_TILE(i), addr);
+ break;
+ }
+}
+
int
nv40_fb_init(struct drm_device *dev)
{
struct drm_nouveau_private *dev_priv = dev->dev_private;
- uint32_t fb_bar_size, tmp;
- int num_tiles;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+ uint32_t tmp;
int i;
/* This is strictly a NV4x register (don't know about NV5x). */
@@ -23,35 +48,23 @@ nv40_fb_init(struct drm_device *dev)
case 0x45:
tmp = nv_rd32(dev, NV10_PFB_CLOSE_PAGE2);
nv_wr32(dev, NV10_PFB_CLOSE_PAGE2, tmp & ~(1 << 15));
- num_tiles = NV10_PFB_TILE__SIZE;
+ pfb->num_tiles = NV10_PFB_TILE__SIZE;
break;
case 0x46: /* G72 */
case 0x47: /* G70 */
case 0x49: /* G71 */
case 0x4b: /* G73 */
case 0x4c: /* C51 (G7X version) */
- num_tiles = NV40_PFB_TILE__SIZE_1;
+ pfb->num_tiles = NV40_PFB_TILE__SIZE_1;
break;
default:
- num_tiles = NV40_PFB_TILE__SIZE_0;
+ pfb->num_tiles = NV40_PFB_TILE__SIZE_0;
break;
}
- fb_bar_size = drm_get_resource_len(dev, 0) - 1;
- switch (dev_priv->chipset) {
- case 0x40:
- for (i = 0; i < num_tiles; i++) {
- nv_wr32(dev, NV10_PFB_TILE(i), 0);
- nv_wr32(dev, NV10_PFB_TLIMIT(i), fb_bar_size);
- }
- break;
- default:
- for (i = 0; i < num_tiles; i++) {
- nv_wr32(dev, NV40_PFB_TILE(i), 0);
- nv_wr32(dev, NV40_PFB_TLIMIT(i), fb_bar_size);
- }
- break;
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < pfb->num_tiles; i++)
+ pfb->set_region_tiling(dev, i, 0, 0, 0);
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c
b/drivers/gpu/drm/nouveau/nv40_graph.c
index d3e0a2a..2435d49 100644
--- a/drivers/gpu/drm/nouveau/nv40_graph.c
+++ b/drivers/gpu/drm/nouveau/nv40_graph.c
@@ -333,6 +333,67 @@ nv40_grctx_vals_load(struct drm_device *dev, struct
nouveau_gpuobj *ctx)
nv_wo32(dev, ctx, cv->data[i].offset, cv->data[i].value);
}
+static void
+nv40_graph_write_tile(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch)
+ addr |= 1;
+
+ switch (dev_priv->chipset) {
+ case 0x44:
+ case 0x4a:
+ case 0x4e:
+ nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV20_PGRAPH_TILE(i), addr);
+ break;
+
+ case 0x46:
+ case 0x47:
+ case 0x49:
+ case 0x4b:
+ nv_wr32(dev, NV47_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV47_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV47_PGRAPH_TILE(i), addr);
+ nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch);
+ nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit);
+ nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr);
+ break;
+
+ default:
+ nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV20_PGRAPH_TILE(i), addr);
+ nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch);
+ nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit);
+ nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr);
+ break;
+ }
+}
+
+void
+nv40_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+ struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+
+ pfifo->reassign(dev, false);
+ pgraph->fifo_access(dev, false);
+
+ nouveau_wait_for_idle(dev);
+
+ nv40_graph_write_tile(dev, i, addr, size, pitch);
+
+ pgraph->fifo_access(dev, true);
+ pfifo->reassign(dev, true);
+}
+
/*
* G70 0x47
* G71 0x49
@@ -347,7 +408,8 @@ nv40_graph_init(struct drm_device *dev)
{
struct drm_nouveau_private *dev_priv =
(struct drm_nouveau_private *)dev->dev_private;
- uint32_t vramsz, tmp;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+ uint32_t vramsz;
int i, j;
nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) &
@@ -425,74 +487,9 @@ nv40_graph_init(struct drm_device *dev)
nv_wr32(dev, 0x400b38, 0x2ffff800);
nv_wr32(dev, 0x400b3c, 0x00006000);
- /* copy tile info from PFB */
- switch (dev_priv->chipset) {
- case 0x40: /* vanilla NV40 */
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- tmp = nv_rd32(dev, NV10_PFB_TILE(i));
- nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp);
- tmp = nv_rd32(dev, NV10_PFB_TLIMIT(i));
- nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp);
- tmp = nv_rd32(dev, NV10_PFB_TSIZE(i));
- nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp);
- tmp = nv_rd32(dev, NV10_PFB_TSTATUS(i));
- nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp);
- }
- break;
- case 0x44:
- case 0x4a:
- case 0x4e: /* NV44-based cores don't have 0x406900? */
- for (i = 0; i < NV40_PFB_TILE__SIZE_0; i++) {
- tmp = nv_rd32(dev, NV40_PFB_TILE(i));
- nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i));
- nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSIZE(i));
- nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i));
- nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp);
- }
- break;
- case 0x46:
- case 0x47:
- case 0x49:
- case 0x4b: /* G7X-based cores */
- for (i = 0; i < NV40_PFB_TILE__SIZE_1; i++) {
- tmp = nv_rd32(dev, NV40_PFB_TILE(i));
- nv_wr32(dev, NV47_PGRAPH_TILE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i));
- nv_wr32(dev, NV47_PGRAPH_TLIMIT0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSIZE(i));
- nv_wr32(dev, NV47_PGRAPH_TSIZE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i));
- nv_wr32(dev, NV47_PGRAPH_TSTATUS0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp);
- }
- break;
- default: /* everything else */
- for (i = 0; i < NV40_PFB_TILE__SIZE_0; i++) {
- tmp = nv_rd32(dev, NV40_PFB_TILE(i));
- nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i));
- nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSIZE(i));
- nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i));
- nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp);
- }
- break;
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < pfb->num_tiles; i++)
+ nv40_graph_write_tile(dev, i, 0, 0, 0);
/* begin RAM config */
vramsz = drm_get_resource_len(dev, 0) - 1;
--
1.6.4.4
Francisco Jerez
2009-Dec-11 18:33 UTC
[Nouveau] [PATCH 2/3] drm/nouveau: Make the MM aware of pre-G80 tiling.
Signed-off-by: Francisco Jerez <currojerez at riseup.net>
---
drivers/gpu/drm/nouveau/nouveau_bo.c | 221 +++++++++++++++++++++++----------
drivers/gpu/drm/nouveau/nouveau_drv.h | 22 ++++
drivers/gpu/drm/nouveau/nouveau_mem.c | 80 ++++++++++++
3 files changed, 258 insertions(+), 65 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 320a14b..4616bae 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -37,6 +37,7 @@ static void
nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
{
struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+ struct drm_device *dev = dev_priv->dev;
struct nouveau_bo *nvbo = nouveau_bo(bo);
ttm_bo_kunmap(&nvbo->kmap);
@@ -44,12 +45,83 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
if (unlikely(nvbo->gem))
DRM_ERROR("bo %p still attached to GEM object\n", bo);
+ if (nvbo->tile)
+ nv10_mem_expire_tiling(dev, nvbo->tile, NULL);
+
spin_lock(&dev_priv->ttm.bo_list_lock);
list_del(&nvbo->head);
spin_unlock(&dev_priv->ttm.bo_list_lock);
kfree(nvbo);
}
+static void
+nouveau_bo_fixup_align(struct drm_device *dev,
+ uint32_t tile_mode, uint32_t tile_flags,
+ int *align, int *size)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+ /*
+ * Some of the tile_flags have a periodic structure of N*4096 bytes,
+ * align to that as well as the page size. Overallocate memory to
+ * avoid corruption of other buffer objects.
+ */
+ if (dev_priv->card_type == NV_50) {
+ switch (tile_flags) {
+ case 0x1800:
+ case 0x2800:
+ case 0x4800:
+ case 0x7a00:
+ if (dev_priv->chipset >= 0xA0) {
+ /* This is based on high end cards with 448 bits
+ * memory bus, could be different elsewhere.*/
+ *size += 6 * 28672;
+ /* 8 * 28672 is the actual alignment requirement
+ * but we must also align to page size. */
+ *align = 2 * 8 * 28672;
+ } else if (dev_priv->chipset >= 0x90) {
+ *size += 3 * 16384;
+ *align = 12 * 16384;
+ } else {
+ *size += 3 * 8192;
+ /* 12 * 8192 is the actual alignment requirement
+ * but we must also align to page size. */
+ *align = 2 * 12 * 8192;
+ }
+ break;
+ default:
+ break;
+ }
+
+ } else {
+ if (tile_mode) {
+ if (dev_priv->chipset >= 0x40) {
+ *align = 65536;
+ *size = roundup(*size, 64 * tile_mode);
+
+ } else if (dev_priv->chipset >= 0x30) {
+ *align = 32768;
+ *size = roundup(*size, 32 * tile_mode);
+
+ } else if (dev_priv->chipset >= 0x20) {
+ *align = 16384;
+ *size = roundup(*size, 32 * tile_mode);
+
+ } else if (dev_priv->chipset >= 0x10) {
+ *align = 4096;
+ *size = roundup(*size, 16 * tile_mode);
+ }
+ }
+ }
+
+ *size = ALIGN(*size, PAGE_SIZE);
+
+ if (dev_priv->card_type == NV_50) {
+ *size = ALIGN(*size, 65536);
+ *align = max(65536, *align);
+ }
+}
+
int
nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan,
int size, int align, uint32_t flags, uint32_t tile_mode,
@@ -70,46 +142,9 @@ nouveau_bo_new(struct drm_device *dev, struct
nouveau_channel *chan,
nvbo->tile_mode = tile_mode;
nvbo->tile_flags = tile_flags;
- /*
- * Some of the tile_flags have a periodic structure of N*4096 bytes,
- * align to to that as well as the page size. Overallocate memory to
- * avoid corruption of other buffer objects.
- */
- switch (tile_flags) {
- case 0x1800:
- case 0x2800:
- case 0x4800:
- case 0x7a00:
- if (dev_priv->chipset >= 0xA0) {
- /* This is based on high end cards with 448 bits
- * memory bus, could be different elsewhere.*/
- size += 6 * 28672;
- /* 8 * 28672 is the actual alignment requirement,
- * but we must also align to page size. */
- align = 2 * 8 * 28672;
- } else if (dev_priv->chipset >= 0x90) {
- size += 3 * 16384;
- align = 12 * 16834;
- } else {
- size += 3 * 8192;
- /* 12 * 8192 is the actual alignment requirement,
- * but we must also align to page size. */
- align = 2 * 12 * 8192;
- }
- break;
- default:
- break;
- }
-
+ nouveau_bo_fixup_align(dev, tile_mode, tile_flags, &align, &size);
align >>= PAGE_SHIFT;
- size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
- if (dev_priv->card_type == NV_50) {
- size = (size + 65535) & ~65535;
- if (align < (65536 / PAGE_SIZE))
- align = (65536 / PAGE_SIZE);
- }
-
if (flags & TTM_PL_FLAG_VRAM)
nvbo->placements[n++] = TTM_PL_FLAG_VRAM | TTM_PL_MASK_CACHING;
if (flags & TTM_PL_FLAG_TT)
@@ -408,6 +443,7 @@ nouveau_bo_evict_flags(struct ttm_buffer_object *bo, struct
ttm_placement *pl)
/* GPU-assisted copy using NV_MEMORY_TO_MEMORY_FORMAT, can access
* TTM_PL_{VRAM,TT} directly.
*/
+
static int
nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
struct nouveau_bo *nvbo, bool evict, bool no_wait,
@@ -442,11 +478,12 @@ nouveau_bo_mem_ctxdma(struct nouveau_bo *nvbo, struct
nouveau_channel *chan,
}
static int
-nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, int no_wait,
- struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
+ int no_wait, struct ttm_mem_reg *new_mem)
{
struct nouveau_bo *nvbo = nouveau_bo(bo);
struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+ struct ttm_mem_reg *old_mem = &bo->mem;
struct nouveau_channel *chan;
uint64_t src_offset, dst_offset;
uint32_t page_count;
@@ -549,7 +586,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool
evict, bool intr,
if (ret)
goto out;
- ret = nouveau_bo_move_m2mf(bo, true, no_wait, &bo->mem, &tmp_mem);
+ ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait, &tmp_mem);
if (ret)
goto out;
@@ -587,7 +624,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool
evict, bool intr,
if (ret)
goto out;
- ret = nouveau_bo_move_m2mf(bo, true, no_wait, &bo->mem, new_mem);
+ ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait, new_mem);
if (ret)
goto out;
@@ -602,51 +639,105 @@ out:
}
static int
-nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
- bool no_wait, struct ttm_mem_reg *new_mem)
+nouveau_bo_vm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem,
+ struct nouveau_tile_reg **new_tile)
{
struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
- struct nouveau_bo *nvbo = nouveau_bo(bo);
struct drm_device *dev = dev_priv->dev;
- struct ttm_mem_reg *old_mem = &bo->mem;
+ struct nouveau_bo *nvbo = nouveau_bo(bo);
+ uint64_t offset;
int ret;
- if (dev_priv->card_type == NV_50 && new_mem->mem_type == TTM_PL_VRAM &&
- !nvbo->no_vm) {
- uint64_t offset = new_mem->mm_node->start << PAGE_SHIFT;
+ if (nvbo->no_vm || new_mem->mem_type != TTM_PL_VRAM) {
+ /* Nothing to do. */
+ *new_tile = NULL;
+ return 0;
+ }
+
+ offset = new_mem->mm_node->start << PAGE_SHIFT;
+ if (dev_priv->card_type == NV_50) {
ret = nv50_mem_vm_bind_linear(dev,
offset + dev_priv->vm_vram_base,
new_mem->size, nvbo->tile_flags,
offset);
if (ret)
return ret;
+
+ } else if (dev_priv->card_type >= NV_10) {
+ *new_tile = nv10_mem_set_tiling(dev, offset, new_mem->size,
+ nvbo->tile_mode);
}
- if (dev_priv->init_state != NOUVEAU_CARD_INIT_DONE)
- return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
+ return 0;
+}
+static void
+nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
+ struct nouveau_tile_reg *new_tile,
+ struct nouveau_tile_reg **old_tile)
+{
+ struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+ struct drm_device *dev = dev_priv->dev;
+
+ if (dev_priv->card_type >= NV_10 &&
+ dev_priv->card_type < NV_50) {
+ if (*old_tile)
+ nv10_mem_expire_tiling(dev, *old_tile, bo->sync_obj);
+
+ *old_tile = new_tile;
+ }
+}
+
+static int
+nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
+ bool no_wait, struct ttm_mem_reg *new_mem)
+{
+ struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+ struct nouveau_bo *nvbo = nouveau_bo(bo);
+ struct ttm_mem_reg *old_mem = &bo->mem;
+ struct nouveau_tile_reg *new_tile = NULL;
+ int ret = 0;
+
+ ret = nouveau_bo_vm_bind(bo, new_mem, &new_tile);
+ if (ret)
+ return ret;
+
+ /* Software copy if the card isn't up and running yet. */
+ if (dev_priv->init_state != NOUVEAU_CARD_INIT_DONE) {
+ ret = ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
+ goto out;
+ }
+
+ /* Fake bo copy. */
if (old_mem->mem_type == TTM_PL_SYSTEM && !bo->ttm) {
BUG_ON(bo->mem.mm_node != NULL);
bo->mem = *new_mem;
new_mem->mm_node = NULL;
- return 0;
+ goto out;
}
- if (new_mem->mem_type == TTM_PL_SYSTEM) {
- if (old_mem->mem_type == TTM_PL_SYSTEM)
- return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
- if (nouveau_bo_move_flipd(bo, evict, intr, no_wait, new_mem))
- return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
- } else if (old_mem->mem_type == TTM_PL_SYSTEM) {
- if (nouveau_bo_move_flips(bo, evict, intr, no_wait, new_mem))
- return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
- } else {
- if (nouveau_bo_move_m2mf(bo, evict, no_wait, old_mem, new_mem))
- return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
- }
+ /* Hardware assisted copy. */
+ if (new_mem->mem_type == TTM_PL_SYSTEM)
+ ret = nouveau_bo_move_flipd(bo, evict, intr, no_wait, new_mem);
+ else if (old_mem->mem_type == TTM_PL_SYSTEM)
+ ret = nouveau_bo_move_flips(bo, evict, intr, no_wait, new_mem);
+ else
+ ret = nouveau_bo_move_m2mf(bo, evict, intr, no_wait, new_mem);
- return 0;
+ if (!ret)
+ goto out;
+
+ /* Fallback to software copy. */
+ ret = ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
+
+out:
+ if (ret)
+ nouveau_bo_vm_cleanup(bo, NULL, &new_tile);
+ else
+ nouveau_bo_vm_cleanup(bo, new_tile, &nvbo->tile);
+
+ return ret;
}
static int
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h
b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 2730497..7a2a322 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -58,11 +58,19 @@ struct nouveau_fpriv {
#define MAX_NUM_DCB_ENTRIES 16
#define NOUVEAU_MAX_CHANNEL_NR 128
+#define NOUVEAU_MAX_TILE_NR 15
#define NV50_VM_MAX_VRAM (2*1024*1024*1024ULL)
#define NV50_VM_BLOCK (512*1024*1024ULL)
#define NV50_VM_VRAM_NR (NV50_VM_MAX_VRAM / NV50_VM_BLOCK)
+struct nouveau_tile_reg { /* software state of one nv10-nv40 tiling region */
+ struct nouveau_fence *fence; /* set by nv10_mem_expire_tiling(); region counts as pending until it signals */
+ uint32_t addr; /* addr last passed to nv10_mem_set_tiling_locked() */
+ uint32_t size; /* size last passed to nv10_mem_set_tiling_locked() */
+ bool used; /* region is claimed or being reprogrammed; nv10_mem_set_tiling() skips it */
+};
+
struct nouveau_bo {
struct ttm_buffer_object bo;
struct ttm_placement placement;
@@ -82,6 +90,7 @@ struct nouveau_bo {
uint32_t tile_mode;
uint32_t tile_flags;
+ struct nouveau_tile_reg *tile;
struct drm_gem_object *gem;
struct drm_file *cpu_filp;
@@ -554,6 +563,12 @@ struct drm_nouveau_private {
unsigned long sg_handle;
} gart_info;
+ /* nv10-nv40 tiling regions */
+ struct {
+ struct nouveau_tile_reg reg[NOUVEAU_MAX_TILE_NR];
+ spinlock_t lock;
+ } tile;
+
/* G8x/G9x virtual address space */
uint64_t vm_gart_base;
uint64_t vm_gart_size;
@@ -690,6 +705,13 @@ extern void nouveau_mem_release(struct drm_file *, struct
mem_block *heap);
extern int nouveau_mem_init(struct drm_device *);
extern int nouveau_mem_init_agp(struct drm_device *);
extern void nouveau_mem_close(struct drm_device *);
+extern struct nouveau_tile_reg *nv10_mem_set_tiling(struct drm_device *dev,
+ uint32_t addr,
+ uint32_t size,
+ uint32_t pitch);
+extern void nv10_mem_expire_tiling(struct drm_device *dev,
+ struct nouveau_tile_reg *tile,
+ struct nouveau_fence *fence);
extern int nv50_mem_vm_bind_linear(struct drm_device *, uint64_t virt,
uint32_t size, uint32_t flags,
uint64_t phys);
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c
b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 0275571..6056f32 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -192,6 +192,85 @@ void nouveau_mem_release(struct drm_file *file_priv, struct
mem_block *heap)
}
/*
+ * NV10-NV40 tiling helpers
+ */
+
+static void /* Program tiling region i through the PGRAPH and PFB hooks and record the new setup; pitch == 0 leaves the region unused. */
+nv10_mem_set_tiling_locked(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{ /* Entered and left with tile.lock held; the lock is dropped around the engine calls. */
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+ struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+ struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i];
+
+ tile->used = true; /* keeps nv10_mem_set_tiling() off this region while the lock is dropped */
+
+ spin_unlock(&dev_priv->tile.lock); /* NOTE(review): presumably because the hooks wait for the GPU to idle -- confirm */
+
+ pgraph->set_region_tiling(dev, i, addr, size, pitch);
+ pfb->set_region_tiling(dev, i, addr, size, pitch);
+
+ spin_lock(&dev_priv->tile.lock);
+
+ tile->addr = addr;
+ tile->size = size;
+ tile->used = !!pitch; /* stays claimed only if a non-zero pitch was programmed */
+ nouveau_fence_unref((void **)&tile->fence); /* drops the reference taken in nv10_mem_expire_tiling() */
+}
+
+struct nouveau_tile_reg * /* Claim a free tiling region for [addr, addr + size); returns it, or NULL if pitch is 0 or no region is free. */
+nv10_mem_set_tiling(struct drm_device *dev, uint32_t addr, uint32_t size,
+ uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+ struct nouveau_tile_reg *tile = dev_priv->tile.reg, *found = NULL;
+ int i;
+
+ spin_lock(&dev_priv->tile.lock);
+
+ for (i = 0; i < pfb->num_tiles; i++) { /* no break on a match: later overlapping regions still get killed */
+ if (tile[i].used)
+ /* Tile region in use. */
+ continue;
+
+ if (tile[i].fence &&
+ !nouveau_fence_signalled(tile[i].fence, NULL))
+ /* Pending tile region: released, but its fence hasn't signalled yet. */
+ continue;
+
+ if (max(tile[i].addr, addr) <
+ min(tile[i].addr + tile[i].size, addr + size))
+ /* Kill an unused tile region that overlaps [addr, addr + size). */
+ nv10_mem_set_tiling_locked(dev, i, 0, 0, 0);
+
+ if (pitch && !found) {
+ /* Free tile region: claim the first one we come across. */
+ nv10_mem_set_tiling_locked(dev, i, addr, size, pitch);
+ found = &tile[i];
+ }
+ }
+
+ spin_unlock(&dev_priv->tile.lock);
+
+ return found;
+}
+
+void /* Release a claimed tiling region; with a fence, nv10_mem_set_tiling() won't reuse it until the fence signals. */
+nv10_mem_expire_tiling(struct drm_device *dev, struct nouveau_tile_reg *tile,
+ struct nouveau_fence *fence)
+{
+ if (fence) {
+ /* Mark it as pending: nv10_mem_set_tiling() skips it until the fence signals. */
+ tile->fence = fence;
+ nouveau_fence_ref(fence); /* dropped again in nv10_mem_set_tiling_locked() */
+ }
+
+ tile->used = false; /* NOTE(review): tile.lock is not taken here -- confirm callers serialize against nv10_mem_set_tiling() */
+}
+
+/*
* NV50 VM helpers
*/
int
@@ -509,6 +588,7 @@ nouveau_mem_init(struct drm_device *dev)
INIT_LIST_HEAD(&dev_priv->ttm.bo_list);
spin_lock_init(&dev_priv->ttm.bo_list_lock);
+ spin_lock_init(&dev_priv->tile.lock);
dev_priv->fb_available_size = nouveau_mem_fb_amount(dev);
--
1.6.4.4
Francisco Jerez
2009-Dec-11 18:33 UTC
[Nouveau] [PATCH 3/3] drm/nouveau: Fix up buffer eviction, and evict them to GART, if possible.
Signed-off-by: Francisco Jerez <currojerez at riseup.net>
---
drivers/gpu/drm/nouveau/nouveau_bo.c | 6 ++++++
1 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 4616bae..af0a1f5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -433,10 +433,16 @@ nouveau_bo_evict_flags(struct ttm_buffer_object *bo,
struct ttm_placement *pl)
struct nouveau_bo *nvbo = nouveau_bo(bo);
switch (bo->mem.mem_type) {
+ case TTM_PL_VRAM:
+ nouveau_bo_placement_set(nvbo, TTM_PL_FLAG_TT |
+ TTM_PL_FLAG_SYSTEM);
+ break;
default:
nouveau_bo_placement_set(nvbo, TTM_PL_FLAG_SYSTEM);
break;
}
+
+ *pl = nvbo->placement;
}
--
1.6.4.4
Jimmy Rentz
2009-Dec-11 20:18 UTC
[Nouveau] [PATCH 1/3] drm/nouveau: Pre-G80 tiling support.
On Fri, 11 Dec 2009 19:33:22 +0100 Francisco Jerez <currojerez at riseup.net> wrote:> Signed-off-by: Francisco Jerez <currojerez at riseup.net> > --- > drivers/gpu/drm/nouveau/nouveau_drv.h | 23 +++++ > drivers/gpu/drm/nouveau/nouveau_reg.h | 16 ++-- > drivers/gpu/drm/nouveau/nouveau_state.c | 8 ++ > drivers/gpu/drm/nouveau/nv10_fb.c | 32 ++++++-- > drivers/gpu/drm/nouveau/nv10_graph.c | 47 ++++++++--- > drivers/gpu/drm/nouveau/nv20_graph.c | 80 +++++++++++-------- > drivers/gpu/drm/nouveau/nv40_fb.c | 53 ++++++++----- > drivers/gpu/drm/nouveau/nv40_graph.c | 135 > +++++++++++++++---------------- 8 files changed, 247 insertions(+), > 147 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h > b/drivers/gpu/drm/nouveau/nouveau_drv.h index 88b4c7b..2730497 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_drv.h > +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h > @@ -276,8 +276,13 @@ struct nouveau_timer_engine { > }; > > struct nouveau_fb_engine { > + int num_tiles; > + > int (*init)(struct drm_device *dev); > void (*takedown)(struct drm_device *dev); > + > + void (*set_region_tiling)(struct drm_device *dev, int i, > uint32_t addr, > + uint32_t size, uint32_t pitch); > }; > > struct nouveau_fifo_engine { > @@ -328,6 +333,9 @@ struct nouveau_pgraph_engine { > void (*destroy_context)(struct nouveau_channel *); > int (*load_context)(struct nouveau_channel *); > int (*unload_context)(struct drm_device *); > + > + void (*set_region_tiling)(struct drm_device *dev, int i, > uint32_t addr, > + uint32_t size, uint32_t pitch); > }; > > struct nouveau_engine { > @@ -876,10 +884,16 @@ extern void nv04_fb_takedown(struct drm_device > *); /* nv10_fb.c */ > extern int nv10_fb_init(struct drm_device *); > extern void nv10_fb_takedown(struct drm_device *); > +extern void nv10_fb_set_region_tiling(struct drm_device *dev, int i, > + uint32_t addr, uint32_t size, > + uint32_t pitch); > > /* nv40_fb.c */ > extern int nv40_fb_init(struct drm_device *); > extern void 
nv40_fb_takedown(struct drm_device *); > +extern void nv40_fb_set_region_tiling(struct drm_device *dev, int i, > + uint32_t addr, uint32_t size, > + uint32_t pitch); > > /* nv04_fifo.c */ > extern int nv04_fifo_init(struct drm_device *); > @@ -938,6 +952,9 @@ extern void nv10_graph_destroy_context(struct > nouveau_channel *); extern int nv10_graph_load_context(struct > nouveau_channel *); extern int nv10_graph_unload_context(struct > drm_device *); extern void nv10_graph_context_switch(struct > drm_device *); +extern void nv10_graph_set_region_tiling(struct > drm_device *dev, int i, > + uint32_t addr, uint32_t > size, > + uint32_t pitch); > > /* nv20_graph.c */ > extern struct nouveau_pgraph_object_class nv20_graph_grclass[]; > @@ -949,6 +966,9 @@ extern int nv20_graph_unload_context(struct > drm_device *); extern int nv20_graph_init(struct drm_device *); > extern void nv20_graph_takedown(struct drm_device *); > extern int nv30_graph_init(struct drm_device *); > +extern void nv20_graph_set_region_tiling(struct drm_device *dev, int > i, > + uint32_t addr, uint32_t > size, > + uint32_t pitch); > > /* nv40_graph.c */ > extern struct nouveau_pgraph_object_class nv40_graph_grclass[]; > @@ -962,6 +982,9 @@ extern int nv40_graph_unload_context(struct > drm_device *); extern int nv40_grctx_init(struct drm_device *); > extern void nv40_grctx_fini(struct drm_device *); > extern void nv40_grctx_vals_load(struct drm_device *, struct > nouveau_gpuobj *); +extern void nv40_graph_set_region_tiling(struct > drm_device *dev, int i, > + uint32_t addr, uint32_t > size, > + uint32_t pitch); > > /* nv50_graph.c */ > extern struct nouveau_pgraph_object_class nv50_graph_grclass[]; > diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h > b/drivers/gpu/drm/nouveau/nouveau_reg.h index fa1b0e7..251f1b3 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_reg.h > +++ b/drivers/gpu/drm/nouveau/nouveau_reg.h > @@ -349,19 +349,19 @@ > #define NV04_PGRAPH_BLEND 0x00400824 > #define NV04_PGRAPH_STORED_FMT 
0x00400830 > #define NV04_PGRAPH_PATT_COLORRAM 0x00400900 > -#define NV40_PGRAPH_TILE0(i) > (0x00400900 + (i*16)) -#define > NV40_PGRAPH_TLIMIT0(i) (0x00400904 + > (i*16)) -#define NV40_PGRAPH_TSIZE0(i) > (0x00400908 + (i*16)) -#define > NV40_PGRAPH_TSTATUS0(i) (0x0040090C + > (i*16)) +#define NV20_PGRAPH_TILE(i) > (0x00400900 + (i*16)) +#define > NV20_PGRAPH_TLIMIT(i) (0x00400904 + > (i*16)) +#define NV20_PGRAPH_TSIZE(i) > (0x00400908 + (i*16)) +#define > NV20_PGRAPH_TSTATUS(i) (0x0040090C + > (i*16)) #define NV10_PGRAPH_TILE(i) > (0x00400B00 + (i*16)) #define > NV10_PGRAPH_TLIMIT(i) (0x00400B04 + > (i*16)) #define NV10_PGRAPH_TSIZE(i) > (0x00400B08 + (i*16)) #define > NV10_PGRAPH_TSTATUS(i) (0x00400B0C + > (i*16)) #define NV04_PGRAPH_U_RAM > 0x00400D00 -#define > NV47_PGRAPH_TILE0(i) (0x00400D00 + > (i*16)) -#define NV47_PGRAPH_TLIMIT0(i) > (0x00400D04 + (i*16)) -#define > NV47_PGRAPH_TSIZE0(i) (0x00400D08 + > (i*16)) -#define NV47_PGRAPH_TSTATUS0(i) > (0x00400D0C + (i*16)) +#define > NV47_PGRAPH_TILE(i) (0x00400D00 + > (i*16)) +#define NV47_PGRAPH_TLIMIT(i) > (0x00400D04 + (i*16)) +#define > NV47_PGRAPH_TSIZE(i) (0x00400D08 + > (i*16)) +#define NV47_PGRAPH_TSTATUS(i) > (0x00400D0C + (i*16)) #define > NV04_PGRAPH_V_RAM 0x00400D40 #define > NV04_PGRAPH_W_RAM 0x00400D80 #define > NV10_PGRAPH_COMBINER0_IN_ALPHA 0x00400E40 diff > --git a/drivers/gpu/drm/nouveau/nouveau_state.c > b/drivers/gpu/drm/nouveau/nouveau_state.c index 2ed41d3..4342867 > 100644 --- a/drivers/gpu/drm/nouveau/nouveau_state.c +++ > b/drivers/gpu/drm/nouveau/nouveau_state.c @@ -100,6 +100,7 @@ static > int nouveau_init_engine_ptrs(struct drm_device *dev) > engine->timer.takedown = nv04_timer_takedown; > engine->fb.init = nv10_fb_init; > engine->fb.takedown = nv10_fb_takedown; > + engine->fb.set_region_tiling > nv10_fb_set_region_tiling; engine->graph.grclass > nv10_graph_grclass; engine->graph.init > nv10_graph_init; engine->graph.takedown > nv10_graph_takedown; @@ -109,6 +110,7 @@ static int > 
nouveau_init_engine_ptrs(struct drm_device *dev) > engine->graph.fifo_access = nv04_graph_fifo_access; > engine->graph.load_context = nv10_graph_load_context; > engine->graph.unload_context = nv10_graph_unload_context; > + engine->graph.set_region_tiling > nv10_graph_set_region_tiling; engine->fifo.channels > 32; engine->fifo.init = nv10_fifo_init; > engine->fifo.takedown > nouveau_stub_takedown; @@ -139,6 +141,7 @@ static int > nouveau_init_engine_ptrs(struct drm_device *dev) > engine->timer.takedown = nv04_timer_takedown; > engine->fb.init = nv10_fb_init; > engine->fb.takedown = nv10_fb_takedown; > + engine->fb.set_region_tiling > nv10_fb_set_region_tiling; engine->graph.grclass > nv20_graph_grclass; engine->graph.init > nv20_graph_init; engine->graph.takedown > nv20_graph_takedown; @@ -148,6 +151,7 @@ static int > nouveau_init_engine_ptrs(struct drm_device *dev) > engine->graph.fifo_access = nv04_graph_fifo_access; > engine->graph.load_context = nv20_graph_load_context; > engine->graph.unload_context = nv20_graph_unload_context; > + engine->graph.set_region_tiling > nv20_graph_set_region_tiling; engine->fifo.channels > 32; engine->fifo.init = nv10_fifo_init; > engine->fifo.takedown > nouveau_stub_takedown; @@ -178,6 +182,7 @@ static int > nouveau_init_engine_ptrs(struct drm_device *dev) > engine->timer.takedown = nv04_timer_takedown; > engine->fb.init = nv10_fb_init; > engine->fb.takedown = nv10_fb_takedown; > + engine->fb.set_region_tiling > nv10_fb_set_region_tiling; engine->graph.grclass > nv30_graph_grclass; engine->graph.init > nv30_graph_init; engine->graph.takedown > nv20_graph_takedown; @@ -187,6 +192,7 @@ static int > nouveau_init_engine_ptrs(struct drm_device *dev) > engine->graph.destroy_context = nv20_graph_destroy_context; > engine->graph.load_context = nv20_graph_load_context; > engine->graph.unload_context = nv20_graph_unload_context; > + engine->graph.set_region_tiling > nv20_graph_set_region_tiling; engine->fifo.channels > 32; engine->fifo.init 
= nv10_fifo_init; > engine->fifo.takedown > nouveau_stub_takedown; @@ -218,6 +224,7 @@ static int > nouveau_init_engine_ptrs(struct drm_device *dev) > engine->timer.takedown = nv04_timer_takedown; > engine->fb.init = nv40_fb_init; > engine->fb.takedown = nv40_fb_takedown; > + engine->fb.set_region_tiling > nv40_fb_set_region_tiling; engine->graph.grclass > nv40_graph_grclass; engine->graph.init > nv40_graph_init; engine->graph.takedown > nv40_graph_takedown; @@ -227,6 +234,7 @@ static int > nouveau_init_engine_ptrs(struct drm_device *dev) > engine->graph.destroy_context = nv40_graph_destroy_context; > engine->graph.load_context = nv40_graph_load_context; > engine->graph.unload_context = nv40_graph_unload_context; > + engine->graph.set_region_tiling > nv40_graph_set_region_tiling; engine->fifo.channels > 32; engine->fifo.init = nv40_fifo_init; > engine->fifo.takedown > nouveau_stub_takedown; diff --git a/drivers/gpu/drm/nouveau/nv10_fb.c > b/drivers/gpu/drm/nouveau/nv10_fb.c index 79e2d10..cc5cda4 100644 > --- a/drivers/gpu/drm/nouveau/nv10_fb.c > +++ b/drivers/gpu/drm/nouveau/nv10_fb.c > @@ -3,17 +3,37 @@ > #include "nouveau_drv.h" > #include "nouveau_drm.h" > > +void > +nv10_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t > addr, > + uint32_t size, uint32_t pitch) > +{ > + struct drm_nouveau_private *dev_priv = dev->dev_private; > + uint32_t limit = max(1u, addr + size) - 1; > + > + if (pitch) { > + if (dev_priv->card_type >= NV_20) > + addr |= 1; > + else > + addr |= 1 << 31; > + } > + > + nv_wr32(dev, NV10_PFB_TLIMIT(i), limit); > + nv_wr32(dev, NV10_PFB_TSIZE(i), pitch); > + nv_wr32(dev, NV10_PFB_TILE(i), addr); > +} > + > int > nv10_fb_init(struct drm_device *dev) > { > - uint32_t fb_bar_size; > + struct drm_nouveau_private *dev_priv = dev->dev_private; > + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; > int i; > > - fb_bar_size = drm_get_resource_len(dev, 0) - 1; > - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { > - nv_wr32(dev, 
NV10_PFB_TILE(i), 0); > - nv_wr32(dev, NV10_PFB_TLIMIT(i), fb_bar_size); > - } > + pfb->num_tiles = NV10_PFB_TILE__SIZE; > + > + /* Turn all the tiling regions off. */ > + for (i = 0; i < pfb->num_tiles; i++) > + pfb->set_region_tiling(dev, i, 0, 0, 0); > > return 0; > } > diff --git a/drivers/gpu/drm/nouveau/nv10_graph.c > b/drivers/gpu/drm/nouveau/nv10_graph.c index 6bf6804..2aeac8b 100644 > --- a/drivers/gpu/drm/nouveau/nv10_graph.c > +++ b/drivers/gpu/drm/nouveau/nv10_graph.c > @@ -808,6 +808,39 @@ void nv10_graph_destroy_context(struct > nouveau_channel *chan) chan->pgraph_ctx = NULL; > } > > +static void > +nv10_graph_write_tile(struct drm_device *dev, int i, uint32_t addr, > + uint32_t size, uint32_t pitch) > +{ > + uint32_t limit = max(1u, addr + size) - 1; > + > + if (pitch) > + addr |= 1 << 31; > + > + nv_wr32(dev, NV10_PGRAPH_TLIMIT(i), limit); > + nv_wr32(dev, NV10_PGRAPH_TSIZE(i), pitch); > + nv_wr32(dev, NV10_PGRAPH_TILE(i), addr); > +} > + > +void > +nv10_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t > addr, > + uint32_t size, uint32_t pitch) > +{ > + struct drm_nouveau_private *dev_priv = dev->dev_private; > + struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo; > + struct nouveau_pgraph_engine *pgraph > &dev_priv->engine.graph; + > + pfifo->reassign(dev, false); > + pgraph->fifo_access(dev, false); > + > + nouveau_wait_for_idle(dev); > + > + nv10_graph_write_tile(dev, i, addr, size, pitch); > + > + pgraph->fifo_access(dev, true); > + pfifo->reassign(dev, true); > +} > + > int nv10_graph_init(struct drm_device *dev) > { > struct drm_nouveau_private *dev_priv = dev->dev_private; > @@ -836,17 +869,9 @@ int nv10_graph_init(struct drm_device *dev) > } else > nv_wr32(dev, NV10_PGRAPH_DEBUG_4, 0x00000000); > > - /* copy tile info from PFB */ > - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { > - nv_wr32(dev, NV10_PGRAPH_TILE(i), > - nv_rd32(dev, > NV10_PFB_TILE(i))); > - nv_wr32(dev, NV10_PGRAPH_TLIMIT(i), > - nv_rd32(dev, > 
NV10_PFB_TLIMIT(i))); > - nv_wr32(dev, NV10_PGRAPH_TSIZE(i), > - nv_rd32(dev, > NV10_PFB_TSIZE(i))); > - nv_wr32(dev, NV10_PGRAPH_TSTATUS(i), > - nv_rd32(dev, > NV10_PFB_TSTATUS(i))); > - } > + /* Turn all the tiling regions off. */ > + for (i = 0; i < NV10_PFB_TILE__SIZE; i++) > + nv10_graph_write_tile(dev, i, 0, 0, 0); > > nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH1, 0x00000000); > nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH2, 0x00000000); > diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c > b/drivers/gpu/drm/nouveau/nv20_graph.c index 18ba74f..55d1a8e 100644 > --- a/drivers/gpu/drm/nouveau/nv20_graph.c > +++ b/drivers/gpu/drm/nouveau/nv20_graph.c > @@ -514,6 +514,46 @@ nv20_graph_rdi(struct drm_device *dev) > nouveau_wait_for_idle(dev); > } > > +static void > +nv20_graph_write_tile(struct drm_device *dev, int i, uint32_t addr, > + uint32_t size, uint32_t pitch) > +{ > + uint32_t limit = max(1u, addr + size) - 1; > + > + if (pitch) > + addr |= 1; > + > + nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit); > + nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch); > + nv_wr32(dev, NV20_PGRAPH_TILE(i), addr); > + > + nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + 4 * i); > + nv_wr32(dev, NV10_PGRAPH_RDI_DATA, limit); > + nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + 4 * i); > + nv_wr32(dev, NV10_PGRAPH_RDI_DATA, pitch); > + nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i); > + nv_wr32(dev, NV10_PGRAPH_RDI_DATA, addr); > +} > + > +void > +nv20_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t > addr, > + uint32_t size, uint32_t pitch) > +{ > + struct drm_nouveau_private *dev_priv = dev->dev_private; > + struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo; > + struct nouveau_pgraph_engine *pgraph > &dev_priv->engine.graph; + > + pfifo->reassign(dev, false); > + pgraph->fifo_access(dev, false); > + > + nouveau_wait_for_idle(dev); > + > + nv20_graph_write_tile(dev, i, addr, size, pitch); > + > + pgraph->fifo_access(dev, true); > + pfifo->reassign(dev, true); > +} > + > 
int > nv20_graph_init(struct drm_device *dev) > { > @@ -572,27 +612,10 @@ nv20_graph_init(struct drm_device *dev) > nv_wr32(dev, NV10_PGRAPH_RDI_DATA , 0x00000030); > } > > - /* copy tile info from PFB */ > - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { > - nv_wr32(dev, 0x00400904 + i * 0x10, > - nv_rd32(dev, > NV10_PFB_TLIMIT(i))); > - /* which is NV40_PGRAPH_TLIMIT0(i) ?? */ > - nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + i * > 4); > - nv_wr32(dev, NV10_PGRAPH_RDI_DATA, > - nv_rd32(dev, > NV10_PFB_TLIMIT(i))); > - nv_wr32(dev, 0x00400908 + i * 0x10, > - nv_rd32(dev, > NV10_PFB_TSIZE(i))); > - /* which is NV40_PGRAPH_TSIZE0(i) ?? */ > - nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + i * > 4); > - nv_wr32(dev, NV10_PGRAPH_RDI_DATA, > - nv_rd32(dev, > NV10_PFB_TSIZE(i))); > - nv_wr32(dev, 0x00400900 + i * 0x10, > - nv_rd32(dev, > NV10_PFB_TILE(i))); > - /* which is NV40_PGRAPH_TILE0(i) ?? */ > - nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + i * > 4); > - nv_wr32(dev, NV10_PGRAPH_RDI_DATA, > - nv_rd32(dev, > NV10_PFB_TILE(i))); > - } > + /* Turn all the tiling regions off. */ > + for (i = 0; i < NV10_PFB_TILE__SIZE; i++) > + nv20_graph_write_tile(dev, i, 0, 0, 0); > + > for (i = 0; i < 8; i++) { > nv_wr32(dev, 0x400980 + i * 4, nv_rd32(dev, 0x100300 > + i * 4)); nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0090 + i * 4); > @@ -704,18 +727,9 @@ nv30_graph_init(struct drm_device *dev) > > nv_wr32(dev, 0x4000c0, 0x00000016); > > - /* copy tile info from PFB */ > - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { > - nv_wr32(dev, 0x00400904 + i * 0x10, > - nv_rd32(dev, > NV10_PFB_TLIMIT(i))); > - /* which is NV40_PGRAPH_TLIMIT0(i) ?? */ > - nv_wr32(dev, 0x00400908 + i * 0x10, > - nv_rd32(dev, > NV10_PFB_TSIZE(i))); > - /* which is NV40_PGRAPH_TSIZE0(i) ?? */ > - nv_wr32(dev, 0x00400900 + i * 0x10, > - nv_rd32(dev, > NV10_PFB_TILE(i))); > - /* which is NV40_PGRAPH_TILE0(i) ?? */ > - } > + /* Turn all the tiling regions off. 
*/ > + for (i = 0; i < NV10_PFB_TILE__SIZE; i++) > + nv20_graph_write_tile(dev, i, 0, 0, 0); > > nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10000100); > nv_wr32(dev, NV10_PGRAPH_STATE , 0xFFFFFFFF); > diff --git a/drivers/gpu/drm/nouveau/nv40_fb.c > b/drivers/gpu/drm/nouveau/nv40_fb.c index ca1d271..3cd07d8 100644 > --- a/drivers/gpu/drm/nouveau/nv40_fb.c > +++ b/drivers/gpu/drm/nouveau/nv40_fb.c > @@ -3,12 +3,37 @@ > #include "nouveau_drv.h" > #include "nouveau_drm.h" > > +void > +nv40_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t > addr, > + uint32_t size, uint32_t pitch) > +{ > + struct drm_nouveau_private *dev_priv = dev->dev_private; > + uint32_t limit = max(1u, addr + size) - 1; > + > + if (pitch) > + addr |= 1; > + > + switch (dev_priv->chipset) { > + case 0x40: > + nv_wr32(dev, NV10_PFB_TLIMIT(i), limit); > + nv_wr32(dev, NV10_PFB_TSIZE(i), pitch); > + nv_wr32(dev, NV10_PFB_TILE(i), addr); > + break; > + > + default: > + nv_wr32(dev, NV40_PFB_TLIMIT(i), limit); > + nv_wr32(dev, NV40_PFB_TSIZE(i), pitch); > + nv_wr32(dev, NV40_PFB_TILE(i), addr); > + break; > + } > +} > + > int > nv40_fb_init(struct drm_device *dev) > { > struct drm_nouveau_private *dev_priv = dev->dev_private; > - uint32_t fb_bar_size, tmp; > - int num_tiles; > + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; > + uint32_t tmp; > int i; > > /* This is strictly a NV4x register (don't know about NV5x). 
> */ @@ -23,35 +48,23 @@ nv40_fb_init(struct drm_device *dev) > case 0x45: > tmp = nv_rd32(dev, NV10_PFB_CLOSE_PAGE2); > nv_wr32(dev, NV10_PFB_CLOSE_PAGE2, tmp & ~(1 << 15)); > - num_tiles = NV10_PFB_TILE__SIZE; > + pfb->num_tiles = NV10_PFB_TILE__SIZE; > break; > case 0x46: /* G72 */ > case 0x47: /* G70 */ > case 0x49: /* G71 */ > case 0x4b: /* G73 */ > case 0x4c: /* C51 (G7X version) */ > - num_tiles = NV40_PFB_TILE__SIZE_1; > + pfb->num_tiles = NV40_PFB_TILE__SIZE_1; > break; > default: > - num_tiles = NV40_PFB_TILE__SIZE_0; > + pfb->num_tiles = NV40_PFB_TILE__SIZE_0; > break; > } > > - fb_bar_size = drm_get_resource_len(dev, 0) - 1; > - switch (dev_priv->chipset) { > - case 0x40: > - for (i = 0; i < num_tiles; i++) { > - nv_wr32(dev, NV10_PFB_TILE(i), 0); > - nv_wr32(dev, NV10_PFB_TLIMIT(i), > fb_bar_size); > - } > - break; > - default: > - for (i = 0; i < num_tiles; i++) { > - nv_wr32(dev, NV40_PFB_TILE(i), 0); > - nv_wr32(dev, NV40_PFB_TLIMIT(i), > fb_bar_size); > - } > - break; > - } > + /* Turn all the tiling regions off. 
*/ > + for (i = 0; i < pfb->num_tiles; i++) > + pfb->set_region_tiling(dev, i, 0, 0, 0); > > return 0; > } > diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c > b/drivers/gpu/drm/nouveau/nv40_graph.c index d3e0a2a..2435d49 100644 > --- a/drivers/gpu/drm/nouveau/nv40_graph.c > +++ b/drivers/gpu/drm/nouveau/nv40_graph.c > @@ -333,6 +333,67 @@ nv40_grctx_vals_load(struct drm_device *dev, > struct nouveau_gpuobj *ctx) nv_wo32(dev, ctx, cv->data[i].offset, > cv->data[i].value); } > > +static void > +nv40_graph_write_tile(struct drm_device *dev, int i, uint32_t addr, > + uint32_t size, uint32_t pitch) > +{ > + struct drm_nouveau_private *dev_priv = dev->dev_private; > + uint32_t limit = max(1u, addr + size) - 1; > + > + if (pitch) > + addr |= 1; > + > + switch (dev_priv->chipset) { > + case 0x44: > + case 0x4a: > + case 0x4e: > + nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch); > + nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit); > + nv_wr32(dev, NV20_PGRAPH_TILE(i), addr); > + break; > + > + case 0x46: > + case 0x47: > + case 0x49: > + case 0x4b: > + nv_wr32(dev, NV47_PGRAPH_TSIZE(i), pitch); > + nv_wr32(dev, NV47_PGRAPH_TLIMIT(i), limit); > + nv_wr32(dev, NV47_PGRAPH_TILE(i), addr); > + nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch); > + nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit); > + nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr); > + break; > + > + default: > + nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch); > + nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit); > + nv_wr32(dev, NV20_PGRAPH_TILE(i), addr); > + nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch); > + nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit); > + nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr); > + break; > + } > +} > +Have you looked at 0xB000, 0xB004, 0xB008 by chance? I noticed that nv uses these tiling regs (what looks like) on nv4a/nv4e (NV44 core) cards but not any others at startup. I thought is was because 0x406900 regs are missing. 
+#define NV40_PFB_TILE2(i) (0x0000B000 + (i*12)) +#define NV40_PFB_TILE__SIZE_2 12 +#define NV40_PFB_TLIMIT2(i) (0x0000B004 + (i*12)) +#define NV40_PFB_TSIZE2(i) (0x0000B008 + (i*12))
Francisco Jerez
2009-Dec-11 21:28 UTC
[Nouveau] [PATCH 1/3] drm/nouveau: Pre-G80 tiling support.
Francisco Jerez <currojerez at riseup.net> writes:
> Jimmy Rentz <jb17bsome at gmail.com> writes:
>
>> On Fri, 11 Dec 2009 19:33:22 +0100
>>
>> Have you looked at 0xB000, 0xB004, 0xB008 by chance?
>> I noticed that nv uses these tiling regs (what looks like) on nv4a/nv4e (NV44 core) cards but not any others at startup. I thought is was because 0x406900 regs are missing.
>
> Yeah, I've seen the blob use those regs in some nv4a mmiotraces, but it
> seemed to work without them and they're outside PGRAPH/PFB or anything
> we currently exercise so I decided to leave them out.
>
>>
>> +#define NV40_PFB_TILE2(i) (0x0000B000 + (i*12))
>> +#define NV40_PFB_TILE__SIZE_2 12
>> +#define NV40_PFB_TLIMIT2(i) (0x0000B004 + (i*12))
>> +#define NV40_PFB_TSIZE2(i) (0x0000B008 + (i*12))

Sorry for not replying-to-all the first time...

-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 196 bytes
Desc: not available
Url : http://lists.freedesktop.org/archives/nouveau/attachments/20091211/cd10b8d4/attachment.pgp
Francisco Jerez
2009-Dec-14 03:03 UTC
[Nouveau] [PATCHv2 1/3] drm/nouveau: Pre-G80 tiling support.
Signed-off-by: Francisco Jerez <currojerez at riseup.net>
---
v2: Simplify things a bit.
drivers/gpu/drm/nouveau/nouveau_drv.h | 18 +++++
drivers/gpu/drm/nouveau/nouveau_reg.h | 16 ++--
drivers/gpu/drm/nouveau/nouveau_state.c | 8 ++
drivers/gpu/drm/nouveau/nv10_fb.c | 32 +++++++--
drivers/gpu/drm/nouveau/nv10_graph.c | 28 +++++---
drivers/gpu/drm/nouveau/nv20_graph.c | 61 ++++++++---------
drivers/gpu/drm/nouveau/nv40_fb.c | 53 +++++++++-----
drivers/gpu/drm/nouveau/nv40_graph.c | 116 +++++++++++++------------------
8 files changed, 185 insertions(+), 147 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h
b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 88b4c7b..40b4a37 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -276,8 +276,13 @@ struct nouveau_timer_engine {
};
struct nouveau_fb_engine {
+ int num_tiles; /* number of tiling regions; presumably filled in by the init hook -- confirm */
+
int (*init)(struct drm_device *dev);
void (*takedown)(struct drm_device *dev);
+
+ void (*set_region_tiling)(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch); /* program tiling region i; callers pass pitch == 0 to turn it off */
};
struct nouveau_fifo_engine {
@@ -328,6 +333,9 @@ struct nouveau_pgraph_engine {
void (*destroy_context)(struct nouveau_channel *);
int (*load_context)(struct nouveau_channel *);
int (*unload_context)(struct drm_device *);
+
+ void (*set_region_tiling)(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch);
};
struct nouveau_engine {
@@ -876,10 +884,14 @@ extern void nv04_fb_takedown(struct drm_device *);
/* nv10_fb.c */
extern int nv10_fb_init(struct drm_device *);
extern void nv10_fb_takedown(struct drm_device *);
+extern void nv10_fb_set_region_tiling(struct drm_device *, int, uint32_t,
+ uint32_t, uint32_t);
/* nv40_fb.c */
extern int nv40_fb_init(struct drm_device *);
extern void nv40_fb_takedown(struct drm_device *);
+extern void nv40_fb_set_region_tiling(struct drm_device *, int, uint32_t,
+ uint32_t, uint32_t);
/* nv04_fifo.c */
extern int nv04_fifo_init(struct drm_device *);
@@ -938,6 +950,8 @@ extern void nv10_graph_destroy_context(struct
nouveau_channel *);
extern int nv10_graph_load_context(struct nouveau_channel *);
extern int nv10_graph_unload_context(struct drm_device *);
extern void nv10_graph_context_switch(struct drm_device *);
+extern void nv10_graph_set_region_tiling(struct drm_device *, int, uint32_t,
+ uint32_t, uint32_t);
/* nv20_graph.c */
extern struct nouveau_pgraph_object_class nv20_graph_grclass[];
@@ -949,6 +963,8 @@ extern int nv20_graph_unload_context(struct drm_device *);
extern int nv20_graph_init(struct drm_device *);
extern void nv20_graph_takedown(struct drm_device *);
extern int nv30_graph_init(struct drm_device *);
+extern void nv20_graph_set_region_tiling(struct drm_device *, int, uint32_t,
+ uint32_t, uint32_t);
/* nv40_graph.c */
extern struct nouveau_pgraph_object_class nv40_graph_grclass[];
@@ -962,6 +978,8 @@ extern int nv40_graph_unload_context(struct drm_device *);
extern int nv40_grctx_init(struct drm_device *);
extern void nv40_grctx_fini(struct drm_device *);
extern void nv40_grctx_vals_load(struct drm_device *, struct nouveau_gpuobj *);
+extern void nv40_graph_set_region_tiling(struct drm_device *, int, uint32_t,
+ uint32_t, uint32_t);
/* nv50_graph.c */
extern struct nouveau_pgraph_object_class nv50_graph_grclass[];
diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h
b/drivers/gpu/drm/nouveau/nouveau_reg.h
index fa1b0e7..251f1b3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_reg.h
+++ b/drivers/gpu/drm/nouveau/nouveau_reg.h
@@ -349,19 +349,19 @@
#define NV04_PGRAPH_BLEND 0x00400824
#define NV04_PGRAPH_STORED_FMT 0x00400830
#define NV04_PGRAPH_PATT_COLORRAM 0x00400900
-#define NV40_PGRAPH_TILE0(i) (0x00400900 +
(i*16))
-#define NV40_PGRAPH_TLIMIT0(i) (0x00400904 +
(i*16))
-#define NV40_PGRAPH_TSIZE0(i) (0x00400908 +
(i*16))
-#define NV40_PGRAPH_TSTATUS0(i) (0x0040090C +
(i*16))
+#define NV20_PGRAPH_TILE(i) (0x00400900 +
(i*16))
+#define NV20_PGRAPH_TLIMIT(i) (0x00400904 +
(i*16))
+#define NV20_PGRAPH_TSIZE(i) (0x00400908 +
(i*16))
+#define NV20_PGRAPH_TSTATUS(i) (0x0040090C +
(i*16))
#define NV10_PGRAPH_TILE(i) (0x00400B00 +
(i*16))
#define NV10_PGRAPH_TLIMIT(i) (0x00400B04 +
(i*16))
#define NV10_PGRAPH_TSIZE(i) (0x00400B08 +
(i*16))
#define NV10_PGRAPH_TSTATUS(i) (0x00400B0C +
(i*16))
#define NV04_PGRAPH_U_RAM 0x00400D00
-#define NV47_PGRAPH_TILE0(i) (0x00400D00 +
(i*16))
-#define NV47_PGRAPH_TLIMIT0(i) (0x00400D04 +
(i*16))
-#define NV47_PGRAPH_TSIZE0(i) (0x00400D08 +
(i*16))
-#define NV47_PGRAPH_TSTATUS0(i) (0x00400D0C +
(i*16))
+#define NV47_PGRAPH_TILE(i) (0x00400D00 +
(i*16))
+#define NV47_PGRAPH_TLIMIT(i) (0x00400D04 +
(i*16))
+#define NV47_PGRAPH_TSIZE(i) (0x00400D08 +
(i*16))
+#define NV47_PGRAPH_TSTATUS(i) (0x00400D0C +
(i*16))
#define NV04_PGRAPH_V_RAM 0x00400D40
#define NV04_PGRAPH_W_RAM 0x00400D80
#define NV10_PGRAPH_COMBINER0_IN_ALPHA 0x00400E40
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c
b/drivers/gpu/drm/nouveau/nouveau_state.c
index 2ed41d3..4342867 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -100,6 +100,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->timer.takedown = nv04_timer_takedown;
engine->fb.init = nv10_fb_init;
engine->fb.takedown = nv10_fb_takedown;
+ engine->fb.set_region_tiling = nv10_fb_set_region_tiling;
engine->graph.grclass = nv10_graph_grclass;
engine->graph.init = nv10_graph_init;
engine->graph.takedown = nv10_graph_takedown;
@@ -109,6 +110,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->graph.fifo_access = nv04_graph_fifo_access;
engine->graph.load_context = nv10_graph_load_context;
engine->graph.unload_context = nv10_graph_unload_context;
+ engine->graph.set_region_tiling = nv10_graph_set_region_tiling;
engine->fifo.channels = 32;
engine->fifo.init = nv10_fifo_init;
engine->fifo.takedown = nouveau_stub_takedown;
@@ -139,6 +141,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->timer.takedown = nv04_timer_takedown;
engine->fb.init = nv10_fb_init;
engine->fb.takedown = nv10_fb_takedown;
+ engine->fb.set_region_tiling = nv10_fb_set_region_tiling;
engine->graph.grclass = nv20_graph_grclass;
engine->graph.init = nv20_graph_init;
engine->graph.takedown = nv20_graph_takedown;
@@ -148,6 +151,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->graph.fifo_access = nv04_graph_fifo_access;
engine->graph.load_context = nv20_graph_load_context;
engine->graph.unload_context = nv20_graph_unload_context;
+ engine->graph.set_region_tiling = nv20_graph_set_region_tiling;
engine->fifo.channels = 32;
engine->fifo.init = nv10_fifo_init;
engine->fifo.takedown = nouveau_stub_takedown;
@@ -178,6 +182,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->timer.takedown = nv04_timer_takedown;
engine->fb.init = nv10_fb_init;
engine->fb.takedown = nv10_fb_takedown;
+ engine->fb.set_region_tiling = nv10_fb_set_region_tiling;
engine->graph.grclass = nv30_graph_grclass;
engine->graph.init = nv30_graph_init;
engine->graph.takedown = nv20_graph_takedown;
@@ -187,6 +192,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->graph.destroy_context = nv20_graph_destroy_context;
engine->graph.load_context = nv20_graph_load_context;
engine->graph.unload_context = nv20_graph_unload_context;
+ engine->graph.set_region_tiling = nv20_graph_set_region_tiling;
engine->fifo.channels = 32;
engine->fifo.init = nv10_fifo_init;
engine->fifo.takedown = nouveau_stub_takedown;
@@ -218,6 +224,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->timer.takedown = nv04_timer_takedown;
engine->fb.init = nv40_fb_init;
engine->fb.takedown = nv40_fb_takedown;
+ engine->fb.set_region_tiling = nv40_fb_set_region_tiling;
engine->graph.grclass = nv40_graph_grclass;
engine->graph.init = nv40_graph_init;
engine->graph.takedown = nv40_graph_takedown;
@@ -227,6 +234,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->graph.destroy_context = nv40_graph_destroy_context;
engine->graph.load_context = nv40_graph_load_context;
engine->graph.unload_context = nv40_graph_unload_context;
+ engine->graph.set_region_tiling = nv40_graph_set_region_tiling;
engine->fifo.channels = 32;
engine->fifo.init = nv40_fifo_init;
engine->fifo.takedown = nouveau_stub_takedown;
diff --git a/drivers/gpu/drm/nouveau/nv10_fb.c b/drivers/gpu/drm/nouveau/nv10_fb.c
index 79e2d10..cc5cda4 100644
--- a/drivers/gpu/drm/nouveau/nv10_fb.c
+++ b/drivers/gpu/drm/nouveau/nv10_fb.c
@@ -3,17 +3,37 @@
#include "nouveau_drv.h"
#include "nouveau_drm.h"
+void
+nv10_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch) {
+ if (dev_priv->card_type >= NV_20)
+ addr |= 1;
+ else
+ addr |= 1 << 31;
+ }
+
+ nv_wr32(dev, NV10_PFB_TLIMIT(i), limit);
+ nv_wr32(dev, NV10_PFB_TSIZE(i), pitch);
+ nv_wr32(dev, NV10_PFB_TILE(i), addr);
+}
+
int
nv10_fb_init(struct drm_device *dev)
{
- uint32_t fb_bar_size;
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
int i;
- fb_bar_size = drm_get_resource_len(dev, 0) - 1;
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- nv_wr32(dev, NV10_PFB_TILE(i), 0);
- nv_wr32(dev, NV10_PFB_TLIMIT(i), fb_bar_size);
- }
+ pfb->num_tiles = NV10_PFB_TILE__SIZE;
+
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < pfb->num_tiles; i++)
+ pfb->set_region_tiling(dev, i, 0, 0, 0);
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/nv10_graph.c b/drivers/gpu/drm/nouveau/nv10_graph.c
index 6bf6804..669ad9d 100644
--- a/drivers/gpu/drm/nouveau/nv10_graph.c
+++ b/drivers/gpu/drm/nouveau/nv10_graph.c
@@ -808,6 +808,20 @@ void nv10_graph_destroy_context(struct nouveau_channel *chan)
chan->pgraph_ctx = NULL;
}
+void
+nv10_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch)
+ addr |= 1 << 31;
+
+ nv_wr32(dev, NV10_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV10_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV10_PGRAPH_TILE(i), addr);
+}
+
int nv10_graph_init(struct drm_device *dev)
{
struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -836,17 +850,9 @@ int nv10_graph_init(struct drm_device *dev)
} else
nv_wr32(dev, NV10_PGRAPH_DEBUG_4, 0x00000000);
- /* copy tile info from PFB */
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- nv_wr32(dev, NV10_PGRAPH_TILE(i),
- nv_rd32(dev, NV10_PFB_TILE(i)));
- nv_wr32(dev, NV10_PGRAPH_TLIMIT(i),
- nv_rd32(dev, NV10_PFB_TLIMIT(i)));
- nv_wr32(dev, NV10_PGRAPH_TSIZE(i),
- nv_rd32(dev, NV10_PFB_TSIZE(i)));
- nv_wr32(dev, NV10_PGRAPH_TSTATUS(i),
- nv_rd32(dev, NV10_PFB_TSTATUS(i)));
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
+ nv10_graph_set_region_tiling(dev, i, 0, 0, 0);
nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH1, 0x00000000);
nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH2, 0x00000000);
diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c b/drivers/gpu/drm/nouveau/nv20_graph.c
index 18ba74f..d6fc0a8 100644
--- a/drivers/gpu/drm/nouveau/nv20_graph.c
+++ b/drivers/gpu/drm/nouveau/nv20_graph.c
@@ -514,6 +514,27 @@ nv20_graph_rdi(struct drm_device *dev)
nouveau_wait_for_idle(dev);
}
+void
+nv20_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch)
+ addr |= 1;
+
+ nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV20_PGRAPH_TILE(i), addr);
+
+ nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + 4 * i);
+ nv_wr32(dev, NV10_PGRAPH_RDI_DATA, limit);
+ nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + 4 * i);
+ nv_wr32(dev, NV10_PGRAPH_RDI_DATA, pitch);
+ nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i);
+ nv_wr32(dev, NV10_PGRAPH_RDI_DATA, addr);
+}
+
int
nv20_graph_init(struct drm_device *dev)
{
@@ -572,27 +593,10 @@ nv20_graph_init(struct drm_device *dev)
nv_wr32(dev, NV10_PGRAPH_RDI_DATA , 0x00000030);
}
- /* copy tile info from PFB */
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- nv_wr32(dev, 0x00400904 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TLIMIT(i)));
- /* which is NV40_PGRAPH_TLIMIT0(i) ?? */
- nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + i * 4);
- nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
- nv_rd32(dev, NV10_PFB_TLIMIT(i)));
- nv_wr32(dev, 0x00400908 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TSIZE(i)));
- /* which is NV40_PGRAPH_TSIZE0(i) ?? */
- nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + i * 4);
- nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
- nv_rd32(dev, NV10_PFB_TSIZE(i)));
- nv_wr32(dev, 0x00400900 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TILE(i)));
- /* which is NV40_PGRAPH_TILE0(i) ?? */
- nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + i * 4);
- nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
- nv_rd32(dev, NV10_PFB_TILE(i)));
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
+ nv20_graph_set_region_tiling(dev, i, 0, 0, 0);
+
for (i = 0; i < 8; i++) {
nv_wr32(dev, 0x400980 + i * 4, nv_rd32(dev, 0x100300 + i * 4));
nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0090 + i * 4);
@@ -704,18 +708,9 @@ nv30_graph_init(struct drm_device *dev)
nv_wr32(dev, 0x4000c0, 0x00000016);
- /* copy tile info from PFB */
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- nv_wr32(dev, 0x00400904 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TLIMIT(i)));
- /* which is NV40_PGRAPH_TLIMIT0(i) ?? */
- nv_wr32(dev, 0x00400908 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TSIZE(i)));
- /* which is NV40_PGRAPH_TSIZE0(i) ?? */
- nv_wr32(dev, 0x00400900 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TILE(i)));
- /* which is NV40_PGRAPH_TILE0(i) ?? */
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
+ nv20_graph_set_region_tiling(dev, i, 0, 0, 0);
nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10000100);
nv_wr32(dev, NV10_PGRAPH_STATE , 0xFFFFFFFF);
diff --git a/drivers/gpu/drm/nouveau/nv40_fb.c b/drivers/gpu/drm/nouveau/nv40_fb.c
index ca1d271..3cd07d8 100644
--- a/drivers/gpu/drm/nouveau/nv40_fb.c
+++ b/drivers/gpu/drm/nouveau/nv40_fb.c
@@ -3,12 +3,37 @@
#include "nouveau_drv.h"
#include "nouveau_drm.h"
+void
+nv40_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch)
+ addr |= 1;
+
+ switch (dev_priv->chipset) {
+ case 0x40:
+ nv_wr32(dev, NV10_PFB_TLIMIT(i), limit);
+ nv_wr32(dev, NV10_PFB_TSIZE(i), pitch);
+ nv_wr32(dev, NV10_PFB_TILE(i), addr);
+ break;
+
+ default:
+ nv_wr32(dev, NV40_PFB_TLIMIT(i), limit);
+ nv_wr32(dev, NV40_PFB_TSIZE(i), pitch);
+ nv_wr32(dev, NV40_PFB_TILE(i), addr);
+ break;
+ }
+}
+
int
nv40_fb_init(struct drm_device *dev)
{
struct drm_nouveau_private *dev_priv = dev->dev_private;
- uint32_t fb_bar_size, tmp;
- int num_tiles;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+ uint32_t tmp;
int i;
/* This is strictly a NV4x register (don't know about NV5x). */
@@ -23,35 +48,23 @@ nv40_fb_init(struct drm_device *dev)
case 0x45:
tmp = nv_rd32(dev, NV10_PFB_CLOSE_PAGE2);
nv_wr32(dev, NV10_PFB_CLOSE_PAGE2, tmp & ~(1 << 15));
- num_tiles = NV10_PFB_TILE__SIZE;
+ pfb->num_tiles = NV10_PFB_TILE__SIZE;
break;
case 0x46: /* G72 */
case 0x47: /* G70 */
case 0x49: /* G71 */
case 0x4b: /* G73 */
case 0x4c: /* C51 (G7X version) */
- num_tiles = NV40_PFB_TILE__SIZE_1;
+ pfb->num_tiles = NV40_PFB_TILE__SIZE_1;
break;
default:
- num_tiles = NV40_PFB_TILE__SIZE_0;
+ pfb->num_tiles = NV40_PFB_TILE__SIZE_0;
break;
}
- fb_bar_size = drm_get_resource_len(dev, 0) - 1;
- switch (dev_priv->chipset) {
- case 0x40:
- for (i = 0; i < num_tiles; i++) {
- nv_wr32(dev, NV10_PFB_TILE(i), 0);
- nv_wr32(dev, NV10_PFB_TLIMIT(i), fb_bar_size);
- }
- break;
- default:
- for (i = 0; i < num_tiles; i++) {
- nv_wr32(dev, NV40_PFB_TILE(i), 0);
- nv_wr32(dev, NV40_PFB_TLIMIT(i), fb_bar_size);
- }
- break;
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < pfb->num_tiles; i++)
+ pfb->set_region_tiling(dev, i, 0, 0, 0);
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c
index d3e0a2a..01773e3 100644
--- a/drivers/gpu/drm/nouveau/nv40_graph.c
+++ b/drivers/gpu/drm/nouveau/nv40_graph.c
@@ -333,6 +333,48 @@ nv40_grctx_vals_load(struct drm_device *dev, struct nouveau_gpuobj *ctx)
nv_wo32(dev, ctx, cv->data[i].offset, cv->data[i].value);
}
+void
+nv40_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch)
+ addr |= 1;
+
+ switch (dev_priv->chipset) {
+ case 0x44:
+ case 0x4a:
+ case 0x4e:
+ nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV20_PGRAPH_TILE(i), addr);
+ break;
+
+ case 0x46:
+ case 0x47:
+ case 0x49:
+ case 0x4b:
+ nv_wr32(dev, NV47_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV47_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV47_PGRAPH_TILE(i), addr);
+ nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch);
+ nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit);
+ nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr);
+ break;
+
+ default:
+ nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV20_PGRAPH_TILE(i), addr);
+ nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch);
+ nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit);
+ nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr);
+ break;
+ }
+}
+
/*
* G70 0x47
* G71 0x49
@@ -347,7 +389,8 @@ nv40_graph_init(struct drm_device *dev)
{
struct drm_nouveau_private *dev_priv =
(struct drm_nouveau_private *)dev->dev_private;
- uint32_t vramsz, tmp;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+ uint32_t vramsz;
int i, j;
nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) &
@@ -425,74 +468,9 @@ nv40_graph_init(struct drm_device *dev)
nv_wr32(dev, 0x400b38, 0x2ffff800);
nv_wr32(dev, 0x400b3c, 0x00006000);
- /* copy tile info from PFB */
- switch (dev_priv->chipset) {
- case 0x40: /* vanilla NV40 */
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- tmp = nv_rd32(dev, NV10_PFB_TILE(i));
- nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp);
- tmp = nv_rd32(dev, NV10_PFB_TLIMIT(i));
- nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp);
- tmp = nv_rd32(dev, NV10_PFB_TSIZE(i));
- nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp);
- tmp = nv_rd32(dev, NV10_PFB_TSTATUS(i));
- nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp);
- }
- break;
- case 0x44:
- case 0x4a:
- case 0x4e: /* NV44-based cores don't have 0x406900? */
- for (i = 0; i < NV40_PFB_TILE__SIZE_0; i++) {
- tmp = nv_rd32(dev, NV40_PFB_TILE(i));
- nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i));
- nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSIZE(i));
- nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i));
- nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp);
- }
- break;
- case 0x46:
- case 0x47:
- case 0x49:
- case 0x4b: /* G7X-based cores */
- for (i = 0; i < NV40_PFB_TILE__SIZE_1; i++) {
- tmp = nv_rd32(dev, NV40_PFB_TILE(i));
- nv_wr32(dev, NV47_PGRAPH_TILE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i));
- nv_wr32(dev, NV47_PGRAPH_TLIMIT0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSIZE(i));
- nv_wr32(dev, NV47_PGRAPH_TSIZE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i));
- nv_wr32(dev, NV47_PGRAPH_TSTATUS0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp);
- }
- break;
- default: /* everything else */
- for (i = 0; i < NV40_PFB_TILE__SIZE_0; i++) {
- tmp = nv_rd32(dev, NV40_PFB_TILE(i));
- nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i));
- nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSIZE(i));
- nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i));
- nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp);
- }
- break;
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < pfb->num_tiles; i++)
+ nv40_graph_set_region_tiling(dev, i, 0, 0, 0);
/* begin RAM config */
vramsz = drm_get_resource_len(dev, 0) - 1;
--
1.6.4.4
Seemingly Similar Threads
- [PATCH 1/2] drm/nv50: Make ctxprog wait until interrupt handler is done.
- [PATCH 1/2] drm/nv04: Fix NV04 set_operation software method.
- [PATCH 1/5] drm/nv10/plane: fix format computation
- [NOT for merge] Patches that reduce power usage on NV86
- [PATCH v4] pmu/gk20a: PMU boot support