Francisco Jerez
2009-Dec-11 18:33 UTC
[Nouveau] [PATCH 1/3] drm/nouveau: Pre-G80 tiling support.
Signed-off-by: Francisco Jerez <currojerez at riseup.net> --- drivers/gpu/drm/nouveau/nouveau_drv.h | 23 +++++ drivers/gpu/drm/nouveau/nouveau_reg.h | 16 ++-- drivers/gpu/drm/nouveau/nouveau_state.c | 8 ++ drivers/gpu/drm/nouveau/nv10_fb.c | 32 ++++++-- drivers/gpu/drm/nouveau/nv10_graph.c | 47 ++++++++--- drivers/gpu/drm/nouveau/nv20_graph.c | 80 +++++++++++-------- drivers/gpu/drm/nouveau/nv40_fb.c | 53 ++++++++----- drivers/gpu/drm/nouveau/nv40_graph.c | 135 +++++++++++++++---------------- 8 files changed, 247 insertions(+), 147 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 88b4c7b..2730497 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -276,8 +276,13 @@ struct nouveau_timer_engine { }; struct nouveau_fb_engine { + int num_tiles; + int (*init)(struct drm_device *dev); void (*takedown)(struct drm_device *dev); + + void (*set_region_tiling)(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch); }; struct nouveau_fifo_engine { @@ -328,6 +333,9 @@ struct nouveau_pgraph_engine { void (*destroy_context)(struct nouveau_channel *); int (*load_context)(struct nouveau_channel *); int (*unload_context)(struct drm_device *); + + void (*set_region_tiling)(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch); }; struct nouveau_engine { @@ -876,10 +884,16 @@ extern void nv04_fb_takedown(struct drm_device *); /* nv10_fb.c */ extern int nv10_fb_init(struct drm_device *); extern void nv10_fb_takedown(struct drm_device *); +extern void nv10_fb_set_region_tiling(struct drm_device *dev, int i, + uint32_t addr, uint32_t size, + uint32_t pitch); /* nv40_fb.c */ extern int nv40_fb_init(struct drm_device *); extern void nv40_fb_takedown(struct drm_device *); +extern void nv40_fb_set_region_tiling(struct drm_device *dev, int i, + uint32_t addr, uint32_t size, + uint32_t pitch); /* nv04_fifo.c */ extern int 
nv04_fifo_init(struct drm_device *); @@ -938,6 +952,9 @@ extern void nv10_graph_destroy_context(struct nouveau_channel *); extern int nv10_graph_load_context(struct nouveau_channel *); extern int nv10_graph_unload_context(struct drm_device *); extern void nv10_graph_context_switch(struct drm_device *); +extern void nv10_graph_set_region_tiling(struct drm_device *dev, int i, + uint32_t addr, uint32_t size, + uint32_t pitch); /* nv20_graph.c */ extern struct nouveau_pgraph_object_class nv20_graph_grclass[]; @@ -949,6 +966,9 @@ extern int nv20_graph_unload_context(struct drm_device *); extern int nv20_graph_init(struct drm_device *); extern void nv20_graph_takedown(struct drm_device *); extern int nv30_graph_init(struct drm_device *); +extern void nv20_graph_set_region_tiling(struct drm_device *dev, int i, + uint32_t addr, uint32_t size, + uint32_t pitch); /* nv40_graph.c */ extern struct nouveau_pgraph_object_class nv40_graph_grclass[]; @@ -962,6 +982,9 @@ extern int nv40_graph_unload_context(struct drm_device *); extern int nv40_grctx_init(struct drm_device *); extern void nv40_grctx_fini(struct drm_device *); extern void nv40_grctx_vals_load(struct drm_device *, struct nouveau_gpuobj *); +extern void nv40_graph_set_region_tiling(struct drm_device *dev, int i, + uint32_t addr, uint32_t size, + uint32_t pitch); /* nv50_graph.c */ extern struct nouveau_pgraph_object_class nv50_graph_grclass[]; diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h index fa1b0e7..251f1b3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_reg.h +++ b/drivers/gpu/drm/nouveau/nouveau_reg.h @@ -349,19 +349,19 @@ #define NV04_PGRAPH_BLEND 0x00400824 #define NV04_PGRAPH_STORED_FMT 0x00400830 #define NV04_PGRAPH_PATT_COLORRAM 0x00400900 -#define NV40_PGRAPH_TILE0(i) (0x00400900 + (i*16)) -#define NV40_PGRAPH_TLIMIT0(i) (0x00400904 + (i*16)) -#define NV40_PGRAPH_TSIZE0(i) (0x00400908 + (i*16)) -#define NV40_PGRAPH_TSTATUS0(i) (0x0040090C + (i*16)) +#define 
NV20_PGRAPH_TILE(i) (0x00400900 + (i*16)) +#define NV20_PGRAPH_TLIMIT(i) (0x00400904 + (i*16)) +#define NV20_PGRAPH_TSIZE(i) (0x00400908 + (i*16)) +#define NV20_PGRAPH_TSTATUS(i) (0x0040090C + (i*16)) #define NV10_PGRAPH_TILE(i) (0x00400B00 + (i*16)) #define NV10_PGRAPH_TLIMIT(i) (0x00400B04 + (i*16)) #define NV10_PGRAPH_TSIZE(i) (0x00400B08 + (i*16)) #define NV10_PGRAPH_TSTATUS(i) (0x00400B0C + (i*16)) #define NV04_PGRAPH_U_RAM 0x00400D00 -#define NV47_PGRAPH_TILE0(i) (0x00400D00 + (i*16)) -#define NV47_PGRAPH_TLIMIT0(i) (0x00400D04 + (i*16)) -#define NV47_PGRAPH_TSIZE0(i) (0x00400D08 + (i*16)) -#define NV47_PGRAPH_TSTATUS0(i) (0x00400D0C + (i*16)) +#define NV47_PGRAPH_TILE(i) (0x00400D00 + (i*16)) +#define NV47_PGRAPH_TLIMIT(i) (0x00400D04 + (i*16)) +#define NV47_PGRAPH_TSIZE(i) (0x00400D08 + (i*16)) +#define NV47_PGRAPH_TSTATUS(i) (0x00400D0C + (i*16)) #define NV04_PGRAPH_V_RAM 0x00400D40 #define NV04_PGRAPH_W_RAM 0x00400D80 #define NV10_PGRAPH_COMBINER0_IN_ALPHA 0x00400E40 diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c index 2ed41d3..4342867 100644 --- a/drivers/gpu/drm/nouveau/nouveau_state.c +++ b/drivers/gpu/drm/nouveau/nouveau_state.c @@ -100,6 +100,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->timer.takedown = nv04_timer_takedown; engine->fb.init = nv10_fb_init; engine->fb.takedown = nv10_fb_takedown; + engine->fb.set_region_tiling = nv10_fb_set_region_tiling; engine->graph.grclass = nv10_graph_grclass; engine->graph.init = nv10_graph_init; engine->graph.takedown = nv10_graph_takedown; @@ -109,6 +110,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->graph.fifo_access = nv04_graph_fifo_access; engine->graph.load_context = nv10_graph_load_context; engine->graph.unload_context = nv10_graph_unload_context; + engine->graph.set_region_tiling = nv10_graph_set_region_tiling; engine->fifo.channels = 32; engine->fifo.init = nv10_fifo_init; engine->fifo.takedown = 
nouveau_stub_takedown; @@ -139,6 +141,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->timer.takedown = nv04_timer_takedown; engine->fb.init = nv10_fb_init; engine->fb.takedown = nv10_fb_takedown; + engine->fb.set_region_tiling = nv10_fb_set_region_tiling; engine->graph.grclass = nv20_graph_grclass; engine->graph.init = nv20_graph_init; engine->graph.takedown = nv20_graph_takedown; @@ -148,6 +151,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->graph.fifo_access = nv04_graph_fifo_access; engine->graph.load_context = nv20_graph_load_context; engine->graph.unload_context = nv20_graph_unload_context; + engine->graph.set_region_tiling = nv20_graph_set_region_tiling; engine->fifo.channels = 32; engine->fifo.init = nv10_fifo_init; engine->fifo.takedown = nouveau_stub_takedown; @@ -178,6 +182,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->timer.takedown = nv04_timer_takedown; engine->fb.init = nv10_fb_init; engine->fb.takedown = nv10_fb_takedown; + engine->fb.set_region_tiling = nv10_fb_set_region_tiling; engine->graph.grclass = nv30_graph_grclass; engine->graph.init = nv30_graph_init; engine->graph.takedown = nv20_graph_takedown; @@ -187,6 +192,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->graph.destroy_context = nv20_graph_destroy_context; engine->graph.load_context = nv20_graph_load_context; engine->graph.unload_context = nv20_graph_unload_context; + engine->graph.set_region_tiling = nv20_graph_set_region_tiling; engine->fifo.channels = 32; engine->fifo.init = nv10_fifo_init; engine->fifo.takedown = nouveau_stub_takedown; @@ -218,6 +224,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->timer.takedown = nv04_timer_takedown; engine->fb.init = nv40_fb_init; engine->fb.takedown = nv40_fb_takedown; + engine->fb.set_region_tiling = nv40_fb_set_region_tiling; engine->graph.grclass = nv40_graph_grclass; engine->graph.init = nv40_graph_init; 
engine->graph.takedown = nv40_graph_takedown; @@ -227,6 +234,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->graph.destroy_context = nv40_graph_destroy_context; engine->graph.load_context = nv40_graph_load_context; engine->graph.unload_context = nv40_graph_unload_context; + engine->graph.set_region_tiling = nv40_graph_set_region_tiling; engine->fifo.channels = 32; engine->fifo.init = nv40_fifo_init; engine->fifo.takedown = nouveau_stub_takedown; diff --git a/drivers/gpu/drm/nouveau/nv10_fb.c b/drivers/gpu/drm/nouveau/nv10_fb.c index 79e2d10..cc5cda4 100644 --- a/drivers/gpu/drm/nouveau/nv10_fb.c +++ b/drivers/gpu/drm/nouveau/nv10_fb.c @@ -3,17 +3,37 @@ #include "nouveau_drv.h" #include "nouveau_drm.h" +void +nv10_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + uint32_t limit = max(1u, addr + size) - 1; + + if (pitch) { + if (dev_priv->card_type >= NV_20) + addr |= 1; + else + addr |= 1 << 31; + } + + nv_wr32(dev, NV10_PFB_TLIMIT(i), limit); + nv_wr32(dev, NV10_PFB_TSIZE(i), pitch); + nv_wr32(dev, NV10_PFB_TILE(i), addr); +} + int nv10_fb_init(struct drm_device *dev) { - uint32_t fb_bar_size; + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; int i; - fb_bar_size = drm_get_resource_len(dev, 0) - 1; - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { - nv_wr32(dev, NV10_PFB_TILE(i), 0); - nv_wr32(dev, NV10_PFB_TLIMIT(i), fb_bar_size); - } + pfb->num_tiles = NV10_PFB_TILE__SIZE; + + /* Turn all the tiling regions off. 
*/ + for (i = 0; i < pfb->num_tiles; i++) + pfb->set_region_tiling(dev, i, 0, 0, 0); return 0; } diff --git a/drivers/gpu/drm/nouveau/nv10_graph.c b/drivers/gpu/drm/nouveau/nv10_graph.c index 6bf6804..2aeac8b 100644 --- a/drivers/gpu/drm/nouveau/nv10_graph.c +++ b/drivers/gpu/drm/nouveau/nv10_graph.c @@ -808,6 +808,39 @@ void nv10_graph_destroy_context(struct nouveau_channel *chan) chan->pgraph_ctx = NULL; } +static void +nv10_graph_write_tile(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch) +{ + uint32_t limit = max(1u, addr + size) - 1; + + if (pitch) + addr |= 1 << 31; + + nv_wr32(dev, NV10_PGRAPH_TLIMIT(i), limit); + nv_wr32(dev, NV10_PGRAPH_TSIZE(i), pitch); + nv_wr32(dev, NV10_PGRAPH_TILE(i), addr); +} + +void +nv10_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo; + struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph; + + pfifo->reassign(dev, false); + pgraph->fifo_access(dev, false); + + nouveau_wait_for_idle(dev); + + nv10_graph_write_tile(dev, i, addr, size, pitch); + + pgraph->fifo_access(dev, true); + pfifo->reassign(dev, true); +} + int nv10_graph_init(struct drm_device *dev) { struct drm_nouveau_private *dev_priv = dev->dev_private; @@ -836,17 +869,9 @@ int nv10_graph_init(struct drm_device *dev) } else nv_wr32(dev, NV10_PGRAPH_DEBUG_4, 0x00000000); - /* copy tile info from PFB */ - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { - nv_wr32(dev, NV10_PGRAPH_TILE(i), - nv_rd32(dev, NV10_PFB_TILE(i))); - nv_wr32(dev, NV10_PGRAPH_TLIMIT(i), - nv_rd32(dev, NV10_PFB_TLIMIT(i))); - nv_wr32(dev, NV10_PGRAPH_TSIZE(i), - nv_rd32(dev, NV10_PFB_TSIZE(i))); - nv_wr32(dev, NV10_PGRAPH_TSTATUS(i), - nv_rd32(dev, NV10_PFB_TSTATUS(i))); - } + /* Turn all the tiling regions off. 
*/ + for (i = 0; i < NV10_PFB_TILE__SIZE; i++) + nv10_graph_write_tile(dev, i, 0, 0, 0); nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH1, 0x00000000); nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH2, 0x00000000); diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c b/drivers/gpu/drm/nouveau/nv20_graph.c index 18ba74f..55d1a8e 100644 --- a/drivers/gpu/drm/nouveau/nv20_graph.c +++ b/drivers/gpu/drm/nouveau/nv20_graph.c @@ -514,6 +514,46 @@ nv20_graph_rdi(struct drm_device *dev) nouveau_wait_for_idle(dev); } +static void +nv20_graph_write_tile(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch) +{ + uint32_t limit = max(1u, addr + size) - 1; + + if (pitch) + addr |= 1; + + nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit); + nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch); + nv_wr32(dev, NV20_PGRAPH_TILE(i), addr); + + nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + 4 * i); + nv_wr32(dev, NV10_PGRAPH_RDI_DATA, limit); + nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + 4 * i); + nv_wr32(dev, NV10_PGRAPH_RDI_DATA, pitch); + nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i); + nv_wr32(dev, NV10_PGRAPH_RDI_DATA, addr); +} + +void +nv20_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo; + struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph; + + pfifo->reassign(dev, false); + pgraph->fifo_access(dev, false); + + nouveau_wait_for_idle(dev); + + nv20_graph_write_tile(dev, i, addr, size, pitch); + + pgraph->fifo_access(dev, true); + pfifo->reassign(dev, true); +} + int nv20_graph_init(struct drm_device *dev) { @@ -572,27 +612,10 @@ nv20_graph_init(struct drm_device *dev) nv_wr32(dev, NV10_PGRAPH_RDI_DATA , 0x00000030); } - /* copy tile info from PFB */ - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { - nv_wr32(dev, 0x00400904 + i * 0x10, - nv_rd32(dev, NV10_PFB_TLIMIT(i))); - /* which is 
NV40_PGRAPH_TLIMIT0(i) ?? */ - nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + i * 4); - nv_wr32(dev, NV10_PGRAPH_RDI_DATA, - nv_rd32(dev, NV10_PFB_TLIMIT(i))); - nv_wr32(dev, 0x00400908 + i * 0x10, - nv_rd32(dev, NV10_PFB_TSIZE(i))); - /* which is NV40_PGRAPH_TSIZE0(i) ?? */ - nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + i * 4); - nv_wr32(dev, NV10_PGRAPH_RDI_DATA, - nv_rd32(dev, NV10_PFB_TSIZE(i))); - nv_wr32(dev, 0x00400900 + i * 0x10, - nv_rd32(dev, NV10_PFB_TILE(i))); - /* which is NV40_PGRAPH_TILE0(i) ?? */ - nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + i * 4); - nv_wr32(dev, NV10_PGRAPH_RDI_DATA, - nv_rd32(dev, NV10_PFB_TILE(i))); - } + /* Turn all the tiling regions off. */ + for (i = 0; i < NV10_PFB_TILE__SIZE; i++) + nv20_graph_write_tile(dev, i, 0, 0, 0); + for (i = 0; i < 8; i++) { nv_wr32(dev, 0x400980 + i * 4, nv_rd32(dev, 0x100300 + i * 4)); nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0090 + i * 4); @@ -704,18 +727,9 @@ nv30_graph_init(struct drm_device *dev) nv_wr32(dev, 0x4000c0, 0x00000016); - /* copy tile info from PFB */ - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { - nv_wr32(dev, 0x00400904 + i * 0x10, - nv_rd32(dev, NV10_PFB_TLIMIT(i))); - /* which is NV40_PGRAPH_TLIMIT0(i) ?? */ - nv_wr32(dev, 0x00400908 + i * 0x10, - nv_rd32(dev, NV10_PFB_TSIZE(i))); - /* which is NV40_PGRAPH_TSIZE0(i) ?? */ - nv_wr32(dev, 0x00400900 + i * 0x10, - nv_rd32(dev, NV10_PFB_TILE(i))); - /* which is NV40_PGRAPH_TILE0(i) ?? */ - } + /* Turn all the tiling regions off. 
*/ + for (i = 0; i < NV10_PFB_TILE__SIZE; i++) + nv20_graph_write_tile(dev, i, 0, 0, 0); nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10000100); nv_wr32(dev, NV10_PGRAPH_STATE , 0xFFFFFFFF); diff --git a/drivers/gpu/drm/nouveau/nv40_fb.c b/drivers/gpu/drm/nouveau/nv40_fb.c index ca1d271..3cd07d8 100644 --- a/drivers/gpu/drm/nouveau/nv40_fb.c +++ b/drivers/gpu/drm/nouveau/nv40_fb.c @@ -3,12 +3,37 @@ #include "nouveau_drv.h" #include "nouveau_drm.h" +void +nv40_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + uint32_t limit = max(1u, addr + size) - 1; + + if (pitch) + addr |= 1; + + switch (dev_priv->chipset) { + case 0x40: + nv_wr32(dev, NV10_PFB_TLIMIT(i), limit); + nv_wr32(dev, NV10_PFB_TSIZE(i), pitch); + nv_wr32(dev, NV10_PFB_TILE(i), addr); + break; + + default: + nv_wr32(dev, NV40_PFB_TLIMIT(i), limit); + nv_wr32(dev, NV40_PFB_TSIZE(i), pitch); + nv_wr32(dev, NV40_PFB_TILE(i), addr); + break; + } +} + int nv40_fb_init(struct drm_device *dev) { struct drm_nouveau_private *dev_priv = dev->dev_private; - uint32_t fb_bar_size, tmp; - int num_tiles; + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; + uint32_t tmp; int i; /* This is strictly a NV4x register (don't know about NV5x). 
*/ @@ -23,35 +48,23 @@ nv40_fb_init(struct drm_device *dev) case 0x45: tmp = nv_rd32(dev, NV10_PFB_CLOSE_PAGE2); nv_wr32(dev, NV10_PFB_CLOSE_PAGE2, tmp & ~(1 << 15)); - num_tiles = NV10_PFB_TILE__SIZE; + pfb->num_tiles = NV10_PFB_TILE__SIZE; break; case 0x46: /* G72 */ case 0x47: /* G70 */ case 0x49: /* G71 */ case 0x4b: /* G73 */ case 0x4c: /* C51 (G7X version) */ - num_tiles = NV40_PFB_TILE__SIZE_1; + pfb->num_tiles = NV40_PFB_TILE__SIZE_1; break; default: - num_tiles = NV40_PFB_TILE__SIZE_0; + pfb->num_tiles = NV40_PFB_TILE__SIZE_0; break; } - fb_bar_size = drm_get_resource_len(dev, 0) - 1; - switch (dev_priv->chipset) { - case 0x40: - for (i = 0; i < num_tiles; i++) { - nv_wr32(dev, NV10_PFB_TILE(i), 0); - nv_wr32(dev, NV10_PFB_TLIMIT(i), fb_bar_size); - } - break; - default: - for (i = 0; i < num_tiles; i++) { - nv_wr32(dev, NV40_PFB_TILE(i), 0); - nv_wr32(dev, NV40_PFB_TLIMIT(i), fb_bar_size); - } - break; - } + /* Turn all the tiling regions off. */ + for (i = 0; i < pfb->num_tiles; i++) + pfb->set_region_tiling(dev, i, 0, 0, 0); return 0; } diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c index d3e0a2a..2435d49 100644 --- a/drivers/gpu/drm/nouveau/nv40_graph.c +++ b/drivers/gpu/drm/nouveau/nv40_graph.c @@ -333,6 +333,67 @@ nv40_grctx_vals_load(struct drm_device *dev, struct nouveau_gpuobj *ctx) nv_wo32(dev, ctx, cv->data[i].offset, cv->data[i].value); } +static void +nv40_graph_write_tile(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + uint32_t limit = max(1u, addr + size) - 1; + + if (pitch) + addr |= 1; + + switch (dev_priv->chipset) { + case 0x44: + case 0x4a: + case 0x4e: + nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch); + nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit); + nv_wr32(dev, NV20_PGRAPH_TILE(i), addr); + break; + + case 0x46: + case 0x47: + case 0x49: + case 0x4b: + nv_wr32(dev, NV47_PGRAPH_TSIZE(i), pitch); + 
nv_wr32(dev, NV47_PGRAPH_TLIMIT(i), limit); + nv_wr32(dev, NV47_PGRAPH_TILE(i), addr); + nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch); + nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit); + nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr); + break; + + default: + nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch); + nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit); + nv_wr32(dev, NV20_PGRAPH_TILE(i), addr); + nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch); + nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit); + nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr); + break; + } +} + +void +nv40_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo; + struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph; + + pfifo->reassign(dev, false); + pgraph->fifo_access(dev, false); + + nouveau_wait_for_idle(dev); + + nv40_graph_write_tile(dev, i, addr, size, pitch); + + pgraph->fifo_access(dev, true); + pfifo->reassign(dev, true); +} + /* * G70 0x47 * G71 0x49 @@ -347,7 +408,8 @@ nv40_graph_init(struct drm_device *dev) { struct drm_nouveau_private *dev_priv = (struct drm_nouveau_private *)dev->dev_private; - uint32_t vramsz, tmp; + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; + uint32_t vramsz; int i, j; nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) & @@ -425,74 +487,9 @@ nv40_graph_init(struct drm_device *dev) nv_wr32(dev, 0x400b38, 0x2ffff800); nv_wr32(dev, 0x400b3c, 0x00006000); - /* copy tile info from PFB */ - switch (dev_priv->chipset) { - case 0x40: /* vanilla NV40 */ - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { - tmp = nv_rd32(dev, NV10_PFB_TILE(i)); - nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp); - tmp = nv_rd32(dev, NV10_PFB_TLIMIT(i)); - nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp); - tmp = nv_rd32(dev, NV10_PFB_TSIZE(i)); - nv_wr32(dev, 
NV40_PGRAPH_TSIZE0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp); - tmp = nv_rd32(dev, NV10_PFB_TSTATUS(i)); - nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp); - } - break; - case 0x44: - case 0x4a: - case 0x4e: /* NV44-based cores don't have 0x406900? */ - for (i = 0; i < NV40_PFB_TILE__SIZE_0; i++) { - tmp = nv_rd32(dev, NV40_PFB_TILE(i)); - nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i)); - nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TSIZE(i)); - nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i)); - nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp); - } - break; - case 0x46: - case 0x47: - case 0x49: - case 0x4b: /* G7X-based cores */ - for (i = 0; i < NV40_PFB_TILE__SIZE_1; i++) { - tmp = nv_rd32(dev, NV40_PFB_TILE(i)); - nv_wr32(dev, NV47_PGRAPH_TILE0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i)); - nv_wr32(dev, NV47_PGRAPH_TLIMIT0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TSIZE(i)); - nv_wr32(dev, NV47_PGRAPH_TSIZE0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i)); - nv_wr32(dev, NV47_PGRAPH_TSTATUS0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp); - } - break; - default: /* everything else */ - for (i = 0; i < NV40_PFB_TILE__SIZE_0; i++) { - tmp = nv_rd32(dev, NV40_PFB_TILE(i)); - nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i)); - nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TSIZE(i)); - nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i)); - nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp); - } - break; - } + /* Turn all 
the tiling regions off. */ + for (i = 0; i < pfb->num_tiles; i++) + nv40_graph_write_tile(dev, i, 0, 0, 0); /* begin RAM config */ vramsz = drm_get_resource_len(dev, 0) - 1; -- 1.6.4.4
Francisco Jerez
2009-Dec-11 18:33 UTC
[Nouveau] [PATCH 2/3] drm/nouveau: Make the MM aware of pre-G80 tiling.
Signed-off-by: Francisco Jerez <currojerez at riseup.net> --- drivers/gpu/drm/nouveau/nouveau_bo.c | 221 +++++++++++++++++++++++---------- drivers/gpu/drm/nouveau/nouveau_drv.h | 22 ++++ drivers/gpu/drm/nouveau/nouveau_mem.c | 80 ++++++++++++ 3 files changed, 258 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 320a14b..4616bae 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -37,6 +37,7 @@ static void nouveau_bo_del_ttm(struct ttm_buffer_object *bo) { struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev); + struct drm_device *dev = dev_priv->dev; struct nouveau_bo *nvbo = nouveau_bo(bo); ttm_bo_kunmap(&nvbo->kmap); @@ -44,12 +45,83 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo) if (unlikely(nvbo->gem)) DRM_ERROR("bo %p still attached to GEM object\n", bo); + if (nvbo->tile) + nv10_mem_expire_tiling(dev, nvbo->tile, NULL); + spin_lock(&dev_priv->ttm.bo_list_lock); list_del(&nvbo->head); spin_unlock(&dev_priv->ttm.bo_list_lock); kfree(nvbo); } +static void +nouveau_bo_fixup_align(struct drm_device *dev, + uint32_t tile_mode, uint32_t tile_flags, + int *align, int *size) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + + /* + * Some of the tile_flags have a periodic structure of N*4096 bytes, + * align to to that as well as the page size. Overallocate memory to + * avoid corruption of other buffer objects. + */ + if (dev_priv->card_type == NV_50) { + switch (tile_flags) { + case 0x1800: + case 0x2800: + case 0x4800: + case 0x7a00: + if (dev_priv->chipset >= 0xA0) { + /* This is based on high end cards with 448 bits + * memory bus, could be different elsewhere.*/ + *size += 6 * 28672; + /* 8 * 28672 is the actual alignment requirement + * but we must also align to page size. 
*/ + *align = 2 * 8 * 28672; + } else if (dev_priv->chipset >= 0x90) { + *size += 3 * 16384; + *align = 12 * 16384; + } else { + *size += 3 * 8192; + /* 12 * 8192 is the actual alignment requirement + * but we must also align to page size. */ + *align = 2 * 12 * 8192; + } + break; + default: + break; + } + + } else { + if (tile_mode) { + if (dev_priv->chipset >= 0x40) { + *align = 65536; + *size = roundup(*size, 64 * tile_mode); + + } else if (dev_priv->chipset >= 0x30) { + *align = 32768; + *size = roundup(*size, 32 * tile_mode); + + } else if (dev_priv->chipset >= 0x20) { + *align = 16384; + *size = roundup(*size, 32 * tile_mode); + + } else if (dev_priv->chipset >= 0x10) { + *align = 4096; + *size = roundup(*size, 16 * tile_mode); + } + } + } + + *size = ALIGN(*size, PAGE_SIZE); + + if (dev_priv->card_type == NV_50) { + *size = ALIGN(*size, 65536); + *align = max(65536, *align); + } +} + int nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan, int size, int align, uint32_t flags, uint32_t tile_mode, @@ -70,46 +142,9 @@ nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan, nvbo->tile_mode = tile_mode; nvbo->tile_flags = tile_flags; - /* - * Some of the tile_flags have a periodic structure of N*4096 bytes, - * align to to that as well as the page size. Overallocate memory to - * avoid corruption of other buffer objects. - */ - switch (tile_flags) { - case 0x1800: - case 0x2800: - case 0x4800: - case 0x7a00: - if (dev_priv->chipset >= 0xA0) { - /* This is based on high end cards with 448 bits - * memory bus, could be different elsewhere.*/ - size += 6 * 28672; - /* 8 * 28672 is the actual alignment requirement, - * but we must also align to page size. */ - align = 2 * 8 * 28672; - } else if (dev_priv->chipset >= 0x90) { - size += 3 * 16384; - align = 12 * 16834; - } else { - size += 3 * 8192; - /* 12 * 8192 is the actual alignment requirement, - * but we must also align to page size. 
*/ - align = 2 * 12 * 8192; - } - break; - default: - break; - } - + nouveau_bo_fixup_align(dev, tile_mode, tile_flags, &align, &size); align >>= PAGE_SHIFT; - size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); - if (dev_priv->card_type == NV_50) { - size = (size + 65535) & ~65535; - if (align < (65536 / PAGE_SIZE)) - align = (65536 / PAGE_SIZE); - } - if (flags & TTM_PL_FLAG_VRAM) nvbo->placements[n++] = TTM_PL_FLAG_VRAM | TTM_PL_MASK_CACHING; if (flags & TTM_PL_FLAG_TT) @@ -408,6 +443,7 @@ nouveau_bo_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *pl) /* GPU-assisted copy using NV_MEMORY_TO_MEMORY_FORMAT, can access * TTM_PL_{VRAM,TT} directly. */ + static int nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan, struct nouveau_bo *nvbo, bool evict, bool no_wait, @@ -442,11 +478,12 @@ nouveau_bo_mem_ctxdma(struct nouveau_bo *nvbo, struct nouveau_channel *chan, } static int -nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, int no_wait, - struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem) +nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr, + int no_wait, struct ttm_mem_reg *new_mem) { struct nouveau_bo *nvbo = nouveau_bo(bo); struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev); + struct ttm_mem_reg *old_mem = &bo->mem; struct nouveau_channel *chan; uint64_t src_offset, dst_offset; uint32_t page_count; @@ -549,7 +586,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr, if (ret) goto out; - ret = nouveau_bo_move_m2mf(bo, true, no_wait, &bo->mem, &tmp_mem); + ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait, &tmp_mem); if (ret) goto out; @@ -587,7 +624,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr, if (ret) goto out; - ret = nouveau_bo_move_m2mf(bo, true, no_wait, &bo->mem, new_mem); + ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait, new_mem); if (ret) goto out; @@ -602,51 +639,105 @@ out: } static int -nouveau_bo_move(struct 
ttm_buffer_object *bo, bool evict, bool intr, - bool no_wait, struct ttm_mem_reg *new_mem) +nouveau_bo_vm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem, + struct nouveau_tile_reg **new_tile) { struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev); - struct nouveau_bo *nvbo = nouveau_bo(bo); struct drm_device *dev = dev_priv->dev; - struct ttm_mem_reg *old_mem = &bo->mem; + struct nouveau_bo *nvbo = nouveau_bo(bo); + uint64_t offset; int ret; - if (dev_priv->card_type == NV_50 && new_mem->mem_type == TTM_PL_VRAM && - !nvbo->no_vm) { - uint64_t offset = new_mem->mm_node->start << PAGE_SHIFT; + if (nvbo->no_vm || new_mem->mem_type != TTM_PL_VRAM) { + /* Nothing to do. */ + *new_tile = NULL; + return 0; + } + + offset = new_mem->mm_node->start << PAGE_SHIFT; + if (dev_priv->card_type == NV_50) { ret = nv50_mem_vm_bind_linear(dev, offset + dev_priv->vm_vram_base, new_mem->size, nvbo->tile_flags, offset); if (ret) return ret; + + } else if (dev_priv->card_type >= NV_10) { + *new_tile = nv10_mem_set_tiling(dev, offset, new_mem->size, + nvbo->tile_mode); } - if (dev_priv->init_state != NOUVEAU_CARD_INIT_DONE) - return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem); + return 0; +} +static void +nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo, + struct nouveau_tile_reg *new_tile, + struct nouveau_tile_reg **old_tile) +{ + struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev); + struct drm_device *dev = dev_priv->dev; + + if (dev_priv->card_type >= NV_10 && + dev_priv->card_type < NV_50) { + if (*old_tile) + nv10_mem_expire_tiling(dev, *old_tile, bo->sync_obj); + + *old_tile = new_tile; + } +} + +static int +nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr, + bool no_wait, struct ttm_mem_reg *new_mem) +{ + struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev); + struct nouveau_bo *nvbo = nouveau_bo(bo); + struct ttm_mem_reg *old_mem = &bo->mem; + struct nouveau_tile_reg *new_tile = NULL; + int ret = 0; + + ret = 
nouveau_bo_vm_bind(bo, new_mem, &new_tile); + if (ret) + return ret; + + /* Software copy if the card isn't up and running yet. */ + if (dev_priv->init_state != NOUVEAU_CARD_INIT_DONE) { + ret = ttm_bo_move_memcpy(bo, evict, no_wait, new_mem); + goto out; + } + + /* Fake bo copy. */ if (old_mem->mem_type == TTM_PL_SYSTEM && !bo->ttm) { BUG_ON(bo->mem.mm_node != NULL); bo->mem = *new_mem; new_mem->mm_node = NULL; - return 0; + goto out; } - if (new_mem->mem_type == TTM_PL_SYSTEM) { - if (old_mem->mem_type == TTM_PL_SYSTEM) - return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem); - if (nouveau_bo_move_flipd(bo, evict, intr, no_wait, new_mem)) - return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem); - } else if (old_mem->mem_type == TTM_PL_SYSTEM) { - if (nouveau_bo_move_flips(bo, evict, intr, no_wait, new_mem)) - return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem); - } else { - if (nouveau_bo_move_m2mf(bo, evict, no_wait, old_mem, new_mem)) - return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem); - } + /* Hardware assisted copy. */ + if (new_mem->mem_type == TTM_PL_SYSTEM) + ret = nouveau_bo_move_flipd(bo, evict, intr, no_wait, new_mem); + else if (old_mem->mem_type == TTM_PL_SYSTEM) + ret = nouveau_bo_move_flips(bo, evict, intr, no_wait, new_mem); + else + ret = nouveau_bo_move_m2mf(bo, evict, intr, no_wait, new_mem); - return 0; + if (!ret) + goto out; + + /* Fallback to software copy. 
*/ + ret = ttm_bo_move_memcpy(bo, evict, no_wait, new_mem); + +out: + if (ret) + nouveau_bo_vm_cleanup(bo, NULL, &new_tile); + else + nouveau_bo_vm_cleanup(bo, new_tile, &nvbo->tile); + + return ret; } static int diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 2730497..7a2a322 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -58,11 +58,19 @@ struct nouveau_fpriv { #define MAX_NUM_DCB_ENTRIES 16 #define NOUVEAU_MAX_CHANNEL_NR 128 +#define NOUVEAU_MAX_TILE_NR 15 #define NV50_VM_MAX_VRAM (2*1024*1024*1024ULL) #define NV50_VM_BLOCK (512*1024*1024ULL) #define NV50_VM_VRAM_NR (NV50_VM_MAX_VRAM / NV50_VM_BLOCK) +struct nouveau_tile_reg { + struct nouveau_fence *fence; + uint32_t addr; + uint32_t size; + bool used; +}; + struct nouveau_bo { struct ttm_buffer_object bo; struct ttm_placement placement; @@ -82,6 +90,7 @@ struct nouveau_bo { uint32_t tile_mode; uint32_t tile_flags; + struct nouveau_tile_reg *tile; struct drm_gem_object *gem; struct drm_file *cpu_filp; @@ -554,6 +563,12 @@ struct drm_nouveau_private { unsigned long sg_handle; } gart_info; + /* nv10-nv40 tiling regions */ + struct { + struct nouveau_tile_reg reg[NOUVEAU_MAX_TILE_NR]; + spinlock_t lock; + } tile; + /* G8x/G9x virtual address space */ uint64_t vm_gart_base; uint64_t vm_gart_size; @@ -690,6 +705,13 @@ extern void nouveau_mem_release(struct drm_file *, struct mem_block *heap); extern int nouveau_mem_init(struct drm_device *); extern int nouveau_mem_init_agp(struct drm_device *); extern void nouveau_mem_close(struct drm_device *); +extern struct nouveau_tile_reg *nv10_mem_set_tiling(struct drm_device *dev, + uint32_t addr, + uint32_t size, + uint32_t pitch); +extern void nv10_mem_expire_tiling(struct drm_device *dev, + struct nouveau_tile_reg *tile, + struct nouveau_fence *fence); extern int nv50_mem_vm_bind_linear(struct drm_device *, uint64_t virt, uint32_t size, uint32_t flags, uint64_t phys); diff 
--git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 0275571..6056f32 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -192,6 +192,85 @@ void nouveau_mem_release(struct drm_file *file_priv, struct mem_block *heap) } /* + * NV10-NV40 tiling helpers + */ + +static void +nv10_mem_set_tiling_locked(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; + struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph; + struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i]; + + tile->used = true; + + spin_unlock(&dev_priv->tile.lock); + + pgraph->set_region_tiling(dev, i, addr, size, pitch); + pfb->set_region_tiling(dev, i, addr, size, pitch); + + spin_lock(&dev_priv->tile.lock); + + tile->addr = addr; + tile->size = size; + tile->used = !!pitch; + nouveau_fence_unref((void **)&tile->fence); +} + +struct nouveau_tile_reg * +nv10_mem_set_tiling(struct drm_device *dev, uint32_t addr, uint32_t size, + uint32_t pitch) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; + struct nouveau_tile_reg *tile = dev_priv->tile.reg, *found = NULL; + int i; + + spin_lock(&dev_priv->tile.lock); + + for (i = 0; i < pfb->num_tiles; i++) { + if (tile[i].used) + /* Tile region in use. */ + continue; + + if (tile[i].fence && + !nouveau_fence_signalled(tile[i].fence, NULL)) + /* Pending tile region. */ + continue; + + if (max(tile[i].addr, addr) < + min(tile[i].addr + tile[i].size, addr + size)) + /* Kill an intersecting tile region. */ + nv10_mem_set_tiling_locked(dev, i, 0, 0, 0); + + if (pitch && !found) { + /* Free tile region. 
*/ + nv10_mem_set_tiling_locked(dev, i, addr, size, pitch); + found = &tile[i]; + } + } + + spin_unlock(&dev_priv->tile.lock); + + return found; +} + +void +nv10_mem_expire_tiling(struct drm_device *dev, struct nouveau_tile_reg *tile, + struct nouveau_fence *fence) +{ + if (fence) { + /* Mark it as pending. */ + tile->fence = fence; + nouveau_fence_ref(fence); + } + + tile->used = false; +} + +/* * NV50 VM helpers */ int @@ -509,6 +588,7 @@ nouveau_mem_init(struct drm_device *dev) INIT_LIST_HEAD(&dev_priv->ttm.bo_list); spin_lock_init(&dev_priv->ttm.bo_list_lock); + spin_lock_init(&dev_priv->tile.lock); dev_priv->fb_available_size = nouveau_mem_fb_amount(dev); -- 1.6.4.4
Francisco Jerez
2009-Dec-11 18:33 UTC
[Nouveau] [PATCH 3/3] drm/nouveau: Fix up buffer eviction, and evict them to GART, if possible.
Signed-off-by: Francisco Jerez <currojerez at riseup.net> --- drivers/gpu/drm/nouveau/nouveau_bo.c | 6 ++++++ 1 files changed, 6 insertions(+), 0 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 4616bae..af0a1f5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -433,10 +433,16 @@ nouveau_bo_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *pl) struct nouveau_bo *nvbo = nouveau_bo(bo); switch (bo->mem.mem_type) { + case TTM_PL_VRAM: + nouveau_bo_placement_set(nvbo, TTM_PL_FLAG_TT | + TTM_PL_FLAG_SYSTEM); + break; default: nouveau_bo_placement_set(nvbo, TTM_PL_FLAG_SYSTEM); break; } + + *pl = nvbo->placement; } -- 1.6.4.4
Jimmy Rentz
2009-Dec-11 20:18 UTC
[Nouveau] [PATCH 1/3] drm/nouveau: Pre-G80 tiling support.
On Fri, 11 Dec 2009 19:33:22 +0100 Francisco Jerez <currojerez at riseup.net> wrote:> Signed-off-by: Francisco Jerez <currojerez at riseup.net> > --- > drivers/gpu/drm/nouveau/nouveau_drv.h | 23 +++++ > drivers/gpu/drm/nouveau/nouveau_reg.h | 16 ++-- > drivers/gpu/drm/nouveau/nouveau_state.c | 8 ++ > drivers/gpu/drm/nouveau/nv10_fb.c | 32 ++++++-- > drivers/gpu/drm/nouveau/nv10_graph.c | 47 ++++++++--- > drivers/gpu/drm/nouveau/nv20_graph.c | 80 +++++++++++-------- > drivers/gpu/drm/nouveau/nv40_fb.c | 53 ++++++++----- > drivers/gpu/drm/nouveau/nv40_graph.c | 135 > +++++++++++++++---------------- 8 files changed, 247 insertions(+), > 147 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h > b/drivers/gpu/drm/nouveau/nouveau_drv.h index 88b4c7b..2730497 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_drv.h > +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h > @@ -276,8 +276,13 @@ struct nouveau_timer_engine { > }; > > struct nouveau_fb_engine { > + int num_tiles; > + > int (*init)(struct drm_device *dev); > void (*takedown)(struct drm_device *dev); > + > + void (*set_region_tiling)(struct drm_device *dev, int i, > uint32_t addr, > + uint32_t size, uint32_t pitch); > }; > > struct nouveau_fifo_engine { > @@ -328,6 +333,9 @@ struct nouveau_pgraph_engine { > void (*destroy_context)(struct nouveau_channel *); > int (*load_context)(struct nouveau_channel *); > int (*unload_context)(struct drm_device *); > + > + void (*set_region_tiling)(struct drm_device *dev, int i, > uint32_t addr, > + uint32_t size, uint32_t pitch); > }; > > struct nouveau_engine { > @@ -876,10 +884,16 @@ extern void nv04_fb_takedown(struct drm_device > *); /* nv10_fb.c */ > extern int nv10_fb_init(struct drm_device *); > extern void nv10_fb_takedown(struct drm_device *); > +extern void nv10_fb_set_region_tiling(struct drm_device *dev, int i, > + uint32_t addr, uint32_t size, > + uint32_t pitch); > > /* nv40_fb.c */ > extern int nv40_fb_init(struct drm_device *); > extern void 
nv40_fb_takedown(struct drm_device *); > +extern void nv40_fb_set_region_tiling(struct drm_device *dev, int i, > + uint32_t addr, uint32_t size, > + uint32_t pitch); > > /* nv04_fifo.c */ > extern int nv04_fifo_init(struct drm_device *); > @@ -938,6 +952,9 @@ extern void nv10_graph_destroy_context(struct > nouveau_channel *); extern int nv10_graph_load_context(struct > nouveau_channel *); extern int nv10_graph_unload_context(struct > drm_device *); extern void nv10_graph_context_switch(struct > drm_device *); +extern void nv10_graph_set_region_tiling(struct > drm_device *dev, int i, > + uint32_t addr, uint32_t > size, > + uint32_t pitch); > > /* nv20_graph.c */ > extern struct nouveau_pgraph_object_class nv20_graph_grclass[]; > @@ -949,6 +966,9 @@ extern int nv20_graph_unload_context(struct > drm_device *); extern int nv20_graph_init(struct drm_device *); > extern void nv20_graph_takedown(struct drm_device *); > extern int nv30_graph_init(struct drm_device *); > +extern void nv20_graph_set_region_tiling(struct drm_device *dev, int > i, > + uint32_t addr, uint32_t > size, > + uint32_t pitch); > > /* nv40_graph.c */ > extern struct nouveau_pgraph_object_class nv40_graph_grclass[]; > @@ -962,6 +982,9 @@ extern int nv40_graph_unload_context(struct > drm_device *); extern int nv40_grctx_init(struct drm_device *); > extern void nv40_grctx_fini(struct drm_device *); > extern void nv40_grctx_vals_load(struct drm_device *, struct > nouveau_gpuobj *); +extern void nv40_graph_set_region_tiling(struct > drm_device *dev, int i, > + uint32_t addr, uint32_t > size, > + uint32_t pitch); > > /* nv50_graph.c */ > extern struct nouveau_pgraph_object_class nv50_graph_grclass[]; > diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h > b/drivers/gpu/drm/nouveau/nouveau_reg.h index fa1b0e7..251f1b3 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_reg.h > +++ b/drivers/gpu/drm/nouveau/nouveau_reg.h > @@ -349,19 +349,19 @@ > #define NV04_PGRAPH_BLEND 0x00400824 > #define NV04_PGRAPH_STORED_FMT 
0x00400830 > #define NV04_PGRAPH_PATT_COLORRAM 0x00400900 > -#define NV40_PGRAPH_TILE0(i) > (0x00400900 + (i*16)) -#define > NV40_PGRAPH_TLIMIT0(i) (0x00400904 + > (i*16)) -#define NV40_PGRAPH_TSIZE0(i) > (0x00400908 + (i*16)) -#define > NV40_PGRAPH_TSTATUS0(i) (0x0040090C + > (i*16)) +#define NV20_PGRAPH_TILE(i) > (0x00400900 + (i*16)) +#define > NV20_PGRAPH_TLIMIT(i) (0x00400904 + > (i*16)) +#define NV20_PGRAPH_TSIZE(i) > (0x00400908 + (i*16)) +#define > NV20_PGRAPH_TSTATUS(i) (0x0040090C + > (i*16)) #define NV10_PGRAPH_TILE(i) > (0x00400B00 + (i*16)) #define > NV10_PGRAPH_TLIMIT(i) (0x00400B04 + > (i*16)) #define NV10_PGRAPH_TSIZE(i) > (0x00400B08 + (i*16)) #define > NV10_PGRAPH_TSTATUS(i) (0x00400B0C + > (i*16)) #define NV04_PGRAPH_U_RAM > 0x00400D00 -#define > NV47_PGRAPH_TILE0(i) (0x00400D00 + > (i*16)) -#define NV47_PGRAPH_TLIMIT0(i) > (0x00400D04 + (i*16)) -#define > NV47_PGRAPH_TSIZE0(i) (0x00400D08 + > (i*16)) -#define NV47_PGRAPH_TSTATUS0(i) > (0x00400D0C + (i*16)) +#define > NV47_PGRAPH_TILE(i) (0x00400D00 + > (i*16)) +#define NV47_PGRAPH_TLIMIT(i) > (0x00400D04 + (i*16)) +#define > NV47_PGRAPH_TSIZE(i) (0x00400D08 + > (i*16)) +#define NV47_PGRAPH_TSTATUS(i) > (0x00400D0C + (i*16)) #define > NV04_PGRAPH_V_RAM 0x00400D40 #define > NV04_PGRAPH_W_RAM 0x00400D80 #define > NV10_PGRAPH_COMBINER0_IN_ALPHA 0x00400E40 diff > --git a/drivers/gpu/drm/nouveau/nouveau_state.c > b/drivers/gpu/drm/nouveau/nouveau_state.c index 2ed41d3..4342867 > 100644 --- a/drivers/gpu/drm/nouveau/nouveau_state.c +++ > b/drivers/gpu/drm/nouveau/nouveau_state.c @@ -100,6 +100,7 @@ static > int nouveau_init_engine_ptrs(struct drm_device *dev) > engine->timer.takedown = nv04_timer_takedown; > engine->fb.init = nv10_fb_init; > engine->fb.takedown = nv10_fb_takedown; > + engine->fb.set_region_tiling > nv10_fb_set_region_tiling; engine->graph.grclass > nv10_graph_grclass; engine->graph.init > nv10_graph_init; engine->graph.takedown > nv10_graph_takedown; @@ -109,6 +110,7 @@ static int > 
nouveau_init_engine_ptrs(struct drm_device *dev) > engine->graph.fifo_access = nv04_graph_fifo_access; > engine->graph.load_context = nv10_graph_load_context; > engine->graph.unload_context = nv10_graph_unload_context; > + engine->graph.set_region_tiling > nv10_graph_set_region_tiling; engine->fifo.channels > 32; engine->fifo.init = nv10_fifo_init; > engine->fifo.takedown > nouveau_stub_takedown; @@ -139,6 +141,7 @@ static int > nouveau_init_engine_ptrs(struct drm_device *dev) > engine->timer.takedown = nv04_timer_takedown; > engine->fb.init = nv10_fb_init; > engine->fb.takedown = nv10_fb_takedown; > + engine->fb.set_region_tiling > nv10_fb_set_region_tiling; engine->graph.grclass > nv20_graph_grclass; engine->graph.init > nv20_graph_init; engine->graph.takedown > nv20_graph_takedown; @@ -148,6 +151,7 @@ static int > nouveau_init_engine_ptrs(struct drm_device *dev) > engine->graph.fifo_access = nv04_graph_fifo_access; > engine->graph.load_context = nv20_graph_load_context; > engine->graph.unload_context = nv20_graph_unload_context; > + engine->graph.set_region_tiling > nv20_graph_set_region_tiling; engine->fifo.channels > 32; engine->fifo.init = nv10_fifo_init; > engine->fifo.takedown > nouveau_stub_takedown; @@ -178,6 +182,7 @@ static int > nouveau_init_engine_ptrs(struct drm_device *dev) > engine->timer.takedown = nv04_timer_takedown; > engine->fb.init = nv10_fb_init; > engine->fb.takedown = nv10_fb_takedown; > + engine->fb.set_region_tiling > nv10_fb_set_region_tiling; engine->graph.grclass > nv30_graph_grclass; engine->graph.init > nv30_graph_init; engine->graph.takedown > nv20_graph_takedown; @@ -187,6 +192,7 @@ static int > nouveau_init_engine_ptrs(struct drm_device *dev) > engine->graph.destroy_context = nv20_graph_destroy_context; > engine->graph.load_context = nv20_graph_load_context; > engine->graph.unload_context = nv20_graph_unload_context; > + engine->graph.set_region_tiling > nv20_graph_set_region_tiling; engine->fifo.channels > 32; engine->fifo.init 
= nv10_fifo_init; > engine->fifo.takedown > nouveau_stub_takedown; @@ -218,6 +224,7 @@ static int > nouveau_init_engine_ptrs(struct drm_device *dev) > engine->timer.takedown = nv04_timer_takedown; > engine->fb.init = nv40_fb_init; > engine->fb.takedown = nv40_fb_takedown; > + engine->fb.set_region_tiling > nv40_fb_set_region_tiling; engine->graph.grclass > nv40_graph_grclass; engine->graph.init > nv40_graph_init; engine->graph.takedown > nv40_graph_takedown; @@ -227,6 +234,7 @@ static int > nouveau_init_engine_ptrs(struct drm_device *dev) > engine->graph.destroy_context = nv40_graph_destroy_context; > engine->graph.load_context = nv40_graph_load_context; > engine->graph.unload_context = nv40_graph_unload_context; > + engine->graph.set_region_tiling > nv40_graph_set_region_tiling; engine->fifo.channels > 32; engine->fifo.init = nv40_fifo_init; > engine->fifo.takedown > nouveau_stub_takedown; diff --git a/drivers/gpu/drm/nouveau/nv10_fb.c > b/drivers/gpu/drm/nouveau/nv10_fb.c index 79e2d10..cc5cda4 100644 > --- a/drivers/gpu/drm/nouveau/nv10_fb.c > +++ b/drivers/gpu/drm/nouveau/nv10_fb.c > @@ -3,17 +3,37 @@ > #include "nouveau_drv.h" > #include "nouveau_drm.h" > > +void > +nv10_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t > addr, > + uint32_t size, uint32_t pitch) > +{ > + struct drm_nouveau_private *dev_priv = dev->dev_private; > + uint32_t limit = max(1u, addr + size) - 1; > + > + if (pitch) { > + if (dev_priv->card_type >= NV_20) > + addr |= 1; > + else > + addr |= 1 << 31; > + } > + > + nv_wr32(dev, NV10_PFB_TLIMIT(i), limit); > + nv_wr32(dev, NV10_PFB_TSIZE(i), pitch); > + nv_wr32(dev, NV10_PFB_TILE(i), addr); > +} > + > int > nv10_fb_init(struct drm_device *dev) > { > - uint32_t fb_bar_size; > + struct drm_nouveau_private *dev_priv = dev->dev_private; > + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; > int i; > > - fb_bar_size = drm_get_resource_len(dev, 0) - 1; > - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { > - nv_wr32(dev, 
NV10_PFB_TILE(i), 0); > - nv_wr32(dev, NV10_PFB_TLIMIT(i), fb_bar_size); > - } > + pfb->num_tiles = NV10_PFB_TILE__SIZE; > + > + /* Turn all the tiling regions off. */ > + for (i = 0; i < pfb->num_tiles; i++) > + pfb->set_region_tiling(dev, i, 0, 0, 0); > > return 0; > } > diff --git a/drivers/gpu/drm/nouveau/nv10_graph.c > b/drivers/gpu/drm/nouveau/nv10_graph.c index 6bf6804..2aeac8b 100644 > --- a/drivers/gpu/drm/nouveau/nv10_graph.c > +++ b/drivers/gpu/drm/nouveau/nv10_graph.c > @@ -808,6 +808,39 @@ void nv10_graph_destroy_context(struct > nouveau_channel *chan) chan->pgraph_ctx = NULL; > } > > +static void > +nv10_graph_write_tile(struct drm_device *dev, int i, uint32_t addr, > + uint32_t size, uint32_t pitch) > +{ > + uint32_t limit = max(1u, addr + size) - 1; > + > + if (pitch) > + addr |= 1 << 31; > + > + nv_wr32(dev, NV10_PGRAPH_TLIMIT(i), limit); > + nv_wr32(dev, NV10_PGRAPH_TSIZE(i), pitch); > + nv_wr32(dev, NV10_PGRAPH_TILE(i), addr); > +} > + > +void > +nv10_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t > addr, > + uint32_t size, uint32_t pitch) > +{ > + struct drm_nouveau_private *dev_priv = dev->dev_private; > + struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo; > + struct nouveau_pgraph_engine *pgraph > &dev_priv->engine.graph; + > + pfifo->reassign(dev, false); > + pgraph->fifo_access(dev, false); > + > + nouveau_wait_for_idle(dev); > + > + nv10_graph_write_tile(dev, i, addr, size, pitch); > + > + pgraph->fifo_access(dev, true); > + pfifo->reassign(dev, true); > +} > + > int nv10_graph_init(struct drm_device *dev) > { > struct drm_nouveau_private *dev_priv = dev->dev_private; > @@ -836,17 +869,9 @@ int nv10_graph_init(struct drm_device *dev) > } else > nv_wr32(dev, NV10_PGRAPH_DEBUG_4, 0x00000000); > > - /* copy tile info from PFB */ > - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { > - nv_wr32(dev, NV10_PGRAPH_TILE(i), > - nv_rd32(dev, > NV10_PFB_TILE(i))); > - nv_wr32(dev, NV10_PGRAPH_TLIMIT(i), > - nv_rd32(dev, > 
NV10_PFB_TLIMIT(i))); > - nv_wr32(dev, NV10_PGRAPH_TSIZE(i), > - nv_rd32(dev, > NV10_PFB_TSIZE(i))); > - nv_wr32(dev, NV10_PGRAPH_TSTATUS(i), > - nv_rd32(dev, > NV10_PFB_TSTATUS(i))); > - } > + /* Turn all the tiling regions off. */ > + for (i = 0; i < NV10_PFB_TILE__SIZE; i++) > + nv10_graph_write_tile(dev, i, 0, 0, 0); > > nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH1, 0x00000000); > nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH2, 0x00000000); > diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c > b/drivers/gpu/drm/nouveau/nv20_graph.c index 18ba74f..55d1a8e 100644 > --- a/drivers/gpu/drm/nouveau/nv20_graph.c > +++ b/drivers/gpu/drm/nouveau/nv20_graph.c > @@ -514,6 +514,46 @@ nv20_graph_rdi(struct drm_device *dev) > nouveau_wait_for_idle(dev); > } > > +static void > +nv20_graph_write_tile(struct drm_device *dev, int i, uint32_t addr, > + uint32_t size, uint32_t pitch) > +{ > + uint32_t limit = max(1u, addr + size) - 1; > + > + if (pitch) > + addr |= 1; > + > + nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit); > + nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch); > + nv_wr32(dev, NV20_PGRAPH_TILE(i), addr); > + > + nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + 4 * i); > + nv_wr32(dev, NV10_PGRAPH_RDI_DATA, limit); > + nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + 4 * i); > + nv_wr32(dev, NV10_PGRAPH_RDI_DATA, pitch); > + nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i); > + nv_wr32(dev, NV10_PGRAPH_RDI_DATA, addr); > +} > + > +void > +nv20_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t > addr, > + uint32_t size, uint32_t pitch) > +{ > + struct drm_nouveau_private *dev_priv = dev->dev_private; > + struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo; > + struct nouveau_pgraph_engine *pgraph > &dev_priv->engine.graph; + > + pfifo->reassign(dev, false); > + pgraph->fifo_access(dev, false); > + > + nouveau_wait_for_idle(dev); > + > + nv20_graph_write_tile(dev, i, addr, size, pitch); > + > + pgraph->fifo_access(dev, true); > + pfifo->reassign(dev, true); > +} > + > 
int > nv20_graph_init(struct drm_device *dev) > { > @@ -572,27 +612,10 @@ nv20_graph_init(struct drm_device *dev) > nv_wr32(dev, NV10_PGRAPH_RDI_DATA , 0x00000030); > } > > - /* copy tile info from PFB */ > - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { > - nv_wr32(dev, 0x00400904 + i * 0x10, > - nv_rd32(dev, > NV10_PFB_TLIMIT(i))); > - /* which is NV40_PGRAPH_TLIMIT0(i) ?? */ > - nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + i * > 4); > - nv_wr32(dev, NV10_PGRAPH_RDI_DATA, > - nv_rd32(dev, > NV10_PFB_TLIMIT(i))); > - nv_wr32(dev, 0x00400908 + i * 0x10, > - nv_rd32(dev, > NV10_PFB_TSIZE(i))); > - /* which is NV40_PGRAPH_TSIZE0(i) ?? */ > - nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + i * > 4); > - nv_wr32(dev, NV10_PGRAPH_RDI_DATA, > - nv_rd32(dev, > NV10_PFB_TSIZE(i))); > - nv_wr32(dev, 0x00400900 + i * 0x10, > - nv_rd32(dev, > NV10_PFB_TILE(i))); > - /* which is NV40_PGRAPH_TILE0(i) ?? */ > - nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + i * > 4); > - nv_wr32(dev, NV10_PGRAPH_RDI_DATA, > - nv_rd32(dev, > NV10_PFB_TILE(i))); > - } > + /* Turn all the tiling regions off. */ > + for (i = 0; i < NV10_PFB_TILE__SIZE; i++) > + nv20_graph_write_tile(dev, i, 0, 0, 0); > + > for (i = 0; i < 8; i++) { > nv_wr32(dev, 0x400980 + i * 4, nv_rd32(dev, 0x100300 > + i * 4)); nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0090 + i * 4); > @@ -704,18 +727,9 @@ nv30_graph_init(struct drm_device *dev) > > nv_wr32(dev, 0x4000c0, 0x00000016); > > - /* copy tile info from PFB */ > - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { > - nv_wr32(dev, 0x00400904 + i * 0x10, > - nv_rd32(dev, > NV10_PFB_TLIMIT(i))); > - /* which is NV40_PGRAPH_TLIMIT0(i) ?? */ > - nv_wr32(dev, 0x00400908 + i * 0x10, > - nv_rd32(dev, > NV10_PFB_TSIZE(i))); > - /* which is NV40_PGRAPH_TSIZE0(i) ?? */ > - nv_wr32(dev, 0x00400900 + i * 0x10, > - nv_rd32(dev, > NV10_PFB_TILE(i))); > - /* which is NV40_PGRAPH_TILE0(i) ?? */ > - } > + /* Turn all the tiling regions off. 
*/ > + for (i = 0; i < NV10_PFB_TILE__SIZE; i++) > + nv20_graph_write_tile(dev, i, 0, 0, 0); > > nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10000100); > nv_wr32(dev, NV10_PGRAPH_STATE , 0xFFFFFFFF); > diff --git a/drivers/gpu/drm/nouveau/nv40_fb.c > b/drivers/gpu/drm/nouveau/nv40_fb.c index ca1d271..3cd07d8 100644 > --- a/drivers/gpu/drm/nouveau/nv40_fb.c > +++ b/drivers/gpu/drm/nouveau/nv40_fb.c > @@ -3,12 +3,37 @@ > #include "nouveau_drv.h" > #include "nouveau_drm.h" > > +void > +nv40_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t > addr, > + uint32_t size, uint32_t pitch) > +{ > + struct drm_nouveau_private *dev_priv = dev->dev_private; > + uint32_t limit = max(1u, addr + size) - 1; > + > + if (pitch) > + addr |= 1; > + > + switch (dev_priv->chipset) { > + case 0x40: > + nv_wr32(dev, NV10_PFB_TLIMIT(i), limit); > + nv_wr32(dev, NV10_PFB_TSIZE(i), pitch); > + nv_wr32(dev, NV10_PFB_TILE(i), addr); > + break; > + > + default: > + nv_wr32(dev, NV40_PFB_TLIMIT(i), limit); > + nv_wr32(dev, NV40_PFB_TSIZE(i), pitch); > + nv_wr32(dev, NV40_PFB_TILE(i), addr); > + break; > + } > +} > + > int > nv40_fb_init(struct drm_device *dev) > { > struct drm_nouveau_private *dev_priv = dev->dev_private; > - uint32_t fb_bar_size, tmp; > - int num_tiles; > + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; > + uint32_t tmp; > int i; > > /* This is strictly a NV4x register (don't know about NV5x). 
> */ @@ -23,35 +48,23 @@ nv40_fb_init(struct drm_device *dev) > case 0x45: > tmp = nv_rd32(dev, NV10_PFB_CLOSE_PAGE2); > nv_wr32(dev, NV10_PFB_CLOSE_PAGE2, tmp & ~(1 << 15)); > - num_tiles = NV10_PFB_TILE__SIZE; > + pfb->num_tiles = NV10_PFB_TILE__SIZE; > break; > case 0x46: /* G72 */ > case 0x47: /* G70 */ > case 0x49: /* G71 */ > case 0x4b: /* G73 */ > case 0x4c: /* C51 (G7X version) */ > - num_tiles = NV40_PFB_TILE__SIZE_1; > + pfb->num_tiles = NV40_PFB_TILE__SIZE_1; > break; > default: > - num_tiles = NV40_PFB_TILE__SIZE_0; > + pfb->num_tiles = NV40_PFB_TILE__SIZE_0; > break; > } > > - fb_bar_size = drm_get_resource_len(dev, 0) - 1; > - switch (dev_priv->chipset) { > - case 0x40: > - for (i = 0; i < num_tiles; i++) { > - nv_wr32(dev, NV10_PFB_TILE(i), 0); > - nv_wr32(dev, NV10_PFB_TLIMIT(i), > fb_bar_size); > - } > - break; > - default: > - for (i = 0; i < num_tiles; i++) { > - nv_wr32(dev, NV40_PFB_TILE(i), 0); > - nv_wr32(dev, NV40_PFB_TLIMIT(i), > fb_bar_size); > - } > - break; > - } > + /* Turn all the tiling regions off. 
*/ > + for (i = 0; i < pfb->num_tiles; i++) > + pfb->set_region_tiling(dev, i, 0, 0, 0); > > return 0; > } > diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c > b/drivers/gpu/drm/nouveau/nv40_graph.c index d3e0a2a..2435d49 100644 > --- a/drivers/gpu/drm/nouveau/nv40_graph.c > +++ b/drivers/gpu/drm/nouveau/nv40_graph.c > @@ -333,6 +333,67 @@ nv40_grctx_vals_load(struct drm_device *dev, > struct nouveau_gpuobj *ctx) nv_wo32(dev, ctx, cv->data[i].offset, > cv->data[i].value); } > > +static void > +nv40_graph_write_tile(struct drm_device *dev, int i, uint32_t addr, > + uint32_t size, uint32_t pitch) > +{ > + struct drm_nouveau_private *dev_priv = dev->dev_private; > + uint32_t limit = max(1u, addr + size) - 1; > + > + if (pitch) > + addr |= 1; > + > + switch (dev_priv->chipset) { > + case 0x44: > + case 0x4a: > + case 0x4e: > + nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch); > + nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit); > + nv_wr32(dev, NV20_PGRAPH_TILE(i), addr); > + break; > + > + case 0x46: > + case 0x47: > + case 0x49: > + case 0x4b: > + nv_wr32(dev, NV47_PGRAPH_TSIZE(i), pitch); > + nv_wr32(dev, NV47_PGRAPH_TLIMIT(i), limit); > + nv_wr32(dev, NV47_PGRAPH_TILE(i), addr); > + nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch); > + nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit); > + nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr); > + break; > + > + default: > + nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch); > + nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit); > + nv_wr32(dev, NV20_PGRAPH_TILE(i), addr); > + nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch); > + nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit); > + nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr); > + break; > + } > +} > + Have you looked at 0xB000, 0xB004, 0xB008 by chance? I noticed that nv uses these tiling regs (what looks like) on nv4a/nv4e (NV44 core) cards but not any others at startup. I thought it was because 0x406900 regs are missing. 
+#define NV40_PFB_TILE2(i) (0x0000B000 + (i*12)) +#define NV40_PFB_TILE__SIZE_2 12 +#define NV40_PFB_TLIMIT2(i) (0x0000B004 + (i*12)) +#define NV40_PFB_TSIZE2(i) (0x0000B008 + (i*12))
Francisco Jerez
2009-Dec-11 21:28 UTC
[Nouveau] [PATCH 1/3] drm/nouveau: Pre-G80 tiling support.
Francisco Jerez <currojerez at riseup.net> writes: > Jimmy Rentz <jb17bsome at gmail.com> writes: > >> On Fri, 11 Dec 2009 19:33:22 +0100 >> >> Have you looked at 0xB000, 0xB004, 0xB008 by chance? >> I noticed that nv uses these tiling regs (what looks like) on nv4a/nv4e (NV44 core) cards but not any others at startup. I thought it was because 0x406900 regs are missing. > > Yeah, I've seen the blob use those regs in some nv4a mmiotraces, but it > seemed to work without them and they're outside PGRAPH/PFB or anything > we currently exercise so I decided to leave them out. > >> >> +#define NV40_PFB_TILE2(i) (0x0000B000 + (i*12)) >> +#define NV40_PFB_TILE__SIZE_2 12 >> +#define NV40_PFB_TLIMIT2(i) (0x0000B004 + (i*12)) >> +#define NV40_PFB_TSIZE2(i) (0x0000B008 + (i*12)) Sorry for not replying-to-all the first time... -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 196 bytes Desc: not available Url : http://lists.freedesktop.org/archives/nouveau/attachments/20091211/cd10b8d4/attachment.pgp
Francisco Jerez
2009-Dec-14 03:03 UTC
[Nouveau] [PATCHv2 1/3] drm/nouveau: Pre-G80 tiling support.
Signed-off-by: Francisco Jerez <currojerez at riseup.net> --- v2: Simplify things a bit. drivers/gpu/drm/nouveau/nouveau_drv.h | 18 +++++ drivers/gpu/drm/nouveau/nouveau_reg.h | 16 ++-- drivers/gpu/drm/nouveau/nouveau_state.c | 8 ++ drivers/gpu/drm/nouveau/nv10_fb.c | 32 +++++++-- drivers/gpu/drm/nouveau/nv10_graph.c | 28 +++++--- drivers/gpu/drm/nouveau/nv20_graph.c | 61 ++++++++--------- drivers/gpu/drm/nouveau/nv40_fb.c | 53 +++++++++----- drivers/gpu/drm/nouveau/nv40_graph.c | 116 +++++++++++++------------------ 8 files changed, 185 insertions(+), 147 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 88b4c7b..40b4a37 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -276,8 +276,13 @@ struct nouveau_timer_engine { }; struct nouveau_fb_engine { + int num_tiles; + int (*init)(struct drm_device *dev); void (*takedown)(struct drm_device *dev); + + void (*set_region_tiling)(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch); }; struct nouveau_fifo_engine { @@ -328,6 +333,9 @@ struct nouveau_pgraph_engine { void (*destroy_context)(struct nouveau_channel *); int (*load_context)(struct nouveau_channel *); int (*unload_context)(struct drm_device *); + + void (*set_region_tiling)(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch); }; struct nouveau_engine { @@ -876,10 +884,14 @@ extern void nv04_fb_takedown(struct drm_device *); /* nv10_fb.c */ extern int nv10_fb_init(struct drm_device *); extern void nv10_fb_takedown(struct drm_device *); +extern void nv10_fb_set_region_tiling(struct drm_device *, int, uint32_t, + uint32_t, uint32_t); /* nv40_fb.c */ extern int nv40_fb_init(struct drm_device *); extern void nv40_fb_takedown(struct drm_device *); +extern void nv40_fb_set_region_tiling(struct drm_device *, int, uint32_t, + uint32_t, uint32_t); /* nv04_fifo.c */ extern int nv04_fifo_init(struct drm_device *); @@ 
-938,6 +950,8 @@ extern void nv10_graph_destroy_context(struct nouveau_channel *); extern int nv10_graph_load_context(struct nouveau_channel *); extern int nv10_graph_unload_context(struct drm_device *); extern void nv10_graph_context_switch(struct drm_device *); +extern void nv10_graph_set_region_tiling(struct drm_device *, int, uint32_t, + uint32_t, uint32_t); /* nv20_graph.c */ extern struct nouveau_pgraph_object_class nv20_graph_grclass[]; @@ -949,6 +963,8 @@ extern int nv20_graph_unload_context(struct drm_device *); extern int nv20_graph_init(struct drm_device *); extern void nv20_graph_takedown(struct drm_device *); extern int nv30_graph_init(struct drm_device *); +extern void nv20_graph_set_region_tiling(struct drm_device *, int, uint32_t, + uint32_t, uint32_t); /* nv40_graph.c */ extern struct nouveau_pgraph_object_class nv40_graph_grclass[]; @@ -962,6 +978,8 @@ extern int nv40_graph_unload_context(struct drm_device *); extern int nv40_grctx_init(struct drm_device *); extern void nv40_grctx_fini(struct drm_device *); extern void nv40_grctx_vals_load(struct drm_device *, struct nouveau_gpuobj *); +extern void nv40_graph_set_region_tiling(struct drm_device *, int, uint32_t, + uint32_t, uint32_t); /* nv50_graph.c */ extern struct nouveau_pgraph_object_class nv50_graph_grclass[]; diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h index fa1b0e7..251f1b3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_reg.h +++ b/drivers/gpu/drm/nouveau/nouveau_reg.h @@ -349,19 +349,19 @@ #define NV04_PGRAPH_BLEND 0x00400824 #define NV04_PGRAPH_STORED_FMT 0x00400830 #define NV04_PGRAPH_PATT_COLORRAM 0x00400900 -#define NV40_PGRAPH_TILE0(i) (0x00400900 + (i*16)) -#define NV40_PGRAPH_TLIMIT0(i) (0x00400904 + (i*16)) -#define NV40_PGRAPH_TSIZE0(i) (0x00400908 + (i*16)) -#define NV40_PGRAPH_TSTATUS0(i) (0x0040090C + (i*16)) +#define NV20_PGRAPH_TILE(i) (0x00400900 + (i*16)) +#define NV20_PGRAPH_TLIMIT(i) (0x00400904 + (i*16)) +#define 
NV20_PGRAPH_TSIZE(i) (0x00400908 + (i*16)) +#define NV20_PGRAPH_TSTATUS(i) (0x0040090C + (i*16)) #define NV10_PGRAPH_TILE(i) (0x00400B00 + (i*16)) #define NV10_PGRAPH_TLIMIT(i) (0x00400B04 + (i*16)) #define NV10_PGRAPH_TSIZE(i) (0x00400B08 + (i*16)) #define NV10_PGRAPH_TSTATUS(i) (0x00400B0C + (i*16)) #define NV04_PGRAPH_U_RAM 0x00400D00 -#define NV47_PGRAPH_TILE0(i) (0x00400D00 + (i*16)) -#define NV47_PGRAPH_TLIMIT0(i) (0x00400D04 + (i*16)) -#define NV47_PGRAPH_TSIZE0(i) (0x00400D08 + (i*16)) -#define NV47_PGRAPH_TSTATUS0(i) (0x00400D0C + (i*16)) +#define NV47_PGRAPH_TILE(i) (0x00400D00 + (i*16)) +#define NV47_PGRAPH_TLIMIT(i) (0x00400D04 + (i*16)) +#define NV47_PGRAPH_TSIZE(i) (0x00400D08 + (i*16)) +#define NV47_PGRAPH_TSTATUS(i) (0x00400D0C + (i*16)) #define NV04_PGRAPH_V_RAM 0x00400D40 #define NV04_PGRAPH_W_RAM 0x00400D80 #define NV10_PGRAPH_COMBINER0_IN_ALPHA 0x00400E40 diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c index 2ed41d3..4342867 100644 --- a/drivers/gpu/drm/nouveau/nouveau_state.c +++ b/drivers/gpu/drm/nouveau/nouveau_state.c @@ -100,6 +100,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->timer.takedown = nv04_timer_takedown; engine->fb.init = nv10_fb_init; engine->fb.takedown = nv10_fb_takedown; + engine->fb.set_region_tiling = nv10_fb_set_region_tiling; engine->graph.grclass = nv10_graph_grclass; engine->graph.init = nv10_graph_init; engine->graph.takedown = nv10_graph_takedown; @@ -109,6 +110,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->graph.fifo_access = nv04_graph_fifo_access; engine->graph.load_context = nv10_graph_load_context; engine->graph.unload_context = nv10_graph_unload_context; + engine->graph.set_region_tiling = nv10_graph_set_region_tiling; engine->fifo.channels = 32; engine->fifo.init = nv10_fifo_init; engine->fifo.takedown = nouveau_stub_takedown; @@ -139,6 +141,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) 
engine->timer.takedown = nv04_timer_takedown; engine->fb.init = nv10_fb_init; engine->fb.takedown = nv10_fb_takedown; + engine->fb.set_region_tiling = nv10_fb_set_region_tiling; engine->graph.grclass = nv20_graph_grclass; engine->graph.init = nv20_graph_init; engine->graph.takedown = nv20_graph_takedown; @@ -148,6 +151,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->graph.fifo_access = nv04_graph_fifo_access; engine->graph.load_context = nv20_graph_load_context; engine->graph.unload_context = nv20_graph_unload_context; + engine->graph.set_region_tiling = nv20_graph_set_region_tiling; engine->fifo.channels = 32; engine->fifo.init = nv10_fifo_init; engine->fifo.takedown = nouveau_stub_takedown; @@ -178,6 +182,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->timer.takedown = nv04_timer_takedown; engine->fb.init = nv10_fb_init; engine->fb.takedown = nv10_fb_takedown; + engine->fb.set_region_tiling = nv10_fb_set_region_tiling; engine->graph.grclass = nv30_graph_grclass; engine->graph.init = nv30_graph_init; engine->graph.takedown = nv20_graph_takedown; @@ -187,6 +192,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->graph.destroy_context = nv20_graph_destroy_context; engine->graph.load_context = nv20_graph_load_context; engine->graph.unload_context = nv20_graph_unload_context; + engine->graph.set_region_tiling = nv20_graph_set_region_tiling; engine->fifo.channels = 32; engine->fifo.init = nv10_fifo_init; engine->fifo.takedown = nouveau_stub_takedown; @@ -218,6 +224,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev) engine->timer.takedown = nv04_timer_takedown; engine->fb.init = nv40_fb_init; engine->fb.takedown = nv40_fb_takedown; + engine->fb.set_region_tiling = nv40_fb_set_region_tiling; engine->graph.grclass = nv40_graph_grclass; engine->graph.init = nv40_graph_init; engine->graph.takedown = nv40_graph_takedown; @@ -227,6 +234,7 @@ static int nouveau_init_engine_ptrs(struct 
drm_device *dev) engine->graph.destroy_context = nv40_graph_destroy_context; engine->graph.load_context = nv40_graph_load_context; engine->graph.unload_context = nv40_graph_unload_context; + engine->graph.set_region_tiling = nv40_graph_set_region_tiling; engine->fifo.channels = 32; engine->fifo.init = nv40_fifo_init; engine->fifo.takedown = nouveau_stub_takedown; diff --git a/drivers/gpu/drm/nouveau/nv10_fb.c b/drivers/gpu/drm/nouveau/nv10_fb.c index 79e2d10..cc5cda4 100644 --- a/drivers/gpu/drm/nouveau/nv10_fb.c +++ b/drivers/gpu/drm/nouveau/nv10_fb.c @@ -3,17 +3,37 @@ #include "nouveau_drv.h" #include "nouveau_drm.h" +void +nv10_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + uint32_t limit = max(1u, addr + size) - 1; + + if (pitch) { + if (dev_priv->card_type >= NV_20) + addr |= 1; + else + addr |= 1 << 31; + } + + nv_wr32(dev, NV10_PFB_TLIMIT(i), limit); + nv_wr32(dev, NV10_PFB_TSIZE(i), pitch); + nv_wr32(dev, NV10_PFB_TILE(i), addr); +} + int nv10_fb_init(struct drm_device *dev) { - uint32_t fb_bar_size; + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; int i; - fb_bar_size = drm_get_resource_len(dev, 0) - 1; - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { - nv_wr32(dev, NV10_PFB_TILE(i), 0); - nv_wr32(dev, NV10_PFB_TLIMIT(i), fb_bar_size); - } + pfb->num_tiles = NV10_PFB_TILE__SIZE; + + /* Turn all the tiling regions off. 
*/ + for (i = 0; i < pfb->num_tiles; i++) + pfb->set_region_tiling(dev, i, 0, 0, 0); return 0; } diff --git a/drivers/gpu/drm/nouveau/nv10_graph.c b/drivers/gpu/drm/nouveau/nv10_graph.c index 6bf6804..669ad9d 100644 --- a/drivers/gpu/drm/nouveau/nv10_graph.c +++ b/drivers/gpu/drm/nouveau/nv10_graph.c @@ -808,6 +808,20 @@ void nv10_graph_destroy_context(struct nouveau_channel *chan) chan->pgraph_ctx = NULL; } +void +nv10_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch) +{ + uint32_t limit = max(1u, addr + size) - 1; + + if (pitch) + addr |= 1 << 31; + + nv_wr32(dev, NV10_PGRAPH_TLIMIT(i), limit); + nv_wr32(dev, NV10_PGRAPH_TSIZE(i), pitch); + nv_wr32(dev, NV10_PGRAPH_TILE(i), addr); +} + int nv10_graph_init(struct drm_device *dev) { struct drm_nouveau_private *dev_priv = dev->dev_private; @@ -836,17 +850,9 @@ int nv10_graph_init(struct drm_device *dev) } else nv_wr32(dev, NV10_PGRAPH_DEBUG_4, 0x00000000); - /* copy tile info from PFB */ - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { - nv_wr32(dev, NV10_PGRAPH_TILE(i), - nv_rd32(dev, NV10_PFB_TILE(i))); - nv_wr32(dev, NV10_PGRAPH_TLIMIT(i), - nv_rd32(dev, NV10_PFB_TLIMIT(i))); - nv_wr32(dev, NV10_PGRAPH_TSIZE(i), - nv_rd32(dev, NV10_PFB_TSIZE(i))); - nv_wr32(dev, NV10_PGRAPH_TSTATUS(i), - nv_rd32(dev, NV10_PFB_TSTATUS(i))); - } + /* Turn all the tiling regions off. 
*/ + for (i = 0; i < NV10_PFB_TILE__SIZE; i++) + nv10_graph_set_region_tiling(dev, i, 0, 0, 0); nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH1, 0x00000000); nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH2, 0x00000000); diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c b/drivers/gpu/drm/nouveau/nv20_graph.c index 18ba74f..d6fc0a8 100644 --- a/drivers/gpu/drm/nouveau/nv20_graph.c +++ b/drivers/gpu/drm/nouveau/nv20_graph.c @@ -514,6 +514,27 @@ nv20_graph_rdi(struct drm_device *dev) nouveau_wait_for_idle(dev); } +void +nv20_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch) +{ + uint32_t limit = max(1u, addr + size) - 1; + + if (pitch) + addr |= 1; + + nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit); + nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch); + nv_wr32(dev, NV20_PGRAPH_TILE(i), addr); + + nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + 4 * i); + nv_wr32(dev, NV10_PGRAPH_RDI_DATA, limit); + nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + 4 * i); + nv_wr32(dev, NV10_PGRAPH_RDI_DATA, pitch); + nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i); + nv_wr32(dev, NV10_PGRAPH_RDI_DATA, addr); +} + int nv20_graph_init(struct drm_device *dev) { @@ -572,27 +593,10 @@ nv20_graph_init(struct drm_device *dev) nv_wr32(dev, NV10_PGRAPH_RDI_DATA , 0x00000030); } - /* copy tile info from PFB */ - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { - nv_wr32(dev, 0x00400904 + i * 0x10, - nv_rd32(dev, NV10_PFB_TLIMIT(i))); - /* which is NV40_PGRAPH_TLIMIT0(i) ?? */ - nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + i * 4); - nv_wr32(dev, NV10_PGRAPH_RDI_DATA, - nv_rd32(dev, NV10_PFB_TLIMIT(i))); - nv_wr32(dev, 0x00400908 + i * 0x10, - nv_rd32(dev, NV10_PFB_TSIZE(i))); - /* which is NV40_PGRAPH_TSIZE0(i) ?? */ - nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + i * 4); - nv_wr32(dev, NV10_PGRAPH_RDI_DATA, - nv_rd32(dev, NV10_PFB_TSIZE(i))); - nv_wr32(dev, 0x00400900 + i * 0x10, - nv_rd32(dev, NV10_PFB_TILE(i))); - /* which is NV40_PGRAPH_TILE0(i) ?? 
*/ - nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + i * 4); - nv_wr32(dev, NV10_PGRAPH_RDI_DATA, - nv_rd32(dev, NV10_PFB_TILE(i))); - } + /* Turn all the tiling regions off. */ + for (i = 0; i < NV10_PFB_TILE__SIZE; i++) + nv20_graph_set_region_tiling(dev, i, 0, 0, 0); + for (i = 0; i < 8; i++) { nv_wr32(dev, 0x400980 + i * 4, nv_rd32(dev, 0x100300 + i * 4)); nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0090 + i * 4); @@ -704,18 +708,9 @@ nv30_graph_init(struct drm_device *dev) nv_wr32(dev, 0x4000c0, 0x00000016); - /* copy tile info from PFB */ - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { - nv_wr32(dev, 0x00400904 + i * 0x10, - nv_rd32(dev, NV10_PFB_TLIMIT(i))); - /* which is NV40_PGRAPH_TLIMIT0(i) ?? */ - nv_wr32(dev, 0x00400908 + i * 0x10, - nv_rd32(dev, NV10_PFB_TSIZE(i))); - /* which is NV40_PGRAPH_TSIZE0(i) ?? */ - nv_wr32(dev, 0x00400900 + i * 0x10, - nv_rd32(dev, NV10_PFB_TILE(i))); - /* which is NV40_PGRAPH_TILE0(i) ?? */ - } + /* Turn all the tiling regions off. */ + for (i = 0; i < NV10_PFB_TILE__SIZE; i++) + nv20_graph_set_region_tiling(dev, i, 0, 0, 0); nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10000100); nv_wr32(dev, NV10_PGRAPH_STATE , 0xFFFFFFFF); diff --git a/drivers/gpu/drm/nouveau/nv40_fb.c b/drivers/gpu/drm/nouveau/nv40_fb.c index ca1d271..3cd07d8 100644 --- a/drivers/gpu/drm/nouveau/nv40_fb.c +++ b/drivers/gpu/drm/nouveau/nv40_fb.c @@ -3,12 +3,37 @@ #include "nouveau_drv.h" #include "nouveau_drm.h" +void +nv40_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + uint32_t limit = max(1u, addr + size) - 1; + + if (pitch) + addr |= 1; + + switch (dev_priv->chipset) { + case 0x40: + nv_wr32(dev, NV10_PFB_TLIMIT(i), limit); + nv_wr32(dev, NV10_PFB_TSIZE(i), pitch); + nv_wr32(dev, NV10_PFB_TILE(i), addr); + break; + + default: + nv_wr32(dev, NV40_PFB_TLIMIT(i), limit); + nv_wr32(dev, NV40_PFB_TSIZE(i), pitch); + nv_wr32(dev, NV40_PFB_TILE(i), 
addr); + break; + } +} + int nv40_fb_init(struct drm_device *dev) { struct drm_nouveau_private *dev_priv = dev->dev_private; - uint32_t fb_bar_size, tmp; - int num_tiles; + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; + uint32_t tmp; int i; /* This is strictly a NV4x register (don't know about NV5x). */ @@ -23,35 +48,23 @@ nv40_fb_init(struct drm_device *dev) case 0x45: tmp = nv_rd32(dev, NV10_PFB_CLOSE_PAGE2); nv_wr32(dev, NV10_PFB_CLOSE_PAGE2, tmp & ~(1 << 15)); - num_tiles = NV10_PFB_TILE__SIZE; + pfb->num_tiles = NV10_PFB_TILE__SIZE; break; case 0x46: /* G72 */ case 0x47: /* G70 */ case 0x49: /* G71 */ case 0x4b: /* G73 */ case 0x4c: /* C51 (G7X version) */ - num_tiles = NV40_PFB_TILE__SIZE_1; + pfb->num_tiles = NV40_PFB_TILE__SIZE_1; break; default: - num_tiles = NV40_PFB_TILE__SIZE_0; + pfb->num_tiles = NV40_PFB_TILE__SIZE_0; break; } - fb_bar_size = drm_get_resource_len(dev, 0) - 1; - switch (dev_priv->chipset) { - case 0x40: - for (i = 0; i < num_tiles; i++) { - nv_wr32(dev, NV10_PFB_TILE(i), 0); - nv_wr32(dev, NV10_PFB_TLIMIT(i), fb_bar_size); - } - break; - default: - for (i = 0; i < num_tiles; i++) { - nv_wr32(dev, NV40_PFB_TILE(i), 0); - nv_wr32(dev, NV40_PFB_TLIMIT(i), fb_bar_size); - } - break; - } + /* Turn all the tiling regions off. 
*/ + for (i = 0; i < pfb->num_tiles; i++) + pfb->set_region_tiling(dev, i, 0, 0, 0); return 0; } diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c index d3e0a2a..01773e3 100644 --- a/drivers/gpu/drm/nouveau/nv40_graph.c +++ b/drivers/gpu/drm/nouveau/nv40_graph.c @@ -333,6 +333,48 @@ nv40_grctx_vals_load(struct drm_device *dev, struct nouveau_gpuobj *ctx) nv_wo32(dev, ctx, cv->data[i].offset, cv->data[i].value); } +void +nv40_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr, + uint32_t size, uint32_t pitch) +{ + struct drm_nouveau_private *dev_priv = dev->dev_private; + uint32_t limit = max(1u, addr + size) - 1; + + if (pitch) + addr |= 1; + + switch (dev_priv->chipset) { + case 0x44: + case 0x4a: + case 0x4e: + nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch); + nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit); + nv_wr32(dev, NV20_PGRAPH_TILE(i), addr); + break; + + case 0x46: + case 0x47: + case 0x49: + case 0x4b: + nv_wr32(dev, NV47_PGRAPH_TSIZE(i), pitch); + nv_wr32(dev, NV47_PGRAPH_TLIMIT(i), limit); + nv_wr32(dev, NV47_PGRAPH_TILE(i), addr); + nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch); + nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit); + nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr); + break; + + default: + nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch); + nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit); + nv_wr32(dev, NV20_PGRAPH_TILE(i), addr); + nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch); + nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit); + nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr); + break; + } +} + /* * G70 0x47 * G71 0x49 @@ -347,7 +389,8 @@ nv40_graph_init(struct drm_device *dev) { struct drm_nouveau_private *dev_priv = (struct drm_nouveau_private *)dev->dev_private; - uint32_t vramsz, tmp; + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; + uint32_t vramsz; int i, j; nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) & @@ -425,74 +468,9 @@ nv40_graph_init(struct drm_device *dev) nv_wr32(dev, 0x400b38, 0x2ffff800); 
nv_wr32(dev, 0x400b3c, 0x00006000); - /* copy tile info from PFB */ - switch (dev_priv->chipset) { - case 0x40: /* vanilla NV40 */ - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) { - tmp = nv_rd32(dev, NV10_PFB_TILE(i)); - nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp); - tmp = nv_rd32(dev, NV10_PFB_TLIMIT(i)); - nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp); - tmp = nv_rd32(dev, NV10_PFB_TSIZE(i)); - nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp); - tmp = nv_rd32(dev, NV10_PFB_TSTATUS(i)); - nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp); - } - break; - case 0x44: - case 0x4a: - case 0x4e: /* NV44-based cores don't have 0x406900? */ - for (i = 0; i < NV40_PFB_TILE__SIZE_0; i++) { - tmp = nv_rd32(dev, NV40_PFB_TILE(i)); - nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i)); - nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TSIZE(i)); - nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i)); - nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp); - } - break; - case 0x46: - case 0x47: - case 0x49: - case 0x4b: /* G7X-based cores */ - for (i = 0; i < NV40_PFB_TILE__SIZE_1; i++) { - tmp = nv_rd32(dev, NV40_PFB_TILE(i)); - nv_wr32(dev, NV47_PGRAPH_TILE0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i)); - nv_wr32(dev, NV47_PGRAPH_TLIMIT0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TSIZE(i)); - nv_wr32(dev, NV47_PGRAPH_TSIZE0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i)); - nv_wr32(dev, NV47_PGRAPH_TSTATUS0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp); - } - break; - default: /* everything else */ - for (i = 0; i < NV40_PFB_TILE__SIZE_0; i++) { - tmp = nv_rd32(dev, NV40_PFB_TILE(i)); - nv_wr32(dev, 
NV40_PGRAPH_TILE0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i)); - nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TSIZE(i)); - nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp); - tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i)); - nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp); - nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp); - } - break; - } + /* Turn all the tiling regions off. */ + for (i = 0; i < pfb->num_tiles; i++) + nv40_graph_set_region_tiling(dev, i, 0, 0, 0); /* begin RAM config */ vramsz = drm_get_resource_len(dev, 0) - 1; -- 1.6.4.4
Seemingly Similar Threads
- [PATCH 1/2] drm/nv50: Make ctxprog wait until interrupt handler is done.
- [PATCH 1/2] drm/nv04: Fix NV04 set_operation software method.
- [PATCH 1/5] drm/nv10/plane: fix format computation
- [NOT for merge] Patches that reduce power usage on NV86
- [PATCH v4] pmu/gk20a: PMU boot support