Christoph Bumiller
2013-Mar-27 21:16 UTC
[Nouveau] [PATCH 1/4] drm/nvc0: implement VRAM compression
---
drivers/gpu/drm/nouveau/core/include/subdev/ltcg.h | 7 +
drivers/gpu/drm/nouveau/core/subdev/fb/nvc0.c | 55 +++++----
drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c | 129 +++++++++++++++++++-
drivers/gpu/drm/nouveau/core/subdev/vm/nvc0.c | 58 +++++++++-
4 files changed, 220 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/ltcg.h
b/drivers/gpu/drm/nouveau/core/include/subdev/ltcg.h
index f351f63..a1985ed 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/ltcg.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/ltcg.h
@@ -4,8 +4,15 @@
#include <core/subdev.h>
#include <core/device.h>
+struct nouveau_mm_node;
+
struct nouveau_ltcg {
struct nouveau_subdev base;
+
+ int (*tags_alloc)(struct nouveau_ltcg *, u32 count,
+ struct nouveau_mm_node **);
+ void (*tags_free)(struct nouveau_ltcg *, struct nouveau_mm_node **);
+ void (*tags_clear)(struct nouveau_ltcg *, u32 first, u32 count);
};
static inline struct nouveau_ltcg *
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nvc0.c
b/drivers/gpu/drm/nouveau/core/subdev/fb/nvc0.c
index 3b2ddc6..86ad592 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nvc0.c
@@ -23,6 +23,7 @@
*/
#include <subdev/fb.h>
+#include <subdev/ltcg.h>
#include <subdev/bios.h>
struct nvc0_fb_priv {
@@ -31,34 +32,14 @@ struct nvc0_fb_priv {
dma_addr_t r100c10;
};
-/* 0 = unsupported
- * 1 = non-compressed
- * 3 = compressed
- */
-static const u8 types[256] = {
- 1, 1, 3, 3, 3, 3, 0, 3, 3, 3, 3, 0, 0, 0, 0, 0,
- 0, 1, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3,
- 3, 3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 1, 1, 1, 1, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,
- 3, 3, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3,
- 3, 3, 0, 0, 0, 0, 0, 0, 3, 0, 0, 3, 0, 3, 0, 3,
- 3, 0, 3, 3, 3, 3, 3, 0, 0, 3, 0, 3, 0, 3, 3, 0,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 1, 1, 0
-};
+extern const u8 nvc0_pte_storage_type_map[256];
+
static bool
nvc0_fb_memtype_valid(struct nouveau_fb *pfb, u32 tile_flags)
{
u8 memtype = (tile_flags & 0x0000ff00) >> 8;
- return likely((types[memtype] == 1));
+ return likely((nvc0_pte_storage_type_map[memtype] != 0xff));
}
static int
@@ -130,6 +111,7 @@ nvc0_fb_vram_new(struct nouveau_fb *pfb, u64 size, u32
align, u32 ncmin,
int type = (memtype & 0x0ff);
int back = (memtype & 0x800);
int ret;
+ const bool comp = nvc0_pte_storage_type_map[type] != type;
size >>= 12;
align >>= 12;
@@ -142,10 +124,22 @@ nvc0_fb_vram_new(struct nouveau_fb *pfb, u64 size, u32
align, u32 ncmin,
return -ENOMEM;
INIT_LIST_HEAD(&mem->regions);
- mem->memtype = type;
mem->size = size;
mutex_lock(&pfb->base.mutex);
+ if (comp) {
+ struct nouveau_ltcg *ltcg = nouveau_ltcg(pfb->base.base.parent);
+
+ /* compression only works with lpages */
+ if (align == (1 << (17 - 12))) {
+ int n = size >> 5;
+ ltcg->tags_alloc(ltcg, n, &mem->tag);
+ }
+ if (unlikely(!mem->tag))
+ type = nvc0_pte_storage_type_map[type];
+ }
+ mem->memtype = type;
+
do {
if (back)
ret = nouveau_mm_tail(mm, 1, size, ncmin, align, &r);
@@ -168,6 +162,17 @@ nvc0_fb_vram_new(struct nouveau_fb *pfb, u64 size, u32
align, u32 ncmin,
return 0;
}
+static void
+nvc0_fb_vram_del(struct nouveau_fb *pfb, struct nouveau_mem **pmem)
+{
+ struct nouveau_ltcg *ltcg = nouveau_ltcg(pfb->base.base.parent);
+
+ if ((*pmem)->tag)
+ ltcg->tags_free(ltcg, &(*pmem)->tag);
+
+ nv50_fb_vram_del(pfb, pmem);
+}
+
static int
nvc0_fb_init(struct nouveau_object *object)
{
@@ -215,7 +220,7 @@ nvc0_fb_ctor(struct nouveau_object *parent, struct
nouveau_object *engine,
priv->base.memtype_valid = nvc0_fb_memtype_valid;
priv->base.ram.init = nvc0_fb_vram_init;
priv->base.ram.get = nvc0_fb_vram_new;
- priv->base.ram.put = nv50_fb_vram_del;
+ priv->base.ram.put = nvc0_fb_vram_del;
priv->r100c10_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (priv->r100c10_page) {
diff --git a/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c
b/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c
index 078a2b9..a529563 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c
@@ -23,10 +23,17 @@
*/
#include <subdev/ltcg.h>
+#include <subdev/fb.h>
+#include <subdev/timer.h>
struct nvc0_ltcg_priv {
struct nouveau_ltcg base;
+ u32 part_nr;
+ u32 part_mask;
u32 subp_nr;
+ struct nouveau_mm tags;
+ u32 num_tags;
+ struct nouveau_mm_node *tag_ram;
};
static void
@@ -62,11 +69,104 @@ nvc0_ltcg_intr(struct nouveau_subdev *subdev)
}
static int
+nvc0_ltcg_tags_alloc(struct nouveau_ltcg *ltcg, u32 n,
+ struct nouveau_mm_node **pnode)
+{
+ struct nvc0_ltcg_priv *priv = (struct nvc0_ltcg_priv *)ltcg;
+ int ret;
+
+ ret = nouveau_mm_head(&priv->tags, 1, n, n, 1, pnode);
+ if (ret)
+ *pnode = NULL;
+
+ return ret;
+}
+
+static void
+nvc0_ltcg_tags_free(struct nouveau_ltcg *ltcg, struct nouveau_mm_node **pnode)
+{
+ struct nvc0_ltcg_priv *priv = (struct nvc0_ltcg_priv *)ltcg;
+
+ nouveau_mm_free(&priv->tags, pnode);
+}
+
+static void
+nvc0_ltcg_tags_clear(struct nouveau_ltcg *ltcg, u32 first, u32 count)
+{
+ struct nvc0_ltcg_priv *priv = (struct nvc0_ltcg_priv *)ltcg;
+ u32 last = first + count - 1;
+ int p, i;
+
+ BUG_ON((first > last) || (last >= priv->num_tags));
+
+ nv_wr32(priv, 0x17e8cc, first);
+ nv_wr32(priv, 0x17e8d0, last);
+ nv_wr32(priv, 0x17e8c8, 0x4); /* trigger clear */
+
+ /* wait until it's finished with clearing */
+ for (p = 0; p < priv->part_nr; ++p) {
+ if (!(priv->part_mask & (1 << p)))
+ continue;
+ for (i = 0; i < priv->subp_nr; ++i)
+ nv_wait(priv, 0x1410c8 + p * 0x2000 + i * 0x400, ~0, 0);
+ }
+}
+
+/* TODO: Figure out tag memory details and drop the over-cautious allocation.
+ */
+static int
+nvc0_ltcg_init_tag_ram(struct nouveau_fb *pfb, struct nvc0_ltcg_priv *priv)
+{
+ u32 tag_size, tag_margin, tag_align;
+ int ret;
+
+ nv_wr32(priv, 0x17e8d8, priv->part_nr);
+
+ /* tags for 1/4 of VRAM should be enough (8192/4 per GiB of VRAM) */
+ priv->num_tags = (pfb->ram.size >> 17) / 4;
+ if (priv->num_tags > (1 << 17))
+ priv->num_tags = 1 << 17; /* we have 17 bits in PTE */
+ priv->num_tags = (priv->num_tags + 63) & ~63; /* round up to 64 */
+
+ tag_align = priv->part_nr * 0x800;
+ tag_margin = (tag_align < 0x6000) ? 0x6000 : tag_align;
+
+ /* 4 part 4 sub: 0x2000 bytes for 56 tags */
+ /* 3 part 4 sub: 0x6000 bytes for 168 tags */
+ /*
+ * About 147 bytes per tag. Let's be safe and allocate x2, which makes
+ * 0x4980 bytes for 64 tags, and round up to 0x6000 bytes for 64 tags.
+ *
+ * For 4 GiB of memory we'll have 8192 tags which makes 3 MiB, < 0.1 %.
+ */
+ tag_size = (priv->num_tags / 64) * 0x6000 + tag_margin;
+ tag_size += tag_align;
+ tag_size = (tag_size + 0xfff) >> 12; /* round up */
+
+ ret = nouveau_mm_tail(&pfb->vram, 0, tag_size, tag_size, 1,
+ &priv->tag_ram);
+ if (ret) {
+ priv->num_tags = 0;
+ } else {
+ u64 tag_base = (priv->tag_ram->offset << 12) + tag_margin;
+
+ tag_base += tag_align - 1;
+ tag_base /= tag_align;
+
+ nv_wr32(priv, 0x17e8d4, tag_base);
+ }
+ ret = nouveau_mm_init(&priv->tags, 0, priv->num_tags, 1);
+
+ return ret;
+}
+
+static int
nvc0_ltcg_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
struct nouveau_oclass *oclass, void *data, u32 size,
struct nouveau_object **pobject)
{
struct nvc0_ltcg_priv *priv;
+ struct nouveau_fb *pfb = nouveau_fb(parent);
int ret;
ret = nouveau_ltcg_create(parent, engine, oclass, &priv);
@@ -74,19 +174,44 @@ nvc0_ltcg_ctor(struct nouveau_object *parent, struct
nouveau_object *engine,
if (ret)
return ret;
- priv->subp_nr = nv_rd32(priv, 0x17e8dc) >> 24;
+ priv->part_nr = nv_rd32(priv, 0x022438);
+ priv->part_mask = nv_rd32(priv, 0x022554);
+
+ priv->subp_nr = nv_rd32(priv, 0x17e8dc) >> 28;
+
nv_mask(priv, 0x17e820, 0x00100000, 0x00000000); /* INTR_EN &= ~0x10 */
+ ret = nvc0_ltcg_init_tag_ram(pfb, priv);
+ if (ret)
+ return ret;
+
+ priv->base.tags_alloc = nvc0_ltcg_tags_alloc;
+ priv->base.tags_free = nvc0_ltcg_tags_free;
+ priv->base.tags_clear = nvc0_ltcg_tags_clear;
+
nv_subdev(priv)->intr = nvc0_ltcg_intr;
return 0;
}
+static void
+nvc0_ltcg_dtor(struct nouveau_object *object)
+{
+ struct nouveau_ltcg *ltcg = (struct nouveau_ltcg *)object;
+ struct nvc0_ltcg_priv *priv = (struct nvc0_ltcg_priv *)ltcg;
+ struct nouveau_fb *pfb = nouveau_fb(ltcg->base.base.parent);
+
+ nouveau_mm_fini(&priv->tags);
+ nouveau_mm_free(&pfb->vram, &priv->tag_ram);
+
+ nouveau_ltcg_destroy(ltcg);
+}
+
struct nouveau_oclass
nvc0_ltcg_oclass = {
.handle = NV_SUBDEV(LTCG, 0xc0),
.ofuncs = &(struct nouveau_ofuncs) {
.ctor = nvc0_ltcg_ctor,
- .dtor = _nouveau_ltcg_dtor,
+ .dtor = nvc0_ltcg_dtor,
.init = _nouveau_ltcg_init,
.fini = _nouveau_ltcg_fini,
},
diff --git a/drivers/gpu/drm/nouveau/core/subdev/vm/nvc0.c
b/drivers/gpu/drm/nouveau/core/subdev/vm/nvc0.c
index 30c61e6..4c3b0a2 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/vm/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/vm/nvc0.c
@@ -28,12 +28,54 @@
#include <subdev/timer.h>
#include <subdev/fb.h>
#include <subdev/vm.h>
+#include <subdev/ltcg.h>
struct nvc0_vmmgr_priv {
struct nouveau_vmmgr base;
spinlock_t lock;
};
+
+/* Map from compressed to corresponding uncompressed storage type.
+ * The value 0xff represents an invalid storage type.
+ */
+const u8 nvc0_pte_storage_type_map[256] =
+{
+ 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0xff, 0x01, /* 0x00 */
+ 0x01, 0x01, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0x11, 0xff, 0xff, 0xff, 0xff, 0xff, 0x11, /* 0x10 */
+ 0x11, 0x11, 0x11, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x26, 0x27, /* 0x20 */
+ 0x28, 0x29, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x30 */
+ 0xff, 0xff, 0x26, 0x27, 0x28, 0x29, 0x26, 0x27,
+ 0x28, 0x29, 0xff, 0xff, 0xff, 0xff, 0x46, 0xff, /* 0x40 */
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0x46, 0x46, 0x46, 0x46, 0xff, 0xff, 0xff, /* 0x50 */
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x60 */
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70 */
+ 0xff, 0xff, 0xff, 0x7b, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7b, 0x7b, /* 0x80 */
+ 0x7b, 0x7b, 0xff, 0x8b, 0x8c, 0x8d, 0x8e, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90 */
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0x8b, 0x8c, 0x8d, 0x8e, 0xa7, /* 0xa0 */
+ 0xa8, 0xa9, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0 */
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa7,
+ 0xa8, 0xa9, 0xaa, 0xc3, 0xff, 0xff, 0xff, 0xff, /* 0xc0 */
+ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xfe, 0xc3, 0xc3,
+ 0xc3, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0 */
+ 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xfe, 0xff, 0xfe,
+ 0xfe, 0xff, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff, /* 0xe0 */
+ 0xff, 0xfe, 0xff, 0xfe, 0xff, 0xfe, 0xfe, 0xff,
+ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xf0 */
+ 0xfe, 0xfe, 0xfe, 0xfe, 0xff, 0xfd, 0xfe, 0xff
+};
+
+
static void
nvc0_vm_map_pgt(struct nouveau_gpuobj *pgd, u32 index,
struct nouveau_gpuobj *pgt[2])
@@ -68,10 +110,20 @@ static void
nvc0_vm_map(struct nouveau_vma *vma, struct nouveau_gpuobj *pgt,
struct nouveau_mem *mem, u32 pte, u32 cnt, u64 phys, u64 delta)
{
- u32 next = 1 << (vma->node->type - 8);
+ u64 next = 1 << (vma->node->type - 8);
phys = nvc0_vm_addr(vma, phys, mem->memtype, 0);
pte <<= 3;
+
+ if (mem->tag) {
+ struct nouveau_ltcg *ltcg =
+ nouveau_ltcg(vma->vm->vmm->base.base.parent);
+ u32 tag = mem->tag->offset + (delta >> 17);
+ phys |= (u64)tag << (32 + 12);
+ next |= (u64)1 << (32 + 12);
+ ltcg->tags_clear(ltcg, tag, cnt);
+ }
+
while (cnt--) {
nv_wo32(pgt, pte + 0, lower_32_bits(phys));
nv_wo32(pgt, pte + 4, upper_32_bits(phys));
@@ -85,10 +137,12 @@ nvc0_vm_map_sg(struct nouveau_vma *vma, struct
nouveau_gpuobj *pgt,
struct nouveau_mem *mem, u32 pte, u32 cnt, dma_addr_t *list)
{
u32 target = (vma->access & NV_MEM_ACCESS_NOSNOOP) ? 7 : 5;
+ /* compressed storage types are invalid for system memory */
+ u32 memtype = nvc0_pte_storage_type_map[mem->memtype & 0xff];
pte <<= 3;
while (cnt--) {
- u64 phys = nvc0_vm_addr(vma, *list++, mem->memtype, target);
+ u64 phys = nvc0_vm_addr(vma, *list++, memtype, target);
nv_wo32(pgt, pte + 0, lower_32_bits(phys));
nv_wo32(pgt, pte + 4, upper_32_bits(phys));
pte += 8;
--
1.7.3.4
Christoph Bumiller
2013-Mar-27 21:16 UTC
[Nouveau] [PATCH 2/4] drm/nouveau: support NOUVEAU_GETPARAM_GRAPH_UNITS on >= nvc0
---
drivers/gpu/drm/nouveau/core/engine/graph/nv40.c | 10 ++++++++++
drivers/gpu/drm/nouveau/core/engine/graph/nv50.c | 10 ++++++++++
drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c | 15 +++++++++++++++
drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h | 2 ++
drivers/gpu/drm/nouveau/core/engine/graph/nve0.c | 2 ++
.../gpu/drm/nouveau/core/include/engine/graph.h | 4 ++++
drivers/gpu/drm/nouveau/nouveau_abi16.c | 12 ++++--------
7 files changed, 47 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nv40.c
b/drivers/gpu/drm/nouveau/core/engine/graph/nv40.c
index 17049d5..193a5de 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nv40.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nv40.c
@@ -46,6 +46,14 @@ struct nv40_graph_chan {
struct nouveau_graph_chan base;
};
+static u64
+nv40_graph_units(struct nouveau_graph *graph)
+{
+ struct nv40_graph_priv *priv = (void *)graph;
+
+ return nv_rd32(priv, 0x1540);
+}
+
/*******************************************************************************
* Graphics object classes
******************************************************************************/
@@ -359,6 +367,8 @@ nv40_graph_ctor(struct nouveau_object *parent, struct
nouveau_object *engine,
else
nv_engine(priv)->sclass = nv40_graph_sclass;
nv_engine(priv)->tile_prog = nv40_graph_tile_prog;
+
+ priv->base.units = nv40_graph_units;
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c
b/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c
index f2b1a7a..1ac3611 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c
@@ -48,6 +48,14 @@ struct nv50_graph_chan {
struct nouveau_graph_chan base;
};
+static u64
+nv50_graph_units(struct nouveau_graph *graph)
+{
+ struct nv50_graph_priv *priv = (void *)graph;
+
+ return nv_rd32(priv, 0x1540);
+}
+
/*******************************************************************************
* Graphics object classes
******************************************************************************/
@@ -819,6 +827,8 @@ nv50_graph_ctor(struct nouveau_object *parent, struct
nouveau_object *engine,
nv_subdev(priv)->intr = nv50_graph_intr;
nv_engine(priv)->cclass = &nv50_graph_cclass;
+ priv->base.units = nv50_graph_units;
+
switch (nv_device(priv)->chipset) {
case 0x50:
nv_engine(priv)->sclass = nv50_graph_sclass;
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c
b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c
index 0de0dd7..5ce4941 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c
@@ -60,6 +60,19 @@ nvc8_graph_sclass[] = {
{}
};
+u64
+nvc0_graph_units(struct nouveau_graph *graph)
+{
+ struct nvc0_graph_priv *priv = (void *)graph;
+ u64 cfg;
+
+ cfg = (u32)priv->gpc_nr;
+ cfg |= (u32)priv->tpc_total << 8;
+ cfg |= (u64)priv->rop_nr << 32;
+
+ return cfg;
+}
+
/*******************************************************************************
* PGRAPH context
******************************************************************************/
@@ -529,6 +542,8 @@ nvc0_graph_ctor(struct nouveau_object *parent, struct
nouveau_object *engine,
nv_subdev(priv)->intr = nvc0_graph_intr;
nv_engine(priv)->cclass = &nvc0_graph_cclass;
+ priv->base.units = nvc0_graph_units;
+
if (nouveau_boolopt(device->cfgopt, "NvGrUseFW", false)) {
nv_info(priv, "using external firmware\n");
if (nvc0_graph_ctor_fw(priv, "fuc409c", &priv->fuc409c) ||
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h
b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h
index a1e78de..af033dc 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h
@@ -169,4 +169,6 @@ int nvc0_graph_context_ctor(struct nouveau_object *, struct
nouveau_object *,
struct nouveau_object **);
void nvc0_graph_context_dtor(struct nouveau_object *);
+u64 nvc0_graph_units(struct nouveau_graph *);
+
#endif
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c
b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c
index 4857f91..4b45afb 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c
@@ -217,6 +217,8 @@ nve0_graph_ctor(struct nouveau_object *parent, struct
nouveau_object *engine,
nv_engine(priv)->cclass = &nve0_graph_cclass;
nv_engine(priv)->sclass = nve0_graph_sclass;
+ priv->base.units = nvc0_graph_units;
+
if (nouveau_boolopt(device->cfgopt, "NvGrUseFW", false)) {
nv_info(priv, "using external firmware\n");
if (nvc0_graph_ctor_fw(priv, "fuc409c", &priv->fuc409c) ||
diff --git a/drivers/gpu/drm/nouveau/core/include/engine/graph.h
b/drivers/gpu/drm/nouveau/core/include/engine/graph.h
index 6943b40..5d39243 100644
--- a/drivers/gpu/drm/nouveau/core/include/engine/graph.h
+++ b/drivers/gpu/drm/nouveau/core/include/engine/graph.h
@@ -26,6 +26,10 @@ struct nouveau_graph_chan {
struct nouveau_graph {
struct nouveau_engine base;
+
+ /* Returns chipset-specific counts of units packed into an u64.
+ */
+ u64 (*units)(struct nouveau_graph *);
};
static inline struct nouveau_graph *
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c
b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 3b6dc88..f19f0a4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -30,6 +30,7 @@
#include <subdev/fb.h>
#include <subdev/timer.h>
#include <subdev/instmem.h>
+#include <engine/graph.h>
#include "nouveau_drm.h"
#include "nouveau_dma.h"
@@ -168,6 +169,7 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS)
struct nouveau_drm *drm = nouveau_drm(dev);
struct nouveau_device *device = nv_device(drm->device);
struct nouveau_timer *ptimer = nouveau_timer(device);
+ struct nouveau_graph *graph = (void *)nouveau_engine(device, NVDEV_ENGINE_GR);
struct drm_nouveau_getparam *getparam = data;
switch (getparam->param) {
@@ -208,14 +210,8 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS)
getparam->value = 1;
break;
case NOUVEAU_GETPARAM_GRAPH_UNITS:
- /* NV40 and NV50 versions are quite different, but register
- * address is the same. User is supposed to know the card
- * family anyway... */
- if (device->chipset >= 0x40) {
- getparam->value = nv_rd32(device, 0x001540);
- break;
- }
- /* FALLTHRU */
+ getparam->value = graph->units ? graph->units(graph) : 0;
+ break;
default:
nv_debug(device, "unknown parameter %lld\n", getparam->param);
return -EINVAL;
--
1.7.3.4
Christoph Bumiller
2013-Mar-27 21:16 UTC
[Nouveau] [PATCH 3/4] drm/nvc0: add software methods to control some MP regs
---
.../gpu/drm/nouveau/core/engine/software/nvc0.c | 29 ++++++++++++++++++++
1 files changed, 29 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c
b/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c
index a523eaa..d698e71 100644
--- a/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c
@@ -94,6 +94,32 @@ nvc0_software_mthd_flip(struct nouveau_object *object, u32
mthd,
return -EINVAL;
}
+static int
+nvc0_software_mthd_mp_control(struct nouveau_object *object, u32 mthd,
+ void *args, u32 size)
+{
+ struct nvc0_software_chan *chan = (void *)nv_engctx(object->parent);
+ struct nvc0_software_priv *priv = (void *)nv_object(chan)->engine;
+ u32 data = *(u32 *)args;
+
+ switch (mthd) {
+ case 0x600:
+ nv_wr32(priv, 0x419e00, data); /* MP.PM_UNK000 */
+ break;
+ case 0x644:
+ if (data & ~0x1ffffe)
+ return -EINVAL;
+ nv_wr32(priv, 0x419e44, data); /* MP.TRAP_WARP_ERROR_EN */
+ break;
+ case 0x6ac:
+ nv_wr32(priv, 0x419eac, data); /* MP.PM_UNK0AC */
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
static struct nouveau_omthds
nvc0_software_omthds[] = {
{ 0x0400, 0x0400, nvc0_software_mthd_vblsem_offset },
@@ -101,6 +127,9 @@ nvc0_software_omthds[] = {
{ 0x0408, 0x0408, nvc0_software_mthd_vblsem_value },
{ 0x040c, 0x040c, nvc0_software_mthd_vblsem_release },
{ 0x0500, 0x0500, nvc0_software_mthd_flip },
+ { 0x0600, 0x0600, nvc0_software_mthd_mp_control },
+ { 0x0644, 0x0644, nvc0_software_mthd_mp_control },
+ { 0x06ac, 0x06ac, nvc0_software_mthd_mp_control },
{}
};
--
1.7.3.4
Christoph Bumiller
2013-Mar-27 21:16 UTC
[Nouveau] [PATCH 4/4] drm/nve0: add handling for a bunch of PGRAPH traps
---
drivers/gpu/drm/nouveau/core/engine/graph/nve0.c | 230 ++++++++++++++++++++++
1 files changed, 230 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c
b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c
index 4b45afb..e411b18 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c
@@ -77,11 +77,214 @@ nve0_graph_ctxctl_isr(struct nvc0_graph_priv *priv)
nv_wr32(priv, 0x409c20, ustat);
}
+static const struct nouveau_enum nve0_mp_warp_error[] = {
+ { 0x00, "(no error)" },
+ { 0x01, "stack mismatch" },
+ { 0x05, "misaligned PC" },
+ { 0x08, "misaligned GPR" },
+ { 0x09, "invalid opcode" },
+ { 0x0d, "out of bounds GPR" },
+ { 0x0e, "out of bounds l/s/a[]" },
+ { 0x0f, "unaligned memory access" },
+ { 0x11, "invalid param" },
+ {}
+};
+
+static const struct nouveau_enum nve0_mp_global_error[] = {
+ { 2, "multiple warp errors" },
+ { 3, "out of stack space" },
+ {}
+};
+
+static const struct nouveau_enum nve0_gpc_rop_error[] = {
+ { 1, "RT pitch overrun" },
+ { 4, "RT width overrun" },
+ { 5, "RT height overrun" },
+ { 7, "ZETA storage type mismatch" },
+ { 8, "RT storage type mismatch" },
+ { 10, "RT linear mismatch" },
+ {}
+};
+
+static const struct nouveau_enum nve0_sked_error[] = {
+ { 7, "constant buffer size" },
+ { 9, "local memory size pos" },
+ { 10, "local memory size neg" },
+ { 11, "warp cstack size" },
+ { 12, "total temp size" },
+ { 13, "register count" },
+ { 18, "total threads" },
+ { 20, "program offset" },
+ { 21, "shared memory size" },
+ { 25, "shared/l1 config" },
+ { 26, "total register count" },
+ {}
+};
+
+static void
+nve0_graph_mp_trap(struct nvc0_graph_priv *priv, int gpc, int tp)
+{
+ int i;
+ u32 bpct = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x010));
+ u32 bpst = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x00c));
+ u32 werr = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x648));
+ u32 gerr = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x650));
+
+ nv_error(priv, "GPC%i/TP%i/MP trap:", gpc, tp);
+
+ for (i = 0; i <= 31; ++i) {
+ if (!(gerr & (1 << i)))
+ continue;
+ pr_cont(" <");
+ nouveau_enum_print(nve0_mp_global_error, i);
+ pr_cont(">");
+ }
+ if (werr) {
+ pr_cont(" <");
+ nouveau_enum_print(nve0_mp_warp_error, werr & 0xffff);
+ pr_cont(">");
+ }
+ pr_cont("\n");
+
+ nv_error(priv, "breakpoint control,status = %08x %08x\n", bpct, bpst);
+
+ /* disable MP trap to avoid spam */
+ nv_mask(priv, TPC_UNIT(gpc, tp, 0x50c), 0x2, 0x0);
+
+ /* TODO: figure out how to resume after an MP trap */
+}
+
+static void
+nve0_graph_tp_trap(struct nvc0_graph_priv *priv, int gpc, int tp)
+{
+ u32 stat = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x508));
+
+ if (stat & 0x1) {
+ u32 trap = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x224));
+ nv_error(priv, "GPC%i/TP%i/TEX trap: %08x\n",
+ gpc, tp, trap);
+
+ nv_wr32(priv, TPC_UNIT(gpc, tp, 0x224), 0xc0000000);
+ stat &= ~0x1;
+ }
+
+ if (stat & 0x2) {
+ nve0_graph_mp_trap(priv, gpc, tp);
+ stat &= ~0x2;
+ }
+
+ if (stat & 0x4) {
+ u32 trap = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x084));
+ nv_error(priv, "GPC%i/TP%i/POLY trap: %08x\n",
+ gpc, tp, trap);
+
+ nv_wr32(priv, TPC_UNIT(gpc, tp, 0x084), 0xc0000000);
+ stat &= ~0x4;
+ }
+
+ if (stat & 0x8) {
+ u32 trap = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x48c));
+ nv_error(priv, "GPC%i/TP%i/L1C trap: %08x\n",
+ gpc, tp, trap);
+
+ nv_wr32(priv, TPC_UNIT(gpc, tp, 0x48c), 0xc0000000);
+ stat &= ~0x8;
+ }
+
+ if (stat) {
+ nv_error(priv, "GPC%i/TP%i: unknown stat %08x\n",
+ gpc, tp, stat);
+ }
+}
+
+static void
+nve0_graph_gpc_trap(struct nvc0_graph_priv *priv)
+{
+ const u32 mask = nv_rd32(priv, 0x400118);
+ int gpc;
+
+ for (gpc = 0; gpc < 4; ++gpc) {
+ u32 stat;
+ int tp;
+
+ if (!(mask & (1 << gpc)))
+ continue;
+ stat = nv_rd32(priv, GPC_UNIT(gpc, 0x2c90));
+
+ if (stat & 0x0001) {
+ u32 trap[4];
+ int i;
+
+ trap[0] = nv_rd32(priv, GPC_UNIT(gpc, 0x0420));
+ trap[1] = nv_rd32(priv, GPC_UNIT(gpc, 0x0434));
+ trap[2] = nv_rd32(priv, GPC_UNIT(gpc, 0x0438));
+ trap[3] = nv_rd32(priv, GPC_UNIT(gpc, 0x043c));
+
+ nv_error(priv, "GPC%i/PROP trap:", gpc);
+ for (i = 0; i <= 29; ++i) {
+ if (!(trap[0] & (1 << i)))
+ continue;
+ pr_cont(" <");
+ nouveau_enum_print(nve0_gpc_rop_error, i);
+ pr_cont(">");
+ }
+ pr_cont("\n");
+
+ nv_error(priv, "x = %u, y = %u, "
+ "format = %x, storage type = %x\n",
+ trap[1] & 0xffff,
+ trap[1] >> 16,
+ (trap[2] >> 8) & 0x3f,
+ trap[3] & 0xff);
+
+ nv_wr32(priv, GPC_UNIT(gpc, 0x0420), 0xc0000000);
+ stat &= ~0x0001;
+ }
+
+ if (stat & 0x0002) {
+ u32 trap = nv_rd32(priv, GPC_UNIT(gpc, 0x0900));
+ nv_error(priv, "GPC%i/ZCULL trap: %08x\n", gpc,
+ trap);
+ nv_wr32(priv, GPC_UNIT(gpc, 0x0900), 0xc0000000);
+ stat &= ~0x0002;
+ }
+
+ if (stat & 0x0004) {
+ u32 trap = nv_rd32(priv, GPC_UNIT(gpc, 0x1028));
+ nv_error(priv, "GPC%i/CCACHE trap: %08x\n", gpc,
+ trap);
+ nv_wr32(priv, GPC_UNIT(gpc, 0x1028), 0xc0000000);
+ stat &= ~0x0004;
+ }
+
+ if (stat & 0x0008) {
+ u32 trap = nv_rd32(priv, GPC_UNIT(gpc, 0x0824));
+ nv_error(priv, "GPC%i/ESETUP trap %08x\n", gpc,
+ trap);
+ nv_wr32(priv, GPC_UNIT(gpc, 0x0824), 0xc0000000);
+ stat &= ~0x0008;
+ }
+
+ for (tp = 0; tp < 8; ++tp) {
+ if (stat & (1 << (16 + tp)))
+ nve0_graph_tp_trap(priv, gpc, tp);
+ }
+ stat &= ~0xff0000;
+
+ if (stat) {
+ nv_error(priv, "GPC%i: unknown stat %08x\n",
+ gpc, stat);
+ }
+ }
+}
+
+
static void
nve0_graph_trap_isr(struct nvc0_graph_priv *priv, int chid, u64 inst,
struct nouveau_object *engctx)
{
u32 trap = nv_rd32(priv, 0x400108);
+ int i;
int rop;
if (trap & 0x00000001) {
@@ -102,6 +305,33 @@ nve0_graph_trap_isr(struct nvc0_graph_priv *priv, int chid,
u64 inst,
trap &= ~0x00000010;
}
+ if (trap & 0x00000100) {
+ u32 stat = nv_rd32(priv, 0x407020);
+ nv_error(priv, "SKED ch %d [0x%010llx %s]:",
+ chid, inst, nouveau_client_name(engctx));
+
+ for (i = 0; i <= 29; ++i) {
+ if (!(stat & (1 << i)))
+ continue;
+ pr_cont(" <");
+ nouveau_enum_print(nve0_sked_error, i);
+ pr_cont(">");
+ }
+ pr_cont("\n");
+
+ if (stat & 0x3fffffff)
+ nv_wr32(priv, 0x407020, 0x40000000);
+ nv_wr32(priv, 0x400108, 0x00000100);
+ trap &= ~0x00000100;
+ }
+
+ if (trap & 0x01000000) {
+ nv_error(priv, "GPC ch %d [0x%010llx %s]:\n",
+ chid, inst, nouveau_client_name(engctx));
+ nve0_graph_gpc_trap(priv);
+ trap &= ~0x01000000;
+ }
+
if (trap & 0x02000000) {
for (rop = 0; rop < priv->rop_nr; rop++) {
u32 statz = nv_rd32(priv, ROP_UNIT(rop, 0x070));
--
1.7.3.4
Seemingly Similar Threads
- [PATCH] drm/nouveau: fix ltcg memory initialization after suspend
- [PATCH] drm/nouveau: fix ltcg memory corruptions
- [PATCH] drm/nvc0-/ltcg: fix ltcg memory initialization after suspend
- [PATCH] drm/nouveau: fix ltcg memory initialization after suspend
- [Bug 54437] New: linux-nouveau2.6 (3.6.0-rc4) : GTX580 : Xorg freezes when using accel