Christoph Bumiller
2013-Mar-27 21:16 UTC
[Nouveau] [PATCH 1/4] drm/nvc0: implement VRAM compression
--- drivers/gpu/drm/nouveau/core/include/subdev/ltcg.h | 7 + drivers/gpu/drm/nouveau/core/subdev/fb/nvc0.c | 55 +++++---- drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c | 129 +++++++++++++++++++- drivers/gpu/drm/nouveau/core/subdev/vm/nvc0.c | 58 +++++++++- 4 files changed, 220 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/ltcg.h b/drivers/gpu/drm/nouveau/core/include/subdev/ltcg.h index f351f63..a1985ed 100644 --- a/drivers/gpu/drm/nouveau/core/include/subdev/ltcg.h +++ b/drivers/gpu/drm/nouveau/core/include/subdev/ltcg.h @@ -4,8 +4,15 @@ #include <core/subdev.h> #include <core/device.h> +struct nouveau_mm_node; + struct nouveau_ltcg { struct nouveau_subdev base; + + int (*tags_alloc)(struct nouveau_ltcg *, u32 count, + struct nouveau_mm_node **); + void (*tags_free)(struct nouveau_ltcg *, struct nouveau_mm_node **); + void (*tags_clear)(struct nouveau_ltcg *, u32 first, u32 count); }; static inline struct nouveau_ltcg * diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nvc0.c index 3b2ddc6..86ad592 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nvc0.c @@ -23,6 +23,7 @@ */ #include <subdev/fb.h> +#include <subdev/ltcg.h> #include <subdev/bios.h> struct nvc0_fb_priv { @@ -31,34 +32,14 @@ struct nvc0_fb_priv { dma_addr_t r100c10; }; -/* 0 = unsupported - * 1 = non-compressed - * 3 = compressed - */ -static const u8 types[256] = { - 1, 1, 3, 3, 3, 3, 0, 3, 3, 3, 3, 0, 0, 0, 0, 0, - 0, 1, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, - 3, 3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 1, 1, 1, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, - 3, 3, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, - 3, 3, 0, 0, 0, 0, 0, 0, 3, 0, 0, 3, 0, 3, 0, 3, - 3, 0, 3, 3, 3, 3, 3, 0, 0, 3, 0, 3, 0, 3, 3, 0, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 1, 1, 0 -}; +extern const u8 nvc0_pte_storage_type_map[256]; + static bool nvc0_fb_memtype_valid(struct nouveau_fb *pfb, u32 tile_flags) { u8 memtype = (tile_flags & 0x0000ff00) >> 8; - return likely((types[memtype] == 1)); + return likely((nvc0_pte_storage_type_map[memtype] != 0xff)); } static int @@ -130,6 +111,7 @@ nvc0_fb_vram_new(struct nouveau_fb *pfb, u64 size, u32 align, u32 ncmin, int type = (memtype & 0x0ff); int back = (memtype & 0x800); int ret; + const bool comp = nvc0_pte_storage_type_map[type] != type; size >>= 12; align >>= 12; @@ -142,10 +124,22 @@ nvc0_fb_vram_new(struct nouveau_fb *pfb, u64 size, u32 align, u32 ncmin, return -ENOMEM; INIT_LIST_HEAD(&mem->regions); - mem->memtype = type; mem->size = size; mutex_lock(&pfb->base.mutex); + if (comp) { + struct nouveau_ltcg *ltcg = nouveau_ltcg(pfb->base.base.parent); + + /* compression only works with lpages */ + if (align == (1 << (17 - 12))) { + int n = size >> 5; + ltcg->tags_alloc(ltcg, n, &mem->tag); + } + if (unlikely(!mem->tag)) + type = nvc0_pte_storage_type_map[type]; + } + mem->memtype = type; + do { if (back) ret = nouveau_mm_tail(mm, 1, size, ncmin, align, &r); @@ -168,6 +162,17 @@ nvc0_fb_vram_new(struct nouveau_fb *pfb, u64 size, u32 align, u32 ncmin, return 0; } +static void +nvc0_fb_vram_del(struct nouveau_fb *pfb, struct nouveau_mem **pmem) +{ + struct nouveau_ltcg *ltcg = nouveau_ltcg(pfb->base.base.parent); + + if ((*pmem)->tag) + ltcg->tags_free(ltcg, &(*pmem)->tag); + + nv50_fb_vram_del(pfb, pmem); +} + static int nvc0_fb_init(struct nouveau_object *object) { @@ -215,7 +220,7 @@ nvc0_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine, priv->base.memtype_valid = nvc0_fb_memtype_valid; priv->base.ram.init = nvc0_fb_vram_init; priv->base.ram.get = nvc0_fb_vram_new; - priv->base.ram.put = nv50_fb_vram_del; + priv->base.ram.put = nvc0_fb_vram_del; priv->r100c10_page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (priv->r100c10_page) { diff --git a/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c index 078a2b9..a529563 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c @@ -23,10 +23,17 @@ */ #include <subdev/ltcg.h> +#include <subdev/fb.h> +#include <subdev/timer.h> struct nvc0_ltcg_priv { struct nouveau_ltcg base; + u32 part_nr; + u32 part_mask; u32 subp_nr; + struct nouveau_mm tags; + u32 num_tags; + struct nouveau_mm_node *tag_ram; }; static void @@ -62,11 +69,104 @@ nvc0_ltcg_intr(struct nouveau_subdev *subdev) } static int +nvc0_ltcg_tags_alloc(struct nouveau_ltcg *ltcg, u32 n, + struct nouveau_mm_node **pnode) +{ + struct nvc0_ltcg_priv *priv = (struct nvc0_ltcg_priv *)ltcg; + int ret; + + ret = nouveau_mm_head(&priv->tags, 1, n, n, 1, pnode); + if (ret) + *pnode = NULL; + + return ret; +} + +static void +nvc0_ltcg_tags_free(struct nouveau_ltcg *ltcg, struct nouveau_mm_node **pnode) +{ + struct nvc0_ltcg_priv *priv = (struct nvc0_ltcg_priv *)ltcg; + + nouveau_mm_free(&priv->tags, pnode); +} + +static void +nvc0_ltcg_tags_clear(struct nouveau_ltcg *ltcg, u32 first, u32 count) +{ + struct nvc0_ltcg_priv *priv = (struct nvc0_ltcg_priv *)ltcg; + u32 last = first + count - 1; + int p, i; + + BUG_ON((first > last) || (last >= priv->num_tags)); + + nv_wr32(priv, 0x17e8cc, first); + nv_wr32(priv, 0x17e8d0, last); + nv_wr32(priv, 0x17e8c8, 0x4); /* trigger clear */ + + /* wait until it's finished with clearing */ + for (p = 0; p < priv->part_nr; ++p) { + if (!(priv->part_mask & (1 << p))) + continue; + for (i = 0; i < priv->subp_nr; ++i) + nv_wait(priv, 0x1410c8 + p * 0x2000 + i * 0x400, ~0, 0); + } +} + +/* TODO: Figure out tag memory details and drop the over-cautious allocation. + */ +static int +nvc0_ltcg_init_tag_ram(struct nouveau_fb *pfb, struct nvc0_ltcg_priv *priv) +{ + u32 tag_size, tag_margin, tag_align; + int ret; + + nv_wr32(priv, 0x17e8d8, priv->part_nr); + + /* tags for 1/4 of VRAM should be enough (8192/4 per GiB of VRAM) */ + priv->num_tags = (pfb->ram.size >> 17) / 4; + if (priv->num_tags > (1 << 17)) + priv->num_tags = 1 << 17; /* we have 17 bits in PTE */ + priv->num_tags = (priv->num_tags + 63) & ~63; /* round up to 64 */ + + tag_align = priv->part_nr * 0x800; + tag_margin = (tag_align < 0x6000) ? 0x6000 : tag_align; + + /* 4 part 4 sub: 0x2000 bytes for 56 tags */ + /* 3 part 4 sub: 0x6000 bytes for 168 tags */ + /* + * About 147 bytes per tag. Let's be safe and allocate x2, which makes + * 0x4980 bytes for 64 tags, and round up to 0x6000 bytes for 64 tags. + * + * For 4 GiB of memory we'll have 8192 tags which makes 3 MiB, < 0.1 %. + */ + tag_size = (priv->num_tags / 64) * 0x6000 + tag_margin; + tag_size += tag_align; + tag_size = (tag_size + 0xfff) >> 12; /* round up */ + + ret = nouveau_mm_tail(&pfb->vram, 0, tag_size, tag_size, 1, + &priv->tag_ram); + if (ret) { + priv->num_tags = 0; + } else { + u64 tag_base = (priv->tag_ram->offset << 12) + tag_margin; + + tag_base += tag_align - 1; + tag_base /= tag_align; + + nv_wr32(priv, 0x17e8d4, tag_base); + } + ret = nouveau_mm_init(&priv->tags, 0, priv->num_tags, 1); + + return ret; +} + +static int nvc0_ltcg_ctor(struct nouveau_object *parent, struct nouveau_object *engine, struct nouveau_oclass *oclass, void *data, u32 size, struct nouveau_object **pobject) { struct nvc0_ltcg_priv *priv; + struct nouveau_fb *pfb = nouveau_fb(parent); int ret; ret = nouveau_ltcg_create(parent, engine, oclass, &priv); @@ -74,19 +174,44 @@ nvc0_ltcg_ctor(struct nouveau_object *parent, struct nouveau_object *engine, if (ret) return ret; - priv->subp_nr = nv_rd32(priv, 0x17e8dc) >> 24; + priv->part_nr = nv_rd32(priv, 0x022438); + priv->part_mask = nv_rd32(priv, 0x022554); + + priv->subp_nr = nv_rd32(priv, 0x17e8dc) >> 28; + nv_mask(priv, 0x17e820, 0x00100000, 0x00000000); /* INTR_EN &= ~0x10 */ + ret = nvc0_ltcg_init_tag_ram(pfb, priv); + if (ret) + return ret; + + priv->base.tags_alloc = nvc0_ltcg_tags_alloc; + priv->base.tags_free = nvc0_ltcg_tags_free; + priv->base.tags_clear = nvc0_ltcg_tags_clear; + nv_subdev(priv)->intr = nvc0_ltcg_intr; return 0; } +static void +nvc0_ltcg_dtor(struct nouveau_object *object) +{ + struct nouveau_ltcg *ltcg = (struct nouveau_ltcg *)object; + struct nvc0_ltcg_priv *priv = (struct nvc0_ltcg_priv *)ltcg; + struct nouveau_fb *pfb = nouveau_fb(ltcg->base.base.parent); + + nouveau_mm_fini(&priv->tags); + nouveau_mm_free(&pfb->vram, &priv->tag_ram); + + nouveau_ltcg_destroy(ltcg); +} + struct nouveau_oclass nvc0_ltcg_oclass = { .handle = NV_SUBDEV(LTCG, 0xc0), .ofuncs = &(struct nouveau_ofuncs) { .ctor = nvc0_ltcg_ctor, - .dtor = _nouveau_ltcg_dtor, + .dtor = nvc0_ltcg_dtor, .init = _nouveau_ltcg_init, .fini = _nouveau_ltcg_fini, }, diff --git a/drivers/gpu/drm/nouveau/core/subdev/vm/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/vm/nvc0.c index 30c61e6..4c3b0a2 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/vm/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/subdev/vm/nvc0.c @@ -28,12 +28,54 @@ #include <subdev/timer.h> #include <subdev/fb.h> #include <subdev/vm.h> +#include <subdev/ltcg.h> struct nvc0_vmmgr_priv { struct nouveau_vmmgr base; spinlock_t lock; }; + +/* Map from compressed to corresponding uncompressed storage type. + * The value 0xff represents an invalid storage type. + */ +const u8 nvc0_pte_storage_type_map[256] +{ + 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0xff, 0x01, /* 0x00 */ + 0x01, 0x01, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x11, 0xff, 0xff, 0xff, 0xff, 0xff, 0x11, /* 0x10 */ + 0x11, 0x11, 0x11, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x26, 0x27, /* 0x20 */ + 0x28, 0x29, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x30 */ + 0xff, 0xff, 0x26, 0x27, 0x28, 0x29, 0x26, 0x27, + 0x28, 0x29, 0xff, 0xff, 0xff, 0xff, 0x46, 0xff, /* 0x40 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x46, 0x46, 0x46, 0x46, 0xff, 0xff, 0xff, /* 0x50 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x60 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70 */ + 0xff, 0xff, 0xff, 0x7b, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7b, 0x7b, /* 0x80 */ + 0x7b, 0x7b, 0xff, 0x8b, 0x8c, 0x8d, 0x8e, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x8b, 0x8c, 0x8d, 0x8e, 0xa7, /* 0xa0 */ + 0xa8, 0xa9, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0 */ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa7, + 0xa8, 0xa9, 0xaa, 0xc3, 0xff, 0xff, 0xff, 0xff, /* 0xc0 */ + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xfe, 0xc3, 0xc3, + 0xc3, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0 */ + 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xfe, 0xff, 0xfe, + 0xfe, 0xff, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff, /* 0xe0 */ + 0xff, 0xfe, 0xff, 0xfe, 0xff, 0xfe, 0xfe, 0xff, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, /* 0xf0 */ + 0xfe, 0xfe, 0xfe, 0xfe, 0xff, 0xfd, 0xfe, 0xff +}; + + static void nvc0_vm_map_pgt(struct nouveau_gpuobj *pgd, u32 index, struct nouveau_gpuobj *pgt[2]) @@ -68,10 +110,20 @@ static void nvc0_vm_map(struct nouveau_vma *vma, struct nouveau_gpuobj *pgt, struct nouveau_mem *mem, u32 pte, u32 cnt, u64 phys, u64 delta) { - u32 next = 1 << (vma->node->type - 8); + u64 next = 1 << (vma->node->type - 8); phys = nvc0_vm_addr(vma, phys, mem->memtype, 0); pte <<= 3; + + if (mem->tag) { + struct nouveau_ltcg *ltcg + nouveau_ltcg(vma->vm->vmm->base.base.parent); + u32 tag = mem->tag->offset + (delta >> 17); + phys |= (u64)tag << (32 + 12); + next |= (u64)1 << (32 + 12); + ltcg->tags_clear(ltcg, tag, cnt); + } + while (cnt--) { nv_wo32(pgt, pte + 0, lower_32_bits(phys)); nv_wo32(pgt, pte + 4, upper_32_bits(phys)); @@ -85,10 +137,12 @@ nvc0_vm_map_sg(struct nouveau_vma *vma, struct nouveau_gpuobj *pgt, struct nouveau_mem *mem, u32 pte, u32 cnt, dma_addr_t *list) { u32 target = (vma->access & NV_MEM_ACCESS_NOSNOOP) ? 7 : 5; + /* compressed storage types are invalid for system memory */ + u32 memtype = nvc0_pte_storage_type_map[mem->memtype & 0xff]; pte <<= 3; while (cnt--) { - u64 phys = nvc0_vm_addr(vma, *list++, mem->memtype, target); + u64 phys = nvc0_vm_addr(vma, *list++, memtype, target); nv_wo32(pgt, pte + 0, lower_32_bits(phys)); nv_wo32(pgt, pte + 4, upper_32_bits(phys)); pte += 8; -- 1.7.3.4
Christoph Bumiller
2013-Mar-27 21:16 UTC
[Nouveau] [PATCH 2/4] drm/nouveau: support NOUVEAU_GETPARAM_GRAPH_UNITS on >= nvc0
--- drivers/gpu/drm/nouveau/core/engine/graph/nv40.c | 10 ++++++++++ drivers/gpu/drm/nouveau/core/engine/graph/nv50.c | 10 ++++++++++ drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c | 15 +++++++++++++++ drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h | 2 ++ drivers/gpu/drm/nouveau/core/engine/graph/nve0.c | 2 ++ .../gpu/drm/nouveau/core/include/engine/graph.h | 4 ++++ drivers/gpu/drm/nouveau/nouveau_abi16.c | 12 ++++-------- 7 files changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nv40.c b/drivers/gpu/drm/nouveau/core/engine/graph/nv40.c index 17049d5..193a5de 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nv40.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nv40.c @@ -46,6 +46,14 @@ struct nv40_graph_chan { struct nouveau_graph_chan base; }; +static u64 +nv40_graph_units(struct nouveau_graph *graph) +{ + struct nv40_graph_priv *priv = (void *)graph; + + return nv_rd32(priv, 0x1540); +} + /******************************************************************************* * Graphics object classes ******************************************************************************/ @@ -359,6 +367,8 @@ nv40_graph_ctor(struct nouveau_object *parent, struct nouveau_object *engine, else nv_engine(priv)->sclass = nv40_graph_sclass; nv_engine(priv)->tile_prog = nv40_graph_tile_prog; + + priv->base.units = nv40_graph_units; return 0; } diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c b/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c index f2b1a7a..1ac3611 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nv50.c @@ -48,6 +48,14 @@ struct nv50_graph_chan { struct nouveau_graph_chan base; }; +static u64 +nv50_graph_units(struct nouveau_graph *graph) +{ + struct nv50_graph_priv *priv = (void *)graph; + + return nv_rd32(priv, 0x1540); +} + /******************************************************************************* * Graphics object classes ******************************************************************************/ @@ -819,6 +827,8 @@ nv50_graph_ctor(struct nouveau_object *parent, struct nouveau_object *engine, nv_subdev(priv)->intr = nv50_graph_intr; nv_engine(priv)->cclass = &nv50_graph_cclass; + priv->base.units = nv50_graph_units; + switch (nv_device(priv)->chipset) { case 0x50: nv_engine(priv)->sclass = nv50_graph_sclass; diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c index 0de0dd7..5ce4941 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c @@ -60,6 +60,19 @@ nvc8_graph_sclass[] = { {} }; +u64 +nvc0_graph_units(struct nouveau_graph *graph) +{ + struct nvc0_graph_priv *priv = (void *)graph; + u64 cfg; + + cfg = (u32)priv->gpc_nr; + cfg |= (u32)priv->tpc_total << 8; + cfg |= (u64)priv->rop_nr << 32; + + return cfg; +} + /******************************************************************************* * PGRAPH context ******************************************************************************/ @@ -529,6 +542,8 @@ nvc0_graph_ctor(struct nouveau_object *parent, struct nouveau_object *engine, nv_subdev(priv)->intr = nvc0_graph_intr; nv_engine(priv)->cclass = &nvc0_graph_cclass; + priv->base.units = nvc0_graph_units; + if (nouveau_boolopt(device->cfgopt, "NvGrUseFW", false)) { nv_info(priv, "using external firmware\n"); if (nvc0_graph_ctor_fw(priv, "fuc409c", &priv->fuc409c) || diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h index a1e78de..af033dc 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h @@ -169,4 +169,6 @@ int nvc0_graph_context_ctor(struct nouveau_object *, struct nouveau_object *, struct nouveau_object **); void nvc0_graph_context_dtor(struct nouveau_object *); +u64 nvc0_graph_units(struct nouveau_graph *); + #endif diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c index 4857f91..4b45afb 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c @@ -217,6 +217,8 @@ nve0_graph_ctor(struct nouveau_object *parent, struct nouveau_object *engine, nv_engine(priv)->cclass = &nve0_graph_cclass; nv_engine(priv)->sclass = nve0_graph_sclass; + priv->base.units = nvc0_graph_units; + if (nouveau_boolopt(device->cfgopt, "NvGrUseFW", false)) { nv_info(priv, "using external firmware\n"); if (nvc0_graph_ctor_fw(priv, "fuc409c", &priv->fuc409c) || diff --git a/drivers/gpu/drm/nouveau/core/include/engine/graph.h b/drivers/gpu/drm/nouveau/core/include/engine/graph.h index 6943b40..5d39243 100644 --- a/drivers/gpu/drm/nouveau/core/include/engine/graph.h +++ b/drivers/gpu/drm/nouveau/core/include/engine/graph.h @@ -26,6 +26,10 @@ struct nouveau_graph_chan { struct nouveau_graph { struct nouveau_engine base; + + /* Returns chipset-specific counts of units packed into an u64. + */ + u64 (*units)(struct nouveau_graph *); }; static inline struct nouveau_graph * diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index 3b6dc88..f19f0a4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -30,6 +30,7 @@ #include <subdev/fb.h> #include <subdev/timer.h> #include <subdev/instmem.h> +#include <engine/graph.h> #include "nouveau_drm.h" #include "nouveau_dma.h" @@ -168,6 +169,7 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) struct nouveau_drm *drm = nouveau_drm(dev); struct nouveau_device *device = nv_device(drm->device); struct nouveau_timer *ptimer = nouveau_timer(device); + struct nouveau_graph *graph = (void *)nouveau_engine(device, NVDEV_ENGINE_GR); struct drm_nouveau_getparam *getparam = data; switch (getparam->param) { @@ -208,14 +210,8 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) getparam->value = 1; break; case NOUVEAU_GETPARAM_GRAPH_UNITS: - /* NV40 and NV50 versions are quite different, but register - * address is the same. User is supposed to know the card - * family anyway... */ - if (device->chipset >= 0x40) { - getparam->value = nv_rd32(device, 0x001540); - break; - } - /* FALLTHRU */ + getparam->value = graph->units ? graph->units(graph) : 0; + break; default: nv_debug(device, "unknown parameter %lld\n", getparam->param); return -EINVAL; -- 1.7.3.4
Christoph Bumiller
2013-Mar-27 21:16 UTC
[Nouveau] [PATCH 3/4] drm/nvc0: add software methods to control some MP regs
--- .../gpu/drm/nouveau/core/engine/software/nvc0.c | 29 ++++++++++++++++++++ 1 files changed, 29 insertions(+), 0 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c index a523eaa..d698e71 100644 --- a/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c @@ -94,6 +94,32 @@ nvc0_software_mthd_flip(struct nouveau_object *object, u32 mthd, return -EINVAL; } +static int +nvc0_software_mthd_mp_control(struct nouveau_object *object, u32 mthd, + void *args, u32 size) +{ + struct nvc0_software_chan *chan = (void *)nv_engctx(object->parent); + struct nvc0_software_priv *priv = (void *)nv_object(chan)->engine; + u32 data = *(u32 *)args; + + switch (mthd) { + case 0x600: + nv_wr32(priv, 0x419e00, data); /* MP.PM_UNK000 */ + break; + case 0x644: + if (data & ~0x1ffffe) + return -EINVAL; + nv_wr32(priv, 0x419e44, data); /* MP.TRAP_WARP_ERROR_EN */ + break; + case 0x6ac: + nv_wr32(priv, 0x419eac, data); /* MP.PM_UNK0AC */ + break; + default: + return -EINVAL; + } + return 0; +} + static struct nouveau_omthds nvc0_software_omthds[] = { { 0x0400, 0x0400, nvc0_software_mthd_vblsem_offset }, @@ -101,6 +127,9 @@ nvc0_software_omthds[] = { { 0x0408, 0x0408, nvc0_software_mthd_vblsem_value }, { 0x040c, 0x040c, nvc0_software_mthd_vblsem_release }, { 0x0500, 0x0500, nvc0_software_mthd_flip }, + { 0x0600, 0x0600, nvc0_software_mthd_mp_control }, + { 0x0644, 0x0644, nvc0_software_mthd_mp_control }, + { 0x06ac, 0x06ac, nvc0_software_mthd_mp_control }, {} }; -- 1.7.3.4
Christoph Bumiller
2013-Mar-27 21:16 UTC
[Nouveau] [PATCH 4/4] drm/nve0: add handling for a bunch of PGRAPH traps
--- drivers/gpu/drm/nouveau/core/engine/graph/nve0.c | 230 ++++++++++++++++++++++ 1 files changed, 230 insertions(+), 0 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c index 4b45afb..e411b18 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c @@ -77,11 +77,214 @@ nve0_graph_ctxctl_isr(struct nvc0_graph_priv *priv) nv_wr32(priv, 0x409c20, ustat); } +static const struct nouveau_enum nve0_mp_warp_error[] = { + { 0x00, "(no error)" }, + { 0x01, "stack mismatch" }, + { 0x05, "misaligned PC" }, + { 0x08, "misaligned GPR" }, + { 0x09, "invalid opcode" }, + { 0x0d, "out of bounds GPR" }, + { 0x0e, "out of bounds l/s/a[]" }, + { 0x0f, "unaligned memory access" }, + { 0x11, "invalid param" }, + {} +}; + +static const struct nouveau_enum nve0_mp_global_error[] = { + { 2, "multiple warp errors" }, + { 3, "out of stack space" }, + {} +}; + +static const struct nouveau_enum nve0_gpc_rop_error[] = { + { 1, "RT pitch overrun" }, + { 4, "RT width overrun" }, + { 5, "RT height overrun" }, + { 7, "ZETA storage type mismatch" }, + { 8, "RT storage type mismatch" }, + { 10, "RT linear mismatch" }, + {} +}; + +static const struct nouveau_enum nve0_sked_error[] = { + { 7, "constant buffer size" }, + { 9, "local memory size pos" }, + { 10, "local memory size neg" }, + { 11, "warp cstack size" }, + { 12, "total temp size" }, + { 13, "register count" }, + { 18, "total threads" }, + { 20, "program offset" }, + { 21, "shared memory size" }, + { 25, "shared/l1 config" }, + { 26, "total register count" }, + {} +}; + +static void +nve0_graph_mp_trap(struct nvc0_graph_priv *priv, int gpc, int tp) +{ + int i; + u32 bpct = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x010)); + u32 bpst = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x00c)); + u32 werr = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x648)); + u32 gerr = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x650)); + + nv_error(priv, "GPC%i/TP%i/MP trap:", gpc, tp); + + for (i = 0; i <= 31; ++i) { + if (!(gerr & (1 << i))) + continue; + pr_cont(" <"); + nouveau_enum_print(nve0_mp_global_error, i); + pr_cont(">"); + } + if (werr) { + pr_cont(" <"); + nouveau_enum_print(nve0_mp_warp_error, werr & 0xffff); + pr_cont(">"); + } + pr_cont("\n"); + + nv_error(priv, "breakpoint control,status = %08x %08x\n", bpct, bpst); + + /* disable MP trap to avoid spam */ + nv_mask(priv, TPC_UNIT(gpc, tp, 0x50c), 0x2, 0x0); + + /* TODO: figure out how to resume after an MP trap */ +} + +static void +nve0_graph_tp_trap(struct nvc0_graph_priv *priv, int gpc, int tp) +{ + u32 stat = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x508)); + + if (stat & 0x1) { + u32 trap = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x224)); + nv_error(priv, "GPC%i/TP%i/TEX trap: %08x\n", + gpc, tp, trap); + + nv_wr32(priv, TPC_UNIT(gpc, tp, 0x224), 0xc0000000); + stat &= ~0x1; + } + + if (stat & 0x2) { + nve0_graph_mp_trap(priv, gpc, tp); + stat &= ~0x2; + } + + if (stat & 0x4) { + u32 trap = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x084)); + nv_error(priv, "GPC%i/TP%i/POLY trap: %08x\n", + gpc, tp, trap); + + nv_wr32(priv, TPC_UNIT(gpc, tp, 0x084), 0xc0000000); + stat &= ~0x4; + } + + if (stat & 0x8) { + u32 trap = nv_rd32(priv, TPC_UNIT(gpc, tp, 0x48c)); + nv_error(priv, "GPC%i/TP%i/L1C trap: %08x\n", + gpc, tp, trap); + + nv_wr32(priv, TPC_UNIT(gpc, tp, 0x48c), 0xc0000000); + stat &= ~0x8; + } + + if (stat) { + nv_error(priv, "GPC%i/TP%i: unknown stat %08x\n", + gpc, tp, stat); + } +} + +static void +nve0_graph_gpc_trap(struct nvc0_graph_priv *priv) +{ + const u32 mask = nv_rd32(priv, 0x400118); + int gpc; + + for (gpc = 0; gpc < 4; ++gpc) { + u32 stat; + int tp; + + if (!(mask & (1 << gpc))) + continue; + stat = nv_rd32(priv, GPC_UNIT(gpc, 0x2c90)); + + if (stat & 0x0001) { + u32 trap[4]; + int i; + + trap[0] = nv_rd32(priv, GPC_UNIT(gpc, 0x0420)); + trap[1] = nv_rd32(priv, GPC_UNIT(gpc, 0x0434)); + trap[2] = nv_rd32(priv, GPC_UNIT(gpc, 0x0438)); + trap[3] = nv_rd32(priv, GPC_UNIT(gpc, 0x043c)); + + nv_error(priv, "GPC%i/PROP trap:", gpc); + for (i = 0; i <= 29; ++i) { + if (!(trap[0] & (1 << i))) + continue; + pr_cont(" <"); + nouveau_enum_print(nve0_gpc_rop_error, i); + pr_cont(">"); + } + pr_cont("\n"); + + nv_error(priv, "x = %u, y = %u, " + "format = %x, storage type = %x\n", + trap[1] & 0xffff, + trap[1] >> 16, + (trap[2] >> 8) & 0x3f, + trap[3] & 0xff); + + nv_wr32(priv, GPC_UNIT(gpc, 0x0420), 0xc0000000); + stat &= ~0x0001; + } + + if (stat & 0x0002) { + u32 trap = nv_rd32(priv, GPC_UNIT(gpc, 0x0900)); + nv_error(priv, "GPC%i/ZCULL trap: %08x\n", gpc, + trap); + nv_wr32(priv, GPC_UNIT(gpc, 0x0900), 0xc0000000); + stat &= ~0x0002; + } + + if (stat & 0x0004) { + u32 trap = nv_rd32(priv, GPC_UNIT(gpc, 0x1028)); + nv_error(priv, "GPC%i/CCACHE trap: %08x\n", gpc, + trap); + nv_wr32(priv, GPC_UNIT(gpc, 0x1028), 0xc0000000); + stat &= ~0x0004; + } + + if (stat & 0x0008) { + u32 trap = nv_rd32(priv, GPC_UNIT(gpc, 0x0824)); + nv_error(priv, "GPC%i/ESETUP trap %08x\n", gpc, + trap); + nv_wr32(priv, GPC_UNIT(gpc, 0x0824), 0xc0000000); + stat &= ~0x0008; + } + + for (tp = 0; tp < 8; ++tp) { + if (stat & (1 << (16 + tp))) + nve0_graph_tp_trap(priv, gpc, tp); + } + stat &= ~0xff0000; + + if (stat) { + nv_error(priv, "GPC%i: unknown stat %08x\n", + gpc, stat); + } + } +} + + static void nve0_graph_trap_isr(struct nvc0_graph_priv *priv, int chid, u64 inst, struct nouveau_object *engctx) { u32 trap = nv_rd32(priv, 0x400108); + int i; int rop; if (trap & 0x00000001) { @@ -102,6 +305,33 @@ nve0_graph_trap_isr(struct nvc0_graph_priv *priv, int chid, u64 inst, trap &= ~0x00000010; } + if (trap & 0x00000100) { + u32 stat = nv_rd32(priv, 0x407020); + nv_error(priv, "SKED ch %d [0x%010llx %s]:", + chid, inst, nouveau_client_name(engctx)); + + for (i = 0; i <= 29; ++i) { + if (!(stat & (1 << i))) + continue; + pr_cont(" <"); + nouveau_enum_print(nve0_sked_error, i); + pr_cont(">"); + } + pr_cont("\n"); + + if (stat & 0x3fffffff) + nv_wr32(priv, 0x407020, 0x40000000); + nv_wr32(priv, 0x400108, 0x00000100); + trap &= ~0x00000100; + } + + if (trap & 0x01000000) { + nv_error(priv, "GPC ch %d [0x%010llx %s]:\n", + chid, inst, nouveau_client_name(engctx)); + nve0_graph_gpc_trap(priv); + trap &= ~0x01000000; + } + if (trap & 0x02000000) { for (rop = 0; rop < priv->rop_nr; rop++) { u32 statz = nv_rd32(priv, ROP_UNIT(rop, 0x070)); -- 1.7.3.4
Maybe Matching Threads
- [PATCH] drm/nouveau: fix ltcg memory initialization after suspend
- [PATCH] drm/nouveau: fix ltcg memory corruptions
- [PATCH] drm/nvc0-/ltcg: fix ltcg memory initialization after suspend
- [PATCH] drm/nouveau: fix ltcg memory initialization after suspend
- [Bug 54437] New: linux-nouveau2.6 (3.6.0-rc4) : GTX580 : Xorg freezes when using accel