--- src/gallium/drivers/nv50/nv50_context.h | 1 + src/gallium/drivers/nv50/nv50_program.c | 171 ++++++++++++++++++------ src/gallium/drivers/nv50/nv50_program.h | 1 + src/gallium/drivers/nv50/nv50_state_validate.c | 3 + 4 files changed, 138 insertions(+), 38 deletions(-) diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 44463d6..c31c42a 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -190,6 +190,7 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers, /* nv50_program.c */ extern void nv50_vertprog_validate(struct nv50_context *nv50); extern void nv50_fragprog_validate(struct nv50_context *nv50); +extern void nv50_linkage_validate(struct nv50_context *nv50); extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p); /* nv50_state_validate.c */ diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 7a4bc18..30a1d32 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1779,7 +1779,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) struct tgsi_parse_context p; boolean ret = FALSE; unsigned i, c; - unsigned fcol, bcol, fcrd, depr; + unsigned fcol[2], bcol[2], fcrd, depr; /* count (centroid) perspective interpolations */ unsigned centroid_loads = 0; @@ -1791,7 +1791,9 @@ nv50_program_tx_prep(struct nv50_pc *pc) r_usage[0] = CALLOC(pc->temp_nr * 4, sizeof(unsigned)); r_usage[1] = CALLOC(pc->attr_nr * 4, sizeof(unsigned)); - depr = fcol = bcol = fcrd = 0xffff; + fcol[0] = fcol[1] = 0xffff; + bcol[0] = bcol[1] = 0xffff; + depr = fcrd = 0xffff; tgsi_parse_init(&p, pc->p->pipe.tokens); while (!tgsi_parse_end_of_tokens(&p)) { @@ -1826,12 +1828,21 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (!d->Declaration.Semantic) break; + c = d->Semantic.SemanticIndex; switch (d->Semantic.SemanticName) { case TGSI_SEMANTIC_POSITION: depr = first; pc->p->cfg.fp.regs[2] |= 0x00000100; pc->p->cfg.fp.regs[3] |= 0x00000011; break; + case TGSI_SEMANTIC_COLOR: + if (pc->p->type == PIPE_SHADER_VERTEX) + fcol[c] = first; + break; + case TGSI_SEMANTIC_BCOLOR: + if (pc->p->type == PIPE_SHADER_VERTEX) + bcol[c] = first; + break; default: break; } @@ -1854,17 +1865,14 @@ nv50_program_tx_prep(struct nv50_pc *pc) break; } + c = d->Semantic.SemanticIndex; if (d->Declaration.Semantic) { switch (d->Semantic.SemanticName) { case TGSI_SEMANTIC_POSITION: fcrd = first; break; case TGSI_SEMANTIC_COLOR: - fcol = first; - mode = INTERP_PERSPECTIVE; - break; - case TGSI_SEMANTIC_BCOLOR: - bcol = first; + fcol[c] = first; mode = INTERP_PERSPECTIVE; break; } @@ -1931,10 +1939,9 @@ nv50_program_tx_prep(struct nv50_pc *pc) /* position should be loaded first */ if (fcrd != 0xffff) { unsigned mask; - mid = 0; + oid = mid = 0; mask = load_fp_attrib(pc, fcrd, r_usage[1], &mid, &aid, &oid); - oid = 0; pc->p->cfg.fp.regs[1] |= (mask << 24); pc->p->cfg.fp.map[0] += 0x04040404 * fcrd; } @@ -1966,16 +1973,24 @@ nv50_program_tx_prep(struct nv50_pc *pc) pc->p->cfg.fp.regs[1] |= 0x08000000; } - for (c = 0; c < 4; c++) { - /* I don't know what these values do, but - * let's set them like the blob does: - */ - if (fcol != 0xffff && r_usage[1][fcol * 4 + c]) - pc->p->cfg.fp.regs[0] += 0x00010000; - if (bcol != 0xffff && r_usage[1][bcol * 4 + c]) - pc->p->cfg.fp.regs[0] += 0x00010000; - } + /* load colors directly after position - XXX: might + * not be necessary if we always get colors first + */ + oid += fcol[0] * 4; + i = mid; + + if (fcol[0] != 0xffff) + load_fp_attrib(pc, fcol[0], r_usage[1], + &mid, &aid, &oid); + if (fcol[1] != 0xffff) + load_fp_attrib(pc, fcol[1], r_usage[1], + &mid, &aid, &oid); + + /* set count of mapped color components */ + pc->p->cfg.fp.regs[0] |= (mid - i) << 16; + /* reset oid and load remaining attrs */ + oid = (fcrd == 0xffff) ? 4 : 0; for (i = 0; i < pc->attr_nr; i++) load_fp_attrib(pc, i, r_usage[1], &mid, &aid, &oid); @@ -1985,8 +2000,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->iv_c) free_temp(pc, pc->iv_c); - pc->p->cfg.fp.high_map = (mid / 4); - pc->p->cfg.fp.high_map += ((mid % 4) ? 1 : 0); + pc->p->cfg.fp.high_map = mid; } else { /* vertex program */ for (i = 0; i < pc->attr_nr; i++) { @@ -2011,6 +2025,10 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (pc->p->type == PIPE_SHADER_VERTEX) { for (i = 0; i < nr; i++) ctor_reg(&pc->result[i], P_RESULT, i / 4, i); + + /* output id offset bcol from fcol */ + if (bcol[0] != 0xffff) + pc->p->cfg.vp.bcol = bcol[0] - fcol[0]; } else { /* pc->p->type == PIPE_SHADER_FRAGMENT */ for (i = 0; i < pc->result_nr; i++) { @@ -2101,7 +2119,7 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) p->cfg.fp.regs[1] = 0x00000400; p->cfg.fp.map[0] = 0x03020100; - p->cfg.fp.high_map = 1; + p->cfg.fp.high_map = 4; break; default: assert(!"unsupported GPU program type"); @@ -2389,15 +2407,12 @@ nv50_vertprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(10, 0); + so = so_new(32, 0); so_method(so, tesla, 0x1650, 2); so_data (so, p->cfg.vp.attr[0]); so_data (so, p->cfg.vp.attr[1]); so_method(so, tesla, 0x16b8, 1); so_data (so, p->cfg.high_result); - so_method(so, tesla, 0x16ac, 2); - so_data (so, p->cfg.high_result); //8); - so_data (so, p->cfg.high_temp); so_method(so, tesla, 0x140c, 1); so_data (so, p->code->start); so_ref(so, &nv50->state.vertprog); @@ -2410,7 +2425,6 @@ nv50_fragprog_validate(struct nv50_context *nv50) struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program *p = nv50->fragprog; struct nouveau_stateobj *so; - unsigned i; if (!p->translated) { nv50_program_validate(nv50, p); @@ -2421,18 +2435,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(32, 0); - so_method(so, tesla, 0x1904, 4); - so_data (so, p->cfg.fp.regs[0]); /* 0x01000404 / 0x00040404 */ - so_data (so, 0x00000004); - so_data (so, 0x00000000); - so_data (so, 0x00000000); - so_method(so, tesla, 0x16bc, p->cfg.fp.high_map); - for (i = 0; i < p->cfg.fp.high_map; i++) - so_data(so, p->cfg.fp.map[i]); - so_method(so, tesla, 0x1988, 2); - so_data (so, p->cfg.fp.regs[1]); /* 0x08040404 / 0x0f000401 */ - so_data (so, p->cfg.high_temp); + so = so_new(8, 0); so_method(so, tesla, 0x1298, 1); so_data (so, p->cfg.high_result); so_method(so, tesla, 0x19a8, 1); @@ -2445,6 +2448,98 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_ref(NULL, &so); } +/* + * 1510 = bitmask to enable clipping planes + * 1688 = two-sided lighting enable + * 16ac = entry count of mapping table at [16bc] + * 16b0 = count of temporaries used in VP + * + * 1904 = 0x01CCBBFF (01 is sometimes 00 - ?) + * CC = number of color components in map (primary + secondary) + * BB = first back color's map index (colors should be contiguous) + * FF = first front color's map index + * + * 1908 = 0x0000HHLL + * LL = first clipping distance map index (4 if no UCPs) + * HH = last clipping distance map index + 1 (0 if no UCPs) + * + * 1910 = 0x00000SSe + * e = enable point size output (0 / 1) + * SS = point size map index (0 if disabled) + * + * 1988 = 0xMMIInnii + * MM = bitmask to un-mask masked VP/GP outputs (i.e. HPOS, generic ?) + * nn = map index of first non-masked output, where to put front color + * II = count of non-masked interpolants + * ii = almost always equal to II (except if II -> 00, why ?) + */ +void +nv50_linkage_validate(struct nv50_context *nv50) +{ + /* this is going to be rather complicated at first, but it works + * like this; maybe we can simplify later, though + */ + struct nouveau_stateobj *so = nv50->state.vertprog; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *vp = nv50->vertprog; + struct nv50_program *fp = nv50->fragprog; + + uint32_t regs[5]; + uint32_t map[8], i, n, k, m = 4; + + memset(map, 0, 8 * sizeof(uint32_t)); + map[0] = fp->cfg.fp.map[0]; + + regs[1] /* 1908 */ = 0x00000004; + regs[2] /* 190c */ = 0x00000000; + regs[3] /* 1910 */ = 0x00000000; + regs[0] /* 1904 */ = fp->cfg.fp.regs[0]; + regs[4] /* 1988 */ = fp->cfg.fp.regs[1]; + + so_method(so, tesla, 0x1688, 1); + + if (nv50->rasterizer->pipe.light_twoside) { + so_data(so, 1); + n = (regs[0] >> 16) & 0xff; + + /* copy front color mappings and add output offset to BFC0 */ + for (i = 4; i < 4 + n; i++, m++) { + k = fp->cfg.fp.map[i / 4] >> (8 * (i % 4)); + k &= 0xff; + map[m / 4] |= (k + vp->cfg.vp.bcol) << (8 * (m % 4)); + } + + regs[0] += n; + regs[2] += (n << 8); + } else + so_data(so, 0); + + for (i = 4; i < fp->cfg.fp.high_map; i++, m++) { + k = fp->cfg.fp.map[i / 4] >> (8 * (i % 4)); + k &= 0xff; + map[m / 4] |= k << (8 * (m % 4)); + } + + so_method(so, tesla, 0x16ac, 2); + so_data (so, m); + so_data (so, vp->cfg.high_temp); + + so_method(so, tesla, 0x1904, 4); + so_data (so, regs[0]); + so_data (so, regs[1]); + so_data (so, regs[2]); + so_data (so, regs[3]); + + n = (m / 4) + ((m % 4) ? 1 : 0); + so_method(so, tesla, 0x16bc, n); + for (i = 0; i < n; i++) + so_data(so, map[i]); + + so_method(so, tesla, 0x1988, 2); + so_data (so, regs[4]); + so_data (so, fp->cfg.high_temp); +} + void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) { diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index ed3f67b..b7921ad 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -39,6 +39,7 @@ struct nv50_program { unsigned high_result; struct { unsigned attr[2]; + unsigned bcol; } vp; struct { unsigned regs[4]; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 0caf4b4..aa02947 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -199,6 +199,9 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) nv50_fragprog_validate(nv50); + if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG)) + nv50_linkage_validate(nv50); + if (nv50->dirty & NV50_NEW_RASTERIZER) so_ref(nv50->rasterizer->so, &nv50->state.rast); -- 1.6.0.6 --------------090503050107050804030002 Content-Type: text/plain; name="0008-nv50-introduce-linkage-stateobj.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="0008-nv50-introduce-linkage-stateobj.patch"