An attempt to improve performance, since assembling the VP output to FP input map had become a mess. This probably makes the mess even worse: it creates the VP and FP stateobjs only once and introduces a third shader-related stateobj, called linkage, which is stored in a list that is searched, and extended if no match is found, on validation. There is one extra linkage object for each configuration (VP, FP, BFC, PTSZ), where BFC and PTSZ are the rasterizer's light_twoside and point_size_per_vertex flags. --- src/gallium/drivers/nv50/nv50_context.h | 1 + src/gallium/drivers/nv50/nv50_program.c | 143 +++++++++++++++++++----- src/gallium/drivers/nv50/nv50_program.h | 10 ++ src/gallium/drivers/nv50/nv50_state_validate.c | 2 + 4 files changed, 129 insertions(+), 27 deletions(-) diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index c31c42a..aadcfda 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -117,6 +117,7 @@ struct nv50_state { unsigned miptree_nr; struct nouveau_stateobj *vertprog; struct nouveau_stateobj *fragprog; + struct nouveau_stateobj *plinkage; struct nouveau_stateobj *vtxfmt; struct nouveau_stateobj *vtxbuf; }; diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 30a1d32..5fae325 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -2407,16 +2407,21 @@ nv50_vertprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(32, 0); - so_method(so, tesla, 0x1650, 2); - so_data (so, p->cfg.vp.attr[0]); - so_data (so, p->cfg.vp.attr[1]); - so_method(so, tesla, 0x16b8, 1); - so_data (so, p->cfg.high_result); - so_method(so, tesla, 0x140c, 1); - so_data (so, p->code->start); - so_ref(so, &nv50->state.vertprog); - so_ref(NULL, &so); + if (!p->so) { + so = so_new(7, 0); + so_method(so, tesla, 0x1650, 2); + so_data (so, p->cfg.vp.attr[0]); + so_data (so, p->cfg.vp.attr[1]); + so_method(so, tesla, 0x16b8, 1); + so_data (so, 
p->cfg.high_result); + so_method(so, tesla, 0x140c, 1); + so_data (so, p->code->start); + so_ref(so, &p->so); + so_ref(NULL, &so); + + } + + so_ref(p->so, &nv50->state.vertprog); } void @@ -2435,17 +2440,64 @@ nv50_fragprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(8, 0); - so_method(so, tesla, 0x1298, 1); - so_data (so, p->cfg.high_result); - so_method(so, tesla, 0x19a8, 1); - so_data (so, p->cfg.fp.regs[2]); - so_method(so, tesla, 0x196c, 1); - so_data (so, p->cfg.fp.regs[3]); - so_method(so, tesla, 0x1414, 1); - so_data (so, p->code->start); - so_ref(so, &nv50->state.fragprog); - so_ref(NULL, &so); + if (!p->so) { + so = so_new(8, 0); + so_method(so, tesla, 0x1298, 1); + so_data (so, p->cfg.high_result); + so_method(so, tesla, 0x19a8, 1); + so_data (so, p->cfg.fp.regs[2]); + so_method(so, tesla, 0x196c, 1); + so_data (so, p->cfg.fp.regs[3]); + so_method(so, tesla, 0x1414, 1); + so_data (so, p->code->start); + so_ref(so, &p->so); + so_ref(NULL, &so); + } + + so_ref(p->so, &nv50->state.fragprog); +} + +static struct nv50_linkage * +program_add_linkage(struct nv50_program *vp, struct nv50_program *fp) +{ + struct nv50_linkage *ln = CALLOC_STRUCT(nv50_linkage); + struct nv50_program *pg[2] = { vp, fp }; + unsigned i; + + for (i = 0; i < 2; i++) { + if (pg[i]->ln) { + ln->next[i] = pg[i]->ln->next[i]; + pg[i]->ln->next[i] = ln; + } else { + pg[i]->ln = ln; + ln->next[i] = ln; + } + ln->prog[i] = (void *)pg[i]; + } + + return ln; +} + +static void +program_del_linkage(struct nv50_linkage *ln) +{ + struct nv50_linkage *it; + struct nv50_program *pg[2]; + unsigned i; + + pg[0] = (struct nv50_program *)ln->prog[0]; + pg[1] = (struct nv50_program *)ln->prog[1]; + + for (i = 0; i < 2; i++) { + for (it = pg[i]->ln; it->next[i] != ln; it = it->next[i]); + it->next[i] = ln->next[i]; + if (pg[i]->ln == ln) + pg[i]->ln = (ln->next[i] == ln) ? 
NULL : ln->next[i]; + } + + if (ln->so) + so_ref(NULL, &ln->so); + FREE(ln); } /* @@ -2473,16 +2525,14 @@ nv50_fragprog_validate(struct nv50_context *nv50) * II = count of non-masked interpolants * ii = almost always equal to II (except if II -> 00, why ?) */ -void -nv50_linkage_validate(struct nv50_context *nv50) +static struct nv50_linkage * +nv50_linkage_create(struct nv50_context *nv50) { - /* this is going to be rather complicated at first, but it works - * like this; maybe we can simplify later, though - */ - struct nouveau_stateobj *so = nv50->state.vertprog; + struct nv50_linkage *ln; struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program *vp = nv50->vertprog; struct nv50_program *fp = nv50->fragprog; + struct nouveau_stateobj *so = so_new(32, 0); uint32_t regs[5]; uint32_t map[8], i, n, k, m = 4; @@ -2538,6 +2588,42 @@ nv50_linkage_validate(struct nv50_context *nv50) so_method(so, tesla, 0x1988, 2); so_data (so, regs[4]); so_data (so, fp->cfg.high_temp); + + ln = program_add_linkage(vp, fp); + + so_ref(so, &ln->so); + so_ref(NULL, &so); + + return ln; +} + +void nv50_linkage_validate(struct nv50_context *nv50) +{ + struct nv50_linkage *it, *ln = NULL; + struct nv50_program *vp = nv50->vertprog; + struct nv50_program *fp = nv50->fragprog; + unsigned cfg; + + cfg = nv50->rasterizer->pipe.light_twoside; + cfg |= nv50->rasterizer->pipe.point_size_per_vertex << 1; + + if (vp->ln) { + it = vp->ln->next[0]; + do { + if (it->prog[1] == (void *)fp && it->cfg == cfg) { + ln = it; + break; + } + it = it->next[0]; + } while (it != vp->ln); + } + + if (!ln) { + ln = nv50_linkage_create(nv50); + ln->cfg = cfg; + } + + so_ref(ln->so, &nv50->state.plinkage); } void @@ -2558,6 +2644,9 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) nouveau_resource_free(&p->data[1]); nouveau_resource_free(&p->code); + while (p->ln) + program_del_linkage(p->ln); + p->translated = 0; } diff --git a/src/gallium/drivers/nv50/nv50_program.h 
b/src/gallium/drivers/nv50/nv50_program.h index b7921ad..6478338 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -15,6 +15,13 @@ struct nv50_program_exec { } param; }; +struct nv50_linkage { + struct nv50_linkage *next[2]; + struct nouveau_stateobj *so; + void *prog[2]; + unsigned cfg; +}; + struct nv50_program { struct pipe_shader_state pipe; struct tgsi_shader_info info; @@ -34,6 +41,9 @@ struct nv50_program { unsigned immd_nr; unsigned param_nr; + struct nouveau_stateobj *so; + struct nv50_linkage *ln; + struct { unsigned high_temp; unsigned high_result; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index aa02947..cb9bb76 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -150,6 +150,8 @@ nv50_state_emit(struct nv50_context *nv50) so_emit(chan, nv50->state.vertprog); if (nv50->state.dirty & NV50_NEW_FRAGPROG) so_emit(chan, nv50->state.fragprog); + if (nv50->state.dirty & (NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG)) + so_emit(chan, nv50->state.plinkage); if (nv50->state.dirty & NV50_NEW_RASTERIZER) so_emit(chan, nv50->state.rast); if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR) -- 1.6.0.6 --------------090503050107050804030002 Content-Type: text/plain; name="0009-nv50-support-for-user-clip-planes.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="0009-nv50-support-for-user-clip-planes.patch"