This might keep the number of used TEMPs down. --- src/gallium/drivers/nv50/nv50_program.c | 59 ++++++++++++++++++++----------- 1 files changed, 38 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 28a9f2a..249f069 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -132,6 +132,7 @@ struct nv50_pc { boolean allow32; boolean join_on; + boolean preload; }; static inline void @@ -1242,6 +1243,23 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) e->inst[1] |= q; } +static void +load_interpolant(struct nv50_pc *pc, struct nv50_reg *r) +{ + struct nv50_reg *iv = pc->iv_p; + int rhw = r->rhw; + + if (pc->interp_mode[r->index] & INTERP_CENTROID) + iv = pc->iv_c; + + r->rhw = -1; + alloc_reg(pc, r); + r->rhw = rhw; + + if (pc->preload) + emit_interp(pc, r, iv, pc->interp_mode[r->index]); +} + static boolean negate_supported(const struct tgsi_full_instruction *insn, int i) { @@ -1297,6 +1315,8 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, switch (src->SrcRegister.File) { case TGSI_FILE_INPUT: r = &pc->attr[src->SrcRegister.Index * 4 + c]; + if (r->hw == -1 && r->rhw >= 0) + load_interpolant(pc, r); break; case TGSI_FILE_TEMPORARY: r = &pc->temp[src->SrcRegister.Index * 4 + c]; @@ -1416,6 +1436,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) if (*pp_rtmp && (*pp_rtmp)->type != P_TEMP && (nr_dst > 1 || sat)) pp_rtmp = &temp; + pc->preload = (inst->Instruction.Opcode != TGSI_OPCODE_TXP); + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; @@ -1860,18 +1882,15 @@ prep_inspect_insn(struct nv50_pc *pc, const union tgsi_full_token *tok, } static unsigned -load_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *mid, +prep_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *p_mid, int 
*aid, int *p_oid) { - struct nv50_reg *iv; - int oid, c, n; + int c, n, oid = *p_oid, mid = *p_mid; unsigned mask = 0; - iv = (pc->interp_mode[i] & INTERP_CENTROID) ? pc->iv_c : pc->iv_p; - for (c = 0, n = i * 4; c < 4; c++, n++) { - oid = (*p_oid)++; pc->attr[n].type = P_TEMP; + pc->attr[n].hw = -1; pc->attr[n].index = i; if (pc->attr[n].acc == acc[n]) @@ -1879,17 +1898,15 @@ load_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *mid, mask |= (1 << c); pc->attr[n].acc = acc[n]; - pc->attr[n].rhw = pc->attr[n].hw = -1; - alloc_reg(pc, &pc->attr[n]); - pc->attr[n].rhw = (*aid)++; - emit_interp(pc, &pc->attr[n], iv, pc->interp_mode[i]); - pc->p->cfg.fp.map[(*mid) / 4] |= oid << (8 * ((*mid) % 4)); - (*mid)++; + pc->p->cfg.fp.map[mid / 4] |= (oid + c) << (8 * (mid % 4)); + mid++; pc->p->cfg.fp.regs[1] += 0x00010001; } + *p_mid = mid; + *p_oid = oid + 4; return mask; } @@ -2063,7 +2080,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (fcrd != 0xffff) { unsigned mask; oid = mid = 0; - mask = load_fp_attrib(pc, fcrd, r_usage[1], + mask = prep_fp_attrib(pc, fcrd, r_usage[1], &mid, &aid, &oid); pc->p->cfg.fp.regs[1] |= (mask << 24); pc->p->cfg.fp.map[0] += 0x04040404 * fcrd; @@ -2103,10 +2120,10 @@ nv50_program_tx_prep(struct nv50_pc *pc) i = mid; if (fcol[0] != 0xffff) - load_fp_attrib(pc, fcol[0], r_usage[1], + prep_fp_attrib(pc, fcol[0], r_usage[1], &mid, &aid, &oid); if (fcol[1] != 0xffff) - load_fp_attrib(pc, fcol[1], r_usage[1], + prep_fp_attrib(pc, fcol[1], r_usage[1], &mid, &aid, &oid); /* set count of mapped color components */ @@ -2115,14 +2132,9 @@ nv50_program_tx_prep(struct nv50_pc *pc) /* reset oid and load remaining attrs */ oid = (fcrd == 0xffff) ? 
4 : 0; for (i = 0; i < pc->attr_nr; i++) - load_fp_attrib(pc, i, r_usage[1], + prep_fp_attrib(pc, i, r_usage[1], &mid, &aid, &oid); - if (pc->iv_p) - free_temp(pc, pc->iv_p); - if (pc->iv_c) - free_temp(pc, pc->iv_c); - pc->p->cfg.fp.high_map = mid; } else { /* vertex program */ @@ -2228,6 +2240,11 @@ free_nv50_pc(struct nv50_pc *pc) if (pc->temp) FREE(pc->temp); + if (pc->iv_p) + free_temp(pc, pc->iv_p); + if (pc->iv_c) + free_temp(pc, pc->iv_c); + FREE(pc); } -- 1.6.0.6 --------------090503050107050804030002 Content-Type: text/plain; name="0016-nv50-update-comments.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="0016-nv50-update-comments.patch"