Christoph Bumiller
2009-Jun-21 16:40 UTC
[PATCH] nv50: initial support for IF, ELSE, ENDIF insns
--- src/gallium/drivers/nv50/nv50_program.c | 162 +++++++++++++++++++++++++------ src/gallium/drivers/nv50/nv50_program.h | 1 + 2 files changed, 132 insertions(+), 31 deletions(-) diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 5594560..16bf2f1 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -90,6 +90,8 @@ struct nv50_reg { int acc; /* instruction where this reg is last read (first insn == 1) */ }; +#define MAX_IF_LEVEL 4 /* arbitrary value */ + struct nv50_pc { struct nv50_program *p; @@ -119,11 +121,17 @@ struct nv50_pc { struct nv50_reg r_hpos[4]; + struct nv50_program_exec *if_cond; + struct nv50_program_exec *if_insn[MAX_IF_LEVEL]; + struct nv50_program_exec *if_join[MAX_IF_LEVEL]; + unsigned if_lvl; + /* current instruction and total number of insns */ unsigned insn_cur; unsigned insn_nr; boolean allow32; + boolean join_on; }; static inline void @@ -208,22 +216,6 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) return NULL; } -/* Assign the hw of the discarded temporary register src - * to the tgsi register dst and free src. - */ -static void -assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) -{ - assert(src->index == -1 && src->hw != -1); - - if (dst->hw != -1) - pc->r_temp[dst->hw] = NULL; - pc->r_temp[src->hw] = dst; - dst->hw = src->hw; - - FREE(src); -} - /* release the hardware resource held by r */ static void release_hw(struct nv50_pc *pc, struct nv50_reg *r) @@ -351,6 +343,11 @@ emit(struct nv50_pc *pc, struct nv50_program_exec *e) p->exec_head = e; p->exec_tail = e; p->exec_size += (e->inst[0] & 1) ? 2 : 1; + + if (pc->join_on) { + e->inst[1] |= 0x00000002; + pc->join_on = FALSE; + } } static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *); @@ -524,6 +521,28 @@ emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f) FREE(imm); } +/* Assign the hw of the discarded temporary register src + * to the tgsi register dst and free src. + */ +static void +assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + assert(src->index == -1 && src->hw != -1); + + if (pc->if_lvl > 0) { + emit_mov(pc, dst, src); + free_temp(pc, src); + return; + } + + if (dst->hw != -1) + pc->r_temp[dst->hw] = NULL; + pc->r_temp[src->hw] = dst; + dst->hw = src->hw; + + FREE(src); +} + static boolean check_swap_src_0_1(struct nv50_pc *pc, struct nv50_reg **s0, struct nv50_reg **s1) @@ -866,6 +885,8 @@ emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, set_src_0(pc, dst, e); emit(pc, e); + pc->if_cond = e; + if (dst != rdst) free_temp(pc, dst); } @@ -1098,6 +1119,39 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, } static void +emit_branch(struct nv50_pc *pc, int pred, unsigned cc, void *join) +{ + struct nv50_program_exec *e = exec(pc); + + if (join) { + set_long(pc, e); + e->inst[0] |= 0xa0000002; + emit(pc, e); + *(struct nv50_program_exec **)join = e; + e = exec(pc); + } + + set_long(pc, e); + e->inst[0] |= 0x10000002; + if (pred >= 0) + set_pred(pc, cc, pred, e); + emit(pc, e); +} + +static void +emit_nop(struct nv50_pc *pc, boolean full) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xf0000000; + if (full) { + set_long(pc, e); + e->inst[1] = 0xe0000000; + } + emit(pc, e); +} + +static void convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) { unsigned q = 0, m = ~0; @@ -1420,6 +1474,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) FREE(one); } break; + case TGSI_OPCODE_ELSE: + emit_branch(pc, -1, 0, NULL); + pc->if_insn[--pc->if_lvl]->bra = (1 << 31) | pc->p->exec_size; + pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; + break; + case TGSI_OPCODE_ENDIF: + i = pc->p->exec_size | (1 << 31); + pc->if_insn[--pc->if_lvl]->bra = i; + if (pc->if_join[pc->if_lvl]) { + pc->if_join[pc->if_lvl]->bra = i; + pc->if_join[pc->if_lvl] = NULL; + pc->join_on = TRUE; + } + if (pc->insn_cur == (pc->insn_nr - 1)) + emit_nop(pc, TRUE); + break; case TGSI_OPCODE_EX2: temp = temp_temp(pc); rtmp = *pp_rtmp; @@ -1442,6 +1512,12 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) emit_sub(pc, dst[c], src[0][c], temp); } break; + case TGSI_OPCODE_IF: + assert(pc->if_lvl < MAX_IF_LEVEL); + set_pred_wr(pc, 1, 0, pc->if_cond); + emit_branch(pc, 0, 2, &pc->if_join[pc->if_lvl]); + pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; + break; case TGSI_OPCODE_KIL: emit_kil(pc, src[0][0]); emit_kil(pc, src[0][1]); @@ -2181,8 +2257,8 @@ nv50vp_ucp_append(struct nv50_pc *pc) static void nv50_program_tx_postprocess(struct nv50_pc *pc) { - struct nv50_program_exec *e, *e_prev = NULL; - unsigned pos; + struct nv50_program_exec *e, **e_list, *e_prev = NULL; + unsigned i, n, pos; if (pc->p->type == PIPE_SHADER_FRAGMENT) nv50fp_move_outputs(pc); @@ -2190,15 +2266,31 @@ static void nv50_program_tx_postprocess(struct nv50_pc *pc) if (pc->p->type == PIPE_SHADER_VERTEX) nv50vp_ucp_append(pc); + /* collect branching instructions, we need to adjust their target + * offsets when converting half insns + */ + e_list = MALLOC(pc->p->exec_size * sizeof(struct nv50_program_exec *)); + + for (n = 0, e = pc->p->exec_head; e; e = e->next) { + if (e->bra) { + e_list[n++] = e; + e->bra &= ~(1 << 31); + } + } + for (e = pc->p->exec_head, pos = 0; e; e = e->next) { pos += is_long(e) ? 2 : 1; if ((!e->next || is_long(e->next)) && (pos & 1)) { + for (i = 0; i < n; i++) + if (e_list[i]->bra > (pos - 1)) + e_list[i]->bra++; convert_to_long(pc, e); pos++; } e_prev = e->next ? e : e_prev; } + FREE(e_list); /* last instruction must be long */ if (!is_long(pc->p->exec_tail)) { @@ -2234,7 +2326,8 @@ nv50_program_tx(struct nv50_program *p) /* don't allow half insn/immd on first and last instruction */ pc->allow32 = TRUE; - if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr) + if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr || + pc->join_on) pc->allow32 = FALSE; tgsi_parse_token(&parse); @@ -2383,11 +2476,29 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) heap = nv50->screen->code_heap[p->type]; code = nv50->screen->sprogbuf_code[p->type]; + size = align(p->exec_size * 4, 0x100); + + if (!p->code) { + ret = nouveau_resource_alloc(heap, size, p, &p->code); + if (ret) + assert(!"No more space in program VRAM buffer."); + } + if ((p->data[0] && p->data[0]->start != p->data_start[0]) || - (p->data[1] && p->data[1]->start != p->data_start[1])) { + (p->data[1] && p->data[1]->start != p->data_start[1])) + upload = TRUE; + + if (upload) { for (e = p->exec_head; e; e = e->next) { unsigned ei, ci, bs; + if (e->bra) { + assert(!(e->bra & 1)); + bs = (e->bra >> 1) + (p->code->start >> 3); + e->inst[0] &= 0xF0000FFF; + e->inst[0] |= (bs << 12); + } + if (e->param.index < 0) continue; bs = (e->inst[1] >> 22) & 0x07; @@ -2403,8 +2514,6 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) p->data_start[0] = p->data[0]->start; if (p->data[1]) p->data_start[1] = p->data[1]->start; - - upload = TRUE; } if (!upload) @@ -2434,15 +2543,6 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) nouveau_bo_unmap(p->bo); - size = align(p->exec_size * 4, 0x100); - if (!p->code) { - ret = nouveau_resource_alloc(heap, size, p, &p->code); - if (ret) { - NOUVEAU_ERR("Program VRAM buffer is full.\n"); - abort(); - } - } - nv50_transfer_gart_vram(&nv50->screen->base.base, code, p->code->start, p->bo, 0, size); } diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 1206aab..ac5230d 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -13,6 +13,7 @@ struct nv50_program_exec { unsigned mask; unsigned shift; } param; + unsigned bra; }; struct nv50_linkage { -- 1.6.0.6 --------------090503050107050804030002 Content-Type: text/plain; name="0013-nv50-support-for-SLE-SNE-SEQ-SGT.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="0013-nv50-support-for-SLE-SNE-SEQ-SGT.patch"