--- src/gallium/drivers/nv50/nv50_program.c | 118 +++++++++++++++++++++---------- 1 files changed, 80 insertions(+), 38 deletions(-) diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 16bf2f1..75c5cea 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -810,7 +810,11 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) #define CVTOP_TRUNC 0x07 #define CVTOP_SAT 0x08 #define CVTOP_ABS 0x10 +#define CVTOP_ABSRN 0x11 +/* 0x04 == 32 bit */ +/* 0x40 == dst is float */ +/* 0x80 == src is float */ #define CVT_F32_F32 0xc4 #define CVT_F32_S32 0x44 #define CVT_F32_U32 0x64 @@ -819,8 +823,8 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) #define CVT_F32_F32_ROP 0xcc static void -emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, - int wp, unsigned cop, unsigned fmt) +emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, int wp, + struct nv50_reg *src, unsigned cvn, unsigned fmt) { struct nv50_program_exec *e; @@ -829,7 +833,7 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, e->inst[0] |= 0xa0000000; e->inst[1] |= 0x00004000; - e->inst[1] |= (cop << 16); + e->inst[1] |= (cvn << 16); e->inst[1] |= (fmt << 24); set_src_0(pc, src, e); @@ -846,55 +850,94 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, emit(pc, e); } +static INLINE unsigned +map_opcode_ccode(unsigned op) +{ + switch (op) { + case TGSI_OPCODE_SLT: return TGSI_CC_LT; + case TGSI_OPCODE_SGE: return TGSI_CC_GE; + case TGSI_OPCODE_SEQ: return TGSI_CC_EQ; + case TGSI_OPCODE_SGT: return TGSI_CC_GT; + case TGSI_OPCODE_SLE: return TGSI_CC_LE; + case TGSI_OPCODE_SNE: return TGSI_CC_NE; + default: + assert(0); + return 0; + } +} + +static INLINE unsigned +map_ccode_nv50(unsigned cc) +{ + assert(cc < 16); + + switch (cc) { + case TGSI_CC_GT: return 0x4; + case TGSI_CC_EQ: return 0x2; + case 
TGSI_CC_LT: return 0x1; + case TGSI_CC_GE: return 0x6; + case TGSI_CC_LE: return 0x3; + case TGSI_CC_NE: return 0xd; + /* cc + 8: emit_set saw check_swap_src_0_1() exchange src0/src1, so mirror LT<->GT and LE<->GE; EQ/NE stay as they are */ + case TGSI_CC_GT + 8: return 0x1; + case TGSI_CC_EQ + 8: return 0x2; + case TGSI_CC_LT + 8: return 0x4; + case TGSI_CC_GE + 8: return 0x3; + case TGSI_CC_LE + 8: return 0x6; + case TGSI_CC_NE + 8: return 0xd; + + default: + assert(!"invalid condition code"); + return 0x0; + } +} + static void -emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, +emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, int wp, struct nv50_reg *src0, struct nv50_reg *src1) { struct nv50_program_exec *e = exec(pc); - unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; struct nv50_reg *rdst; - assert(c_op <= 7); if (check_swap_src_0_1(pc, &src0, &src1)) - c_op = inv_cop[c_op]; + c_op += 8; rdst = dst; - if (dst->type != P_TEMP) - dst = alloc_temp(pc, NULL); + if (dst && dst->type != P_TEMP) + dst = temp_temp(pc); /* set.u32 */ set_long(pc, e); e->inst[0] |= 0xb0000000; - e->inst[1] |= (3 << 29); - e->inst[1] |= (c_op << 14); - /*XXX: breaks things, .u32 by default? - * decuda will disasm as .u16 and use .lo/.hi regs, but this - * doesn't seem to match what the hw actually does. - inst[1] |= 0x04000000; << breaks things.. .u32 by default? 
- */ - set_dst(pc, dst, e); + e->inst[1] |= 0x60000000; + /* XXX: decuda will disasm .u16 lo/hi, + * but 32 bit flag breaks things: */ + /* e->inst[1] |= 0x04000000; */ + e->inst[1] |= (map_ccode_nv50(c_op) << 14); + + if (wp >= 0) + set_pred_wr(pc, 1, wp, e); + if (dst) + set_dst(pc, dst, e); + else { + e->inst[0] |= 0x000001fc; + e->inst[1] |= 0x00000008; + } + set_src_0(pc, src0, e); set_src_1(pc, src1, e); - emit(pc, e); - /* cvt.f32.u32 */ - e = exec(pc); - e->inst[0] = 0xa0000001; - e->inst[1] = 0x64014780; - set_dst(pc, rdst, e); - set_src_0(pc, dst, e); emit(pc, e); - pc->if_cond = e; - if (dst != rdst) - free_temp(pc, dst); + if (rdst) + emit_cvt(pc, rdst, -1, dst, CVTOP_ABSRN, CVT_F32_S32); } static INLINE void emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32_ROP); + emit_cvt(pc, dst, -1, src, CVTOP_FLOOR, CVT_F32_F32_ROP); } static void @@ -914,7 +957,7 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, static INLINE void emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32); + emit_cvt(pc, dst, -1, src, CVTOP_ABS, CVT_F32_F32); } static void @@ -1611,13 +1654,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) if (mask & (1 << 3)) emit_mov_immdval(pc, dst[3], 1.0); break; - case TGSI_OPCODE_SGE: - for (c = 0; c < 4; c++) { - if (!(mask & (1 << c))) - continue; - emit_set(pc, 6, dst[c], src[0][c], src[1][c]); - } - break; case TGSI_OPCODE_SIN: temp = temp_temp(pc); rtmp = *pp_rtmp; @@ -1630,10 +1666,16 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SNE: + i = map_opcode_ccode(inst->Instruction.Opcode); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_set(pc, 1, dst[c], src[0][c], 
src[1][c]); + emit_set(pc, i, dst[c], -1, src[0][c], src[1][c]); } break; case TGSI_OPCODE_SUB: @@ -1690,7 +1732,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_cvt(pc, rdst[c], dst[c], -1, CVTOP_SAT, 0xc4); + emit_cvt(pc, rdst[c], -1, dst[c], CVTOP_SAT, CVT_F32_F32); } } -- 1.6.0.6 --------------090503050107050804030002 Content-Type: text/plain; name="0014-nv50-don-t-allocate-in-the-param-buffer.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="0014-nv50-don-t-allocate-in-the-param-buffer.patch"