thr3ads.net - search: "op

[PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

2014 Nov 18

2

[PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

...from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -2512,7 +2512,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) src0 = fetchSrc(0, c); val0 = getScratch(); mkOp1(OP_FLOOR, TYPE_F32, val0, src0); - mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0); + mkOp2(OP_SUB, TYPE_F32, val0, src0, val0); + mkOp1(OP_SAT, TYPE_F32, dst0[c], val0); } break; case TGSI_OPCODE_ROUND: -- 2.0.4

[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

2014 Nov 18

2

[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

...au/codegen/nv50_ir_from_tgsi.cpp >> @@ -2512,7 +2512,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) >> src0 = fetchSrc(0, c); >> val0 = getScratch(); >> mkOp1(OP_FLOOR, TYPE_F32, val0, src0); >> - mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0); >> + mkOp2(OP_SUB, TYPE_F32, val0, src0, val0); >> + mkOp1(OP_SAT, TYPE_F32, dst0[c], val0); >> } >> break; >> case TGSI_OPCODE_ROUND: >> > > I don't understand the math behind this. For...

[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)

2015 Feb 23

2

[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)

...struction *i) { const int neg0 = i->src(0).mod.neg(); @@ -997,6 +1022,25 @@ CodeEmitterNV50::emitFADD(const Instruction *i) } void +CodeEmitterNV50::emitDADD(const Instruction *i) +{ + const int neg0 = i->src(0).mod.neg(); + const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0); + + assert(!(i->src(0).mod | i->src(1).mod).abs()); + assert(!i->saturate); + assert(i->encSize == 8); + + code[1] = 0x60000000; + code[0] = 0xe0000000; + + emitForm_ADD(i); + + code[1] |= neg0 << 26; + code[1] |= neg1 << 27; +} + +void CodeEmitte...

[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

2014 Nov 18

1

[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

...>> @@ -2512,7 +2512,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) >>>> src0 = fetchSrc(0, c); >>>> val0 = getScratch(); >>>> mkOp1(OP_FLOOR, TYPE_F32, val0, src0); >>>> - mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0); >>>> + mkOp2(OP_SUB, TYPE_F32, val0, src0, val0); >>>> + mkOp1(OP_SAT, TYPE_F32, dst0[c], val0); >>>> } >>>> break; >>>> case TGSI_OPCODE_ROUND: >>>> >>...

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

2015 Feb 23

2

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

...> + // 5. Perform 2 Newton-Raphson steps > + if (i->op == OP_RCP) { > + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 > + Value *two = bld.getSSA(8); > + > + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f)); > + > + guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8), > + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), two, guess), > + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input, > + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), guess, guess)));...

[PATCH 01/11] nvc0/ir: add emission of dadd/dmul/dmad opcodes, fix minmax

2015 Feb 20

10

[PATCH 01/11] nvc0/ir: add emission of dadd/dmul/dmad opcodes, fix minmax

...eEmitterNVC0::emitFADD(const Instruction *i) } void +CodeEmitterNVC0::emitDADD(const Instruction *i) +{ + assert(i->encSize == 8); + emitForm_A(i, HEX64(48000000, 00000001)); + roundMode_A(i); + assert(!i->saturate); + assert(!i->ftz); + emitNegAbs12(i); + if (i->op == OP_SUB) + code[0] ^= 1 << 8; +} + +void CodeEmitterNVC0::emitUADD(const Instruction *i) { uint32_t addOp = 0; @@ -895,6 +947,8 @@ CodeEmitterNVC0::emitMINMAX(const Instruction *i) else if (!isFloatType(i->dType)) op |= isSignedType(i->dType) ? 0x23 : 0x03; + if (i->...

[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

2014 Nov 18

0

[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

...allium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp > @@ -2512,7 +2512,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) > src0 = fetchSrc(0, c); > val0 = getScratch(); > mkOp1(OP_FLOOR, TYPE_F32, val0, src0); > - mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0); > + mkOp2(OP_SUB, TYPE_F32, val0, src0, val0); > + mkOp1(OP_SAT, TYPE_F32, dst0[c], val0); > } > break; > case TGSI_OPCODE_ROUND: > I don't understand the math behind this. For any such large number, as far a...

[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

2014 Nov 18

0

[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

...rom_tgsi.cpp >>> @@ -2512,7 +2512,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) >>> src0 = fetchSrc(0, c); >>> val0 = getScratch(); >>> mkOp1(OP_FLOOR, TYPE_F32, val0, src0); >>> - mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0); >>> + mkOp2(OP_SUB, TYPE_F32, val0, src0, val0); >>> + mkOp1(OP_SAT, TYPE_F32, dst0[c], val0); >>> } >>> break; >>> case TGSI_OPCODE_ROUND: >>> >> >> I don't unde...

[PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

2015 Feb 23

0

[PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

...U64, bld.getSSA(8), dst[0], dst[1]); + + // 5. Perform 2 Newton-Raphson steps + if (i->op == OP_RCP) { + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 + Value *two = bld.getSSA(8); + + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f)); + + guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8), + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), two, guess), + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input, + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), guess, guess))); + guess =...

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

2015 Feb 23

0

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

...on-Raphson steps >> + if (i->op == OP_RCP) { >> + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 >> + Value *two = bld.getSSA(8); >> + >> + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f)); >> + >> + guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8), >> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), two, guess), >> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input, >> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), guess...

[PATCH 1/2] nv50/ir: fix s32 x s32 -> high s32 multiply logic

2014 May 18

1

[PATCH 1/2] nv50/ir: fix s32 x s32 -> high s32 multiply logic

Retrieving the high 32 bits of a signed multiply is rather annoying. It appears that the simplest way to do this is to compute the absolute value of the arguments, and perform a u32 x u32 -> u64 operation. If the arguments' signs differ, then negate the result. Since there is no u64 support in the cvt instruction, we have to perform the 2's complement negation "by hand".

[PATCH] nv50/ir: Add sat modifier for mul

2015 Jan 04

0

[PATCH] nv50/ir: Add sat modifier for mul

...rivers/nouveau/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp @@ -84,7 +84,7 @@ static const struct opProperties _initProps[] = // neg abs not sat c[] s[], a[], imm { OP_ADD, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 }, { OP_SUB, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 }, - { OP_MUL, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 }, + { OP_MUL, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 }, { OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 }, { OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 }, { OP_MAD,...

[Bug 86618] New: [NV96] neg modifiers not working in MIN and MAX operations

2014 Nov 23

3

[Bug 86618] New: [NV96] neg modifiers not working in MIN and MAX operations

...modifiers shader works as expected: --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp @@ -85,8 +85,8 @@ static const struct opProperties _initProps[] = { OP_ADD, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 }, { OP_SUB, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 }, { OP_MUL, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 }, - { OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 }, - { OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 }, + { OP_MAX, 0x0, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 }, + { OP_MIN,...

search for: op_sub