Displaying 13 results from an estimated 13 matches for "op_sub".
Did you mean:
dp_sub
2014 Nov 18
2
[PATCH] nv50/ir: saturate FRC result to avoid completely bogus values
...from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -2512,7 +2512,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
src0 = fetchSrc(0, c);
val0 = getScratch();
mkOp1(OP_FLOOR, TYPE_F32, val0, src0);
- mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0);
+ mkOp2(OP_SUB, TYPE_F32, val0, src0, val0);
+ mkOp1(OP_SAT, TYPE_F32, dst0[c], val0);
}
break;
case TGSI_OPCODE_ROUND:
--
2.0.4
2014 Nov 18
2
[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values
...au/codegen/nv50_ir_from_tgsi.cpp
>> @@ -2512,7 +2512,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
>> src0 = fetchSrc(0, c);
>> val0 = getScratch();
>> mkOp1(OP_FLOOR, TYPE_F32, val0, src0);
>> - mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0);
>> + mkOp2(OP_SUB, TYPE_F32, val0, src0, val0);
>> + mkOp1(OP_SAT, TYPE_F32, dst0[c], val0);
>> }
>> break;
>> case TGSI_OPCODE_ROUND:
>>
>
> I don't understand the math behind this. For...
2015 Feb 23
2
[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)
...struction *i)
{
const int neg0 = i->src(0).mod.neg();
@@ -997,6 +1022,25 @@ CodeEmitterNV50::emitFADD(const Instruction *i)
}
void
+CodeEmitterNV50::emitDADD(const Instruction *i)
+{
+ const int neg0 = i->src(0).mod.neg();
+ const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
+
+ assert(!(i->src(0).mod | i->src(1).mod).abs());
+ assert(!i->saturate);
+ assert(i->encSize == 8);
+
+ code[1] = 0x60000000;
+ code[0] = 0xe0000000;
+
+ emitForm_ADD(i);
+
+ code[1] |= neg0 << 26;
+ code[1] |= neg1 << 27;
+}
+
+void
CodeEmitte...
2014 Nov 18
1
[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values
...>> @@ -2512,7 +2512,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
>>>> src0 = fetchSrc(0, c);
>>>> val0 = getScratch();
>>>> mkOp1(OP_FLOOR, TYPE_F32, val0, src0);
>>>> - mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0);
>>>> + mkOp2(OP_SUB, TYPE_F32, val0, src0, val0);
>>>> + mkOp1(OP_SAT, TYPE_F32, dst0[c], val0);
>>>> }
>>>> break;
>>>> case TGSI_OPCODE_ROUND:
>>>>
>>...
2015 Feb 23
2
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...> + // 5. Perform 2 Newton-Raphson steps
> + if (i->op == OP_RCP) {
> + // RCP: x_{n+1} = 2 * x_n - input * x_n^2
> + Value *two = bld.getSSA(8);
> +
> + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f));
> +
> + guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8),
> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), two, guess),
> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input,
> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), guess, guess)));...
2015 Feb 20
10
[PATCH 01/11] nvc0/ir: add emission of dadd/dmul/dmad opcodes, fix minmax
...eEmitterNVC0::emitFADD(const Instruction *i)
}
void
+CodeEmitterNVC0::emitDADD(const Instruction *i)
+{
+ assert(i->encSize == 8);
+ emitForm_A(i, HEX64(48000000, 00000001));
+ roundMode_A(i);
+ assert(!i->saturate);
+ assert(!i->ftz);
+ emitNegAbs12(i);
+ if (i->op == OP_SUB)
+ code[0] ^= 1 << 8;
+}
+
+void
CodeEmitterNVC0::emitUADD(const Instruction *i)
{
uint32_t addOp = 0;
@@ -895,6 +947,8 @@ CodeEmitterNVC0::emitMINMAX(const Instruction *i)
else
if (!isFloatType(i->dType))
op |= isSignedType(i->dType) ? 0x23 : 0x03;
+ if (i->...
2014 Nov 18
0
[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values
...allium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> @@ -2512,7 +2512,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
> src0 = fetchSrc(0, c);
> val0 = getScratch();
> mkOp1(OP_FLOOR, TYPE_F32, val0, src0);
> - mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0);
> + mkOp2(OP_SUB, TYPE_F32, val0, src0, val0);
> + mkOp1(OP_SAT, TYPE_F32, dst0[c], val0);
> }
> break;
> case TGSI_OPCODE_ROUND:
>
I don't understand the math behind this. For any such large number, as
far a...
2014 Nov 18
0
[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values
...rom_tgsi.cpp
>>> @@ -2512,7 +2512,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
>>> src0 = fetchSrc(0, c);
>>> val0 = getScratch();
>>> mkOp1(OP_FLOOR, TYPE_F32, val0, src0);
>>> - mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0);
>>> + mkOp2(OP_SUB, TYPE_F32, val0, src0, val0);
>>> + mkOp1(OP_SAT, TYPE_F32, dst0[c], val0);
>>> }
>>> break;
>>> case TGSI_OPCODE_ROUND:
>>>
>>
>> I don't unde...
2015 Feb 23
0
[PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...U64, bld.getSSA(8), dst[0], dst[1]);
+
+ // 5. Perform 2 Newton-Raphson steps
+ if (i->op == OP_RCP) {
+ // RCP: x_{n+1} = 2 * x_n - input * x_n^2
+ Value *two = bld.getSSA(8);
+
+ bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f));
+
+ guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8),
+ bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), two, guess),
+ bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input,
+ bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), guess, guess)));
+ guess =...
2015 Feb 23
0
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...on-Raphson steps
>> + if (i->op == OP_RCP) {
>> + // RCP: x_{n+1} = 2 * x_n - input * x_n^2
>> + Value *two = bld.getSSA(8);
>> +
>> + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f));
>> +
>> + guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8),
>> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), two, guess),
>> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input,
>> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), guess...
2014 May 18
1
[PATCH 1/2] nv50/ir: fix s32 x s32 -> high s32 multiply logic
Retrieving the high 32 bits of a signed multiply is rather annoying. It
appears that the simplest way to do this is to compute the absolute
value of the arguments, and perform a u32 x u32 -> u64 operation. If the
arguments' signs differ, then negate the result. Since there is no u64
support in the cvt instruction, we have to perform the 2's complement
negation "by hand".
2015 Jan 04
0
[PATCH] nv50/ir: Add sat modifier for mul
...rivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -84,7 +84,7 @@ static const struct opProperties _initProps[] =
// neg abs not sat c[] s[], a[], imm
{ OP_ADD, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
{ OP_SUB, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 },
- { OP_MUL, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 },
+ { OP_MUL, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
{ OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
{ OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
{ OP_MAD,...
2014 Nov 23
3
[Bug 86618] New: [NV96] neg modifiers not working in MIN and MAX operations
...modifiers shader works as expected:
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -85,8 +85,8 @@ static const struct opProperties _initProps[] =
{ OP_ADD, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
{ OP_SUB, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 },
{ OP_MUL, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 },
- { OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
- { OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
+ { OP_MAX, 0x0, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
+ { OP_MIN,...