Displaying 8 results from an estimated 8 matches for "op_merge".
Did you mean:
opc_merge
2015 Feb 20
10
[PATCH 01/11] nvc0/ir: add emission of dadd/dmul/dmad opcodes, fix minmax
Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
.../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 66 +++++++++++++++++++++-
1 file changed, 63 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index dfb093c..e38a3b8 100644
---
2015 Feb 23
2
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...he low 32 bits of the destination. Stick a 0 in.
> dst[0] = bld.loadImm(NULL, 0);
> @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
>
> // 4. Recombine the two dst pieces back into the original destination.
> bld.setPosition(i, true);
> - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
> + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]);
> +
> + // 5. Perform 2 Newton-Raphson steps
> + if (i->op == OP_RCP) {
> + // RCP: x_{n+1} = 2 * x_n - input * x_n^2
> + Value *two = bld.getSSA(8);
>...
2015 Feb 23
2
[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)
...e it.
+ */
+ Value *abs, *guess, *parts[2], *input[2], *shr[4], *pred;
+
+ bld.setPosition(i, false);
+
+ abs = bld.mkOp1v(OP_ABS, TYPE_F64, bld.getSSA(8), i->getSrc(0));
+
+ parts[0] = bld.loadImm(NULL, 0xc7b537a9);
+ parts[1] = bld.loadImm(NULL, 0x5fe6eb50);
+ guess = bld.mkOp2v(OP_MERGE, TYPE_F64, bld.getSSA(8), parts[0], parts[1]);
+
+ bld.mkSplit(input, 4, abs);
+ shr[0] = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(4), input[0], bld.mkImm(1));
+ shr[1] = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(4), input[1], bld.mkImm(1));
+
+ // If the bottom bit of the high word was set, s...
2015 Feb 23
0
[PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
.../ 2. We don't care about the low 32 bits of the destination. Stick a 0 in.
dst[0] = bld.loadImm(NULL, 0);
@@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
// 4. Recombine the two dst pieces back into the original destination.
bld.setPosition(i, true);
- bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
+ guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]);
+
+ // 5. Perform 2 Newton-Raphson steps
+ if (i->op == OP_RCP) {
+ // RCP: x_{n+1} = 2 * x_n - input * x_n^2
+ Value *two = bld.getSSA(8);
+
+ bld.mkCvt(OP_CVT, TYPE_F6...
2015 Feb 23
0
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...destination. Stick a 0 in.
>> dst[0] = bld.loadImm(NULL, 0);
>> @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
>>
>> // 4. Recombine the two dst pieces back into the original destination.
>> bld.setPosition(i, true);
>> - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
>> + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]);
>> +
>> + // 5. Perform 2 Newton-Raphson steps
>> + if (i->op == OP_RCP) {
>> + // RCP: x_{n+1} = 2 * x_n - input * x_n^2
>> + Value *t...
2014 Jul 08
1
[PATCH] nv50/ir: use unordered_set instead of list to keep our instructions in uses
...if (!insn->getDef(0)->uses.empty())
- i = insn->getDef(0)->uses.front()->getInsn();
+ i = (*insn->getDef(0)->uses.begin())->getInsn();
// if this is a contraint-move there will only be a single use
if (i && i->op == OP_MERGE) // do we really still need this ?
break;
@@ -1559,7 +1559,7 @@ SpillCodeInserter::run(const std::list<ValuePair>& lst)
// Unspill at each use *before* inserting spill instructions,
// we don't want to have the spill instructions in the use list here....
2015 Nov 05
7
[PATCH mesa 0/5] nouveau: codegen: Make use of double immediates
Hi All,
This series implements using double immediates in the nouveau codegen code.
This turns the following (nvc0) code:
1: mov u32 $r2 0x00000000 (8)
2: mov u32 $r3 0x3fe00000 (8)
3: add f64 $r0d $r0d $r2d (8)
Into:
1: add f64 $r0d $r0d 0.500000 (8)
This has been tested with the 2 double shader tests which I just sent to
the piglit list. On a gk208 (gk110 / SM35)
2014 Dec 02
0
[PATCH RESEND] nv50/ir: use unordered_set instead of list to keep track of var defs
...sert(val->defs.begin(), val->defs.end());
nRep->livei.unify(nVal->livei);
return true;
}
@@ -989,7 +989,7 @@ GCRA::doCoalesce(ArrayList& insns, unsigned int mask)
// if this is a contraint-move there will only be a single use
if (i && i->op == OP_MERGE) // do we really still need this ?
break;
- i = insn->getSrc(0)->getUniqueInsn();
+ i = insn->getSrc(0)->getUniqueInsnMerged();
if (i && !i->constrainedDefs()) {
if (coalesceValues(insn->getDef(0), insn->getSrc(0), fal...