Displaying 18 results from an estimated 18 matches for "type_u64".
Did you mean:
type_f64
2016 Sep 27
2
[PATCH] nv50/ir: constant fold OP_SPLIT
...peephole.cpp
@@ -920,6 +920,29 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
Instruction *newi = i;
switch (i->op) {
+ case OP_SPLIT: {
+ uint16_t shift = 0;
+ DataType type = TYPE_NONE;
+ bld.setPosition(i, false);
+ if (i->sType == TYPE_U64 || i->sType == TYPE_S64) {
+ shift = 32;
+ type = (i->sType == TYPE_U64) ? TYPE_U32 : TYPE_S32;
+ }
+ if (i->sType == TYPE_U32 || i->sType == TYPE_S32) {
+ shift = 16;
+ type = (i->sType == TYPE_U32) ? TYPE_U16 : TYPE_S16;
+ }
+ if...
2015 Feb 20
10
[PATCH 01/11] nvc0/ir: add emission of dadd/dmul/dmad opcodes, fix minmax
Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
.../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 66 +++++++++++++++++++++-
1 file changed, 63 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index dfb093c..e38a3b8 100644
---
2016 Sep 30
2
[PATCH] nv50/ir: constant fold OP_SPLIT
...ImmediateValue &imm0, int s)
>> Instruction *newi = i;
>>
>> switch (i->op) {
>> + case OP_SPLIT: {
>> + uint16_t shift = 0;
>> + DataType type = TYPE_NONE;
>> + bld.setPosition(i, false);
>> + if (i->sType == TYPE_U64 || i->sType == TYPE_S64) {
>> + shift = 32;
>> + type = (i->sType == TYPE_U64) ? TYPE_U32 : TYPE_S32;
>> + }
>> + if (i->sType == TYPE_U32 || i->sType == TYPE_S32) {
>> + shift = 16;
>> + type = (i->sType =...
2015 Feb 23
2
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...bits of the destination. Stick a 0 in.
> dst[0] = bld.loadImm(NULL, 0);
> @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
>
> // 4. Recombine the two dst pieces back into the original destination.
> bld.setPosition(i, true);
> - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
> + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]);
> +
> + // 5. Perform 2 Newton-Raphson steps
> + if (i->op == OP_RCP) {
> + // RCP: x_{n+1} = 2 * x_n - input * x_n^2
> + Value *two = bld.getSSA(8);
> +
> +...
2016 Sep 28
0
[PATCH] nv50/ir: constant fold OP_SPLIT
...tantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
> Instruction *newi = i;
>
> switch (i->op) {
> + case OP_SPLIT: {
> + uint16_t shift = 0;
> + DataType type = TYPE_NONE;
> + bld.setPosition(i, false);
> + if (i->sType == TYPE_U64 || i->sType == TYPE_S64) {
> + shift = 32;
> + type = (i->sType == TYPE_U64) ? TYPE_U32 : TYPE_S32;
> + }
> + if (i->sType == TYPE_U32 || i->sType == TYPE_S32) {
> + shift = 16;
> + type = (i->sType == TYPE_U32) ? TYPE_U16 :...
2016 Sep 30
0
[PATCH] nv50/ir: constant fold OP_SPLIT
...>>> Instruction *newi = i;
>>>
>>> switch (i->op) {
>>> + case OP_SPLIT: {
>>> + uint16_t shift = 0;
>>> + DataType type = TYPE_NONE;
>>> + bld.setPosition(i, false);
>>> + if (i->sType == TYPE_U64 || i->sType == TYPE_S64) {
>>> + shift = 32;
>>> + type = (i->sType == TYPE_U64) ? TYPE_U32 : TYPE_S32;
>>> + }
>>> + if (i->sType == TYPE_U32 || i->sType == TYPE_S32) {
>>> + shift = 16;
>>> +...
2015 Feb 23
2
[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)
...t[1], bld.loadImm(NULL, 0x80000000))
+ ->setFlagsDef(0, (pred = bld.getSSA(1, FILE_FLAGS)));
+ bld.mkOp1(OP_NEG, TYPE_F64, neg, guess)
+ ->setPredicate(CC_S, pred);
+ bld.mkMov(copy, guess)
+ ->setPredicate(CC_NS, pred);
+ guess = bld.mkOp2v(OP_UNION, TYPE_U64, bld.getSSA(8), neg, copy);
+ } else {
+ Value *half_input = bld.getSSA(8), *three_half = bld.getSSA(8);
+ bld.mkCvt(OP_CVT, TYPE_F64, half_input, TYPE_F32, bld.loadImm(NULL, -0.5f));
+ bld.mkCvt(OP_CVT, TYPE_F64, three_half, TYPE_F32, bld.loadImm(NULL, 1.5f));
+
+ half_input...
2015 Feb 23
0
[PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...n't care about the low 32 bits of the destination. Stick a 0 in.
dst[0] = bld.loadImm(NULL, 0);
@@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
// 4. Recombine the two dst pieces back into the original destination.
bld.setPosition(i, true);
- bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
+ guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]);
+
+ // 5. Perform 2 Newton-Raphson steps
+ if (i->op == OP_RCP) {
+ // RCP: x_{n+1} = 2 * x_n - input * x_n^2
+ Value *two = bld.getSSA(8);
+
+ bld.mkCvt(OP_CVT, TYPE_F64, two, TY...
2015 Feb 23
0
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...n. Stick a 0 in.
>> dst[0] = bld.loadImm(NULL, 0);
>> @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
>>
>> // 4. Recombine the two dst pieces back into the original destination.
>> bld.setPosition(i, true);
>> - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
>> + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]);
>> +
>> + // 5. Perform 2 Newton-Raphson steps
>> + if (i->op == OP_RCP) {
>> + // RCP: x_{n+1} = 2 * x_n - input * x_n^2
>> + Value *two = bld.g...
2016 Mar 17
4
[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
...lt:
- assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
+ assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER);
base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
assert(base->reg.size == 8);
if (ptr)
base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
assert(base->reg.size == 8);
atom->setIndirect(0, 0, base);
+ atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
return true;
}
base =
@@ -1963,7 +1964,7 @@ NVC0LoweringPass::visit(Instruction *i)
} else if (i->src(0).getFile()...
2014 May 18
1
[PATCH 1/2] nv50/ir: fix s32 x s32 -> high s32 multiply logic
...BuildUtil *bld, Instruction *mul)
{
const bool highResult = mul->subOp == NV50_IR_SUBOP_MUL_HIGH;
- DataType fTy = mul->sType; // full type
- DataType hTy;
+ DataType fTy; // full type
+ switch (mul->sType) {
+ case TYPE_S32: fTy = TYPE_U32; break;
+ case TYPE_S64: fTy = TYPE_U64; break;
+ default: fTy = mul->sType; break;
+ }
+
+ DataType hTy; // half type
switch (fTy) {
- case TYPE_S32: hTy = TYPE_S16; break;
case TYPE_U32: hTy = TYPE_U16; break;
case TYPE_U64: hTy = TYPE_U32; break;
- case TYPE_S64: hTy = TYPE_S32; break;
default:
retu...
2016 Apr 08
2
[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
...LE_MEMORY_GLOBAL);
>> + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER);
>> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
>> assert(base->reg.size == 8);
>> if (ptr)
>> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
>> assert(base->reg.size == 8);
>> atom->setIndirect(0, 0, base);
>> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
>> return true;
>> }
>> base =
>> @@ -1963,7 +1964,7 @@ NVC0Lowering...
2016 Sep 30
2
[PATCH v2] nv50/ir: constant fold OP_SPLIT
Split the source immediate value into two new values and create OP_MOV
instructions for the two newly created values.
V2: get rid of special cases
Signed-off-by: Tobias Klausmann <tobias.johannes.klausmann at mni.thm.de>
---
src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git
2016 Mar 23
0
[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
...c(0).getFile() == FILE_MEMORY_GLOBAL);
> + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER);
> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
> assert(base->reg.size == 8);
> if (ptr)
> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
> assert(base->reg.size == 8);
> atom->setIndirect(0, 0, base);
> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
> return true;
> }
> base =
> @@ -1963,7 +1964,7 @@ NVC0LoweringPass::visit(Instruction *i)...
2016 Apr 12
2
[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
...assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER);
>>>> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
>>>> assert(base->reg.size == 8);
>>>> if (ptr)
>>>> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
>>>> assert(base->reg.size == 8);
>>>> atom->setIndirect(0, 0, base);
>>>> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
>>>> return true;
>>>> }
>>>> ba...
2016 Apr 08
0
[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
...gt;>> + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER);
>>> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
>>> assert(base->reg.size == 8);
>>> if (ptr)
>>> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
>>> assert(base->reg.size == 8);
>>> atom->setIndirect(0, 0, base);
>>> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
>>> return true;
>>> }
>>> base =
>>> @@ -19...
2016 Apr 14
0
[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
...= FILE_MEMORY_BUFFER);
>>>>> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex *
>>>>> 16);
>>>>> assert(base->reg.size == 8);
>>>>> if (ptr)
>>>>> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
>>>>> assert(base->reg.size == 8);
>>>>> atom->setIndirect(0, 0, base);
>>>>> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
>>>>> return true;
>>>>> }
>...
2015 Nov 05
7
[PATCH mesa 0/5] nouveau: codegen: Make use of double immediates
Hi All,
This series implements using double immediates in the nouveau codegen code.
This turns the following (nvc0) code:
1: mov u32 $r2 0x00000000 (8)
2: mov u32 $r3 0x3fe00000 (8)
3: add f64 $r0d $r0d $r2d (8)
Into:
1: add f64 $r0d $r0d 0.500000 (8)
This has been tested with the 2 double shader tests which I just sent to
the piglit list. On a gk208 (gk110 / SM35)