thr3ads.net - search: "type_u64"

Displaying 18 results from an estimated 18 matches for "type_u64".

Did you mean: type_f64

2016 Sep 27

[PATCH] nv50/ir: constant fold OP_SPLIT

...peephole.cpp @@ -920,6 +920,29 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) Instruction *newi = i; switch (i->op) { + case OP_SPLIT: { + uint16_t shift = 0; + DataType type = TYPE_NONE; + bld.setPosition(i, false); + if (i->sType == TYPE_U64 || i->sType == TYPE_S64) { + shift = 32; + type = (i->sType == TYPE_U64) ? TYPE_U32 : TYPE_S32; + } + if (i->sType == TYPE_U32 || i->sType == TYPE_S32) { + shift = 16; + type = (i->sType == TYPE_U32) ? TYPE_U16 : TYPE_S16; + } + if...

[PATCH 01/11] nvc0/ir: add emission of dadd/dmul/dmad opcodes, fix minmax

2015 Feb 20

[PATCH 01/11] nvc0/ir: add emission of dadd/dmul/dmad opcodes, fix minmax

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu> --- .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 66 +++++++++++++++++++++- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index dfb093c..e38a3b8 100644 ---

[PATCH] nv50/ir: constant fold OP_SPLIT

2016 Sep 30

[PATCH] nv50/ir: constant fold OP_SPLIT

...ImmediateValue &imm0, int s) >> Instruction *newi = i; >> >> switch (i->op) { >> + case OP_SPLIT: { >> + uint16_t shift = 0; >> + DataType type = TYPE_NONE; >> + bld.setPosition(i, false); >> + if (i->sType == TYPE_U64 || i->sType == TYPE_S64) { >> + shift = 32; >> + type = (i->sType == TYPE_U64) ? TYPE_U32 : TYPE_S32; >> + } >> + if (i->sType == TYPE_U32 || i->sType == TYPE_S32) { >> + shift = 16; >> + type = (i->sType =...

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

2015 Feb 23

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

...bits of the destination. Stick a 0 in. > dst[0] = bld.loadImm(NULL, 0); > @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) > > // 4. Recombine the two dst pieces back into the original destination. > bld.setPosition(i, true); > - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); > + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); > + > + // 5. Perform 2 Newton-Raphson steps > + if (i->op == OP_RCP) { > + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 > + Value *two = bld.getSSA(8); > + > +...

[PATCH] nv50/ir: constant fold OP_SPLIT

2016 Sep 28

[PATCH] nv50/ir: constant fold OP_SPLIT

...tantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) > Instruction *newi = i; > > switch (i->op) { > + case OP_SPLIT: { > + uint16_t shift = 0; > + DataType type = TYPE_NONE; > + bld.setPosition(i, false); > + if (i->sType == TYPE_U64 || i->sType == TYPE_S64) { > + shift = 32; > + type = (i->sType == TYPE_U64) ? TYPE_U32 : TYPE_S32; > + } > + if (i->sType == TYPE_U32 || i->sType == TYPE_S32) { > + shift = 16; > + type = (i->sType == TYPE_U32) ? TYPE_U16 :...

[PATCH] nv50/ir: constant fold OP_SPLIT

2016 Sep 30

[PATCH] nv50/ir: constant fold OP_SPLIT

...>>> Instruction *newi = i; >>> >>> switch (i->op) { >>> + case OP_SPLIT: { >>> + uint16_t shift = 0; >>> + DataType type = TYPE_NONE; >>> + bld.setPosition(i, false); >>> + if (i->sType == TYPE_U64 || i->sType == TYPE_S64) { >>> + shift = 32; >>> + type = (i->sType == TYPE_U64) ? TYPE_U32 : TYPE_S32; >>> + } >>> + if (i->sType == TYPE_U32 || i->sType == TYPE_S32) { >>> + shift = 16; >>> +...

[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)

2015 Feb 23

[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)

...t[1], bld.loadImm(NULL, 0x80000000)) + ->setFlagsDef(0, (pred = bld.getSSA(1, FILE_FLAGS))); + bld.mkOp1(OP_NEG, TYPE_F64, neg, guess) + ->setPredicate(CC_S, pred); + bld.mkMov(copy, guess) + ->setPredicate(CC_NS, pred); + guess = bld.mkOp2v(OP_UNION, TYPE_U64, bld.getSSA(8), neg, copy); + } else { + Value *half_input = bld.getSSA(8), *three_half = bld.getSSA(8); + bld.mkCvt(OP_CVT, TYPE_F64, half_input, TYPE_F32, bld.loadImm(NULL, -0.5f)); + bld.mkCvt(OP_CVT, TYPE_F64, three_half, TYPE_F32, bld.loadImm(NULL, 1.5f)); + + half_input...

[PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

2015 Feb 23

[PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

...n't care about the low 32 bits of the destination. Stick a 0 in. dst[0] = bld.loadImm(NULL, 0); @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) // 4. Recombine the two dst pieces back into the original destination. bld.setPosition(i, true); - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); + + // 5. Perform 2 Newton-Raphson steps + if (i->op == OP_RCP) { + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 + Value *two = bld.getSSA(8); + + bld.mkCvt(OP_CVT, TYPE_F64, two, TY...

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

2015 Feb 23

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

...n. Stick a 0 in. >> dst[0] = bld.loadImm(NULL, 0); >> @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) >> >> // 4. Recombine the two dst pieces back into the original destination. >> bld.setPosition(i, true); >> - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); >> + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); >> + >> + // 5. Perform 2 Newton-Raphson steps >> + if (i->op == OP_RCP) { >> + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 >> + Value *two = bld.g...

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

2016 Mar 17

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

...lt: - assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); assert(base->reg.size == 8); if (ptr) base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); assert(base->reg.size == 8); atom->setIndirect(0, 0, base); + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; return true; } base = @@ -1963,7 +1964,7 @@ NVC0LoweringPass::visit(Instruction *i) } else if (i->src(0).getFile()...

[PATCH 1/2] nv50/ir: fix s32 x s32 -> high s32 multiply logic

2014 May 18

[PATCH 1/2] nv50/ir: fix s32 x s32 -> high s32 multiply logic

...BuildUtil *bld, Instruction *mul) { const bool highResult = mul->subOp == NV50_IR_SUBOP_MUL_HIGH; - DataType fTy = mul->sType; // full type - DataType hTy; + DataType fTy; // full type + switch (mul->sType) { + case TYPE_S32: fTy = TYPE_U32; break; + case TYPE_S64: fTy = TYPE_U64; break; + default: fTy = mul->sType; break; + } + + DataType hTy; // half type switch (fTy) { - case TYPE_S32: hTy = TYPE_S16; break; case TYPE_U32: hTy = TYPE_U16; break; case TYPE_U64: hTy = TYPE_U32; break; - case TYPE_S64: hTy = TYPE_S32; break; default: retu...

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

2016 Apr 08

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

...LE_MEMORY_GLOBAL); >> + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); >> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); >> assert(base->reg.size == 8); >> if (ptr) >> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); >> assert(base->reg.size == 8); >> atom->setIndirect(0, 0, base); >> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; >> return true; >> } >> base = >> @@ -1963,7 +1964,7 @@ NVC0Lowering...

[PATCH v2] nv50/ir: constant fold OP_SPLIT

2016 Sep 30

[PATCH v2] nv50/ir: constant fold OP_SPLIT

Split the source immediate value into two new values and create OP_MOV instructions for the two newly created values. V2: get rid of special cases Signed-off-by: Tobias Klausmann <tobias.johannes.klausmann at mni.thm.de> --- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

2016 Mar 23

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

...c(0).getFile() == FILE_MEMORY_GLOBAL); > + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); > base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); > assert(base->reg.size == 8); > if (ptr) > base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); > assert(base->reg.size == 8); > atom->setIndirect(0, 0, base); > + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; > return true; > } > base = > @@ -1963,7 +1964,7 @@ NVC0LoweringPass::visit(Instruction *i)...

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

2016 Apr 12

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

...assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); >>>> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); >>>> assert(base->reg.size == 8); >>>> if (ptr) >>>> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); >>>> assert(base->reg.size == 8); >>>> atom->setIndirect(0, 0, base); >>>> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; >>>> return true; >>>> } >>>> ba...

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

2016 Apr 08

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

...>>>> + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); >>> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); >>> assert(base->reg.size == 8); >>> if (ptr) >>> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); >>> assert(base->reg.size == 8); >>> atom->setIndirect(0, 0, base); >>> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; >>> return true; >>> } >>> base = >>> @@ -19...

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

2016 Apr 14

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

...= FILE_MEMORY_BUFFER); >>>>> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * >>>>> 16); >>>>> assert(base->reg.size == 8); >>>>> if (ptr) >>>>> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); >>>>> assert(base->reg.size == 8); >>>>> atom->setIndirect(0, 0, base); >>>>> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; >>>>> return true; >>>>> } >...

[PATCH mesa 0/5] nouveau: codegen: Make use of double immediates

2015 Nov 05

[PATCH mesa 0/5] nouveau: codegen: Make use of double immediates

Hi All, This series implements using double immediates in the nouveau codegen code. This turns the following (nvc0) code: 1: mov u32 $r2 0x00000000 (8) 2: mov u32 $r3 0x3fe00000 (8) 3: add f64 $r0d $r0d $r2d (8) Into: 1: add f64 $r0d $r0d 0.500000 (8) This has been tested with the 2 double shader tests which I just sent to the piglit list. On a gk208 (gk110 / SM35)

search for: type_u64