search for: mkcvt

Displaying 18 results from an estimated 18 matches for "mkcvt".

Did you mean: mkcert
2015 Feb 23
2
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...t[0], dst[1]); > + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); > + > + // 5. Perform 2 Newton-Raphson steps > + if (i->op == OP_RCP) { > + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 > + Value *two = bld.getSSA(8); > + > + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f)); > + > + guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8), > + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), two, guess), > + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input,...
2015 Feb 23
2
[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)
...A(4), shr[2], shr[3]); + + guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8), guess, + bld.mkOp2v(OP_MERGE, TYPE_F64, bld.getSSA(8), shr[0], shr[1])); + + if (i->op == OP_RCP) { + Value *two = bld.getSSA(8), *neg = bld.getSSA(8), *copy = bld.getSSA(8); + + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f)); + + /* Square the guess first, since it was for RSQ */ + guess = bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), guess, guess); + + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 + guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(...
2015 Feb 23
0
[PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); + + // 5. Perform 2 Newton-Raphson steps + if (i->op == OP_RCP) { + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 + Value *two = bld.getSSA(8); + + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f)); + + guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8), + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), two, guess), + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input, +...
2015 Feb 23
0
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...s = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); >> + >> + // 5. Perform 2 Newton-Raphson steps >> + if (i->op == OP_RCP) { >> + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 >> + Value *two = bld.getSSA(8); >> + >> + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f)); >> + >> + guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8), >> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), two, guess), >> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.g...
2013 Dec 08
0
[PATCH] nv50: TXF already has integer arguments, don't try to convert from f32
...exInstruction *i) if (i->op == OP_TXB || i->op == OP_TXL) i->swapSources(dref, lod); - // array index must be converted to u32 if (i->tex.target.isArray()) { - Value *layer = i->getSrc(arg - 1); - LValue *src = new_LValue(func, FILE_GPR); - bld.mkCvt(OP_CVT, TYPE_U32, src, TYPE_F32, layer); - bld.mkOp2(OP_MIN, TYPE_U32, src, src, bld.loadImm(NULL, 511)); - i->setSrc(arg - 1, src); - + if (i->op != OP_TXF) { + // array index must be converted to u32, but it's already an integer + // for TXF + Valu...
2015 Jan 05
0
[PATCH] nv50/ir: change the way float face is returned
...i->dType == TYPE_F32) { - bld.mkOp2(OP_AND, TYPE_U32, def, def, bld.mkImm(0x80000000)); - bld.mkOp2(OP_XOR, TYPE_U32, def, def, bld.mkImm(0xbf800000)); + bld.mkOp2(OP_OR, TYPE_U32, def, def, bld.mkImm(0x00000001)); + bld.mkOp1(OP_NEG, TYPE_S32, def, def); + bld.mkCvt(OP_CVT, TYPE_F32, def, TYPE_S32, def); } break; case SV_NCTAID: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index ff48e94..04cd19e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_i...
2015 Jan 11
2
[PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions
...place, i'll go >> ahead and follow your advice here. > Oh wait... this was to support saturating an array access into a u16... > > const int sat = (i->op == OP_TXF) ? 1 : 0; > DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; > bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat; > > So... basically if the source is a U32 and the dest is a U16, we want > to saturate there? IMO this is such a minor use-case that it doesn't > really matter. However I guess you can keep the saturate bits around > if you...
2014 Jun 23
1
[PATCH] nv50/ir: make ARB_viewport_array behave like it does with other drivers
..., viewport, val); + else + mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val); + } } else if (f == TGSI_FILE_TEMPORARY || f == TGSI_FILE_PREDICATE || @@ -2523,6 +2535,14 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c)); break; case TGSI_OPCODE_EMIT: + /* export the saved viewport index */ + if (viewport != NULL) { + Symbol *VPSym = mkSymbol(FILE_SHADER_OUTPUT, info->io.viewportID, + TYPE_U32, +...
2014 Jun 23
1
[PATCH v3] nv50/ir: make ARB_viewport_array behave like it does with other drivers
..., viewport, val); + else + mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val); + } } else if (f == TGSI_FILE_TEMPORARY || f == TGSI_FILE_PREDICATE || @@ -2523,6 +2538,13 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c)); break; case TGSI_OPCODE_EMIT: + /* export the saved viewport index */ + if (viewport != NULL) { + Symbol *vpSym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_U32, + info->out[info->io.viewpo...
2014 Jun 23
1
[PATCH v2] nv50/ir: make ARB_viewport_array behave like it does with other drivers
..., viewport, val); + else + mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val); + } } else if (f == TGSI_FILE_TEMPORARY || f == TGSI_FILE_PREDICATE || @@ -2523,6 +2537,14 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c)); break; case TGSI_OPCODE_EMIT: + /* export the saved viewport index */ + if (viewport != NULL) { + Symbol *vpSym = mkSymbol(FILE_SHADER_OUTPUT, info->io.viewportId, + TYPE_U32, +...
2015 Jan 11
2
[PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions
On 11.01.2015 01:58, Ilia Mirkin wrote: > On Fri, Jan 9, 2015 at 8:24 PM, Tobias Klausmann > <tobias.johannes.klausmann at mni.thm.de> wrote: >> Folding for conversions: F32->(U{16/32}, S{16/32}) and (U{16/32}, {S16/32})->F32 >> >> Signed-off-by: Tobias Klausmann <tobias.johannes.klausmann at mni.thm.de> >> --- >> V2: beat me, whip me, split
2015 Feb 20
10
[PATCH 01/11] nvc0/ir: add emission of dadd/dmul/dmad opcodes, fix minmax
Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu> --- .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 66 +++++++++++++++++++++- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index dfb093c..e38a3b8 100644 ---
2015 Jan 11
0
[PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions
...hy that was added in the first place, i'll go > ahead and follow your advice here. Oh wait... this was to support saturating an array access into a u16... const int sat = (i->op == OP_TXF) ? 1 : 0; DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat; So... basically if the source is a U32 and the dest is a U16, we want to saturate there? IMO this is such a minor use-case that it doesn't really matter. However I guess you can keep the saturate bits around if you like. -ilia
2015 Jan 11
0
[PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions
...ahead and follow your advice here. >> >> Oh wait... this was to support saturating an array access into a u16... >> >> const int sat = (i->op == OP_TXF) ? 1 : 0; >> DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; >> bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat; >> >> So... basically if the source is a U32 and the dest is a U16, we want >> to saturate there? IMO this is such a minor use-case that it doesn't >> really matter. However I guess you can keep the saturate bits aro...
2014 Sep 25
0
[PATCH] gm107/ir: fix texture argument order
...4_CHIPSET) { if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { @@ -634,12 +649,17 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) const int sat = (i->op == OP_TXF) ? 1 : 0; DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat; - for (int s = dim; s >= 1; --s) - i->setSrc(s, i->getSrc(s - 1)); - i->setSrc(0, layer); + if (i->op != OP_TXD || chipset < NVISA_GM107_CHIPSET) { + for (int s = dim; s >= 1; -...
2014 Feb 28
0
[PATCH] nv50: enable texture query lod
...ruction *i) +{ + handleTEX(i); + bld.setPosition(i, true); + + /* The returned values are not quite what we want: + * (a) convert from s32 to f32 + * (b) multiply by 1/256 + */ + for (int def = 0; def < 2; ++def) { + if (!i->defExists(def)) + continue; + bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(def), TYPE_S32, i->getDef(def)); + bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(def), + i->getDef(def), bld.loadImm(NULL, 1.0f / 256)); + } + return true; +} + +bool NV50LoweringPreSSA::handleSET(Instruction *i) { if (i->dType == TYP...
2015 Jan 11
2
[PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions
...ow your advice here. >>> Oh wait... this was to support saturating an array access into a u16... >>> >>> const int sat = (i->op == OP_TXF) ? 1 : 0; >>> DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; >>> bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat; >>> >>> So... basically if the source is a U32 and the dest is a U16, we want >>> to saturate there? IMO this is such a minor use-case that it doesn't >>> really matter. However I guess you can keep the s...
2014 Jan 13
20
[PATCH 00/19] nv50: add sampler2DMS/GP support to get OpenGL 3.2
OK, so there's a bunch of stuff in here. The geometry stuff is based on the work started by Bryan Cain and Christoph Bumiller. Patches 01-12: Add support for geometry shaders and fix related issues Patches 13-14: Make it possible for fb clears to operate on texture attachments with an explicit layer set (as is allowed in gl 3.2). Patches 15-17: Make ARB_texture_multisample work