Displaying 18 results from an estimated 18 matches for "mkcvt".
Did you mean:
mkcert
2015 Feb 23
2
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...t[0], dst[1]);
> + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]);
> +
> + // 5. Perform 2 Newton-Raphson steps
> + if (i->op == OP_RCP) {
> + // RCP: x_{n+1} = 2 * x_n - input * x_n^2
> + Value *two = bld.getSSA(8);
> +
> + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f));
> +
> + guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8),
> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), two, guess),
> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input,...
2015 Feb 23
2
[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)
...A(4), shr[2], shr[3]);
+
+ guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8), guess,
+ bld.mkOp2v(OP_MERGE, TYPE_F64, bld.getSSA(8), shr[0], shr[1]));
+
+ if (i->op == OP_RCP) {
+ Value *two = bld.getSSA(8), *neg = bld.getSSA(8), *copy = bld.getSSA(8);
+
+ bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f));
+
+ /* Square the guess first, since it was for RSQ */
+ guess = bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), guess, guess);
+
+ // RCP: x_{n+1} = 2 * x_n - input * x_n^2
+ guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(...
2015 Feb 23
0
[PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
+ guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]);
+
+ // 5. Perform 2 Newton-Raphson steps
+ if (i->op == OP_RCP) {
+ // RCP: x_{n+1} = 2 * x_n - input * x_n^2
+ Value *two = bld.getSSA(8);
+
+ bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f));
+
+ guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8),
+ bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), two, guess),
+ bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input,
+...
2015 Feb 23
0
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...s = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]);
>> +
>> + // 5. Perform 2 Newton-Raphson steps
>> + if (i->op == OP_RCP) {
>> + // RCP: x_{n+1} = 2 * x_n - input * x_n^2
>> + Value *two = bld.getSSA(8);
>> +
>> + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f));
>> +
>> + guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8),
>> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), two, guess),
>> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.g...
2013 Dec 08
0
[PATCH] nv50: TXF already has integer arguments, don't try to convert from f32
...exInstruction *i)
if (i->op == OP_TXB || i->op == OP_TXL)
i->swapSources(dref, lod);
- // array index must be converted to u32
if (i->tex.target.isArray()) {
- Value *layer = i->getSrc(arg - 1);
- LValue *src = new_LValue(func, FILE_GPR);
- bld.mkCvt(OP_CVT, TYPE_U32, src, TYPE_F32, layer);
- bld.mkOp2(OP_MIN, TYPE_U32, src, src, bld.loadImm(NULL, 511));
- i->setSrc(arg - 1, src);
-
+ if (i->op != OP_TXF) {
+ // array index must be converted to u32, but it's already an integer
+ // for TXF
+ Valu...
2015 Jan 05
0
[PATCH] nv50/ir: change the way float face is returned
...>dType == TYPE_F32) {
- bld.mkOp2(OP_AND, TYPE_U32, def, def, bld.mkImm(0x80000000));
- bld.mkOp2(OP_XOR, TYPE_U32, def, def, bld.mkImm(0xbf800000));
+ bld.mkOp2(OP_OR, TYPE_U32, def, def, bld.mkImm(0x00000001));
+ bld.mkOp1(OP_NEG, TYPE_S32, def, def);
+ bld.mkCvt(OP_CVT, TYPE_F32, def, TYPE_S32, def);
}
break;
case SV_NCTAID:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index ff48e94..04cd19e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_i...
2015 Jan 11
2
[PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions
...place, i'll go
>> ahead and follow your advice here.
> Oh wait... this was to support saturating an array access into a u16...
>
> const int sat = (i->op == OP_TXF) ? 1 : 0;
> DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
> bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat;
>
> So... basically if the source is a U32 and the dest is a U16, we want
> to saturate there? IMO this is such a minor use-case that it doesn't
> really matter. However I guess you can keep the saturate bits around
> if you...
2014 Jun 23
1
[PATCH] nv50/ir: make ARB_viewport_array behave like it does with other drivers
..., viewport, val);
+ else
+ mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val);
+ }
} else
if (f == TGSI_FILE_TEMPORARY ||
f == TGSI_FILE_PREDICATE ||
@@ -2523,6 +2535,14 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
break;
case TGSI_OPCODE_EMIT:
+ /* export the saved viewport index */
+ if (viewport != NULL) {
+ Symbol *VPSym = mkSymbol(FILE_SHADER_OUTPUT, info->io.viewportID,
+ TYPE_U32,
+...
2014 Jun 23
1
[PATCH v3] nv50/ir: make ARB_viewport_array behave like it does with other drivers
..., viewport, val);
+ else
+ mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val);
+ }
} else
if (f == TGSI_FILE_TEMPORARY ||
f == TGSI_FILE_PREDICATE ||
@@ -2523,6 +2538,13 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
break;
case TGSI_OPCODE_EMIT:
+ /* export the saved viewport index */
+ if (viewport != NULL) {
+ Symbol *vpSym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_U32,
+ info->out[info->io.viewpo...
2014 Jun 23
1
[PATCH v2] nv50/ir: make ARB_viewport_array behave like it does with other drivers
..., viewport, val);
+ else
+ mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val);
+ }
} else
if (f == TGSI_FILE_TEMPORARY ||
f == TGSI_FILE_PREDICATE ||
@@ -2523,6 +2537,14 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
break;
case TGSI_OPCODE_EMIT:
+ /* export the saved viewport index */
+ if (viewport != NULL) {
+ Symbol *vpSym = mkSymbol(FILE_SHADER_OUTPUT, info->io.viewportId,
+ TYPE_U32,
+...
2015 Jan 11
2
[PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions
On 11.01.2015 01:58, Ilia Mirkin wrote:
> On Fri, Jan 9, 2015 at 8:24 PM, Tobias Klausmann
> <tobias.johannes.klausmann at mni.thm.de> wrote:
>> Folding for conversions: F32->(U{16/32}, S{16/32}) and (U{16/32}, S{16/32})->F32
>>
>> Signed-off-by: Tobias Klausmann <tobias.johannes.klausmann at mni.thm.de>
>> ---
>> V2: beat me, whip me, split
2015 Feb 20
10
[PATCH 01/11] nvc0/ir: add emission of dadd/dmul/dmad opcodes, fix minmax
Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
.../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 66 +++++++++++++++++++++-
1 file changed, 63 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index dfb093c..e38a3b8 100644
---
2015 Jan 11
0
[PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions
...hy that was added in the first place, i'll go
> ahead and follow your advice here.
Oh wait... this was to support saturating an array access into a u16...
const int sat = (i->op == OP_TXF) ? 1 : 0;
DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat;
So... basically if the source is a U32 and the dest is a U16, we want
to saturate there? IMO this is such a minor use-case that it doesn't
really matter. However I guess you can keep the saturate bits around
if you like.
-ilia
2015 Jan 11
0
[PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions
...ahead and follow your advice here.
>>
>> Oh wait... this was to support saturating an array access into a u16...
>>
>> const int sat = (i->op == OP_TXF) ? 1 : 0;
>> DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
>> bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat;
>>
>> So... basically if the source is a U32 and the dest is a U16, we want
>> to saturate there? IMO this is such a minor use-case that it doesn't
>> really matter. However I guess you can keep the saturate bits aro...
2014 Sep 25
0
[PATCH] gm107/ir: fix texture argument order
...4_CHIPSET) {
if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
@@ -634,12 +649,17 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
const int sat = (i->op == OP_TXF) ? 1 : 0;
DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat;
- for (int s = dim; s >= 1; --s)
- i->setSrc(s, i->getSrc(s - 1));
- i->setSrc(0, layer);
+ if (i->op != OP_TXD || chipset < NVISA_GM107_CHIPSET) {
+ for (int s = dim; s >= 1; -...
2014 Feb 28
0
[PATCH] nv50: enable texture query lod
...ruction *i)
+{
+ handleTEX(i);
+ bld.setPosition(i, true);
+
+ /* The returned values are not quite what we want:
+ * (a) convert from s32 to f32
+ * (b) multiply by 1/256
+ */
+ for (int def = 0; def < 2; ++def) {
+ if (!i->defExists(def))
+ continue;
+ bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(def), TYPE_S32, i->getDef(def));
+ bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(def),
+ i->getDef(def), bld.loadImm(NULL, 1.0f / 256));
+ }
+ return true;
+}
+
+bool
NV50LoweringPreSSA::handleSET(Instruction *i)
{
if (i->dType == TYP...
2015 Jan 11
2
[PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions
...ow your advice here.
>>> Oh wait... this was to support saturating an array access into a u16...
>>>
>>> const int sat = (i->op == OP_TXF) ? 1 : 0;
>>> DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
>>> bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat;
>>>
>>> So... basically if the source is a U32 and the dest is a U16, we want
>>> to saturate there? IMO this is such a minor use-case that it doesn't
>>> really matter. However I guess you can keep the s...
2014 Jan 13
20
[PATCH 00/19] nv50: add sampler2DMS/GP support to get OpenGL 3.2
OK, so there's a bunch of stuff in here. The geometry stuff is based on the
work started by Bryan Cain and Christoph Bumiller.
Patches 01-12: Add support for geometry shaders and fix related issues
Patches 13-14: Make it possible for fb clears to operate on texture attachments
with an explicit layer set (as is allowed in gl 3.2).
Patches 15-17: Make ARB_texture_multisample work