thr3ads.net - search: "loadimm"

Displaying 20 results from an estimated 23 matches for "loadimm".

[PATCH v3 1/2] nv50/ir: Add support for the double Type to BuildUtil

2014 Jul 03

[PATCH v3 1/2] nv50/ir: Add support for the double Type to BuildUtil

...drivers/nouveau/codegen/nv50_ir_build_util.cpp @@ -391,6 +391,17 @@ BuildUtil::mkImm(float f) return mkImm(u.u32); } +ImmediateValue * +BuildUtil::mkImm(double f) +{ + union { + double f64; + uint64_t u64; + } u; + u.f64 = f; + return mkImm(u.u64); +} + Value * BuildUtil::loadImm(Value *dst, float f) { @@ -398,6 +409,12 @@ BuildUtil::loadImm(Value *dst, float f) } Value * +BuildUtil::loadImm(Value *dst, double u) +{ + return mkOp1v(OP_MOV, TYPE_F64, dst ? dst : getScratch(8), mkImm(u)); +} + +Value * BuildUtil::loadImm(Value *dst, uint32_t u) { return mkOp1v(OP...

[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)

2015 Feb 23

[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)

...RSQ: + * + * 0x5fe6eb50c7b537a9 - num >> 1 + * + * For RCP, we will then square it. + */ + Value *abs, *guess, *parts[2], *input[2], *shr[4], *pred; + + bld.setPosition(i, false); + + abs = bld.mkOp1v(OP_ABS, TYPE_F64, bld.getSSA(8), i->getSrc(0)); + + parts[0] = bld.loadImm(NULL, 0xc7b537a9); + parts[1] = bld.loadImm(NULL, 0x5fe6eb50); + guess = bld.mkOp2v(OP_MERGE, TYPE_F64, bld.getSSA(8), parts[0], parts[1]); + + bld.mkSplit(input, 4, abs); + shr[0] = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(4), input[0], bld.mkImm(1)); + shr[1] = bld.mkOp2v(OP_SHR, TYPE_U3...

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

2015 Feb 23

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

...bld.mkSplit(src, 4, i->getSrc(0)); > + Value *input = i->getSrc(0); > + Value *src[2], *dst[2], *guess, *def = i->getDef(0); > + bld.mkSplit(src, 4, input); > > // 2. We don't care about the low 32 bits of the destination. Stick a 0 in. > dst[0] = bld.loadImm(NULL, 0); > @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) > > // 4. Recombine the two dst pieces back into the original destination. > bld.setPosition(i, true); > - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); > + guess = bld.mkOp2v(OP_MERGE...

[PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

2015 Feb 23

[PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

...], *def = i->getDef(0); - bld.mkSplit(src, 4, i->getSrc(0)); + Value *input = i->getSrc(0); + Value *src[2], *dst[2], *guess, *def = i->getDef(0); + bld.mkSplit(src, 4, input); // 2. We don't care about the low 32 bits of the destination. Stick a 0 in. dst[0] = bld.loadImm(NULL, 0); @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) // 4. Recombine the two dst pieces back into the original destination. bld.setPosition(i, true); - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst...

[PATCH 1/2] nvc0/ir: use manual TXD when offsets are involved

2014 Jul 05

[PATCH 1/2] nvc0/ir: use manual TXD when offsets are involved

Something about how we're implementing offsets for TXD is wrong, just flip to the generic quadop-based implementation in that case. This is the minimal fix appropriate for backporting. Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu> Cc: <mesa-stable at lists.freedesktop.org> --- src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 3 ++- 1 file changed, 2

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

2015 Feb 23

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

...->getSrc(0)); >> + Value *input = i->getSrc(0); >> + Value *src[2], *dst[2], *guess, *def = i->getDef(0); >> + bld.mkSplit(src, 4, input); >> >> // 2. We don't care about the low 32 bits of the destination. Stick a 0 in. >> dst[0] = bld.loadImm(NULL, 0); >> @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) >> >> // 4. Recombine the two dst pieces back into the original destination. >> bld.setPosition(i, true); >> - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); >> + guess...

[PATCH] gm107/ir: fix loading z offset for layered 3d image bindings

2019 Oct 14

[PATCH] gm107/ir: fix loading z offset for layered 3d image bindings

...etSSA(), ind, bld.mkImm(11)); + Value *is_3d = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), v, bld.mkImm(1)); + pred2d = bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), + TYPE_U32, bld.mkImm(0), is_3d); + + bld.mkOp2(OP_SHR, TYPE_U32, v, v, bld.loadImm(NULL, 16)); + su->moveSources(dim, 1); + su->setSrc(dim, v); + su->tex.target = nv50_ir::TEX_TARGET_3D; + pos++; + } + if (su->tex.bindless) handle = ind; else handle = loadTexHandle(ind, slot + 32); + su->setSrc(arg + pos, handle);...

[PATCH 1/3] nvc0/ir: add base tex offset for fermi indirect tex case

2014 Aug 08

[PATCH 1/3] nvc0/ir: add base tex offset for fermi indirect tex case

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu> --- .../drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index f010767..4a9e48f 100644 ---

[PATCH] nv50: TXF already has integer arguments, don't try to convert from f32

2013 Dec 08

[PATCH] nv50: TXF already has integer arguments, don't try to convert from f32

...es(dref, lod); - // array index must be converted to u32 if (i->tex.target.isArray()) { - Value *layer = i->getSrc(arg - 1); - LValue *src = new_LValue(func, FILE_GPR); - bld.mkCvt(OP_CVT, TYPE_U32, src, TYPE_F32, layer); - bld.mkOp2(OP_MIN, TYPE_U32, src, src, bld.loadImm(NULL, 511)); - i->setSrc(arg - 1, src); - + if (i->op != OP_TXF) { + // array index must be converted to u32, but it's already an integer + // for TXF + Value *layer = i->getSrc(arg - 1); + LValue *src = new_LValue(func, FILE_GPR); + bl...

[PATCH] gm107/ir: use lane 0 for manual textureGrad handling

2017 Dec 20

[PATCH] gm107/ir: use lane 0 for manual textureGrad handling

...e same logic, but using SM50-friendly primitives. + static const uint8_t qOps[2] = + { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }; Value *def[4][4]; - Value *crd[3]; + Value *crd[3], *arr, *shadow; Value *tmp; Instruction *tex, *add; - Value *zero = bld.loadImm(bld.getSSA(), 0); + Value *quad = bld.mkImm(SHFL_BOUND_QUAD); int l, c; const int dim = i->tex.target.getDim() + i->tex.target.isCube(); const int array = i->tex.target.isArray(); @@ -115,35 +112,40 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i) for (c = 0; c...

[PATCH 1/2] nv50/ir: fix s32 x s32 -> high s32 multiply logic

2014 May 18

[PATCH 1/2] nv50/ir: fix s32 x s32 -> high s32 multiply logic

...= bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]); @@ -75,24 +92,76 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul) i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]); if (highResult) { - Value *r[4]; + Value *c[2]; + Value *r[5]; Value *imm = bld->loadImm(NULL, 1 << (halfSize * 8)); c[0] = bld->getSSA(1, FILE_FLAGS); c[1] = bld->getSSA(1, FILE_FLAGS); - for (int j = 0; j < 4; ++j) + for (int j = 0; j < 5; ++j) r[j] = bld->getSSA(fullSize); i[8] = bld->mkOp2(OP_SHR, fTy, r[0], t[1], b...

[PATCH] nvc0: do quadops on the right texture coordinates for TXD

2014 Jul 05

[PATCH] nvc0: do quadops on the right texture coordinates for TXD

...veau/codegen/nv50_ir_lowering_nvc0.cpp index 8f26645..0e24db7 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -712,6 +712,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i) Value *zero = bld.loadImm(bld.getSSA(), 0); int l, c; const int dim = i->tex.target.getDim(); + const int array = i->tex.target.isArray(); i->op = OP_TEX; // no need to clone dPdx/dPdy later @@ -722,7 +723,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i) for (l = 0; l < 4; ++l) {...

[PATCH] nv50/ir: avoid creating instructions that can't be emitted

2014 Sep 01

[PATCH] nv50/ir: avoid creating instructions that can't be emitted

...:expr(Instruction *i, ImmediateValue src0; if (i->src(0).getImmediate(src0)) expr(i, src0, *i->getSrc(1)->asImm()); + if (i->saturate && !prog->getTarget()->isSatSupported(i)) { + bld.setPosition(i, false); + i->setSrc(1, bld.loadImm(NULL, res.data.u32)); + } } else { i->op = i->saturate ? OP_SAT : OP_MOV; /* SAT handled by unary() */ } -- 1.8.5.5

[PATCH] nv50: enable cube map array texture support

2014 Feb 19

[PATCH] nv50: enable cube map array texture support

...ing_nv50.cpp index 984a8ca..0908447 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp @@ -664,7 +664,7 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i) bld.mkOp2(OP_MIN, TYPE_U32, src, src, bld.loadImm(NULL, 511)); i->setSrc(arg - 1, src); } - if (i->tex.target.isCube()) { + if (i->tex.target.isCube() && i->srcCount() > 4) { std::vector<Value *> acube, a2d; int c; @@ -681,9 +681,10 @@ NV50LoweringPreSSA::handleTEX(TexIn...

[PATCH] gm107/ir: fix texture argument order

2014 Sep 25

[PATCH] gm107/ir: fix texture argument order

...>= 0) ? 1 : 0; + if (chipset >= NVISA_GM107_CHIPSET) + s += dim; if (i->tex.target.isArray()) { - bld.mkOp3(OP_INSBF, TYPE_U32, i->getSrc(0), + bld.mkOp3(OP_INSBF, TYPE_U32, i->getSrc(s), bld.loadImm(NULL, imm), bld.mkImm(0xc10), i->getSrc(s)); } else { diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index d47fed2..898653c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp...

[PATCH 1/2] nv50/ir: make sure that texprep/texquerylod's args get coalesced

2014 May 13

[PATCH 1/2] nv50/ir: make sure that texprep/texquerylod's args get coalesced

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu> Cc: "10.2" <mesa-stable at lists.freedesktop.org> --- Not 100% sure of the significance of this code, but this seems like the correct thing to do... will definitely run it through a full piglit run before pushing out. src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git

[PATCH] nvc0/ir: move sample id to second source arg to fix sampler2DMS

2014 Mar 20

[PATCH] nvc0/ir: move sample id to second source arg to fix sampler2DMS

...ipset >= NVISA_GK104_CHIPSET || + !i->tex.useOffsets || !i->tex.target.isMS()); + // offset is last source (lod 1st, dc 2nd) if (i->tex.useOffsets) { uint32_t value = 0; @@ -741,7 +748,7 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) i->setSrc(s, bld.loadImm(NULL, value)); } - if (prog->getTarget()->getChipset() >= NVISA_GK104_CHIPSET) { + if (chipset >= NVISA_GK104_CHIPSET) { // // If TEX requires more than 4 sources, the 2nd register tuple must be // aligned to 4, even if it consists of just a single 4-byte...

[PATCH] nv50: enable texture query lod

2014 Feb 28

[PATCH] nv50: enable texture query lod

...ultiply by 1/256 + */ + for (int def = 0; def < 2; ++def) { + if (!i->defExists(def)) + continue; + bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(def), TYPE_S32, i->getDef(def)); + bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(def), + i->getDef(def), bld.loadImm(NULL, 1.0f / 256)); + } + return true; +} + +bool NV50LoweringPreSSA::handleSET(Instruction *i) { if (i->dType == TYPE_F32) { @@ -1196,6 +1217,8 @@ NV50LoweringPreSSA::visit(Instruction *i) return handleTXL(i->asTex()); case OP_TXD: return handleTXD(i->asTex());...

[PATCH] gm107/ir: use lane 0 for manual textureGrad handling

2017 Dec 20

[PATCH] gm107/ir: use lane 0 for manual textureGrad handling

...imitives. > + static const uint8_t qOps[2] = > + { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }; > Value *def[4][4]; > - Value *crd[3]; > + Value *crd[3], *arr, *shadow; > Value *tmp; > Instruction *tex, *add; > - Value *zero = bld.loadImm(bld.getSSA(), 0); > + Value *quad = bld.mkImm(SHFL_BOUND_QUAD); > int l, c; > const int dim = i->tex.target.getDim() + i->tex.target.isCube(); > const int array = i->tex.target.isArray(); > @@ -115,35 +112,40 @@ GM107LoweringPass::handleManualTXD(TexInstruction...

[PATCH 1/2] nvc0: add support for texture gather

2014 Apr 04

[PATCH 1/2] nvc0: add support for texture gather

...value |= (i->tex.offset[0][c] & 0xff) << (c * 8); + } else { + for (n = 0; n < i->tex.useOffsets; ++n) + for (c = 0; c < 3; ++c) + value |= (i->tex.offset[n][c] & 0xf) << (n * 12 + c * 4); + } i->setSrc(s, bld.loadImm(NULL, value)); } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index fa3145e..1ee6f72 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -174,7 +174,6 @@ nvc0_screen...

search for: loadimm