Displaying 20 results from an estimated 23 matches for "loadimm".
2014 Jul 03
1
[PATCH v3 1/2] nv50/ir: Add support for the double Type to BuildUtil
...drivers/nouveau/codegen/nv50_ir_build_util.cpp
@@ -391,6 +391,17 @@ BuildUtil::mkImm(float f)
return mkImm(u.u32);
}
+ImmediateValue *
+BuildUtil::mkImm(double f)
+{
+ union {
+ double f64;
+ uint64_t u64;
+ } u;
+ u.f64 = f;
+ return mkImm(u.u64);
+}
+
Value *
BuildUtil::loadImm(Value *dst, float f)
{
@@ -398,6 +409,12 @@ BuildUtil::loadImm(Value *dst, float f)
}
Value *
+BuildUtil::loadImm(Value *dst, double u)
+{
+ return mkOp1v(OP_MOV, TYPE_F64, dst ? dst : getScratch(8), mkImm(u));
+}
+
+Value *
BuildUtil::loadImm(Value *dst, uint32_t u)
{
return mkOp1v(OP...
2015 Feb 23
2
[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)
...RSQ:
+ *
+ * 0x5fe6eb50c7b537a9 - num >> 1
+ *
+ * For RCP, we will then square it.
+ */
+ Value *abs, *guess, *parts[2], *input[2], *shr[4], *pred;
+
+ bld.setPosition(i, false);
+
+ abs = bld.mkOp1v(OP_ABS, TYPE_F64, bld.getSSA(8), i->getSrc(0));
+
+ parts[0] = bld.loadImm(NULL, 0xc7b537a9);
+ parts[1] = bld.loadImm(NULL, 0x5fe6eb50);
+ guess = bld.mkOp2v(OP_MERGE, TYPE_F64, bld.getSSA(8), parts[0], parts[1]);
+
+ bld.mkSplit(input, 4, abs);
+ shr[0] = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(4), input[0], bld.mkImm(1));
+ shr[1] = bld.mkOp2v(OP_SHR, TYPE_U3...
2015 Feb 23
2
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...bld.mkSplit(src, 4, i->getSrc(0));
> + Value *input = i->getSrc(0);
> + Value *src[2], *dst[2], *guess, *def = i->getDef(0);
> + bld.mkSplit(src, 4, input);
>
> // 2. We don't care about the low 32 bits of the destination. Stick a 0 in.
> dst[0] = bld.loadImm(NULL, 0);
> @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
>
> // 4. Recombine the two dst pieces back into the original destination.
> bld.setPosition(i, true);
> - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
> + guess = bld.mkOp2v(OP_MERGE...
2015 Feb 23
0
[PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...], *def = i->getDef(0);
- bld.mkSplit(src, 4, i->getSrc(0));
+ Value *input = i->getSrc(0);
+ Value *src[2], *dst[2], *guess, *def = i->getDef(0);
+ bld.mkSplit(src, 4, input);
// 2. We don't care about the low 32 bits of the destination. Stick a 0 in.
dst[0] = bld.loadImm(NULL, 0);
@@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
// 4. Recombine the two dst pieces back into the original destination.
bld.setPosition(i, true);
- bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
+ guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst...
2014 Jul 05
1
[PATCH 1/2] nvc0/ir: use manual TXD when offsets are involved
Something about how we're implementing offsets for TXD is wrong; just
flip to the generic quadop-based implementation in that case.
This is the minimal fix appropriate for backporting.
Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
Cc: <mesa-stable at lists.freedesktop.org>
---
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 3 ++-
1 file changed, 2
2015 Feb 23
0
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...->getSrc(0));
>> + Value *input = i->getSrc(0);
>> + Value *src[2], *dst[2], *guess, *def = i->getDef(0);
>> + bld.mkSplit(src, 4, input);
>>
>> // 2. We don't care about the low 32 bits of the destination. Stick a 0 in.
>> dst[0] = bld.loadImm(NULL, 0);
>> @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
>>
>> // 4. Recombine the two dst pieces back into the original destination.
>> bld.setPosition(i, true);
>> - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
>> + guess...
2019 Oct 14
1
[PATCH] gm107/ir: fix loading z offset for layered 3d image bindings
...etSSA(), ind, bld.mkImm(11));
+ Value *is_3d = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), v, bld.mkImm(1));
+ pred2d = bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
+ TYPE_U32, bld.mkImm(0), is_3d);
+
+ bld.mkOp2(OP_SHR, TYPE_U32, v, v, bld.loadImm(NULL, 16));
+ su->moveSources(dim, 1);
+ su->setSrc(dim, v);
+ su->tex.target = nv50_ir::TEX_TARGET_3D;
+ pos++;
+ }
+
if (su->tex.bindless)
handle = ind;
else
handle = loadTexHandle(ind, slot + 32);
+
su->setSrc(arg + pos, handle);...
2014 Aug 08
2
[PATCH 1/3] nvc0/ir: add base tex offset for fermi indirect tex case
Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
.../drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index f010767..4a9e48f 100644
---
2013 Dec 08
0
[PATCH] nv50: TXF already has integer arguments, don't try to convert from f32
...es(dref, lod);
- // array index must be converted to u32
if (i->tex.target.isArray()) {
- Value *layer = i->getSrc(arg - 1);
- LValue *src = new_LValue(func, FILE_GPR);
- bld.mkCvt(OP_CVT, TYPE_U32, src, TYPE_F32, layer);
- bld.mkOp2(OP_MIN, TYPE_U32, src, src, bld.loadImm(NULL, 511));
- i->setSrc(arg - 1, src);
-
+ if (i->op != OP_TXF) {
+ // array index must be converted to u32, but it's already an integer
+ // for TXF
+ Value *layer = i->getSrc(arg - 1);
+ LValue *src = new_LValue(func, FILE_GPR);
+ bl...
2017 Dec 20
2
[PATCH] gm107/ir: use lane 0 for manual textureGrad handling
...e same logic, but using SM50-friendly primitives.
+ static const uint8_t qOps[2] =
+ { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) };
Value *def[4][4];
- Value *crd[3];
+ Value *crd[3], *arr, *shadow;
Value *tmp;
Instruction *tex, *add;
- Value *zero = bld.loadImm(bld.getSSA(), 0);
+ Value *quad = bld.mkImm(SHFL_BOUND_QUAD);
int l, c;
const int dim = i->tex.target.getDim() + i->tex.target.isCube();
const int array = i->tex.target.isArray();
@@ -115,35 +112,40 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
for (c = 0; c...
2014 May 18
1
[PATCH 1/2] nv50/ir: fix s32 x s32 -> high s32 multiply logic
...= bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]);
@@ -75,24 +92,76 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]);
if (highResult) {
- Value *r[4];
+ Value *c[2];
+ Value *r[5];
Value *imm = bld->loadImm(NULL, 1 << (halfSize * 8));
c[0] = bld->getSSA(1, FILE_FLAGS);
c[1] = bld->getSSA(1, FILE_FLAGS);
- for (int j = 0; j < 4; ++j)
+ for (int j = 0; j < 5; ++j)
r[j] = bld->getSSA(fullSize);
i[8] = bld->mkOp2(OP_SHR, fTy, r[0], t[1], b...
2014 Jul 05
0
[PATCH] nvc0: do quadops on the right texture coordinates for TXD
...veau/codegen/nv50_ir_lowering_nvc0.cpp
index 8f26645..0e24db7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -712,6 +712,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
Value *zero = bld.loadImm(bld.getSSA(), 0);
int l, c;
const int dim = i->tex.target.getDim();
+ const int array = i->tex.target.isArray();
i->op = OP_TEX; // no need to clone dPdx/dPdy later
@@ -722,7 +723,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
for (l = 0; l < 4; ++l) {...
2014 Sep 01
0
[PATCH] nv50/ir: avoid creating instructions that can't be emitted
...:expr(Instruction *i,
ImmediateValue src0;
if (i->src(0).getImmediate(src0))
expr(i, src0, *i->getSrc(1)->asImm());
+ if (i->saturate && !prog->getTarget()->isSatSupported(i)) {
+ bld.setPosition(i, false);
+ i->setSrc(1, bld.loadImm(NULL, res.data.u32));
+ }
} else {
i->op = i->saturate ? OP_SAT : OP_MOV; /* SAT handled by unary() */
}
--
1.8.5.5
2014 Feb 19
0
[PATCH] nv50: enable cube map array texture support
...ing_nv50.cpp
index 984a8ca..0908447 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -664,7 +664,7 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i)
bld.mkOp2(OP_MIN, TYPE_U32, src, src, bld.loadImm(NULL, 511));
i->setSrc(arg - 1, src);
}
- if (i->tex.target.isCube()) {
+ if (i->tex.target.isCube() && i->srcCount() > 4) {
std::vector<Value *> acube, a2d;
int c;
@@ -681,9 +681,10 @@ NV50LoweringPreSSA::handleTEX(TexIn...
2014 Sep 25
0
[PATCH] gm107/ir: fix texture argument order
...>= 0) ? 1 : 0;
+ if (chipset >= NVISA_GM107_CHIPSET)
+ s += dim;
if (i->tex.target.isArray()) {
- bld.mkOp3(OP_INSBF, TYPE_U32, i->getSrc(0),
+ bld.mkOp3(OP_INSBF, TYPE_U32, i->getSrc(s),
bld.loadImm(NULL, imm), bld.mkImm(0xc10),
i->getSrc(s));
} else {
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index d47fed2..898653c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp...
2014 May 13
1
[PATCH 1/2] nv50/ir: make sure that texprep/texquerylod's args get coalesced
Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
Cc: "10.2" <mesa-stable at lists.freedesktop.org>
---
Not 100% sure of the significance of this code, but this seems like the
correct thing to do... will definitely run it through a full piglit run before
pushing out.
src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git
2014 Mar 20
0
[PATCH] nvc0/ir: move sample id to second source arg to fix sampler2DMS
...ipset >= NVISA_GK104_CHIPSET ||
+ !i->tex.useOffsets || !i->tex.target.isMS());
+
// offset is last source (lod 1st, dc 2nd)
if (i->tex.useOffsets) {
uint32_t value = 0;
@@ -741,7 +748,7 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
i->setSrc(s, bld.loadImm(NULL, value));
}
- if (prog->getTarget()->getChipset() >= NVISA_GK104_CHIPSET) {
+ if (chipset >= NVISA_GK104_CHIPSET) {
//
// If TEX requires more than 4 sources, the 2nd register tuple must be
// aligned to 4, even if it consists of just a single 4-byte...
2014 Feb 28
0
[PATCH] nv50: enable texture query lod
...ultiply by 1/256
+ */
+ for (int def = 0; def < 2; ++def) {
+ if (!i->defExists(def))
+ continue;
+ bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(def), TYPE_S32, i->getDef(def));
+ bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(def),
+ i->getDef(def), bld.loadImm(NULL, 1.0f / 256));
+ }
+ return true;
+}
+
+bool
NV50LoweringPreSSA::handleSET(Instruction *i)
{
if (i->dType == TYPE_F32) {
@@ -1196,6 +1217,8 @@ NV50LoweringPreSSA::visit(Instruction *i)
return handleTXL(i->asTex());
case OP_TXD:
return handleTXD(i->asTex());...
2017 Dec 20
0
[PATCH] gm107/ir: use lane 0 for manual textureGrad handling
...imitives.
> + static const uint8_t qOps[2] =
> + { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) };
> Value *def[4][4];
> - Value *crd[3];
> + Value *crd[3], *arr, *shadow;
> Value *tmp;
> Instruction *tex, *add;
> - Value *zero = bld.loadImm(bld.getSSA(), 0);
> + Value *quad = bld.mkImm(SHFL_BOUND_QUAD);
> int l, c;
> const int dim = i->tex.target.getDim() + i->tex.target.isCube();
> const int array = i->tex.target.isArray();
> @@ -115,35 +112,40 @@ GM107LoweringPass::handleManualTXD(TexInstruction...
2014 Apr 04
2
[PATCH 1/2] nvc0: add support for texture gather
...value |= (i->tex.offset[0][c] & 0xff) << (c * 8);
+ } else {
+ for (n = 0; n < i->tex.useOffsets; ++n)
+ for (c = 0; c < 3; ++c)
+ value |= (i->tex.offset[n][c] & 0xf) << (n * 12 + c * 4);
+ }
i->setSrc(s, bld.loadImm(NULL, value));
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index fa3145e..1ee6f72 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -174,7 +174,6 @@ nvc0_screen...