thr3ads.net - search: "mkop2"

Displaying 20 results from an estimated 26 matches for "mkop2".

Did you mean: mkop2v

[PATCH 1/2] nv50/ir: fix s32 x s32 -> high s32 multiply logic

2014 May 18

[PATCH 1/2] nv50/ir: fix s32 x s32 -> high s32 multiply logic

...1], mul->getSrc(1)); + } + // split sources into halves - i[0] = bld->mkSplit(a, halfSize, mul->getSrc(0)); - i[1] = bld->mkSplit(b, halfSize, mul->getSrc(1)); + i[0] = bld->mkSplit(a, halfSize, s[0]); + i[1] = bld->mkSplit(b, halfSize, s[1]); i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]); i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]); @@ -75,24 +92,76 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul) i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]); if (highResult) { - Value *r[4]; + Value *c[2]; +...

[PATCH] nv50/ir: change the way float face is returned

2015 Jan 05

[PATCH] nv50/ir: change the way float face is returned

...codegen/nv50_ir_lowering_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp @@ -1094,8 +1094,9 @@ NV50LoweringPreSSA::handleRDSV(Instruction *i) case SV_FACE: bld.mkInterp(NV50_IR_INTERP_FLAT, def, addr, NULL); if (i->dType == TYPE_F32) { - bld.mkOp2(OP_AND, TYPE_U32, def, def, bld.mkImm(0x80000000)); - bld.mkOp2(OP_XOR, TYPE_U32, def, def, bld.mkImm(0xbf800000)); + bld.mkOp2(OP_OR, TYPE_U32, def, def, bld.mkImm(0x00000001)); + bld.mkOp1(OP_NEG, TYPE_S32, def, def); + bld.mkCvt(OP_CVT, TYPE_F32, def, TYPE_S32, de...

[PATCH 01/11] nvc0/ir: add emission of dadd/dmul/dmad opcodes, fix minmax

2015 Feb 20

[PATCH 01/11] nvc0/ir: add emission of dadd/dmul/dmad opcodes, fix minmax

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu> --- .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 66 +++++++++++++++++++++- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index dfb093c..e38a3b8 100644 ---

[PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

2014 Nov 18

[PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

...50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -2512,7 +2512,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) src0 = fetchSrc(0, c); val0 = getScratch(); mkOp1(OP_FLOOR, TYPE_F32, val0, src0); - mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0); + mkOp2(OP_SUB, TYPE_F32, val0, src0, val0); + mkOp1(OP_SAT, TYPE_F32, dst0[c], val0); } break; case TGSI_OPCODE_ROUND: -- 2.0.4

[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

2014 Nov 18

[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

.../nouveau/codegen/nv50_ir_from_tgsi.cpp >> @@ -2512,7 +2512,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) >> src0 = fetchSrc(0, c); >> val0 = getScratch(); >> mkOp1(OP_FLOOR, TYPE_F32, val0, src0); >> - mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0); >> + mkOp2(OP_SUB, TYPE_F32, val0, src0, val0); >> + mkOp1(OP_SAT, TYPE_F32, dst0[c], val0); >> } >> break; >> case TGSI_OPCODE_ROUND: >> > > I don't understand the math behind th...

[PATCH] gm107/ir: take relative pfetch offset into account

2014 Sep 26

[PATCH] gm107/ir: take relative pfetch offset into account

...ium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp index 113f372..64989ac 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp @@ -159,7 +159,10 @@ GM107LoweringPass::handlePFETCH(Instruction *i) bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16)); bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff)); bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff)); - bld.mkOp1(OP_MOV , TYPE_U32, tmp2, bld.mkImm(i->getSrc(0)->reg.data.u32)); + if (i->getSrc(1)) + bld.mkOp2...

[PATCH 1/2] nv50/ir: make sure that texprep/texquerylod's args get coalesced

2014 May 13

[PATCH 1/2] nv50/ir: make sure that texprep/texquerylod's args get coalesced

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu> Cc: "10.2" <mesa-stable at lists.freedesktop.org> --- Not 100% sure of the significance of this code, but this seems like the correct thing to do... will definitely run it through a full piglit run before pushing out. src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git

[PATCH] gm107/ir: use lane 0 for manual textureGrad handling

2017 Dec 20

[PATCH] gm107/ir: use lane 0 for manual textureGrad handling

...dow, i->getSrc(array + dim), lane, quad); + } + + // mov coordinates from lane l to all lanes for (c = 0; c < dim; ++c) { - bld.mkOp3(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), - bld.mkImm(l), bld.mkImm(SHFL_BOUND_QUAD)); - add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], crd[c], zero); - add->subOp = 0x00; - add->lanes = 1; /* abused for .ndv */ + bld.mkOp3(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), lane, quad); } // add dPdx from lane l to lanes dx for (c = 0; c < dim; ++...

[PATCH] gm107/ir: fix loading z offset for layered 3d image bindings

2019 Oct 14

[PATCH] gm107/ir: fix loading z offset for layered 3d image bindings

...weringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless { uint32_t base = slot * NVC0_SU_INFO__STRIDE; + // We don't upload surface info for bindless for GM107+ + assert(!bindless || targ->getChipset() < NVISA_GM107_CHIPSET); + if (ptr) { ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(slot)); if (bindless) @@ -2204,7 +2207,7 @@ getDestType(const ImgType type) { } void -NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su) +NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su, Instruction **loaded) { const...

[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

2014 Nov 18

[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

...>>> @@ -2512,7 +2512,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) >>>> src0 = fetchSrc(0, c); >>>> val0 = getScratch(); >>>> mkOp1(OP_FLOOR, TYPE_F32, val0, src0); >>>> - mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0); >>>> + mkOp2(OP_SUB, TYPE_F32, val0, src0, val0); >>>> + mkOp1(OP_SAT, TYPE_F32, dst0[c], val0); >>>> } >>>> break; >>>> case TGSI_OPCODE_ROUND: >>>>...

[PATCH] gm107/ir: use lane 0 for manual textureGrad handling

2017 Dec 20

[PATCH] gm107/ir: use lane 0 for manual textureGrad handling

...below). > + } > + > + // mov coordinates from lane l to all lanes > for (c = 0; c < dim; ++c) { > - bld.mkOp3(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), > - bld.mkImm(l), bld.mkImm(SHFL_BOUND_QUAD)); > - add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], crd[c], zero); > - add->subOp = 0x00; > - add->lanes = 1; /* abused for .ndv */ > + bld.mkOp3(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), lane, quad); > } > > // add dPdx from lane l to lanes dx >...

[PATCH] nv50: TXF already has integer arguments, don't try to convert from f32

2013 Dec 08

[PATCH] nv50: TXF already has integer arguments, don't try to convert from f32

...p == OP_TXL) i->swapSources(dref, lod); - // array index must be converted to u32 if (i->tex.target.isArray()) { - Value *layer = i->getSrc(arg - 1); - LValue *src = new_LValue(func, FILE_GPR); - bld.mkCvt(OP_CVT, TYPE_U32, src, TYPE_F32, layer); - bld.mkOp2(OP_MIN, TYPE_U32, src, src, bld.loadImm(NULL, 511)); - i->setSrc(arg - 1, src); - + if (i->op != OP_TXF) { + // array index must be converted to u32, but it's already an integer + // for TXF + Value *layer = i->getSrc(arg - 1); + LValue *src =...

[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

2014 Nov 18

[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

.../src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp > @@ -2512,7 +2512,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) > src0 = fetchSrc(0, c); > val0 = getScratch(); > mkOp1(OP_FLOOR, TYPE_F32, val0, src0); > - mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0); > + mkOp2(OP_SUB, TYPE_F32, val0, src0, val0); > + mkOp1(OP_SAT, TYPE_F32, dst0[c], val0); > } > break; > case TGSI_OPCODE_ROUND: > I don't understand the math behind this. For any such large number, a...

[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

2014 Nov 18

[Mesa-dev] [PATCH] nv50/ir: saturate FRC result to avoid completely bogus values

...0_ir_from_tgsi.cpp >>> @@ -2512,7 +2512,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) >>> src0 = fetchSrc(0, c); >>> val0 = getScratch(); >>> mkOp1(OP_FLOOR, TYPE_F32, val0, src0); >>> - mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0); >>> + mkOp2(OP_SUB, TYPE_F32, val0, src0, val0); >>> + mkOp1(OP_SAT, TYPE_F32, dst0[c], val0); >>> } >>> break; >>> case TGSI_OPCODE_ROUND: >>> >> >> I don'...

[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)

2015 Feb 23

[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)

...n square it. + */ + Value *abs, *guess, *parts[2], *input[2], *shr[4], *pred; + + bld.setPosition(i, false); + + abs = bld.mkOp1v(OP_ABS, TYPE_F64, bld.getSSA(8), i->getSrc(0)); + + parts[0] = bld.loadImm(NULL, 0xc7b537a9); + parts[1] = bld.loadImm(NULL, 0x5fe6eb50); + guess = bld.mkOp2v(OP_MERGE, TYPE_F64, bld.getSSA(8), parts[0], parts[1]); + + bld.mkSplit(input, 4, abs); + shr[0] = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(4), input[0], bld.mkImm(1)); + shr[1] = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(4), input[1], bld.mkImm(1)); + + // If the bottom bit of the high word...

[PATCH] nv50/ir: make ARB_viewport_array behave like it does with other drivers

2014 Jun 23

[PATCH] nv50/ir: make ARB_viewport_array behave like it does with other drivers

...1261,8 @@ private: Stack joinBBs; // fork BB, for inserting join ops on ENDIF Stack loopBBs; // loop headers Stack breakBBs; // end of / after loop + + Value *viewport; }; Symbol * @@ -1555,8 +1560,15 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c, mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val); } else if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) { - if (ptr || (info->out[idx].mask & (1 << c))) - mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val); + + if (ptr...

[PATCH v3] nv50/ir: make ARB_viewport_array behave like it does with other drivers

2014 Jun 23

[PATCH v3] nv50/ir: make ARB_viewport_array behave like it does with other drivers

...1263,8 @@ private: Stack joinBBs; // fork BB, for inserting join ops on ENDIF Stack loopBBs; // loop headers Stack breakBBs; // end of / after loop + + Value *viewport; }; Symbol * @@ -1555,8 +1562,16 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c, mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val); } else if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) { - if (ptr || (info->out[idx].mask & (1 << c))) - mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val); + + if (ptr...

[PATCH v2] nv50/ir: make ARB_viewport_array behave like it does with other drivers

2014 Jun 23

[PATCH v2] nv50/ir: make ARB_viewport_array behave like it does with other drivers

...1263,8 @@ private: Stack joinBBs; // fork BB, for inserting join ops on ENDIF Stack loopBBs; // loop headers Stack breakBBs; // end of / after loop + + Value *viewport; }; Symbol * @@ -1555,8 +1562,15 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c, mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val); } else if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) { - if (ptr || (info->out[idx].mask & (1 << c))) - mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val); + + if (ptr...

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

2015 Feb 23

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

...bout the low 32 bits of the destination. Stick a 0 in. > dst[0] = bld.loadImm(NULL, 0); > @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) > > // 4. Recombine the two dst pieces back into the original destination. > bld.setPosition(i, true); > - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); > + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); > + > + // 5. Perform 2 Newton-Raphson steps > + if (i->op == OP_RCP) { > + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 > + Value *two = bld.getSSA...

[PATCH 00/19] nv50: add sampler2DMS/GP support to get OpenGL 3.2

2014 Jan 13

[PATCH 00/19] nv50: add sampler2DMS/GP support to get OpenGL 3.2

OK, so there's a bunch of stuff in here. The geometry stuff is based on the work started by Bryan Cain and Christoph Bumiller. Patches 01-12: Add support for geometry shaders and fix related issues Patches 13-14: Make it possible for fb clears to operate on texture attachments with an explicit layer set (as is allowed in gl 3.2). Patches 15-17: Make ARB_texture_multisample work

search for: mkop2