search for: mkop2v

Displaying 20 results from an estimated 25 matches for "mkop2v".

2015 Feb 23
2
[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)
...n square it. + */ + Value *abs, *guess, *parts[2], *input[2], *shr[4], *pred; + + bld.setPosition(i, false); + + abs = bld.mkOp1v(OP_ABS, TYPE_F64, bld.getSSA(8), i->getSrc(0)); + + parts[0] = bld.loadImm(NULL, 0xc7b537a9); + parts[1] = bld.loadImm(NULL, 0x5fe6eb50); + guess = bld.mkOp2v(OP_MERGE, TYPE_F64, bld.getSSA(8), parts[0], parts[1]); + + bld.mkSplit(input, 4, abs); + shr[0] = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(4), input[0], bld.mkImm(1)); + shr[1] = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(4), input[1], bld.mkImm(1)); + + // If the bottom bit of the high word w...
2015 Feb 23
2
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...] = bld.loadImm(NULL, 0); > @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) > > // 4. Recombine the two dst pieces back into the original destination. > bld.setPosition(i, true); > - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); > + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); > + > + // 5. Perform 2 Newton-Raphson steps > + if (i->op == OP_RCP) { > + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 > + Value *two = bld.getSSA(8); > + > + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld...
2015 Feb 23
0
[PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...ation. Stick a 0 in. dst[0] = bld.loadImm(NULL, 0); @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) // 4. Recombine the two dst pieces back into the original destination. bld.setPosition(i, true); - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); + + // 5. Perform 2 Newton-Raphson steps + if (i->op == OP_RCP) { + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 + Value *two = bld.getSSA(8); + + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f)); + + gue...
2015 Feb 23
0
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...0); >> @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) >> >> // 4. Recombine the two dst pieces back into the original destination. >> bld.setPosition(i, true); >> - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); >> + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); >> + >> + // 5. Perform 2 Newton-Raphson steps >> + if (i->op == OP_RCP) { >> + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 >> + Value *two = bld.getSSA(8); >> + >> + bld.mkCvt(OP_CVT,...
2014 May 21
2
[Mesa-dev] [PATCH 04/12] nv50/ir/tgsi: TGSI_OPCODE_POW replicates its result
..._OPCODE_USHR: > @@ -2254,6 +2253,11 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) > FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) > mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c)); > break; > + case TGSI_OPCODE_POW: > + val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0)); > + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) > + mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); > + break; Can you use mkMov ? Pretty please :) -Emil > case TGSI_OPCODE_EX2: > case TGSI_OPCODE_LG2: >...
2014 Aug 08
2
[PATCH 1/3] nvc0/ir: add base tex offset for fermi indirect tex case
...oweringPass::handleTEX(TexInstruction *i) Value *ticRel = i->getIndirectR(); Value *tscRel = i->getIndirectS(); - if (ticRel) + if (ticRel) { i->setSrc(i->tex.rIndirectSrc, NULL); - if (tscRel) + if (i->tex.r) + ticRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), + ticRel, bld.mkImm(i->tex.r)); + } + if (tscRel) { i->setSrc(i->tex.sIndirectSrc, NULL); + if (i->tex.s) + tscRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), +...
2014 May 21
2
[Mesa-dev] [PATCH 04/12] nv50/ir/tgsi: TGSI_OPCODE_POW replicates its result
...@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) >>> FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) >>> mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c)); >>> break; >>> + case TGSI_OPCODE_POW: >>> + val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0)); >>> + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) >>> + mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); >>> + break; >> Can you use mkMov ? Pretty please :) > > The two are used fairly interc...
2019 Oct 14
1
[PATCH] gm107/ir: fix loading z offset for layered 3d image bindings
...weringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless { uint32_t base = slot * NVC0_SU_INFO__STRIDE; + // We don't upload surface info for bindless for GM107+ + assert(!bindless || targ->getChipset() < NVISA_GM107_CHIPSET); + if (ptr) { ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(slot)); if (bindless) @@ -2204,7 +2207,7 @@ getDestType(const ImgType type) { } void -NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su) +NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su, Instruction **loaded) { const T...
2014 Jan 13
20
[PATCH 00/19] nv50: add sampler2DMS/GP support to get OpenGL 3.2
OK, so there's a bunch of stuff in here. The geometry stuff is based on the work started by Bryan Cain and Christoph Bumiller. Patches 01-12: Add support for geometry shaders and fix related issues Patches 13-14: Make it possible for fb clears to operate on texture attachments with an explicit layer set (as is allowed in gl 3.2). Patches 15-17: Make ARB_texture_multisample work
2016 Mar 17
4
[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
...true; default: - assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); assert(base->reg.size == 8); if (ptr) base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); assert(base->reg.size == 8); atom->setIndirect(0, 0, base); + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; return true; } base = @@ -1963,7 +1964,7 @@ NVC0LoweringPass::visit(Instruction *i) } else if (i->...
2014 May 20
0
[PATCH 04/12] nv50/ir/tgsi: TGSI_OPCODE_POW replicates its result
...GSI_OPCODE_ISHR: case TGSI_OPCODE_USHR: @@ -2254,6 +2253,11 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c)); break; + case TGSI_OPCODE_POW: + val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0)); + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) + mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); + break; case TGSI_OPCODE_EX2: case TGSI_OPCODE_LG2: val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef...
2014 May 21
0
[Mesa-dev] [PATCH 04/12] nv50/ir/tgsi: TGSI_OPCODE_POW replicates its result
...@ -2254,6 +2253,11 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) >> FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) >> mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c)); >> break; >> + case TGSI_OPCODE_POW: >> + val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0)); >> + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) >> + mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); >> + break; > Can you use mkMov ? Pretty please :) The two are used fairly interchangeably, and a lot of th...
2014 May 21
0
[Mesa-dev] [PATCH 04/12] nv50/ir/tgsi: TGSI_OPCODE_POW replicates its result
...nstruction(const struct tgsi_full_instruction *insn) >>>> FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) >>>> mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c)); >>>> break; >>>> + case TGSI_OPCODE_POW: >>>> + val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0)); >>>> + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) >>>> + mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); >>>> + break; >>> Can you use mkMov ? Pretty please :) >> >> The two...
2016 Apr 08
2
[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
...getFile() == FILE_MEMORY_GLOBAL); >> + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); >> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); >> assert(base->reg.size == 8); >> if (ptr) >> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); >> assert(base->reg.size == 8); >> atom->setIndirect(0, 0, base); >> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; >> return true; >> } >> base = >> @@ -1963,7 +1964,...
2016 Jan 14
0
[PATCH] nv50/ir: rebase indirect temp arrays to 0, so that we use less lmem space
...lue *val, Value *ptr); + void adjustTempIndex(int arrayId, int &idx, int &idx2d) const; Value *applySrcMod(Value *, int s, int c); Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr); @@ -1679,11 +1698,23 @@ Converter::shiftAddress(Value *index) return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4)); } +void +Converter::adjustTempIndex(int arrayId, int &idx, int &idx2d) const +{ + std::map<int, tgsi::Source::TempBase>::const_iterator it = + code->indirectTempBases.find(arrayId); + if (it == code->indire...
2016 Mar 23
0
[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
...ert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); > + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); > base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); > assert(base->reg.size == 8); > if (ptr) > base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); > assert(base->reg.size == 8); > atom->setIndirect(0, 0, base); > + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; > return true; > } > base = > @@ -1963,7 +1964,7 @@ NVC0LoweringPass::visit...
2016 Apr 12
2
[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
...>>> + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); >>>> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); >>>> assert(base->reg.size == 8); >>>> if (ptr) >>>> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); >>>> assert(base->reg.size == 8); >>>> atom->setIndirect(0, 0, base); >>>> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; >>>> return true; >>>> } >>...
2014 May 20
14
[PATCH 00/12] Cherry-pick nv50/nvc0 patches from gallium-nine
I went through the gallium-nine tree and picked out nouveau patches that are general bug-fixes. The first bunch I'd like to also get into 10.2. I've reviewed all of them and they make sense to me, but sending them out for public review as well in case there are any objections. Unless I hear objections, I'd like to push this by Friday. Christoph Bumiller (11): nv50,nvc0: always pull
2016 Apr 08
0
[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
...MORY_GLOBAL); >>> + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); >>> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); >>> assert(base->reg.size == 8); >>> if (ptr) >>> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); >>> assert(base->reg.size == 8); >>> atom->setIndirect(0, 0, base); >>> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; >>> return true; >>> } >>> base = >...
2016 Apr 14
0
[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
...(0).getFile() == FILE_MEMORY_BUFFER); >>>>> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * >>>>> 16); >>>>> assert(base->reg.size == 8); >>>>> if (ptr) >>>>> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); >>>>> assert(base->reg.size == 8); >>>>> atom->setIndirect(0, 0, base); >>>>> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; >>>>> return true; >>>>...