thr3ads.net - search: "mkop2v"

Displaying 20 results from an estimated 25 matches for "mkop2v".

[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)

2015 Feb 23

[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)

...n square it. + */ + Value *abs, *guess, *parts[2], *input[2], *shr[4], *pred; + + bld.setPosition(i, false); + + abs = bld.mkOp1v(OP_ABS, TYPE_F64, bld.getSSA(8), i->getSrc(0)); + + parts[0] = bld.loadImm(NULL, 0xc7b537a9); + parts[1] = bld.loadImm(NULL, 0x5fe6eb50); + guess = bld.mkOp2v(OP_MERGE, TYPE_F64, bld.getSSA(8), parts[0], parts[1]); + + bld.mkSplit(input, 4, abs); + shr[0] = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(4), input[0], bld.mkImm(1)); + shr[1] = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(4), input[1], bld.mkImm(1)); + + // If the bottom bit of the high word w...

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

2015 Feb 23

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

...] = bld.loadImm(NULL, 0); > @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) > > // 4. Recombine the two dst pieces back into the original destination. > bld.setPosition(i, true); > - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); > + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); > + > + // 5. Perform 2 Newton-Raphson steps > + if (i->op == OP_RCP) { > + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 > + Value *two = bld.getSSA(8); > + > + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld...

[PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

2015 Feb 23

[PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

...ation. Stick a 0 in. dst[0] = bld.loadImm(NULL, 0); @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) // 4. Recombine the two dst pieces back into the original destination. bld.setPosition(i, true); - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); + + // 5. Perform 2 Newton-Raphson steps + if (i->op == OP_RCP) { + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 + Value *two = bld.getSSA(8); + + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f)); + + gue...

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

2015 Feb 23

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

...0); >> @@ -93,7 +94,42 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) >> >> // 4. Recombine the two dst pieces back into the original destination. >> bld.setPosition(i, true); >> - bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); >> + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); >> + >> + // 5. Perform 2 Newton-Raphson steps >> + if (i->op == OP_RCP) { >> + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 >> + Value *two = bld.getSSA(8); >> + >> + bld.mkCvt(OP_CVT,...

[Mesa-dev] [PATCH 04/12] nv50/ir/tgsi: TGSI_OPCODE_POW replicates its result

2014 May 21

[Mesa-dev] [PATCH 04/12] nv50/ir/tgsi: TGSI_OPCODE_POW replicates its result

..._OPCODE_USHR: > @@ -2254,6 +2253,11 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) > FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) > mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c)); > break; > + case TGSI_OPCODE_POW: > + val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0)); > + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) > + mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); > + break; Can you use mkMov ? Pretty please :) -Emil > case TGSI_OPCODE_EX2: > case TGSI_OPCODE_LG2: >...

[PATCH 1/3] nvc0/ir: add base tex offset for fermi indirect tex case

2014 Aug 08

[PATCH 1/3] nvc0/ir: add base tex offset for fermi indirect tex case

...oweringPass::handleTEX(TexInstruction *i) Value *ticRel = i->getIndirectR(); Value *tscRel = i->getIndirectS(); - if (ticRel) + if (ticRel) { i->setSrc(i->tex.rIndirectSrc, NULL); - if (tscRel) + if (i->tex.r) + ticRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), + ticRel, bld.mkImm(i->tex.r)); + } + if (tscRel) { i->setSrc(i->tex.sIndirectSrc, NULL); + if (i->tex.s) + tscRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), +...

[Mesa-dev] [PATCH 04/12] nv50/ir/tgsi: TGSI_OPCODE_POW replicates its result

2014 May 21

[Mesa-dev] [PATCH 04/12] nv50/ir/tgsi: TGSI_OPCODE_POW replicates its result

...@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) >>> FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) >>> mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c)); >>> break; >>> + case TGSI_OPCODE_POW: >>> + val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0)); >>> + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) >>> + mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); >>> + break; >> Can you use mkMov ? Pretty please :) > > The two are used fairly interc...

[PATCH] gm107/ir: fix loading z offset for layered 3d image bindings

2019 Oct 14

[PATCH] gm107/ir: fix loading z offset for layered 3d image bindings

...weringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless { uint32_t base = slot * NVC0_SU_INFO__STRIDE; + // We don't upload surface info for bindless for GM107+ + assert(!bindless || targ->getChipset() < NVISA_GM107_CHIPSET); + if (ptr) { ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(slot)); if (bindless) @@ -2204,7 +2207,7 @@ getDestType(const ImgType type) { } void -NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su) +NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su, Instruction **loaded) { const T...

[PATCH 00/19] nv50: add sampler2DMS/GP support to get OpenGL 3.2

2014 Jan 13

[PATCH 00/19] nv50: add sampler2DMS/GP support to get OpenGL 3.2

OK, so there's a bunch of stuff in here. The geometry stuff is based on the work started by Bryan Cain and Christoph Bumiller. Patches 01-12: Add support for geometry shaders and fix related issues Patches 13-14: Make it possible for fb clears to operate on texture attachments with an explicit layer set (as is allowed in gl 3.2). Patches 15-17: Make ARB_texture_multisample work

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

2016 Mar 17

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

...true; default: - assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); assert(base->reg.size == 8); if (ptr) base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); assert(base->reg.size == 8); atom->setIndirect(0, 0, base); + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; return true; } base = @@ -1963,7 +1964,7 @@ NVC0LoweringPass::visit(Instruction *i) } else if (i->...

[PATCH 04/12] nv50/ir/tgsi: TGSI_OPCODE_POW replicates its result

2014 May 20

[PATCH 04/12] nv50/ir/tgsi: TGSI_OPCODE_POW replicates its result

...GSI_OPCODE_ISHR: case TGSI_OPCODE_USHR: @@ -2254,6 +2253,11 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c)); break; + case TGSI_OPCODE_POW: + val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0)); + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) + mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); + break; case TGSI_OPCODE_EX2: case TGSI_OPCODE_LG2: val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef...

[Mesa-dev] [PATCH 04/12] nv50/ir/tgsi: TGSI_OPCODE_POW replicates its result

2014 May 21

[Mesa-dev] [PATCH 04/12] nv50/ir/tgsi: TGSI_OPCODE_POW replicates its result

...@ -2254,6 +2253,11 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) >> FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) >> mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c)); >> break; >> + case TGSI_OPCODE_POW: >> + val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0)); >> + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) >> + mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); >> + break; > Can you use mkMov ? Pretty please :) The two are used fairly interchangeably, and a lot of th...

[Mesa-dev] [PATCH 04/12] nv50/ir/tgsi: TGSI_OPCODE_POW replicates its result

2014 May 21

[Mesa-dev] [PATCH 04/12] nv50/ir/tgsi: TGSI_OPCODE_POW replicates its result

...nstruction(const struct tgsi_full_instruction *insn) >>>> FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) >>>> mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c)); >>>> break; >>>> + case TGSI_OPCODE_POW: >>>> + val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0)); >>>> + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) >>>> + mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); >>>> + break; >>> Can you use mkMov ? Pretty please :) >> >> The two...

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

2016 Apr 08

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

...getFile() == FILE_MEMORY_GLOBAL); >> + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); >> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); >> assert(base->reg.size == 8); >> if (ptr) >> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); >> assert(base->reg.size == 8); >> atom->setIndirect(0, 0, base); >> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; >> return true; >> } >> base = >> @@ -1963,7 +1964,...

[PATCH] nv50/ir: rebase indirect temp arrays to 0, so that we use less lmem space

2016 Jan 14

[PATCH] nv50/ir: rebase indirect temp arrays to 0, so that we use less lmem space

...lue *val, Value *ptr); + void adjustTempIndex(int arrayId, int &idx, int &idx2d) const; Value *applySrcMod(Value *, int s, int c); Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr); @@ -1679,11 +1698,23 @@ Converter::shiftAddress(Value *index) return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4)); } +void +Converter::adjustTempIndex(int arrayId, int &idx, int &idx2d) const +{ + std::map<int, tgsi::Source::TempBase>::const_iterator it = + code->indirectTempBases.find(arrayId); + if (it == code->indire...

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

2016 Mar 23

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

...ert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); > + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); > base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); > assert(base->reg.size == 8); > if (ptr) > base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); > assert(base->reg.size == 8); > atom->setIndirect(0, 0, base); > + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; > return true; > } > base = > @@ -1963,7 +1964,7 @@ NVC0LoweringPass::visit...

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

2016 Apr 12

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

...>>> + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); >>>> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); >>>> assert(base->reg.size == 8); >>>> if (ptr) >>>> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); >>>> assert(base->reg.size == 8); >>>> atom->setIndirect(0, 0, base); >>>> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; >>>> return true; >>>> } >>...

[PATCH 00/12] Cherry-pick nv50/nvc0 patches from gallium-nine

2014 May 20

[PATCH 00/12] Cherry-pick nv50/nvc0 patches from gallium-nine

I went through the gallium-nine tree and picked out nouveau patches that are general bug-fixes. The first bunch I'd like to also get into 10.2. I've reviewed all of them and they make sense to me, but sending them out for public review as well in case there are any objections. Unless I hear objections, I'd like to push this by Friday. Christoph Bumiller (11): nv50,nvc0: always pull

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

2016 Apr 08

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

...MORY_GLOBAL); >>> + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); >>> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); >>> assert(base->reg.size == 8); >>> if (ptr) >>> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); >>> assert(base->reg.size == 8); >>> atom->setIndirect(0, 0, base); >>> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; >>> return true; >>> } >>> base = >...

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

2016 Apr 14

[PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers

...(0).getFile() == FILE_MEMORY_BUFFER); >>>>> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * >>>>> 16); >>>>> assert(base->reg.size == 8); >>>>> if (ptr) >>>>> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); >>>>> assert(base->reg.size == 8); >>>>> atom->setIndirect(0, 0, base); >>>>> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; >>>>> return true; >>>>...

search for: mkop2v