thr3ads.net - search: "getdef"

[PATCH] gm107/ir: fix loading z offset for layered 3d image bindings

2019 Oct 14

1

[PATCH] gm107/ir: fix loading z offset for layered 3d image bindings

...sc *format = su->tex.format; int width = format->bits[0] + format->bits[1] + @@ -2223,21 +2226,38 @@ NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su) if (width < 32) untypedDst[0] = bld.getSSA(); - for (int i = 0; i < 4; i++) { - typedDst[i] = su->getDef(i); + if (loaded && loaded[0]) { + for (int i = 0; i < 4; i++) { + if (loaded[i]) + typedDst[i] = loaded[i]->getDef(0); + } + } else { + for (int i = 0; i < 4; i++) { + typedDst[i] = su->getDef(i); + } } // Set the unt...

[PATCH] nv50/ir: constant fold OP_SPLIT

2016 Sep 27

2

[PATCH] nv50/ir: constant fold OP_SPLIT

...t = 16; + type = (i->sType == TYPE_U32) ? TYPE_U16 : TYPE_S16; + } + if (i->sType == TYPE_U16 || i->sType == TYPE_S16) { + shift = 8; + type = (i->sType == TYPE_U16) ? TYPE_U8 : TYPE_S8; + } + if (type != TYPE_NONE) { + bld.mkMov(i->getDef(0), bld.mkImm(imm0.reg.data.u64 >> shift), type); + bld.mkMov(i->getDef(1), bld.mkImm(imm0.reg.data.u64), type); + delete_Instruction(prog, i); + } + } + break; case OP_MUL: if (i->dType == TYPE_F32) tryCollapseChainedMULs(i, s, imm0); -- 2...

[PATCH 1/2] nv50/ir: fix s32 x s32 -> high s32 multiply logic

2014 May 18

1

[PATCH 1/2] nv50/ir: fix s32 x s32 -> high s32 multiply logic

...ld->mkOp2(OP_SHR, fTy, r[0], t[1], bld->mkImm(halfSize * 8)); i[6] = bld->mkOp2(OP_ADD, fTy, r[1], r[0], imm); bld->mkMov(r[3], r[0])->setPredicate(CC_NC, c[0]); bld->mkOp2(OP_UNION, TYPE_U32, r[2], r[1], r[3]); - i[5] = bld->mkOp3(OP_MAD, fTy, mul->getDef(0), a[1], b[1], r[2]); + i[5] = bld->mkOp3(OP_MAD, fTy, r[4], a[1], b[1], r[2]); // set carry defs / sources i[3]->setFlagsDef(1, c[0]); - i[4]->setFlagsDef(0, c[1]); // actual result not required, just the carry + // actual result required in negative case,...

[PATCH] nv50/ir: use unordered_set instead of list to keep our instructions in uses

2014 Jul 08

1

[PATCH] nv50/ir: use unordered_set instead of list to keep our instructions in uses

...ers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -686,7 +686,7 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2, // b = mul a, imm // d = mul b, c -> d = mul_x_imm a, c int s2, t2; - insn = mul2->getDef(0)->uses.front()->getInsn(); + insn = (*mul2->getDef(0)->uses.begin())->getInsn(); if (!insn) return; mul1 = mul2; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index e4f56b1..6c83a60...

[PATCH] nv50/ir: constant fold OP_SPLIT

2016 Sep 30

2

[PATCH] nv50/ir: constant fold OP_SPLIT

...if (i->sType == TYPE_U16 || i->sType == TYPE_S16) { >> + shift = 8; >> + type = (i->sType == TYPE_U16) ? TYPE_U8 : TYPE_S8; >> + } > shift = typeSizeOf(i->dType); > >> + if (type != TYPE_NONE) { >> + bld.mkMov(i->getDef(0), bld.mkImm(imm0.reg.data.u64 >> shift), type); >> + bld.mkMov(i->getDef(1), bld.mkImm(imm0.reg.data.u64), type); > u64 val = ...u64; > for (d = 0; i->defExists(d); ++d) { > bld.mkMov(i->getDef(d), bld.mkImm(val & ((1 << shift) - 1)); > val...

[PATCH] nv50: enable texture query lod

2014 Feb 28

0

[PATCH] nv50: enable texture query lod

...); + bld.setPosition(i, true); + + /* The returned values are not quite what we want: + * (a) convert from s32 to f32 + * (b) multiply by 1/256 + */ + for (int def = 0; def < 2; ++def) { + if (!i->defExists(def)) + continue; + bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(def), TYPE_S32, i->getDef(def)); + bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(def), + i->getDef(def), bld.loadImm(NULL, 1.0f / 256)); + } + return true; +} + +bool NV50LoweringPreSSA::handleSET(Instruction *i) { if (i->dType == TYPE_F32) { @@ -1196,6 +1217,8 @@...

[RFC PATCH] nv50/ir: allow spilling of def values for constrained MERGES/UNIONS

2017 Jul 31

1

[RFC PATCH] nv50/ir: allow spilling of def values for constrained MERGES/UNIONS

.../nouveau/codegen/nv50_ir_ra.cpp index b33d7b4010..f29c8a1a95 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -2344,8 +2344,6 @@ RegAlloc::InsertConstraintsPass::insertConstraintMoves() cst->setSrc(s, mov->getDef(0)); cst->bb->insertBefore(cst, mov); - cst->getDef(0)->asLValue()->noSpill = 1; // doesn't help - if (cst->op == OP_UNION) mov->setPredicate(defi->cc, defi->getPredicate()); } -- 2.13.3

[LLVMdev] MemoryDependenceAnalysis Bug or Feature?

2010 Jul 18

2

[LLVMdev] MemoryDependenceAnalysis Bug or Feature?

Yes, I'm not arguing that there is a dependence, just that it's not a clobber dependence. The case of a load is already considered earlier in that function and with isLoad == false it returns MemDepResult::getDef(). My question is: why should a read-only call (which yields AliasAnalysis::Ref and is handled in this code fragment) be any different from e.g. a load. Isn't a read-only call effectively just a series of loads from a memory-dependence perspective? In other words, why does this code fragmen...

[LLVMdev] MemoryDependenceAnalysis Bug or Feature?

2010 Jul 18

0

[LLVMdev] MemoryDependenceAnalysis Bug or Feature?

...010 at 2:50 AM, Marc de Kruijf <dekruijf at cs.wisc.edu> wrote: > Yes, I'm not arguing that there is a dependence, just that it's not a > clobber dependence. The case of a load is already considered earlier in > that function and with isLoad == false it returns MemDepResult::getDef(). > My question is: why should a read-only call (which yields > AliasAnalysis::Ref and is handled in this code fragment) be any different > from e.g. a load. Isn't a read-only call effectively just a series of loads > from a memory-dependence perspective? > In other words, wh...

[LLVMdev] MemoryDependenceAnalysis Bug or Feature?

2010 Jul 16

2

[LLVMdev] MemoryDependenceAnalysis Bug or Feature?

...ntTag) continue; return MemDepResult::getClobber(Inst); case AliasAnalysis::Ref: // If the call is known to never store to the pointer, and if this is a // load query, we can safely ignore it (scan past it). if (isLoad) continue; +++ return MemDepResult::getDef(Inst); default: // Otherwise, there is a potential dependence. Return a clobber. return MemDepResult::getClobber(Inst); } If this seems right to you too, I've attached the patch. If this isn't right, can someone please explain the logic? Thanks, Marc -----------...

[LLVMdev] MemoryDependenceAnalysis Bug or Feature?

2010 Jul 17

0

[LLVMdev] MemoryDependenceAnalysis Bug or Feature?

...epResult::getClobber(Inst); > case AliasAnalysis::Ref: > // If the call is known to never store to the pointer, and if this is > a > // load query, we can safely ignore it (scan past it). > if (isLoad) > continue; > +++ return MemDepResult::getDef(Inst); > default: > // Otherwise, there is a potential dependence. Return a clobber. > return MemDepResult::getClobber(Inst); > } > > If this seems right to you too, I've attached the patch. If this isn't > right, can someone please explain th...

[PATCH] nv50/ir: constant fold OP_SPLIT

2016 Sep 28

0

[PATCH] nv50/ir: constant fold OP_SPLIT

...16; > + } > + if (i->sType == TYPE_U16 || i->sType == TYPE_S16) { > + shift = 8; > + type = (i->sType == TYPE_U16) ? TYPE_U8 : TYPE_S8; > + } shift = typeSizeOf(i->dType); > + if (type != TYPE_NONE) { > + bld.mkMov(i->getDef(0), bld.mkImm(imm0.reg.data.u64 >> shift), type); > + bld.mkMov(i->getDef(1), bld.mkImm(imm0.reg.data.u64), type); u64 val = ...u64; for (d = 0; i->defExists(d); ++d) { bld.mkMov(i->getDef(d), bld.mkImm(val & ((1 << shift) - 1)); val >>= shift; } I th...

[PATCH] nv50/ir: constant fold OP_SPLIT

2016 Sep 30

0

[PATCH] nv50/ir: constant fold OP_SPLIT

...i->sType == TYPE_S16) { >>> + shift = 8; >>> + type = (i->sType == TYPE_U16) ? TYPE_U8 : TYPE_S8; >>> + } >> >> shift = typeSizeOf(i->dType); >> >>> + if (type != TYPE_NONE) { >>> + bld.mkMov(i->getDef(0), bld.mkImm(imm0.reg.data.u64 >> shift), >>> type); >>> + bld.mkMov(i->getDef(1), bld.mkImm(imm0.reg.data.u64), type); >> >> u64 val = ...u64; >> for (d = 0; i->defExists(d); ++d) { >> bld.mkMov(i->getDef(d), bld.mkImm(val &...

[PATCH v5 0/5] nvc0/ir: add support for MAD/FMA PostRALoadPropagation

2017 Mar 26

5

[PATCH v5 0/5] nvc0/ir: add support for MAD/FMA PostRALoadPropagation

was "nv50/ir: PostRaConstantFolding improvements" before. nothing really changed from the last version, just minor things. Karol Herbst (5): nv50/ir: restructure and rename postraconstantfolding pass nv50/ir: implement mad post ra folding for nvc0+ gk110/ir: add LIMM form of mad gm107/ir: add LIMM form of mad nv50/ir: also do PostRaLoadPropagation for FMA

[PATCH 1/3] nv50/ir: Add support for MAD short+IMM notation

2015 Jan 11

6

[PATCH 1/3] nv50/ir: Add support for MAD short+IMM notation

MAD IMM has a very specific SDST == SSRC2 requirement, so don't emit Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- .../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 18 ++++++++++++------ .../drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

2015 Feb 23

2

[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results

.../nv50_ir_lowering_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > @@ -77,8 +77,9 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) > bld.setPosition(i, false); > > // 1. Take the source and it up. > - Value *src[2], *dst[2], *def = i->getDef(0); > - bld.mkSplit(src, 4, i->getSrc(0)); > + Value *input = i->getSrc(0); > + Value *src[2], *dst[2], *guess, *def = i->getDef(0); > + bld.mkSplit(src, 4, input); > > // 2. We don't care about the low 32 bits of the destination. Stick a 0 in. >...

[PATCH] gm107/ir: use lane 0 for manual textureGrad handling

2017 Dec 20

2

[PATCH] gm107/ir: use lane 0 for manual textureGrad handling

...+ } for (c = 0; c < dim; ++c) tex->setSrc(c + array, src[c]); + // broadcast results from lane 0 to all lanes + if (l != 0) { + Value *lane = bld.mkImm(l); + for (c = 0; i->defExists(c); ++c) + bld.mkOp3(OP_SHFL, TYPE_F32, tex->getDef(c), tex->getDef(c), lane, quad); + } bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL); // save results -- 2.13.6

[PATCH v2] nv50/ir: constant fold OP_SPLIT

2016 Sep 30

2

[PATCH v2] nv50/ir: constant fold OP_SPLIT

...isSignedType(i->dType)); + if (likely(type != TYPE_NONE)) { + uint64_t val = imm0.reg.data.u64; + uint16_t shift = size * 8; + bld.setPosition(i, false); + for (int8_t d = 0; i->defExists(d); ++d) { + bld.mkMov(i->getDef(d), bld.mkImm(val & ((1 << shift) - 1)), type); + val >>= shift; + } + delete_Instruction(prog, i); + } + } + break; case OP_MUL: if (i->dType == TYPE_F32) tryCollapseChainedMULs(i, s, imm0); -- 2.10.0

[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)

2015 Feb 23

2

[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)

...odeEmitterNV50::emitInstruction(Instruction *insn) break; case OP_ADD: case OP_SUB: - if (isFloatType(insn->dType)) + if (insn->dType == TYPE_F64) + emitDADD(insn); + else if (isFloatType(insn->dType)) emitFADD(insn); else if (insn->getDef(0)->reg.file == FILE_ADDRESS) emitAADD(insn); @@ -1733,14 +1803,18 @@ CodeEmitterNV50::emitInstruction(Instruction *insn) emitUADD(insn); break; case OP_MUL: - if (isFloatType(insn->dType)) + if (insn->dType == TYPE_F64) + emitDMUL(insn); +...

[PATCH 2/3] nv50/ir: For MAD, prefer SDST == SSRC2

2015 Jan 11

0

[PATCH 2/3] nv50/ir: For MAD, prefer SDST == SSRC2

...ns, JOIN_MASK_UNION | JOIN_MASK_TEX); + ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX | JOIN_MASK_MAD); break; case 0xc0: case 0xd0: @@ -995,6 +996,13 @@ GCRA::doCoalesce(ArrayList& insns, unsigned int mask) copyCompound(insn->getSrc(0), insn->getDef(0)); } break; + case OP_MAD: + if (!(mask & JOIN_MASK_MAD)) + break; + if (insn->srcExists(2) && insn->src(2).getFile() == FILE_GPR && + insn->def(0).getFile() == FILE_GPR) + coalesceValues(insn-...

search for: getdef