Roy Spliet
2015-Jan-11 00:22 UTC
[Nouveau] [PATCH 1/3] nv50/ir: Add support for MAD short+IMM notation
MAD IMM has a very specific SDST == SSRC2 requirement, so don't emit Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- .../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 18 ++++++++++++------ .../drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp index 2077388..b1e7409 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp @@ -939,9 +939,20 @@ CodeEmitterNV50::emitFMAD(const Instruction *i) code[0] = 0xe0000000; + if (i->src(1).getFile() == FILE_IMMEDIATE) { + code[1] = 0; + emitForm_IMM(i); + code[0] |= neg_mul << 15; + code[0] |= neg_add << 22; + if (i->saturate) + code[0] |= 1 << 8; + } else if (i->encSize == 4) { emitForm_MUL(i); - assert(!neg_mul && !neg_add); + code[0] |= neg_mul << 15; + code[0] |= neg_add << 22; + if (i->saturate) + code[0] |= 1 << 8; } else { code[1] = neg_mul << 26; code[1] |= neg_add << 27; @@ -1931,11 +1942,6 @@ CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const // check constraints on short MAD if (info.srcNr >= 2 && i->srcExists(2)) { - if (i->saturate || i->src(2).mod) - return 8; - if ((i->src(0).mod ^ i->src(1).mod) || - (i->src(0).mod | i->src(1).mod).abs()) - return 8; if (!i->defExists(0) || i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id) return 8; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp index 48f996b..f4dedd7 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp @@ -118,7 +118,7 @@ void TargetNV50::initOpInfo() static const uint32_t shortForm[(OP_LAST + 31) / 32] { // MOV,ADD,SUB,MUL,SAD,L/PINTERP,RCP,TEX,TXF - 0x00010e40, 0x00000040, 0x00000498, 0x00000000 + 0x00014e40, 0x00000040, 0x00000498, 0x00000000 }; static const operation noDestList[] { -- 2.1.0
Roy Spliet
2015-Jan-11 00:22 UTC
[Nouveau] [PATCH 2/3] nv50/ir: For MAD, prefer SDST == SSRC2
If liveness analysis indicates it's good, this should improve the chances of being able to emit the short MAD form. Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index 898653c..1273449 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -627,6 +627,7 @@ RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb) #define JOIN_MASK_UNION (1 << 1) #define JOIN_MASK_MOV (1 << 2) #define JOIN_MASK_TEX (1 << 3) +#define JOIN_MASK_MAD (1 << 4) class GCRA { @@ -851,7 +852,7 @@ GCRA::coalesce(ArrayList& insns) case 0x80: case 0x90: case 0xa0: - ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX); + ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX | JOIN_MASK_MAD); break; case 0xc0: case 0xd0: @@ -995,6 +996,13 @@ GCRA::doCoalesce(ArrayList& insns, unsigned int mask) copyCompound(insn->getSrc(0), insn->getDef(0)); } break; + case OP_MAD: + if (!(mask & JOIN_MASK_MAD)) + break; + if (insn->srcExists(2) && insn->src(2).getFile() == FILE_GPR && + insn->def(0).getFile() == FILE_GPR) + coalesceValues(insn->getDef(0), insn->getSrc(2), false); + break; case OP_TEX: case OP_TXB: case OP_TXL: -- 2.1.0
Add a specific optimisation pass for NV50 to check whether SRC0 or SRC1 is a MOV dst, IMM. If so: fold the IMM in and try to drop the MOV. Must be done post-RA because it is required that SDST == SSRC2. Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 52 ++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 21d20ca..1fc3ae6 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -2259,6 +2259,56 @@ FlatteningPass::tryPredicateConditional(BasicBlock *bb) // ============================================================================ +// Fold Immediate into MAD; must be done after register allocation due to +// constraint SDST == SSRC2 +// TODO: +// Does NVC0+ have other situations where this pass makes sense? +class NV50PostRaConstantFolding : public Pass +{ +private: + virtual bool visit(BasicBlock *); +}; + +bool +NV50PostRaConstantFolding::visit(BasicBlock *bb) +{ + Value *vtmp; + Instruction *def; + + for (Instruction *i = bb->getFirst(); i; i = i->next) { + switch (i->op) { + case OP_MAD: + if(i->def(0).getFile() == FILE_GPR && + i->src(0).getFile() == FILE_GPR && + i->src(1).getFile() == FILE_GPR && + i->src(2).getFile() == FILE_GPR && + i->getDef(0)->reg.data.id == i->getSrc(2)->reg.data.id) { + for (int s = 1; s >= 0; s--) { + def = i->getSrc(1)->getInsn(); + if (def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) { + vtmp = i->getSrc(1); + i->setSrc(1, def->getSrc(0)); + if (vtmp->refCount() == 0) + delete_Instruction(bb->getProgram(), def); + break; + } + + vtmp = i->getSrc(0); + i->setSrc(0, i->getSrc(1)); + i->setSrc(1, vtmp); + } + } + break; + default: + break; + } + } + + return true; +} + +// ============================================================================+ // Common subexpression elimination. Stupid O^2 implementation. class LocalCSE : public Pass { @@ -2629,6 +2679,8 @@ bool Program::optimizePostRA(int level) { RUN_PASS(2, FlatteningPass, run); + if (getTarget()->getChipset() < 0xc0) + RUN_PASS(2, NV50PostRaConstantFolding, run); return true; } -- 2.1.0
Ilia Mirkin
2015-Jan-11 00:34 UTC
[Nouveau] [PATCH 1/3] nv50/ir: Add support for MAD short+IMM notation
And you're allowing saturate/neg emission on the short form. Is this already in envytools? Also, what's the shortForm thing? This change is probably fine, but the changelog needs work. On Sat, Jan 10, 2015 at 7:22 PM, Roy Spliet <rspliet at eclipso.eu> wrote:> MAD IMM has a very specific SDST == SSRC2 requirement, so don't emit > > Signed-off-by: Roy Spliet <rspliet at eclipso.eu> > --- > .../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 18 ++++++++++++------ > .../drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 2 +- > 2 files changed, 13 insertions(+), 7 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp > index 2077388..b1e7409 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp > @@ -939,9 +939,20 @@ CodeEmitterNV50::emitFMAD(const Instruction *i) > > code[0] = 0xe0000000; > > + if (i->src(1).getFile() == FILE_IMMEDIATE) { > + code[1] = 0; > + emitForm_IMM(i); > + code[0] |= neg_mul << 15; > + code[0] |= neg_add << 22; > + if (i->saturate) > + code[0] |= 1 << 8; > + } else > if (i->encSize == 4) { > emitForm_MUL(i); > - assert(!neg_mul && !neg_add); > + code[0] |= neg_mul << 15; > + code[0] |= neg_add << 22; > + if (i->saturate) > + code[0] |= 1 << 8; > } else { > code[1] = neg_mul << 26; > code[1] |= neg_add << 27; > @@ -1931,11 +1942,6 @@ CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const > > // check constraints on short MAD > if (info.srcNr >= 2 && i->srcExists(2)) { > - if (i->saturate || i->src(2).mod) > - return 8; > - if ((i->src(0).mod ^ i->src(1).mod) || > - (i->src(0).mod | i->src(1).mod).abs()) > - return 8; > if (!i->defExists(0) || > i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id) > return 8; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > index 48f996b..f4dedd7 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > @@ -118,7 +118,7 @@ void TargetNV50::initOpInfo() > static const uint32_t shortForm[(OP_LAST + 31) / 32] > { > // MOV,ADD,SUB,MUL,SAD,L/PINTERP,RCP,TEX,TXF > - 0x00010e40, 0x00000040, 0x00000498, 0x00000000 > + 0x00014e40, 0x00000040, 0x00000498, 0x00000000 > }; > static const operation noDestList[] > { > -- > 2.1.0 > > > > _______________________________________________ > Nouveau mailing list > Nouveau at lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/nouveau
On Sat, Jan 10, 2015 at 7:23 PM, Roy Spliet <rspliet at eclipso.eu> wrote:> Add a specific optimisation pass for NV50 to check whether SRC0 or SRC1 is > a MOV dst, IMM. If so: fold the IMM in and try to drop the MOV. Must be > done post-RA because it is required that SDST == SSRC2."because it requires that"> > Signed-off-by: Roy Spliet <rspliet at eclipso.eu> > --- > .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 52 ++++++++++++++++++++++ > 1 file changed, 52 insertions(+) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > index 21d20ca..1fc3ae6 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > @@ -2259,6 +2259,56 @@ FlatteningPass::tryPredicateConditional(BasicBlock *bb) > > // ============================================================================> > +// Fold Immediate into MAD; must be done after register allocation due to > +// constraint SDST == SSRC2 > +// TODO: > +// Does NVC0+ have other situations where this pass makes sense? > +class NV50PostRaConstantFolding : public Pass > +{ > +private: > + virtual bool visit(BasicBlock *); > +}; > + > +bool > +NV50PostRaConstantFolding::visit(BasicBlock *bb) > +{ > + Value *vtmp; > + Instruction *def; > + > + for (Instruction *i = bb->getFirst(); i; i = i->next) { > + switch (i->op) { > + case OP_MAD: > + if(i->def(0).getFile() == FILE_GPR && > + i->src(0).getFile() == FILE_GPR && > + i->src(1).getFile() == FILE_GPR && > + i->src(2).getFile() == FILE_GPR && > + i->getDef(0)->reg.data.id == i->getSrc(2)->reg.data.id) {This would be much easier to read as if (... != GPR || != GPR || ...) break; (or continue...)> + for (int s = 1; s >= 0; s--) {You don't end up using 's' in the loop. Did you mean to have some clever logic that flips the order of src0 and src1 in case the "wrong" one came from an immediate?> + def = i->getSrc(1)->getInsn(); > + if (def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) { > + vtmp = i->getSrc(1); > + i->setSrc(1, def->getSrc(0)); > + if (vtmp->refCount() == 0) > + delete_Instruction(bb->getProgram(), def);This shouldn't be necessary, it's all allocated in an arena and will get cleaned up later.> + break; > + } > + > + vtmp = i->getSrc(0); > + i->setSrc(0, i->getSrc(1)); > + i->setSrc(1, vtmp); > + } > + } > + break; > + default: > + break; > + } > + } > + > + return true; > +} > + > +// ============================================================================> + > // Common subexpression elimination. Stupid O^2 implementation. > class LocalCSE : public Pass > { > @@ -2629,6 +2679,8 @@ bool > Program::optimizePostRA(int level) > { > RUN_PASS(2, FlatteningPass, run); > + if (getTarget()->getChipset() < 0xc0) > + RUN_PASS(2, NV50PostRaConstantFolding, run); > return true; > } > > -- > 2.1.0 > > > > _______________________________________________ > Nouveau mailing list > Nouveau at lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/nouveau
Ilia Mirkin
2015-Jan-11 00:43 UTC
[Nouveau] [PATCH 2/3] nv50/ir: For MAD, prefer SDST == SSRC2
On Sat, Jan 10, 2015 at 7:22 PM, Roy Spliet <rspliet at eclipso.eu> wrote:> If liveness analysis indicates it's good, this should improve the chances > of being able to emit the short MAD form. > > Signed-off-by: Roy Spliet <rspliet at eclipso.eu> > --- > src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 10 +++++++++- > 1 file changed, 9 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp > index 898653c..1273449 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp > @@ -627,6 +627,7 @@ RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb) > #define JOIN_MASK_UNION (1 << 1) > #define JOIN_MASK_MOV (1 << 2) > #define JOIN_MASK_TEX (1 << 3) > +#define JOIN_MASK_MAD (1 << 4) > > class GCRA > { > @@ -851,7 +852,7 @@ GCRA::coalesce(ArrayList& insns) > case 0x80: > case 0x90: > case 0xa0: > - ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX); > + ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX | JOIN_MASK_MAD); > break; > case 0xc0: > case 0xd0: > @@ -995,6 +996,13 @@ GCRA::doCoalesce(ArrayList& insns, unsigned int mask) > copyCompound(insn->getSrc(0), insn->getDef(0)); > } > break; > + case OP_MAD: > + if (!(mask & JOIN_MASK_MAD)) > + break; > + if (insn->srcExists(2) && insn->src(2).getFile() == FILE_GPR && > + insn->def(0).getFile() == FILE_GPR)Use the same check here as you do elsewhere... check that insn->defExists(0). It might not in case that only flags are returned I guess? Not sure if mad actually allows that.> + coalesceValues(insn->getDef(0), insn->getSrc(2), false); > + break; > case OP_TEX: > case OP_TXB: > case OP_TXL: > -- > 2.1.0 > > > > _______________________________________________ > Nouveau mailing list > Nouveau at lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/nouveau
Roy Spliet
2015-Jan-11 00:45 UTC
[Nouveau] [PATCH 1/3] nv50/ir: Add support for MAD short+IMM notation
Op 11-01-15 om 01:34 schreef Ilia Mirkin:> And you're allowing saturate/neg emission on the short form.Yes> Is this already in envytools?Tesla floating point instructions are poorly documented in the RST documents; fmad is no exception. I'll make sure to check envydis.> Also, what's the shortForm thing?Documented in envytools; see http://envytools.readthedocs.org/en/latest/hw/graph/tesla/cuda/isa.html#instruction-format . In short, opcodes are either 4 bytes (short) or 8 bytes (long).> This change is > probably fine, but the changelog needs work.If you insist I could elaborate a little further. However, documenting what a short opcode is seems a bit superfluous.> > On Sat, Jan 10, 2015 at 7:22 PM, Roy Spliet <rspliet at eclipso.eu> wrote: >> MAD IMM has a very specific SDST == SSRC2 requirement, so don't emit >> >> Signed-off-by: Roy Spliet <rspliet at eclipso.eu> >> --- >> .../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 18 ++++++++++++------ >> .../drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 2 +- >> 2 files changed, 13 insertions(+), 7 deletions(-) >> >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp >> index 2077388..b1e7409 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp >> @@ -939,9 +939,20 @@ CodeEmitterNV50::emitFMAD(const Instruction *i) >> >> code[0] = 0xe0000000; >> >> + if (i->src(1).getFile() == FILE_IMMEDIATE) { >> + code[1] = 0; >> + emitForm_IMM(i); >> + code[0] |= neg_mul << 15; >> + code[0] |= neg_add << 22; >> + if (i->saturate) >> + code[0] |= 1 << 8; >> + } else >> if (i->encSize == 4) { >> emitForm_MUL(i); >> - assert(!neg_mul && !neg_add); >> + code[0] |= neg_mul << 15; >> + code[0] |= neg_add << 22; >> + if (i->saturate) >> + code[0] |= 1 << 8; >> } else { >> code[1] = neg_mul << 26; >> code[1] |= neg_add << 27; >> @@ -1931,11 +1942,6 @@ CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const >> >> // check constraints on short MAD >> if (info.srcNr >= 2 && i->srcExists(2)) { >> - if (i->saturate || i->src(2).mod) >> - return 8; >> - if ((i->src(0).mod ^ i->src(1).mod) || >> - (i->src(0).mod | i->src(1).mod).abs()) >> - return 8; >> if (!i->defExists(0) || >> i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id) >> return 8; >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >> index 48f996b..f4dedd7 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >> @@ -118,7 +118,7 @@ void TargetNV50::initOpInfo() >> static const uint32_t shortForm[(OP_LAST + 31) / 32] >> { >> // MOV,ADD,SUB,MUL,SAD,L/PINTERP,RCP,TEX,TXF >> - 0x00010e40, 0x00000040, 0x00000498, 0x00000000 >> + 0x00014e40, 0x00000040, 0x00000498, 0x00000000 >> }; >> static const operation noDestList[] >> { >> -- >> 2.1.0 >> >> >> >> _______________________________________________ >> Nouveau mailing list >> Nouveau at lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/nouveau > _______________________________________________ > Nouveau mailing list > Nouveau at lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/nouveau
Reasonably Related Threads
- [PATCH 1/3] nv50/ir: Add support for MAD short+IMM notation
- [PATCH 1/2] nv50/ir: make sure that texprep/texquerylod's args get coalesced
- [PATCH RESEND] nv50/ir: use unordered_set instead of list to keep track of var defs
- [PATCH] nv50/ir: don't touch degree on physreg RIG nodes
- [PATCH] nv50/ir: use unordered_set instead of list to keep our instructions in uses