Roy Spliet
2015-Feb-06 21:46 UTC
[Nouveau] [PATCH 1/3] nv50/ir: Add support for MAD 4-byte opcode
Add emission rules for negative and saturate flags for MAD 4-byte opcodes,
and get rid of some of the constraints. Obviously tested with a wide variety
of shaders.
V2: Document MAD as supported short form
V3: Split up IMM from short-form modifiers
Signed-off-by: Roy Spliet <rspliet at eclipso.eu>
---
src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 10 ++++------
src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 4 ++--
2 files changed, 6 insertions(+), 8 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 2077388..23e4bab 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -941,7 +941,10 @@ CodeEmitterNV50::emitFMAD(const Instruction *i)
if (i->encSize == 4) {
emitForm_MUL(i);
- assert(!neg_mul && !neg_add);
+ code[0] |= neg_mul << 15;
+ code[0] |= neg_add << 22;
+ if (i->saturate)
+ code[0] |= 1 << 8;
} else {
code[1] = neg_mul << 26;
code[1] |= neg_add << 27;
@@ -1931,11 +1934,6 @@ CodeEmitterNV50::getMinEncodingSize(const Instruction *i)
const
// check constraints on short MAD
if (info.srcNr >= 2 && i->srcExists(2)) {
- if (i->saturate || i->src(2).mod)
- return 8;
- if ((i->src(0).mod ^ i->src(1).mod) ||
- (i->src(0).mod | i->src(1).mod).abs())
- return 8;
if (!i->defExists(0) ||
i->def(0).rep()->reg.data.id !=
i->src(2).rep()->reg.data.id)
return 8;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
index 48f996b..178a167 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -117,8 +117,8 @@ void TargetNV50::initOpInfo()
};
static const uint32_t shortForm[(OP_LAST + 31) / 32] {
- // MOV,ADD,SUB,MUL,SAD,L/PINTERP,RCP,TEX,TXF
- 0x00010e40, 0x00000040, 0x00000498, 0x00000000
+ // MOV,ADD,SUB,MUL,MAD,SAD,L/PINTERP,RCP,TEX,TXF
+ 0x00014e40, 0x00000040, 0x00000498, 0x00000000
};
static const operation noDestList[] {
--
2.1.0
Roy Spliet
2015-Feb-06 21:46 UTC
[Nouveau] [PATCH 2/3] nv50/ir: Add emit support for MAD IMM format
But don't enable generation of it in the opProperties, because we can't
guarantee the SDST==SRC2 constraint until after register assignment. We'll
add a post-RA folding pass to utilise this.
Signed-off-by: Roy Spliet <rspliet at eclipso.eu>
---
src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 23e4bab..b1e7409 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -939,6 +939,14 @@ CodeEmitterNV50::emitFMAD(const Instruction *i)
code[0] = 0xe0000000;
+ if (i->src(1).getFile() == FILE_IMMEDIATE) {
+ code[1] = 0;
+ emitForm_IMM(i);
+ code[0] |= neg_mul << 15;
+ code[0] |= neg_add << 22;
+ if (i->saturate)
+ code[0] |= 1 << 8;
+ } else
if (i->encSize == 4) {
emitForm_MUL(i);
code[0] |= neg_mul << 15;
--
2.1.0
Add a specific optimisation pass for NV50 to check whether SRC0 or SRC1 is
a MOV dst, IMM. If so: fold the IMM in and try to drop the MOV. Must be
done post-RA because it requires that SDST == SSRC2.
V2: improve readability and add comments to clarify decisions
V3: Remove redundant code... compiler already attempts to put the IMM in
SSRC1
Signed-off-by: Roy Spliet <rspliet at eclipso.eu>
Reviewed-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
.../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 53 ++++++++++++++++++++++
1 file changed, 53 insertions(+)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 21d20ca..62d2ef7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2259,6 +2259,56 @@ FlatteningPass::tryPredicateConditional(BasicBlock *bb)
// ============================================================================
+// Fold Immediate into MAD; must be done after register allocation due to
+// constraint SDST == SSRC2
+// TODO:
+// Does NVC0+ have other situations where this pass makes sense?
+class NV50PostRaConstantFolding : public Pass
+{
+private:
+ virtual bool visit(BasicBlock *);
+};
+
+bool
+NV50PostRaConstantFolding::visit(BasicBlock *bb)
+{
+ Value *vtmp;
+ Instruction *def;
+
+ for (Instruction *i = bb->getFirst(); i; i = i->next) {
+ switch (i->op) {
+ case OP_MAD:
+ if (i->def(0).getFile() != FILE_GPR ||
+ i->src(0).getFile() != FILE_GPR ||
+ i->src(1).getFile() != FILE_GPR ||
+ i->src(2).getFile() != FILE_GPR ||
+ i->getDef(0)->reg.data.id !=
i->getSrc(2)->reg.data.id)
+ break;
+
+ def = i->getSrc(1)->getInsn();
+ if (def->op == OP_MOV && def->src(0).getFile() ==
FILE_IMMEDIATE) {
+ vtmp = i->getSrc(1);
+ i->setSrc(1, def->getSrc(0));
+
+ /* There's no post-RA dead code elimination, so do it here
+ * XXX: if we add more code-removing post-RA passes, we might
+ * want to create a post-RA dead-code elim pass */
+ if (vtmp->refCount() == 0)
+ delete_Instruction(bb->getProgram(), def);
+
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ return true;
+}
+
+//
============================================================================+
// Common subexpression elimination. Stupid O^2 implementation.
class LocalCSE : public Pass
{
@@ -2629,6 +2679,9 @@ bool
Program::optimizePostRA(int level)
{
RUN_PASS(2, FlatteningPass, run);
+ if (getTarget()->getChipset() < 0xc0)
+ RUN_PASS(2, NV50PostRaConstantFolding, run);
+
return true;
}
--
2.1.0
Possibly Parallel Threads
- [PATCH 1/2] nv50/ir: Add support for MAD short+IMM notation
- [PATCH 1/3] nv50/ir: Add support for MAD short+IMM notation
- nv50/ir: Implement short notation for MAD V2
- [PATCH 1/3] nv50/ir: Add support for MAD short+IMM notation
- [PATCH 1/3] nv50/ir: Add support for MAD short+IMM notation