Tobias Klausmann
2015-Sep-06 16:14 UTC
[Nouveau] [PATCH] nv50/ir: use unordered_set instead of list to keep track of var defs
The set of variable defs does not need to be ordered in any way, and removing/adding elements is a fairly common operation in various optimization passes. This shortens runtime of piglit test fp-long-alu to ~11s from ~22s No piglit regressions observed on nvc0! Signed-off-by: Tobias Klausmann <tobias.johannes.klausmann at mni.thm.de> --- src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 4 ++-- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 7 +++--- .../drivers/nouveau/codegen/nv50_ir_inlines.h | 28 +++++++++++++--------- .../nouveau/codegen/nv50_ir_lowering_nv50.cpp | 4 ++-- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 6 ++--- .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 4 ++-- src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 17 +++++++------ 7 files changed, 38 insertions(+), 32 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index cce6055..745cdc9 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -154,9 +154,9 @@ ValueDef::set(Value *defVal) if (value == defVal) return; if (value) - value->defs.remove(this); + value->defs.erase(this); if (defVal) - defVal->defs.push_back(this); + defVal->defs.insert(this); value = defVal; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index ba1b085..deeabff 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -570,6 +570,7 @@ public: inline Value *rep() const { return join; } + inline Instruction *getUniqueInsnMerged() const; inline Instruction *getUniqueInsn() const; inline Instruction *getInsn() const; // use when uniqueness is certain @@ -586,11 +587,11 @@ public: static inline Value *get(Iterator&); unordered_set<ValueRef *> uses; - std::list<ValueDef *> defs; + unordered_set<ValueDef *> defs; typedef unordered_set<ValueRef *>::iterator UseIterator; typedef unordered_set<ValueRef *>::const_iterator UseCIterator; - typedef std::list<ValueDef *>::iterator DefIterator; - typedef std::list<ValueDef *>::const_iterator DefCIterator; + typedef unordered_set<ValueDef *>::iterator DefIterator; + typedef unordered_set<ValueDef *>::const_iterator DefCIterator; int id; Storage reg; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h index e465f24..8c8e54c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h @@ -205,21 +205,26 @@ const LValue *ValueDef::preSSA() const Instruction *Value::getInsn() const { - return defs.empty() ? NULL : defs.front()->getInsn(); + return defs.empty() ? NULL : (*defs.begin())->getInsn(); } -Instruction *Value::getUniqueInsn() const +Instruction *Value::getUniqueInsnMerged() const { if (defs.empty()) return NULL; + /* It is not guaranteed that this is the first in the set, lets find it */ + for (DefCIterator it = defs.begin(); it != defs.end(); ++it) + if ((*it)->get() == this) + return (*it)->getInsn(); + /* We should never hit this assert */ + assert(0); + return NULL; +} - // after regalloc, the definitions of coalesced values are linked - if (join != this) { - for (DefCIterator it = defs.begin(); it != defs.end(); ++it) - if ((*it)->get() == this) - return (*it)->getInsn(); - // should be unreachable and trigger assertion at the end - } +Instruction *Value::getUniqueInsn() const +{ + if (defs.empty()) + return NULL; #ifdef DEBUG if (reg.data.id < 0) { int n = 0; @@ -230,8 +235,9 @@ Instruction *Value::getUniqueInsn() const WARN("value %%%i not uniquely defined\n", id); // return NULL ? } #endif - assert(defs.front()->get() == this); - return defs.front()->getInsn(); + ValueDef *def = *defs.begin(); + assert(def->get() == this); + return def->getInsn(); } inline bool Instruction::constrainedDefs() const diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp index bea293b..9d1244d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp @@ -211,7 +211,7 @@ NV50LegalizePostRA::visit(Function *fn) if (outWrites) { for (std::list<Instruction *>::iterator it = outWrites->begin(); it != outWrites->end(); ++it) - (*it)->getSrc(1)->defs.front()->getInsn()->setDef(0, (*it)->getSrc(0)); + (*(*it)->getSrc(1)->defs.begin())->getInsn()->setDef(0, (*it)->getSrc(0)); // instructions will be deleted on exit outWrites->clear(); } @@ -343,7 +343,7 @@ NV50LegalizeSSA::propagateWriteToOutput(Instruction *st) return; // check def instruction can store - Instruction *di = st->getSrc(1)->defs.front()->getInsn(); + Instruction *di =(*st->getSrc(1)->defs.begin())->getInsn(); // TODO: move exports (if beneficial) in common opt pass if (di->isPseudo() || isTextureOp(di->op) || di->defCount(0xff, true) > 1) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index b1f4065..892e7e3 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -193,7 +193,7 @@ NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi, std::list<TexUse> &uses) { while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0))) - insn = insn->getSrc(0)->getUniqueInsn(); + insn = insn->getSrc(0)->getUniqueInsnMerged(); // NOTE: the tex itself is, of course, not an overwriting definition if (insn == texi || !insn->bb->reachableBy(texi->bb, term)) @@ -209,7 +209,7 @@ NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi, case OP_UNION: /* recurse again */ for (int s = 0; insn->srcExists(s); ++s) - findOverwritingDefs(texi, insn->getSrc(s)->getUniqueInsn(), term, + findOverwritingDefs(texi, insn->getSrc(s)->getUniqueInsnMerged(), term, uses); break; default: @@ -251,7 +251,7 @@ NVC0LegalizePostRA::findFirstUses( // %r1 = x <- overwriting def // %r2 = phi %r0, %r1 for (int s = 0; usei->srcExists(s); ++s) { - Instruction *defi = usei->getSrc(s)->getUniqueInsn(); + Instruction *defi = usei->getSrc(s)->getUniqueInsnMerged(); if (defi && &usei->src(s) != *u) findOverwritingDefs(texi, defi, usei->bb, uses); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index b01ef41..0782def 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -2362,7 +2362,7 @@ FlatteningPass::isConstantCondition(Value *pred) return false; for (int s = 0; s < 2 && insn->srcExists(s); ++s) { - Instruction *ld = insn->getSrc(s)->getUniqueInsn(); + Instruction *ld = insn->getSrc(s)->getUniqueInsnMerged(); DataFile file; if (ld) { if (ld->op != OP_MOV && ld->op != OP_LOAD) @@ -2403,7 +2403,7 @@ FlatteningPass::removeFlow(Instruction *insn) delete_Instruction(prog, term); if (pred && pred->refCount() == 0) { - Instruction *pSet = pred->getUniqueInsn(); + Instruction *pSet = pred->getUniqueInsnMerged(); pred->join->reg.data.id = -1; // deallocate if (pSet->isDead()) delete_Instruction(prog, pSet); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index 0cd21cf..167ebdf 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -312,7 +312,7 @@ RegAlloc::BuildIntervalsPass::addLiveRange(Value *val, const BasicBlock *bb, int end) { - Instruction *insn = val->getUniqueInsn(); + Instruction *insn = val->getUniqueInsnMerged(); if (!insn) insn = bb->getFirst(); @@ -578,7 +578,7 @@ RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb) for (int s = 0; i->srcExists(s); ++s) { assert(i->src(s).getInsn()); - if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ? + if (i->getSrc(s)->getUniqueInsnMerged()->bb == bb) // XXX: reachableBy ? bb->liveSet.set(i->getSrc(s)->id); else bb->liveSet.clr(i->getSrc(s)->id); @@ -834,7 +834,7 @@ GCRA::coalesceValues(Value *dst, Value *src, bool force) assert(rep->join == rep && val->join == rep); // add val's definitions to rep and extend the live interval of its RIG node - rep->defs.insert(rep->defs.end(), val->defs.begin(), val->defs.end()); + rep->defs.insert(val->defs.begin(), val->defs.end()); nRep->livei.unify(nVal->livei); return true; } @@ -988,7 +988,7 @@ GCRA::doCoalesce(ArrayList& insns, unsigned int mask) // if this is a contraint-move there will only be a single use if (i && i->op == OP_MERGE) // do we really still need this ? break; - i = insn->getSrc(0)->getUniqueInsn(); + i = insn->getSrc(0)->getUniqueInsnMerged(); if (i && !i->constrainedDefs()) { if (coalesceValues(insn->getDef(0), insn->getSrc(0), false)) copyCompound(insn->getSrc(0), insn->getDef(0)); @@ -1409,7 +1409,7 @@ GCRA::cleanup(const bool success) } else { for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end(); ++d) - lval->join->defs.remove(*d); + lval->join->defs.erase(*d); lval->join = lval; } } @@ -1552,8 +1552,7 @@ SpillCodeInserter::run(const std::list<ValuePair>& lst) // multiple destinations that all need to be spilled (like OP_SPLIT). unordered_set<Instruction *> to_del; - for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end(); - ++d) { + for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();) { Value *slot = mem ? static_cast<Value *>(mem) : new_LValue(func, FILE_GPR); Value *tmp = NULL; @@ -1582,13 +1581,13 @@ SpillCodeInserter::run(const std::list<ValuePair>& lst) assert(defi); if (defi->isPseudo()) { d = lval->defs.erase(d); - --d; if (slot->reg.file == FILE_MEMORY_LOCAL) to_del.insert(defi); else defi->setDef(0, slot); } else { spill(defi, slot, dval); + ++d; } } @@ -2119,7 +2118,7 @@ RegAlloc::InsertConstraintsPass::insertConstraintMoves() } assert(cst->getSrc(s)->defs.size() == 1); // still SSA - Instruction *defi = cst->getSrc(s)->defs.front()->getInsn(); + Instruction *defi =(*cst->getSrc(s)->defs.begin())->getInsn(); // catch some cases where don't really need MOVs if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) continue; -- 2.5.1