Tobias Klausmann
2014-Dec-02 20:19 UTC
[Nouveau] [PATCH RESEND] nv50/ir: use unordered_set instead of list to keep track of var defs
The set of variable defs does not need to be ordered in any way, and removing/adding elements is a fairly common operation in various optimization passes. This shortens runtime of piglit test fp-long-alu to ~11s from ~22s No piglit regressions observed on nvc0! Signed-off-by: Tobias Klausmann <tobias.johannes.klausmann at mni.thm.de> --- src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 4 ++-- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 7 +++--- .../drivers/nouveau/codegen/nv50_ir_inlines.h | 28 +++++++++++++--------- .../nouveau/codegen/nv50_ir_lowering_nv50.cpp | 4 ++-- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 6 ++--- .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 4 ++-- src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 17 +++++++------ 7 files changed, 38 insertions(+), 32 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index ca3c806..3138118 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -154,9 +154,9 @@ ValueDef::set(Value *defVal) if (value == defVal) return; if (value) - value->defs.remove(this); + value->defs.erase(this); if (defVal) - defVal->defs.push_back(this); + defVal->defs.insert(this); value = defVal; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 0ff5e5d..56033f1 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -567,6 +567,7 @@ public: inline Value *rep() const { return join; } + inline Instruction *getUniqueInsnMerged() const; inline Instruction *getUniqueInsn() const; inline Instruction *getInsn() const; // use when uniqueness is certain @@ -583,11 +584,11 @@ public: static inline Value *get(Iterator&); std::tr1::unordered_set<ValueRef *> uses; - std::list<ValueDef *> defs; + std::tr1::unordered_set<ValueDef *> defs; typedef std::tr1::unordered_set<ValueRef *>::iterator UseIterator; typedef std::tr1::unordered_set<ValueRef *>::const_iterator UseCIterator; - typedef std::list<ValueDef *>::iterator DefIterator; - typedef std::list<ValueDef *>::const_iterator DefCIterator; + typedef std::tr1::unordered_set<ValueDef *>::iterator DefIterator; + typedef std::tr1::unordered_set<ValueDef *>::const_iterator DefCIterator; int id; Storage reg; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h index 255324f..471d47f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h @@ -205,21 +205,26 @@ const LValue *ValueDef::preSSA() const Instruction *Value::getInsn() const { - return defs.empty() ? NULL : defs.front()->getInsn(); + return defs.empty() ? NULL : (*defs.begin())->getInsn(); } -Instruction *Value::getUniqueInsn() const +Instruction *Value::getUniqueInsnMerged() const { if (defs.empty()) return NULL; + /* It is not guaranteed that this is the first in the set, lets find it */ + for (DefCIterator it = defs.begin(); it != defs.end(); ++it) + if ((*it)->get() == this) + return (*it)->getInsn(); + /* We should never hit this assert */ + assert(0); + return NULL; +} - // after regalloc, the definitions of coalesced values are linked - if (join != this) { - for (DefCIterator it = defs.begin(); it != defs.end(); ++it) - if ((*it)->get() == this) - return (*it)->getInsn(); - // should be unreachable and trigger assertion at the end - } +Instruction *Value::getUniqueInsn() const +{ + if (defs.empty()) + return NULL; #ifdef DEBUG if (reg.data.id < 0) { int n = 0; @@ -230,8 +235,9 @@ Instruction *Value::getUniqueInsn() const WARN("value %%%i not uniquely defined\n", id); // return NULL ? } #endif - assert(defs.front()->get() == this); - return defs.front()->getInsn(); + ValueDef *def = *defs.begin(); + assert(def->get() == this); + return def->getInsn(); } inline bool Instruction::constrainedDefs() const diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp index e283424..f13e0d4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp @@ -211,7 +211,7 @@ NV50LegalizePostRA::visit(Function *fn) if (outWrites) { for (std::list<Instruction *>::iterator it = outWrites->begin(); it != outWrites->end(); ++it) - (*it)->getSrc(1)->defs.front()->getInsn()->setDef(0, (*it)->getSrc(0)); + (*(*it)->getSrc(1)->defs.begin())->getInsn()->setDef(0, (*it)->getSrc(0)); // instructions will be deleted on exit outWrites->clear(); } @@ -343,7 +343,7 @@ NV50LegalizeSSA::propagateWriteToOutput(Instruction *st) return; // check def instruction can store - Instruction *di = st->getSrc(1)->defs.front()->getInsn(); + Instruction *di =(*st->getSrc(1)->defs.begin())->getInsn(); // TODO: move exports (if beneficial) in common opt pass if (di->isPseudo() || isTextureOp(di->op) || di->defCount(0xff, true) > 1) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 9c06d04..ab28f3a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -148,7 +148,7 @@ NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi, std::list<TexUse> &uses) { while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0))) - insn = insn->getSrc(0)->getUniqueInsn(); + insn = insn->getSrc(0)->getUniqueInsnMerged(); if (!insn->bb->reachableBy(texi->bb, term)) return; @@ -163,7 +163,7 @@ NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi, case OP_UNION: /* recurse again */ for (int s = 0; insn->srcExists(s); ++s) - findOverwritingDefs(texi, insn->getSrc(s)->getUniqueInsn(), term, + findOverwritingDefs(texi, insn->getSrc(s)->getUniqueInsnMerged(), term, uses); break; default: @@ -200,7 +200,7 @@ NVC0LegalizePostRA::findFirstUses( if (usei->op == OP_PHI || usei->op == OP_UNION) { // need a barrier before WAW cases for (int s = 0; usei->srcExists(s); ++s) { - Instruction *defi = usei->getSrc(s)->getUniqueInsn(); + Instruction *defi = usei->getSrc(s)->getUniqueInsnMerged(); if (defi && &usei->src(s) != *u) findOverwritingDefs(texi, defi, usei->bb, uses); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index f520dc6..ff3f8c3 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -2144,7 +2144,7 @@ FlatteningPass::isConstantCondition(Value *pred) return false; for (int s = 0; s < 2 && insn->srcExists(s); ++s) { - Instruction *ld = insn->getSrc(s)->getUniqueInsn(); + Instruction *ld = insn->getSrc(s)->getUniqueInsnMerged(); DataFile file; if (ld) { if (ld->op != OP_MOV && ld->op != OP_LOAD) @@ -2185,7 +2185,7 @@ FlatteningPass::removeFlow(Instruction *insn) delete_Instruction(prog, term); if (pred && pred->refCount() == 0) { - Instruction *pSet = pred->getUniqueInsn(); + Instruction *pSet = pred->getUniqueInsnMerged(); pred->join->reg.data.id = -1; // deallocate if (pSet->isDead()) delete_Instruction(prog, pSet); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index 898653c..8961237 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -313,7 +313,7 @@ RegAlloc::BuildIntervalsPass::addLiveRange(Value *val, const BasicBlock *bb, int end) { - Instruction *insn = val->getUniqueInsn(); + Instruction *insn = val->getUniqueInsnMerged(); if (!insn) insn = bb->getFirst(); @@ -579,7 +579,7 @@ RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb) for (int s = 0; i->srcExists(s); ++s) { assert(i->src(s).getInsn()); - if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ? + if (i->getSrc(s)->getUniqueInsnMerged()->bb == bb) // XXX: reachableBy ? bb->liveSet.set(i->getSrc(s)->id); else bb->liveSet.clr(i->getSrc(s)->id); @@ -835,7 +835,7 @@ GCRA::coalesceValues(Value *dst, Value *src, bool force) assert(rep->join == rep && val->join == rep); // add val's definitions to rep and extend the live interval of its RIG node - rep->defs.insert(rep->defs.end(), val->defs.begin(), val->defs.end()); + rep->defs.insert(val->defs.begin(), val->defs.end()); nRep->livei.unify(nVal->livei); return true; } @@ -989,7 +989,7 @@ GCRA::doCoalesce(ArrayList& insns, unsigned int mask) // if this is a contraint-move there will only be a single use if (i && i->op == OP_MERGE) // do we really still need this ? break; - i = insn->getSrc(0)->getUniqueInsn(); + i = insn->getSrc(0)->getUniqueInsnMerged(); if (i && !i->constrainedDefs()) { if (coalesceValues(insn->getDef(0), insn->getSrc(0), false)) copyCompound(insn->getSrc(0), insn->getDef(0)); @@ -1410,7 +1410,7 @@ GCRA::cleanup(const bool success) } else { for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end(); ++d) - lval->join->defs.remove(*d); + lval->join->defs.erase(*d); lval->join = lval; } } @@ -1553,8 +1553,7 @@ SpillCodeInserter::run(const std::list<ValuePair>& lst) // multiple destinations that all need to be spilled (like OP_SPLIT). std::tr1::unordered_set<Instruction *> to_del; - for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end(); - ++d) { + for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();) { Value *slot = mem ? static_cast<Value *>(mem) : new_LValue(func, FILE_GPR); Value *tmp = NULL; @@ -1583,13 +1582,13 @@ SpillCodeInserter::run(const std::list<ValuePair>& lst) assert(defi); if (defi->isPseudo()) { d = lval->defs.erase(d); - --d; if (slot->reg.file == FILE_MEMORY_LOCAL) to_del.insert(defi); else defi->setDef(0, slot); } else { spill(defi, slot, dval); + ++d; } } @@ -2118,7 +2117,7 @@ RegAlloc::InsertConstraintsPass::insertConstraintMoves() } assert(cst->getSrc(s)->defs.size() == 1); // still SSA - Instruction *defi = cst->getSrc(s)->defs.front()->getInsn(); + Instruction *defi =(*cst->getSrc(s)->defs.begin())->getInsn(); // catch some cases where don't really need MOVs if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) continue; -- 2.1.3
Possibly Parallel Threads
- [PATCH] nv50/ir: use unordered_set instead of list to keep track of var defs
- [PATCH] nv50/ir: use unordered_set instead of list to keep our instructions in uses
- [PATCH 1/2] nvc0/ir: avoid infinite recursion when finding first uses of tex
- [PATCH] nv50/ra: Only increment DefValue counter if we are going to spill
- [PATCH 3/4] nvc0/ir: optimize set & 1.0 to produce boolean-float sets