Ilia Mirkin
2015-Apr-28 07:30 UTC
[Nouveau] [PATCH] nvc0/ir: set FTZ when the inputs are of uncertain provenance
This will set the FTZ flag (flush denorms to zero) on all opcodes that
can take it when they have sources that come from other types of
opcodes. This resolves issues in Unigine Heaven 4.0 where there were
solid-filled boxes popping up.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89455
Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 33 +++++++++++++++++++++-
 .../nouveau/codegen/nv50_ir_lowering_nvc0.h        |  1 +
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 87e75e1..9a8a41d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -96,6 +96,32 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
    bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]);
 }
 
+void
+NVC0LegalizeSSA::handleFTZ(Instruction *i)
+{
+   unsigned s;
+
+   if (i->dType != TYPE_F32 || i->sType != TYPE_F32)
+      return;
+
+   // Flush denorms to zero to avoid unexpected situations in code. We can
+   // assume that the output of any instruction that *could* take a ftz (even
+   // if it doesn't have one) does not need further flushing.
+   for (s = 0; i->srcExists(s); ++s) {
+      Instruction *ld = i->getSrc(s)->getInsn();
+      if (!ld)
+         continue;
+
+      OpClass cls = prog->getTarget()->getOpClass(ld->op);
+      if (ld->dType != TYPE_F32 || (
+             cls != OPCLASS_ARITH && cls != OPCLASS_COMPARE &&
+             cls != OPCLASS_CONVERT)) {
+         i->ftz = true;
+         return;
+      }
+   }
+}
+
 bool
 NVC0LegalizeSSA::visit(Function *fn)
 {
@@ -109,8 +135,13 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
    Instruction *next;
    for (Instruction *i = bb->getEntry(); i; i = next) {
       next = i->next;
-      if (i->dType == TYPE_F32)
+      if (i->dType == TYPE_F32) {
+         OpClass cls = prog->getTarget()->getOpClass(i->op);
+         if (cls == OPCLASS_ARITH || cls == OPCLASS_COMPARE ||
+             cls == OPCLASS_CONVERT)
+            handleFTZ(i);
          continue;
+      }
       switch (i->op) {
       case OP_DIV:
       case OP_MOD:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index d8ff5cd..260e101 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -36,6 +36,7 @@ private:
    // we want to insert calls to the builtin library only after optimization
    void handleDIV(Instruction *); // integer division, modulus
    void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
+   void handleFTZ(Instruction *);
 
 private:
    BuildUtil bld;
-- 
2.0.5