Ilia Mirkin
2015-Mar-25 22:00 UTC
[Nouveau] [PATCH] nv50/ir: take postFactor into account when doing peephole optimizations
Multiply operations can have a post-factor on them, which other ops don't support. Only perform the peephole optimizations when there is no post-factor involved. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89758 Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu> --- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 6a4ea4e..dc048e6 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -451,7 +451,9 @@ ConstantFolding::expr(Instruction *i, b->data.f32 = 0.0f; } switch (i->dType) { - case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break; + case TYPE_F32: + res.data.f32 = a->data.f32 * b->data.f32 * exp2f(i->postFactor); + break; case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break; case TYPE_S32: if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { @@ -579,6 +581,7 @@ ConstantFolding::expr(Instruction *i, i->src(0).mod = Modifier(0); i->src(1).mod = Modifier(0); + i->postFactor = 0; i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32)); i->setSrc(1, NULL); @@ -682,7 +685,7 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2, Instruction *insn; Instruction *mul1 = NULL; // mul1 before mul2 int e = 0; - float f = imm2.reg.data.f32; + float f = imm2.reg.data.f32 * exp2f(mul2->postFactor); ImmediateValue imm1; assert(mul2->op == OP_MUL && mul2->dType == TYPE_F32); @@ -782,9 +785,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) i->op = OP_MOV; i->setSrc(0, new_ImmediateValue(prog, 0u)); i->src(0).mod = Modifier(0); + i->postFactor = 0; i->setSrc(1, NULL); } else - if (imm0.isInteger(1) || imm0.isInteger(-1)) { + if (!i->postFactor && (imm0.isInteger(1) || imm0.isInteger(-1))) { if (imm0.isNegative()) i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); i->op = i->src(t).mod.getOp(); @@ -797,7 +801,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) i->src(0).mod = 0; i->setSrc(1, NULL); } else - if (imm0.isInteger(2) || imm0.isInteger(-2)) { + if (!i->postFactor && (imm0.isInteger(2) || imm0.isInteger(-2))) { if (imm0.isNegative()) i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); i->op = OP_ADD; -- 2.0.5
Seemingly Similar Threads
- [PATCH] nv50/ir: fold MAD when one of the multiplicands is const
- [PATCH v2 3/4] nvc0/ir: Handle OP_BFIND when folding constant expressions
- [PATCH 1/2] nvc0/ir: detect AND/SHR pairs and convert into EXTBF
- [PATCH v2 4/4] nvc0/ir: Handle OP_POPCNT when folding constant expressions
- [PATCH] nv50/ir: Propagate third immediate src when folding OP_MAD