Tobias Klausmann
2015-Jan-10 01:24 UTC
[Nouveau] [PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions
Folding for conversions: F32->(U{16/32}, S{16/32}) and (U{16/32}, {S16/32})->F32 Signed-off-by: Tobias Klausmann <tobias.johannes.klausmann at mni.thm.de> --- V2: beat me, whip me, split out F64 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 81 ++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 9a0bb60..741c74f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -997,6 +997,87 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) i->op = OP_MOV; break; } + case OP_CVT: { + Storage res; + bld.setPosition(i, true); /* make sure bld is init'ed */ + switch(i->dType) { + case TYPE_U16: + switch (i->sType) { + case TYPE_F32: + if (i->saturate) + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f32, 0, + UINT16_MAX)); + else + res.data.u16 = util_iround(imm0.reg.data.f32); + break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.u16)); + break; + case TYPE_U32: + switch (i->sType) { + case TYPE_F32: + if (i->saturate) + res.data.u32 = util_iround(CLAMP(imm0.reg.data.f32, 0, + UINT32_MAX)); + else + res.data.u32 = util_iround(imm0.reg.data.f32); + break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.u32)); + break; + case TYPE_S16: + switch (i->sType) { + case TYPE_F32: + if (i->saturate) + res.data.s16 = util_iround(CLAMP(imm0.reg.data.f32, INT16_MIN, + INT16_MAX)); + else + res.data.s16 = util_iround(imm0.reg.data.f32); + break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.s16)); + break; + case TYPE_S32: + switch (i->sType) { + case TYPE_F32: + if (i->saturate) + res.data.s32 = util_iround(CLAMP(imm0.reg.data.f32, INT32_MIN, + INT32_MAX)); + else + res.data.s32 = util_iround(imm0.reg.data.f32); + break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.s32)); + break; + case TYPE_F32: + switch (i->sType) { + case TYPE_U16: res.data.f32 = (float) imm0.reg.data.u16; break; + case TYPE_U32: res.data.f32 = (float) imm0.reg.data.u32; break; + case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break; + case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.f32)); + break; + default: + return; + } + i->setType(i->dType); /* Remove i->sType, which we don't need anymore */ + i->setSrc(1, NULL); + i->op = OP_MOV; + + i->src(0).mod = Modifier(0); /* Clear the already applied modifier */ + break; + } default: return; } -- 2.2.1
Ilia Mirkin
2015-Jan-11 00:58 UTC
[Nouveau] [PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions
On Fri, Jan 9, 2015 at 8:24 PM, Tobias Klausmann <tobias.johannes.klausmann at mni.thm.de> wrote:> Folding for conversions: F32->(U{16/32}, S{16/32}) and (U{16/32}, {S16/32})->F32 > > Signed-off-by: Tobias Klausmann <tobias.johannes.klausmann at mni.thm.de> > --- > V2: beat me, whip me, split out F64 > > .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 81 ++++++++++++++++++++++ > 1 file changed, 81 insertions(+) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > index 9a0bb60..741c74f 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > @@ -997,6 +997,87 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) > i->op = OP_MOV; > break; > } > + case OP_CVT: { > + Storage res; > + bld.setPosition(i, true); /* make sure bld is init'ed */ > + switch(i->dType) { > + case TYPE_U16: > + switch (i->sType) { > + case TYPE_F32: > + if (i->saturate) > + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f32, 0, > + UINT16_MAX));Where did this saturate stuff come from? It doesn't make sense to saturate to a non-float dtype. I'd go ahead and just assert(!i->saturate) in the int dtype cases. One does wonder what the hw does if the float doesn't fit in the destination... whether it saturates or not. I don't hugely care though.> + else > + res.data.u16 = util_iround(imm0.reg.data.f32); > + break; > + default: > + return; > + } > + i->setSrc(0, bld.mkImm(res.data.u16)); > + break; > + case TYPE_U32: > + switch (i->sType) { > + case TYPE_F32: > + if (i->saturate) > + res.data.u32 = util_iround(CLAMP(imm0.reg.data.f32, 0, > + UINT32_MAX)); > + else > + res.data.u32 = util_iround(imm0.reg.data.f32); > + break; > + default: > + return; > + } > + i->setSrc(0, bld.mkImm(res.data.u32)); > + break; > + case TYPE_S16: > + switch (i->sType) { > + case TYPE_F32: > + if (i->saturate) > + res.data.s16 = util_iround(CLAMP(imm0.reg.data.f32, INT16_MIN, > + INT16_MAX)); > + else > + res.data.s16 = util_iround(imm0.reg.data.f32); > + break; > + default: > + return; > + } > + i->setSrc(0, bld.mkImm(res.data.s16)); > + break; > + case TYPE_S32: > + switch (i->sType) { > + case TYPE_F32: > + if (i->saturate) > + res.data.s32 = util_iround(CLAMP(imm0.reg.data.f32, INT32_MIN, > + INT32_MAX)); > + else > + res.data.s32 = util_iround(imm0.reg.data.f32); > + break; > + default: > + return; > + } > + i->setSrc(0, bld.mkImm(res.data.s32)); > + break; > + case TYPE_F32: > + switch (i->sType) { > + case TYPE_U16: res.data.f32 = (float) imm0.reg.data.u16; break; > + case TYPE_U32: res.data.f32 = (float) imm0.reg.data.u32; break; > + case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break; > + case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break; > + default: > + return; > + } > + i->setSrc(0, bld.mkImm(res.data.f32)); > + break; > + default: > + return; > + } > + i->setType(i->dType); /* Remove i->sType, which we don't need anymore */ > + i->setSrc(1, NULL);How can src(1) be set? OP_CVT only has the one arg...> + i->op = OP_MOV; > + > + i->src(0).mod = Modifier(0); /* Clear the already applied modifier */ > + break; > + } > default: > return; > } > -- > 2.2.1 > > _______________________________________________ > Nouveau mailing list > Nouveau at lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/nouveau
Tobias Klausmann
2015-Jan-11 17:27 UTC
[Nouveau] [PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions
On 11.01.2015 01:58, Ilia Mirkin wrote:> On Fri, Jan 9, 2015 at 8:24 PM, Tobias Klausmann > <tobias.johannes.klausmann at mni.thm.de> wrote: >> Folding for conversions: F32->(U{16/32}, S{16/32}) and (U{16/32}, {S16/32})->F32 >> >> Signed-off-by: Tobias Klausmann <tobias.johannes.klausmann at mni.thm.de> >> --- >> V2: beat me, whip me, split out F64 >> >> .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 81 ++++++++++++++++++++++ >> 1 file changed, 81 insertions(+) >> >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> index 9a0bb60..741c74f 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> @@ -997,6 +997,87 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) >> i->op = OP_MOV; >> break; >> } >> + case OP_CVT: { >> + Storage res; >> + bld.setPosition(i, true); /* make sure bld is init'ed */ >> + switch(i->dType) { >> + case TYPE_U16: >> + switch (i->sType) { >> + case TYPE_F32: >> + if (i->saturate) >> + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f32, 0, >> + UINT16_MAX)); > Where did this saturate stuff come from? It doesn't make sense to > saturate to a non-float dtype. I'd go ahead and just > assert(!i->saturate) in the int dtype cases. > > One does wonder what the hw does if the float doesn't fit in the > destination... whether it saturates or not. I don't hugely care > though.Actually i can't remember why that was added in the first place, i'll go ahead and follow your advice here.>> + else >> + res.data.u16 = util_iround(imm0.reg.data.f32); >> + break; >> + default: >> + return; >> + } >> + i->setSrc(0, bld.mkImm(res.data.u16)); >> + break; >> + case TYPE_U32: >> + switch (i->sType) { >> + case TYPE_F32: >> + if (i->saturate) >> + res.data.u32 = util_iround(CLAMP(imm0.reg.data.f32, 0, >> + UINT32_MAX)); >> + else >> + res.data.u32 = util_iround(imm0.reg.data.f32); >> + break; >> + default: >> + return; >> + } >> + i->setSrc(0, bld.mkImm(res.data.u32)); >> + break; >> + case TYPE_S16: >> + switch (i->sType) { >> + case TYPE_F32: >> + if (i->saturate) >> + res.data.s16 = util_iround(CLAMP(imm0.reg.data.f32, INT16_MIN, >> + INT16_MAX)); >> + else >> + res.data.s16 = util_iround(imm0.reg.data.f32); >> + break; >> + default: >> + return; >> + } >> + i->setSrc(0, bld.mkImm(res.data.s16)); >> + break; >> + case TYPE_S32: >> + switch (i->sType) { >> + case TYPE_F32: >> + if (i->saturate) >> + res.data.s32 = util_iround(CLAMP(imm0.reg.data.f32, INT32_MIN, >> + INT32_MAX)); >> + else >> + res.data.s32 = util_iround(imm0.reg.data.f32); >> + break; >> + default: >> + return; >> + } >> + i->setSrc(0, bld.mkImm(res.data.s32)); >> + break; >> + case TYPE_F32: >> + switch (i->sType) { >> + case TYPE_U16: res.data.f32 = (float) imm0.reg.data.u16; break; >> + case TYPE_U32: res.data.f32 = (float) imm0.reg.data.u32; break; >> + case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break; >> + case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break; >> + default: >> + return; >> + } >> + i->setSrc(0, bld.mkImm(res.data.f32)); >> + break; >> + default: >> + return; >> + } >> + i->setType(i->dType); /* Remove i->sType, which we don't need anymore */ >> + i->setSrc(1, NULL); > How can src(1) be set? OP_CVT only has the one arg...Agreed, its NULL anyway.>> + i->op = OP_MOV; >> + >> + i->src(0).mod = Modifier(0); /* Clear the already applied modifier */ >> + break; >> + } >> default: >> return; >> } >> -- >> 2.2.1 >> >> _______________________________________________ >> Nouveau mailing list >> Nouveau at lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/nouveau
Possibly Parallel Threads
- [PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions
- [RESEND/PATCH] nv50/ir: Handle OP_CVT when folding constant expressions
- [PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions
- [PATCH v4] nv50/ir: Handle OP_CVT when folding constant expressions
- [PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions