Tobias Klausmann
2015-Jan-09 23:47 UTC
[Nouveau] [RESEND/PATCH] nv50/ir: Handle OP_CVT when folding constant expressions
Folding for conversions: F32->(U{16/32}, S{16/32}) and (U{16/32}, {S16/32})->F32 Signed-off-by: Tobias Klausmann <tobias.johannes.klausmann at mni.thm.de> --- .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 109 +++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 9a0bb60..6a3d515 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -997,6 +997,115 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) i->op = OP_MOV; break; } + case OP_CVT: { + Storage res; + bld.setPosition(i, true); /* make sure bld is init'ed */ + switch(i->dType) { + case TYPE_U16: + switch (i->sType) { + case TYPE_F32: + if (i->saturate) + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f32, 0, + UINT16_MAX)); + else + res.data.u16 = util_iround(imm0.reg.data.f32); + break; + case TYPE_F64: + if (i->saturate) + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f64, 0, + UINT16_MAX)); + else + res.data.u16 = util_iround(imm0.reg.data.f64); + break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.u16)); + break; + case TYPE_U32: + switch (i->sType) { + case TYPE_F32: + if (i->saturate) + res.data.u32 = util_iround(CLAMP(imm0.reg.data.f32, 0, + UINT32_MAX)); + else + res.data.u32 = util_iround(imm0.reg.data.f32); + break; + case TYPE_F64: + if (i->saturate) + res.data.u32 = util_iround(CLAMP(imm0.reg.data.f64, 0, + UINT32_MAX)); + else + res.data.u32 = util_iround(imm0.reg.data.f64); + break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.u32)); + break; + case TYPE_S16: + switch (i->sType) { + case TYPE_F32: + if (i->saturate) + res.data.s16 = util_iround(CLAMP(imm0.reg.data.f32, INT16_MIN, + INT16_MAX)); + else + res.data.s16 = util_iround(imm0.reg.data.f32); + break; + case TYPE_F64: + if (i->saturate) + res.data.s16 = 
util_iround(CLAMP(imm0.reg.data.f64, INT16_MIN, + INT16_MAX)); + else + res.data.s16 = util_iround(imm0.reg.data.f64); + break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.s16)); + break; + case TYPE_S32: + switch (i->sType) { + case TYPE_F32: + if (i->saturate) + res.data.s32 = util_iround(CLAMP(imm0.reg.data.f32, INT32_MIN, + INT32_MAX)); + else + res.data.s32 = util_iround(imm0.reg.data.f32); + break; + case TYPE_F64: + if (i->saturate) + res.data.s32 = util_iround(CLAMP(imm0.reg.data.f64, INT32_MIN, + INT32_MAX)); + else + res.data.s32 = util_iround(imm0.reg.data.f64); + break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.s32)); + break; + case TYPE_F32: + switch (i->sType) { + case TYPE_U16: res.data.f32 = (float) imm0.reg.data.u16; break; + case TYPE_U32: res.data.f32 = (float) imm0.reg.data.u32; break; + case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break; + case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.f32)); + break; + default: + return; + } + i->setType(i->dType); /* Remove i->sType, which we don't need anymore */ + i->setSrc(1, NULL); + i->op = OP_MOV; + + i->src(0).mod = Modifier(0); /* Clear the already applied modifier */ + break; + } default: return; } -- 2.2.1
Tobias Klausmann
2015-Jan-10 01:08 UTC
[Nouveau] [RESEND/PATCH] nv50/ir: Handle OP_CVT when folding constant expressions
On 10.01.2015 02:41, Ilia Mirkin wrote: > On Fri, Jan 9, 2015 at 6:47 PM, Tobias Klausmann > <tobias.johannes.klausmann at mni.thm.de> wrote: >> Folding for conversions: F32->(U{16/32}, S{16/32}) and (U{16/32}, {S16/32})->F32 >> >> Signed-off-by: Tobias Klausmann <tobias.johannes.klausmann at mni.thm.de> >> --- >> .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 109 +++++++++++++++++++++ >> 1 file changed, 109 insertions(+) >> >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> index 9a0bb60..6a3d515 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> @@ -997,6 +997,115 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) >> i->op = OP_MOV; >> break; >> } >> + case OP_CVT: { >> + Storage res; >> + bld.setPosition(i, true); /* make sure bld is init'ed */ >> + switch(i->dType) { >> + case TYPE_U16: >> + switch (i->sType) { >> + case TYPE_F32: >> + if (i->saturate) >> + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f32, 0, >> + UINT16_MAX)); >> + else >> + res.data.u16 = util_iround(imm0.reg.data.f32); >> + break; >> + case TYPE_F64: > The F64 stuff needs more thought, as I don't think we can always store > the f64 immediates. In my patches, I just outlaw fp64 immediates in > the first place. 
Please leave these out for now. Oh, I removed only the lower part of it; I beg your pardon for delivering that thing here :/ > >> + if (i->saturate) >> + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f64, 0, >> + UINT16_MAX)); >> + else >> + res.data.u16 = util_iround(imm0.reg.data.f64); >> + break; >> + default: >> + return; >> + } >> + i->setSrc(0, bld.mkImm(res.data.u16)); >> + break; >> + case TYPE_U32: >> + switch (i->sType) { >> + case TYPE_F32: >> + if (i->saturate) >> + res.data.u32 = util_iround(CLAMP(imm0.reg.data.f32, 0, >> + UINT32_MAX)); >> + else >> + res.data.u32 = util_iround(imm0.reg.data.f32); >> + break; >> + case TYPE_F64: >> + if (i->saturate) >> + res.data.u32 = util_iround(CLAMP(imm0.reg.data.f64, 0, >> + UINT32_MAX)); >> + else >> + res.data.u32 = util_iround(imm0.reg.data.f64); >> + break; >> + default: >> + return; >> + } >> + i->setSrc(0, bld.mkImm(res.data.u32)); >> + break; >> + case TYPE_S16: >> + switch (i->sType) { >> + case TYPE_F32: >> + if (i->saturate) >> + res.data.s16 = util_iround(CLAMP(imm0.reg.data.f32, INT16_MIN, >> + INT16_MAX)); >> + else >> + res.data.s16 = util_iround(imm0.reg.data.f32); >> + break; >> + case TYPE_F64: >> + if (i->saturate) >> + res.data.s16 = util_iround(CLAMP(imm0.reg.data.f64, INT16_MIN, >> + INT16_MAX)); >> + else >> + res.data.s16 = util_iround(imm0.reg.data.f64); >> + break; >> + default: >> + return; >> + } >> + i->setSrc(0, bld.mkImm(res.data.s16)); >> + break; >> + case TYPE_S32: >> + switch (i->sType) { >> + case TYPE_F32: >> + if (i->saturate) >> + res.data.s32 = util_iround(CLAMP(imm0.reg.data.f32, INT32_MIN, >> + INT32_MAX)); >> + else >> + res.data.s32 = util_iround(imm0.reg.data.f32); >> + break; >> + case TYPE_F64: >> + if (i->saturate) >> + res.data.s32 = util_iround(CLAMP(imm0.reg.data.f64, INT32_MIN, >> + INT32_MAX)); >> + else >> + res.data.s32 = util_iround(imm0.reg.data.f64); >> + break; >> + default: >> + return; >> + } >> + i->setSrc(0, bld.mkImm(res.data.s32)); >> + break; >> + 
case TYPE_F32: >> + switch (i->sType) { >> + case TYPE_U16: res.data.f32 = (float) imm0.reg.data.u16; break; >> + case TYPE_U32: res.data.f32 = (float) imm0.reg.data.u32; break; >> + case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break; >> + case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break; >> + default: >> + return; >> + } >> + i->setSrc(0, bld.mkImm(res.data.f32)); >> + break; >> + default: >> + return; >> + } >> + i->setType(i->dType); /* Remove i->sType, which we don't need anymore */ >> + i->setSrc(1, NULL); >> + i->op = OP_MOV; >> + >> + i->src(0).mod = Modifier(0); /* Clear the already applied modifier */ >> + break; >> + } >> default: >> return; >> } >> -- >> 2.2.1 >> >> _______________________________________________ >> Nouveau mailing list >> Nouveau at lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/nouveau
Tobias Klausmann
2015-Jan-10 01:24 UTC
[Nouveau] [PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions
Folding for conversions: F32->(U{16/32}, S{16/32}) and (U{16/32}, {S16/32})->F32 Signed-off-by: Tobias Klausmann <tobias.johannes.klausmann at mni.thm.de> --- V2: beat me, whip me, split out F64 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 81 ++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 9a0bb60..741c74f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -997,6 +997,87 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) i->op = OP_MOV; break; } + case OP_CVT: { + Storage res; + bld.setPosition(i, true); /* make sure bld is init'ed */ + switch(i->dType) { + case TYPE_U16: + switch (i->sType) { + case TYPE_F32: + if (i->saturate) + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f32, 0, + UINT16_MAX)); + else + res.data.u16 = util_iround(imm0.reg.data.f32); + break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.u16)); + break; + case TYPE_U32: + switch (i->sType) { + case TYPE_F32: + if (i->saturate) + res.data.u32 = util_iround(CLAMP(imm0.reg.data.f32, 0, + UINT32_MAX)); + else + res.data.u32 = util_iround(imm0.reg.data.f32); + break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.u32)); + break; + case TYPE_S16: + switch (i->sType) { + case TYPE_F32: + if (i->saturate) + res.data.s16 = util_iround(CLAMP(imm0.reg.data.f32, INT16_MIN, + INT16_MAX)); + else + res.data.s16 = util_iround(imm0.reg.data.f32); + break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.s16)); + break; + case TYPE_S32: + switch (i->sType) { + case TYPE_F32: + if (i->saturate) + res.data.s32 = util_iround(CLAMP(imm0.reg.data.f32, INT32_MIN, + INT32_MAX)); + else + res.data.s32 = util_iround(imm0.reg.data.f32); + break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.s32)); + break; + case TYPE_F32: + 
switch (i->sType) { + case TYPE_U16: res.data.f32 = (float) imm0.reg.data.u16; break; + case TYPE_U32: res.data.f32 = (float) imm0.reg.data.u32; break; + case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break; + case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.f32)); + break; + default: + return; + } + i->setType(i->dType); /* Remove i->sType, which we don't need anymore */ + i->setSrc(1, NULL); + i->op = OP_MOV; + + i->src(0).mod = Modifier(0); /* Clear the already applied modifier */ + break; + } default: return; } -- 2.2.1
Ilia Mirkin
2015-Jan-10 01:41 UTC
[Nouveau] [RESEND/PATCH] nv50/ir: Handle OP_CVT when folding constant expressions
On Fri, Jan 9, 2015 at 6:47 PM, Tobias Klausmann <tobias.johannes.klausmann at mni.thm.de> wrote: > Folding for conversions: F32->(U{16/32}, S{16/32}) and (U{16/32}, {S16/32})->F32 > > Signed-off-by: Tobias Klausmann <tobias.johannes.klausmann at mni.thm.de> > --- > .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 109 +++++++++++++++++++++ > 1 file changed, 109 insertions(+) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > index 9a0bb60..6a3d515 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > @@ -997,6 +997,115 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) > i->op = OP_MOV; > break; > } > + case OP_CVT: { > + Storage res; > + bld.setPosition(i, true); /* make sure bld is init'ed */ > + switch(i->dType) { > + case TYPE_U16: > + switch (i->sType) { > + case TYPE_F32: > + if (i->saturate) > + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f32, 0, > + UINT16_MAX)); > + else > + res.data.u16 = util_iround(imm0.reg.data.f32); > + break; > + case TYPE_F64: The F64 stuff needs more thought, as I don't think we can always store the f64 immediates. In my patches, I just outlaw fp64 immediates in the first place. 
Please leave these out for now. > + if (i->saturate) > + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f64, 0, > + UINT16_MAX)); > + else > + res.data.u16 = util_iround(imm0.reg.data.f64); > + break; > + default: > + return; > + } > + i->setSrc(0, bld.mkImm(res.data.u16)); > + break; > + case TYPE_U32: > + switch (i->sType) { > + case TYPE_F32: > + if (i->saturate) > + res.data.u32 = util_iround(CLAMP(imm0.reg.data.f32, 0, > + UINT32_MAX)); > + else > + res.data.u32 = util_iround(imm0.reg.data.f32); > + break; > + case TYPE_F64: > + if (i->saturate) > + res.data.u32 = util_iround(CLAMP(imm0.reg.data.f64, 0, > + UINT32_MAX)); > + else > + res.data.u32 = util_iround(imm0.reg.data.f64); > + break; > + default: > + return; > + } > + i->setSrc(0, bld.mkImm(res.data.u32)); > + break; > + case TYPE_S16: > + switch (i->sType) { > + case TYPE_F32: > + if (i->saturate) > + res.data.s16 = util_iround(CLAMP(imm0.reg.data.f32, INT16_MIN, > + INT16_MAX)); > + else > + res.data.s16 = util_iround(imm0.reg.data.f32); > + break; > + case TYPE_F64: > + if (i->saturate) > + res.data.s16 = util_iround(CLAMP(imm0.reg.data.f64, INT16_MIN, > + INT16_MAX)); > + else > + res.data.s16 = util_iround(imm0.reg.data.f64); > + break; > + default: > + return; > + } > + i->setSrc(0, bld.mkImm(res.data.s16)); > + break; > + case TYPE_S32: > + switch (i->sType) { > + case TYPE_F32: > + if (i->saturate) > + res.data.s32 = util_iround(CLAMP(imm0.reg.data.f32, INT32_MIN, > + INT32_MAX)); > + else > + res.data.s32 = util_iround(imm0.reg.data.f32); > + break; > + case TYPE_F64: > + if (i->saturate) > + res.data.s32 = util_iround(CLAMP(imm0.reg.data.f64, INT32_MIN, > + INT32_MAX)); > + else > + res.data.s32 = util_iround(imm0.reg.data.f64); > + break; > + default: > + return; > + } > + i->setSrc(0, bld.mkImm(res.data.s32)); > + break; > + case TYPE_F32: > + switch (i->sType) { > + case TYPE_U16: res.data.f32 = (float) imm0.reg.data.u16; break; > + case TYPE_U32: res.data.f32 = (float) imm0.reg.data.u32; 
break; > + case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break; > + case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break; > + default: > + return; > + } > + i->setSrc(0, bld.mkImm(res.data.f32)); > + break; > + default: > + return; > + } > + i->setType(i->dType); /* Remove i->sType, which we don't need anymore */ > + i->setSrc(1, NULL); > + i->op = OP_MOV; > + > + i->src(0).mod = Modifier(0); /* Clear the already applied modifier */ > + break; > + } > default: > return; > } > -- > 2.2.1 > > _______________________________________________ > Nouveau mailing list > Nouveau at lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/nouveau
Possibly Parallel Threads
- [PATCH v4] nv50/ir: Handle OP_CVT when folding constant expressions
- [PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions
- [PATCH v3 1/2] nv50/ir: Add support for the double Type to BuildUtil
- [PATCH] nv50/ir: Handle OP_CVT when folding constant expressions
- [PATCH v2] nv50/ir: Handle OP_CVT when folding constant expressions