search for: type_f64

Displaying 15 results from an estimated 15 matches for "type_f64".

2015 Feb 23
2
[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)
...itterNV50::emitIMAD(const Instruction *i) { code[0] = 0x60000000; @@ -1150,9 +1213,11 @@ CodeEmitterNV50::emitSET(const Instruction *i) code[0] = 0x30000000; code[1] = 0x60000000; - emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14); - switch (i->sType) { + case TYPE_F64: + code[0] = 0xe0000000; + code[1] = 0xe0000000; + break; case TYPE_F32: code[0] |= 0x80000000; break; case TYPE_S32: code[1] |= 0x0c000000; break; case TYPE_U32: code[1] |= 0x04000000; break; @@ -1162,6 +1227,9 @@ CodeEmitterNV50::emitSET(const Instruction *i) ass...
2015 Feb 23
2
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...> + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); > + > + // 5. Perform 2 Newton-Raphson steps > + if (i->op == OP_RCP) { > + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 > + Value *two = bld.getSSA(8); > + > + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f)); > + > + guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8), > + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), two, guess), > + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input, > +...
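For reference, a minimal standalone sketch of the refinement described in the comment above, assuming a single-precision seed for the initial estimate; the function name and the test value are illustrative and not taken from the patch:

   // Two Newton-Raphson steps for 1/x in double precision,
   // starting from a rough single-precision estimate.
   #include <cstdio>

   static double refineRcp(double input)
   {
      double guess = (double)(1.0f / (float)input);     // rough 32-bit seed
      for (int step = 0; step < 2; ++step)
         guess = 2.0 * guess - input * guess * guess;   // x_{n+1} = 2*x_n - input*x_n^2
      return guess;
   }

   int main()
   {
      printf("%.17g\n", refineRcp(3.0));                // ~0.33333333333333331
      return 0;
   }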
2015 Feb 23
0
[PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...P_MERGE, TYPE_U64, def, dst[0], dst[1]); + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); + + // 5. Perform 2 Newton-Raphson steps + if (i->op == OP_RCP) { + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 + Value *two = bld.getSSA(8); + + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f)); + + guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8), + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), two, guess), + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input, +...
2015 Feb 23
0
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); >> + >> + // 5. Perform 2 Newton-Raphson steps >> + if (i->op == OP_RCP) { >> + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 >> + Value *two = bld.getSSA(8); >> + >> + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f)); >> + >> + guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8), >> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), two, guess), >> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input,...
2014 Jul 05
1
[PATCH v4] nv50/ir: Handle OP_CVT when folding constant expressions
..._t conv = util_iround(imm0.reg.data.f32); + res.data.u16 = (conv < 0) ? 0 : CLAMP((uint32_t)conv, 0, + UINT16_MAX); + } + else res.data.u16 = util_iround(imm0.reg.data.f32); + break; + case TYPE_F64: + if (i->saturate) { + int32_t conv = util_iround(imm0.reg.data.f64); + res.data.u16 = (conv < 0) ? 0 : CLAMP((uint32_t)conv, 0, + UINT16_MAX); + } + else res.data.u16 = util_irou...
2015 Feb 20
10
[PATCH 01/11] nvc0/ir: add emission of dadd/dmul/dmad opcodes, fix minmax
...e[0] ^= 1 << 8; +} + +void CodeEmitterNVC0::emitUADD(const Instruction *i) { uint32_t addOp = 0; @@ -895,6 +947,8 @@ CodeEmitterNVC0::emitMINMAX(const Instruction *i) else if (!isFloatType(i->dType)) op |= isSignedType(i->dType) ? 0x23 : 0x03; + if (i->dType == TYPE_F64) + op |= 0x01; emitForm_A(i, op); emitNegAbs12(i); @@ -2242,20 +2296,26 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn) break; case OP_ADD: case OP_SUB: - if (isFloatType(insn->dType)) + if (insn->dType == TYPE_F64) + emitDADD(insn); +...
2015 Jan 09
3
[RESEND/PATCH] nv50/ir: Handle OP_CVT when folding constant expressions
...case TYPE_F32: + if (i->saturate) + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f32, 0, + UINT16_MAX)); + else + res.data.u16 = util_iround(imm0.reg.data.f32); + break; + case TYPE_F64: + if (i->saturate) + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f64, 0, + UINT16_MAX)); + else + res.data.u16 = util_iround(imm0.reg.data.f64); + break; + default: +...
2014 Jul 03
1
[PATCH v3 1/2] nv50/ir: Add support for the double Type to BuildUtil
...nion { + double f64; + uint64_t u64; + } u; + u.f64 = f; + return mkImm(u.u64); +} + Value * BuildUtil::loadImm(Value *dst, float f) { @@ -398,6 +409,12 @@ BuildUtil::loadImm(Value *dst, float f) } Value * +BuildUtil::loadImm(Value *dst, double u) +{ + return mkOp1v(OP_MOV, TYPE_F64, dst ? dst : getScratch(8), mkImm(u)); +} + +Value * BuildUtil::loadImm(Value *dst, uint32_t u) { return mkOp1v(OP_MOV, TYPE_U32, dst ? dst : getScratch(), mkImm(u)); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util...
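For reference, a standalone illustration (not Mesa code) of the union trick the new mkImm overload relies on, reinterpreting a double as its raw 64-bit bit pattern:

   #include <cstdint>
   #include <cstdio>

   static uint64_t doubleBits(double f)
   {
      union { double f64; uint64_t u64; } u;
      u.f64 = f;
      return u.u64;                      // same bits, viewed as an integer
   }

   int main()
   {
      // 0.5 encodes as 0x3fe0000000000000 in IEEE-754 double precision.
      printf("0x%016llx\n", (unsigned long long)doubleBits(0.5));
      return 0;
   }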
2014 Jul 03
0
[PATCH] nv50/ir: Handle OP_CVT when folding constant expressions
...break; } + case OP_CVT: { + Storage res; + bld.setPosition(i, true); /* make sure bld is init'ed */ + + switch(i->dType) { + case TYPE_U16: + switch (i->sType) { + case TYPE_F32: res.data.u16 = util_iround(imm0.reg.data.f32); break; + case TYPE_F64: res.data.u16 = util_iround(imm0.reg.data.f64); break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.u16)); + break; + case TYPE_U32: + switch (i->sType) { + case TYPE_F32: res.data.u32 = util_iround(imm0.reg.data.f3...
2014 Jul 06
0
[PATCH v5] nv50/ir: Handle OP_CVT when folding constant expressions
...case TYPE_F32: + if (i->saturate) + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f32, 0, + UINT16_MAX)); + else + res.data.u16 = util_iround(imm0.reg.data.f32); + break; + case TYPE_F64: + if (i->saturate) + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f64, 0, + UINT16_MAX)); + else + res.data.u16 = util_iround(imm0.reg.data.f64); + break; + default: +...
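For reference, a standalone sketch of the saturating f32 -> u16 rounding that the folded OP_CVT produces in the hunk above; util_iround and CLAMP are Mesa utilities, approximated here with std::lround and std::clamp:

   #include <algorithm>
   #include <cmath>
   #include <cstdint>
   #include <cstdio>

   static uint16_t cvtSatF32ToU16(float f)
   {
      // Clamp first so the rounded value always fits in 16 bits.
      return (uint16_t)std::lround(std::clamp(f, 0.0f, (float)UINT16_MAX));
   }

   int main()
   {
      printf("%d %d %d\n", (int)cvtSatF32ToU16(-3.7f),
             (int)cvtSatF32ToU16(123.6f), (int)cvtSatF32ToU16(1e9f));  // 0 124 65535
      return 0;
   }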
2015 Jan 10
0
[RESEND/PATCH] nv50/ir: Handle OP_CVT when folding constant expressions
...if (i->saturate) > + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f32, 0, > + UINT16_MAX)); > + else > + res.data.u16 = util_iround(imm0.reg.data.f32); > + break; > + case TYPE_F64: The F64 stuff needs more thought, as I don't think we can always store the f64 immediates. In my patches, I just outlaw fp64 immediates in the first place. Please leave these out for now. > + if (i->saturate) > + res.data.u16 = util_iround(CLAMP(imm0.reg.dat...
2015 Nov 05
7
[PATCH mesa 0/5] nouveau: codegen: Make use of double immediates
Hi All, This series implements the use of double immediates in the nouveau codegen code. This turns the following (nvc0) code:
   1: mov u32 $r2 0x00000000 (8)
   2: mov u32 $r3 0x3fe00000 (8)
   3: add f64 $r0d $r0d $r2d (8)
Into:
   1: add f64 $r0d $r0d 0.500000 (8)
This has been tested with the 2 double shader tests which I just sent to the piglit list. On a gk208 (gk110 / SM35)
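For reference, a small standalone illustration (not Mesa code) of why the two 32-bit movs in the "before" IR and the 0.500000 immediate in the "after" IR are the same value: the low and high halves of the IEEE-754 encoding of 0.5 are exactly the two constants being loaded into $r2 and $r3.

   #include <cstdint>
   #include <cstdio>
   #include <cstring>

   int main()
   {
      double d = 0.5;
      uint64_t bits;
      std::memcpy(&bits, &d, sizeof bits);
      printf("low  = 0x%08x\n", (uint32_t)(bits & 0xffffffffu));  // 0x00000000 -> $r2
      printf("high = 0x%08x\n", (uint32_t)(bits >> 32));          // 0x3fe00000 -> $r3
      return 0;
   }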
2014 Jul 18
5
[PATCH 0/5] nvc0: fp64 preparation
Most of codegen is already FP64-ready. There are a few edge-cases that I ran into, many of which can apply even to non-fp64-enabled programs (although the double-wide registers are not very common without fp64). I've yet to give this a full piglit run, but wanted to send these out in case someone wanted to comment. They do not depend on the preliminary core fp64 work. Ilia Mirkin (5):
2015 Mar 25
0
[PATCH] nv50/ir: take postFactor into account when doing peephole optimizations
...truction *i, b->data.f32 = 0.0f; } switch (i->dType) { - case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break; + case TYPE_F32: + res.data.f32 = a->data.f32 * b->data.f32 * exp2f(i->postFactor); + break; case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break; case TYPE_S32: if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { @@ -579,6 +581,7 @@ ConstantFolding::expr(Instruction *i, i->src(0).mod = Modifier(0); i->src(1).mod = Modifier(0); + i->postFactor = 0; i-...
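For reference, a standalone sketch (not the Mesa pass itself) of what the hunk above changes: when folding a f32 multiply, the power-of-two post-factor has to be baked into the folded constant and the field cleared, so the scale is neither lost nor applied a second time:

   #include <cmath>
   #include <cstdio>

   static float foldMulF32(float a, float b, int &postFactor)
   {
      float res = a * b * std::exp2((float)postFactor);  // fold the 2^postFactor scale in
      postFactor = 0;                                     // it is now part of the constant
      return res;
   }

   int main()
   {
      int pf = 1;                                         // post-factor of 1, i.e. scale by 2
      printf("%g\n", foldMulF32(3.0f, 4.0f, pf));         // 24
      return 0;
   }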
2014 May 18
1
[PATCH 1/2] nv50/ir: fix s32 x s32 -> high s32 multiply logic
Retrieving the high 32 bits of a signed multiply is rather annoying. It appears that the simplest way to do this is to compute the absolute value of the arguments, and perform a u32 x u32 -> u64 operation. If the arguments' signs differ, then negate the result. Since there is no u64 support in the cvt instruction, we have to perform the 2's complement negation "by hand".
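For reference, a host-side sketch (not the nv50 IR lowering) of the approach described above: take the absolute values, do an unsigned 32 x 32 -> 64 multiply, negate the 64-bit product when the input signs differ, and keep the high 32 bits:

   #include <cstdint>
   #include <cstdio>

   static int32_t mulHiS32(int32_t a, int32_t b)
   {
      uint32_t ua = (a < 0) ? (uint32_t)(-(int64_t)a) : (uint32_t)a;
      uint32_t ub = (b < 0) ? (uint32_t)(-(int64_t)b) : (uint32_t)b;
      uint64_t prod = (uint64_t)ua * ub;
      if ((a < 0) != (b < 0))
         prod = 0 - prod;                // 2's complement negation of the 64-bit product
      return (int32_t)(prod >> 32);
   }

   int main()
   {
      // Compare against the high word of a plain 64-bit signed multiply.
      int64_t full = (int64_t)-123456789 * 987654321;
      printf("%d %d\n", mulHiS32(-123456789, 987654321), (int32_t)((uint64_t)full >> 32));
      return 0;
   }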