search for: type_f64

Displaying 15 results from an estimated 15 matches for "type_f64".

2015 Feb 23
2
[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)
...itterNV50::emitIMAD(const Instruction *i) { code[0] = 0x60000000; @@ -1150,9 +1213,11 @@ CodeEmitterNV50::emitSET(const Instruction *i) code[0] = 0x30000000; code[1] = 0x60000000; - emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14); - switch (i->sType) { + case TYPE_F64: + code[0] = 0xe0000000; + code[1] = 0xe0000000; + break; case TYPE_F32: code[0] |= 0x80000000; break; case TYPE_S32: code[1] |= 0x0c000000; break; case TYPE_U32: code[1] |= 0x04000000; break; @@ -1162,6 +1227,9 @@ CodeEmitterNV50::emitSET(const Instruction *i) ass...
2015 Feb 23
2
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...> + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); > + > + // 5. Perform 2 Newton-Raphson steps > + if (i->op == OP_RCP) { > + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 > + Value *two = bld.getSSA(8); > + > + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f)); > + > + guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8), > + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), two, guess), > + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input, > +...
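For reference, a minimal standalone sketch of the refinement described in the comment above, assuming a single-precision seed for the initial estimate; the function name and the test value are illustrative and not taken from the patch:

   // Two Newton-Raphson steps for 1/x in double precision,
   // starting from a rough single-precision estimate.
   #include <cstdio>

   static double refineRcp(double input)
   {
      double guess = (double)(1.0f / (float)input);     // rough 32-bit seed
      for (int step = 0; step < 2; ++step)
         guess = 2.0 * guess - input * guess * guess;   // x_{n+1} = 2*x_n - input*x_n^2
      return guess;
   }

   int main()
   {
      printf("%.17g\n", refineRcp(3.0));                // ~0.33333333333333331
      return 0;
   }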
2015 Feb 23
0
[PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...P_MERGE, TYPE_U64, def, dst[0], dst[1]); + guess = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); + + // 5. Perform 2 Newton-Raphson steps + if (i->op == OP_RCP) { + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 + Value *two = bld.getSSA(8); + + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f)); + + guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8), + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), two, guess), + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input, +...
2015 Feb 23
0
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...(OP_MERGE, TYPE_U64, bld.getSSA(8), dst[0], dst[1]); >> + >> + // 5. Perform 2 Newton-Raphson steps >> + if (i->op == OP_RCP) { >> + // RCP: x_{n+1} = 2 * x_n - input * x_n^2 >> + Value *two = bld.getSSA(8); >> + >> + bld.mkCvt(OP_CVT, TYPE_F64, two, TYPE_F32, bld.loadImm(NULL, 2.0f)); >> + >> + guess = bld.mkOp2v(OP_SUB, TYPE_F64, bld.getSSA(8), >> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), two, guess), >> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input,...
2014 Jul 05
1
[PATCH v4] nv50/ir: Handle OP_CVT when folding constant expressions
..._t conv = util_iround(imm0.reg.data.f32); + res.data.u16 = (conv < 0) ? 0 : CLAMP((uint32_t)conv, 0, + UINT16_MAX); + } + else res.data.u16 = util_iround(imm0.reg.data.f32); + break; + case TYPE_F64: + if (i->saturate) { + int32_t conv = util_iround(imm0.reg.data.f64); + res.data.u16 = (conv < 0) ? 0 : CLAMP((uint32_t)conv, 0, + UINT16_MAX); + } + else res.data.u16 = util_irou...
2015 Feb 20
10
[PATCH 01/11] nvc0/ir: add emission of dadd/dmul/dmad opcodes, fix minmax
...e[0] ^= 1 << 8; +} + +void CodeEmitterNVC0::emitUADD(const Instruction *i) { uint32_t addOp = 0; @@ -895,6 +947,8 @@ CodeEmitterNVC0::emitMINMAX(const Instruction *i) else if (!isFloatType(i->dType)) op |= isSignedType(i->dType) ? 0x23 : 0x03; + if (i->dType == TYPE_F64) + op |= 0x01; emitForm_A(i, op); emitNegAbs12(i); @@ -2242,20 +2296,26 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn) break; case OP_ADD: case OP_SUB: - if (isFloatType(insn->dType)) + if (insn->dType == TYPE_F64) + emitDADD(insn); +...
2015 Jan 09
3
[RESEND/PATCH] nv50/ir: Handle OP_CVT when folding constant expressions
...case TYPE_F32: + if (i->saturate) + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f32, 0, + UINT16_MAX)); + else + res.data.u16 = util_iround(imm0.reg.data.f32); + break; + case TYPE_F64: + if (i->saturate) + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f64, 0, + UINT16_MAX)); + else + res.data.u16 = util_iround(imm0.reg.data.f64); + break; + default: +...
2014 Jul 03
1
[PATCH v3 1/2] nv50/ir: Add support for the double Type to BuildUtil
...nion { + double f64; + uint64_t u64; + } u; + u.f64 = f; + return mkImm(u.u64); +} + Value * BuildUtil::loadImm(Value *dst, float f) { @@ -398,6 +409,12 @@ BuildUtil::loadImm(Value *dst, float f) } Value * +BuildUtil::loadImm(Value *dst, double u) +{ + return mkOp1v(OP_MOV, TYPE_F64, dst ? dst : getScratch(8), mkImm(u)); +} + +Value * BuildUtil::loadImm(Value *dst, uint32_t u) { return mkOp1v(OP_MOV, TYPE_U32, dst ? dst : getScratch(), mkImm(u)); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util...
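For reference, a standalone illustration (not Mesa code) of the union trick the new mkImm overload relies on, reinterpreting a double as its raw 64-bit bit pattern:

   #include <cstdint>
   #include <cstdio>

   static uint64_t doubleBits(double f)
   {
      union { double f64; uint64_t u64; } u;
      u.f64 = f;
      return u.u64;                      // same bits, viewed as an integer
   }

   int main()
   {
      // 0.5 encodes as 0x3fe0000000000000 in IEEE-754 double precision.
      printf("0x%016llx\n", (unsigned long long)doubleBits(0.5));
      return 0;
   }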
2014 Jul 03
0
[PATCH] nv50/ir: Handle OP_CVT when folding constant expressions
...break; } + case OP_CVT: { + Storage res; + bld.setPosition(i, true); /* make sure bld is init'ed */ + + switch(i->dType) { + case TYPE_U16: + switch (i->sType) { + case TYPE_F32: res.data.u16 = util_iround(imm0.reg.data.f32); break; + case TYPE_F64: res.data.u16 = util_iround(imm0.reg.data.f64); break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.u16)); + break; + case TYPE_U32: + switch (i->sType) { + case TYPE_F32: res.data.u32 = util_iround(imm0.reg.data.f3...
2014 Jul 06
0
[PATCH v5] nv50/ir: Handle OP_CVT when folding constant expressions
...case TYPE_F32: + if (i->saturate) + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f32, 0, + UINT16_MAX)); + else + res.data.u16 = util_iround(imm0.reg.data.f32); + break; + case TYPE_F64: + if (i->saturate) + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f64, 0, + UINT16_MAX)); + else + res.data.u16 = util_iround(imm0.reg.data.f64); + break; + default: +...
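For reference, a standalone sketch of the saturating f32 -> u16 rounding that the folded OP_CVT produces in the hunk above; util_iround and CLAMP are Mesa utilities, approximated here with std::lround and std::clamp:

   #include <algorithm>
   #include <cmath>
   #include <cstdint>
   #include <cstdio>

   static uint16_t cvtSatF32ToU16(float f)
   {
      // Clamp first so the rounded value always fits in 16 bits.
      return (uint16_t)std::lround(std::clamp(f, 0.0f, (float)UINT16_MAX));
   }

   int main()
   {
      printf("%d %d %d\n", (int)cvtSatF32ToU16(-3.7f),
             (int)cvtSatF32ToU16(123.6f), (int)cvtSatF32ToU16(1e9f));  // 0 124 65535
      return 0;
   }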
2015 Jan 10
0
[RESEND/PATCH] nv50/ir: Handle OP_CVT when folding constant expressions
...if (i->saturate) > + res.data.u16 = util_iround(CLAMP(imm0.reg.data.f32, 0, > + UINT16_MAX)); > + else > + res.data.u16 = util_iround(imm0.reg.data.f32); > + break; > + case TYPE_F64: The F64 stuff needs more thought, as I don't think we can always store the f64 immediates. In my patches, I just outlaw fp64 immediates in the first place. Please leave these out for now. > + if (i->saturate) > + res.data.u16 = util_iround(CLAMP(imm0.reg.dat...
2015 Nov 05
7
[PATCH mesa 0/5] nouveau: codegen: Make use of double immediates
Hi All, This series implements the use of double immediates in the nouveau codegen code. This turns the following (nvc0) code:
   1: mov u32 $r2 0x00000000 (8)
   2: mov u32 $r3 0x3fe00000 (8)
   3: add f64 $r0d $r0d $r2d (8)
Into:
   1: add f64 $r0d $r0d 0.500000 (8)
This has been tested with the 2 double shader tests which I just sent to the piglit list. On a gk208 (gk110 / SM35)
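For reference, a small standalone illustration (not Mesa code) of why the two 32-bit movs in the "before" IR and the 0.500000 immediate in the "after" IR are the same value: the low and high halves of the IEEE-754 encoding of 0.5 are exactly the two constants being loaded into $r2 and $r3.

   #include <cstdint>
   #include <cstdio>
   #include <cstring>

   int main()
   {
      double d = 0.5;
      uint64_t bits;
      std::memcpy(&bits, &d, sizeof bits);
      printf("low  = 0x%08x\n", (uint32_t)(bits & 0xffffffffu));  // 0x00000000 -> $r2
      printf("high = 0x%08x\n", (uint32_t)(bits >> 32));          // 0x3fe00000 -> $r3
      return 0;
   }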
2014 Jul 18
5
[PATCH 0/5] nvc0: fp64 preparation
Most of codegen is already FP64-ready. There are a few edge-cases that I ran into, many of which can apply even to non-fp64-enabled programs (although the double-wide registers are not very common without fp64). I've yet to give this a full piglit run, but wanted to send these out in case someone wanted to comment. They do not depend on the preliminary core fp64 work. Ilia Mirkin (5):
2015 Mar 25
0
[PATCH] nv50/ir: take postFactor into account when doing peephole optimizations
...truction *i, b->data.f32 = 0.0f; } switch (i->dType) { - case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break; + case TYPE_F32: + res.data.f32 = a->data.f32 * b->data.f32 * exp2f(i->postFactor); + break; case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break; case TYPE_S32: if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { @@ -579,6 +581,7 @@ ConstantFolding::expr(Instruction *i, i->src(0).mod = Modifier(0); i->src(1).mod = Modifier(0); + i->postFactor = 0; i-...
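For reference, a standalone sketch (not the Mesa pass itself) of what the hunk above changes: when folding a f32 multiply, the power-of-two post-factor has to be baked into the folded constant and the field cleared, so the scale is neither lost nor applied a second time:

   #include <cmath>
   #include <cstdio>

   static float foldMulF32(float a, float b, int &postFactor)
   {
      float res = a * b * std::exp2((float)postFactor);  // fold the 2^postFactor scale in
      postFactor = 0;                                     // it is now part of the constant
      return res;
   }

   int main()
   {
      int pf = 1;                                         // post-factor of 1, i.e. scale by 2
      printf("%g\n", foldMulF32(3.0f, 4.0f, pf));         // 24
      return 0;
   }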
2014 May 18
1
[PATCH 1/2] nv50/ir: fix s32 x s32 -> high s32 multiply logic
Retrieving the high 32 bits of a signed multiply is rather annoying. It appears that the simplest way to do this is to compute the absolute value of the arguments, and perform a u32 x u32 -> u64 operation. If the arguments' signs differ, then negate the result. Since there is no u64 support in the cvt instruction, we have to perform the 2's complement negation "by hand".
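For reference, a host-side sketch (not the nv50 IR lowering) of the approach described above: take the absolute values, do an unsigned 32 x 32 -> 64 multiply, negate the 64-bit product when the input signs differ, and keep the high 32 bits:

   #include <cstdint>
   #include <cstdio>

   static int32_t mulHiS32(int32_t a, int32_t b)
   {
      uint32_t ua = (a < 0) ? (uint32_t)(-(int64_t)a) : (uint32_t)a;
      uint32_t ub = (b < 0) ? (uint32_t)(-(int64_t)b) : (uint32_t)b;
      uint64_t prod = (uint64_t)ua * ub;
      if ((a < 0) != (b < 0))
         prod = 0 - prod;                // 2's complement negation of the 64-bit product
      return (int32_t)(prod >> 32);
   }

   int main()
   {
      // Compare against the high word of a plain 64-bit signed multiply.
      int64_t full = (int64_t)-123456789 * 987654321;
      printf("%d %d\n", mulHiS32(-123456789, 987654321), (int32_t)((uint64_t)full >> 32));
      return 0;
   }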