Displaying 4 results from an estimated 4 matches for "half_input".
2015 Feb 23
2
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...), two, guess),
> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input,
> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), guess, guess)));
> + } else {
> + // RSQ: x_{n+1} = x_n (1.5 - 0.5 * input * x_n^2)
> + Value *half_input = bld.getSSA(8), *three_half = bld.getSSA(8);
> + bld.mkCvt(OP_CVT, TYPE_F64, half_input, TYPE_F32, bld.loadImm(NULL, -0.5f));
> + bld.mkCvt(OP_CVT, TYPE_F64, three_half, TYPE_F32, bld.loadImm(NULL, 1.5f));
> +
> + half_input = bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8),...
2015 Feb 23
2
[PATCH 1/2] nv50/ir: add fp64 support on G200 (NVA0)
...0, (pred = bld.getSSA(1, FILE_FLAGS)));
+ bld.mkOp1(OP_NEG, TYPE_F64, neg, guess)
+ ->setPredicate(CC_S, pred);
+ bld.mkMov(copy, guess)
+ ->setPredicate(CC_NS, pred);
+ guess = bld.mkOp2v(OP_UNION, TYPE_U64, bld.getSSA(8), neg, copy);
+ } else {
+ Value *half_input = bld.getSSA(8), *three_half = bld.getSSA(8);
+ bld.mkCvt(OP_CVT, TYPE_F64, half_input, TYPE_F32, bld.loadImm(NULL, -0.5f));
+ bld.mkCvt(OP_CVT, TYPE_F64, three_half, TYPE_F32, bld.loadImm(NULL, 1.5f));
+
+ half_input = bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), half_input, abs);
+...
2015 Feb 23
0
[PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...L, TYPE_F64, bld.getSSA(8), two, guess),
+ bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input,
+ bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), guess, guess)));
+ } else {
+ // RSQ: x_{n+1} = x_n (1.5 - 0.5 * input * x_n^2)
+ Value *half_input = bld.getSSA(8), *three_half = bld.getSSA(8);
+ bld.mkCvt(OP_CVT, TYPE_F64, half_input, TYPE_F32, bld.loadImm(NULL, -0.5f));
+ bld.mkCvt(OP_CVT, TYPE_F64, three_half, TYPE_F32, bld.loadImm(NULL, 1.5f));
+
+ half_input = bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), half_input, input);...
2015 Feb 23
0
[Mesa-dev] [PATCH 2/2] nvc0/ir: improve precision of double RCP/RSQ results
...> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), input,
>> + bld.mkOp2v(OP_MUL, TYPE_F64, bld.getSSA(8), guess, guess)));
>> + } else {
>> + // RSQ: x_{n+1} = x_n (1.5 - 0.5 * input * x_n^2)
>> + Value *half_input = bld.getSSA(8), *three_half = bld.getSSA(8);
>> + bld.mkCvt(OP_CVT, TYPE_F64, half_input, TYPE_F32, bld.loadImm(NULL, -0.5f));
>> + bld.mkCvt(OP_CVT, TYPE_F64, three_half, TYPE_F32, bld.loadImm(NULL, 1.5f));
>> +
>> + half_input = bld.mkOp2v(OP_MUL, TYPE_F64,...