Hans de Goede
2015-Nov-05 13:42 UTC
[Nouveau] [PATCH envytools] envydis: gk110: Add support for dadd with an immediate src
This commit adds support for dadd with an immediate src in gk110 code.
The machine-code in question is generated by e.g. nouveau_compiler with
the new "Make use of double immediates" patch series when building the
piglit glsl-algebraic-double-add.shader_test.
This commit changes the output from:
00000010: 001c0001 c38001ff $r0 $r0 $r0 $r0 0x3fe00 0x3fe00
0x3fe0000000000000 0x3fe00000 0x0 0x3 ???
[unknown: 00000000 c0800000] [unknown instruction]
Into:
00000010: 001c0001 c38001ff add rn f64 $r0d $r0d 0x3fe0000000000000
The machine-code in question disassembles to the same using
nvdisasm and works properly on an actual gpu.
Signed-off-by: Hans de Goede <hdegoede at redhat.com>
---
envydis/gk110.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/envydis/gk110.c b/envydis/gk110.c
index 9af18e1..4790533 100644
--- a/envydis/gk110.c
+++ b/envydis/gk110.c
@@ -1274,6 +1274,7 @@ static struct insn tabcctlmod[] = {
* 000000007f800000 2nd src
* 0000003fff800000 2nd src (immediate)
* 0000007fff800000 address
+ * 000003ffff800000 float/double immediate
* 007fffffff800000 long immediate
* 0003fc0000000000 3rd src
* 007c000000000000 misc flags
@@ -1410,6 +1411,7 @@ static struct insn tabi[] = {
{ 0x02c0000000000001ull, 0x37c0000000000003ull, N("add"), T(ftz2f),
T(sat35), T(frm2a), N("f32"), DST, T(neg33), T(abs31), SRC1, T(neg3b),
T(fi2) },
{ 0x0300000000000001ull, 0x37c0000000000003ull, T(minmax), T(ftz2f),
N("f32"), DST, T(neg33), T(abs31), SRC1, T(neg3b), T(fi2) },
{ 0x0340000000000001ull, 0x37c0000000000003ull, N("mul"), T(ftz2f),
T(sat35), T(frm2a), T(neg3b), N("f32"), DST, SRC1, T(fi2) },
+ { 0x0380000000000001ull, 0x37c0000000000003ull, N("add"), T(frm2a),
N("f64"), DSTD, T(neg33), T(abs31), SRC1D, T(neg3b), T(di2) },
{ 0x0400000000000001ull, 0x37c0000000000003ull, N("mul"), T(frm2a),
T(neg3b), N("f64"), DSTD, SRC1D, T(di2) },
{ 0x0500000000000001ull, 0x37c0000000000003ull, N("selp"), DST,
SRC1, T(i3bi2), T(pnot2d), PSRC3 },
{ 0x07c0000000000001ull, 0x37c0000000000003ull, N("rshf"),
N("b32"), DST, SESTART, T(us64_28), SRC1, SRC3, SEEND, T(shfclamp),
T(sui2b) }, // d = (s1 >> s2) | (s3 << (32 - s2))
--
2.5.0
Marcin Kościelnicki
2015-Nov-05 13:51 UTC
[Nouveau] [PATCH envytools] envydis: gk110: Add support for dadd with an immediate src
On 05/11/15 14:42, Hans de Goede wrote:
> This commit adds support for dadd with an immediate src in gk110 code.
> The machine-code in question is generated by e.g. nouveau_compiler with
> the new "Make use of double immediates" patch series when building the
> piglit glsl-algebraic-double-add.shader_test.
>
> This commit changes the output from:
>
> 00000010: 001c0001 c38001ff $r0 $r0 $r0 $r0 0x3fe00 0x3fe00
> 0x3fe0000000000000 0x3fe00000 0x0 0x3 ???
> [unknown: 00000000 c0800000] [unknown instruction]
>
> Into:
>
> 00000010: 001c0001 c38001ff add rn f64 $r0d $r0d 0x3fe0000000000000
>
> The machine-code in question disassembles to the same using
> nvdisasm and works properly on an actual gpu.
>
> Signed-off-by: Hans de Goede <hdegoede at redhat.com>
> ---
> envydis/gk110.c | 2 ++
> 1 file changed, 2 insertions(+)
>
> diff --git a/envydis/gk110.c b/envydis/gk110.c
> index 9af18e1..4790533 100644
> --- a/envydis/gk110.c
> +++ b/envydis/gk110.c
> @@ -1274,6 +1274,7 @@ static struct insn tabcctlmod[] = {
> * 000000007f800000 2nd src
> * 0000003fff800000 2nd src (immediate)
> * 0000007fff800000 address
> + * 000003ffff800000 float/double immediate
> * 007fffffff800000 long immediate
> * 0003fc0000000000 3rd src
> * 007c000000000000 misc flags
> @@ -1410,6 +1411,7 @@ static struct insn tabi[] = {
> { 0x02c0000000000001ull, 0x37c0000000000003ull, N("add"), T(ftz2f), T(sat35), T(frm2a), N("f32"), DST, T(neg33), T(abs31), SRC1, T(neg3b), T(fi2) },
> { 0x0300000000000001ull, 0x37c0000000000003ull, T(minmax), T(ftz2f), N("f32"), DST, T(neg33), T(abs31), SRC1, T(neg3b), T(fi2) },
> { 0x0340000000000001ull, 0x37c0000000000003ull, N("mul"), T(ftz2f), T(sat35), T(frm2a), T(neg3b), N("f32"), DST, SRC1, T(fi2) },
> + { 0x0380000000000001ull, 0x37c0000000000003ull, N("add"), T(frm2a), N("f64"), DSTD, T(neg33), T(abs31), SRC1D, T(neg3b), T(di2) },
> { 0x0400000000000001ull, 0x37c0000000000003ull, N("mul"), T(frm2a), T(neg3b), N("f64"), DSTD, SRC1D, T(di2) },
> { 0x0500000000000001ull, 0x37c0000000000003ull, N("selp"), DST, SRC1, T(i3bi2), T(pnot2d), PSRC3 },
> { 0x07c0000000000001ull, 0x37c0000000000003ull, N("rshf"), N("b32"), DST, SESTART, T(us64_28), SRC1, SRC3, SEEND, T(shfclamp), T(sui2b) }, // d = (s1 >> s2) | (s3 << (32 - s2))
>

Thanks, applied.