rendercheck -f a8r8g8b8 passes as many tests as it does on a GK208, and Xv
appears to work. Very lightly tested.
Instead of sticking coordinates into pushbufs, the vertex shader is
modified to read them from a constbuf, indexed by vertex id. This
approach could be used for all nvc0 generations, but I didn't want to
rock the boat.
Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
Note: this won't work for GM20x - we need to allow TIC format to be updated
for that to work. But this is a step in that direction.
src/Makefile.am | 16 ++++++++
src/nouveau_copy.c | 1 +
src/nouveau_exa.c | 2 +-
src/nouveau_xv.c | 2 +-
src/nv_accel_common.c | 1 +
src/nv_driver.c | 1 +
src/nvc0_accel.c | 37 ++++++++++++++---
src/nvc0_exa.c | 48 ++++++++++++++++++++--
src/nvc0_xv.c | 48 ++++++++++++++++++++--
src/shader/Makefile | 23 ++++++++---
src/shader/exac8nv110.fp | 47 +++++++++++++++++++++
src/shader/exac8nv110.fpc | 38 +++++++++++++++++
src/shader/exacanv110.fp | 47 +++++++++++++++++++++
src/shader/exacanv110.fpc | 38 +++++++++++++++++
src/shader/exacmnv110.fp | 47 +++++++++++++++++++++
src/shader/exacmnv110.fpc | 38 +++++++++++++++++
src/shader/exas8nv110.fp | 42 +++++++++++++++++++
src/shader/exas8nv110.fpc | 28 +++++++++++++
src/shader/exasanv110.fp | 47 +++++++++++++++++++++
src/shader/exasanv110.fpc | 38 +++++++++++++++++
src/shader/exascnv110.fp | 38 +++++++++++++++++
src/shader/exascnv110.fpc | 20 +++++++++
src/shader/videonv110.fp | 54 ++++++++++++++++++++++++
src/shader/videonv110.fpc | 52 +++++++++++++++++++++++
src/shader/xfrm2nv110.vp | 82 +++++++++++++++++++++++++++++++++++++
src/shader/xfrm2nv110.vpc | 102 ++++++++++++++++++++++++++++++++++++++++++++++
26 files changed, 918 insertions(+), 19 deletions(-)
create mode 100644 src/shader/exac8nv110.fp
create mode 100644 src/shader/exac8nv110.fpc
create mode 100644 src/shader/exacanv110.fp
create mode 100644 src/shader/exacanv110.fpc
create mode 100644 src/shader/exacmnv110.fp
create mode 100644 src/shader/exacmnv110.fpc
create mode 100644 src/shader/exas8nv110.fp
create mode 100644 src/shader/exas8nv110.fpc
create mode 100644 src/shader/exasanv110.fp
create mode 100644 src/shader/exasanv110.fpc
create mode 100644 src/shader/exascnv110.fp
create mode 100644 src/shader/exascnv110.fpc
create mode 100644 src/shader/videonv110.fp
create mode 100644 src/shader/videonv110.fpc
create mode 100644 src/shader/xfrm2nv110.vp
create mode 100644 src/shader/xfrm2nv110.vpc
diff --git a/src/Makefile.am b/src/Makefile.am
index 1e04ddf..6ba8d87 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -77,48 +77,64 @@ EXTRA_DIST = hwdefs/nv_3ddefs.xml.h \
shader/exac8nve0.fpc \
shader/exac8nvf0.fp \
shader/exac8nvf0.fpc \
+ shader/exac8nv110.fp \
+ shader/exac8nv110.fpc \
shader/exacanvc0.fp \
shader/exacanvc0.fpc \
shader/exacanve0.fp \
shader/exacanve0.fpc \
shader/exacanvf0.fp \
shader/exacanvf0.fpc \
+ shader/exacanv110.fp \
+ shader/exacanv110.fpc \
shader/exacmnvc0.fp \
shader/exacmnvc0.fpc \
shader/exacmnve0.fp \
shader/exacmnve0.fpc \
shader/exacmnvf0.fp \
shader/exacmnvf0.fpc \
+ shader/exacmnv110.fp \
+ shader/exacmnv110.fpc \
shader/exas8nvc0.fp \
shader/exas8nvc0.fpc \
shader/exas8nve0.fp \
shader/exas8nve0.fpc \
shader/exas8nvf0.fp \
shader/exas8nvf0.fpc \
+ shader/exas8nv110.fp \
+ shader/exas8nv110.fpc \
shader/exasanvc0.fp \
shader/exasanvc0.fpc \
shader/exasanve0.fp \
shader/exasanve0.fpc \
shader/exasanvf0.fp \
shader/exasanvf0.fpc \
+ shader/exasanv110.fp \
+ shader/exasanv110.fpc \
shader/exascnvc0.fp \
shader/exascnvc0.fpc \
shader/exascnve0.fp \
shader/exascnve0.fpc \
shader/exascnvf0.fp \
shader/exascnvf0.fpc \
+ shader/exascnv110.fp \
+ shader/exascnv110.fpc \
shader/videonvc0.fp \
shader/videonvc0.fpc \
shader/videonve0.fp \
shader/videonve0.fpc \
shader/videonvf0.fp \
shader/videonvf0.fpc \
+ shader/videonv110.fp \
+ shader/videonv110.fpc \
shader/xfrm2nvc0.vp \
shader/xfrm2nvc0.vpc \
shader/xfrm2nve0.vp \
shader/xfrm2nve0.vpc \
shader/xfrm2nvf0.vp \
shader/xfrm2nvf0.vpc \
+ shader/xfrm2nv110.vp \
+ shader/xfrm2nv110.vpc \
shader/Makefile \
nouveau_local.h \
nouveau_copy.h \
diff --git a/src/nouveau_copy.c b/src/nouveau_copy.c
index e152a53..c139de6 100644
--- a/src/nouveau_copy.c
+++ b/src/nouveau_copy.c
@@ -81,6 +81,7 @@ nouveau_copy_init(ScreenPtr pScreen)
&pNv->ce_channel);
break;
case NV_KEPLER:
+ case NV_MAXWELL:
ret = nouveau_object_new(&pNv->dev->object, 0,
NOUVEAU_FIFO_CHANNEL_CLASS,
&(struct nve0_fifo) {
diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index def66ac..0f02b99 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -514,12 +514,12 @@ nouveau_exa_init(ScreenPtr pScreen)
break;
case NV_FERMI:
case NV_KEPLER:
+ case NV_MAXWELL:
exa->CheckComposite = NVC0EXACheckComposite;
exa->PrepareComposite = NVC0EXAPrepareComposite;
exa->Composite = NVC0EXAComposite;
exa->DoneComposite = NVC0EXADoneComposite;
break;
- case NV_MAXWELL:
default:
break;
}
diff --git a/src/nouveau_xv.c b/src/nouveau_xv.c
index 9d5bad2..2ba1992 100644
--- a/src/nouveau_xv.c
+++ b/src/nouveau_xv.c
@@ -2097,7 +2097,7 @@ NVSetupTexturedVideo (ScreenPtr pScreen, XF86VideoAdaptorPtr *textureAdaptor)
textureAdaptor[0] = NV40SetupTexturedVideo(pScreen, FALSE);
textureAdaptor[1] = NV40SetupTexturedVideo(pScreen, TRUE);
} else
- if (pNv->Architecture >= NV_TESLA && pNv->Architecture < NV_MAXWELL) {
+ if (pNv->Architecture >= NV_TESLA) {
textureAdaptor[0] = NV50SetupTexturedVideo(pScreen);
}
}
diff --git a/src/nv_accel_common.c b/src/nv_accel_common.c
index 9361ce8..5d12dd8 100644
--- a/src/nv_accel_common.c
+++ b/src/nv_accel_common.c
@@ -722,6 +722,7 @@ NVAccelCommonInit(ScrnInfoPtr pScrn)
switch (pNv->Architecture) {
case NV_FERMI:
case NV_KEPLER:
+ case NV_MAXWELL:
INIT_CONTEXT_OBJECT(3D_NVC0);
break;
case NV_TESLA:
diff --git a/src/nv_driver.c b/src/nv_driver.c
index 4dde8e0..fff83f8 100644
--- a/src/nv_driver.c
+++ b/src/nv_driver.c
@@ -389,6 +389,7 @@ NVHasKMS(struct pci_device *pci_dev, struct xf86_platform_device *platform_dev)
case 0xe0:
case 0xf0:
case 0x100:
+ case 0x110:
break;
default:
xf86DrvMsg(-1, X_ERROR, "Unknown chipset: NV%02X\n", chipset);
diff --git a/src/nvc0_accel.c b/src/nvc0_accel.c
index d2a3b93..52a17db 100644
--- a/src/nvc0_accel.c
+++ b/src/nvc0_accel.c
@@ -53,6 +53,16 @@
#include "shader/exas8nvf0.fp"
#include "shader/exac8nvf0.fp"
+#include "shader/xfrm2nv110.vp"
+#include "shader/videonv110.fp"
+
+#include "shader/exascnv110.fp"
+#include "shader/exacmnv110.fp"
+#include "shader/exacanv110.fp"
+#include "shader/exasanv110.fp"
+#include "shader/exas8nv110.fp"
+#include "shader/exac8nv110.fp"
+
#define NVC0PushProgram(pNv,addr,code) do { \
const unsigned size = sizeof(code) / sizeof(code[0]); \
PUSH_DATAu((pNv)->pushbuf, (pNv)->scratch, (addr), size); \
@@ -223,9 +233,12 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn)
} else if (pNv->dev->chipset < 0xf0) {
class = 0xa097;
handle = 0x0000906e;
- } else {
+ } else if (pNv->dev->chipset < 0x110) {
class = 0xa197;
handle = 0x0000906e;
+ } else {
+ class = 0xb097;
+ handle = 0x0000906e;
}
ret = nouveau_object_new(pNv->channel, class, class,
@@ -304,10 +317,12 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn)
PUSH_DATA (push, 1);
}
- BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3);
- PUSH_DATA (push, (bo->offset + MISC_OFFSET) >> 32);
- PUSH_DATA (push, (bo->offset + MISC_OFFSET));
- PUSH_DATA (push, 1);
+ if (pNv->Architecture < NV_MAXWELL) {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3);
+ PUSH_DATA (push, (bo->offset + MISC_OFFSET) >> 32);
+ PUSH_DATA (push, (bo->offset + MISC_OFFSET));
+ PUSH_DATA (push, 1);
+ }
BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
PUSH_DATA (push, (bo->offset + CODE_OFFSET) >> 32);
@@ -334,7 +349,8 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn)
NVC0PushProgram(pNv, PFP_S_A8, NVE0FP_Source_A8);
NVC0PushProgram(pNv, PFP_C_A8, NVE0FP_Composite_A8);
NVC0PushProgram(pNv, PFP_NV12, NVE0FP_NV12);
- } else {
+ } else
+ if (pNv->dev->chipset < 0x110) {
NVC0PushProgram(pNv, PVP_PASS, NVF0VP_Transform2);
NVC0PushProgram(pNv, PFP_S, NVF0FP_Source);
NVC0PushProgram(pNv, PFP_C, NVF0FP_Composite);
@@ -343,6 +359,15 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn)
NVC0PushProgram(pNv, PFP_S_A8, NVF0FP_Source_A8);
NVC0PushProgram(pNv, PFP_C_A8, NVF0FP_Composite_A8);
NVC0PushProgram(pNv, PFP_NV12, NVF0FP_NV12);
+ } else {
+ NVC0PushProgram(pNv, PVP_PASS, NV110VP_Transform2);
+ NVC0PushProgram(pNv, PFP_S, NV110FP_Source);
+ NVC0PushProgram(pNv, PFP_C, NV110FP_Composite);
+ NVC0PushProgram(pNv, PFP_CCA, NV110FP_CAComposite);
+ NVC0PushProgram(pNv, PFP_CCASA, NV110FP_CACompositeSrcAlpha);
+ NVC0PushProgram(pNv, PFP_S_A8, NV110FP_Source_A8);
+ NVC0PushProgram(pNv, PFP_C_A8, NV110FP_Composite_A8);
+ NVC0PushProgram(pNv, PFP_NV12, NV110FP_NV12);
}
BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 4);
diff --git a/src/nvc0_exa.c b/src/nvc0_exa.c
index 6add60b..a53dfe6 100644
--- a/src/nvc0_exa.c
+++ b/src/nvc0_exa.c
@@ -914,14 +914,56 @@ NVC0EXAComposite(PixmapPtr pdpix,
if (!PUSH_SPACE(push, 64))
return;
+ if (pNv->dev->chipset >= 0x110) {
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, 256);
+ PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA) >> 32);
+ PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA));
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 3 * (4 + 2 + 2) + 1);
+ PUSH_DATA (push, 0x80);
+
+ PUSH_DATAf(push, dx);
+ PUSH_DATAf(push, dy + (h * 2));
+ PUSH_DATAf(push, 0);
+ PUSH_DATAf(push, 1);
+ PUSH_DATAf(push, sx);
+ PUSH_DATAf(push, sy + (h * 2));
+ PUSH_DATAf(push, mx);
+ PUSH_DATAf(push, my + (h * 2));
+
+ PUSH_DATAf(push, dx);
+ PUSH_DATAf(push, dy);
+ PUSH_DATAf(push, 0);
+ PUSH_DATAf(push, 1);
+ PUSH_DATAf(push, sx);
+ PUSH_DATAf(push, sy);
+ PUSH_DATAf(push, mx);
+ PUSH_DATAf(push, my);
+
+ PUSH_DATAf(push, dx + (w * 2));
+ PUSH_DATAf(push, dy);
+ PUSH_DATAf(push, 0);
+ PUSH_DATAf(push, 1);
+ PUSH_DATAf(push, sx + (w * 2));
+ PUSH_DATAf(push, sy);
+ PUSH_DATAf(push, mx + (w * 2));
+ PUSH_DATAf(push, my);
+ }
+
BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2);
PUSH_DATA (push, ((dx + w) << 16) | dx);
PUSH_DATA (push, ((dy + h) << 16) | dy);
BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
PUSH_DATA (push, NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES);
- PUSH_VTX2s(push, sx, sy + (h * 2), mx, my + (h * 2), dx, dy + (h * 2));
- PUSH_VTX2s(push, sx, sy, mx, my, dx, dy);
- PUSH_VTX2s(push, sx + (w * 2), sy, mx + (w * 2), my, dx + (w * 2), dy);
+ if (pNv->dev->chipset < 0x110) {
+ PUSH_VTX2s(push, sx, sy + (h * 2), mx, my + (h * 2), dx, dy + (h * 2));
+ PUSH_VTX2s(push, sx, sy, mx, my, dx, dy);
+ PUSH_VTX2s(push, sx + (w * 2), sy, mx + (w * 2), my, dx + (w * 2), dy);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 3);
+ }
BEGIN_NVC0(push, NVC0_3D(VERTEX_END_GL), 1);
PUSH_DATA (push, 0);
}
diff --git a/src/nvc0_xv.c b/src/nvc0_xv.c
index d1d8f18..129c505 100644
--- a/src/nvc0_xv.c
+++ b/src/nvc0_xv.c
@@ -247,15 +247,57 @@ nvc0_xv_image_put(ScrnInfoPtr pScrn,
nouveau_pushbuf_refn (push, refs, 3))
return BadImplementation;
+ if (pNv->dev->chipset >= 0x110) {
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, 256);
+ PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA) >> 32);
+ PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA));
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 3 * (4 + 2 + 2) + 1);
+ PUSH_DATA (push, 0x80);
+
+ PUSH_DATAf(push, sx1);
+ PUSH_DATAf(push, sy1);
+ PUSH_DATAf(push, 0);
+ PUSH_DATAf(push, 1);
+ PUSH_DATAf(push, tx1);
+ PUSH_DATAf(push, ty1);
+ PUSH_DATAf(push, 0);
+ PUSH_DATAf(push, 0);
+
+ PUSH_DATAf(push, sx2+(sx2-sx1));
+ PUSH_DATAf(push, sy1);
+ PUSH_DATAf(push, 0);
+ PUSH_DATAf(push, 1);
+ PUSH_DATAf(push, tx2+(tx2-tx1));
+ PUSH_DATAf(push, ty1);
+ PUSH_DATAf(push, 0);
+ PUSH_DATAf(push, 0);
+
+ PUSH_DATAf(push, sx1);
+ PUSH_DATAf(push, sy2+(sy2-sy1));
+ PUSH_DATAf(push, 0);
+ PUSH_DATAf(push, 1);
+ PUSH_DATAf(push, tx1);
+ PUSH_DATAf(push, ty2+(ty2-ty1));
+ PUSH_DATAf(push, 0);
+ PUSH_DATAf(push, 0);
+ }
+
BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2);
PUSH_DATA (push, sx2 << NVC0_3D_SCISSOR_HORIZ_MAX__SHIFT | sx1);
PUSH_DATA (push, sy2 << NVC0_3D_SCISSOR_VERT_MAX__SHIFT | sy1 );
BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
PUSH_DATA (push, NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES);
- PUSH_VTX1s(push, tx1, ty1, sx1, sy1);
- PUSH_VTX1s(push, tx2+(tx2-tx1), ty1, sx2+(sx2-sx1), sy1);
- PUSH_VTX1s(push, tx1, ty2+(ty2-ty1), sx1, sy2+(sy2-sy1));
+ if (pNv->dev->chipset < 0x110) {
+ PUSH_VTX1s(push, tx1, ty1, sx1, sy1);
+ PUSH_VTX1s(push, tx2+(tx2-tx1), ty1, sx2+(sx2-sx1), sy1);
+ PUSH_VTX1s(push, tx1, ty2+(ty2-ty1), sx1, sy2+(sy2-sy1));
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 3);
+ }
BEGIN_NVC0(push, NVC0_3D(VERTEX_END_GL), 1);
PUSH_DATA (push, 0);
diff --git a/src/shader/Makefile b/src/shader/Makefile
index 2d789be..12bf455 100644
--- a/src/shader/Makefile
+++ b/src/shader/Makefile
@@ -22,23 +22,36 @@ NVF0_SHADERS = xfrm2nvf0.vpc \
exas8nvf0.fpc \
exac8nvf0.fpc \
videonvf0.fpc
+NV110_SHADERS = xfrm2nv110.vpc \
+ exascnv110.fpc \
+ exacmnv110.fpc \
+ exacanv110.fpc \
+ exasanv110.fpc \
+ exas8nv110.fpc \
+ exac8nv110.fpc \
+ videonv110.fpc
-SHADERS = $(NVC0_SHADERS) $(NVE0_SHADERS) $(NVF0_SHADERS)
+SHADERS = $(NVC0_SHADERS) $(NVE0_SHADERS) $(NVF0_SHADERS) $(NV110_SHADERS)
ENVYAS ?= envyas
all: $(SHADERS)
$(filter %nvc0.vpc,$(SHADERS)): %.vpc: %.vp
- cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -o $@
+ cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gf100 -o $@
$(filter %nvc0.fpc,$(SHADERS)): %.fpc: %.fp
- cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -o $@
+ cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gf100 -o $@
$(filter %nve0.vpc,$(SHADERS)): %.vpc: %.vp
- cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -V nve4 -o $@
+ cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gk104 -o $@
$(filter %nve0.fpc,$(SHADERS)): %.fpc: %.fp
- cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -V nve4 -o $@
+ cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gk104 -o $@
$(filter %nvf0.vpc,$(SHADERS)): %.vpc: %.vp
cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gk110 -o $@
$(filter %nvf0.fpc,$(SHADERS)): %.fpc: %.fp
cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gk110 -o $@
+
+$(filter %nv110.vpc,$(SHADERS)): %.vpc: %.vp
+ cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gm107 -o $@
+$(filter %nv110.fpc,$(SHADERS)): %.fpc: %.fp
+ cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gm107 -o $@
diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
new file mode 100644
index 0000000..ce78036
--- /dev/null
+++ b/src/shader/exac8nv110.fp
@@ -0,0 +1,47 @@
+#ifndef ENVYAS
+static uint32_t
+NV110FP_Composite_A8[] = {
+ 0x00001462,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x80000000,
+ 0x00000a0a,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000000f,
+ 0x00000000,
+#include "exac8nv110.fpc"
+};
+#else
+
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa pass $r0 a[0x7c] 0x0 0x0 0x1
+mufu rcp $r0 $r0
+ipa $r3 a[0x94] $r0 0x0 0x1
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa $r2 a[0x90] $r0 0x0 0x1
+tex nodep $r1 $r2 0x0 0x1 t2d 0x8
+ipa $r3 a[0x84] $r0 0x0 0x1
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa $r2 a[0x80] $r0 0x0 0x1
+tex nodep $r0 $r2 0x0 0x0 t2d 0x8
+depbar le 0x5 0x0 0x0
+sched (st 0x0) (st 0x0) (st 0x0)
+fmul ftz $r3 $r0 $r1
+mov $r2 $r3 0xf
+mov $r1 $r3 0xf
+sched (st 0x0) (st 0x0) (st 0x0)
+mov $r0 $r3 0xf
+exit
+#endif
diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
new file mode 100644
index 0000000..4aa1368
--- /dev/null
+++ b/src/shader/exac8nv110.fpc
@@ -0,0 +1,38 @@
+0xfc0007e0,
+0x001f8000,
+0xcff7ff00,
+0xe003ff87,
+0x00470000,
+0x50800000,
+0x4007ff03,
+0xe043ff89,
+0xfc0007e0,
+0x001f8000,
+0x0007ff02,
+0xe043ff89,
+0x2ff70201,
+0xc03a0014,
+0x4007ff03,
+0xe043ff88,
+0xfc0007e0,
+0x001f8000,
+0x0007ff02,
+0xe043ff88,
+0x2ff70200,
+0xc03a0004,
+0x34070000,
+0xf0f00000,
+0xfc0007e0,
+0x001f8000,
+0x00170003,
+0x5c681000,
+0x00370002,
+0x5c980780,
+0x00370001,
+0x5c980780,
+0xfc0007e0,
+0x001f8000,
+0x00370000,
+0x5c980780,
+0x0007000f,
+0xe3000000,
diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp
new file mode 100644
index 0000000..a70d5c5
--- /dev/null
+++ b/src/shader/exacanv110.fp
@@ -0,0 +1,47 @@
+#ifndef ENVYAS
+static uint32_t
+NV110FP_CAComposite[] = {
+ 0x00001462, /* 0x0000c000 = USES_KIL, MULTI_COLORS */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x80000000, /* FRAG_COORD_UMASK = 0x8 */
+ 0x00000a0a, /* FP_INTERP[0x080], 0022 0022 */
+ 0x00000000, /* FP_INTERP[0x0c0], 0 = OFF */
+ 0x00000000, /* FP_INTERP[0x100], 1 = FLAT */
+ 0x00000000, /* FP_INTERP[0x140], 2 = PERSPECTIVE */
+ 0x00000000, /* FP_INTERP[0x180], 3 = LINEAR */
+ 0x00000000, /* FP_INTERP[0x1c0] */
+ 0x00000000, /* FP_INTERP[0x200] */
+ 0x00000000, /* FP_INTERP[0x240] */
+ 0x00000000, /* FP_INTERP[0x280] */
+ 0x00000000, /* FP_INTERP[0x2c0] */
+ 0x00000000, /* FP_INTERP[0x300] */
+ 0x00000000,
+ 0x0000000f, /* FP_RESULT_MASK (0x8000 Face ?) */
+ 0x00000000, /* 0x2 = FragDepth, 0x1 = SampleMask */
+#include "exacanv110.fpc"
+};
+#else
+
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa pass $r0 a[0x7c] 0x0 0x0 0x1
+mufu rcp $r0 $r0
+ipa $r3 a[0x94] $r0 0x0 0x1
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa $r2 a[0x90] $r0 0x0 0x1
+tex nodep $r4 $r2 0x0 0x1 t2d 0xf
+ipa $r1 a[0x84] $r0 0x0 0x1
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa $r0 a[0x80] $r0 0x0 0x1
+tex nodep $r0 $r0 0x0 0x0 t2d 0xf
+depbar le 0x5 0x0 0x0
+sched (st 0x0) (st 0x0) (st 0x0)
+fmul ftz $r3 $r3 $r7
+fmul ftz $r2 $r2 $r6
+fmul ftz $r1 $r1 $r5
+sched (st 0x0) (st 0x0) (st 0x0)
+fmul ftz $r0 $r0 $r4
+exit
+#endif
diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc
new file mode 100644
index 0000000..7c0ca5e
--- /dev/null
+++ b/src/shader/exacanv110.fpc
@@ -0,0 +1,38 @@
+0xfc0007e0,
+0x001f8000,
+0xcff7ff00,
+0xe003ff87,
+0x00470000,
+0x50800000,
+0x4007ff03,
+0xe043ff89,
+0xfc0007e0,
+0x001f8000,
+0x0007ff02,
+0xe043ff89,
+0xaff70204,
+0xc03a0017,
+0x4007ff01,
+0xe043ff88,
+0xfc0007e0,
+0x001f8000,
+0x0007ff00,
+0xe043ff88,
+0xaff70000,
+0xc03a0007,
+0x34070000,
+0xf0f00000,
+0xfc0007e0,
+0x001f8000,
+0x00770303,
+0x5c681000,
+0x00670202,
+0x5c681000,
+0x00570101,
+0x5c681000,
+0xfc0007e0,
+0x001f8000,
+0x00470000,
+0x5c681000,
+0x0007000f,
+0xe3000000,
diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp
new file mode 100644
index 0000000..fe5c294
--- /dev/null
+++ b/src/shader/exacmnv110.fp
@@ -0,0 +1,47 @@
+#ifndef ENVYAS
+static uint32_t
+NV110FP_Composite[] = {
+ 0x00001462,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x80000000,
+ 0x00000a0a,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000000f,
+ 0x00000000,
+#include "exacmnv110.fpc"
+};
+#else
+
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa pass $r0 a[0x7c] 0x0 0x0 0x1
+mufu rcp $r0 $r0
+ipa $r3 a[0x94] $r0 0x0 0x1
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa $r2 a[0x90] $r0 0x0 0x1
+tex nodep $r4 $r2 0x0 0x1 t2d 0x8
+ipa $r1 a[0x84] $r0 0x0 0x1
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa $r0 a[0x80] $r0 0x0 0x1
+tex nodep $r0 $r0 0x0 0x0 t2d 0xf
+depbar le 0x5 0x0 0x0
+sched (st 0x0) (st 0x0) (st 0x0)
+fmul ftz $r3 $r3 $r4
+fmul ftz $r2 $r2 $r4
+fmul ftz $r1 $r1 $r4
+sched (st 0x0) (st 0x0) (st 0x0)
+fmul ftz $r0 $r0 $r4
+exit
+#endif
diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc
new file mode 100644
index 0000000..9d62c1a
--- /dev/null
+++ b/src/shader/exacmnv110.fpc
@@ -0,0 +1,38 @@
+0xfc0007e0,
+0x001f8000,
+0xcff7ff00,
+0xe003ff87,
+0x00470000,
+0x50800000,
+0x4007ff03,
+0xe043ff89,
+0xfc0007e0,
+0x001f8000,
+0x0007ff02,
+0xe043ff89,
+0x2ff70204,
+0xc03a0014,
+0x4007ff01,
+0xe043ff88,
+0xfc0007e0,
+0x001f8000,
+0x0007ff00,
+0xe043ff88,
+0xaff70000,
+0xc03a0007,
+0x34070000,
+0xf0f00000,
+0xfc0007e0,
+0x001f8000,
+0x00470303,
+0x5c681000,
+0x00470202,
+0x5c681000,
+0x00470101,
+0x5c681000,
+0xfc0007e0,
+0x001f8000,
+0x00470000,
+0x5c681000,
+0x0007000f,
+0xe3000000,
diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp
new file mode 100644
index 0000000..4fe2e19
--- /dev/null
+++ b/src/shader/exas8nv110.fp
@@ -0,0 +1,42 @@
+#ifndef ENVYAS
+static uint32_t
+NV110FP_Source_A8[] = {
+ 0x00001462,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x80000000,
+ 0x0000000a,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000000f,
+ 0x00000000,
+#include "exas8nv110.fpc"
+};
+#else
+
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa pass $r0 a[0x7c] 0x0 0x0 0x1
+mufu rcp $r0 $r0
+ipa $r1 a[0x84] $r0 0x0 0x1
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa $r0 a[0x80] $r0 0x0 0x1
+tex nodep $r0 $r0 0x0 0x0 t2d 0x8
+depbar le 0x5 0x0 0x0
+sched (st 0x0) (st 0x0) (st 0x0)
+mov $r3 $r0 0xf
+mov $r2 $r0 0xf
+mov $r1 $r0 0xf
+sched (st 0x0) (st 0x0) (st 0x0)
+exit
+#endif
diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc
new file mode 100644
index 0000000..1181c41
--- /dev/null
+++ b/src/shader/exas8nv110.fpc
@@ -0,0 +1,28 @@
+0xfc0007e0,
+0x001f8000,
+0xcff7ff00,
+0xe003ff87,
+0x00470000,
+0x50800000,
+0x4007ff01,
+0xe043ff88,
+0xfc0007e0,
+0x001f8000,
+0x0007ff00,
+0xe043ff88,
+0x2ff70000,
+0xc03a0004,
+0x34070000,
+0xf0f00000,
+0xfc0007e0,
+0x001f8000,
+0x00070003,
+0x5c980780,
+0x00070002,
+0x5c980780,
+0x00070001,
+0x5c980780,
+0xfc0007e0,
+0x001f8000,
+0x0007000f,
+0xe3000000,
diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp
new file mode 100644
index 0000000..61374a6
--- /dev/null
+++ b/src/shader/exasanv110.fp
@@ -0,0 +1,47 @@
+#ifndef ENVYAS
+static uint32_t
+NV110FP_CACompositeSrcAlpha[] = {
+ 0x00001462,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x80000000,
+ 0x00000a0a,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000000f,
+ 0x00000000,
+#include "exasanv110.fpc"
+};
+#else
+
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa pass $r0 a[0x7c] 0x0 0x0 0x1
+mufu rcp $r0 $r0
+ipa $r3 a[0x84] $r0 0x0 0x1
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa $r2 a[0x80] $r0 0x0 0x1
+tex nodep $r4 $r2 0x0 0x0 t2d 0x8
+ipa $r1 a[0x94] $r0 0x0 0x1
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa $r0 a[0x90] $r0 0x0 0x1
+tex nodep $r0 $r0 0x0 0x1 t2d 0xf
+depbar le 0x5 0x0 0x0
+sched (st 0x0) (st 0x0) (st 0x0)
+fmul ftz $r3 $r3 $r4
+fmul ftz $r2 $r2 $r4
+fmul ftz $r1 $r1 $r4
+sched (st 0x0) (st 0x0) (st 0x0)
+fmul ftz $r0 $r0 $r4
+exit
+#endif
diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc
new file mode 100644
index 0000000..5516a03
--- /dev/null
+++ b/src/shader/exasanv110.fpc
@@ -0,0 +1,38 @@
+0xfc0007e0,
+0x001f8000,
+0xcff7ff00,
+0xe003ff87,
+0x00470000,
+0x50800000,
+0x4007ff03,
+0xe043ff88,
+0xfc0007e0,
+0x001f8000,
+0x0007ff02,
+0xe043ff88,
+0x2ff70204,
+0xc03a0004,
+0x4007ff01,
+0xe043ff89,
+0xfc0007e0,
+0x001f8000,
+0x0007ff00,
+0xe043ff89,
+0xaff70000,
+0xc03a0017,
+0x34070000,
+0xf0f00000,
+0xfc0007e0,
+0x001f8000,
+0x00470303,
+0x5c681000,
+0x00470202,
+0x5c681000,
+0x00470101,
+0x5c681000,
+0xfc0007e0,
+0x001f8000,
+0x00470000,
+0x5c681000,
+0x0007000f,
+0xe3000000,
diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp
new file mode 100644
index 0000000..90bbb55
--- /dev/null
+++ b/src/shader/exascnv110.fp
@@ -0,0 +1,38 @@
+#ifndef ENVYAS
+static uint32_t
+NV110FP_Source[] = {
+ 0x00001462,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x80000000,
+ 0x0000000a,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000000f,
+ 0x00000000,
+#include "exascnv110.fpc"
+};
+#else
+
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa pass $r0 a[0x7c] 0x0 0x0 0x1
+mufu rcp $r0 $r0
+ipa $r1 a[0x84] $r0 0x0 0x1
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa $r0 a[0x80] $r0 0x0 0x1
+tex nodep $r0 $r0 0x0 0x0 t2d 0xf
+depbar le 0x5 0x0 0x0
+sched (st 0x0) (st 0x0) (st 0x0)
+exit
+#endif
diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc
new file mode 100644
index 0000000..2dba15d
--- /dev/null
+++ b/src/shader/exascnv110.fpc
@@ -0,0 +1,20 @@
+0xfc0007e0,
+0x001f8000,
+0xcff7ff00,
+0xe003ff87,
+0x00470000,
+0x50800000,
+0x4007ff01,
+0xe043ff88,
+0xfc0007e0,
+0x001f8000,
+0x0007ff00,
+0xe043ff88,
+0xaff70000,
+0xc03a0007,
+0x34070000,
+0xf0f00000,
+0xfc0007e0,
+0x001f8000,
+0x0007000f,
+0xe3000000,
diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp
new file mode 100644
index 0000000..2728311
--- /dev/null
+++ b/src/shader/videonv110.fp
@@ -0,0 +1,54 @@
+#ifndef ENVYAS
+static uint32_t
+NV110FP_NV12[] = {
+ 0x00001462,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x80000000,
+ 0x0000000a,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000000f,
+ 0x00000000,
+#include "videonv110.fpc"
+};
+#else
+
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa pass $r2 a[0x7c] 0x0 0x0 0x1
+mufu rcp $r2 $r2
+ipa $r0 a[0x80] $r2 0x0 0x1
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa $r1 a[0x84] $r2 0x0 0x1
+tex nodep $r4 $r0 0x0 0x0 t2d 0x8
+tex nodep $r0 $r0 0x0 0x1 t2d 0xc
+sched (st 0x0) (st 0x0) (st 0x0)
+depbar le 0x5 0x1 0x1
+fmul ftz $r5 $r4 c0[0x0]
+fadd ftz $r3 $r5 c0[0x4]
+sched (st 0x0) (st 0x0) (st 0x0)
+fadd ftz $r4 $r5 c0[0x8]
+fadd ftz $r5 $r5 c0[0xc]
+depbar le 0x5 0x0 0x0
+sched (st 0x0) (st 0x0) (st 0x0)
+ffma ftz $r3 $r0 c0[0x10] $r3
+ffma ftz $r4 $r0 c0[0x14] $r4
+ffma ftz $r5 $r0 c0[0x18] $r5
+sched (st 0x0) (st 0x0) (st 0x0)
+ffma ftz $r0 $r1 c0[0x1c] $r3
+ffma ftz $r2 $r1 c0[0x24] $r5
+ffma ftz $r1 $r1 c0[0x20] $r4
+sched (st 0x0) (st 0x0) (st 0x0)
+exit
+#endif
diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc
new file mode 100644
index 0000000..31d745a
--- /dev/null
+++ b/src/shader/videonv110.fpc
@@ -0,0 +1,52 @@
+0xfc0007e0,
+0x001f8000,
+0xcff7ff02,
+0xe003ff87,
+0x00470202,
+0x50800000,
+0x0027ff00,
+0xe043ff88,
+0xfc0007e0,
+0x001f8000,
+0x4027ff01,
+0xe043ff88,
+0x2ff70004,
+0xc03a0004,
+0x2ff70000,
+0xc03a0016,
+0xfc0007e0,
+0x001f8000,
+0x34170001,
+0xf0f00000,
+0x00070405,
+0x4c681000,
+0x00170503,
+0x4c581000,
+0xfc0007e0,
+0x001f8000,
+0x00270504,
+0x4c581000,
+0x00370505,
+0x4c581000,
+0x34070000,
+0xf0f00000,
+0xfc0007e0,
+0x001f8000,
+0x00470003,
+0x49a00180,
+0x00570004,
+0x49a00200,
+0x00670005,
+0x49a00280,
+0xfc0007e0,
+0x001f8000,
+0x00770100,
+0x49a00180,
+0x00970102,
+0x49a00280,
+0x00870101,
+0x49a00200,
+0xfc0007e0,
+0x001f8000,
+0x0007000f,
+0xe3000000,
diff --git a/src/shader/xfrm2nv110.vp b/src/shader/xfrm2nv110.vp
new file mode 100644
index 0000000..bbfc527
--- /dev/null
+++ b/src/shader/xfrm2nv110.vp
@@ -0,0 +1,82 @@
+#ifndef ENVYAS
+static uint32_t
+NV110VP_Transform2[] = {
+ 0x02000461,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x000ff000,
+ 0x00000000, /* VP_ATTR_EN[0x000] */
+ 0x00000000, /* VP_ATTR_EN[0x080] */
+ 0x00000000, /* VP_ATTR_EN[0x100] */
+ 0x00000000,
+ 0x00000000, /* VP_ATTR_EN[0x200] */
+ 0x80000000, /* VERTEXID */
+ 0x00000000, /* VP_ATTR_EN[0x300] */
+ 0x00000000,
+ 0x0033f000, /* VP_EXPORT_EN[0x040] */
+ 0x00000000, /* VP_EXPORT_EN[0x0c0] */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000, /* VP_EXPORT_EN[0x2c0] */
+ 0x00000000,
+#include "xfrm2nv110.vpc"
+};
+#else
+
+sched (st 0x0) (st 0x0) (st 0x0)
+ld b32 $r5 a[0x2fc] 0x0
+shl $r5 $r5 0x5
+ld b64 $r0 c0[$r5+0x80]
+sched (st 0x0) (st 0x0) (st 0x0)
+ld b64 $r2 c0[$r5+0x88]
+st b128 a[0x70] $r0 0x0
+
+ld b64 $r0 c0[$r5+0x90]
+sched (st 0x0) (st 0x0) (st 0x0)
+fmul ftz $r2 $r0 c0[0x0]
+fmul ftz $r3 $r0 c0[0xc]
+fmul ftz $r4 $r0 c0[0x18]
+sched (st 0x0) (st 0x0) (st 0x0)
+ffma ftz $r2 $r1 c0[0x4] $r2
+ffma ftz $r3 $r1 c0[0x10] $r3
+ffma ftz $r4 $r1 c0[0x1c] $r4
+sched (st 0x0) (st 0x0) (st 0x0)
+fadd ftz $r2 $r2 c0[0x8]
+fadd ftz $r3 $r3 c0[0x14]
+fadd ftz $r4 $r4 c0[0x20]
+sched (st 0x0) (st 0x0) (st 0x0)
+mufu rcp $r4 $r4
+fmul ftz $r2 $r2 $r4
+fmul ftz $r3 $r3 $r4
+sched (st 0x0) (st 0x0) (st 0x0)
+fmul ftz $r0 $r2 c0[0x24]
+fmul ftz $r1 $r3 c0[0x28]
+st b64 a[0x80] $r0 0x0
+
+sched (st 0x0) (st 0x0) (st 0x0)
+ld b64 $r0 c0[$r5+0x98]
+fmul ftz $r2 $r0 c0[0x2c]
+fmul ftz $r3 $r0 c0[0x38]
+sched (st 0x0) (st 0x0) (st 0x0)
+fmul ftz $r4 $r0 c0[0x44]
+ffma ftz $r2 $r1 c0[0x30] $r2
+ffma ftz $r3 $r1 c0[0x3c] $r3
+sched (st 0x0) (st 0x0) (st 0x0)
+ffma ftz $r4 $r1 c0[0x48] $r4
+fadd ftz $r2 $r2 c0[0x34]
+fadd ftz $r3 $r3 c0[0x40]
+sched (st 0x0) (st 0x0) (st 0x0)
+fadd ftz $r4 $r4 c0[0x4c]
+mufu rcp $r4 $r4
+fmul ftz $r2 $r2 $r4
+sched (st 0x0) (st 0x0) (st 0x0)
+fmul ftz $r3 $r3 $r4
+fmul ftz $r0 $r2 c0[0x50]
+fmul ftz $r1 $r3 c0[0x54]
+sched (st 0x0) (st 0x0) (st 0x0)
+st b64 a[0x90] $r0 0x0
+
+exit
+#endif
diff --git a/src/shader/xfrm2nv110.vpc b/src/shader/xfrm2nv110.vpc
new file mode 100644
index 0000000..0d9ebfd
--- /dev/null
+++ b/src/shader/xfrm2nv110.vpc
@@ -0,0 +1,102 @@
+0xfc0007e0,
+0x001f8000,
+0x2fc7ff05,
+0xefd87f80,
+0x00570505,
+0x38480000,
+0x08070500,
+0xef950000,
+0xfc0007e0,
+0x001f8000,
+0x08870502,
+0xef950000,
+0x0707ff00,
+0xeff1ff80,
+0x09070500,
+0xef950000,
+0xfc0007e0,
+0x001f8000,
+0x00070002,
+0x4c681000,
+0x00370003,
+0x4c681000,
+0x00670004,
+0x4c681000,
+0xfc0007e0,
+0x001f8000,
+0x00170102,
+0x49a00100,
+0x00470103,
+0x49a00180,
+0x00770104,
+0x49a00200,
+0xfc0007e0,
+0x001f8000,
+0x00270202,
+0x4c581000,
+0x00570303,
+0x4c581000,
+0x00870404,
+0x4c581000,
+0xfc0007e0,
+0x001f8000,
+0x00470404,
+0x50800000,
+0x00470202,
+0x5c681000,
+0x00470303,
+0x5c681000,
+0xfc0007e0,
+0x001f8000,
+0x00970200,
+0x4c681000,
+0x00a70301,
+0x4c681000,
+0x0807ff00,
+0xeff0ff80,
+0xfc0007e0,
+0x001f8000,
+0x09870500,
+0xef950000,
+0x00b70002,
+0x4c681000,
+0x00e70003,
+0x4c681000,
+0xfc0007e0,
+0x001f8000,
+0x01170004,
+0x4c681000,
+0x00c70102,
+0x49a00100,
+0x00f70103,
+0x49a00180,
+0xfc0007e0,
+0x001f8000,
+0x01270104,
+0x49a00200,
+0x00d70202,
+0x4c581000,
+0x01070303,
+0x4c581000,
+0xfc0007e0,
+0x001f8000,
+0x01370404,
+0x4c581000,
+0x00470404,
+0x50800000,
+0x00470202,
+0x5c681000,
+0xfc0007e0,
+0x001f8000,
+0x00470303,
+0x5c681000,
+0x01470200,
+0x4c681000,
+0x01570301,
+0x4c681000,
+0xfc0007e0,
+0x001f8000,
+0x0907ff00,
+0xeff0ff80,
+0x0007000f,
+0xe3000000,
--
2.7.3
Samuel Pitoiset
2016-Oct-17 09:28 UTC
[Nouveau] [PATCH] exa: add GM10x acceleration support
Looks reasonable, some minor comments below. On 10/16/2016 02:06 AM, Ilia Mirkin wrote:> rendercheck -f a8r8g8b8 passes as much as on a GK208, and xv appears to > work. Very lightly tested. > > Instead of sticking coordinates into pushbufs, the vertex shader is > modified to read them from a constbuf, indexed by vertex id. This > approach could be used for all nvc0 generations, but I didn't want to > rock the boat. > > Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu> > --- > > Note: this won't work for GM20x - we need to allow TIC format to be updated > for that to work. But this is a step in that direction. > > src/Makefile.am | 16 ++++++++ > src/nouveau_copy.c | 1 + > src/nouveau_exa.c | 2 +- > src/nouveau_xv.c | 2 +- > src/nv_accel_common.c | 1 + > src/nv_driver.c | 1 + > src/nvc0_accel.c | 37 ++++++++++++++--- > src/nvc0_exa.c | 48 ++++++++++++++++++++-- > src/nvc0_xv.c | 48 ++++++++++++++++++++-- > src/shader/Makefile | 23 ++++++++--- > src/shader/exac8nv110.fp | 47 +++++++++++++++++++++ > src/shader/exac8nv110.fpc | 38 +++++++++++++++++ > src/shader/exacanv110.fp | 47 +++++++++++++++++++++ > src/shader/exacanv110.fpc | 38 +++++++++++++++++ > src/shader/exacmnv110.fp | 47 +++++++++++++++++++++ > src/shader/exacmnv110.fpc | 38 +++++++++++++++++ > src/shader/exas8nv110.fp | 42 +++++++++++++++++++ > src/shader/exas8nv110.fpc | 28 +++++++++++++ > src/shader/exasanv110.fp | 47 +++++++++++++++++++++ > src/shader/exasanv110.fpc | 38 +++++++++++++++++ > src/shader/exascnv110.fp | 38 +++++++++++++++++ > src/shader/exascnv110.fpc | 20 +++++++++ > src/shader/videonv110.fp | 54 ++++++++++++++++++++++++ > src/shader/videonv110.fpc | 52 +++++++++++++++++++++++ > src/shader/xfrm2nv110.vp | 82 +++++++++++++++++++++++++++++++++++++ > src/shader/xfrm2nv110.vpc | 102 ++++++++++++++++++++++++++++++++++++++++++++++ > 26 files changed, 918 insertions(+), 19 deletions(-) > create mode 100644 src/shader/exac8nv110.fp > create mode 100644 src/shader/exac8nv110.fpc > create mode 
100644 src/shader/exacanv110.fp > create mode 100644 src/shader/exacanv110.fpc > create mode 100644 src/shader/exacmnv110.fp > create mode 100644 src/shader/exacmnv110.fpc > create mode 100644 src/shader/exas8nv110.fp > create mode 100644 src/shader/exas8nv110.fpc > create mode 100644 src/shader/exasanv110.fp > create mode 100644 src/shader/exasanv110.fpc > create mode 100644 src/shader/exascnv110.fp > create mode 100644 src/shader/exascnv110.fpc > create mode 100644 src/shader/videonv110.fp > create mode 100644 src/shader/videonv110.fpc > create mode 100644 src/shader/xfrm2nv110.vp > create mode 100644 src/shader/xfrm2nv110.vpc > > diff --git a/src/Makefile.am b/src/Makefile.am > index 1e04ddf..6ba8d87 100644 > --- a/src/Makefile.am > +++ b/src/Makefile.am > @@ -77,48 +77,64 @@ EXTRA_DIST = hwdefs/nv_3ddefs.xml.h \ > shader/exac8nve0.fpc \ > shader/exac8nvf0.fp \ > shader/exac8nvf0.fpc \ > + shader/exac8nv110.fp \ > + shader/exac8nv110.fpc \ > shader/exacanvc0.fp \ > shader/exacanvc0.fpc \ > shader/exacanve0.fp \ > shader/exacanve0.fpc \ > shader/exacanvf0.fp \ > shader/exacanvf0.fpc \ > + shader/exacanv110.fp \ > + shader/exacanv110.fpc \ > shader/exacmnvc0.fp \ > shader/exacmnvc0.fpc \ > shader/exacmnve0.fp \ > shader/exacmnve0.fpc \ > shader/exacmnvf0.fp \ > shader/exacmnvf0.fpc \ > + shader/exacmnv110.fp \ > + shader/exacmnv110.fpc \ > shader/exas8nvc0.fp \ > shader/exas8nvc0.fpc \ > shader/exas8nve0.fp \ > shader/exas8nve0.fpc \ > shader/exas8nvf0.fp \ > shader/exas8nvf0.fpc \ > + shader/exas8nv110.fp \ > + shader/exas8nv110.fpc \ > shader/exasanvc0.fp \ > shader/exasanvc0.fpc \ > shader/exasanve0.fp \ > shader/exasanve0.fpc \ > shader/exasanvf0.fp \ > shader/exasanvf0.fpc \ > + shader/exasanv110.fp \ > + shader/exasanv110.fpc \ > shader/exascnvc0.fp \ > shader/exascnvc0.fpc \ > shader/exascnve0.fp \ > shader/exascnve0.fpc \ > shader/exascnvf0.fp \ > shader/exascnvf0.fpc \ > + shader/exascnv110.fp \ > + shader/exascnv110.fpc \ > shader/videonvc0.fp \ > 
shader/videonvc0.fpc \ > shader/videonve0.fp \ > shader/videonve0.fpc \ > shader/videonvf0.fp \ > shader/videonvf0.fpc \ > + shader/videonv110.fp \ > + shader/videonv110.fpc \ > shader/xfrm2nvc0.vp \ > shader/xfrm2nvc0.vpc \ > shader/xfrm2nve0.vp \ > shader/xfrm2nve0.vpc \ > shader/xfrm2nvf0.vp \ > shader/xfrm2nvf0.vpc \ > + shader/xfrm2nv110.vp \ > + shader/xfrm2nv110.vpc \ > shader/Makefile \ > nouveau_local.h \ > nouveau_copy.h \ > diff --git a/src/nouveau_copy.c b/src/nouveau_copy.c > index e152a53..c139de6 100644 > --- a/src/nouveau_copy.c > +++ b/src/nouveau_copy.c > @@ -81,6 +81,7 @@ nouveau_copy_init(ScreenPtr pScreen) > &pNv->ce_channel); > break; > case NV_KEPLER: > + case NV_MAXWELL: > ret = nouveau_object_new(&pNv->dev->object, 0, > NOUVEAU_FIFO_CHANNEL_CLASS, > &(struct nve0_fifo) { > diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c > index def66ac..0f02b99 100644 > --- a/src/nouveau_exa.c > +++ b/src/nouveau_exa.c > @@ -514,12 +514,12 @@ nouveau_exa_init(ScreenPtr pScreen) > break; > case NV_FERMI: > case NV_KEPLER: > + case NV_MAXWELL: > exa->CheckComposite = NVC0EXACheckComposite; > exa->PrepareComposite = NVC0EXAPrepareComposite; > exa->Composite = NVC0EXAComposite; > exa->DoneComposite = NVC0EXADoneComposite; > break; > - case NV_MAXWELL: > default: > break; > } > diff --git a/src/nouveau_xv.c b/src/nouveau_xv.c > index 9d5bad2..2ba1992 100644 > --- a/src/nouveau_xv.c > +++ b/src/nouveau_xv.c > @@ -2097,7 +2097,7 @@ NVSetupTexturedVideo (ScreenPtr pScreen, XF86VideoAdaptorPtr *textureAdaptor) > textureAdaptor[0] = NV40SetupTexturedVideo(pScreen, FALSE); > textureAdaptor[1] = NV40SetupTexturedVideo(pScreen, TRUE); > } else > - if (pNv->Architecture >= NV_TESLA && pNv->Architecture < NV_MAXWELL) { > + if (pNv->Architecture >= NV_TESLA) { > textureAdaptor[0] = NV50SetupTexturedVideo(pScreen); > } > } > diff --git a/src/nv_accel_common.c b/src/nv_accel_common.c > index 9361ce8..5d12dd8 100644 > --- a/src/nv_accel_common.c > +++ 
b/src/nv_accel_common.c > @@ -722,6 +722,7 @@ NVAccelCommonInit(ScrnInfoPtr pScrn) > switch (pNv->Architecture) { > case NV_FERMI: > case NV_KEPLER: > + case NV_MAXWELL: > INIT_CONTEXT_OBJECT(3D_NVC0); > break; > case NV_TESLA: > diff --git a/src/nv_driver.c b/src/nv_driver.c > index 4dde8e0..fff83f8 100644 > --- a/src/nv_driver.c > +++ b/src/nv_driver.c > @@ -389,6 +389,7 @@ NVHasKMS(struct pci_device *pci_dev, struct xf86_platform_device *platform_dev) > case 0xe0: > case 0xf0: > case 0x100: > + case 0x110: > break; > default: > xf86DrvMsg(-1, X_ERROR, "Unknown chipset: NV%02X\n", chipset); > diff --git a/src/nvc0_accel.c b/src/nvc0_accel.c > index d2a3b93..52a17db 100644 > --- a/src/nvc0_accel.c > +++ b/src/nvc0_accel.c > @@ -53,6 +53,16 @@ > #include "shader/exas8nvf0.fp" > #include "shader/exac8nvf0.fp" > > +#include "shader/xfrm2nv110.vp" > +#include "shader/videonv110.fp" > + > +#include "shader/exascnv110.fp" > +#include "shader/exacmnv110.fp" > +#include "shader/exacanv110.fp" > +#include "shader/exasanv110.fp" > +#include "shader/exas8nv110.fp" > +#include "shader/exac8nv110.fp" > + > #define NVC0PushProgram(pNv,addr,code) do { \ > const unsigned size = sizeof(code) / sizeof(code[0]); \ > PUSH_DATAu((pNv)->pushbuf, (pNv)->scratch, (addr), size); \ > @@ -223,9 +233,12 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn) > } else if (pNv->dev->chipset < 0xf0) { > class = 0xa097; > handle = 0x0000906e; > - } else { > + } else if (pNv->dev->chipset < 0x110) { > class = 0xa197; > handle = 0x0000906e; > + } else { > + class = 0xb097; > + handle = 0x0000906e; > } > > ret = nouveau_object_new(pNv->channel, class, class, > @@ -304,10 +317,12 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn) > PUSH_DATA (push, 1); > } > > - BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3); > - PUSH_DATA (push, (bo->offset + MISC_OFFSET) >> 32); > - PUSH_DATA (push, (bo->offset + MISC_OFFSET)); > - PUSH_DATA (push, 1); > + if (pNv->Architecture < NV_MAXWELL) { > + BEGIN_NVC0(push, 
NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3); > + PUSH_DATA (push, (bo->offset + MISC_OFFSET) >> 32); > + PUSH_DATA (push, (bo->offset + MISC_OFFSET)); > + PUSH_DATA (push, 1); > + } > > BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2); > PUSH_DATA (push, (bo->offset + CODE_OFFSET) >> 32); > @@ -334,7 +349,8 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn) > NVC0PushProgram(pNv, PFP_S_A8, NVE0FP_Source_A8); > NVC0PushProgram(pNv, PFP_C_A8, NVE0FP_Composite_A8); > NVC0PushProgram(pNv, PFP_NV12, NVE0FP_NV12); > - } else { > + } else > + if (pNv->dev->chipset < 0x110) { > NVC0PushProgram(pNv, PVP_PASS, NVF0VP_Transform2); > NVC0PushProgram(pNv, PFP_S, NVF0FP_Source); > NVC0PushProgram(pNv, PFP_C, NVF0FP_Composite); > @@ -343,6 +359,15 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn) > NVC0PushProgram(pNv, PFP_S_A8, NVF0FP_Source_A8); > NVC0PushProgram(pNv, PFP_C_A8, NVF0FP_Composite_A8); > NVC0PushProgram(pNv, PFP_NV12, NVF0FP_NV12); > + } else { > + NVC0PushProgram(pNv, PVP_PASS, NV110VP_Transform2); > + NVC0PushProgram(pNv, PFP_S, NV110FP_Source); > + NVC0PushProgram(pNv, PFP_C, NV110FP_Composite); > + NVC0PushProgram(pNv, PFP_CCA, NV110FP_CAComposite); > + NVC0PushProgram(pNv, PFP_CCASA, NV110FP_CACompositeSrcAlpha); > + NVC0PushProgram(pNv, PFP_S_A8, NV110FP_Source_A8); > + NVC0PushProgram(pNv, PFP_C_A8, NV110FP_Composite_A8); > + NVC0PushProgram(pNv, PFP_NV12, NV110FP_NV12); > } > > BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 4); > diff --git a/src/nvc0_exa.c b/src/nvc0_exa.c > index 6add60b..a53dfe6 100644 > --- a/src/nvc0_exa.c > +++ b/src/nvc0_exa.c > @@ -914,14 +914,56 @@ NVC0EXAComposite(PixmapPtr pdpix, > if (!PUSH_SPACE(push, 64)) > return; > > + if (pNv->dev->chipset >= 0x110) { > + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); > + PUSH_DATA (push, 256); > + PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA) >> 32);No PUSH_DATAh in the DDX?> + PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA)); > + BEGIN_1IC0(push, NVC0_3D(CB_POS), 3 * (4 + 2 + 2) + 1); > + PUSH_DATA (push, 0x80); > 
+ > + PUSH_DATAf(push, dx); > + PUSH_DATAf(push, dy + (h * 2)); > + PUSH_DATAf(push, 0); > + PUSH_DATAf(push, 1); > + PUSH_DATAf(push, sx); > + PUSH_DATAf(push, sy + (h * 2)); > + PUSH_DATAf(push, mx); > + PUSH_DATAf(push, my + (h * 2)); > + > + PUSH_DATAf(push, dx); > + PUSH_DATAf(push, dy); > + PUSH_DATAf(push, 0); > + PUSH_DATAf(push, 1); > + PUSH_DATAf(push, sx); > + PUSH_DATAf(push, sy); > + PUSH_DATAf(push, mx); > + PUSH_DATAf(push, my); > + > + PUSH_DATAf(push, dx + (w * 2)); > + PUSH_DATAf(push, dy); > + PUSH_DATAf(push, 0); > + PUSH_DATAf(push, 1); > + PUSH_DATAf(push, sx + (w * 2)); > + PUSH_DATAf(push, sy); > + PUSH_DATAf(push, mx + (w * 2)); > + PUSH_DATAf(push, my); > + } > + > BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2); > PUSH_DATA (push, ((dx + w) << 16) | dx); > PUSH_DATA (push, ((dy + h) << 16) | dy); > BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1); > PUSH_DATA (push, NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES); > - PUSH_VTX2s(push, sx, sy + (h * 2), mx, my + (h * 2), dx, dy + (h * 2)); > - PUSH_VTX2s(push, sx, sy, mx, my, dx, dy); > - PUSH_VTX2s(push, sx + (w * 2), sy, mx + (w * 2), my, dx + (w * 2), dy); > + if (pNv->dev->chipset < 0x110) { > + PUSH_VTX2s(push, sx, sy + (h * 2), mx, my + (h * 2), dx, dy + (h * 2)); > + PUSH_VTX2s(push, sx, sy, mx, my, dx, dy); > + PUSH_VTX2s(push, sx + (w * 2), sy, mx + (w * 2), my, dx + (w * 2), dy); > + } else { > + BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2); > + PUSH_DATA (push, 0); > + PUSH_DATA (push, 3); > + } > BEGIN_NVC0(push, NVC0_3D(VERTEX_END_GL), 1); > PUSH_DATA (push, 0); > } > diff --git a/src/nvc0_xv.c b/src/nvc0_xv.c > index d1d8f18..129c505 100644 > --- a/src/nvc0_xv.c > +++ b/src/nvc0_xv.c > @@ -247,15 +247,57 @@ nvc0_xv_image_put(ScrnInfoPtr pScrn, > nouveau_pushbuf_refn (push, refs, 3)) > return BadImplementation; > > + if (pNv->dev->chipset >= 0x110) { > + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); > + PUSH_DATA (push, 256); > + PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA) >> 
32); > + PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA)); > + BEGIN_1IC0(push, NVC0_3D(CB_POS), 3 * (4 + 2 + 2) + 1); > + PUSH_DATA (push, 0x80); > + > + PUSH_DATAf(push, sx1); > + PUSH_DATAf(push, sy1); > + PUSH_DATAf(push, 0); > + PUSH_DATAf(push, 1); > + PUSH_DATAf(push, tx1); > + PUSH_DATAf(push, ty1); > + PUSH_DATAf(push, 0); > + PUSH_DATAf(push, 0); > + > + PUSH_DATAf(push, sx2+(sx2-sx1)); > + PUSH_DATAf(push, sy1); > + PUSH_DATAf(push, 0); > + PUSH_DATAf(push, 1); > + PUSH_DATAf(push, tx2+(tx2-tx1)); > + PUSH_DATAf(push, ty1); > + PUSH_DATAf(push, 0); > + PUSH_DATAf(push, 0); > + > + PUSH_DATAf(push, sx1); > + PUSH_DATAf(push, sy2+(sy2-sy1)); > + PUSH_DATAf(push, 0); > + PUSH_DATAf(push, 1); > + PUSH_DATAf(push, tx1); > + PUSH_DATAf(push, ty2+(ty2-ty1)); > + PUSH_DATAf(push, 0); > + PUSH_DATAf(push, 0); > + } > + > BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2); > PUSH_DATA (push, sx2 << NVC0_3D_SCISSOR_HORIZ_MAX__SHIFT | sx1); > PUSH_DATA (push, sy2 << NVC0_3D_SCISSOR_VERT_MAX__SHIFT | sy1 ); > > BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1); > PUSH_DATA (push, NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES); > - PUSH_VTX1s(push, tx1, ty1, sx1, sy1); > - PUSH_VTX1s(push, tx2+(tx2-tx1), ty1, sx2+(sx2-sx1), sy1); > - PUSH_VTX1s(push, tx1, ty2+(ty2-ty1), sx1, sy2+(sy2-sy1)); > + if (pNv->dev->chipset < 0x110) { > + PUSH_VTX1s(push, tx1, ty1, sx1, sy1); > + PUSH_VTX1s(push, tx2+(tx2-tx1), ty1, sx2+(sx2-sx1), sy1); > + PUSH_VTX1s(push, tx1, ty2+(ty2-ty1), sx1, sy2+(sy2-sy1)); > + } else { > + BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2); > + PUSH_DATA (push, 0); > + PUSH_DATA (push, 3); > + } > BEGIN_NVC0(push, NVC0_3D(VERTEX_END_GL), 1); > PUSH_DATA (push, 0); > > diff --git a/src/shader/Makefile b/src/shader/Makefile > index 2d789be..12bf455 100644 > --- a/src/shader/Makefile > +++ b/src/shader/Makefile > @@ -22,23 +22,36 @@ NVF0_SHADERS = xfrm2nvf0.vpc \ > exas8nvf0.fpc \ > exac8nvf0.fpc \ > videonvf0.fpc > +NV110_SHADERS = xfrm2nv110.vpc \ > + 
exascnv110.fpc \ > + exacmnv110.fpc \ > + exacanv110.fpc \ > + exasanv110.fpc \ > + exas8nv110.fpc \ > + exac8nv110.fpc \ > + videonv110.fpc > > -SHADERS = $(NVC0_SHADERS) $(NVE0_SHADERS) $(NVF0_SHADERS) > +SHADERS = $(NVC0_SHADERS) $(NVE0_SHADERS) $(NVF0_SHADERS) $(NV110_SHADERS) > ENVYAS ?= envyas > > all: $(SHADERS) > > $(filter %nvc0.vpc,$(SHADERS)): %.vpc: %.vp > - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -o $@ > + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gf100 -o $@ > $(filter %nvc0.fpc,$(SHADERS)): %.fpc: %.fp > - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -o $@ > + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gf100 -o $@ > > $(filter %nve0.vpc,$(SHADERS)): %.vpc: %.vp > - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -V nve4 -o $@ > + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gk104 -o $@ > $(filter %nve0.fpc,$(SHADERS)): %.fpc: %.fp > - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -V nve4 -o $@ > + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gk104 -o $@This is unrelated to your main change, but well should be *exactly* the same thing. 
:)> > $(filter %nvf0.vpc,$(SHADERS)): %.vpc: %.vp > cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gk110 -o $@ > $(filter %nvf0.fpc,$(SHADERS)): %.fpc: %.fp > cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gk110 -o $@ > + > +$(filter %nv110.vpc,$(SHADERS)): %.vpc: %.vp > + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gm107 -o $@ > +$(filter %nv110.fpc,$(SHADERS)): %.fpc: %.fp > + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gm107 -o $@ > diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp > new file mode 100644 > index 0000000..ce78036 > --- /dev/null > +++ b/src/shader/exac8nv110.fp > @@ -0,0 +1,47 @@ > +#ifndef ENVYAS > +static uint32_t > +NV110FP_Composite_A8[] = { > + 0x00001462, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x80000000, > + 0x00000a0a, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x0000000f, > + 0x00000000, > +#include "exac8nv110.fpc" > +}; > +#else > + > +sched (st 0x0) (st 0x0) (st 0x0) Those sched codes are definitely bad, but let's keep them as is for now. 
I might have a look at some point to improve the thing.> +ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > +mufu rcp $r0 $r0 > +ipa $r3 a[0x94] $r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r2 a[0x90] $r0 0x0 0x1 > +tex nodep $r1 $r2 0x0 0x1 t2d 0x8 > +ipa $r3 a[0x84] $r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r2 a[0x80] $r0 0x0 0x1 > +tex nodep $r0 $r2 0x0 0x0 t2d 0x8 > +depbar le 0x5 0x0 0x0 > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r3 $r0 $r1 > +mov $r2 $r3 0xf > +mov $r1 $r3 0xf > +sched (st 0x0) (st 0x0) (st 0x0) > +mov $r0 $r3 0xf > +exit > +#endif > diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc > new file mode 100644 > index 0000000..4aa1368 > --- /dev/null > +++ b/src/shader/exac8nv110.fpc > @@ -0,0 +1,38 @@ > +0xfc0007e0, > +0x001f8000, > +0xcff7ff00, > +0xe003ff87, > +0x00470000, > +0x50800000, > +0x4007ff03, > +0xe043ff89, > +0xfc0007e0, > +0x001f8000, > +0x0007ff02, > +0xe043ff89, > +0x2ff70201, > +0xc03a0014, > +0x4007ff03, > +0xe043ff88, > +0xfc0007e0, > +0x001f8000, > +0x0007ff02, > +0xe043ff88, > +0x2ff70200, > +0xc03a0004, > +0x34070000, > +0xf0f00000, > +0xfc0007e0, > +0x001f8000, > +0x00170003, > +0x5c681000, > +0x00370002, > +0x5c980780, > +0x00370001, > +0x5c980780, > +0xfc0007e0, > +0x001f8000, > +0x00370000, > +0x5c980780, > +0x0007000f, > +0xe3000000, > diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp > new file mode 100644 > index 0000000..a70d5c5 > --- /dev/null > +++ b/src/shader/exacanv110.fp > @@ -0,0 +1,47 @@ > +#ifndef ENVYAS > +static uint32_t > +NV110FP_CAComposite[] = { > + 0x00001462, /* 0x0000c000 = USES_KIL, MULTI_COLORS */ > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x80000000, /* FRAG_COORD_UMASK = 0x8 */ > + 0x00000a0a, /* FP_INTERP[0x080], 0022 0022 */ > + 0x00000000, /* FP_INTERP[0x0c0], 0 = OFF */ > + 0x00000000, /* FP_INTERP[0x100], 1 = FLAT */ > + 0x00000000, /* FP_INTERP[0x140], 2 = PERSPECTIVE */ > + 0x00000000, /* FP_INTERP[0x180], 3 = LINEAR 
*/ > + 0x00000000, /* FP_INTERP[0x1c0] */ > + 0x00000000, /* FP_INTERP[0x200] */ > + 0x00000000, /* FP_INTERP[0x240] */ > + 0x00000000, /* FP_INTERP[0x280] */ > + 0x00000000, /* FP_INTERP[0x2c0] */ > + 0x00000000, /* FP_INTERP[0x300] */ > + 0x00000000, > + 0x0000000f, /* FP_RESULT_MASK (0x8000 Face ?) */ > + 0x00000000, /* 0x2 = FragDepth, 0x1 = SampleMask */ > +#include "exacanv110.fpc" > +}; > +#else > + > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > +mufu rcp $r0 $r0 > +ipa $r3 a[0x94] $r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r2 a[0x90] $r0 0x0 0x1 > +tex nodep $r4 $r2 0x0 0x1 t2d 0xf > +ipa $r1 a[0x84] $r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r0 a[0x80] $r0 0x0 0x1 > +tex nodep $r0 $r0 0x0 0x0 t2d 0xf > +depbar le 0x5 0x0 0x0 > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r3 $r3 $r7 > +fmul ftz $r2 $r2 $r6 > +fmul ftz $r1 $r1 $r5 > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r0 $r0 $r4 > +exit > +#endif > diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc > new file mode 100644 > index 0000000..7c0ca5e > --- /dev/null > +++ b/src/shader/exacanv110.fpc > @@ -0,0 +1,38 @@ > +0xfc0007e0, > +0x001f8000, > +0xcff7ff00, > +0xe003ff87, > +0x00470000, > +0x50800000, > +0x4007ff03, > +0xe043ff89, > +0xfc0007e0, > +0x001f8000, > +0x0007ff02, > +0xe043ff89, > +0xaff70204, > +0xc03a0017, > +0x4007ff01, > +0xe043ff88, > +0xfc0007e0, > +0x001f8000, > +0x0007ff00, > +0xe043ff88, > +0xaff70000, > +0xc03a0007, > +0x34070000, > +0xf0f00000, > +0xfc0007e0, > +0x001f8000, > +0x00770303, > +0x5c681000, > +0x00670202, > +0x5c681000, > +0x00570101, > +0x5c681000, > +0xfc0007e0, > +0x001f8000, > +0x00470000, > +0x5c681000, > +0x0007000f, > +0xe3000000, > diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp > new file mode 100644 > index 0000000..fe5c294 > --- /dev/null > +++ b/src/shader/exacmnv110.fp > @@ -0,0 +1,47 @@ > +#ifndef ENVYAS > +static uint32_t > +NV110FP_Composite[] = { > + 
0x00001462, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x80000000, > + 0x00000a0a, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x0000000f, > + 0x00000000, > +#include "exacmnv110.fpc" > +}; > +#else > + > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > +mufu rcp $r0 $r0 > +ipa $r3 a[0x94] $r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r2 a[0x90] $r0 0x0 0x1 > +tex nodep $r4 $r2 0x0 0x1 t2d 0x8 > +ipa $r1 a[0x84] $r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r0 a[0x80] $r0 0x0 0x1 > +tex nodep $r0 $r0 0x0 0x0 t2d 0xf > +depbar le 0x5 0x0 0x0 > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r3 $r3 $r4 > +fmul ftz $r2 $r2 $r4 > +fmul ftz $r1 $r1 $r4 > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r0 $r0 $r4 > +exit > +#endif > diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc > new file mode 100644 > index 0000000..9d62c1a > --- /dev/null > +++ b/src/shader/exacmnv110.fpc > @@ -0,0 +1,38 @@ > +0xfc0007e0, > +0x001f8000, > +0xcff7ff00, > +0xe003ff87, > +0x00470000, > +0x50800000, > +0x4007ff03, > +0xe043ff89, > +0xfc0007e0, > +0x001f8000, > +0x0007ff02, > +0xe043ff89, > +0x2ff70204, > +0xc03a0014, > +0x4007ff01, > +0xe043ff88, > +0xfc0007e0, > +0x001f8000, > +0x0007ff00, > +0xe043ff88, > +0xaff70000, > +0xc03a0007, > +0x34070000, > +0xf0f00000, > +0xfc0007e0, > +0x001f8000, > +0x00470303, > +0x5c681000, > +0x00470202, > +0x5c681000, > +0x00470101, > +0x5c681000, > +0xfc0007e0, > +0x001f8000, > +0x00470000, > +0x5c681000, > +0x0007000f, > +0xe3000000, > diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp > new file mode 100644 > index 0000000..4fe2e19 > --- /dev/null > +++ b/src/shader/exas8nv110.fp > @@ -0,0 +1,42 @@ > +#ifndef ENVYAS > +static uint32_t > +NV110FP_Source_A8[] = { > + 0x00001462, > + 0x00000000, > + 0x00000000, > + 
0x00000000, > + 0x00000000, > + 0x80000000, > + 0x0000000a, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x0000000f, > + 0x00000000, > +#include "exas8nv110.fpc" > +}; > +#else > + > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > +mufu rcp $r0 $r0 > +ipa $r1 a[0x84] $r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r0 a[0x80] $r0 0x0 0x1 > +tex nodep $r0 $r0 0x0 0x0 t2d 0x8 > +depbar le 0x5 0x0 0x0 > +sched (st 0x0) (st 0x0) (st 0x0) > +mov $r3 $r0 0xf > +mov $r2 $r0 0xf > +mov $r1 $r0 0xf > +sched (st 0x0) (st 0x0) (st 0x0) > +exit > +#endif > diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc > new file mode 100644 > index 0000000..1181c41 > --- /dev/null > +++ b/src/shader/exas8nv110.fpc > @@ -0,0 +1,28 @@ > +0xfc0007e0, > +0x001f8000, > +0xcff7ff00, > +0xe003ff87, > +0x00470000, > +0x50800000, > +0x4007ff01, > +0xe043ff88, > +0xfc0007e0, > +0x001f8000, > +0x0007ff00, > +0xe043ff88, > +0x2ff70000, > +0xc03a0004, > +0x34070000, > +0xf0f00000, > +0xfc0007e0, > +0x001f8000, > +0x00070003, > +0x5c980780, > +0x00070002, > +0x5c980780, > +0x00070001, > +0x5c980780, > +0xfc0007e0, > +0x001f8000, > +0x0007000f, > +0xe3000000, > diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp > new file mode 100644 > index 0000000..61374a6 > --- /dev/null > +++ b/src/shader/exasanv110.fp > @@ -0,0 +1,47 @@ > +#ifndef ENVYAS > +static uint32_t > +NV110FP_CACompositeSrcAlpha[] = { > + 0x00001462, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x80000000, > + 0x00000a0a, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x0000000f, > + 0x00000000, > +#include "exasanv110.fpc" > +}; > +#else > + > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa pass $r0 
a[0x7c] 0x0 0x0 0x1 > +mufu rcp $r0 $r0 > +ipa $r3 a[0x84] $r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r2 a[0x80] $r0 0x0 0x1 > +tex nodep $r4 $r2 0x0 0x0 t2d 0x8 > +ipa $r1 a[0x94] $r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r0 a[0x90] $r0 0x0 0x1 > +tex nodep $r0 $r0 0x0 0x1 t2d 0xf > +depbar le 0x5 0x0 0x0 > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r3 $r3 $r4 > +fmul ftz $r2 $r2 $r4 > +fmul ftz $r1 $r1 $r4 > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r0 $r0 $r4 > +exit > +#endif > diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc > new file mode 100644 > index 0000000..5516a03 > --- /dev/null > +++ b/src/shader/exasanv110.fpc > @@ -0,0 +1,38 @@ > +0xfc0007e0, > +0x001f8000, > +0xcff7ff00, > +0xe003ff87, > +0x00470000, > +0x50800000, > +0x4007ff03, > +0xe043ff88, > +0xfc0007e0, > +0x001f8000, > +0x0007ff02, > +0xe043ff88, > +0x2ff70204, > +0xc03a0004, > +0x4007ff01, > +0xe043ff89, > +0xfc0007e0, > +0x001f8000, > +0x0007ff00, > +0xe043ff89, > +0xaff70000, > +0xc03a0017, > +0x34070000, > +0xf0f00000, > +0xfc0007e0, > +0x001f8000, > +0x00470303, > +0x5c681000, > +0x00470202, > +0x5c681000, > +0x00470101, > +0x5c681000, > +0xfc0007e0, > +0x001f8000, > +0x00470000, > +0x5c681000, > +0x0007000f, > +0xe3000000, > diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp > new file mode 100644 > index 0000000..90bbb55 > --- /dev/null > +++ b/src/shader/exascnv110.fp > @@ -0,0 +1,38 @@ > +#ifndef ENVYAS > +static uint32_t > +NV110FP_Source[] = { > + 0x00001462, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x80000000, > + 0x0000000a, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x0000000f, > + 0x00000000, > +#include "exascnv110.fpc" > +}; > +#else > + > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > +mufu rcp $r0 $r0 > +ipa $r1 a[0x84] 
$r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r0 a[0x80] $r0 0x0 0x1 > +tex nodep $r0 $r0 0x0 0x0 t2d 0xf > +depbar le 0x5 0x0 0x0 > +sched (st 0x0) (st 0x0) (st 0x0) > +exit > +#endif > diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc > new file mode 100644 > index 0000000..2dba15d > --- /dev/null > +++ b/src/shader/exascnv110.fpc > @@ -0,0 +1,20 @@ > +0xfc0007e0, > +0x001f8000, > +0xcff7ff00, > +0xe003ff87, > +0x00470000, > +0x50800000, > +0x4007ff01, > +0xe043ff88, > +0xfc0007e0, > +0x001f8000, > +0x0007ff00, > +0xe043ff88, > +0xaff70000, > +0xc03a0007, > +0x34070000, > +0xf0f00000, > +0xfc0007e0, > +0x001f8000, > +0x0007000f, > +0xe3000000, > diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp > new file mode 100644 > index 0000000..2728311 > --- /dev/null > +++ b/src/shader/videonv110.fp > @@ -0,0 +1,54 @@ > +#ifndef ENVYAS > +static uint32_t > +NV110FP_NV12[] = { > + 0x00001462, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x80000000, > + 0x0000000a, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x0000000f, > + 0x00000000, > +#include "videonv110.fpc" > +}; > +#else > + > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa pass $r2 a[0x7c] 0x0 0x0 0x1 > +mufu rcp $r2 $r2 > +ipa $r0 a[0x80] $r2 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r1 a[0x84] $r2 0x0 0x1 > +tex nodep $r4 $r0 0x0 0x0 t2d 0x8 > +tex nodep $r0 $r0 0x0 0x1 t2d 0xc > +sched (st 0x0) (st 0x0) (st 0x0) > +depbar le 0x5 0x1 0x1 > +fmul ftz $r5 $r4 c0[0x0] > +fadd ftz $r3 $r5 c0[0x4] > +sched (st 0x0) (st 0x0) (st 0x0) > +fadd ftz $r4 $r5 c0[0x8] > +fadd ftz $r5 $r5 c0[0xc] > +depbar le 0x5 0x0 0x0 > +sched (st 0x0) (st 0x0) (st 0x0) > +ffma ftz $r3 $r0 c0[0x10] $r3 > +ffma ftz $r4 $r0 c0[0x14] $r4 > +ffma ftz $r5 $r0 c0[0x18] $r5 > +sched (st 0x0) (st 0x0) (st 0x0) > +ffma ftz $r0 $r1 c0[0x1c] $r3 > 
+ffma ftz $r2 $r1 c0[0x24] $r5 > +ffma ftz $r1 $r1 c0[0x20] $r4 > +sched (st 0x0) (st 0x0) (st 0x0) > +exit > +#endif > diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc > new file mode 100644 > index 0000000..31d745a > --- /dev/null > +++ b/src/shader/videonv110.fpc > @@ -0,0 +1,52 @@ > +0xfc0007e0, > +0x001f8000, > +0xcff7ff02, > +0xe003ff87, > +0x00470202, > +0x50800000, > +0x0027ff00, > +0xe043ff88, > +0xfc0007e0, > +0x001f8000, > +0x4027ff01, > +0xe043ff88, > +0x2ff70004, > +0xc03a0004, > +0x2ff70000, > +0xc03a0016, > +0xfc0007e0, > +0x001f8000, > +0x34170001, > +0xf0f00000, > +0x00070405, > +0x4c681000, > +0x00170503, > +0x4c581000, > +0xfc0007e0, > +0x001f8000, > +0x00270504, > +0x4c581000, > +0x00370505, > +0x4c581000, > +0x34070000, > +0xf0f00000, > +0xfc0007e0, > +0x001f8000, > +0x00470003, > +0x49a00180, > +0x00570004, > +0x49a00200, > +0x00670005, > +0x49a00280, > +0xfc0007e0, > +0x001f8000, > +0x00770100, > +0x49a00180, > +0x00970102, > +0x49a00280, > +0x00870101, > +0x49a00200, > +0xfc0007e0, > +0x001f8000, > +0x0007000f, > +0xe3000000, > diff --git a/src/shader/xfrm2nv110.vp b/src/shader/xfrm2nv110.vp > new file mode 100644 > index 0000000..bbfc527 > --- /dev/null > +++ b/src/shader/xfrm2nv110.vp > @@ -0,0 +1,82 @@ > +#ifndef ENVYAS > +static uint32_t > +NV110VP_Transform2[] = { > + 0x02000461, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x000ff000, > + 0x00000000, /* VP_ATTR_EN[0x000] */ > + 0x00000000, /* VP_ATTR_EN[0x080] */ > + 0x00000000, /* VP_ATTR_EN[0x100] */ > + 0x00000000, > + 0x00000000, /* VP_ATTR_EN[0x200] */ > + 0x80000000, /* VERTEXID */ > + 0x00000000, /* VP_ATTR_EN[0x300] */ > + 0x00000000, > + 0x0033f000, /* VP_EXPORT_EN[0x040] */ > + 0x00000000, /* VP_EXPORT_EN[0x0c0] */ > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, /* VP_EXPORT_EN[0x2c0] */ > + 0x00000000, > +#include "xfrm2nv110.vpc" > +}; > +#else > + > +sched (st 0x0) (st 0x0) (st 0x0) > +ld b32 $r5 a[0x2fc] 0x0 > +shl $r5 $r5 0x5 > 
+ld b64 $r0 c0[$r5+0x80] > +sched (st 0x0) (st 0x0) (st 0x0) > +ld b64 $r2 c0[$r5+0x88] > +st b128 a[0x70] $r0 0x0 > + > +ld b64 $r0 c0[$r5+0x90] > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r2 $r0 c0[0x0] > +fmul ftz $r3 $r0 c0[0xc] > +fmul ftz $r4 $r0 c0[0x18] > +sched (st 0x0) (st 0x0) (st 0x0) > +ffma ftz $r2 $r1 c0[0x4] $r2 > +ffma ftz $r3 $r1 c0[0x10] $r3 > +ffma ftz $r4 $r1 c0[0x1c] $r4 > +sched (st 0x0) (st 0x0) (st 0x0) > +fadd ftz $r2 $r2 c0[0x8] > +fadd ftz $r3 $r3 c0[0x14] > +fadd ftz $r4 $r4 c0[0x20] > +sched (st 0x0) (st 0x0) (st 0x0) > +mufu rcp $r4 $r4 > +fmul ftz $r2 $r2 $r4 > +fmul ftz $r3 $r3 $r4 > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r0 $r2 c0[0x24] > +fmul ftz $r1 $r3 c0[0x28] > +st b64 a[0x80] $r0 0x0 > + > +sched (st 0x0) (st 0x0) (st 0x0) > +ld b64 $r0 c0[$r5+0x98] > +fmul ftz $r2 $r0 c0[0x2c] > +fmul ftz $r3 $r0 c0[0x38] > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r4 $r0 c0[0x44] > +ffma ftz $r2 $r1 c0[0x30] $r2 > +ffma ftz $r3 $r1 c0[0x3c] $r3 > +sched (st 0x0) (st 0x0) (st 0x0) > +ffma ftz $r4 $r1 c0[0x48] $r4 > +fadd ftz $r2 $r2 c0[0x34] > +fadd ftz $r3 $r3 c0[0x40] > +sched (st 0x0) (st 0x0) (st 0x0) > +fadd ftz $r4 $r4 c0[0x4c] > +mufu rcp $r4 $r4 > +fmul ftz $r2 $r2 $r4 > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r3 $r3 $r4 > +fmul ftz $r0 $r2 c0[0x50] > +fmul ftz $r1 $r3 c0[0x54] > +sched (st 0x0) (st 0x0) (st 0x0) > +st b64 a[0x90] $r0 0x0 > + > +exit > +#endif > diff --git a/src/shader/xfrm2nv110.vpc b/src/shader/xfrm2nv110.vpc > new file mode 100644 > index 0000000..0d9ebfd > --- /dev/null > +++ b/src/shader/xfrm2nv110.vpc > @@ -0,0 +1,102 @@ > +0xfc0007e0, > +0x001f8000, > +0x2fc7ff05, > +0xefd87f80, > +0x00570505, > +0x38480000, > +0x08070500, > +0xef950000, > +0xfc0007e0, > +0x001f8000, > +0x08870502, > +0xef950000, > +0x0707ff00, > +0xeff1ff80, > +0x09070500, > +0xef950000, > +0xfc0007e0, > +0x001f8000, > +0x00070002, > +0x4c681000, > +0x00370003, > +0x4c681000, > +0x00670004, > +0x4c681000, > 
+0xfc0007e0, > +0x001f8000, > +0x00170102, > +0x49a00100, > +0x00470103, > +0x49a00180, > +0x00770104, > +0x49a00200, > +0xfc0007e0, > +0x001f8000, > +0x00270202, > +0x4c581000, > +0x00570303, > +0x4c581000, > +0x00870404, > +0x4c581000, > +0xfc0007e0, > +0x001f8000, > +0x00470404, > +0x50800000, > +0x00470202, > +0x5c681000, > +0x00470303, > +0x5c681000, > +0xfc0007e0, > +0x001f8000, > +0x00970200, > +0x4c681000, > +0x00a70301, > +0x4c681000, > +0x0807ff00, > +0xeff0ff80, > +0xfc0007e0, > +0x001f8000, > +0x09870500, > +0xef950000, > +0x00b70002, > +0x4c681000, > +0x00e70003, > +0x4c681000, > +0xfc0007e0, > +0x001f8000, > +0x01170004, > +0x4c681000, > +0x00c70102, > +0x49a00100, > +0x00f70103, > +0x49a00180, > +0xfc0007e0, > +0x001f8000, > +0x01270104, > +0x49a00200, > +0x00d70202, > +0x4c581000, > +0x01070303, > +0x4c581000, > +0xfc0007e0, > +0x001f8000, > +0x01370404, > +0x4c581000, > +0x00470404, > +0x50800000, > +0x00470202, > +0x5c681000, > +0xfc0007e0, > +0x001f8000, > +0x00470303, > +0x5c681000, > +0x01470200, > +0x4c681000, > +0x01570301, > +0x4c681000, > +0xfc0007e0, > +0x001f8000, > +0x0907ff00, > +0xeff0ff80, > +0x0007000f, > +0xe3000000, >-- -Samuel
On Mon, Oct 17, 2016 at 5:28 AM, Samuel Pitoiset <samuel.pitoiset at gmail.com> wrote: > Looks reasonable, some minor comments below. > > > On 10/16/2016 02:06 AM, Ilia Mirkin wrote: >> diff --git a/src/nvc0_exa.c b/src/nvc0_exa.c >> index 6add60b..a53dfe6 100644 >> --- a/src/nvc0_exa.c >> +++ b/src/nvc0_exa.c >> @@ -914,14 +914,56 @@ NVC0EXAComposite(PixmapPtr pdpix, >> if (!PUSH_SPACE(push, 64)) >> return; >> >> + if (pNv->dev->chipset >= 0x110) { >> + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); >> + PUSH_DATA (push, 256); >> + PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA) >> 32); > > > No PUSH_DATAh in the DDX? Nope. Didn't feel the burning need to add a helper either. >> $(filter %nvc0.vpc,$(SHADERS)): %.vpc: %.vp >> - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -o $@ >> + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gf100 >> -o $@ >> $(filter %nvc0.fpc,$(SHADERS)): %.fpc: %.fp >> - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -o $@ >> + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gf100 >> -o $@ >> >> $(filter %nve0.vpc,$(SHADERS)): %.vpc: %.vp >> - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -V nve4 -o >> $@ >> + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gk104 >> -o $@ >> $(filter %nve0.fpc,$(SHADERS)): %.fpc: %.fp >> - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -V nve4 -o >> $@ >> + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gk104 >> -o $@ > > > This is unrelated to your main change, but well should be *exactly* the same > thing. :) You mean the bit about me adding -V gf100? Figured I'd fix it up while I was at it. 
The machine/variant names changed though. > > >> >> $(filter %nvf0.vpc,$(SHADERS)): %.vpc: %.vp >> cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gk110 -o $@ >> $(filter %nvf0.fpc,$(SHADERS)): %.fpc: %.fp >> cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gk110 -o $@ >> + >> +$(filter %nv110.vpc,$(SHADERS)): %.vpc: %.vp >> + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gm107 -o $@ >> +$(filter %nv110.fpc,$(SHADERS)): %.fpc: %.fp >> + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gm107 -o $@ >> diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp >> new file mode 100644 >> index 0000000..ce78036 >> --- /dev/null >> +++ b/src/shader/exac8nv110.fp >> @@ -0,0 +1,47 @@ >> +#ifndef ENVYAS >> +static uint32_t >> +NV110FP_Composite_A8[] = { >> + 0x00001462, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x80000000, >> + 0x00000a0a, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x0000000f, >> + 0x00000000, >> +#include "exac8nv110.fpc" >> +}; >> +#else >> + >> +sched (st 0x0) (st 0x0) (st 0x0) > > > Those sched codes are definitely bad, but let's keep them as is for now. I > might have a look at some point to improve the thing. Yeah, way wrong. However it's what our compiler would produce. You can use this as a proving ground for your various theories. All simple shaders though, no control flow. Only complex thing is textures. -ilia