rendercheck -f a8r8g8b8 passes as much as on a GK208, and xv appears to work. Very lightly tested. Instead of sticking coordinates into pushbufs, the vertex shader is modified to read them from a constbuf, indexed by vertex id. This approach could be used for all nvc0 generations, but I didn't want to rock the boat. Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu> --- Note: this won't work for GM20x - we need to allow TIC format to be updated for that to work. But this is a step in that direction. src/Makefile.am | 16 ++++++++ src/nouveau_copy.c | 1 + src/nouveau_exa.c | 2 +- src/nouveau_xv.c | 2 +- src/nv_accel_common.c | 1 + src/nv_driver.c | 1 + src/nvc0_accel.c | 37 ++++++++++++++--- src/nvc0_exa.c | 48 ++++++++++++++++++++-- src/nvc0_xv.c | 48 ++++++++++++++++++++-- src/shader/Makefile | 23 ++++++++--- src/shader/exac8nv110.fp | 47 +++++++++++++++++++++ src/shader/exac8nv110.fpc | 38 +++++++++++++++++ src/shader/exacanv110.fp | 47 +++++++++++++++++++++ src/shader/exacanv110.fpc | 38 +++++++++++++++++ src/shader/exacmnv110.fp | 47 +++++++++++++++++++++ src/shader/exacmnv110.fpc | 38 +++++++++++++++++ src/shader/exas8nv110.fp | 42 +++++++++++++++++++ src/shader/exas8nv110.fpc | 28 +++++++++++++ src/shader/exasanv110.fp | 47 +++++++++++++++++++++ src/shader/exasanv110.fpc | 38 +++++++++++++++++ src/shader/exascnv110.fp | 38 +++++++++++++++++ src/shader/exascnv110.fpc | 20 +++++++++ src/shader/videonv110.fp | 54 ++++++++++++++++++++++++ src/shader/videonv110.fpc | 52 +++++++++++++++++++++++ src/shader/xfrm2nv110.vp | 82 +++++++++++++++++++++++++++++++++++++ src/shader/xfrm2nv110.vpc | 102 ++++++++++++++++++++++++++++++++++++++++++++++ 26 files changed, 918 insertions(+), 19 deletions(-) create mode 100644 src/shader/exac8nv110.fp create mode 100644 src/shader/exac8nv110.fpc create mode 100644 src/shader/exacanv110.fp create mode 100644 src/shader/exacanv110.fpc create mode 100644 src/shader/exacmnv110.fp create mode 100644 src/shader/exacmnv110.fpc create mode 
100644 src/shader/exas8nv110.fp create mode 100644 src/shader/exas8nv110.fpc create mode 100644 src/shader/exasanv110.fp create mode 100644 src/shader/exasanv110.fpc create mode 100644 src/shader/exascnv110.fp create mode 100644 src/shader/exascnv110.fpc create mode 100644 src/shader/videonv110.fp create mode 100644 src/shader/videonv110.fpc create mode 100644 src/shader/xfrm2nv110.vp create mode 100644 src/shader/xfrm2nv110.vpc diff --git a/src/Makefile.am b/src/Makefile.am index 1e04ddf..6ba8d87 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -77,48 +77,64 @@ EXTRA_DIST = hwdefs/nv_3ddefs.xml.h \ shader/exac8nve0.fpc \ shader/exac8nvf0.fp \ shader/exac8nvf0.fpc \ + shader/exac8nv110.fp \ + shader/exac8nv110.fpc \ shader/exacanvc0.fp \ shader/exacanvc0.fpc \ shader/exacanve0.fp \ shader/exacanve0.fpc \ shader/exacanvf0.fp \ shader/exacanvf0.fpc \ + shader/exacanv110.fp \ + shader/exacanv110.fpc \ shader/exacmnvc0.fp \ shader/exacmnvc0.fpc \ shader/exacmnve0.fp \ shader/exacmnve0.fpc \ shader/exacmnvf0.fp \ shader/exacmnvf0.fpc \ + shader/exacmnv110.fp \ + shader/exacmnv110.fpc \ shader/exas8nvc0.fp \ shader/exas8nvc0.fpc \ shader/exas8nve0.fp \ shader/exas8nve0.fpc \ shader/exas8nvf0.fp \ shader/exas8nvf0.fpc \ + shader/exas8nv110.fp \ + shader/exas8nv110.fpc \ shader/exasanvc0.fp \ shader/exasanvc0.fpc \ shader/exasanve0.fp \ shader/exasanve0.fpc \ shader/exasanvf0.fp \ shader/exasanvf0.fpc \ + shader/exasanv110.fp \ + shader/exasanv110.fpc \ shader/exascnvc0.fp \ shader/exascnvc0.fpc \ shader/exascnve0.fp \ shader/exascnve0.fpc \ shader/exascnvf0.fp \ shader/exascnvf0.fpc \ + shader/exascnv110.fp \ + shader/exascnv110.fpc \ shader/videonvc0.fp \ shader/videonvc0.fpc \ shader/videonve0.fp \ shader/videonve0.fpc \ shader/videonvf0.fp \ shader/videonvf0.fpc \ + shader/videonv110.fp \ + shader/videonv110.fpc \ shader/xfrm2nvc0.vp \ shader/xfrm2nvc0.vpc \ shader/xfrm2nve0.vp \ shader/xfrm2nve0.vpc \ shader/xfrm2nvf0.vp \ shader/xfrm2nvf0.vpc \ + 
shader/xfrm2nv110.vp \ + shader/xfrm2nv110.vpc \ shader/Makefile \ nouveau_local.h \ nouveau_copy.h \ diff --git a/src/nouveau_copy.c b/src/nouveau_copy.c index e152a53..c139de6 100644 --- a/src/nouveau_copy.c +++ b/src/nouveau_copy.c @@ -81,6 +81,7 @@ nouveau_copy_init(ScreenPtr pScreen) &pNv->ce_channel); break; case NV_KEPLER: + case NV_MAXWELL: ret = nouveau_object_new(&pNv->dev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS, &(struct nve0_fifo) { diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c index def66ac..0f02b99 100644 --- a/src/nouveau_exa.c +++ b/src/nouveau_exa.c @@ -514,12 +514,12 @@ nouveau_exa_init(ScreenPtr pScreen) break; case NV_FERMI: case NV_KEPLER: + case NV_MAXWELL: exa->CheckComposite = NVC0EXACheckComposite; exa->PrepareComposite = NVC0EXAPrepareComposite; exa->Composite = NVC0EXAComposite; exa->DoneComposite = NVC0EXADoneComposite; break; - case NV_MAXWELL: default: break; } diff --git a/src/nouveau_xv.c b/src/nouveau_xv.c index 9d5bad2..2ba1992 100644 --- a/src/nouveau_xv.c +++ b/src/nouveau_xv.c @@ -2097,7 +2097,7 @@ NVSetupTexturedVideo (ScreenPtr pScreen, XF86VideoAdaptorPtr *textureAdaptor) textureAdaptor[0] = NV40SetupTexturedVideo(pScreen, FALSE); textureAdaptor[1] = NV40SetupTexturedVideo(pScreen, TRUE); } else - if (pNv->Architecture >= NV_TESLA && pNv->Architecture < NV_MAXWELL) { + if (pNv->Architecture >= NV_TESLA) { textureAdaptor[0] = NV50SetupTexturedVideo(pScreen); } } diff --git a/src/nv_accel_common.c b/src/nv_accel_common.c index 9361ce8..5d12dd8 100644 --- a/src/nv_accel_common.c +++ b/src/nv_accel_common.c @@ -722,6 +722,7 @@ NVAccelCommonInit(ScrnInfoPtr pScrn) switch (pNv->Architecture) { case NV_FERMI: case NV_KEPLER: + case NV_MAXWELL: INIT_CONTEXT_OBJECT(3D_NVC0); break; case NV_TESLA: diff --git a/src/nv_driver.c b/src/nv_driver.c index 4dde8e0..fff83f8 100644 --- a/src/nv_driver.c +++ b/src/nv_driver.c @@ -389,6 +389,7 @@ NVHasKMS(struct pci_device *pci_dev, struct xf86_platform_device *platform_dev) case 0xe0: case 
0xf0: case 0x100: + case 0x110: break; default: xf86DrvMsg(-1, X_ERROR, "Unknown chipset: NV%02X\n", chipset); diff --git a/src/nvc0_accel.c b/src/nvc0_accel.c index d2a3b93..52a17db 100644 --- a/src/nvc0_accel.c +++ b/src/nvc0_accel.c @@ -53,6 +53,16 @@ #include "shader/exas8nvf0.fp" #include "shader/exac8nvf0.fp" +#include "shader/xfrm2nv110.vp" +#include "shader/videonv110.fp" + +#include "shader/exascnv110.fp" +#include "shader/exacmnv110.fp" +#include "shader/exacanv110.fp" +#include "shader/exasanv110.fp" +#include "shader/exas8nv110.fp" +#include "shader/exac8nv110.fp" + #define NVC0PushProgram(pNv,addr,code) do { \ const unsigned size = sizeof(code) / sizeof(code[0]); \ PUSH_DATAu((pNv)->pushbuf, (pNv)->scratch, (addr), size); \ @@ -223,9 +233,12 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn) } else if (pNv->dev->chipset < 0xf0) { class = 0xa097; handle = 0x0000906e; - } else { + } else if (pNv->dev->chipset < 0x110) { class = 0xa197; handle = 0x0000906e; + } else { + class = 0xb097; + handle = 0x0000906e; } ret = nouveau_object_new(pNv->channel, class, class, @@ -304,10 +317,12 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn) PUSH_DATA (push, 1); } - BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3); - PUSH_DATA (push, (bo->offset + MISC_OFFSET) >> 32); - PUSH_DATA (push, (bo->offset + MISC_OFFSET)); - PUSH_DATA (push, 1); + if (pNv->Architecture < NV_MAXWELL) { + BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3); + PUSH_DATA (push, (bo->offset + MISC_OFFSET) >> 32); + PUSH_DATA (push, (bo->offset + MISC_OFFSET)); + PUSH_DATA (push, 1); + } BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2); PUSH_DATA (push, (bo->offset + CODE_OFFSET) >> 32); @@ -334,7 +349,8 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn) NVC0PushProgram(pNv, PFP_S_A8, NVE0FP_Source_A8); NVC0PushProgram(pNv, PFP_C_A8, NVE0FP_Composite_A8); NVC0PushProgram(pNv, PFP_NV12, NVE0FP_NV12); - } else { + } else + if (pNv->dev->chipset < 0x110) { NVC0PushProgram(pNv, PVP_PASS, NVF0VP_Transform2); 
NVC0PushProgram(pNv, PFP_S, NVF0FP_Source); NVC0PushProgram(pNv, PFP_C, NVF0FP_Composite); @@ -343,6 +359,15 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn) NVC0PushProgram(pNv, PFP_S_A8, NVF0FP_Source_A8); NVC0PushProgram(pNv, PFP_C_A8, NVF0FP_Composite_A8); NVC0PushProgram(pNv, PFP_NV12, NVF0FP_NV12); + } else { + NVC0PushProgram(pNv, PVP_PASS, NV110VP_Transform2); + NVC0PushProgram(pNv, PFP_S, NV110FP_Source); + NVC0PushProgram(pNv, PFP_C, NV110FP_Composite); + NVC0PushProgram(pNv, PFP_CCA, NV110FP_CAComposite); + NVC0PushProgram(pNv, PFP_CCASA, NV110FP_CACompositeSrcAlpha); + NVC0PushProgram(pNv, PFP_S_A8, NV110FP_Source_A8); + NVC0PushProgram(pNv, PFP_C_A8, NV110FP_Composite_A8); + NVC0PushProgram(pNv, PFP_NV12, NV110FP_NV12); } BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 4); diff --git a/src/nvc0_exa.c b/src/nvc0_exa.c index 6add60b..a53dfe6 100644 --- a/src/nvc0_exa.c +++ b/src/nvc0_exa.c @@ -914,14 +914,56 @@ NVC0EXAComposite(PixmapPtr pdpix, if (!PUSH_SPACE(push, 64)) return; + if (pNv->dev->chipset >= 0x110) { + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, 256); + PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA) >> 32); + PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), 3 * (4 + 2 + 2) + 1); + PUSH_DATA (push, 0x80); + + PUSH_DATAf(push, dx); + PUSH_DATAf(push, dy + (h * 2)); + PUSH_DATAf(push, 0); + PUSH_DATAf(push, 1); + PUSH_DATAf(push, sx); + PUSH_DATAf(push, sy + (h * 2)); + PUSH_DATAf(push, mx); + PUSH_DATAf(push, my + (h * 2)); + + PUSH_DATAf(push, dx); + PUSH_DATAf(push, dy); + PUSH_DATAf(push, 0); + PUSH_DATAf(push, 1); + PUSH_DATAf(push, sx); + PUSH_DATAf(push, sy); + PUSH_DATAf(push, mx); + PUSH_DATAf(push, my); + + PUSH_DATAf(push, dx + (w * 2)); + PUSH_DATAf(push, dy); + PUSH_DATAf(push, 0); + PUSH_DATAf(push, 1); + PUSH_DATAf(push, sx + (w * 2)); + PUSH_DATAf(push, sy); + PUSH_DATAf(push, mx + (w * 2)); + PUSH_DATAf(push, my); + } + BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2); PUSH_DATA (push, 
((dx + w) << 16) | dx); PUSH_DATA (push, ((dy + h) << 16) | dy); BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1); PUSH_DATA (push, NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES); - PUSH_VTX2s(push, sx, sy + (h * 2), mx, my + (h * 2), dx, dy + (h * 2)); - PUSH_VTX2s(push, sx, sy, mx, my, dx, dy); - PUSH_VTX2s(push, sx + (w * 2), sy, mx + (w * 2), my, dx + (w * 2), dy); + if (pNv->dev->chipset < 0x110) { + PUSH_VTX2s(push, sx, sy + (h * 2), mx, my + (h * 2), dx, dy + (h * 2)); + PUSH_VTX2s(push, sx, sy, mx, my, dx, dy); + PUSH_VTX2s(push, sx + (w * 2), sy, mx + (w * 2), my, dx + (w * 2), dy); + } else { + BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 3); + } BEGIN_NVC0(push, NVC0_3D(VERTEX_END_GL), 1); PUSH_DATA (push, 0); } diff --git a/src/nvc0_xv.c b/src/nvc0_xv.c index d1d8f18..129c505 100644 --- a/src/nvc0_xv.c +++ b/src/nvc0_xv.c @@ -247,15 +247,57 @@ nvc0_xv_image_put(ScrnInfoPtr pScrn, nouveau_pushbuf_refn (push, refs, 3)) return BadImplementation; + if (pNv->dev->chipset >= 0x110) { + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, 256); + PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA) >> 32); + PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), 3 * (4 + 2 + 2) + 1); + PUSH_DATA (push, 0x80); + + PUSH_DATAf(push, sx1); + PUSH_DATAf(push, sy1); + PUSH_DATAf(push, 0); + PUSH_DATAf(push, 1); + PUSH_DATAf(push, tx1); + PUSH_DATAf(push, ty1); + PUSH_DATAf(push, 0); + PUSH_DATAf(push, 0); + + PUSH_DATAf(push, sx2+(sx2-sx1)); + PUSH_DATAf(push, sy1); + PUSH_DATAf(push, 0); + PUSH_DATAf(push, 1); + PUSH_DATAf(push, tx2+(tx2-tx1)); + PUSH_DATAf(push, ty1); + PUSH_DATAf(push, 0); + PUSH_DATAf(push, 0); + + PUSH_DATAf(push, sx1); + PUSH_DATAf(push, sy2+(sy2-sy1)); + PUSH_DATAf(push, 0); + PUSH_DATAf(push, 1); + PUSH_DATAf(push, tx1); + PUSH_DATAf(push, ty2+(ty2-ty1)); + PUSH_DATAf(push, 0); + PUSH_DATAf(push, 0); + } + BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2); PUSH_DATA 
(push, sx2 << NVC0_3D_SCISSOR_HORIZ_MAX__SHIFT | sx1); PUSH_DATA (push, sy2 << NVC0_3D_SCISSOR_VERT_MAX__SHIFT | sy1 ); BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1); PUSH_DATA (push, NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES); - PUSH_VTX1s(push, tx1, ty1, sx1, sy1); - PUSH_VTX1s(push, tx2+(tx2-tx1), ty1, sx2+(sx2-sx1), sy1); - PUSH_VTX1s(push, tx1, ty2+(ty2-ty1), sx1, sy2+(sy2-sy1)); + if (pNv->dev->chipset < 0x110) { + PUSH_VTX1s(push, tx1, ty1, sx1, sy1); + PUSH_VTX1s(push, tx2+(tx2-tx1), ty1, sx2+(sx2-sx1), sy1); + PUSH_VTX1s(push, tx1, ty2+(ty2-ty1), sx1, sy2+(sy2-sy1)); + } else { + BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, 3); + } BEGIN_NVC0(push, NVC0_3D(VERTEX_END_GL), 1); PUSH_DATA (push, 0); diff --git a/src/shader/Makefile b/src/shader/Makefile index 2d789be..12bf455 100644 --- a/src/shader/Makefile +++ b/src/shader/Makefile @@ -22,23 +22,36 @@ NVF0_SHADERS = xfrm2nvf0.vpc \ exas8nvf0.fpc \ exac8nvf0.fpc \ videonvf0.fpc +NV110_SHADERS = xfrm2nv110.vpc \ + exascnv110.fpc \ + exacmnv110.fpc \ + exacanv110.fpc \ + exasanv110.fpc \ + exas8nv110.fpc \ + exac8nv110.fpc \ + videonv110.fpc -SHADERS = $(NVC0_SHADERS) $(NVE0_SHADERS) $(NVF0_SHADERS) +SHADERS = $(NVC0_SHADERS) $(NVE0_SHADERS) $(NVF0_SHADERS) $(NV110_SHADERS) ENVYAS ?= envyas all: $(SHADERS) $(filter %nvc0.vpc,$(SHADERS)): %.vpc: %.vp - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -o $@ + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gf100 -o $@ $(filter %nvc0.fpc,$(SHADERS)): %.fpc: %.fp - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -o $@ + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gf100 -o $@ $(filter %nve0.vpc,$(SHADERS)): %.vpc: %.vp - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -V nve4 -o $@ + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gk104 -o $@ $(filter %nve0.fpc,$(SHADERS)): %.fpc: %.fp - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -V nve4 -o 
$@ + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gk104 -o $@ $(filter %nvf0.vpc,$(SHADERS)): %.vpc: %.vp cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gk110 -o $@ $(filter %nvf0.fpc,$(SHADERS)): %.fpc: %.fp cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gk110 -o $@ + +$(filter %nv110.vpc,$(SHADERS)): %.vpc: %.vp + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gm107 -o $@ +$(filter %nv110.fpc,$(SHADERS)): %.fpc: %.fp + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gm107 -o $@ diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp new file mode 100644 index 0000000..ce78036 --- /dev/null +++ b/src/shader/exac8nv110.fp @@ -0,0 +1,47 @@ +#ifndef ENVYAS +static uint32_t +NV110FP_Composite_A8[] = { + 0x00001462, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x80000000, + 0x00000a0a, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x0000000f, + 0x00000000, +#include "exac8nv110.fpc" +}; +#else + +sched (st 0x0) (st 0x0) (st 0x0) +ipa pass $r0 a[0x7c] 0x0 0x0 0x1 +mufu rcp $r0 $r0 +ipa $r3 a[0x94] $r0 0x0 0x1 +sched (st 0x0) (st 0x0) (st 0x0) +ipa $r2 a[0x90] $r0 0x0 0x1 +tex nodep $r1 $r2 0x0 0x1 t2d 0x8 +ipa $r3 a[0x84] $r0 0x0 0x1 +sched (st 0x0) (st 0x0) (st 0x0) +ipa $r2 a[0x80] $r0 0x0 0x1 +tex nodep $r0 $r2 0x0 0x0 t2d 0x8 +depbar le 0x5 0x0 0x0 +sched (st 0x0) (st 0x0) (st 0x0) +fmul ftz $r3 $r0 $r1 +mov $r2 $r3 0xf +mov $r1 $r3 0xf +sched (st 0x0) (st 0x0) (st 0x0) +mov $r0 $r3 0xf +exit +#endif diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc new file mode 100644 index 0000000..4aa1368 --- /dev/null +++ b/src/shader/exac8nv110.fpc @@ -0,0 +1,38 @@ +0xfc0007e0, +0x001f8000, +0xcff7ff00, +0xe003ff87, +0x00470000, +0x50800000, +0x4007ff03, +0xe043ff89, +0xfc0007e0, +0x001f8000, +0x0007ff02, +0xe043ff89, +0x2ff70201, +0xc03a0014, +0x4007ff03, +0xe043ff88, +0xfc0007e0, +0x001f8000, 
+0x0007ff02, +0xe043ff88, +0x2ff70200, +0xc03a0004, +0x34070000, +0xf0f00000, +0xfc0007e0, +0x001f8000, +0x00170003, +0x5c681000, +0x00370002, +0x5c980780, +0x00370001, +0x5c980780, +0xfc0007e0, +0x001f8000, +0x00370000, +0x5c980780, +0x0007000f, +0xe3000000, diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp new file mode 100644 index 0000000..a70d5c5 --- /dev/null +++ b/src/shader/exacanv110.fp @@ -0,0 +1,47 @@ +#ifndef ENVYAS +static uint32_t +NV110FP_CAComposite[] = { + 0x00001462, /* 0x0000c000 = USES_KIL, MULTI_COLORS */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x80000000, /* FRAG_COORD_UMASK = 0x8 */ + 0x00000a0a, /* FP_INTERP[0x080], 0022 0022 */ + 0x00000000, /* FP_INTERP[0x0c0], 0 = OFF */ + 0x00000000, /* FP_INTERP[0x100], 1 = FLAT */ + 0x00000000, /* FP_INTERP[0x140], 2 = PERSPECTIVE */ + 0x00000000, /* FP_INTERP[0x180], 3 = LINEAR */ + 0x00000000, /* FP_INTERP[0x1c0] */ + 0x00000000, /* FP_INTERP[0x200] */ + 0x00000000, /* FP_INTERP[0x240] */ + 0x00000000, /* FP_INTERP[0x280] */ + 0x00000000, /* FP_INTERP[0x2c0] */ + 0x00000000, /* FP_INTERP[0x300] */ + 0x00000000, + 0x0000000f, /* FP_RESULT_MASK (0x8000 Face ?) 
*/ + 0x00000000, /* 0x2 = FragDepth, 0x1 = SampleMask */ +#include "exacanv110.fpc" +}; +#else + +sched (st 0x0) (st 0x0) (st 0x0) +ipa pass $r0 a[0x7c] 0x0 0x0 0x1 +mufu rcp $r0 $r0 +ipa $r3 a[0x94] $r0 0x0 0x1 +sched (st 0x0) (st 0x0) (st 0x0) +ipa $r2 a[0x90] $r0 0x0 0x1 +tex nodep $r4 $r2 0x0 0x1 t2d 0xf +ipa $r1 a[0x84] $r0 0x0 0x1 +sched (st 0x0) (st 0x0) (st 0x0) +ipa $r0 a[0x80] $r0 0x0 0x1 +tex nodep $r0 $r0 0x0 0x0 t2d 0xf +depbar le 0x5 0x0 0x0 +sched (st 0x0) (st 0x0) (st 0x0) +fmul ftz $r3 $r3 $r7 +fmul ftz $r2 $r2 $r6 +fmul ftz $r1 $r1 $r5 +sched (st 0x0) (st 0x0) (st 0x0) +fmul ftz $r0 $r0 $r4 +exit +#endif diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc new file mode 100644 index 0000000..7c0ca5e --- /dev/null +++ b/src/shader/exacanv110.fpc @@ -0,0 +1,38 @@ +0xfc0007e0, +0x001f8000, +0xcff7ff00, +0xe003ff87, +0x00470000, +0x50800000, +0x4007ff03, +0xe043ff89, +0xfc0007e0, +0x001f8000, +0x0007ff02, +0xe043ff89, +0xaff70204, +0xc03a0017, +0x4007ff01, +0xe043ff88, +0xfc0007e0, +0x001f8000, +0x0007ff00, +0xe043ff88, +0xaff70000, +0xc03a0007, +0x34070000, +0xf0f00000, +0xfc0007e0, +0x001f8000, +0x00770303, +0x5c681000, +0x00670202, +0x5c681000, +0x00570101, +0x5c681000, +0xfc0007e0, +0x001f8000, +0x00470000, +0x5c681000, +0x0007000f, +0xe3000000, diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp new file mode 100644 index 0000000..fe5c294 --- /dev/null +++ b/src/shader/exacmnv110.fp @@ -0,0 +1,47 @@ +#ifndef ENVYAS +static uint32_t +NV110FP_Composite[] = { + 0x00001462, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x80000000, + 0x00000a0a, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x0000000f, + 0x00000000, +#include "exacmnv110.fpc" +}; +#else + +sched (st 0x0) (st 0x0) (st 0x0) +ipa pass $r0 a[0x7c] 0x0 0x0 0x1 +mufu rcp $r0 $r0 +ipa $r3 a[0x94] $r0 0x0 0x1 +sched (st 0x0) (st 0x0) (st 0x0) 
+ipa $r2 a[0x90] $r0 0x0 0x1 +tex nodep $r4 $r2 0x0 0x1 t2d 0x8 +ipa $r1 a[0x84] $r0 0x0 0x1 +sched (st 0x0) (st 0x0) (st 0x0) +ipa $r0 a[0x80] $r0 0x0 0x1 +tex nodep $r0 $r0 0x0 0x0 t2d 0xf +depbar le 0x5 0x0 0x0 +sched (st 0x0) (st 0x0) (st 0x0) +fmul ftz $r3 $r3 $r4 +fmul ftz $r2 $r2 $r4 +fmul ftz $r1 $r1 $r4 +sched (st 0x0) (st 0x0) (st 0x0) +fmul ftz $r0 $r0 $r4 +exit +#endif diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc new file mode 100644 index 0000000..9d62c1a --- /dev/null +++ b/src/shader/exacmnv110.fpc @@ -0,0 +1,38 @@ +0xfc0007e0, +0x001f8000, +0xcff7ff00, +0xe003ff87, +0x00470000, +0x50800000, +0x4007ff03, +0xe043ff89, +0xfc0007e0, +0x001f8000, +0x0007ff02, +0xe043ff89, +0x2ff70204, +0xc03a0014, +0x4007ff01, +0xe043ff88, +0xfc0007e0, +0x001f8000, +0x0007ff00, +0xe043ff88, +0xaff70000, +0xc03a0007, +0x34070000, +0xf0f00000, +0xfc0007e0, +0x001f8000, +0x00470303, +0x5c681000, +0x00470202, +0x5c681000, +0x00470101, +0x5c681000, +0xfc0007e0, +0x001f8000, +0x00470000, +0x5c681000, +0x0007000f, +0xe3000000, diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp new file mode 100644 index 0000000..4fe2e19 --- /dev/null +++ b/src/shader/exas8nv110.fp @@ -0,0 +1,42 @@ +#ifndef ENVYAS +static uint32_t +NV110FP_Source_A8[] = { + 0x00001462, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x80000000, + 0x0000000a, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x0000000f, + 0x00000000, +#include "exas8nv110.fpc" +}; +#else + +sched (st 0x0) (st 0x0) (st 0x0) +ipa pass $r0 a[0x7c] 0x0 0x0 0x1 +mufu rcp $r0 $r0 +ipa $r1 a[0x84] $r0 0x0 0x1 +sched (st 0x0) (st 0x0) (st 0x0) +ipa $r0 a[0x80] $r0 0x0 0x1 +tex nodep $r0 $r0 0x0 0x0 t2d 0x8 +depbar le 0x5 0x0 0x0 +sched (st 0x0) (st 0x0) (st 0x0) +mov $r3 $r0 0xf +mov $r2 $r0 0xf +mov $r1 $r0 0xf +sched (st 0x0) (st 0x0) (st 0x0) +exit +#endif diff --git 
a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc new file mode 100644 index 0000000..1181c41 --- /dev/null +++ b/src/shader/exas8nv110.fpc @@ -0,0 +1,28 @@ +0xfc0007e0, +0x001f8000, +0xcff7ff00, +0xe003ff87, +0x00470000, +0x50800000, +0x4007ff01, +0xe043ff88, +0xfc0007e0, +0x001f8000, +0x0007ff00, +0xe043ff88, +0x2ff70000, +0xc03a0004, +0x34070000, +0xf0f00000, +0xfc0007e0, +0x001f8000, +0x00070003, +0x5c980780, +0x00070002, +0x5c980780, +0x00070001, +0x5c980780, +0xfc0007e0, +0x001f8000, +0x0007000f, +0xe3000000, diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp new file mode 100644 index 0000000..61374a6 --- /dev/null +++ b/src/shader/exasanv110.fp @@ -0,0 +1,47 @@ +#ifndef ENVYAS +static uint32_t +NV110FP_CACompositeSrcAlpha[] = { + 0x00001462, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x80000000, + 0x00000a0a, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x0000000f, + 0x00000000, +#include "exasanv110.fpc" +}; +#else + +sched (st 0x0) (st 0x0) (st 0x0) +ipa pass $r0 a[0x7c] 0x0 0x0 0x1 +mufu rcp $r0 $r0 +ipa $r3 a[0x84] $r0 0x0 0x1 +sched (st 0x0) (st 0x0) (st 0x0) +ipa $r2 a[0x80] $r0 0x0 0x1 +tex nodep $r4 $r2 0x0 0x0 t2d 0x8 +ipa $r1 a[0x94] $r0 0x0 0x1 +sched (st 0x0) (st 0x0) (st 0x0) +ipa $r0 a[0x90] $r0 0x0 0x1 +tex nodep $r0 $r0 0x0 0x1 t2d 0xf +depbar le 0x5 0x0 0x0 +sched (st 0x0) (st 0x0) (st 0x0) +fmul ftz $r3 $r3 $r4 +fmul ftz $r2 $r2 $r4 +fmul ftz $r1 $r1 $r4 +sched (st 0x0) (st 0x0) (st 0x0) +fmul ftz $r0 $r0 $r4 +exit +#endif diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc new file mode 100644 index 0000000..5516a03 --- /dev/null +++ b/src/shader/exasanv110.fpc @@ -0,0 +1,38 @@ +0xfc0007e0, +0x001f8000, +0xcff7ff00, +0xe003ff87, +0x00470000, +0x50800000, +0x4007ff03, +0xe043ff88, +0xfc0007e0, +0x001f8000, +0x0007ff02, +0xe043ff88, +0x2ff70204, +0xc03a0004, +0x4007ff01, 
+0xe043ff89, +0xfc0007e0, +0x001f8000, +0x0007ff00, +0xe043ff89, +0xaff70000, +0xc03a0017, +0x34070000, +0xf0f00000, +0xfc0007e0, +0x001f8000, +0x00470303, +0x5c681000, +0x00470202, +0x5c681000, +0x00470101, +0x5c681000, +0xfc0007e0, +0x001f8000, +0x00470000, +0x5c681000, +0x0007000f, +0xe3000000, diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp new file mode 100644 index 0000000..90bbb55 --- /dev/null +++ b/src/shader/exascnv110.fp @@ -0,0 +1,38 @@ +#ifndef ENVYAS +static uint32_t +NV110FP_Source[] = { + 0x00001462, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x80000000, + 0x0000000a, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x0000000f, + 0x00000000, +#include "exascnv110.fpc" +}; +#else + +sched (st 0x0) (st 0x0) (st 0x0) +ipa pass $r0 a[0x7c] 0x0 0x0 0x1 +mufu rcp $r0 $r0 +ipa $r1 a[0x84] $r0 0x0 0x1 +sched (st 0x0) (st 0x0) (st 0x0) +ipa $r0 a[0x80] $r0 0x0 0x1 +tex nodep $r0 $r0 0x0 0x0 t2d 0xf +depbar le 0x5 0x0 0x0 +sched (st 0x0) (st 0x0) (st 0x0) +exit +#endif diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc new file mode 100644 index 0000000..2dba15d --- /dev/null +++ b/src/shader/exascnv110.fpc @@ -0,0 +1,20 @@ +0xfc0007e0, +0x001f8000, +0xcff7ff00, +0xe003ff87, +0x00470000, +0x50800000, +0x4007ff01, +0xe043ff88, +0xfc0007e0, +0x001f8000, +0x0007ff00, +0xe043ff88, +0xaff70000, +0xc03a0007, +0x34070000, +0xf0f00000, +0xfc0007e0, +0x001f8000, +0x0007000f, +0xe3000000, diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp new file mode 100644 index 0000000..2728311 --- /dev/null +++ b/src/shader/videonv110.fp @@ -0,0 +1,54 @@ +#ifndef ENVYAS +static uint32_t +NV110FP_NV12[] = { + 0x00001462, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x80000000, + 0x0000000a, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 
0x00000000, + 0x00000000, + 0x00000000, + 0x0000000f, + 0x00000000, +#include "videonv110.fpc" +}; +#else + +sched (st 0x0) (st 0x0) (st 0x0) +ipa pass $r2 a[0x7c] 0x0 0x0 0x1 +mufu rcp $r2 $r2 +ipa $r0 a[0x80] $r2 0x0 0x1 +sched (st 0x0) (st 0x0) (st 0x0) +ipa $r1 a[0x84] $r2 0x0 0x1 +tex nodep $r4 $r0 0x0 0x0 t2d 0x8 +tex nodep $r0 $r0 0x0 0x1 t2d 0xc +sched (st 0x0) (st 0x0) (st 0x0) +depbar le 0x5 0x1 0x1 +fmul ftz $r5 $r4 c0[0x0] +fadd ftz $r3 $r5 c0[0x4] +sched (st 0x0) (st 0x0) (st 0x0) +fadd ftz $r4 $r5 c0[0x8] +fadd ftz $r5 $r5 c0[0xc] +depbar le 0x5 0x0 0x0 +sched (st 0x0) (st 0x0) (st 0x0) +ffma ftz $r3 $r0 c0[0x10] $r3 +ffma ftz $r4 $r0 c0[0x14] $r4 +ffma ftz $r5 $r0 c0[0x18] $r5 +sched (st 0x0) (st 0x0) (st 0x0) +ffma ftz $r0 $r1 c0[0x1c] $r3 +ffma ftz $r2 $r1 c0[0x24] $r5 +ffma ftz $r1 $r1 c0[0x20] $r4 +sched (st 0x0) (st 0x0) (st 0x0) +exit +#endif diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc new file mode 100644 index 0000000..31d745a --- /dev/null +++ b/src/shader/videonv110.fpc @@ -0,0 +1,52 @@ +0xfc0007e0, +0x001f8000, +0xcff7ff02, +0xe003ff87, +0x00470202, +0x50800000, +0x0027ff00, +0xe043ff88, +0xfc0007e0, +0x001f8000, +0x4027ff01, +0xe043ff88, +0x2ff70004, +0xc03a0004, +0x2ff70000, +0xc03a0016, +0xfc0007e0, +0x001f8000, +0x34170001, +0xf0f00000, +0x00070405, +0x4c681000, +0x00170503, +0x4c581000, +0xfc0007e0, +0x001f8000, +0x00270504, +0x4c581000, +0x00370505, +0x4c581000, +0x34070000, +0xf0f00000, +0xfc0007e0, +0x001f8000, +0x00470003, +0x49a00180, +0x00570004, +0x49a00200, +0x00670005, +0x49a00280, +0xfc0007e0, +0x001f8000, +0x00770100, +0x49a00180, +0x00970102, +0x49a00280, +0x00870101, +0x49a00200, +0xfc0007e0, +0x001f8000, +0x0007000f, +0xe3000000, diff --git a/src/shader/xfrm2nv110.vp b/src/shader/xfrm2nv110.vp new file mode 100644 index 0000000..bbfc527 --- /dev/null +++ b/src/shader/xfrm2nv110.vp @@ -0,0 +1,82 @@ +#ifndef ENVYAS +static uint32_t +NV110VP_Transform2[] = { + 0x02000461, + 0x00000000, + 0x00000000, + 
0x00000000, + 0x000ff000, + 0x00000000, /* VP_ATTR_EN[0x000] */ + 0x00000000, /* VP_ATTR_EN[0x080] */ + 0x00000000, /* VP_ATTR_EN[0x100] */ + 0x00000000, + 0x00000000, /* VP_ATTR_EN[0x200] */ + 0x80000000, /* VERTEXID */ + 0x00000000, /* VP_ATTR_EN[0x300] */ + 0x00000000, + 0x0033f000, /* VP_EXPORT_EN[0x040] */ + 0x00000000, /* VP_EXPORT_EN[0x0c0] */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, /* VP_EXPORT_EN[0x2c0] */ + 0x00000000, +#include "xfrm2nv110.vpc" +}; +#else + +sched (st 0x0) (st 0x0) (st 0x0) +ld b32 $r5 a[0x2fc] 0x0 +shl $r5 $r5 0x5 +ld b64 $r0 c0[$r5+0x80] +sched (st 0x0) (st 0x0) (st 0x0) +ld b64 $r2 c0[$r5+0x88] +st b128 a[0x70] $r0 0x0 + +ld b64 $r0 c0[$r5+0x90] +sched (st 0x0) (st 0x0) (st 0x0) +fmul ftz $r2 $r0 c0[0x0] +fmul ftz $r3 $r0 c0[0xc] +fmul ftz $r4 $r0 c0[0x18] +sched (st 0x0) (st 0x0) (st 0x0) +ffma ftz $r2 $r1 c0[0x4] $r2 +ffma ftz $r3 $r1 c0[0x10] $r3 +ffma ftz $r4 $r1 c0[0x1c] $r4 +sched (st 0x0) (st 0x0) (st 0x0) +fadd ftz $r2 $r2 c0[0x8] +fadd ftz $r3 $r3 c0[0x14] +fadd ftz $r4 $r4 c0[0x20] +sched (st 0x0) (st 0x0) (st 0x0) +mufu rcp $r4 $r4 +fmul ftz $r2 $r2 $r4 +fmul ftz $r3 $r3 $r4 +sched (st 0x0) (st 0x0) (st 0x0) +fmul ftz $r0 $r2 c0[0x24] +fmul ftz $r1 $r3 c0[0x28] +st b64 a[0x80] $r0 0x0 + +sched (st 0x0) (st 0x0) (st 0x0) +ld b64 $r0 c0[$r5+0x98] +fmul ftz $r2 $r0 c0[0x2c] +fmul ftz $r3 $r0 c0[0x38] +sched (st 0x0) (st 0x0) (st 0x0) +fmul ftz $r4 $r0 c0[0x44] +ffma ftz $r2 $r1 c0[0x30] $r2 +ffma ftz $r3 $r1 c0[0x3c] $r3 +sched (st 0x0) (st 0x0) (st 0x0) +ffma ftz $r4 $r1 c0[0x48] $r4 +fadd ftz $r2 $r2 c0[0x34] +fadd ftz $r3 $r3 c0[0x40] +sched (st 0x0) (st 0x0) (st 0x0) +fadd ftz $r4 $r4 c0[0x4c] +mufu rcp $r4 $r4 +fmul ftz $r2 $r2 $r4 +sched (st 0x0) (st 0x0) (st 0x0) +fmul ftz $r3 $r3 $r4 +fmul ftz $r0 $r2 c0[0x50] +fmul ftz $r1 $r3 c0[0x54] +sched (st 0x0) (st 0x0) (st 0x0) +st b64 a[0x90] $r0 0x0 + +exit +#endif diff --git a/src/shader/xfrm2nv110.vpc b/src/shader/xfrm2nv110.vpc new file mode 100644 index 
0000000..0d9ebfd --- /dev/null +++ b/src/shader/xfrm2nv110.vpc @@ -0,0 +1,102 @@ +0xfc0007e0, +0x001f8000, +0x2fc7ff05, +0xefd87f80, +0x00570505, +0x38480000, +0x08070500, +0xef950000, +0xfc0007e0, +0x001f8000, +0x08870502, +0xef950000, +0x0707ff00, +0xeff1ff80, +0x09070500, +0xef950000, +0xfc0007e0, +0x001f8000, +0x00070002, +0x4c681000, +0x00370003, +0x4c681000, +0x00670004, +0x4c681000, +0xfc0007e0, +0x001f8000, +0x00170102, +0x49a00100, +0x00470103, +0x49a00180, +0x00770104, +0x49a00200, +0xfc0007e0, +0x001f8000, +0x00270202, +0x4c581000, +0x00570303, +0x4c581000, +0x00870404, +0x4c581000, +0xfc0007e0, +0x001f8000, +0x00470404, +0x50800000, +0x00470202, +0x5c681000, +0x00470303, +0x5c681000, +0xfc0007e0, +0x001f8000, +0x00970200, +0x4c681000, +0x00a70301, +0x4c681000, +0x0807ff00, +0xeff0ff80, +0xfc0007e0, +0x001f8000, +0x09870500, +0xef950000, +0x00b70002, +0x4c681000, +0x00e70003, +0x4c681000, +0xfc0007e0, +0x001f8000, +0x01170004, +0x4c681000, +0x00c70102, +0x49a00100, +0x00f70103, +0x49a00180, +0xfc0007e0, +0x001f8000, +0x01270104, +0x49a00200, +0x00d70202, +0x4c581000, +0x01070303, +0x4c581000, +0xfc0007e0, +0x001f8000, +0x01370404, +0x4c581000, +0x00470404, +0x50800000, +0x00470202, +0x5c681000, +0xfc0007e0, +0x001f8000, +0x00470303, +0x5c681000, +0x01470200, +0x4c681000, +0x01570301, +0x4c681000, +0xfc0007e0, +0x001f8000, +0x0907ff00, +0xeff0ff80, +0x0007000f, +0xe3000000, -- 2.7.3
Samuel Pitoiset
2016-Oct-17 09:28 UTC
[Nouveau] [PATCH] exa: add GM10x acceleration support
Looks reasonable, some minor comments below. On 10/16/2016 02:06 AM, Ilia Mirkin wrote:> rendercheck -f a8r8g8b8 passes as much as on a GK208, and xv appears to > work. Very lightly tested. > > Instead of sticking coordinates into pushbufs, the vertex shader is > modified to read them from a constbuf, indexed by vertex id. This > approach could be used for all nvc0 generations, but I didn't want to > rock the boat. > > Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu> > --- > > Note: this won't work for GM20x - we need to allow TIC format to be updated > for that to work. But this is a step in that direction. > > src/Makefile.am | 16 ++++++++ > src/nouveau_copy.c | 1 + > src/nouveau_exa.c | 2 +- > src/nouveau_xv.c | 2 +- > src/nv_accel_common.c | 1 + > src/nv_driver.c | 1 + > src/nvc0_accel.c | 37 ++++++++++++++--- > src/nvc0_exa.c | 48 ++++++++++++++++++++-- > src/nvc0_xv.c | 48 ++++++++++++++++++++-- > src/shader/Makefile | 23 ++++++++--- > src/shader/exac8nv110.fp | 47 +++++++++++++++++++++ > src/shader/exac8nv110.fpc | 38 +++++++++++++++++ > src/shader/exacanv110.fp | 47 +++++++++++++++++++++ > src/shader/exacanv110.fpc | 38 +++++++++++++++++ > src/shader/exacmnv110.fp | 47 +++++++++++++++++++++ > src/shader/exacmnv110.fpc | 38 +++++++++++++++++ > src/shader/exas8nv110.fp | 42 +++++++++++++++++++ > src/shader/exas8nv110.fpc | 28 +++++++++++++ > src/shader/exasanv110.fp | 47 +++++++++++++++++++++ > src/shader/exasanv110.fpc | 38 +++++++++++++++++ > src/shader/exascnv110.fp | 38 +++++++++++++++++ > src/shader/exascnv110.fpc | 20 +++++++++ > src/shader/videonv110.fp | 54 ++++++++++++++++++++++++ > src/shader/videonv110.fpc | 52 +++++++++++++++++++++++ > src/shader/xfrm2nv110.vp | 82 +++++++++++++++++++++++++++++++++++++ > src/shader/xfrm2nv110.vpc | 102 ++++++++++++++++++++++++++++++++++++++++++++++ > 26 files changed, 918 insertions(+), 19 deletions(-) > create mode 100644 src/shader/exac8nv110.fp > create mode 100644 src/shader/exac8nv110.fpc > create mode 
100644 src/shader/exacanv110.fp > create mode 100644 src/shader/exacanv110.fpc > create mode 100644 src/shader/exacmnv110.fp > create mode 100644 src/shader/exacmnv110.fpc > create mode 100644 src/shader/exas8nv110.fp > create mode 100644 src/shader/exas8nv110.fpc > create mode 100644 src/shader/exasanv110.fp > create mode 100644 src/shader/exasanv110.fpc > create mode 100644 src/shader/exascnv110.fp > create mode 100644 src/shader/exascnv110.fpc > create mode 100644 src/shader/videonv110.fp > create mode 100644 src/shader/videonv110.fpc > create mode 100644 src/shader/xfrm2nv110.vp > create mode 100644 src/shader/xfrm2nv110.vpc > > diff --git a/src/Makefile.am b/src/Makefile.am > index 1e04ddf..6ba8d87 100644 > --- a/src/Makefile.am > +++ b/src/Makefile.am > @@ -77,48 +77,64 @@ EXTRA_DIST = hwdefs/nv_3ddefs.xml.h \ > shader/exac8nve0.fpc \ > shader/exac8nvf0.fp \ > shader/exac8nvf0.fpc \ > + shader/exac8nv110.fp \ > + shader/exac8nv110.fpc \ > shader/exacanvc0.fp \ > shader/exacanvc0.fpc \ > shader/exacanve0.fp \ > shader/exacanve0.fpc \ > shader/exacanvf0.fp \ > shader/exacanvf0.fpc \ > + shader/exacanv110.fp \ > + shader/exacanv110.fpc \ > shader/exacmnvc0.fp \ > shader/exacmnvc0.fpc \ > shader/exacmnve0.fp \ > shader/exacmnve0.fpc \ > shader/exacmnvf0.fp \ > shader/exacmnvf0.fpc \ > + shader/exacmnv110.fp \ > + shader/exacmnv110.fpc \ > shader/exas8nvc0.fp \ > shader/exas8nvc0.fpc \ > shader/exas8nve0.fp \ > shader/exas8nve0.fpc \ > shader/exas8nvf0.fp \ > shader/exas8nvf0.fpc \ > + shader/exas8nv110.fp \ > + shader/exas8nv110.fpc \ > shader/exasanvc0.fp \ > shader/exasanvc0.fpc \ > shader/exasanve0.fp \ > shader/exasanve0.fpc \ > shader/exasanvf0.fp \ > shader/exasanvf0.fpc \ > + shader/exasanv110.fp \ > + shader/exasanv110.fpc \ > shader/exascnvc0.fp \ > shader/exascnvc0.fpc \ > shader/exascnve0.fp \ > shader/exascnve0.fpc \ > shader/exascnvf0.fp \ > shader/exascnvf0.fpc \ > + shader/exascnv110.fp \ > + shader/exascnv110.fpc \ > shader/videonvc0.fp \ > 
shader/videonvc0.fpc \ > shader/videonve0.fp \ > shader/videonve0.fpc \ > shader/videonvf0.fp \ > shader/videonvf0.fpc \ > + shader/videonv110.fp \ > + shader/videonv110.fpc \ > shader/xfrm2nvc0.vp \ > shader/xfrm2nvc0.vpc \ > shader/xfrm2nve0.vp \ > shader/xfrm2nve0.vpc \ > shader/xfrm2nvf0.vp \ > shader/xfrm2nvf0.vpc \ > + shader/xfrm2nv110.vp \ > + shader/xfrm2nv110.vpc \ > shader/Makefile \ > nouveau_local.h \ > nouveau_copy.h \ > diff --git a/src/nouveau_copy.c b/src/nouveau_copy.c > index e152a53..c139de6 100644 > --- a/src/nouveau_copy.c > +++ b/src/nouveau_copy.c > @@ -81,6 +81,7 @@ nouveau_copy_init(ScreenPtr pScreen) > &pNv->ce_channel); > break; > case NV_KEPLER: > + case NV_MAXWELL: > ret = nouveau_object_new(&pNv->dev->object, 0, > NOUVEAU_FIFO_CHANNEL_CLASS, > &(struct nve0_fifo) { > diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c > index def66ac..0f02b99 100644 > --- a/src/nouveau_exa.c > +++ b/src/nouveau_exa.c > @@ -514,12 +514,12 @@ nouveau_exa_init(ScreenPtr pScreen) > break; > case NV_FERMI: > case NV_KEPLER: > + case NV_MAXWELL: > exa->CheckComposite = NVC0EXACheckComposite; > exa->PrepareComposite = NVC0EXAPrepareComposite; > exa->Composite = NVC0EXAComposite; > exa->DoneComposite = NVC0EXADoneComposite; > break; > - case NV_MAXWELL: > default: > break; > } > diff --git a/src/nouveau_xv.c b/src/nouveau_xv.c > index 9d5bad2..2ba1992 100644 > --- a/src/nouveau_xv.c > +++ b/src/nouveau_xv.c > @@ -2097,7 +2097,7 @@ NVSetupTexturedVideo (ScreenPtr pScreen, XF86VideoAdaptorPtr *textureAdaptor) > textureAdaptor[0] = NV40SetupTexturedVideo(pScreen, FALSE); > textureAdaptor[1] = NV40SetupTexturedVideo(pScreen, TRUE); > } else > - if (pNv->Architecture >= NV_TESLA && pNv->Architecture < NV_MAXWELL) { > + if (pNv->Architecture >= NV_TESLA) { > textureAdaptor[0] = NV50SetupTexturedVideo(pScreen); > } > } > diff --git a/src/nv_accel_common.c b/src/nv_accel_common.c > index 9361ce8..5d12dd8 100644 > --- a/src/nv_accel_common.c > +++ 
b/src/nv_accel_common.c > @@ -722,6 +722,7 @@ NVAccelCommonInit(ScrnInfoPtr pScrn) > switch (pNv->Architecture) { > case NV_FERMI: > case NV_KEPLER: > + case NV_MAXWELL: > INIT_CONTEXT_OBJECT(3D_NVC0); > break; > case NV_TESLA: > diff --git a/src/nv_driver.c b/src/nv_driver.c > index 4dde8e0..fff83f8 100644 > --- a/src/nv_driver.c > +++ b/src/nv_driver.c > @@ -389,6 +389,7 @@ NVHasKMS(struct pci_device *pci_dev, struct xf86_platform_device *platform_dev) > case 0xe0: > case 0xf0: > case 0x100: > + case 0x110: > break; > default: > xf86DrvMsg(-1, X_ERROR, "Unknown chipset: NV%02X\n", chipset); > diff --git a/src/nvc0_accel.c b/src/nvc0_accel.c > index d2a3b93..52a17db 100644 > --- a/src/nvc0_accel.c > +++ b/src/nvc0_accel.c > @@ -53,6 +53,16 @@ > #include "shader/exas8nvf0.fp" > #include "shader/exac8nvf0.fp" > > +#include "shader/xfrm2nv110.vp" > +#include "shader/videonv110.fp" > + > +#include "shader/exascnv110.fp" > +#include "shader/exacmnv110.fp" > +#include "shader/exacanv110.fp" > +#include "shader/exasanv110.fp" > +#include "shader/exas8nv110.fp" > +#include "shader/exac8nv110.fp" > + > #define NVC0PushProgram(pNv,addr,code) do { \ > const unsigned size = sizeof(code) / sizeof(code[0]); \ > PUSH_DATAu((pNv)->pushbuf, (pNv)->scratch, (addr), size); \ > @@ -223,9 +233,12 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn) > } else if (pNv->dev->chipset < 0xf0) { > class = 0xa097; > handle = 0x0000906e; > - } else { > + } else if (pNv->dev->chipset < 0x110) { > class = 0xa197; > handle = 0x0000906e; > + } else { > + class = 0xb097; > + handle = 0x0000906e; > } > > ret = nouveau_object_new(pNv->channel, class, class, > @@ -304,10 +317,12 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn) > PUSH_DATA (push, 1); > } > > - BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3); > - PUSH_DATA (push, (bo->offset + MISC_OFFSET) >> 32); > - PUSH_DATA (push, (bo->offset + MISC_OFFSET)); > - PUSH_DATA (push, 1); > + if (pNv->Architecture < NV_MAXWELL) { > + BEGIN_NVC0(push, 
NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3); > + PUSH_DATA (push, (bo->offset + MISC_OFFSET) >> 32); > + PUSH_DATA (push, (bo->offset + MISC_OFFSET)); > + PUSH_DATA (push, 1); > + } > > BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2); > PUSH_DATA (push, (bo->offset + CODE_OFFSET) >> 32); > @@ -334,7 +349,8 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn) > NVC0PushProgram(pNv, PFP_S_A8, NVE0FP_Source_A8); > NVC0PushProgram(pNv, PFP_C_A8, NVE0FP_Composite_A8); > NVC0PushProgram(pNv, PFP_NV12, NVE0FP_NV12); > - } else { > + } else > + if (pNv->dev->chipset < 0x110) { > NVC0PushProgram(pNv, PVP_PASS, NVF0VP_Transform2); > NVC0PushProgram(pNv, PFP_S, NVF0FP_Source); > NVC0PushProgram(pNv, PFP_C, NVF0FP_Composite); > @@ -343,6 +359,15 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn) > NVC0PushProgram(pNv, PFP_S_A8, NVF0FP_Source_A8); > NVC0PushProgram(pNv, PFP_C_A8, NVF0FP_Composite_A8); > NVC0PushProgram(pNv, PFP_NV12, NVF0FP_NV12); > + } else { > + NVC0PushProgram(pNv, PVP_PASS, NV110VP_Transform2); > + NVC0PushProgram(pNv, PFP_S, NV110FP_Source); > + NVC0PushProgram(pNv, PFP_C, NV110FP_Composite); > + NVC0PushProgram(pNv, PFP_CCA, NV110FP_CAComposite); > + NVC0PushProgram(pNv, PFP_CCASA, NV110FP_CACompositeSrcAlpha); > + NVC0PushProgram(pNv, PFP_S_A8, NV110FP_Source_A8); > + NVC0PushProgram(pNv, PFP_C_A8, NV110FP_Composite_A8); > + NVC0PushProgram(pNv, PFP_NV12, NV110FP_NV12); > } > > BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 4); > diff --git a/src/nvc0_exa.c b/src/nvc0_exa.c > index 6add60b..a53dfe6 100644 > --- a/src/nvc0_exa.c > +++ b/src/nvc0_exa.c > @@ -914,14 +914,56 @@ NVC0EXAComposite(PixmapPtr pdpix, > if (!PUSH_SPACE(push, 64)) > return; > > + if (pNv->dev->chipset >= 0x110) { > + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); > + PUSH_DATA (push, 256); > + PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA) >> 32);No PUSH_DATAh in the DDX?> + PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA)); > + BEGIN_1IC0(push, NVC0_3D(CB_POS), 3 * (4 + 2 + 2) + 1); > + PUSH_DATA (push, 0x80); > 
+ > + PUSH_DATAf(push, dx); > + PUSH_DATAf(push, dy + (h * 2)); > + PUSH_DATAf(push, 0); > + PUSH_DATAf(push, 1); > + PUSH_DATAf(push, sx); > + PUSH_DATAf(push, sy + (h * 2)); > + PUSH_DATAf(push, mx); > + PUSH_DATAf(push, my + (h * 2)); > + > + PUSH_DATAf(push, dx); > + PUSH_DATAf(push, dy); > + PUSH_DATAf(push, 0); > + PUSH_DATAf(push, 1); > + PUSH_DATAf(push, sx); > + PUSH_DATAf(push, sy); > + PUSH_DATAf(push, mx); > + PUSH_DATAf(push, my); > + > + PUSH_DATAf(push, dx + (w * 2)); > + PUSH_DATAf(push, dy); > + PUSH_DATAf(push, 0); > + PUSH_DATAf(push, 1); > + PUSH_DATAf(push, sx + (w * 2)); > + PUSH_DATAf(push, sy); > + PUSH_DATAf(push, mx + (w * 2)); > + PUSH_DATAf(push, my); > + } > + > BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2); > PUSH_DATA (push, ((dx + w) << 16) | dx); > PUSH_DATA (push, ((dy + h) << 16) | dy); > BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1); > PUSH_DATA (push, NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES); > - PUSH_VTX2s(push, sx, sy + (h * 2), mx, my + (h * 2), dx, dy + (h * 2)); > - PUSH_VTX2s(push, sx, sy, mx, my, dx, dy); > - PUSH_VTX2s(push, sx + (w * 2), sy, mx + (w * 2), my, dx + (w * 2), dy); > + if (pNv->dev->chipset < 0x110) { > + PUSH_VTX2s(push, sx, sy + (h * 2), mx, my + (h * 2), dx, dy + (h * 2)); > + PUSH_VTX2s(push, sx, sy, mx, my, dx, dy); > + PUSH_VTX2s(push, sx + (w * 2), sy, mx + (w * 2), my, dx + (w * 2), dy); > + } else { > + BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2); > + PUSH_DATA (push, 0); > + PUSH_DATA (push, 3); > + } > BEGIN_NVC0(push, NVC0_3D(VERTEX_END_GL), 1); > PUSH_DATA (push, 0); > } > diff --git a/src/nvc0_xv.c b/src/nvc0_xv.c > index d1d8f18..129c505 100644 > --- a/src/nvc0_xv.c > +++ b/src/nvc0_xv.c > @@ -247,15 +247,57 @@ nvc0_xv_image_put(ScrnInfoPtr pScrn, > nouveau_pushbuf_refn (push, refs, 3)) > return BadImplementation; > > + if (pNv->dev->chipset >= 0x110) { > + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); > + PUSH_DATA (push, 256); > + PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA) >> 
32); > + PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA)); > + BEGIN_1IC0(push, NVC0_3D(CB_POS), 3 * (4 + 2 + 2) + 1); > + PUSH_DATA (push, 0x80); > + > + PUSH_DATAf(push, sx1); > + PUSH_DATAf(push, sy1); > + PUSH_DATAf(push, 0); > + PUSH_DATAf(push, 1); > + PUSH_DATAf(push, tx1); > + PUSH_DATAf(push, ty1); > + PUSH_DATAf(push, 0); > + PUSH_DATAf(push, 0); > + > + PUSH_DATAf(push, sx2+(sx2-sx1)); > + PUSH_DATAf(push, sy1); > + PUSH_DATAf(push, 0); > + PUSH_DATAf(push, 1); > + PUSH_DATAf(push, tx2+(tx2-tx1)); > + PUSH_DATAf(push, ty1); > + PUSH_DATAf(push, 0); > + PUSH_DATAf(push, 0); > + > + PUSH_DATAf(push, sx1); > + PUSH_DATAf(push, sy2+(sy2-sy1)); > + PUSH_DATAf(push, 0); > + PUSH_DATAf(push, 1); > + PUSH_DATAf(push, tx1); > + PUSH_DATAf(push, ty2+(ty2-ty1)); > + PUSH_DATAf(push, 0); > + PUSH_DATAf(push, 0); > + } > + > BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2); > PUSH_DATA (push, sx2 << NVC0_3D_SCISSOR_HORIZ_MAX__SHIFT | sx1); > PUSH_DATA (push, sy2 << NVC0_3D_SCISSOR_VERT_MAX__SHIFT | sy1 ); > > BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1); > PUSH_DATA (push, NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES); > - PUSH_VTX1s(push, tx1, ty1, sx1, sy1); > - PUSH_VTX1s(push, tx2+(tx2-tx1), ty1, sx2+(sx2-sx1), sy1); > - PUSH_VTX1s(push, tx1, ty2+(ty2-ty1), sx1, sy2+(sy2-sy1)); > + if (pNv->dev->chipset < 0x110) { > + PUSH_VTX1s(push, tx1, ty1, sx1, sy1); > + PUSH_VTX1s(push, tx2+(tx2-tx1), ty1, sx2+(sx2-sx1), sy1); > + PUSH_VTX1s(push, tx1, ty2+(ty2-ty1), sx1, sy2+(sy2-sy1)); > + } else { > + BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2); > + PUSH_DATA (push, 0); > + PUSH_DATA (push, 3); > + } > BEGIN_NVC0(push, NVC0_3D(VERTEX_END_GL), 1); > PUSH_DATA (push, 0); > > diff --git a/src/shader/Makefile b/src/shader/Makefile > index 2d789be..12bf455 100644 > --- a/src/shader/Makefile > +++ b/src/shader/Makefile > @@ -22,23 +22,36 @@ NVF0_SHADERS = xfrm2nvf0.vpc \ > exas8nvf0.fpc \ > exac8nvf0.fpc \ > videonvf0.fpc > +NV110_SHADERS = xfrm2nv110.vpc \ > + 
exascnv110.fpc \ > + exacmnv110.fpc \ > + exacanv110.fpc \ > + exasanv110.fpc \ > + exas8nv110.fpc \ > + exac8nv110.fpc \ > + videonv110.fpc > > -SHADERS = $(NVC0_SHADERS) $(NVE0_SHADERS) $(NVF0_SHADERS) > +SHADERS = $(NVC0_SHADERS) $(NVE0_SHADERS) $(NVF0_SHADERS) $(NV110_SHADERS) > ENVYAS ?= envyas > > all: $(SHADERS) > > $(filter %nvc0.vpc,$(SHADERS)): %.vpc: %.vp > - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -o $@ > + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gf100 -o $@ > $(filter %nvc0.fpc,$(SHADERS)): %.fpc: %.fp > - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -o $@ > + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gf100 -o $@ > > $(filter %nve0.vpc,$(SHADERS)): %.vpc: %.vp > - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -V nve4 -o $@ > + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gk104 -o $@ > $(filter %nve0.fpc,$(SHADERS)): %.fpc: %.fp > - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -V nve4 -o $@ > + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gk104 -o $@ This is unrelated to your main change but, well, it should be *exactly* the same thing. 
:) > > $(filter %nvf0.vpc,$(SHADERS)): %.vpc: %.vp > cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gk110 -o $@ > $(filter %nvf0.fpc,$(SHADERS)): %.fpc: %.fp > cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gk110 -o $@ > + > +$(filter %nv110.vpc,$(SHADERS)): %.vpc: %.vp > + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gm107 -o $@ > +$(filter %nv110.fpc,$(SHADERS)): %.fpc: %.fp > + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gm107 -o $@ > diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp > new file mode 100644 > index 0000000..ce78036 > --- /dev/null > +++ b/src/shader/exac8nv110.fp > @@ -0,0 +1,47 @@ > +#ifndef ENVYAS > +static uint32_t > +NV110FP_Composite_A8[] = { > + 0x00001462, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x80000000, > + 0x00000a0a, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x0000000f, > + 0x00000000, > +#include "exac8nv110.fpc" > +}; > +#else > + > +sched (st 0x0) (st 0x0) (st 0x0) Those sched codes are definitely bad, but let's keep them as is for now. 
I might have a look at some point to improve the thing.> +ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > +mufu rcp $r0 $r0 > +ipa $r3 a[0x94] $r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r2 a[0x90] $r0 0x0 0x1 > +tex nodep $r1 $r2 0x0 0x1 t2d 0x8 > +ipa $r3 a[0x84] $r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r2 a[0x80] $r0 0x0 0x1 > +tex nodep $r0 $r2 0x0 0x0 t2d 0x8 > +depbar le 0x5 0x0 0x0 > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r3 $r0 $r1 > +mov $r2 $r3 0xf > +mov $r1 $r3 0xf > +sched (st 0x0) (st 0x0) (st 0x0) > +mov $r0 $r3 0xf > +exit > +#endif > diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc > new file mode 100644 > index 0000000..4aa1368 > --- /dev/null > +++ b/src/shader/exac8nv110.fpc > @@ -0,0 +1,38 @@ > +0xfc0007e0, > +0x001f8000, > +0xcff7ff00, > +0xe003ff87, > +0x00470000, > +0x50800000, > +0x4007ff03, > +0xe043ff89, > +0xfc0007e0, > +0x001f8000, > +0x0007ff02, > +0xe043ff89, > +0x2ff70201, > +0xc03a0014, > +0x4007ff03, > +0xe043ff88, > +0xfc0007e0, > +0x001f8000, > +0x0007ff02, > +0xe043ff88, > +0x2ff70200, > +0xc03a0004, > +0x34070000, > +0xf0f00000, > +0xfc0007e0, > +0x001f8000, > +0x00170003, > +0x5c681000, > +0x00370002, > +0x5c980780, > +0x00370001, > +0x5c980780, > +0xfc0007e0, > +0x001f8000, > +0x00370000, > +0x5c980780, > +0x0007000f, > +0xe3000000, > diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp > new file mode 100644 > index 0000000..a70d5c5 > --- /dev/null > +++ b/src/shader/exacanv110.fp > @@ -0,0 +1,47 @@ > +#ifndef ENVYAS > +static uint32_t > +NV110FP_CAComposite[] = { > + 0x00001462, /* 0x0000c000 = USES_KIL, MULTI_COLORS */ > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x80000000, /* FRAG_COORD_UMASK = 0x8 */ > + 0x00000a0a, /* FP_INTERP[0x080], 0022 0022 */ > + 0x00000000, /* FP_INTERP[0x0c0], 0 = OFF */ > + 0x00000000, /* FP_INTERP[0x100], 1 = FLAT */ > + 0x00000000, /* FP_INTERP[0x140], 2 = PERSPECTIVE */ > + 0x00000000, /* FP_INTERP[0x180], 3 = LINEAR 
*/ > + 0x00000000, /* FP_INTERP[0x1c0] */ > + 0x00000000, /* FP_INTERP[0x200] */ > + 0x00000000, /* FP_INTERP[0x240] */ > + 0x00000000, /* FP_INTERP[0x280] */ > + 0x00000000, /* FP_INTERP[0x2c0] */ > + 0x00000000, /* FP_INTERP[0x300] */ > + 0x00000000, > + 0x0000000f, /* FP_RESULT_MASK (0x8000 Face ?) */ > + 0x00000000, /* 0x2 = FragDepth, 0x1 = SampleMask */ > +#include "exacanv110.fpc" > +}; > +#else > + > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > +mufu rcp $r0 $r0 > +ipa $r3 a[0x94] $r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r2 a[0x90] $r0 0x0 0x1 > +tex nodep $r4 $r2 0x0 0x1 t2d 0xf > +ipa $r1 a[0x84] $r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r0 a[0x80] $r0 0x0 0x1 > +tex nodep $r0 $r0 0x0 0x0 t2d 0xf > +depbar le 0x5 0x0 0x0 > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r3 $r3 $r7 > +fmul ftz $r2 $r2 $r6 > +fmul ftz $r1 $r1 $r5 > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r0 $r0 $r4 > +exit > +#endif > diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc > new file mode 100644 > index 0000000..7c0ca5e > --- /dev/null > +++ b/src/shader/exacanv110.fpc > @@ -0,0 +1,38 @@ > +0xfc0007e0, > +0x001f8000, > +0xcff7ff00, > +0xe003ff87, > +0x00470000, > +0x50800000, > +0x4007ff03, > +0xe043ff89, > +0xfc0007e0, > +0x001f8000, > +0x0007ff02, > +0xe043ff89, > +0xaff70204, > +0xc03a0017, > +0x4007ff01, > +0xe043ff88, > +0xfc0007e0, > +0x001f8000, > +0x0007ff00, > +0xe043ff88, > +0xaff70000, > +0xc03a0007, > +0x34070000, > +0xf0f00000, > +0xfc0007e0, > +0x001f8000, > +0x00770303, > +0x5c681000, > +0x00670202, > +0x5c681000, > +0x00570101, > +0x5c681000, > +0xfc0007e0, > +0x001f8000, > +0x00470000, > +0x5c681000, > +0x0007000f, > +0xe3000000, > diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp > new file mode 100644 > index 0000000..fe5c294 > --- /dev/null > +++ b/src/shader/exacmnv110.fp > @@ -0,0 +1,47 @@ > +#ifndef ENVYAS > +static uint32_t > +NV110FP_Composite[] = { > + 
0x00001462, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x80000000, > + 0x00000a0a, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x0000000f, > + 0x00000000, > +#include "exacmnv110.fpc" > +}; > +#else > + > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > +mufu rcp $r0 $r0 > +ipa $r3 a[0x94] $r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r2 a[0x90] $r0 0x0 0x1 > +tex nodep $r4 $r2 0x0 0x1 t2d 0x8 > +ipa $r1 a[0x84] $r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r0 a[0x80] $r0 0x0 0x1 > +tex nodep $r0 $r0 0x0 0x0 t2d 0xf > +depbar le 0x5 0x0 0x0 > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r3 $r3 $r4 > +fmul ftz $r2 $r2 $r4 > +fmul ftz $r1 $r1 $r4 > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r0 $r0 $r4 > +exit > +#endif > diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc > new file mode 100644 > index 0000000..9d62c1a > --- /dev/null > +++ b/src/shader/exacmnv110.fpc > @@ -0,0 +1,38 @@ > +0xfc0007e0, > +0x001f8000, > +0xcff7ff00, > +0xe003ff87, > +0x00470000, > +0x50800000, > +0x4007ff03, > +0xe043ff89, > +0xfc0007e0, > +0x001f8000, > +0x0007ff02, > +0xe043ff89, > +0x2ff70204, > +0xc03a0014, > +0x4007ff01, > +0xe043ff88, > +0xfc0007e0, > +0x001f8000, > +0x0007ff00, > +0xe043ff88, > +0xaff70000, > +0xc03a0007, > +0x34070000, > +0xf0f00000, > +0xfc0007e0, > +0x001f8000, > +0x00470303, > +0x5c681000, > +0x00470202, > +0x5c681000, > +0x00470101, > +0x5c681000, > +0xfc0007e0, > +0x001f8000, > +0x00470000, > +0x5c681000, > +0x0007000f, > +0xe3000000, > diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp > new file mode 100644 > index 0000000..4fe2e19 > --- /dev/null > +++ b/src/shader/exas8nv110.fp > @@ -0,0 +1,42 @@ > +#ifndef ENVYAS > +static uint32_t > +NV110FP_Source_A8[] = { > + 0x00001462, > + 0x00000000, > + 0x00000000, > + 
0x00000000, > + 0x00000000, > + 0x80000000, > + 0x0000000a, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x0000000f, > + 0x00000000, > +#include "exas8nv110.fpc" > +}; > +#else > + > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > +mufu rcp $r0 $r0 > +ipa $r1 a[0x84] $r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r0 a[0x80] $r0 0x0 0x1 > +tex nodep $r0 $r0 0x0 0x0 t2d 0x8 > +depbar le 0x5 0x0 0x0 > +sched (st 0x0) (st 0x0) (st 0x0) > +mov $r3 $r0 0xf > +mov $r2 $r0 0xf > +mov $r1 $r0 0xf > +sched (st 0x0) (st 0x0) (st 0x0) > +exit > +#endif > diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc > new file mode 100644 > index 0000000..1181c41 > --- /dev/null > +++ b/src/shader/exas8nv110.fpc > @@ -0,0 +1,28 @@ > +0xfc0007e0, > +0x001f8000, > +0xcff7ff00, > +0xe003ff87, > +0x00470000, > +0x50800000, > +0x4007ff01, > +0xe043ff88, > +0xfc0007e0, > +0x001f8000, > +0x0007ff00, > +0xe043ff88, > +0x2ff70000, > +0xc03a0004, > +0x34070000, > +0xf0f00000, > +0xfc0007e0, > +0x001f8000, > +0x00070003, > +0x5c980780, > +0x00070002, > +0x5c980780, > +0x00070001, > +0x5c980780, > +0xfc0007e0, > +0x001f8000, > +0x0007000f, > +0xe3000000, > diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp > new file mode 100644 > index 0000000..61374a6 > --- /dev/null > +++ b/src/shader/exasanv110.fp > @@ -0,0 +1,47 @@ > +#ifndef ENVYAS > +static uint32_t > +NV110FP_CACompositeSrcAlpha[] = { > + 0x00001462, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x80000000, > + 0x00000a0a, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x0000000f, > + 0x00000000, > +#include "exasanv110.fpc" > +}; > +#else > + > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa pass $r0 
a[0x7c] 0x0 0x0 0x1 > +mufu rcp $r0 $r0 > +ipa $r3 a[0x84] $r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r2 a[0x80] $r0 0x0 0x1 > +tex nodep $r4 $r2 0x0 0x0 t2d 0x8 > +ipa $r1 a[0x94] $r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r0 a[0x90] $r0 0x0 0x1 > +tex nodep $r0 $r0 0x0 0x1 t2d 0xf > +depbar le 0x5 0x0 0x0 > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r3 $r3 $r4 > +fmul ftz $r2 $r2 $r4 > +fmul ftz $r1 $r1 $r4 > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r0 $r0 $r4 > +exit > +#endif > diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc > new file mode 100644 > index 0000000..5516a03 > --- /dev/null > +++ b/src/shader/exasanv110.fpc > @@ -0,0 +1,38 @@ > +0xfc0007e0, > +0x001f8000, > +0xcff7ff00, > +0xe003ff87, > +0x00470000, > +0x50800000, > +0x4007ff03, > +0xe043ff88, > +0xfc0007e0, > +0x001f8000, > +0x0007ff02, > +0xe043ff88, > +0x2ff70204, > +0xc03a0004, > +0x4007ff01, > +0xe043ff89, > +0xfc0007e0, > +0x001f8000, > +0x0007ff00, > +0xe043ff89, > +0xaff70000, > +0xc03a0017, > +0x34070000, > +0xf0f00000, > +0xfc0007e0, > +0x001f8000, > +0x00470303, > +0x5c681000, > +0x00470202, > +0x5c681000, > +0x00470101, > +0x5c681000, > +0xfc0007e0, > +0x001f8000, > +0x00470000, > +0x5c681000, > +0x0007000f, > +0xe3000000, > diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp > new file mode 100644 > index 0000000..90bbb55 > --- /dev/null > +++ b/src/shader/exascnv110.fp > @@ -0,0 +1,38 @@ > +#ifndef ENVYAS > +static uint32_t > +NV110FP_Source[] = { > + 0x00001462, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x80000000, > + 0x0000000a, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x0000000f, > + 0x00000000, > +#include "exascnv110.fpc" > +}; > +#else > + > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > +mufu rcp $r0 $r0 > +ipa $r1 a[0x84] 
$r0 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r0 a[0x80] $r0 0x0 0x1 > +tex nodep $r0 $r0 0x0 0x0 t2d 0xf > +depbar le 0x5 0x0 0x0 > +sched (st 0x0) (st 0x0) (st 0x0) > +exit > +#endif > diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc > new file mode 100644 > index 0000000..2dba15d > --- /dev/null > +++ b/src/shader/exascnv110.fpc > @@ -0,0 +1,20 @@ > +0xfc0007e0, > +0x001f8000, > +0xcff7ff00, > +0xe003ff87, > +0x00470000, > +0x50800000, > +0x4007ff01, > +0xe043ff88, > +0xfc0007e0, > +0x001f8000, > +0x0007ff00, > +0xe043ff88, > +0xaff70000, > +0xc03a0007, > +0x34070000, > +0xf0f00000, > +0xfc0007e0, > +0x001f8000, > +0x0007000f, > +0xe3000000, > diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp > new file mode 100644 > index 0000000..2728311 > --- /dev/null > +++ b/src/shader/videonv110.fp > @@ -0,0 +1,54 @@ > +#ifndef ENVYAS > +static uint32_t > +NV110FP_NV12[] = { > + 0x00001462, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x80000000, > + 0x0000000a, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x0000000f, > + 0x00000000, > +#include "videonv110.fpc" > +}; > +#else > + > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa pass $r2 a[0x7c] 0x0 0x0 0x1 > +mufu rcp $r2 $r2 > +ipa $r0 a[0x80] $r2 0x0 0x1 > +sched (st 0x0) (st 0x0) (st 0x0) > +ipa $r1 a[0x84] $r2 0x0 0x1 > +tex nodep $r4 $r0 0x0 0x0 t2d 0x8 > +tex nodep $r0 $r0 0x0 0x1 t2d 0xc > +sched (st 0x0) (st 0x0) (st 0x0) > +depbar le 0x5 0x1 0x1 > +fmul ftz $r5 $r4 c0[0x0] > +fadd ftz $r3 $r5 c0[0x4] > +sched (st 0x0) (st 0x0) (st 0x0) > +fadd ftz $r4 $r5 c0[0x8] > +fadd ftz $r5 $r5 c0[0xc] > +depbar le 0x5 0x0 0x0 > +sched (st 0x0) (st 0x0) (st 0x0) > +ffma ftz $r3 $r0 c0[0x10] $r3 > +ffma ftz $r4 $r0 c0[0x14] $r4 > +ffma ftz $r5 $r0 c0[0x18] $r5 > +sched (st 0x0) (st 0x0) (st 0x0) > +ffma ftz $r0 $r1 c0[0x1c] $r3 > 
+ffma ftz $r2 $r1 c0[0x24] $r5 > +ffma ftz $r1 $r1 c0[0x20] $r4 > +sched (st 0x0) (st 0x0) (st 0x0) > +exit > +#endif > diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc > new file mode 100644 > index 0000000..31d745a > --- /dev/null > +++ b/src/shader/videonv110.fpc > @@ -0,0 +1,52 @@ > +0xfc0007e0, > +0x001f8000, > +0xcff7ff02, > +0xe003ff87, > +0x00470202, > +0x50800000, > +0x0027ff00, > +0xe043ff88, > +0xfc0007e0, > +0x001f8000, > +0x4027ff01, > +0xe043ff88, > +0x2ff70004, > +0xc03a0004, > +0x2ff70000, > +0xc03a0016, > +0xfc0007e0, > +0x001f8000, > +0x34170001, > +0xf0f00000, > +0x00070405, > +0x4c681000, > +0x00170503, > +0x4c581000, > +0xfc0007e0, > +0x001f8000, > +0x00270504, > +0x4c581000, > +0x00370505, > +0x4c581000, > +0x34070000, > +0xf0f00000, > +0xfc0007e0, > +0x001f8000, > +0x00470003, > +0x49a00180, > +0x00570004, > +0x49a00200, > +0x00670005, > +0x49a00280, > +0xfc0007e0, > +0x001f8000, > +0x00770100, > +0x49a00180, > +0x00970102, > +0x49a00280, > +0x00870101, > +0x49a00200, > +0xfc0007e0, > +0x001f8000, > +0x0007000f, > +0xe3000000, > diff --git a/src/shader/xfrm2nv110.vp b/src/shader/xfrm2nv110.vp > new file mode 100644 > index 0000000..bbfc527 > --- /dev/null > +++ b/src/shader/xfrm2nv110.vp > @@ -0,0 +1,82 @@ > +#ifndef ENVYAS > +static uint32_t > +NV110VP_Transform2[] = { > + 0x02000461, > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x000ff000, > + 0x00000000, /* VP_ATTR_EN[0x000] */ > + 0x00000000, /* VP_ATTR_EN[0x080] */ > + 0x00000000, /* VP_ATTR_EN[0x100] */ > + 0x00000000, > + 0x00000000, /* VP_ATTR_EN[0x200] */ > + 0x80000000, /* VERTEXID */ > + 0x00000000, /* VP_ATTR_EN[0x300] */ > + 0x00000000, > + 0x0033f000, /* VP_EXPORT_EN[0x040] */ > + 0x00000000, /* VP_EXPORT_EN[0x0c0] */ > + 0x00000000, > + 0x00000000, > + 0x00000000, > + 0x00000000, /* VP_EXPORT_EN[0x2c0] */ > + 0x00000000, > +#include "xfrm2nv110.vpc" > +}; > +#else > + > +sched (st 0x0) (st 0x0) (st 0x0) > +ld b32 $r5 a[0x2fc] 0x0 > +shl $r5 $r5 0x5 > 
+ld b64 $r0 c0[$r5+0x80] > +sched (st 0x0) (st 0x0) (st 0x0) > +ld b64 $r2 c0[$r5+0x88] > +st b128 a[0x70] $r0 0x0 > + > +ld b64 $r0 c0[$r5+0x90] > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r2 $r0 c0[0x0] > +fmul ftz $r3 $r0 c0[0xc] > +fmul ftz $r4 $r0 c0[0x18] > +sched (st 0x0) (st 0x0) (st 0x0) > +ffma ftz $r2 $r1 c0[0x4] $r2 > +ffma ftz $r3 $r1 c0[0x10] $r3 > +ffma ftz $r4 $r1 c0[0x1c] $r4 > +sched (st 0x0) (st 0x0) (st 0x0) > +fadd ftz $r2 $r2 c0[0x8] > +fadd ftz $r3 $r3 c0[0x14] > +fadd ftz $r4 $r4 c0[0x20] > +sched (st 0x0) (st 0x0) (st 0x0) > +mufu rcp $r4 $r4 > +fmul ftz $r2 $r2 $r4 > +fmul ftz $r3 $r3 $r4 > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r0 $r2 c0[0x24] > +fmul ftz $r1 $r3 c0[0x28] > +st b64 a[0x80] $r0 0x0 > + > +sched (st 0x0) (st 0x0) (st 0x0) > +ld b64 $r0 c0[$r5+0x98] > +fmul ftz $r2 $r0 c0[0x2c] > +fmul ftz $r3 $r0 c0[0x38] > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r4 $r0 c0[0x44] > +ffma ftz $r2 $r1 c0[0x30] $r2 > +ffma ftz $r3 $r1 c0[0x3c] $r3 > +sched (st 0x0) (st 0x0) (st 0x0) > +ffma ftz $r4 $r1 c0[0x48] $r4 > +fadd ftz $r2 $r2 c0[0x34] > +fadd ftz $r3 $r3 c0[0x40] > +sched (st 0x0) (st 0x0) (st 0x0) > +fadd ftz $r4 $r4 c0[0x4c] > +mufu rcp $r4 $r4 > +fmul ftz $r2 $r2 $r4 > +sched (st 0x0) (st 0x0) (st 0x0) > +fmul ftz $r3 $r3 $r4 > +fmul ftz $r0 $r2 c0[0x50] > +fmul ftz $r1 $r3 c0[0x54] > +sched (st 0x0) (st 0x0) (st 0x0) > +st b64 a[0x90] $r0 0x0 > + > +exit > +#endif > diff --git a/src/shader/xfrm2nv110.vpc b/src/shader/xfrm2nv110.vpc > new file mode 100644 > index 0000000..0d9ebfd > --- /dev/null > +++ b/src/shader/xfrm2nv110.vpc > @@ -0,0 +1,102 @@ > +0xfc0007e0, > +0x001f8000, > +0x2fc7ff05, > +0xefd87f80, > +0x00570505, > +0x38480000, > +0x08070500, > +0xef950000, > +0xfc0007e0, > +0x001f8000, > +0x08870502, > +0xef950000, > +0x0707ff00, > +0xeff1ff80, > +0x09070500, > +0xef950000, > +0xfc0007e0, > +0x001f8000, > +0x00070002, > +0x4c681000, > +0x00370003, > +0x4c681000, > +0x00670004, > +0x4c681000, > 
+0xfc0007e0, > +0x001f8000, > +0x00170102, > +0x49a00100, > +0x00470103, > +0x49a00180, > +0x00770104, > +0x49a00200, > +0xfc0007e0, > +0x001f8000, > +0x00270202, > +0x4c581000, > +0x00570303, > +0x4c581000, > +0x00870404, > +0x4c581000, > +0xfc0007e0, > +0x001f8000, > +0x00470404, > +0x50800000, > +0x00470202, > +0x5c681000, > +0x00470303, > +0x5c681000, > +0xfc0007e0, > +0x001f8000, > +0x00970200, > +0x4c681000, > +0x00a70301, > +0x4c681000, > +0x0807ff00, > +0xeff0ff80, > +0xfc0007e0, > +0x001f8000, > +0x09870500, > +0xef950000, > +0x00b70002, > +0x4c681000, > +0x00e70003, > +0x4c681000, > +0xfc0007e0, > +0x001f8000, > +0x01170004, > +0x4c681000, > +0x00c70102, > +0x49a00100, > +0x00f70103, > +0x49a00180, > +0xfc0007e0, > +0x001f8000, > +0x01270104, > +0x49a00200, > +0x00d70202, > +0x4c581000, > +0x01070303, > +0x4c581000, > +0xfc0007e0, > +0x001f8000, > +0x01370404, > +0x4c581000, > +0x00470404, > +0x50800000, > +0x00470202, > +0x5c681000, > +0xfc0007e0, > +0x001f8000, > +0x00470303, > +0x5c681000, > +0x01470200, > +0x4c681000, > +0x01570301, > +0x4c681000, > +0xfc0007e0, > +0x001f8000, > +0x0907ff00, > +0xeff0ff80, > +0x0007000f, > +0xe3000000, >-- -Samuel
On Mon, Oct 17, 2016 at 5:28 AM, Samuel Pitoiset <samuel.pitoiset at gmail.com> wrote: > Looks reasonable, some minor comments below. > > > On 10/16/2016 02:06 AM, Ilia Mirkin wrote: >> diff --git a/src/nvc0_exa.c b/src/nvc0_exa.c >> index 6add60b..a53dfe6 100644 >> --- a/src/nvc0_exa.c >> +++ b/src/nvc0_exa.c >> @@ -914,14 +914,56 @@ NVC0EXAComposite(PixmapPtr pdpix, >> if (!PUSH_SPACE(push, 64)) >> return; >> >> + if (pNv->dev->chipset >= 0x110) { >> + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); >> + PUSH_DATA (push, 256); >> + PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA) >> 32); > > > No PUSH_DATAh in the DDX? Nope. Didn't feel the burning need to add a helper either. >> $(filter %nvc0.vpc,$(SHADERS)): %.vpc: %.vp >> - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -o $@ >> + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gf100 >> -o $@ >> $(filter %nvc0.fpc,$(SHADERS)): %.fpc: %.fp >> - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -o $@ >> + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gf100 >> -o $@ >> >> $(filter %nve0.vpc,$(SHADERS)): %.vpc: %.vp >> - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -V nve4 -o >> $@ >> + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gk104 >> -o $@ >> $(filter %nve0.fpc,$(SHADERS)): %.fpc: %.fp >> - cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -V nve4 -o >> $@ >> + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gk104 >> -o $@ > > > This is unrelated to your main change, but well should be *exactly* the same > thing. :) You mean the bit about me adding -V gf100? Figured I'd fix it up while I was at it. 
The machine/variant names changed though. > > >> >> $(filter %nvf0.vpc,$(SHADERS)): %.vpc: %.vp >> cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gk110 -o $@ >> $(filter %nvf0.fpc,$(SHADERS)): %.fpc: %.fp >> cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gk110 -o $@ >> + >> +$(filter %nv110.vpc,$(SHADERS)): %.vpc: %.vp >> + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gm107 -o $@ >> +$(filter %nv110.fpc,$(SHADERS)): %.fpc: %.fp >> + cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gm107 -o $@ >> diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp >> new file mode 100644 >> index 0000000..ce78036 >> --- /dev/null >> +++ b/src/shader/exac8nv110.fp >> @@ -0,0 +1,47 @@ >> +#ifndef ENVYAS >> +static uint32_t >> +NV110FP_Composite_A8[] = { >> + 0x00001462, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x80000000, >> + 0x00000a0a, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x00000000, >> + 0x0000000f, >> + 0x00000000, >> +#include "exac8nv110.fpc" >> +}; >> +#else >> + >> +sched (st 0x0) (st 0x0) (st 0x0) > > > Those sched codes are definitely bad, but let's keep them as it for now. I > might have a look at some point to improve the thing. Yeah, way wrong. However, it's what our compiler would produce. You can use this as a proving ground for your various theories. All simple shaders though, no control flow. Only complex thing is textures. -ilia