Aaryaman Vasishta
2017-Jun-10 07:10 UTC
[Nouveau] [PATCH v3] nv110/exa: update sched codes
This patch adds proper delays to the Maxwell EXA and video shaders by
replacing the all-zero sched codes. The rendercheck results seem consistent
with and without this patch, although I haven't tested them extensively.
Trello:
https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays
Signed-off-by: Aaryaman Vasishta <jem456.vasishta at gmail.com>
---
src/shader/exac8nv110.fp | 10 +++++-----
src/shader/exac8nv110.fpc | 18 +++++++++---------
src/shader/exacanv110.fp | 10 +++++-----
src/shader/exacanv110.fpc | 18 +++++++++---------
src/shader/exacmnv110.fp | 10 +++++-----
src/shader/exacmnv110.fpc | 18 +++++++++---------
src/shader/exas8nv110.fp | 6 +++---
src/shader/exas8nv110.fpc | 12 ++++++------
src/shader/exasanv110.fp | 10 +++++-----
src/shader/exasanv110.fpc | 18 +++++++++---------
src/shader/exascnv110.fp | 6 +++---
src/shader/exascnv110.fpc | 10 +++++-----
src/shader/videonv110.fp | 14 +++++++-------
src/shader/videonv110.fpc | 26 +++++++++++++-------------
14 files changed, 93 insertions(+), 93 deletions(-)
diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
index ce78036..101b67f 100644
--- a/src/shader/exac8nv110.fp
+++ b/src/shader/exac8nv110.fp
@@ -25,23 +25,23 @@ NV110FP_Composite_A8[] = {
};
#else
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
ipa pass $r0 a[0x7c] 0x0 0x0 0x1
mufu rcp $r0 $r0
ipa $r3 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)
ipa $r2 a[0x90] $r0 0x0 0x1
tex nodep $r1 $r2 0x0 0x1 t2d 0x8
ipa $r3 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
ipa $r2 a[0x80] $r0 0x0 0x1
tex nodep $r0 $r2 0x0 0x0 t2d 0x8
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
fmul ftz $r3 $r0 $r1
mov $r2 $r3 0xf
mov $r1 $r3 0xf
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0xf) (st 0x0)
mov $r0 $r3 0xf
exit
#endif
diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
index 4aa1368..1f7d649 100644
--- a/src/shader/exac8nv110.fpc
+++ b/src/shader/exac8nv110.fpc
@@ -1,36 +1,36 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
0xcff7ff00,
0xe003ff87,
0x00470000,
0x50800000,
0x4007ff03,
0xe043ff89,
-0xfc0007e0,
-0x001f8000,
+0x21e0072f,
+0x005cbc03,
0x0007ff02,
0xe043ff89,
0x2ff70201,
0xc03a0014,
0x4007ff03,
0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe5e0074f,
+0x001fbc06,
0x0007ff02,
0xe043ff88,
0x2ff70200,
0xc03a0004,
0x34070000,
0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc201fe6,
+0x001f8400,
0x00170003,
0x5c681000,
0x00370002,
0x5c980780,
0x00370001,
0x5c980780,
-0xfc0007e0,
+0xfde007e1,
0x001f8000,
0x00370000,
0x5c980780,
diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp
index a70d5c5..8a9bd43 100644
--- a/src/shader/exacanv110.fp
+++ b/src/shader/exacanv110.fp
@@ -25,23 +25,23 @@ NV110FP_CAComposite[] = {
};
#else
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
ipa pass $r0 a[0x7c] 0x0 0x0 0x1
mufu rcp $r0 $r0
ipa $r3 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x2)
ipa $r2 a[0x90] $r0 0x0 0x1
tex nodep $r4 $r2 0x0 0x1 t2d 0xf
ipa $r1 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x2 wt 0x4) (st 0xf)
ipa $r0 a[0x80] $r0 0x0 0x1
tex nodep $r0 $r0 0x0 0x0 t2d 0xf
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1 wt 0x4) (st 0x1) (st 0x1)
fmul ftz $r3 $r3 $r7
fmul ftz $r2 $r2 $r6
fmul ftz $r1 $r1 $r5
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1 wt 0x1) (st 0xf) (st 0x0)
fmul ftz $r0 $r0 $r4
exit
#endif
diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc
index 7c0ca5e..08a633c 100644
--- a/src/shader/exacanv110.fpc
+++ b/src/shader/exacanv110.fpc
@@ -1,36 +1,36 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
0xcff7ff00,
0xe003ff87,
0x00470000,
0x50800000,
0x4007ff03,
0xe043ff89,
-0xfc0007e0,
-0x001f8000,
+0x21e0072f,
+0x001d3c03,
0x0007ff02,
0xe043ff89,
0xaff70204,
0xc03a0017,
0x4007ff01,
0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe9e0274f,
+0x001fbc04,
0x0007ff00,
0xe043ff88,
0xaff70000,
0xc03a0007,
0x34070000,
0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc2027e1,
+0x001f8400,
0x00770303,
0x5c681000,
0x00670202,
0x5c681000,
0x00570101,
0x5c681000,
-0xfc0007e0,
+0xfde00fe1,
0x001f8000,
0x00470000,
0x5c681000,
diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp
index fe5c294..39c49de 100644
--- a/src/shader/exacmnv110.fp
+++ b/src/shader/exacmnv110.fp
@@ -25,23 +25,23 @@ NV110FP_Composite[] = {
};
#else
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
ipa pass $r0 a[0x7c] 0x0 0x0 0x1
mufu rcp $r0 $r0
ipa $r3 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1 rd 0x2)
ipa $r2 a[0x90] $r0 0x0 0x1
tex nodep $r4 $r2 0x0 0x1 t2d 0x8
ipa $r1 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x1 wt 0x6) (st 0xf)
ipa $r0 a[0x80] $r0 0x0 0x1
tex nodep $r0 $r0 0x0 0x0 t2d 0xf
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
fmul ftz $r3 $r3 $r4
fmul ftz $r2 $r2 $r4
fmul ftz $r1 $r1 $r4
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0xf) (st 0x0)
fmul ftz $r0 $r0 $r4
exit
#endif
diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc
index 9d62c1a..f5f06e2 100644
--- a/src/shader/exacmnv110.fpc
+++ b/src/shader/exacmnv110.fpc
@@ -1,36 +1,36 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
0xcff7ff00,
0xe003ff87,
0x00470000,
0x50800000,
0x4007ff03,
0xe043ff89,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x0008bc03,
0x0007ff02,
0xe043ff89,
0x2ff70204,
0xc03a0014,
0x4007ff01,
0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe5e0274f,
+0x001fbc06,
0x0007ff00,
0xe043ff88,
0xaff70000,
0xc03a0007,
0x34070000,
0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc201fe1,
+0x001f8400,
0x00470303,
0x5c681000,
0x00470202,
0x5c681000,
0x00470101,
0x5c681000,
-0xfc0007e0,
+0xfde007e1,
0x001f8000,
0x00470000,
0x5c681000,
diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp
index 4fe2e19..a555beb 100644
--- a/src/shader/exas8nv110.fp
+++ b/src/shader/exas8nv110.fp
@@ -25,15 +25,15 @@ NV110FP_Source_A8[] = {
};
#else
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
ipa pass $r0 a[0x7c] 0x0 0x0 0x1
mufu rcp $r0 $r0
ipa $r1 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf)
ipa $r0 a[0x80] $r0 0x0 0x1
tex nodep $r0 $r0 0x0 0x0 t2d 0x8
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1 wt 0x1) (st 0x1) (st 0x1)
mov $r3 $r0 0xf
mov $r2 $r0 0xf
mov $r1 $r0 0xf
diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc
index 1181c41..e58d168 100644
--- a/src/shader/exas8nv110.fpc
+++ b/src/shader/exas8nv110.fpc
@@ -1,21 +1,21 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
0xcff7ff00,
0xe003ff87,
0x00470000,
0x50800000,
0x4007ff01,
0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x001fbc03,
0x0007ff00,
0xe043ff88,
0x2ff70000,
0xc03a0004,
0x34070000,
0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc200fe1,
+0x001f8400,
0x00070003,
0x5c980780,
0x00070002,
diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp
index 61374a6..9f8742a 100644
--- a/src/shader/exasanv110.fp
+++ b/src/shader/exasanv110.fp
@@ -25,23 +25,23 @@ NV110FP_CACompositeSrcAlpha[] = {
};
#else
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
ipa pass $r0 a[0x7c] 0x0 0x0 0x1
mufu rcp $r0 $r0
ipa $r3 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1 rd 0x2)
ipa $r2 a[0x80] $r0 0x0 0x1
tex nodep $r4 $r2 0x0 0x0 t2d 0x8
ipa $r1 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x1 wt 0x6) (st 0xf)
ipa $r0 a[0x90] $r0 0x0 0x1
tex nodep $r0 $r0 0x0 0x1 t2d 0xf
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
fmul ftz $r3 $r3 $r4
fmul ftz $r2 $r2 $r4
fmul ftz $r1 $r1 $r4
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0xf) (st 0x0)
fmul ftz $r0 $r0 $r4
exit
#endif
diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc
index 5516a03..c291298 100644
--- a/src/shader/exasanv110.fpc
+++ b/src/shader/exasanv110.fpc
@@ -1,36 +1,36 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
0xcff7ff00,
0xe003ff87,
0x00470000,
0x50800000,
0x4007ff03,
0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x0008bc03,
0x0007ff02,
0xe043ff88,
0x2ff70204,
0xc03a0004,
0x4007ff01,
0xe043ff89,
-0xfc0007e0,
-0x001f8000,
+0xe5e0274f,
+0x001fbc06,
0x0007ff00,
0xe043ff89,
0xaff70000,
0xc03a0017,
0x34070000,
0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc201fe1,
+0x001f8400,
0x00470303,
0x5c681000,
0x00470202,
0x5c681000,
0x00470101,
0x5c681000,
-0xfc0007e0,
+0xfde007e1,
0x001f8000,
0x00470000,
0x5c681000,
diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp
index 90bbb55..86e14e8 100644
--- a/src/shader/exascnv110.fp
+++ b/src/shader/exascnv110.fp
@@ -25,14 +25,14 @@ NV110FP_Source[] = {
};
#else
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
ipa pass $r0 a[0x7c] 0x0 0x0 0x1
mufu rcp $r0 $r0
ipa $r1 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wt 0x3) (st 0xf)
ipa $r0 a[0x80] $r0 0x0 0x1
tex nodep $r0 $r0 0x0 0x0 t2d 0xf
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf) (st 0x0) (st 0x0)
exit
#endif
diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc
index 2dba15d..1fef5d2 100644
--- a/src/shader/exascnv110.fpc
+++ b/src/shader/exascnv110.fpc
@@ -1,20 +1,20 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
0xcff7ff00,
0xe003ff87,
0x00470000,
0x50800000,
0x4007ff01,
0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xfde0072f,
+0x001fbc03,
0x0007ff00,
0xe043ff88,
0xaff70000,
0xc03a0007,
0x34070000,
0xf0f00000,
-0xfc0007e0,
+0xfc0007ef,
0x001f8000,
0x0007000f,
0xe3000000,
diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp
index 2728311..dd3816c 100644
--- a/src/shader/videonv110.fp
+++ b/src/shader/videonv110.fp
@@ -25,30 +25,30 @@ NV110FP_NV12[] = {
};
#else
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
ipa pass $r2 a[0x7c] 0x0 0x0 0x1
mufu rcp $r2 $r2
ipa $r0 a[0x80] $r2 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
ipa $r1 a[0x84] $r2 0x0 0x1
tex nodep $r4 $r0 0x0 0x0 t2d 0x8
tex nodep $r0 $r0 0x0 0x1 t2d 0xc
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf) (st 0x6 wt 0x1) (st 0x6)
depbar le 0x5 0x1 0x1
fmul ftz $r5 $r4 c0[0x0]
fadd ftz $r3 $r5 c0[0x4]
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6) (st 0x6) (st 0xf)
fadd ftz $r4 $r5 c0[0x8]
fadd ftz $r5 $r5 c0[0xc]
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6 wt 0x2) (st 0x1) (st 0x1)
ffma ftz $r3 $r0 c0[0x10] $r3
ffma ftz $r4 $r0 c0[0x14] $r4
ffma ftz $r5 $r0 c0[0x18] $r5
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0x1) (st 0x6)
ffma ftz $r0 $r1 c0[0x1c] $r3
ffma ftz $r2 $r1 c0[0x24] $r5
ffma ftz $r1 $r1 c0[0x20] $r4
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf) (st 0x0) (st 0x0)
exit
#endif
diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc
index 31d745a..8fbc246 100644
--- a/src/shader/videonv110.fpc
+++ b/src/shader/videonv110.fpc
@@ -1,52 +1,52 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
0xcff7ff02,
0xe003ff87,
0x00470202,
0x50800000,
0x0027ff00,
0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x001cbc03,
0x4027ff01,
0xe043ff88,
0x2ff70004,
0xc03a0004,
0x2ff70000,
0xc03a0016,
-0xfc0007e0,
-0x001f8000,
+0xfcc007ef,
+0x001f9801,
0x34170001,
0xf0f00000,
0x00070405,
0x4c681000,
0x00170503,
0x4c581000,
-0xfc0007e0,
-0x001f8000,
+0xfcc007e6,
+0x001fbc00,
0x00270504,
0x4c581000,
0x00370505,
0x4c581000,
0x34070000,
0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc2017e6,
+0x001f8400,
0x00470003,
0x49a00180,
0x00570004,
0x49a00200,
0x00670005,
0x49a00280,
-0xfc0007e0,
-0x001f8000,
+0xfc2007e1,
+0x001f9800,
0x00770100,
0x49a00180,
0x00970102,
0x49a00280,
0x00870101,
0x49a00200,
-0xfc0007e0,
+0xfc0007ef,
0x001f8000,
0x0007000f,
0xe3000000,
--
2.11.0
Aaryaman Vasishta
2017-Jun-10 07:14 UTC
[Nouveau] [PATCH v3] nv110/exa: update sched codes
See the 'wt' on the first fmul in exacanv110.fp, exacmnv110.fp and exasanv110.fp. Any ideas on what could be causing the first fmul to require $r0 and/or $r1? Cheers, Aaryaman On Sat, Jun 10, 2017 at 4:10 PM, Aaryaman Vasishta < jem456.vasishta at gmail.com> wrote:> This patch adds proper delays to maxwell exa shaders. rendercheck tests > seem consistent with/without this patch. I haven't extensively tested > them though. > > Trello: > https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders- > with-proper-delays > > Signed-off-by: Aaryaman Vasishta <jem456.vasishta at gmail.com> > --- > src/shader/exac8nv110.fp | 10 +++++----- > src/shader/exac8nv110.fpc | 18 +++++++++--------- > src/shader/exacanv110.fp | 10 +++++----- > src/shader/exacanv110.fpc | 18 +++++++++--------- > src/shader/exacmnv110.fp | 10 +++++----- > src/shader/exacmnv110.fpc | 18 +++++++++--------- > src/shader/exas8nv110.fp | 6 +++--- > src/shader/exas8nv110.fpc | 12 ++++++------ > src/shader/exasanv110.fp | 10 +++++----- > src/shader/exasanv110.fpc | 18 +++++++++--------- > src/shader/exascnv110.fp | 6 +++--- > src/shader/exascnv110.fpc | 10 +++++----- > src/shader/videonv110.fp | 14 +++++++------- > src/shader/videonv110.fpc | 26 +++++++++++++------------- > 14 files changed, 93 insertions(+), 93 deletions(-) > > diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp > index ce78036..101b67f 100644 > --- a/src/shader/exac8nv110.fp > +++ b/src/shader/exac8nv110.fp > @@ -25,23 +25,23 @@ NV110FP_Composite_A8[] = { > }; > #else > > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) > ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > mufu rcp $r0 $r0 > ipa $r3 a[0x94] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2) > ipa $r2 a[0x90] $r0 0x0 0x1 > tex nodep $r1 $r2 0x0 0x1 t2d 0x8 > ipa $r3 a[0x84] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x2) (st 
0xf wr 0x1 wt 0x6) (st 0xf) > ipa $r2 a[0x80] $r0 0x0 0x1 > tex nodep $r0 $r2 0x0 0x0 t2d 0x8 > depbar le 0x5 0x0 0x0 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x6 wt 0x3) (st 0x1) (st 0x1) > fmul ftz $r3 $r0 $r1 > mov $r2 $r3 0xf > mov $r1 $r3 0xf > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x1) (st 0xf) (st 0x0) > mov $r0 $r3 0xf > exit > #endif > diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc > index 4aa1368..1f7d649 100644 > --- a/src/shader/exac8nv110.fpc > +++ b/src/shader/exac8nv110.fpc > @@ -1,36 +1,36 @@ > -0xfc0007e0, > -0x001f8000, > +0xe1a0070f, > +0x003c3c01, > 0xcff7ff00, > 0xe003ff87, > 0x00470000, > 0x50800000, > 0x4007ff03, > 0xe043ff89, > -0xfc0007e0, > -0x001f8000, > +0x21e0072f, > +0x005cbc03, > 0x0007ff02, > 0xe043ff89, > 0x2ff70201, > 0xc03a0014, > 0x4007ff03, > 0xe043ff88, > -0xfc0007e0, > -0x001f8000, > +0xe5e0074f, > +0x001fbc06, > 0x0007ff02, > 0xe043ff88, > 0x2ff70200, > 0xc03a0004, > 0x34070000, > 0xf0f00000, > -0xfc0007e0, > -0x001f8000, > +0xfc201fe6, > +0x001f8400, > 0x00170003, > 0x5c681000, > 0x00370002, > 0x5c980780, > 0x00370001, > 0x5c980780, > -0xfc0007e0, > +0xfde007e1, > 0x001f8000, > 0x00370000, > 0x5c980780, > diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp > index a70d5c5..8a9bd43 100644 > --- a/src/shader/exacanv110.fp > +++ b/src/shader/exacanv110.fp > @@ -25,23 +25,23 @@ NV110FP_CAComposite[] = { > }; > #else > > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) > ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > mufu rcp $r0 $r0 > ipa $r3 a[0x94] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x2) > ipa $r2 a[0x90] $r0 0x0 0x1 > tex nodep $r4 $r2 0x0 0x1 t2d 0xf > ipa $r1 a[0x84] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x2 wt 0x4) (st 0xf) > ipa $r0 a[0x80] $r0 0x0 0x1 > tex nodep $r0 $r0 0x0 0x0 t2d 0xf > depbar le 
0x5 0x0 0x0 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x1 wt 0x4) (st 0x1) (st 0x1) > fmul ftz $r3 $r3 $r7 > fmul ftz $r2 $r2 $r6 > fmul ftz $r1 $r1 $r5 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x1 wt 0x1) (st 0xf) (st 0x0) > fmul ftz $r0 $r0 $r4 > exit > #endif > diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc > index 7c0ca5e..08a633c 100644 > --- a/src/shader/exacanv110.fpc > +++ b/src/shader/exacanv110.fpc > @@ -1,36 +1,36 @@ > -0xfc0007e0, > -0x001f8000, > +0xe1a0070f, > +0x003c3c01, > 0xcff7ff00, > 0xe003ff87, > 0x00470000, > 0x50800000, > 0x4007ff03, > 0xe043ff89, > -0xfc0007e0, > -0x001f8000, > +0x21e0072f, > +0x001d3c03, > 0x0007ff02, > 0xe043ff89, > 0xaff70204, > 0xc03a0017, > 0x4007ff01, > 0xe043ff88, > -0xfc0007e0, > -0x001f8000, > +0xe9e0274f, > +0x001fbc04, > 0x0007ff00, > 0xe043ff88, > 0xaff70000, > 0xc03a0007, > 0x34070000, > 0xf0f00000, > -0xfc0007e0, > -0x001f8000, > +0xfc2027e1, > +0x001f8400, > 0x00770303, > 0x5c681000, > 0x00670202, > 0x5c681000, > 0x00570101, > 0x5c681000, > -0xfc0007e0, > +0xfde00fe1, > 0x001f8000, > 0x00470000, > 0x5c681000, > diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp > index fe5c294..39c49de 100644 > --- a/src/shader/exacmnv110.fp > +++ b/src/shader/exacmnv110.fp > @@ -25,23 +25,23 @@ NV110FP_Composite[] = { > }; > #else > > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) > ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > mufu rcp $r0 $r0 > ipa $r3 a[0x94] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1 rd 0x2) > ipa $r2 a[0x90] $r0 0x0 0x1 > tex nodep $r4 $r2 0x0 0x1 t2d 0x8 > ipa $r1 a[0x84] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x1 wt 0x6) (st 0xf) > ipa $r0 a[0x80] $r0 0x0 0x1 > tex nodep $r0 $r0 0x0 0x0 t2d 0xf > depbar le 0x5 0x0 0x0 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x1 wt 0x3) (st 0x1) (st 
0x1) > fmul ftz $r3 $r3 $r4 > fmul ftz $r2 $r2 $r4 > fmul ftz $r1 $r1 $r4 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x1) (st 0xf) (st 0x0) > fmul ftz $r0 $r0 $r4 > exit > #endif > diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc > index 9d62c1a..f5f06e2 100644 > --- a/src/shader/exacmnv110.fpc > +++ b/src/shader/exacmnv110.fpc > @@ -1,36 +1,36 @@ > -0xfc0007e0, > -0x001f8000, > +0xe1a0070f, > +0x003c3c01, > 0xcff7ff00, > 0xe003ff87, > 0x00470000, > 0x50800000, > 0x4007ff03, > 0xe043ff89, > -0xfc0007e0, > -0x001f8000, > +0xe1e0072f, > +0x0008bc03, > 0x0007ff02, > 0xe043ff89, > 0x2ff70204, > 0xc03a0014, > 0x4007ff01, > 0xe043ff88, > -0xfc0007e0, > -0x001f8000, > +0xe5e0274f, > +0x001fbc06, > 0x0007ff00, > 0xe043ff88, > 0xaff70000, > 0xc03a0007, > 0x34070000, > 0xf0f00000, > -0xfc0007e0, > -0x001f8000, > +0xfc201fe1, > +0x001f8400, > 0x00470303, > 0x5c681000, > 0x00470202, > 0x5c681000, > 0x00470101, > 0x5c681000, > -0xfc0007e0, > +0xfde007e1, > 0x001f8000, > 0x00470000, > 0x5c681000, > diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp > index 4fe2e19..a555beb 100644 > --- a/src/shader/exas8nv110.fp > +++ b/src/shader/exas8nv110.fp > @@ -25,15 +25,15 @@ NV110FP_Source_A8[] = { > }; > #else > > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) > ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > mufu rcp $r0 $r0 > ipa $r1 a[0x84] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf) > ipa $r0 a[0x80] $r0 0x0 0x1 > tex nodep $r0 $r0 0x0 0x0 t2d 0x8 > depbar le 0x5 0x0 0x0 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x1 wt 0x1) (st 0x1) (st 0x1) > mov $r3 $r0 0xf > mov $r2 $r0 0xf > mov $r1 $r0 0xf > diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc > index 1181c41..e58d168 100644 > --- a/src/shader/exas8nv110.fpc > +++ b/src/shader/exas8nv110.fpc > @@ -1,21 +1,21 @@ > -0xfc0007e0, > -0x001f8000, > +0xe1a0070f, > 
+0x003c3c01, > 0xcff7ff00, > 0xe003ff87, > 0x00470000, > 0x50800000, > 0x4007ff01, > 0xe043ff88, > -0xfc0007e0, > -0x001f8000, > +0xe1e0072f, > +0x001fbc03, > 0x0007ff00, > 0xe043ff88, > 0x2ff70000, > 0xc03a0004, > 0x34070000, > 0xf0f00000, > -0xfc0007e0, > -0x001f8000, > +0xfc200fe1, > +0x001f8400, > 0x00070003, > 0x5c980780, > 0x00070002, > diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp > index 61374a6..9f8742a 100644 > --- a/src/shader/exasanv110.fp > +++ b/src/shader/exasanv110.fp > @@ -25,23 +25,23 @@ NV110FP_CACompositeSrcAlpha[] = { > }; > #else > > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) > ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > mufu rcp $r0 $r0 > ipa $r3 a[0x84] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1 rd 0x2) > ipa $r2 a[0x80] $r0 0x0 0x1 > tex nodep $r4 $r2 0x0 0x0 t2d 0x8 > ipa $r1 a[0x94] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x1 wt 0x6) (st 0xf) > ipa $r0 a[0x90] $r0 0x0 0x1 > tex nodep $r0 $r0 0x0 0x1 t2d 0xf > depbar le 0x5 0x0 0x0 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1) > fmul ftz $r3 $r3 $r4 > fmul ftz $r2 $r2 $r4 > fmul ftz $r1 $r1 $r4 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x1) (st 0xf) (st 0x0) > fmul ftz $r0 $r0 $r4 > exit > #endif > diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc > index 5516a03..c291298 100644 > --- a/src/shader/exasanv110.fpc > +++ b/src/shader/exasanv110.fpc > @@ -1,36 +1,36 @@ > -0xfc0007e0, > -0x001f8000, > +0xe1a0070f, > +0x003c3c01, > 0xcff7ff00, > 0xe003ff87, > 0x00470000, > 0x50800000, > 0x4007ff03, > 0xe043ff88, > -0xfc0007e0, > -0x001f8000, > +0xe1e0072f, > +0x0008bc03, > 0x0007ff02, > 0xe043ff88, > 0x2ff70204, > 0xc03a0004, > 0x4007ff01, > 0xe043ff89, > -0xfc0007e0, > -0x001f8000, > +0xe5e0274f, > +0x001fbc06, > 0x0007ff00, > 0xe043ff89, > 
0xaff70000, > 0xc03a0017, > 0x34070000, > 0xf0f00000, > -0xfc0007e0, > -0x001f8000, > +0xfc201fe1, > +0x001f8400, > 0x00470303, > 0x5c681000, > 0x00470202, > 0x5c681000, > 0x00470101, > 0x5c681000, > -0xfc0007e0, > +0xfde007e1, > 0x001f8000, > 0x00470000, > 0x5c681000, > diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp > index 90bbb55..86e14e8 100644 > --- a/src/shader/exascnv110.fp > +++ b/src/shader/exascnv110.fp > @@ -25,14 +25,14 @@ NV110FP_Source[] = { > }; > #else > > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) > ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > mufu rcp $r0 $r0 > ipa $r1 a[0x84] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x1) (st 0xf wt 0x3) (st 0xf) > ipa $r0 a[0x80] $r0 0x0 0x1 > tex nodep $r0 $r0 0x0 0x0 t2d 0xf > depbar le 0x5 0x0 0x0 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf) (st 0x0) (st 0x0) > exit > #endif > diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc > index 2dba15d..1fef5d2 100644 > --- a/src/shader/exascnv110.fpc > +++ b/src/shader/exascnv110.fpc > @@ -1,20 +1,20 @@ > -0xfc0007e0, > -0x001f8000, > +0xe1a0070f, > +0x003c3c01, > 0xcff7ff00, > 0xe003ff87, > 0x00470000, > 0x50800000, > 0x4007ff01, > 0xe043ff88, > -0xfc0007e0, > -0x001f8000, > +0xfde0072f, > +0x001fbc03, > 0x0007ff00, > 0xe043ff88, > 0xaff70000, > 0xc03a0007, > 0x34070000, > 0xf0f00000, > -0xfc0007e0, > +0xfc0007ef, > 0x001f8000, > 0x0007000f, > 0xe3000000, > diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp > index 2728311..dd3816c 100644 > --- a/src/shader/videonv110.fp > +++ b/src/shader/videonv110.fp > @@ -25,30 +25,30 @@ NV110FP_NV12[] = { > }; > #else > > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) > ipa pass $r2 a[0x7c] 0x0 0x0 0x1 > mufu rcp $r2 $r2 > ipa $r0 a[0x80] $r2 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 
0xf wr 0x1) > ipa $r1 a[0x84] $r2 0x0 0x1 > tex nodep $r4 $r0 0x0 0x0 t2d 0x8 > tex nodep $r0 $r0 0x0 0x1 t2d 0xc > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf) (st 0x6 wt 0x1) (st 0x6) > depbar le 0x5 0x1 0x1 > fmul ftz $r5 $r4 c0[0x0] > fadd ftz $r3 $r5 c0[0x4] > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x6) (st 0x6) (st 0xf) > fadd ftz $r4 $r5 c0[0x8] > fadd ftz $r5 $r5 c0[0xc] > depbar le 0x5 0x0 0x0 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x6 wt 0x2) (st 0x1) (st 0x1) > ffma ftz $r3 $r0 c0[0x10] $r3 > ffma ftz $r4 $r0 c0[0x14] $r4 > ffma ftz $r5 $r0 c0[0x18] $r5 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x1) (st 0x1) (st 0x6) > ffma ftz $r0 $r1 c0[0x1c] $r3 > ffma ftz $r2 $r1 c0[0x24] $r5 > ffma ftz $r1 $r1 c0[0x20] $r4 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf) (st 0x0) (st 0x0) > exit > #endif > diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc > index 31d745a..8fbc246 100644 > --- a/src/shader/videonv110.fpc > +++ b/src/shader/videonv110.fpc > @@ -1,52 +1,52 @@ > -0xfc0007e0, > -0x001f8000, > +0xe1a0070f, > +0x003c3c01, > 0xcff7ff02, > 0xe003ff87, > 0x00470202, > 0x50800000, > 0x0027ff00, > 0xe043ff88, > -0xfc0007e0, > -0x001f8000, > +0xe1e0072f, > +0x001cbc03, > 0x4027ff01, > 0xe043ff88, > 0x2ff70004, > 0xc03a0004, > 0x2ff70000, > 0xc03a0016, > -0xfc0007e0, > -0x001f8000, > +0xfcc007ef, > +0x001f9801, > 0x34170001, > 0xf0f00000, > 0x00070405, > 0x4c681000, > 0x00170503, > 0x4c581000, > -0xfc0007e0, > -0x001f8000, > +0xfcc007e6, > +0x001fbc00, > 0x00270504, > 0x4c581000, > 0x00370505, > 0x4c581000, > 0x34070000, > 0xf0f00000, > -0xfc0007e0, > -0x001f8000, > +0xfc2017e6, > +0x001f8400, > 0x00470003, > 0x49a00180, > 0x00570004, > 0x49a00200, > 0x00670005, > 0x49a00280, > -0xfc0007e0, > -0x001f8000, > +0xfc2007e1, > +0x001f9800, > 0x00770100, > 0x49a00180, > 0x00970102, > 0x49a00280, > 0x00870101, > 0x49a00200, > -0xfc0007e0, > +0xfc0007ef, > 0x001f8000, > 0x0007000f, > 0xe3000000, > -- > 2.11.0 > 
>-------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.freedesktop.org/archives/nouveau/attachments/20170610/900fdd28/attachment-0001.html>
On 06/10/2017 09:14 AM, Aaryaman Vasishta wrote:
> See the 'wt' on the first fmul in exacanv110.fp, exacmnv110.fp and > exasanv110.fp. Any ideas on what could be causing the first fmul to > require $r0 and/or $r1?
'tex nodep $r4 $r2 0x0 0x1 t2d 0xf' is actually: 'tex nodep $r4:$r7 $r2 0x0 0x1 t2d 0xf'. Very confusing, I know.
> > Cheers, > Aaryaman > > On Sat, Jun 10, 2017 at 4:10 PM, Aaryaman Vasishta > <jem456.vasishta at gmail.com <mailto:jem456.vasishta at gmail.com>> wrote: > > This patch adds proper delays to maxwell exa shaders. rendercheck tests > seem consistent with/without this patch. I haven't extensively tested > them though. > > Trello: > https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays > <https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays> > > Signed-off-by: Aaryaman Vasishta <jem456.vasishta at gmail.com > <mailto:jem456.vasishta at gmail.com>> > --- > src/shader/exac8nv110.fp | 10 +++++----- > src/shader/exac8nv110.fpc | 18 +++++++++--------- > src/shader/exacanv110.fp | 10 +++++----- > src/shader/exacanv110.fpc | 18 +++++++++--------- > src/shader/exacmnv110.fp | 10 +++++----- > src/shader/exacmnv110.fpc | 18 +++++++++--------- > src/shader/exas8nv110.fp | 6 +++--- > src/shader/exas8nv110.fpc | 12 ++++++------ > src/shader/exasanv110.fp | 10 +++++----- > src/shader/exasanv110.fpc | 18 +++++++++--------- > src/shader/exascnv110.fp | 6 +++--- > src/shader/exascnv110.fpc | 10 +++++----- > src/shader/videonv110.fp | 14 +++++++------- > src/shader/videonv110.fpc | 26 +++++++++++++------------- > 14 files changed, 93 insertions(+), 93 deletions(-) > > diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp > index ce78036..101b67f 100644 > --- a/src/shader/exac8nv110.fp > +++ b/src/shader/exac8nv110.fp > @@ -25,23 +25,23 @@ NV110FP_Composite_A8[] = { > }; > #else > > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) > ipa pass $r0 a[0x7c] 0x0 
0x0 0x1 > mufu rcp $r0 $r0 > ipa $r3 a[0x94] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 > wt 0x2) > ipa $r2 a[0x90] $r0 0x0 0x1 > tex nodep $r1 $r2 0x0 0x1 t2d 0x8 > ipa $r3 a[0x84] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf) > ipa $r2 a[0x80] $r0 0x0 0x1 > tex nodep $r0 $r2 0x0 0x0 t2d 0x8 > depbar le 0x5 0x0 0x0 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x6 wt 0x3) (st 0x1) (st 0x1) > fmul ftz $r3 $r0 $r1 > mov $r2 $r3 0xf > mov $r1 $r3 0xf > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x1) (st 0xf) (st 0x0) > mov $r0 $r3 0xf > exit > #endif > diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc > index 4aa1368..1f7d649 100644 > --- a/src/shader/exac8nv110.fpc > +++ b/src/shader/exac8nv110.fpc > @@ -1,36 +1,36 @@ > -0xfc0007e0, > -0x001f8000, > +0xe1a0070f, > +0x003c3c01, > 0xcff7ff00, > 0xe003ff87, > 0x00470000, > 0x50800000, > 0x4007ff03, > 0xe043ff89, > -0xfc0007e0, > -0x001f8000, > +0x21e0072f, > +0x005cbc03, > 0x0007ff02, > 0xe043ff89, > 0x2ff70201, > 0xc03a0014, > 0x4007ff03, > 0xe043ff88, > -0xfc0007e0, > -0x001f8000, > +0xe5e0074f, > +0x001fbc06, > 0x0007ff02, > 0xe043ff88, > 0x2ff70200, > 0xc03a0004, > 0x34070000, > 0xf0f00000, > -0xfc0007e0, > -0x001f8000, > +0xfc201fe6, > +0x001f8400, > 0x00170003, > 0x5c681000, > 0x00370002, > 0x5c980780, > 0x00370001, > 0x5c980780, > -0xfc0007e0, > +0xfde007e1, > 0x001f8000, > 0x00370000, > 0x5c980780, > diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp > index a70d5c5..8a9bd43 100644 > --- a/src/shader/exacanv110.fp > +++ b/src/shader/exacanv110.fp > @@ -25,23 +25,23 @@ NV110FP_CAComposite[] = { > }; > #else > > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) > ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > mufu rcp $r0 $r0 > ipa $r3 a[0x94] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched 
(st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x2) > ipa $r2 a[0x90] $r0 0x0 0x1 > tex nodep $r4 $r2 0x0 0x1 t2d 0xf > ipa $r1 a[0x84] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x2 wt 0x4) (st 0xf) > ipa $r0 a[0x80] $r0 0x0 0x1 > tex nodep $r0 $r0 0x0 0x0 t2d 0xf > depbar le 0x5 0x0 0x0 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x1 wt 0x4) (st 0x1) (st 0x1) > fmul ftz $r3 $r3 $r7 > fmul ftz $r2 $r2 $r6 > fmul ftz $r1 $r1 $r5 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x1 wt 0x1) (st 0xf) (st 0x0) > fmul ftz $r0 $r0 $r4 > exit > #endif > diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc > index 7c0ca5e..08a633c 100644 > --- a/src/shader/exacanv110.fpc > +++ b/src/shader/exacanv110.fpc > @@ -1,36 +1,36 @@ > -0xfc0007e0, > -0x001f8000, > +0xe1a0070f, > +0x003c3c01, > 0xcff7ff00, > 0xe003ff87, > 0x00470000, > 0x50800000, > 0x4007ff03, > 0xe043ff89, > -0xfc0007e0, > -0x001f8000, > +0x21e0072f, > +0x001d3c03, > 0x0007ff02, > 0xe043ff89, > 0xaff70204, > 0xc03a0017, > 0x4007ff01, > 0xe043ff88, > -0xfc0007e0, > -0x001f8000, > +0xe9e0274f, > +0x001fbc04, > 0x0007ff00, > 0xe043ff88, > 0xaff70000, > 0xc03a0007, > 0x34070000, > 0xf0f00000, > -0xfc0007e0, > -0x001f8000, > +0xfc2027e1, > +0x001f8400, > 0x00770303, > 0x5c681000, > 0x00670202, > 0x5c681000, > 0x00570101, > 0x5c681000, > -0xfc0007e0, > +0xfde00fe1, > 0x001f8000, > 0x00470000, > 0x5c681000, > diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp > index fe5c294..39c49de 100644 > --- a/src/shader/exacmnv110.fp > +++ b/src/shader/exacmnv110.fp > @@ -25,23 +25,23 @@ NV110FP_Composite[] = { > }; > #else > > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) > ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > mufu rcp $r0 $r0 > ipa $r3 a[0x94] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1 rd 0x2) > ipa $r2 a[0x90] $r0 
0x0 0x1 > tex nodep $r4 $r2 0x0 0x1 t2d 0x8 > ipa $r1 a[0x84] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x1 wt 0x6) (st 0xf) > ipa $r0 a[0x80] $r0 0x0 0x1 > tex nodep $r0 $r0 0x0 0x0 t2d 0xf > depbar le 0x5 0x0 0x0 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1) > fmul ftz $r3 $r3 $r4 > fmul ftz $r2 $r2 $r4 > fmul ftz $r1 $r1 $r4 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x1) (st 0xf) (st 0x0) > fmul ftz $r0 $r0 $r4 > exit > #endif > diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc > index 9d62c1a..f5f06e2 100644 > --- a/src/shader/exacmnv110.fpc > +++ b/src/shader/exacmnv110.fpc > @@ -1,36 +1,36 @@ > -0xfc0007e0, > -0x001f8000, > +0xe1a0070f, > +0x003c3c01, > 0xcff7ff00, > 0xe003ff87, > 0x00470000, > 0x50800000, > 0x4007ff03, > 0xe043ff89, > -0xfc0007e0, > -0x001f8000, > +0xe1e0072f, > +0x0008bc03, > 0x0007ff02, > 0xe043ff89, > 0x2ff70204, > 0xc03a0014, > 0x4007ff01, > 0xe043ff88, > -0xfc0007e0, > -0x001f8000, > +0xe5e0274f, > +0x001fbc06, > 0x0007ff00, > 0xe043ff88, > 0xaff70000, > 0xc03a0007, > 0x34070000, > 0xf0f00000, > -0xfc0007e0, > -0x001f8000, > +0xfc201fe1, > +0x001f8400, > 0x00470303, > 0x5c681000, > 0x00470202, > 0x5c681000, > 0x00470101, > 0x5c681000, > -0xfc0007e0, > +0xfde007e1, > 0x001f8000, > 0x00470000, > 0x5c681000, > diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp > index 4fe2e19..a555beb 100644 > --- a/src/shader/exas8nv110.fp > +++ b/src/shader/exas8nv110.fp > @@ -25,15 +25,15 @@ NV110FP_Source_A8[] = { > }; > #else > > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) > ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > mufu rcp $r0 $r0 > ipa $r1 a[0x84] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf) > ipa $r0 a[0x80] $r0 0x0 0x1 > tex nodep $r0 $r0 0x0 0x0 t2d 0x8 > depbar le 0x5 0x0 0x0 > -sched (st 0x0) (st 0x0) (st 0x0) > 
+sched (st 0x1 wt 0x1) (st 0x1) (st 0x1) > mov $r3 $r0 0xf > mov $r2 $r0 0xf > mov $r1 $r0 0xf > diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc > index 1181c41..e58d168 100644 > --- a/src/shader/exas8nv110.fpc > +++ b/src/shader/exas8nv110.fpc > @@ -1,21 +1,21 @@ > -0xfc0007e0, > -0x001f8000, > +0xe1a0070f, > +0x003c3c01, > 0xcff7ff00, > 0xe003ff87, > 0x00470000, > 0x50800000, > 0x4007ff01, > 0xe043ff88, > -0xfc0007e0, > -0x001f8000, > +0xe1e0072f, > +0x001fbc03, > 0x0007ff00, > 0xe043ff88, > 0x2ff70000, > 0xc03a0004, > 0x34070000, > 0xf0f00000, > -0xfc0007e0, > -0x001f8000, > +0xfc200fe1, > +0x001f8400, > 0x00070003, > 0x5c980780, > 0x00070002, > diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp > index 61374a6..9f8742a 100644 > --- a/src/shader/exasanv110.fp > +++ b/src/shader/exasanv110.fp > @@ -25,23 +25,23 @@ NV110FP_CACompositeSrcAlpha[] = { > }; > #else > > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) > ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > mufu rcp $r0 $r0 > ipa $r3 a[0x84] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1 rd 0x2) > ipa $r2 a[0x80] $r0 0x0 0x1 > tex nodep $r4 $r2 0x0 0x0 t2d 0x8 > ipa $r1 a[0x94] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x2 wt 0x4) (st 0xf wr 0x1 wt 0x6) (st 0xf) > ipa $r0 a[0x90] $r0 0x0 0x1 > tex nodep $r0 $r0 0x0 0x1 t2d 0xf > depbar le 0x5 0x0 0x0 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1) > fmul ftz $r3 $r3 $r4 > fmul ftz $r2 $r2 $r4 > fmul ftz $r1 $r1 $r4 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x1) (st 0xf) (st 0x0) > fmul ftz $r0 $r0 $r4 > exit > #endif > diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc > index 5516a03..c291298 100644 > --- a/src/shader/exasanv110.fpc > +++ b/src/shader/exasanv110.fpc > @@ -1,36 +1,36 @@ > -0xfc0007e0, > -0x001f8000, > +0xe1a0070f, > 
+0x003c3c01, > 0xcff7ff00, > 0xe003ff87, > 0x00470000, > 0x50800000, > 0x4007ff03, > 0xe043ff88, > -0xfc0007e0, > -0x001f8000, > +0xe1e0072f, > +0x0008bc03, > 0x0007ff02, > 0xe043ff88, > 0x2ff70204, > 0xc03a0004, > 0x4007ff01, > 0xe043ff89, > -0xfc0007e0, > -0x001f8000, > +0xe5e0274f, > +0x001fbc06, > 0x0007ff00, > 0xe043ff89, > 0xaff70000, > 0xc03a0017, > 0x34070000, > 0xf0f00000, > -0xfc0007e0, > -0x001f8000, > +0xfc201fe1, > +0x001f8400, > 0x00470303, > 0x5c681000, > 0x00470202, > 0x5c681000, > 0x00470101, > 0x5c681000, > -0xfc0007e0, > +0xfde007e1, > 0x001f8000, > 0x00470000, > 0x5c681000, > diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp > index 90bbb55..86e14e8 100644 > --- a/src/shader/exascnv110.fp > +++ b/src/shader/exascnv110.fp > @@ -25,14 +25,14 @@ NV110FP_Source[] = { > }; > #else > > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) > ipa pass $r0 a[0x7c] 0x0 0x0 0x1 > mufu rcp $r0 $r0 > ipa $r1 a[0x84] $r0 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x1) (st 0xf wt 0x3) (st 0xf) > ipa $r0 a[0x80] $r0 0x0 0x1 > tex nodep $r0 $r0 0x0 0x0 t2d 0xf > depbar le 0x5 0x0 0x0 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf) (st 0x0) (st 0x0) > exit > #endif > diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc > index 2dba15d..1fef5d2 100644 > --- a/src/shader/exascnv110.fpc > +++ b/src/shader/exascnv110.fpc > @@ -1,20 +1,20 @@ > -0xfc0007e0, > -0x001f8000, > +0xe1a0070f, > +0x003c3c01, > 0xcff7ff00, > 0xe003ff87, > 0x00470000, > 0x50800000, > 0x4007ff01, > 0xe043ff88, > -0xfc0007e0, > -0x001f8000, > +0xfde0072f, > +0x001fbc03, > 0x0007ff00, > 0xe043ff88, > 0xaff70000, > 0xc03a0007, > 0x34070000, > 0xf0f00000, > -0xfc0007e0, > +0xfc0007ef, > 0x001f8000, > 0x0007000f, > 0xe3000000, > diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp > index 2728311..dd3816c 100644 > --- a/src/shader/videonv110.fp > +++ b/src/shader/videonv110.fp > @@ 
-25,30 +25,30 @@ NV110FP_NV12[] = { > }; > #else > > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1) > ipa pass $r2 a[0x7c] 0x0 0x0 0x1 > mufu rcp $r2 $r2 > ipa $r0 a[0x80] $r2 0x0 0x1 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1) > ipa $r1 a[0x84] $r2 0x0 0x1 > tex nodep $r4 $r0 0x0 0x0 t2d 0x8 > tex nodep $r0 $r0 0x0 0x1 t2d 0xc > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf) (st 0x6 wt 0x1) (st 0x6) > depbar le 0x5 0x1 0x1 > fmul ftz $r5 $r4 c0[0x0] > fadd ftz $r3 $r5 c0[0x4] > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x6) (st 0x6) (st 0xf) > fadd ftz $r4 $r5 c0[0x8] > fadd ftz $r5 $r5 c0[0xc] > depbar le 0x5 0x0 0x0 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x6 wt 0x2) (st 0x1) (st 0x1) > ffma ftz $r3 $r0 c0[0x10] $r3 > ffma ftz $r4 $r0 c0[0x14] $r4 > ffma ftz $r5 $r0 c0[0x18] $r5 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0x1) (st 0x1) (st 0x6) > ffma ftz $r0 $r1 c0[0x1c] $r3 > ffma ftz $r2 $r1 c0[0x24] $r5 > ffma ftz $r1 $r1 c0[0x20] $r4 > -sched (st 0x0) (st 0x0) (st 0x0) > +sched (st 0xf) (st 0x0) (st 0x0) > exit > #endif > diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc > index 31d745a..8fbc246 100644 > --- a/src/shader/videonv110.fpc > +++ b/src/shader/videonv110.fpc > @@ -1,52 +1,52 @@ > -0xfc0007e0, > -0x001f8000, > +0xe1a0070f, > +0x003c3c01, > 0xcff7ff02, > 0xe003ff87, > 0x00470202, > 0x50800000, > 0x0027ff00, > 0xe043ff88, > -0xfc0007e0, > -0x001f8000, > +0xe1e0072f, > +0x001cbc03, > 0x4027ff01, > 0xe043ff88, > 0x2ff70004, > 0xc03a0004, > 0x2ff70000, > 0xc03a0016, > -0xfc0007e0, > -0x001f8000, > +0xfcc007ef, > +0x001f9801, > 0x34170001, > 0xf0f00000, > 0x00070405, > 0x4c681000, > 0x00170503, > 0x4c581000, > -0xfc0007e0, > -0x001f8000, > +0xfcc007e6, > +0x001fbc00, > 0x00270504, > 0x4c581000, > 0x00370505, > 0x4c581000, > 0x34070000, > 0xf0f00000, > -0xfc0007e0, > -0x001f8000, > +0xfc2017e6, > 
+0x001f8400, > 0x00470003, > 0x49a00180, > 0x00570004, > 0x49a00200, > 0x00670005, > 0x49a00280, > -0xfc0007e0, > -0x001f8000, > +0xfc2007e1, > +0x001f9800, > 0x00770100, > 0x49a00180, > 0x00970102, > 0x49a00280, > 0x00870101, > 0x49a00200, > -0xfc0007e0, > +0xfc0007ef, > 0x001f8000, > 0x0007000f, > 0xe3000000, > -- > 2.11.0 > >