Displaying 20 results from an estimated 20 matches for "depbar".
2017 Jun 27
4
[PATCH v4] nv110/exa: update sched codes
...(st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)
ipa $r2 a[0x90] $r0 0x0 0x1
tex nodep $r1 $r2 0x0 0x1 t2d 0x8
ipa $r3 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
ipa $r2 a[0x80] $r0 0x0 0x1
tex nodep $r0 $r2 0x0 0x0 t2d 0x8
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
fmul ftz $r3 $r0 $r1
mov $r2 $r3 0xf
mov $r1 $r3 0xf
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0xf) (st 0x0)
mov $r0 $r3 0xf
exit
#endif
diff --git a/src/shader/exac8nv110.fpc b/src/shader/ex...
2017 Jun 28
1
[PATCH v4] nv110/exa: update sched codes
Hi,
On Wed, Jun 28, 2017 at 12:53 PM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
> BTW, you can drop those explicit "depbar" ops. I think they're only
> needed when you're doing something weird with barriers. Blob doesn't
> use them (anymore)
>
Gotcha. Should I remove them in the same patch or a different one? It seems
like the depbar removal is different than what the commit message describes...
2017 Jun 10
2
[PATCH v3] nv110/exa: update sched codes
...(st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)
ipa $r2 a[0x90] $r0 0x0 0x1
tex nodep $r1 $r2 0x0 0x1 t2d 0x8
ipa $r3 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
ipa $r2 a[0x80] $r0 0x0 0x1
tex nodep $r0 $r2 0x0 0x0 t2d 0x8
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
fmul ftz $r3 $r0 $r1
mov $r2 $r3 0xf
mov $r1 $r3 0xf
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0xf) (st 0x0)
mov $r0 $r3 0xf
exit
#endif
diff --git a/src/shader/exac8nv110.fpc b/src/shader/ex...
2017 Jun 03
2
[PATCH v2] nv110/exa: update sched codes
...(st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)
ipa $r2 a[0x90] $r0 0x0 0x1
tex nodep $r1 $r2 0x0 0x1 t2d 0x8
ipa $r3 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
ipa $r2 a[0x80] $r0 0x0 0x1
tex nodep $r0 $r2 0x0 0x0 t2d 0x8
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6 wt 0x3) (st 0x6) (st 0x1)
fmul ftz $r3 $r0 $r1
mov $r2 $r3 0xf
mov $r1 $r3 0xf
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6) (st 0xf) (st 0x0)
mov $r0 $r3 0xf
exit
#endif
diff --git a/src/shader/exac8nv110.fpc b/src/shader/ex...
2017 Jun 28
0
[PATCH v4] nv110/exa: update sched codes
BTW, you can drop those explicit "depbar" ops. I think they're only
needed when you're doing something weird with barriers. Blob doesn't
use them (anymore)
On Tue, Jun 27, 2017 at 11:16 AM, Aaryaman Vasishta
<jem456.vasishta at gmail.com> wrote:
> v4: Updated the wait dependency bars based on tex component masks...
2017 Jun 07
2
[PATCH v2] nv110/exa: update sched codes
...>> +sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
>> ipa $r2 a[0x80] $r0 0x0 0x1
>> tex nodep $r0 $r2 0x0 0x0 t2d 0x8
>>
>
> Out of curiosity, why didn't you add a read-dep-bar on $r2:$r3 here?
Missed it, thanks for pointing it out.
>
>
> depbar le 0x5 0x0 0x0
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0x6 wt 0x3) (st 0x6) (st 0x1)
>> fmul ftz $r3 $r0 $r1
>> mov $r2 $r3 0xf
>>
>
> You can stall for only one cycle here, but the 6 cycles on fmul are needed.
>
> mov $r1 $r3 0xf
>> -...
2017 Jun 03
0
[PATCH] nv110/exa: update sched codes
...(st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)
ipa $r2 a[0x90] $r0 0x0 0x1
tex nodep $r1 $r2 0x0 0x1 t2d 0x8
ipa $r3 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
ipa $r2 a[0x80] $r0 0x0 0x1
tex nodep $r0 $r2 0x0 0x0 t2d 0x8
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6 wt 0x3) (st 0x6) (st 0x1)
fmul ftz $r3 $r0 $r1
mov $r2 $r3 0xf
mov $r1 $r3 0xf
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6) (st 0xf) (st 0x0)
mov $r0 $r3 0xf
exit
#endif
diff --git a/src/shader/exac8nv110.fpc b/src/shader/ex...
2017 Jun 29
0
[PATCH v4] nv110/exa: update sched codes
...t 0x2)
> ipa $r2 a[0x90] $r0 0x0 0x1
> tex nodep $r1 $r2 0x0 0x1 t2d 0x8
> ipa $r3 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
> ipa $r2 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r2 0x0 0x0 t2d 0x8
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
> fmul ftz $r3 $r0 $r1
> mov $r2 $r3 0xf
> mov $r1 $r3 0xf
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0xf) (st 0x0)
> mov $r0 $r3 0xf
> exit
> #endi...
2017 Jun 10
0
[PATCH v3] nv110/exa: update sched codes
...0x1 wt 0x2)
> ipa $r2 a[0x90] $r0 0x0 0x1
> tex nodep $r1 $r2 0x0 0x1 t2d 0x8
> ipa $r3 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
> ipa $r2 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r2 0x0 0x0 t2d 0x8
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
> fmul ftz $r3 $r0 $r1
> mov $r2 $r3 0xf
> mov $r1 $r3 0xf
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0xf) (st 0x0)
> mov $r0 $r3 0xf
> exit
> #endif
>...
2017 Jun 05
0
[PATCH v2] nv110/exa: update sched codes
...2d 0x8
> ipa $r3 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
> ipa $r2 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r2 0x0 0x0 t2d 0x8
Out of curiosity, why didn't you add a read-dep-bar on $r2:$r3 here?
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x6 wt 0x3) (st 0x6) (st 0x1)
> fmul ftz $r3 $r0 $r1
> mov $r2 $r3 0xf
You can stall for only one cycle here, but the 6 cycles on fmul are needed.
> mov $r1 $r3 0xf
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sch...
2017 Jun 08
1
[PATCH v2] nv110/exa: update sched codes
...r2:r3, which are written by the two 'ipa' above it, have already
been waited on in this tex, and both of them read $r0 so we can safely
assume that since the two 'ipa' instructions are already waited on, $r0
will be ready?
>
>
>
>>
>>
>> depbar le 0x5 0x0 0x0
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0x6 wt 0x3) (st 0x6) (st 0x1)
>> fmul ftz $r3 $r0 $r1
>> mov $r2 $r3 0xf
>>
>>
>> You can stall for only one cycle here, but the 6 cycles on fmul is...
2017 Jul 01
2
[PATCH 1/2] nv110/exa: Remove depbars
...--git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
index ce78036..220d7e5 100644
--- a/src/shader/exac8nv110.fp
+++ b/src/shader/exac8nv110.fp
@@ -36,12 +36,11 @@ ipa $r3 a[0x84] $r0 0x0 0x1
sched (st 0x0) (st 0x0) (st 0x0)
ipa $r2 a[0x80] $r0 0x0 0x1
tex nodep $r0 $r2 0x0 0x0 t2d 0x8
-depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
fmul ftz $r3 $r0 $r1
+sched (st 0x0) (st 0x0) (st 0x0)
mov $r2 $r3 0xf
mov $r1 $r3 0xf
-sched (st 0x0) (st 0x0) (st 0x0)
mov $r0 $r3 0xf
+sched (st 0x0) (st 0x0) (st 0x0)
exit
#endif
diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv11...
2017 Jun 07
0
[PATCH v2] nv110/exa: update sched codes
...>
> Missed it, thanks for pointing it out.
You don't have to. 'tex' reads two sources ($r2:$r3) and writes into
$r0, but as $r2:$r3 are NOT re-used before $r0 is read, you can assume
that $r0 will be ready and don't need any read-dep-bar.
>
>
>
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x6 wt 0x3) (st 0x6) (st 0x1)
> fmul ftz $r3 $r0 $r1
> mov $r2 $r3 0xf
>
>
> You can stall for only one cycle here, but the 6 cycles on fmul are
> needed.
>
>...
2017 Jun 12
2
[PATCH v3] nv110/exa: update sched codes
...] $r0 0x0 0x1
> tex nodep $r1 $r2 0x0 0x1 t2d 0x8
> ipa $r3 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
> ipa $r2 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r2 0x0 0x0 t2d 0x8
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
> fmul ftz $r3 $r0 $r1
> mov $r2 $r3 0xf
> mov $r1 $r3 0xf
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0xf) (st 0x0)
> mov $r0...
2018 Sep 08
0
[PATCH] maxwell,pascal: add scheduling data to shaders
...(st 0x2 wr 0x1 rd 0x0 wt 0x3) (st 0x1 wr 0x0 wt 0x1)
ipa $r2 a[0x90] $r0 0x0 0x1
tex nodep $r1 $r2 0x0 0x1 t2d 0x8
ipa $r3 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x2) (st 0x1 wr 0x0 wt 0x5) (st 0xe)
ipa $r2 a[0x80] $r0 0x0 0x1
tex nodep $r0 $r2 0x0 0x0 t2d 0x8
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
fmul ftz $r3 $r0 $r1
mov $r2 $r3 0xf
mov $r1 $r3 0xf
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0xf wt 0x3f) (st 0x1)
mov $r0 $r3 0xf
exit
+nop 0x0
#endif
diff --git a/src/shader/exac8nv110.fp...
2016 Oct 16
2
[PATCH] exa: add GM10x acceleration support
...ss $r0 a[0x7c] 0x0 0x0 0x1
+mufu rcp $r0 $r0
+ipa $r3 a[0x94] $r0 0x0 0x1
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa $r2 a[0x90] $r0 0x0 0x1
+tex nodep $r1 $r2 0x0 0x1 t2d 0x8
+ipa $r3 a[0x84] $r0 0x0 0x1
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa $r2 a[0x80] $r0 0x0 0x1
+tex nodep $r0 $r2 0x0 0x0 t2d 0x8
+depbar le 0x5 0x0 0x0
+sched (st 0x0) (st 0x0) (st 0x0)
+fmul ftz $r3 $r0 $r1
+mov $r2 $r3 0xf
+mov $r1 $r3 0xf
+sched (st 0x0) (st 0x0) (st 0x0)
+mov $r0 $r3 0xf
+exit
+#endif
diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
new file mode 100644
index 0000000..4aa1368
--- /dev/null
+++...
2016 Oct 27
0
[PATCH v2 1/7] exa: add GM10x acceleration support
...ss $r0 a[0x7c] 0x0 0x0 0x1
+mufu rcp $r0 $r0
+ipa $r3 a[0x94] $r0 0x0 0x1
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa $r2 a[0x90] $r0 0x0 0x1
+tex nodep $r1 $r2 0x0 0x1 t2d 0x8
+ipa $r3 a[0x84] $r0 0x0 0x1
+sched (st 0x0) (st 0x0) (st 0x0)
+ipa $r2 a[0x80] $r0 0x0 0x1
+tex nodep $r0 $r2 0x0 0x0 t2d 0x8
+depbar le 0x5 0x0 0x0
+sched (st 0x0) (st 0x0) (st 0x0)
+fmul ftz $r3 $r0 $r1
+mov $r2 $r3 0xf
+mov $r1 $r3 0xf
+sched (st 0x0) (st 0x0) (st 0x0)
+mov $r0 $r3 0xf
+exit
+#endif
diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
new file mode 100644
index 0000000..4aa1368
--- /dev/null
+++...
2016 Oct 17
0
[PATCH] exa: add GM10x acceleration support
...> +ipa $r3 a[0x94] $r0 0x0 0x1
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa $r2 a[0x90] $r0 0x0 0x1
> +tex nodep $r1 $r2 0x0 0x1 t2d 0x8
> +ipa $r3 a[0x84] $r0 0x0 0x1
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa $r2 a[0x80] $r0 0x0 0x1
> +tex nodep $r0 $r2 0x0 0x0 t2d 0x8
> +depbar le 0x5 0x0 0x0
> +sched (st 0x0) (st 0x0) (st 0x0)
> +fmul ftz $r3 $r0 $r1
> +mov $r2 $r3 0xf
> +mov $r1 $r3 0xf
> +sched (st 0x0) (st 0x0) (st 0x0)
> +mov $r0 $r3 0xf
> +exit
> +#endif
> diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
> new file mod...
2016 Oct 27
11
[PATCH v2 0/7] Add Maxwell support
I believe I've addressed all the feedback from the first time around, and
also made fixes necessary for GM20x based on testing results. I believe
now it should actually work for all GM10x and GM20x. Further, GP10x should
be very easy to add, but without someone to actually test I didn't want to
claim support for it.
Ilia Mirkin (7):
exa: add GM10x acceleration support
hwdefs: update
2017 Jul 01
0
[PATCH v5 2/2] nv110/exa: update sched codes
v5: Rebased on depbar removal patch; removed a redundant read dep-bar.
This patch adds proper delays to maxwell exa shaders. rendercheck tests
seem consistent with/without this patch. I haven't extensively tested
them though.
Trello:
https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays
Sign...