Mel Henning
2025-Sep-23  23:42 UTC
[PATCH 0/1] nouveau: On nvc0 membar between semaphore write and
This takes the fix from 2cb66ae604 ("nouveau: Membar before between
semaphore writes and the interrupt") and also applies it to nvc0_fence.c.
I've tested this on both ampere and kepler. Unfortunately, I don't have
a maxwell card to test on so that branch in the patch hasn't been tested
yet.
Mel Henning (1):
  nouveau: On nvc0 membar between semaphore write and interrupt
 .../drm/nouveau/include/nvhw/class/cl906f.h   | 23 +++++
 .../drm/nouveau/include/nvhw/class/clb06f.h   | 54 +++++++++++
 .../drm/nouveau/include/nvhw/class/clc06f.h   | 93 +++++++++++++++++++
 .../gpu/drm/nouveau/include/nvif/push906f.h   |  2 +
 drivers/gpu/drm/nouveau/nvc0_fence.c          | 31 ++++++-
 5 files changed, 200 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/nouveau/include/nvhw/class/clb06f.h
 create mode 100644 drivers/gpu/drm/nouveau/include/nvhw/class/clc06f.h
-- 
2.51.0
Mel Henning
2025-Sep-23  23:42 UTC
[PATCH 1/1] nouveau: On nvc0 membar between semaphore write and interrupt
This takes the fix from 2cb66ae604 ("nouveau: Membar before between
semaphore writes and the interrupt") and applies it to nvc0_fence.c.
If I force my ampere system down the nvc0_fence path then I reproduce the
same issues with transfer queues + The Talos Principle that were fixed by
the above commit. This fixes that issue in exactly the same way for the
old code path.
Fixes: 60cdadace320 ("drm/nouveau/fence: use NVIDIA's headers for
emit()")
Signed-off-by: Mel Henning <mhenning at darkrefraction.com>
Cc: stable at vger.kernel.org
---
 .../drm/nouveau/include/nvhw/class/cl906f.h   | 23 +++++
 .../drm/nouveau/include/nvhw/class/clb06f.h   | 54 +++++++++++
 .../drm/nouveau/include/nvhw/class/clc06f.h   | 93 +++++++++++++++++++
 .../gpu/drm/nouveau/include/nvif/push906f.h   |  2 +
 drivers/gpu/drm/nouveau/nvc0_fence.c          | 31 ++++++-
 5 files changed, 200 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/nouveau/include/nvhw/class/clb06f.h
 create mode 100644 drivers/gpu/drm/nouveau/include/nvhw/class/clc06f.h
diff --git a/drivers/gpu/drm/nouveau/include/nvhw/class/cl906f.h
b/drivers/gpu/drm/nouveau/include/nvhw/class/cl906f.h
index 673d668885bb..529c785b4651 100644
--- a/drivers/gpu/drm/nouveau/include/nvhw/class/cl906f.h
+++ b/drivers/gpu/drm/nouveau/include/nvhw/class/cl906f.h
@@ -47,6 +47,29 @@
 #define NV906F_SEMAPHORED_RELEASE_SIZE_4BYTE                        0x00000001
 #define NV906F_NON_STALL_INTERRUPT                                 (0x00000020)
 #define NV906F_NON_STALL_INTERRUPT_HANDLE                                 31:0
+#define NV906F_MEM_OP_A                                            (0x00000028)
+#define NV906F_MEM_OP_A_OPERAND_LOW                                       31:2
+#define NV906F_MEM_OP_A_TLB_INVALIDATE_ADDR                               29:2
+#define NV906F_MEM_OP_A_TLB_INVALIDATE_TARGET                            31:30
+#define NV906F_MEM_OP_A_TLB_INVALIDATE_TARGET_VID_MEM               0x00000000
+#define NV906F_MEM_OP_A_TLB_INVALIDATE_TARGET_SYS_MEM_COHERENT      0x00000002
+#define NV906F_MEM_OP_A_TLB_INVALIDATE_TARGET_SYS_MEM_NONCOHERENT   0x00000003
+#define NV906F_MEM_OP_B                                            (0x0000002c)
+#define NV906F_MEM_OP_B_OPERAND_HIGH                                       7:0
+#define NV906F_MEM_OP_B_OPERATION                                        31:27
+#define NV906F_MEM_OP_B_OPERATION_SYSMEMBAR_FLUSH                   0x00000005
+#define NV906F_MEM_OP_B_OPERATION_SOFT_FLUSH                        0x00000006
+#define NV906F_MEM_OP_B_OPERATION_MMU_TLB_INVALIDATE                0x00000009
+#define NV906F_MEM_OP_B_OPERATION_L2_PEERMEM_INVALIDATE             0x0000000d
+#define NV906F_MEM_OP_B_OPERATION_L2_SYSMEM_INVALIDATE              0x0000000e
+#define NV906F_MEM_OP_B_OPERATION_L2_CLEAN_COMPTAGS                 0x0000000f
+#define NV906F_MEM_OP_B_OPERATION_L2_FLUSH_DIRTY                    0x00000010
+#define NV906F_MEM_OP_B_MMU_TLB_INVALIDATE_PDB                             0:0
+#define NV906F_MEM_OP_B_MMU_TLB_INVALIDATE_PDB_ONE                  0x00000000
+#define NV906F_MEM_OP_B_MMU_TLB_INVALIDATE_PDB_ALL                  0x00000001
+#define NV906F_MEM_OP_B_MMU_TLB_INVALIDATE_GPC                             1:1
+#define NV906F_MEM_OP_B_MMU_TLB_INVALIDATE_GPC_ENABLE               0x00000000
+#define NV906F_MEM_OP_B_MMU_TLB_INVALIDATE_GPC_DISABLE              0x00000001
 #define NV906F_SET_REFERENCE                                       (0x00000050)
 #define NV906F_SET_REFERENCE_COUNT                                        31:0
 
diff --git a/drivers/gpu/drm/nouveau/include/nvhw/class/clb06f.h
b/drivers/gpu/drm/nouveau/include/nvhw/class/clb06f.h
new file mode 100644
index 000000000000..15edc9d8dcbe
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvhw/class/clb06f.h
@@ -0,0 +1,54 @@
+/*******************************************************************************
+    Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+
+    Permission is hereby granted, free of charge, to any person obtaining a
+    copy of this software and associated documentation files (the
"Software"),
+    to deal in the Software without restriction, including without limitation
+    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+    and/or sell copies of the Software, and to permit persons to whom the
+    Software is furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+    DEALINGS IN THE SOFTWARE.
+
+*******************************************************************************/
+#ifndef _clb06f_h_
+#define _clb06f_h_
+
+/* fields and values */
+// NOTE - MEM_OP_A and MEM_OP_B have been removed for gm20x to make room for
+// possible future MEM_OP features.  MEM_OP_C/D have identical functionality
+// to the previous MEM_OP_A/B methods.
+#define NVB06F_MEM_OP_C                                            (0x00000030)
+#define NVB06F_MEM_OP_C_OPERAND_LOW                                       31:2
+#define NVB06F_MEM_OP_C_TLB_INVALIDATE_PDB                                 0:0
+#define NVB06F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE                      0x00000000
+#define NVB06F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL                      0x00000001
+#define NVB06F_MEM_OP_C_TLB_INVALIDATE_GPC                                 1:1
+#define NVB06F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE                   0x00000000
+#define NVB06F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE                  0x00000001
+#define NVB06F_MEM_OP_C_TLB_INVALIDATE_TARGET                            11:10
+#define NVB06F_MEM_OP_C_TLB_INVALIDATE_TARGET_VID_MEM               0x00000000
+#define NVB06F_MEM_OP_C_TLB_INVALIDATE_TARGET_SYS_MEM_COHERENT      0x00000002
+#define NVB06F_MEM_OP_C_TLB_INVALIDATE_TARGET_SYS_MEM_NONCOHERENT   0x00000003
+#define NVB06F_MEM_OP_C_TLB_INVALIDATE_ADDR_LO                           31:12
+#define NVB06F_MEM_OP_D                                            (0x00000034)
+#define NVB06F_MEM_OP_D_OPERAND_HIGH                                       7:0
+#define NVB06F_MEM_OP_D_OPERATION                                        31:27
+#define NVB06F_MEM_OP_D_OPERATION_MEMBAR                            0x00000005
+#define NVB06F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE                0x00000009
+#define NVB06F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE             0x0000000d
+#define NVB06F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE              0x0000000e
+#define NVB06F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS                 0x0000000f
+#define NVB06F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY                    0x00000010
+#define NVB06F_MEM_OP_D_TLB_INVALIDATE_ADDR_HI                             7:0
+
+#endif /* _clb06f_h_ */
diff --git a/drivers/gpu/drm/nouveau/include/nvhw/class/clc06f.h
b/drivers/gpu/drm/nouveau/include/nvhw/class/clc06f.h
new file mode 100644
index 000000000000..4d0f13f79c17
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvhw/class/clc06f.h
@@ -0,0 +1,93 @@
+/*******************************************************************************
+    Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+
+    Permission is hereby granted, free of charge, to any person obtaining a
+    copy of this software and associated documentation files (the
"Software"),
+    to deal in the Software without restriction, including without limitation
+    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+    and/or sell copies of the Software, and to permit persons to whom the
+    Software is furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+    DEALINGS IN THE SOFTWARE.
+
+*******************************************************************************/
+#ifndef _clc06f_h_
+#define _clc06f_h_
+
+/* fields and values */
+// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
+// specifying the page address for a targeted TLB invalidate and the uTLB for
+// a targeted REPLAY_CANCEL for UVM.
+// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly
+// rearranged fields.
+#define NVC06F_MEM_OP_A                                            (0x00000028)
+#define NVC06F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID        5:0 
// only relevant for REPLAY_CANCEL_TARGETED
+#define NVC06F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID               10:6 
// only relevant for REPLAY_CANCEL_TARGETED
+#define NVC06F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR                         11:11
+#define NVC06F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN                 0x00000001
+#define NVC06F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS                0x00000000
+#define NVC06F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO                    31:12
+#define NVC06F_MEM_OP_B                                            (0x0000002c)
+#define NVC06F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI                     31:0
+#define NVC06F_MEM_OP_C                                            (0x00000030)
+#define NVC06F_MEM_OP_C_MEMBAR_TYPE                                        2:0
+#define NVC06F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR                      0x00000000
+#define NVC06F_MEM_OP_C_MEMBAR_TYPE_MEMBAR                          0x00000001
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB                                 0:0
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE                      0x00000000
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL                      0x00000001 
// Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_GPC                                 1:1
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE                   0x00000000
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE                  0x00000001
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_REPLAY                              4:2 
// only relevant if GPC ENABLE
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE                  0x00000000
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START                 0x00000001
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL         0x00000002
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED       0x00000003
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL         0x00000004
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE                            6:5 
// only relevant if GPC ENABLE
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE                0x00000000
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY            0x00000001
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE           0x00000002
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL                    9:7 
// Invalidate affects this level and all below
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL         0x00000000 
// Invalidate tlb caches at all levels of the page table
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY    0x00000001
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0  0x00000002
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1  0x00000003
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2  0x00000004
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3  0x00000005
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4  0x00000006
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5  0x00000007
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE                         
11:10  // only relevant if PDB_ONE
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM            
0x00000000
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT   
0x00000002
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT
0x00000003
+#define NVC06F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO                       31:12 
// only relevant if PDB_ONE
+// MEM_OP_D MUST be preceded by MEM_OPs A-C.
+#define NVC06F_MEM_OP_D                                            (0x00000034)
+#define NVC06F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI                        26:0 
// only relevant if PDB_ONE
+#define NVC06F_MEM_OP_D_OPERATION                                        31:27
+#define NVC06F_MEM_OP_D_OPERATION_MEMBAR                            0x00000005
+#define NVC06F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE                0x00000009
+#define NVC06F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED       0x0000000a
+#define NVC06F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE             0x0000000d
+#define NVC06F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE              0x0000000e
+// CLEAN_LINES is an alias for Tegra/GPU IP usage
+#define NVC06F_MEM_OP_D_OPERATION_L2_INVALIDATE_CLEAN_LINES         0x0000000e
+// This B alias is confusing but it was missed as part of the update. Left here
+// for compatibility.
+#define NVC06F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES         0x0000000e
+#define NVC06F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS                 0x0000000f
+#define NVC06F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY                    0x00000010
+#define NVC06F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS     0x00000015
+
+#endif /* _clc06f_h_ */
diff --git a/drivers/gpu/drm/nouveau/include/nvif/push906f.h
b/drivers/gpu/drm/nouveau/include/nvif/push906f.h
index 79df71de98d2..3d506f4dc2c9 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/push906f.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/push906f.h
@@ -7,6 +7,8 @@
 #ifndef PUSH906F_SUBC
 // Host methods
 #define PUSH906F_SUBC_NV906F	0
+#define PUSH906F_SUBC_NVB06F	0
+#define PUSH906F_SUBC_NVC06F	0
 #define PUSH906F_SUBC_NVC36F	0
 
 // Twod
diff --git a/drivers/gpu/drm/nouveau/nvc0_fence.c
b/drivers/gpu/drm/nouveau/nvc0_fence.c
index a5e98d0d4217..8b36deaaf8cf 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fence.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fence.c
@@ -27,15 +27,18 @@
 
 #include "nv50_display.h"
 
+#include <nvif/class.h>
 #include <nvif/push906f.h>
 
 #include <nvhw/class/cl906f.h>
+#include <nvhw/class/clb06f.h>
+#include <nvhw/class/clc06f.h>
 
 static int
 nvc0_fence_emit32(struct nouveau_channel *chan, u64 virtual, u32 sequence)
 {
 	struct nvif_push *push = &chan->chan.push;
-	int ret = PUSH_WAIT(push, 6);
+	int ret = PUSH_WAIT(push, 12);
 	if (ret == 0) {
 		PUSH_MTHD(push, NV906F, SEMAPHOREA,
 			  NVVAL(NV906F, SEMAPHOREA, OFFSET_UPPER, upper_32_bits(virtual)),
@@ -46,9 +49,31 @@ nvc0_fence_emit32(struct nouveau_channel *chan, u64 virtual,
u32 sequence)
 					SEMAPHORED,
 			  NVDEF(NV906F, SEMAPHORED, OPERATION, RELEASE) |
 			  NVDEF(NV906F, SEMAPHORED, RELEASE_WFI, EN) |
-			  NVDEF(NV906F, SEMAPHORED, RELEASE_SIZE, 16BYTE),
+			  NVDEF(NV906F, SEMAPHORED, RELEASE_SIZE, 16BYTE));
+
+		if (chan->user.oclass >= PASCAL_CHANNEL_GPFIFO_A) {
+			PUSH_MTHD(push, NVC06F, MEM_OP_A, 0,
+						MEM_OP_B, 0,
+
+						MEM_OP_C,
+				  NVDEF(NVC06F, MEM_OP_C, MEMBAR_TYPE, SYS_MEMBAR),
+
+						MEM_OP_D,
+				  NVDEF(NVC06F, MEM_OP_D, OPERATION, MEMBAR));
+		} else if (chan->user.oclass >= MAXWELL_CHANNEL_GPFIFO_A) {
+			PUSH_MTHD(push, NVB06F, MEM_OP_C, 0,
+
+						MEM_OP_D,
+				  NVDEF(NVC06F, MEM_OP_D, OPERATION, MEMBAR));
+		} else {
+			PUSH_MTHD(push, NV906F, MEM_OP_A, 0,
+
+						MEM_OP_B,
+				  NVDEF(NV906F, MEM_OP_B, OPERATION, SYSMEMBAR_FLUSH));
+		}
+
+		PUSH_MTHD(push, NV906F, NON_STALL_INTERRUPT, 0);
 
-					NON_STALL_INTERRUPT, 0);
 		PUSH_KICK(push);
 	}
 	return ret;
-- 
2.51.0