Timur Tabi
2025-Dec-12 20:49 UTC
[PATCH 0/7] gpu: nova-core: expose the logging buffers via debugfs
GSP-RM writes its printf messages to "logging buffers", which are blocks of memory allocated by the driver. The messages are encoded, so exposing the buffers as debugfs entries allows the buffers to be extracted and decoded by a special application. When the driver loads, a /sys/kernel/debug/nova_core root entry is created. To do this, the normal module_pci_driver! macro call is replaced with an explicit initialization function, as this allows that debugfs entry to be created once for all GPUs. Then in each GPU's initialization, a subdirectory based on the PCI BDF name is created, and the logging buffer entries are created under that. Note: the debugfs entry has a file size of 0, because debugfs defaults to a size of 0 and the Rust abstractions do not adjust it for the size of the object. Nouveau makes this adjustment manually in the driver. Summary of changes: 1. Replace module_pci_driver! with explicit init function. 2. Create root, per-GPU, and individual buffer debugfs entries. 3. Add a pci::name() method to generate a PCI device name string. Alexandre Courbot (2): gpu: nova-core: implement BinaryWriter for LogBuffer gpu: nova-core: create loginit debugfs entry Timur Tabi (5): rust: pci: add PCI device name method gpu: nova-core: Replace module_pci_driver! with explicit module init gpu: nova-core: create debugfs root in PCI init closure gpu: nova-core: use pin projection in method boot() gpu: nova-core: create GSP-RM logging buffers debugfs entries drivers/gpu/nova-core/gsp.rs | 50 +++++++++++++++++++++++++----- drivers/gpu/nova-core/gsp/boot.rs | 15 ++++----- drivers/gpu/nova-core/nova_core.rs | 41 ++++++++++++++++++++++-- rust/helpers/pci.c | 5 +++ rust/kernel/pci.rs | 37 ++++++++++++++++++++++ 5 files changed, 131 insertions(+), 17 deletions(-) base-commit: 187d0801404f415f22c0b31531982c7ea97fa341 -- 2.52.0
Add a name() method to the PCI `Device` type, which returns a CStr
that contains the device name, typically the BDF address.
Signed-off-by: Timur Tabi <ttabi at nvidia.com>
---
rust/helpers/pci.c | 5 +++++
rust/kernel/pci.rs | 37 +++++++++++++++++++++++++++++++++++++
2 files changed, 42 insertions(+)
diff --git a/rust/helpers/pci.c b/rust/helpers/pci.c
index bf8173979c5e..411bc743fcc2 100644
--- a/rust/helpers/pci.c
+++ b/rust/helpers/pci.c
@@ -2,6 +2,11 @@
#include <linux/pci.h>
+const char *rust_helper_pci_name(const struct pci_dev *pdev)
+{
+ return pci_name(pdev);
+}
+
u16 rust_helper_pci_dev_id(struct pci_dev *dev)
{
return PCI_DEVID(dev->bus->number, dev->devfn);
diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs
index 82e128431f08..d0c0c2f6aa32 100644
--- a/rust/kernel/pci.rs
+++ b/rust/kernel/pci.rs
@@ -427,6 +427,43 @@ pub fn pci_class(&self) -> Class {
// SAFETY: `self.as_raw` is a valid pointer to a `struct pci_dev`.
Class::from_raw(unsafe { (*self.as_raw()).class })
}
+
+ /// Returns the PCI device name.
+ ///
+ /// This returns the device name in the format "DDDD:BB:DD.F"
where:
+ /// - DDDD is the PCI domain (4 hex digits)
+ /// - BB is the bus number (2 hex digits)
+ /// - DD is the device number (2 hex digits)
+ /// - F is the function number (1 hex digit)
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use kernel::{c_str, debugfs::Dir, device::Core, pci, prelude::*};
+ /// fn create_debugfs(pdev: &pci::Device<Core>) -> Result {
+ /// let dir = Dir::new(pdev.name());
+ /// Ok(())
+ /// }
+ /// ```
+ #[inline]
+ pub fn name(&self) -> &CStr {
+ // SAFETY: By its type invariant `self.as_raw` is always a valid
pointer to a
+ // `struct pci_dev`, which contains a `struct device dev` member.
+ unsafe {
+ let pci_dev = self.as_raw();
+ let dev = addr_of_mut!((*pci_dev).dev);
+
+ // If init_name is set, use it; otherwise use the kobject name
+ let init_name = (*dev).init_name;
+ let name_ptr = if !init_name.is_null() {
+ init_name
+ } else {
+ (*dev).kobj.name
+ };
+
+ CStr::from_char_ptr(name_ptr)
+ }
+ }
}
impl Device<device::Core> {
--
2.52.0
Timur Tabi
2025-Dec-12 20:49 UTC
[PATCH 2/7] gpu: nova-core: Replace module_pci_driver! with explicit module init
Replace the module_pci_driver! macro with an explicit module
initialization using the standard module! macro and InPlaceModule
trait implementation. No functional change intended, with the
exception that the driver now prints a message when loaded.
Also add a no-op module exit function as a template.
This change is necessary so that we can create a top-level "nova_core"
debugfs entry when the driver is loaded.
Signed-off-by: Timur Tabi <ttabi at nvidia.com>
---
drivers/gpu/nova-core/nova_core.rs | 29 ++++++++++++++++++++++++++---
1 file changed, 26 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/nova-core/nova_core.rs
b/drivers/gpu/nova-core/nova_core.rs
index b98a1c03f13d..7d7b75650b04 100644
--- a/drivers/gpu/nova-core/nova_core.rs
+++ b/drivers/gpu/nova-core/nova_core.rs
@@ -2,6 +2,9 @@
//! Nova Core GPU Driver
+use kernel::{error::Error, pci, prelude::*, InPlaceModule};
+use pin_init::{PinInit, pinned_drop};
+
#[macro_use]
mod bitfield;
@@ -21,13 +24,33 @@
pub(crate) const MODULE_NAME: &kernel::str::CStr = <LocalModule as
kernel::ModuleMetadata>::NAME;
-kernel::module_pci_driver! {
- type: driver::NovaCore,
+#[pin_data(PinnedDrop)]
+struct NovaCoreModule {
+ #[pin]
+ _driver:
kernel::driver::Registration<pci::Adapter<driver::NovaCore>>,
+}
+
+impl InPlaceModule for NovaCoreModule {
+ fn init(module: &'static kernel::ThisModule) -> impl
PinInit<Self, Error> {
+ pr_info!("NovaCore GPU driver loaded\n");
+ try_pin_init!(Self {
+ _driver <- kernel::driver::Registration::new(MODULE_NAME,
module),
+ })
+ }
+}
+
+#[pinned_drop]
+impl PinnedDrop for NovaCoreModule {
+ fn drop(self: Pin<&mut Self>) {
+ }
+}
+
+module! {
+ type: NovaCoreModule,
name: "NovaCore",
authors: ["Danilo Krummrich"],
description: "Nova Core GPU driver",
license: "GPL v2",
- firmware: [],
}
kernel::module_firmware!(firmware::ModInfoBuilder);
--
2.52.0
Timur Tabi
2025-Dec-12 20:49 UTC
[PATCH 3/7] gpu: nova-core: create debugfs root in PCI init closure
Create the 'nova_core' root debugfs entry when the driver loads.
Normally, non-const global variables need to be protected by a
mutex. Instead, we use unsafe code, as we know the entry is never
modified after the driver is loaded. This solves the lifetime
issue of the mutex guard, which would otherwise have required the
use of `pin_init_scope`.
Signed-off-by: Timur Tabi <ttabi at nvidia.com>
---
drivers/gpu/nova-core/nova_core.rs | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nova-core/nova_core.rs
b/drivers/gpu/nova-core/nova_core.rs
index 7d7b75650b04..591edede4376 100644
--- a/drivers/gpu/nova-core/nova_core.rs
+++ b/drivers/gpu/nova-core/nova_core.rs
@@ -2,7 +2,7 @@
//! Nova Core GPU Driver
-use kernel::{error::Error, pci, prelude::*, InPlaceModule};
+use kernel::{error::Error, pci, prelude::*, InPlaceModule, debugfs::Dir};
use pin_init::{PinInit, pinned_drop};
#[macro_use]
@@ -24,6 +24,8 @@
pub(crate) const MODULE_NAME: &kernel::str::CStr = <LocalModule as
kernel::ModuleMetadata>::NAME;
+static mut DEBUGFS_ROOT: Option<Dir> = None;
+
#[pin_data(PinnedDrop)]
struct NovaCoreModule {
#[pin]
@@ -33,6 +35,13 @@ struct NovaCoreModule {
impl InPlaceModule for NovaCoreModule {
fn init(module: &'static kernel::ThisModule) -> impl
PinInit<Self, Error> {
pr_info!("NovaCore GPU driver loaded\n");
+
+ let dir = Dir::new(kernel::c_str!("nova_core"));
+
+ // SAFETY: we are the only driver code running, so there cannot be any
concurrent access to
+ // `DEBUGFS_ROOT`.
+ unsafe { DEBUGFS_ROOT = Some(dir) };
+
try_pin_init!(Self {
_driver <- kernel::driver::Registration::new(MODULE_NAME,
module),
})
@@ -42,6 +51,9 @@ fn init(module: &'static kernel::ThisModule) ->
impl PinInit<Self, Error> {
#[pinned_drop]
impl PinnedDrop for NovaCoreModule {
fn drop(self: Pin<&mut Self>) {
+ // SAFETY: we are the only driver code running, so there cannot be any
concurrent access to
+ // `DEBUGFS_ROOT`.
+ unsafe { DEBUGFS_ROOT = None };
}
}
--
2.52.0
Timur Tabi
2025-Dec-12 20:49 UTC
[PATCH 4/7] gpu: nova-core: implement BinaryWriter for LogBuffer
From: Alexandre Courbot <acourbot at nvidia.com>
`LogBuffer` is the entity we ultimately want to dump through debugfs.
Provide a simple implementation of `BinaryWriter` for it, although it
might not fully meet the safety requirements.
Signed-off-by: Alexandre Courbot <acourbot at nvidia.com>
Signed-off-by: Timur Tabi <ttabi at nvidia.com>
---
drivers/gpu/nova-core/gsp.rs | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
index fb6f74797178..860674dac31e 100644
--- a/drivers/gpu/nova-core/gsp.rs
+++ b/drivers/gpu/nova-core/gsp.rs
@@ -3,6 +3,7 @@
mod boot;
use kernel::{
+ debugfs,
device,
dma::{
CoherentAllocation,
@@ -117,6 +118,29 @@ pub(crate) struct Gsp {
rmargs: CoherentAllocation<GspArgumentsCached>,
}
+impl debugfs::BinaryWriter for LogBuffer {
+ fn write_to_slice(
+ &self,
+ writer: &mut kernel::uaccess::UserSliceWriter,
+ offset: &mut kernel::fs::file::Offset,
+ ) -> Result<usize> {
+ // SAFETY: This is a debug log buffer. GSP may write concurrently, so
the
+ // snapshot may contain partially-written entries. This is acceptable
for
+ // debugging purposes - users should be aware logs may be slightly
garbled
+ // if read while GSP is actively logging.
+ let slice = unsafe { self.0.as_slice(0, self.0.count()) }?;
+
+ writer.write_slice_file(slice, offset)
+ }
+}
+
+// SAFETY: `LogBuffer` only provides shared access to the underlying
`CoherentAllocation`.
+// GSP may write to the buffer concurrently regardless of CPU access, so
concurrent reads
+// from multiple CPU threads do not introduce any additional races beyond what
already
+// exists with the device. Reads may observe partially-written log entries,
which is
+// acceptable for debug logging purposes.
+unsafe impl Sync for LogBuffer {}
+
impl Gsp {
// Creates an in-place initializer for a `Gsp` manager for `pdev`.
pub(crate) fn new(pdev: &pci::Device<device::Bound>) ->
Result<impl PinInit<Self, Error>> {
--
2.52.0
Timur Tabi
2025-Dec-12 20:49 UTC
[PATCH 5/7] gpu: nova-core: use pin projection in method boot()
Struct Gsp in gsp.rs is tagged with #[pin_data], which allows any of its
fields to be pinned (i.e. with #[pin]). When #[pin] is added to any
field in a #[pin_data] struct, fields can no longer be directly accessed
via normal field access. Instead, pin projection must be used to access
those fields.
Currently, no fields are pinned, but that will change. The boot() method
receives self: Pin<&mut Self>. When struct Gsp contains any pinned
fields, direct field access like self.cmdq is not allowed through
Pin<&mut Self>, as Pin prevents obtaining &mut Self to protect pinned
data from being moved.
Use pin projection via self.as_mut().project() to access struct fields.
The project() method, generated by #[pin_data], returns a projection
struct providing &mut references to non-pinned fields, enabling mutable
access while preserving pin invariants.
Signed-off-by: Timur Tabi <ttabi at nvidia.com>
---
drivers/gpu/nova-core/gsp/boot.rs | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/nova-core/gsp/boot.rs
b/drivers/gpu/nova-core/gsp/boot.rs
index 54937606b5b0..1db1099bd344 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -162,12 +162,13 @@ pub(crate) fn boot(
CoherentAllocation::<GspFwWprMeta>::alloc_coherent(dev, 1,
GFP_KERNEL | __GFP_ZERO)?;
dma_write!(wpr_meta[0] = GspFwWprMeta::new(&gsp_fw,
&fb_layout))?;
- self.cmdq
- .send_command(bar, commands::SetSystemInfo::new(pdev))?;
- self.cmdq.send_command(bar, commands::SetRegistry::new())?;
+ let this = self.as_mut().project();
+
+ this.cmdq.send_command(bar, commands::SetSystemInfo::new(pdev))?;
+ this.cmdq.send_command(bar, commands::SetRegistry::new())?;
gsp_falcon.reset(bar)?;
- let libos_handle = self.libos.dma_handle();
+ let libos_handle = this.libos.dma_handle();
let (mbox0, mbox1) = gsp_falcon.boot(
bar,
Some(libos_handle as u32),
@@ -234,13 +235,13 @@ pub(crate) fn boot(
dev: pdev.as_ref().into(),
bar,
};
- GspSequencer::run(&mut self.cmdq, seq_params)?;
+ GspSequencer::run(this.cmdq, seq_params)?;
// Wait until GSP is fully initialized.
- commands::wait_gsp_init_done(&mut self.cmdq)?;
+ commands::wait_gsp_init_done(this.cmdq)?;
// Obtain and display basic GPU information.
- let info = commands::get_gsp_info(&mut self.cmdq, bar)?;
+ let info = commands::get_gsp_info(this.cmdq, bar)?;
dev_info!(
pdev.as_ref(),
"GPU name: {}\n",
--
2.52.0
[PATCH 6/7] gpu: nova-core: create loginit debugfs entry
From: Alexandre Courbot <acourbot at nvidia.com>
Use the `pin_init_scope` feature to create the debugfs entry for
loginit.
`pin_init_scope` solves the lifetime issue over the `DEBUGFS_ROOT`
reference by delaying its acquisition until the time the entry is
actually initialized.
Signed-off-by: Alexandre Courbot <acourbot at nvidia.com>
Signed-off-by: Timur Tabi <ttabi at nvidia.com>
---
drivers/gpu/nova-core/gsp.rs | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
index 860674dac31e..ba4f789d8ac1 100644
--- a/drivers/gpu/nova-core/gsp.rs
+++ b/drivers/gpu/nova-core/gsp.rs
@@ -107,7 +107,8 @@ pub(crate) struct Gsp {
/// Libos arguments.
pub(crate) libos: CoherentAllocation<LibosMemoryRegionInitArgument>,
/// Init log buffer.
- loginit: LogBuffer,
+ #[pin]
+ pub loginit: debugfs::File<LogBuffer>,
/// Interrupts log buffer.
logintr: LogBuffer,
/// RM log buffer.
@@ -143,7 +144,9 @@ unsafe impl Sync for LogBuffer {}
impl Gsp {
// Creates an in-place initializer for a `Gsp` manager for `pdev`.
- pub(crate) fn new(pdev: &pci::Device<device::Bound>) ->
Result<impl PinInit<Self, Error>> {
+ pub(crate) fn new<'a>(
+ pdev: &'a pci::Device<device::Bound>,
+ ) -> Result<impl PinInit<Self, Error> + 'a> {
let dev = pdev.as_ref();
let libos =
CoherentAllocation::<LibosMemoryRegionInitArgument>::alloc_coherent(
dev,
@@ -173,9 +176,17 @@ pub(crate) fn new(pdev:
&pci::Device<device::Bound>) -> Result<impl PinInit<Self
dma_write!(rmargs[0] = fw::GspArgumentsCached::new(&cmdq))?;
dma_write!(libos[3] =
LibosMemoryRegionInitArgument::new("RMARGS", &rmargs))?;
+ #[allow(static_mut_refs)]
+ let debugfs_dir + // SAFETY: `DEBUGFS_ROOT` is never
modified after initialization, so it is safe to
+ // create a shared reference to it.
+ unsafe { crate::DEBUGFS_ROOT.as_ref() }
+ .map(|root| root.subdir(pdev.name()))
+ .ok_or(ENOENT)?;
+
Ok(try_pin_init!(Self {
libos,
- loginit,
+ loginit <-
debugfs_dir.read_binary_file(kernel::c_str!("loginit"), loginit),
logintr,
logrm,
rmargs,
--
2.52.0
Timur Tabi
2025-Dec-12 20:49 UTC
[PATCH 7/7] gpu: nova-core: create GSP-RM logging buffers debugfs entries
Create read-only debugfs entries for LOGINIT, LOGRM, and LOGINTR, which
are the three primary printf logging buffers from GSP-RM. LOGPMU will
be added at a later date, as it requires support for its RPC message
first.
This patch uses the `pin_init_scope` feature to create the entries.
`pin_init_scope` solves the lifetime issue over the `DEBUGFS_ROOT`
reference by delaying its acquisition until the time the entry is
actually initialized.
Co-developed-by: Alexandre Courbot <acourbot at nvidia.com>
Signed-off-by: Timur Tabi <ttabi at nvidia.com>
---
drivers/gpu/nova-core/gsp.rs | 25 +++++++++++++------------
1 file changed, 13 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
index ba4f789d8ac1..2267ec3391dd 100644
--- a/drivers/gpu/nova-core/gsp.rs
+++ b/drivers/gpu/nova-core/gsp.rs
@@ -108,11 +108,13 @@ pub(crate) struct Gsp {
pub(crate) libos: CoherentAllocation<LibosMemoryRegionInitArgument>,
/// Init log buffer.
#[pin]
- pub loginit: debugfs::File<LogBuffer>,
+ loginit: debugfs::File<LogBuffer>,
/// Interrupts log buffer.
- logintr: LogBuffer,
+ #[pin]
+ logintr: debugfs::File<LogBuffer>,
/// RM log buffer.
- logrm: LogBuffer,
+ #[pin]
+ logrm: debugfs::File<LogBuffer>,
/// Command queue.
pub(crate) cmdq: Cmdq,
/// RM arguments.
@@ -177,18 +179,17 @@ pub(crate) fn new<'a>(
dma_write!(libos[3] =
LibosMemoryRegionInitArgument::new("RMARGS", &rmargs))?;
#[allow(static_mut_refs)]
- let debugfs_dir - // SAFETY: `DEBUGFS_ROOT` is never
modified after initialization, so it is safe to
- // create a shared reference to it.
- unsafe { crate::DEBUGFS_ROOT.as_ref() }
- .map(|root| root.subdir(pdev.name()))
- .ok_or(ENOENT)?;
+ // SAFETY: `DEBUGFS_ROOT` is never modified after initialization, so it
is safe to
+ // create a shared reference to it.
+ let novacore_dir = unsafe { crate::DEBUGFS_ROOT.as_ref()
}.ok_or(ENOENT)?;
+
+ let root = novacore_dir.subdir(pdev.name());
Ok(try_pin_init!(Self {
libos,
- loginit <-
debugfs_dir.read_binary_file(kernel::c_str!("loginit"), loginit),
- logintr,
- logrm,
+ loginit <-
root.read_binary_file(kernel::c_str!("loginit"), loginit),
+ logintr <-
root.read_binary_file(kernel::c_str!("logintr"), logintr),
+ logrm <-
root.read_binary_file(kernel::c_str!("logrm"), logrm),
rmargs,
cmdq,
}))
--
2.52.0