Zhi Wang
2025-Dec-06 12:42 UTC
[RFC 0/7] gpu: nova-core: Enable booting GSP with vGPU enabled
The GSP boot support [1][2] has now been fully merged into drm-rust-next, which puts us in a good position to start discussing the requirements for bringing up GSP with vGPU enabled. Booting up GSP with vGPU enabled is a key short-term milestone for upstream vGPU support, allowing us to validate the basic GSP boot flow with vGPU enabled even before the remaining nova-core dependencies are ready. This RFC series builds on top of the latest drm-rust-next with kernel module param support from [3]. A tree can be found at [4]. [1] https://lore.kernel.org/all/20251114195552.739371-1-joelagnelf at nvidia.com/ [2] https://lore.kernel.org/all/20250827082015.959430-1-apopple at nvidia.com/ [3] https://lore.kernel.org/all/20250924-module-params-v3-v18-0-bf512c35d910 at kernel.org/ [4] https://github.com/zhiwang-nvidia/nova-core/tree/zhi/rfc-boot-gsp-with-vgpu-enabled Zhi Wang (7): rust: pci: expose sriov_get_totalvfs() helper [!UPSTREAM] rust: pci: support configuration space access gpu: nova-core: introduce vgpu_support module param. gpu: nova-core: populate GSP_VF_INFO when vGPU is enabled gpu: nova-core: set RMSetSriovMode when NVIDIA vGPU is enabled gpu: nova-core: reserve a larger GSP WPR2 heap when vGPU is enabled. gpu: nova-core: load the scrubber ucode when vGPU support is enabled drivers/gpu/nova-core/fb.rs | 19 ++++-- drivers/gpu/nova-core/firmware.rs | 1 + drivers/gpu/nova-core/firmware/booter.rs | 2 + drivers/gpu/nova-core/gpu.rs | 6 +- drivers/gpu/nova-core/gsp.rs | 8 ++- drivers/gpu/nova-core/gsp/boot.rs | 38 +++++++++++- drivers/gpu/nova-core/gsp/commands.rs | 31 ++++++---- drivers/gpu/nova-core/gsp/fw.rs | 75 ++++++++++++++++++++++++ drivers/gpu/nova-core/gsp/fw/commands.rs | 11 +++- drivers/gpu/nova-core/nova_core.rs | 15 +++++ drivers/gpu/nova-core/regs.rs | 11 ++++ drivers/gpu/nova-core/vgpu.rs | 26 ++++++++ rust/kernel/pci.rs | 46 +++++++++++++++ 13 files changed, 267 insertions(+), 22 deletions(-) create mode 100644 drivers/gpu/nova-core/vgpu.rs -- 2.51.0
Add a wrapper for the `pci_sriov_get_totalvfs()` helper, allowing drivers
to query the total number of SR-IOV virtual functions (VFs) a PCI device
supports.
This is useful for components that need to conditionally enable features
based on the device's SR-IOV capability.
Signed-off-by: Zhi Wang <zhiw at nvidia.com>
---
rust/kernel/pci.rs | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs
index 7fcc5f6022c1..9a82e83dfd30 100644
--- a/rust/kernel/pci.rs
+++ b/rust/kernel/pci.rs
@@ -514,6 +514,18 @@ pub fn pci_class(&self) -> Class {
// SAFETY: `self.as_raw` is a valid pointer to a `struct pci_dev`.
Class::from_raw(unsafe { (*self.as_raw()).class })
}
+
+ /// Returns total number of VFs, or `Err(ENODEV)` if none available.
+ pub fn sriov_get_totalvfs(&self) -> Result<i32> {
+ // SAFETY: `self.as_raw()` is a valid pointer to a `struct pci_dev`.
+ let vfs = unsafe { bindings::pci_sriov_get_totalvfs(self.as_raw()) };
+
+ if vfs != 0 {
+ Ok(vfs)
+ } else {
+ Err(ENODEV)
+ }
+ }
}
impl Device<device::Bound> {
--
2.51.0
Zhi Wang
2025-Dec-06 12:42 UTC
[RFC 2/7] [!UPSTREAM] rust: pci: support configuration space access
The work is WIP at [1].
Link: https://lore.kernel.org/all/20251119112117.116979-1-zhiw at nvidia.com/
[1]
Signed-off-by: Zhi Wang <zhiw at nvidia.com>
---
rust/kernel/pci.rs | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs
index 9a82e83dfd30..e7fdded9d746 100644
--- a/rust/kernel/pci.rs
+++ b/rust/kernel/pci.rs
@@ -526,6 +526,40 @@ pub fn sriov_get_totalvfs(&self) ->
Result<i32> {
Err(ENODEV)
}
}
+
+ /// Find the extended capability
+ pub fn find_ext_capability(&self, cap: i32) -> Option<u16> {
+ // SAFETY: `self.as_raw()` is a valid pointer to a `struct pci_dev`.
+ let offset = unsafe { bindings::pci_find_ext_capability(self.as_raw(),
cap) };
+ if offset != 0 {
+ Some(offset as u16)
+ } else {
+ None
+ }
+ }
+
+ /// Read configuration space by word
+ pub fn config_read_word(&self, where_: i32) -> Result<u16,
Error> {
+ let mut val: u16 = 0;
+
+ // SAFETY: `self.as_raw()` is a valid pointer to `struct pci_dev`,
+ // and `&mut val` is a valid pointer to writable memory.
+ to_result(unsafe {
+ bindings::pci_read_config_word(self.as_raw(), where_, &mut val)
+ })?;
+
+ Ok(val)
+ }
+
+ /// Read configuration space by dword
+ pub fn config_read_dword(&self, where_: i32) -> Result<u32,
Error> {
+ let mut val: u32 = 0;
+ // SAFETY: `self.as_raw()` is a valid pointer to `struct pci_dev`,
+ // and `&mut val` is a valid pointer to writable memory.
+ to_result(unsafe { bindings::pci_read_config_dword(self.as_raw(),
where_, &mut val) })?;
+
+ Ok(val)
+ }
}
impl Device<device::Bound> {
--
2.51.0
Zhi Wang
2025-Dec-06 12:42 UTC
[RFC 3/7] gpu: nova-core: introduce vgpu_support module param.
Introduce a kernel module param to control vGPU support in nova-core.
vgpu_support = 1 (default): automatic
The driver automatically enables or disables vGPU support based on whether
the GPU advertises SR-IOV capabilities.
vgpu_support = 0: disabled
Explicitly disables vGPU support. The driver will not enable vGPU support
regardless of whether the GPU advertises SR-IOV capabilities.
Signed-off-by: Zhi Wang <zhiw at nvidia.com>
---
drivers/gpu/nova-core/gpu.rs | 4 ++++
drivers/gpu/nova-core/nova_core.rs | 15 +++++++++++++++
drivers/gpu/nova-core/vgpu.rs | 26 ++++++++++++++++++++++++++
3 files changed, 45 insertions(+)
create mode 100644 drivers/gpu/nova-core/vgpu.rs
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index 629c9d2dc994..10c5ae07a891 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -20,6 +20,7 @@
gfw,
gsp::Gsp,
regs,
+ vgpu::Vgpu, //
};
macro_rules! define_chipset {
@@ -252,6 +253,7 @@ pub(crate) struct Gpu {
/// GSP runtime data. Temporarily an empty placeholder.
#[pin]
gsp: Gsp,
+ vgpu: Vgpu,
}
impl Gpu {
@@ -271,6 +273,8 @@ pub(crate) fn new<'a>(
.inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did
not complete"))?;
},
+ vgpu: Vgpu::new(pdev)?,
+
sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar,
spec.chipset)?,
gsp_falcon: Falcon::new(
diff --git a/drivers/gpu/nova-core/nova_core.rs
b/drivers/gpu/nova-core/nova_core.rs
index b98a1c03f13d..e034353f69ab 100644
--- a/drivers/gpu/nova-core/nova_core.rs
+++ b/drivers/gpu/nova-core/nova_core.rs
@@ -18,6 +18,7 @@
mod sbuffer;
mod util;
mod vbios;
+mod vgpu;
pub(crate) const MODULE_NAME: &kernel::str::CStr = <LocalModule as
kernel::ModuleMetadata>::NAME;
@@ -28,6 +29,20 @@
description: "Nova Core GPU driver",
license: "GPL v2",
firmware: [],
+ params: {
+ // vgpu_support = 1 (default): automatic
+ //
+ // The driver automatically enables or disables vGPU support based on
if the GPU
+ // advertises SRIOV caps.
+ //
+ // vgpu_support = 0: disabled
+ //
+ // Explicitly disables vGPU support. The driver will not enable vGPU
support regardless.
+ vgpu_support: u32 {
+ default: 1,
+ description: "Enable vGPU support - (1 = auto (default), 0 =
disable)",
+ },
+ },
}
kernel::module_firmware!(firmware::ModInfoBuilder);
diff --git a/drivers/gpu/nova-core/vgpu.rs b/drivers/gpu/nova-core/vgpu.rs
new file mode 100644
index 000000000000..9701b97bf6bf
--- /dev/null
+++ b/drivers/gpu/nova-core/vgpu.rs
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+use kernel::{
+ device,
+ pci,
+ prelude::*, //
+};
+
+use crate::{
+ module_parameters, //
+};
+
+pub(crate) struct Vgpu {
+ pub vgpu_support: bool,
+}
+
+impl Vgpu {
+ pub(crate) fn new(pdev: &pci::Device<device::Bound>) ->
Result<Vgpu> {
+ Ok(Vgpu {
+ vgpu_support: match *module_parameters::vgpu_support.value() {
+ 0 => false,
+ _ => pdev.sriov_get_totalvfs().is_ok(),
+ },
+ })
+ }
+}
--
2.51.0
Zhi Wang
2025-Dec-06 12:42 UTC
[RFC 4/7] gpu: nova-core: populate GSP_VF_INFO when vGPU is enabled
GSP firmware needs to know the VF BAR offsets to correctly calculate the
VF events.
The VF BAR information is stored in GSP_VF_INFO, which needs to be
initialized and uploaded together with the GSP_SYSTEM_INFO.
Populate GSP_VF_INFO when nova-core uploads the GSP_SYSTEM_INFO if NVIDIA
vGPU is enabled.
Signed-off-by: Zhi Wang <zhiw at nvidia.com>
---
drivers/gpu/nova-core/gpu.rs | 2 +-
drivers/gpu/nova-core/gsp.rs | 8 ++-
drivers/gpu/nova-core/gsp/boot.rs | 6 +-
drivers/gpu/nova-core/gsp/commands.rs | 8 ++-
drivers/gpu/nova-core/gsp/fw.rs | 75 ++++++++++++++++++++++++
drivers/gpu/nova-core/gsp/fw/commands.rs | 11 +++-
6 files changed, 102 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index 10c5ae07a891..08a41e7bd982 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -285,7 +285,7 @@ pub(crate) fn new<'a>(
sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset)?,
- gsp <- Gsp::new(pdev)?,
+ gsp <- Gsp::new(pdev, vgpu.vgpu_support)?,
_: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? },
diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
index fb6f74797178..2d9352740c28 100644
--- a/drivers/gpu/nova-core/gsp.rs
+++ b/drivers/gpu/nova-core/gsp.rs
@@ -115,11 +115,16 @@ pub(crate) struct Gsp {
pub(crate) cmdq: Cmdq,
/// RM arguments.
rmargs: CoherentAllocation<GspArgumentsCached>,
+ /// Support vGPU.
+ vgpu_support: bool,
}
impl Gsp {
// Creates an in-place initializer for a `Gsp` manager for `pdev`.
- pub(crate) fn new(pdev: &pci::Device<device::Bound>) ->
Result<impl PinInit<Self, Error>> {
+ pub(crate) fn new(
+ pdev: &pci::Device<device::Bound>,
+ vgpu_support: bool,
+ ) -> Result<impl PinInit<Self, Error>> {
let dev = pdev.as_ref();
let libos =
CoherentAllocation::<LibosMemoryRegionInitArgument>::alloc_coherent(
dev,
@@ -156,6 +161,7 @@ pub(crate) fn new(pdev:
&pci::Device<device::Bound>) -> Result<impl PinInit<Self
logrm,
rmargs,
cmdq,
+ vgpu_support,
}))
}
}
diff --git a/drivers/gpu/nova-core/gsp/boot.rs
b/drivers/gpu/nova-core/gsp/boot.rs
index 54937606b5b0..5016c630cec3 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -33,6 +33,7 @@
gpu::Chipset,
gsp::{
commands,
+ fw::GspVfInfo,
sequencer::{
GspSequencer,
GspSequencerParams, //
@@ -136,6 +137,7 @@ pub(crate) fn boot(
sec2_falcon: &Falcon<Sec2>,
) -> Result {
let dev = pdev.as_ref();
+ let vgpu_support = self.vgpu_support;
let bios = Vbios::new(dev, bar)?;
@@ -162,8 +164,10 @@ pub(crate) fn boot(
CoherentAllocation::<GspFwWprMeta>::alloc_coherent(dev, 1,
GFP_KERNEL | __GFP_ZERO)?;
dma_write!(wpr_meta[0] = GspFwWprMeta::new(&gsp_fw,
&fb_layout))?;
+ let vf_info = GspVfInfo::new(pdev, bar, vgpu_support)?;
+
self.cmdq
- .send_command(bar, commands::SetSystemInfo::new(pdev))?;
+ .send_command(bar, commands::SetSystemInfo::new(pdev, vf_info))?;
self.cmdq.send_command(bar, commands::SetRegistry::new())?;
gsp_falcon.reset(bar)?;
diff --git a/drivers/gpu/nova-core/gsp/commands.rs
b/drivers/gpu/nova-core/gsp/commands.rs
index 0425c65b5d6f..1d519c4ed232 100644
--- a/drivers/gpu/nova-core/gsp/commands.rs
+++ b/drivers/gpu/nova-core/gsp/commands.rs
@@ -26,6 +26,7 @@
},
fw::{
commands::*,
+ GspVfInfo,
MsgFunction, //
},
},
@@ -36,12 +37,13 @@
/// The `GspSetSystemInfo` command.
pub(crate) struct SetSystemInfo<'a> {
pdev: &'a pci::Device<device::Bound>,
+ vf_info: GspVfInfo,
}
impl<'a> SetSystemInfo<'a> {
/// Creates a new `GspSetSystemInfo` command using the parameters of
`pdev`.
- pub(crate) fn new(pdev: &'a pci::Device<device::Bound>) ->
Self {
- Self { pdev }
+ pub(crate) fn new(pdev: &'a pci::Device<device::Bound>,
vf_info: GspVfInfo) -> Self {
+ Self { pdev, vf_info }
}
}
@@ -51,7 +53,7 @@ impl<'a> CommandToGsp for
SetSystemInfo<'a> {
type InitError = Error;
fn init(&self) -> impl Init<Self::Command, Self::InitError> {
- GspSetSystemInfo::init(self.pdev)
+ GspSetSystemInfo::init(self.pdev, self.vf_info)
}
}
diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
index abffd6beec65..a0581ac34586 100644
--- a/drivers/gpu/nova-core/gsp/fw.rs
+++ b/drivers/gpu/nova-core/gsp/fw.rs
@@ -9,8 +9,10 @@
use core::ops::Range;
use kernel::{
+ device,
dma::CoherentAllocation,
fmt,
+ pci,
prelude::*,
ptr::{
Alignable,
@@ -27,6 +29,7 @@
};
use crate::{
+ driver::Bar0,
fb::FbLayout,
firmware::gsp::GspFirmware,
gpu::Chipset,
@@ -926,3 +929,75 @@ fn new(cmdq: &Cmdq) -> Self {
})
}
}
+
+/// VF information - gspVFInfo in SetSystemInfo.
+#[derive(Clone, Copy, Zeroable)]
+#[repr(transparent)]
+pub(crate) struct GspVfInfo {
+ inner: bindings::GSP_VF_INFO,
+}
+
+impl GspVfInfo {
+ /// Creates a new GspVfInfo structure.
+ pub(crate) fn new<'a>(
+ pdev: &'a pci::Device<device::Bound>,
+ bar: &Bar0,
+ vgpu_support: bool,
+ ) -> Result<GspVfInfo> {
+ let mut vf_info = GspVfInfo::zeroed();
+ let info = &mut vf_info.inner;
+
+ if vgpu_support {
+ let val = pdev.sriov_get_totalvfs()?;
+ info.totalVFs = u32::try_from(val)?;
+
+ let pos = pdev
+ .find_ext_capability(kernel::bindings::PCI_EXT_CAP_ID_SRIOV as
i32)
+ .ok_or(ENODEV)?;
+
+ let val = pdev.config_read_word(
+ i32::from(pos) +
i32::from(kernel::bindings::PCI_SRIOV_VF_OFFSET as i32),
+ )?;
+ info.firstVFOffset = u32::from(val);
+
+ let val = pdev.config_read_dword(
+ i32::from(pos) + i32::from(kernel::bindings::PCI_SRIOV_BAR as
i32),
+ )?;
+ info.FirstVFBar0Address = u64::from(val);
+
+ let bar1_lo = pdev.config_read_dword(
+ i32::from(pos) + i32::from(kernel::bindings::PCI_SRIOV_BAR as
i32 + 4),
+ )?;
+ let bar1_hi = pdev.config_read_dword(
+ i32::from(pos) + i32::from(kernel::bindings::PCI_SRIOV_BAR as
i32 + 8),
+ )?;
+
+ let addr_mask =
u64::try_from(kernel::bindings::PCI_BASE_ADDRESS_MEM_MASK)?;
+
+ info.FirstVFBar1Address =
+ (u64::from(bar1_hi) << 32) | ((u64::from(bar1_lo)) & addr_mask);
+
+ let bar2_lo = pdev.config_read_dword(
+ i32::from(pos) + i32::from(kernel::bindings::PCI_SRIOV_BAR as
i32 + 12),
+ )?;
+ let bar2_hi = pdev.config_read_dword(
+ i32::from(pos) + i32::from(kernel::bindings::PCI_SRIOV_BAR as
i32 + 16),
+ )?;
+
+ info.FirstVFBar2Address = (u64::from(bar2_hi) << 32) |
(u64::from(bar2_lo) & addr_mask);
+
+ let val = bar.read32(0x88000 + 0xbf4);
+ info.b64bitBar1 = u8::from((val & 0x00000006) == 0x00000004);
+
+ let val = bar.read32(0x88000 + 0xbfc);
+ info.b64bitBar2 = u8::from((val & 0x00000006) == 0x00000004);
+ }
+ Ok(vf_info)
+ }
+}
+
+// SAFETY: Padding is explicit and does not contain uninitialized data.
+unsafe impl AsBytes for GspVfInfo {}
+
+// SAFETY: This struct only contains integer types for which all bit patterns
are valid.
+unsafe impl FromBytes for GspVfInfo {}
diff --git a/drivers/gpu/nova-core/gsp/fw/commands.rs
b/drivers/gpu/nova-core/gsp/fw/commands.rs
index 21be44199693..3b5c05704b2d 100644
--- a/drivers/gpu/nova-core/gsp/fw/commands.rs
+++ b/drivers/gpu/nova-core/gsp/fw/commands.rs
@@ -4,7 +4,10 @@
use kernel::transmute::{AsBytes, FromBytes};
use kernel::{device, pci};
-use crate::gsp::GSP_PAGE_SIZE;
+use crate::gsp::{
+ fw::GspVfInfo,
+ GSP_PAGE_SIZE, //
+};
use super::bindings;
@@ -18,7 +21,10 @@ pub(crate) struct GspSetSystemInfo {
impl GspSetSystemInfo {
/// Returns an in-place initializer for the `GspSetSystemInfo` command.
#[allow(non_snake_case)]
- pub(crate) fn init<'a>(dev: &'a
pci::Device<device::Bound>) -> impl Init<Self, Error> + 'a {
+ pub(crate) fn init<'a>(
+ dev: &'a pci::Device<device::Bound>,
+ info: GspVfInfo,
+ ) -> impl Init<Self, Error> + 'a {
type InnerGspSystemInfo = bindings::GspSystemInfo;
let init_inner = try_init!(InnerGspSystemInfo {
gpuPhysAddr: dev.resource_start(0)?,
@@ -38,6 +44,7 @@ pub(crate) fn init<'a>(dev: &'a
pci::Device<device::Bound>) -> impl Init<Self, E
PCIRevisionID: u32::from(dev.revision_id()),
bIsPrimary: 0,
bPreserveVideoMemoryAllocations: 0,
+ gspVFInfo: info.inner,
..Zeroable::init_zeroed()
});
--
2.51.0
Zhi Wang
2025-Dec-06 12:42 UTC
[RFC 5/7] gpu: nova-core: set RMSetSriovMode when NVIDIA vGPU is enabled
The registry object "RMSetSriovMode" is required to be set when vGPU is
enabled.
Set "RMSetSriovMode" to 1 when nova-core loads the GSP firmware and
initializes the GSP registry objects, if vGPU is enabled.
Signed-off-by: Zhi Wang <zhiw at nvidia.com>
---
drivers/gpu/nova-core/gsp/boot.rs | 3 ++-
drivers/gpu/nova-core/gsp/commands.rs | 23 +++++++++++++++--------
2 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/nova-core/gsp/boot.rs
b/drivers/gpu/nova-core/gsp/boot.rs
index 5016c630cec3..847ce550eccf 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -168,7 +168,8 @@ pub(crate) fn boot(
self.cmdq
.send_command(bar, commands::SetSystemInfo::new(pdev, vf_info))?;
- self.cmdq.send_command(bar, commands::SetRegistry::new())?;
+ self.cmdq
+ .send_command(bar, commands::SetRegistry::new(vgpu_support))?;
gsp_falcon.reset(bar)?;
let libos_handle = self.libos.dma_handle();
diff --git a/drivers/gpu/nova-core/gsp/commands.rs
b/drivers/gpu/nova-core/gsp/commands.rs
index 1d519c4ed232..00ba48a25444 100644
--- a/drivers/gpu/nova-core/gsp/commands.rs
+++ b/drivers/gpu/nova-core/gsp/commands.rs
@@ -64,16 +64,18 @@ struct RegistryEntry {
/// The `SetRegistry` command.
pub(crate) struct SetRegistry {
- entries: [RegistryEntry; Self::NUM_ENTRIES],
+ entries: [RegistryEntry; Self::MAX_NUM_ENTRIES],
+ num_entries: usize,
}
impl SetRegistry {
// For now we hard-code the registry entries. Future work will allow others
to
// be added as module parameters.
- const NUM_ENTRIES: usize = 3;
+ const MAX_NUM_ENTRIES: usize = 4;
/// Creates a new `SetRegistry` command, using a set of hardcoded entries.
- pub(crate) fn new() -> Self {
+ pub(crate) fn new(vgpu_support: bool) -> Self {
+ let num_entries = if vgpu_support { 4 } else { 3 };
Self {
entries: [
// RMSecBusResetEnable - enables PCI secondary bus reset
@@ -93,7 +95,12 @@ pub(crate) fn new() -> Self {
key: "RMDevidCheckIgnore",
value: 1,
},
+ RegistryEntry {
+ key: "RMSetSriovMode",
+ value: 1,
+ },
],
+ num_entries,
}
}
}
@@ -104,15 +111,15 @@ impl CommandToGsp for SetRegistry {
type InitError = Infallible;
fn init(&self) -> impl Init<Self::Command, Self::InitError> {
- PackedRegistryTable::init(Self::NUM_ENTRIES as u32,
self.variable_payload_len() as u32)
+ PackedRegistryTable::init(self.num_entries as u32,
self.variable_payload_len() as u32)
}
fn variable_payload_len(&self) -> usize {
let mut key_size = 0;
- for i in 0..Self::NUM_ENTRIES {
+ for i in 0..self.num_entries {
key_size += self.entries[i].key.len() + 1; // +1 for NULL
terminator
}
- Self::NUM_ENTRIES * size_of::<PackedRegistryEntry>() + key_size
+ self.num_entries * size_of::<PackedRegistryEntry>() + key_size
}
fn init_variable_payload(
@@ -120,12 +127,12 @@ fn init_variable_payload(
dst: &mut SBufferIter<core::array::IntoIter<&mut [u8],
2>>,
) -> Result {
let string_data_start_offset =
- size_of::<PackedRegistryTable>() + Self::NUM_ENTRIES * size_of::<PackedRegistryEntry>();
+ size_of::<PackedRegistryTable>() + self.num_entries * size_of::<PackedRegistryEntry>();
// Array for string data.
let mut string_data = KVec::new();
- for entry in self.entries.iter().take(Self::NUM_ENTRIES) {
+ for entry in self.entries.iter().take(self.num_entries) {
dst.write_all(
PackedRegistryEntry::new(
(string_data_start_offset + string_data.len()) as u32,
--
2.51.0
Zhi Wang
2025-Dec-06 12:42 UTC
[RFC 6/7] gpu: nova-core: reserve a larger GSP WPR2 heap when vGPU is enabled.
To support the maximum number of vGPUs on devices that support vGPU, a
larger WPR2 heap size is required. On Ada with vGPU supported, the size
should be set to at least 581MB.
When vGPU support is enabled, reserve a larger WPR2 heap (581MB) and set
the VF partition count in the WPR2 meta to the maximum (32).
Signed-off-by: Zhi Wang <zhiw at nvidia.com>
---
drivers/gpu/nova-core/fb.rs | 19 +++++++++++++++----
drivers/gpu/nova-core/gsp/boot.rs | 2 +-
2 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs
index 3c9cf151786c..9a5c40029f3a 100644
--- a/drivers/gpu/nova-core/fb.rs
+++ b/drivers/gpu/nova-core/fb.rs
@@ -119,7 +119,12 @@ pub(crate) struct FbLayout {
impl FbLayout {
/// Computes the FB layout for `chipset` required to run the `gsp_fw` GSP
firmware.
- pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw:
&GspFirmware) -> Result<Self> {
+ pub(crate) fn new(
+ chipset: Chipset,
+ bar: &Bar0,
+ gsp_fw: &GspFirmware,
+ vgpu_support: bool,
+ ) -> Result<Self> {
let hal = hal::fb_hal(chipset);
let fb = {
@@ -181,8 +186,12 @@ pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw:
&GspFirmware) -> Result<
let wpr2_heap = {
const WPR2_HEAP_DOWN_ALIGN: Alignment =
Alignment::new::<SZ_1M>();
- let wpr2_heap_size =
- gsp::LibosParams::from_chipset(chipset).wpr_heap_size(chipset, fb.end);
+ let wpr2_heap_size = if !vgpu_support {
+ gsp::LibosParams::from_chipset(chipset).wpr_heap_size(chipset,
fb.end)
+ } else {
+ 581 * usize_as_u64(SZ_1M)
+ };
+
let wpr2_heap_addr = (elf.start -
wpr2_heap_size).align_down(WPR2_HEAP_DOWN_ALIGN);
wpr2_heap_addr..(elf.start).align_down(WPR2_HEAP_DOWN_ALIGN)
@@ -202,6 +211,8 @@ pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw:
&GspFirmware) -> Result<
wpr2.start - HEAP_SIZE..wpr2.start
};
+ let vf_partition_count = if vgpu_support { 32 } else { 0 };
+
Ok(Self {
fb,
vga_workspace,
@@ -211,7 +222,7 @@ pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw:
&GspFirmware) -> Result<
wpr2_heap,
wpr2,
heap,
- vf_partition_count: 0,
+ vf_partition_count,
})
}
}
diff --git a/drivers/gpu/nova-core/gsp/boot.rs
b/drivers/gpu/nova-core/gsp/boot.rs
index 847ce550eccf..ec006c26f19f 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -146,7 +146,7 @@ pub(crate) fn boot(
GFP_KERNEL,
)?;
- let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?;
+ let fb_layout = FbLayout::new(chipset, bar, &gsp_fw,
vgpu_support)?;
dev_dbg!(dev, "{:#x?}\n", fb_layout);
Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?;
--
2.51.0
Zhi Wang
2025-Dec-06 12:42 UTC
[RFC 7/7] gpu: nova-core: load the scrubber ucode when vGPU support is enabled
To support the maximum number of vGPUs on devices that support vGPU, a
larger WPR2 heap size is required. When the WPR2 heap size is larger than
256MB, the scrubber ucode image must be run to scrub the FB memory before
any other ucode image is executed.
Otherwise, the GSP firmware hangs when booting.
When vGPU support is enabled, execute the scrubber ucode image to scrub the
FB memory before executing any other ucode images.
Signed-off-by: Zhi Wang <zhiw at nvidia.com>
---
drivers/gpu/nova-core/firmware.rs | 1 +
drivers/gpu/nova-core/firmware/booter.rs | 2 ++
drivers/gpu/nova-core/gsp/boot.rs | 27 ++++++++++++++++++++++++
drivers/gpu/nova-core/regs.rs | 11 ++++++++++
4 files changed, 41 insertions(+)
diff --git a/drivers/gpu/nova-core/firmware.rs
b/drivers/gpu/nova-core/firmware.rs
index 2d2008b33fb4..5ae1ab262d57 100644
--- a/drivers/gpu/nova-core/firmware.rs
+++ b/drivers/gpu/nova-core/firmware.rs
@@ -226,6 +226,7 @@ const fn make_entry_chipset(self, chipset: &str) ->
Self {
.make_entry_file(chipset, "booter_unload")
.make_entry_file(chipset, "bootloader")
.make_entry_file(chipset, "gsp")
+ .make_entry_file(chipset, "scrubber")
}
pub(crate) const fn create(
diff --git a/drivers/gpu/nova-core/firmware/booter.rs
b/drivers/gpu/nova-core/firmware/booter.rs
index f107f753214a..f622c9b960de 100644
--- a/drivers/gpu/nova-core/firmware/booter.rs
+++ b/drivers/gpu/nova-core/firmware/booter.rs
@@ -269,6 +269,7 @@ fn new_booter(dev: &device::Device<device::Bound>,
data: &[u8]) -> Result<Self>
#[derive(Copy, Clone, Debug, PartialEq)]
pub(crate) enum BooterKind {
+ Scrubber,
Loader,
#[expect(unused)]
Unloader,
@@ -286,6 +287,7 @@ pub(crate) fn new(
bar: &Bar0,
) -> Result<Self> {
let fw_name = match kind {
+ BooterKind::Scrubber => "scrubber",
BooterKind::Loader => "booter_load",
BooterKind::Unloader => "booter_unload",
};
diff --git a/drivers/gpu/nova-core/gsp/boot.rs
b/drivers/gpu/nova-core/gsp/boot.rs
index ec006c26f19f..8ef79433f017 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -151,6 +151,33 @@ pub(crate) fn boot(
Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?;
+ if vgpu_support {
+ let scrubber = BooterFirmware::new(
+ dev,
+ BooterKind::Scrubber,
+ chipset,
+ FIRMWARE_VERSION,
+ sec2_falcon,
+ bar,
+ )?;
+
+ sec2_falcon.reset(bar)?;
+ sec2_falcon.dma_load(bar, &scrubber)?;
+
+ let (mbox0, mbox1) = sec2_falcon.boot(bar, None, None)?;
+
+ dev_dbg!(
+ pdev.as_ref(),
+ "SEC2 MBOX0: {:#x}, MBOX1{:#x}\n",
+ mbox0,
+ mbox1
+ );
+
+ if
!regs::NV_PGC6_BSI_SECURE_SCRATCH_15::read(bar).scrubber_completed() {
+ return Err(ETIMEDOUT);
+ }
+ }
+
let booter_loader = BooterFirmware::new(
dev,
BooterKind::Loader,
diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
index 82cc6c0790e5..9f3a52ca014f 100644
--- a/drivers/gpu/nova-core/regs.rs
+++ b/drivers/gpu/nova-core/regs.rs
@@ -173,6 +173,17 @@ pub(crate) fn higher_bound(self) -> u64 {
26:26 boot_stage_3_handoff as bool;
});
+register!(NV_PGC6_BSI_SECURE_SCRATCH_15 @ 0x001180fc {
+ 31:29 scrubber_handoff as u8;
+});
+
+impl NV_PGC6_BSI_SECURE_SCRATCH_15 {
+ /// Returns `true` if scrubber is completed.
+ pub(crate) fn scrubber_completed(self) -> bool {
+ self.scrubber_handoff() >= 0x3
+ }
+}
+
// Privilege level mask register. It dictates whether the host CPU has
privilege to access the
// `PGC6_AON_SECURE_SCRATCH_GROUP_05` register (which it needs to read
GFW_BOOT).
register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK @ 0x00118128,
--
2.51.0