John Hubbard
2025-Dec-03 05:58 UTC
[PATCH 00/31] gpu: nova-core: firmware: Hopper/Blackwell support
Hi,
This series adds firmware support for Hopper and Blackwell GPUs.
Specifically, Hopper and Blackwell can now get just as far as Ampere and
Ada do: they can receive a GET_GSP_STATIC_INFO message from the GPU's
GSP firmware, and print the resulting GPU marketing name to dmesg.
Having made that claim, note that I've only actually tested the new
functionality on a single Blackwell GPU, so far: GB202. For Ampere, I've
tested on GA104, and it still works as before.
The doctests are all passing.
This is based on today's drm-rust-next. For reviewer convenience, there
is also a git branch that has this series applied:
https://github.com/johnhubbard/linux/tree/nova-core-blackwell-complete-v0
Note: I've merged in the earlier "[PATCH v2 0/5] gpu: nova-core:
Hopper/Blackwell prerequisites" [1], because it is no longer useful by
itself. That makes for a large series, but now it is entirely
self-contained, so it's easy to apply.
[1] https://lore.kernel.org/20251126013936.650678-1-jhubbard@nvidia.com
John Hubbard (31):
gpu: nova-core: print FB sizes, along with ranges
gpu: nova-core: add FbRange.len() and use it in boot.rs
gpu: nova-core: Hopper/Blackwell: basic GPU identification
nova-core: factor .fwsignature* selection into a new
get_gsp_sigs_section()
gpu: nova-core: use GPU Architecture to simplify HAL selections
gpu: nova-core: apply the one "use" item per line policy to
commands.rs
gpu: nova-core: set DMA mask width based on GPU architecture
gpu: nova-core: move firmware image parsing code to firmware.rs
gpu: nova-core: factor out a section_name_eq() function
gpu: nova-core: don't assume 64-bit firmware images
gpu: nova-core: add support for 32-bit firmware images
gpu: nova-core: add auto-detection of 32-bit, 64-bit firmware images
gpu: nova-core: Hopper/Blackwell: add FMC firmware image, in support
of FSP
gpu: nova-core: Hopper/Blackwell: add FSP falcon engine stub
gpu: nova-core: Hopper/Blackwell: add FSP falcon EMEM operations
gpu: nova-core: Hopper/Blackwell: add FSP message infrastructure
gpu: nova-core: Hopper/Blackwell: calculate reserved FB heap size
gpu: nova-core: Hopper/Blackwell: add needs_large_reserved_mem()
gpu: nova-core: Hopper/Blackwell: add FSP secure boot completion
waiting
gpu: nova-core: Hopper/Blackwell: add FSP message structures
gpu: nova-core: Hopper/Blackwell: add FMC signature extraction
gpu: nova-core: Hopper/Blackwell: add FSP send/receive messaging
gpu: nova-core: Hopper/Blackwell: add FSP Chain of Trust boot
gpu: nova-core: Hopper/Blackwell: larger non-WPR heap
gpu: nova-core: Hopper/Blackwell: larger WPR2 (GSP) heap
gpu: nova-core: refactor SEC2 booter loading into run_booter() helper
gpu: nova-core: Hopper/Blackwell: skip GFW boot waiting
gpu: nova-core: Hopper/Blackwell: add GSP lockdown release polling
gpu: nova-core: Hopper/Blackwell: add FSP Chain of Trust boot path
gpu: nova-core: Hopper/Blackwell: new location for PCI config mirror
gpu: nova-core: clarify the GPU firmware boot steps
drivers/gpu/nova-core/driver.rs | 33 +-
drivers/gpu/nova-core/falcon.rs | 1 +
drivers/gpu/nova-core/falcon/fsp.rs | 167 ++++++
drivers/gpu/nova-core/falcon/hal.rs | 19 +-
drivers/gpu/nova-core/fb.rs | 101 +++-
drivers/gpu/nova-core/fb/hal.rs | 18 +-
drivers/gpu/nova-core/firmware.rs | 193 +++++++
drivers/gpu/nova-core/firmware/fsp.rs | 42 ++
drivers/gpu/nova-core/firmware/gsp.rs | 128 ++---
drivers/gpu/nova-core/fsp.rs | 617 +++++++++++++++++++++++
drivers/gpu/nova-core/gpu.rs | 64 ++-
drivers/gpu/nova-core/gsp/boot.rs | 342 ++++++++++---
drivers/gpu/nova-core/gsp/commands.rs | 8 +-
drivers/gpu/nova-core/gsp/fw.rs | 55 +-
drivers/gpu/nova-core/gsp/fw/commands.rs | 32 +-
drivers/gpu/nova-core/nova_core.rs | 1 +
drivers/gpu/nova-core/num.rs | 10 +
drivers/gpu/nova-core/regs.rs | 59 +++
18 files changed, 1642 insertions(+), 248 deletions(-)
create mode 100644 drivers/gpu/nova-core/falcon/fsp.rs
create mode 100644 drivers/gpu/nova-core/firmware/fsp.rs
create mode 100644 drivers/gpu/nova-core/fsp.rs
base-commit: 57dc2ea0b7bdb828c5d966d9135c28fe854933a4
--
2.52.0
John Hubbard
2025-Dec-03 05:58 UTC
[PATCH 01/31] gpu: nova-core: print FB sizes, along with ranges
For convenience of the reader: now you can directly see the sizes of
each range. It is surprising just how much this helps.
Sample output (using an Ampere GA104):
NovaCore 0000:e1:00.0: FbLayout {
fb: 0x0..0x3ff800000 (16376 MB),
vga_workspace: 0x3ff700000..0x3ff800000 (1 MB),
frts: 0x3ff600000..0x3ff700000 (1 MB),
boot: 0x3ff5fa000..0x3ff600000 (0 MB),
elf: 0x3fb960000..0x3ff5f9000 (60 MB),
wpr2_heap: 0x3f3900000..0x3fb900000 (128 MB),
wpr2: 0x3f3800000..0x3ff700000 (191 MB),
heap: 0x3f3700000..0x3f3800000 (1 MB),
vf_partition_count: 0x0,
}
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/fb.rs | 66 +++++++++++++++++++++++++++----------
1 file changed, 49 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs
index 3c9cf151786c..333e952400e6 100644
--- a/drivers/gpu/nova-core/fb.rs
+++ b/drivers/gpu/nova-core/fb.rs
@@ -1,9 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
-use core::ops::Range;
+use core::ops::{
+ Deref,
+ Range, //
+};
use kernel::{
device,
+ fmt,
prelude::*,
ptr::{
Alignable,
@@ -94,26 +98,54 @@ pub(crate) fn unregister(&self, bar: &Bar0) {
}
}
+pub(crate) struct FbRange(Range<u64>);
+
+impl From<Range<u64>> for FbRange {
+ fn from(range: Range<u64>) -> Self {
+ Self(range)
+ }
+}
+
+impl Deref for FbRange {
+ type Target = Range<u64>;
+
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
+
+impl fmt::Debug for FbRange {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) ->
fmt::Result {
+ let size_mb = (self.0.end - self.0.start) >> 20;
+ f.write_fmt(fmt!(
+ "{:#x}..{:#x} ({} MB)",
+ self.0.start,
+ self.0.end,
+ size_mb
+ ))
+ }
+}
+
/// Layout of the GPU framebuffer memory.
///
/// Contains ranges of GPU memory reserved for a given purpose during the GSP
boot process.
#[derive(Debug)]
pub(crate) struct FbLayout {
/// Range of the framebuffer. Starts at `0`.
- pub(crate) fb: Range<u64>,
+ pub(crate) fb: FbRange,
/// VGA workspace, small area of reserved memory at the end of the
framebuffer.
- pub(crate) vga_workspace: Range<u64>,
+ pub(crate) vga_workspace: FbRange,
/// FRTS range.
- pub(crate) frts: Range<u64>,
+ pub(crate) frts: FbRange,
/// Memory area containing the GSP bootloader image.
- pub(crate) boot: Range<u64>,
+ pub(crate) boot: FbRange,
/// Memory area containing the GSP firmware image.
- pub(crate) elf: Range<u64>,
+ pub(crate) elf: FbRange,
/// WPR2 heap.
- pub(crate) wpr2_heap: Range<u64>,
+ pub(crate) wpr2_heap: FbRange,
/// WPR2 region range, starting with an instance of `GspFwWprMeta`.
- pub(crate) wpr2: Range<u64>,
- pub(crate) heap: Range<u64>,
+ pub(crate) wpr2: FbRange,
+ pub(crate) heap: FbRange,
pub(crate) vf_partition_count: u8,
}
@@ -125,7 +157,7 @@ pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw:
&GspFirmware) -> Result<
let fb = {
let fb_size = hal.vidmem_size(bar);
- 0..fb_size
+ FbRange(0..fb_size)
};
let vga_workspace = {
@@ -152,7 +184,7 @@ pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw:
&GspFirmware) -> Result<
}
};
- vga_base..fb.end
+ FbRange(vga_base..fb.end)
};
let frts = {
@@ -160,7 +192,7 @@ pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw:
&GspFirmware) -> Result<
const FRTS_SIZE: u64 = usize_as_u64(SZ_1M);
let frts_base = vga_workspace.start.align_down(FRTS_DOWN_ALIGN) -
FRTS_SIZE;
- frts_base..frts_base + FRTS_SIZE
+ FbRange(frts_base..frts_base + FRTS_SIZE)
};
let boot = {
@@ -168,7 +200,7 @@ pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw:
&GspFirmware) -> Result<
let bootloader_size =
u64::from_safe_cast(gsp_fw.bootloader.ucode.size());
let bootloader_base = (frts.start -
bootloader_size).align_down(BOOTLOADER_DOWN_ALIGN);
- bootloader_base..bootloader_base + bootloader_size
+ FbRange(bootloader_base..bootloader_base + bootloader_size)
};
let elf = {
@@ -176,7 +208,7 @@ pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw:
&GspFirmware) -> Result<
let elf_size = u64::from_safe_cast(gsp_fw.size);
let elf_addr = (boot.start - elf_size).align_down(ELF_DOWN_ALIGN);
- elf_addr..elf_addr + elf_size
+ FbRange(elf_addr..elf_addr + elf_size)
};
let wpr2_heap = {
@@ -185,7 +217,7 @@ pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw:
&GspFirmware) -> Result<
gsp::LibosParams::from_chipset(chipset).wpr_heap_size(chipset,
fb.end);
let wpr2_heap_addr = (elf.start -
wpr2_heap_size).align_down(WPR2_HEAP_DOWN_ALIGN);
- wpr2_heap_addr..(elf.start).align_down(WPR2_HEAP_DOWN_ALIGN)
+
FbRange(wpr2_heap_addr..(elf.start).align_down(WPR2_HEAP_DOWN_ALIGN))
};
let wpr2 = {
@@ -193,13 +225,13 @@ pub(crate) fn new(chipset: Chipset, bar: &Bar0,
gsp_fw: &GspFirmware) -> Result<
let wpr2_addr = (wpr2_heap.start -
u64::from_safe_cast(size_of::<gsp::GspFwWprMeta>()))
.align_down(WPR2_DOWN_ALIGN);
- wpr2_addr..frts.end
+ FbRange(wpr2_addr..frts.end)
};
let heap = {
const HEAP_SIZE: u64 = usize_as_u64(SZ_1M);
- wpr2.start - HEAP_SIZE..wpr2.start
+ FbRange(wpr2.start - HEAP_SIZE..wpr2.start)
};
Ok(Self {
--
2.52.0
John Hubbard
2025-Dec-03 05:58 UTC
[PATCH 02/31] gpu: nova-core: add FbRange.len() and use it in boot.rs
A tiny simplification: now that FbLayout uses its own specific FbRange
type, add an FbRange.len() method, and use that to (very slightly)
simplify the initialization of the frts_size field in FwsecCommand::Frts.
Suggested-by: Alexandre Courbot <acourbot at nvidia.com>
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/fb.rs | 6 ++++++
drivers/gpu/nova-core/gsp/boot.rs | 2 +-
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs
index 333e952400e6..9fcd915e12e1 100644
--- a/drivers/gpu/nova-core/fb.rs
+++ b/drivers/gpu/nova-core/fb.rs
@@ -100,6 +100,12 @@ pub(crate) fn unregister(&self, bar: &Bar0) {
pub(crate) struct FbRange(Range<u64>);
+impl FbRange {
+ pub(crate) fn len(&self) -> u64 {
+ self.0.end - self.0.start
+ }
+}
+
impl From<Range<u64>> for FbRange {
fn from(range: Range<u64>) -> Self {
Self(range)
diff --git a/drivers/gpu/nova-core/gsp/boot.rs
b/drivers/gpu/nova-core/gsp/boot.rs
index 54937606b5b0..846064221931 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -70,7 +70,7 @@ fn run_fwsec_frts(
bios,
FwsecCommand::Frts {
frts_addr: fb_layout.frts.start,
- frts_size: fb_layout.frts.end - fb_layout.frts.start,
+ frts_size: fb_layout.frts.len(),
},
)?;
--
2.52.0
John Hubbard
2025-Dec-03 05:58 UTC
[PATCH 03/31] gpu: nova-core: Hopper/Blackwell: basic GPU identification
Hopper (GH100) and Blackwell identification, including ELF
.fwsignature_* items.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/falcon/hal.rs | 3 ++-
drivers/gpu/nova-core/fb/hal.rs | 5 ++---
drivers/gpu/nova-core/firmware/gsp.rs | 17 +++++++++++++++++
drivers/gpu/nova-core/gpu.rs | 22 ++++++++++++++++++++++
4 files changed, 43 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/nova-core/falcon/hal.rs
b/drivers/gpu/nova-core/falcon/hal.rs
index 8dc56a28ad65..82558af1b927 100644
--- a/drivers/gpu/nova-core/falcon/hal.rs
+++ b/drivers/gpu/nova-core/falcon/hal.rs
@@ -50,7 +50,8 @@ pub(super) fn falcon_hal<E: FalconEngine + 'static>(
use Chipset::*;
let hal = match chipset {
- GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 |
AD107 => {
+ GA102 | GA103 | GA104 | GA106 | GA107 | GH100 | AD102 | AD103 | AD104 |
AD106 | AD107
+ | GB100 | GB102 | GB202 | GB203 | GB205 | GB206 | GB207 => {
KBox::new(ga102::Ga102::<E>::new(), GFP_KERNEL)? as
KBox<dyn FalconHal<E>>
}
_ => return Err(ENOTSUPP),
diff --git a/drivers/gpu/nova-core/fb/hal.rs b/drivers/gpu/nova-core/fb/hal.rs
index aba0abd8ee00..71fa92d1b709 100644
--- a/drivers/gpu/nova-core/fb/hal.rs
+++ b/drivers/gpu/nova-core/fb/hal.rs
@@ -34,8 +34,7 @@ pub(super) fn fb_hal(chipset: Chipset) -> &'static
dyn FbHal {
match chipset {
TU102 | TU104 | TU106 | TU117 | TU116 => tu102::TU102_HAL,
GA100 => ga100::GA100_HAL,
- GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 |
AD107 => {
- ga102::GA102_HAL
- }
+ GA102 | GA103 | GA104 | GA106 | GA107 | GH100 | AD102 | AD103 | AD104 |
AD106 | AD107
+ | GB100 | GB102 | GB202 | GB203 | GB205 | GB206 | GB207 =>
ga102::GA102_HAL,
}
}
diff --git a/drivers/gpu/nova-core/firmware/gsp.rs
b/drivers/gpu/nova-core/firmware/gsp.rs
index 0549805282ab..547f46b6655b 100644
--- a/drivers/gpu/nova-core/firmware/gsp.rs
+++ b/drivers/gpu/nova-core/firmware/gsp.rs
@@ -164,7 +164,24 @@ pub(crate) fn new<'a, 'b>(
let sigs_section = match chipset.arch() {
Architecture::Ampere => ".fwsignature_ga10x",
+ Architecture::Hopper => ".fwsignature_gh10x",
Architecture::Ada => ".fwsignature_ad10x",
+ Architecture::Blackwell => {
+ // Distinguish between GB10x and GB20x series
+ match chipset {
+ // GB10x series: GB100, GB102
+ Chipset::GB100 | Chipset::GB102 =>
".fwsignature_gb10x",
+ // GB20x series: GB202, GB203, GB205, GB206, GB207
+ Chipset::GB202
+ | Chipset::GB203
+ | Chipset::GB205
+ | Chipset::GB206
+ | Chipset::GB207 => ".fwsignature_gb20x",
+ // Non-Blackwell chipsets, which can't happen here, but
Rust doesn't know that.
+ _ => return Err(ENOTSUPP),
+ }
+ }
+
_ => return Err(ENOTSUPP),
};
let signatures = elf::elf64_section(fw.data(), sigs_section)
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index 629c9d2dc994..c21ce91924f5 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -83,12 +83,22 @@ fn try_from(value: u32) -> Result<Self,
Self::Error> {
GA104 = 0x174,
GA106 = 0x176,
GA107 = 0x177,
+ // Hopper
+ GH100 = 0x180,
// Ada
AD102 = 0x192,
AD103 = 0x193,
AD104 = 0x194,
AD106 = 0x196,
AD107 = 0x197,
+ // Blackwell
+ GB100 = 0x1a0,
+ GB102 = 0x1a2,
+ GB202 = 0x1b2,
+ GB203 = 0x1b3,
+ GB205 = 0x1b5,
+ GB206 = 0x1b6,
+ GB207 = 0x1b7,
});
impl Chipset {
@@ -100,9 +110,17 @@ pub(crate) fn arch(&self) -> Architecture {
Self::GA100 | Self::GA102 | Self::GA103 | Self::GA104 | Self::GA106
| Self::GA107 => {
Architecture::Ampere
}
+ Self::GH100 => Architecture::Hopper,
Self::AD102 | Self::AD103 | Self::AD104 | Self::AD106 | Self::AD107
=> {
Architecture::Ada
}
+ Self::GB100
+ | Self::GB102
+ | Self::GB202
+ | Self::GB203
+ | Self::GB205
+ | Self::GB206
+ | Self::GB207 => Architecture::Blackwell,
}
}
}
@@ -132,7 +150,9 @@ pub(crate) enum Architecture {
#[default]
Turing = 0x16,
Ampere = 0x17,
+ Hopper = 0x18,
Ada = 0x19,
+ Blackwell = 0x1b,
}
impl TryFrom<u8> for Architecture {
@@ -142,7 +162,9 @@ fn try_from(value: u8) -> Result<Self> {
match value {
0x16 => Ok(Self::Turing),
0x17 => Ok(Self::Ampere),
+ 0x18 => Ok(Self::Hopper),
0x19 => Ok(Self::Ada),
+ 0x1b => Ok(Self::Blackwell),
_ => Err(ENODEV),
}
}
--
2.52.0
John Hubbard
2025-Dec-03 05:58 UTC
[PATCH 04/31] nova-core: factor .fwsignature* selection into a new get_gsp_sigs_section()
Keep GspFirmware::new() from getting too cluttered, by factoring out the
selection of .fwsignature* items. That selection logic will continue to
grow as we add GPUs.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/firmware/gsp.rs | 43 ++++++++++++++-------------
1 file changed, 23 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/nova-core/firmware/gsp.rs
b/drivers/gpu/nova-core/firmware/gsp.rs
index 547f46b6655b..86ed4d650d05 100644
--- a/drivers/gpu/nova-core/firmware/gsp.rs
+++ b/drivers/gpu/nova-core/firmware/gsp.rs
@@ -151,39 +151,42 @@ pub(crate) struct GspFirmware {
}
impl GspFirmware {
- /// Loads the GSP firmware binaries, map them into `dev`'s
address-space, and creates the page
- /// tables expected by the GSP bootloader to load it.
- pub(crate) fn new<'a, 'b>(
- dev: &'a device::Device<device::Bound>,
- chipset: Chipset,
- ver: &'b str,
- ) -> Result<impl PinInit<Self, Error> + 'a> {
- let fw = super::request_firmware(dev, chipset, "gsp", ver)?;
-
- let fw_section = elf::elf64_section(fw.data(),
".fwimage").ok_or(EINVAL)?;
-
- let sigs_section = match chipset.arch() {
- Architecture::Ampere => ".fwsignature_ga10x",
- Architecture::Hopper => ".fwsignature_gh10x",
- Architecture::Ada => ".fwsignature_ad10x",
+ fn get_gsp_sigs_section(chipset: Chipset) -> Result<&'static
str> {
+ match chipset.arch() {
+ Architecture::Ampere => Ok(".fwsignature_ga10x"),
+ Architecture::Hopper => Ok(".fwsignature_gh10x"),
+ Architecture::Ada => Ok(".fwsignature_ad10x"),
Architecture::Blackwell => {
// Distinguish between GB10x and GB20x series
match chipset {
// GB10x series: GB100, GB102
- Chipset::GB100 | Chipset::GB102 =>
".fwsignature_gb10x",
+ Chipset::GB100 | Chipset::GB102 =>
Ok(".fwsignature_gb10x"),
// GB20x series: GB202, GB203, GB205, GB206, GB207
Chipset::GB202
| Chipset::GB203
| Chipset::GB205
| Chipset::GB206
- | Chipset::GB207 => ".fwsignature_gb20x",
+ | Chipset::GB207 => Ok(".fwsignature_gb20x"),
// Non-Blackwell chipsets, which can't happen here, but
Rust doesn't know that.
- _ => return Err(ENOTSUPP),
+ _ => Err(ENOTSUPP),
}
}
+ _ => Err(ENOTSUPP),
+ }
+ }
- _ => return Err(ENOTSUPP),
- };
+ /// Loads the GSP firmware binaries, map them into `dev`'s
address-space, and creates the page
+ /// tables expected by the GSP bootloader to load it.
+ pub(crate) fn new<'a, 'b>(
+ dev: &'a device::Device<device::Bound>,
+ chipset: Chipset,
+ ver: &'b str,
+ ) -> Result<impl PinInit<Self, Error> + 'a> {
+ let fw = super::request_firmware(dev, chipset, "gsp", ver)?;
+
+ let fw_section = elf::elf64_section(fw.data(),
".fwimage").ok_or(EINVAL)?;
+
+ let sigs_section = Self::get_gsp_sigs_section(chipset)?;
let signatures = elf::elf64_section(fw.data(), sigs_section)
.ok_or(EINVAL)
.and_then(|data| DmaObject::from_data(dev, data))?;
--
2.52.0
John Hubbard
2025-Dec-03 05:58 UTC
[PATCH 05/31] gpu: nova-core: use GPU Architecture to simplify HAL selections
Instead of long, exhaustive lists of GPUs ("Chipsets"), use entire
GPU Architectures, such as "Blackwell" or "Turing", to make
HAL choices.
A tiny side effect: moved a "use" statement out of function scope, in
each file, up to the top of the file, as per Rust for Linux conventions.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/falcon/hal.rs | 20 +++++++++++++-------
drivers/gpu/nova-core/fb/hal.rs | 17 +++++++++--------
2 files changed, 22 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/nova-core/falcon/hal.rs
b/drivers/gpu/nova-core/falcon/hal.rs
index 82558af1b927..5c504577b97c 100644
--- a/drivers/gpu/nova-core/falcon/hal.rs
+++ b/drivers/gpu/nova-core/falcon/hal.rs
@@ -9,7 +9,10 @@
FalconBromParams,
FalconEngine, //
},
- gpu::Chipset,
+ gpu::{
+ Architecture,
+ Chipset, //
+ },
};
mod ga102;
@@ -47,14 +50,17 @@ fn signature_reg_fuse_version(
pub(super) fn falcon_hal<E: FalconEngine + 'static>(
chipset: Chipset,
) -> Result<KBox<dyn FalconHal<E>>> {
- use Chipset::*;
-
- let hal = match chipset {
- GA102 | GA103 | GA104 | GA106 | GA107 | GH100 | AD102 | AD103 | AD104 |
AD106 | AD107
- | GB100 | GB102 | GB202 | GB203 | GB205 | GB206 | GB207 => {
+ let hal = match chipset.arch() {
+ Architecture::Ampere
+ | Architecture::Hopper
+ | Architecture::Ada
+ | Architecture::Blackwell => {
KBox::new(ga102::Ga102::<E>::new(), GFP_KERNEL)? as
KBox<dyn FalconHal<E>>
}
- _ => return Err(ENOTSUPP),
+ Architecture::Turing => {
+ // TODO: Add Turing falcon HAL support
+ return Err(ENOTSUPP);
+ }
};
Ok(hal)
diff --git a/drivers/gpu/nova-core/fb/hal.rs b/drivers/gpu/nova-core/fb/hal.rs
index 71fa92d1b709..d795ef7ee65d 100644
--- a/drivers/gpu/nova-core/fb/hal.rs
+++ b/drivers/gpu/nova-core/fb/hal.rs
@@ -4,7 +4,10 @@
use crate::{
driver::Bar0,
- gpu::Chipset, //
+ gpu::{
+ Architecture,
+ Chipset, //
+ },
};
mod ga100;
@@ -29,12 +32,10 @@ pub(crate) trait FbHal {
/// Returns the HAL corresponding to `chipset`.
pub(super) fn fb_hal(chipset: Chipset) -> &'static dyn FbHal {
- use Chipset::*;
-
- match chipset {
- TU102 | TU104 | TU106 | TU117 | TU116 => tu102::TU102_HAL,
- GA100 => ga100::GA100_HAL,
- GA102 | GA103 | GA104 | GA106 | GA107 | GH100 | AD102 | AD103 | AD104 |
AD106 | AD107
- | GB100 | GB102 | GB202 | GB203 | GB205 | GB206 | GB207 =>
ga102::GA102_HAL,
+ match chipset.arch() {
+ Architecture::Turing => tu102::TU102_HAL,
+ Architecture::Ampere if chipset == Chipset::GA100 =>
ga100::GA100_HAL,
+ Architecture::Ampere => ga102::GA102_HAL,
+ Architecture::Hopper | Architecture::Ada | Architecture::Blackwell
=> ga102::GA102_HAL,
}
}
--
2.52.0
John Hubbard
2025-Dec-03 05:58 UTC
[PATCH 06/31] gpu: nova-core: apply the one "use" item per line policy to commands.rs
As per [1], we need one "use" item per line, in order to reduce merge
conflicts. Furthermore, we need a trailing ", //" in order to tell
rustfmt(1) to leave it alone.
This does that for commands.rs, which is the only file in nova-core that
has any remaining instances of the old style.
[1] https://docs.kernel.org/rust/coding-guidelines.html#imports
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/gsp/fw/commands.rs | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/nova-core/gsp/fw/commands.rs
b/drivers/gpu/nova-core/gsp/fw/commands.rs
index 21be44199693..470d8edb62ff 100644
--- a/drivers/gpu/nova-core/gsp/fw/commands.rs
+++ b/drivers/gpu/nova-core/gsp/fw/commands.rs
@@ -1,8 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
-use kernel::prelude::*;
-use kernel::transmute::{AsBytes, FromBytes};
-use kernel::{device, pci};
+use kernel::{
+ device,
+ pci,
+ prelude::*,
+ transmute::{
+ AsBytes,
+ FromBytes, //
+ }, //
+};
use crate::gsp::GSP_PAGE_SIZE;
--
2.52.0
John Hubbard
2025-Dec-03 05:58 UTC
[PATCH 07/31] gpu: nova-core: set DMA mask width based on GPU architecture
This removes a "TODO" item in the code: the DMA mask width was hardcoded
to 47 bits, which is correct for Ampere and Ada GPUs. Hopper/Blackwell+
support a larger width (52 bits), so do an early read of boot42, in
order to pick the correct value.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/driver.rs | 33 +++++++++++++++++----------------
drivers/gpu/nova-core/gpu.rs | 29 ++++++++++++++++++++++++++++-
2 files changed, 45 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
index d91bbc50cde7..3179a4d47af4 100644
--- a/drivers/gpu/nova-core/driver.rs
+++ b/drivers/gpu/nova-core/driver.rs
@@ -4,8 +4,10 @@
auxiliary,
c_str,
device::Core,
- dma::Device,
- dma::DmaMask,
+ dma::{
+ Device,
+ DmaMask, //
+ },
pci,
pci::{
Class,
@@ -17,7 +19,10 @@
sync::Arc, //
};
-use crate::gpu::Gpu;
+use crate::gpu::{
+ read_architecture,
+ Gpu, //
+};
#[pin_data]
pub(crate) struct NovaCore {
@@ -28,14 +33,6 @@ pub(crate) struct NovaCore {
const BAR0_SIZE: usize = SZ_16M;
-// For now we only support Ampere which can use up to 47-bit DMA addresses.
-//
-// TODO: Add an abstraction for this to support newer GPUs which may support
-// larger DMA addresses. Limiting these GPUs to smaller address widths
won't
-// have any adverse affects, unless installed on systems which require larger
-// DMA addresses. These systems should be quite rare.
-const GPU_DMA_BITS: u32 = 47;
-
pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>;
kernel::pci_device_table!(
@@ -73,11 +70,6 @@ fn probe(pdev: &pci::Device<Core>, _info:
&Self::IdInfo) -> Result<Pin<KBox<Self
pdev.enable_device_mem()?;
pdev.set_master();
- // SAFETY: No concurrent DMA allocations or mappings can be made
because
- // the device is still being probed and therefore isn't being used
by
- // other threads of execution.
- unsafe {
pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? };
-
let devres_bar = Arc::pin_init(
pdev.iomap_region_sized::<BAR0_SIZE>(0,
c_str!("nova-core/bar0")),
GFP_KERNEL,
@@ -88,6 +80,15 @@ fn probe(pdev: &pci::Device<Core>, _info:
&Self::IdInfo) -> Result<Pin<KBox<Self
let bar_clone = Arc::clone(&devres_bar);
let bar = bar_clone.access(pdev.as_ref())?;
+ // Read the GPU architecture early to determine the correct DMA address
width.
+ // Hopper/Blackwell+ support 52-bit DMA addresses, earlier
architectures use 47-bit.
+ let arch = read_architecture(bar)?;
+
+ // SAFETY: No concurrent DMA allocations or mappings can be made
because
+ // the device is still being probed and therefore isn't being used
by
+ // other threads of execution.
+ unsafe {
pdev.dma_set_mask_and_coherent(DmaMask::try_new(arch.dma_addr_bits())?)? };
+
let this = KBox::pin_init(
try_pin_init!(Self {
gpu <- Gpu::new(pdev, devres_bar, bar),
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index c21ce91924f5..624bbc2a54e8 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -102,7 +102,7 @@ fn try_from(value: u32) -> Result<Self,
Self::Error> {
});
impl Chipset {
- pub(crate) fn arch(&self) -> Architecture {
+ pub(crate) const fn arch(&self) -> Architecture {
match self {
Self::TU102 | Self::TU104 | Self::TU106 | Self::TU117 | Self::TU116
=> {
Architecture::Turing
@@ -155,6 +155,19 @@ pub(crate) enum Architecture {
Blackwell = 0x1b,
}
+impl Architecture {
+ /// Returns the number of DMA address bits supported by this architecture.
+ ///
+ /// Hopper and Blackwell support 52-bit DMA addresses, while earlier
architectures
+ /// (Turing, Ampere, Ada) support 47-bit DMA addresses.
+ pub(crate) const fn dma_addr_bits(&self) -> u32 {
+ match self {
+ Self::Turing | Self::Ampere | Self::Ada => 47,
+ Self::Hopper | Self::Blackwell => 52,
+ }
+ }
+}
+
impl TryFrom<u8> for Architecture {
type Error = Error;
@@ -203,6 +216,20 @@ pub(crate) struct Spec {
revision: Revision,
}
+/// Reads the GPU architecture from BAR0 registers.
+///
+/// This is a lightweight check used early in probe to determine the correct
DMA address width
+/// before the full [`Spec`] is constructed.
+pub(crate) fn read_architecture(bar: &Bar0) ->
Result<Architecture> {
+ let boot0 = regs::NV_PMC_BOOT_0::read(bar);
+
+ if boot0.is_older_than_fermi() {
+ return Err(ENODEV);
+ }
+
+ regs::NV_PMC_BOOT_42::read(bar).architecture()
+}
+
impl Spec {
fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
// Some brief notes about boot0 and boot42, in chronological order:
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 08/31] gpu: nova-core: move firmware image parsing code to firmware.rs
Up until now, only the GSP required parsing of its firmware headers.
However, upcoming support for Hopper/Blackwell+ adds another firmware
image (FMC), along with another format (ELF32).
Therefore, the current ELF64 section parsing support needs to be moved
up a level, so that both of the above can use it.
There are no functional changes. This is pure code movement.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/firmware.rs | 89 +++++++++++++++++++++++++
drivers/gpu/nova-core/firmware/gsp.rs | 94 ++-------------------------
2 files changed, 93 insertions(+), 90 deletions(-)
diff --git a/drivers/gpu/nova-core/firmware.rs
b/drivers/gpu/nova-core/firmware.rs
index 2d2008b33fb4..31a89abc5a87 100644
--- a/drivers/gpu/nova-core/firmware.rs
+++ b/drivers/gpu/nova-core/firmware.rs
@@ -242,3 +242,92 @@ pub(crate) const fn create(
this.0
}
}
+
+/// Ad-hoc and temporary module to extract sections from ELF images.
+///
+/// Some firmware images are currently packaged as ELF files, where sections
names are used as keys
+/// to specific and related bits of data. Future firmware versions are
scheduled to move away from
+/// that scheme before nova-core becomes stable, which means this module will
eventually be
+/// removed.
+mod elf {
+ use core::mem::size_of;
+
+ use kernel::bindings;
+ use kernel::str::CStr;
+ use kernel::transmute::FromBytes;
+
+ /// Newtype to provide a [`FromBytes`] implementation.
+ #[repr(transparent)]
+ struct Elf64Hdr(bindings::elf64_hdr);
+ // SAFETY: all bit patterns are valid for this type, and it doesn't use
interior mutability.
+ unsafe impl FromBytes for Elf64Hdr {}
+
+ #[repr(transparent)]
+ struct Elf64SHdr(bindings::elf64_shdr);
+ // SAFETY: all bit patterns are valid for this type, and it doesn't use
interior mutability.
+ unsafe impl FromBytes for Elf64SHdr {}
+
+ /// Tries to extract section with name `name` from the ELF64 image `elf`,
and returns it.
+ pub(super) fn elf64_section<'a, 'b>(elf: &'a [u8],
name: &'b str) -> Option<&'a [u8]> {
+ let hdr = &elf
+ .get(0..size_of::<bindings::elf64_hdr>())
+ .and_then(Elf64Hdr::from_bytes)?
+ .0;
+
+ // Get all the section headers.
+ let mut shdr = {
+ let shdr_num = usize::from(hdr.e_shnum);
+ let shdr_start = usize::try_from(hdr.e_shoff).ok()?;
+ let shdr_end = shdr_num
+ .checked_mul(size_of::<Elf64SHdr>())
+ .and_then(|v| v.checked_add(shdr_start))?;
+
+ elf.get(shdr_start..shdr_end)
+ .map(|slice| slice.chunks_exact(size_of::<Elf64SHdr>()))?
+ };
+
+ // Get the strings table.
+ let strhdr = shdr
+ .clone()
+ .nth(usize::from(hdr.e_shstrndx))
+ .and_then(Elf64SHdr::from_bytes)?;
+
+ // Find the section which name matches `name` and return it.
+ shdr.find(|&sh| {
+ let Some(hdr) = Elf64SHdr::from_bytes(sh) else {
+ return false;
+ };
+
+ let Some(name_idx) = strhdr
+ .0
+ .sh_offset
+ .checked_add(u64::from(hdr.0.sh_name))
+ .and_then(|idx| usize::try_from(idx).ok())
+ else {
+ return false;
+ };
+
+ // Get the start of the name.
+ elf.get(name_idx..)
+ // Stop at the first `0`.
+ .and_then(|nstr| nstr.get(0..=nstr.iter().position(|b| *b ==
0)?))
+ // Convert into CStr. This should never fail because of the
line above.
+ .and_then(|nstr| CStr::from_bytes_with_nul(nstr).ok())
+ // Convert into str.
+ .and_then(|c_str| c_str.to_str().ok())
+ // Check that the name matches.
+ .map(|str| str == name)
+ .unwrap_or(false)
+ })
+ // Return the slice containing the section.
+ .and_then(|sh| {
+ let hdr = Elf64SHdr::from_bytes(sh)?;
+ let start = usize::try_from(hdr.0.sh_offset).ok()?;
+ let end = usize::try_from(hdr.0.sh_size)
+ .ok()
+ .and_then(|sh_size| start.checked_add(sh_size))?;
+
+ elf.get(start..end)
+ })
+ }
+}
diff --git a/drivers/gpu/nova-core/firmware/gsp.rs
b/drivers/gpu/nova-core/firmware/gsp.rs
index 86ed4d650d05..4d84bd049d9c 100644
--- a/drivers/gpu/nova-core/firmware/gsp.rs
+++ b/drivers/gpu/nova-core/firmware/gsp.rs
@@ -18,7 +18,10 @@
use crate::{
dma::DmaObject,
- firmware::riscv::RiscvFirmware,
+ firmware::{
+ elf,
+ riscv::RiscvFirmware, //
+ },
gpu::{
Architecture,
Chipset, //
@@ -27,95 +30,6 @@
num::FromSafeCast,
};
-/// Ad-hoc and temporary module to extract sections from ELF images.
-///
-/// Some firmware images are currently packaged as ELF files, where sections
names are used as keys
-/// to specific and related bits of data. Future firmware versions are
scheduled to move away from
-/// that scheme before nova-core becomes stable, which means this module will
eventually be
-/// removed.
-mod elf {
- use core::mem::size_of;
-
- use kernel::bindings;
- use kernel::str::CStr;
- use kernel::transmute::FromBytes;
-
- /// Newtype to provide a [`FromBytes`] implementation.
- #[repr(transparent)]
- struct Elf64Hdr(bindings::elf64_hdr);
- // SAFETY: all bit patterns are valid for this type, and it doesn't use
interior mutability.
- unsafe impl FromBytes for Elf64Hdr {}
-
- #[repr(transparent)]
- struct Elf64SHdr(bindings::elf64_shdr);
- // SAFETY: all bit patterns are valid for this type, and it doesn't use
interior mutability.
- unsafe impl FromBytes for Elf64SHdr {}
-
- /// Tries to extract section with name `name` from the ELF64 image `elf`,
and returns it.
- pub(super) fn elf64_section<'a, 'b>(elf: &'a [u8],
name: &'b str) -> Option<&'a [u8]> {
- let hdr = &elf
- .get(0..size_of::<bindings::elf64_hdr>())
- .and_then(Elf64Hdr::from_bytes)?
- .0;
-
- // Get all the section headers.
- let mut shdr = {
- let shdr_num = usize::from(hdr.e_shnum);
- let shdr_start = usize::try_from(hdr.e_shoff).ok()?;
- let shdr_end = shdr_num
- .checked_mul(size_of::<Elf64SHdr>())
- .and_then(|v| v.checked_add(shdr_start))?;
-
- elf.get(shdr_start..shdr_end)
- .map(|slice| slice.chunks_exact(size_of::<Elf64SHdr>()))?
- };
-
- // Get the strings table.
- let strhdr = shdr
- .clone()
- .nth(usize::from(hdr.e_shstrndx))
- .and_then(Elf64SHdr::from_bytes)?;
-
- // Find the section which name matches `name` and return it.
- shdr.find(|&sh| {
- let Some(hdr) = Elf64SHdr::from_bytes(sh) else {
- return false;
- };
-
- let Some(name_idx) = strhdr
- .0
- .sh_offset
- .checked_add(u64::from(hdr.0.sh_name))
- .and_then(|idx| usize::try_from(idx).ok())
- else {
- return false;
- };
-
- // Get the start of the name.
- elf.get(name_idx..)
- // Stop at the first `0`.
- .and_then(|nstr| nstr.get(0..=nstr.iter().position(|b| *b ==
0)?))
- // Convert into CStr. This should never fail because of the
line above.
- .and_then(|nstr| CStr::from_bytes_with_nul(nstr).ok())
- // Convert into str.
- .and_then(|c_str| c_str.to_str().ok())
- // Check that the name matches.
- .map(|str| str == name)
- .unwrap_or(false)
- })
- // Return the slice containing the section.
- .and_then(|sh| {
- let hdr = Elf64SHdr::from_bytes(sh)?;
- let start = usize::try_from(hdr.0.sh_offset).ok()?;
- let end = usize::try_from(hdr.0.sh_size)
- .ok()
- .and_then(|sh_size| start.checked_add(sh_size))?;
-
- elf.get(start..end)
- })
- }
-}
-
/// GSP firmware with 3-level radix page tables for the GSP bootloader.
///
/// The bootloader expects firmware to be mapped starting at address 0 in
GSP's virtual address
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 09/31] gpu: nova-core: factor out a section_name_eq() function
Factor the section name comparison out of elf64_section() and into a new
section_name_eq() helper. This is an incremental step toward adding ELF32
section support alongside the existing ELF64 section support, for
handling GPU firmware.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/firmware.rs | 39 +++++++++++++++----------------
1 file changed, 19 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/nova-core/firmware.rs
b/drivers/gpu/nova-core/firmware.rs
index 31a89abc5a87..5ed079a45ec2 100644
--- a/drivers/gpu/nova-core/firmware.rs
+++ b/drivers/gpu/nova-core/firmware.rs
@@ -267,6 +267,24 @@ unsafe impl FromBytes for Elf64Hdr {}
// SAFETY: all bit patterns are valid for this type, and it doesn't use
interior mutability.
unsafe impl FromBytes for Elf64SHdr {}
+ /// Check if the section name at `strtab_offset + name_offset` equals
`target`.
+ fn section_name_eq(elf: &[u8], strtab_offset: u64, name_offset: u32,
target: &str) -> bool {
+ strtab_offset
+ // Compute the index into the ELF image.
+ .checked_add(u64::from(name_offset))
+ .and_then(|idx| usize::try_from(idx).ok())
+ // Get the start of the name.
+ .and_then(|name_idx| elf.get(name_idx..))
+ // Stop at the first `0`.
+ .and_then(|s| s.get(0..=s.iter().position(|b| *b == 0)?))
+ // Convert into CStr.
+ .and_then(|s| CStr::from_bytes_with_nul(s).ok())
+ // Convert into str.
+ .and_then(|s| s.to_str().ok())
+ // Check that the name matches.
+ .is_some_and(|s| s == target)
+ }
+
/// Tries to extract section with name `name` from the ELF64 image `elf`,
and returns it.
pub(super) fn elf64_section<'a, 'b>(elf: &'a [u8],
name: &'b str) -> Option<&'a [u8]> {
let hdr = &elf
@@ -298,26 +316,7 @@ pub(super) fn elf64_section<'a, 'b>(elf:
&'a [u8], name: &'b str) -> Option<&'a
return false;
};
- let Some(name_idx) = strhdr
- .0
- .sh_offset
- .checked_add(u64::from(hdr.0.sh_name))
- .and_then(|idx| usize::try_from(idx).ok())
- else {
- return false;
- };
-
- // Get the start of the name.
- elf.get(name_idx..)
- // Stop at the first `0`.
- .and_then(|nstr| nstr.get(0..=nstr.iter().position(|b| *b ==
0)?))
- // Convert into CStr. This should never fail because of the
line above.
- .and_then(|nstr| CStr::from_bytes_with_nul(nstr).ok())
- // Convert into str.
- .and_then(|c_str| c_str.to_str().ok())
- // Check that the name matches.
- .map(|str| str == name)
- .unwrap_or(false)
+ section_name_eq(elf, strhdr.0.sh_offset, hdr.0.sh_name, name)
})
// Return the slice containing the section.
.and_then(|sh| {
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 10/31] gpu: nova-core: don't assume 64-bit firmware images
Add ElfHeader and ElfSectionHeader traits to abstract out differences
between ELF32 and ELF64. Implement these for ELF64.
This is in preparation for upcoming ELF32 section support, and for
auto-selecting ELF32 or ELF64.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/firmware.rs | 118 ++++++++++++++++++++----------
1 file changed, 80 insertions(+), 38 deletions(-)
diff --git a/drivers/gpu/nova-core/firmware.rs
b/drivers/gpu/nova-core/firmware.rs
index 5ed079a45ec2..1cb5897778f8 100644
--- a/drivers/gpu/nova-core/firmware.rs
+++ b/drivers/gpu/nova-core/firmware.rs
@@ -256,17 +256,60 @@ mod elf {
use kernel::str::CStr;
use kernel::transmute::FromBytes;
- /// Newtype to provide a [`FromBytes`] implementation.
+ /// Trait to abstract over ELF header differences (32-bit vs 64-bit).
+ trait ElfHeader: FromBytes {
+ fn shnum(&self) -> u16;
+ fn shoff(&self) -> u64;
+ fn shstrndx(&self) -> u16;
+ }
+
+ /// Trait to abstract over ELF section header differences (32-bit vs
64-bit).
+ trait ElfSectionHeader: FromBytes {
+ fn name(&self) -> u32;
+ fn offset(&self) -> u64;
+ fn size(&self) -> u64;
+ }
+
+ /// Newtype to provide [`FromBytes`] and [`ElfHeader`] implementations.
#[repr(transparent)]
struct Elf64Hdr(bindings::elf64_hdr);
// SAFETY: all bit patterns are valid for this type, and it doesn't use
interior mutability.
unsafe impl FromBytes for Elf64Hdr {}
+ impl ElfHeader for Elf64Hdr {
+ fn shnum(&self) -> u16 {
+ self.0.e_shnum
+ }
+
+ fn shoff(&self) -> u64 {
+ self.0.e_shoff
+ }
+
+ fn shstrndx(&self) -> u16 {
+ self.0.e_shstrndx
+ }
+ }
+
+ /// Newtype to provide [`FromBytes`] and [`ElfSectionHeader`]
implementations.
#[repr(transparent)]
struct Elf64SHdr(bindings::elf64_shdr);
// SAFETY: all bit patterns are valid for this type, and it doesn't use
interior mutability.
unsafe impl FromBytes for Elf64SHdr {}
+ impl ElfSectionHeader for Elf64SHdr {
+ fn name(&self) -> u32 {
+ self.0.sh_name
+ }
+
+ fn offset(&self) -> u64 {
+ self.0.sh_offset
+ }
+
+ fn size(&self) -> u64 {
+ self.0.sh_size
+ }
+ }
+
/// Check if the section name at `strtab_offset + name_offset` equals
`target`.
fn section_name_eq(elf: &[u8], strtab_offset: u64, name_offset: u32,
target: &str) -> bool {
strtab_offset
@@ -285,48 +328,47 @@ fn section_name_eq(elf: &[u8], strtab_offset: u64,
name_offset: u32, target: &st
.is_some_and(|s| s == target)
}
- /// Tries to extract section with name `name` from the ELF64 image `elf`,
and returns it.
- pub(super) fn elf64_section<'a, 'b>(elf: &'a [u8],
name: &'b str) -> Option<&'a [u8]> {
- let hdr = &elf
- .get(0..size_of::<bindings::elf64_hdr>())
- .and_then(Elf64Hdr::from_bytes)?
- .0;
-
- // Get all the section headers.
- let mut shdr = {
- let shdr_num = usize::from(hdr.e_shnum);
- let shdr_start = usize::try_from(hdr.e_shoff).ok()?;
- let shdr_end = shdr_num
- .checked_mul(size_of::<Elf64SHdr>())
- .and_then(|v| v.checked_add(shdr_start))?;
-
- elf.get(shdr_start..shdr_end)
- .map(|slice| slice.chunks_exact(size_of::<Elf64SHdr>()))?
- };
+ fn elf_section_generic<'a, H, S>(elf: &'a [u8], name:
&str) -> Option<&'a [u8]>
+ where
+ H: ElfHeader,
+ S: ElfSectionHeader,
+ {
+ let hdr = H::from_bytes(elf.get(0..size_of::<H>())?)?;
+
+ let shdr_num = usize::from(hdr.shnum());
+ let shdr_start = usize::try_from(hdr.shoff()).ok()?;
+ let shdr_end = shdr_num
+ .checked_mul(size_of::<S>())
+ .and_then(|v| v.checked_add(shdr_start))?;
+
+ // Get all the section headers as an iterator over byte chunks.
+ let shdr_bytes = elf.get(shdr_start..shdr_end)?;
+ let mut shdr_iter = shdr_bytes.chunks_exact(size_of::<S>());
// Get the strings table.
- let strhdr = shdr
+ let strhdr = shdr_iter
.clone()
- .nth(usize::from(hdr.e_shstrndx))
- .and_then(Elf64SHdr::from_bytes)?;
+ .nth(usize::from(hdr.shstrndx()))
+ .and_then(S::from_bytes)?;
// Find the section which name matches `name` and return it.
- shdr.find(|&sh| {
- let Some(hdr) = Elf64SHdr::from_bytes(sh) else {
- return false;
- };
-
- section_name_eq(elf, strhdr.0.sh_offset, hdr.0.sh_name, name)
- })
- // Return the slice containing the section.
- .and_then(|sh| {
- let hdr = Elf64SHdr::from_bytes(sh)?;
- let start = usize::try_from(hdr.0.sh_offset).ok()?;
- let end = usize::try_from(hdr.0.sh_size)
- .ok()
- .and_then(|sh_size| start.checked_add(sh_size))?;
-
- elf.get(start..end)
+ shdr_iter.find_map(|sh_bytes| {
+ let sh = S::from_bytes(sh_bytes)?;
+
+ if section_name_eq(elf, strhdr.offset(), sh.name(), name) {
+ let start = usize::try_from(sh.offset()).ok()?;
+ let end = usize::try_from(sh.size())
+ .ok()
+ .and_then(|sz| start.checked_add(sz))?;
+ elf.get(start..end)
+ } else {
+ None
+ }
})
}
+
+ /// Extract the section with name `name` from the ELF64 image `elf`.
+ pub(super) fn elf64_section<'a>(elf: &'a [u8], name:
&str) -> Option<&'a [u8]> {
+ elf_section_generic::<Elf64Hdr, Elf64SHdr>(elf, name)
+ }
}
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 11/31] gpu: nova-core: add support for 32-bit firmware images
Add Elf32Hdr and Elf32SHdr newtypes, implement the ElfHeader and
ElfSectionHeader traits for them, and add elf32_section().
This mirrors the existing ELF64 support, using the same generic
infrastructure.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/firmware.rs | 46 +++++++++++++++++++++++++++++++
1 file changed, 46 insertions(+)
diff --git a/drivers/gpu/nova-core/firmware.rs
b/drivers/gpu/nova-core/firmware.rs
index 1cb5897778f8..c355e0d7e407 100644
--- a/drivers/gpu/nova-core/firmware.rs
+++ b/drivers/gpu/nova-core/firmware.rs
@@ -310,6 +310,46 @@ fn size(&self) -> u64 {
}
}
+ /// Newtype to provide [`FromBytes`] and [`ElfHeader`] implementations for
ELF32.
+ #[repr(transparent)]
+ struct Elf32Hdr(bindings::elf32_hdr);
+ // SAFETY: all bit patterns are valid for this type, and it doesn't use
interior mutability.
+ unsafe impl FromBytes for Elf32Hdr {}
+
+ impl ElfHeader for Elf32Hdr {
+ fn shnum(&self) -> u16 {
+ self.0.e_shnum
+ }
+
+ fn shoff(&self) -> u64 {
+ u64::from(self.0.e_shoff)
+ }
+
+ fn shstrndx(&self) -> u16 {
+ self.0.e_shstrndx
+ }
+ }
+
+ /// Newtype to provide [`FromBytes`] and [`ElfSectionHeader`]
implementations for ELF32.
+ #[repr(transparent)]
+ struct Elf32SHdr(bindings::elf32_shdr);
+ // SAFETY: all bit patterns are valid for this type, and it doesn't use
interior mutability.
+ unsafe impl FromBytes for Elf32SHdr {}
+
+ impl ElfSectionHeader for Elf32SHdr {
+ fn name(&self) -> u32 {
+ self.0.sh_name
+ }
+
+ fn offset(&self) -> u64 {
+ u64::from(self.0.sh_offset)
+ }
+
+ fn size(&self) -> u64 {
+ u64::from(self.0.sh_size)
+ }
+ }
+
/// Check if the section name at `strtab_offset + name_offset` equals
`target`.
fn section_name_eq(elf: &[u8], strtab_offset: u64, name_offset: u32,
target: &str) -> bool {
strtab_offset
@@ -371,4 +411,10 @@ fn elf_section_generic<'a, H, S>(elf: &'a
[u8], name: &str) -> Option<&'a [u8]>
pub(super) fn elf64_section<'a>(elf: &'a [u8], name:
&str) -> Option<&'a [u8]> {
elf_section_generic::<Elf64Hdr, Elf64SHdr>(elf, name)
}
+
+ /// Extract section with name `name` from the ELF32 image `elf`.
+ #[expect(dead_code)]
+ pub(super) fn elf32_section<'a>(elf: &'a [u8], name:
&str) -> Option<&'a [u8]> {
+ elf_section_generic::<Elf32Hdr, Elf32SHdr>(elf, name)
+ }
}
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 12/31] gpu: nova-core: add auto-detection of 32-bit, 64-bit firmware images
Add elf_section() which checks the ELF magic and class byte to
automatically dispatch to elf32_section() or elf64_section().
Update existing callers to use elf_section() instead of calling
elf64_section() directly.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/firmware.rs | 20 +++++++++++++++++---
drivers/gpu/nova-core/firmware/gsp.rs | 4 ++--
2 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/nova-core/firmware.rs
b/drivers/gpu/nova-core/firmware.rs
index c355e0d7e407..e45b91bb45a3 100644
--- a/drivers/gpu/nova-core/firmware.rs
+++ b/drivers/gpu/nova-core/firmware.rs
@@ -408,13 +408,27 @@ fn elf_section_generic<'a, H, S>(elf:
&'a [u8], name: &str) -> Option<&'a [u8]>
}
/// Extract the section with name `name` from the ELF64 image `elf`.
- pub(super) fn elf64_section<'a>(elf: &'a [u8], name:
&str) -> Option<&'a [u8]> {
+ fn elf64_section<'a>(elf: &'a [u8], name: &str) ->
Option<&'a [u8]> {
elf_section_generic::<Elf64Hdr, Elf64SHdr>(elf, name)
}
/// Extract section with name `name` from the ELF32 image `elf`.
- #[expect(dead_code)]
- pub(super) fn elf32_section<'a>(elf: &'a [u8], name:
&str) -> Option<&'a [u8]> {
+ fn elf32_section<'a>(elf: &'a [u8], name: &str) ->
Option<&'a [u8]> {
elf_section_generic::<Elf32Hdr, Elf32SHdr>(elf, name)
}
+
+ /// Extracts section `name` from `elf`, auto-detecting ELF32 vs ELF64 from the ELF header.
+ pub(super) fn elf_section<'a>(elf: &'a [u8], name:
&str) -> Option<&'a [u8]> {
+ // Check ELF magic.
+ if elf.len() < 5 || elf.get(0..4)? != b"\x7fELF" {
+ return None;
+ }
+
+ // Check ELF class: 1 = 32-bit, 2 = 64-bit.
+ match elf.get(4)? {
+ 1 => elf32_section(elf, name),
+ 2 => elf64_section(elf, name),
+ _ => None,
+ }
+ }
}
diff --git a/drivers/gpu/nova-core/firmware/gsp.rs
b/drivers/gpu/nova-core/firmware/gsp.rs
index 4d84bd049d9c..1f473e73a428 100644
--- a/drivers/gpu/nova-core/firmware/gsp.rs
+++ b/drivers/gpu/nova-core/firmware/gsp.rs
@@ -98,10 +98,10 @@ pub(crate) fn new<'a, 'b>(
) -> Result<impl PinInit<Self, Error> + 'a> {
let fw = super::request_firmware(dev, chipset, "gsp", ver)?;
- let fw_section = elf::elf64_section(fw.data(),
".fwimage").ok_or(EINVAL)?;
+ let fw_section = elf::elf_section(fw.data(),
".fwimage").ok_or(EINVAL)?;
let sigs_section = Self::get_gsp_sigs_section(chipset)?;
- let signatures = elf::elf64_section(fw.data(), sigs_section)
+ let signatures = elf::elf_section(fw.data(), sigs_section)
.ok_or(EINVAL)
.and_then(|data| DmaObject::from_data(dev, data))?;
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 13/31] gpu: nova-core: Hopper/Blackwell: add FMC firmware image, in support of FSP
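FSP is a hardware unit that runs FMC firmware. Add FspFirmware, which
requests the FMC firmware image and extracts its "image" ELF section,
keeping the full ELF data as well for signature extraction.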
Co-developed-by: Alexandre Courbot <acourbot at nvidia.com>
Signed-off-by: Alexandre Courbot <acourbot at nvidia.com>
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/firmware.rs | 1 +
drivers/gpu/nova-core/firmware/fsp.rs | 44 +++++++++++++++++++++++++++
2 files changed, 45 insertions(+)
create mode 100644 drivers/gpu/nova-core/firmware/fsp.rs
diff --git a/drivers/gpu/nova-core/firmware.rs
b/drivers/gpu/nova-core/firmware.rs
index e45b91bb45a3..5cbb8be7434f 100644
--- a/drivers/gpu/nova-core/firmware.rs
+++ b/drivers/gpu/nova-core/firmware.rs
@@ -24,6 +24,7 @@
};
pub(crate) mod booter;
+pub(crate) mod fsp;
pub(crate) mod fwsec;
pub(crate) mod gsp;
pub(crate) mod riscv;
diff --git a/drivers/gpu/nova-core/firmware/fsp.rs
b/drivers/gpu/nova-core/firmware/fsp.rs
new file mode 100644
index 000000000000..80401b964488
--- /dev/null
+++ b/drivers/gpu/nova-core/firmware/fsp.rs
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! FSP is a hardware unit that runs FMC firmware.
+
+use kernel::{
+ device,
+ prelude::*, //
+};
+
+use crate::{
+ dma::DmaObject,
+ firmware::elf,
+ gpu::Chipset, //
+};
+
+#[expect(unused)]
+pub(crate) struct FspFirmware {
+ /// FMC firmware image data (only the "image" ELF section)
+ fmc_image: DmaObject,
+ /// Full FMC ELF data (for signature extraction)
+ fmc_full: DmaObject,
+}
+
+impl FspFirmware {
+ #[expect(unused)]
+ pub(crate) fn new(
+ dev: &device::Device<device::Bound>,
+ chipset: Chipset,
+ ver: &str,
+ ) -> Result<Self> {
+ let fw = super::request_firmware(dev, chipset, "fmc", ver)?;
+
+ // FSP expects only the "image" section, not the entire ELF file
+ let fmc_image_data = elf::elf_section(fw.data(),
"image").ok_or_else(|| {
+ dev_err!(dev, "FMC ELF file missing 'image'
section\n");
+ EINVAL
+ })?;
+
+ Ok(Self {
+ fmc_image: DmaObject::from_data(dev, fmc_image_data)?,
+ fmc_full: DmaObject::from_data(dev, fw.data())?,
+ })
+ }
+}
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 14/31] gpu: nova-core: Hopper/Blackwell: add FSP falcon engine stub
Add the FSP (Firmware System Processor) falcon engine type that will
handle secure boot and Chain of Trust operations on Hopper and Blackwell
architectures.
The FSP falcon replaces SEC2's role in the boot sequence for these newer
architectures. This initial stub just defines the falcon type and its
base address.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/falcon.rs | 1 +
drivers/gpu/nova-core/falcon/fsp.rs | 32 +++++++++++++++++++++++++++++
2 files changed, 33 insertions(+)
create mode 100644 drivers/gpu/nova-core/falcon/fsp.rs
diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
index 82c661aef594..4fcd55f731d7 100644
--- a/drivers/gpu/nova-core/falcon.rs
+++ b/drivers/gpu/nova-core/falcon.rs
@@ -30,6 +30,7 @@
regs::macros::RegisterBase, //
};
+pub(crate) mod fsp;
pub(crate) mod gsp;
mod hal;
pub(crate) mod sec2;
diff --git a/drivers/gpu/nova-core/falcon/fsp.rs
b/drivers/gpu/nova-core/falcon/fsp.rs
new file mode 100644
index 000000000000..7323ae2f2302
--- /dev/null
+++ b/drivers/gpu/nova-core/falcon/fsp.rs
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! FSP (Firmware System Processor) falcon engine for Hopper/Blackwell GPUs.
+//!
+//! The FSP falcon handles secure boot and Chain of Trust operations
+//! on Hopper and Blackwell architectures, replacing SEC2's role.
+
+use crate::{
+ falcon::{
+ FalconEngine,
+ PFalcon2Base,
+ PFalconBase, //
+ },
+ regs::macros::RegisterBase,
+};
+
+/// Type specifying the `Fsp` falcon engine. Cannot be instantiated.
+#[allow(dead_code)]
+pub(crate) struct Fsp(());
+
+impl RegisterBase<PFalconBase> for Fsp {
+ // FSP falcon base address for Blackwell
+ const BASE: usize = 0x8f2000;
+}
+
+impl RegisterBase<PFalcon2Base> for Fsp {
+ const BASE: usize = 0x8f3000;
+}
+
+impl FalconEngine for Fsp {
+ const ID: Self = Fsp(());
+}
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 15/31] gpu: nova-core: Hopper/Blackwell: add FSP falcon EMEM operations
Add external memory (EMEM) read/write operations to the GPU's FSP falcon
engine. These operations use Falcon PIO (Programmed I/O) to communicate
with the FSP through indirect memory access.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/falcon/fsp.rs | 60 ++++++++++++++++++++++++++++-
drivers/gpu/nova-core/regs.rs | 10 +++++
2 files changed, 69 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nova-core/falcon/fsp.rs
b/drivers/gpu/nova-core/falcon/fsp.rs
index 7323ae2f2302..9e796e82e556 100644
--- a/drivers/gpu/nova-core/falcon/fsp.rs
+++ b/drivers/gpu/nova-core/falcon/fsp.rs
@@ -5,15 +5,27 @@
//! The FSP falcon handles secure boot and Chain of Trust operations
//! on Hopper and Blackwell architectures, replacing SEC2's role.
+use kernel::prelude::*;
+
use crate::{
+ driver::Bar0,
falcon::{
+ Falcon,
FalconEngine,
PFalcon2Base,
PFalconBase, //
},
- regs::macros::RegisterBase,
+ regs::{
+ self,
+ macros::RegisterBase, //
+ },
};
+/// EMEM control register bit 24: write mode.
+const EMEM_CTL_WRITE: u32 = 1 << 24;
+/// EMEM control register bit 25: read mode.
+const EMEM_CTL_READ: u32 = 1 << 25;
+
/// Type specifying the `Fsp` falcon engine. Cannot be instantiated.
#[allow(dead_code)]
pub(crate) struct Fsp(());
@@ -30,3 +42,49 @@ impl RegisterBase<PFalcon2Base> for Fsp {
impl FalconEngine for Fsp {
const ID: Self = Fsp(());
}
+
+impl Falcon<Fsp> {
+ /// Writes `data` to FSP external memory at byte `offset` using Falcon PIO.
+ ///
+ /// Returns `EINVAL` if offset or data length is not 4-byte aligned.
+ #[allow(dead_code)]
+ pub(crate) fn write_emem(&self, bar: &Bar0, offset: u32, data:
&[u8]) -> Result {
+ if offset % 4 != 0 || data.len() % 4 != 0 {
+ return Err(EINVAL);
+ }
+
+ regs::NV_PFALCON_FALCON_EMEM_CTL::default()
+ .set_value(EMEM_CTL_WRITE | offset)
+ .write(bar, &Fsp::ID);
+
+ for chunk in data.chunks_exact(4) {
+ let word = u32::from_le_bytes([chunk[0], chunk[1], chunk[2],
chunk[3]]);
+ regs::NV_PFALCON_FALCON_EMEM_DATA::default()
+ .set_data(word)
+ .write(bar, &Fsp::ID);
+ }
+
+ Ok(())
+ }
+
+ /// Reads FSP external memory at byte `offset` into `data` using Falcon
PIO.
+ ///
+ /// Returns `EINVAL` if offset or data length is not 4-byte aligned.
+ #[allow(dead_code)]
+ pub(crate) fn read_emem(&self, bar: &Bar0, offset: u32, data:
&mut [u8]) -> Result {
+ if offset % 4 != 0 || data.len() % 4 != 0 {
+ return Err(EINVAL);
+ }
+
+ regs::NV_PFALCON_FALCON_EMEM_CTL::default()
+ .set_value(EMEM_CTL_READ | offset)
+ .write(bar, &Fsp::ID);
+
+ for chunk in data.chunks_exact_mut(4) {
+ let word = regs::NV_PFALCON_FALCON_EMEM_DATA::read(bar,
&Fsp::ID).data();
+ chunk.copy_from_slice(&word.to_le_bytes());
+ }
+
+ Ok(())
+ }
+}
diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
index 82cc6c0790e5..b642cee9611d 100644
--- a/drivers/gpu/nova-core/regs.rs
+++ b/drivers/gpu/nova-core/regs.rs
@@ -391,6 +391,16 @@ pub(crate) fn mem_scrubbing_done(self) -> bool {
8:8 br_fetch as bool;
});
+// GP102 EMEM PIO registers (used by FSP for Hopper/Blackwell)
+// These registers provide falcon external memory communication interface
+register!(NV_PFALCON_FALCON_EMEM_CTL @ PFalconBase[0x00000ac0] {
+ 31:0 value as u32; // EMEM control register
+});
+
+register!(NV_PFALCON_FALCON_EMEM_DATA @ PFalconBase[0x00000ac4] {
+ 31:0 data as u32; // EMEM data register
+});
+
// The modules below provide registers that are not identical on all supported
chips. They should
// only be used in HAL modules.
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 16/31] gpu: nova-core: Hopper/Blackwell: add FSP message infrastructure
Add the FSP messaging infrastructure needed for Chain of Trust
communication on Hopper/Blackwell GPUs.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/falcon/fsp.rs | 77 +++++++++++++++++++++++++++++
drivers/gpu/nova-core/regs.rs | 48 ++++++++++++++++++
2 files changed, 125 insertions(+)
diff --git a/drivers/gpu/nova-core/falcon/fsp.rs
b/drivers/gpu/nova-core/falcon/fsp.rs
index 9e796e82e556..0e8522b1171d 100644
--- a/drivers/gpu/nova-core/falcon/fsp.rs
+++ b/drivers/gpu/nova-core/falcon/fsp.rs
@@ -87,4 +87,81 @@ pub(crate) fn read_emem(&self, bar: &Bar0, offset:
u32, data: &mut [u8]) -> Resu
Ok(())
}
+
+ /// Poll FSP for incoming data.
+ ///
+ /// Returns the size of available data in bytes, or 0 if no data is
available.
+ ///
+ /// The FSP message queue is not circular - pointers are reset to 0 after
each
+ /// message exchange, so `tail >= head` is always true when data is
present.
+ #[allow(dead_code)]
+ pub(crate) fn poll_msgq(&self, bar: &Bar0) -> u32 {
+ let head = regs::NV_PFSP_MSGQ_HEAD::read(bar).address();
+ let tail = regs::NV_PFSP_MSGQ_TAIL::read(bar).address();
+
+ if head == tail {
+ return 0;
+ }
+
+ // TAIL points at last DWORD written, so add 4 to get total size
+ (tail - head) + 4
+ }
+
+ /// Send message to FSP.
+ ///
+ /// Writes a message to FSP EMEM and updates queue pointers to notify FSP.
+ ///
+ /// # Arguments
+ /// * `bar` - BAR0 memory mapping
+ /// * `packet` - Message data (must be 4-byte aligned in length)
+ ///
+ /// # Returns
+ /// `Ok(())` on success, `Err(EINVAL)` if packet is empty or not 4-byte
aligned
+ #[allow(dead_code)]
+ pub(crate) fn send_msg(&self, bar: &Bar0, packet: &[u8]) ->
Result {
+ if packet.is_empty() {
+ return Err(EINVAL);
+ }
+
+ // Write message to EMEM at offset 0 (validates 4-byte alignment)
+ self.write_emem(bar, 0, packet)?;
+
+ // Update queue pointers - TAIL points at last DWORD written
+ let tail_offset = u32::try_from(packet.len() - 4).map_err(|_| EINVAL)?;
+ regs::NV_PFSP_QUEUE_TAIL::default()
+ .set_address(tail_offset)
+ .write(bar);
+ regs::NV_PFSP_QUEUE_HEAD::default()
+ .set_address(0)
+ .write(bar);
+
+ Ok(())
+ }
+
+ /// Receive message from FSP.
+ ///
+ /// Reads a message from FSP EMEM and resets queue pointers.
+ ///
+ /// # Arguments
+ /// * `bar` - BAR0 memory mapping
+ /// * `buffer` - Buffer to receive message data
+ /// * `size` - Size of message to read in bytes (from `poll_msgq`)
+ ///
+ /// # Returns
+ /// `Ok(bytes_read)` on success, `Err(EINVAL)` if size is 0, exceeds
buffer, or not aligned
+ #[allow(dead_code)]
+ pub(crate) fn recv_msg(&self, bar: &Bar0, buffer: &mut [u8],
size: usize) -> Result<usize> {
+ if size == 0 || size > buffer.len() {
+ return Err(EINVAL);
+ }
+
+ // Read response from EMEM at offset 0 (validates 4-byte alignment)
+ self.read_emem(bar, 0, &mut buffer[..size])?;
+
+ // Reset message queue pointers after reading
+ regs::NV_PFSP_MSGQ_TAIL::default().set_address(0).write(bar);
+ regs::NV_PFSP_MSGQ_HEAD::default().set_address(0).write(bar);
+
+ Ok(size)
+ }
}
diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
index b642cee9611d..0d5c13f19073 100644
--- a/drivers/gpu/nova-core/regs.rs
+++ b/drivers/gpu/nova-core/regs.rs
@@ -401,6 +401,54 @@ pub(crate) fn mem_scrubbing_done(self) -> bool {
31:0 data as u32; // EMEM data register
});
+// FSP (Firmware System Processor) queue registers for Hopper/Blackwell Chain
of Trust
+// These registers manage falcon EMEM communication queues
+register!(NV_PFSP_QUEUE_HEAD @ 0x008f2c00 {
+ 31:0 address as u32;
+});
+
+register!(NV_PFSP_QUEUE_TAIL @ 0x008f2c04 {
+ 31:0 address as u32;
+});
+
+register!(NV_PFSP_MSGQ_HEAD @ 0x008f2c80 {
+ 31:0 address as u32;
+});
+
+register!(NV_PFSP_MSGQ_TAIL @ 0x008f2c84 {
+ 31:0 address as u32;
+});
+
+// PTHERM registers
+
+// FSP secure boot completion status register used by FSP to signal boot
completion.
+// This is the NV_THERM_I2CS_SCRATCH register.
+// Different architectures use different addresses:
+// - Hopper (GH100): 0x000200bc
+// - Blackwell (GB202): 0x00ad00bc
+#[allow(dead_code)]
+pub(crate) fn fsp_thermal_scratch_reg_addr(arch: Architecture) ->
Result<usize> {
+ match arch {
+ Architecture::Hopper => Ok(0x000200bc),
+ Architecture::Blackwell => Ok(0x00ad00bc),
+ _ => Err(kernel::error::code::ENOTSUPP),
+ }
+}
+
+/// FSP writes this value to indicate successful boot completion.
+#[allow(dead_code)]
+pub(crate) const FSP_BOOT_COMPLETE_SUCCESS: u32 = 0xff;
+
+// Helper function to read FSP boot completion status from the correct register
+#[allow(dead_code)]
+pub(crate) fn read_fsp_boot_complete_status(
+ bar: &crate::driver::Bar0,
+ arch: Architecture,
+) -> Result<u32> {
+ let addr = fsp_thermal_scratch_reg_addr(arch)?;
+ Ok(bar.read32(addr))
+}
+
// The modules below provide registers that are not identical on all supported
chips. They should
// only be used in HAL modules.
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 17/31] gpu: nova-core: Hopper/Blackwell: calculate reserved FB heap size
Various "reserved" areas of FB (frame buffer: vidmem) have to be
calculated, because the GSP booting process needs this information.
The PMU reserved size is a compile-time constant, so a new
const-compatible alignment function is also added to num.rs, in order to
align that size.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/fb.rs | 18 ++++++++++++++++++
drivers/gpu/nova-core/gsp/fw.rs | 6 +++++-
drivers/gpu/nova-core/num.rs | 10 ++++++++++
3 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs
index 9fcd915e12e1..e697436de29a 100644
--- a/drivers/gpu/nova-core/fb.rs
+++ b/drivers/gpu/nova-core/fb.rs
@@ -153,6 +153,9 @@ pub(crate) struct FbLayout {
pub(crate) wpr2: FbRange,
pub(crate) heap: FbRange,
pub(crate) vf_partition_count: u8,
+ /// Total reserved size (heap + PMU reserved), aligned to 2MB.
+ #[allow(dead_code)]
+ pub(crate) total_reserved_size: u32,
}
impl FbLayout {
@@ -240,6 +243,16 @@ pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw:
&GspFirmware) -> Result<
FbRange(wpr2.start - HEAP_SIZE..wpr2.start)
};
+ // Calculate reserved sizes. PMU reservation is a subset of the total
reserved size.
+ let heap_size = (heap.end - heap.start) as u64;
+ let pmu_reserved_size = u64::from(PMU_RESERVED_SIZE);
+
+ let total_reserved_size = {
+ let total = heap_size + pmu_reserved_size;
+ const RSVD_ALIGN: Alignment = Alignment::new::<SZ_2M>();
+ total.align_up(RSVD_ALIGN).ok_or(EINVAL)?
+ };
+
Ok(Self {
fb,
vga_workspace,
@@ -250,6 +263,11 @@ pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw:
&GspFirmware) -> Result<
wpr2,
heap,
vf_partition_count: 0,
+ total_reserved_size: total_reserved_size as u32,
})
}
}
+
+/// PMU reserved size, aligned to 128KB.
+pub(crate) const PMU_RESERVED_SIZE: u32 =
+ crate::num::const_align_up::<SZ_128K>(SZ_8M + SZ_16M + SZ_4K) as u32;
diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
index abffd6beec65..8d668a24142c 100644
--- a/drivers/gpu/nova-core/gsp/fw.rs
+++ b/drivers/gpu/nova-core/gsp/fw.rs
@@ -27,7 +27,10 @@
};
use crate::{
- fb::FbLayout,
+ fb::{
+ FbLayout,
+ PMU_RESERVED_SIZE, //
+ },
firmware::gsp::GspFirmware,
gpu::Chipset,
gsp::{
@@ -183,6 +186,7 @@ pub(crate) fn new(gsp_firmware: &GspFirmware, fb_layout:
&FbLayout) -> Self {
fbSize: fb_layout.fb.end - fb_layout.fb.start,
vgaWorkspaceOffset: fb_layout.vga_workspace.start,
vgaWorkspaceSize: fb_layout.vga_workspace.end -
fb_layout.vga_workspace.start,
+ pmuReservedSize: PMU_RESERVED_SIZE,
..Default::default()
})
}
diff --git a/drivers/gpu/nova-core/num.rs b/drivers/gpu/nova-core/num.rs
index c952a834e662..f068722c5bdf 100644
--- a/drivers/gpu/nova-core/num.rs
+++ b/drivers/gpu/nova-core/num.rs
@@ -215,3 +215,13 @@ pub(crate) const fn [<$from _into_ $into>]<const
N: $from>() -> $into {
impl_const_into!(u64 => { u8, u16, u32 });
impl_const_into!(u32 => { u8, u16 });
impl_const_into!(u16 => { u8 });
+
+/// Aligns `value` up to `ALIGN` at compile time.
+///
+/// This is the const-compatible equivalent of
[`kernel::ptr::Alignable::align_up`].
+/// `ALIGN` must be a power of two (enforced at compile time).
+#[inline(always)]
+pub(crate) const fn const_align_up<const ALIGN: usize>(value: usize)
-> usize {
+ build_assert!(ALIGN.is_power_of_two());
+ (value + (ALIGN - 1)) & !(ALIGN - 1)
+}
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 18/31] gpu: nova-core: Hopper/Blackwell: add needs_large_reserved_mem()
Hopper, Blackwell and later need to reserve more memory than previous
GPUs did.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/gpu.rs | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index 624bbc2a54e8..c0473ef8ac47 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -123,6 +123,11 @@ pub(crate) const fn arch(&self) -> Architecture {
| Self::GB207 => Architecture::Blackwell,
}
}
+
+ #[expect(dead_code)]
+ pub(crate) fn needs_large_reserved_mem(&self) -> bool {
+ matches!(self.arch(), Architecture::Hopper | Architecture::Blackwell)
+ }
}
// TODO
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 19/31] gpu: nova-core: Hopper/Blackwell: add FSP secure boot completion waiting
Add the FSP (Firmware System Processor) module for Hopper/Blackwell GPUs.
These architectures use a simplified firmware boot sequence:
FMC --> FSP --> GSP, with no SEC2 involvement.
This commit adds the ability to wait for FSP secure boot completion by
polling the I2CS thermal scratch register until FSP signals success.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/fsp.rs | 183 +++++++++++++++++++++++++++++
drivers/gpu/nova-core/nova_core.rs | 1 +
2 files changed, 184 insertions(+)
create mode 100644 drivers/gpu/nova-core/fsp.rs
diff --git a/drivers/gpu/nova-core/fsp.rs b/drivers/gpu/nova-core/fsp.rs
new file mode 100644
index 000000000000..12529b139861
--- /dev/null
+++ b/drivers/gpu/nova-core/fsp.rs
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// TODO: remove this once the code is fully functional
+#![expect(dead_code)]
+
+//! FSP (Firmware System Processor) interface for Hopper/Blackwell GPUs.
+//!
+//! Hopper/Blackwell use a simplified firmware boot sequence: FMC --> FSP
--> GSP.
+//! Unlike Turing/Ampere/Ada, there is NO SEC2 (Security Engine 2) usage.
+//! FSP handles secure boot directly using FMC firmware + Chain of Trust.
+
+use kernel::{
+ device,
+ io::poll::read_poll_timeout,
+ prelude::*,
+ time::Delta,
+ transmute::{
+ AsBytes,
+ FromBytes, //
+ },
+};
+
+use crate::regs::FSP_BOOT_COMPLETE_SUCCESS;
+
+/// FSP secure boot completion timeout in milliseconds.
+const FSP_SECURE_BOOT_TIMEOUT_MS: i64 = 4000;
+
+/// MCTP (Management Component Transport Protocol) header values for FSP communication.
+///
+/// The `HEADER_*` values are field values, not masks: callers shift them into place when
+/// assembling the 32-bit MCTP header.
+pub(crate) mod mctp {
+    /// Start of Message flag (placed at bit 31 of the MCTP header).
+    pub(super) const HEADER_SOM: u32 = 1;
+    /// End of Message flag (placed at bit 30 of the MCTP header).
+    pub(super) const HEADER_EOM: u32 = 1;
+    /// Source Endpoint ID.
+    pub(super) const HEADER_SEID: u32 = 0;
+    /// Sequence number.
+    pub(super) const HEADER_SEQ: u32 = 0;
+
+    /// MCTP message type; occupies the low bits of the NVDM header.
+    pub(super) const MSG_TYPE_VENDOR_PCI: u32 = 0x7e;
+    /// Vendor ID carried in the NVDM header (shifted left by 8).
+    pub(super) const VENDOR_ID_NV: u32 = 0x10de;
+    /// NVDM type of a Chain of Trust (COT) request (shifted left by 24 in the NVDM header).
+    pub(super) const NVDM_TYPE_COT: u32 = 0x14;
+    /// NVDM type of a response sent back by FSP.
+    pub(super) const NVDM_TYPE_FSP_RESPONSE: u32 = 0x15;
+}
+
+/// GSP FMC initialization parameters.
+///
+/// First member of [`GspFmcBootParams`].
+#[repr(C)]
+#[derive(Debug, Clone, Copy, Default)]
+struct GspFmcInitParams {
+    /// CC initialization "registry keys"
+    regkeys: u32,
+}
+
+// SAFETY: `GspFmcInitParams` is `repr(C)` with a single `u32` field: it has no padding bytes
+// and no interior mutability.
+unsafe impl AsBytes for GspFmcInitParams {}
+// SAFETY: All bit patterns are valid for the primitive fields.
+unsafe impl FromBytes for GspFmcInitParams {}
+
+/// GSP ACR (Authenticated Code RAM) boot parameters.
+#[repr(C)]
+#[derive(Debug, Clone, Copy, Default)]
+struct GspAcrBootGspRmParams {
+    /// Physical memory aperture through which gspRmDescPa is accessed
+    target: u32,
+    /// Size in bytes of the GSP-RM descriptor structure
+    gsp_rm_desc_size: u32,
+    /// Physical offset in the target aperture of the GSP-RM descriptor structure
+    gsp_rm_desc_offset: u64,
+    /// Physical offset in FB to set the start of the WPR containing GSP-RM
+    wpr_carveout_offset: u64,
+    /// Size in bytes of the WPR containing GSP-RM
+    wpr_carveout_size: u32,
+    /// Whether to boot GSP-RM or GSP-Proxy through ACR
+    b_is_gsp_rm_boot: u32,
+}
+
+// The field sizes add up to the struct size, i.e. `repr(C)` inserted no padding.
+const _: () = assert!(core::mem::size_of::<GspAcrBootGspRmParams>() == 32);
+
+// SAFETY: `GspAcrBootGspRmParams` is `repr(C)`, contains only integers and, as asserted
+// above, has no padding bytes.
+unsafe impl AsBytes for GspAcrBootGspRmParams {}
+// SAFETY: All bit patterns are valid for the primitive fields.
+unsafe impl FromBytes for GspAcrBootGspRmParams {}
+
+/// GSP RM boot parameters.
+#[repr(C)]
+#[derive(Debug, Clone, Copy, Default)]
+struct GspRmParams {
+    /// Physical memory aperture through which bootArgsOffset is accessed
+    target: u32,
+    /// Explicit stand-in for the padding `repr(C)` would otherwise insert to align the next
+    /// `u64`. Spelling it out keeps the layout identical while guaranteeing that every byte
+    /// of the struct is initialized, which `AsBytes` requires.
+    _padding: u32,
+    /// Physical offset in the memory aperture that will be passed to GSP-RM
+    boot_args_offset: u64,
+}
+
+// Same size as the layout with implicit padding (4 + 4 + 8).
+const _: () = assert!(core::mem::size_of::<GspRmParams>() == 16);
+
+// SAFETY: `GspRmParams` is `repr(C)`, contains only integers and all padding is explicit,
+// so values contain no uninitialized bytes.
+unsafe impl AsBytes for GspRmParams {}
+// SAFETY: All bit patterns are valid for the primitive fields.
+unsafe impl FromBytes for GspRmParams {}
+
+/// GSP SPDM (Security Protocol and Data Model) parameters.
+#[repr(C)]
+#[derive(Debug, Clone, Copy, Default)]
+struct GspSpdmParams {
+    /// Physical Memory Aperture through which all addresses are accessed
+    target: u32,
+    /// Explicit alignment padding before the following `u64` (see [`GspRmParams`]).
+    _padding0: u32,
+    /// Physical offset in the memory aperture where SPDM payload buffer is stored
+    payload_buffer_offset: u64,
+    /// Size of the above payload buffer
+    payload_buffer_size: u32,
+    /// Explicit tail padding up to the 8-byte alignment of the struct.
+    _padding1: u32,
+}
+
+// Same size as the layout with implicit padding (4 + 4 + 8 + 4 + 4).
+const _: () = assert!(core::mem::size_of::<GspSpdmParams>() == 24);
+
+// SAFETY: `GspSpdmParams` is `repr(C)`, contains only integers and all padding is explicit,
+// so values contain no uninitialized bytes.
+unsafe impl AsBytes for GspSpdmParams {}
+// SAFETY: All bit patterns are valid for the primitive fields.
+unsafe impl FromBytes for GspSpdmParams {}
+
+/// Complete GSP FMC boot parameters structure.
+/// This is what FSP expects to receive - NOT a raw libos address!
+#[repr(C)]
+#[derive(Debug, Clone, Copy, Default)]
+pub(crate) struct GspFmcBootParams {
+    init_params: GspFmcInitParams,
+    /// Explicit alignment padding: `init_params` is 4 bytes, the next member is 8-byte
+    /// aligned.
+    _padding: u32,
+    boot_gsp_rm_params: GspAcrBootGspRmParams,
+    gsp_rm_params: GspRmParams,
+    gsp_spdm_params: GspSpdmParams,
+}
+
+// Same size as the layout with implicit padding (4 + 4 + 32 + 16 + 24).
+const _: () = assert!(core::mem::size_of::<GspFmcBootParams>() == 80);
+
+// SAFETY: `GspFmcBootParams` is `repr(C)`; its members are padding-free integer structs and
+// the only inter-member gap is the explicit `_padding` field.
+unsafe impl AsBytes for GspFmcBootParams {}
+// SAFETY: All bit patterns are valid for the primitive fields.
+unsafe impl FromBytes for GspFmcBootParams {}
+
+/// FSP interface for Hopper/Blackwell GPUs.
+pub(crate) struct Fsp;
+
+impl Fsp {
+    /// Wait for FSP secure boot completion.
+    ///
+    /// Polls the thermal scratch register until FSP signals boot completion
+    /// or timeout occurs.
+    ///
+    /// The register is read once up front so that an architecture without this register is
+    /// reported immediately; that read error is propagated unchanged. Any failure of the
+    /// subsequent polling is logged together with a final register read and returned as
+    /// `ETIMEDOUT`.
+    pub(crate) fn wait_secure_boot(
+        dev: &device::Device<device::Bound>,
+        bar: &crate::driver::Bar0,
+        arch: crate::gpu::Architecture,
+    ) -> Result<()> {
+        let timeout = Delta::from_millis(FSP_SECURE_BOOT_TIMEOUT_MS);
+
+        // Check if this architecture supports FSP thermal scratch register
+        let initial_status =
+            crate::regs::read_fsp_boot_complete_status(bar, arch).inspect_err(|_| {
+                dev_err!(
+                    dev,
+                    "FSP thermal scratch register not supported for architecture {:?}\n",
+                    arch
+                )
+            })?;
+        dev_dbg!(
+            dev,
+            "FSP initial I2CS scratch register status: {:#x}\n",
+            initial_status
+        );
+
+        read_poll_timeout(
+            || crate::regs::read_fsp_boot_complete_status(bar, arch),
+            |&status| {
+                dev_dbg!(
+                    dev,
+                    "FSP I2CS scratch register status: {:#x} (expected: {:#x})\n",
+                    status,
+                    FSP_BOOT_COMPLETE_SUCCESS
+                );
+                status == FSP_BOOT_COMPLETE_SUCCESS
+            },
+            Delta::ZERO,
+            timeout,
+        )
+        .map_err(|_| {
+            // 0xDEADBEEF is only a placeholder for the log line when the register cannot be
+            // read at this point.
+            let final_status =
+                crate::regs::read_fsp_boot_complete_status(bar, arch).unwrap_or(0xDEADBEEF);
+            dev_err!(
+                dev,
+                "FSP secure boot completion timeout - final status: {:#x}\n",
+                final_status
+            );
+            ETIMEDOUT
+        })
+        .map(|_| ())
+    }
+}
diff --git a/drivers/gpu/nova-core/nova_core.rs
b/drivers/gpu/nova-core/nova_core.rs
index b98a1c03f13d..5a8f52cebfff 100644
--- a/drivers/gpu/nova-core/nova_core.rs
+++ b/drivers/gpu/nova-core/nova_core.rs
@@ -10,6 +10,7 @@
mod falcon;
mod fb;
mod firmware;
+mod fsp;
mod gfw;
mod gpu;
mod gsp;
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 20/31] gpu: nova-core: Hopper/Blackwell: add FSP message structures
Add the data structures for FSP Chain of Trust communication. These
include the FMC signature container (hash, public key, signature) and
the NVDM payload structures for sending COT messages and receiving
responses.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/fsp.rs | 76 ++++++++++++++++++++++++++++++++++++
1 file changed, 76 insertions(+)
diff --git a/drivers/gpu/nova-core/fsp.rs b/drivers/gpu/nova-core/fsp.rs
index 12529b139861..389c43bfd538 100644
--- a/drivers/gpu/nova-core/fsp.rs
+++ b/drivers/gpu/nova-core/fsp.rs
@@ -124,6 +124,82 @@ unsafe impl AsBytes for GspFmcBootParams {}
// SAFETY: All bit patterns are valid for the primitive fields.
unsafe impl FromBytes for GspFmcBootParams {}
+/// Size constraints for FSP security signatures.
+// NOTE(review): the trailing comments read as byte counts (48 / 97 / 96 bytes), while
+// `FmcSignatures` stores the public key and signature as `[u32; 96]` (384 bytes each).
+// Confirm the intended unit wherever these are compared against buffer sizes.
+const FSP_HASH_SIZE: usize = 48; // SHA-384 hash (12 x u32)
+const FSP_PKEY_SIZE: usize = 97; // Public key size for GB202 (not 384!)
+const FSP_SIG_SIZE: usize = 96; // Signature size for GB202 (not 384!)
+
+/// Structure to hold FMC signatures.
+///
+/// The three arrays have the same element counts as the `hash384`, `public_key` and
+/// `signature` fields of `NvdmPayloadCot`, so they can be copied into the message as-is.
+#[derive(Debug, Clone, Copy)]
+pub(crate) struct FmcSignatures {
+ pub hash384: [u32; 12], // SHA-384 hash (48 bytes)
+ pub public_key: [u32; 96], // RSA public key (384 bytes)
+ pub signature: [u32; 96], // RSA signature (384 bytes)
+}
+
+// `Default` is written by hand because the standard library only implements `Default` for
+// arrays of up to 32 elements, so it cannot be derived for the 96-element fields.
+impl Default for FmcSignatures {
+ /// Returns an all-zero set of signatures.
+ fn default() -> Self {
+ Self {
+ hash384: [0u32; 12],
+ public_key: [0u32; 96],
+ signature: [0u32; 96],
+ }
+ }
+}
+
+/// FSP Command Response payload structure.
+/// NVDM_PAYLOAD_COMMAND_RESPONSE structure.
+///
+/// Packed, so the three fields are laid out back to back (12 bytes).
+#[repr(C, packed)]
+#[derive(Clone, Copy)]
+struct NvdmPayloadCommandResponse {
+ // Not interpreted by the code in this file.
+ task_id: u32,
+ // NVDM type of the command this response answers; compared against the type that was sent.
+ command_nvdm_type: u32,
+ // Zero on success; any other value is treated as a failed command.
+ error_code: u32,
+}
+
+/// NVDM (NVIDIA Device Management) COT (Chain of Trust) payload structure.
+/// This is the main message payload sent to FSP for Chain of Trust.
+///
+/// The struct is packed, so the field offsets are exactly the ones listed in the trailing
+/// comments and the total size is 0x35C (860) bytes.
+#[repr(C, packed)]
+#[derive(Clone, Copy)]
+struct NvdmPayloadCot {
+ version: u16, // offset 0x0, size 2
+ size: u16, // offset 0x2, size 2
+ gsp_fmc_sysmem_offset: u64, // offset 0x4, size 8
+ frts_sysmem_offset: u64, // offset 0xC, size 8
+ frts_sysmem_size: u32, // offset 0x14, size 4
+ frts_vidmem_offset: u64, // offset 0x18, size 8
+ frts_vidmem_size: u32, // offset 0x20, size 4
+ // Authentication related fields
+ hash384: [u32; 12], // offset 0x24, size 48 (0x30)
+ public_key: [u32; 96], // offset 0x54, size 384 (0x180)
+ signature: [u32; 96], // offset 0x1D4, size 384 (0x180)
+ gsp_boot_args_sysmem_offset: u64, // offset 0x354, size 8
+}
+
+/// Complete FSP message structure with MCTP and NVDM headers.
+///
+/// Two 32-bit headers followed by the Chain of Trust payload, packed with no gaps.
+#[repr(C, packed)]
+#[derive(Clone, Copy)]
+struct FspMessage {
+ // MCTP transport header (SOM/EOM flags, endpoint ID, sequence number).
+ mctp_header: u32,
+ // NVDM header (message type, vendor ID, NVDM type).
+ nvdm_header: u32,
+ cot: NvdmPayloadCot,
+}
+
+// SAFETY: FspMessage is a packed C struct with only integral fields.
+// Being packed, it (and the packed `NvdmPayloadCot` inside it) contains no padding bytes.
+unsafe impl AsBytes for FspMessage {}
+
+/// Complete FSP response structure with MCTP and NVDM headers.
+///
+/// Two 32-bit headers followed by the command response payload, packed with no gaps
+/// (20 bytes in total).
+#[repr(C, packed)]
+#[derive(Clone, Copy)]
+struct FspResponse {
+ // MCTP transport header of the reply.
+ mctp_header: u32,
+ // NVDM header of the reply.
+ nvdm_header: u32,
+ response: NvdmPayloadCommandResponse,
+}
+
+// SAFETY: FspResponse is a packed C struct with only integral fields.
+// Every bit pattern is therefore a valid value.
+unsafe impl FromBytes for FspResponse {}
+
/// FSP interface for Hopper/Blackwell GPUs.
pub(crate) struct Fsp;
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 21/31] gpu: nova-core: Hopper/Blackwell: add FMC signature extraction
Add extract_fmc_signatures_static() to parse cryptographic signatures
from FMC ELF firmware sections. This extracts the SHA-384 hash, RSA
public key, and signature needed for Chain of Trust verification.
Also exposes the elf_section() helper from firmware.rs for use by FSP.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/firmware.rs | 4 +-
drivers/gpu/nova-core/fsp.rs | 104 ++++++++++++++++++++++++++++++
2 files changed, 107 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nova-core/firmware.rs
b/drivers/gpu/nova-core/firmware.rs
index 5cbb8be7434f..7f8d62f9ceba 100644
--- a/drivers/gpu/nova-core/firmware.rs
+++ b/drivers/gpu/nova-core/firmware.rs
@@ -23,6 +23,8 @@
},
};
+pub(crate) use elf::elf_section;
+
pub(crate) mod booter;
pub(crate) mod fsp;
pub(crate) mod fwsec;
@@ -419,7 +421,7 @@ fn elf32_section<'a>(elf: &'a [u8], name:
&str) -> Option<&'a [u8]> {
}
/// Automatically detects ELF32 vs ELF64 based on the ELF header.
- pub(super) fn elf_section<'a>(elf: &'a [u8], name:
&str) -> Option<&'a [u8]> {
+ pub(crate) fn elf_section<'a>(elf: &'a [u8], name:
&str) -> Option<&'a [u8]> {
// Check ELF magic.
if elf.len() < 5 || elf.get(0..4)? != b"\x7fELF" {
return None;
diff --git a/drivers/gpu/nova-core/fsp.rs b/drivers/gpu/nova-core/fsp.rs
index 389c43bfd538..311b6d4c6011 100644
--- a/drivers/gpu/nova-core/fsp.rs
+++ b/drivers/gpu/nova-core/fsp.rs
@@ -256,4 +256,108 @@ pub(crate) fn wait_secure_boot(
})
.map(|_| ())
}
+
+    /// Extract FMC firmware signatures for Chain of Trust verification.
+    ///
+    /// Extracts real cryptographic signatures from FMC ELF32 firmware sections.
+    /// Returns signatures in a heap-allocated structure to prevent stack overflow.
+    ///
+    /// The `hash` section must be exactly `FSP_HASH_SIZE` bytes. The `publickey` and
+    /// `signature` sections may be shorter than their destination arrays, in which case the
+    /// remainder stays zero, but they may never be larger than the destination.
+    ///
+    /// Returns `EINVAL` if a section is missing or has an unacceptable size, or the
+    /// allocation error if the result cannot be allocated.
+    pub(crate) fn extract_fmc_signatures_static(
+        dev: &device::Device<device::Bound>,
+        fmc_fw_data: &[u8],
+    ) -> Result<KBox<FmcSignatures>> {
+        dev_dbg!(dev, "FMC firmware size: {} bytes\n", fmc_fw_data.len());
+
+        // Extract hash section (SHA-384)
+        let hash_section = crate::firmware::elf_section(fmc_fw_data, "hash")
+            .ok_or(EINVAL)
+            .inspect_err(|_| dev_err!(dev, "FMC firmware missing 'hash' section\n"))?;
+
+        // Extract public key section (RSA public key)
+        let pkey_section = crate::firmware::elf_section(fmc_fw_data, "publickey")
+            .ok_or(EINVAL)
+            .inspect_err(|_| dev_err!(dev, "FMC firmware missing 'publickey' section\n"))?;
+
+        // Extract signature section (RSA signature)
+        let sig_section = crate::firmware::elf_section(fmc_fw_data, "signature")
+            .ok_or(EINVAL)
+            .inspect_err(|_| dev_err!(dev, "FMC firmware missing 'signature' section\n"))?;
+
+        dev_dbg!(
+            dev,
+            "FMC ELF sections: hash={} bytes, pkey={} bytes, sig={} bytes\n",
+            hash_section.len(),
+            pkey_section.len(),
+            sig_section.len()
+        );
+
+        // Allocate signature structure on heap to avoid stack overflow. This is done before
+        // the size checks so that the limits can be derived from the real array sizes.
+        let mut signatures = KBox::new(FmcSignatures::default(), GFP_KERNEL)?;
+
+        // Validate section sizes - hash must be exactly 48 bytes
+        if hash_section.len() != FSP_HASH_SIZE {
+            dev_err!(
+                dev,
+                "FMC hash section size {} != expected {}\n",
+                hash_section.len(),
+                FSP_HASH_SIZE
+            );
+            return Err(EINVAL);
+        }
+
+        // Public key and signature can be smaller than the fixed array sizes, but a section
+        // must never be larger than the array it is copied into. `FSP_PKEY_SIZE * 4` (388)
+        // exceeds the 384-byte `public_key` array, so each limit is clamped to the size of
+        // its destination.
+        let pkey_max = core::cmp::min(
+            FSP_PKEY_SIZE * 4,
+            core::mem::size_of_val(&signatures.public_key),
+        );
+        if pkey_section.len() > pkey_max {
+            dev_err!(
+                dev,
+                "FMC publickey section size {} > maximum {}\n",
+                pkey_section.len(),
+                pkey_max
+            );
+            return Err(EINVAL);
+        }
+
+        let sig_max = core::cmp::min(
+            FSP_SIG_SIZE * 4,
+            core::mem::size_of_val(&signatures.signature),
+        );
+        if sig_section.len() > sig_max {
+            dev_err!(
+                dev,
+                "FMC signature section size {} > maximum {}\n",
+                sig_section.len(),
+                sig_max
+            );
+            return Err(EINVAL);
+        }
+
+        // Copy the sections byte for byte into the (zero-initialized) word arrays.
+        Self::copy_bytes_to_words(&mut signatures.hash384, hash_section);
+        Self::copy_bytes_to_words(&mut signatures.public_key, pkey_section);
+        Self::copy_bytes_to_words(&mut signatures.signature, sig_section);
+
+        Ok(signatures)
+    }
+
+    /// Copies `src` into `dst` so that the in-memory bytes of `dst` equal the bytes of `src`.
+    ///
+    /// Each group of four source bytes becomes one native-endian word, which is what a raw
+    /// byte copy into the array would produce. A trailing partial group is zero-padded,
+    /// words past the end of `src` are left untouched, and source bytes that do not fit
+    /// into `dst` are ignored, so this can never write out of bounds.
+    fn copy_bytes_to_words(dst: &mut [u32], src: &[u8]) {
+        for (word, chunk) in dst.iter_mut().zip(src.chunks(4)) {
+            let mut bytes = [0u8; 4];
+            bytes[..chunk.len()].copy_from_slice(chunk);
+            *word = u32::from_ne_bytes(bytes);
+        }
+    }
}
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 22/31] gpu: nova-core: Hopper/Blackwell: add FSP send/receive messaging
Add send_sync_fsp() which sends an MCTP/NVDM message to FSP and waits
for the response. This handles the low-level protocol details including
header validation, error checking, and timeout handling.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/fsp.rs | 102 +++++++++++++++++++++++++++++++++++
1 file changed, 102 insertions(+)
diff --git a/drivers/gpu/nova-core/fsp.rs b/drivers/gpu/nova-core/fsp.rs
index 311b6d4c6011..bb1e19c03c30 100644
--- a/drivers/gpu/nova-core/fsp.rs
+++ b/drivers/gpu/nova-core/fsp.rs
@@ -22,6 +22,9 @@
use crate::regs::FSP_BOOT_COMPLETE_SUCCESS;
+/// FSP message timeout in milliseconds.
+const FSP_MSG_TIMEOUT_MS: i64 = 2000;
+
/// FSP secure boot completion timeout in milliseconds.
const FSP_SECURE_BOOT_TIMEOUT_MS: i64 = 4000;
@@ -360,4 +363,103 @@ pub(crate) fn extract_fmc_signatures_static(
Ok(signatures)
}
+
+    /// Send message to FSP and wait for response.
+    ///
+    /// Sends `packet`, polls the message queue until a reply is available (up to
+    /// `FSP_MSG_TIMEOUT_MS`), reads the reply and validates it: the MCTP header must have
+    /// both SOM and EOM set, the NVDM header must describe an NVIDIA vendor-defined FSP
+    /// response, the echoed command type must equal `nvdm_type`, and the error code must be
+    /// zero.
+    ///
+    /// Returns `ETIMEDOUT` if no reply arrives in time and `EIO` for any malformed or failed
+    /// reply; errors from sending or receiving are propagated.
+    fn send_sync_fsp(
+        dev: &device::Device<device::Bound>,
+        bar: &crate::driver::Bar0,
+        fsp_falcon: &crate::falcon::Falcon<crate::falcon::fsp::Fsp>,
+        nvdm_type: u32,
+        packet: &[u8],
+    ) -> Result<()> {
+        // Send message
+        fsp_falcon.send_msg(bar, packet)?;
+
+        // Wait for response
+        let timeout = Delta::from_millis(FSP_MSG_TIMEOUT_MS);
+        let packet_size = read_poll_timeout(
+            || Ok(fsp_falcon.poll_msgq(bar)),
+            |&size| size > 0,
+            Delta::ZERO,
+            timeout,
+        )
+        .map_err(|_| {
+            dev_err!(dev, "FSP response timeout\n");
+            ETIMEDOUT
+        })?;
+
+        // Receive response
+        let packet_size = packet_size as usize;
+        let mut response_buf = KVec::<u8>::new();
+        response_buf.resize(packet_size, 0, GFP_KERNEL)?;
+        fsp_falcon.recv_msg(bar, &mut response_buf, packet_size)?;
+
+        // Parse response. Only the leading `FspResponse` is interpreted. The slice handed to
+        // `from_bytes` is cut to exactly that size so that a reply carrying additional
+        // trailing bytes is still accepted; a reply shorter than the header is rejected.
+        let response = response_buf
+            .get(..core::mem::size_of::<FspResponse>())
+            .and_then(|header| FspResponse::from_bytes(header))
+            .ok_or(EIO)
+            .inspect_err(|_| {
+                dev_err!(dev, "FSP response too small: {}\n", response_buf.len())
+            })?;
+
+        // Copy packed struct fields to avoid alignment issues
+        let mctp_header = response.mctp_header;
+        let nvdm_header = response.nvdm_header;
+        let command_nvdm_type = response.response.command_nvdm_type;
+        let error_code = response.response.error_code;
+
+        // Validate MCTP header
+        let mctp_som = (mctp_header >> 31) & 1;
+        let mctp_eom = (mctp_header >> 30) & 1;
+        if mctp_som != 1 || mctp_eom != 1 {
+            dev_err!(
+                dev,
+                "Unexpected MCTP header in FSP reply: {:#x}\n",
+                mctp_header
+            );
+            return Err(EIO);
+        }
+
+        // Validate NVDM header
+        let nvdm_msg_type = nvdm_header & 0x7f;
+        let nvdm_vendor_id = (nvdm_header >> 8) & 0xffff;
+        let nvdm_type_resp = (nvdm_header >> 24) & 0xff;
+
+        if nvdm_msg_type != mctp::MSG_TYPE_VENDOR_PCI
+            || nvdm_vendor_id != mctp::VENDOR_ID_NV
+            || nvdm_type_resp != mctp::NVDM_TYPE_FSP_RESPONSE
+        {
+            dev_err!(
+                dev,
+                "Unexpected NVDM header in FSP reply: {:#x}\n",
+                nvdm_header
+            );
+            return Err(EIO);
+        }
+
+        // Check command type matches
+        if command_nvdm_type != nvdm_type {
+            dev_err!(
+                dev,
+                "Expected NVDM type {:#x} in reply, got {:#x}\n",
+                nvdm_type,
+                command_nvdm_type
+            );
+            return Err(EIO);
+        }
+
+        // Check for errors
+        if error_code != 0 {
+            dev_err!(
+                dev,
+                "NVDM command {:#x} failed with error {:#x}\n",
+                nvdm_type,
+                error_code
+            );
+            return Err(EIO);
+        }
+
+        dev_dbg!(dev, "FSP command {:#x} completed successfully\n", nvdm_type);
+        Ok(())
+    }
}
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 23/31] gpu: nova-core: Hopper/Blackwell: add FSP Chain of Trust boot
Add the boot functions that construct FMC boot parameters and send the
Chain of Trust message to FSP. This completes the FSP communication
infrastructure needed to boot GSP firmware on Hopper/Blackwell GPUs.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/fsp.rs | 156 +++++++++++++++++++++++++++++++++++
drivers/gpu/nova-core/gpu.rs | 1 -
2 files changed, 156 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nova-core/fsp.rs b/drivers/gpu/nova-core/fsp.rs
index bb1e19c03c30..5840ab78e79f 100644
--- a/drivers/gpu/nova-core/fsp.rs
+++ b/drivers/gpu/nova-core/fsp.rs
@@ -13,6 +13,10 @@
device,
io::poll::read_poll_timeout,
prelude::*,
+ ptr::{
+ Alignable,
+ Alignment, //
+ },
time::Delta,
transmute::{
AsBytes,
@@ -22,6 +26,10 @@
use crate::regs::FSP_BOOT_COMPLETE_SUCCESS;
+/// FSP Chain of Trust (COT) version for Blackwell.
+/// GB202 uses version 2 (not 1 like GH100)
+const FSP_COT_VERSION: u16 = 2;
+
/// FSP message timeout in milliseconds.
const FSP_MSG_TIMEOUT_MS: i64 = 2000;
@@ -364,6 +372,154 @@ pub(crate) fn extract_fmc_signatures_static(
Ok(signatures)
}
+    /// Creates FMC boot parameters structure for FSP.
+    ///
+    /// This structure tells FSP how to boot GSP-RM with the correct memory layout.
+    ///
+    /// Allocates one zero-initialized, DMA-coherent [`GspFmcBootParams`] and fills in:
+    /// - `wpr_meta_addr` / `wpr_meta_size`: stored as the GSP-RM descriptor offset and size,
+    /// - `libos_addr`: stored as the boot arguments offset passed on to GSP-RM.
+    ///
+    /// All other fields keep the zero value they were allocated with.
+    ///
+    /// Fails if the allocation or any of the `dma_write!` accesses fails.
+    pub(crate) fn create_fmc_boot_params(
+        dev: &device::Device<device::Bound>,
+        wpr_meta_addr: u64,
+        wpr_meta_size: u32,
+        libos_addr: u64,
+    ) -> Result<kernel::dma::CoherentAllocation<GspFmcBootParams>> {
+        use kernel::dma::CoherentAllocation;
+
+        const GSP_DMA_TARGET_COHERENT_SYSTEM: u32 = 1;
+        const GSP_DMA_TARGET_NONCOHERENT_SYSTEM: u32 = 2;
+
+        let fmc_boot_params = CoherentAllocation::<GspFmcBootParams>::alloc_coherent(
+            dev,
+            1,
+            GFP_KERNEL | __GFP_ZERO,
+        )?;
+
+        // Configure ACR boot parameters (WPR metadata location) using dma_write! macro
+        kernel::dma_write!(
+            fmc_boot_params[0].boot_gsp_rm_params.target = GSP_DMA_TARGET_COHERENT_SYSTEM
+        )?;
+        kernel::dma_write!(
+            fmc_boot_params[0].boot_gsp_rm_params.gsp_rm_desc_offset = wpr_meta_addr
+        )?;
+        kernel::dma_write!(fmc_boot_params[0].boot_gsp_rm_params.gsp_rm_desc_size = wpr_meta_size)?;
+
+        // Blackwell FSP expects wpr_carveout_offset and wpr_carveout_size to be zero;
+        // it obtains WPR info from other sources.
+
+        kernel::dma_write!(fmc_boot_params[0].boot_gsp_rm_params.b_is_gsp_rm_boot = 1)?;
+
+        // Configure RM parameters (libos location) using dma_write! macro
+        kernel::dma_write!(
+            fmc_boot_params[0].gsp_rm_params.target = GSP_DMA_TARGET_NONCOHERENT_SYSTEM
+        )?;
+        kernel::dma_write!(fmc_boot_params[0].gsp_rm_params.boot_args_offset = libos_addr)?;
+
+        dev_dbg!(
+            dev,
+            "FMC Boot Params (addr={:#x}):\n target={}\n desc_size={:#x}\n \
+             desc_offset={:#x}\n rm_target={}\n boot_args_offset={:#x} \
+             (libos_addr passed in: {:#x})\n",
+            fmc_boot_params.dma_handle(),
+            GSP_DMA_TARGET_COHERENT_SYSTEM,
+            wpr_meta_size,
+            wpr_meta_addr,
+            GSP_DMA_TARGET_NONCOHERENT_SYSTEM,
+            libos_addr,
+            libos_addr
+        );
+
+        Ok(fmc_boot_params)
+    }
+
+    /// Boot GSP FMC with pre-extracted signatures.
+    ///
+    /// This version takes pre-extracted signatures and FMC image data.
+    /// Used when signatures are extracted separately from the full ELF file.
+    ///
+    /// Builds a Chain of Trust message from the DMA addresses of `fmc_image_fw` and
+    /// `fmc_boot_params`, the FRTS placement and `signatures`, sends it to FSP through
+    /// `fsp_falcon` and waits for the reply.
+    ///
+    /// When `resume` is `true`, the FRTS offset and size in the message are both zero.
+    /// `total_reserved_size` is only used for chipsets that do not need the large reserved
+    /// memory region.
+    ///
+    /// Fails if the message cannot be allocated or if the FSP exchange fails.
+    #[allow(clippy::too_many_arguments)]
+    pub(crate) fn boot_gsp_fmc_with_signatures(
+        dev: &device::Device<device::Bound>,
+        bar: &crate::driver::Bar0,
+        chipset: crate::gpu::Chipset,
+        fmc_image_fw: &crate::dma::DmaObject, // Contains only the image section
+        fmc_boot_params: &kernel::dma::CoherentAllocation<GspFmcBootParams>,
+        total_reserved_size: u64,
+        resume: bool,
+        fsp_falcon: &crate::falcon::Falcon<crate::falcon::fsp::Fsp>,
+        signatures: &FmcSignatures,
+    ) -> Result<()> {
+        dev_dbg!(dev, "Starting FSP boot sequence for {}\n", chipset);
+
+        // Build FSP Chain of Trust message
+        let fmc_addr = fmc_image_fw.dma_handle(); // Now points to image data only
+        let fmc_boot_params_addr = fmc_boot_params.dma_handle();
+
+        // frts_offset is relative to FB end: FRTS_location = FB_END - frts_offset
+        let frts_offset = if !resume {
+            let mut frts_reserved_size = if chipset.needs_large_reserved_mem() {
+                0x220000 // heap_size_non_wpr for Hopper/Blackwell+
+            } else {
+                total_reserved_size
+            };
+
+            // Add PMU reserved size
+            frts_reserved_size += u64::from(crate::fb::PMU_RESERVED_SIZE);
+
+            // Round up to a 2 MiB boundary; if rounding up is not possible the unaligned
+            // value is used as-is.
+            frts_reserved_size
+                .align_up(Alignment::new::<0x200000>())
+                .unwrap_or(frts_reserved_size)
+        } else {
+            0
+        };
+        let frts_size = if !resume { 0x100000 } else { 0 }; // 1MB FRTS size
+
+        // Build the FSP message
+        let msg = KBox::new(
+            FspMessage {
+                mctp_header: (mctp::HEADER_SOM << 31)
+                    | (mctp::HEADER_EOM << 30)
+                    | (mctp::HEADER_SEID << 16)
+                    | (mctp::HEADER_SEQ << 28),
+
+                nvdm_header: (mctp::MSG_TYPE_VENDOR_PCI)
+                    | (mctp::VENDOR_ID_NV << 8)
+                    | (mctp::NVDM_TYPE_COT << 24),
+
+                cot: NvdmPayloadCot {
+                    version: FSP_COT_VERSION,
+                    size: core::mem::size_of::<NvdmPayloadCot>() as u16,
+                    gsp_fmc_sysmem_offset: fmc_addr,
+                    frts_sysmem_offset: 0,
+                    frts_sysmem_size: 0,
+                    frts_vidmem_offset: frts_offset,
+                    frts_vidmem_size: frts_size,
+                    hash384: signatures.hash384,
+                    public_key: signatures.public_key,
+                    signature: signatures.signature,
+                    gsp_boot_args_sysmem_offset: fmc_boot_params_addr,
+                },
+            },
+            GFP_KERNEL,
+        )?;
+
+        // Convert message to bytes for sending
+        let msg_bytes = msg.as_bytes();
+
+        dev_dbg!(
+            dev,
+            "FSP COT Message:\n size={} bytes\n fmc_addr={:#x}\n boot_params={:#x}\n \
+             frts_offset={:#x}\n frts_size={:#x}\n",
+            msg_bytes.len(),
+            fmc_addr,
+            fmc_boot_params_addr,
+            frts_offset,
+            frts_size
+        );
+
+        // Send COT message to FSP and wait for response
+        Self::send_sync_fsp(dev, bar, fsp_falcon, mctp::NVDM_TYPE_COT, msg_bytes)?;
+
+        dev_dbg!(dev, "FSP Chain of Trust completed successfully\n");
+        Ok(())
+    }
+
/// Send message to FSP and wait for response.
fn send_sync_fsp(
dev: &device::Device<device::Bound>,
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index c0473ef8ac47..8fdce488612a 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -124,7 +124,6 @@ pub(crate) const fn arch(&self) -> Architecture {
}
}
- #[expect(dead_code)]
pub(crate) fn needs_large_reserved_mem(&self) -> bool {
matches!(self.arch(), Architecture::Hopper | Architecture::Blackwell)
}
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 24/31] gpu: nova-core: Hopper/Blackwell: larger non-WPR heap
Hopper, Blackwell and later require more space for the non-WPR heap.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/fb.rs | 15 ++++++++++++---
drivers/gpu/nova-core/fsp.rs | 2 +-
2 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs
index e697436de29a..a402a9fe86f8 100644
--- a/drivers/gpu/nova-core/fb.rs
+++ b/drivers/gpu/nova-core/fb.rs
@@ -98,6 +98,16 @@ pub(crate) fn unregister(&self, bar: &Bar0) {
}
}
+/// Calculate non-WPR heap size based on chipset architecture.
+/// This matches the logic used in FSP for consistency.
+///
+/// Returns the size in bytes: 0x220000 for chipsets that need the large reserved memory
+/// region, 1 MiB otherwise.
+pub(crate) fn calc_non_wpr_heap_size(chipset: Chipset) -> u64 {
+    if chipset.needs_large_reserved_mem() {
+        0x220000 // ~2.1MB for Hopper/Blackwell+
+    } else {
+        // Lossless conversion helper instead of an `as` cast, as in the code this replaces.
+        usize_as_u64(SZ_1M) // 1MB for older architectures
+    }
+}
+
pub(crate) struct FbRange(Range<u64>);
impl FbRange {
@@ -238,9 +248,8 @@ pub(crate) fn new(chipset: Chipset, bar: &Bar0, gsp_fw:
&GspFirmware) -> Result<
};
let heap = {
- const HEAP_SIZE: u64 = usize_as_u64(SZ_1M);
-
- FbRange(wpr2.start - HEAP_SIZE..wpr2.start)
+ let heap_size = calc_non_wpr_heap_size(chipset);
+ FbRange(wpr2.start - heap_size..wpr2.start)
};
// Calculate reserved sizes. PMU reservation is a subset of the total
reserved size.
diff --git a/drivers/gpu/nova-core/fsp.rs b/drivers/gpu/nova-core/fsp.rs
index 5840ab78e79f..7d46fbcc7abd 100644
--- a/drivers/gpu/nova-core/fsp.rs
+++ b/drivers/gpu/nova-core/fsp.rs
@@ -454,7 +454,7 @@ pub(crate) fn boot_gsp_fmc_with_signatures(
// frts_offset is relative to FB end: FRTS_location = FB_END -
frts_offset
let frts_offset = if !resume {
let mut frts_reserved_size = if chipset.needs_large_reserved_mem()
{
- 0x220000 // heap_size_non_wpr for Hopper/Blackwell+
+ crate::fb::calc_non_wpr_heap_size(chipset)
} else {
total_reserved_size
};
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 25/31] gpu: nova-core: Hopper/Blackwell: larger WPR2 (GSP) heap
Hopper, Blackwell and later GPUs require a larger heap for WPR2.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/gsp/fw.rs | 49 +++++++++++++++++++++++++--------
1 file changed, 38 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
index 8d668a24142c..6b7afbd7d9b6 100644
--- a/drivers/gpu/nova-core/gsp/fw.rs
+++ b/drivers/gpu/nova-core/gsp/fw.rs
@@ -49,21 +49,35 @@ enum GspFwHeapParams {}
/// Minimum required alignment for the GSP heap.
const GSP_HEAP_ALIGNMENT: Alignment = Alignment::new::<{ 1 << 20
}>();
+// These constants override the generated bindings for architecture-specific heap sizing.
+//
+// 14MB for Hopper/Blackwell+.
+const GSP_FW_HEAP_PARAM_BASE_RM_SIZE_GH100: u64 = 14 * num::usize_as_u64(SZ_1M);
+// 142MB client alloc for ~188MB total.
+const GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE_GH100: u64 = 142 * num::usize_as_u64(SZ_1M);
+// Blackwell-specific minimum heap size (88 + 12 + 70 = 170MB)
+const GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MIN_MB_BLACKWELL: u64 = 170;
+
impl GspFwHeapParams {
/// Returns the amount of GSP-RM heap memory used during GSP-RM boot and
initialization (up to
/// and including the first client subdevice allocation).
- fn base_rm_size(_chipset: Chipset) -> u64 {
- // TODO: this needs to be updated to return the correct value for
Hopper+ once support for
- // them is added:
- // u64::from(bindings::GSP_FW_HEAP_PARAM_BASE_RM_SIZE_GH100)
- u64::from(bindings::GSP_FW_HEAP_PARAM_BASE_RM_SIZE_TU10X)
+ fn base_rm_size(chipset: Chipset) -> u64 {
+ if chipset.needs_large_reserved_mem() {
+ GSP_FW_HEAP_PARAM_BASE_RM_SIZE_GH100
+ } else {
+ u64::from(bindings::GSP_FW_HEAP_PARAM_BASE_RM_SIZE_TU10X)
+ }
}
/// Returns the amount of heap memory required to support a single channel
allocation.
- fn client_alloc_size() -> u64 {
- u64::from(bindings::GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE)
- .align_up(GSP_HEAP_ALIGNMENT)
- .unwrap_or(u64::MAX)
+ fn client_alloc_size(chipset: Chipset) -> u64 {
+ if chipset.needs_large_reserved_mem() {
+ GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE_GH100
+ } else {
+ u64::from(bindings::GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE)
+ }
+ .align_up(GSP_HEAP_ALIGNMENT)
+ .unwrap_or(u64::MAX)
}
/// Returns the amount of memory to reserve for management purposes for a
framebuffer of size
@@ -106,12 +120,25 @@ impl LibosParams {
* num::usize_as_u64(SZ_1M),
};
+    /// Hopper/Blackwell+ GPUs need a larger minimum heap size than the bindings specify.
+    /// The r570 bindings set LIBOS3_BAREMETAL_MIN_MB to 88MB, but Hopper/Blackwell+ actually
+    /// requires 170MB (88 + 12 + 70).
+    ///
+    /// The carveout size and the upper bound of the allowed heap range still come from the
+    /// LIBOS3 bare-metal bindings; only the lower bound is overridden.
+    const LIBOS_BLACKWELL: LibosParams = LibosParams {
+        carveout_size: num::u32_as_u64(bindings::GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS3_BAREMETAL),
+        allowed_heap_size: GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MIN_MB_BLACKWELL
+            * num::usize_as_u64(SZ_1M)
+            ..num::u32_as_u64(bindings::GSP_FW_HEAP_SIZE_OVERRIDE_LIBOS3_BAREMETAL_MAX_MB)
+                * num::usize_as_u64(SZ_1M),
+    };
+
/// Returns the libos parameters corresponding to `chipset`.
pub(crate) fn from_chipset(chipset: Chipset) -> &'static
LibosParams {
if chipset < Chipset::GA102 {
&Self::LIBOS2
- } else {
+ } else if chipset < Chipset::GH100 {
&Self::LIBOS3
+ } else {
+ &Self::LIBOS_BLACKWELL
}
}
@@ -124,7 +151,7 @@ pub(crate) fn wpr_heap_size(&self, chipset: Chipset,
fb_size: u64) -> u64 {
// RM boot working memory,
.saturating_add(GspFwHeapParams::base_rm_size(chipset))
// One RM client,
- .saturating_add(GspFwHeapParams::client_alloc_size())
+ .saturating_add(GspFwHeapParams::client_alloc_size(chipset))
// Overhead for memory management.
.saturating_add(GspFwHeapParams::management_overhead(fb_size))
// Clamp to the supported heap sizes.
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 26/31] gpu: nova-core: refactor SEC2 booter loading into run_booter() helper
Extract the SEC2 booter loading sequence into a dedicated helper
function. This is a pure refactoring with no behavior change, done
in preparation for adding an alternative FSP boot path.
Co-developed-by: Alexandre Courbot <acourbot at nvidia.com>
Signed-off-by: Alexandre Courbot <acourbot at nvidia.com>
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/gsp/boot.rs | 67 ++++++++++++++++---------------
1 file changed, 35 insertions(+), 32 deletions(-)
diff --git a/drivers/gpu/nova-core/gsp/boot.rs
b/drivers/gpu/nova-core/gsp/boot.rs
index 846064221931..315f84907b11 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -120,6 +120,40 @@ fn run_fwsec_frts(
}
}
+    /// Loads the booter-load firmware onto the SEC2 falcon and runs it.
+    ///
+    /// The DMA address of `wpr_meta` is handed to the firmware split across the two boot
+    /// mailbox arguments (low 32 bits, then high 32 bits).
+    ///
+    /// Returns `ENODEV` if the firmware reports a non-zero status in MBOX0; errors from
+    /// loading, resetting or booting the falcon are propagated.
+    fn run_booter(
+        dev: &device::Device<device::Bound>,
+        bar: &Bar0,
+        chipset: Chipset,
+        sec2_falcon: &Falcon<Sec2>,
+        wpr_meta: &CoherentAllocation<GspFwWprMeta>,
+    ) -> Result {
+        let booter_loader = BooterFirmware::new(
+            dev,
+            BooterKind::Loader,
+            chipset,
+            FIRMWARE_VERSION,
+            sec2_falcon,
+            bar,
+        )?;
+
+        sec2_falcon.reset(bar)?;
+        sec2_falcon.dma_load(bar, &booter_loader)?;
+        let wpr_handle = wpr_meta.dma_handle();
+        // The `as u32` casts deliberately truncate: they select the low and high halves of
+        // the 64-bit DMA handle.
+        let (mbox0, mbox1) = sec2_falcon.boot(
+            bar,
+            Some(wpr_handle as u32),
+            Some((wpr_handle >> 32) as u32),
+        )?;
+        dev_dbg!(dev, "SEC2 MBOX0: {:#x}, MBOX1{:#x}\n", mbox0, mbox1);
+
+        if mbox0 != 0 {
+            dev_err!(dev, "Booter-load failed with error {:#x}\n", mbox0);
+            return Err(ENODEV);
+        }
+
+        Ok(())
+    }
+
/// Attempt to boot the GSP.
///
/// This is a GPU-dependent and complex procedure that involves loading
firmware files from
@@ -149,15 +183,6 @@ pub(crate) fn boot(
Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?;
- let booter_loader = BooterFirmware::new(
- dev,
- BooterKind::Loader,
- chipset,
- FIRMWARE_VERSION,
- sec2_falcon,
- bar,
- )?;
-
let wpr_meta =
CoherentAllocation::<GspFwWprMeta>::alloc_coherent(dev, 1, GFP_KERNEL |
__GFP_ZERO)?;
dma_write!(wpr_meta[0] = GspFwWprMeta::new(&gsp_fw,
&fb_layout))?;
@@ -185,29 +210,7 @@ pub(crate) fn boot(
"Using SEC2 to load and run the booter_load
firmware...\n"
);
- sec2_falcon.reset(bar)?;
- sec2_falcon.dma_load(bar, &booter_loader)?;
- let wpr_handle = wpr_meta.dma_handle();
- let (mbox0, mbox1) = sec2_falcon.boot(
- bar,
- Some(wpr_handle as u32),
- Some((wpr_handle >> 32) as u32),
- )?;
- dev_dbg!(
- pdev.as_ref(),
- "SEC2 MBOX0: {:#x}, MBOX1{:#x}\n",
- mbox0,
- mbox1
- );
-
- if mbox0 != 0 {
- dev_err!(
- pdev.as_ref(),
- "Booter-load failed with error {:#x}\n",
- mbox0
- );
- return Err(ENODEV);
- }
+ Self::run_booter(dev, bar, chipset, sec2_falcon, &wpr_meta)?;
gsp_falcon.write_os_version(bar, gsp_fw.bootloader.app_version);
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 27/31] gpu: nova-core: Hopper/Blackwell: skip GFW boot waiting
Hopper and Blackwell GPUs use FSP-based secure boot and do not require
waiting for GFW_BOOT completion. Skip this step for these architectures.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/gpu.rs | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index 8fdce488612a..fe078547c9b3 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -320,8 +320,13 @@ pub(crate) fn new<'a>(
// We must wait for GFW_BOOT completion before doing any
significant setup on the GPU.
_: {
- gfw::wait_gfw_boot_completion(bar)
- .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did
not complete"))?;
+ if matches!(
+ spec.chipset.arch(),
+ Architecture::Turing | Architecture::Ampere |
Architecture::Ada
+ ) {
+ gfw::wait_gfw_boot_completion(bar)
+ .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot
did not complete"))?;
+ }
},
sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar,
spec.chipset)?,
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 28/31] gpu: nova-core: Hopper/Blackwell: add GSP lockdown release polling
On Hopper and Blackwell, FSP boots GSP with hardware lockdown enabled.
After FSP Chain of Trust completes, the driver must poll for lockdown
release before proceeding with GSP initialization. Add the register
bit and helper functions needed for this polling.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/gsp/boot.rs | 86 ++++++++++++++++++++++++++++++-
drivers/gpu/nova-core/regs.rs | 1 +
2 files changed, 86 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nova-core/gsp/boot.rs
b/drivers/gpu/nova-core/gsp/boot.rs
index 315f84907b11..4d04135a700e 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -15,7 +15,8 @@
falcon::{
gsp::Gsp,
sec2::Sec2,
- Falcon, //
+ Falcon,
+ FalconEngine, //
},
fb::FbLayout,
firmware::{
@@ -154,6 +155,89 @@ fn run_booter(
Ok(())
}
+ /// Check if GSP lockdown has been released after FSP Chain of Trust
+ fn gsp_lockdown_released(
+ dev: &device::Device,
+ gsp_falcon: &Falcon<Gsp>,
+ bar: &Bar0,
+ fmc_boot_params_addr: u64,
+ mbox0: &mut u32,
+ ) -> bool {
+ // Read GSP falcon mailbox0
+ *mbox0 = gsp_falcon.read_mailbox0(bar);
+
+ // Check 1: If mbox0 has 0xbadf4100 pattern, GSP is still locked down
+ if *mbox0 != 0 && (*mbox0 & 0xffffff00) == 0xbadf4100 {
+ return false;
+ }
+
+ // Check 2: If mbox0 has a value, check if it's an error
+ if *mbox0 != 0 {
+ let mbox1 = gsp_falcon.read_mailbox1(bar);
+
+ let combined_addr = (u64::from(mbox1) << 32) |
u64::from(*mbox0);
+ if combined_addr != fmc_boot_params_addr {
+ // Address doesn't match - GSP wrote an error code
+ // Return TRUE (lockdown released) with error
+ dev_dbg!(dev,
+ "GSP lockdown released with error: mbox0={:#x},
combined_addr={:#x}, expected={:#x}",
+ *mbox0, combined_addr, fmc_boot_params_addr);
+ return true;
+ }
+ }
+
+ // Check 3: Verify HWCFG2 RISCV_BR_PRIV_LOCKDOWN bit is clear
+ let hwcfg2 = regs::NV_PFALCON_FALCON_HWCFG2::read(bar,
&crate::falcon::gsp::Gsp::ID);
+ !hwcfg2.riscv_br_priv_lockdown()
+ }
+
+ /// Wait for GSP lockdown to be released after FSP Chain of Trust
+ #[expect(dead_code)]
+ fn wait_for_gsp_lockdown_release(
+ dev: &device::Device,
+ bar: &Bar0,
+ gsp_falcon: &Falcon<Gsp>,
+ fmc_boot_params_addr: u64,
+ ) -> Result<u32> {
+ dev_dbg!(dev, "Waiting for GSP lockdown release\n");
+
+ let mut mbox0: u32 = 0;
+
+ read_poll_timeout(
+ || {
+ let released = Self::gsp_lockdown_released(
+ dev,
+ gsp_falcon,
+ bar,
+ fmc_boot_params_addr,
+ &mut mbox0,
+ );
+
+ Ok((released, mbox0))
+ },
+ |(released, _)| *released,
+ Delta::ZERO,
+ Delta::from_millis(4000),
+ )
+ .inspect_err(|_| {
+ dev_err!(dev, "GSP lockdown release timeout\n");
+ })
+ .map(|(_, mbox0)| mbox0)
+ .and_then(|mbox0| {
+ // Check mbox0 for error after wait completion
+ if mbox0 != 0 {
+ dev_err!(dev, "GSP-FMC boot failed (mbox: {:#x})\n",
mbox0);
+ Err(EIO)
+ } else {
+ dev_dbg!(
+ dev,
+ "GSP hardware lockdown fully released, proceeding with
initialization\n"
+ );
+ Ok(mbox0)
+ }
+ })
+ }
+
/// Attempt to boot the GSP.
///
/// This is a GPU-dependent and complex procedure that involves loading
firmware files from
diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
index 0d5c13f19073..a41d31449c65 100644
--- a/drivers/gpu/nova-core/regs.rs
+++ b/drivers/gpu/nova-core/regs.rs
@@ -277,6 +277,7 @@ pub(crate) fn vga_workspace_addr(self) ->
Option<u64> {
register!(NV_PFALCON_FALCON_HWCFG2 @ PFalconBase[0x000000f4] {
10:10 riscv as bool;
+ 11:11 riscv_br_priv_lockdown as bool, "RISC-V branch privilege
lockdown bit";
12:12 mem_scrubbing as bool, "Set to 0 after memory scrubbing is
completed";
31:31 reset_ready as bool, "Signal indicating that reset is
completed (GA102+)";
});
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 29/31] gpu: nova-core: Hopper/Blackwell: add FSP Chain of Trust boot path
Add the FSP-based boot path for Hopper and Blackwell GPUs. Unlike
Turing/Ampere/Ada, which use SEC2 to load the booter firmware, Hopper
and Blackwell use FSP (Firmware System Processor) with FMC firmware
to establish a Chain of Trust and boot GSP directly.
The boot() function now dispatches to either run_booter() (SEC2 path)
or run_fsp() (FSP path) based on the GPU architecture. The cmdq
commands are moved to after GSP boot, and the GSP sequencer is only
run for SEC2-based architectures.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/firmware/fsp.rs | 6 +-
drivers/gpu/nova-core/fsp.rs | 6 +-
drivers/gpu/nova-core/gsp/boot.rs | 159 ++++++++++++++++++++------
3 files changed, 126 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/nova-core/firmware/fsp.rs
b/drivers/gpu/nova-core/firmware/fsp.rs
index 80401b964488..d88c7a91e2bc 100644
--- a/drivers/gpu/nova-core/firmware/fsp.rs
+++ b/drivers/gpu/nova-core/firmware/fsp.rs
@@ -13,16 +13,14 @@
gpu::Chipset, //
};
-#[expect(unused)]
pub(crate) struct FspFirmware {
/// FMC firmware image data (only the .image section)
- fmc_image: DmaObject,
+ pub(crate) fmc_image: DmaObject,
/// Full FMC ELF data (for signature extraction)
- fmc_full: DmaObject,
+ pub(crate) fmc_full: DmaObject,
}
impl FspFirmware {
- #[expect(unused)]
pub(crate) fn new(
dev: &device::Device<device::Bound>,
chipset: Chipset,
diff --git a/drivers/gpu/nova-core/fsp.rs b/drivers/gpu/nova-core/fsp.rs
index 7d46fbcc7abd..9c11ceb6ab4d 100644
--- a/drivers/gpu/nova-core/fsp.rs
+++ b/drivers/gpu/nova-core/fsp.rs
@@ -1,8 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-// TODO: remove this once the code is fully functional
-#![expect(dead_code)]
-
//! FSP (Firmware System Processor) interface for Hopper/Blackwell GPUs.
//!
//! Hopper/Blackwell use a simplified firmware boot sequence: FMC --> FSP
--> GSP.
@@ -11,6 +8,7 @@
use kernel::{
device,
+ dma::CoherentAllocation,
io::poll::read_poll_timeout,
prelude::*,
ptr::{
@@ -381,8 +379,6 @@ pub(crate) fn create_fmc_boot_params(
wpr_meta_size: u32,
libos_addr: u64,
) ->
Result<kernel::dma::CoherentAllocation<GspFmcBootParams>> {
- use kernel::dma::CoherentAllocation;
-
const GSP_DMA_TARGET_COHERENT_SYSTEM: u32 = 1;
const GSP_DMA_TARGET_NONCOHERENT_SYSTEM: u32 = 2;
diff --git a/drivers/gpu/nova-core/gsp/boot.rs
b/drivers/gpu/nova-core/gsp/boot.rs
index 4d04135a700e..0fbaa73eb55c 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -13,6 +13,7 @@
use crate::{
driver::Bar0,
falcon::{
+ fsp::Fsp as FspEngine,
gsp::Gsp,
sec2::Sec2,
Falcon,
@@ -24,6 +25,7 @@
BooterFirmware,
BooterKind, //
},
+ fsp::FspFirmware,
fwsec::{
FwsecCommand,
FwsecFirmware, //
@@ -31,9 +33,11 @@
gsp::GspFirmware,
FIRMWARE_VERSION, //
},
- gpu::Chipset,
+ fsp::Fsp,
+ gpu::{Architecture, Chipset},
gsp::{
commands,
+ fw::LibosMemoryRegionInitArgument,
sequencer::{
GspSequencer,
GspSequencerParams, //
@@ -155,6 +159,59 @@ fn run_booter(
Ok(())
}
+ fn run_fsp(
+ dev: &device::Device<device::Bound>,
+ bar: &Bar0,
+ chipset: Chipset,
+ gsp_falcon: &Falcon<Gsp>,
+ wpr_meta: &CoherentAllocation<GspFwWprMeta>,
+ libos: &CoherentAllocation<LibosMemoryRegionInitArgument>,
+ fb_layout: &FbLayout,
+ ) -> Result {
+ let fsp_falcon = Falcon::<FspEngine>::new(dev, chipset)?;
+
+ Fsp::wait_secure_boot(dev, bar, chipset.arch())?;
+
+ let fsp_fw = FspFirmware::new(dev, chipset, FIRMWARE_VERSION)?;
+
+ // SAFETY: fmc_full is a valid DmaObject with a contiguous allocation
of size() bytes
+ // starting at start_ptr(). The slice is only used for signature
extraction within this
+ // function scope while fsp_fw remains valid.
+ let fmc_full_data = unsafe {
+ core::slice::from_raw_parts(fsp_fw.fmc_full.start_ptr(),
fsp_fw.fmc_full.size())
+ };
+ let signatures = Fsp::extract_fmc_signatures_static(dev,
fmc_full_data)?;
+
+ // Create FMC boot parameters
+ let fmc_boot_params = Fsp::create_fmc_boot_params(
+ dev,
+ wpr_meta.dma_handle(),
+ core::mem::size_of::<GspFwWprMeta>() as u32,
+ libos.dma_handle(),
+ )?;
+
+ // Execute FSP Chain of Trust
+ // NOTE: FSP Chain of Trust handles GSP boot internally - we do NOT
reset or boot GSP
+ Fsp::boot_gsp_fmc_with_signatures(
+ dev,
+ bar,
+ chipset,
+ &fsp_fw.fmc_image,
+ &fmc_boot_params,
+ u64::from(fb_layout.total_reserved_size),
+ false, // not resuming
+ &fsp_falcon,
+ &signatures,
+ )?;
+
+ // Wait for GSP lockdown to be released
+ let fmc_boot_params_addr = fmc_boot_params.dma_handle();
+ let _mbox0 =
+ Self::wait_for_gsp_lockdown_release(dev, bar,
gsp_falcon, fmc_boot_params_addr)?;
+
+ Ok(())
+ }
+
/// Check if GSP lockdown has been released after FSP Chain of Trust
fn gsp_lockdown_released(
dev: &device::Device,
@@ -192,7 +249,6 @@ fn gsp_lockdown_released(
}
/// Wait for GSP lockdown to be released after FSP Chain of Trust
- #[expect(dead_code)]
fn wait_for_gsp_lockdown_release(
dev: &device::Device,
bar: &Bar0,
@@ -255,8 +311,6 @@ pub(crate) fn boot(
) -> Result {
let dev = pdev.as_ref();
- let bios = Vbios::new(dev, bar)?;
-
let gsp_fw = KBox::pin_init(
GspFirmware::new(dev, chipset, FIRMWARE_VERSION)?,
GFP_KERNEL,
@@ -265,36 +319,58 @@ pub(crate) fn boot(
let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?;
dev_dbg!(dev, "{:#x?}\n", fb_layout);
- Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?;
+ if matches!(
+ chipset.arch(),
+ Architecture::Turing | Architecture::Ampere | Architecture::Ada
+ ) {
+ let bios = Vbios::new(dev, bar)?;
+ Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios,
&fb_layout)?;
+ }
let wpr_meta =
CoherentAllocation::<GspFwWprMeta>::alloc_coherent(dev, 1, GFP_KERNEL |
__GFP_ZERO)?;
dma_write!(wpr_meta[0] = GspFwWprMeta::new(&gsp_fw,
&fb_layout))?;
- self.cmdq
- .send_command(bar, commands::SetSystemInfo::new(pdev))?;
- self.cmdq.send_command(bar, commands::SetRegistry::new())?;
+ // For SEC2-based architectures, reset GSP and boot it before SEC2
+ if matches!(
+ chipset.arch(),
+ Architecture::Turing | Architecture::Ampere | Architecture::Ada
+ ) {
+ gsp_falcon.reset(bar)?;
+ let libos_handle = self.libos.dma_handle();
+ let (mbox0, mbox1) = gsp_falcon.boot(
+ bar,
+ Some(libos_handle as u32),
+ Some((libos_handle >> 32) as u32),
+ )?;
+ dev_dbg!(
+ pdev.as_ref(),
+ "GSP MBOX0: {:#x}, MBOX1: {:#x}\n",
+ mbox0,
+ mbox1
+ );
- gsp_falcon.reset(bar)?;
- let libos_handle = self.libos.dma_handle();
- let (mbox0, mbox1) = gsp_falcon.boot(
- bar,
- Some(libos_handle as u32),
- Some((libos_handle >> 32) as u32),
- )?;
- dev_dbg!(
- pdev.as_ref(),
- "GSP MBOX0: {:#x}, MBOX1: {:#x}\n",
- mbox0,
- mbox1
- );
+ dev_dbg!(
+ pdev.as_ref(),
+ "Using SEC2 to load and run the booter_load
firmware...\n"
+ );
+ }
- dev_dbg!(
- pdev.as_ref(),
- "Using SEC2 to load and run the booter_load
firmware...\n"
- );
+ match chipset.arch() {
+ Architecture::Turing | Architecture::Ampere | Architecture::Ada
=> {
+ Self::run_booter(dev, bar, chipset, sec2_falcon,
&wpr_meta)?
+ }
- Self::run_booter(dev, bar, chipset, sec2_falcon, &wpr_meta)?;
+ Architecture::Hopper | Architecture::Blackwell => Self::run_fsp(
+ dev,
+ bar,
+ chipset,
+ gsp_falcon,
+ &wpr_meta,
+ &self.libos,
+ &fb_layout,
+ )?,
+ }
gsp_falcon.write_os_version(bar, gsp_fw.bootloader.app_version);
@@ -312,16 +388,27 @@ pub(crate) fn boot(
gsp_falcon.is_riscv_active(bar),
);
- // Create and run the GSP sequencer.
- let seq_params = GspSequencerParams {
- bootloader_app_version: gsp_fw.bootloader.app_version,
- libos_dma_handle: libos_handle,
- gsp_falcon,
- sec2_falcon,
- dev: pdev.as_ref().into(),
- bar,
- };
- GspSequencer::run(&mut self.cmdq, seq_params)?;
+ // Now that GSP is active, send system info and registry
+ self.cmdq
+ .send_command(bar, commands::SetSystemInfo::new(pdev))?;
+ self.cmdq.send_command(bar, commands::SetRegistry::new())?;
+
+ if matches!(
+ chipset.arch(),
+ Architecture::Turing | Architecture::Ampere | Architecture::Ada
+ ) {
+ let libos_handle = self.libos.dma_handle();
+ // Create and run the GSP sequencer.
+ let seq_params = GspSequencerParams {
+ bootloader_app_version: gsp_fw.bootloader.app_version,
+ libos_dma_handle: libos_handle,
+ gsp_falcon,
+ sec2_falcon,
+ dev: pdev.as_ref().into(),
+ bar,
+ };
+ GspSequencer::run(&mut self.cmdq, seq_params)?;
+ }
// Wait until GSP is fully initialized.
commands::wait_gsp_init_done(&mut self.cmdq)?;
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 30/31] gpu: nova-core: Hopper/Blackwell: new location for PCI config mirror
Hopper and Blackwell GPUs use a different PCI config mirror base address
(0x092000) compared to earlier architectures (0x088000). Pass the chipset
through to GspSetSystemInfo::init() so it can select the correct address.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/gsp/boot.rs | 2 +-
drivers/gpu/nova-core/gsp/commands.rs | 8 +++++---
drivers/gpu/nova-core/gsp/fw/commands.rs | 18 +++++++++++++++---
3 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/nova-core/gsp/boot.rs
b/drivers/gpu/nova-core/gsp/boot.rs
index 0fbaa73eb55c..084be5586389 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -390,7 +390,7 @@ pub(crate) fn boot(
// Now that GSP is active, send system info and registry
self.cmdq
- .send_command(bar, commands::SetSystemInfo::new(pdev))?;
+ .send_command(bar, commands::SetSystemInfo::new(pdev, chipset))?;
self.cmdq.send_command(bar, commands::SetRegistry::new())?;
if matches!(
diff --git a/drivers/gpu/nova-core/gsp/commands.rs
b/drivers/gpu/nova-core/gsp/commands.rs
index 0425c65b5d6f..2a0b796e5927 100644
--- a/drivers/gpu/nova-core/gsp/commands.rs
+++ b/drivers/gpu/nova-core/gsp/commands.rs
@@ -18,6 +18,7 @@
use crate::{
driver::Bar0,
+ gpu::Chipset,
gsp::{
cmdq::{
Cmdq,
@@ -36,12 +37,13 @@
/// The `GspSetSystemInfo` command.
pub(crate) struct SetSystemInfo<'a> {
pdev: &'a pci::Device<device::Bound>,
+ chipset: Chipset,
}
impl<'a> SetSystemInfo<'a> {
/// Creates a new `GspSetSystemInfo` command using the parameters of
`pdev`.
- pub(crate) fn new(pdev: &'a pci::Device<device::Bound>) ->
Self {
- Self { pdev }
+ pub(crate) fn new(pdev: &'a pci::Device<device::Bound>,
chipset: Chipset) -> Self {
+ Self { pdev, chipset }
}
}
@@ -51,7 +53,7 @@ impl<'a> CommandToGsp for
SetSystemInfo<'a> {
type InitError = Error;
fn init(&self) -> impl Init<Self::Command, Self::InitError> {
- GspSetSystemInfo::init(self.pdev)
+ GspSetSystemInfo::init(self.pdev, self.chipset)
}
}
diff --git a/drivers/gpu/nova-core/gsp/fw/commands.rs
b/drivers/gpu/nova-core/gsp/fw/commands.rs
index 470d8edb62ff..fe8f56ba3e80 100644
--- a/drivers/gpu/nova-core/gsp/fw/commands.rs
+++ b/drivers/gpu/nova-core/gsp/fw/commands.rs
@@ -10,7 +10,13 @@
}, //
};
-use crate::gsp::GSP_PAGE_SIZE;
+use crate::{
+ gpu::{
+ Architecture,
+ Chipset, //
+ },
+ gsp::GSP_PAGE_SIZE, //
+};
use super::bindings;
@@ -24,7 +30,10 @@ pub(crate) struct GspSetSystemInfo {
impl GspSetSystemInfo {
/// Returns an in-place initializer for the `GspSetSystemInfo` command.
#[allow(non_snake_case)]
- pub(crate) fn init<'a>(dev: &'a
pci::Device<device::Bound>) -> impl Init<Self, Error> + 'a {
+ pub(crate) fn init<'a>(
+ dev: &'a pci::Device<device::Bound>,
+ chipset: Chipset,
+ ) -> impl Init<Self, Error> + 'a {
type InnerGspSystemInfo = bindings::GspSystemInfo;
let init_inner = try_init!(InnerGspSystemInfo {
gpuPhysAddr: dev.resource_start(0)?,
@@ -35,7 +44,10 @@ pub(crate) fn init<'a>(dev: &'a
pci::Device<device::Bound>) -> impl Init<Self, E
// Using TASK_SIZE in r535_gsp_rpc_set_system_info() seems wrong
because
// TASK_SIZE is per-task. That's probably a design issue in
GSP-RM though.
maxUserVa: (1 << 47) - 4096,
- pciConfigMirrorBase: 0x088000,
+ pciConfigMirrorBase: match chipset.arch() {
+ Architecture::Turing | Architecture::Ampere | Architecture::Ada
=> 0x088000,
+ Architecture::Hopper | Architecture::Blackwell => 0x092000,
+ },
pciConfigMirrorSize: 0x001000,
PCIDeviceID: (u32::from(dev.device_id()) << 16) |
u32::from(dev.vendor_id().as_raw()),
--
2.52.0
John Hubbard
2025-Dec-03 05:59 UTC
[PATCH 31/31] gpu: nova-core: clarify the GPU firmware boot steps
Now that Hopper/Blackwell GSP is up and running, it's clear how to
factor out the common code and the per-architecture code, for booting
up firmware. The key is that, for Turing, Ampere, and Ada, the SEC2
firmware is used and a CPU "sequencer" must be run. For Hopper,
Blackwell and later GPUs, there is no SEC2, no sequencer, but there is
an FSP to get running instead.
This change makes that split between the SEC2 and FSP boot paths clearly
visible in the code.
Signed-off-by: John Hubbard <jhubbard at nvidia.com>
---
drivers/gpu/nova-core/gsp/boot.rs | 116 +++++++++++++++++-------------
1 file changed, 65 insertions(+), 51 deletions(-)
diff --git a/drivers/gpu/nova-core/gsp/boot.rs
b/drivers/gpu/nova-core/gsp/boot.rs
index 084be5586389..79a2f5acc09b 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -159,7 +159,48 @@ fn run_booter(
Ok(())
}
- fn run_fsp(
+ /// Boot GSP via SEC2 booter firmware (Turing/Ampere/Ada path).
+ ///
+ /// This path uses FWSEC-FRTS to set up WPR2, then boots GSP directly,
+ /// then uses SEC2 to run the booter firmware.
+ #[allow(clippy::too_many_arguments)]
+ fn boot_via_sec2(
+ dev: &device::Device<device::Bound>,
+ bar: &Bar0,
+ chipset: Chipset,
+ gsp_falcon: &Falcon<Gsp>,
+ sec2_falcon: &Falcon<Sec2>,
+ fb_layout: &FbLayout,
+ libos: &CoherentAllocation<LibosMemoryRegionInitArgument>,
+ wpr_meta: &CoherentAllocation<GspFwWprMeta>,
+ ) -> Result {
+ // Run FWSEC-FRTS to set up the WPR2 region
+ let bios = Vbios::new(dev, bar)?;
+ Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, fb_layout)?;
+
+ // Reset and boot GSP before SEC2
+ gsp_falcon.reset(bar)?;
+ let libos_handle = libos.dma_handle();
+ let (mbox0, mbox1) = gsp_falcon.boot(
+ bar,
+ Some(libos_handle as u32),
+ Some((libos_handle >> 32) as u32),
+ )?;
+ dev_dbg!(dev, "GSP MBOX0: {:#x}, MBOX1: {:#x}\n", mbox0,
mbox1);
+ dev_dbg!(
+ dev,
+ "Using SEC2 to load and run the booter_load
firmware...\n"
+ );
+
+ // Run booter via SEC2
+ Self::run_booter(dev, bar, chipset, sec2_falcon, wpr_meta)
+ }
+
+ /// Boot GSP via FSP Chain of Trust (Hopper/Blackwell+ path).
+ ///
+ /// This path uses FSP to establish a chain of trust and boot GSP-FMC. FSP
handles
+ /// the GSP boot internally - no manual GSP reset/boot is needed.
+ fn boot_via_fsp(
dev: &device::Device<device::Bound>,
bar: &Bar0,
chipset: Chipset,
@@ -310,6 +351,10 @@ pub(crate) fn boot(
sec2_falcon: &Falcon<Sec2>,
) -> Result {
let dev = pdev.as_ref();
+ let uses_sec2 = matches!(
+ chipset.arch(),
+ Architecture::Turing | Architecture::Ampere | Architecture::Ada
+ );
let gsp_fw = KBox::pin_init(
GspFirmware::new(dev, chipset, FIRMWARE_VERSION)?,
@@ -319,49 +364,24 @@ pub(crate) fn boot(
let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?;
dev_dbg!(dev, "{:#x?}\n", fb_layout);
- if matches!(
- chipset.arch(),
- Architecture::Turing | Architecture::Ampere | Architecture::Ada
- ) {
- let bios = Vbios::new(dev, bar)?;
- Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios,
&fb_layout)?;
- }
-
let wpr_meta =
CoherentAllocation::<GspFwWprMeta>::alloc_coherent(dev, 1, GFP_KERNEL |
__GFP_ZERO)?;
dma_write!(wpr_meta[0] = GspFwWprMeta::new(&gsp_fw,
&fb_layout))?;
- // For SEC2-based architectures, reset GSP and boot it before SEC2
- if matches!(
- chipset.arch(),
- Architecture::Turing | Architecture::Ampere | Architecture::Ada
- ) {
- gsp_falcon.reset(bar)?;
- let libos_handle = self.libos.dma_handle();
- let (mbox0, mbox1) = gsp_falcon.boot(
+ // Architecture-specific boot path
+ if uses_sec2 {
+ Self::boot_via_sec2(
+ dev,
bar,
- Some(libos_handle as u32),
- Some((libos_handle >> 32) as u32),
+ chipset,
+ gsp_falcon,
+ sec2_falcon,
+ &fb_layout,
+ &self.libos,
+ &wpr_meta,
)?;
- dev_dbg!(
- pdev.as_ref(),
- "GSP MBOX0: {:#x}, MBOX1: {:#x}\n",
- mbox0,
- mbox1
- );
-
- dev_dbg!(
- pdev.as_ref(),
- "Using SEC2 to load and run the booter_load
firmware...\n"
- );
- }
-
- match chipset.arch() {
- Architecture::Turing | Architecture::Ampere | Architecture::Ada
=> {
- Self::run_booter(dev, bar, chipset, sec2_falcon,
&wpr_meta)?
- }
-
- Architecture::Hopper | Architecture::Blackwell => Self::run_fsp(
+ } else {
+ Self::boot_via_fsp(
dev,
bar,
chipset,
@@ -369,9 +389,10 @@ pub(crate) fn boot(
&wpr_meta,
&self.libos,
&fb_layout,
- )?,
+ )?;
}
+ // Common post-boot initialization
gsp_falcon.write_os_version(bar, gsp_fw.bootloader.app_version);
// Poll for RISC-V to become active before running sequencer
@@ -382,29 +403,22 @@ pub(crate) fn boot(
Delta::from_secs(5),
)?;
- dev_dbg!(
- pdev.as_ref(),
- "RISC-V active? {}\n",
- gsp_falcon.is_riscv_active(bar),
- );
+ dev_dbg!(dev, "RISC-V active? {}\n",
gsp_falcon.is_riscv_active(bar));
// Now that GSP is active, send system info and registry
self.cmdq
.send_command(bar, commands::SetSystemInfo::new(pdev, chipset))?;
self.cmdq.send_command(bar, commands::SetRegistry::new())?;
- if matches!(
- chipset.arch(),
- Architecture::Turing | Architecture::Ampere | Architecture::Ada
- ) {
+ // SEC2-based architectures need to run the GSP sequencer
+ if uses_sec2 {
let libos_handle = self.libos.dma_handle();
- // Create and run the GSP sequencer.
let seq_params = GspSequencerParams {
bootloader_app_version: gsp_fw.bootloader.app_version,
libos_dma_handle: libos_handle,
gsp_falcon,
sec2_falcon,
- dev: pdev.as_ref().into(),
+ dev: dev.into(),
bar,
};
GspSequencer::run(&mut self.cmdq, seq_params)?;
@@ -416,7 +430,7 @@ pub(crate) fn boot(
// Obtain and display basic GPU information.
let info = commands::get_gsp_info(&mut self.cmdq, bar)?;
dev_info!(
- pdev.as_ref(),
+ dev,
"GPU name: {}\n",
info.gpu_name().unwrap_or("invalid GPU name")
);
--
2.52.0