Timur Tabi
2025-Dec-01 23:39 UTC
[PATCH v2 12/13] gpu: nova-core: add PIO support for loading firmware images
Turing and GA100 use programmed I/O (PIO) instead of DMA to upload
firmware images into Falcon memory.
A new firmware called the Generic Bootloader (as opposed to the
GSP Bootloader) is used to upload FWSEC.
Signed-off-by: Timur Tabi <ttabi at nvidia.com>
---
drivers/gpu/nova-core/falcon.rs | 149 +++++++++++++++++++++++-
drivers/gpu/nova-core/firmware.rs | 4 +-
drivers/gpu/nova-core/firmware/fwsec.rs | 142 +++++++++++++++++++++-
drivers/gpu/nova-core/gsp/boot.rs | 10 +-
4 files changed, 293 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
index 2770d608a2cf..88f65ee7805a 100644
--- a/drivers/gpu/nova-core/falcon.rs
+++ b/drivers/gpu/nova-core/falcon.rs
@@ -12,14 +12,17 @@
io::poll::read_poll_timeout,
prelude::*,
sync::aref::ARef,
- time::{
- Delta, //
- },
+ time::Delta,
+ transmute::AsBytes, //
};
use crate::{
dma::DmaObject,
driver::Bar0,
+ firmware::fwsec::{
+ BootloaderDmemDescV2,
+ GenericBootloader, //
+ },
gpu::Chipset,
num::{
FromSafeCast,
@@ -406,6 +409,146 @@ pub(crate) fn reset(&self, bar: &Bar0) ->
Result {
Ok(())
}
+
+ /// See nvkm_falcon_pio_wr - takes a byte array instead of a FalconFirmware
+ fn pio_wr_bytes(
+ &self,
+ bar: &Bar0,
+ img: &[u8],
+ mem_base: u16,
+ target_mem: FalconMem,
+ port: u8,
+ tag: u16
+ ) {
+ let port = usize::from(port);
+
+ match target_mem {
+ FalconMem::ImemSecure | FalconMem::ImemNonSecure => {
+ regs::NV_PFALCON_FALCON_IMEMC::default()
+ .set_secure(target_mem == FalconMem::ImemSecure)
+ .set_aincw(true)
+ .set_offs(mem_base)
+ .write(bar, &E::ID, port);
+
+ let mut tag = tag;
+ for block in img.chunks(256) {
+ regs::NV_PFALCON_FALCON_IMEMT::default()
+ .set_tag(tag)
+ .write(bar, &E::ID, port);
+ for word in block.chunks(4) {
+ let w = u32::from_le_bytes(word.try_into().unwrap());
+ regs::NV_PFALCON_FALCON_IMEMD::default()
+ .set_data(w)
+ .write(bar, &E::ID, port);
+ }
+ tag += 1;
+ }
+ },
+ FalconMem::Dmem => {
+ regs::NV_PFALCON_FALCON_DMEMC::default()
+ .set_aincw(true)
+ .set_offs(mem_base)
+ .write(bar, &E::ID, port);
+
+ for block in img.chunks(256) {
+ for word in block.chunks_exact(4) {
+ regs::NV_PFALCON_FALCON_DMEMD::default()
+
.set_data(u32::from_le_bytes(word.try_into().unwrap()))
+ .write(bar, &E::ID, port);
+ }
+ }
+ },
+ }
+ }
+
+ fn pio_wr<F: FalconFirmware<Target = E>>(
+ &self,
+ bar: &Bar0,
+ fw: &F,
+ target_mem: FalconMem,
+ load_offsets: &FalconLoadTarget,
+ port: u8,
+ tag: u16,
+ ) -> Result {
+ let start = usize::from_safe_cast(load_offsets.src_start);
+ let len = usize::from_safe_cast(load_offsets.len);
+
+ // SAFETY: as_slice() ensures that start+len is within range
+ let data = unsafe { fw.as_slice(start, len)? };
+
+ self.pio_wr_bytes(bar, data, u16::try_from(load_offsets.dst_start)?,
target_mem, port, tag);
+
+ Ok(())
+ }
+
+ /// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the
falcon to run it.
+ pub(crate) fn pio_load<F: FalconFirmware<Target = E>>(
+ &self,
+ bar: &Bar0,
+ fw: &F,
+ gbl: Option<&GenericBootloader>
+ ) -> Result {
+ let imem_sec = fw.imem_sec_load_params();
+ let imem_ns = fw.imem_ns_load_params().unwrap();
+ let dmem = fw.dmem_load_params();
+
+ regs::NV_PFALCON_FBIF_CTL::read(bar, &E::ID)
+ .set_allow_phys_no_ctx(true)
+ .write(bar, &E::ID);
+
+ regs::NV_PFALCON_FALCON_DMACTL::default()
+ .write(bar, &E::ID);
+
+ // If the Generic Bootloader was passed, then use it to boot FRTS
+ if let Some(gbl) = gbl {
+ let dst_start = u16::try_from(0x10000 - gbl.desc.code_size)?;
+ let data =
&gbl.ucode[..usize::from_safe_cast(gbl.desc.code_size)];
+ let tag = u16::try_from(gbl.desc.start_tag)?;
+
+ self.pio_wr_bytes(bar, data, dst_start, FalconMem::ImemNonSecure,
0, tag);
+
+ // This structure tells the generic bootloader where to find the
FWSEC
+ // image.
+ let dmem_desc = BootloaderDmemDescV2 {
+ reserved: [0; 4],
+ signature: [0; 4],
+ ctx_dma: 4, // FALCON_DMAIDX_PHYS_SYS_NCOH
+ code_dma_base: fw.dma_handle(),
+ non_sec_code_off: imem_ns.dst_start,
+ non_sec_code_size: imem_ns.len,
+ sec_code_off: imem_sec.dst_start,
+ sec_code_size: imem_sec.len,
+ code_entry_point: 0,
+ data_dma_base: fw.dma_handle() + u64::from(dmem.src_start),
+ data_size: dmem.len,
+ argc: 0,
+ argv: 0,
+ };
+
+ regs::NV_PFALCON_FBIF_TRANSCFG::update(bar, &E::ID, 4, |v| {
+ v.set_target(FalconFbifTarget::CoherentSysmem)
+ .set_mem_type(FalconFbifMemType::Physical)
+ });
+
+ self.pio_wr_bytes(bar, dmem_desc.as_bytes(), 0, FalconMem::Dmem, 0,
0);
+ } else {
+ self.pio_wr(bar, fw, FalconMem::ImemNonSecure, &imem_ns, 0,
+ u16::try_from(imem_ns.dst_start >> 8)?)?;
+ self.pio_wr(bar, fw, FalconMem::ImemSecure, &imem_sec, 0,
+ u16::try_from(imem_sec.dst_start >> 8)?)?;
+ self.pio_wr(bar, fw, FalconMem::Dmem, &dmem, 0, 0)?;
+ }
+
+ self.hal.program_brom(self, bar, &fw.brom_params())?;
+
+ // Set `BootVec` to start of non-secure code.
+ regs::NV_PFALCON_FALCON_BOOTVEC::default()
+ .set_value(fw.boot_addr())
+ .write(bar, &E::ID);
+
+ Ok(())
+ }
+
/// Perform a DMA write according to `load_offsets` from `dma_handle` into
the falcon's
/// `target_mem`.
///
diff --git a/drivers/gpu/nova-core/firmware.rs
b/drivers/gpu/nova-core/firmware.rs
index 169b07ca340a..3008d18f9313 100644
--- a/drivers/gpu/nova-core/firmware.rs
+++ b/drivers/gpu/nova-core/firmware.rs
@@ -31,7 +31,7 @@
pub(crate) const FIRMWARE_VERSION: &str = "570.144";
/// Requests the GPU firmware `name` suitable for `chipset`, with version
`ver`.
-fn request_firmware(
+pub(crate) fn request_firmware(
dev: &device::Device,
chipset: gpu::Chipset,
name: &str,
@@ -258,7 +258,7 @@ fn no_patch_signature(self) -> FirmwareDmaObject<F,
Signed> {
/// Header common to most firmware files.
#[repr(C)]
#[derive(Debug, Clone)]
-struct BinHdr {
+pub(crate) struct BinHdr {
/// Magic number, must be `0x10de`.
bin_magic: u32,
/// Version of the header.
diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs
b/drivers/gpu/nova-core/firmware/fwsec.rs
index 36ff8ed51c23..159aedd221e8 100644
--- a/drivers/gpu/nova-core/firmware/fwsec.rs
+++ b/drivers/gpu/nova-core/firmware/fwsec.rs
@@ -40,12 +40,15 @@
FalconLoadTarget, //
},
firmware::{
+ FIRMWARE_VERSION,
+ BinHdr,
FalconUCodeDesc,
FirmwareDmaObject,
FirmwareSignature,
Signed,
Unsigned, //
},
+ gpu::Chipset,
num::{
FromSafeCast,
IntoSafeCast, //
@@ -213,6 +216,72 @@ unsafe fn transmute_mut<T: Sized + FromBytes +
AsBytes>(
T::from_bytes_mut(unsafe { fw.as_slice_mut(offset, size_of::<T>())?
}).ok_or(EINVAL)
}
+/// Descriptor used by RM to figure out the requirements of the boot loader.
+#[repr(C)]
+#[derive(Debug, Clone)]
+pub(crate) struct BootloaderDesc {
+ /// Starting tag of bootloader.
+ pub start_tag: u32,
+ /// DMEM offset where [`BootloaderDmemDescV2`] is to be loaded.
+ pub dmem_load_off: u32,
+ /// Offset of code section in the image.
+ pub code_off: u32,
+ /// Size of code section in the image.
+ pub code_size: u32,
+ /// Offset of data section in the image.
+ pub data_off: u32,
+ /// Size of data section in the image.
+ pub data_size: u32,
+}
+// SAFETY: any byte sequence is valid for this struct.
+unsafe impl FromBytes for BootloaderDesc {}
+// SAFETY: This struct doesn't contain uninitialized bytes and doesn't
have interior mutability.
+unsafe impl AsBytes for BootloaderDesc {}
+
+/// Structure used by the boot-loader to load the rest of the code.
+///
+/// This has to be filled by the GPU driver and copied into DMEM at offset
+/// [`BootloaderDesc.dmem_load_off`].
+#[repr(C, packed)]
+#[derive(Debug, Clone)]
+pub(crate) struct BootloaderDmemDescV2 {
+ /// Reserved, should always be first element.
+ pub reserved: [u32; 4],
+ /// 16B signature for secure code, 0s if no secure code.
+ pub signature: [u32; 4],
+ /// DMA context used by the bootloader while loading code/data.
+ pub ctx_dma: u32,
+ /// 256B-aligned physical FB address where code is located.
+ pub code_dma_base: u64,
+ /// Offset from `code_dma_base` where the non-secure code is located (must be
multiple of 256).
+ pub non_sec_code_off: u32,
+ /// Size of the non-secure code part.
+ pub non_sec_code_size: u32,
+ /// Offset from `code_dma_base` where the secure code is located (must be
multiple of 256).
+ pub sec_code_off: u32,
+ /// Size of the secure code part.
+ pub sec_code_size: u32,
+ /// Code entry point invoked by the bootloader after code is loaded.
+ pub code_entry_point: u32,
+ /// 256B-aligned physical FB address where data is located.
+ pub data_dma_base: u64,
+ /// Size of data block (should be multiple of 256B).
+ pub data_size: u32,
+ /// Arguments to be passed to the target firmware being loaded.
+ pub argc: u32,
+ /// Number of arguments to be passed to the target firmware being loaded.
+ pub argv: u32,
+}
+// SAFETY: any byte sequence is valid for this struct.
+unsafe impl FromBytes for BootloaderDmemDescV2 {}
+// SAFETY: This struct doesn't contain uninitialized bytes and doesn't
have interior mutability.
+unsafe impl AsBytes for BootloaderDmemDescV2 {}
+
+pub(crate) struct GenericBootloader {
+ pub desc: BootloaderDesc,
+ pub ucode: Vec<u8, kernel::alloc::allocator::Kmalloc>,
+}
+
/// The FWSEC microcode, extracted from the BIOS and to be run on the GSP
falcon.
///
/// It is responsible for e.g. carving out the WPR2 region as the first step of
the GSP bootflow.
@@ -221,6 +290,8 @@ pub(crate) struct FwsecFirmware {
desc: FalconUCodeDesc,
/// GPU-accessible DMA object containing the firmware.
ucode: FirmwareDmaObject<Self, Signed>,
+ /// Generic bootloader
+ gen_bootloader: Option<GenericBootloader>,
}
impl FalconLoadParams for FwsecFirmware {
@@ -275,7 +346,19 @@ fn brom_params(&self) -> FalconBromParams {
}
fn boot_addr(&self) -> u32 {
- 0
+ match &self.desc {
+ FalconUCodeDesc::V2(_v2) => {
+ // On V2 platforms, the boot address is extracted from the
+ // generic bootloader, because the gbl is what actually copies
+ // FWSEC into memory, so that is what needs to be booted.
+ if let Some(ref gbl) = self.gen_bootloader {
+ gbl.desc.start_tag << 8
+ } else {
+ 0
+ }
+ },
+ FalconUCodeDesc::V3(_v3) => 0,
+ }
}
}
@@ -376,6 +459,7 @@ impl FwsecFirmware {
/// command.
pub(crate) fn new(
dev: &Device<device::Bound>,
+ chipset: Chipset,
falcon: &Falcon<Gsp>,
bar: &Bar0,
bios: &Vbios,
@@ -432,9 +516,49 @@ pub(crate) fn new(
ucode_dma.no_patch_signature()
};
+ // The Generic Bootloader exists only on Turing and GA100. To avoid a
bogus
+ // console error message on other platforms, only try to load it if
it's
+ // supposed to be there.
+ let gbl_fw = if chipset < Chipset::GA102 {
+ super::request_firmware(dev, chipset, "gen_bootloader",
FIRMWARE_VERSION)
+ } else {
+ Err(ENOENT)
+ };
+
+ let gbl = match gbl_fw {
+ Ok(fw) => {
+ let hdr = fw.data()
+ .get(0..size_of::<BinHdr>())
+ .and_then(BinHdr::from_bytes_copy)
+ .ok_or(EINVAL)?;
+
+ let desc_offset = usize::from_safe_cast(hdr.header_offset);
+ let desc = fw.data()
+ .get(desc_offset..desc_offset +
size_of::<BootloaderDesc>())
+ .and_then(BootloaderDesc::from_bytes_copy)
+ .ok_or(EINVAL)?;
+
+ let ucode_start = usize::from_safe_cast(hdr.data_offset);
+ let ucode_size = usize::from_safe_cast(hdr.data_size);
+ let ucode_data = fw.data()
+ .get(ucode_start..ucode_start + ucode_size)
+ .ok_or(EINVAL)?;
+
+ let mut ucode = KVec::new();
+ ucode.extend_from_slice(ucode_data, GFP_KERNEL)?;
+
+ Some(GenericBootloader {
+ desc,
+ ucode,
+ })
+ },
+ Err(_) => None,
+ };
+
Ok(FwsecFirmware {
- desc: desc,
+ desc,
ucode: ucode_signed,
+ gen_bootloader: gbl,
})
}
@@ -449,9 +573,17 @@ pub(crate) fn run(
falcon
.reset(bar)
.inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon:
{:?}\n", e))?;
- falcon
- .dma_load(bar, self)
- .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware:
{:?}\n", e))?;
+
+ // If the Generic Bootloader was found, then upload it via PIO ,
otherwise
+ if let Some(ref gbl) = self.gen_bootloader {
+ falcon
+ .pio_load(bar, self, Some(gbl))
+ .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC
firmware: {:?}\n", e))?;
+ } else {
+ falcon
+ .dma_load(bar, self)
+ .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC
firmware: {:?}\n", e))?;
+ }
let (mbox0, _) = falcon
.boot(bar, Some(0), None)
.inspect_err(|e| dev_err!(dev, "Failed to boot FWSEC firmware:
{:?}\n", e))?;
diff --git a/drivers/gpu/nova-core/gsp/boot.rs
b/drivers/gpu/nova-core/gsp/boot.rs
index 54937606b5b0..fda01afda9ed 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -48,6 +48,7 @@ impl super::Gsp {
/// created the WPR2 region.
fn run_fwsec_frts(
dev: &device::Device<device::Bound>,
+ chipset: Chipset,
falcon: &Falcon<Gsp>,
bar: &Bar0,
bios: &Vbios,
@@ -65,6 +66,7 @@ fn run_fwsec_frts(
let fwsec_frts = FwsecFirmware::new(
dev,
+ chipset,
falcon,
bar,
bios,
@@ -147,7 +149,7 @@ pub(crate) fn boot(
let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?;
dev_dbg!(dev, "{:#x?}\n", fb_layout);
- Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?;
+ Self::run_fwsec_frts(dev, chipset, gsp_falcon, bar, &bios,
&fb_layout)?;
let booter_loader = BooterFirmware::new(
dev,
@@ -186,7 +188,11 @@ pub(crate) fn boot(
);
sec2_falcon.reset(bar)?;
- sec2_falcon.dma_load(bar, &booter_loader)?;
+ if sec2_falcon.supports_dma() {
+ sec2_falcon.dma_load(bar, &booter_loader)?;
+ } else {
+ sec2_falcon.pio_load(bar, &booter_loader, None)?;
+ }
let wpr_handle = wpr_meta.dma_handle();
let (mbox0, mbox1) = sec2_falcon.boot(
bar,
--
2.52.0
Joel Fernandes
2025-Dec-02 21:23 UTC
[PATCH v2 12/13] gpu: nova-core: add PIO support for loading firmware images
On 12/1/2025 6:39 PM, Timur Tabi wrote:> > + > + /// See nvkm_falcon_pio_wr - takes a byte array instead of a FalconFirmware > + fn pio_wr_bytes( > + &self, > + bar: &Bar0, > + img: &[u8], > + mem_base: u16, > + target_mem: FalconMem, > + port: u8, > + tag: u16 > + ) { > + let port = usize::from(port); > + > + match target_mem { > + FalconMem::ImemSecure | FalconMem::ImemNonSecure => { > + regs::NV_PFALCON_FALCON_IMEMC::default() > + .set_secure(target_mem == FalconMem::ImemSecure) > + .set_aincw(true) > + .set_offs(mem_base) > + .write(bar, &E::ID, port); > + > + let mut tag = tag; > + for block in img.chunks(256) { > + regs::NV_PFALCON_FALCON_IMEMT::default() > + .set_tag(tag) > + .write(bar, &E::ID, port); > + for word in block.chunks(4) { > + let w = u32::from_le_bytes(word.try_into().unwrap());If img.size is not a multiple of 4 bytes, this can panic right? Even if it is unlikely, unwrap() is quite frowned up due to possibility of panic. I'd recommend something like the following since the function cannot return an error: let w = if let Ok(bytes) = word.try_into() { u32::from_le_bytes(bytes) } else { // can print a warning here too if needed. let mut buf = [0u8; 4]; buf[..word.len()].copy_from_slice(word); u32::from_le_bytes(buf) }; Btw, I wish we could encode the slice length constraint in the slice type itself (i.e., the slice length ought to be a certain multiple). But I think there's no way to do that without introducing a new type. Thanks.
Joel Fernandes
2025-Dec-02 21:28 UTC
[PATCH v2 12/13] gpu: nova-core: add PIO support for loading firmware images
On 12/1/2025 6:39 PM, Timur Tabi wrote:> + > + /// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. > + pub(crate) fn pio_load<F: FalconFirmware<Target = E>>( > + &self, > + bar: &Bar0, > + fw: &F, > + gbl: Option<&GenericBootloader> > + ) -> Result { > + let imem_sec = fw.imem_sec_load_params(); > + let imem_ns = fw.imem_ns_load_params().unwrap();Same unwrap() is here too. Thanks.