Timur Tabi
2025-Nov-14 23:30 UTC
[PATCH 11/11] gpu: nova-core: add PIO support for loading firmware images
Turing and GA100 use programmed I/O (PIO) instead of DMA to upload
firmware images into Falcon memory.
A new firmware called the Generic Bootloader (as opposed to the
GSP Bootloader) is used to upload FWSEC.
Signed-off-by: Timur Tabi <ttabi at nvidia.com>
---
drivers/gpu/nova-core/falcon.rs | 181 ++++++++++++++++++++++++
drivers/gpu/nova-core/firmware.rs | 4 +-
drivers/gpu/nova-core/firmware/fwsec.rs | 112 ++++++++++++++-
drivers/gpu/nova-core/gsp/boot.rs | 10 +-
4 files changed, 299 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
index 7af32f65ba5f..f9a4a35b7569 100644
--- a/drivers/gpu/nova-core/falcon.rs
+++ b/drivers/gpu/nova-core/falcon.rs
@@ -20,6 +20,10 @@
use crate::{
dma::DmaObject,
driver::Bar0,
+ firmware::fwsec::{
+ BootloaderDmemDescV2,
+ GenericBootloader, //
+ },
gpu::Chipset,
num::{
FromSafeCast,
@@ -400,6 +404,183 @@ pub(crate) fn reset(&self, bar: &Bar0) ->
Result {
Ok(())
}
+
+ /// See nvkm_falcon_pio_wr - takes a byte array instead of a FalconFirmware
+ fn pio_wr_bytes(
+ &self,
+ bar: &Bar0,
+ source: *const u8,
+ mem_base: u16,
+ length: usize,
+ target_mem: FalconMem,
+ port: u8,
+ tag: u16
+ ) -> Result {
+ // To avoid unnecessary complication in the write loop, make sure the
buffer
+ // length is aligned. It always is, which is why an assertion is okay.
+ assert!((length % 4) == 0);
+
+ // From now on, we treat the data as an array of u32
+
+ let length = length / 4;
+ let mut remaining_len: usize = length;
+ let mut img_offset: usize = 0;
+ let mut tag = tag;
+
+ // Get data as a slice of u32s
+ let img = unsafe {
+ core::slice::from_raw_parts(source as *const u32, length)
+ };
+
+ match target_mem {
+ FalconMem::ImemSec | FalconMem::ImemNs => {
+ regs::NV_PFALCON_FALCON_IMEMC::default()
+ .set_secure(target_mem == FalconMem::ImemSec)
+ .set_aincw(true)
+ .set_offs(mem_base)
+ .write(bar, &E::ID, port as usize);
+ },
+ FalconMem::Dmem => {
+ // gm200_flcn_pio_dmem_wr_init
+ regs::NV_PFALCON_FALCON_DMEMC::default()
+ .set_aincw(true)
+ .set_offs(mem_base)
+ .write(bar, &E::ID, port as usize);
+ },
+ }
+
+ while remaining_len > 0 {
+ let xfer_len = core::cmp::min(remaining_len, 256 / 4); //
pio->max = 256
+
+ // Perform the PIO write for the next 256 bytes. Each tag
represents
+ // a 256-byte block in IMEM/DMEM.
+ let mut len = xfer_len;
+
+ match target_mem {
+ FalconMem::ImemSec | FalconMem::ImemNs => {
+ regs::NV_PFALCON_FALCON_IMEMT::default()
+ .set_tag(tag)
+ .write(bar, &E::ID, port as usize);
+
+ while len > 0 {
+ regs::NV_PFALCON_FALCON_IMEMD::default()
+ .set_data(img[img_offset])
+ .write(bar, &E::ID, port as usize);
+ img_offset += 1;
+ len -= 1;
+ };
+
+ tag += 1;
+ },
+ FalconMem::Dmem => {
+ // tag is ignored for DMEM
+ while len > 0 {
+ regs::NV_PFALCON_FALCON_DMEMD::default()
+ .set_data(img[img_offset])
+ .write(bar, &E::ID, port as usize);
+ img_offset += 1;
+ len -= 1;
+ };
+ },
+ }
+
+ remaining_len -= xfer_len;
+ }
+
+ Ok(())
+ }
+
+ /// See nvkm_falcon_pio_wr
+ fn pio_wr<F: FalconFirmware<Target = E>>(
+ &self,
+ bar: &Bar0,
+ fw: &F,
+ target_mem: FalconMem,
+ load_offsets: &FalconLoadTarget,
+ port: u8,
+ tag: u16,
+ ) -> Result {
+ // FIXME: There's probably a better way to create a pointer to
inside the firmware
+ // Maybe CoherentAllocation needs to implement a method for that.
+ let start = unsafe { fw.start_ptr().add(load_offsets.src_start as
usize) };
+ self.pio_wr_bytes(bar, start,
+ load_offsets.dst_start as u16,
+ load_offsets.len as usize, target_mem, port, tag)
+ }
+
+ /// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the
falcon to run it.
+ pub(crate) fn pio_load<F: FalconFirmware<Target = E>>(
+ &self,
+ bar: &Bar0,
+ fw: &F,
+ gbl: Option<&GenericBootloader>
+ ) -> Result {
+ let imem_sec = fw.imem_sec_load_params();
+ let imem_ns = fw.imem_ns_load_params().unwrap();
+ let dmem = fw.dmem_load_params();
+
+ regs::NV_PFALCON_FBIF_CTL::read(bar, &E::ID)
+ .set_allow_phys_no_ctx(true)
+ .write(bar, &E::ID);
+
+ regs::NV_PFALCON_FALCON_DMACTL::default()
+ .write(bar, &E::ID);
+
+ // If the Generic Bootloader was passed, then use it to boot FRTS
+ if let Some(gbl) = gbl {
+ let load_params = FalconLoadTarget {
+ src_start: 0,
+ dst_start: 0x10000 - gbl.desc.code_size,
+ len: gbl.desc.code_size,
+ };
+ self.pio_wr_bytes(bar, gbl.ucode.as_ptr(),
+ load_params.dst_start as u16, load_params.len as usize,
+ FalconMem::ImemNs, 0, gbl.desc.start_tag as u16)?;
+
+ // This structure tells the generic bootloader where to find the
FWSEC
+ // image.
+ let dmem_desc = BootloaderDmemDescV2 {
+ reserved: [0; 4],
+ signature: [0; 4],
+ ctx_dma: 4, // FALCON_DMAIDX_PHYS_SYS_NCOH
+ code_dma_base: fw.dma_handle(),
+ non_sec_code_off: imem_ns.dst_start,
+ non_sec_code_size: imem_ns.len,
+ sec_code_off: imem_sec.dst_start,
+ sec_code_size: imem_sec.len,
+ code_entry_point: 0,
+ data_dma_base: fw.dma_handle() + dmem.src_start as u64,
+ data_size: dmem.len,
+ argc: 0,
+ argv: 0,
+ };
+
+ regs::NV_PFALCON_FBIF_TRANSCFG::update(bar, &E::ID, 4, |v| {
+ v.set_target(FalconFbifTarget::CoherentSysmem)
+ .set_mem_type(FalconFbifMemType::Physical)
+ });
+
+ self.pio_wr_bytes(bar, &dmem_desc as *const _ as *const u8, 0,
+ core::mem::size_of::<BootloaderDmemDescV2>(),
+ FalconMem::Dmem, 0, 0)?;
+ } else {
+ self.pio_wr(bar, fw, FalconMem::ImemNs, &imem_ns, 0,
+ u16::try_from(imem_ns.dst_start >> 8)?)?;
+ self.pio_wr(bar, fw, FalconMem::ImemSec, &imem_sec, 0,
+ u16::try_from(imem_sec.dst_start >> 8)?)?;
+ self.pio_wr(bar, fw, FalconMem::Dmem, &dmem, 0, 0)?;
+ }
+
+ self.hal.program_brom(self, bar, &fw.brom_params())?;
+
+ // Set `BootVec` to start of non-secure code.
+ regs::NV_PFALCON_FALCON_BOOTVEC::default()
+ .set_value(fw.boot_addr())
+ .write(bar, &E::ID);
+
+ Ok(())
+ }
+
/// Perform a DMA write according to `load_offsets` from `dma_handle` into
the falcon's
/// `target_mem`.
///
diff --git a/drivers/gpu/nova-core/firmware.rs
b/drivers/gpu/nova-core/firmware.rs
index 5ca5bf1fb610..ecab16af0166 100644
--- a/drivers/gpu/nova-core/firmware.rs
+++ b/drivers/gpu/nova-core/firmware.rs
@@ -31,7 +31,7 @@
pub(crate) const FIRMWARE_VERSION: &str = "570.144";
/// Requests the GPU firmware `name` suitable for `chipset`, with version
`ver`.
-fn request_firmware(
+pub(crate) fn request_firmware(
dev: &device::Device,
chipset: gpu::Chipset,
name: &str,
@@ -252,7 +252,7 @@ fn no_patch_signature(self) -> FirmwareDmaObject<F,
Signed> {
/// Header common to most firmware files.
#[repr(C)]
#[derive(Debug, Clone)]
-struct BinHdr {
+pub(crate) struct BinHdr {
/// Magic number, must be `0x10de`.
bin_magic: u32,
/// Version of the header.
diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs
b/drivers/gpu/nova-core/firmware/fwsec.rs
index 36ff8ed51c23..39fe23dab4bf 100644
--- a/drivers/gpu/nova-core/firmware/fwsec.rs
+++ b/drivers/gpu/nova-core/firmware/fwsec.rs
@@ -40,12 +40,15 @@
FalconLoadTarget, //
},
firmware::{
+ FIRMWARE_VERSION,
+ BinHdr,
FalconUCodeDesc,
FirmwareDmaObject,
FirmwareSignature,
Signed,
Unsigned, //
},
+ gpu::Chipset,
num::{
FromSafeCast,
IntoSafeCast, //
@@ -213,6 +216,44 @@ unsafe fn transmute_mut<T: Sized + FromBytes +
AsBytes>(
T::from_bytes_mut(unsafe { fw.as_slice_mut(offset, size_of::<T>())?
}).ok_or(EINVAL)
}
+#[repr(C)]
+#[derive(Debug, Clone)]
+pub(crate) struct BootloaderDesc {
+ pub start_tag: u32,
+ pub dmem_load_off: u32,
+ pub code_off: u32,
+ pub code_size: u32,
+ pub data_off: u32,
+ pub data_size: u32,
+}
+// SAFETY: any byte sequence is valid for this struct.
+unsafe impl FromBytes for BootloaderDesc {}
+
+#[repr(C, packed)]
+#[derive(Debug, Clone)]
+pub(crate) struct BootloaderDmemDescV2 {
+ pub reserved: [u32; 4],
+ pub signature: [u32; 4],
+ pub ctx_dma: u32,
+ pub code_dma_base: u64,
+ pub non_sec_code_off: u32,
+ pub non_sec_code_size: u32,
+ pub sec_code_off: u32,
+ pub sec_code_size: u32,
+ pub code_entry_point: u32,
+ pub data_dma_base: u64,
+ pub data_size: u32,
+ pub argc: u32,
+ pub argv: u32,
+}
+// SAFETY: any byte sequence is valid for this struct.
+unsafe impl kernel::transmute::FromBytes for BootloaderDmemDescV2 {}
+
+pub(crate) struct GenericBootloader {
+ pub desc: BootloaderDesc,
+ pub ucode: Vec<u8, kernel::alloc::allocator::Kmalloc>,
+}
+
/// The FWSEC microcode, extracted from the BIOS and to be run on the GSP
falcon.
///
/// It is responsible for e.g. carving out the WPR2 region as the first step of
the GSP bootflow.
@@ -221,6 +262,8 @@ pub(crate) struct FwsecFirmware {
desc: FalconUCodeDesc,
/// GPU-accessible DMA object containing the firmware.
ucode: FirmwareDmaObject<Self, Signed>,
+ /// Generic bootloader
+ gen_bootloader: Option<GenericBootloader>,
}
impl FalconLoadParams for FwsecFirmware {
@@ -275,7 +318,19 @@ fn brom_params(&self) -> FalconBromParams {
}
fn boot_addr(&self) -> u32 {
- 0
+ match &self.desc {
+ FalconUCodeDesc::V2(_v2) => {
+ // On V2 platforms, the boot address is extracted from the
+ // generic bootloader, because the gbl is what actually copies
+ // FWSEC into memory, so that is what needs to be booted.
+ if let Some(ref gbl) = self.gen_bootloader {
+ gbl.desc.start_tag << 8
+ } else {
+ 0
+ }
+ },
+ FalconUCodeDesc::V3(_v3) => 0,
+ }
}
}
@@ -376,6 +431,7 @@ impl FwsecFirmware {
/// command.
pub(crate) fn new(
dev: &Device<device::Bound>,
+ chipset: Chipset,
falcon: &Falcon<Gsp>,
bar: &Bar0,
bios: &Vbios,
@@ -432,9 +488,49 @@ pub(crate) fn new(
ucode_dma.no_patch_signature()
};
+ // The Generic Bootloader exists only on Turing and GA100. To avoid a
bogus
+ // console error message on other platforms, only try to load it if
it's
+ // supposed to be there.
+ let gbl_fw = if chipset < Chipset::GA102 {
+ super::request_firmware(dev, chipset, "gen_bootloader",
FIRMWARE_VERSION)
+ } else {
+ Err(ENOENT)
+ };
+
+ let gbl = match gbl_fw {
+ Ok(fw) => {
+ let hdr = fw.data()
+ .get(0..size_of::<BinHdr>())
+ .and_then(BinHdr::from_bytes_copy)
+ .ok_or(EINVAL)?;
+
+ let desc_offset = hdr.header_offset as usize;
+ let desc = fw.data()
+ .get(desc_offset..desc_offset +
size_of::<BootloaderDesc>())
+ .and_then(BootloaderDesc::from_bytes_copy)
+ .ok_or(EINVAL)?;
+
+ let ucode_start = hdr.data_offset as usize;
+ let ucode_size = hdr.data_size as usize;
+ let ucode_data = fw.data()
+ .get(ucode_start..ucode_start + ucode_size)
+ .ok_or(EINVAL)?;
+
+ let mut ucode = KVec::new();
+ ucode.extend_from_slice(ucode_data, GFP_KERNEL)?;
+
+ Some(GenericBootloader {
+ desc: desc,
+ ucode: ucode,
+ })
+ },
+ Err(_) => None,
+ };
+
Ok(FwsecFirmware {
desc: desc,
ucode: ucode_signed,
+ gen_bootloader: gbl,
})
}
@@ -449,9 +545,17 @@ pub(crate) fn run(
falcon
.reset(bar)
.inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon:
{:?}\n", e))?;
- falcon
- .dma_load(bar, self)
- .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware:
{:?}\n", e))?;
+
+ // If the Generic Bootloader was found, then upload it via PIO ,
otherwise
+ if let Some(ref gbl) = self.gen_bootloader {
+ falcon
+ .pio_load(bar, self, Some(gbl))
+ .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC
firmware: {:?}\n", e))?;
+ } else {
+ falcon
+ .dma_load(bar, self)
+ .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC
firmware: {:?}\n", e))?;
+ }
let (mbox0, _) = falcon
.boot(bar, Some(0), None)
.inspect_err(|e| dev_err!(dev, "Failed to boot FWSEC firmware:
{:?}\n", e))?;
diff --git a/drivers/gpu/nova-core/gsp/boot.rs
b/drivers/gpu/nova-core/gsp/boot.rs
index eb0ee4f66f0c..ff53d58c1df6 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -44,6 +44,7 @@ impl super::Gsp {
/// created the WPR2 region.
fn run_fwsec_frts(
dev: &device::Device<device::Bound>,
+ chipset: Chipset,
falcon: &Falcon<Gsp>,
bar: &Bar0,
bios: &Vbios,
@@ -61,6 +62,7 @@ fn run_fwsec_frts(
let fwsec_frts = FwsecFirmware::new(
dev,
+ chipset,
falcon,
bar,
bios,
@@ -143,7 +145,7 @@ pub(crate) fn boot(
let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?;
dev_dbg!(dev, "{:#x?}\n", fb_layout);
- Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?;
+ Self::run_fwsec_frts(dev, chipset, gsp_falcon, bar, &bios,
&fb_layout)?;
let booter_loader = BooterFirmware::new(
dev,
@@ -182,7 +184,11 @@ pub(crate) fn boot(
);
sec2_falcon.reset(bar)?;
- sec2_falcon.dma_load(bar, &booter_loader)?;
+ if chipset > Chipset::GA100 {
+ sec2_falcon.dma_load(bar, &booter_loader)?;
+ } else {
+ sec2_falcon.pio_load(bar, &booter_loader, None)?;
+ }
let wpr_handle = wpr_meta.dma_handle();
let (mbox0, mbox1) = sec2_falcon.boot(
bar,
--
2.51.2
Joel Fernandes
2025-Nov-17 23:34 UTC
[PATCH 11/11] gpu: nova-core: add PIO support for loading firmware images
On Fri, Nov 14, 2025 at 05:30:44PM -0600, Timur Tabi wrote:> Turing and GA100 use programmed I/O (PIO) instead of DMA to upload > firmware images into Falcon memory. > > A new firmware called the Generic Bootloader (as opposed to the > GSP Bootloader) is used to upload FWSEC. > > Signed-off-by: Timur Tabi <ttabi at nvidia.com> > --- > drivers/gpu/nova-core/falcon.rs | 181 ++++++++++++++++++++++++ > drivers/gpu/nova-core/firmware.rs | 4 +- > drivers/gpu/nova-core/firmware/fwsec.rs | 112 ++++++++++++++- > drivers/gpu/nova-core/gsp/boot.rs | 10 +- > 4 files changed, 299 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs > index 7af32f65ba5f..f9a4a35b7569 100644 > --- a/drivers/gpu/nova-core/falcon.rs > +++ b/drivers/gpu/nova-core/falcon.rs > @@ -20,6 +20,10 @@ > use crate::{ > dma::DmaObject, > driver::Bar0, > + firmware::fwsec::{ > + BootloaderDmemDescV2, > + GenericBootloader, // > + }, > gpu::Chipset, > num::{ > FromSafeCast, > @@ -400,6 +404,183 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result { > Ok(()) > } > > + > + /// See nvkm_falcon_pio_wr - takes a byte array instead of a FalconFirmware > + fn pio_wr_bytes( > + &self, > + bar: &Bar0, > + source: *const u8, > + mem_base: u16, > + length: usize, > + target_mem: FalconMem, > + port: u8, > + tag: u16Please don't use pointers for source, use slices instead, then you don't need to assume length is multiple of 4, you can just return error if it is. fn pio_wr_bytes( &self, bar: &Bar0, data: &[u8], mem_base: u16, target_mem: FalconMem, port: u8, tag: u16 ) -> Result {> + ) -> Result { > + // To avoid unnecessary complication in the write loop, make sure the buffer > + // length is aligned. It always is, which is why an assertion is okay. > + assert!((length % 4) == 0);Can get rid of this then and just return error if it is not multiple of 4.> + > + // From now on, we treat the data as an array of u32 > + > + let length = length / 4; > + let mut remaining_len: usize = length; > + let mut img_offset: usize = 0; > + let mut tag = tag; > + > + // Get data as a slice of u32s > + let img = unsafe {Missing safety comment. Please go over the coding guidelines and format comments according to guidelines.> + core::slice::from_raw_parts(source as *const u32, length) > + }; > + > + match target_mem { > + FalconMem::ImemSec | FalconMem::ImemNs => { > + regs::NV_PFALCON_FALCON_IMEMC::default() > + .set_secure(target_mem == FalconMem::ImemSec) > + .set_aincw(true) > + .set_offs(mem_base) > + .write(bar, &E::ID, port as usize); > + }, > + FalconMem::Dmem => { > + // gm200_flcn_pio_dmem_wr_initMisplaced comment?> + regs::NV_PFALCON_FALCON_DMEMC::default() > + .set_aincw(true) > + .set_offs(mem_base) > + .write(bar, &E::ID, port as usize); > + }, > + } > + > + while remaining_len > 0 { > + let xfer_len = core::cmp::min(remaining_len, 256 / 4); // pio->max = 256 > + > + // Perform the PIO write for the next 256 bytes. Each tag represents > + // a 256-byte block in IMEM/DMEM. > + let mut len = xfer_len; > + > + match target_mem { > + FalconMem::ImemSec | FalconMem::ImemNs => { > + regs::NV_PFALCON_FALCON_IMEMT::default() > + .set_tag(tag) > + .write(bar, &E::ID, port as usize); > + > + while len > 0 { > + regs::NV_PFALCON_FALCON_IMEMD::default() > + .set_data(img[img_offset]) > + .write(bar, &E::ID, port as usize); > + img_offset += 1; > + len -= 1; > + }; > + > + tag += 1; > + }, > + FalconMem::Dmem => { > + // tag is ignored for DMEM > + while len > 0 { > + regs::NV_PFALCON_FALCON_DMEMD::default() > + .set_data(img[img_offset]) > + .write(bar, &E::ID, port as usize); > + img_offset += 1; > + len -= 1; > + }; > + }, > + } > + > + remaining_len -= xfer_len; > + } > + > + Ok(()) > + } > + > + /// See nvkm_falcon_pio_wr > + fn pio_wr<F: FalconFirmware<Target = E>>( > + &self, > + bar: &Bar0, > + fw: &F, > + target_mem: FalconMem, > + load_offsets: &FalconLoadTarget, > + port: u8, > + tag: u16, > + ) -> Result { > + // FIXME: There's probably a better way to create a pointer to inside the firmware > + // Maybe CoherentAllocation needs to implement a method for that. > + let start = unsafe { fw.start_ptr().add(load_offsets.src_start as usize) }; > + self.pio_wr_bytes(bar, start, > + load_offsets.dst_start as u16,Lossy conversions require comments. 'as' is a lossy conversion. thanks, - Joel [...]
Alexandre Courbot
2025-Nov-19 04:28 UTC
[PATCH 11/11] gpu: nova-core: add PIO support for loading firmware images
On Sat Nov 15, 2025 at 8:30 AM JST, Timur Tabi wrote:> Turing and GA100 use programmed I/O (PIO) instead of DMA to upload > firmware images into Falcon memory. > > A new firmware called the Generic Bootloader (as opposed to the > GSP Bootloader) is used to upload FWSEC. > > Signed-off-by: Timur Tabi <ttabi at nvidia.com> > --- > drivers/gpu/nova-core/falcon.rs | 181 ++++++++++++++++++++++++ > drivers/gpu/nova-core/firmware.rs | 4 +- > drivers/gpu/nova-core/firmware/fwsec.rs | 112 ++++++++++++++- > drivers/gpu/nova-core/gsp/boot.rs | 10 +- > 4 files changed, 299 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs > index 7af32f65ba5f..f9a4a35b7569 100644 > --- a/drivers/gpu/nova-core/falcon.rs > +++ b/drivers/gpu/nova-core/falcon.rs > @@ -20,6 +20,10 @@ > use crate::{ > dma::DmaObject, > driver::Bar0, > + firmware::fwsec::{ > + BootloaderDmemDescV2, > + GenericBootloader, // > + }, > gpu::Chipset, > num::{ > FromSafeCast, > @@ -400,6 +404,183 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result { > Ok(()) > } > > + > + /// See nvkm_falcon_pio_wr - takes a byte array instead of a FalconFirmware > + fn pio_wr_bytes( > + &self, > + bar: &Bar0, > + source: *const u8, > + mem_base: u16, > + length: usize,We will definitely want to combine `source` and `length` into a convenient `&[u8]`. Now I understand why you used a pointer here, because we need to write an instance of `BootloaderDmemDescV2`, and also because we use data from a `CoherentAllocation`. The first one is easy to fix: `BootloaderDmemDescV2` is just a bunch of integers, so you can implement `AsBytes` on it and get a nice slice of bytes exactly as we want.> + target_mem: FalconMem, > + port: u8, > + tag: u16 > + ) -> Result { > + // To avoid unnecessary complication in the write loop, make sure the buffer > + // length is aligned. It always is, which is why an assertion is okay. > + assert!((length % 4) == 0);Let's return an error instead of panicking here.> + > + // From now on, we treat the data as an array of u32 > + > + let length = length / 4; > + let mut remaining_len: usize = length; > + let mut img_offset: usize = 0; > + let mut tag = tag; > + > + // Get data as a slice of u32s > + let img = unsafe { > + core::slice::from_raw_parts(source as *const u32, length) > + }; > + > + match target_mem { > + FalconMem::ImemSec | FalconMem::ImemNs => { > + regs::NV_PFALCON_FALCON_IMEMC::default() > + .set_secure(target_mem == FalconMem::ImemSec) > + .set_aincw(true) > + .set_offs(mem_base) > + .write(bar, &E::ID, port as usize); > + }, > + FalconMem::Dmem => { > + // gm200_flcn_pio_dmem_wr_initProbably a stray development-time comment.> + regs::NV_PFALCON_FALCON_DMEMC::default() > + .set_aincw(true) > + .set_offs(mem_base) > + .write(bar, &E::ID, port as usize); > + }, > + } > + > + while remaining_len > 0 { > + let xfer_len = core::cmp::min(remaining_len, 256 / 4); // pio->max = 256 > + > + // Perform the PIO write for the next 256 bytes. Each tag represents > + // a 256-byte block in IMEM/DMEM. > + let mut len = xfer_len; > + > + match target_mem { > + FalconMem::ImemSec | FalconMem::ImemNs => { > + regs::NV_PFALCON_FALCON_IMEMT::default() > + .set_tag(tag) > + .write(bar, &E::ID, port as usize); > + > + while len > 0 { > + regs::NV_PFALCON_FALCON_IMEMD::default() > + .set_data(img[img_offset]) > + .write(bar, &E::ID, port as usize); > + img_offset += 1; > + len -= 1; > + }; > + > + tag += 1; > + }, > + FalconMem::Dmem => { > + // tag is ignored for DMEM > + while len > 0 { > + regs::NV_PFALCON_FALCON_DMEMD::default() > + .set_data(img[img_offset]) > + .write(bar, &E::ID, port as usize); > + img_offset += 1; > + len -= 1; > + }; > + }, > + } > + > + remaining_len -= xfer_len; > + }Let's turn this C-style loop into something more Rustey. We want to divide the input twice: once in 256 bytes block to write the Imem tag if needed, and then again in blocks of `u32`. Nova being little-endian, we can assume that ordering. This lets us leverage `chunks` and `from_bytes`. I got the following (untested) code, which assumes `source` is the `&[u8]` we want to write: // Length of an IMEM tag in bytes. const IMEM_TAG_LEN: usize = 256; for chunk in source.chunks(IMEM_TAG_LEN) { // Convert our chunk of bytes into an array of u32s. // // This can never fail as the sizes match, but propagate the error // to avoid an `unsafe` statement. let chunk = <[u32; IMEM_TAG_LEN / size_of::<u32>()]>::from_bytes(chunk)?; match target_mem { FalconMem::Imem { .. } => { regs::NV_PFALCON_FALCON_IMEMT::default().set_tag(tag).write( bar, &E::ID, port as usize, ); tag += 1; for &data in chunk { regs::NV_PFALCON_FALCON_IMEMD::default() .set_data(data) .write(bar, &E::ID, port as usize); } } FalconMem::Dmem => { for &data in chunk { regs::NV_PFALCON_FALCON_DMEMD::default() .set_data(data) .write(bar, &E::ID, port as usize); } } } } The cool thing is that this removes all the mutable variables and counters, with the exception of `tag`. It is also shorter and more explicit about the intent IMHO.> + > + Ok(()) > + } > + > + /// See nvkm_falcon_pio_wrThis doc isn't really helpful - why is this method needed at all? It appears to be because we pass the firmware data as a `CoherentAllocation`, which PIO loading can not work with directly since it bitbangs the data to load instead of using DMA. But `pio_wr` is only ever called from `pio_load`, so `pio_load` could call the `as_slice` method of `CoherentAllocation` to obtain a slice and work with `pio_wr_bytes` directly, removing the need for this method.> + fn pio_wr<F: FalconFirmware<Target = E>>( > + &self, > + bar: &Bar0, > + fw: &F, > + target_mem: FalconMem, > + load_offsets: &FalconLoadTarget, > + port: u8, > + tag: u16, > + ) -> Result { > + // FIXME: There's probably a better way to create a pointer to inside the firmware > + // Maybe CoherentAllocation needs to implement a method for that. > + let start = unsafe { fw.start_ptr().add(load_offsets.src_start as usize) };Yes, `as_slice` will give you a slice that you can pass directly to the updated `pio_wr_bytes`: let fw_bytes = unsafe { fw.as_slice(0, fw.size())? };> + self.pio_wr_bytes(bar, start, > + load_offsets.dst_start as u16, > + load_offsets.len as usize, target_mem, port, tag) > + } > + > + /// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. > + pub(crate) fn pio_load<F: FalconFirmware<Target = E>>( > + &self, > + bar: &Bar0, > + fw: &F, > + gbl: Option<&GenericBootloader> > + ) -> Result { > + let imem_sec = fw.imem_sec_load_params(); > + let imem_ns = fw.imem_ns_load_params().unwrap();Let's return an error in this case instead of panicking.> + let dmem = fw.dmem_load_params(); > + > + regs::NV_PFALCON_FBIF_CTL::read(bar, &E::ID) > + .set_allow_phys_no_ctx(true) > + .write(bar, &E::ID); > + > + regs::NV_PFALCON_FALCON_DMACTL::default() > + .write(bar, &E::ID); > + > + // If the Generic Bootloader was passed, then use it to boot FRTS > + if let Some(gbl) = gbl { > + let load_params = FalconLoadTarget { > + src_start: 0, > + dst_start: 0x10000 - gbl.desc.code_size, > + len: gbl.desc.code_size, > + }; > + self.pio_wr_bytes(bar, gbl.ucode.as_ptr(), > + load_params.dst_start as u16, load_params.len as usize, > + FalconMem::ImemNs, 0, gbl.desc.start_tag as u16)?; > + > + // This structure tells the generic bootloader where to find the FWSEC > + // image. > + let dmem_desc = BootloaderDmemDescV2 { > + reserved: [0; 4], > + signature: [0; 4], > + ctx_dma: 4, // FALCON_DMAIDX_PHYS_SYS_NCOH > + code_dma_base: fw.dma_handle(), > + non_sec_code_off: imem_ns.dst_start, > + non_sec_code_size: imem_ns.len, > + sec_code_off: imem_sec.dst_start, > + sec_code_size: imem_sec.len, > + code_entry_point: 0, > + data_dma_base: fw.dma_handle() + dmem.src_start as u64, > + data_size: dmem.len, > + argc: 0, > + argv: 0, > + }; > + > + regs::NV_PFALCON_FBIF_TRANSCFG::update(bar, &E::ID, 4, |v| { > + v.set_target(FalconFbifTarget::CoherentSysmem) > + .set_mem_type(FalconFbifMemType::Physical) > + }); > + > + self.pio_wr_bytes(bar, &dmem_desc as *const _ as *const u8, 0, > + core::mem::size_of::<BootloaderDmemDescV2>(), > + FalconMem::Dmem, 0, 0)?;Once you have `AsBytes` implemented on BootloaderDmemDescV2, you can just do self.pio_wr_bytes(bar, dmem_desc.as_bytes(), 0, FalconMem::Dmem, 0, 0)?;> + } else { > + self.pio_wr(bar, fw, FalconMem::ImemNs, &imem_ns, 0, > + u16::try_from(imem_ns.dst_start >> 8)?)?; > + self.pio_wr(bar, fw, FalconMem::ImemSec, &imem_sec, 0, > + u16::try_from(imem_sec.dst_start >> 8)?)?; > + self.pio_wr(bar, fw, FalconMem::Dmem, &dmem, 0, 0)?; > + } > + > + > + self.hal.program_brom(self, bar, &fw.brom_params())?; > + // Set `BootVec` to start of non-secure code. > + regs::NV_PFALCON_FALCON_BOOTVEC::default() > + .set_value(fw.boot_addr()) > + .write(bar, &E::ID); > + > + Ok(()) > + } > + > /// Perform a DMA write according to `load_offsets` from `dma_handle` into the falcon's > /// `target_mem`. > /// > diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs > index 5ca5bf1fb610..ecab16af0166 100644 > --- a/drivers/gpu/nova-core/firmware.rs > +++ b/drivers/gpu/nova-core/firmware.rs > @@ -31,7 +31,7 @@ > pub(crate) const FIRMWARE_VERSION: &str = "570.144"; > > /// Requests the GPU firmware `name` suitable for `chipset`, with version `ver`. > -fn request_firmware( > +pub(crate) fn request_firmware(This isn't needed, `request_firmware` is only ever called from child modules, which can access the private members of their parents.> dev: &device::Device, > chipset: gpu::Chipset, > name: &str, > @@ -252,7 +252,7 @@ fn no_patch_signature(self) -> FirmwareDmaObject<F, Signed> { > /// Header common to most firmware files. > #[repr(C)] > #[derive(Debug, Clone)] > -struct BinHdr { > +pub(crate) struct BinHdr {Same here.
Alexandre Courbot
2025-Nov-19 07:01 UTC
[PATCH 11/11] gpu: nova-core: add PIO support for loading firmware images
On Sat Nov 15, 2025 at 8:30 AM JST, Timur Tabi wrote: <snip>> @@ -182,7 +184,11 @@ pub(crate) fn boot( > ); > > sec2_falcon.reset(bar)?; > - sec2_falcon.dma_load(bar, &booter_loader)?; > + if chipset > Chipset::GA100 { > + sec2_falcon.dma_load(bar, &booter_loader)?; > + } else { > + sec2_falcon.pio_load(bar, &booter_loader, None)?; > + } > let wpr_handle = wpr_meta.dma_handle(); > let (mbox0, mbox1) = sec2_falcon.boot( > bar,Ah, one more thing: if the loading method is only dependent on the chipset version, we should probably make it part of the HAL.