Zhi Wang
2025-Dec-06 12:42 UTC
[RFC 7/7] gpu: nova-core: load the scrubber ucode when vGPU support is enabled
To support the maximum number of vGPUs on devices that support vGPU, a
larger WPR2 heap size is required. When the WPR2 heap size is set larger
than 256MB, the scrubber ucode image must be run to scrub the FB memory
before any other ucode image is executed.
Otherwise, the GSP firmware hangs when booting.
When vGPU support is enabled, execute the scrubber ucode image to scrub the
FB memory before executing any other ucode images.
Signed-off-by: Zhi Wang <zhiw at nvidia.com>
---
drivers/gpu/nova-core/firmware.rs | 1 +
drivers/gpu/nova-core/firmware/booter.rs | 2 ++
drivers/gpu/nova-core/gsp/boot.rs | 27 ++++++++++++++++++++++++
drivers/gpu/nova-core/regs.rs | 11 ++++++++++
4 files changed, 41 insertions(+)
diff --git a/drivers/gpu/nova-core/firmware.rs
b/drivers/gpu/nova-core/firmware.rs
index 2d2008b33fb4..5ae1ab262d57 100644
--- a/drivers/gpu/nova-core/firmware.rs
+++ b/drivers/gpu/nova-core/firmware.rs
@@ -226,6 +226,7 @@ const fn make_entry_chipset(self, chipset: &str) ->
Self {
.make_entry_file(chipset, "booter_unload")
.make_entry_file(chipset, "bootloader")
.make_entry_file(chipset, "gsp")
+ .make_entry_file(chipset, "scrubber")
}
pub(crate) const fn create(
diff --git a/drivers/gpu/nova-core/firmware/booter.rs
b/drivers/gpu/nova-core/firmware/booter.rs
index f107f753214a..f622c9b960de 100644
--- a/drivers/gpu/nova-core/firmware/booter.rs
+++ b/drivers/gpu/nova-core/firmware/booter.rs
@@ -269,6 +269,7 @@ fn new_booter(dev: &device::Device<device::Bound>,
data: &[u8]) -> Result<Self>
#[derive(Copy, Clone, Debug, PartialEq)]
pub(crate) enum BooterKind {
+ Scrubber,
Loader,
#[expect(unused)]
Unloader,
@@ -286,6 +287,7 @@ pub(crate) fn new(
bar: &Bar0,
) -> Result<Self> {
let fw_name = match kind {
+ BooterKind::Scrubber => "scrubber",
BooterKind::Loader => "booter_load",
BooterKind::Unloader => "booter_unload",
};
diff --git a/drivers/gpu/nova-core/gsp/boot.rs
b/drivers/gpu/nova-core/gsp/boot.rs
index ec006c26f19f..8ef79433f017 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -151,6 +151,33 @@ pub(crate) fn boot(
Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?;
+ if vgpu_support {
+ let scrubber = BooterFirmware::new(
+ dev,
+ BooterKind::Scrubber,
+ chipset,
+ FIRMWARE_VERSION,
+ sec2_falcon,
+ bar,
+ )?;
+
+ sec2_falcon.reset(bar)?;
+ sec2_falcon.dma_load(bar, &scrubber)?;
+
+ let (mbox0, mbox1) = sec2_falcon.boot(bar, None, None)?;
+
+ dev_dbg!(
+ pdev.as_ref(),
+ "SEC2 MBOX0: {:#x}, MBOX1{:#x}\n",
+ mbox0,
+ mbox1
+ );
+
+ if
!regs::NV_PGC6_BSI_SECURE_SCRATCH_15::read(bar).scrubber_completed() {
+ return Err(ETIMEDOUT);
+ }
+ }
+
let booter_loader = BooterFirmware::new(
dev,
BooterKind::Loader,
diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
index 82cc6c0790e5..9f3a52ca014f 100644
--- a/drivers/gpu/nova-core/regs.rs
+++ b/drivers/gpu/nova-core/regs.rs
@@ -173,6 +173,17 @@ pub(crate) fn higher_bound(self) -> u64 {
26:26 boot_stage_3_handoff as bool;
});
+register!(NV_PGC6_BSI_SECURE_SCRATCH_15 @ 0x001180fc {
+ 31:29 scrubber_handoff as u8;
+});
+
+impl NV_PGC6_BSI_SECURE_SCRATCH_15 {
+ /// Returns `true` if scrubber is completed.
+ pub(crate) fn scrubber_completed(self) -> bool {
+ self.scrubber_handoff() >= 0x3
+ }
+}
+
// Privilege level mask register. It dictates whether the host CPU has
privilege to access the
// `PGC6_AON_SECURE_SCRATCH_GROUP_05` register (which it needs to read
GFW_BOOT).
register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK @ 0x00118128,
--
2.51.0
Joel Fernandes
2025-Dec-07 02:26 UTC
[RFC 7/7] gpu: nova-core: load the scrubber ucode when vGPU support is enabled
Hi Zhi, On 12/6/2025 7:42 AM, Zhi Wang wrote:> To support the maximum vGPUs on the device that support vGPU, a larger > WPR2 heap size is required. By setting the WPR2 heap size larger than 256MB > the scrubber ucode image is required to scrub the FB memory before any > other ucode image is executed. > > If not, the GSP firmware hangs when booting. > > When vGPU support is enabled, execute the scrubber ucode image to scrub the > FB memory before executing any other ucode images. >[..]> pub(crate) const fn create( > diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-core/firmware/booter.rs > index f107f753214a..f622c9b960de 100644 > --- a/drivers/gpu/nova-core/firmware/booter.rs > +++ b/drivers/gpu/nova-core/firmware/booter.rs > @@ -269,6 +269,7 @@ fn new_booter(dev: &device::Device<device::Bound>, data: &[u8]) -> Result<Self> > > #[derive(Copy, Clone, Debug, PartialEq)] > pub(crate) enum BooterKind { > + Scrubber, > Loader, > #[expect(unused)] > Unloader, > @@ -286,6 +287,7 @@ pub(crate) fn new( > bar: &Bar0, > ) -> Result<Self> { > let fw_name = match kind { > + BooterKind::Scrubber => "scrubber", > BooterKind::Loader => "booter_load", > BooterKind::Unloader => "booter_unload", > }; > diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs > index ec006c26f19f..8ef79433f017 100644 > --- a/drivers/gpu/nova-core/gsp/boot.rs > +++ b/drivers/gpu/nova-core/gsp/boot.rs > @@ -151,6 +151,33 @@ pub(crate) fn boot( > > Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?;Could you elaborate on how the timeout below works? 
See comment below. > > + if vgpu_support { > + let scrubber = BooterFirmware::new( > + dev, > + BooterKind::Scrubber, > + chipset, > + FIRMWARE_VERSION, > + sec2_falcon, > + bar, > + )?; > + > + sec2_falcon.reset(bar)?; > + sec2_falcon.dma_load(bar, &scrubber)?; > + > + let (mbox0, mbox1) = sec2_falcon.boot(bar, None, None)?; boot() already returns -ETIMEDOUT via wait_till_halted()->read_poll_timeout(). The wait there is 2 seconds. I assume the scrubber would have completed by then. > + > + dev_dbg!( > + pdev.as_ref(), > + "SEC2 MBOX0: {:#x}, MBOX1{:#x}\n", > + mbox0, > + mbox1 > + ); > + > + if !regs::NV_PGC6_BSI_SECURE_SCRATCH_15::read(bar).scrubber_completed() { > + return Err(ETIMEDOUT); So in which situation do you get to this point (!scrubber_completed)? Basically, I am not sure if ETIMEDOUT is the right error to return here, because boot() already returns ETIMEDOUT by waiting for the halt. If you still want to return ETIMEDOUT here, then it sounds like you're waiting for scrubbing beyond the waiting already done by boot(). If so, then wouldn't you need to use read_poll_timeout() here? Perhaps something like: read_poll_timeout( || Ok(regs::NV_PGC6_BSI_SECURE_SCRATCH_15::read(bar).scrubber_completed()), |val: &bool| *val, Delta::from_millis(10), Delta::from_secs(5), )?; Thanks.
Dirk Behme
2025-Dec-07 06:42 UTC
[RFC 7/7] gpu: nova-core: load the scrubber ucode when vGPU support is enabled
On 06.12.25 13:42, Zhi Wang wrote:> To support the maximum vGPUs on the device that support vGPU, a larger > WPR2 heap size is required. By setting the WPR2 heap size larger than 256MB > the scrubber ucode image is required to scrub the FB memory before any > other ucode image is executed. > > If not, the GSP firmware hangs when booting. > > When vGPU support is enabled, execute the scrubber ucode image to scrub the > FB memory before executing any other ucode images. > > Signed-off-by: Zhi Wang <zhiw at nvidia.com> > --- > drivers/gpu/nova-core/firmware.rs | 1 + > drivers/gpu/nova-core/firmware/booter.rs | 2 ++ > drivers/gpu/nova-core/gsp/boot.rs | 27 ++++++++++++++++++++++++ > drivers/gpu/nova-core/regs.rs | 11 ++++++++++ > 4 files changed, 41 insertions(+) > > diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs > index 2d2008b33fb4..5ae1ab262d57 100644 > --- a/drivers/gpu/nova-core/firmware.rs > +++ b/drivers/gpu/nova-core/firmware.rs > @@ -226,6 +226,7 @@ const fn make_entry_chipset(self, chipset: &str) -> Self { > .make_entry_file(chipset, "booter_unload") > .make_entry_file(chipset, "bootloader") > .make_entry_file(chipset, "gsp") > + .make_entry_file(chipset, "scrubber") > } > > pub(crate) const fn create( > diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-core/firmware/booter.rs > index f107f753214a..f622c9b960de 100644 > --- a/drivers/gpu/nova-core/firmware/booter.rs > +++ b/drivers/gpu/nova-core/firmware/booter.rs > @@ -269,6 +269,7 @@ fn new_booter(dev: &device::Device<device::Bound>, data: &[u8]) -> Result<Self> > > #[derive(Copy, Clone, Debug, PartialEq)] > pub(crate) enum BooterKind { > + Scrubber, > Loader, > #[expect(unused)] > Unloader, > @@ -286,6 +287,7 @@ pub(crate) fn new( > bar: &Bar0, > ) -> Result<Self> { > let fw_name = match kind { > + BooterKind::Scrubber => "scrubber", > BooterKind::Loader => "booter_load", > BooterKind::Unloader => "booter_unload", > }; > diff --git 
a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs > index ec006c26f19f..8ef79433f017 100644 > --- a/drivers/gpu/nova-core/gsp/boot.rs > +++ b/drivers/gpu/nova-core/gsp/boot.rs > @@ -151,6 +151,33 @@ pub(crate) fn boot( > > Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?; > > + if vgpu_support { > + let scrubber = BooterFirmware::new( > + dev, > + BooterKind::Scrubber, > + chipset, > + FIRMWARE_VERSION, > + sec2_falcon, > + bar, > + )?; > + > + sec2_falcon.reset(bar)?; > + sec2_falcon.dma_load(bar, &scrubber)?; > + > + let (mbox0, mbox1) = sec2_falcon.boot(bar, None, None)?; > + > + dev_dbg!( > + pdev.as_ref(),I think you can use `dev` here? Dirk> + "SEC2 MBOX0: {:#x}, MBOX1{:#x}\n", > + mbox0, > + mbox1 > + ); > + > + if !regs::NV_PGC6_BSI_SECURE_SCRATCH_15::read(bar).scrubber_completed() { > + return Err(ETIMEDOUT); > + } > + } > + > let booter_loader = BooterFirmware::new( > dev, > BooterKind::Loader, > diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs > index 82cc6c0790e5..9f3a52ca014f 100644 > --- a/drivers/gpu/nova-core/regs.rs > +++ b/drivers/gpu/nova-core/regs.rs > @@ -173,6 +173,17 @@ pub(crate) fn higher_bound(self) -> u64 { > 26:26 boot_stage_3_handoff as bool; > }); > > +register!(NV_PGC6_BSI_SECURE_SCRATCH_15 @ 0x001180fc { > + 31:29 scrubber_handoff as u8; > +}); > + > +impl NV_PGC6_BSI_SECURE_SCRATCH_15 { > + /// Returns `true` if scrubber is completed. > + pub(crate) fn scrubber_completed(self) -> bool { > + self.scrubber_handoff() >= 0x3 > + } > +} > + > // Privilege level mask register. It dictates whether the host CPU has privilege to access the > // `PGC6_AON_SECURE_SCRATCH_GROUP_05` register (which it needs to read GFW_BOOT). > register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK @ 0x00118128,