Timur Tabi
2025-Nov-03 19:12 UTC
[PATCH v2 00/12] nova-core: Complete GSP boot and begin RPC communication
On Sun, 2025-11-02 at 18:59 -0500, Joel Fernandes wrote: > Hello! > These patches are a refresh of the series adding support for final stages of the > GSP boot process where a sequencer which interprets firmware instructions needs > to run to boot the GSP processor, followed by waiting for an INIT_DONE message > from the GSP. > > The patches are based on Alex's github branch which have several prerequisites: > Repo: https://github.com/Gnurou/linux.git Branch: b4/gsp_boot > > I also dropped several patches (mainly from John that have already been > applied). Tested on Ampere GA102. We also need the "gpu: nova-core: Add > get_gsp_info() command" patch which I dropped since it needs to be reworked, > and it is not needed for GSP boot on Ampere (but John mentioned it is needed > for Blackwell so we could include it in the Blackwell series or I can try to > include it in this series if I'm respinning). I applied your patches on top of Alex's tree, and when I boot on a GA102 I get this: [ 376.316679] NovaCore 0000:65:00.0: NVIDIA (Chipset: GA102, Architecture: Ampere, Revision: a.1) [ 377.188060] NovaCore 0000:65:00.0: GSP RPC: send: seq# 0, function=Ok(GspSetSystemInfo), length=0x3f0 [ 377.188070] NovaCore 0000:65:00.0: GSP RPC: send: seq# 1, function=Ok(SetRegistry), length=0xc5 [ 378.315960] NovaCore 0000:65:00.0: GSP RPC: receive: seq# 0, function=NOCAT, length=0x50c [ 378.319875] NovaCore 0000:65:00.0: probe with driver NovaCore failed with error -34 Are you sure there are no other patches? The RPC patches can't depend on INIT_DONE being the first response. Getting a NOCAT RPC first is not uncommon.
Joel Fernandes
2025-Nov-03 19:35 UTC
[PATCH v2 00/12] nova-core: Complete GSP boot and begin RPC communication
On 11/3/2025 2:12 PM, Timur Tabi wrote: > On Sun, 2025-11-02 at 18:59 -0500, Joel Fernandes wrote: >> Hello! >> These patches are a refresh of the series adding support for final stages of the >> GSP boot process where a sequencer which interprets firmware instructions needs >> to run to boot the GSP processor, followed by waiting for an INIT_DONE message >> from the GSP. >> >> The patches are based on Alex's github branch which have several prerequisites: >> Repo: https://github.com/Gnurou/linux.git Branch: b4/gsp_boot >> >> I also dropped several patches (mainly from John that have already been >> applied). Tested on Ampere GA102. We also need the "gpu: nova-core: Add >> get_gsp_info() command" patch which I dropped since it needs to be reworked, >> and it is not needed for GSP boot on Ampere (but John mentioned it is needed >> for Blackwell so we could include it in the Blackwell series or I can try to >> include it in this series if I'm respinning). > > I applied your patches on top of Alex's tree, and when I boot on a GA102 I get this: > > [ 376.316679] NovaCore 0000:65:00.0: NVIDIA (Chipset: GA102, Architecture: Ampere, Revision: a.1) > [ 377.188060] NovaCore 0000:65:00.0: GSP RPC: send: seq# 0, function=Ok(GspSetSystemInfo), > length=0x3f0 > [ 377.188070] NovaCore 0000:65:00.0: GSP RPC: send: seq# 1, function=Ok(SetRegistry), length=0xc5 > [ 378.315960] NovaCore 0000:65:00.0: GSP RPC: receive: seq# 0, function=NOCAT, length=0x50c > [ 378.319875] NovaCore 0000:65:00.0: probe with driver NovaCore failed with error -34 > > Are you sure there are no other patches? The RPC patches can't depend on INIT_DONE being the first > response. Getting a NOCAT RPC first is not uncommon. It works on my end. Do you have "the wait for init done" patch (the 12th patch)? You can also boot my tree which has all the patches, i.e.
this series + Alex's b4/gsp_boot branch: https://web.git.kernel.org/pub/scm/linux/kernel/git/jfern/linux.git/log/?h=nova-seq-init-done-submitted-v2 [ 4.672205] virtme-ng-init: initialization done [ 25.745799] NovaCore 0000:00:07.0: NVIDIA (Chipset: GA102, Architecture: Ampere, Revision: a.1) [ 26.364343] NovaCore 0000:00:07.0: GSP RPC: send: seq# 0, function=Ok(GspSetSystemInfo), length=0x3f0 [ 26.364634] NovaCore 0000:00:07.0: GSP RPC: send: seq# 1, function=Ok(SetRegistry), length=0xc5 [ 27.561186] NovaCore 0000:00:07.0: GSP RPC: receive: seq# 0, function=RUN_CPU_SEQUENCER, length=0x18e8 [ 27.635180] NovaCore 0000:00:07.0: GSP RPC: receive: seq# 0, function=NOCAT, length=0x50c [ 27.635529] NovaCore 0000:00:07.0: GSP RPC: receive: seq# 0, function=LIBOS_PRINT, length=0x68 [ 27.635795] NovaCore 0000:00:07.0: GSP RPC: receive: seq# 0, function=LIBOS_PRINT, length=0x70 [ 27.790175] NovaCore 0000:00:07.0: GSP RPC: receive: seq# 0, function=INIT_DONE, length=0x50
Joel Fernandes
2025-Nov-04 23:26 UTC
[PATCH v2 13/12] nova-core: sequencer: Refactor run() to handle unknown messages
Refactor GspSequencer::run() to follow the same pattern as gsp_init_done()
by wrapping message reception in a loop that ignores unknown messages
(ERANGE errors).
Suggested-by: Timur Tabi <ttabi at nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf at nvidia.com>
---
Additional patch to cure the probe issue on Timur's GA102 (which happens to
receive too many NOCAT records).
drivers/gpu/nova-core/gsp/sequencer.rs | 86 +++++++++++++++-----------
1 file changed, 49 insertions(+), 37 deletions(-)
diff --git a/drivers/gpu/nova-core/gsp/sequencer.rs b/drivers/gpu/nova-core/gsp/sequencer.rs
index ecc80f668dc8..b98e5146abd8 100644
--- a/drivers/gpu/nova-core/gsp/sequencer.rs
+++ b/drivers/gpu/nova-core/gsp/sequencer.rs
@@ -35,8 +35,8 @@ impl MessageFromGsp for fw::rpc_run_cpu_sequencer_v17_00 {
const CMD_SIZE: usize = size_of::<fw::GSP_SEQUENCER_BUFFER_CMD>();
-struct GspSequencerInfo<'a> {
- info: &'a fw::rpc_run_cpu_sequencer_v17_00,
+struct GspSequencerInfo {
+ cmd_index: u32,
cmd_data: KVec<u8>,
}
@@ -125,7 +125,7 @@ pub(crate) fn size_bytes(&self) -> usize {
}
pub(crate) struct GspSequencer<'a> {
- seq_info: GspSequencerInfo<'a>,
+ seq_info: GspSequencerInfo,
bar: &'a Bar0,
sec2_falcon: &'a Falcon<Sec2>,
gsp_falcon: &'a Falcon<Gsp>,
@@ -368,7 +368,7 @@ fn into_iter(self) -> Self::IntoIter {
GspSeqIter {
cmd_data,
current_offset: 0,
- total_cmds: self.seq_info.info.cmdIndex,
+ total_cmds: self.seq_info.cmd_index,
cmds_processed: 0,
dev: self.dev,
}
@@ -387,41 +387,53 @@ pub(crate) struct GspSequencerParams<'a> {
impl<'a> GspSequencer<'a> {
pub(crate) fn run(cmdq: &mut Cmdq, params:
GspSequencerParams<'a>, timeout: Delta) -> Result {
- cmdq.receive_msg_from_gsp(timeout, |info, mut sbuf| {
- let cmd_data = sbuf.flush_into_kvec(GFP_KERNEL)?;
- let seq_info = GspSequencerInfo { info, cmd_data };
-
- let sequencer = GspSequencer {
- seq_info,
- bar: params.bar,
- sec2_falcon: params.sec2_falcon,
- gsp_falcon: params.gsp_falcon,
- libos_dma_handle: params.libos_dma_handle,
- gsp_fw: params.gsp_fw,
- dev: params.dev,
- };
-
- dev_dbg!(params.dev, "Running CPU Sequencer commands\n");
-
- for cmd_result in &sequencer {
- match cmd_result {
- Ok(cmd) => cmd.run(&sequencer)?,
- Err(e) => {
- dev_err!(
- params.dev,
- "Error running command at index {}\n",
- sequencer.seq_info.info.cmdIndex
- );
- return Err(e);
- }
+ let seq_info = loop {
+ match cmdq.receive_msg_from_gsp(
+ timeout,
+ |info: &fw::rpc_run_cpu_sequencer_v17_00, mut sbuf| {
+ let cmd_data = sbuf.flush_into_kvec(GFP_KERNEL)?;
+ Ok(GspSequencerInfo {
+ cmd_index: info.cmdIndex,
+ cmd_data,
+ })
+ },
+ ) {
+ Ok(seq_info) => break seq_info,
+ Err(ERANGE) => continue,
+ Err(e) => return Err(e),
+ }
+ };
+
+ let sequencer = GspSequencer {
+ seq_info,
+ bar: params.bar,
+ sec2_falcon: params.sec2_falcon,
+ gsp_falcon: params.gsp_falcon,
+ libos_dma_handle: params.libos_dma_handle,
+ gsp_fw: params.gsp_fw,
+ dev: params.dev,
+ };
+
+ dev_dbg!(params.dev, "Running CPU Sequencer commands\n");
+
+ for cmd_result in &sequencer {
+ match cmd_result {
+ Ok(cmd) => cmd.run(&sequencer)?,
+ Err(e) => {
+ dev_err!(
+ params.dev,
+ "Error running command at index {}\n",
+ sequencer.seq_info.cmd_index
+ );
+ return Err(e);
}
}
+ }
- dev_dbg!(
- params.dev,
- "CPU Sequencer commands completed successfully\n"
- );
- Ok(())
- })
+ dev_dbg!(
+ params.dev,
+ "CPU Sequencer commands completed successfully\n"
+ );
+ Ok(())
}
}
--
2.34.1