thr3ads.net - Nouveau - [RFC 4/4] drm: define NVIDIA DRM format modifiers for GB20x [Jul 2025]

If this information is useful, please help other people find it:
Share via:

Faith Ekstrand

2025-Jul-03 23:22 UTC

[RFC 4/4] drm: define NVIDIA DRM format modifiers for GB20x

On Thu, Jul 3, 2025 at 6:34?PM James Jones <jajones at nvidia.com> wrote:
> The layout of bits within the individual tiles
> (referred to as sectors in the
> DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D() macro)
> changed for some formats starting in Blackwell 2
> GPUs. To denote the difference, extend the sector
> field in the parametric format modifier definition
> used to generate modifier values for NVIDIA
> hardware.
>
> Without this change, it would be impossible to
> differentiate the two layouts based on modifiers,
> and as a result software could attempt to share
> surfaces directly between pre-GB20x and GB20x
> cards, resulting in corruption when the surface
> was accessed on one of the GPUs after being
> populated with content by the other.
>
> Of note: This change causes the
> DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D() macro to
> evaluate its "s" parameter twice, with the side
> effects that entails. I surveyed all usage of the
> modifier in the kernel and Mesa code, and that
> does not appear to be problematic in any current
> usage, but I thought it was worth calling out.
>
> Signed-off-by: James Jones <jajones at nvidia.com>
> ---
>  include/uapi/drm/drm_fourcc.h | 46 +++++++++++++++++++++--------------
>  1 file changed, 28 insertions(+), 18 deletions(-)
>
> diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
> index 052e5fdd1d3b..348b2f1c1cb7 100644
> --- a/include/uapi/drm/drm_fourcc.h
> +++ b/include/uapi/drm/drm_fourcc.h
> @@ -909,8 +909,10 @@ extern "C" {
>  #define __fourcc_mod_nvidia_pkind_shift 12
>  #define __fourcc_mod_nvidia_kgen_mask 0x3ULL
>  #define __fourcc_mod_nvidia_kgen_shift 20
> -#define __fourcc_mod_nvidia_slayout_mask 0x1ULL
> -#define __fourcc_mod_nvidia_slayout_shift 22
> +#define __fourcc_mod_nvidia_slayout_low_mask 0x1ULL
> +#define __fourcc_mod_nvidia_slayout_low_shift 22
> +#define __fourcc_mod_nvidia_slayout_high_mask 0x2ULL
> +#define __fourcc_mod_nvidia_slayout_high_shift 25
>  #define __fourcc_mod_nvidia_comp_mask 0x7ULL
>  #define __fourcc_mod_nvidia_comp_shift 23
>
> @@ -973,14 +975,16 @@ extern "C" {
>   *               2 = Gob Height 8, Turing+ Page Kind mapping
>   *               3 = Reserved for future use.
>   *
> - * 22:22 s     Sector layout.  On Tegra GPUs prior to Xavier, there is a
> further
> - *             bit remapping step that occurs at an even lower level than
> the
> - *             page kind and block linear swizzles.  This causes the
> layout of
> - *             surfaces mapped in those SOC's GPUs to be incompatible
> with the
> - *             equivalent mapping on other GPUs in the same system.
> + * 22:22 s     Sector layout.  There is a further bit remapping step that
> occurs
> + * 26:26       at an even lower level than the page kind and block linear
> + *             swizzles.  This causes the bit arrangement of surfaces in
> memory
> + *             to differ subtly, and prevents direct sharing of surfaces
> between
> + *             GPUs with different layouts.
>   *
> - *               0 = Tegra K1 - Tegra Parker/TX2 Layout.
> - *               1 = Desktop GPU and Tegra Xavier+ Layout
> + *               0 = Tegra K1 - Tegra Parker/TX2 Layout
> + *               1 = Pre-GB20x, Tegra Xavier-Orin, GB10 Layout
> + *               2 = GB20x(Blackwell 2)+ Layout for some pixel/texel sizes
>
I'm not sure I like just lumping all of blackwell together. Blackwell is
the same as Turing for 32, 64, and 128-bit formats. It's only different on
8 and 16 and those aren't the same. The way we modeled this for NVK is to
have Turing, Blackwell8Bit, and Blackwell16Bit GOBTypes. I think I'd prefer
the modifiers take a similar form.

Technically, this isn't strictly necessary as there is always a format and
formats with different element sizes aren't compatible so a driver can
always look at format+modifier.  However, it is a better model of the
hardware.

~Faith Ekstrand


> + *               3 = reserved for future use.
>   *
>   * 25:23 c     Lossless Framebuffer Compression type.
>   *
> @@ -995,7 +999,7 @@ extern "C" {
>   *               6 = Reserved for future use
>   *               7 = Reserved for future use
>   *
> - * 55:26 -     Reserved for future use.  Must be zero.
> + * 55:27 -     Reserved for future use.  Must be zero.
>   */
>  #define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \
>         fourcc_mod_code(NVIDIA, \
> @@ -1006,8 +1010,10 @@ extern "C" {
>                           __fourcc_mod_nvidia_pkind_shift) | \
>                          (((g) & __fourcc_mod_nvidia_kgen_mask)
<< \
>                           __fourcc_mod_nvidia_kgen_shift) | \
> -                        (((s) & __fourcc_mod_nvidia_slayout_mask)
<< \
> -                         __fourcc_mod_nvidia_slayout_shift) | \
> +                        (((s) & __fourcc_mod_nvidia_slayout_low_mask)
<< \
> +                         __fourcc_mod_nvidia_slayout_low_shift) | \
> +                        (((s) & __fourcc_mod_nvidia_slayout_high_mask)
<<
> \
> +                         __fourcc_mod_nvidia_slayout_high_shift) | \
>                          (((c) & __fourcc_mod_nvidia_comp_mask)
<< \
>                           __fourcc_mod_nvidia_comp_shift)))
>
> @@ -1037,12 +1043,6 @@ __DRM_FOURCC_MKNVHELPER_FUNC(pkind)
>   */
>  __DRM_FOURCC_MKNVHELPER_FUNC(kgen)
>
> -/*
> - * Get the sector layout specified by mod:
> - * static inline __u64 drm_fourcc_nvidia_format_mod_slayout(__u64 mod)
> - */
> -__DRM_FOURCC_MKNVHELPER_FUNC(slayout)
> -
>  /*
>   * Get the lossless framebuffer compression specified by mod:
>   * static inline __u64 drm_fourcc_nvidia_format_mod_kgen(__u64 mod)
> @@ -1051,6 +1051,16 @@ __DRM_FOURCC_MKNVHELPER_FUNC(comp)
>
>  #undef __DRM_FOURCC_MKNVHELPER_FUNC
>
> +/* Get the sector layout specified by mod: */
> +static inline __u64
> +drm_fourcc_nvidia_format_mod_slayout(__u64 mod)
> +{
> +       return ((mod >> __fourcc_mod_nvidia_slayout_low_shift) &
> +               __fourcc_mod_nvidia_slayout_low_mask) |
> +               ((mod >> __fourcc_mod_nvidia_slayout_high_shift)
&
> +                __fourcc_mod_nvidia_slayout_high_mask);
> +}
> +
>  /* To grandfather in prior block linear format modifiers to the above
> layout,
>   * the page kind "0", which corresponds to
"pitch/linear" and hence is
> unusable
>   * with block-linear layouts, is remapped within drivers to the value
> 0xfe,
> --
> 2.49.0
>
>-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<https://lists.freedesktop.org/archives/nouveau/attachments/20250703/4ac24976/attachment.htm>

James Jones

2025-Jul-04 04:54 UTC

head link

[RFC 4/4] drm: define NVIDIA DRM format modifiers for GB20x

On 7/3/25 16:22, Faith Ekstrand wrote:> On Thu, Jul 3, 2025 at 6:34?PM James Jones <jajones at nvidia.com 
> <mailto:jajones at nvidia.com>> wrote:
> 
>     The layout of bits within the individual tiles
>     (referred to as sectors in the
>     DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D() macro)
>     changed for some formats starting in Blackwell 2
>     GPUs. To denote the difference, extend the sector
>     field in the parametric format modifier definition
>     used to generate modifier values for NVIDIA
>     hardware.
> 
>     Without this change, it would be impossible to
>     differentiate the two layouts based on modifiers,
>     and as a result software could attempt to share
>     surfaces directly between pre-GB20x and GB20x
>     cards, resulting in corruption when the surface
>     was accessed on one of the GPUs after being
>     populated with content by the other.
> 
>     Of note: This change causes the
>     DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D() macro to
>     evaluate its "s" parameter twice, with the side
>     effects that entails. I surveyed all usage of the
>     modifier in the kernel and Mesa code, and that
>     does not appear to be problematic in any current
>     usage, but I thought it was worth calling out.
> 
>     Signed-off-by: James Jones <jajones at nvidia.com
>     <mailto:jajones at nvidia.com>>
>     ---
>      ?include/uapi/drm/drm_fourcc.h | 46
+++++++++++++++++++++--------------
>      ?1 file changed, 28 insertions(+), 18 deletions(-)
> 
>     diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/
>     drm_fourcc.h
>     index 052e5fdd1d3b..348b2f1c1cb7 100644
>     --- a/include/uapi/drm/drm_fourcc.h
>     +++ b/include/uapi/drm/drm_fourcc.h
>     @@ -909,8 +909,10 @@ extern "C" {
>      ?#define __fourcc_mod_nvidia_pkind_shift 12
>      ?#define __fourcc_mod_nvidia_kgen_mask 0x3ULL
>      ?#define __fourcc_mod_nvidia_kgen_shift 20
>     -#define __fourcc_mod_nvidia_slayout_mask 0x1ULL
>     -#define __fourcc_mod_nvidia_slayout_shift 22
>     +#define __fourcc_mod_nvidia_slayout_low_mask 0x1ULL
>     +#define __fourcc_mod_nvidia_slayout_low_shift 22
>     +#define __fourcc_mod_nvidia_slayout_high_mask 0x2ULL
>     +#define __fourcc_mod_nvidia_slayout_high_shift 25
>      ?#define __fourcc_mod_nvidia_comp_mask 0x7ULL
>      ?#define __fourcc_mod_nvidia_comp_shift 23
> 
>     @@ -973,14 +975,16 @@ extern "C" {
>      ? *? ? ? ? ? ? ? ?2 = Gob Height 8, Turing+ Page Kind mapping
>      ? *? ? ? ? ? ? ? ?3 = Reserved for future use.
>      ? *
>     - * 22:22 s? ? ?Sector layout.? On Tegra GPUs prior to Xavier, there
>     is a further
>     - *? ? ? ? ? ? ?bit remapping step that occurs at an even lower
>     level than the
>     - *? ? ? ? ? ? ?page kind and block linear swizzles.? This causes
>     the layout of
>     - *? ? ? ? ? ? ?surfaces mapped in those SOC's GPUs to be
>     incompatible with the
>     - *? ? ? ? ? ? ?equivalent mapping on other GPUs in the same system.
>     + * 22:22 s? ? ?Sector layout.? There is a further bit remapping
>     step that occurs
>     + * 26:26? ? ? ?at an even lower level than the page kind and block
>     linear
>     + *? ? ? ? ? ? ?swizzles.? This causes the bit arrangement of
>     surfaces in memory
>     + *? ? ? ? ? ? ?to differ subtly, and prevents direct sharing of
>     surfaces between
>     + *? ? ? ? ? ? ?GPUs with different layouts.
>      ? *
>     - *? ? ? ? ? ? ? ?0 = Tegra K1 - Tegra Parker/TX2 Layout.
>     - *? ? ? ? ? ? ? ?1 = Desktop GPU and Tegra Xavier+ Layout
>     + *? ? ? ? ? ? ? ?0 = Tegra K1 - Tegra Parker/TX2 Layout
>     + *? ? ? ? ? ? ? ?1 = Pre-GB20x, Tegra Xavier-Orin, GB10 Layout
>     + *? ? ? ? ? ? ? ?2 = GB20x(Blackwell 2)+ Layout for some pixel/
>     texel sizes
> 
> 
> I'm not sure I like just lumping all of blackwell together. Blackwell
is
> the same as Turing for 32, 64, and 128-bit formats. It's only different
> on 8 and 16 and those aren't the same. The way we modeled this for NVK 
> is to have Turing, Blackwell8Bit, and Blackwell16Bit?GOBTypes. I think 
> I'd prefer the modifiers take a similar form.
> 
> Technically, this isn't strictly necessary as there is always a format 
> and formats with different element sizes aren't compatible so a driver 
> can always look at format+modifier.? However, it is a better model of 
> the hardware.
Yeah, my thinking was drivers would only use sector layout 2 for those 8 
and 16-bit formats, and still return sector layout 1 modifiers for other 
formats, so I think we're in agreement there. I could update the comment 
to make that clearer.

You also want one sector layout for 8-bit and one for 16-bit (E.g., 2 == 
GB20x 8-bit and 3 == GB20x 16-bit)? I guess there are some cases where 
that would be useful. I just hate to burn extra values, but I don't feel 
strongly. I'll add that in the next iteration if no one objects.

Whatever design we settle on, I think it should be a goal to allow 
pre-GB20x cards to continue sharing e.g., 32-bit surfaces directly with 
GB20x cards. Some users are going to want to mix cards like that at some 
point.

Thanks,
-James
> ~Faith Ekstrand
> 
>     + *? ? ? ? ? ? ? ?3 = reserved for future use.
>      ? *
>      ? * 25:23 c? ? ?Lossless Framebuffer Compression type.
>      ? *
>     @@ -995,7 +999,7 @@ extern "C" {
>      ? *? ? ? ? ? ? ? ?6 = Reserved for future use
>      ? *? ? ? ? ? ? ? ?7 = Reserved for future use
>      ? *
>     - * 55:26 -? ? ?Reserved for future use.? Must be zero.
>     + * 55:27 -? ? ?Reserved for future use.? Must be zero.
>      ? */
>      ?#define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \
>      ? ? ? ? fourcc_mod_code(NVIDIA, \
>     @@ -1006,8 +1010,10 @@ extern "C" {
>      ? ? ? ? ? ? ? ? ? ? ? ? ? __fourcc_mod_nvidia_pkind_shift) | \
>      ? ? ? ? ? ? ? ? ? ? ? ? ?(((g) & __fourcc_mod_nvidia_kgen_mask)
<< \
>      ? ? ? ? ? ? ? ? ? ? ? ? ? __fourcc_mod_nvidia_kgen_shift) | \
>     -? ? ? ? ? ? ? ? ? ? ? ? (((s) & __fourcc_mod_nvidia_slayout_mask)
<< \
>     -? ? ? ? ? ? ? ? ? ? ? ? ?__fourcc_mod_nvidia_slayout_shift) | \
>     +? ? ? ? ? ? ? ? ? ? ? ? (((s) &
>     __fourcc_mod_nvidia_slayout_low_mask) << \
>     +? ? ? ? ? ? ? ? ? ? ? ? ?__fourcc_mod_nvidia_slayout_low_shift) | \
>     +? ? ? ? ? ? ? ? ? ? ? ? (((s) &
>     __fourcc_mod_nvidia_slayout_high_mask) << \
>     +? ? ? ? ? ? ? ? ? ? ? ? ?__fourcc_mod_nvidia_slayout_high_shift) | \
>      ? ? ? ? ? ? ? ? ? ? ? ? ?(((c) & __fourcc_mod_nvidia_comp_mask)
<< \
>      ? ? ? ? ? ? ? ? ? ? ? ? ? __fourcc_mod_nvidia_comp_shift)))
> 
>     @@ -1037,12 +1043,6 @@ __DRM_FOURCC_MKNVHELPER_FUNC(pkind)
>      ? */
>      ?__DRM_FOURCC_MKNVHELPER_FUNC(kgen)
> 
>     -/*
>     - * Get the sector layout specified by mod:
>     - * static inline __u64 drm_fourcc_nvidia_format_mod_slayout(__u64 mod)
>     - */
>     -__DRM_FOURCC_MKNVHELPER_FUNC(slayout)
>     -
>      ?/*
>      ? * Get the lossless framebuffer compression specified by mod:
>      ? * static inline __u64 drm_fourcc_nvidia_format_mod_kgen(__u64 mod)
>     @@ -1051,6 +1051,16 @@ __DRM_FOURCC_MKNVHELPER_FUNC(comp)
> 
>      ?#undef __DRM_FOURCC_MKNVHELPER_FUNC
> 
>     +/* Get the sector layout specified by mod: */
>     +static inline __u64
>     +drm_fourcc_nvidia_format_mod_slayout(__u64 mod)
>     +{
>     +? ? ? ?return ((mod >> __fourcc_mod_nvidia_slayout_low_shift)
&
>     +? ? ? ? ? ? ? ?__fourcc_mod_nvidia_slayout_low_mask) |
>     +? ? ? ? ? ? ? ?((mod >> __fourcc_mod_nvidia_slayout_high_shift)
&
>     +? ? ? ? ? ? ? ? __fourcc_mod_nvidia_slayout_high_mask);
>     +}
>     +
>      ?/* To grandfather in prior block linear format modifiers to the
>     above layout,
>      ? * the page kind "0", which corresponds to
"pitch/linear" and
>     hence is unusable
>      ? * with block-linear layouts, is remapped within drivers to the
>     value 0xfe,
>     -- 
>     2.49.0
>

Nouveau - Jul 2025 - [RFC 4/4] drm: define NVIDIA DRM format modifiers for GB20x

[RFC 4/4] drm: define NVIDIA DRM format modifiers for GB20x

[RFC 4/4] drm: define NVIDIA DRM format modifiers for GB20x