Faith Ekstrand
2025-Jul-03 23:22 UTC
[RFC 4/4] drm: define NVIDIA DRM format modifiers for GB20x
On Thu, Jul 3, 2025 at 6:34?PM James Jones <jajones at nvidia.com> wrote:> The layout of bits within the individual tiles > (referred to as sectors in the > DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D() macro) > changed for some formats starting in Blackwell 2 > GPUs. To denote the difference, extend the sector > field in the parametric format modifier definition > used to generate modifier values for NVIDIA > hardware. > > Without this change, it would be impossible to > differentiate the two layouts based on modifiers, > and as a result software could attempt to share > surfaces directly between pre-GB20x and GB20x > cards, resulting in corruption when the surface > was accessed on one of the GPUs after being > populated with content by the other. > > Of note: This change causes the > DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D() macro to > evaluate its "s" parameter twice, with the side > effects that entails. I surveyed all usage of the > modifier in the kernel and Mesa code, and that > does not appear to be problematic in any current > usage, but I thought it was worth calling out. > > Signed-off-by: James Jones <jajones at nvidia.com> > --- > include/uapi/drm/drm_fourcc.h | 46 +++++++++++++++++++++-------------- > 1 file changed, 28 insertions(+), 18 deletions(-) > > diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h > index 052e5fdd1d3b..348b2f1c1cb7 100644 > --- a/include/uapi/drm/drm_fourcc.h > +++ b/include/uapi/drm/drm_fourcc.h > @@ -909,8 +909,10 @@ extern "C" { > #define __fourcc_mod_nvidia_pkind_shift 12 > #define __fourcc_mod_nvidia_kgen_mask 0x3ULL > #define __fourcc_mod_nvidia_kgen_shift 20 > -#define __fourcc_mod_nvidia_slayout_mask 0x1ULL > -#define __fourcc_mod_nvidia_slayout_shift 22 > +#define __fourcc_mod_nvidia_slayout_low_mask 0x1ULL > +#define __fourcc_mod_nvidia_slayout_low_shift 22 > +#define __fourcc_mod_nvidia_slayout_high_mask 0x2ULL > +#define __fourcc_mod_nvidia_slayout_high_shift 25 > #define __fourcc_mod_nvidia_comp_mask 0x7ULL > #define __fourcc_mod_nvidia_comp_shift 23 > > @@ -973,14 +975,16 @@ extern "C" { > * 2 = Gob Height 8, Turing+ Page Kind mapping > * 3 = Reserved for future use. > * > - * 22:22 s Sector layout. On Tegra GPUs prior to Xavier, there is a > further > - * bit remapping step that occurs at an even lower level than > the > - * page kind and block linear swizzles. This causes the > layout of > - * surfaces mapped in those SOC's GPUs to be incompatible > with the > - * equivalent mapping on other GPUs in the same system. > + * 22:22 s Sector layout. There is a further bit remapping step that > occurs > + * 26:26 at an even lower level than the page kind and block linear > + * swizzles. This causes the bit arrangement of surfaces in > memory > + * to differ subtly, and prevents direct sharing of surfaces > between > + * GPUs with different layouts. > * > - * 0 = Tegra K1 - Tegra Parker/TX2 Layout. > - * 1 = Desktop GPU and Tegra Xavier+ Layout > + * 0 = Tegra K1 - Tegra Parker/TX2 Layout > + * 1 = Pre-GB20x, Tegra Xavier-Orin, GB10 Layout > + * 2 = GB20x(Blackwell 2)+ Layout for some pixel/texel sizes >I'm not sure I like just lumping all of blackwell together. Blackwell is the same as Turing for 32, 64, and 128-bit formats. It's only different on 8 and 16 and those aren't the same. The way we modeled this for NVK is to have Turing, Blackwell8Bit, and Blackwell16Bit GOBTypes. I think I'd prefer the modifiers take a similar form. Technically, this isn't strictly necessary as there is always a format and formats with different element sizes aren't compatible so a driver can always look at format+modifier. However, it is a better model of the hardware. ~Faith Ekstrand> + * 3 = reserved for future use. > * > * 25:23 c Lossless Framebuffer Compression type. > * > @@ -995,7 +999,7 @@ extern "C" { > * 6 = Reserved for future use > * 7 = Reserved for future use > * > - * 55:26 - Reserved for future use. Must be zero. > + * 55:27 - Reserved for future use. Must be zero. > */ > #define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \ > fourcc_mod_code(NVIDIA, \ > @@ -1006,8 +1010,10 @@ extern "C" { > __fourcc_mod_nvidia_pkind_shift) | \ > (((g) & __fourcc_mod_nvidia_kgen_mask) << \ > __fourcc_mod_nvidia_kgen_shift) | \ > - (((s) & __fourcc_mod_nvidia_slayout_mask) << \ > - __fourcc_mod_nvidia_slayout_shift) | \ > + (((s) & __fourcc_mod_nvidia_slayout_low_mask) << \ > + __fourcc_mod_nvidia_slayout_low_shift) | \ > + (((s) & __fourcc_mod_nvidia_slayout_high_mask) << > \ > + __fourcc_mod_nvidia_slayout_high_shift) | \ > (((c) & __fourcc_mod_nvidia_comp_mask) << \ > __fourcc_mod_nvidia_comp_shift))) > > @@ -1037,12 +1043,6 @@ __DRM_FOURCC_MKNVHELPER_FUNC(pkind) > */ > __DRM_FOURCC_MKNVHELPER_FUNC(kgen) > > -/* > - * Get the sector layout specified by mod: > - * static inline __u64 drm_fourcc_nvidia_format_mod_slayout(__u64 mod) > - */ > -__DRM_FOURCC_MKNVHELPER_FUNC(slayout) > - > /* > * Get the lossless framebuffer compression specified by mod: > * static inline __u64 drm_fourcc_nvidia_format_mod_kgen(__u64 mod) > @@ -1051,6 +1051,16 @@ __DRM_FOURCC_MKNVHELPER_FUNC(comp) > > #undef __DRM_FOURCC_MKNVHELPER_FUNC > > +/* Get the sector layout specified by mod: */ > +static inline __u64 > +drm_fourcc_nvidia_format_mod_slayout(__u64 mod) > +{ > + return ((mod >> __fourcc_mod_nvidia_slayout_low_shift) & > + __fourcc_mod_nvidia_slayout_low_mask) | > + ((mod >> __fourcc_mod_nvidia_slayout_high_shift) & > + __fourcc_mod_nvidia_slayout_high_mask); > +} > + > /* To grandfather in prior block linear format modifiers to the above > layout, > * the page kind "0", which corresponds to "pitch/linear" and hence is > unusable > * with block-linear layouts, is remapped within drivers to the value > 0xfe, > -- > 2.49.0 > >-------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.freedesktop.org/archives/nouveau/attachments/20250703/4ac24976/attachment.htm>
James Jones
2025-Jul-04 04:54 UTC
[RFC 4/4] drm: define NVIDIA DRM format modifiers for GB20x
On 7/3/25 16:22, Faith Ekstrand wrote:> On Thu, Jul 3, 2025 at 6:34?PM James Jones <jajones at nvidia.com > <mailto:jajones at nvidia.com>> wrote: > > The layout of bits within the individual tiles > (referred to as sectors in the > DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D() macro) > changed for some formats starting in Blackwell 2 > GPUs. To denote the difference, extend the sector > field in the parametric format modifier definition > used to generate modifier values for NVIDIA > hardware. > > Without this change, it would be impossible to > differentiate the two layouts based on modifiers, > and as a result software could attempt to share > surfaces directly between pre-GB20x and GB20x > cards, resulting in corruption when the surface > was accessed on one of the GPUs after being > populated with content by the other. > > Of note: This change causes the > DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D() macro to > evaluate its "s" parameter twice, with the side > effects that entails. I surveyed all usage of the > modifier in the kernel and Mesa code, and that > does not appear to be problematic in any current > usage, but I thought it was worth calling out. > > Signed-off-by: James Jones <jajones at nvidia.com > <mailto:jajones at nvidia.com>> > --- > ?include/uapi/drm/drm_fourcc.h | 46 +++++++++++++++++++++-------------- > ?1 file changed, 28 insertions(+), 18 deletions(-) > > diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/ > drm_fourcc.h > index 052e5fdd1d3b..348b2f1c1cb7 100644 > --- a/include/uapi/drm/drm_fourcc.h > +++ b/include/uapi/drm/drm_fourcc.h > @@ -909,8 +909,10 @@ extern "C" { > ?#define __fourcc_mod_nvidia_pkind_shift 12 > ?#define __fourcc_mod_nvidia_kgen_mask 0x3ULL > ?#define __fourcc_mod_nvidia_kgen_shift 20 > -#define __fourcc_mod_nvidia_slayout_mask 0x1ULL > -#define __fourcc_mod_nvidia_slayout_shift 22 > +#define __fourcc_mod_nvidia_slayout_low_mask 0x1ULL > +#define __fourcc_mod_nvidia_slayout_low_shift 22 > +#define __fourcc_mod_nvidia_slayout_high_mask 0x2ULL > +#define __fourcc_mod_nvidia_slayout_high_shift 25 > ?#define __fourcc_mod_nvidia_comp_mask 0x7ULL > ?#define __fourcc_mod_nvidia_comp_shift 23 > > @@ -973,14 +975,16 @@ extern "C" { > ? *? ? ? ? ? ? ? ?2 = Gob Height 8, Turing+ Page Kind mapping > ? *? ? ? ? ? ? ? ?3 = Reserved for future use. > ? * > - * 22:22 s? ? ?Sector layout.? On Tegra GPUs prior to Xavier, there > is a further > - *? ? ? ? ? ? ?bit remapping step that occurs at an even lower > level than the > - *? ? ? ? ? ? ?page kind and block linear swizzles.? This causes > the layout of > - *? ? ? ? ? ? ?surfaces mapped in those SOC's GPUs to be > incompatible with the > - *? ? ? ? ? ? ?equivalent mapping on other GPUs in the same system. > + * 22:22 s? ? ?Sector layout.? There is a further bit remapping > step that occurs > + * 26:26? ? ? ?at an even lower level than the page kind and block > linear > + *? ? ? ? ? ? ?swizzles.? This causes the bit arrangement of > surfaces in memory > + *? ? ? ? ? ? ?to differ subtly, and prevents direct sharing of > surfaces between > + *? ? ? ? ? ? ?GPUs with different layouts. > ? * > - *? ? ? ? ? ? ? ?0 = Tegra K1 - Tegra Parker/TX2 Layout. > - *? ? ? ? ? ? ? ?1 = Desktop GPU and Tegra Xavier+ Layout > + *? ? ? ? ? ? ? ?0 = Tegra K1 - Tegra Parker/TX2 Layout > + *? ? ? ? ? ? ? ?1 = Pre-GB20x, Tegra Xavier-Orin, GB10 Layout > + *? ? ? ? ? ? ? ?2 = GB20x(Blackwell 2)+ Layout for some pixel/ > texel sizes > > > I'm not sure I like just lumping all of blackwell together. Blackwell is > the same as Turing for 32, 64, and 128-bit formats. It's only different > on 8 and 16 and those aren't the same. The way we modeled this for NVK > is to have Turing, Blackwell8Bit, and Blackwell16Bit?GOBTypes. I think > I'd prefer the modifiers take a similar form. > > Technically, this isn't strictly necessary as there is always a format > and formats with different element sizes aren't compatible so a driver > can always look at format+modifier.? However, it is a better model of > the hardware.Yeah, my thinking was drivers would only use sector layout 2 for those 8 and 16-bit formats, and still return sector layout 1 modifiers for other formats, so I think we're in agreement there. I could update the comment to make that clearer. You also want one sector layout for 8-bit and one for 16-bit (E.g., 2 == GB20x 8-bit and 3 == GB20x 16-bit)? I guess there are some cases where that would be useful. I just hate to burn extra values, but I don't feel strongly. I'll add that in the next iteration if no one objects. Whatever design we settle on, I think it should be a goal to allow pre-GB20x cards to continue sharing e.g., 32-bit surfaces directly with GB20x cards. Some users are going to want to mix cards like that at some point. Thanks, -James> ~Faith Ekstrand > > + *? ? ? ? ? ? ? ?3 = reserved for future use. > ? * > ? * 25:23 c? ? ?Lossless Framebuffer Compression type. > ? * > @@ -995,7 +999,7 @@ extern "C" { > ? *? ? ? ? ? ? ? ?6 = Reserved for future use > ? *? ? ? ? ? ? ? ?7 = Reserved for future use > ? * > - * 55:26 -? ? ?Reserved for future use.? Must be zero. > + * 55:27 -? ? ?Reserved for future use.? Must be zero. > ? */ > ?#define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \ > ? ? ? ? fourcc_mod_code(NVIDIA, \ > @@ -1006,8 +1010,10 @@ extern "C" { > ? ? ? ? ? ? ? ? ? ? ? ? ? __fourcc_mod_nvidia_pkind_shift) | \ > ? ? ? ? ? ? ? ? ? ? ? ? ?(((g) & __fourcc_mod_nvidia_kgen_mask) << \ > ? ? ? ? ? ? ? ? ? ? ? ? ? __fourcc_mod_nvidia_kgen_shift) | \ > -? ? ? ? ? ? ? ? ? ? ? ? (((s) & __fourcc_mod_nvidia_slayout_mask) << \ > -? ? ? ? ? ? ? ? ? ? ? ? ?__fourcc_mod_nvidia_slayout_shift) | \ > +? ? ? ? ? ? ? ? ? ? ? ? (((s) & > __fourcc_mod_nvidia_slayout_low_mask) << \ > +? ? ? ? ? ? ? ? ? ? ? ? ?__fourcc_mod_nvidia_slayout_low_shift) | \ > +? ? ? ? ? ? ? ? ? ? ? ? (((s) & > __fourcc_mod_nvidia_slayout_high_mask) << \ > +? ? ? ? ? ? ? ? ? ? ? ? ?__fourcc_mod_nvidia_slayout_high_shift) | \ > ? ? ? ? ? ? ? ? ? ? ? ? ?(((c) & __fourcc_mod_nvidia_comp_mask) << \ > ? ? ? ? ? ? ? ? ? ? ? ? ? __fourcc_mod_nvidia_comp_shift))) > > @@ -1037,12 +1043,6 @@ __DRM_FOURCC_MKNVHELPER_FUNC(pkind) > ? */ > ?__DRM_FOURCC_MKNVHELPER_FUNC(kgen) > > -/* > - * Get the sector layout specified by mod: > - * static inline __u64 drm_fourcc_nvidia_format_mod_slayout(__u64 mod) > - */ > -__DRM_FOURCC_MKNVHELPER_FUNC(slayout) > - > ?/* > ? * Get the lossless framebuffer compression specified by mod: > ? * static inline __u64 drm_fourcc_nvidia_format_mod_kgen(__u64 mod) > @@ -1051,6 +1051,16 @@ __DRM_FOURCC_MKNVHELPER_FUNC(comp) > > ?#undef __DRM_FOURCC_MKNVHELPER_FUNC > > +/* Get the sector layout specified by mod: */ > +static inline __u64 > +drm_fourcc_nvidia_format_mod_slayout(__u64 mod) > +{ > +? ? ? ?return ((mod >> __fourcc_mod_nvidia_slayout_low_shift) & > +? ? ? ? ? ? ? ?__fourcc_mod_nvidia_slayout_low_mask) | > +? ? ? ? ? ? ? ?((mod >> __fourcc_mod_nvidia_slayout_high_shift) & > +? ? ? ? ? ? ? ? __fourcc_mod_nvidia_slayout_high_mask); > +} > + > ?/* To grandfather in prior block linear format modifiers to the > above layout, > ? * the page kind "0", which corresponds to "pitch/linear" and > hence is unusable > ? * with block-linear layouts, is remapped within drivers to the > value 0xfe, > -- > 2.49.0 >