James Jones
2025-Jul-03 22:36 UTC
[RFC 4/4] drm: define NVIDIA DRM format modifiers for GB20x
The layout of bits within the individual tiles (referred to as sectors in the DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D() macro) changed for some formats starting in Blackwell 2 GPUs. To denote the difference, extend the sector field in the parametric format modifier definition used to generate modifier values for NVIDIA hardware. Without this change, it would be impossible to differentiate the two layouts based on modifiers, and as a result software could attempt to share surfaces directly between pre-GB20x and GB20x cards, resulting in corruption when the surface was accessed on one of the GPUs after being populated with content by the other. Of note: This change causes the DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D() macro to evaluate its "s" parameter twice, with the side effects that entails. I surveyed all usage of the modifier in the kernel and Mesa code, and that does not appear to be problematic in any current usage, but I thought it was worth calling out. Signed-off-by: James Jones <jajones at nvidia.com> --- include/uapi/drm/drm_fourcc.h | 46 +++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 052e5fdd1d3b..348b2f1c1cb7 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -909,8 +909,10 @@ extern "C" { #define __fourcc_mod_nvidia_pkind_shift 12 #define __fourcc_mod_nvidia_kgen_mask 0x3ULL #define __fourcc_mod_nvidia_kgen_shift 20 -#define __fourcc_mod_nvidia_slayout_mask 0x1ULL -#define __fourcc_mod_nvidia_slayout_shift 22 +#define __fourcc_mod_nvidia_slayout_low_mask 0x1ULL +#define __fourcc_mod_nvidia_slayout_low_shift 22 +#define __fourcc_mod_nvidia_slayout_high_mask 0x2ULL +#define __fourcc_mod_nvidia_slayout_high_shift 25 #define __fourcc_mod_nvidia_comp_mask 0x7ULL #define __fourcc_mod_nvidia_comp_shift 23 @@ -973,14 +975,16 @@ extern "C" { * 2 = Gob Height 8, Turing+ Page Kind mapping * 3 = Reserved for future use. * - * 22:22 s Sector layout. On Tegra GPUs prior to Xavier, there is a further - * bit remapping step that occurs at an even lower level than the - * page kind and block linear swizzles. This causes the layout of - * surfaces mapped in those SOC's GPUs to be incompatible with the - * equivalent mapping on other GPUs in the same system. + * 22:22 s Sector layout. There is a further bit remapping step that occurs + * 26:26 at an even lower level than the page kind and block linear + * swizzles. This causes the bit arrangement of surfaces in memory + * to differ subtly, and prevents direct sharing of surfaces between + * GPUs with different layouts. * - * 0 = Tegra K1 - Tegra Parker/TX2 Layout. - * 1 = Desktop GPU and Tegra Xavier+ Layout + * 0 = Tegra K1 - Tegra Parker/TX2 Layout + * 1 = Pre-GB20x, Tegra Xavier-Orin, GB10 Layout + * 2 = GB20x(Blackwell 2)+ Layout for some pixel/texel sizes + * 3 = reserved for future use. * * 25:23 c Lossless Framebuffer Compression type. * @@ -995,7 +999,7 @@ extern "C" { * 6 = Reserved for future use * 7 = Reserved for future use * - * 55:26 - Reserved for future use. Must be zero. + * 55:27 - Reserved for future use. Must be zero. */ #define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \ fourcc_mod_code(NVIDIA, \ @@ -1006,8 +1010,10 @@ extern "C" { __fourcc_mod_nvidia_pkind_shift) | \ (((g) & __fourcc_mod_nvidia_kgen_mask) << \ __fourcc_mod_nvidia_kgen_shift) | \ - (((s) & __fourcc_mod_nvidia_slayout_mask) << \ - __fourcc_mod_nvidia_slayout_shift) | \ + (((s) & __fourcc_mod_nvidia_slayout_low_mask) << \ + __fourcc_mod_nvidia_slayout_low_shift) | \ + (((s) & __fourcc_mod_nvidia_slayout_high_mask) << \ + __fourcc_mod_nvidia_slayout_high_shift) | \ (((c) & __fourcc_mod_nvidia_comp_mask) << \ __fourcc_mod_nvidia_comp_shift))) @@ -1037,12 +1043,6 @@ __DRM_FOURCC_MKNVHELPER_FUNC(pkind) */ __DRM_FOURCC_MKNVHELPER_FUNC(kgen) -/* - * Get the sector layout specified by mod: - * static inline __u64 drm_fourcc_nvidia_format_mod_slayout(__u64 mod) - */ -__DRM_FOURCC_MKNVHELPER_FUNC(slayout) - /* * Get the lossless framebuffer compression specified by mod: * static inline __u64 drm_fourcc_nvidia_format_mod_kgen(__u64 mod) @@ -1051,6 +1051,16 @@ __DRM_FOURCC_MKNVHELPER_FUNC(comp) #undef __DRM_FOURCC_MKNVHELPER_FUNC +/* Get the sector layout specified by mod: */ +static inline __u64 +drm_fourcc_nvidia_format_mod_slayout(__u64 mod) +{ + return ((mod >> __fourcc_mod_nvidia_slayout_low_shift) & + __fourcc_mod_nvidia_slayout_low_mask) | + ((mod >> __fourcc_mod_nvidia_slayout_high_shift) & + __fourcc_mod_nvidia_slayout_high_mask); +} + /* To grandfather in prior block linear format modifiers to the above layout, * the page kind "0", which corresponds to "pitch/linear" and hence is unusable * with block-linear layouts, is remapped within drivers to the value 0xfe, -- 2.49.0
Faith Ekstrand
2025-Jul-03 23:22 UTC
[RFC 4/4] drm: define NVIDIA DRM format modifiers for GB20x
On Thu, Jul 3, 2025 at 6:34?PM James Jones <jajones at nvidia.com> wrote:> The layout of bits within the individual tiles > (referred to as sectors in the > DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D() macro) > changed for some formats starting in Blackwell 2 > GPUs. To denote the difference, extend the sector > field in the parametric format modifier definition > used to generate modifier values for NVIDIA > hardware. > > Without this change, it would be impossible to > differentiate the two layouts based on modifiers, > and as a result software could attempt to share > surfaces directly between pre-GB20x and GB20x > cards, resulting in corruption when the surface > was accessed on one of the GPUs after being > populated with content by the other. > > Of note: This change causes the > DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D() macro to > evaluate its "s" parameter twice, with the side > effects that entails. I surveyed all usage of the > modifier in the kernel and Mesa code, and that > does not appear to be problematic in any current > usage, but I thought it was worth calling out. > > Signed-off-by: James Jones <jajones at nvidia.com> > --- > include/uapi/drm/drm_fourcc.h | 46 +++++++++++++++++++++-------------- > 1 file changed, 28 insertions(+), 18 deletions(-) > > diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h > index 052e5fdd1d3b..348b2f1c1cb7 100644 > --- a/include/uapi/drm/drm_fourcc.h > +++ b/include/uapi/drm/drm_fourcc.h > @@ -909,8 +909,10 @@ extern "C" { > #define __fourcc_mod_nvidia_pkind_shift 12 > #define __fourcc_mod_nvidia_kgen_mask 0x3ULL > #define __fourcc_mod_nvidia_kgen_shift 20 > -#define __fourcc_mod_nvidia_slayout_mask 0x1ULL > -#define __fourcc_mod_nvidia_slayout_shift 22 > +#define __fourcc_mod_nvidia_slayout_low_mask 0x1ULL > +#define __fourcc_mod_nvidia_slayout_low_shift 22 > +#define __fourcc_mod_nvidia_slayout_high_mask 0x2ULL > +#define __fourcc_mod_nvidia_slayout_high_shift 25 > #define __fourcc_mod_nvidia_comp_mask 0x7ULL > #define __fourcc_mod_nvidia_comp_shift 23 > > @@ -973,14 +975,16 @@ extern "C" { > * 2 = Gob Height 8, Turing+ Page Kind mapping > * 3 = Reserved for future use. > * > - * 22:22 s Sector layout. On Tegra GPUs prior to Xavier, there is a > further > - * bit remapping step that occurs at an even lower level than > the > - * page kind and block linear swizzles. This causes the > layout of > - * surfaces mapped in those SOC's GPUs to be incompatible > with the > - * equivalent mapping on other GPUs in the same system. > + * 22:22 s Sector layout. There is a further bit remapping step that > occurs > + * 26:26 at an even lower level than the page kind and block linear > + * swizzles. This causes the bit arrangement of surfaces in > memory > + * to differ subtly, and prevents direct sharing of surfaces > between > + * GPUs with different layouts. > * > - * 0 = Tegra K1 - Tegra Parker/TX2 Layout. > - * 1 = Desktop GPU and Tegra Xavier+ Layout > + * 0 = Tegra K1 - Tegra Parker/TX2 Layout > + * 1 = Pre-GB20x, Tegra Xavier-Orin, GB10 Layout > + * 2 = GB20x(Blackwell 2)+ Layout for some pixel/texel sizes >I'm not sure I like just lumping all of blackwell together. Blackwell is the same as Turing for 32, 64, and 128-bit formats. It's only different on 8 and 16 and those aren't the same. The way we modeled this for NVK is to have Turing, Blackwell8Bit, and Blackwell16Bit GOBTypes. I think I'd prefer the modifiers take a similar form. Technically, this isn't strictly necessary as there is always a format and formats with different element sizes aren't compatible so a driver can always look at format+modifier. However, it is a better model of the hardware. ~Faith Ekstrand> + * 3 = reserved for future use. > * > * 25:23 c Lossless Framebuffer Compression type. > * > @@ -995,7 +999,7 @@ extern "C" { > * 6 = Reserved for future use > * 7 = Reserved for future use > * > - * 55:26 - Reserved for future use. Must be zero. > + * 55:27 - Reserved for future use. Must be zero. > */ > #define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \ > fourcc_mod_code(NVIDIA, \ > @@ -1006,8 +1010,10 @@ extern "C" { > __fourcc_mod_nvidia_pkind_shift) | \ > (((g) & __fourcc_mod_nvidia_kgen_mask) << \ > __fourcc_mod_nvidia_kgen_shift) | \ > - (((s) & __fourcc_mod_nvidia_slayout_mask) << \ > - __fourcc_mod_nvidia_slayout_shift) | \ > + (((s) & __fourcc_mod_nvidia_slayout_low_mask) << \ > + __fourcc_mod_nvidia_slayout_low_shift) | \ > + (((s) & __fourcc_mod_nvidia_slayout_high_mask) << > \ > + __fourcc_mod_nvidia_slayout_high_shift) | \ > (((c) & __fourcc_mod_nvidia_comp_mask) << \ > __fourcc_mod_nvidia_comp_shift))) > > @@ -1037,12 +1043,6 @@ __DRM_FOURCC_MKNVHELPER_FUNC(pkind) > */ > __DRM_FOURCC_MKNVHELPER_FUNC(kgen) > > -/* > - * Get the sector layout specified by mod: > - * static inline __u64 drm_fourcc_nvidia_format_mod_slayout(__u64 mod) > - */ > -__DRM_FOURCC_MKNVHELPER_FUNC(slayout) > - > /* > * Get the lossless framebuffer compression specified by mod: > * static inline __u64 drm_fourcc_nvidia_format_mod_kgen(__u64 mod) > @@ -1051,6 +1051,16 @@ __DRM_FOURCC_MKNVHELPER_FUNC(comp) > > #undef __DRM_FOURCC_MKNVHELPER_FUNC > > +/* Get the sector layout specified by mod: */ > +static inline __u64 > +drm_fourcc_nvidia_format_mod_slayout(__u64 mod) > +{ > + return ((mod >> __fourcc_mod_nvidia_slayout_low_shift) & > + __fourcc_mod_nvidia_slayout_low_mask) | > + ((mod >> __fourcc_mod_nvidia_slayout_high_shift) & > + __fourcc_mod_nvidia_slayout_high_mask); > +} > + > /* To grandfather in prior block linear format modifiers to the above > layout, > * the page kind "0", which corresponds to "pitch/linear" and hence is > unusable > * with block-linear layouts, is remapped within drivers to the value > 0xfe, > -- > 2.49.0 > >-------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.freedesktop.org/archives/nouveau/attachments/20250703/4ac24976/attachment.htm>