James Jones
2024-Sep-06 19:36 UTC
[PATCH v2 3/3] drm/nouveau: Add drm_panic support for nv50+
Right, there are 3 iterations of block linear tiling actually. NV50 does support scanout of block linear surfaces. All block-linear-capable GPUs do. The 3 generations are: NV5x/G8x/GTXXX line: Original block size. GFXXX(nvc0 I believe in nouveau terms)-GV100: double the block height I believe. GTXXX+: Same block size, but the layout within a block is subtly different, at least as visible in CPU mappings. If there isn't enough info on the layouts in the reverse-engineered documentation, I believe this is somewhat documented in drm_fourcc.h, and it has a link to a technical reference manual describing the details of the 2nd-gen layout (pre-Turing), though I believe it's behind a developer account login wall that's free to sign up for but may require agreeing to a EULA of some sort regarding the content. Once you get the pre-Turing layout working, just halve the block height to get it working on NV5x. If that isn't sufficient, reach out and we'll see what we can do to improve documentation or answer specific questions. Thanks, -James On 9/6/24 05:53, Ilia Mirkin wrote:> On Fri, Sep 6, 2024 at 6:05 AM Jocelyn Falempe <jfalempe at redhat.com > <mailto:jfalempe at redhat.com>> wrote: > > Add drm_panic support, for nv50+ cards. > It's enough to get the panic screen while running Gnome/Wayland on a > GTX 1650. > It doesn't support multi-plane or compressed format. > Support for other formats and older cards will come later. > Tiling is only tested on GTX1650, and might be wrong for other cards. 
> > > I'm moderately sure that nv50 and nvc0 tile differently (the general > algo is the same, but height is different): > > https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html?highlight=tiling#blocklinear-surfaces <https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html?highlight=tiling#blocklinear-surfaces> > > That said, I don't know that nv50 supports scanout of tiled surfaces > (nor was I aware that nvc0+ did, perhaps it's a recent feature, or > perhaps I'm just forgetful). > > Cheers, > > ? -ilia > > > Signed-off-by: Jocelyn Falempe <jfalempe at redhat.com > <mailto:jfalempe at redhat.com>> > --- > v2: > ?* Rebase and drop already merged patches. > ?* Rework the tiling algorithm, using "swizzle" to compute the offset > ? ?inside the block. > > ?drivers/gpu/drm/nouveau/dispnv50/wndw.c | 107 +++++++++++++++++++++++- > ?1 file changed, 105 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c > b/drivers/gpu/drm/nouveau/dispnv50/wndw.c > index 7a2cceaee6e9..50ecf6f12b81 100644 > --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c > +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c > @@ -30,11 +30,16 @@ > ?#include <nvhw/class/cl507e.h> > ?#include <nvhw/class/clc37e.h> > > +#include <linux/iosys-map.h> > + > ?#include <drm/drm_atomic.h> > ?#include <drm/drm_atomic_helper.h> > ?#include <drm/drm_blend.h> > -#include <drm/drm_gem_atomic_helper.h> > ?#include <drm/drm_fourcc.h> > +#include <drm/drm_framebuffer.h> > +#include <drm/drm_gem_atomic_helper.h> > +#include <drm/drm_panic.h> > +#include <drm/ttm/ttm_bo.h> > > ?#include "nouveau_bo.h" > ?#include "nouveau_gem.h" > @@ -577,6 +582,93 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, > struct drm_plane_state *state) > ? ? ? ? return 0; > ?} > > +#define NV_TILE_BLK_BASE_HEIGHT 8? ? ? /* In pixel */ > +#define NV_TILE_GOB_SIZE 64? ? 
/* In bytes */ > +#define NV_TILE_BLK_WIDTH (NV_TILE_GOB_SIZE / 4) /* For 32 bits > pixel */ > + > +/* get the offset in bytes inside the framebuffer, after taking > tiling into account */ > +static unsigned int nv50_get_tiled_offset(struct drm_scanout_buffer > *sb, unsigned int blk_h, > +? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?unsigned int x, unsigned > int y) > +{ > +? ? ? ?u32 blk_x, blk_y, blk_sz, blk_off, pitch; > +? ? ? ?u32 swizzle; > + > +? ? ? ?blk_sz = NV_TILE_GOB_SIZE * blk_h; > +? ? ? ?pitch = DIV_ROUND_UP(sb->width, NV_TILE_BLK_WIDTH); > + > +? ? ? ?/* block coordinate */ > +? ? ? ?blk_x = x / NV_TILE_BLK_WIDTH; > +? ? ? ?blk_y = y / blk_h; > + > +? ? ? ?blk_off = ((blk_y * pitch) + blk_x) * blk_sz; > + > +? ? ? ?y = y % blk_h; > + > +? ? ? ?/* Inside the block, use the fast address swizzle to compute > the offset > +? ? ? ? * For nvidia blocklinear, bit order is yn..y3 x3 y2 x2 y1 > y0 x1 x0 > +? ? ? ? */ > +? ? ? ?swizzle = (x & 3) | (y & 3) << 2 | (x & 4) << 2 | (y & 4) << 3; > +? ? ? ?swizzle |= (x & 8) << 3 | (y >> 3) << 7; > + > +? ? ? ?return blk_off + swizzle * 4; > +} > + > +static void nv50_set_pixel(struct drm_scanout_buffer *sb, unsigned > int x, unsigned int y, u32 color) > +{ > +? ? ? ?struct drm_framebuffer *fb = sb->private; > +? ? ? ?unsigned int off; > +? ? ? ?/* According to DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D > documentation, > +? ? ? ? * the last 4 bits of the modifier is log2(blk_height / > NV_TILE_BLK_BASE_HEIGHT) > +? ? ? ? */ > +? ? ? ?unsigned int blk_h = NV_TILE_BLK_BASE_HEIGHT * (1 << > (fb->modifier & 0xf)); > + > +? ? ? ?off = nv50_get_tiled_offset(sb, blk_h, x, y); > +? ? ? ?iosys_map_wr(&sb->map[0], off, u32, color); > +} > + > +static int > +nv50_wndw_get_scanout_buffer(struct drm_plane *plane, struct > drm_scanout_buffer *sb) > +{ > +? ? ? ?struct drm_framebuffer *fb; > +? ? ? ?struct nouveau_bo *nvbo; > + > +? ? ? ?if (!plane->state || !plane->state->fb) > +? ? ? ? ? ? ? ?return -EINVAL; > + > +? ? ? 
?fb = plane->state->fb; > +? ? ? ?nvbo = nouveau_gem_object(fb->obj[0]); > + > +? ? ? ?/* Don't support compressed format, or multiplane yet. */ > +? ? ? ?if (nvbo->comp || fb->format->num_planes != 1) > +? ? ? ? ? ? ? ?return -EOPNOTSUPP; > + > +? ? ? ?if (nouveau_bo_map(nvbo)) { > +? ? ? ? ? ? ? ?pr_warn("nouveau bo map failed, panic won't be > displayed\n"); > +? ? ? ? ? ? ? ?return -ENOMEM; > +? ? ? ?} > + > +? ? ? ?if (nvbo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK) > +? ? ? ? ? ? ? ?iosys_map_set_vaddr_iomem(&sb->map[0], > nvbo->kmap.virtual); > +? ? ? ?else > +? ? ? ? ? ? ? ?iosys_map_set_vaddr(&sb->map[0], nvbo->kmap.virtual); > + > +? ? ? ?sb->height = fb->height; > +? ? ? ?sb->width = fb->width; > +? ? ? ?sb->pitch[0] = fb->pitches[0]; > +? ? ? ?sb->format = fb->format; > + > +? ? ? ?/* If tiling is enabled, use the set_pixel() to display > correctly. > +? ? ? ? * Only handle 32bits format for now. > +? ? ? ? */ > +? ? ? ?if (fb->modifier & 0xf) { > +? ? ? ? ? ? ? ?if (fb->format->cpp[0] != 4) > +? ? ? ? ? ? ? ? ? ? ? ?return -EOPNOTSUPP; > +? ? ? ? ? ? ? ?sb->private = (void *) fb; > +? ? ? ? ? ? ? ?sb->set_pixel = nv50_set_pixel; > +? ? ? ?} > +? ? ? ?return 0; > +} > + > ?static const struct drm_plane_helper_funcs > ?nv50_wndw_helper = { > ? ? ? ? .prepare_fb = nv50_wndw_prepare_fb, > @@ -584,6 +676,14 @@ nv50_wndw_helper = { > ? ? ? ? .atomic_check = nv50_wndw_atomic_check, > ?}; > > +static const struct drm_plane_helper_funcs > +nv50_wndw_primary_helper = { > +? ? ? ?.prepare_fb = nv50_wndw_prepare_fb, > +? ? ? ?.cleanup_fb = nv50_wndw_cleanup_fb, > +? ? ? ?.atomic_check = nv50_wndw_atomic_check, > +? ? ? ?.get_scanout_buffer = nv50_wndw_get_scanout_buffer, > +}; > + > ?static void > ?nv50_wndw_atomic_destroy_state(struct drm_plane *plane, > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct drm_plane_state *state) > @@ -732,7 +832,10 @@ nv50_wndw_new_(const struct nv50_wndw_func > *func, struct drm_device *dev, > ? ? ? ? ? ? ? ? return ret; > ? ? ? ? } > > -? ? 
? ?drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper); > +? ? ? ?if (type == DRM_PLANE_TYPE_PRIMARY) > +? ? ? ? ? ? ? ?drm_plane_helper_add(&wndw->plane, > &nv50_wndw_primary_helper); > +? ? ? ?else > +? ? ? ? ? ? ? ?drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper); > > ? ? ? ? if (wndw->func->ilut) { > ? ? ? ? ? ? ? ? ret = nv50_lut_init(disp, mmu, &wndw->ilut); > -- > 2.46.0 >
Jocelyn Falempe
2024-Sep-11 14:19 UTC
[PATCH v2 3/3] drm/nouveau: Add drm_panic support for nv50+
On 06/09/2024 21:36, James Jones wrote:> Right, there are 3 iterations of block linear tiling actually. NV50 does > support scanout of block linear surfaces. All block-linear-capable GPUs > do. The 3 generations are: > > NV5x/G8x/GTXXX line: Original block size. > GFXXX(nvc0 I believe in nouveau terms)-GV100: double the block height I > believe. > GTXXX+: Same block size, but the layout within a block is subtly > different, at least as visible in CPU mappings. >I'm looking at how to check for a specific chip in nouveau, and fix the tiling for cards other than Turing. It looks like in most cases nouveau uses device->info.chipset, with hardcoded hex values. So for nvc0+ I should check device->info.chipset >= 0xc0? chipset < 0xc0: block_height 4, "old layout"; chipset >= 0xc0: block_height 8, "old layout"; chipset >= ??: block_height 8, "new layout". For testing, I have at hand a GTX1650 (Turing) and an old GeForce 8800GTS (Tesla?), so it's an NV92, and still uses this nv50+ code? So I should be able to figure out if there is a tiling layout difference on my 8800GTS.> If there isn't enough info on the layouts in the reverse engineered > documentation, believe this is somewhat documented in drm_fourcc.h, and > it has a link to a technical reference manual describing the details of > the 2nd-gen layout (Pre-turing), though I believe it's behind a > developer account login wall that's free to sign up for but may require > agreeing to a EULA of some sort regarding the content. Once you get the > pre-turing layout working, just halve the block height to get it working > on NV5x. If that isn't sufficient, reach out and we'll see what we can > do to improve documentation or answer specific questions. I didn't find the link, but I think I have enough information, thanks for your support. 
Best regards, -- Jocelyn> > Thanks, > -James > > On 9/6/24 05:53, Ilia Mirkin wrote: >> On Fri, Sep 6, 2024 at 6:05?AM Jocelyn Falempe <jfalempe at redhat.com >> <mailto:jfalempe at redhat.com>> wrote: >> >> ??? Add drm_panic support, for nv50+ cards. >> ??? It's enough to get the panic screen while running Gnome/Wayland on a >> ??? GTX 1650. >> ??? It doesn't support multi-plane or compressed format. >> ??? Support for other formats and older cards will come later. >> ??? Tiling is only tested on GTX1650, and might be wrong for other cards. >> >> >> I'm moderately sure that nv50 and nvc0 tile differently (the general >> algo is the same, but height is different): >> >> https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html? >> highlight=tiling#blocklinear-surfaces <https:// >> envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html? >> highlight=tiling#blocklinear-surfaces> >> >> That said, I don't know that nv50 supports scanout of tiled surfaces >> (nor was I aware that nvc0+ did, perhaps it's a recent feature, or >> perhaps I'm just forgetful). >> >> Cheers, >> >> ?? -ilia >> >> >> ??? Signed-off-by: Jocelyn Falempe <jfalempe at redhat.com >> ??? <mailto:jfalempe at redhat.com>> >> ??? --- >> ??? v2: >> ???? ?* Rebase and drop already merged patches. >> ???? ?* Rework the tiling algorithm, using "swizzle" to compute the >> offset >> ???? ? ?inside the block. >> >> ???? ?drivers/gpu/drm/nouveau/dispnv50/wndw.c | 107 ++++++++++++++++++ >> +++++- >> ???? ?1 file changed, 105 insertions(+), 2 deletions(-) >> >> ??? diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c >> ??? b/drivers/gpu/drm/nouveau/dispnv50/wndw.c >> ??? index 7a2cceaee6e9..50ecf6f12b81 100644 >> ??? --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c >> ??? +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c >> ??? @@ -30,11 +30,16 @@ >> ???? ?#include <nvhw/class/cl507e.h> >> ???? ?#include <nvhw/class/clc37e.h> >> >> ??? +#include <linux/iosys-map.h> >> ??? + >> ???? 
?#include <drm/drm_atomic.h> >> ???? ?#include <drm/drm_atomic_helper.h> >> ???? ?#include <drm/drm_blend.h> >> ??? -#include <drm/drm_gem_atomic_helper.h> >> ???? ?#include <drm/drm_fourcc.h> >> ??? +#include <drm/drm_framebuffer.h> >> ??? +#include <drm/drm_gem_atomic_helper.h> >> ??? +#include <drm/drm_panic.h> >> ??? +#include <drm/ttm/ttm_bo.h> >> >> ???? ?#include "nouveau_bo.h" >> ???? ?#include "nouveau_gem.h" >> ??? @@ -577,6 +582,93 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, >> ??? struct drm_plane_state *state) >> ???? ? ? ? ? return 0; >> ???? ?} >> >> ??? +#define NV_TILE_BLK_BASE_HEIGHT 8? ? ? /* In pixel */ >> ??? +#define NV_TILE_GOB_SIZE 64? ? /* In bytes */ >> ??? +#define NV_TILE_BLK_WIDTH (NV_TILE_GOB_SIZE / 4) /* For 32 bits >> ??? pixel */ >> ??? + >> ??? +/* get the offset in bytes inside the framebuffer, after taking >> ??? tiling into account */ >> ??? +static unsigned int nv50_get_tiled_offset(struct drm_scanout_buffer >> ??? *sb, unsigned int blk_h, >> ??? +? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?unsigned int x, unsigned >> ??? int y) >> ??? +{ >> ??? +? ? ? ?u32 blk_x, blk_y, blk_sz, blk_off, pitch; >> ??? +? ? ? ?u32 swizzle; >> ??? + >> ??? +? ? ? ?blk_sz = NV_TILE_GOB_SIZE * blk_h; >> ??? +? ? ? ?pitch = DIV_ROUND_UP(sb->width, NV_TILE_BLK_WIDTH); >> ??? + >> ??? +? ? ? ?/* block coordinate */ >> ??? +? ? ? ?blk_x = x / NV_TILE_BLK_WIDTH; >> ??? +? ? ? ?blk_y = y / blk_h; >> ??? + >> ??? +? ? ? ?blk_off = ((blk_y * pitch) + blk_x) * blk_sz; >> ??? + >> ??? +? ? ? ?y = y % blk_h; >> ??? + >> ??? +? ? ? ?/* Inside the block, use the fast address swizzle to compute >> ??? the offset >> ??? +? ? ? ? * For nvidia blocklinear, bit order is yn..y3 x3 y2 x2 y1 >> ??? y0 x1 x0 >> ??? +? ? ? ? */ >> ??? +? ? ? ?swizzle = (x & 3) | (y & 3) << 2 | (x & 4) << 2 | (y & 4) >> << 3; >> ??? +? ? ? ?swizzle |= (x & 8) << 3 | (y >> 3) << 7; >> ??? + >> ??? +? ? ? ?return blk_off + swizzle * 4; >> ??? +} >> ??? + >> ??? 
+static void nv50_set_pixel(struct drm_scanout_buffer *sb, unsigned >> ??? int x, unsigned int y, u32 color) >> ??? +{ >> ??? +? ? ? ?struct drm_framebuffer *fb = sb->private; >> ??? +? ? ? ?unsigned int off; >> ??? +? ? ? ?/* According to DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D >> ??? documentation, >> ??? +? ? ? ? * the last 4 bits of the modifier is log2(blk_height / >> ??? NV_TILE_BLK_BASE_HEIGHT) >> ??? +? ? ? ? */ >> ??? +? ? ? ?unsigned int blk_h = NV_TILE_BLK_BASE_HEIGHT * (1 << >> ??? (fb->modifier & 0xf)); >> ??? + >> ??? +? ? ? ?off = nv50_get_tiled_offset(sb, blk_h, x, y); >> ??? +? ? ? ?iosys_map_wr(&sb->map[0], off, u32, color); >> ??? +} >> ??? + >> ??? +static int >> ??? +nv50_wndw_get_scanout_buffer(struct drm_plane *plane, struct >> ??? drm_scanout_buffer *sb) >> ??? +{ >> ??? +? ? ? ?struct drm_framebuffer *fb; >> ??? +? ? ? ?struct nouveau_bo *nvbo; >> ??? + >> ??? +? ? ? ?if (!plane->state || !plane->state->fb) >> ??? +? ? ? ? ? ? ? ?return -EINVAL; >> ??? + >> ??? +? ? ? ?fb = plane->state->fb; >> ??? +? ? ? ?nvbo = nouveau_gem_object(fb->obj[0]); >> ??? + >> ??? +? ? ? ?/* Don't support compressed format, or multiplane yet. */ >> ??? +? ? ? ?if (nvbo->comp || fb->format->num_planes != 1) >> ??? +? ? ? ? ? ? ? ?return -EOPNOTSUPP; >> ??? + >> ??? +? ? ? ?if (nouveau_bo_map(nvbo)) { >> ??? +? ? ? ? ? ? ? ?pr_warn("nouveau bo map failed, panic won't be >> ??? displayed\n"); >> ??? +? ? ? ? ? ? ? ?return -ENOMEM; >> ??? +? ? ? ?} >> ??? + >> ??? +? ? ? ?if (nvbo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK) >> ??? +? ? ? ? ? ? ? ?iosys_map_set_vaddr_iomem(&sb->map[0], >> ??? nvbo->kmap.virtual); >> ??? +? ? ? ?else >> ??? +? ? ? ? ? ? ? ?iosys_map_set_vaddr(&sb->map[0], nvbo->kmap.virtual); >> ??? + >> ??? +? ? ? ?sb->height = fb->height; >> ??? +? ? ? ?sb->width = fb->width; >> ??? +? ? ? ?sb->pitch[0] = fb->pitches[0]; >> ??? +? ? ? ?sb->format = fb->format; >> ??? + >> ??? +? ? ? ?/* If tiling is enabled, use the set_pixel() to display >> ??? 
correctly. >> ??? +? ? ? ? * Only handle 32bits format for now. >> ??? +? ? ? ? */ >> ??? +? ? ? ?if (fb->modifier & 0xf) { >> ??? +? ? ? ? ? ? ? ?if (fb->format->cpp[0] != 4) >> ??? +? ? ? ? ? ? ? ? ? ? ? ?return -EOPNOTSUPP; >> ??? +? ? ? ? ? ? ? ?sb->private = (void *) fb; >> ??? +? ? ? ? ? ? ? ?sb->set_pixel = nv50_set_pixel; >> ??? +? ? ? ?} >> ??? +? ? ? ?return 0; >> ??? +} >> ??? + >> ???? ?static const struct drm_plane_helper_funcs >> ???? ?nv50_wndw_helper = { >> ???? ? ? ? ? .prepare_fb = nv50_wndw_prepare_fb, >> ??? @@ -584,6 +676,14 @@ nv50_wndw_helper = { >> ???? ? ? ? ? .atomic_check = nv50_wndw_atomic_check, >> ???? ?}; >> >> ??? +static const struct drm_plane_helper_funcs >> ??? +nv50_wndw_primary_helper = { >> ??? +? ? ? ?.prepare_fb = nv50_wndw_prepare_fb, >> ??? +? ? ? ?.cleanup_fb = nv50_wndw_cleanup_fb, >> ??? +? ? ? ?.atomic_check = nv50_wndw_atomic_check, >> ??? +? ? ? ?.get_scanout_buffer = nv50_wndw_get_scanout_buffer, >> ??? +}; >> ??? + >> ???? ?static void >> ???? ?nv50_wndw_atomic_destroy_state(struct drm_plane *plane, >> ???? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct drm_plane_state *state) >> ??? @@ -732,7 +832,10 @@ nv50_wndw_new_(const struct nv50_wndw_func >> ??? *func, struct drm_device *dev, >> ???? ? ? ? ? ? ? ? ? return ret; >> ???? ? ? ? ? } >> >> ??? -? ? ? ?drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper); >> ??? +? ? ? ?if (type == DRM_PLANE_TYPE_PRIMARY) >> ??? +? ? ? ? ? ? ? ?drm_plane_helper_add(&wndw->plane, >> ??? &nv50_wndw_primary_helper); >> ??? +? ? ? ?else >> ??? +? ? ? ? ? ? ? ?drm_plane_helper_add(&wndw->plane, >> &nv50_wndw_helper); >> >> ???? ? ? ? ? if (wndw->func->ilut) { >> ???? ? ? ? ? ? ? ? ? ret = nv50_lut_init(disp, mmu, &wndw->ilut); >> ??? -- ??? 2.46.0 >>