Maarten Lankhorst
2015-Mar-14 19:30 UTC
[Nouveau] [PATCH ddx] Add support for VRAM-less devices to the ddx
With this patch the DDX almost works with GK20A, the missing piece is adding COHERENT mappings to the right places. ;-) If you specify NOUVEAU_BO_APER the kernel will truncate valid_domains to the domains specified at creation time. This means that as long as we only specify the correct domain in nouveau_allocate_surface the effect is still the same. Signed-off-by: Maarten Lankhorst <dev at mblankhorst.nl> --- The only thing still missing is adding NOUVEAU_BO_COHERENT to nouveau_allocate_surface, the scratch buffer, and nouveau_exa_scratch. After that xorg stays up long enough to crash in nouveau_exa_share_pixmap_backing, I'm still investigating that. :-) diff --git a/src/nouveau_dri2.c b/src/nouveau_dri2.c index e3445b2..69fcd8c 100644 --- a/src/nouveau_dri2.c +++ b/src/nouveau_dri2.c @@ -684,7 +684,7 @@ nouveau_dri2_finish_swap(DrawablePtr draw, unsigned int frame, /* Reference the back buffer to sync it to vblank */ nouveau_pushbuf_refn(push, &(struct nouveau_pushbuf_refn) { src_bo, - NOUVEAU_BO_VRAM | NOUVEAU_BO_RD + NOUVEAU_BO_APER | NOUVEAU_BO_RD }, 1); if (pNv->Architecture >= NV_FERMI) @@ -725,7 +725,7 @@ nouveau_dri2_finish_swap(DrawablePtr draw, unsigned int frame, * on occluded drawables. 
*/ nouveau_pushbuf_refn(push, &(struct nouveau_pushbuf_refn) { dst_bo, - NOUVEAU_BO_VRAM | NOUVEAU_BO_RD + NOUVEAU_BO_APER | NOUVEAU_BO_RD }, 1); REGION_TRANSLATE(0, &reg, -draw->x, -draw->y); diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c index def66ac..3a93d02 100644 --- a/src/nouveau_exa.c +++ b/src/nouveau_exa.c @@ -263,7 +263,7 @@ nouveau_exa_download_from_screen(PixmapPtr pspix, int x, int y, int w, int h, goto memcpy; if (!NVAccelM2MF(pNv, w, lines, cpp, 0, tmp_offset, - nouveau_pixmap_bo(pspix), NOUVEAU_BO_VRAM, + nouveau_pixmap_bo(pspix), NOUVEAU_BO_APER, src_pitch, pspix->drawable.height, x, y, tmp, NOUVEAU_BO_GART, tmp_pitch, lines, 0, 0)) @@ -361,7 +361,7 @@ nouveau_exa_upload_to_screen(PixmapPtr pdpix, int x, int y, int w, int h, if (!NVAccelM2MF(pNv, w, lines, cpp, tmp_offset, 0, tmp, NOUVEAU_BO_GART, tmp_pitch, lines, 0, 0, - nouveau_pixmap_bo(pdpix), NOUVEAU_BO_VRAM, + nouveau_pixmap_bo(pdpix), NOUVEAU_BO_APER, dst_pitch, pdpix->drawable.height, x, y)) goto memcpy; diff --git a/src/nv_accel_common.c b/src/nv_accel_common.c index 4484c1c..e1a8a12 100644 --- a/src/nv_accel_common.c +++ b/src/nv_accel_common.c @@ -37,7 +37,7 @@ nouveau_allocate_surface(ScrnInfoPtr scrn, int width, int height, int bpp, Bool tiled = (usage_hint & NOUVEAU_CREATE_PIXMAP_TILED); Bool shared = FALSE; union nouveau_bo_config cfg = {}; - int flags = NOUVEAU_BO_MAP | (bpp >= 8 ? NOUVEAU_BO_VRAM : 0); + int flags = NOUVEAU_BO_MAP; int cpp = bpp / 8, ret; #ifdef NOUVEAU_PIXMAP_SHARING @@ -46,7 +46,7 @@ nouveau_allocate_surface(ScrnInfoPtr scrn, int width, int height, int bpp, flags = NOUVEAU_BO_MAP; if (bpp >= 8) - flags |= shared ? 
NOUVEAU_BO_GART : pNv->vram_domain; if (pNv->Architecture >= NV_TESLA) { if (scanout) { @@ -677,7 +677,7 @@ NVAccelCommonInit(ScrnInfoPtr pScrn) pNv->pushbuf->user_priv = pNv->bufctx; /* Scratch buffer */ - ret = nouveau_bo_new(pNv->dev, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, + ret = nouveau_bo_new(pNv->dev, pNv->vram_domain | NOUVEAU_BO_MAP, 128 * 1024, 128 * 1024, NULL, &pNv->scratch); if (!ret) ret = nouveau_bo_map(pNv->scratch, 0, pNv->client); diff --git a/src/nv_driver.c b/src/nv_driver.c index 8e2ae03..b0e9d11 100644 --- a/src/nv_driver.c +++ b/src/nv_driver.c @@ -954,7 +954,10 @@ NVPreInit(ScrnInfoPtr pScrn, int flags) * The first thing we should figure out is the depth, bpp, etc. */ - if (dev->vram_size <= 16 * 1024 * 1024) + pNv->vram_domain = NOUVEAU_BO_VRAM; + if (!dev->vram_size) + pNv->vram_domain = NOUVEAU_BO_GART; + else if (dev->vram_size <= 16 * 1024 * 1024) defaultDepth = 16; if (!xf86SetDepthBpp(pScrn, defaultDepth, 0, 0, Support32bppFb)) { NVPreInitFail("\n"); diff --git a/src/nv_type.h b/src/nv_type.h index e6ab192..c5a2684 100644 --- a/src/nv_type.h +++ b/src/nv_type.h @@ -122,6 +122,7 @@ typedef struct _NVRec { struct nouveau_bo *scratch; Bool ce_enabled; + uint32_t vram_domain; struct nouveau_object *ce_channel; struct nouveau_pushbuf *ce_pushbuf; struct nouveau_object *NvCopy; diff --git a/src/nvc0_accel.c b/src/nvc0_accel.c index 848ca87..7188230 100644 --- a/src/nvc0_accel.c +++ b/src/nvc0_accel.c @@ -242,7 +242,7 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn) if (nouveau_pushbuf_space(push, 512, 0, 0) || nouveau_pushbuf_refn (push, &(struct nouveau_pushbuf_refn) { - pNv->scratch, NOUVEAU_BO_VRAM | + pNv->scratch, NOUVEAU_BO_APER | NOUVEAU_BO_WR }, 1)) return FALSE; diff --git a/src/nvc0_exa.c b/src/nvc0_exa.c index 1f33353..596361e 100644 --- a/src/nvc0_exa.c +++ b/src/nvc0_exa.c @@ -96,11 +96,10 @@ NVC0EXAAcquireSurface2D(PixmapPtr ppix, int is_src, uint32_t fmt) { NVC0EXA_LOCALS(ppix); struct nouveau_bo *bo = nouveau_pixmap_bo(ppix); - struct 
nouveau_pixmap *nvpix = nouveau_pixmap(ppix); int mthd = is_src ? NV50_2D_SRC_FORMAT : NV50_2D_DST_FORMAT; uint32_t bo_flags; - bo_flags = nvpix->shared ? NOUVEAU_BO_GART : NOUVEAU_BO_VRAM; + bo_flags = NOUVEAU_BO_APER; bo_flags |= is_src ? NOUVEAU_BO_RD : NOUVEAU_BO_WR; if (!nv50_style_tiled_pixmap(ppix)) { @@ -579,7 +578,7 @@ NVC0EXAPictTexture(NVPtr pNv, PixmapPtr ppix, PicturePtr ppict, unsigned unit) if (!nv50_style_tiled_pixmap(ppix)) NOUVEAU_FALLBACK("pixmap is scanout buffer\n"); - PUSH_REFN (push, bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + PUSH_REFN (push, bo, NOUVEAU_BO_APER | NOUVEAU_BO_RD); PUSH_DATAu(push, pNv->scratch, TIC_OFFSET + (unit * 32), 8); switch (ppict->format) { case PICT_a8r8g8b8: @@ -884,14 +883,14 @@ NVC0EXAPrepareComposite(int op, PUSH_DATA (push, 0); PUSH_RESET(push); - PUSH_REFN (push, pNv->scratch, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + PUSH_REFN (push, pNv->scratch, NOUVEAU_BO_APER | NOUVEAU_BO_RDWR); if (pspict->pDrawable) PUSH_REFN (push, nouveau_pixmap_bo(pspix), - NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - PUSH_REFN (push, dst, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + NOUVEAU_BO_APER | NOUVEAU_BO_RD); + PUSH_REFN (push, dst, NOUVEAU_BO_APER | NOUVEAU_BO_WR); if (pmpict && pmpict->pDrawable) PUSH_REFN (push, nouveau_pixmap_bo(pmpix), - NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + NOUVEAU_BO_APER | NOUVEAU_BO_RD); nouveau_pushbuf_bufctx(push, pNv->bufctx); if (nouveau_pushbuf_validate(push)) { diff --git a/src/nvc0_xv.c b/src/nvc0_xv.c index d1d8f18..31e2309 100644 --- a/src/nvc0_xv.c +++ b/src/nvc0_xv.c @@ -69,9 +69,9 @@ nvc0_xv_image_put(ScrnInfoPtr pScrn, NVPtr pNv = NVPTR(pScrn); struct nouveau_bo *dst = nouveau_pixmap_bo(ppix); struct nouveau_pushbuf_refn refs[] = { - { pNv->scratch, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR }, - { src, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD }, - { dst, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR }, + { pNv->scratch, NOUVEAU_BO_APER | NOUVEAU_BO_RDWR }, + { src, NOUVEAU_BO_APER | NOUVEAU_BO_RD }, + { dst, NOUVEAU_BO_APER | NOUVEAU_BO_WR }, }; 
struct nouveau_pushbuf *push = pNv->pushbuf; uint32_t mode = 0xd0005000 | (src->config.nvc0.tile_mode << 18);
Maarten Lankhorst
2015-Mar-14 21:43 UTC
[Nouveau] [PATCH v2 ddx] Add support for VRAM-less devices to the ddx
With this patch the DDX almost works with GK20A, the missing piece is adding COHERENT mappings to the right places. If you specify NOUVEAU_BO_APER the kernel will truncate valid_domains to the domains specified at creation time. This means that as long as we only specify the correct domain in nouveau_allocate_surface the effect is still the same. Signed-off-by: Maarten Lankhorst <dev at mblankhorst.nl> --- Changes since v1: Fix nouveau_exa_create_pixmap to not trigger the <= 32 MB code for vram_size == 0. This makes tegra work correctly with set_shared_pixmap. diff --git a/src/nouveau_dri2.c b/src/nouveau_dri2.c index e3445b2..69fcd8c 100644 --- a/src/nouveau_dri2.c +++ b/src/nouveau_dri2.c @@ -684,7 +684,7 @@ nouveau_dri2_finish_swap(DrawablePtr draw, unsigned int frame, /* Reference the back buffer to sync it to vblank */ nouveau_pushbuf_refn(push, &(struct nouveau_pushbuf_refn) { src_bo, - NOUVEAU_BO_VRAM | NOUVEAU_BO_RD + NOUVEAU_BO_APER | NOUVEAU_BO_RD }, 1); if (pNv->Architecture >= NV_FERMI) @@ -725,7 +725,7 @@ nouveau_dri2_finish_swap(DrawablePtr draw, unsigned int frame, * on occluded drawables. 
*/ nouveau_pushbuf_refn(push, &(struct nouveau_pushbuf_refn) { dst_bo, - NOUVEAU_BO_VRAM | NOUVEAU_BO_RD + NOUVEAU_BO_APER | NOUVEAU_BO_RD }, 1); REGION_TRANSLATE(0, &reg, -draw->x, -draw->y); diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c index def66ac..dbba664 100644 --- a/src/nouveau_exa.c +++ b/src/nouveau_exa.c @@ -121,7 +121,7 @@ nouveau_exa_create_pixmap(ScreenPtr pScreen, int width, int height, int depth, if (!width || !height) return calloc(1, sizeof(*nvpix)); - if (!pNv->exa_force_cp && pNv->dev->vram_size <= 32 * 1024 * 1024) + if (!pNv->exa_force_cp && pNv->dev->vram_size && pNv->dev->vram_size <= 32 * 1024 * 1024) return NULL; nvpix = calloc(1, sizeof(*nvpix)); @@ -263,7 +263,7 @@ nouveau_exa_download_from_screen(PixmapPtr pspix, int x, int y, int w, int h, goto memcpy; if (!NVAccelM2MF(pNv, w, lines, cpp, 0, tmp_offset, - nouveau_pixmap_bo(pspix), NOUVEAU_BO_VRAM, + nouveau_pixmap_bo(pspix), NOUVEAU_BO_APER, src_pitch, pspix->drawable.height, x, y, tmp, NOUVEAU_BO_GART, tmp_pitch, lines, 0, 0)) @@ -361,7 +361,7 @@ nouveau_exa_upload_to_screen(PixmapPtr pdpix, int x, int y, int w, int h, if (!NVAccelM2MF(pNv, w, lines, cpp, tmp_offset, 0, tmp, NOUVEAU_BO_GART, tmp_pitch, lines, 0, 0, - nouveau_pixmap_bo(pdpix), NOUVEAU_BO_VRAM, + nouveau_pixmap_bo(pdpix), NOUVEAU_BO_APER, dst_pitch, pdpix->drawable.height, x, y)) goto memcpy; diff --git a/src/nv_accel_common.c b/src/nv_accel_common.c index 4484c1c..e1a8a12 100644 --- a/src/nv_accel_common.c +++ b/src/nv_accel_common.c @@ -37,7 +37,7 @@ nouveau_allocate_surface(ScrnInfoPtr scrn, int width, int height, int bpp, Bool tiled = (usage_hint & NOUVEAU_CREATE_PIXMAP_TILED); Bool shared = FALSE; union nouveau_bo_config cfg = {}; - int flags = NOUVEAU_BO_MAP | (bpp >= 8 ? 
NOUVEAU_BO_VRAM : 0); + int flags = NOUVEAU_BO_MAP; int cpp = bpp / 8, ret; #ifdef NOUVEAU_PIXMAP_SHARING @@ -46,7 +46,7 @@ nouveau_allocate_surface(ScrnInfoPtr scrn, int width, int height, int bpp, flags = NOUVEAU_BO_MAP; if (bpp >= 8) - flags |= shared ? NOUVEAU_BO_GART : NOUVEAU_BO_VRAM; + flags |= shared ? NOUVEAU_BO_GART : pNv->vram_domain; if (pNv->Architecture >= NV_TESLA) { if (scanout) { @@ -677,7 +677,7 @@ NVAccelCommonInit(ScrnInfoPtr pScrn) pNv->pushbuf->user_priv = pNv->bufctx; /* Scratch buffer */ - ret = nouveau_bo_new(pNv->dev, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, + ret = nouveau_bo_new(pNv->dev, pNv->vram_domain | NOUVEAU_BO_MAP, 128 * 1024, 128 * 1024, NULL, &pNv->scratch); if (!ret) ret = nouveau_bo_map(pNv->scratch, 0, pNv->client); diff --git a/src/nv_driver.c b/src/nv_driver.c index 8e2ae03..e1aacae 100644 --- a/src/nv_driver.c +++ b/src/nv_driver.c @@ -954,7 +954,10 @@ NVPreInit(ScrnInfoPtr pScrn, int flags) * The first thing we should figure out is the depth, bpp, etc. 
*/ - if (dev->vram_size <= 16 * 1024 * 1024) + pNv->vram_domain = NOUVEAU_BO_VRAM; + if (!dev->vram_size) + pNv->vram_domain = NOUVEAU_BO_GART; + else if (dev->vram_size <= 16 * 1024 * 1024) defaultDepth = 16; if (!xf86SetDepthBpp(pScrn, defaultDepth, 0, 0, Support32bppFb)) { NVPreInitFail("\n"); diff --git a/src/nv_type.h b/src/nv_type.h index e6ab192..c5a2684 100644 --- a/src/nv_type.h +++ b/src/nv_type.h @@ -122,6 +122,7 @@ typedef struct _NVRec { struct nouveau_bo *scratch; Bool ce_enabled; + uint32_t vram_domain; struct nouveau_object *ce_channel; struct nouveau_pushbuf *ce_pushbuf; struct nouveau_object *NvCopy; diff --git a/src/nvc0_accel.c b/src/nvc0_accel.c index 848ca87..7188230 100644 --- a/src/nvc0_accel.c +++ b/src/nvc0_accel.c @@ -242,7 +242,7 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn) if (nouveau_pushbuf_space(push, 512, 0, 0) || nouveau_pushbuf_refn (push, &(struct nouveau_pushbuf_refn) { - pNv->scratch, NOUVEAU_BO_VRAM | + pNv->scratch, NOUVEAU_BO_APER | NOUVEAU_BO_WR }, 1)) return FALSE; diff --git a/src/nvc0_exa.c b/src/nvc0_exa.c index 1f33353..596361e 100644 --- a/src/nvc0_exa.c +++ b/src/nvc0_exa.c @@ -96,11 +96,10 @@ NVC0EXAAcquireSurface2D(PixmapPtr ppix, int is_src, uint32_t fmt) { NVC0EXA_LOCALS(ppix); struct nouveau_bo *bo = nouveau_pixmap_bo(ppix); - struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix); int mthd = is_src ? NV50_2D_SRC_FORMAT : NV50_2D_DST_FORMAT; uint32_t bo_flags; - bo_flags = nvpix->shared ? NOUVEAU_BO_GART : NOUVEAU_BO_VRAM; + bo_flags = NOUVEAU_BO_APER; bo_flags |= is_src ? 
NOUVEAU_BO_RD : NOUVEAU_BO_WR; if (!nv50_style_tiled_pixmap(ppix)) { @@ -579,7 +578,7 @@ NVC0EXAPictTexture(NVPtr pNv, PixmapPtr ppix, PicturePtr ppict, unsigned unit) if (!nv50_style_tiled_pixmap(ppix)) NOUVEAU_FALLBACK("pixmap is scanout buffer\n"); - PUSH_REFN (push, bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + PUSH_REFN (push, bo, NOUVEAU_BO_APER | NOUVEAU_BO_RD); PUSH_DATAu(push, pNv->scratch, TIC_OFFSET + (unit * 32), 8); switch (ppict->format) { case PICT_a8r8g8b8: @@ -884,14 +883,14 @@ NVC0EXAPrepareComposite(int op, PUSH_DATA (push, 0); PUSH_RESET(push); - PUSH_REFN (push, pNv->scratch, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); + PUSH_REFN (push, pNv->scratch, NOUVEAU_BO_APER | NOUVEAU_BO_RDWR); if (pspict->pDrawable) PUSH_REFN (push, nouveau_pixmap_bo(pspix), - NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - PUSH_REFN (push, dst, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + NOUVEAU_BO_APER | NOUVEAU_BO_RD); + PUSH_REFN (push, dst, NOUVEAU_BO_APER | NOUVEAU_BO_WR); if (pmpict && pmpict->pDrawable) PUSH_REFN (push, nouveau_pixmap_bo(pmpix), - NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + NOUVEAU_BO_APER | NOUVEAU_BO_RD); nouveau_pushbuf_bufctx(push, pNv->bufctx); if (nouveau_pushbuf_validate(push)) { diff --git a/src/nvc0_xv.c b/src/nvc0_xv.c index d1d8f18..31e2309 100644 --- a/src/nvc0_xv.c +++ b/src/nvc0_xv.c @@ -69,9 +69,9 @@ nvc0_xv_image_put(ScrnInfoPtr pScrn, NVPtr pNv = NVPTR(pScrn); struct nouveau_bo *dst = nouveau_pixmap_bo(ppix); struct nouveau_pushbuf_refn refs[] = { - { pNv->scratch, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR }, - { src, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD }, - { dst, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR }, + { pNv->scratch, NOUVEAU_BO_APER | NOUVEAU_BO_RDWR }, + { src, NOUVEAU_BO_APER | NOUVEAU_BO_RD }, + { dst, NOUVEAU_BO_APER | NOUVEAU_BO_WR }, }; struct nouveau_pushbuf *push = pNv->pushbuf; uint32_t mode = 0xd0005000 | (src->config.nvc0.tile_mode << 18);