Lucas Stach
2012-Jan-10 11:41 UTC
[Nouveau] [PATCH 0/4] nvfx: rework render temps code and fixes
This patch series silences some unknown cap warnings and fixes up coding style (patches 1 and 4). The most important part of this series is the two patches in the middle. They rework the state_fb code so that we are able to render to targets that are not 64-byte aligned. As this is the only real use case for render temporaries, this allows us to drop the temp code completely and simplifies a lot of cases. This has only been tested on nv49, but there should be nothing different in this area for other nv3x or nv4x GPUs. I plan on working on top of this in the following days, so please review and apply as time permits. Thanks, Lucas Lucas Stach (4): nvfx: say no to stream output caps nvfx: rework state_fb code to get rid of render temps nvfx: drop render temporaries code nvfx: random cleanups of the state validation code src/gallium/drivers/nvfx/nvfx_context.h | 2 +- src/gallium/drivers/nvfx/nvfx_fragtex.c | 4 - src/gallium/drivers/nvfx/nvfx_miptree.c | 18 +--- src/gallium/drivers/nvfx/nvfx_resource.h | 30 +----- src/gallium/drivers/nvfx/nvfx_screen.c | 6 +- src/gallium/drivers/nvfx/nvfx_state_emit.c | 94 +++++------------ src/gallium/drivers/nvfx/nvfx_state_fb.c | 155 ++++++++++----------------- src/gallium/drivers/nvfx/nvfx_surface.c | 136 +++---------------------- 8 files changed, 108 insertions(+), 337 deletions(-) -- 1.7.7.5
Lucas Stach
2012-Jan-10 11:41 UTC
[Nouveau] [PATCH 1/4] nvfx: say no to stream output caps
nvfx doesn't support any kind of stream out, so silence the unused cap warnings. Signed-off-by: Lucas Stach <dev at lynxeye.de> --- src/gallium/drivers/nvfx/nvfx_screen.c | 6 +++++- 1 files changed, 5 insertions(+), 1 deletions(-) diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index b4a1b3a..938d67b 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -88,8 +88,12 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_MAX_TEXEL_OFFSET: case PIPE_CAP_CONDITIONAL_RENDER: case PIPE_CAP_TEXTURE_BARRIER: - return 0; case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + case PIPE_CAP_TGSI_CAN_COMPACT_VARYINGS: + case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: return 0; default: NOUVEAU_ERR("Warning: unknown PIPE_CAP %d\n", param); -- 1.7.7.5
Lucas Stach
2012-Jan-10 11:41 UTC
[Nouveau] [PATCH 2/4] nvfx: rework state_fb code to get rid of render temps
This commit rewrites a lot of the state_fb code to support rendering to targets not aligned to 64 byte. This allows us to drop the render temporaries as unaligned targets are the only use-case where they are really needed. The temporaries code was used for a lot of things more, but apparently those also work without temps. There is one regression in piglit fbo-clear-formats, but this will be fixed with the use of real hardware clears and doesn't matter in practice as no real application tries to scissor clear a 2x2 pixel render target. Signed-off-by: Lucas Stach <dev at lynxeye.de> --- src/gallium/drivers/nvfx/nvfx_context.h | 2 +- src/gallium/drivers/nvfx/nvfx_state_emit.c | 59 ++--------- src/gallium/drivers/nvfx/nvfx_state_fb.c | 155 ++++++++++----------------- src/gallium/drivers/nvfx/nvfx_surface.c | 2 + 4 files changed, 72 insertions(+), 146 deletions(-) diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 09d394b..019be6c 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -247,7 +247,7 @@ extern void nvfx_vtxfmt_validate(struct nvfx_context *nvfx); /* nvfx_fb.c */ extern int nvfx_framebuffer_prepare(struct nvfx_context *nvfx); -extern void nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result); +extern void nvfx_framebuffer_validate(struct nvfx_context *nvfx); void nvfx_framebuffer_relocate(struct nvfx_context *nvfx); diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index 25da80e..e2cfb76 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -199,7 +199,6 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) struct nouveau_grobj *eng3d = nvfx->screen->eng3d; unsigned dirty; unsigned still_dirty = 0; - int new_fb_mode = -1; /* 1 = all swizzled, 0 = make all linear */ boolean flush_tex_cache = FALSE; unsigned render_temps; @@ -213,29 
+212,12 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) nvfx->relocs_needed = NVFX_RELOCATE_ALL; } - /* These can trigger use the of 3D engine to copy temporaries. - * That will recurse here and thus dirty all 3D state, so we need to this before anything else, and in a loop.. - * This converges to having clean temps, then binding both fragtexes and framebuffers. - */ - while(nvfx->dirty & (NVFX_NEW_FB | NVFX_NEW_SAMPLER)) - { - if(nvfx->dirty & NVFX_NEW_SAMPLER) - { - nvfx->dirty &=~ NVFX_NEW_SAMPLER; - nvfx_fragtex_validate(nvfx); - - // TODO: only set this if really necessary - flush_tex_cache = TRUE; - } + if(nvfx->dirty & NVFX_NEW_SAMPLER) { + nvfx->dirty &=~ NVFX_NEW_SAMPLER; + nvfx_fragtex_validate(nvfx); - if(nvfx->dirty & NVFX_NEW_FB) - { - nvfx->dirty &=~ NVFX_NEW_FB; - new_fb_mode = nvfx_framebuffer_prepare(nvfx); - - // TODO: make sure this doesn't happen, i.e. fbs have matching formats - assert(new_fb_mode >= 0); - } + // TODO: only set this if really necessary + flush_tex_cache = TRUE; } dirty = nvfx->dirty; @@ -305,8 +287,8 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) } } - if(new_fb_mode >= 0) - nvfx_framebuffer_validate(nvfx, new_fb_mode); + if(dirty & NVFX_NEW_FB) + nvfx_framebuffer_validate(nvfx); if(dirty & NVFX_NEW_BLEND) sb_emit(chan, nvfx->blend->sb, nvfx->blend->sb_len); @@ -324,19 +306,19 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) etracer, neverball, foobillard, glest totally misrender TODO: find the right fix */ - if(dirty & (NVFX_NEW_VIEWPORT | NVFX_NEW_RAST | NVFX_NEW_ZSA) || (new_fb_mode >= 0)) + if(dirty & (NVFX_NEW_VIEWPORT | NVFX_NEW_RAST | NVFX_NEW_ZSA)) { nvfx_state_viewport_validate(nvfx); } - if(dirty & NVFX_NEW_ZSA || (new_fb_mode >= 0)) + if(dirty & (NVFX_NEW_ZSA | NVFX_NEW_FB)) { BEGIN_RING(chan, eng3d, NV30_3D_DEPTH_WRITE_ENABLE, 2); OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.writemask); - OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.enabled); + 
OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.enabled); } - if((new_fb_mode >= 0) || (dirty & NVFX_NEW_FRAGPROG)) + if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FB)) nvfx_coord_conventions_validate(nvfx); if(flush_tex_cache) @@ -353,25 +335,6 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) nvfx->dirty = dirty & still_dirty; - render_temps = nvfx->state.render_temps; - if(render_temps) - { - for(int i = 0; i < nvfx->framebuffer.nr_cbufs; ++i) - { - if(render_temps & (1 << i)) { - assert(((struct nvfx_surface*)nvfx->framebuffer.cbufs[i])->temp); - util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.cbufs[i]), - (struct util_dirty_surface*)nvfx->framebuffer.cbufs[i]); - } - } - - if(render_temps & 0x80) { - assert(((struct nvfx_surface*)nvfx->framebuffer.zsbuf)->temp); - util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.zsbuf), - (struct util_dirty_surface*)nvfx->framebuffer.zsbuf); - } - } - return TRUE; } diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c index 406f5bb..ffc0844 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_fb.c +++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c @@ -3,96 +3,25 @@ #include "util/u_format.h" static inline boolean -nvfx_surface_linear_renderable(struct pipe_surface* surf) +nvfx_surface_linear_target(struct pipe_surface* surf) { - /* TODO: precompute this in nvfx_surface creation */ - return (surf->texture->flags & NOUVEAU_RESOURCE_FLAG_LINEAR) - && !(((struct nvfx_surface*)surf)->offset & 63) - && !(((struct nvfx_surface*)surf)->pitch & 63); + return !!((struct nvfx_miptree*)surf->texture)->linear_pitch; } -static inline boolean -nvfx_surface_swizzled_renderable(struct pipe_framebuffer_state* fb, struct pipe_surface* surf) -{ - /* TODO: precompute this in nvfx_surface creation */ - return !((struct nvfx_miptree*)surf->texture)->linear_pitch - && (surf->texture->target != PIPE_TEXTURE_3D || 
u_minify(surf->texture->depth0, surf->u.tex.level) <= 1) - && !(((struct nvfx_surface*)surf)->offset & 127) - && (surf->width == fb->width) - && (surf->height == fb->height) - && !((struct nvfx_surface*)surf)->temp - && (surf->format == PIPE_FORMAT_B8G8R8A8_UNORM || surf->format == PIPE_FORMAT_B8G8R8X8_UNORM || surf->format == PIPE_FORMAT_B5G6R5_UNORM); -} - -static boolean -nvfx_surface_get_render_target(struct pipe_surface* surf, int all_swizzled, struct nvfx_render_target* target) +static void +nvfx_surface_get_render_target(struct pipe_surface* surf, + struct nvfx_render_target* target) { struct nvfx_surface* ns = (struct nvfx_surface*)surf; - if(!ns->temp) - { - target->bo = ((struct nvfx_miptree*)surf->texture)->base.bo; - target->offset = ns->offset; - target->pitch = align(ns->pitch, 64); - assert(target->pitch); - return FALSE; - } - else - { - target->offset = 0; - target->pitch = ns->temp->linear_pitch; - target->bo = ns->temp->base.bo; - assert(target->pitch); - return TRUE; - } -} -int -nvfx_framebuffer_prepare(struct nvfx_context *nvfx) -{ - struct pipe_framebuffer_state *fb = &nvfx->framebuffer; - int i, color_format = 0, zeta_format = 0; - int all_swizzled = 1; - - if(!nvfx->is_nv4x) - assert(fb->nr_cbufs <= 1); - else - assert(fb->nr_cbufs <= 4); - - for (i = 0; i < fb->nr_cbufs; i++) { - if (color_format) { - if(color_format != fb->cbufs[i]->format) - return -1; - } else - color_format = fb->cbufs[i]->format; - - if(!nvfx_surface_swizzled_renderable(fb, fb->cbufs[i])) - all_swizzled = 0; - } - - if (fb->zsbuf) { - /* TODO: return FALSE if we have a format not supporting a depth buffer (e.g. 
r8); currently those are not supported at all */ - if(!nvfx_surface_swizzled_renderable(fb, fb->zsbuf)) - all_swizzled = 0; - - if(all_swizzled && util_format_get_blocksize(color_format) != util_format_get_blocksize(zeta_format)) - all_swizzled = 0; - } - - for (i = 0; i < fb->nr_cbufs; i++) { - if(!((struct nvfx_surface*)fb->cbufs[i])->temp && !all_swizzled && !nvfx_surface_linear_renderable(fb->cbufs[i])) - nvfx_surface_create_temp(&nvfx->pipe, fb->cbufs[i]); - } - - if(fb->zsbuf) { - if(!((struct nvfx_surface*)fb->zsbuf)->temp && !all_swizzled && !nvfx_surface_linear_renderable(fb->zsbuf)) - nvfx_surface_create_temp(&nvfx->pipe, fb->zsbuf); - } - - return all_swizzled; + target->bo = ((struct nvfx_miptree*)surf->texture)->base.bo; + target->offset = ns->offset; + target->pitch = align(ns->pitch, 64); + assert(target->pitch); } void -nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) +nvfx_framebuffer_validate(struct nvfx_context *nvfx) { struct pipe_framebuffer_state *fb = &nvfx->framebuffer; struct nouveau_channel *chan = nvfx->screen->base.channel; @@ -102,37 +31,69 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; unsigned w = fb->width; unsigned h = fb->height; + int all_swizzled =1 , cb_format = 0; + + /* do some sanity checks on the render target state and check if the targets + * are swizzled + */ + nvfx->is_nv4x ? 
assert(fb->nr_cbufs <= 4) : assert(fb->nr_cbufs <= 1); + if(fb->nr_cbufs && fb->zsbuf) + assert(util_format_get_blocksize(fb->cbufs[0]->format) == + util_format_get_blocksize(fb->zsbuf->format)); + + for(i = 0; i < fb->nr_cbufs; i++) { + if(cb_format) + assert(cb_format == fb->cbufs[i]->format); + else + cb_format = fb->cbufs[i]->format; + + if(nvfx_surface_linear_target(fb->cbufs[i])) + all_swizzled = 0; + } + + if(fb->zsbuf && nvfx_surface_linear_target(fb->zsbuf)) + all_swizzled = 0; rt_enable = (NV30_3D_RT_ENABLE_COLOR0 << fb->nr_cbufs) - 1; - if (rt_enable & (NV30_3D_RT_ENABLE_COLOR1 | - NV40_3D_RT_ENABLE_COLOR2 | NV40_3D_RT_ENABLE_COLOR3)) + if(rt_enable & (NV30_3D_RT_ENABLE_COLOR1 | + NV40_3D_RT_ENABLE_COLOR2 | NV40_3D_RT_ENABLE_COLOR3)) rt_enable |= NV30_3D_RT_ENABLE_MRT; - nvfx->state.render_temps = 0; - - for (i = 0; i < fb->nr_cbufs; i++) - nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->cbufs[i], prepare_result, &nvfx->hw_rt[i]) << i; + for(i = 0; i < fb->nr_cbufs; i++) + nvfx_surface_get_render_target(fb->cbufs[i], &nvfx->hw_rt[i]); for(; i < 4; ++i) nvfx->hw_rt[i].bo = NULL; nvfx->hw_zeta.bo = NULL; - if (fb->zsbuf) { - nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->zsbuf, prepare_result, &nvfx->hw_zeta) << 7; - - assert(util_format_get_stride(fb->zsbuf->format, fb->width) <= nvfx->hw_zeta.pitch); - assert(nvfx->hw_zeta.offset + nvfx->hw_zeta.pitch * fb->height <= nvfx->hw_zeta.bo->size); + if(fb->zsbuf) { + nvfx_surface_get_render_target(fb->zsbuf, &nvfx->hw_zeta); + assert(util_format_get_stride(fb->zsbuf->format, fb->width) <= + nvfx->hw_zeta.pitch); } - if (prepare_result) { - assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); + if(all_swizzled) { + /* hardware rounds down render target offset to 64 bytes, + * but surfaces with a size of 2x2 pixel (16bpp) or 1x1 pixel (32bpp) + * have an unaligned start address, for those two important square + * formats we can hack around this limitation by 
adjusting the viewport + */ + if(nvfx->hw_rt[0].offset & 63) { + int delta = nvfx->hw_rt[0].offset & 63; + h = 2; + w = 16; + nvfx->viewport.translate[0] += delta / + (util_format_get_blocksize(fb->cbufs[0]->format) * 2); + nvfx->dirty |= NVFX_NEW_VIEWPORT; + } rt_format = NV30_3D_RT_FORMAT_TYPE_SWIZZLED | - (util_logbase2(fb->width) << NV30_3D_RT_FORMAT_LOG2_WIDTH__SHIFT) | - (util_logbase2(fb->height) << NV30_3D_RT_FORMAT_LOG2_HEIGHT__SHIFT); - } else + (util_logbase2(w) << NV30_3D_RT_FORMAT_LOG2_WIDTH__SHIFT) | + (util_logbase2(h) << NV30_3D_RT_FORMAT_LOG2_HEIGHT__SHIFT); + } else { rt_format = NV30_3D_RT_FORMAT_TYPE_LINEAR; + } if(fb->nr_cbufs > 0) { switch (fb->cbufs[0]->format) { diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c index cac3fca..73a5260 100644 --- a/src/gallium/drivers/nvfx/nvfx_surface.c +++ b/src/gallium/drivers/nvfx/nvfx_surface.c @@ -428,6 +428,8 @@ nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int void nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf) { + assert (0); + struct nvfx_surface* ns = (struct nvfx_surface*)surf; struct pipe_resource template; memset(&template, 0, sizeof(struct pipe_resource)); -- 1.7.7.5
Lucas Stach
2012-Jan-10 11:41 UTC
[Nouveau] [PATCH 3/4] nvfx: drop render temporaries code
This code is unneeded now, we don't use render temps any more. Signed-off-by: Lucas Stach <dev at lynxeye.de> --- src/gallium/drivers/nvfx/nvfx_fragtex.c | 4 - src/gallium/drivers/nvfx/nvfx_miptree.c | 18 +--- src/gallium/drivers/nvfx/nvfx_resource.h | 30 +------ src/gallium/drivers/nvfx/nvfx_surface.c | 138 +++--------------------------- 4 files changed, 18 insertions(+), 172 deletions(-) diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c index cb87539..036991e 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragtex.c +++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c @@ -189,10 +189,6 @@ nvfx_fragtex_validate(struct nvfx_context *nvfx) samplers &= ~(1 << unit); if(nvfx->fragment_sampler_views[unit] && nvfx->tex_sampler[unit]) { - util_dirty_surfaces_use_for_sampling(&nvfx->pipe, - &((struct nvfx_miptree*)nvfx->fragment_sampler_views[unit]->texture)->dirty_surfaces, - nvfx_surface_flush); - if(!nvfx->is_nv4x) nv30_fragtex_set(nvfx, unit); else diff --git a/src/gallium/drivers/nvfx/nvfx_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c index 8695aa3..dd9ad35 100644 --- a/src/gallium/drivers/nvfx/nvfx_miptree.c +++ b/src/gallium/drivers/nvfx/nvfx_miptree.c @@ -99,9 +99,7 @@ nvfx_miptree_layout(struct nvfx_miptree *mt) static void nvfx_miptree_surface_final_destroy(struct pipe_surface* ps) { - struct nvfx_surface* ns = (struct nvfx_surface*)ps; pipe_resource_reference(&ps->texture, 0); - pipe_resource_reference((struct pipe_resource**)&ns->temp, 0); FREE(ps); } @@ -127,7 +125,6 @@ nvfx_miptree_create_skeleton(struct pipe_screen *pscreen, const struct pipe_reso return NULL; mt->base.base = *pt; - util_dirty_surfaces_init(&mt->dirty_surfaces); pipe_reference_init(&mt->base.base.reference, 1); mt->base.base.screen = pscreen; @@ -201,24 +198,17 @@ nvfx_miptree_surface_new(struct pipe_context *pipe, struct pipe_resource *pt, if(util_surfaces_get(&mt->surfaces, sizeof(struct nvfx_surface), pipe, pt, level, surf_tmpl->u.tex.first_layer, 
surf_tmpl->usage, (struct pipe_surface **)&ns)) { - util_dirty_surface_init(&ns->base); ns->pitch = nvfx_subresource_pitch(pt, level); ns->offset = nvfx_subresource_offset(pt, surf_tmpl->u.tex.first_layer, level, surf_tmpl->u.tex.first_layer); } - return &ns->base.base; + return &ns->base; } void nvfx_miptree_surface_del(struct pipe_context *pipe, struct pipe_surface *ps) { - struct nvfx_surface* ns = (struct nvfx_surface*)ps; - - if(!ns->temp) - { - assert(!util_dirty_surface_is_dirty(&ns->base)); - util_surfaces_detach(&((struct nvfx_miptree*)ps->texture)->surfaces, ps); - pipe_resource_reference(&ps->texture, 0); - FREE(ps); - } + util_surfaces_detach(&((struct nvfx_miptree*)ps->texture)->surfaces, ps); + pipe_resource_reference(&ps->texture, 0); + FREE(ps); } diff --git a/src/gallium/drivers/nvfx/nvfx_resource.h b/src/gallium/drivers/nvfx/nvfx_resource.h index 6e0493f..4a66358 100644 --- a/src/gallium/drivers/nvfx/nvfx_resource.h +++ b/src/gallium/drivers/nvfx/nvfx_resource.h @@ -47,18 +47,6 @@ nvfx_resource_on_gpu(struct pipe_resource* pr) #define NVFX_MAX_TEXTURE_LEVELS 16 -/* We have the following invariants for render temporaries - * - * 1. Render temporaries are always linear - * 2. Render temporaries are always up to date - * 3. Currently, render temporaries are destroyed when the resource is used for sampling, but kept for any other use - * - * Also, we do NOT flush temporaries on any pipe->flush(). - * This is fine, as long as scanout targets and shared resources never need temps. - * - * TODO: we may want to also support swizzled temporaries to improve performance in some cases. 
- */ - struct nvfx_miptree { struct nvfx_resource base; @@ -67,15 +55,12 @@ struct nvfx_miptree { unsigned level_offset[NVFX_MAX_TEXTURE_LEVELS]; struct util_surfaces surfaces; - struct util_dirty_surfaces dirty_surfaces; }; struct nvfx_surface { - struct util_dirty_surface base; + struct pipe_surface base; unsigned pitch; unsigned offset; - - struct nvfx_miptree* temp; }; static INLINE struct nouveau_bo * @@ -86,13 +71,6 @@ nvfx_surface_buffer(struct pipe_surface *surf) return mt->bo; } -static INLINE struct util_dirty_surfaces* -nvfx_surface_get_dirty_surfaces(struct pipe_surface* surf) -{ - struct nvfx_miptree *mt = (struct nvfx_miptree *)surf->texture; - return &mt->dirty_surfaces; -} - void nvfx_init_resource_functions(struct pipe_context *pipe); @@ -159,12 +137,6 @@ nvfx_subresource_pitch(struct pipe_resource* pt, unsigned level) } } -void -nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf); - -void -nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf); - struct nvfx_buffer { struct nvfx_resource base; diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c index 73a5260..4dd6ccc 100644 --- a/src/gallium/drivers/nvfx/nvfx_surface.c +++ b/src/gallium/drivers/nvfx/nvfx_surface.c @@ -90,31 +90,20 @@ nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, rgn->y = y; rgn->z = 0; - if(surf->temp) - { - rgn->bo = surf->temp->base.bo; - rgn->offset = 0; - rgn->pitch = surf->temp->linear_pitch; - - if(for_write) - util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(&surf->base.base), &surf->base); - } else { - rgn->bo = ((struct nvfx_resource*)surf->base.base.texture)->bo; - rgn->offset = surf->offset; - - if(surf->base.base.texture->flags & NOUVEAU_RESOURCE_FLAG_LINEAR) - rgn->pitch = surf->pitch; - else - { - rgn->pitch = 0; - rgn->z = surf->base.base.u.tex.first_layer; - rgn->w = surf->base.base.width; - rgn->h = surf->base.base.height; - rgn->d 
= u_minify(surf->base.base.texture->depth0, surf->base.base.u.tex.level); - } + rgn->bo = ((struct nvfx_resource*)surf->base.texture)->bo; + rgn->offset = surf->offset; + + if(surf->base.texture->flags & NOUVEAU_RESOURCE_FLAG_LINEAR) + rgn->pitch = surf->pitch; + else { + rgn->pitch = 0; + rgn->z = surf->base.u.tex.first_layer; + rgn->w = surf->base.width; + rgn->h = surf->base.height; + rgn->d = u_minify(surf->base.texture->depth0, surf->base.u.tex.level); } - nvfx_region_set_format(rgn, surf->base.base.format); + nvfx_region_set_format(rgn, surf->base.format); if(!rgn->pitch) nv04_region_try_to_linearize(rgn); } @@ -125,7 +114,7 @@ nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* if(pt->target != PIPE_BUFFER) { struct nvfx_surface* ns = (struct nvfx_surface*)util_surfaces_peek(&((struct nvfx_miptree*)pt)->surfaces, pt, level, z); - if(ns && util_dirty_surface_is_dirty(&ns->base)) + if(ns) { nvfx_region_init_for_surface(rgn, ns, x, y, for_write); return; @@ -376,107 +365,6 @@ nvfx_screen_surface_init(struct pipe_screen *pscreen) } static void -nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int to_temp) -{ - struct nvfx_surface* ns = (struct nvfx_surface*)surf; - struct pipe_box box; - struct nvfx_context* nvfx = nvfx_context(pipe); - struct nvfx_miptree* temp; - unsigned use_vertex_buffers; - boolean use_index_buffer; - unsigned base_vertex; - - /* temporarily detach the temp, so it isn't used in place of the actual resource */ - temp = ns->temp; - ns->temp = 0; - - // TODO: we really should do this validation before setting these variable in draw calls - use_vertex_buffers = nvfx->use_vertex_buffers; - use_index_buffer = nvfx->use_index_buffer; - base_vertex = nvfx->base_vertex; - - box.x = box.y = 0; - assert(surf->u.tex.first_layer == surf->u.tex.last_layer); - box.width = surf->width; - box.height = surf->height; - box.depth = 1; - - if(to_temp) { - box.z = surf->u.tex.first_layer; - 
nvfx_resource_copy_region(pipe, &temp->base.base, 0, 0, 0, 0, surf->texture, surf->u.tex.level, &box); - } - else { - box.z = 0; - nvfx_resource_copy_region(pipe, surf->texture, surf->u.tex.level, 0, 0, surf->u.tex.first_layer, &temp->base.base, 0, &box); - } - - /* If this triggers, it probably means we attempted to use the blitter - * but failed due to non-renderability of the target. - * Obviously, this would lead to infinite recursion if supported. */ - assert(!ns->temp); - - ns->temp = temp; - - nvfx->use_vertex_buffers = use_vertex_buffers; - nvfx->use_index_buffer = use_index_buffer; - nvfx->base_vertex = base_vertex; - - nvfx->dirty |= NVFX_NEW_ARRAYS; - nvfx->draw_dirty |= NVFX_NEW_ARRAYS; -} - -void -nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf) -{ - assert (0); - - struct nvfx_surface* ns = (struct nvfx_surface*)surf; - struct pipe_resource template; - memset(&template, 0, sizeof(struct pipe_resource)); - template.target = PIPE_TEXTURE_2D; - template.format = surf->format; - template.width0 = surf->width; - template.height0 = surf->height; - template.depth0 = 1; - template.nr_samples = surf->texture->nr_samples; - template.flags = NOUVEAU_RESOURCE_FLAG_LINEAR; - - assert(!ns->temp && !util_dirty_surface_is_dirty(&ns->base)); - - ns->temp = (struct nvfx_miptree*)nvfx_miptree_create(pipe->screen, &template); - nvfx_surface_copy_temp(pipe, surf, 1); -} - -void -nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf) -{ - struct nvfx_context* nvfx = (struct nvfx_context*)pipe; - struct nvfx_surface* ns = (struct nvfx_surface*)surf; - boolean bound = FALSE; - - nvfx_surface_copy_temp(pipe, surf, 0); - - util_dirty_surface_set_clean(nvfx_surface_get_dirty_surfaces(surf), &ns->base); - - if(nvfx->framebuffer.zsbuf == surf) - bound = TRUE; - else - { - for(unsigned i = 0; i < nvfx->framebuffer.nr_cbufs; ++i) - { - if(nvfx->framebuffer.cbufs[i] == surf) - { - bound = TRUE; - break; - } - } - } - - if(!bound) - 
pipe_resource_reference((struct pipe_resource**)&ns->temp, 0); -} - -static void nvfx_clear_render_target(struct pipe_context *pipe, struct pipe_surface *dst, const union pipe_color_union *color, -- 1.7.7.5
Lucas Stach
2012-Jan-10 11:41 UTC
[Nouveau] [PATCH 4/4] nvfx: random cleanups of the state validation code
Signed-off-by: Lucas Stach <dev at lynxeye.de> --- src/gallium/drivers/nvfx/nvfx_state_emit.c | 49 ++++++++++++--------------- 1 files changed, 22 insertions(+), 27 deletions(-) diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index e2cfb76..a959015 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -200,7 +200,6 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) unsigned dirty; unsigned still_dirty = 0; boolean flush_tex_cache = FALSE; - unsigned render_temps; if(nvfx != nvfx->screen->cur_ctx) { @@ -212,14 +211,6 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) nvfx->relocs_needed = NVFX_RELOCATE_ALL; } - if(nvfx->dirty & NVFX_NEW_SAMPLER) { - nvfx->dirty &=~ NVFX_NEW_SAMPLER; - nvfx_fragtex_validate(nvfx); - - // TODO: only set this if really necessary - flush_tex_cache = TRUE; - } - dirty = nvfx->dirty; if(nvfx->render_mode == HW) @@ -252,6 +243,13 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) } } + if(dirty & NVFX_NEW_SAMPLER) { + nvfx_fragtex_validate(nvfx); + + // TODO: only set this if really necessary + flush_tex_cache = TRUE; + } + if(dirty & NVFX_NEW_RAST) sb_emit(chan, nvfx->rasterizer->sb, nvfx->rasterizer->sb_len); @@ -264,10 +262,13 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(nvfx->dirty & (NVFX_NEW_UCP | NVFX_NEW_RAST)) nvfx_ucp_validate(nvfx); - if(nvfx->use_vp_clipping && (nvfx->dirty & (NVFX_NEW_UCP | NVFX_NEW_VERTPROG | NVFX_NEW_RAST))) + if(nvfx->use_vp_clipping && (nvfx->dirty & + (NVFX_NEW_UCP | NVFX_NEW_VERTPROG | + NVFX_NEW_RAST))) nvfx_vertprog_ucp_validate(nvfx); - if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST | NVFX_NEW_VERTPROG | NVFX_NEW_SPRITE)) + if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST | + NVFX_NEW_VERTPROG | NVFX_NEW_SPRITE)) { nvfx_fragprog_validate(nvfx); if(dirty & NVFX_NEW_FRAGPROG) @@ -302,11 +303,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(dirty & 
NVFX_NEW_SR) nvfx_state_sr_validate(nvfx); -/* All these dependencies are wrong, but otherwise - etracer, neverball, foobillard, glest totally misrender - TODO: find the right fix -*/ - if(dirty & (NVFX_NEW_VIEWPORT | NVFX_NEW_RAST | NVFX_NEW_ZSA)) + if(dirty & NVFX_NEW_VIEWPORT) { nvfx_state_viewport_validate(nvfx); } @@ -314,23 +311,21 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(dirty & (NVFX_NEW_ZSA | NVFX_NEW_FB)) { BEGIN_RING(chan, eng3d, NV30_3D_DEPTH_WRITE_ENABLE, 2); - OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.writemask); - OUT_RING(chan, nvfx->framebuffer.zsbuf && nvfx->zsa->pipe.depth.enabled); + OUT_RING(chan, nvfx->framebuffer.zsbuf && + nvfx->zsa->pipe.depth.writemask); + OUT_RING(chan, nvfx->framebuffer.zsbuf && + nvfx->zsa->pipe.depth.enabled); } if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FB)) nvfx_coord_conventions_validate(nvfx); - if(flush_tex_cache) + if(flush_tex_cache && nvfx->is_nv4x) { - // TODO: what about nv30? - if(nvfx->is_nv4x) - { - BEGIN_RING(chan, eng3d, NV40_3D_TEX_CACHE_CTL, 1); - OUT_RING(chan, 2); - BEGIN_RING(chan, eng3d, NV40_3D_TEX_CACHE_CTL, 1); - OUT_RING(chan, 1); - } + BEGIN_RING(chan, eng3d, NV40_3D_TEX_CACHE_CTL, 1); + OUT_RING(chan, 2); + BEGIN_RING(chan, eng3d, NV40_3D_TEX_CACHE_CTL, 1); + OUT_RING(chan, 1); } nvfx->dirty = dirty & still_dirty; -- 1.7.7.5
Patrice Mandin
2012-Jan-14 12:14 UTC
[Nouveau] [PATCH 0/4] nvfx: rework render temps code and fixes
Le Tue, 10 Jan 2012 12:41:00 +0100 Lucas Stach <dev at lynxeye.de> a écrit: > This patch series silences some unknown cap warnings and fixes up > coding style (patch 1+4). > > The most important part of this series are the two patches in the > middle. They rework the state_fb code, so that we are able to > render to not 64 byte aligned targets, as this is the only real > use-case for render temporaries this allows us to drop temp code > completely and simplifies a lot of cases. > > This is only tested on nv49, but there should be nothing different > in this area for other nv3x or nv4x gpus. > > I plan on working on top of that in the following days, so please > review and apply as time permits. > > Thanks, > Lucas Hello, I just tried the whole series on my NV34. Here is what I noticed: - Rendering seems a bit slower (in ioquake3, in the first map q3dm0, rendering the mirror and the portal causes more audio stuttering), but it is just my feeling. - ut2004demo hangs at start (it displays just one frame in the nvidia licence plate part), but the system does not crash, and the game can be killed. Do you want me to check which of your patches makes ut2004 hang? -- Patrice Mandin WWW: http://pmandin.atari.org/ Programmeur Linux, Atari Spécialité: Développement, jeux "who writes the code, decides"