Ilia Mirkin
2015-May-25 19:29 UTC
[Nouveau] [PATCH 1/2] nv30/draw: rework some of the output vertex buffer logic
This makes the vertex buffer go to GART, not VRAM, and redoes the mapping to not use the UNSYNCHRONIZED access (which is meaningless on a VRAM buffer anyways). While we're at it, add some flushes for VBO data. Moving the vertex buffer from VRAM to GART makes glxgears work fully with NV30_SWTNL=1. The other changes just seem like a good idea. I'm not sure *why* moving the buffer from VRAM makes it work... perhaps something doesn't get flushed in time? However this is a single use by the GPU buffer, so STREAM seems like the correct usage semantic for it. Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu> Cc: "10.5 10.6" <mesa-stable at lists.freedesktop.org> --- src/gallium/drivers/nouveau/nv30/nv30_draw.c | 30 +++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c index 6a0d06f..a681135 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c @@ -71,12 +71,12 @@ nv30_render_allocate_vertices(struct vbuf_render *render, struct nv30_render *r = nv30_render(render); struct nv30_context *nv30 = r->nv30; - r->length = vertex_size * nr_vertices; + r->length = (uint32_t)vertex_size * (uint32_t)nr_vertices; if (r->offset + r->length >= render->max_vertex_buffer_bytes) { pipe_resource_reference(&r->buffer, NULL); r->buffer = pipe_buffer_create(&nv30->screen->base.base, - PIPE_BIND_VERTEX_BUFFER, 0, + PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM, render->max_vertex_buffer_bytes); if (!r->buffer) return FALSE; @@ -91,10 +91,14 @@ static void * nv30_render_map_vertices(struct vbuf_render *render) { struct nv30_render *r = nv30_render(render); - char *map = pipe_buffer_map(&r->nv30->base.pipe, r->buffer, - PIPE_TRANSFER_WRITE | - PIPE_TRANSFER_UNSYNCHRONIZED, &r->transfer); - return map + r->offset; + char *map = pipe_buffer_map_range( + &r->nv30->base.pipe, r->buffer, + r->offset, r->length, + 
PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_DISCARD_RANGE, + &r->transfer); + assert(map); + return map; } static void @@ -127,12 +131,18 @@ nv30_render_draw_elements(struct vbuf_render *render, for (i = 0; i < r->vertex_info.num_attribs; i++) { PUSH_RESRC(push, NV30_3D(VTXBUF(i)), BUFCTX_VTXTMP, nv04_resource(r->buffer), r->offset + r->vtxptr[i], - NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0); + NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1); } if (!nv30_state_validate(nv30, ~0, FALSE)) return; + if (nv30->base.vbo_dirty) { + BEGIN_NV04(push, NV30_3D(VTX_CACHE_INVALIDATE_1710), 1); + PUSH_DATA (push, 0); + nv30->base.vbo_dirty = FALSE; + } + BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); PUSH_DATA (push, r->prim); @@ -178,6 +188,12 @@ nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr) if (!nv30_state_validate(nv30, ~0, FALSE)) return; + if (nv30->base.vbo_dirty) { + BEGIN_NV04(push, NV30_3D(VTX_CACHE_INVALIDATE_1710), 1); + PUSH_DATA (push, 0); + nv30->base.vbo_dirty = FALSE; + } + BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); PUSH_DATA (push, r->prim); -- 2.3.6
Ilia Mirkin
2015-May-25 19:29 UTC
[Nouveau] [PATCH 2/2] nv30/draw: switch varying hookup logic to know about texcoords
Commit 8acaf862dfe switched things over to use TEXCOORD instead of GENERIC, but did not update the nv30 swtnl draw paths. This teaches the draw logic about TEXCOORD. Among other things, this fixes a crash in demos/arbocclude when using swtnl. Curiously enough, the point-sprite piglit works without this. Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu> Cc: "10.5 10.6" <mesa-stable at lists.freedesktop.org> --- src/gallium/drivers/nouveau/nv30/nv30_draw.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c index a681135..03c0c70 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c @@ -230,22 +230,24 @@ static const struct { [TGSI_SEMANTIC_BCOLOR ] = { EMIT_4F, INTERP_LINEAR , 1, 3, 0x00000004 }, [TGSI_SEMANTIC_FOG ] = { EMIT_4F, INTERP_PERSPECTIVE, 5, 5, 0x00000010 }, [TGSI_SEMANTIC_PSIZE ] = { EMIT_1F_PSIZE, INTERP_POS , 6, 6, 0x00000020 }, - [TGSI_SEMANTIC_GENERIC ] = { EMIT_4F, INTERP_PERSPECTIVE, 8, 7, 0x00004000 } + [TGSI_SEMANTIC_TEXCOORD] = { EMIT_4F, INTERP_PERSPECTIVE, 8, 7, 0x00004000 }, }; static boolean vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx) { - struct pipe_screen *pscreen = &r->nv30->screen->base.base; + struct nv30_screen *screen = r->nv30->screen; struct nv30_fragprog *fp = r->nv30->fragprog.program; struct vertex_info *vinfo = &r->vertex_info; enum pipe_format format; uint emit = EMIT_OMIT; uint result = *idx; - if (sem == TGSI_SEMANTIC_GENERIC && result >= 8) { - for (result = 0; result < 8; result++) { - if (fp->texcoord[result] == *idx) { + if (sem == TGSI_SEMANTIC_GENERIC) { + uint num_texcoords = (screen->eng3d->oclass < NV40_3D_CLASS) ? 
8 : 10; + for (result = 0; result < num_texcoords; result++) { + if (fp->texcoord[result] == *idx + 8) { + sem = TGSI_SEMANTIC_TEXCOORD; emit = vroute[sem].emit; break; } @@ -260,11 +262,11 @@ vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx) draw_emit_vertex_attr(vinfo, emit, vroute[sem].interp, attrib); format = draw_translate_vinfo_format(emit); - r->vtxfmt[attrib] = nv30_vtxfmt(pscreen, format)->hw; + r->vtxfmt[attrib] = nv30_vtxfmt(&screen->base.base, format)->hw; r->vtxptr[attrib] = vinfo->size | NV30_3D_VTXBUF_DMA1; vinfo->size += draw_translate_vinfo_size(emit); - if (nv30_screen(pscreen)->eng3d->oclass < NV40_3D_CLASS) { + if (screen->eng3d->oclass < NV40_3D_CLASS) { r->vtxprog[attrib][0] = 0x001f38d8; r->vtxprog[attrib][1] = 0x0080001b | (attrib << 9); r->vtxprog[attrib][2] = 0x0836106c; @@ -276,7 +278,12 @@ vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx) r->vtxprog[attrib][3] = 0x6041ff80 | (result + vroute[sem].vp40) << 2; } - *idx = vroute[sem].ow40 << result; + if (result < 8) + *idx = vroute[sem].ow40 << result; + else { + assert(sem == TGSI_SEMANTIC_TEXCOORD); + *idx = 0x00001000 << (result - 8); + } return TRUE; } @@ -330,7 +337,7 @@ nv30_render_validate(struct nv30_context *nv30) while (pntc && attrib < 16) { uint index = ffs(pntc) - 1; pntc &= ~(1 << index); - if (vroute_add(r, attrib, TGSI_SEMANTIC_GENERIC, &index)) { + if (vroute_add(r, attrib, TGSI_SEMANTIC_TEXCOORD, &index)) { vp_attribs |= (1 << attrib++); vp_results |= index; } -- 2.3.6
Tobias Klausmann
2015-May-26 00:37 UTC
[Nouveau] [PATCH 2/2] nv30/draw: switch varying hookup logic to know about texcoords
On 25.05.2015 21:29, Ilia Mirkin wrote:> Commit 8acaf862dfe switched things over to use TEXCOORD instead of > GENERIC, but did not update the nv30 swtnl draw paths. This teaches the > draw logic about TEXCOORD. > > Among other things, this fixes a crash in demos/arbocclude when using > swtnl. Curiously enough, the point-sprite piglit works without this. > > Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu> > Cc: "10.5 10.6" <mesa-stable at lists.freedesktop.org> > --- > src/gallium/drivers/nouveau/nv30/nv30_draw.c | 25 ++++++++++++++++--------- > 1 file changed, 16 insertions(+), 9 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c > index a681135..03c0c70 100644 > --- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c > +++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c > @@ -230,22 +230,24 @@ static const struct { > [TGSI_SEMANTIC_BCOLOR ] = { EMIT_4F, INTERP_LINEAR , 1, 3, 0x00000004 }, > [TGSI_SEMANTIC_FOG ] = { EMIT_4F, INTERP_PERSPECTIVE, 5, 5, 0x00000010 }, > [TGSI_SEMANTIC_PSIZE ] = { EMIT_1F_PSIZE, INTERP_POS , 6, 6, 0x00000020 }, > - [TGSI_SEMANTIC_GENERIC ] = { EMIT_4F, INTERP_PERSPECTIVE, 8, 7, 0x00004000 } > + [TGSI_SEMANTIC_TEXCOORD] = { EMIT_4F, INTERP_PERSPECTIVE, 8, 7, 0x00004000 }, > }; > > static boolean > vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx) > { > - struct pipe_screen *pscreen = &r->nv30->screen->base.base; > + struct nv30_screen *screen = r->nv30->screen; > struct nv30_fragprog *fp = r->nv30->fragprog.program; > struct vertex_info *vinfo = &r->vertex_info; > enum pipe_format format; > uint emit = EMIT_OMIT; > uint result = *idx; > > - if (sem == TGSI_SEMANTIC_GENERIC && result >= 8) { > - for (result = 0; result < 8; result++) { > - if (fp->texcoord[result] == *idx) { > + if (sem == TGSI_SEMANTIC_GENERIC) { > + uint num_texcoords = (screen->eng3d->oclass < NV40_3D_CLASS) ? 
8 : 10; > + for (result = 0; result < num_texcoords; result++) { > + if (fp->texcoord[result] == *idx + 8) {
Maybe I'm too tired, but why exactly *idx + 8?
> + sem = TGSI_SEMANTIC_TEXCOORD; > emit = vroute[sem].emit; > break; > } > @@ -260,11 +262,11 @@ vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx) > draw_emit_vertex_attr(vinfo, emit, vroute[sem].interp, attrib); > format = draw_translate_vinfo_format(emit); > > - r->vtxfmt[attrib] = nv30_vtxfmt(pscreen, format)->hw; > + r->vtxfmt[attrib] = nv30_vtxfmt(&screen->base.base, format)->hw; > r->vtxptr[attrib] = vinfo->size | NV30_3D_VTXBUF_DMA1; > vinfo->size += draw_translate_vinfo_size(emit); > > - if (nv30_screen(pscreen)->eng3d->oclass < NV40_3D_CLASS) { > + if (screen->eng3d->oclass < NV40_3D_CLASS) { > r->vtxprog[attrib][0] = 0x001f38d8; > r->vtxprog[attrib][1] = 0x0080001b | (attrib << 9); > r->vtxprog[attrib][2] = 0x0836106c; > @@ -276,7 +278,12 @@ vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx) > r->vtxprog[attrib][3] = 0x6041ff80 | (result + vroute[sem].vp40) << 2; > } > > - *idx = vroute[sem].ow40 << result; > + if (result < 8) > + *idx = vroute[sem].ow40 << result; > + else { > + assert(sem == TGSI_SEMANTIC_TEXCOORD); > + *idx = 0x00001000 << (result - 8); > + } > return TRUE; > } > > @@ -330,7 +337,7 @@ nv30_render_validate(struct nv30_context *nv30) > > while (pntc && attrib < 16) { > uint index = ffs(pntc) - 1; pntc &= ~(1 << index); > - if (vroute_add(r, attrib, TGSI_SEMANTIC_GENERIC, &index)) { > + if (vroute_add(r, attrib, TGSI_SEMANTIC_TEXCOORD, &index)) { > vp_attribs |= (1 << attrib++); > vp_results |= index; > }
Tobias Klausmann
2015-May-26 00:42 UTC
[Nouveau] [PATCH 1/2] nv30/draw: rework some of the output vertex buffer logic
On 25.05.2015 21:29, Ilia Mirkin wrote: > This makes the vertex buffer go to GART, not VRAM, and redoes the > mapping to not use the UNSYNCHRONIZED access (which is meaningless on a > VRAM buffer anyways). While we're at it, add some flushes for VBO data. > > Moving the vertex buffer from VRAM to GART makes glxgears work fully > with NV30_SWTNL=1. The other changes just seem like a good idea. I'm not > sure *why* moving the buffer from VRAM makes it work... perhaps > something doesn't get flushed in time? However this is a single use by > the GPU buffer, so STREAM seems like the correct usage semantic for it.
I'm not really happy moving things to GART and don't see why this resolves the issue, but granted, if it works out :-)
Reviewed-by: Tobias Klausmann <tobias.johannes.klausmann at mni.thm.de>
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu> > Cc: "10.5 10.6" <mesa-stable at lists.freedesktop.org> > --- > src/gallium/drivers/nouveau/nv30/nv30_draw.c | 30 +++++++++++++++++++++------- > 1 file changed, 23 insertions(+), 7 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c > index 6a0d06f..a681135 100644 > --- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c > +++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c > @@ -71,12 +71,12 @@ nv30_render_allocate_vertices(struct vbuf_render *render, > struct nv30_render *r = nv30_render(render); > struct nv30_context *nv30 = r->nv30; > > - r->length = vertex_size * nr_vertices; > + r->length = (uint32_t)vertex_size * (uint32_t)nr_vertices; > > if (r->offset + r->length >= render->max_vertex_buffer_bytes) { > pipe_resource_reference(&r->buffer, NULL); > r->buffer = pipe_buffer_create(&nv30->screen->base.base, > - PIPE_BIND_VERTEX_BUFFER, 0, > + PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM, > render->max_vertex_buffer_bytes); > if (!r->buffer) > return FALSE; > @@ -91,10 +91,14 @@ static void * > nv30_render_map_vertices(struct vbuf_render *render) > { > struct
nv30_render *r = nv30_render(render); > - char *map = pipe_buffer_map(&r->nv30->base.pipe, r->buffer, > - PIPE_TRANSFER_WRITE | > - PIPE_TRANSFER_UNSYNCHRONIZED, &r->transfer); > - return map + r->offset; > + char *map = pipe_buffer_map_range( > + &r->nv30->base.pipe, r->buffer, > + r->offset, r->length, > + PIPE_TRANSFER_WRITE | > + PIPE_TRANSFER_DISCARD_RANGE, > + &r->transfer); > + assert(map); > + return map; > } > > static void > @@ -127,12 +131,18 @@ nv30_render_draw_elements(struct vbuf_render *render, > for (i = 0; i < r->vertex_info.num_attribs; i++) { > PUSH_RESRC(push, NV30_3D(VTXBUF(i)), BUFCTX_VTXTMP, > nv04_resource(r->buffer), r->offset + r->vtxptr[i], > - NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0); > + NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1); > } > > if (!nv30_state_validate(nv30, ~0, FALSE)) > return; > > + if (nv30->base.vbo_dirty) { > + BEGIN_NV04(push, NV30_3D(VTX_CACHE_INVALIDATE_1710), 1); > + PUSH_DATA (push, 0); > + nv30->base.vbo_dirty = FALSE; > + } > + > BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); > PUSH_DATA (push, r->prim); > > @@ -178,6 +188,12 @@ nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr) > if (!nv30_state_validate(nv30, ~0, FALSE)) > return; > > + if (nv30->base.vbo_dirty) { > + BEGIN_NV04(push, NV30_3D(VTX_CACHE_INVALIDATE_1710), 1); > + PUSH_DATA (push, 0); > + nv30->base.vbo_dirty = FALSE; > + } > + > BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); > PUSH_DATA (push, r->prim); >
Possibly Parallel Threads
- [PATCH 2/2] nv30/draw: switch varying hookup logic to know about texcoords
- [PATCH 2/2] nv30/draw: switch varying hookup logic to know about texcoords
- [PATCH try 2 1/2] gallium/nouveau: decouple nouveau_fence implementation from screen
- [PATCH 1/2] gallium/nouveau: decouple nouveau_fence implementation from screen
- [PATCH 1/2] nv30: avoid doing extra work on clear and hitting unexpected states