Ilia Mirkin
2015-Sep-09 07:17 UTC
[Nouveau] [PATCH] nvc0: keep track of cb bindings per buffer, use for upload settings
CB updates to bound buffers need to go through the CB_DATA endpoints, otherwise the shader may not notice that the updates happened. Furthermore, these updates have to go in to the same address as the bound buffer, otherwise, again, the shader may not notice updates. So we keep track of all the places where a constbuf is bound, and iterate over all of them when updating data. If a binding is found that encompasses the region to be updated, then we use the settings of that binding for the upload. Otherwise we upload as a regular data update. This fixes piglit 'arb_uniform_buffer_object-rendering offset' as well as blurriness in Witcher2. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91890 Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu> Cc: "11.0" <mesa-stable at lists.freedesktop.org> --- src/gallium/drivers/nouveau/nouveau_buffer.c | 4 +- src/gallium/drivers/nouveau/nouveau_buffer.h | 2 + src/gallium/drivers/nouveau/nouveau_context.h | 5 ++- src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 8 ++-- src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 2 + .../drivers/nouveau/nvc0/nvc0_state_validate.c | 3 +- src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c | 46 ++++++++++++++++++++-- 7 files changed, 58 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c index 912b778..4937dae 100644 --- a/src/gallium/drivers/nouveau/nouveau_buffer.c +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c @@ -206,8 +206,8 @@ nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx, nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain, tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size); else - if ((buf->base.bind & PIPE_BIND_CONSTANT_BUFFER) && nv->push_cb && can_cb) - nv->push_cb(nv, buf->bo, buf->domain, buf->offset, buf->base.width0, + if (nv->push_cb && can_cb) + nv->push_cb(nv, buf, base, size / 4, (const uint32_t *)data); else nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data); diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.h b/src/gallium/drivers/nouveau/nouveau_buffer.h index 7e6a6cc..d45bf7a 100644 --- a/src/gallium/drivers/nouveau/nouveau_buffer.h +++ b/src/gallium/drivers/nouveau/nouveau_buffer.h @@ -41,6 +41,8 @@ struct nv04_resource { uint8_t status; uint8_t domain; + uint16_t cb_bindings[6]; /* per-shader per-slot bindings */ + struct nouveau_fence *fence; struct nouveau_fence *fence_wr; diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h index 24deb7e..decb271 100644 --- a/src/gallium/drivers/nouveau/nouveau_context.h +++ b/src/gallium/drivers/nouveau/nouveau_context.h @@ -6,6 +6,8 @@ #define NOUVEAU_MAX_SCRATCH_BUFS 4 +struct nv04_resource; + struct nouveau_context { struct pipe_context pipe; struct nouveau_screen *screen; @@ -23,8 +25,7 @@ struct nouveau_context { unsigned, const void *); /* base, size refer to the whole constant buffer */ void (*push_cb)(struct nouveau_context *, - struct nouveau_bo *, unsigned domain, - unsigned base, unsigned size, + struct nv04_resource *, unsigned offset, unsigned words, const uint32_t *); /* @return: @ref reduced by nr of references found in context */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 6ed79cf..30bee3a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -299,10 +299,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv, struct nouveau_bo *dst, unsigned offset, unsigned domain, unsigned size, const void *data); void -nvc0_cb_push(struct nouveau_context *, - struct nouveau_bo *bo, unsigned domain, - unsigned base, unsigned size, - unsigned offset, unsigned words, const uint32_t *data); +nvc0_cb_bo_push(struct nouveau_context *, + struct nouveau_bo *bo, unsigned domain, + unsigned base, unsigned size, + unsigned offset, unsigned words, const uint32_t *data); /* nvc0_vbo.c */ void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index ee29912..c5bfd03 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -831,6 +831,8 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, } nvc0->constbuf_dirty[s] |= 1 << i; + if (nvc0->constbuf[s][i].u.buf) + nv04_resource(nvc0->constbuf[s][i].u.buf)->cb_bindings[s] &= ~(1 << i); pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, res); nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? true : false; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index 47bd66d..aec0609 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -440,7 +440,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); PUSH_DATA (push, (0 << 4) | 1); } - nvc0_cb_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base), + nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base), base, nvc0->state.uniform_buffer_bound[s], 0, (size + 3) / 4, nvc0->constbuf[s][0].u.data); @@ -458,6 +458,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD); nvc0->cb_dirty = 1; /* Force cache flush for UBO. */ + res->cb_bindings[s] |= 1 << i; } else { BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); PUSH_DATA (push, (i << 4) | 0); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c index 7cc5b4b..ff2ad19 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c @@ -506,12 +506,49 @@ nvc0_miptree_transfer_unmap(struct pipe_context *pctx, } /* This happens rather often with DTD9/st. */ -void +static void nvc0_cb_push(struct nouveau_context *nv, - struct nouveau_bo *bo, unsigned domain, - unsigned base, unsigned size, + struct nv04_resource *res, unsigned offset, unsigned words, const uint32_t *data) { + struct nvc0_context *nvc0 = nvc0_context(&nv->pipe); + struct nvc0_constbuf *cb = NULL; + int s; + + /* Go through all the constbuf binding points of this buffer and try to + * find one which contains the region to be updated. + */ + for (s = 0; s < 6 && !cb; s++) { + uint16_t bindings = res->cb_bindings[s]; + while (bindings) { + int i = ffs(i) - 1; + uint32_t cb_offset = nvc0->constbuf[s][i].offset; + + bindings &= ~(1 << i); + if (cb_offset <= offset && + cb_offset + nvc0->constbuf[s][i].size >= offset + words * 4) { + cb = &nvc0->constbuf[s][i]; + break; + } + } + } + + if (cb) { + nvc0_cb_bo_push(nv, res->bo, res->domain, + res->offset + cb->offset, cb->size, + offset - cb->offset, words, data); + } else { + nv->push_data(nv, res->bo, res->offset + offset, res->domain, + words * 4, data); + } +} + +void +nvc0_cb_bo_push(struct nouveau_context *nv, + struct nouveau_bo *bo, unsigned domain, + unsigned base, unsigned size, + unsigned offset, unsigned words, const uint32_t *data) +{ struct nouveau_pushbuf *push = nv->pushbuf; NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_count, 1); @@ -520,6 +557,9 @@ nvc0_cb_push(struct nouveau_context *nv, assert(!(offset & 3)); size = align(size, 0x100); + assert(offset < size); + assert(offset + words * 4 <= size); + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, size); PUSH_DATAh(push, bo->offset + base); -- 2.4.6
Ilia Mirkin
2015-Sep-09 17:47 UTC
[Nouveau] [PATCH] nvc0: keep track of cb bindings per buffer, use for upload settings
On Wed, Sep 9, 2015 at 3:17 AM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:> CB updates to bound buffers need to go through the CB_DATA endpoints, > otherwise the shader may not notice that the updates happened. > Furthermore, these updates have to go in to the same address as the > bound buffer, otherwise, again, the shader may not notice updates. > > So we keep track of all the places where a constbuf is bound, and > iterate over all of them when updating data. If a binding is found that > encompasses the region to be updated, then we use the settings of that > binding for the upload. Otherwise we upload as a regular data update. > > This fixes piglit 'arb_uniform_buffer_object-rendering offset' as well > as blurriness in Witcher2. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91890 > Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu> > Cc: "11.0" <mesa-stable at lists.freedesktop.org> > --- > src/gallium/drivers/nouveau/nouveau_buffer.c | 4 +- > src/gallium/drivers/nouveau/nouveau_buffer.h | 2 + > src/gallium/drivers/nouveau/nouveau_context.h | 5 ++- > src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 8 ++-- > src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 2 + > .../drivers/nouveau/nvc0/nvc0_state_validate.c | 3 +- > src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c | 46 ++++++++++++++++++++-- > 7 files changed, 58 insertions(+), 12 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c > index 912b778..4937dae 100644 > --- a/src/gallium/drivers/nouveau/nouveau_buffer.c > +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c > @@ -206,8 +206,8 @@ nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx, > nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain, > tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size); > else > - if ((buf->base.bind & PIPE_BIND_CONSTANT_BUFFER) && nv->push_cb && can_cb) > - nv->push_cb(nv, buf->bo, buf->domain, buf->offset, buf->base.width0, > + if (nv->push_cb && can_cb) > + nv->push_cb(nv, buf, > base, size / 4, (const uint32_t *)data); > else > nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data); > diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.h b/src/gallium/drivers/nouveau/nouveau_buffer.h > index 7e6a6cc..d45bf7a 100644 > --- a/src/gallium/drivers/nouveau/nouveau_buffer.h > +++ b/src/gallium/drivers/nouveau/nouveau_buffer.h > @@ -41,6 +41,8 @@ struct nv04_resource { > uint8_t status; > uint8_t domain; > > + uint16_t cb_bindings[6]; /* per-shader per-slot bindings */ > + > struct nouveau_fence *fence; > struct nouveau_fence *fence_wr; > > diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h > index 24deb7e..decb271 100644 > --- a/src/gallium/drivers/nouveau/nouveau_context.h > +++ b/src/gallium/drivers/nouveau/nouveau_context.h > @@ -6,6 +6,8 @@ > > #define NOUVEAU_MAX_SCRATCH_BUFS 4 > > +struct nv04_resource; > + > struct nouveau_context { > struct pipe_context pipe; > struct nouveau_screen *screen; > @@ -23,8 +25,7 @@ struct nouveau_context { > unsigned, const void *); > /* base, size refer to the whole constant buffer */ > void (*push_cb)(struct nouveau_context *, > - struct nouveau_bo *, unsigned domain, > - unsigned base, unsigned size, > + struct nv04_resource *, > unsigned offset, unsigned words, const uint32_t *); > > /* @return: @ref reduced by nr of references found in context */ > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > index 6ed79cf..30bee3a 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > @@ -299,10 +299,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv, > struct nouveau_bo *dst, unsigned offset, unsigned domain, > unsigned size, const void *data); > void > -nvc0_cb_push(struct nouveau_context *, > - struct nouveau_bo *bo, unsigned domain, > - unsigned base, unsigned size, > - unsigned offset, unsigned words, const uint32_t *data); > +nvc0_cb_bo_push(struct nouveau_context *, > + struct nouveau_bo *bo, unsigned domain, > + unsigned base, unsigned size, > + unsigned offset, unsigned words, const uint32_t *data); > > /* nvc0_vbo.c */ > void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *); > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c > index ee29912..c5bfd03 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c > @@ -831,6 +831,8 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, > } > nvc0->constbuf_dirty[s] |= 1 << i; > > + if (nvc0->constbuf[s][i].u.buf) > + nv04_resource(nvc0->constbuf[s][i].u.buf)->cb_bindings[s] &= ~(1 << i); > pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, res); > > nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? true : false; > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c > index 47bd66d..aec0609 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c > @@ -440,7 +440,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) > BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); > PUSH_DATA (push, (0 << 4) | 1); > } > - nvc0_cb_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base), > + nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base), > base, nvc0->state.uniform_buffer_bound[s], > 0, (size + 3) / 4, > nvc0->constbuf[s][0].u.data); > @@ -458,6 +458,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) > BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD); > > nvc0->cb_dirty = 1; /* Force cache flush for UBO. */ > + res->cb_bindings[s] |= 1 << i; > } else { > BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); > PUSH_DATA (push, (i << 4) | 0); > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c > index 7cc5b4b..ff2ad19 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c > @@ -506,12 +506,49 @@ nvc0_miptree_transfer_unmap(struct pipe_context *pctx, > } > > /* This happens rather often with DTD9/st. */ > -void > +static void > nvc0_cb_push(struct nouveau_context *nv, > - struct nouveau_bo *bo, unsigned domain, > - unsigned base, unsigned size, > + struct nv04_resource *res, > unsigned offset, unsigned words, const uint32_t *data) > { > + struct nvc0_context *nvc0 = nvc0_context(&nv->pipe); > + struct nvc0_constbuf *cb = NULL; > + int s; > + > + /* Go through all the constbuf binding points of this buffer and try to > + * find one which contains the region to be updated. > + */ > + for (s = 0; s < 6 && !cb; s++) { > + uint16_t bindings = res->cb_bindings[s]; > + while (bindings) { > + int i = ffs(i) - 1;joi tells me that this works considerably better as ffs(bindings) - 1. Will do a piglit run on my GK208, see if Heaven still works, and then push. (That's right... cower in the extensiveness of my QA process...)> + uint32_t cb_offset = nvc0->constbuf[s][i].offset; > + > + bindings &= ~(1 << i); > + if (cb_offset <= offset && > + cb_offset + nvc0->constbuf[s][i].size >= offset + words * 4) { > + cb = &nvc0->constbuf[s][i]; > + break; > + } > + } > + } > + > + if (cb) { > + nvc0_cb_bo_push(nv, res->bo, res->domain, > + res->offset + cb->offset, cb->size, > + offset - cb->offset, words, data); > + } else { > + nv->push_data(nv, res->bo, res->offset + offset, res->domain, > + words * 4, data); > + } > +} > + > +void > +nvc0_cb_bo_push(struct nouveau_context *nv, > + struct nouveau_bo *bo, unsigned domain, > + unsigned base, unsigned size, > + unsigned offset, unsigned words, const uint32_t *data) > +{ > struct nouveau_pushbuf *push = nv->pushbuf; > > NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_count, 1); > @@ -520,6 +557,9 @@ nvc0_cb_push(struct nouveau_context *nv, > assert(!(offset & 3)); > size = align(size, 0x100); > > + assert(offset < size); > + assert(offset + words * 4 <= size); > + > BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); > PUSH_DATA (push, size); > PUSH_DATAh(push, bo->offset + base); > -- > 2.4.6 >