Christoph Bumiller
2009-Jun-21 16:00 UTC
[PATCH] nv50: select shader program through VP/FP_START_ID
Instead of specifying the program buffer address on every program change, just set an offset in a shared program buffer, like the binary driver does. --- src/gallium/drivers/nv50/nv50_context.h | 6 ++ src/gallium/drivers/nv50/nv50_program.c | 78 +++++++++++------------------ src/gallium/drivers/nv50/nv50_program.h | 1 + src/gallium/drivers/nv50/nv50_screen.c | 27 ++++++++++- src/gallium/drivers/nv50/nv50_screen.h | 2 + src/gallium/drivers/nv50/nv50_transfer.c | 12 +++++ 6 files changed, 77 insertions(+), 49 deletions(-) diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 9b8cc4d..44463d6 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -198,4 +198,10 @@ extern boolean nv50_state_validate(struct nv50_context *nv50); /* nv50_tex.c */ extern void nv50_tex_validate(struct nv50_context *); +/* nv50_transfer.c */ +extern void nv50_transfer_gart_vram(struct pipe_screen *pscreen, + struct nouveau_bo *dst, unsigned dst_off, + struct nouveau_bo *src, unsigned src_off, + unsigned size); + #endif diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 32d1bf8..4ef7748 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -2289,19 +2289,22 @@ static void nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) { struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program_exec *e; - struct nouveau_stateobj *so; - const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR; - unsigned start, count, *up, *ptr; + struct nouveau_resource *heap; + struct nouveau_bo *code; + int ret; + unsigned size, *ptr; boolean upload = FALSE; if (!p->bo) { - nouveau_bo_new(chan->device, NOUVEAU_BO_VRAM, 0x100, - p->exec_size * 4, &p->bo); + nouveau_bo_new(chan->device, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, + 0x100, p->exec_size * 4, &p->bo); upload = TRUE; } + heap = nv50->screen->code_heap[p->type]; + code = nv50->screen->sprogbuf_code[p->type]; + if ((p->data[0] && p->data[0]->start != p->data_start[0]) || (p->data[1] && p->data[1]->start != p->data_start[1])) { for (e = p->exec_head; e; e = e->next) { @@ -2338,44 +2341,32 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) } #endif - up = ptr = MALLOC(p->exec_size * 4); + ret = nouveau_bo_map(p->bo, NOUVEAU_BO_WR); + if (ret) { + NOUVEAU_ERR("Failed to map program upload buffer (%i).\n",ret); + abort(); + } + + ptr = (unsigned *)p->bo->map; for (e = p->exec_head; e; e = e->next) { *(ptr++) = e->inst[0]; if (is_long(e)) *(ptr++) = e->inst[1]; } - so = so_new(4,2); - so_method(so, nv50->screen->tesla, 0x1280, 3); - so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_LOW, 0, 0); - so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4)); - - start = 0; count = p->exec_size; - while (count) { - struct nouveau_channel *chan = nv50->screen->base.channel; - unsigned nr; - - so_emit(chan, so); + nouveau_bo_unmap(p->bo); - nr = MIN2(count, 2047); - nr = MIN2(chan->pushbuf->remaining, nr); - if (chan->pushbuf->remaining < (nr + 3)) { - FIRE_RING(chan); - continue; + size = align(p->exec_size * 4, 0x100); + if (!p->code) { + ret = nouveau_resource_alloc(heap, size, p, &p->code); + if (ret) { + NOUVEAU_ERR("Program VRAM buffer is full.\n"); + abort(); } - - BEGIN_RING(chan, tesla, 0x0f00, 1); - OUT_RING (chan, (start << 8) | NV50_CB_PUPLOAD); - BEGIN_RING(chan, tesla, 0x40000f04, nr); - OUT_RINGp (chan, up + start, nr); - - start += nr; - count -= nr; } - FREE(up); - so_ref(NULL, &so); + nv50_transfer_gart_vram(&nv50->screen->base.base, + code, p->code->start, p->bo, 0, size); } void @@ -2394,12 +2385,7 @@ nv50_vertprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(13, 2); - so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); - so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_LOW, 0, 0); + so = so_new(10, 0); so_method(so, tesla, 0x1650, 2); so_data (so, p->cfg.vp.attr[0]); so_data (so, p->cfg.vp.attr[1]); @@ -2409,7 +2395,7 @@ nv50_vertprog_validate(struct nv50_context *nv50) so_data (so, p->cfg.high_result); //8); so_data (so, p->cfg.high_temp); so_method(so, tesla, 0x140c, 1); - so_data (so, 0); /* program start offset */ + so_data (so, p->code->start); so_ref(so, &nv50->state.vertprog); so_ref(NULL, &so); } @@ -2431,12 +2417,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(64, 2); - so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); - so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_LOW, 0, 0); + so = so_new(32, 0); so_method(so, tesla, 0x1904, 4); so_data (so, p->cfg.fp.regs[0]); /* 0x01000404 / 0x00040404 */ so_data (so, 0x00000004); @@ -2455,7 +2436,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_method(so, tesla, 0x196c, 1); so_data (so, p->cfg.fp.regs[3]); so_method(so, tesla, 0x1414, 1); - so_data (so, 0); /* program start offset */ + so_data (so, p->code->start); so_ref(so, &nv50->state.fragprog); so_ref(NULL, &so); } @@ -2476,6 +2457,7 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) nouveau_resource_free(&p->data[0]); nouveau_resource_free(&p->data[1]); + nouveau_resource_free(&p->code); p->translated = 0; } diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 096e047..ed3f67b 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -27,6 +27,7 @@ struct nv50_program { struct nouveau_resource *data[2]; unsigned data_start[2]; + struct nouveau_resource *code; struct nouveau_bo *bo; float *immd; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index f42b784..954b67a 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -266,7 +266,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_ref(NULL, &so); /* Static tesla init */ - so = so_new(256, 20); + so = so_new(256, 24); so_method(so, screen->tesla, 0x1558, 1); so_data (so, 1); @@ -290,6 +290,31 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_method(so, screen->tesla, 0x16b8, 1); so_data (so, 8); + /* create VRAM buffers for shader programs */ + for (i = 0; i < 2; i++) { + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0x100, 0x10000, + &screen->sprogbuf_code[i]); + if (ret || nouveau_resource_init( + &screen->code_heap[i], 0, 0x10000)) { + NOUVEAU_ERR("Failed to initialize program buffers."); + nv50_screen_destroy(pscreen); + return NULL; + } + } + + /* set program buffer addresses */ + so_method(so, screen->tesla, NV50TCL_VP_ADDRESS_HIGH, 2); + so_reloc (so, screen->sprogbuf_code[0], 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->sprogbuf_code[0], 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + + so_method(so, screen->tesla, NV50TCL_FP_ADDRESS_HIGH, 2); + so_reloc (so, screen->sprogbuf_code[1], 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->sprogbuf_code[1], 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + /* constant buffers for immediates and VP/FP parameters */ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4, &screen->constbuf_misc[0]); diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 61e24a5..2481492 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -17,9 +17,11 @@ struct nv50_screen { struct nouveau_bo *constbuf_misc[1]; struct nouveau_bo *constbuf_parm[2]; + struct nouveau_bo *sprogbuf_code[2]; struct nouveau_resource *immd_heap[1]; struct nouveau_resource *parm_heap[2]; + struct nouveau_resource *code_heap[2]; struct nouveau_bo *tic; struct nouveau_bo *tsc; diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index d0b7f0b..f7f5858 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -99,6 +99,18 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct nouveau_bo *src_bo, } } +void +nv50_transfer_gart_vram(struct pipe_screen *pscreen, + struct nouveau_bo *dst, unsigned dst_offset, + struct nouveau_bo *src, unsigned src_offset, + unsigned size) +{ + nv50_transfer_rect_m2mf(pscreen, + src, src_offset, size, 0, 0, 0, 0, + dst, dst_offset, size, 0, 0, 0, 0, + 1, size, 1, NOUVEAU_BO_GART, NOUVEAU_BO_VRAM); +} + static struct pipe_transfer * nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice, -- 1.6.0.6 --------------090503050107050804030002 Content-Type: text/plain; name="0004-nv50-use-ctor_reg-to-initialize-nv50_regs.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="0004-nv50-use-ctor_reg-to-initialize-nv50_regs.patch"