Xavier Chantry
2010-May-08 22:41 UTC
[Nouveau] [PATCH] nv50 : fix too long shader uploads by splitting them
This fixes the fp-long-alu test, which failed/stopped at depth 3077 with the following kernel errors: [drm] nouveau 0000:01:00.0: Allocating FIFO number 3 [drm] nouveau 0000:01:00.0: nouveau_channel_alloc: initialised FIFO 3 [drm] nouveau 0000:01:00.0: PGRAPH_DATA_ERROR - Ch 3/3 Class 0x502d Mthd 0x0838 Data 0x00000001:0x0001e0f8 [drm] nouveau 0000:01:00.0: PGRAPH_DATA_ERROR - INVALID_VALUE [drm] nouveau 0000:01:00.0: PGRAPH_TRAP - Ch 3/7 Class 0x8297 Mthd 0x15e0 Data 0x00000000:0x00000000 [drm] nouveau 0000:01:00.0: PGRAPH_TRAP_MP_EXEC - TP 0 MP 0: INVALID_OPCODE at 000000 warp 4, opcode ffffffff ffffffff [drm] nouveau 0000:01:00.0: PGRAPH_TRAP_MP_EXEC - TP 0 MP 1: INVALID_OPCODE at 000000 warp 1, opcode ffffffff ffffffff [drm] nouveau 0000:01:00.0: PGRAPH_TRAP_MP_EXEC - TP 1 MP 0: INVALID_OPCODE at 000000 warp 3, opcode ffffffff ffffffff [drm] nouveau 0000:01:00.0: PGRAPH_TRAP_MP_EXEC - TP 1 MP 1: INVALID_OPCODE at 000000 warp 4, opcode ffffffff ffffffff [drm] nouveau 0000:01:00.0: nouveau_channel_free: freeing fifo 3 Just one mystery: the fp-long-alu test still fails if I split the upload at 65536 (supposed to be the max value), without triggering any pgraph errors. Any lower value (256-aligned) works. Thanks to Christoph Bumiller and Marcin Kościelnicki for the help!
Signed-off-by: Xavier Chantry <chantry.xavier at gmail.com> --- src/gallium/drivers/nv50/nv50_program.c | 24 ++++++++++++++++++++---- src/gallium/drivers/nv50/nv50_transfer.c | 3 --- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 0156ff9..33981fa 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -4207,10 +4207,13 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) static void nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) { + struct nouveau_grobj *tesla = nv50->screen->tesla; struct nouveau_channel *chan = nv50->screen->base.channel; struct nv50_program_exec *e; uint32_t *up, i; boolean upload = FALSE; + unsigned offset; + int width; if (!p->bo) { nouveau_bo_new(chan->device, NOUVEAU_BO_VRAM, 0x100, @@ -4267,10 +4270,23 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) NOUVEAU_ERR("0x%08x\n", e->inst[1]); } #endif - nv50_upload_sifc(nv50, p->bo, 0, NOUVEAU_BO_VRAM, - NV50_2D_DST_FORMAT_R8_UNORM, 65536, 1, 262144, - up, NV50_2D_SIFC_FORMAT_R8_UNORM, 0, - 0, 0, p->exec_size * 4, 1, 1); + + /* SIFC_HEIGHT/SIFC_WIDTH of 65536 do not work, and are not reported + * as data error either. hw bug ? 
*/ +#define SIFC_MAX_WIDTH (65536-256) + offset = 0; + width = p->exec_size * 4; + while(width > 0) { + nv50_upload_sifc(nv50, p->bo, offset, NOUVEAU_BO_VRAM, + NV50_2D_DST_FORMAT_R8_UNORM, 65536, 1, 262144, + &up[offset / 4], NV50_2D_SIFC_FORMAT_R8_UNORM, 0, + 0, 0, MIN2(SIFC_MAX_WIDTH, width), 1, 1); + width -= SIFC_MAX_WIDTH; + offset += SIFC_MAX_WIDTH; + break; + } + BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1); + OUT_RING (chan, 0); FREE(up); } diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index c5581a6..8b66c42 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -274,7 +274,6 @@ nv50_upload_sifc(struct nv50_context *nv50, { struct nouveau_channel *chan = nv50->screen->base.channel; struct nouveau_grobj *eng2d = nv50->screen->eng2d; - struct nouveau_grobj *tesla = nv50->screen->tesla; unsigned line_dwords = (w * cpp + 3) / 4; reloc |= NOUVEAU_BO_WR; @@ -347,6 +346,4 @@ nv50_upload_sifc(struct nv50_context *nv50, src = (uint8_t *) src + src_pitch; } - BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1); - OUT_RING (chan, 0); } -- 1.7.1
Xavier Chantry
2010-May-08 22:46 UTC
[Nouveau] [PATCH] nv50 : fix too long shader uploads by splitting them
On Sun, May 9, 2010 at 12:41 AM, Xavier Chantry <chantry.xavier at gmail.com> wrote: > This fixes fp-long-alu test which failed/stopped at depth 3077 with the > following kernel errors : > > [drm] nouveau 0000:01:00.0: Allocating FIFO number 3 > [drm] nouveau 0000:01:00.0: nouveau_channel_alloc: initialised FIFO 3 > [drm] nouveau 0000:01:00.0: PGRAPH_DATA_ERROR - Ch 3/3 Class 0x502d > Mthd 0x0838 Data 0x00000001:0x0001e0f8 > [drm] nouveau 0000:01:00.0: PGRAPH_DATA_ERROR - INVALID_VALUE > [drm] nouveau 0000:01:00.0: PGRAPH_TRAP - Ch 3/7 Class 0x8297 Mthd > 0x15e0 Data 0x00000000:0x00000000 > [drm] nouveau 0000:01:00.0: PGRAPH_TRAP_MP_EXEC - TP 0 MP 0: > INVALID_OPCODE at 000000 warp 4, opcode ffffffff ffffffff > [drm] nouveau 0000:01:00.0: PGRAPH_TRAP_MP_EXEC - TP 0 MP 1: > INVALID_OPCODE at 000000 warp 1, opcode ffffffff ffffffff > [drm] nouveau 0000:01:00.0: PGRAPH_TRAP_MP_EXEC - TP 1 MP 0: > INVALID_OPCODE at 000000 warp 3, opcode ffffffff ffffffff > [drm] nouveau 0000:01:00.0: PGRAPH_TRAP_MP_EXEC - TP 1 MP 1: > INVALID_OPCODE at 000000 warp 4, opcode ffffffff ffffffff > [drm] nouveau 0000:01:00.0: nouveau_channel_free: freeing fifo 3 > > Just one mystery, fp-long-alu test still fails if I split upload at 65536 > (supposed to be the max value), without triggering any pgraph errors. > Any lower value (256 aligned) works. > > Thanks to Christoph Bumiller and Marcin Kościelnicki for the help ! > > Signed-off-by: Xavier Chantry <chantry.xavier at gmail.com> > --- >  src/gallium/drivers/nv50/nv50_program.c  |   24 ++++++++++++++++++++---- >  src/gallium/drivers/nv50/nv50_transfer.c |    3 --- >  2 files changed, 20 insertions(+), 7 deletions(-) > > diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c > index 0156ff9..33981fa 100644 > --- a/src/gallium/drivers/nv50/nv50_program.c > +++ b/src/gallium/drivers/nv50/nv50_program.c > @@ -4207,10 +4207,13 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) >  static void >  nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) >  { > +       struct nouveau_grobj *tesla = nv50->screen->tesla; >        struct nouveau_channel *chan = nv50->screen->base.channel; >        struct nv50_program_exec *e; >        uint32_t *up, i; >        boolean upload = FALSE; > +       unsigned offset; > +       int width; > >        if (!p->bo) { >                nouveau_bo_new(chan->device, NOUVEAU_BO_VRAM, 0x100, > @@ -4267,10 +4270,23 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) >                        NOUVEAU_ERR("0x%08x\n", e->inst[1]); >        } >  #endif > -       nv50_upload_sifc(nv50, p->bo, 0, NOUVEAU_BO_VRAM, > -                        NV50_2D_DST_FORMAT_R8_UNORM, 65536, 1, 262144, > -                        up, NV50_2D_SIFC_FORMAT_R8_UNORM, 0, > -                        0, 0, p->exec_size * 4, 1, 1); > + > +       /* SIFC_HEIGHT/SIFC_WIDTH of 65536 do not work, and are not reported > +        * as data error either. hw bug ? */ > +#define SIFC_MAX_WIDTH (65536-256) > +       offset = 0; > +       width = p->exec_size * 4; > +       while(width > 0) { > +               nv50_upload_sifc(nv50, p->bo, offset, NOUVEAU_BO_VRAM, > +                               NV50_2D_DST_FORMAT_R8_UNORM, 65536, 1, 262144, > +                               &up[offset / 4], NV50_2D_SIFC_FORMAT_R8_UNORM, 0, > +                               0, 0, MIN2(SIFC_MAX_WIDTH, width), 1, 1); > +               width -= SIFC_MAX_WIDTH; > +               offset += SIFC_MAX_WIDTH; > +               break;
Damn, the break is a debug leftover, sorry. I will resubmit...
Xavier Chantry
2010-May-09 09:37 UTC
[Nouveau] [PATCH] nv50 : fix too long shader uploads by splitting them
This fixes the fp-long-alu test, which failed/stopped at depth 3077 with the following kernel errors: [drm] nouveau 0000:01:00.0: Allocating FIFO number 3 [drm] nouveau 0000:01:00.0: nouveau_channel_alloc: initialised FIFO 3 [drm] nouveau 0000:01:00.0: PGRAPH_DATA_ERROR - Ch 3/3 Class 0x502d Mthd 0x0838 Data 0x00000001:0x0001e0f8 [drm] nouveau 0000:01:00.0: PGRAPH_DATA_ERROR - INVALID_VALUE [drm] nouveau 0000:01:00.0: PGRAPH_TRAP - Ch 3/7 Class 0x8297 Mthd 0x15e0 Data 0x00000000:0x00000000 [drm] nouveau 0000:01:00.0: PGRAPH_TRAP_MP_EXEC - TP 0 MP 0: INVALID_OPCODE at 000000 warp 4, opcode ffffffff ffffffff [drm] nouveau 0000:01:00.0: PGRAPH_TRAP_MP_EXEC - TP 0 MP 1: INVALID_OPCODE at 000000 warp 1, opcode ffffffff ffffffff [drm] nouveau 0000:01:00.0: PGRAPH_TRAP_MP_EXEC - TP 1 MP 0: INVALID_OPCODE at 000000 warp 3, opcode ffffffff ffffffff [drm] nouveau 0000:01:00.0: PGRAPH_TRAP_MP_EXEC - TP 1 MP 1: INVALID_OPCODE at 000000 warp 4, opcode ffffffff ffffffff [drm] nouveau 0000:01:00.0: nouveau_channel_free: freeing fifo 3 Just one mystery: the fp-long-alu test still fails if I split the upload at 65536 (supposed to be the max value), without triggering any pgraph errors. Any lower value (256-aligned) works. Thanks to Christoph Bumiller and Marcin Kościelnicki for the help!
Signed-off-by: Xavier Chantry <chantry.xavier at gmail.com> --- src/gallium/drivers/nv50/nv50_program.c | 23 +++++++++++++++++++---- src/gallium/drivers/nv50/nv50_transfer.c | 3 --- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 0156ff9..9a41f44 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -4207,10 +4207,13 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) static void nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) { + struct nouveau_grobj *tesla = nv50->screen->tesla; struct nouveau_channel *chan = nv50->screen->base.channel; struct nv50_program_exec *e; uint32_t *up, i; boolean upload = FALSE; + unsigned offset; + int width; if (!p->bo) { nouveau_bo_new(chan->device, NOUVEAU_BO_VRAM, 0x100, @@ -4267,10 +4270,22 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) NOUVEAU_ERR("0x%08x\n", e->inst[1]); } #endif - nv50_upload_sifc(nv50, p->bo, 0, NOUVEAU_BO_VRAM, - NV50_2D_DST_FORMAT_R8_UNORM, 65536, 1, 262144, - up, NV50_2D_SIFC_FORMAT_R8_UNORM, 0, - 0, 0, p->exec_size * 4, 1, 1); + + /* SIFC_HEIGHT/SIFC_WIDTH of 65536 do not work, and are not reported + * as data error either. hw bug ? 
*/ +#define SIFC_MAX_WIDTH (65536-256) + offset = 0; + width = p->exec_size * 4; + while(width > 0) { + nv50_upload_sifc(nv50, p->bo, offset, NOUVEAU_BO_VRAM, + NV50_2D_DST_FORMAT_R8_UNORM, 65536, 1, 262144, + &up[offset / 4], NV50_2D_SIFC_FORMAT_R8_UNORM, 0, + 0, 0, MIN2(SIFC_MAX_WIDTH, width), 1, 1); + width -= SIFC_MAX_WIDTH; + offset += SIFC_MAX_WIDTH; + } + BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1); + OUT_RING (chan, 0); FREE(up); } diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index c5581a6..8b66c42 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -274,7 +274,6 @@ nv50_upload_sifc(struct nv50_context *nv50, { struct nouveau_channel *chan = nv50->screen->base.channel; struct nouveau_grobj *eng2d = nv50->screen->eng2d; - struct nouveau_grobj *tesla = nv50->screen->tesla; unsigned line_dwords = (w * cpp + 3) / 4; reloc |= NOUVEAU_BO_WR; @@ -347,6 +346,4 @@ nv50_upload_sifc(struct nv50_context *nv50, src = (uint8_t *) src + src_pitch; } - BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1); - OUT_RING (chan, 0); } -- 1.7.1