Hans de Goede
2016-Mar-17 16:07 UTC
[Nouveau] [PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
Some of the lowering steps we currently do for FILE_MEMORY_GLOBAL only apply to buffers, making it impossible to use FILE_MEMORY_GLOBAL for OpenCL global buffers. This commit changes the buffer code to use FILE_MEMORY_BUFFER at the ir_from_tgsi and lowering steps, freeing up FILE_MEMORY_GLOBAL for use with OpenCL global buffers. Note that after lowering, buffer accesses use the FILE_MEMORY_GLOBAL register file. Tested with piglit on a gk107, before this patch: ./piglit run -o shader -t '.*arb_shader_storage_buffer_object.*' results/shader [9/9] pass: 9 / after: ./piglit run -o shader -t '.*arb_shader_storage_buffer_object.*' results/shader [9/9] pass: 9 / Signed-off-by: Hans de Goede <hdegoede at redhat.com> --- Changes in v2: -New patch in v2 of patch-set to re-enable support for global opencl buffers --- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 +- src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 8 +++++--- src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 5 ++++- src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 1 + 6 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 7b0eb2f..5141fc6 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -332,6 +332,7 @@ enum DataFile FILE_MEMORY_CONST, FILE_SHADER_INPUT, FILE_SHADER_OUTPUT, + FILE_MEMORY_BUFFER, FILE_MEMORY_GLOBAL, FILE_MEMORY_SHARED, FILE_MEMORY_LOCAL, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index baa2e30..7ae0cb2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -373,7 +373,7 @@ static 
nv50_ir::DataFile translateFile(uint file) case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE; case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; - case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL; + case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER; case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL; case TGSI_FILE_SAMPLER: case TGSI_FILE_NULL: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index d0936d8..628deb7 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1141,13 +1141,14 @@ NVC0LoweringPass::handleATOM(Instruction *atom) handleSharedATOM(atom); return true; default: - assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); assert(base->reg.size == 8); if (ptr) base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); assert(base->reg.size == 8); atom->setIndirect(0, 0, base); + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; return true; } base @@ -1963,7 +1964,7 @@ NVC0LoweringPass::visit(Instruction *i) } else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) { assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL); i->op = OP_VFETCH; - } else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) { + } else if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { Value *ind = i->getIndirect(0, 1); Value *ptr = loadResInfo64(ind, i->getSrc(0)->reg.fileIndex * 16); // XXX come up with a way not to do this for EVERY little access but @@ -1978,6 +1979,7 @@ NVC0LoweringPass::visit(Instruction *i) } i->setIndirect(0, 1, NULL); i->setIndirect(0, 0, ptr); + i->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length); 
i->setPredicate(CC_NOT_P, pred); if (i->defExists(0)) { @@ -1987,7 +1989,7 @@ NVC0LoweringPass::visit(Instruction *i) break; case OP_ATOM: { - const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL; + const bool cctl = i->src(0).getFile() == FILE_MEMORY_BUFFER; handleATOM(i); handleCasExch(i, cctl); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index cfa85ec..870b36e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -455,6 +455,7 @@ int Symbol::print(char *buf, size_t size, case FILE_MEMORY_CONST: c = 'c'; break; case FILE_SHADER_INPUT: c = 'a'; break; case FILE_SHADER_OUTPUT: c = 'o'; break; + case FILE_MEMORY_BUFFER: c = 'b'; break; // Only used before lowering case FILE_MEMORY_GLOBAL: c = 'g'; break; case FILE_MEMORY_SHARED: c = 's'; break; case FILE_MEMORY_LOCAL: c = 'l'; break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp index 2c4d7f5..2af1715 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp @@ -207,6 +207,7 @@ TargetNV50::getFileSize(DataFile file) const case FILE_MEMORY_CONST: return 65536; case FILE_SHADER_INPUT: return 0x200; case FILE_SHADER_OUTPUT: return 0x200; + case FILE_MEMORY_BUFFER: return 0xffffffff; case FILE_MEMORY_GLOBAL: return 0xffffffff; case FILE_MEMORY_SHARED: return 16 << 10; case FILE_MEMORY_LOCAL: return 48 << 10; @@ -406,7 +407,8 @@ TargetNV50::isAccessSupported(DataFile file, DataType ty) const if (ty == TYPE_B96 || ty == TYPE_NONE) return false; if (typeSizeof(ty) > 4) - return (file == FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL); + return (file == FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL) || + (file == FILE_MEMORY_BUFFER); return true; } @@ -509,6 +511,7 @@ int 
TargetNV50::getLatency(const Instruction *i) const switch (i->src(0).getFile()) { case FILE_MEMORY_LOCAL: case FILE_MEMORY_GLOBAL: + case FILE_MEMORY_BUFFER: return 100; // really 400 to 800 default: return 22; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp index a03afa8..9e1e7bf 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp @@ -248,6 +248,7 @@ TargetNVC0::getFileSize(DataFile file) const case FILE_MEMORY_CONST: return 65536; case FILE_SHADER_INPUT: return 0x400; case FILE_SHADER_OUTPUT: return 0x400; + case FILE_MEMORY_BUFFER: return 0xffffffff; case FILE_MEMORY_GLOBAL: return 0xffffffff; case FILE_MEMORY_SHARED: return 16 << 10; case FILE_MEMORY_LOCAL: return 48 << 10; -- 2.7.3
Hans de Goede
2016-Mar-17 16:07 UTC
[Nouveau] [PATCH mesa v2 2/2] nouveau: codegen: Add support for OpenCL global memory buffers
Add support for OpenCL global memory buffers. Note that this has only been tested with regular loads and stores and likely needs more work for e.g. atomic ops. Signed-off-by: Hans de Goede <hdegoede at redhat.com> --- Changes in v2: -Redo on top of new "Use FILE_MEMORY_BUFFER for buffers" patch --- src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 7ae0cb2..a7a1d54 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -1286,7 +1286,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) if (dst.isIndirect(0)) indirectTempArrays.insert(dst.getArrayId()); } else - if (dst.getFile() == TGSI_FILE_BUFFER) { + if (dst.getFile() == TGSI_FILE_BUFFER || + (dst.getFile() == TGSI_FILE_MEMORY && + memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) { info->io.globalAccess |= 0x2; } } @@ -1297,7 +1299,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) if (src.isIndirect(0)) indirectTempArrays.insert(src.getArrayId()); } else - if (src.getFile() == TGSI_FILE_BUFFER) { + if (src.getFile() == TGSI_FILE_BUFFER || + (src.getFile() == TGSI_FILE_MEMORY && + memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) { info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? 0x1 : 0x2; } else @@ -1531,6 +1535,10 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address) if (tgsiFile == TGSI_FILE_MEMORY) { switch (code->memoryFiles[fileIdx].mem_type) { + case TGSI_MEMORY_TYPE_GLOBAL: + /* No-op this is the default for TGSI_FILE_MEMORY */ + sym->setFile(FILE_MEMORY_GLOBAL); + break; case TGSI_MEMORY_TYPE_SHARED: sym->setFile(FILE_MEMORY_SHARED); break; -- 2.7.3
Hans de Goede
2016-Mar-21 11:26 UTC
[Nouveau] [Mesa-dev, v2, 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
Hi, On 17-03-16 17:07, Hans de Goede wrote:> Some of the lowering steps we currently do for FILE_MEMORY_GLOBAL only > apply to buffers, making it impossible to use FILE_MEMORY_GLOBAL for > OpenCL global buffers. > > This commits changes the buffer code to use FILE_MEMORY_BUFFER at the > ir_from_tgsi and lowering steps, freeing use of FILE_MEMORY_GLOBAL > for use with OpenCL global buffers. > > Note that after lowering buffer accesses use the FILE_MEMORY_GLOBAL > register file. > > Tested with piglet on a gk107, before this patch: > ./piglit run -o shader -t '.*arb_shader_storage_buffer_object.*' results/shader > [9/9] pass: 9 / > after: > ./piglit run -o shader -t '.*arb_shader_storage_buffer_object.*' results/shader > [9/9] pass: 9 / > > Signed-off-by: Hans de Goede <hdegoede at redhat.com>Ping, any chance I can get a review of these 2 patches ? Thanks, Hans> --- > Changes in v2: > -New patch in v2 of patch-set to re-enable support for global opencl buffers > --- > src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + > src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 +- > src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 8 +++++--- > src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + > src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 5 ++++- > src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 1 + > 6 files changed, 13 insertions(+), 5 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h > index 7b0eb2f..5141fc6 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h > @@ -332,6 +332,7 @@ enum DataFile > FILE_MEMORY_CONST, > FILE_SHADER_INPUT, > FILE_SHADER_OUTPUT, > + FILE_MEMORY_BUFFER, > FILE_MEMORY_GLOBAL, > FILE_MEMORY_SHARED, > FILE_MEMORY_LOCAL, > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp > index baa2e30..7ae0cb2 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp > @@ -373,7 +373,7 @@ static nv50_ir::DataFile translateFile(uint file) > case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE; > case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; > case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; > - case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL; > + case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER; > case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL; > case TGSI_FILE_SAMPLER: > case TGSI_FILE_NULL: > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > index d0936d8..628deb7 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > @@ -1141,13 +1141,14 @@ NVC0LoweringPass::handleATOM(Instruction *atom) > handleSharedATOM(atom); > return true; > default: > - assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); > + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); > base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); > assert(base->reg.size == 8); > if (ptr) > base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); > assert(base->reg.size == 8); > atom->setIndirect(0, 0, base); > + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; > return true; > } > base > @@ -1963,7 +1964,7 @@ NVC0LoweringPass::visit(Instruction *i) > } else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) { > assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL); > i->op = OP_VFETCH; > - } else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) { > + } else if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { > Value *ind = i->getIndirect(0, 1); > Value *ptr = loadResInfo64(ind, 
i->getSrc(0)->reg.fileIndex * 16); > // XXX come up with a way not to do this for EVERY little access but > @@ -1978,6 +1979,7 @@ NVC0LoweringPass::visit(Instruction *i) > } > i->setIndirect(0, 1, NULL); > i->setIndirect(0, 0, ptr); > + i->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; > bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length); > i->setPredicate(CC_NOT_P, pred); > if (i->defExists(0)) { > @@ -1987,7 +1989,7 @@ NVC0LoweringPass::visit(Instruction *i) > break; > case OP_ATOM: > { > - const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL; > + const bool cctl = i->src(0).getFile() == FILE_MEMORY_BUFFER; > handleATOM(i); > handleCasExch(i, cctl); > } > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > index cfa85ec..870b36e 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > @@ -455,6 +455,7 @@ int Symbol::print(char *buf, size_t size, > case FILE_MEMORY_CONST: c = 'c'; break; > case FILE_SHADER_INPUT: c = 'a'; break; > case FILE_SHADER_OUTPUT: c = 'o'; break; > + case FILE_MEMORY_BUFFER: c = 'b'; break; // Only used before lowering > case FILE_MEMORY_GLOBAL: c = 'g'; break; > case FILE_MEMORY_SHARED: c = 's'; break; > case FILE_MEMORY_LOCAL: c = 'l'; break; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > index 2c4d7f5..2af1715 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > @@ -207,6 +207,7 @@ TargetNV50::getFileSize(DataFile file) const > case FILE_MEMORY_CONST: return 65536; > case FILE_SHADER_INPUT: return 0x200; > case FILE_SHADER_OUTPUT: return 0x200; > + case FILE_MEMORY_BUFFER: return 0xffffffff; > case FILE_MEMORY_GLOBAL: return 0xffffffff; > case FILE_MEMORY_SHARED: return 16 << 10; > case 
FILE_MEMORY_LOCAL: return 48 << 10; > @@ -406,7 +407,8 @@ TargetNV50::isAccessSupported(DataFile file, DataType ty) const > if (ty == TYPE_B96 || ty == TYPE_NONE) > return false; > if (typeSizeof(ty) > 4) > - return (file == FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL); > + return (file == FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL) || > + (file == FILE_MEMORY_BUFFER); > return true; > } > > @@ -509,6 +511,7 @@ int TargetNV50::getLatency(const Instruction *i) const > switch (i->src(0).getFile()) { > case FILE_MEMORY_LOCAL: > case FILE_MEMORY_GLOBAL: > + case FILE_MEMORY_BUFFER: > return 100; // really 400 to 800 > default: > return 22; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > index a03afa8..9e1e7bf 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > @@ -248,6 +248,7 @@ TargetNVC0::getFileSize(DataFile file) const > case FILE_MEMORY_CONST: return 65536; > case FILE_SHADER_INPUT: return 0x400; > case FILE_SHADER_OUTPUT: return 0x400; > + case FILE_MEMORY_BUFFER: return 0xffffffff; > case FILE_MEMORY_GLOBAL: return 0xffffffff; > case FILE_MEMORY_SHARED: return 16 << 10; > case FILE_MEMORY_LOCAL: return 48 << 10; >
Samuel Pitoiset
2016-Mar-23 22:10 UTC
[Nouveau] [PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
Are you sure this won't break compute shaders on fermi? Could you please double-check that? One minor comment below. On 03/17/2016 05:07 PM, Hans de Goede wrote:> Some of the lowering steps we currently do for FILE_MEMORY_GLOBAL only > apply to buffers, making it impossible to use FILE_MEMORY_GLOBAL for > OpenCL global buffers. > > This commits changes the buffer code to use FILE_MEMORY_BUFFER at the > ir_from_tgsi and lowering steps, freeing use of FILE_MEMORY_GLOBAL > for use with OpenCL global buffers. > > Note that after lowering buffer accesses use the FILE_MEMORY_GLOBAL > register file. > > Tested with piglet on a gk107, before this patch: > ./piglit run -o shader -t '.*arb_shader_storage_buffer_object.*' results/shader > [9/9] pass: 9 / > after: > ./piglit run -o shader -t '.*arb_shader_storage_buffer_object.*' results/shader > [9/9] pass: 9 / > > Signed-off-by: Hans de Goede <hdegoede at redhat.com> > --- > Changes in v2: > -New patch in v2 of patch-set to re-enable support for global opencl buffers > --- > src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + > src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 +- > src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 8 +++++--- > src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + > src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 5 ++++- > src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 1 + > 6 files changed, 13 insertions(+), 5 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h > index 7b0eb2f..5141fc6 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h > @@ -332,6 +332,7 @@ enum DataFile > FILE_MEMORY_CONST, > FILE_SHADER_INPUT, > FILE_SHADER_OUTPUT, > + FILE_MEMORY_BUFFER, > FILE_MEMORY_GLOBAL, > FILE_MEMORY_SHARED, > FILE_MEMORY_LOCAL, > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp > index baa2e30..7ae0cb2 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp > @@ -373,7 +373,7 @@ static nv50_ir::DataFile translateFile(uint file) > case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE; > case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; > case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; > - case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL; > + case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER; > case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL; > case TGSI_FILE_SAMPLER: > case TGSI_FILE_NULL: > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > index d0936d8..628deb7 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > @@ -1141,13 +1141,14 @@ NVC0LoweringPass::handleATOM(Instruction *atom) > handleSharedATOM(atom); > return true; > default: > - assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); > + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); > base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); > assert(base->reg.size == 8); > if (ptr) > base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); > assert(base->reg.size == 8); > atom->setIndirect(0, 0, base); > + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; > return true; > } > base > @@ -1963,7 +1964,7 @@ NVC0LoweringPass::visit(Instruction *i) > } else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) { > assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL); > i->op = OP_VFETCH; > - } else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) { > + } else if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { > Value *ind = i->getIndirect(0, 1); > Value *ptr = loadResInfo64(ind, 
i->getSrc(0)->reg.fileIndex * 16); > // XXX come up with a way not to do this for EVERY little access but > @@ -1978,6 +1979,7 @@ NVC0LoweringPass::visit(Instruction *i) > } > i->setIndirect(0, 1, NULL); > i->setIndirect(0, 0, ptr); > + i->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; > bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length); > i->setPredicate(CC_NOT_P, pred); > if (i->defExists(0)) { > @@ -1987,7 +1989,7 @@ NVC0LoweringPass::visit(Instruction *i) > break; > case OP_ATOM: > { > - const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL; > + const bool cctl = i->src(0).getFile() == FILE_MEMORY_BUFFER; > handleATOM(i); > handleCasExch(i, cctl); > } > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > index cfa85ec..870b36e 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > @@ -455,6 +455,7 @@ int Symbol::print(char *buf, size_t size, > case FILE_MEMORY_CONST: c = 'c'; break; > case FILE_SHADER_INPUT: c = 'a'; break; > case FILE_SHADER_OUTPUT: c = 'o'; break; > + case FILE_MEMORY_BUFFER: c = 'b'; break; // Only used before loweringCould you please show me the output of NV50_PROG_DEBUG=255 with a test which uses this file type? 
I'm not sure if using b[] is better than g[] actually.> case FILE_MEMORY_GLOBAL: c = 'g'; break; > case FILE_MEMORY_SHARED: c = 's'; break; > case FILE_MEMORY_LOCAL: c = 'l'; break; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > index 2c4d7f5..2af1715 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > @@ -207,6 +207,7 @@ TargetNV50::getFileSize(DataFile file) const > case FILE_MEMORY_CONST: return 65536; > case FILE_SHADER_INPUT: return 0x200; > case FILE_SHADER_OUTPUT: return 0x200; > + case FILE_MEMORY_BUFFER: return 0xffffffff; > case FILE_MEMORY_GLOBAL: return 0xffffffff; > case FILE_MEMORY_SHARED: return 16 << 10; > case FILE_MEMORY_LOCAL: return 48 << 10; > @@ -406,7 +407,8 @@ TargetNV50::isAccessSupported(DataFile file, DataType ty) const > if (ty == TYPE_B96 || ty == TYPE_NONE) > return false; > if (typeSizeof(ty) > 4) > - return (file == FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL); > + return (file == FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL) || > + (file == FILE_MEMORY_BUFFER); > return true; > } > > @@ -509,6 +511,7 @@ int TargetNV50::getLatency(const Instruction *i) const > switch (i->src(0).getFile()) { > case FILE_MEMORY_LOCAL: > case FILE_MEMORY_GLOBAL: > + case FILE_MEMORY_BUFFER: > return 100; // really 400 to 800 > default: > return 22; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > index a03afa8..9e1e7bf 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > @@ -248,6 +248,7 @@ TargetNVC0::getFileSize(DataFile file) const > case FILE_MEMORY_CONST: return 65536; > case FILE_SHADER_INPUT: return 0x400; > case FILE_SHADER_OUTPUT: return 0x400; > + case FILE_MEMORY_BUFFER: 
return 0xffffffff; > case FILE_MEMORY_GLOBAL: return 0xffffffff; > case FILE_MEMORY_SHARED: return 16 << 10; > case FILE_MEMORY_LOCAL: return 48 << 10; >
Hans de Goede
2016-Apr-08 10:17 UTC
[Nouveau] [PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
Hi, On 23-03-16 23:10, Samuel Pitoiset wrote:> Are you sure this won't break compute shaders on fermi? > Could you please double-check that?I just checked: lspci: 01:00.0 VGA compatible controller: NVIDIA Corporation GF119 [GeForce GT 610] (rev a1) Before this patch-set: [hans at plank piglit]$ ./piglit run -o shader -t '.*arb_shader_storage_buffer_object.*' results/shader [9/9] pass: 9 / After this patch-set: [hans at plank piglit]$ ./piglit run -o shader -t '.*arb_shader_storage_buffer_object.*' results/shader [9/9] pass: 9 /> One minor comment below. > > On 03/17/2016 05:07 PM, Hans de Goede wrote: >> Some of the lowering steps we currently do for FILE_MEMORY_GLOBAL only >> apply to buffers, making it impossible to use FILE_MEMORY_GLOBAL for >> OpenCL global buffers. >> >> This commits changes the buffer code to use FILE_MEMORY_BUFFER at the >> ir_from_tgsi and lowering steps, freeing use of FILE_MEMORY_GLOBAL >> for use with OpenCL global buffers. >> >> Note that after lowering buffer accesses use the FILE_MEMORY_GLOBAL >> register file. 
>> >> Tested with piglet on a gk107, before this patch: >> ./piglit run -o shader -t '.*arb_shader_storage_buffer_object.*' results/shader >> [9/9] pass: 9 / >> after: >> ./piglit run -o shader -t '.*arb_shader_storage_buffer_object.*' results/shader >> [9/9] pass: 9 / >> >> Signed-off-by: Hans de Goede <hdegoede at redhat.com> >> --- >> Changes in v2: >> -New patch in v2 of patch-set to re-enable support for global opencl buffers >> --- >> src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + >> src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 +- >> src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 8 +++++--- >> src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + >> src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 5 ++++- >> src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 1 + >> 6 files changed, 13 insertions(+), 5 deletions(-) >> >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h >> index 7b0eb2f..5141fc6 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h >> @@ -332,6 +332,7 @@ enum DataFile >> FILE_MEMORY_CONST, >> FILE_SHADER_INPUT, >> FILE_SHADER_OUTPUT, >> + FILE_MEMORY_BUFFER, >> FILE_MEMORY_GLOBAL, >> FILE_MEMORY_SHARED, >> FILE_MEMORY_LOCAL, >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp >> index baa2e30..7ae0cb2 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp >> @@ -373,7 +373,7 @@ static nv50_ir::DataFile translateFile(uint file) >> case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE; >> case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; >> case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; >> - case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL; >> + case 
TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER; >> case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL; >> case TGSI_FILE_SAMPLER: >> case TGSI_FILE_NULL: >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >> index d0936d8..628deb7 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >> @@ -1141,13 +1141,14 @@ NVC0LoweringPass::handleATOM(Instruction *atom) >> handleSharedATOM(atom); >> return true; >> default: >> - assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); >> + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER); >> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); >> assert(base->reg.size == 8); >> if (ptr) >> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr); >> assert(base->reg.size == 8); >> atom->setIndirect(0, 0, base); >> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; >> return true; >> } >> base >> @@ -1963,7 +1964,7 @@ NVC0LoweringPass::visit(Instruction *i) >> } else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) { >> assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL); >> i->op = OP_VFETCH; >> - } else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) { >> + } else if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { >> Value *ind = i->getIndirect(0, 1); >> Value *ptr = loadResInfo64(ind, i->getSrc(0)->reg.fileIndex * 16); >> // XXX come up with a way not to do this for EVERY little access but >> @@ -1978,6 +1979,7 @@ NVC0LoweringPass::visit(Instruction *i) >> } >> i->setIndirect(0, 1, NULL); >> i->setIndirect(0, 0, ptr); >> + i->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; >> bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length); >> i->setPredicate(CC_NOT_P, pred); >> if (i->defExists(0)) { >> @@ -1987,7 +1989,7 @@ NVC0LoweringPass::visit(Instruction *i) >> break; >> case OP_ATOM: >> { >> - const bool cctl = 
i->src(0).getFile() == FILE_MEMORY_GLOBAL; >> + const bool cctl = i->src(0).getFile() == FILE_MEMORY_BUFFER; >> handleATOM(i); >> handleCasExch(i, cctl); >> } >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp >> index cfa85ec..870b36e 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp >> @@ -455,6 +455,7 @@ int Symbol::print(char *buf, size_t size, >> case FILE_MEMORY_CONST: c = 'c'; break; >> case FILE_SHADER_INPUT: c = 'a'; break; >> case FILE_SHADER_OUTPUT: c = 'o'; break; >> + case FILE_MEMORY_BUFFER: c = 'b'; break; // Only used before lowering > > Could you please show me the output of NV50_PROG_DEBUG=255 with a test which uses this file type? I'm not sure if using b[] is better than g[] actually.NV50_PROG_DEBUG=255 bin/arb_shader_storage_buffer_object-rendering ... MAIN:-1 () BB:0 (52 instructions) - df = { } -> BB:1 (tree) 0: mov u32 %r1 0x00000000 (0) 1: ld u32 %r0 b[0x0] (0) 2: presin f32 %r3 %r0 (0) 3: cos f32 %r3 %r3 (0) 4: mov u32 %r2 %r3 (0) ... And after the first lowering step: MAIN:-1 () BB:0 (77 instructions) - df = { } -> BB:1 (tree) 0: mov u32 %r56 0x00000000 (0) 1: ld u64 %r57d c15[0x300] (0) 2: mov u32 %r58 0x00000004 (0) 3: ld u32 %r59 c15[0x308] (0) 4: set u8 %p60 gt u32 %r58 %r59 (0) 5: mov u32 %r61 0x00000000 (0) 6: not %p60 ld u32 %r62 g[%r57d+0x0] (0) ... Note how the 'b' printing is only used before the buffer access is lowered to a global access, so this seems to be the right thing todo. 
Regards, Hans> >> case FILE_MEMORY_GLOBAL: c = 'g'; break; >> case FILE_MEMORY_SHARED: c = 's'; break; >> case FILE_MEMORY_LOCAL: c = 'l'; break; >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >> index 2c4d7f5..2af1715 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >> @@ -207,6 +207,7 @@ TargetNV50::getFileSize(DataFile file) const >> case FILE_MEMORY_CONST: return 65536; >> case FILE_SHADER_INPUT: return 0x200; >> case FILE_SHADER_OUTPUT: return 0x200; >> + case FILE_MEMORY_BUFFER: return 0xffffffff; >> case FILE_MEMORY_GLOBAL: return 0xffffffff; >> case FILE_MEMORY_SHARED: return 16 << 10; >> case FILE_MEMORY_LOCAL: return 48 << 10; >> @@ -406,7 +407,8 @@ TargetNV50::isAccessSupported(DataFile file, DataType ty) const >> if (ty == TYPE_B96 || ty == TYPE_NONE) >> return false; >> if (typeSizeof(ty) > 4) >> - return (file == FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL); >> + return (file == FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL) || >> + (file == FILE_MEMORY_BUFFER); >> return true; >> } >> >> @@ -509,6 +511,7 @@ int TargetNV50::getLatency(const Instruction *i) const >> switch (i->src(0).getFile()) { >> case FILE_MEMORY_LOCAL: >> case FILE_MEMORY_GLOBAL: >> + case FILE_MEMORY_BUFFER: >> return 100; // really 400 to 800 >> default: >> return 22; >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >> index a03afa8..9e1e7bf 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >> @@ -248,6 +248,7 @@ TargetNVC0::getFileSize(DataFile file) const >> case FILE_MEMORY_CONST: return 65536; >> case FILE_SHADER_INPUT: return 0x400; >> case FILE_SHADER_OUTPUT: return 0x400; >> + case 
FILE_MEMORY_BUFFER: return 0xffffffff; >> case FILE_MEMORY_GLOBAL: return 0xffffffff; >> case FILE_MEMORY_SHARED: return 16 << 10; >> case FILE_MEMORY_LOCAL: return 48 << 10; >>
Possibly Parallel Threads
- [PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
- [PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
- [PATCH mesa 4/6] nouveau: codegen: s/FILE_MEMORY_GLOBAL/FILE_MEMORY_BUFFER/
- [PATCH mesa 5/6] nouveau: codegen: Add support for OpenCL global memory buffers
- [PATCH mesa 1/6] tgsi_build: Fix return of uninitialized memory in tgsi_*_instruction_memory