Samuel Pitoiset
2016-Mar-16 10:37 UTC
[Nouveau] [PATCH mesa 5/6] nouveau: codegen: Add support for OpenCL global memory buffers
Could you please get rid of the cosmetic changes (eg. the switch ones)? Because this doesn't really improve readability and in my opinion these changes should be eventually done in a separate patch. Other than that, this patch is : Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com> Yes, this probably won't work as is for atomic operations but the lowering pass is already here, so it should be easy to make it work. On 03/16/2016 10:23 AM, Hans de Goede wrote:> Add support for OpenCL global memory buffers, note this has only > been tested with regular load and stores and likely needs more work > for e.g. atomic ops. > > Signed-off-by: Hans de Goede <hdegoede at redhat.com> > --- > src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + > .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 31 +++++++++++++++++----- > .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 5 +++- > .../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 10 ++++--- > .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 26 +++++++++++++----- > .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 14 +++++++--- > .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 5 +++- > .../drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + > .../nouveau/codegen/nv50_ir_target_nv50.cpp | 1 + > .../nouveau/codegen/nv50_ir_target_nvc0.cpp | 1 + > 10 files changed, 74 insertions(+), 21 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h > index fdc2195..5141fc6 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h > @@ -333,6 +333,7 @@ enum DataFile > FILE_SHADER_INPUT, > FILE_SHADER_OUTPUT, > FILE_MEMORY_BUFFER, > + FILE_MEMORY_GLOBAL, > FILE_MEMORY_SHARED, > FILE_MEMORY_LOCAL, > FILE_SYSTEM_VALUE, > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp > index 02a1101..62f1598 100644 > --- 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp > @@ -1641,8 +1641,15 @@ CodeEmitterGK110::emitSTORE(const Instruction *i) > int32_t offset = SDATA(i->src(0)).offset; > > switch (i->src(0).getFile()) { > - case FILE_MEMORY_BUFFER: code[1] = 0xe0000000; code[0] = 0x00000000; break; > - case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break; > + case FILE_MEMORY_BUFFER: > + case FILE_MEMORY_GLOBAL: > + code[0] = 0x00000000; > + code[1] = 0xe0000000; > + break; > + case FILE_MEMORY_LOCAL: > + code[0] = 0x00000002; > + code[1] = 0x7a800000; > + break; > case FILE_MEMORY_SHARED: > code[0] = 0x00000002; > if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) > @@ -1678,7 +1685,8 @@ CodeEmitterGK110::emitSTORE(const Instruction *i) > > srcId(i->src(1), 2); > srcId(i->src(0).getIndirect(0), 10); > - if (i->src(0).getFile() == FILE_MEMORY_BUFFER && > + if ((i->src(0).getFile() == FILE_MEMORY_BUFFER || > + i->src(0).getFile() == FILE_MEMORY_GLOBAL) && > i->src(0).isIndirect(0) && > i->getIndirect(0, 0)->reg.size == 8) > code[1] |= 1 << 23; > @@ -1690,8 +1698,15 @@ CodeEmitterGK110::emitLOAD(const Instruction *i) > int32_t offset = SDATA(i->src(0)).offset; > > switch (i->src(0).getFile()) { > - case FILE_MEMORY_BUFFER: code[1] = 0xc0000000; code[0] = 0x00000000; break; > - case FILE_MEMORY_LOCAL: code[1] = 0x7a000000; code[0] = 0x00000002; break; > + case FILE_MEMORY_BUFFER: > + case FILE_MEMORY_GLOBAL: > + code[0] = 0x00000000; > + code[1] = 0xc0000000; > + break; > + case FILE_MEMORY_LOCAL: > + code[0] = 0x00000002; > + code[1] = 0x7a000000; > + break; > case FILE_MEMORY_SHARED: > code[0] = 0x00000002; > if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) > @@ -1800,7 +1815,8 @@ CodeEmitterGK110::emitMOV(const Instruction *i) > static inline bool > uses64bitAddress(const Instruction *ldst) > { > - return ldst->src(0).getFile() == FILE_MEMORY_BUFFER && > + return (ldst->src(0).getFile() == 
FILE_MEMORY_BUFFER || > + ldst->src(0).getFile() == FILE_MEMORY_GLOBAL) && > ldst->src(0).isIndirect(0) && > ldst->getIndirect(0, 0)->reg.size == 8; > } > @@ -1862,7 +1878,8 @@ CodeEmitterGK110::emitCCTL(const Instruction *i) > > code[0] = 0x00000002 | (i->subOp << 2); > > - if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { > + if (i->src(0).getFile() == FILE_MEMORY_BUFFER || > + i->src(0).getFile() == FILE_MEMORY_GLOBAL) { > code[1] = 0x7b000000; > } else { > code[1] = 0x7c000000; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp > index 27f287f..3fcdc55 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp > @@ -2417,7 +2417,8 @@ void > CodeEmitterGM107::emitCCTL() > { > unsigned width; > - if (insn->src(0).getFile() == FILE_MEMORY_BUFFER) { > + if (insn->src(0).getFile() == FILE_MEMORY_BUFFER || > + insn->src(0).getFile() == FILE_MEMORY_GLOBAL) { > emitInsn(0xef600000); > width = 30; > } else { > @@ -2989,6 +2990,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i) > case FILE_MEMORY_LOCAL : emitLDL(); break; > case FILE_MEMORY_SHARED: emitLDS(); break; > case FILE_MEMORY_BUFFER: emitLD(); break; > + case FILE_MEMORY_GLOBAL: emitLD(); break; > default: > assert(!"invalid load"); > emitNOP(); > @@ -3000,6 +3002,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i) > case FILE_MEMORY_LOCAL : emitSTL(); break; > case FILE_MEMORY_SHARED: emitSTS(); break; > case FILE_MEMORY_BUFFER: emitST(); break; > + case FILE_MEMORY_GLOBAL: emitST(); break; > default: > assert(!"invalid load"); > emitNOP(); > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp > index 7476e21..2653c82 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp > @@ 
-663,6 +663,7 @@ CodeEmitterNV50::emitLOAD(const Instruction *i) > code[1] = 0x40000000; > break; > case FILE_MEMORY_BUFFER: > + case FILE_MEMORY_GLOBAL: > code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16); > code[1] = 0x80000000; > break; > @@ -671,7 +672,8 @@ CodeEmitterNV50::emitLOAD(const Instruction *i) > break; > } > if (sf == FILE_MEMORY_LOCAL || > - sf == FILE_MEMORY_BUFFER) > + sf == FILE_MEMORY_BUFFER || > + sf == FILE_MEMORY_GLOBAL) > emitLoadStoreSizeLG(i->sType, 21 + 32); > > setDst(i, 0); > @@ -679,7 +681,8 @@ CodeEmitterNV50::emitLOAD(const Instruction *i) > emitFlagsRd(i); > emitFlagsWr(i); > > - if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { > + if (i->src(0).getFile() == FILE_MEMORY_BUFFER || > + i->src(0).getFile() == FILE_MEMORY_GLOBAL) { > srcId(*i->src(0).getIndirect(0), 9); > } else { > setAReg16(i, 0); > @@ -700,6 +703,7 @@ CodeEmitterNV50::emitSTORE(const Instruction *i) > srcId(i->src(1), 32 + 14); > break; > case FILE_MEMORY_BUFFER: > + case FILE_MEMORY_GLOBAL: > code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16); > code[1] = 0xa0000000; > emitLoadStoreSizeLG(i->dType, 21 + 32); > @@ -737,7 +741,7 @@ CodeEmitterNV50::emitSTORE(const Instruction *i) > break; > } > > - if (f == FILE_MEMORY_BUFFER) > + if (f == FILE_MEMORY_BUFFER || f == FILE_MEMORY_GLOBAL) > srcId(*i->src(0).getIndirect(0), 9); > else > setAReg16(i, 0); > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp > index 6236659..ca475ce 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp > @@ -281,6 +281,7 @@ CodeEmitterNVC0::setAddressByFile(const ValueRef& src) > { > switch (src.getFile()) { > case FILE_MEMORY_BUFFER: > + case FILE_MEMORY_GLOBAL: > srcAddr32(src, 26, 0); > break; > case FILE_MEMORY_LOCAL: > @@ -1768,7 +1769,8 @@ CodeEmitterNVC0::emitCachingMode(CacheMode c) > static inline bool > 
uses64bitAddress(const Instruction *ldst) > { > - return ldst->src(0).getFile() == FILE_MEMORY_BUFFER && > + return (ldst->src(0).getFile() == FILE_MEMORY_BUFFER || > + ldst->src(0).getFile() == FILE_MEMORY_GLOBAL) && > ldst->src(0).isIndirect(0) && > ldst->getIndirect(0, 0)->reg.size == 8; > } > @@ -1779,8 +1781,13 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i) > uint32_t opc; > > switch (i->src(0).getFile()) { > - case FILE_MEMORY_BUFFER: opc = 0x90000000; break; > - case FILE_MEMORY_LOCAL: opc = 0xc8000000; break; > + case FILE_MEMORY_BUFFER: > + case FILE_MEMORY_GLOBAL: > + opc = 0x90000000; > + break; > + case FILE_MEMORY_LOCAL: > + opc = 0xc8000000; > + break; > case FILE_MEMORY_SHARED: > if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) { > if (targ->getChipset() >= NVISA_GK104_CHIPSET) > @@ -1828,8 +1835,13 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i) > code[0] = 0x00000005; > > switch (i->src(0).getFile()) { > - case FILE_MEMORY_BUFFER: opc = 0x80000000; break; > - case FILE_MEMORY_LOCAL: opc = 0xc0000000; break; > + case FILE_MEMORY_BUFFER: > + case FILE_MEMORY_GLOBAL: > + opc = 0x80000000; > + break; > + case FILE_MEMORY_LOCAL: > + opc = 0xc0000000; > + break; > case FILE_MEMORY_SHARED: > if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) { > if (targ->getChipset() >= NVISA_GK104_CHIPSET) > @@ -2090,7 +2102,8 @@ CodeEmitterNVC0::emitCCTL(const Instruction *i) > { > code[0] = 0x00000005 | (i->subOp << 5); > > - if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { > + if (i->src(0).getFile() == FILE_MEMORY_BUFFER || > + i->src(0).getFile() == FILE_MEMORY_GLOBAL) { > code[1] = 0x98000000; > srcAddr32(i->src(0), 28, 2); > } else { > @@ -3122,6 +3135,7 @@ SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const > case FILE_MEMORY_CONST: > case FILE_MEMORY_SHARED: > case FILE_MEMORY_BUFFER: > + case FILE_MEMORY_GLOBAL: > case FILE_SYSTEM_VALUE: > // TODO: any restrictions here ? 
> break; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp > index 91879e4..c167c4a 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp > @@ -374,7 +374,7 @@ static nv50_ir::DataFile translateFile(uint file) > case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; > case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; > case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER; > - case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_BUFFER; > + case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL; > case TGSI_FILE_SAMPLER: > case TGSI_FILE_NULL: > default: > @@ -1284,7 +1284,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) > if (dst.isIndirect(0)) > indirectTempArrays.insert(dst.getArrayId()); > } else > - if (dst.getFile() == TGSI_FILE_BUFFER) { > + if (dst.getFile() == TGSI_FILE_BUFFER || > + (dst.getFile() == TGSI_FILE_MEMORY && > + memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) { > info->io.globalAccess |= 0x2; > } > } > @@ -1295,7 +1297,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) > if (src.isIndirect(0)) > indirectTempArrays.insert(src.getArrayId()); > } else > - if (src.getFile() == TGSI_FILE_BUFFER) { > + if (src.getFile() == TGSI_FILE_BUFFER || > + (src.getFile() == TGSI_FILE_MEMORY && > + memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) { > info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? 
> 0x1 : 0x2; > } else > @@ -1529,6 +1533,10 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address) > > if (tgsiFile == TGSI_FILE_MEMORY) { > switch (code->memoryFiles[fileIdx].mem_type) { > + case TGSI_MEMORY_TYPE_GLOBAL: > + /* No-op this is the default for TGSI_FILE_MEMORY */ > + sym->setFile(FILE_MEMORY_GLOBAL); > + break; > case TGSI_MEMORY_TYPE_SHARED: > sym->setFile(FILE_MEMORY_SHARED); > break; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > index 4a96d04..84d2944 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > @@ -2581,6 +2581,7 @@ MemoryOpt::runOpt(BasicBlock *bb) > ldst->op == OP_MEMBAR) { > purgeRecords(NULL, FILE_MEMORY_LOCAL); > purgeRecords(NULL, FILE_MEMORY_BUFFER); > + purgeRecords(NULL, FILE_MEMORY_GLOBAL); > purgeRecords(NULL, FILE_MEMORY_SHARED); > purgeRecords(NULL, FILE_SHADER_OUTPUT); > } else > @@ -2588,6 +2589,7 @@ MemoryOpt::runOpt(BasicBlock *bb) > if (ldst->src(0).getFile() == FILE_MEMORY_BUFFER) { > purgeRecords(NULL, FILE_MEMORY_LOCAL); > purgeRecords(NULL, FILE_MEMORY_BUFFER); > + purgeRecords(NULL, FILE_MEMORY_GLOBAL); > purgeRecords(NULL, FILE_MEMORY_SHARED); > } else { > purgeRecords(NULL, ldst->src(0).getFile()); > @@ -2607,7 +2609,8 @@ MemoryOpt::runOpt(BasicBlock *bb) > DataFile file = ldst->src(0).getFile(); > > // if ld l[]/g[] look for previous store to eliminate the reload > - if (file == FILE_MEMORY_BUFFER || file == FILE_MEMORY_LOCAL) { > + if (file == FILE_MEMORY_BUFFER || file == FILE_MEMORY_LOCAL || > + file == FILE_MEMORY_GLOBAL) { > // TODO: shared memory ? 
> rec = findRecord(ldst, false, isAdjacent); > if (rec && !isAdjacent) > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > index 73ed753..3917768 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > @@ -456,6 +456,7 @@ int Symbol::print(char *buf, size_t size, > case FILE_SHADER_INPUT: c = 'a'; break; > case FILE_SHADER_OUTPUT: c = 'o'; break; > case FILE_MEMORY_BUFFER: c = 'g'; break; > + case FILE_MEMORY_GLOBAL: c = 'g'; break; > case FILE_MEMORY_SHARED: c = 's'; break; > case FILE_MEMORY_LOCAL: c = 'l'; break; > default: > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > index 1cd45a2..5c60b22 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > @@ -208,6 +208,7 @@ TargetNV50::getFileSize(DataFile file) const > case FILE_SHADER_INPUT: return 0x200; > case FILE_SHADER_OUTPUT: return 0x200; > case FILE_MEMORY_BUFFER: return 0xffffffff; > + case FILE_MEMORY_GLOBAL: return 0xffffffff; > case FILE_MEMORY_SHARED: return 16 << 10; > case FILE_MEMORY_LOCAL: return 48 << 10; > case FILE_SYSTEM_VALUE: return 16; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > index bda59a5..9e1e7bf 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > @@ -249,6 +249,7 @@ TargetNVC0::getFileSize(DataFile file) const > case FILE_SHADER_INPUT: return 0x400; > case FILE_SHADER_OUTPUT: return 0x400; > case FILE_MEMORY_BUFFER: return 0xffffffff; > + case FILE_MEMORY_GLOBAL: return 0xffffffff; > case FILE_MEMORY_SHARED: return 16 << 10; > case FILE_MEMORY_LOCAL: return 48 << 10; > case 
FILE_SYSTEM_VALUE: return 32; >
Hans de Goede
2016-Mar-16 10:45 UTC
[Nouveau] [PATCH mesa 5/6] nouveau: codegen: Add support for OpenCL global memory buffers
Hi, On 16-03-16 11:37, Samuel Pitoiset wrote: > Could you please get rid of the cosmetic changes (eg. the switch ones)? > Because this doesn't really improve readability and in my opinion these changes should be eventually done in a separate patch. I need at least half of those cosmetic changes, because half of them are not cosmetic, e.g. : - case FILE_MEMORY_BUFFER: code[1] = 0xe0000000; code[0] = 0x00000000; break; - case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break; + case FILE_MEMORY_BUFFER: + case FILE_MEMORY_GLOBAL: + code[0] = 0x00000000; + code[1] = 0xe0000000; + break; + case FILE_MEMORY_LOCAL: + code[0] = 0x00000002; + code[1] = 0x7a800000; + break; The first bit actually changes things to have 2 cases for the BUFFER code, another way of writing this would be: + case FILE_MEMORY_GLOBAL: case FILE_MEMORY_BUFFER: code[1] = 0xe0000000; code[0] = 0x00000000; break; case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break; But that just looks weird, if we have multiple case labels we should not use the single line statement following the case label style IMHO, which brings us to: + case FILE_MEMORY_BUFFER: + case FILE_MEMORY_GLOBAL: + code[0] = 0x00000000; + code[1] = 0xe0000000; + break; At which point keeping the LOCAL code looks ugly IMHO: + case FILE_MEMORY_BUFFER: + case FILE_MEMORY_GLOBAL: + code[0] = 0x00000000; + code[1] = 0xe0000000; + break; case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break; case FILE_MEMORY_SHARED: code[0] = 0x00000002; if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) Notice how the FILE_MEMORY_LOCAL case looks weird now. Note I'm open to fixing this however you like, just explaining why I did it the way I did it. > Other than that, this patch is : > > Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com> Thanks for the reviews! 
Regards, Hans> Yes, this probably won't work as is for atomic operations but the lowering pass is already here, so it should be easy to make it work. > > On 03/16/2016 10:23 AM, Hans de Goede wrote: >> Add support for OpenCL global memory buffers, note this has only >> been tested with regular load and stores and likely needs more work >> for e.g. atomic ops. >> >> Signed-off-by: Hans de Goede <hdegoede at redhat.com> >> --- >> src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + >> .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 31 +++++++++++++++++----- >> .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 5 +++- >> .../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 10 ++++--- >> .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 26 +++++++++++++----- >> .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 14 +++++++--- >> .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 5 +++- >> .../drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + >> .../nouveau/codegen/nv50_ir_target_nv50.cpp | 1 + >> .../nouveau/codegen/nv50_ir_target_nvc0.cpp | 1 + >> 10 files changed, 74 insertions(+), 21 deletions(-) >> >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h >> index fdc2195..5141fc6 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h >> @@ -333,6 +333,7 @@ enum DataFile >> FILE_SHADER_INPUT, >> FILE_SHADER_OUTPUT, >> FILE_MEMORY_BUFFER, >> + FILE_MEMORY_GLOBAL, >> FILE_MEMORY_SHARED, >> FILE_MEMORY_LOCAL, >> FILE_SYSTEM_VALUE, >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp >> index 02a1101..62f1598 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp >> @@ -1641,8 +1641,15 @@ CodeEmitterGK110::emitSTORE(const Instruction *i) >> int32_t offset = SDATA(i->src(0)).offset; >> 
>> switch (i->src(0).getFile()) { >> - case FILE_MEMORY_BUFFER: code[1] = 0xe0000000; code[0] = 0x00000000; break; >> - case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break; >> + case FILE_MEMORY_BUFFER: >> + case FILE_MEMORY_GLOBAL: >> + code[0] = 0x00000000; >> + code[1] = 0xe0000000; >> + break; >> + case FILE_MEMORY_LOCAL: >> + code[0] = 0x00000002; >> + code[1] = 0x7a800000; >> + break; >> case FILE_MEMORY_SHARED: >> code[0] = 0x00000002; >> if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) >> @@ -1678,7 +1685,8 @@ CodeEmitterGK110::emitSTORE(const Instruction *i) >> >> srcId(i->src(1), 2); >> srcId(i->src(0).getIndirect(0), 10); >> - if (i->src(0).getFile() == FILE_MEMORY_BUFFER && >> + if ((i->src(0).getFile() == FILE_MEMORY_BUFFER || >> + i->src(0).getFile() == FILE_MEMORY_GLOBAL) && >> i->src(0).isIndirect(0) && >> i->getIndirect(0, 0)->reg.size == 8) >> code[1] |= 1 << 23; >> @@ -1690,8 +1698,15 @@ CodeEmitterGK110::emitLOAD(const Instruction *i) >> int32_t offset = SDATA(i->src(0)).offset; >> >> switch (i->src(0).getFile()) { >> - case FILE_MEMORY_BUFFER: code[1] = 0xc0000000; code[0] = 0x00000000; break; >> - case FILE_MEMORY_LOCAL: code[1] = 0x7a000000; code[0] = 0x00000002; break; >> + case FILE_MEMORY_BUFFER: >> + case FILE_MEMORY_GLOBAL: >> + code[0] = 0x00000000; >> + code[1] = 0xc0000000; >> + break; >> + case FILE_MEMORY_LOCAL: >> + code[0] = 0x00000002; >> + code[1] = 0x7a000000; >> + break; >> case FILE_MEMORY_SHARED: >> code[0] = 0x00000002; >> if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) >> @@ -1800,7 +1815,8 @@ CodeEmitterGK110::emitMOV(const Instruction *i) >> static inline bool >> uses64bitAddress(const Instruction *ldst) >> { >> - return ldst->src(0).getFile() == FILE_MEMORY_BUFFER && >> + return (ldst->src(0).getFile() == FILE_MEMORY_BUFFER || >> + ldst->src(0).getFile() == FILE_MEMORY_GLOBAL) && >> ldst->src(0).isIndirect(0) && >> ldst->getIndirect(0, 0)->reg.size == 8; >> } >> @@ -1862,7 +1878,8 @@ 
CodeEmitterGK110::emitCCTL(const Instruction *i) >> >> code[0] = 0x00000002 | (i->subOp << 2); >> >> - if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { >> + if (i->src(0).getFile() == FILE_MEMORY_BUFFER || >> + i->src(0).getFile() == FILE_MEMORY_GLOBAL) { >> code[1] = 0x7b000000; >> } else { >> code[1] = 0x7c000000; >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >> index 27f287f..3fcdc55 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >> @@ -2417,7 +2417,8 @@ void >> CodeEmitterGM107::emitCCTL() >> { >> unsigned width; >> - if (insn->src(0).getFile() == FILE_MEMORY_BUFFER) { >> + if (insn->src(0).getFile() == FILE_MEMORY_BUFFER || >> + insn->src(0).getFile() == FILE_MEMORY_GLOBAL) { >> emitInsn(0xef600000); >> width = 30; >> } else { >> @@ -2989,6 +2990,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i) >> case FILE_MEMORY_LOCAL : emitLDL(); break; >> case FILE_MEMORY_SHARED: emitLDS(); break; >> case FILE_MEMORY_BUFFER: emitLD(); break; >> + case FILE_MEMORY_GLOBAL: emitLD(); break; >> default: >> assert(!"invalid load"); >> emitNOP(); >> @@ -3000,6 +3002,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i) >> case FILE_MEMORY_LOCAL : emitSTL(); break; >> case FILE_MEMORY_SHARED: emitSTS(); break; >> case FILE_MEMORY_BUFFER: emitST(); break; >> + case FILE_MEMORY_GLOBAL: emitST(); break; >> default: >> assert(!"invalid load"); >> emitNOP(); >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp >> index 7476e21..2653c82 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp >> @@ -663,6 +663,7 @@ CodeEmitterNV50::emitLOAD(const Instruction *i) >> code[1] = 0x40000000; >> break; >> case FILE_MEMORY_BUFFER: >> + case 
FILE_MEMORY_GLOBAL: >> code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16); >> code[1] = 0x80000000; >> break; >> @@ -671,7 +672,8 @@ CodeEmitterNV50::emitLOAD(const Instruction *i) >> break; >> } >> if (sf == FILE_MEMORY_LOCAL || >> - sf == FILE_MEMORY_BUFFER) >> + sf == FILE_MEMORY_BUFFER || >> + sf == FILE_MEMORY_GLOBAL) >> emitLoadStoreSizeLG(i->sType, 21 + 32); >> >> setDst(i, 0); >> @@ -679,7 +681,8 @@ CodeEmitterNV50::emitLOAD(const Instruction *i) >> emitFlagsRd(i); >> emitFlagsWr(i); >> >> - if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { >> + if (i->src(0).getFile() == FILE_MEMORY_BUFFER || >> + i->src(0).getFile() == FILE_MEMORY_GLOBAL) { >> srcId(*i->src(0).getIndirect(0), 9); >> } else { >> setAReg16(i, 0); >> @@ -700,6 +703,7 @@ CodeEmitterNV50::emitSTORE(const Instruction *i) >> srcId(i->src(1), 32 + 14); >> break; >> case FILE_MEMORY_BUFFER: >> + case FILE_MEMORY_GLOBAL: >> code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16); >> code[1] = 0xa0000000; >> emitLoadStoreSizeLG(i->dType, 21 + 32); >> @@ -737,7 +741,7 @@ CodeEmitterNV50::emitSTORE(const Instruction *i) >> break; >> } >> >> - if (f == FILE_MEMORY_BUFFER) >> + if (f == FILE_MEMORY_BUFFER || f == FILE_MEMORY_GLOBAL) >> srcId(*i->src(0).getIndirect(0), 9); >> else >> setAReg16(i, 0); >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp >> index 6236659..ca475ce 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp >> @@ -281,6 +281,7 @@ CodeEmitterNVC0::setAddressByFile(const ValueRef& src) >> { >> switch (src.getFile()) { >> case FILE_MEMORY_BUFFER: >> + case FILE_MEMORY_GLOBAL: >> srcAddr32(src, 26, 0); >> break; >> case FILE_MEMORY_LOCAL: >> @@ -1768,7 +1769,8 @@ CodeEmitterNVC0::emitCachingMode(CacheMode c) >> static inline bool >> uses64bitAddress(const Instruction *ldst) >> { >> - return 
ldst->src(0).getFile() == FILE_MEMORY_BUFFER && >> + return (ldst->src(0).getFile() == FILE_MEMORY_BUFFER || >> + ldst->src(0).getFile() == FILE_MEMORY_GLOBAL) && >> ldst->src(0).isIndirect(0) && >> ldst->getIndirect(0, 0)->reg.size == 8; >> } >> @@ -1779,8 +1781,13 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i) >> uint32_t opc; >> >> switch (i->src(0).getFile()) { >> - case FILE_MEMORY_BUFFER: opc = 0x90000000; break; >> - case FILE_MEMORY_LOCAL: opc = 0xc8000000; break; >> + case FILE_MEMORY_BUFFER: >> + case FILE_MEMORY_GLOBAL: >> + opc = 0x90000000; >> + break; >> + case FILE_MEMORY_LOCAL: >> + opc = 0xc8000000; >> + break; >> case FILE_MEMORY_SHARED: >> if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) { >> if (targ->getChipset() >= NVISA_GK104_CHIPSET) >> @@ -1828,8 +1835,13 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i) >> code[0] = 0x00000005; >> >> switch (i->src(0).getFile()) { >> - case FILE_MEMORY_BUFFER: opc = 0x80000000; break; >> - case FILE_MEMORY_LOCAL: opc = 0xc0000000; break; >> + case FILE_MEMORY_BUFFER: >> + case FILE_MEMORY_GLOBAL: >> + opc = 0x80000000; >> + break; >> + case FILE_MEMORY_LOCAL: >> + opc = 0xc0000000; >> + break; >> case FILE_MEMORY_SHARED: >> if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) { >> if (targ->getChipset() >= NVISA_GK104_CHIPSET) >> @@ -2090,7 +2102,8 @@ CodeEmitterNVC0::emitCCTL(const Instruction *i) >> { >> code[0] = 0x00000005 | (i->subOp << 5); >> >> - if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { >> + if (i->src(0).getFile() == FILE_MEMORY_BUFFER || >> + i->src(0).getFile() == FILE_MEMORY_GLOBAL) { >> code[1] = 0x98000000; >> srcAddr32(i->src(0), 28, 2); >> } else { >> @@ -3122,6 +3135,7 @@ SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const >> case FILE_MEMORY_CONST: >> case FILE_MEMORY_SHARED: >> case FILE_MEMORY_BUFFER: >> + case FILE_MEMORY_GLOBAL: >> case FILE_SYSTEM_VALUE: >> // TODO: any restrictions here ? 
>> break; >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp >> index 91879e4..c167c4a 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp >> @@ -374,7 +374,7 @@ static nv50_ir::DataFile translateFile(uint file) >> case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; >> case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; >> case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER; >> - case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_BUFFER; >> + case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL; >> case TGSI_FILE_SAMPLER: >> case TGSI_FILE_NULL: >> default: >> @@ -1284,7 +1284,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) >> if (dst.isIndirect(0)) >> indirectTempArrays.insert(dst.getArrayId()); >> } else >> - if (dst.getFile() == TGSI_FILE_BUFFER) { >> + if (dst.getFile() == TGSI_FILE_BUFFER || >> + (dst.getFile() == TGSI_FILE_MEMORY && >> + memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) { >> info->io.globalAccess |= 0x2; >> } >> } >> @@ -1295,7 +1297,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) >> if (src.isIndirect(0)) >> indirectTempArrays.insert(src.getArrayId()); >> } else >> - if (src.getFile() == TGSI_FILE_BUFFER) { >> + if (src.getFile() == TGSI_FILE_BUFFER || >> + (src.getFile() == TGSI_FILE_MEMORY && >> + memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) { >> info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? 
>> 0x1 : 0x2; >> } else >> @@ -1529,6 +1533,10 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address) >> >> if (tgsiFile == TGSI_FILE_MEMORY) { >> switch (code->memoryFiles[fileIdx].mem_type) { >> + case TGSI_MEMORY_TYPE_GLOBAL: >> + /* No-op this is the default for TGSI_FILE_MEMORY */ >> + sym->setFile(FILE_MEMORY_GLOBAL); >> + break; >> case TGSI_MEMORY_TYPE_SHARED: >> sym->setFile(FILE_MEMORY_SHARED); >> break; >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> index 4a96d04..84d2944 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >> @@ -2581,6 +2581,7 @@ MemoryOpt::runOpt(BasicBlock *bb) >> ldst->op == OP_MEMBAR) { >> purgeRecords(NULL, FILE_MEMORY_LOCAL); >> purgeRecords(NULL, FILE_MEMORY_BUFFER); >> + purgeRecords(NULL, FILE_MEMORY_GLOBAL); >> purgeRecords(NULL, FILE_MEMORY_SHARED); >> purgeRecords(NULL, FILE_SHADER_OUTPUT); >> } else >> @@ -2588,6 +2589,7 @@ MemoryOpt::runOpt(BasicBlock *bb) >> if (ldst->src(0).getFile() == FILE_MEMORY_BUFFER) { >> purgeRecords(NULL, FILE_MEMORY_LOCAL); >> purgeRecords(NULL, FILE_MEMORY_BUFFER); >> + purgeRecords(NULL, FILE_MEMORY_GLOBAL); >> purgeRecords(NULL, FILE_MEMORY_SHARED); >> } else { >> purgeRecords(NULL, ldst->src(0).getFile()); >> @@ -2607,7 +2609,8 @@ MemoryOpt::runOpt(BasicBlock *bb) >> DataFile file = ldst->src(0).getFile(); >> >> // if ld l[]/g[] look for previous store to eliminate the reload >> - if (file == FILE_MEMORY_BUFFER || file == FILE_MEMORY_LOCAL) { >> + if (file == FILE_MEMORY_BUFFER || file == FILE_MEMORY_LOCAL || >> + file == FILE_MEMORY_GLOBAL) { >> // TODO: shared memory ? 
>> rec = findRecord(ldst, false, isAdjacent); >> if (rec && !isAdjacent) >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp >> index 73ed753..3917768 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp >> @@ -456,6 +456,7 @@ int Symbol::print(char *buf, size_t size, >> case FILE_SHADER_INPUT: c = 'a'; break; >> case FILE_SHADER_OUTPUT: c = 'o'; break; >> case FILE_MEMORY_BUFFER: c = 'g'; break; >> + case FILE_MEMORY_GLOBAL: c = 'g'; break; >> case FILE_MEMORY_SHARED: c = 's'; break; >> case FILE_MEMORY_LOCAL: c = 'l'; break; >> default: >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >> index 1cd45a2..5c60b22 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >> @@ -208,6 +208,7 @@ TargetNV50::getFileSize(DataFile file) const >> case FILE_SHADER_INPUT: return 0x200; >> case FILE_SHADER_OUTPUT: return 0x200; >> case FILE_MEMORY_BUFFER: return 0xffffffff; >> + case FILE_MEMORY_GLOBAL: return 0xffffffff; >> case FILE_MEMORY_SHARED: return 16 << 10; >> case FILE_MEMORY_LOCAL: return 48 << 10; >> case FILE_SYSTEM_VALUE: return 16; >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >> index bda59a5..9e1e7bf 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >> @@ -249,6 +249,7 @@ TargetNVC0::getFileSize(DataFile file) const >> case FILE_SHADER_INPUT: return 0x400; >> case FILE_SHADER_OUTPUT: return 0x400; >> case FILE_MEMORY_BUFFER: return 0xffffffff; >> + case FILE_MEMORY_GLOBAL: return 0xffffffff; >> case FILE_MEMORY_SHARED: return 16 << 10; >> case 
FILE_MEMORY_LOCAL: return 48 << 10; >> case FILE_SYSTEM_VALUE: return 32; >>
Samuel Pitoiset
2016-Mar-16 10:49 UTC
[Nouveau] [PATCH mesa 5/6] nouveau: codegen: Add support for OpenCL global memory buffers
On 03/16/2016 11:45 AM, Hans de Goede wrote: > Hi, > > On 16-03-16 11:37, Samuel Pitoiset wrote: >> Could you please get rid of the cosmetic changes (eg. the switch ones)? >> Because this doesn't really improve readability and in my opinion >> these changes should be eventually done in a separate patch. > > I need at least half of those cosmetic changes, because half of them are > not cosmetic, e.g. : > > - case FILE_MEMORY_BUFFER: code[1] = 0xe0000000; code[0] = 0x00000000; > break; > - case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; > break; > + case FILE_MEMORY_BUFFER: > + case FILE_MEMORY_GLOBAL: > + code[0] = 0x00000000; > + code[1] = 0xe0000000; > + break; > + case FILE_MEMORY_LOCAL: > + code[0] = 0x00000002; > + code[1] = 0x7a800000; > + break; > > The first bit actually changes things to have 2 cases for the BUFFER > code, > another way of writing this would be: > > + case FILE_MEMORY_GLOBAL: > case FILE_MEMORY_BUFFER: code[1] = 0xe0000000; code[0] = > 0x00000000; break; > case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; > break; > > But that just looks weird, if we have multiple case labels we should not > use > the single line statement following the case label style IMHO, which > brings us to: > > + case FILE_MEMORY_BUFFER: > + case FILE_MEMORY_GLOBAL: > + code[0] = 0x00000000; > + code[1] = 0xe0000000; > + break; > > At which point keeping the LOCAL code looks ugly IMHO: > > + case FILE_MEMORY_BUFFER: > + case FILE_MEMORY_GLOBAL: > + code[0] = 0x00000000; > + code[1] = 0xe0000000; > + break; > case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; > break; > case FILE_MEMORY_SHARED: > code[0] = 0x00000002; > if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) > > Notice how the FILE_MEMORY_LOCAL case looks weird now. > > Note I'm open to fixing this however you like, just explaining why I did it > the way I did it. This makes more sense actually, and you have strong arguments.
:-) Feel free to keep this as is, but at the first look it looked weird.> >> Other than that, this patch is : >> >> Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com> > > Thanks for the reviews!You're welcome.> > Regards, > > Hans > > >> Yes, this probably won't work as is for atomic operations but the >> lowering pass is already here, so it should be easy to make it work. >> >> On 03/16/2016 10:23 AM, Hans de Goede wrote: >>> Add support for OpenCL global memory buffers, note this has only >>> been tested with regular load and stores and likely needs more work >>> for e.g. atomic ops. >>> >>> Signed-off-by: Hans de Goede <hdegoede at redhat.com> >>> --- >>> src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + >>> .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 31 >>> +++++++++++++++++----- >>> .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 5 +++- >>> .../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 10 ++++--- >>> .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 26 >>> +++++++++++++----- >>> .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 14 +++++++--- >>> .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 5 +++- >>> .../drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + >>> .../nouveau/codegen/nv50_ir_target_nv50.cpp | 1 + >>> .../nouveau/codegen/nv50_ir_target_nvc0.cpp | 1 + >>> 10 files changed, 74 insertions(+), 21 deletions(-) >>> >>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir.h >>> index fdc2195..5141fc6 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h >>> @@ -333,6 +333,7 @@ enum DataFile >>> FILE_SHADER_INPUT, >>> FILE_SHADER_OUTPUT, >>> FILE_MEMORY_BUFFER, >>> + FILE_MEMORY_GLOBAL, >>> FILE_MEMORY_SHARED, >>> FILE_MEMORY_LOCAL, >>> FILE_SYSTEM_VALUE, >>> diff --git >>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp >>> 
index 02a1101..62f1598 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp >>> @@ -1641,8 +1641,15 @@ CodeEmitterGK110::emitSTORE(const Instruction *i) >>> int32_t offset = SDATA(i->src(0)).offset; >>> >>> switch (i->src(0).getFile()) { >>> - case FILE_MEMORY_BUFFER: code[1] = 0xe0000000; code[0] >>> 0x00000000; break; >>> - case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] >>> 0x00000002; break; >>> + case FILE_MEMORY_BUFFER: >>> + case FILE_MEMORY_GLOBAL: >>> + code[0] = 0x00000000; >>> + code[1] = 0xe0000000; >>> + break; >>> + case FILE_MEMORY_LOCAL: >>> + code[0] = 0x00000002; >>> + code[1] = 0x7a800000; >>> + break; >>> case FILE_MEMORY_SHARED: >>> code[0] = 0x00000002; >>> if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) >>> @@ -1678,7 +1685,8 @@ CodeEmitterGK110::emitSTORE(const Instruction *i) >>> >>> srcId(i->src(1), 2); >>> srcId(i->src(0).getIndirect(0), 10); >>> - if (i->src(0).getFile() == FILE_MEMORY_BUFFER && >>> + if ((i->src(0).getFile() == FILE_MEMORY_BUFFER || >>> + i->src(0).getFile() == FILE_MEMORY_GLOBAL) && >>> i->src(0).isIndirect(0) && >>> i->getIndirect(0, 0)->reg.size == 8) >>> code[1] |= 1 << 23; >>> @@ -1690,8 +1698,15 @@ CodeEmitterGK110::emitLOAD(const Instruction *i) >>> int32_t offset = SDATA(i->src(0)).offset; >>> >>> switch (i->src(0).getFile()) { >>> - case FILE_MEMORY_BUFFER: code[1] = 0xc0000000; code[0] >>> 0x00000000; break; >>> - case FILE_MEMORY_LOCAL: code[1] = 0x7a000000; code[0] >>> 0x00000002; break; >>> + case FILE_MEMORY_BUFFER: >>> + case FILE_MEMORY_GLOBAL: >>> + code[0] = 0x00000000; >>> + code[1] = 0xc0000000; >>> + break; >>> + case FILE_MEMORY_LOCAL: >>> + code[0] = 0x00000002; >>> + code[1] = 0x7a000000; >>> + break; >>> case FILE_MEMORY_SHARED: >>> code[0] = 0x00000002; >>> if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) >>> @@ -1800,7 +1815,8 @@ CodeEmitterGK110::emitMOV(const Instruction *i) >>> static inline bool >>> 
uses64bitAddress(const Instruction *ldst) >>> { >>> - return ldst->src(0).getFile() == FILE_MEMORY_BUFFER && >>> + return (ldst->src(0).getFile() == FILE_MEMORY_BUFFER || >>> + ldst->src(0).getFile() == FILE_MEMORY_GLOBAL) && >>> ldst->src(0).isIndirect(0) && >>> ldst->getIndirect(0, 0)->reg.size == 8; >>> } >>> @@ -1862,7 +1878,8 @@ CodeEmitterGK110::emitCCTL(const Instruction *i) >>> >>> code[0] = 0x00000002 | (i->subOp << 2); >>> >>> - if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { >>> + if (i->src(0).getFile() == FILE_MEMORY_BUFFER || >>> + i->src(0).getFile() == FILE_MEMORY_GLOBAL) { >>> code[1] = 0x7b000000; >>> } else { >>> code[1] = 0x7c000000; >>> diff --git >>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >>> index 27f287f..3fcdc55 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp >>> @@ -2417,7 +2417,8 @@ void >>> CodeEmitterGM107::emitCCTL() >>> { >>> unsigned width; >>> - if (insn->src(0).getFile() == FILE_MEMORY_BUFFER) { >>> + if (insn->src(0).getFile() == FILE_MEMORY_BUFFER || >>> + insn->src(0).getFile() == FILE_MEMORY_GLOBAL) { >>> emitInsn(0xef600000); >>> width = 30; >>> } else { >>> @@ -2989,6 +2990,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i) >>> case FILE_MEMORY_LOCAL : emitLDL(); break; >>> case FILE_MEMORY_SHARED: emitLDS(); break; >>> case FILE_MEMORY_BUFFER: emitLD(); break; >>> + case FILE_MEMORY_GLOBAL: emitLD(); break; >>> default: >>> assert(!"invalid load"); >>> emitNOP(); >>> @@ -3000,6 +3002,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i) >>> case FILE_MEMORY_LOCAL : emitSTL(); break; >>> case FILE_MEMORY_SHARED: emitSTS(); break; >>> case FILE_MEMORY_BUFFER: emitST(); break; >>> + case FILE_MEMORY_GLOBAL: emitST(); break; >>> default: >>> assert(!"invalid load"); >>> emitNOP(); >>> diff --git >>> 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp >>> index 7476e21..2653c82 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp >>> @@ -663,6 +663,7 @@ CodeEmitterNV50::emitLOAD(const Instruction *i) >>> code[1] = 0x40000000; >>> break; >>> case FILE_MEMORY_BUFFER: >>> + case FILE_MEMORY_GLOBAL: >>> code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16); >>> code[1] = 0x80000000; >>> break; >>> @@ -671,7 +672,8 @@ CodeEmitterNV50::emitLOAD(const Instruction *i) >>> break; >>> } >>> if (sf == FILE_MEMORY_LOCAL || >>> - sf == FILE_MEMORY_BUFFER) >>> + sf == FILE_MEMORY_BUFFER || >>> + sf == FILE_MEMORY_GLOBAL) >>> emitLoadStoreSizeLG(i->sType, 21 + 32); >>> >>> setDst(i, 0); >>> @@ -679,7 +681,8 @@ CodeEmitterNV50::emitLOAD(const Instruction *i) >>> emitFlagsRd(i); >>> emitFlagsWr(i); >>> >>> - if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { >>> + if (i->src(0).getFile() == FILE_MEMORY_BUFFER || >>> + i->src(0).getFile() == FILE_MEMORY_GLOBAL) { >>> srcId(*i->src(0).getIndirect(0), 9); >>> } else { >>> setAReg16(i, 0); >>> @@ -700,6 +703,7 @@ CodeEmitterNV50::emitSTORE(const Instruction *i) >>> srcId(i->src(1), 32 + 14); >>> break; >>> case FILE_MEMORY_BUFFER: >>> + case FILE_MEMORY_GLOBAL: >>> code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16); >>> code[1] = 0xa0000000; >>> emitLoadStoreSizeLG(i->dType, 21 + 32); >>> @@ -737,7 +741,7 @@ CodeEmitterNV50::emitSTORE(const Instruction *i) >>> break; >>> } >>> >>> - if (f == FILE_MEMORY_BUFFER) >>> + if (f == FILE_MEMORY_BUFFER || f == FILE_MEMORY_GLOBAL) >>> srcId(*i->src(0).getIndirect(0), 9); >>> else >>> setAReg16(i, 0); >>> diff --git >>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp >>> index 6236659..ca475ce 100644 >>> --- 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp >>> @@ -281,6 +281,7 @@ CodeEmitterNVC0::setAddressByFile(const ValueRef& >>> src) >>> { >>> switch (src.getFile()) { >>> case FILE_MEMORY_BUFFER: >>> + case FILE_MEMORY_GLOBAL: >>> srcAddr32(src, 26, 0); >>> break; >>> case FILE_MEMORY_LOCAL: >>> @@ -1768,7 +1769,8 @@ CodeEmitterNVC0::emitCachingMode(CacheMode c) >>> static inline bool >>> uses64bitAddress(const Instruction *ldst) >>> { >>> - return ldst->src(0).getFile() == FILE_MEMORY_BUFFER && >>> + return (ldst->src(0).getFile() == FILE_MEMORY_BUFFER || >>> + ldst->src(0).getFile() == FILE_MEMORY_GLOBAL) && >>> ldst->src(0).isIndirect(0) && >>> ldst->getIndirect(0, 0)->reg.size == 8; >>> } >>> @@ -1779,8 +1781,13 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i) >>> uint32_t opc; >>> >>> switch (i->src(0).getFile()) { >>> - case FILE_MEMORY_BUFFER: opc = 0x90000000; break; >>> - case FILE_MEMORY_LOCAL: opc = 0xc8000000; break; >>> + case FILE_MEMORY_BUFFER: >>> + case FILE_MEMORY_GLOBAL: >>> + opc = 0x90000000; >>> + break; >>> + case FILE_MEMORY_LOCAL: >>> + opc = 0xc8000000; >>> + break; >>> case FILE_MEMORY_SHARED: >>> if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) { >>> if (targ->getChipset() >= NVISA_GK104_CHIPSET) >>> @@ -1828,8 +1835,13 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i) >>> code[0] = 0x00000005; >>> >>> switch (i->src(0).getFile()) { >>> - case FILE_MEMORY_BUFFER: opc = 0x80000000; break; >>> - case FILE_MEMORY_LOCAL: opc = 0xc0000000; break; >>> + case FILE_MEMORY_BUFFER: >>> + case FILE_MEMORY_GLOBAL: >>> + opc = 0x80000000; >>> + break; >>> + case FILE_MEMORY_LOCAL: >>> + opc = 0xc0000000; >>> + break; >>> case FILE_MEMORY_SHARED: >>> if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) { >>> if (targ->getChipset() >= NVISA_GK104_CHIPSET) >>> @@ -2090,7 +2102,8 @@ CodeEmitterNVC0::emitCCTL(const Instruction *i) >>> { >>> code[0] = 0x00000005 | (i->subOp << 5); >>> 
>>> - if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { >>> + if (i->src(0).getFile() == FILE_MEMORY_BUFFER || >>> + i->src(0).getFile() == FILE_MEMORY_GLOBAL) { >>> code[1] = 0x98000000; >>> srcAddr32(i->src(0), 28, 2); >>> } else { >>> @@ -3122,6 +3135,7 @@ SchedDataCalculator::checkRd(const Value *v, >>> int cycle, int& delay) const >>> case FILE_MEMORY_CONST: >>> case FILE_MEMORY_SHARED: >>> case FILE_MEMORY_BUFFER: >>> + case FILE_MEMORY_GLOBAL: >>> case FILE_SYSTEM_VALUE: >>> // TODO: any restrictions here ? >>> break; >>> diff --git >>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp >>> index 91879e4..c167c4a 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp >>> @@ -374,7 +374,7 @@ static nv50_ir::DataFile translateFile(uint file) >>> case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; >>> case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; >>> case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER; >>> - case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_BUFFER; >>> + case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL; >>> case TGSI_FILE_SAMPLER: >>> case TGSI_FILE_NULL: >>> default: >>> @@ -1284,7 +1284,9 @@ bool Source::scanInstruction(const struct >>> tgsi_full_instruction *inst) >>> if (dst.isIndirect(0)) >>> indirectTempArrays.insert(dst.getArrayId()); >>> } else >>> - if (dst.getFile() == TGSI_FILE_BUFFER) { >>> + if (dst.getFile() == TGSI_FILE_BUFFER || >>> + (dst.getFile() == TGSI_FILE_MEMORY && >>> + memoryFiles[dst.getIndex(0)].mem_type =>>> TGSI_MEMORY_TYPE_GLOBAL)) { >>> info->io.globalAccess |= 0x2; >>> } >>> } >>> @@ -1295,7 +1297,9 @@ bool Source::scanInstruction(const struct >>> tgsi_full_instruction *inst) >>> if (src.isIndirect(0)) >>> indirectTempArrays.insert(src.getArrayId()); >>> } else >>> - if (src.getFile() == TGSI_FILE_BUFFER) { >>> + if 
(src.getFile() == TGSI_FILE_BUFFER || >>> + (src.getFile() == TGSI_FILE_MEMORY && >>> + memoryFiles[src.getIndex(0)].mem_type =>>> TGSI_MEMORY_TYPE_GLOBAL)) { >>> info->io.globalAccess |= (insn.getOpcode() =>>> TGSI_OPCODE_LOAD) ? >>> 0x1 : 0x2; >>> } else >>> @@ -1529,6 +1533,10 @@ Converter::makeSym(uint tgsiFile, int fileIdx, >>> int idx, int c, uint32_t address) >>> >>> if (tgsiFile == TGSI_FILE_MEMORY) { >>> switch (code->memoryFiles[fileIdx].mem_type) { >>> + case TGSI_MEMORY_TYPE_GLOBAL: >>> + /* No-op this is the default for TGSI_FILE_MEMORY */ >>> + sym->setFile(FILE_MEMORY_GLOBAL); >>> + break; >>> case TGSI_MEMORY_TYPE_SHARED: >>> sym->setFile(FILE_MEMORY_SHARED); >>> break; >>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >>> index 4a96d04..84d2944 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp >>> @@ -2581,6 +2581,7 @@ MemoryOpt::runOpt(BasicBlock *bb) >>> ldst->op == OP_MEMBAR) { >>> purgeRecords(NULL, FILE_MEMORY_LOCAL); >>> purgeRecords(NULL, FILE_MEMORY_BUFFER); >>> + purgeRecords(NULL, FILE_MEMORY_GLOBAL); >>> purgeRecords(NULL, FILE_MEMORY_SHARED); >>> purgeRecords(NULL, FILE_SHADER_OUTPUT); >>> } else >>> @@ -2588,6 +2589,7 @@ MemoryOpt::runOpt(BasicBlock *bb) >>> if (ldst->src(0).getFile() == FILE_MEMORY_BUFFER) { >>> purgeRecords(NULL, FILE_MEMORY_LOCAL); >>> purgeRecords(NULL, FILE_MEMORY_BUFFER); >>> + purgeRecords(NULL, FILE_MEMORY_GLOBAL); >>> purgeRecords(NULL, FILE_MEMORY_SHARED); >>> } else { >>> purgeRecords(NULL, ldst->src(0).getFile()); >>> @@ -2607,7 +2609,8 @@ MemoryOpt::runOpt(BasicBlock *bb) >>> DataFile file = ldst->src(0).getFile(); >>> >>> // if ld l[]/g[] look for previous store to eliminate the >>> reload >>> - if (file == FILE_MEMORY_BUFFER || file == FILE_MEMORY_LOCAL) { >>> + if (file == FILE_MEMORY_BUFFER || file == FILE_MEMORY_LOCAL || >>> + 
file == FILE_MEMORY_GLOBAL) { >>> // TODO: shared memory ? >>> rec = findRecord(ldst, false, isAdjacent); >>> if (rec && !isAdjacent) >>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp >>> index 73ed753..3917768 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp >>> @@ -456,6 +456,7 @@ int Symbol::print(char *buf, size_t size, >>> case FILE_SHADER_INPUT: c = 'a'; break; >>> case FILE_SHADER_OUTPUT: c = 'o'; break; >>> case FILE_MEMORY_BUFFER: c = 'g'; break; >>> + case FILE_MEMORY_GLOBAL: c = 'g'; break; >>> case FILE_MEMORY_SHARED: c = 's'; break; >>> case FILE_MEMORY_LOCAL: c = 'l'; break; >>> default: >>> diff --git >>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >>> index 1cd45a2..5c60b22 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp >>> @@ -208,6 +208,7 @@ TargetNV50::getFileSize(DataFile file) const >>> case FILE_SHADER_INPUT: return 0x200; >>> case FILE_SHADER_OUTPUT: return 0x200; >>> case FILE_MEMORY_BUFFER: return 0xffffffff; >>> + case FILE_MEMORY_GLOBAL: return 0xffffffff; >>> case FILE_MEMORY_SHARED: return 16 << 10; >>> case FILE_MEMORY_LOCAL: return 48 << 10; >>> case FILE_SYSTEM_VALUE: return 16; >>> diff --git >>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >>> index bda59a5..9e1e7bf 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp >>> @@ -249,6 +249,7 @@ TargetNVC0::getFileSize(DataFile file) const >>> case FILE_SHADER_INPUT: return 0x400; >>> case FILE_SHADER_OUTPUT: return 0x400; >>> case FILE_MEMORY_BUFFER: return 
0xffffffff; >>> + case FILE_MEMORY_GLOBAL: return 0xffffffff; >>> case FILE_MEMORY_SHARED: return 16 << 10; >>> case FILE_MEMORY_LOCAL: return 48 << 10; >>> case FILE_SYSTEM_VALUE: return 32; >>>
Apparently Analogous Threads
- [PATCH mesa 5/6] nouveau: codegen: Add support for OpenCL global memory buffers
- [PATCH mesa 4/6] nouveau: codegen: s/FILE_MEMORY_GLOBAL/FILE_MEMORY_BUFFER/
- [PATCH mesa 4/6] nouveau: codegen: s/FILE_MEMORY_GLOBAL/FILE_MEMORY_BUFFER/
- [PATCH mesa 4/6] nouveau: codegen: s/FILE_MEMORY_GLOBAL/FILE_MEMORY_BUFFER/
- [PATCH mesa 5/6] nouveau: codegen: Add support for OpenCL global memory buffers