Displaying 20 results from an estimated 23 matches for "file_gpr".
2017 Mar 26
5
[PATCH v5 0/5] nvc0/ir: add support for MAD/FMA PostRALoadPropagation
Was "nv50/ir: PostRaConstantFolding improvements" before.
Nothing really changed from the last version, just minor things.
Karol Herbst (5):
nv50/ir: restructure and rename postraconstantfolding pass
nv50/ir: implement mad post ra folding for nvc0+
gk110/ir: add LIMM form of mad
gm107/ir: add LIMM form of mad
nv50/ir: also do PostRaLoadPropagation for FMA
2015 Jan 13
3
nv50/ir: Implement short notation for MAD V2
V2: clarify code, commit msgs, add comments. Drop code that was supposed to
make register assignment prefer SDST == SRC2 (patch 2) for now, because it
didn't quite do what I intended.
2015 Jan 11
6
[PATCH 1/3] nv50/ir: Add support for MAD short+IMM notation
MAD IMM has a very specific SDST == SSRC2 requirement, so don't emit
Signed-off-by: Roy Spliet <rspliet at eclipso.eu>
---
.../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 18 ++++++++++++------
.../drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 2 +-
2 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
2015 Jan 23
3
[PATCH 1/2] nv50/ir: Add support for MAD short+IMM notation
Add emission rules for negative and saturate flags for MAD 4-byte opcodes,
and get rid of constraints. Short MAD has a very specific SDST == SSRC2
requirement, and since MAD IMM is short notation + 4-byte immediate, don't
have the compiler create MAD IMM instructions yet.
V2: Document MAD as supported short form
Signed-off-by: Roy Spliet <rspliet at eclipso.eu>
---
2015 Jan 11
0
[PATCH 3/3] nv50/ir: Fold IMM into MAD
...te:
+ virtual bool visit(BasicBlock *);
+};
+
+bool
+NV50PostRaConstantFolding::visit(BasicBlock *bb)
+{
+ Value *vtmp;
+ Instruction *def;
+
+ for (Instruction *i = bb->getFirst(); i; i = i->next) {
+ switch (i->op) {
+ case OP_MAD:
+ if(i->def(0).getFile() == FILE_GPR &&
+ i->src(0).getFile() == FILE_GPR &&
+ i->src(1).getFile() == FILE_GPR &&
+ i->src(2).getFile() == FILE_GPR &&
+ i->getDef(0)->reg.data.id == i->getSrc(2)->reg.data.id) {
+ for...
2015 Jan 13
0
[PATCH 2/3] nv50/ir: Fold IMM into MAD
...te:
+ virtual bool visit(BasicBlock *);
+};
+
+bool
+NV50PostRaConstantFolding::visit(BasicBlock *bb)
+{
+ Value *vtmp;
+ Instruction *def;
+
+ for (Instruction *i = bb->getFirst(); i; i = i->next) {
+ switch (i->op) {
+ case OP_MAD:
+ if(i->def(0).getFile() != FILE_GPR ||
+ i->src(0).getFile() != FILE_GPR ||
+ i->src(1).getFile() != FILE_GPR ||
+ i->src(2).getFile() != FILE_GPR ||
+ i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id)
+ break;
+
+ for (int s = 0; s < 2; s++)...
2015 Jan 23
0
[PATCH 2/2] nv50/ir: Fold IMM into MAD
...e:
+ virtual bool visit(BasicBlock *);
+};
+
+bool
+NV50PostRaConstantFolding::visit(BasicBlock *bb)
+{
+ Value *vtmp;
+ Instruction *def;
+
+ for (Instruction *i = bb->getFirst(); i; i = i->next) {
+ switch (i->op) {
+ case OP_MAD:
+ if (i->def(0).getFile() != FILE_GPR ||
+ i->src(0).getFile() != FILE_GPR ||
+ i->src(1).getFile() != FILE_GPR ||
+ i->src(2).getFile() != FILE_GPR ||
+ i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id)
+ break;
+
+ def = i->getSrc(1)->...
2015 Feb 06
0
[PATCH 3/3] nv50/ir: Fold IMM into MAD
...e:
+ virtual bool visit(BasicBlock *);
+};
+
+bool
+NV50PostRaConstantFolding::visit(BasicBlock *bb)
+{
+ Value *vtmp;
+ Instruction *def;
+
+ for (Instruction *i = bb->getFirst(); i; i = i->next) {
+ switch (i->op) {
+ case OP_MAD:
+ if (i->def(0).getFile() != FILE_GPR ||
+ i->src(0).getFile() != FILE_GPR ||
+ i->src(1).getFile() != FILE_GPR ||
+ i->src(2).getFile() != FILE_GPR ||
+ i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id)
+ break;
+
+ def = i->getSrc(1)->...
2015 Feb 06
2
[PATCH 1/3] nv50/ir: Add support for MAD 4-byte opcode
Add emission rules for negative and saturate flags for MAD 4-byte opcodes,
and get rid of some of the constraints. Obviously tested with a wide variety
of shaders.
V2: Document MAD as supported short form
V3: Split up IMM from short-form modifiers
Signed-off-by: Roy Spliet <rspliet at eclipso.eu>
---
src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 10 ++++------
2016 Jan 14
0
[PATCH] nv50/ir: only use FILE_LOCAL_MEMORY for temp arrays that use indirection
...:Source *code) : BuildUtil(ir),
code(code),
tgsi(NULL),
- tData(this), aData(this), pData(this), oData(this)
+ tData(this), lData(this), aData(this), pData(this), oData(this)
{
info = code->info;
- const DataFile tFile = code->mainTempsInLMem ? FILE_MEMORY_LOCAL : FILE_GPR;
-
const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY);
const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE);
const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS);
const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT);
- tData.setup(TGSI_FILE_TEMPO...
2013 Dec 08
0
[PATCH] nv50: TXF already has integer arguments, don't try to convert from f32
...ingPreSSA::handleTEX(TexInstruction *i)
if (i->op == OP_TXB || i->op == OP_TXL)
i->swapSources(dref, lod);
- // array index must be converted to u32
if (i->tex.target.isArray()) {
- Value *layer = i->getSrc(arg - 1);
- LValue *src = new_LValue(func, FILE_GPR);
- bld.mkCvt(OP_CVT, TYPE_U32, src, TYPE_F32, layer);
- bld.mkOp2(OP_MIN, TYPE_U32, src, src, bld.loadImm(NULL, 511));
- i->setSrc(arg - 1, src);
-
+ if (i->op != OP_TXF) {
+ // array index must be converted to u32, but it's already an integer
+ // for...
2014 May 27
0
[PATCH v2 2/2] nvc0: use SM35 ISA with GK20A
...)
+ return gk104_builtin_offsets[builtin];
+ /* fall-through for GK20A */
case 0xf0:
case 0x100:
return gk110_builtin_offsets[builtin];
@@ -235,7 +240,7 @@ TargetNVC0::getFileSize(DataFile file) const
{
switch (file) {
case FILE_NULL: return 0;
- case FILE_GPR: return (chipset >= NVISA_GK110_CHIPSET) ? 255 : 63;
+ case FILE_GPR: return (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63;
case FILE_PREDICATE: return 7;
case FILE_FLAGS: return 1;
case FILE_ADDRESS: return 0;
--
1.9.3
2015 Jan 11
0
[PATCH 2/3] nv50/ir: For MAD, prefer SDST == SSRC2
...(ArrayList& insns, unsigned int mask)
copyCompound(insn->getSrc(0), insn->getDef(0));
}
break;
+ case OP_MAD:
+ if (!(mask & JOIN_MASK_MAD))
+ break;
+ if (insn->srcExists(2) && insn->src(2).getFile() == FILE_GPR &&
+ insn->def(0).getFile() == FILE_GPR)
+ coalesceValues(insn->getDef(0), insn->getSrc(2), false);
+ break;
case OP_TEX:
case OP_TXB:
case OP_TXL:
--
2.1.0
2017 Aug 19
1
[PATCH] nv50/ra: Only increment DefValue counter if we are going to spill
...*> to_del;
- for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();
- ++d) {
+ for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();) {
Value *slot = mem ?
static_cast<Value *>(mem) : new_LValue(func, FILE_GPR);
Value *tmp = NULL;
@@ -1787,13 +1786,13 @@ SpillCodeInserter::run(const std::list<ValuePair>& lst)
assert(defi);
if (defi->isPseudo()) {
d = lval->defs.erase(d);
- --d;
if (slot->reg.file == FILE_MEMORY_LOCAL)...
2018 Apr 19
3
[Bug 106132] New: bar.sync encoding incorrect for GM107
...objdump -sass.
I looked at nv50_ir_emit_gm107.cpp to understand the instruction encoding.
> [Mesa-dev] [PATCH] gm107/ir: add emission for BAR
> Samuel Pitoiset samuel.pitoiset at gmail.com
> Tue Mar 1 17:44:42 UTC 2016
>
> + // barrier id
> + if (insn->src(0).getFile() == FILE_GPR) {
> + emitGPR(0x08, insn->src(0));
> + } else {
> + ImmediateValue *imm = insn->getSrc(0)->asImm();
> + assert(imm);
> + emitField(0x08, 8, imm->reg.data.u32);
> + emitField(0x2b, 1, 1);
> + }
> +
> + // thread count
> + if...
2014 May 27
8
[PATCH 0/2] nvc0: support for GK20A (Tegra K1)
The following 2 patches make it possible to run Mesa programs on GK20A
(Tegra K1).
GK20A is very similar to GK104, but uses a new (backward-compatible) 3D class
as well as the same ISA as GK110 (SM35). Taking these differences into account
is sufficient to successfully render simple off-screen buffers.
Alexandre Courbot (2):
nvc0: add GK20A 3D class
nvc0: use SM35 ISA with GK20A
2014 May 10
2
[PATCH] nv50: fix setting of texture ms info to be per-stage
...ring_nv50.cpp
index eafc0a7..63db1d7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -591,6 +591,10 @@ void NV50LoweringPreSSA::loadTexMsInfo(uint32_t off, Value **ms,
Value *tmp = new_LValue(func, FILE_GPR);
uint8_t b = prog->driver->io.resInfoCBSlot;
off += prog->driver->io.suInfoBase;
+ if (prog->getType() > Program::TYPE_VERTEX)
+ off += 16 * 2 * 4;
+ if (prog->getType() > Program::TYPE_GEOMETRY)
+ off += 16 * 2 * 4;
*ms_x = bld.mkLoadv(TYPE_U32, b...
2014 Jan 13
20
[PATCH 00/19] nv50: add sampler2DMS/GP support to get OpenGL 3.2
OK, so there's a bunch of stuff in here. The geometry stuff is based on the
work started by Bryan Cain and Christoph Bumiller.
Patches 01-12: Add support for geometry shaders and fix related issues
Patches 13-14: Make it possible for fb clears to operate on texture attachments
with an explicit layer set (as is allowed in gl 3.2).
Patches 15-17: Make ARB_texture_multisample work
2014 Mar 20
0
[PATCH] nvc0/ir: move sample id to second source arg to fix sampler2DMS
...// (nvc0) generate and move the tsc/tic/array source to the front
- if (dim != arg || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
+ if (i->tex.target.isArray() || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa
Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(lyr) : NULL;
@@ -728,6 +729,12 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
i->setSrc(0, src);
}
+ // for nvc0, the sample id ends up being treated as an offset, so we can't
+ // do o...
2014 Sep 01
0
[PATCH] nv50/ir: use unordered_set instead of list to keep track of var defs
...Sym() : NULL;
- for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();
- ++d) {
+ for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();) {
Value *slot = mem ?
static_cast<Value *>(mem) : new_LValue(func, FILE_GPR);
Value *tmp = NULL;
@@ -1577,13 +1576,13 @@ SpillCodeInserter::run(const std::list<ValuePair>& lst)
assert(defi);
if (defi->isPseudo()) {
d = lval->defs.erase(d);
- --d;
if (slot->reg.file == FILE_MEMORY_LOCAL)...