Ilia Mirkin
2015-Oct-20 22:31 UTC
[Nouveau] [PATCH] nvc0: do upload-time fixups for interpolation parameters
WIP: only supports SM35; SM20 and SM50 support still needs to be added. Unfortunately flatshading is an all-or-nothing proposition on nvc0, while GL 3.0 calls for the ability to selectively specify explicit interpolation parameters on gl_Color/gl_SecondaryColor which would override the flatshading setting. This allows us to fix up the interpolation settings after shader generation based on rasterizer settings. While we're at it, we can add support for dynamically forcing all (non-flat) shader inputs to be interpolated per-sample, which allows st/mesa to not generate variants for these. Fixes the remaining failing glsl-1.30/execution/interpolation piglits. Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu> --- Incomplete as per above. Wanted to get it out there in case there was any feedback. This will only work on GK110/GK208 as-is. .../drivers/nouveau/codegen/nv50_ir_driver.h | 5 +++ .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 31 +++++++++++++++-- .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 14 +++++--- .../drivers/nouveau/codegen/nv50_ir_target.cpp | 39 +++++++++++++++++++++- .../drivers/nouveau/codegen/nv50_ir_target.h | 21 ++++++++++++ src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 23 ++++++++++++- src/gallium/drivers/nouveau/nvc0/nvc0_program.h | 6 ++-- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 2 +- .../drivers/nouveau/nvc0/nvc0_shader_state.c | 16 ++++++++- src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 3 -- .../drivers/nouveau/nvc0/nvc0_state_validate.c | 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h | 2 +- 12 files changed, 147 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 14acb60..2f5654f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -99,6 +99,7 @@ struct nv50_ir_prog_info uint8_t sourceRep; /* NV50_PROGRAM_IR */ const void *source; void 
*relocData; + void *interpData; struct nv50_ir_prog_symbol *syms; uint16_t numSyms; } bin; @@ -198,6 +199,10 @@ extern void nv50_ir_relocate_code(void *relocData, uint32_t *code, uint32_t libPos, uint32_t dataPos); +extern void +nv50_ir_change_interp(void *interpData, uint32_t *code, + bool force_per_sample, bool flatshade); + /* obtain code that will be shared among programs */ extern void nv50_ir_get_target_library(uint32_t chipset, const uint32_t **code, uint32_t *size); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 8f15429..d712c9c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1437,6 +1437,30 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i) code[1] |= (i->ipa & 0xc) << (19 - 2); } +static void +interpApply(const InterpEntry *entry, uint32_t *code, + bool force_persample_interp, bool flatshade) +{ + int ipa = entry->ipa; + int reg = entry->reg; + int loc = entry->loc; + + if (flatshade && + (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) { + ipa = NV50_IR_INTERP_FLAT; + reg = 0xff; + } else if (force_persample_interp && + (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT && + (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) { + ipa |= NV50_IR_INTERP_CENTROID; + } + code[loc + 1] &= ~(0xf << 19); + code[loc + 1] |= (ipa & 0x3) << 21; + code[loc + 1] |= (ipa & 0xc) << (19 - 2); + code[loc + 0] &= ~(0xff << 23); + code[loc + 0] |= reg << 23; +} + void CodeEmitterGK110::emitINTERP(const Instruction *i) { @@ -1448,10 +1472,13 @@ CodeEmitterGK110::emitINTERP(const Instruction *i) if (i->saturate) code[1] |= 1 << 18; - if (i->op == OP_PINTERP) + if (i->op == OP_PINTERP) { srcId(i->src(1), 23); - else + addInterp(i->ipa, SDATA(i->src(1)).id, interpApply); + } else { code[0] |= 0xff << 23; + addInterp(i->ipa, 0xff, interpApply); + } 
srcId(i->src(0).getIndirect(0), 10); emitInterpMode(i); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 21099d5..0489ef8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -1057,7 +1057,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) default: break; } - if (decl->Interp.Location || info->io.sampleInterp) + if (decl->Interp.Location) info->in[i].centroid = 1; } @@ -1122,6 +1122,10 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) case TGSI_SEMANTIC_VERTEXID: info->io.vertexId = first; break; + case TGSI_SEMANTIC_SAMPLEID: + case TGSI_SEMANTIC_SAMPLEPOS: + info->io.sampleInterp = 1; + break; default: break; } @@ -1341,6 +1345,8 @@ private: void handleINTERP(Value *dst0[4]); + uint8_t translateInterpMode(const struct nv50_ir_varying *var, + operation& op); Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr); void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork); @@ -1454,8 +1460,8 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address) return sym; } -static inline uint8_t -translateInterpMode(const struct nv50_ir_varying *var, operation& op) +uint8_t +Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op) { uint8_t mode = NV50_IR_INTERP_PERSPECTIVE; @@ -1471,7 +1477,7 @@ translateInterpMode(const struct nv50_ir_varying *var, operation& op) op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC) ? 
OP_PINTERP : OP_LINTERP; - if (var->centroid) + if (var->centroid || info->io.sampleInterp) mode |= NV50_IR_INTERP_CENTROID; return mode; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index fe530c7..afc8ff1 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -166,7 +166,7 @@ void Target::destroy(Target *targ) delete targ; } -CodeEmitter::CodeEmitter(const Target *target) : targ(target) +CodeEmitter::CodeEmitter(const Target *target) : targ(target), interpInfo(NULL) { } @@ -388,6 +388,7 @@ Program::emitBinary(struct nv50_ir_prog_info *info) } } info->bin.relocData = emit->getRelocInfo(); + info->bin.interpData = emit->getInterpInfo(); emitSymbolTable(info); @@ -428,6 +429,29 @@ CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m, return true; } +bool +CodeEmitter::addInterp(int ipa, int reg, InterpApply apply) +{ + unsigned int n = interpInfo ? interpInfo->count : 0; + + if (!(n % RELOC_ALLOC_INCREMENT)) { + size_t size = sizeof(InterpInfo) + n * sizeof(InterpEntry); + interpInfo = reinterpret_cast<InterpInfo *>( + REALLOC(interpInfo, n ? 
size : 0, + size + RELOC_ALLOC_INCREMENT * sizeof(InterpEntry))); + if (!interpInfo) + return false; + if (n == 0) + memset(interpInfo, 0, sizeof(InterpInfo)); + } + ++interpInfo->count; + + interpInfo->entry[n] = InterpEntry(ipa, reg, codeSize >> 2); + interpInfo->apply = apply; + + return true; +} + void RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const { @@ -472,6 +496,19 @@ nv50_ir_relocate_code(void *relocData, uint32_t *code, } void +nv50_ir_change_interp(void *interpData, uint32_t *code, + bool force_persample_interp, bool flatshade) +{ + nv50_ir::InterpInfo *info = reinterpret_cast<nv50_ir::InterpInfo *>( + interpData); + + // force_persample_interp: all non-flat -> per-sample + // flatshade: all color -> flat + for (unsigned i = 0; i < info->count; ++i) + info->apply(&info->entry[i], code, force_persample_interp, flatshade); +} + +void nv50_ir_get_target_library(uint32_t chipset, const uint32_t **code, uint32_t *size) { diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h index 591916e..4e33997 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h @@ -58,6 +58,23 @@ struct RelocInfo RelocEntry entry[0]; }; +struct InterpEntry +{ + InterpEntry(int ipa, int reg, int loc) : ipa(ipa), reg(reg), loc(loc) {} + uint32_t ipa:4; // SC mode used to identify colors + uint32_t reg:8; // The reg used for perspective division + uint32_t loc:20; // Let's hope we don't have more than 1M-sized shaders +}; + +typedef void (*InterpApply)(const InterpEntry*, uint32_t*, bool, bool); + +struct InterpInfo +{ + uint32_t count; + InterpApply apply; + InterpEntry entry[0]; +}; + class CodeEmitter { public: @@ -78,6 +95,9 @@ public: inline void *getRelocInfo() const { return relocInfo; } + bool addInterp(int ipa, int reg, InterpApply apply); + inline void *getInterpInfo() const { return interpInfo; } + virtual void 
prepareEmission(Program *); virtual void prepareEmission(Function *); virtual void prepareEmission(BasicBlock *); @@ -92,6 +112,7 @@ protected: uint32_t codeSizeLimit; RelocInfo *relocInfo; + InterpInfo *interpInfo; }; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 0acd7ab..b89c216 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -424,6 +424,8 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) for (i = 0; i < info->numInputs; ++i) { m = nvc0_hdr_interp_mode(&info->in[i]); + if (info->in[i].sn == TGSI_SEMANTIC_COLOR && info->in[i].sc) + fp->fp.color_interp[info->in[i].si] = m | (info->in[i].mask << 4); for (c = 0; c < 4; ++c) { if (!(info->in[i].mask & (1 << c))) continue; @@ -531,7 +533,6 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) info->io.genUserClip = prog->vp.num_ucps; info->io.ucpBase = 256; info->io.ucpCBSlot = 15; - info->io.sampleInterp = prog->fp.sample_interp; if (prog->type == PIPE_SHADER_COMPUTE) { if (chipset >= NVISA_GK104_CHIPSET) { @@ -575,6 +576,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) prog->immd_data = info->immd.buf; prog->immd_size = info->immd.bufSize; prog->relocs = info->bin.relocData; + prog->interps = info->bin.interpData; prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1)); prog->num_barriers = info->numBarriers; @@ -713,6 +715,24 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) if (prog->relocs) nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0); + if (prog->interps) + nv50_ir_change_interp(prog->interps, prog->code, + prog->fp.force_persample_interp, + prog->fp.flatshade); + if (!is_cp && prog->interps) { + for (int i = 0; i < 2; i++) { + unsigned mask = prog->fp.color_interp[i] >> 4; + unsigned interp = prog->fp.color_interp[i] & 3; + if (!mask) + continue; + 
prog->hdr[14] &= ~(0xff << (8 * i)); + if (prog->fp.flatshade) + interp = NVC0_INTERP_FLAT; + for (int c = 0; c < 4; c++) + if (mask & (1 << c)) + prog->hdr[14] |= interp << (2 * (4 * i + c)); + } + } #ifdef DEBUG if (debug_get_bool_option("NV50_PROG_DEBUG", false)) @@ -773,6 +793,7 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog) FREE(prog->code); /* may be 0 for hardcoded shaders */ FREE(prog->immd_data); FREE(prog->relocs); + FREE(prog->interps); if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms) FREE(prog->cp.syms); if (prog->tfb) { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h index fa14d68..de75e6b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h @@ -45,8 +45,9 @@ struct nvc0_program { } vp; struct { uint8_t depth_layout; - uint8_t in_pos[PIPE_MAX_SHADER_INPUTS]; - uint8_t sample_interp; + uint8_t color_interp[2]; + bool force_persample_interp; + bool flatshade; } fp; struct { uint32_t tess_mode; /* ~0 if defined by the other stage */ @@ -61,6 +62,7 @@ struct nvc0_program { uint8_t num_barriers; void *relocs; + void *interps; struct nvc0_transform_feedback_state *tfb; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index f34ad0e..e9784a9 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -179,6 +179,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return (class_3d >= NVE4_3D_CLASS) ? 
1 : 0; @@ -201,7 +202,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: - case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index 9b6c47e..3405e7c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -107,8 +107,22 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *fp = nvc0->fragprog; + struct pipe_rasterizer_state *rast = &nvc0->rast->pipe; + + bool has_color = fp->fp.color_interp[0] || fp->fp.color_interp[1]; + if (fp->fp.force_persample_interp != rast->force_persample_interp || + (has_color && fp->fp.flatshade != rast->flatshade)) { + /* Force the program to be reuploaded, which will trigger interp fixups + * to get applied + */ + if (fp->mem) + nouveau_heap_free(&fp->mem); + } else if (!(nvc0->dirty & NVC0_NEW_FRAGPROG)) { + return; + } - fp->fp.sample_interp = nvc0->min_samples > 1; + fp->fp.force_persample_interp = rast->force_persample_interp; + fp->fp.flatshade = rast->flatshade; if (!nvc0_program_validate(nvc0, fp)) return; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 742bef3..e95554d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -212,9 +212,6 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe, * always emit 16 commands, one for each scissor rectangle, here. */ - SB_BEGIN_3D(so, SHADE_MODEL, 1); - SB_DATA (so, cso->flatshade ? 
NVC0_3D_SHADE_MODEL_FLAT : - NVC0_3D_SHADE_MODEL_SMOOTH); SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first); SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index fe10ac1..510ad35 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -679,7 +679,7 @@ static struct state_validate { { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG }, { nvc0_validate_tess_state, NVC0_NEW_TESSFACTOR }, { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG }, - { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG }, + { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG | NVC0_NEW_RASTERIZER }, { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA | NVC0_NEW_RASTERIZER }, { nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER }, diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h index 8bc33c6..f9680f5 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h @@ -23,7 +23,7 @@ struct nvc0_blend_stateobj { struct nvc0_rasterizer_stateobj { struct pipe_rasterizer_state pipe; int size; - uint32_t state[44]; + uint32_t state[42]; }; struct nvc0_zsa_stateobj { -- 2.4.10