The following is a series of patches to improve nouveau's clock tree parsing. Reclocking these engines (all but memory) is pretty stable on the cards I've tested. Please review and merge when approved. These patches do not solve the problem that the core/shader engines don't like to be clocked up too far without fb (memory) following, which results in visible corruption. I suspect this problem is unrelated and requires a separate patch to context creation to even things out. Roy
Roy Spliet
2014-Aug-21 11:45 UTC
[Nouveau] [PATCH 1/7] clock/nva3: Parse clock control registers more accurately
Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c | 32 +++++++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c index 9fb5835..a08011c 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c +++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * * Authors: Ben Skeggs + * Roy Spliet */ #include <subdev/bios.h> @@ -42,9 +43,17 @@ static u32 read_vco(struct nva3_clock_priv *priv, int clk) { u32 sctl = nv_rd32(priv, 0x4120 + (clk * 4)); - if ((sctl & 0x00000030) != 0x00000030) + + switch (sctl & 0x00000030) { + case 0x00000000: + return nv_device(priv)->crystal; + case 0x00000020: return read_pll(priv, 0x41, 0x00e820); - return read_pll(priv, 0x42, 0x00e8a0); + case 0x00000030: + return read_pll(priv, 0x42, 0x00e8a0); + default: + return 0; + } } static u32 @@ -66,14 +75,25 @@ read_clk(struct nva3_clock_priv *priv, int clk, bool ignore_en) if (!ignore_en && !(sctl & 0x00000100)) return 0; + /* out_alt */ + if (sctl & 0x00000400) + return 108000; + + /* vco_out */ switch (sctl & 0x00003000) { case 0x00000000: - return nv_device(priv)->crystal; + if (!(sctl & 0x00000200)) + return nv_device(priv)->crystal; + return 0; case 0x00002000: if (sctl & 0x00000040) return 108000; return 100000; case 0x00003000: + /* vco_enable */ + if (!(sctl & 0x00000001)) + return 0; + sclk = read_vco(priv, clk); sdiv = ((sctl & 0x003f0000) >> 16) + 2; return (sclk * 2) / sdiv; @@ -95,7 +115,9 @@ read_pll(struct nva3_clock_priv *priv, int clk, u32 pll) N = (coef & 0x0000ff00) >> 8; P = (coef & 0x003f0000) >> 16; - /* no post-divider on these.. */ + /* no post-divider on these.. 
+ * XXX: it looks more like two post-"dividers" that + * cross each other out in the default RPLL config */ if ((pll & 0x00ff00) == 0x00e800) P = 1; @@ -136,6 +158,8 @@ nva3_clock_read(struct nouveau_clock *clk, enum nv_clk_src src) nv_error(clk, "invalid clock source %d\n", src); return -EINVAL; } + + return 0; } int -- 1.9.3
Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c | 73 +++++++++++++++--------- drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h | 2 +- drivers/gpu/drm/nouveau/core/subdev/fb/ramnva3.c | 2 +- 3 files changed, 48 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c index a08011c..b9ab90a 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c +++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c @@ -163,17 +163,12 @@ nva3_clock_read(struct nouveau_clock *clk, enum nv_clk_src src) } int -nva3_clock_info(struct nouveau_clock *clock, int clk, u32 pll, u32 khz, +nva3_clk_info(struct nouveau_clock *clock, int clk, u32 khz, struct nva3_clock_info *info) { - struct nouveau_bios *bios = nouveau_bios(clock); struct nva3_clock_priv *priv = (void *)clock; - struct nvbios_pll limits; - u32 oclk, sclk, sdiv; - int P, N, M, diff; - int ret; + u32 oclk, sclk, sdiv, diff; - info->pll = 0; info->clk = 0; switch (khz) { @@ -188,40 +183,64 @@ nva3_clock_info(struct nouveau_clock *clock, int clk, u32 pll, u32 khz, return khz; default: sclk = read_vco(priv, clk); - sdiv = min((sclk * 2) / (khz - 2999), (u32)65); - /* if the clock has a PLL attached, and we can get a within - * [-2, 3) MHz of a divider, we'll disable the PLL and use - * the divider instead. - * - * divider can go as low as 2, limited here because NVIDIA + sdiv = min((sclk * 2) / khz, (u32)65); + oclk = (sclk * 2) / sdiv; + diff = ((khz + 3000) - oclk); + + /* When imprecise, play it safe and aim for a clock lower than + * desired rather than higher */ + if (diff < 0) { + sdiv++; + oclk = (sclk * 2) / sdiv; + } + + /* divider can go as low as 2, limited here because NVIDIA * and the VBIOS on my NVA8 seem to prefer using the PLL * for 810MHz - is there a good reason? - */ + * XXX: PLLs with refclk 810MHz? 
*/ if (sdiv > 4) { - oclk = (sclk * 2) / sdiv; - diff = khz - oclk; - if (!pll || (diff >= -2000 && diff < 3000)) { - info->clk = (((sdiv - 2) << 16) | 0x00003100); - return oclk; - } + info->clk = (((sdiv - 2) << 16) | 0x00003100); + return oclk; } - if (!pll) - return -ERANGE; break; } + return -ERANGE; +} + +int +nva3_pll_info(struct nouveau_clock *clock, int clk, u32 pll, u32 khz, + struct nva3_clock_info *info) +{ + struct nouveau_bios *bios = nouveau_bios(clock); + struct nva3_clock_priv *priv = (void *)clock; + int clk_khz; + struct nvbios_pll limits; + int P, N, M, diff; + int ret; + + info->pll = 0; + + /* If we can get a within [-2, 3) MHz of a divider, we'll disable the + * PLL and use the divider instead. */ + clk_khz = nva3_clk_info(clock, clk, khz, info); + diff = khz - clk_khz; + if (!pll || (diff >= -2000 && diff < 3000)) { + return clk_khz; + } + + /* Try with PLL */ ret = nvbios_pll_parse(bios, pll, &limits); if (ret) return ret; - limits.refclk = read_clk(priv, clk - 0x10, true); - if (!limits.refclk) + clk_khz = nva3_clk_info(clock, clk - 0x10, limits.refclk, info); + if (clk_khz != limits.refclk) return -EINVAL; ret = nva3_pll_calc(nv_subdev(priv), &limits, khz, &N, NULL, &M, &P); if (ret >= 0) { - info->clk = nv_rd32(priv, 0x4120 + (clk * 4)); info->pll = (P << 16) | (N << 8) | M; } @@ -232,7 +251,7 @@ static int calc_clk(struct nva3_clock_priv *priv, struct nouveau_cstate *cstate, int clk, u32 pll, int idx) { - int ret = nva3_clock_info(&priv->base, clk, pll, cstate->domain[idx], + int ret = nva3_pll_info(&priv->base, clk, pll, cstate->domain[idx], &priv->eng[idx]); if (ret >= 0) return 0; @@ -249,7 +268,7 @@ prog_pll(struct nva3_clock_priv *priv, int clk, u32 pll, int idx) const u32 coef = pll + 4; if (info->pll) { - nv_mask(priv, src0, 0x00000101, 0x00000101); + nv_mask(priv, src0, 0x003f3141, 0x00000101 | info->clk); nv_wr32(priv, coef, info->pll); nv_mask(priv, ctrl, 0x00000015, 0x00000015); nv_mask(priv, ctrl, 0x00000010, 0x00000000); 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h index 6229a50..2b4b3ea 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h +++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h @@ -8,7 +8,7 @@ struct nva3_clock_info { u32 pll; }; -int nva3_clock_info(struct nouveau_clock *, int, u32, u32, +int nva3_pll_info(struct nouveau_clock *, int, u32, u32, struct nva3_clock_info *); #endif diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnva3.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnva3.c index 8076fb1..686e0d6 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnva3.c +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnva3.c @@ -123,7 +123,7 @@ nva3_ram_calc(struct nouveau_fb *pfb, u32 freq) timing.data = 0; } - ret = nva3_clock_info(nouveau_clock(pfb), 0x12, 0x4000, freq, &mclk); + ret = nva3_pll_info(nouveau_clock(pfb), 0x12, 0x4000, freq, &mclk); if (ret < 0) { nv_error(pfb, "failed mclk calculation\n"); return ret; -- 1.9.3
Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c | 82 +++++++++++++++++++++--- drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h | 4 ++ 2 files changed, 78 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c index b9ab90a..eeab2d5 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c +++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c @@ -136,12 +136,11 @@ static int nva3_clock_read(struct nouveau_clock *clk, enum nv_clk_src src) { struct nva3_clock_priv *priv = (void *)clk; + u32 hsrc; switch (src) { case nv_clk_src_crystal: return nv_device(priv)->crystal; - case nv_clk_src_href: - return 100000; case nv_clk_src_core: return read_pll(priv, 0x00, 0x4200); case nv_clk_src_shader: @@ -154,6 +153,18 @@ nva3_clock_read(struct nouveau_clock *clk, enum nv_clk_src src) return read_clk(priv, 0x21, false); case nv_clk_src_daemon: return read_clk(priv, 0x25, false); + case nv_clk_src_host: + hsrc = (nv_rd32(priv, 0xc040) & 0x30000000) >> 28; + switch (hsrc) { + case 0: + return read_clk(priv, 0x1d, false); + case 2: + case 3: + return 277000; + default: + nv_error(clk, "unknown HOST clock source %d\n", hsrc); + return -EINVAL; + } default: nv_error(clk, "invalid clock source %d\n", src); return -EINVAL; @@ -258,6 +269,34 @@ calc_clk(struct nva3_clock_priv *priv, struct nouveau_cstate *cstate, return ret; } +static int +calc_host(struct nva3_clock_priv *priv, struct nouveau_cstate *cstate) +{ + int ret = 0; + u32 kHz = cstate->domain[nv_clk_src_host]; + struct nva3_clock_info *info = &priv->eng[nv_clk_src_host]; + + if (kHz == 277000) { + info->clk = 0; + info->host_out = NVA3_HOST_277; + return 0; + } + + info->host_out = NVA3_HOST_CLK; + + ret = nva3_clk_info(&priv->base, 0x1d, kHz, info); + if (ret >= 0) + return 0; + return ret; +} + +static void +disable_clk_src(struct nva3_clock_priv *priv, u32 src) +{ + 
nv_mask(priv, src, 0x00000100, 0x00000000); + nv_mask(priv, src, 0x00000001, 0x00000000); +} + static void prog_pll(struct nva3_clock_priv *priv, int clk, u32 pll, int idx) { @@ -275,15 +314,13 @@ prog_pll(struct nva3_clock_priv *priv, int clk, u32 pll, int idx) nv_wait(priv, ctrl, 0x00020000, 0x00020000); nv_mask(priv, ctrl, 0x00000010, 0x00000010); nv_mask(priv, ctrl, 0x00000008, 0x00000000); - nv_mask(priv, src1, 0x00000100, 0x00000000); - nv_mask(priv, src1, 0x00000001, 0x00000000); + disable_clk_src(priv, src1); } else { nv_mask(priv, src1, 0x003f3141, 0x00000101 | info->clk); nv_mask(priv, ctrl, 0x00000018, 0x00000018); udelay(20); nv_mask(priv, ctrl, 0x00000001, 0x00000000); - nv_mask(priv, src0, 0x00000100, 0x00000000); - nv_mask(priv, src0, 0x00000001, 0x00000000); + disable_clk_src(priv, src0); } } @@ -294,6 +331,33 @@ prog_clk(struct nva3_clock_priv *priv, int clk, int idx) nv_mask(priv, 0x004120 + (clk * 4), 0x003f3141, 0x00000101 | info->clk); } +static void +prog_host(struct nva3_clock_priv *priv) +{ + struct nva3_clock_info *info = &priv->eng[nv_clk_src_host]; + u32 hsrc = (nv_rd32(priv, 0xc040)); + + switch (info->host_out) { + case NVA3_HOST_277: + if ((hsrc & 0x30000000) == 0) { + nv_wr32(priv, 0xc040, hsrc | 0x20000000); + disable_clk_src(priv, 0x4194); + } + break; + case NVA3_HOST_CLK: + prog_clk(priv, 0x1d, nv_clk_src_host); + if ((hsrc & 0x30000000) >= 0x20000000) { + nv_wr32(priv, 0xc040, hsrc & ~0x30000000); + } + break; + default: + break; + } + + /* This seems to be a clock gating factor on idle, always set to 64 */ + nv_wr32(priv, 0xc044, 0x3e); +} + static int nva3_clock_calc(struct nouveau_clock *clk, struct nouveau_cstate *cstate) { @@ -303,7 +367,8 @@ nva3_clock_calc(struct nouveau_clock *clk, struct nouveau_cstate *cstate) if ((ret = calc_clk(priv, cstate, 0x10, 0x4200, nv_clk_src_core)) || (ret = calc_clk(priv, cstate, 0x11, 0x4220, nv_clk_src_shader)) || (ret = calc_clk(priv, cstate, 0x20, 0x0000, nv_clk_src_disp)) || - (ret = 
calc_clk(priv, cstate, 0x21, 0x0000, nv_clk_src_vdec))) + (ret = calc_clk(priv, cstate, 0x21, 0x0000, nv_clk_src_vdec)) || + (ret = calc_host(priv, cstate))) return ret; return 0; @@ -317,6 +382,7 @@ nva3_clock_prog(struct nouveau_clock *clk) prog_pll(priv, 0x01, 0x004220, nv_clk_src_shader); prog_clk(priv, 0x20, nv_clk_src_disp); prog_clk(priv, 0x21, nv_clk_src_vdec); + prog_host(priv); return 0; } @@ -328,12 +394,12 @@ nva3_clock_tidy(struct nouveau_clock *clk) static struct nouveau_clocks nva3_domain[] = { { nv_clk_src_crystal, 0xff }, - { nv_clk_src_href , 0xff }, { nv_clk_src_core , 0x00, 0, "core", 1000 }, { nv_clk_src_shader , 0x01, 0, "shader", 1000 }, { nv_clk_src_mem , 0x02, 0, "memory", 1000 }, { nv_clk_src_vdec , 0x03 }, { nv_clk_src_disp , 0x04 }, + { nv_clk_src_host , 0x05 }, { nv_clk_src_max } }; diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h index 2b4b3ea..54f9949 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h +++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h @@ -6,6 +6,10 @@ struct nva3_clock_info { u32 clk; u32 pll; + enum { + NVA3_HOST_277, + NVA3_HOST_CLK, + } host_out; }; int nva3_pll_info(struct nouveau_clock *, int, u32, u32, -- 1.9.3
Roy Spliet
2014-Aug-21 11:45 UTC
[Nouveau] [PATCH 4/7] clock/nva3: Abort when PLL doesn't lock
Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c index eeab2d5..2d75212 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c +++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c @@ -311,7 +311,11 @@ prog_pll(struct nva3_clock_priv *priv, int clk, u32 pll, int idx) nv_wr32(priv, coef, info->pll); nv_mask(priv, ctrl, 0x00000015, 0x00000015); nv_mask(priv, ctrl, 0x00000010, 0x00000000); - nv_wait(priv, ctrl, 0x00020000, 0x00020000); + if (!nv_wait(priv, ctrl, 0x00020000, 0x00020000)) { + nv_mask(priv, ctrl, 0x00000010, 0x00000010); + nv_mask(priv, src0, 0x00000101, 0x00000000); + return; + } nv_mask(priv, ctrl, 0x00000010, 0x00000010); nv_mask(priv, ctrl, 0x00000008, 0x00000000); disable_clk_src(priv, src1); -- 1.9.3
Roy Spliet
2014-Aug-21 11:45 UTC
[Nouveau] [PATCH 5/7] clock/nva3: For PLL clocks always make sure the PLL is not in use
Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c index 2d75212..3e1164c 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c +++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c @@ -305,8 +305,17 @@ prog_pll(struct nva3_clock_priv *priv, int clk, u32 pll, int idx) const u32 src1 = 0x004160 + (clk * 4); const u32 ctrl = pll + 0; const u32 coef = pll + 4; + u32 bypass; if (info->pll) { + /* Always start from a non-PLL clock */ + bypass = nv_rd32(priv, ctrl) & 0x00000008; + if (!bypass) { + nv_mask(priv, src1, 0x00000101, 0x00000101); + nv_mask(priv, ctrl, 0x00000008, 0x00000008); + udelay(20); + } + nv_mask(priv, src0, 0x003f3141, 0x00000101 | info->clk); nv_wr32(priv, coef, info->pll); nv_mask(priv, ctrl, 0x00000015, 0x00000015); -- 1.9.3
Roy Spliet
2014-Aug-21 11:45 UTC
[Nouveau] [PATCH 6/7] clock/nva3: Set intermediate core clock on reclocking
Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- .../gpu/drm/nouveau/core/include/subdev/clock.h | 1 + drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c | 63 +++++++++++++++++----- drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h | 1 + 3 files changed, 51 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/clock.h b/drivers/gpu/drm/nouveau/core/include/subdev/clock.h index c01e29c..676b49e 100644 --- a/drivers/gpu/drm/nouveau/core/include/subdev/clock.h +++ b/drivers/gpu/drm/nouveau/core/include/subdev/clock.h @@ -29,6 +29,7 @@ enum nv_clk_src { nv_clk_src_mdiv, nv_clk_src_core, + nv_clk_src_core_intm, nv_clk_src_shader, nv_clk_src_mem, diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c index 3e1164c..14a5060 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c +++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c @@ -142,6 +142,7 @@ nva3_clock_read(struct nouveau_clock *clk, enum nv_clk_src src) case nv_clk_src_crystal: return nv_device(priv)->crystal; case nv_clk_src_core: + case nv_clk_src_core_intm: return read_pll(priv, 0x00, 0x4200); case nv_clk_src_shader: return read_pll(priv, 0x01, 0x4220); @@ -226,7 +227,6 @@ nva3_pll_info(struct nouveau_clock *clock, int clk, u32 pll, u32 khz, { struct nouveau_bios *bios = nouveau_bios(clock); struct nva3_clock_priv *priv = (void *)clock; - int clk_khz; struct nvbios_pll limits; int P, N, M, diff; int ret; @@ -235,10 +235,10 @@ nva3_pll_info(struct nouveau_clock *clock, int clk, u32 pll, u32 khz, /* If we can get a within [-2, 3) MHz of a divider, we'll disable the * PLL and use the divider instead. 
*/ - clk_khz = nva3_clk_info(clock, clk, khz, info); - diff = khz - clk_khz; + ret = nva3_clk_info(clock, clk, khz, info); + diff = khz - ret; if (!pll || (diff >= -2000 && diff < 3000)) { - return clk_khz; + goto out; } /* Try with PLL */ @@ -246,8 +246,8 @@ nva3_pll_info(struct nouveau_clock *clock, int clk, u32 pll, u32 khz, if (ret) return ret; - clk_khz = nva3_clk_info(clock, clk - 0x10, limits.refclk, info); - if (clk_khz != limits.refclk) + ret = nva3_clk_info(clock, clk - 0x10, limits.refclk, info); + if (ret != limits.refclk) return -EINVAL; ret = nva3_pll_calc(nv_subdev(priv), &limits, khz, &N, NULL, &M, &P); @@ -255,6 +255,9 @@ nva3_pll_info(struct nouveau_clock *clock, int clk, u32 pll, u32 khz, info->pll = (P << 16) | (N << 8) | M; } +out: + info->fb_delay = max(((khz + 7566) / 15133), (u32) 18); + return ret ? ret : -ERANGE; } @@ -371,10 +374,26 @@ prog_host(struct nva3_clock_priv *priv) nv_wr32(priv, 0xc044, 0x3e); } +static void +prog_core(struct nva3_clock_priv *priv, int idx) +{ + struct nva3_clock_info *info = &priv->eng[idx]; + u32 fb_delay = nv_rd32(priv, 0x10002c); + + if (fb_delay < info->fb_delay) + nv_wr32(priv, 0x10002c, info->fb_delay); + + prog_pll(priv, 0x00, 0x004200, idx); + + if (fb_delay > info->fb_delay) + nv_wr32(priv, 0x10002c, info->fb_delay); +} + static int nva3_clock_calc(struct nouveau_clock *clk, struct nouveau_cstate *cstate) { struct nva3_clock_priv *priv = (void *)clk; + struct nva3_clock_info *core = &priv->eng[nv_clk_src_core]; int ret; if ((ret = calc_clk(priv, cstate, 0x10, 0x4200, nv_clk_src_core)) || @@ -384,6 +403,16 @@ nva3_clock_calc(struct nouveau_clock *clk, struct nouveau_cstate *cstate) (ret = calc_host(priv, cstate))) return ret; + /* XXX: Should be reading the highest bit in the VBIOS clock to decide + * whether to use a PLL or not... 
but using a PLL defeats the purpose */ + if (core->pll) { + ret = nva3_clk_info(clk, 0x10, + cstate->domain[nv_clk_src_core_intm], + &priv->eng[nv_clk_src_core_intm]); + if (ret < 0) + return ret; + } + return 0; } @@ -391,7 +420,12 @@ static int nva3_clock_prog(struct nouveau_clock *clk) { struct nva3_clock_priv *priv = (void *)clk; - prog_pll(priv, 0x00, 0x004200, nv_clk_src_core); + struct nva3_clock_info *core = &priv->eng[nv_clk_src_core]; + + if (core->pll) + prog_core(priv, nv_clk_src_core_intm); + + prog_core(priv, nv_clk_src_core); prog_pll(priv, 0x01, 0x004220, nv_clk_src_shader); prog_clk(priv, 0x20, nv_clk_src_disp); prog_clk(priv, 0x21, nv_clk_src_vdec); @@ -406,13 +440,14 @@ nva3_clock_tidy(struct nouveau_clock *clk) static struct nouveau_clocks nva3_domain[] = { - { nv_clk_src_crystal, 0xff }, - { nv_clk_src_core , 0x00, 0, "core", 1000 }, - { nv_clk_src_shader , 0x01, 0, "shader", 1000 }, - { nv_clk_src_mem , 0x02, 0, "memory", 1000 }, - { nv_clk_src_vdec , 0x03 }, - { nv_clk_src_disp , 0x04 }, - { nv_clk_src_host , 0x05 }, + { nv_clk_src_crystal , 0xff }, + { nv_clk_src_core , 0x00, 0, "core", 1000 }, + { nv_clk_src_shader , 0x01, 0, "shader", 1000 }, + { nv_clk_src_mem , 0x02, 0, "memory", 1000 }, + { nv_clk_src_vdec , 0x03 }, + { nv_clk_src_disp , 0x04 }, + { nv_clk_src_host , 0x05 }, + { nv_clk_src_core_intm, 0x06 }, { nv_clk_src_max } }; diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h index 54f9949..0539be4 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h +++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h @@ -10,6 +10,7 @@ struct nva3_clock_info { NVA3_HOST_277, NVA3_HOST_CLK, } host_out; + u32 fb_delay; }; int nva3_pll_info(struct nouveau_clock *, int, u32, u32, -- 1.9.3
Roy Spliet
2014-Aug-21 11:45 UTC
[Nouveau] [PATCH 7/7] clock/nva3: Pause the GPU before reclocking
Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- .../gpu/drm/nouveau/core/include/subdev/clock.h | 3 ++ drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c | 45 ++++++++++++++++++++++ drivers/gpu/drm/nouveau/core/subdev/clock/nvaa.c | 30 ++++----------- 3 files changed, 55 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/clock.h b/drivers/gpu/drm/nouveau/core/include/subdev/clock.h index 676b49e..52e65b8 100644 --- a/drivers/gpu/drm/nouveau/core/include/subdev/clock.h +++ b/drivers/gpu/drm/nouveau/core/include/subdev/clock.h @@ -146,6 +146,9 @@ int nv04_clock_pll_prog(struct nouveau_clock *, u32 reg1, int nva3_clock_pll_calc(struct nouveau_clock *, struct nvbios_pll *, int clk, struct nouveau_pll_vals *); +int nva3_clock_pre(struct nouveau_clock *clk, unsigned long *flags); +void nva3_clock_post(struct nouveau_clock *clk, unsigned long *flags); + int nouveau_clock_ustate(struct nouveau_clock *, int req); int nouveau_clock_astate(struct nouveau_clock *, int req, int rel); int nouveau_clock_dstate(struct nouveau_clock *, int req, int rel); diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c index 14a5060..cd31cf3 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c +++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c @@ -23,6 +23,7 @@ * Roy Spliet */ +#include <engine/fifo.h> #include <subdev/bios.h> #include <subdev/bios/pll.h> #include <subdev/timer.h> @@ -293,6 +294,41 @@ calc_host(struct nva3_clock_priv *priv, struct nouveau_cstate *cstate) return ret; } +int +nva3_clock_pre(struct nouveau_clock *clk, unsigned long *flags) +{ + struct nouveau_fifo *pfifo = nouveau_fifo(clk); + + /* halt and idle execution engines */ + nv_mask(clk, 0x020060, 0x00070000, 0x00000000); + nv_mask(clk, 0x002504, 0x00000001, 0x00000001); + /* Wait until the interrupt handler is finished */ + if (!nv_wait(clk, 0x000100, 0xffffffff, 0x00000000)) + return -EBUSY; + + if (pfifo) 
+ pfifo->pause(pfifo, flags); + + if (!nv_wait(clk, 0x002504, 0x00000010, 0x00000010)) + return -EIO; + if (!nv_wait(clk, 0x00251c, 0x0000003f, 0x0000003f)) + return -EIO; + + return 0; +} + +void +nva3_clock_post(struct nouveau_clock *clk, unsigned long *flags) +{ + struct nouveau_fifo *pfifo = nouveau_fifo(clk); + + if (pfifo) + pfifo->start(pfifo, flags); + + nv_mask(clk, 0x002504, 0x00000001, 0x00000000); + nv_mask(clk, 0x020060, 0x00070000, 0x00040000); +} + static void disable_clk_src(struct nva3_clock_priv *priv, u32 src) { @@ -421,6 +457,12 @@ nva3_clock_prog(struct nouveau_clock *clk) { struct nva3_clock_priv *priv = (void *)clk; struct nva3_clock_info *core = &priv->eng[nv_clk_src_core]; + int ret; + unsigned long flags; + + ret = nva3_clock_pre(clk, &flags); + if (ret) + return ret; if (core->pll) prog_core(priv, nv_clk_src_core_intm); @@ -430,6 +472,9 @@ nva3_clock_prog(struct nouveau_clock *clk) prog_clk(priv, 0x20, nv_clk_src_disp); prog_clk(priv, 0x21, nv_clk_src_vdec); prog_host(priv); + + nva3_clock_post(clk, &flags); + return 0; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nvaa.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nvaa.c index 6a65fc9..fa91d2a 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/clock/nvaa.c +++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nvaa.c @@ -299,25 +299,13 @@ static int nvaa_clock_prog(struct nouveau_clock *clk) { struct nvaa_clock_priv *priv = (void *)clk; - struct nouveau_fifo *pfifo = nouveau_fifo(clk); + u32 pllmask = 0, mast; unsigned long flags; - u32 pllmask = 0, mast, ptherm_gate; - int ret = -EBUSY; - - /* halt and idle execution engines */ - ptherm_gate = nv_mask(clk, 0x020060, 0x00070000, 0x00000000); - nv_mask(clk, 0x002504, 0x00000001, 0x00000001); - /* Wait until the interrupt handler is finished */ - if (!nv_wait(clk, 0x000100, 0xffffffff, 0x00000000)) - goto resume; - - if (pfifo) - pfifo->pause(pfifo, &flags); + int ret; - if (!nv_wait(clk, 0x002504, 0x00000010, 0x00000010)) - goto resume; 
- if (!nv_wait(clk, 0x00251c, 0x0000003f, 0x0000003f)) - goto resume; + ret = nva3_clock_pre(clk, &flags); + if (ret) + return ret; /* First switch to safe clocks: href */ mast = nv_mask(clk, 0xc054, 0x03400e70, 0x03400640); @@ -378,12 +366,6 @@ nvaa_clock_prog(struct nouveau_clock *clk) ret = 0; resume: - if (pfifo) - pfifo->start(pfifo, &flags); - - nv_mask(clk, 0x002504, 0x00000001, 0x00000000); - nv_wr32(clk, 0x020060, ptherm_gate); - /* Disable some PLLs and dividers when unused */ if (priv->csrc != nv_clk_src_core) { nv_wr32(clk, 0x4040, 0x00000000); @@ -395,6 +377,8 @@ resume: nv_mask(clk, 0x4020, 0x80000000, 0x00000000); } + nva3_clock_post(clk, &flags); + return ret; } -- 1.9.3
Ilia Mirkin
2014-Aug-21 12:25 UTC
[Nouveau] [PATCH 7/7] clock/nva3: Pause the GPU before reclocking
On Thu, Aug 21, 2014 at 7:45 AM, Roy Spliet <rspliet at eclipso.eu> wrote:> Signed-off-by: Roy Spliet <rspliet at eclipso.eu> > --- > .../gpu/drm/nouveau/core/include/subdev/clock.h | 3 ++ > drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c | 45 ++++++++++++++++++++++ > drivers/gpu/drm/nouveau/core/subdev/clock/nvaa.c | 30 ++++----------- > 3 files changed, 55 insertions(+), 23 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/clock.h b/drivers/gpu/drm/nouveau/core/include/subdev/clock.h > index 676b49e..52e65b8 100644 > --- a/drivers/gpu/drm/nouveau/core/include/subdev/clock.h > +++ b/drivers/gpu/drm/nouveau/core/include/subdev/clock.h > @@ -146,6 +146,9 @@ int nv04_clock_pll_prog(struct nouveau_clock *, u32 reg1, > int nva3_clock_pll_calc(struct nouveau_clock *, struct nvbios_pll *, > int clk, struct nouveau_pll_vals *); > > +int nva3_clock_pre(struct nouveau_clock *clk, unsigned long *flags); > +void nva3_clock_post(struct nouveau_clock *clk, unsigned long *flags); > + > int nouveau_clock_ustate(struct nouveau_clock *, int req); > int nouveau_clock_astate(struct nouveau_clock *, int req, int rel); > int nouveau_clock_dstate(struct nouveau_clock *, int req, int rel); > diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c > index 14a5060..cd31cf3 100644 > --- a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c > +++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c > @@ -23,6 +23,7 @@ > * Roy Spliet > */ > > +#include <engine/fifo.h> > #include <subdev/bios.h> > #include <subdev/bios/pll.h> > #include <subdev/timer.h> > @@ -293,6 +294,41 @@ calc_host(struct nva3_clock_priv *priv, struct nouveau_cstate *cstate) > return ret; > } > > +int > +nva3_clock_pre(struct nouveau_clock *clk, unsigned long *flags) > +{ > + struct nouveau_fifo *pfifo = nouveau_fifo(clk); > + > + /* halt and idle execution engines */ > + nv_mask(clk, 0x020060, 0x00070000, 0x00000000); > + nv_mask(clk, 0x002504, 
0x00000001, 0x00000001); > + /* Wait until the interrupt handler is finished */ > + if (!nv_wait(clk, 0x000100, 0xffffffff, 0x00000000)) > + return -EBUSY;So this will return with the fifo and whatever 20060 controls all blocked. That seems bad, unless you guarantee that _post gets run no matter what.> + > + if (pfifo) > + pfifo->pause(pfifo, flags); > + > + if (!nv_wait(clk, 0x002504, 0x00000010, 0x00000010)) > + return -EIO; > + if (!nv_wait(clk, 0x00251c, 0x0000003f, 0x0000003f)) > + return -EIO; > + > + return 0; > +} > + > +void > +nva3_clock_post(struct nouveau_clock *clk, unsigned long *flags) > +{ > + struct nouveau_fifo *pfifo = nouveau_fifo(clk); > + > + if (pfifo) > + pfifo->start(pfifo, flags); > + > + nv_mask(clk, 0x002504, 0x00000001, 0x00000000); > + nv_mask(clk, 0x020060, 0x00070000, 0x00040000); > +} > + > static void > disable_clk_src(struct nva3_clock_priv *priv, u32 src) > { > @@ -421,6 +457,12 @@ nva3_clock_prog(struct nouveau_clock *clk) > { > struct nva3_clock_priv *priv = (void *)clk; > struct nva3_clock_info *core = &priv->eng[nv_clk_src_core]; > + int ret; > + unsigned long flags; > + > + ret = nva3_clock_pre(clk, &flags); > + if (ret) > + return ret;The fifo may be paused. Do you have to run _post unconditionally here perhaps? 
goto's make this sort of thing really simple (+ return ret instead of return 0).> > if (core->pll) > prog_core(priv, nv_clk_src_core_intm); > @@ -430,6 +472,9 @@ nva3_clock_prog(struct nouveau_clock *clk) > prog_clk(priv, 0x20, nv_clk_src_disp); > prog_clk(priv, 0x21, nv_clk_src_vdec); > prog_host(priv); > + > + nva3_clock_post(clk, &flags); > + > return 0; > } > > diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nvaa.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nvaa.c > index 6a65fc9..fa91d2a 100644 > --- a/drivers/gpu/drm/nouveau/core/subdev/clock/nvaa.c > +++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nvaa.c > @@ -299,25 +299,13 @@ static int > nvaa_clock_prog(struct nouveau_clock *clk) > { > struct nvaa_clock_priv *priv = (void *)clk; > - struct nouveau_fifo *pfifo = nouveau_fifo(clk); > + u32 pllmask = 0, mast; > unsigned long flags; > - u32 pllmask = 0, mast, ptherm_gate; > - int ret = -EBUSY; > - > - /* halt and idle execution engines */ > - ptherm_gate = nv_mask(clk, 0x020060, 0x00070000, 0x00000000); > - nv_mask(clk, 0x002504, 0x00000001, 0x00000001); > - /* Wait until the interrupt handler is finished */ > - if (!nv_wait(clk, 0x000100, 0xffffffff, 0x00000000)) > - goto resume;As above, you've killed the 'resume' logic...> - > - if (pfifo) > - pfifo->pause(pfifo, &flags); > + int ret; > > - if (!nv_wait(clk, 0x002504, 0x00000010, 0x00000010)) > - goto resume; > - if (!nv_wait(clk, 0x00251c, 0x0000003f, 0x0000003f)) > - goto resume; > + ret = nva3_clock_pre(clk, &flags); > + if (ret) > + return ret; > > /* First switch to safe clocks: href */ > mast = nv_mask(clk, 0xc054, 0x03400e70, 0x03400640); > @@ -378,12 +366,6 @@ nvaa_clock_prog(struct nouveau_clock *clk) > ret = 0; > > resume: > - if (pfifo) > - pfifo->start(pfifo, &flags); > - > - nv_mask(clk, 0x002504, 0x00000001, 0x00000000); > - nv_wr32(clk, 0x020060, ptherm_gate); > - > /* Disable some PLLs and dividers when unused */ > if (priv->csrc != nv_clk_src_core) { > nv_wr32(clk, 0x4040, 
0x00000000); > @@ -395,6 +377,8 @@ resume: > nv_mask(clk, 0x4020, 0x80000000, 0x00000000); > } > > + nva3_clock_post(clk, &flags); > + > return ret; > } > > -- > 1.9.3 > > > > _______________________________________________ > Nouveau mailing list > Nouveau at lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/nouveau
On 21 August 2014 at 12:45, Roy Spliet <rspliet at eclipso.eu> wrote:> Signed-off-by: Roy Spliet <rspliet at eclipso.eu>Hi Roy Just have a quick scan through a nouveau build with -Wextra and I've noticed an interesting warning> --- > drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c | 73 +++++++++++++++--------- > drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h | 2 +- > drivers/gpu/drm/nouveau/core/subdev/fb/ramnva3.c | 2 +- > 3 files changed, 48 insertions(+), 29 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c > index a08011c..b9ab90a 100644 > --- a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c > +++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c > @@ -163,17 +163,12 @@ nva3_clock_read(struct nouveau_clock *clk, enum nv_clk_src src) > } > > int > -nva3_clock_info(struct nouveau_clock *clock, int clk, u32 pll, u32 khz, > +nva3_clk_info(struct nouveau_clock *clock, int clk, u32 khz, > struct nva3_clock_info *info) > { > - struct nouveau_bios *bios = nouveau_bios(clock); > struct nva3_clock_priv *priv = (void *)clock; > - struct nvbios_pll limits; > - u32 oclk, sclk, sdiv; > - int P, N, M, diff; > - int ret; > + u32 oclk, sclk, sdiv, diff; > > - info->pll = 0; > info->clk = 0; > > switch (khz) { > @@ -188,40 +183,64 @@ nva3_clock_info(struct nouveau_clock *clock, int clk, u32 pll, u32 khz, > return khz; > default: > sclk = read_vco(priv, clk); > - sdiv = min((sclk * 2) / (khz - 2999), (u32)65); > - /* if the clock has a PLL attached, and we can get a within > - * [-2, 3) MHz of a divider, we'll disable the PLL and use > - * the divider instead. 
> - * > - * divider can go as low as 2, limited here because NVIDIA > + sdiv = min((sclk * 2) / khz, (u32)65); > + oclk = (sclk * 2) / sdiv; > + diff = ((khz + 3000) - oclk); > + > + /* When imprecise, play it safe and aim for a clock lower than > + * desired rather than higher */ > + if (diff < 0) { ^ warning: comparison of unsigned expression < 0 is always false [-Wtype-limits] I'm assuming that the appropriate fix would be to change the variable diff to be signed? Cheers, Emil