Karol Herbst
2016-Jan-13  12:25 UTC
[Nouveau] [PATCH 0/2] allow partly reclocking on chipset
Some chipsets, like Fermi, have working engine reclocking but broken memory reclocking. For now, we should add the functionality to allow partial reclocking for those. Although this doesn't give as much performance as one might wish, it is still noticeable and may improve performance enough to be noted. Karol Herbst (2): clk: seperate engine and memory reclock toggles clk: allow engine reclock on fermi drm/nouveau/include/nvkm/subdev/clk.h | 3 ++- drm/nouveau/nvkm/subdev/clk/base.c | 21 ++++++++++++++------- drm/nouveau/nvkm/subdev/clk/gf100.c | 3 ++- drm/nouveau/nvkm/subdev/clk/gk104.c | 3 ++- drm/nouveau/nvkm/subdev/clk/gk20a.c | 2 +- drm/nouveau/nvkm/subdev/clk/gt215.c | 2 +- drm/nouveau/nvkm/subdev/clk/mcp77.c | 3 ++- drm/nouveau/nvkm/subdev/clk/nv04.c | 2 +- drm/nouveau/nvkm/subdev/clk/nv40.c | 2 +- drm/nouveau/nvkm/subdev/clk/nv50.c | 2 +- drm/nouveau/nvkm/subdev/clk/priv.h | 6 ++++-- 11 files changed, 31 insertions(+), 18 deletions(-) -- 2.7.0
Karol Herbst
2016-Jan-13  12:25 UTC
[Nouveau] [PATCH 1/2] clk: seperate engine and memory reclock toggles
Simple: allow chipsets to enable either engine or memory reclocking if one of
them already works, instead of disabling both.
Signed-off-by: Karol Herbst <nouveau at karolherbst.de>
---
 drm/nouveau/include/nvkm/subdev/clk.h |  3 ++-
 drm/nouveau/nvkm/subdev/clk/base.c    | 21 ++++++++++++++-------
 drm/nouveau/nvkm/subdev/clk/gf100.c   |  3 ++-
 drm/nouveau/nvkm/subdev/clk/gk104.c   |  3 ++-
 drm/nouveau/nvkm/subdev/clk/gk20a.c   |  2 +-
 drm/nouveau/nvkm/subdev/clk/gt215.c   |  2 +-
 drm/nouveau/nvkm/subdev/clk/mcp77.c   |  3 ++-
 drm/nouveau/nvkm/subdev/clk/nv04.c    |  2 +-
 drm/nouveau/nvkm/subdev/clk/nv40.c    |  2 +-
 drm/nouveau/nvkm/subdev/clk/nv50.c    |  2 +-
 drm/nouveau/nvkm/subdev/clk/priv.h    |  6 ++++--
 11 files changed, 31 insertions(+), 18 deletions(-)
diff --git a/drm/nouveau/include/nvkm/subdev/clk.h
b/drm/nouveau/include/nvkm/subdev/clk.h
index 6b33bc0..9453229 100644
--- a/drm/nouveau/include/nvkm/subdev/clk.h
+++ b/drm/nouveau/include/nvkm/subdev/clk.h
@@ -96,7 +96,8 @@ struct nvkm_clk {
 	int tstate; /* thermal adjustment (max-) */
 	int dstate; /* display adjustment (min+) */
 
-	bool allow_reclock;
+	bool allow_eng_reclock;
+	bool allow_mem_reclock;
 
 	/*XXX: die, these are here *only* to support the completely
 	 *     bat-shit insane what-was-nouveau_hw.c code
diff --git a/drm/nouveau/nvkm/subdev/clk/base.c
b/drm/nouveau/nvkm/subdev/clk/base.c
index 889cce2..36729fa 100644
--- a/drm/nouveau/nvkm/subdev/clk/base.c
+++ b/drm/nouveau/nvkm/subdev/clk/base.c
@@ -84,6 +84,9 @@ nvkm_cstate_prog(struct nvkm_clk *clk, struct nvkm_pstate
*pstate, int cstatei)
 	struct nvkm_cstate *cstate;
 	int ret;
 
+	if (!clk->allow_eng_reclock)
+		return 0;
+
 	if (!list_empty(&pstate->list)) {
 		cstate = list_entry(pstate->list.prev, typeof(*cstate), head);
 	} else {
@@ -190,7 +193,7 @@ nvkm_pstate_prog(struct nvkm_clk *clk, int pstatei)
 
 	nvkm_pcie_set_link(pci, pstate->pcie_speed, pstate->pcie_width);
 
-	if (ram && ram->func->calc) {
+	if (clk->allow_mem_reclock && ram && ram->func->calc)
{
 		int khz = pstate->base.domain[nv_clk_src_mem];
 		do {
 			ret = ram->func->calc(ram, khz);
@@ -383,7 +386,7 @@ nvkm_clk_ustate_update(struct nvkm_clk *clk, int req)
 	struct nvkm_pstate *pstate;
 	int i = 0;
 
-	if (!clk->allow_reclock)
+	if (!(clk->allow_eng_reclock || clk->allow_mem_reclock))
 		return -ENOSYS;
 
 	if (req != -1 && req != -2) {
@@ -406,7 +409,7 @@ nvkm_clk_nstate(struct nvkm_clk *clk, const char *mode, int
arglen)
 {
 	int ret = 1;
 
-	if (clk->allow_reclock && !strncasecmpz(mode, "auto",
arglen))
+	if (clk->allow_eng_reclock && !strncasecmpz(mode, "auto",
arglen))
 		return -2;
 
 	if (strncasecmpz(mode, "disabled", arglen)) {
@@ -559,7 +562,8 @@ nvkm_clk = {
 
 int
 nvkm_clk_ctor(const struct nvkm_clk_func *func, struct nvkm_device *device,
-	      int index, bool allow_reclock, struct nvkm_clk *clk)
+	      int index, bool allow_eng_reclock, bool allow_mem_reclock,
+	      struct nvkm_clk *clk)
 {
 	int ret, idx, arglen;
 	const char *mode;
@@ -570,7 +574,8 @@ nvkm_clk_ctor(const struct nvkm_clk_func *func, struct
nvkm_device *device,
 	clk->domains = func->domains;
 	clk->ustate_ac = -1;
 	clk->ustate_dc = -1;
-	clk->allow_reclock = allow_reclock;
+	clk->allow_eng_reclock = allow_eng_reclock;
+	clk->allow_mem_reclock = allow_mem_reclock;
 
 	INIT_WORK(&clk->work, nvkm_pstate_work);
 	init_waitqueue_head(&clk->wait);
@@ -612,9 +617,11 @@ nvkm_clk_ctor(const struct nvkm_clk_func *func, struct
nvkm_device *device,
 
 int
 nvkm_clk_new_(const struct nvkm_clk_func *func, struct nvkm_device *device,
-	      int index, bool allow_reclock, struct nvkm_clk **pclk)
+	      int index, bool allow_eng_reclock, bool allow_mem_reclock,
+	      struct nvkm_clk **pclk)
 {
 	if (!(*pclk = kzalloc(sizeof(**pclk), GFP_KERNEL)))
 		return -ENOMEM;
-	return nvkm_clk_ctor(func, device, index, allow_reclock, *pclk);
+	return nvkm_clk_ctor(func, device, index, allow_eng_reclock,
+			     allow_mem_reclock, *pclk);
 }
diff --git a/drm/nouveau/nvkm/subdev/clk/gf100.c
b/drm/nouveau/nvkm/subdev/clk/gf100.c
index ecb7d1f..ad93350 100644
--- a/drm/nouveau/nvkm/subdev/clk/gf100.c
+++ b/drm/nouveau/nvkm/subdev/clk/gf100.c
@@ -535,5 +535,6 @@ gf100_clk_new(struct nvkm_device *device, int index, struct
nvkm_clk **pclk)
 		return -ENOMEM;
 	*pclk = &clk->base;
 
-	return nvkm_clk_ctor(&gf100_clk, device, index, false, &clk->base);
+	return nvkm_clk_ctor(&gf100_clk, device, index, false, false,
+			     &clk->base);
 }
diff --git a/drm/nouveau/nvkm/subdev/clk/gk104.c
b/drm/nouveau/nvkm/subdev/clk/gk104.c
index 975c401..e661bf1 100644
--- a/drm/nouveau/nvkm/subdev/clk/gk104.c
+++ b/drm/nouveau/nvkm/subdev/clk/gk104.c
@@ -506,5 +506,6 @@ gk104_clk_new(struct nvkm_device *device, int index, struct
nvkm_clk **pclk)
 		return -ENOMEM;
 	*pclk = &clk->base;
 
-	return nvkm_clk_ctor(&gk104_clk, device, index, true, &clk->base);
+	return nvkm_clk_ctor(&gk104_clk, device, index, true, true,
+			     &clk->base);
 }
diff --git a/drm/nouveau/nvkm/subdev/clk/gk20a.c
b/drm/nouveau/nvkm/subdev/clk/gk20a.c
index 254094a..8355400 100644
--- a/drm/nouveau/nvkm/subdev/clk/gk20a.c
+++ b/drm/nouveau/nvkm/subdev/clk/gk20a.c
@@ -663,7 +663,7 @@ gk20a_clk_new(struct nvkm_device *device, int index, struct
nvkm_clk **pclk)
 	clk->params = &gk20a_pllg_params;
 	clk->parent_rate = clk_get_rate(tdev->clk);
 
-	ret = nvkm_clk_ctor(&gk20a_clk, device, index, true, &clk->base);
+	ret = nvkm_clk_ctor(&gk20a_clk, device, index, true, true,
&clk->base);
 	nvkm_info(&clk->base.subdev, "parent clock rate: %d Mhz\n",
 		  clk->parent_rate / MHZ);
 	return ret;
diff --git a/drm/nouveau/nvkm/subdev/clk/gt215.c
b/drm/nouveau/nvkm/subdev/clk/gt215.c
index 056702e..dc3c4cc 100644
--- a/drm/nouveau/nvkm/subdev/clk/gt215.c
+++ b/drm/nouveau/nvkm/subdev/clk/gt215.c
@@ -542,5 +542,5 @@ gt215_clk_new(struct nvkm_device *device, int index, struct
nvkm_clk **pclk)
 		return -ENOMEM;
 	*pclk = &clk->base;
 
-	return nvkm_clk_ctor(&gt215_clk, device, index, true, &clk->base);
+	return nvkm_clk_ctor(&gt215_clk, device, index, true, true,
&clk->base);
 }
diff --git a/drm/nouveau/nvkm/subdev/clk/mcp77.c
b/drm/nouveau/nvkm/subdev/clk/mcp77.c
index 1c21b8b..1cab517 100644
--- a/drm/nouveau/nvkm/subdev/clk/mcp77.c
+++ b/drm/nouveau/nvkm/subdev/clk/mcp77.c
@@ -419,5 +419,6 @@ mcp77_clk_new(struct nvkm_device *device, int index, struct
nvkm_clk **pclk)
 		return -ENOMEM;
 	*pclk = &clk->base;
 
-	return nvkm_clk_ctor(&mcp77_clk, device, index, true, &clk->base);
+	return nvkm_clk_ctor(&mcp77_clk, device, index, true, true,
+			     &clk->base);
 }
diff --git a/drm/nouveau/nvkm/subdev/clk/nv04.c
b/drm/nouveau/nvkm/subdev/clk/nv04.c
index b280f85..edb12a9 100644
--- a/drm/nouveau/nvkm/subdev/clk/nv04.c
+++ b/drm/nouveau/nvkm/subdev/clk/nv04.c
@@ -74,7 +74,7 @@ nv04_clk = {
 int
 nv04_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk)
 {
-	int ret = nvkm_clk_new_(&nv04_clk, device, index, false, pclk);
+	int ret = nvkm_clk_new_(&nv04_clk, device, index, false, false, pclk);
 	if (ret == 0) {
 		(*pclk)->pll_calc = nv04_clk_pll_calc;
 		(*pclk)->pll_prog = nv04_clk_pll_prog;
diff --git a/drm/nouveau/nvkm/subdev/clk/nv40.c
b/drm/nouveau/nvkm/subdev/clk/nv40.c
index 2ab9b9b..96f2ab7 100644
--- a/drm/nouveau/nvkm/subdev/clk/nv40.c
+++ b/drm/nouveau/nvkm/subdev/clk/nv40.c
@@ -228,5 +228,5 @@ nv40_clk_new(struct nvkm_device *device, int index, struct
nvkm_clk **pclk)
 	clk->base.pll_prog = nv04_clk_pll_prog;
 	*pclk = &clk->base;
 
-	return nvkm_clk_ctor(&nv40_clk, device, index, true, &clk->base);
+	return nvkm_clk_ctor(&nv40_clk, device, index, true, true,
&clk->base);
 }
diff --git a/drm/nouveau/nvkm/subdev/clk/nv50.c
b/drm/nouveau/nvkm/subdev/clk/nv50.c
index 5841f29..2c7a565 100644
--- a/drm/nouveau/nvkm/subdev/clk/nv50.c
+++ b/drm/nouveau/nvkm/subdev/clk/nv50.c
@@ -514,7 +514,7 @@ nv50_clk_new_(const struct nvkm_clk_func *func, struct
nvkm_device *device,
 
 	if (!(clk = kzalloc(sizeof(*clk), GFP_KERNEL)))
 		return -ENOMEM;
-	ret = nvkm_clk_ctor(func, device, index, allow_reclock, &clk->base);
+	ret = nvkm_clk_ctor(func, device, index, allow_reclock, allow_reclock,
&clk->base);
 	*pclk = &clk->base;
 	if (ret)
 		return ret;
diff --git a/drm/nouveau/nvkm/subdev/clk/priv.h
b/drm/nouveau/nvkm/subdev/clk/priv.h
index 51eafc0..bb6c0c4 100644
--- a/drm/nouveau/nvkm/subdev/clk/priv.h
+++ b/drm/nouveau/nvkm/subdev/clk/priv.h
@@ -16,9 +16,11 @@ struct nvkm_clk_func {
 };
 
 int nvkm_clk_ctor(const struct nvkm_clk_func *, struct nvkm_device *, int,
-		  bool allow_reclock, struct nvkm_clk *);
+		  bool allow_eng_reclock, bool allow_mem_reclock,
+		  struct nvkm_clk *);
 int nvkm_clk_new_(const struct nvkm_clk_func *, struct nvkm_device *, int,
-		  bool allow_reclock, struct nvkm_clk **);
+		  bool allow_eng_reclock, bool allow_mem_reclock,
+		  struct nvkm_clk **);
 
 int nv04_clk_pll_calc(struct nvkm_clk *, struct nvbios_pll *, int clk,
 		      struct nvkm_pll_vals *);
-- 
2.7.0
Karol Herbst
2016-Jan-13  12:25 UTC
[Nouveau] [PATCH 2/2] clk: allow engine reclock on fermi
On my 630M Fermi card, this gives some speed improvements while on 0f: pixmark_piano: ~800ms to ~500ms frame time; unigine_heaven (lowest setting, fullhd): 5.1 fps to 6.4 fps. Clocks for this GPU: 07: 270 MHz, 0f: 475 MHz. Signed-off-by: Karol Herbst <nouveau at karolherbst.de> --- drm/nouveau/nvkm/subdev/clk/gf100.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drm/nouveau/nvkm/subdev/clk/gf100.c b/drm/nouveau/nvkm/subdev/clk/gf100.c index ad93350..2ce902f 100644 --- a/drm/nouveau/nvkm/subdev/clk/gf100.c +++ b/drm/nouveau/nvkm/subdev/clk/gf100.c @@ -535,6 +535,6 @@ gf100_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk) return -ENOMEM; *pclk = &clk->base; - return nvkm_clk_ctor(&gf100_clk, device, index, false, false, + return nvkm_clk_ctor(&gf100_clk, device, index, true, false, &clk->base); } -- 2.7.0
Samuel Pitoiset
2016-Jan-13  12:43 UTC
[Nouveau] [PATCH 0/2] allow partly reclocking on chipset
Hi! Did you check on different Fermi chipsets or only with one variant? Are you sure that engine reclocking works as expected on Fermi? Because enabling it without a strong inspection sounds like a prediction and it might not work. On 01/13/2016 01:25 PM, Karol Herbst wrote:> some chipset have working engine reclocking, but broken memory reclocking like > Fermi. We should for now, add the functionality to allow partly reclocking for > those. > > Allthough this doesn't give as much performance as one might wish, it is till > noticeable and may improve performance enough to be noted. > > Karol Herbst (2): > clk: seperate engine and memory reclock toggles > clk: allow engine reclock on fermi > > drm/nouveau/include/nvkm/subdev/clk.h | 3 ++- > drm/nouveau/nvkm/subdev/clk/base.c | 21 ++++++++++++++------- > drm/nouveau/nvkm/subdev/clk/gf100.c | 3 ++- > drm/nouveau/nvkm/subdev/clk/gk104.c | 3 ++- > drm/nouveau/nvkm/subdev/clk/gk20a.c | 2 +- > drm/nouveau/nvkm/subdev/clk/gt215.c | 2 +- > drm/nouveau/nvkm/subdev/clk/mcp77.c | 3 ++- > drm/nouveau/nvkm/subdev/clk/nv04.c | 2 +- > drm/nouveau/nvkm/subdev/clk/nv40.c | 2 +- > drm/nouveau/nvkm/subdev/clk/nv50.c | 2 +- > drm/nouveau/nvkm/subdev/clk/priv.h | 6 ++++-- > 11 files changed, 31 insertions(+), 18 deletions(-) >-- -Samuel
Karol Herbst
2016-Jan-13  12:49 UTC
[Nouveau] [PATCH 0/2] allow partly reclocking on chipset
> Samuel Pitoiset <samuel.pitoiset at gmail.com> hat am 13. Januar 2016 um 13:43 > geschrieben: > > Hi! > > Did you check on different Fermi chipsets or only with one variant?currently I only checked that on my nvc1, but I thought I could just send the patches and it is easier for others to try it out this way.> > Are you sure that engine reclocking works as expected on Fermi? Because > enabling it without a strong inspection sounds like a prediction and it > might not work.It seems to work, because I got a huge performance increase in gputest_pixmark_piano, check the second commit for details ;)> > On 01/13/2016 01:25 PM, Karol Herbst wrote: > > some chipset have working engine reclocking, but broken memory reclocking > > like > > Fermi. We should for now, add the functionality to allow partly reclocking > > for > > those. > > > > Allthough this doesn't give as much performance as one might wish, it is > > till > > noticeable and may improve performance enough to be noted. > > > > Karol Herbst (2): > > clk: seperate engine and memory reclock toggles > > clk: allow engine reclock on fermi > > > > drm/nouveau/include/nvkm/subdev/clk.h | 3 ++- > > drm/nouveau/nvkm/subdev/clk/base.c | 21 ++++++++++++++------- > > drm/nouveau/nvkm/subdev/clk/gf100.c | 3 ++- > > drm/nouveau/nvkm/subdev/clk/gk104.c | 3 ++- > > drm/nouveau/nvkm/subdev/clk/gk20a.c | 2 +- > > drm/nouveau/nvkm/subdev/clk/gt215.c | 2 +- > > drm/nouveau/nvkm/subdev/clk/mcp77.c | 3 ++- > > drm/nouveau/nvkm/subdev/clk/nv04.c | 2 +- > > drm/nouveau/nvkm/subdev/clk/nv40.c | 2 +- > > drm/nouveau/nvkm/subdev/clk/nv50.c | 2 +- > > drm/nouveau/nvkm/subdev/clk/priv.h | 6 ++++-- > > 11 files changed, 31 insertions(+), 18 deletions(-) > > > > -- > -Samuel
Hey Karol, Until you can convince me that this (pending) progress-in-insight[1] is accounted for in the way nouveau configures the clock registers, I must NACK this patch. Sorry. Roy [1] "nvkm/clk/gf100: Read secondary bypass postdiv when required" - currently on https://github.com/RSpliet/kernel-nouveau-nv50-pm/commit/d230af49bd0e25271161a9622337b01443214ec0 Op 13-01-16 om 12:25 schreef Karol Herbst:> this gives me on my 630M fermi card some speed improvements while on 0f: > > pixmark_piano: ~800ms to ~500ms frame time > unigine_heaven (lowest setting, fullhd): 5.1 fps to 6.4 fps) > > clocks for this gpu: > 07: 270 MHz > 0f: 475 MHz > > Signed-off-by: Karol Herbst <nouveau at karolherbst.de> > --- > drm/nouveau/nvkm/subdev/clk/gf100.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drm/nouveau/nvkm/subdev/clk/gf100.c b/drm/nouveau/nvkm/subdev/clk/gf100.c > index ad93350..2ce902f 100644 > --- a/drm/nouveau/nvkm/subdev/clk/gf100.c > +++ b/drm/nouveau/nvkm/subdev/clk/gf100.c > @@ -535,6 +535,6 @@ gf100_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk) > return -ENOMEM; > *pclk = &clk->base; > > - return nvkm_clk_ctor(&gf100_clk, device, index, false, false, > + return nvkm_clk_ctor(&gf100_clk, device, index, true, false, > &clk->base); > }