Lyude
2017-Apr-25 18:38 UTC
[Nouveau] [PATCH] drm/nouveau: Add support for clockgating on Fermi+
This adds support for enabling automatic clockgating on nvidia GPUs for Fermi and later generations. This saves a little bit of power, bringing my Fermi GPU's power consumption from ~28.3W on idle to ~27W, and my Kepler's idle power consumption from ~23.6W to ~21.65W. Similar to how the nvidia driver seems to handle this, we enable clockgating for each engine that supports it after its initialization. Signed-off-by: Lyude <lyude at redhat.com> --- .../gpu/drm/nouveau/include/nvkm/subdev/therm.h | 4 ++ drivers/gpu/drm/nouveau/nvkm/core/engine.c | 20 +++++- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 14 ++-- drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild | 2 + drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c | 2 + .../gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c | 49 ++++++++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c | 77 ++++++++++++++++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c | 2 + drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c | 2 + drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h | 10 +++ 11 files changed, 175 insertions(+), 9 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h index b268b96..904aa56 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h @@ -84,6 +84,9 @@ struct nvkm_therm { int (*attr_get)(struct nvkm_therm *, enum nvkm_therm_attr_type); int (*attr_set)(struct nvkm_therm *, enum nvkm_therm_attr_type, int); + + int (*clkgate_engine)(struct nvkm_therm *, enum nvkm_devidx); + void (*clkgate_set)(struct nvkm_therm *, int gate_idx, bool enable); }; int nvkm_therm_temp_get(struct nvkm_therm *); @@ -94,6 +97,7 @@ int nv40_therm_new(struct nvkm_device *, int, struct 
nvkm_therm **); int nv50_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int g84_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gt215_therm_new(struct nvkm_device *, int, struct nvkm_therm **); +int gf100_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gf119_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gm107_therm_new(struct nvkm_device *, int, struct nvkm_therm **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c b/drivers/gpu/drm/nouveau/nvkm/core/engine.c index b6c9169..473ad3e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c @@ -26,6 +26,7 @@ #include <core/option.h> #include <subdev/fb.h> +#include <subdev/therm.h> bool nvkm_engine_chsw_load(struct nvkm_engine *engine) @@ -86,6 +87,13 @@ static int nvkm_engine_fini(struct nvkm_subdev *subdev, bool suspend) { struct nvkm_engine *engine = nvkm_engine(subdev); + struct nvkm_therm *therm = subdev->device->therm; + int gate_idx; + + gate_idx = therm->clkgate_engine(therm, subdev->index); + if (gate_idx != -1) + therm->clkgate_set(therm, gate_idx, false); + if (engine->func->fini) return engine->func->fini(engine, suspend); return 0; @@ -96,12 +104,13 @@ nvkm_engine_init(struct nvkm_subdev *subdev) { struct nvkm_engine *engine = nvkm_engine(subdev); struct nvkm_fb *fb = subdev->device->fb; + struct nvkm_therm *therm = subdev->device->therm; int ret = 0, i; s64 time; if (!engine->usecount) { nvkm_trace(subdev, "init skipped, engine has no users\n"); - return ret; + goto finish; } if (engine->func->oneinit && !engine->subdev.oneinit) { @@ -123,6 +132,15 @@ nvkm_engine_init(struct nvkm_subdev *subdev) for (i = 0; fb && i < fb->tile.regions; i++) nvkm_engine_tile(engine, i); + +finish: + if (!ret) { + int gate_idx = therm->clkgate_engine(therm, subdev->index); + + if (gate_idx != -1) + therm->clkgate_set(therm, gate_idx, true); + } + return ret; } diff --git 
a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index b690bc1..d133016 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1355,7 +1355,7 @@ nvc0_chipset = { .mxm = nv50_mxm_new, .pci = gf100_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1392,7 +1392,7 @@ nvc1_chipset = { .mxm = nv50_mxm_new, .pci = gf106_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1428,7 +1428,7 @@ nvc3_chipset = { .mxm = nv50_mxm_new, .pci = gf106_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1464,7 +1464,7 @@ nvc4_chipset = { .mxm = nv50_mxm_new, .pci = gf100_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1501,7 +1501,7 @@ nvc8_chipset = { .mxm = nv50_mxm_new, .pci = gf100_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1538,7 +1538,7 @@ nvce_chipset = { .mxm = nv50_mxm_new, .pci = gf100_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1575,7 +1575,7 @@ nvcf_chipset = { .mxm = nv50_mxm_new, .pci = gf106_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild index 
135758b..cbb9465 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild @@ -1,4 +1,5 @@ nvkm-y += nvkm/subdev/therm/base.o +nvkm-y += nvkm/subdev/therm/clkgate.o nvkm-y += nvkm/subdev/therm/fan.o nvkm-y += nvkm/subdev/therm/fannil.o nvkm-y += nvkm/subdev/therm/fanpwm.o @@ -9,5 +10,6 @@ nvkm-y += nvkm/subdev/therm/nv40.o nvkm-y += nvkm/subdev/therm/nv50.o nvkm-y += nvkm/subdev/therm/g84.o nvkm-y += nvkm/subdev/therm/gt215.o +nvkm-y += nvkm/subdev/therm/gf100.o nvkm-y += nvkm/subdev/therm/gf119.o nvkm-y += nvkm/subdev/therm/gm107.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c index df949fa..723c0c1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c @@ -393,6 +393,8 @@ nvkm_therm_new_(const struct nvkm_therm_func *func, struct nvkm_device *device, therm->fan_set = nvkm_therm_fan_user_set; therm->attr_get = nvkm_therm_attr_get; therm->attr_set = nvkm_therm_attr_set; + therm->clkgate_engine = nvkm_therm_clkgate_engine; + therm->clkgate_set = nvkm_therm_clkgate_set; therm->mode = therm->suspend = -1; /* undefined */ return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c new file mode 100644 index 0000000..c030ea9 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c @@ -0,0 +1,49 @@ +/* + * Copyright 2017 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Lyude Paul + */ +#include "priv.h" + +int +nvkm_therm_clkgate_engine(struct nvkm_therm *therm, enum nvkm_devidx subdev) +{ + if (!therm->func->clkgate_engine) + return -1; + + return therm->func->clkgate_engine(subdev); +} + +void +nvkm_therm_clkgate_set(struct nvkm_therm *therm, int gate_idx, bool enable) +{ + if (!therm->func->clkgate_set) + return; + + if (enable) + nvkm_trace(&therm->subdev, + "Enabling clockgating for gate 0x%x\n", gate_idx); + else + nvkm_trace(&therm->subdev, + "Disabling clockgating for gate 0x%x\n", gate_idx); + + therm->func->clkgate_set(therm, gate_idx, enable); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c new file mode 100644 index 0000000..820934f --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c @@ -0,0 +1,77 @@ +/* + * Copyright 2017 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Lyude Paul + */ +#include <core/device.h> + +#include "priv.h" + +int +gf100_clkgate_engine(enum nvkm_devidx subdev) +{ + switch (subdev) { + case NVKM_ENGINE_GR: return 0x00; + case NVKM_ENGINE_MSPDEC: return 0x04; + case NVKM_ENGINE_MSPPP: return 0x08; + case NVKM_ENGINE_MSVLD: return 0x0c; + case NVKM_ENGINE_CE0: return 0x10; + case NVKM_ENGINE_CE1: return 0x14; + case NVKM_ENGINE_MSENC: return 0x18; + case NVKM_ENGINE_CE2: return 0x1c; + default: return -1; + } +} + +void +gf100_clkgate_set(struct nvkm_therm *therm, int gate_idx, bool enable) +{ + u8 data; + + if (enable) /* ENG_CLK=auto, BLK_CLK=auto, ENG_PWR=run, BLK_PWR=auto */ + data = 0x45; + else /* ENG_CLK=run, BLK_CLK=run, ENG_PWR=run, BLK_PWR=run */ + data = 0x0; + + nvkm_mask(therm->subdev.device, 0x20200 + gate_idx, 0xff, data); +} + +static const struct nvkm_therm_func +gf100_therm = { + .init = gt215_therm_init, + .fini = g84_therm_fini, + .pwm_ctrl = nv50_fan_pwm_ctrl, + .pwm_get = nv50_fan_pwm_get, + .pwm_set = nv50_fan_pwm_set, + .pwm_clock = nv50_fan_pwm_clock, + .temp_get = g84_temp_get, + .fan_sense = gt215_therm_fan_sense, + .program_alarms = nvkm_therm_program_alarms_polling, + .clkgate_engine = gf100_clkgate_engine, + .clkgate_set = gf100_clkgate_set, +}; + +int +gf100_therm_new(struct nvkm_device *device, int index, + struct nvkm_therm **ptherm) +{ + return nvkm_therm_new_(&gf100_therm, device, index, ptherm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c index 06dcfd6..a2626fb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c @@ -143,6 +143,8 @@ gf119_therm = { .temp_get = g84_temp_get, .fan_sense = gt215_therm_fan_sense, .program_alarms = nvkm_therm_program_alarms_polling, + .clkgate_engine = gf100_clkgate_engine, + .clkgate_set = gf100_clkgate_set, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c 
b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c index 86848ec..c580c39 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c @@ -65,6 +65,8 @@ gm107_therm = { .temp_get = g84_temp_get, .fan_sense = gt215_therm_fan_sense, .program_alarms = nvkm_therm_program_alarms_polling, + .clkgate_engine = gf100_clkgate_engine, + .clkgate_set = gf100_clkgate_set, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c index c08097f..4caf401 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c @@ -36,7 +36,7 @@ gt215_therm_fan_sense(struct nvkm_therm *therm) return -ENODEV; } -static void +void gt215_therm_init(struct nvkm_therm *therm) { struct nvkm_device *device = therm->subdev.device; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h index 235a5d8..80367a7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h @@ -81,6 +81,9 @@ void nvkm_therm_sensor_event(struct nvkm_therm *, enum nvkm_therm_thrs, enum nvkm_therm_thrs_direction); void nvkm_therm_program_alarms_polling(struct nvkm_therm *); +int nvkm_therm_clkgate_engine(struct nvkm_therm *, enum nvkm_devidx); +void nvkm_therm_clkgate_set(struct nvkm_therm *, int gate_idx, bool enable); + struct nvkm_therm_func { void (*init)(struct nvkm_therm *); void (*fini)(struct nvkm_therm *); @@ -96,6 +99,9 @@ struct nvkm_therm_func { int (*fan_sense)(struct nvkm_therm *); void (*program_alarms)(struct nvkm_therm *); + + int (*clkgate_engine)(enum nvkm_devidx); + void (*clkgate_set)(struct nvkm_therm *, int, bool); }; void nv40_therm_intr(struct nvkm_therm *); @@ -110,6 +116,10 @@ void g84_sensor_setup(struct nvkm_therm *); void g84_therm_fini(struct nvkm_therm *); int gt215_therm_fan_sense(struct nvkm_therm *); 
+void gt215_therm_init(struct nvkm_therm *); + +int gf100_clkgate_engine(enum nvkm_devidx); +void gf100_clkgate_set(struct nvkm_therm *, int, bool); void gf119_therm_init(struct nvkm_therm *); -- 2.9.3
Roy Spliet
2017-Apr-25 20:42 UTC
[Nouveau] [PATCH] drm/nouveau: Add support for clockgating on Fermi+
Thanks for the work so far. A quick scan through the first NVC4 trace at hand, using upstream demmio, reveals at least 20 writes to the BLCG registers of PGRAPH and a few in PXBAR prior to altering the value of register 0x20200 (see below). We know that these are related to the clock gating you enable. Are you 110% sure that fiddling with 0x20200 bits without first setting these values can *never* cause any issues with stability or correctness, even under the weirdest of loads? Roy 8<---------------------------------- [0] 96.240915 MMIO32 W 0x4041f0 0x0000c646 PGRAPH.DISPATCH.HW_BLK.BLCG <= 0xc646 [0] 96.240925 MMIO32 W 0x409890 0x00000045 PGRAPH.CTXCTL.HW_BLK.BLCG <= 0x45 [0] 96.240935 MMIO32 W 0x4078c0 0x00004242 PGRAPH.TPBUS.HW_BLK.BLCG <= 0x4242 [0] 96.240946 MMIO32 W 0x406000 0x00004442 PGRAPH.UNK6000.HW_BLK0.BLCG <= 0x4442 [0] 96.240956 MMIO32 W 0x406010 0x00004242 PGRAPH.UNK6000.HW_BLK1.BLCG <= 0x4242 [0] 96.240966 MMIO32 W 0x405860 0x00004242 PGRAPH.UNK5800.HW_BLK.BLCG <= 0x4242 [0] 96.240977 MMIO32 W 0x40590c 0x0000c242 PGRAPH.UNK5900.HW_CGBLK.BLCG <= 0xc242 [0] 96.240987 MMIO32 W 0x408040 0x0000c443 PGRAPH.CCACHE.HW_BLK.BLCG <= 0xc443 [0] 96.240997 MMIO32 W 0x41a890 0x00004242 PGRAPH.GPC_BROADCAST.CTXCTL.HW_BLK.BLCG <= 0x4242 [0] 96.241007 MMIO32 W 0x418500 0x0000c242 PGRAPH.GPC_BROADCAST.UNK500.HW_CGBLK.BLCG <= 0xc242 [0] 96.241018 MMIO32 W 0x418608 0x0000c242 PGRAPH.GPC_BROADCAST.UNK600.HW_BLK.BLCG <= 0xc242 [0] 96.241028 MMIO32 W 0x418688 0x0000c242 PGRAPH.GPC_BROADCAST.UNK680.HW_BLK.BLCG <= 0xc242 [0] 96.241038 MMIO32 W 0x418718 0x00000042 PGRAPH.GPC_BROADCAST.UNK700.HW_BLK.BLCG <= 0x42 [0] 96.241048 MMIO32 W 0x418828 0x00008442 PGRAPH.GPC_BROADCAST.ESETUP.HW_CGBLK.BLCG <= 0x8442 [0] 96.241058 MMIO32 W 0x418bbc 0x0000c242 PGRAPH.GPC_BROADCAST.TPBUS.HW_BLK.BLCG <= 0xc242 [0] 96.241069 MMIO32 W 0x418970 0x0000c242 PGRAPH.GPC_BROADCAST.ZCULL.HW_BLK.BLCG <= 0xc242 [0] 96.241079 MMIO32 W 0x418c70 0x0000c242 PGRAPH.GPC_BROADCAST.TPCONF.HW_BLK.BLCG <= 0xc242 
[0] 96.241089 MMIO32 W 0x418cf0 0x0000c242 PGRAPH.GPC_BROADCAST.UNKC80.HW_BLK.BLCG <= 0xc242 [0] 96.241102 MMIO32 W 0x418d70 0x0000c242 PGRAPH.GPC_BROADCAST.UNKD00.HW_BLK.BLCG <= 0xc242 [0] 96.241112 MMIO32 W 0x418f0c 0x0000c242 PGRAPH.GPC_BROADCAST.UNKF00.HW_BLK.BLCG <= 0xc242 [0] 96.241122 MMIO32 W 0x418e0c 0x0000c242 PGRAPH.GPC_BROADCAST.UNKE00.HW_BLK.BLCG <= 0xc242 [0] 96.241132 MMIO32 W 0x419020 0x0000c242 PGRAPH.GPC_BROADCAST.CCACHE.HW_CGBLK0.BLCG <= 0xc242 [0] 96.241143 MMIO32 W 0x419038 0x00000042 PGRAPH.GPC_BROADCAST.CCACHE.HW_CGBLK1.BLCG <= 0x42 [0] 96.241153 MMIO32 W 0x418898 0x00004242 PGRAPH.GPC_BROADCAST.FFB.BLCG <= 0x4242 [0] 96.241163 MMIO32 W 0x419a40 0x0000c242 PGRAPH.GPC_BROADCAST.TPC_ALL.TEX.HW_CGBLK0.BLCG <= 0xc242 [0] 96.241173 MMIO32 W 0x419a48 0x0000c242 PGRAPH.GPC_BROADCAST.TPC_ALL.TEX.HW_CGBLK1.BLCG <= 0xc242 [0] 96.241183 MMIO32 W 0x419a50 0x0000c242 PGRAPH.GPC_BROADCAST.TPC_ALL.TEX.HW_CGBLK2.BLCG <= 0xc242 [0] 96.241194 MMIO32 W 0x419a58 0x0000c242 PGRAPH.GPC_BROADCAST.TPC_ALL.TEX.HW_CGBLK3.BLCG <= 0xc242 [0] 96.241204 MMIO32 W 0x419a60 0x0000c242 PGRAPH.GPC_BROADCAST.TPC_ALL.TEX.HW_CGBLK4.BLCG <= 0xc242 [0] 96.241214 MMIO32 W 0x419a68 0x0000c242 PGRAPH.GPC_BROADCAST.TPC_ALL.TEX.HW_CGBLK5.BLCG <= 0xc242 [0] 96.241224 MMIO32 W 0x419a70 0x0000c242 PGRAPH.GPC_BROADCAST.TPC_ALL.TEX.HW_CGBLK6.BLCG <= 0xc242 [0] 96.241235 MMIO32 W 0x419a78 0x0000c242 PGRAPH.GPC_BROADCAST.TPC_ALL.TEX.HW_CGBLK7.BLCG <= 0xc242 [0] 96.241245 MMIO32 W 0x419a80 0x0000c242 PGRAPH.GPC_BROADCAST.TPC_ALL.TEX.HW_CGBLK8.BLCG <= 0xc242 [0] 96.241255 MMIO32 W 0x419acc 0x0000c742 PGRAPH.GPC_BROADCAST.TPC_ALL.TEX+0xcc <= 0xc742 [0] 96.241265 MMIO32 W 0x419868 0x00008242 PGRAPH.GPC_BROADCAST.TPC_ALL.POLY.HW_BLK.BLCG <= 0x8242 [0] 96.241275 MMIO32 W 0x419ccc 0x00004242 PGRAPH.GPC_BROADCAST.TPC_ALL.L1.BLCG0 <= 0x4242 [0] 96.241286 MMIO32 W 0x419cd4 0x00004242 PGRAPH.GPC_BROADCAST.TPC_ALL.L1.HW_CGBLK1.BLCG <= 0x4242 [0] 96.241296 MMIO32 W 0x419cdc 0x00004242 
PGRAPH.GPC_BROADCAST.TPC_ALL.L1.HW_CGBLK2.BLCG <= 0x4242 [0] 96.241306 MMIO32 W 0x419be8 0x0000c242 PGRAPH.GPC_BROADCAST.TPC_ALL.TPBUS.HW_BLK.BLCG <= 0xc242 [0] 96.241316 MMIO32 W 0x419d30 0x0000c242 PGRAPH.GPC_BROADCAST.TPC_ALL.MASTER.HW_CGBLK.BLCG <= 0xc242 [0] 96.241326 MMIO32 W 0x419c70 0x0000c542 PGRAPH.GPC_BROADCAST.TPC_ALL.UNK400.HW_BLK.BLCG <= 0xc542 [0] 96.241337 MMIO32 W 0x419fc0 0x0000d04b PGRAPH.GPC_BROADCAST.TPC_ALL.MP.HW_BLK0.BLCG <= 0xd04b [0] 96.241347 MMIO32 W 0x419fd4 0x0000cb4b PGRAPH.GPC_BROADCAST.TPC_ALL.MP.HW_BLK1.BLCG <= 0xcb4b [0] 96.241357 MMIO32 W 0x419fe8 0x0000cb4b PGRAPH.GPC_BROADCAST.TPC_ALL.MP.HW_BLK2.BLCG <= 0xcb4b [0] 96.241367 MMIO32 W 0x408810 0x0000c242 PGRAPH.ROP_BROADCAST.ZROP.HW_CGBLK0.BLCG <= 0xc242 [0] 96.241377 MMIO32 W 0x408818 0x0000c242 PGRAPH.ROP_BROADCAST.ZROP.HW_CGBLK1.BLCG <= 0xc242 [0] 96.241388 MMIO32 W 0x408a80 0x0000c242 PGRAPH.ROP_BROADCAST.HW_CGBLK0.BLCG <= 0xc242 [0] 96.241398 MMIO32 W 0x408a88 0x0000c242 PGRAPH.ROP_BROADCAST.HW_CGBLK1.BLCG <= 0xc242 [0] 96.241408 MMIO32 W 0x408a90 0x0000c242 PGRAPH.ROP_BROADCAST.HW_CGBLK2.BLCG <= 0xc242 [0] 96.241418 MMIO32 W 0x408a98 0x0000c242 PGRAPH.ROP_BROADCAST.HW_CGBLK3.BLCG <= 0xc242 [0] 96.241429 MMIO32 W 0x408aa0 0x0000c242 PGRAPH.ROP_BROADCAST.HW_CGBLK4.BLCG <= 0xc242 [0] 96.241439 MMIO32 W 0x408aa8 0x0000c242 PGRAPH.ROP_BROADCAST.HW_CGBLK5.BLCG <= 0xc242 [0] 96.241449 MMIO32 W 0x4089a8 0x0000c242 PGRAPH.ROP_BROADCAST.CROP.HW_CGBLK0.BLCG <= 0xc242 [0] 96.241459 MMIO32 W 0x4089b0 0x00000242 PGRAPH.ROP_BROADCAST.CROP.HW_CGBLK1.BLCG <= 0x242 [0] 96.241470 MMIO32 W 0x4089b8 0x0000c242 PGRAPH.ROP_BROADCAST.CROP.HW_CGBLK2.BLCG <= 0xc242 [0] 96.241483 MMIO32 R 0x121c78 0x00000002 PIBUS.MAIN.GPC_COUNT => 0x2 [0] 96.241496 MMIO32 W 0x13c820 0x0001007f PXBAR.UNK1800.BLCG <= 0x1007f [0] 96.241508 MMIO32 W 0x13cc00 0x00000042 PXBAR.GPC_UNK2[0].HW_BLK.BLCG <= 0x42 [0] 96.241521 MMIO32 W 0x13cc20 0x00000042 PXBAR.GPC_UNK2[0x1].HW_BLK.BLCG <= 0x42 [...] 
[0] 98.773852 MMIO32 R 0x020200 0x27722444 PTHERM.PGRAPH_CG_CTRL => { ENG_CLK = RUN | BLK_CLK = AUTO | ENG_PWR = RUN | BLK_PWR = AUTO | ENG_FILTER = 0x4 | ENG_MANT = 0x1 | ENG_DLY_BEFORE 0x2 | ENG_DLY_AFTER = 0x7 | BLK_DLY_BEFORE = 0x7 | BLK_DLY_AFTER = 0x2 } [0] 98.773877 MMIO32 W 0x020200 0x27722445 PTHERM.PGRAPH_CG_CTRL <= { ENG_CLK = AUTO | BLK_CLK = AUTO | ENG_PWR = RUN | BLK_PWR = AUTO | ENG_FILTER = 0x4 | ENG_MANT = 0x1 | ENG_DLY_BEFORE 0x2 | ENG_DLY_AFTER = 0x7 | BLK_DLY_BEFORE = 0x7 | BLK_DLY_AFTER = 0x2 } [0] 98.773904 MMIO32 R 0x020200 0x27722445 PTHERM.PGRAPH_CG_CTRL => { ENG_CLK = AUTO | BLK_CLK = AUTO | ENG_PWR = RUN | BLK_PWR = AUTO | ENG_FILTER = 0x4 | ENG_MANT = 0x1 | ENG_DLY_BEFORE 0x2 | ENG_DLY_AFTER = 0x7 | BLK_DLY_BEFORE = 0x7 | BLK_DLY_AFTER = 0x2 } [0] 98.773930 MMIO32 W 0x020200 0x27726e45 PTHERM.PGRAPH_CG_CTRL <= { ENG_CLK = AUTO | BLK_CLK = AUTO | ENG_PWR = RUN | BLK_PWR = AUTO | ENG_FILTER = 0xe | ENG_MANT = 0x3 | ENG_DLY_BEFORE = 0x2 | ENG_DLY_AFTER = 0x7 | BLK_DLY_BEFORE = 0x7 | BLK_DLY_AFTER = 0x2 } Op 25-04-17 om 19:38 schreef Lyude:> This adds support for enabling automatic clockgating on nvidia GPUs for > Fermi and later generations. This saves a little bit of power, bringing > my fermi GPU's power consumption from ~28.3W on idle to ~27W, and my > kepler's idle power consumption from ~23.6W to ~21.65W. > > Similar to how the nvidia driver seems to handle this, we enable > clockgating for each engine that supports it after it's initialization. 
> > Signed-off-by: Lyude <lyude at redhat.com> > --- > .../gpu/drm/nouveau/include/nvkm/subdev/therm.h | 4 ++ > drivers/gpu/drm/nouveau/nvkm/core/engine.c | 20 +++++- > drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 14 ++-- > drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild | 2 + > drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c | 2 + > .../gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c | 49 ++++++++++++++ > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c | 77 ++++++++++++++++++++++ > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c | 2 + > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c | 2 + > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c | 2 +- > drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h | 10 +++ > 11 files changed, 175 insertions(+), 9 deletions(-) > create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c > create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c > > diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h > index b268b96..904aa56 100644 > --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h > +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h > @@ -84,6 +84,9 @@ struct nvkm_therm { > > int (*attr_get)(struct nvkm_therm *, enum nvkm_therm_attr_type); > int (*attr_set)(struct nvkm_therm *, enum nvkm_therm_attr_type, int); > + > + int (*clkgate_engine)(struct nvkm_therm *, enum nvkm_devidx); > + void (*clkgate_set)(struct nvkm_therm *, int gate_idx, bool enable); > }; > > int nvkm_therm_temp_get(struct nvkm_therm *); > @@ -94,6 +97,7 @@ int nv40_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > int nv50_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > int g84_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > int gt215_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > +int gf100_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > int gf119_therm_new(struct nvkm_device *, 
int, struct nvkm_therm **); > int gm107_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > #endif > diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c b/drivers/gpu/drm/nouveau/nvkm/core/engine.c > index b6c9169..473ad3e 100644 > --- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c > +++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c > @@ -26,6 +26,7 @@ > #include <core/option.h> > > #include <subdev/fb.h> > +#include <subdev/therm.h> > > bool > nvkm_engine_chsw_load(struct nvkm_engine *engine) > @@ -86,6 +87,13 @@ static int > nvkm_engine_fini(struct nvkm_subdev *subdev, bool suspend) > { > struct nvkm_engine *engine = nvkm_engine(subdev); > + struct nvkm_therm *therm = subdev->device->therm; > + int gate_idx; > + > + gate_idx = therm->clkgate_engine(therm, subdev->index); > + if (gate_idx != -1) > + therm->clkgate_set(therm, gate_idx, false); > + > if (engine->func->fini) > return engine->func->fini(engine, suspend); > return 0; > @@ -96,12 +104,13 @@ nvkm_engine_init(struct nvkm_subdev *subdev) > { > struct nvkm_engine *engine = nvkm_engine(subdev); > struct nvkm_fb *fb = subdev->device->fb; > + struct nvkm_therm *therm = subdev->device->therm; > int ret = 0, i; > s64 time; > > if (!engine->usecount) { > nvkm_trace(subdev, "init skipped, engine has no users\n"); > - return ret; > + goto finish; > } > > if (engine->func->oneinit && !engine->subdev.oneinit) { > @@ -123,6 +132,15 @@ nvkm_engine_init(struct nvkm_subdev *subdev) > > for (i = 0; fb && i < fb->tile.regions; i++) > nvkm_engine_tile(engine, i); > + > +finish: > + if (!ret) { > + int gate_idx = therm->clkgate_engine(therm, subdev->index); > + > + if (gate_idx != -1) > + therm->clkgate_set(therm, gate_idx, true); > + } > + > return ret; > } > > diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c > index b690bc1..d133016 100644 > --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c > +++ 
b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c > @@ -1355,7 +1355,7 @@ nvc0_chipset = { > .mxm = nv50_mxm_new, > .pci = gf100_pci_new, > .pmu = gf100_pmu_new, > - .therm = gt215_therm_new, > + .therm = gf100_therm_new, > .timer = nv41_timer_new, > .volt = gf100_volt_new, > .ce[0] = gf100_ce_new, > @@ -1392,7 +1392,7 @@ nvc1_chipset = { > .mxm = nv50_mxm_new, > .pci = gf106_pci_new, > .pmu = gf100_pmu_new, > - .therm = gt215_therm_new, > + .therm = gf100_therm_new, > .timer = nv41_timer_new, > .volt = gf100_volt_new, > .ce[0] = gf100_ce_new, > @@ -1428,7 +1428,7 @@ nvc3_chipset = { > .mxm = nv50_mxm_new, > .pci = gf106_pci_new, > .pmu = gf100_pmu_new, > - .therm = gt215_therm_new, > + .therm = gf100_therm_new, > .timer = nv41_timer_new, > .volt = gf100_volt_new, > .ce[0] = gf100_ce_new, > @@ -1464,7 +1464,7 @@ nvc4_chipset = { > .mxm = nv50_mxm_new, > .pci = gf100_pci_new, > .pmu = gf100_pmu_new, > - .therm = gt215_therm_new, > + .therm = gf100_therm_new, > .timer = nv41_timer_new, > .volt = gf100_volt_new, > .ce[0] = gf100_ce_new, > @@ -1501,7 +1501,7 @@ nvc8_chipset = { > .mxm = nv50_mxm_new, > .pci = gf100_pci_new, > .pmu = gf100_pmu_new, > - .therm = gt215_therm_new, > + .therm = gf100_therm_new, > .timer = nv41_timer_new, > .volt = gf100_volt_new, > .ce[0] = gf100_ce_new, > @@ -1538,7 +1538,7 @@ nvce_chipset = { > .mxm = nv50_mxm_new, > .pci = gf100_pci_new, > .pmu = gf100_pmu_new, > - .therm = gt215_therm_new, > + .therm = gf100_therm_new, > .timer = nv41_timer_new, > .volt = gf100_volt_new, > .ce[0] = gf100_ce_new, > @@ -1575,7 +1575,7 @@ nvcf_chipset = { > .mxm = nv50_mxm_new, > .pci = gf106_pci_new, > .pmu = gf100_pmu_new, > - .therm = gt215_therm_new, > + .therm = gf100_therm_new, > .timer = nv41_timer_new, > .volt = gf100_volt_new, > .ce[0] = gf100_ce_new, > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild > index 135758b..cbb9465 100644 > --- 
a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild > @@ -1,4 +1,5 @@ > nvkm-y += nvkm/subdev/therm/base.o > +nvkm-y += nvkm/subdev/therm/clkgate.o > nvkm-y += nvkm/subdev/therm/fan.o > nvkm-y += nvkm/subdev/therm/fannil.o > nvkm-y += nvkm/subdev/therm/fanpwm.o > @@ -9,5 +10,6 @@ nvkm-y += nvkm/subdev/therm/nv40.o > nvkm-y += nvkm/subdev/therm/nv50.o > nvkm-y += nvkm/subdev/therm/g84.o > nvkm-y += nvkm/subdev/therm/gt215.o > +nvkm-y += nvkm/subdev/therm/gf100.o > nvkm-y += nvkm/subdev/therm/gf119.o > nvkm-y += nvkm/subdev/therm/gm107.o > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c > index df949fa..723c0c1 100644 > --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c > @@ -393,6 +393,8 @@ nvkm_therm_new_(const struct nvkm_therm_func *func, struct nvkm_device *device, > therm->fan_set = nvkm_therm_fan_user_set; > therm->attr_get = nvkm_therm_attr_get; > therm->attr_set = nvkm_therm_attr_set; > + therm->clkgate_engine = nvkm_therm_clkgate_engine; > + therm->clkgate_set = nvkm_therm_clkgate_set; > therm->mode = therm->suspend = -1; /* undefined */ > return 0; > } > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c > new file mode 100644 > index 0000000..c030ea9 > --- /dev/null > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c > @@ -0,0 +1,49 @@ > +/* > + * Copyright 2017 Red Hat Inc. 
> + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > + * OTHER DEALINGS IN THE SOFTWARE. 
> + * > + * Authors: Lyude Paul > + */ > +#include "priv.h" > + > +int > +nvkm_therm_clkgate_engine(struct nvkm_therm *therm, enum nvkm_devidx subdev) > +{ > + if (!therm->func->clkgate_engine) > + return -1; > + > + return therm->func->clkgate_engine(subdev); > +} > + > +void > +nvkm_therm_clkgate_set(struct nvkm_therm *therm, int gate_idx, bool enable) > +{ > + if (!therm->func->clkgate_set) > + return; > + > + if (enable) > + nvkm_trace(&therm->subdev, > + "Enabling clockgating for gate 0x%x\n", gate_idx); > + else > + nvkm_trace(&therm->subdev, > + "Disabling clockgating for gate 0x%x\n", gate_idx); > + > + therm->func->clkgate_set(therm, gate_idx, enable); > +} > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c > new file mode 100644 > index 0000000..820934f > --- /dev/null > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c > @@ -0,0 +1,77 @@ > +/* > + * Copyright 2017 Red Hat Inc. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > + * OTHER DEALINGS IN THE SOFTWARE. > + * > + * Authors: Lyude Paul > + */ > +#include <core/device.h> > + > +#include "priv.h" > + > +int > +gf100_clkgate_engine(enum nvkm_devidx subdev) > +{ > + switch (subdev) { > + case NVKM_ENGINE_GR: return 0x00; > + case NVKM_ENGINE_MSPDEC: return 0x04; > + case NVKM_ENGINE_MSPPP: return 0x08; > + case NVKM_ENGINE_MSVLD: return 0x0c; > + case NVKM_ENGINE_CE0: return 0x10; > + case NVKM_ENGINE_CE1: return 0x14; > + case NVKM_ENGINE_MSENC: return 0x18; > + case NVKM_ENGINE_CE2: return 0x1c; > + default: return -1; > + } > +} > + > +void > +gf100_clkgate_set(struct nvkm_therm *therm, int gate_idx, bool enable) > +{ > + u8 data; > + > + if (enable) /* ENG_CLK=auto, BLK_CLK=auto, ENG_PWR=run, BLK_PWR=auto */ > + data = 0x45; > + else /* ENG_CLK=run, BLK_CLK=run, ENG_PWR=run, BLK_PWR=run */ > + data = 0x0; > + > + nvkm_mask(therm->subdev.device, 0x20200 + gate_idx, 0xff, data); > +} > + > +static const struct nvkm_therm_func > +gf100_therm = { > + .init = gt215_therm_init, > + .fini = g84_therm_fini, > + .pwm_ctrl = nv50_fan_pwm_ctrl, > + .pwm_get = nv50_fan_pwm_get, > + .pwm_set = nv50_fan_pwm_set, > + .pwm_clock = nv50_fan_pwm_clock, > + .temp_get = g84_temp_get, > + .fan_sense = gt215_therm_fan_sense, > + .program_alarms = nvkm_therm_program_alarms_polling, > + .clkgate_engine = gf100_clkgate_engine, > + .clkgate_set = gf100_clkgate_set, > +}; > + > +int > +gf100_therm_new(struct nvkm_device *device, int index, > + struct nvkm_therm **ptherm) > +{ > + return nvkm_therm_new_(&gf100_therm, device, index, ptherm); > +} > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c > index 06dcfd6..a2626fb 100644 > --- 
a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c > @@ -143,6 +143,8 @@ gf119_therm = { > .temp_get = g84_temp_get, > .fan_sense = gt215_therm_fan_sense, > .program_alarms = nvkm_therm_program_alarms_polling, > + .clkgate_engine = gf100_clkgate_engine, > + .clkgate_set = gf100_clkgate_set, > }; > > int > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c > index 86848ec..c580c39 100644 > --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c > @@ -65,6 +65,8 @@ gm107_therm = { > .temp_get = g84_temp_get, > .fan_sense = gt215_therm_fan_sense, > .program_alarms = nvkm_therm_program_alarms_polling, > + .clkgate_engine = gf100_clkgate_engine, > + .clkgate_set = gf100_clkgate_set, > }; > > int > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c > index c08097f..4caf401 100644 > --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c > @@ -36,7 +36,7 @@ gt215_therm_fan_sense(struct nvkm_therm *therm) > return -ENODEV; > } > > -static void > +void > gt215_therm_init(struct nvkm_therm *therm) > { > struct nvkm_device *device = therm->subdev.device; > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h > index 235a5d8..80367a7 100644 > --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h > @@ -81,6 +81,9 @@ void nvkm_therm_sensor_event(struct nvkm_therm *, enum nvkm_therm_thrs, > enum nvkm_therm_thrs_direction); > void nvkm_therm_program_alarms_polling(struct nvkm_therm *); > > +int nvkm_therm_clkgate_engine(struct nvkm_therm *, enum nvkm_devidx); > +void nvkm_therm_clkgate_set(struct nvkm_therm *, int gate_idx, bool enable); > + > struct nvkm_therm_func { 
> void (*init)(struct nvkm_therm *); > void (*fini)(struct nvkm_therm *); > @@ -96,6 +99,9 @@ struct nvkm_therm_func { > int (*fan_sense)(struct nvkm_therm *); > > void (*program_alarms)(struct nvkm_therm *); > + > + int (*clkgate_engine)(enum nvkm_devidx); > + void (*clkgate_set)(struct nvkm_therm *, int, bool); > }; > > void nv40_therm_intr(struct nvkm_therm *); > @@ -110,6 +116,10 @@ void g84_sensor_setup(struct nvkm_therm *); > void g84_therm_fini(struct nvkm_therm *); > > int gt215_therm_fan_sense(struct nvkm_therm *); > +void gt215_therm_init(struct nvkm_therm *); > + > +int gf100_clkgate_engine(enum nvkm_devidx); > +void gf100_clkgate_set(struct nvkm_therm *, int, bool); > > void gf119_therm_init(struct nvkm_therm *); >
Ben Skeggs
2017-Apr-25 20:54 UTC
[Nouveau] [PATCH] drm/nouveau: Add support for clockgating on Fermi+
On 04/26/2017 06:42 AM, Roy Spliet wrote:> Thanks for the work so far. > > A quick scan through the first NVC4 trace at hand, using upstream > demmio, reveals at least 20 writes to the BLCG registers of PGRAPH and a > few in PXBAR prior to altering the value of register 0x20200 (see > below). We know that these are related to the clock gating you enable. > Are you 110% sure that fiddling with 0x20200 bits without first setting > these values can *never* cause any issues with stability or correctness, > even under the weirdest of loads?I too would prefer we limit this to altering only the ELCG bits for now. And, perhaps also rename clkgate_set() to elcg_set() to make it clear what gating feature it is handling. Would be great to add BLCG support too though ;) Ben.> > > Roy > > 8<---------------------------------- > > [0] 96.240915 MMIO32 W 0x4041f0 0x0000c646 PGRAPH.DISPATCH.HW_BLK.BLCG > <= 0xc646 > [0] 96.240925 MMIO32 W 0x409890 0x00000045 PGRAPH.CTXCTL.HW_BLK.BLCG <> 0x45 > [0] 96.240935 MMIO32 W 0x4078c0 0x00004242 PGRAPH.TPBUS.HW_BLK.BLCG <> 0x4242 > [0] 96.240946 MMIO32 W 0x406000 0x00004442 PGRAPH.UNK6000.HW_BLK0.BLCG > <= 0x4442 > [0] 96.240956 MMIO32 W 0x406010 0x00004242 PGRAPH.UNK6000.HW_BLK1.BLCG > <= 0x4242 > [0] 96.240966 MMIO32 W 0x405860 0x00004242 PGRAPH.UNK5800.HW_BLK.BLCG <> 0x4242 > [0] 96.240977 MMIO32 W 0x40590c 0x0000c242 PGRAPH.UNK5900.HW_CGBLK.BLCG > <= 0xc242 > [0] 96.240987 MMIO32 W 0x408040 0x0000c443 PGRAPH.CCACHE.HW_BLK.BLCG <> 0xc443 > [0] 96.240997 MMIO32 W 0x41a890 0x00004242 > PGRAPH.GPC_BROADCAST.CTXCTL.HW_BLK.BLCG <= 0x4242 > [0] 96.241007 MMIO32 W 0x418500 0x0000c242 > PGRAPH.GPC_BROADCAST.UNK500.HW_CGBLK.BLCG <= 0xc242 > [0] 96.241018 MMIO32 W 0x418608 0x0000c242 > PGRAPH.GPC_BROADCAST.UNK600.HW_BLK.BLCG <= 0xc242 > [0] 96.241028 MMIO32 W 0x418688 0x0000c242 > PGRAPH.GPC_BROADCAST.UNK680.HW_BLK.BLCG <= 0xc242 > [0] 96.241038 MMIO32 W 0x418718 0x00000042 > PGRAPH.GPC_BROADCAST.UNK700.HW_BLK.BLCG <= 0x42 > [0] 96.241048 MMIO32 W 
0x418828 0x00008442 > PGRAPH.GPC_BROADCAST.ESETUP.HW_CGBLK.BLCG <= 0x8442 > [0] 96.241058 MMIO32 W 0x418bbc 0x0000c242 > PGRAPH.GPC_BROADCAST.TPBUS.HW_BLK.BLCG <= 0xc242 > [0] 96.241069 MMIO32 W 0x418970 0x0000c242 > PGRAPH.GPC_BROADCAST.ZCULL.HW_BLK.BLCG <= 0xc242 > [0] 96.241079 MMIO32 W 0x418c70 0x0000c242 > PGRAPH.GPC_BROADCAST.TPCONF.HW_BLK.BLCG <= 0xc242 > [0] 96.241089 MMIO32 W 0x418cf0 0x0000c242 > PGRAPH.GPC_BROADCAST.UNKC80.HW_BLK.BLCG <= 0xc242 > [0] 96.241102 MMIO32 W 0x418d70 0x0000c242 > PGRAPH.GPC_BROADCAST.UNKD00.HW_BLK.BLCG <= 0xc242 > [0] 96.241112 MMIO32 W 0x418f0c 0x0000c242 > PGRAPH.GPC_BROADCAST.UNKF00.HW_BLK.BLCG <= 0xc242 > [0] 96.241122 MMIO32 W 0x418e0c 0x0000c242 > PGRAPH.GPC_BROADCAST.UNKE00.HW_BLK.BLCG <= 0xc242 > [0] 96.241132 MMIO32 W 0x419020 0x0000c242 > PGRAPH.GPC_BROADCAST.CCACHE.HW_CGBLK0.BLCG <= 0xc242 > [0] 96.241143 MMIO32 W 0x419038 0x00000042 > PGRAPH.GPC_BROADCAST.CCACHE.HW_CGBLK1.BLCG <= 0x42 > [0] 96.241153 MMIO32 W 0x418898 0x00004242 PGRAPH.GPC_BROADCAST.FFB.BLCG > <= 0x4242 > [0] 96.241163 MMIO32 W 0x419a40 0x0000c242 > PGRAPH.GPC_BROADCAST.TPC_ALL.TEX.HW_CGBLK0.BLCG <= 0xc242 > [0] 96.241173 MMIO32 W 0x419a48 0x0000c242 > PGRAPH.GPC_BROADCAST.TPC_ALL.TEX.HW_CGBLK1.BLCG <= 0xc242 > [0] 96.241183 MMIO32 W 0x419a50 0x0000c242 > PGRAPH.GPC_BROADCAST.TPC_ALL.TEX.HW_CGBLK2.BLCG <= 0xc242 > [0] 96.241194 MMIO32 W 0x419a58 0x0000c242 > PGRAPH.GPC_BROADCAST.TPC_ALL.TEX.HW_CGBLK3.BLCG <= 0xc242 > [0] 96.241204 MMIO32 W 0x419a60 0x0000c242 > PGRAPH.GPC_BROADCAST.TPC_ALL.TEX.HW_CGBLK4.BLCG <= 0xc242 > [0] 96.241214 MMIO32 W 0x419a68 0x0000c242 > PGRAPH.GPC_BROADCAST.TPC_ALL.TEX.HW_CGBLK5.BLCG <= 0xc242 > [0] 96.241224 MMIO32 W 0x419a70 0x0000c242 > PGRAPH.GPC_BROADCAST.TPC_ALL.TEX.HW_CGBLK6.BLCG <= 0xc242 > [0] 96.241235 MMIO32 W 0x419a78 0x0000c242 > PGRAPH.GPC_BROADCAST.TPC_ALL.TEX.HW_CGBLK7.BLCG <= 0xc242 > [0] 96.241245 MMIO32 W 0x419a80 0x0000c242 > PGRAPH.GPC_BROADCAST.TPC_ALL.TEX.HW_CGBLK8.BLCG <= 0xc242 > [0] 96.241255 
MMIO32 W 0x419acc 0x0000c742 > PGRAPH.GPC_BROADCAST.TPC_ALL.TEX+0xcc <= 0xc742 > [0] 96.241265 MMIO32 W 0x419868 0x00008242 > PGRAPH.GPC_BROADCAST.TPC_ALL.POLY.HW_BLK.BLCG <= 0x8242 > [0] 96.241275 MMIO32 W 0x419ccc 0x00004242 > PGRAPH.GPC_BROADCAST.TPC_ALL.L1.BLCG0 <= 0x4242 > [0] 96.241286 MMIO32 W 0x419cd4 0x00004242 > PGRAPH.GPC_BROADCAST.TPC_ALL.L1.HW_CGBLK1.BLCG <= 0x4242 > [0] 96.241296 MMIO32 W 0x419cdc 0x00004242 > PGRAPH.GPC_BROADCAST.TPC_ALL.L1.HW_CGBLK2.BLCG <= 0x4242 > [0] 96.241306 MMIO32 W 0x419be8 0x0000c242 > PGRAPH.GPC_BROADCAST.TPC_ALL.TPBUS.HW_BLK.BLCG <= 0xc242 > [0] 96.241316 MMIO32 W 0x419d30 0x0000c242 > PGRAPH.GPC_BROADCAST.TPC_ALL.MASTER.HW_CGBLK.BLCG <= 0xc242 > [0] 96.241326 MMIO32 W 0x419c70 0x0000c542 > PGRAPH.GPC_BROADCAST.TPC_ALL.UNK400.HW_BLK.BLCG <= 0xc542 > [0] 96.241337 MMIO32 W 0x419fc0 0x0000d04b > PGRAPH.GPC_BROADCAST.TPC_ALL.MP.HW_BLK0.BLCG <= 0xd04b > [0] 96.241347 MMIO32 W 0x419fd4 0x0000cb4b > PGRAPH.GPC_BROADCAST.TPC_ALL.MP.HW_BLK1.BLCG <= 0xcb4b > [0] 96.241357 MMIO32 W 0x419fe8 0x0000cb4b > PGRAPH.GPC_BROADCAST.TPC_ALL.MP.HW_BLK2.BLCG <= 0xcb4b > [0] 96.241367 MMIO32 W 0x408810 0x0000c242 > PGRAPH.ROP_BROADCAST.ZROP.HW_CGBLK0.BLCG <= 0xc242 > [0] 96.241377 MMIO32 W 0x408818 0x0000c242 > PGRAPH.ROP_BROADCAST.ZROP.HW_CGBLK1.BLCG <= 0xc242 > [0] 96.241388 MMIO32 W 0x408a80 0x0000c242 > PGRAPH.ROP_BROADCAST.HW_CGBLK0.BLCG <= 0xc242 > [0] 96.241398 MMIO32 W 0x408a88 0x0000c242 > PGRAPH.ROP_BROADCAST.HW_CGBLK1.BLCG <= 0xc242 > [0] 96.241408 MMIO32 W 0x408a90 0x0000c242 > PGRAPH.ROP_BROADCAST.HW_CGBLK2.BLCG <= 0xc242 > [0] 96.241418 MMIO32 W 0x408a98 0x0000c242 > PGRAPH.ROP_BROADCAST.HW_CGBLK3.BLCG <= 0xc242 > [0] 96.241429 MMIO32 W 0x408aa0 0x0000c242 > PGRAPH.ROP_BROADCAST.HW_CGBLK4.BLCG <= 0xc242 > [0] 96.241439 MMIO32 W 0x408aa8 0x0000c242 > PGRAPH.ROP_BROADCAST.HW_CGBLK5.BLCG <= 0xc242 > [0] 96.241449 MMIO32 W 0x4089a8 0x0000c242 > PGRAPH.ROP_BROADCAST.CROP.HW_CGBLK0.BLCG <= 0xc242 > [0] 96.241459 MMIO32 W 0x4089b0 
0x00000242 > PGRAPH.ROP_BROADCAST.CROP.HW_CGBLK1.BLCG <= 0x242 > [0] 96.241470 MMIO32 W 0x4089b8 0x0000c242 > PGRAPH.ROP_BROADCAST.CROP.HW_CGBLK2.BLCG <= 0xc242 > [0] 96.241483 MMIO32 R 0x121c78 0x00000002 PIBUS.MAIN.GPC_COUNT => 0x2 > [0] 96.241496 MMIO32 W 0x13c820 0x0001007f PXBAR.UNK1800.BLCG <= 0x1007f > [0] 96.241508 MMIO32 W 0x13cc00 0x00000042 PXBAR.GPC_UNK2[0].HW_BLK.BLCG > <= 0x42 > [0] 96.241521 MMIO32 W 0x13cc20 0x00000042 > PXBAR.GPC_UNK2[0x1].HW_BLK.BLCG <= 0x42 > > [...] > > [0] 98.773852 MMIO32 R 0x020200 0x27722444 PTHERM.PGRAPH_CG_CTRL => { > ENG_CLK = RUN | BLK_CLK = AUTO | ENG_PWR = RUN | BLK_PWR = AUTO | > ENG_FILTER = 0x4 | ENG_MANT = 0x1 | ENG_DLY_BEFORE > 0x2 | ENG_DLY_AFTER = 0x7 | BLK_DLY_BEFORE = 0x7 | BLK_DLY_AFTER = 0x2 } > [0] 98.773877 MMIO32 W 0x020200 0x27722445 PTHERM.PGRAPH_CG_CTRL <= { > ENG_CLK = AUTO | BLK_CLK = AUTO | ENG_PWR = RUN | BLK_PWR = AUTO | > ENG_FILTER = 0x4 | ENG_MANT = 0x1 | ENG_DLY_BEFORE > 0x2 | ENG_DLY_AFTER = 0x7 | BLK_DLY_BEFORE = 0x7 | BLK_DLY_AFTER = 0x2 } > [0] 98.773904 MMIO32 R 0x020200 0x27722445 PTHERM.PGRAPH_CG_CTRL => { > ENG_CLK = AUTO | BLK_CLK = AUTO | ENG_PWR = RUN | BLK_PWR = AUTO | > ENG_FILTER = 0x4 | ENG_MANT = 0x1 | ENG_DLY_BEFORE > 0x2 | ENG_DLY_AFTER = 0x7 | BLK_DLY_BEFORE = 0x7 | BLK_DLY_AFTER = 0x2 } > [0] 98.773930 MMIO32 W 0x020200 0x27726e45 PTHERM.PGRAPH_CG_CTRL <= { > ENG_CLK = AUTO | BLK_CLK = AUTO | ENG_PWR = RUN | BLK_PWR = AUTO | > ENG_FILTER = 0xe | ENG_MANT = 0x3 | ENG_DLY_BEFORE = 0x2 | ENG_DLY_AFTER > = 0x7 | BLK_DLY_BEFORE = 0x7 | BLK_DLY_AFTER = 0x2 } > > > Op 25-04-17 om 19:38 schreef Lyude: >> This adds support for enabling automatic clockgating on nvidia GPUs for >> Fermi and later generations. This saves a little bit of power, bringing >> my fermi GPU's power consumption from ~28.3W on idle to ~27W, and my >> kepler's idle power consumption from ~23.6W to ~21.65W. 
>> >> Similar to how the nvidia driver seems to handle this, we enable >> clockgating for each engine that supports it after it's initialization. >> >> Signed-off-by: Lyude <lyude at redhat.com> >> --- >> .../gpu/drm/nouveau/include/nvkm/subdev/therm.h | 4 ++ >> drivers/gpu/drm/nouveau/nvkm/core/engine.c | 20 +++++- >> drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 14 ++-- >> drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild | 2 + >> drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c | 2 + >> .../gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c | 49 ++++++++++++++ >> drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c | 77 >> ++++++++++++++++++++++ >> drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c | 2 + >> drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c | 2 + >> drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c | 2 +- >> drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h | 10 +++ >> 11 files changed, 175 insertions(+), 9 deletions(-) >> create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c >> create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c >> >> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h >> b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h >> index b268b96..904aa56 100644 >> --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h >> +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h >> @@ -84,6 +84,9 @@ struct nvkm_therm { >> int (*attr_get)(struct nvkm_therm *, enum nvkm_therm_attr_type); >> int (*attr_set)(struct nvkm_therm *, enum nvkm_therm_attr_type, >> int); >> + >> + int (*clkgate_engine)(struct nvkm_therm *, enum nvkm_devidx); >> + void (*clkgate_set)(struct nvkm_therm *, int gate_idx, bool enable); >> }; >> int nvkm_therm_temp_get(struct nvkm_therm *); >> @@ -94,6 +97,7 @@ int nv40_therm_new(struct nvkm_device *, int, struct >> nvkm_therm **); >> int nv50_therm_new(struct nvkm_device *, int, struct nvkm_therm **); >> int g84_therm_new(struct nvkm_device *, int, struct nvkm_therm **); >> 
int gt215_therm_new(struct nvkm_device *, int, struct nvkm_therm **); >> +int gf100_therm_new(struct nvkm_device *, int, struct nvkm_therm **); >> int gf119_therm_new(struct nvkm_device *, int, struct nvkm_therm **); >> int gm107_therm_new(struct nvkm_device *, int, struct nvkm_therm **); >> #endif >> diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c >> b/drivers/gpu/drm/nouveau/nvkm/core/engine.c >> index b6c9169..473ad3e 100644 >> --- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c >> +++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c >> @@ -26,6 +26,7 @@ >> #include <core/option.h> >> #include <subdev/fb.h> >> +#include <subdev/therm.h> >> bool >> nvkm_engine_chsw_load(struct nvkm_engine *engine) >> @@ -86,6 +87,13 @@ static int >> nvkm_engine_fini(struct nvkm_subdev *subdev, bool suspend) >> { >> struct nvkm_engine *engine = nvkm_engine(subdev); >> + struct nvkm_therm *therm = subdev->device->therm; >> + int gate_idx; >> + >> + gate_idx = therm->clkgate_engine(therm, subdev->index); >> + if (gate_idx != -1) >> + therm->clkgate_set(therm, gate_idx, false); >> + >> if (engine->func->fini) >> return engine->func->fini(engine, suspend); >> return 0; >> @@ -96,12 +104,13 @@ nvkm_engine_init(struct nvkm_subdev *subdev) >> { >> struct nvkm_engine *engine = nvkm_engine(subdev); >> struct nvkm_fb *fb = subdev->device->fb; >> + struct nvkm_therm *therm = subdev->device->therm; >> int ret = 0, i; >> s64 time; >> if (!engine->usecount) { >> nvkm_trace(subdev, "init skipped, engine has no users\n"); >> - return ret; >> + goto finish; >> } >> if (engine->func->oneinit && !engine->subdev.oneinit) { >> @@ -123,6 +132,15 @@ nvkm_engine_init(struct nvkm_subdev *subdev) >> for (i = 0; fb && i < fb->tile.regions; i++) >> nvkm_engine_tile(engine, i); >> + >> +finish: >> + if (!ret) { >> + int gate_idx = therm->clkgate_engine(therm, subdev->index); >> + >> + if (gate_idx != -1) >> + therm->clkgate_set(therm, gate_idx, true); >> + } >> + >> return ret; >> } >> diff --git 
a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c >> b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c >> index b690bc1..d133016 100644 >> --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c >> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c >> @@ -1355,7 +1355,7 @@ nvc0_chipset = { >> .mxm = nv50_mxm_new, >> .pci = gf100_pci_new, >> .pmu = gf100_pmu_new, >> - .therm = gt215_therm_new, >> + .therm = gf100_therm_new, >> .timer = nv41_timer_new, >> .volt = gf100_volt_new, >> .ce[0] = gf100_ce_new, >> @@ -1392,7 +1392,7 @@ nvc1_chipset = { >> .mxm = nv50_mxm_new, >> .pci = gf106_pci_new, >> .pmu = gf100_pmu_new, >> - .therm = gt215_therm_new, >> + .therm = gf100_therm_new, >> .timer = nv41_timer_new, >> .volt = gf100_volt_new, >> .ce[0] = gf100_ce_new, >> @@ -1428,7 +1428,7 @@ nvc3_chipset = { >> .mxm = nv50_mxm_new, >> .pci = gf106_pci_new, >> .pmu = gf100_pmu_new, >> - .therm = gt215_therm_new, >> + .therm = gf100_therm_new, >> .timer = nv41_timer_new, >> .volt = gf100_volt_new, >> .ce[0] = gf100_ce_new, >> @@ -1464,7 +1464,7 @@ nvc4_chipset = { >> .mxm = nv50_mxm_new, >> .pci = gf100_pci_new, >> .pmu = gf100_pmu_new, >> - .therm = gt215_therm_new, >> + .therm = gf100_therm_new, >> .timer = nv41_timer_new, >> .volt = gf100_volt_new, >> .ce[0] = gf100_ce_new, >> @@ -1501,7 +1501,7 @@ nvc8_chipset = { >> .mxm = nv50_mxm_new, >> .pci = gf100_pci_new, >> .pmu = gf100_pmu_new, >> - .therm = gt215_therm_new, >> + .therm = gf100_therm_new, >> .timer = nv41_timer_new, >> .volt = gf100_volt_new, >> .ce[0] = gf100_ce_new, >> @@ -1538,7 +1538,7 @@ nvce_chipset = { >> .mxm = nv50_mxm_new, >> .pci = gf100_pci_new, >> .pmu = gf100_pmu_new, >> - .therm = gt215_therm_new, >> + .therm = gf100_therm_new, >> .timer = nv41_timer_new, >> .volt = gf100_volt_new, >> .ce[0] = gf100_ce_new, >> @@ -1575,7 +1575,7 @@ nvcf_chipset = { >> .mxm = nv50_mxm_new, >> .pci = gf106_pci_new, >> .pmu = gf100_pmu_new, >> - .therm = gt215_therm_new, >> + .therm = gf100_therm_new, >> .timer 
= nv41_timer_new, >> .volt = gf100_volt_new, >> .ce[0] = gf100_ce_new, >> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild >> b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild >> index 135758b..cbb9465 100644 >> --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild >> +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild >> @@ -1,4 +1,5 @@ >> nvkm-y += nvkm/subdev/therm/base.o >> +nvkm-y += nvkm/subdev/therm/clkgate.o >> nvkm-y += nvkm/subdev/therm/fan.o >> nvkm-y += nvkm/subdev/therm/fannil.o >> nvkm-y += nvkm/subdev/therm/fanpwm.o >> @@ -9,5 +10,6 @@ nvkm-y += nvkm/subdev/therm/nv40.o >> nvkm-y += nvkm/subdev/therm/nv50.o >> nvkm-y += nvkm/subdev/therm/g84.o >> nvkm-y += nvkm/subdev/therm/gt215.o >> +nvkm-y += nvkm/subdev/therm/gf100.o >> nvkm-y += nvkm/subdev/therm/gf119.o >> nvkm-y += nvkm/subdev/therm/gm107.o >> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c >> b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c >> index df949fa..723c0c1 100644 >> --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c >> +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c >> @@ -393,6 +393,8 @@ nvkm_therm_new_(const struct nvkm_therm_func >> *func, struct nvkm_device *device, >> therm->fan_set = nvkm_therm_fan_user_set; >> therm->attr_get = nvkm_therm_attr_get; >> therm->attr_set = nvkm_therm_attr_set; >> + therm->clkgate_engine = nvkm_therm_clkgate_engine; >> + therm->clkgate_set = nvkm_therm_clkgate_set; >> therm->mode = therm->suspend = -1; /* undefined */ >> return 0; >> } >> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c >> b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c >> new file mode 100644 >> index 0000000..c030ea9 >> --- /dev/null >> +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c >> @@ -0,0 +1,49 @@ >> +/* >> + * Copyright 2017 Red Hat Inc. 
>> + * >> + * Permission is hereby granted, free of charge, to any person >> obtaining a >> + * copy of this software and associated documentation files (the >> "Software"), >> + * to deal in the Software without restriction, including without >> limitation >> + * the rights to use, copy, modify, merge, publish, distribute, >> sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be >> included in >> + * all copies or substantial portions of the Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >> EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF >> MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT >> SHALL >> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, >> DAMAGES OR >> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >> + * OTHER DEALINGS IN THE SOFTWARE. 
>> + * >> + * Authors: Lyude Paul >> + */ >> +#include "priv.h" >> + >> +int >> +nvkm_therm_clkgate_engine(struct nvkm_therm *therm, enum nvkm_devidx >> subdev) >> +{ >> + if (!therm->func->clkgate_engine) >> + return -1; >> + >> + return therm->func->clkgate_engine(subdev); >> +} >> + >> +void >> +nvkm_therm_clkgate_set(struct nvkm_therm *therm, int gate_idx, bool >> enable) >> +{ >> + if (!therm->func->clkgate_set) >> + return; >> + >> + if (enable) >> + nvkm_trace(&therm->subdev, >> + "Enabling clockgating for gate 0x%x\n", gate_idx); >> + else >> + nvkm_trace(&therm->subdev, >> + "Disabling clockgating for gate 0x%x\n", gate_idx); >> + >> + therm->func->clkgate_set(therm, gate_idx, enable); >> +} >> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c >> b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c >> new file mode 100644 >> index 0000000..820934f >> --- /dev/null >> +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c >> @@ -0,0 +1,77 @@ >> +/* >> + * Copyright 2017 Red Hat Inc. >> + * >> + * Permission is hereby granted, free of charge, to any person >> obtaining a >> + * copy of this software and associated documentation files (the >> "Software"), >> + * to deal in the Software without restriction, including without >> limitation >> + * the rights to use, copy, modify, merge, publish, distribute, >> sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be >> included in >> + * all copies or substantial portions of the Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >> EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF >> MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT >> SHALL >> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, >> DAMAGES OR >> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >> + * OTHER DEALINGS IN THE SOFTWARE. >> + * >> + * Authors: Lyude Paul >> + */ >> +#include <core/device.h> >> + >> +#include "priv.h" >> + >> +int >> +gf100_clkgate_engine(enum nvkm_devidx subdev) >> +{ >> + switch (subdev) { >> + case NVKM_ENGINE_GR: return 0x00; >> + case NVKM_ENGINE_MSPDEC: return 0x04; >> + case NVKM_ENGINE_MSPPP: return 0x08; >> + case NVKM_ENGINE_MSVLD: return 0x0c; >> + case NVKM_ENGINE_CE0: return 0x10; >> + case NVKM_ENGINE_CE1: return 0x14; >> + case NVKM_ENGINE_MSENC: return 0x18; >> + case NVKM_ENGINE_CE2: return 0x1c; >> + default: return -1; >> + } >> +} >> + >> +void >> +gf100_clkgate_set(struct nvkm_therm *therm, int gate_idx, bool enable) >> +{ >> + u8 data; >> + >> + if (enable) /* ENG_CLK=auto, BLK_CLK=auto, ENG_PWR=run, >> BLK_PWR=auto */ >> + data = 0x45; >> + else /* ENG_CLK=run, BLK_CLK=run, ENG_PWR=run, BLK_PWR=run */ >> + data = 0x0; >> + >> + nvkm_mask(therm->subdev.device, 0x20200 + gate_idx, 0xff, data); >> +} >> + >> +static const struct nvkm_therm_func >> +gf100_therm = { >> + .init = gt215_therm_init, >> + .fini = g84_therm_fini, >> + .pwm_ctrl = nv50_fan_pwm_ctrl, >> + .pwm_get = nv50_fan_pwm_get, >> + .pwm_set = nv50_fan_pwm_set, >> + .pwm_clock = nv50_fan_pwm_clock, >> + .temp_get = g84_temp_get, >> + .fan_sense = gt215_therm_fan_sense, >> + .program_alarms = nvkm_therm_program_alarms_polling, >> + .clkgate_engine = gf100_clkgate_engine, >> + .clkgate_set = gf100_clkgate_set, >> +}; >> + >> +int >> +gf100_therm_new(struct nvkm_device *device, int index, >> + struct nvkm_therm **ptherm) >> +{ >> + return nvkm_therm_new_(&gf100_therm, device, index, ptherm); >> +} >> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c >> 
b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c >> index 06dcfd6..a2626fb 100644 >> --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c >> +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c >> @@ -143,6 +143,8 @@ gf119_therm = { >> .temp_get = g84_temp_get, >> .fan_sense = gt215_therm_fan_sense, >> .program_alarms = nvkm_therm_program_alarms_polling, >> + .clkgate_engine = gf100_clkgate_engine, >> + .clkgate_set = gf100_clkgate_set, >> }; >> int >> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c >> b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c >> index 86848ec..c580c39 100644 >> --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c >> +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c >> @@ -65,6 +65,8 @@ gm107_therm = { >> .temp_get = g84_temp_get, >> .fan_sense = gt215_therm_fan_sense, >> .program_alarms = nvkm_therm_program_alarms_polling, >> + .clkgate_engine = gf100_clkgate_engine, >> + .clkgate_set = gf100_clkgate_set, >> }; >> int >> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c >> b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c >> index c08097f..4caf401 100644 >> --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c >> +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c >> @@ -36,7 +36,7 @@ gt215_therm_fan_sense(struct nvkm_therm *therm) >> return -ENODEV; >> } >> -static void >> +void >> gt215_therm_init(struct nvkm_therm *therm) >> { >> struct nvkm_device *device = therm->subdev.device; >> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h >> b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h >> index 235a5d8..80367a7 100644 >> --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h >> +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h >> @@ -81,6 +81,9 @@ void nvkm_therm_sensor_event(struct nvkm_therm *, >> enum nvkm_therm_thrs, >> enum nvkm_therm_thrs_direction); >> void nvkm_therm_program_alarms_polling(struct nvkm_therm *); >> +int nvkm_therm_clkgate_engine(struct 
nvkm_therm *, enum nvkm_devidx); >> +void nvkm_therm_clkgate_set(struct nvkm_therm *, int gate_idx, bool >> enable); >> + >> struct nvkm_therm_func { >> void (*init)(struct nvkm_therm *); >> void (*fini)(struct nvkm_therm *); >> @@ -96,6 +99,9 @@ struct nvkm_therm_func { >> int (*fan_sense)(struct nvkm_therm *); >> void (*program_alarms)(struct nvkm_therm *); >> + >> + int (*clkgate_engine)(enum nvkm_devidx); >> + void (*clkgate_set)(struct nvkm_therm *, int, bool); >> }; >> void nv40_therm_intr(struct nvkm_therm *); >> @@ -110,6 +116,10 @@ void g84_sensor_setup(struct nvkm_therm *); >> void g84_therm_fini(struct nvkm_therm *); >> int gt215_therm_fan_sense(struct nvkm_therm *); >> +void gt215_therm_init(struct nvkm_therm *); >> + >> +int gf100_clkgate_engine(enum nvkm_devidx); >> +void gf100_clkgate_set(struct nvkm_therm *, int, bool); >> void gf119_therm_init(struct nvkm_therm *); >> > > _______________________________________________ > Nouveau mailing list > Nouveau at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/nouveau
Karol Herbst
2017-Apr-25 22:49 UTC
[Nouveau] [PATCH] drm/nouveau: Add support for clockgating on Fermi+
Hi Lyude, thanks for the great work. Just a few comments inline. 2017-04-25 20:38 GMT+02:00 Lyude <lyude at redhat.com>: > This adds support for enabling automatic clockgating on nvidia GPUs for > Fermi and later generations. This saves a little bit of power, bringing > my fermi GPU's power consumption from ~28.3W on idle to ~27W, and my > kepler's idle power consumption from ~23.6W to ~21.65W. > > Similar to how the nvidia driver seems to handle this, we enable > clockgating for each engine that supports it after it's initialization. > > Signed-off-by: Lyude <lyude at redhat.com> > --- > .../gpu/drm/nouveau/include/nvkm/subdev/therm.h | 4 ++ > drivers/gpu/drm/nouveau/nvkm/core/engine.c | 20 +++++- > drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 14 ++-- > drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild | 2 + > drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c | 2 + > .../gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c | 49 ++++++++++++++ > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c | 77 ++++++++++++++++++++++ > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c | 2 + > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c | 2 + > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c | 2 +- > drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h | 10 +++ > 11 files changed, 175 insertions(+), 9 deletions(-) > create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c > create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c > > diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h > index b268b96..904aa56 100644 > --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h > +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h > @@ -84,6 +84,9 @@ struct nvkm_therm { > > int (*attr_get)(struct nvkm_therm *, enum nvkm_therm_attr_type); > int (*attr_set)(struct nvkm_therm *, enum nvkm_therm_attr_type, int); > + > + int (*clkgate_engine)(struct nvkm_therm *, enum nvkm_devidx); > + 
void (*clkgate_set)(struct nvkm_therm *, int gate_idx, bool enable);remove those and have a simple "nvkm_therm_clkgate_engine" function This way you know that every user calls this function and don't have to check for silly function pointers like you currently do in engine.c> }; > > int nvkm_therm_temp_get(struct nvkm_therm *); > @@ -94,6 +97,7 @@ int nv40_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > int nv50_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > int g84_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > int gt215_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > +int gf100_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > int gf119_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > int gm107_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > #endif > diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c b/drivers/gpu/drm/nouveau/nvkm/core/engine.c > index b6c9169..473ad3e 100644 > --- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c > +++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c > @@ -26,6 +26,7 @@ > #include <core/option.h> > > #include <subdev/fb.h> > +#include <subdev/therm.h> > > bool > nvkm_engine_chsw_load(struct nvkm_engine *engine) > @@ -86,6 +87,13 @@ static int > nvkm_engine_fini(struct nvkm_subdev *subdev, bool suspend) > { > struct nvkm_engine *engine = nvkm_engine(subdev); > + struct nvkm_therm *therm = subdev->device->therm; > + int gate_idx; > + > + gate_idx = therm->clkgate_engine(therm, subdev->index); > + if (gate_idx != -1) > + therm->clkgate_set(therm, gate_idx, false); > +move this code inside "nvkm_therm_clkgate_engine". 
Nobody outside nvkm_therm should even care about the index.> if (engine->func->fini) > return engine->func->fini(engine, suspend); > return 0; > @@ -96,12 +104,13 @@ nvkm_engine_init(struct nvkm_subdev *subdev) > { > struct nvkm_engine *engine = nvkm_engine(subdev); > struct nvkm_fb *fb = subdev->device->fb; > + struct nvkm_therm *therm = subdev->device->therm; > int ret = 0, i; > s64 time; > > if (!engine->usecount) { > nvkm_trace(subdev, "init skipped, engine has no users\n"); > - return ret; > + goto finish; > } > > if (engine->func->oneinit && !engine->subdev.oneinit) { > @@ -123,6 +132,15 @@ nvkm_engine_init(struct nvkm_subdev *subdev) > > for (i = 0; fb && i < fb->tile.regions; i++) > nvkm_engine_tile(engine, i); > + > +finish: > + if (!ret) { > + int gate_idx = therm->clkgate_engine(therm, subdev->index); > + > + if (gate_idx != -1) > + therm->clkgate_set(therm, gate_idx, true); > + } > +same code as above. More code sharing!> return ret; > } > > diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c > index b690bc1..d133016 100644 > --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c > +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c > @@ -1355,7 +1355,7 @@ nvc0_chipset = { > .mxm = nv50_mxm_new, > .pci = gf100_pci_new, > .pmu = gf100_pmu_new, > - .therm = gt215_therm_new, > + .therm = gf100_therm_new, > .timer = nv41_timer_new, > .volt = gf100_volt_new, > .ce[0] = gf100_ce_new, > @@ -1392,7 +1392,7 @@ nvc1_chipset = { > .mxm = nv50_mxm_new, > .pci = gf106_pci_new, > .pmu = gf100_pmu_new, > - .therm = gt215_therm_new, > + .therm = gf100_therm_new, > .timer = nv41_timer_new, > .volt = gf100_volt_new, > .ce[0] = gf100_ce_new, > @@ -1428,7 +1428,7 @@ nvc3_chipset = { > .mxm = nv50_mxm_new, > .pci = gf106_pci_new, > .pmu = gf100_pmu_new, > - .therm = gt215_therm_new, > + .therm = gf100_therm_new, > .timer = nv41_timer_new, > .volt = gf100_volt_new, > .ce[0] = gf100_ce_new, > @@ -1464,7 
+1464,7 @@ nvc4_chipset = { > .mxm = nv50_mxm_new, > .pci = gf100_pci_new, > .pmu = gf100_pmu_new, > - .therm = gt215_therm_new, > + .therm = gf100_therm_new, > .timer = nv41_timer_new, > .volt = gf100_volt_new, > .ce[0] = gf100_ce_new, > @@ -1501,7 +1501,7 @@ nvc8_chipset = { > .mxm = nv50_mxm_new, > .pci = gf100_pci_new, > .pmu = gf100_pmu_new, > - .therm = gt215_therm_new, > + .therm = gf100_therm_new, > .timer = nv41_timer_new, > .volt = gf100_volt_new, > .ce[0] = gf100_ce_new, > @@ -1538,7 +1538,7 @@ nvce_chipset = { > .mxm = nv50_mxm_new, > .pci = gf100_pci_new, > .pmu = gf100_pmu_new, > - .therm = gt215_therm_new, > + .therm = gf100_therm_new, > .timer = nv41_timer_new, > .volt = gf100_volt_new, > .ce[0] = gf100_ce_new, > @@ -1575,7 +1575,7 @@ nvcf_chipset = { > .mxm = nv50_mxm_new, > .pci = gf106_pci_new, > .pmu = gf100_pmu_new, > - .therm = gt215_therm_new, > + .therm = gf100_therm_new, > .timer = nv41_timer_new, > .volt = gf100_volt_new, > .ce[0] = gf100_ce_new, > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild > index 135758b..cbb9465 100644 > --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild > @@ -1,4 +1,5 @@ > nvkm-y += nvkm/subdev/therm/base.o > +nvkm-y += nvkm/subdev/therm/clkgate.o > nvkm-y += nvkm/subdev/therm/fan.o > nvkm-y += nvkm/subdev/therm/fannil.o > nvkm-y += nvkm/subdev/therm/fanpwm.o > @@ -9,5 +10,6 @@ nvkm-y += nvkm/subdev/therm/nv40.o > nvkm-y += nvkm/subdev/therm/nv50.o > nvkm-y += nvkm/subdev/therm/g84.o > nvkm-y += nvkm/subdev/therm/gt215.o > +nvkm-y += nvkm/subdev/therm/gf100.o > nvkm-y += nvkm/subdev/therm/gf119.o > nvkm-y += nvkm/subdev/therm/gm107.o > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c > index df949fa..723c0c1 100644 > --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c 
> @@ -393,6 +393,8 @@ nvkm_therm_new_(const struct nvkm_therm_func *func, struct nvkm_device *device, > therm->fan_set = nvkm_therm_fan_user_set; > therm->attr_get = nvkm_therm_attr_get; > therm->attr_set = nvkm_therm_attr_set; > + therm->clkgate_engine = nvkm_therm_clkgate_engine; > + therm->clkgate_set = nvkm_therm_clkgate_set;remove those, because we should only have a nvkm_therm_clkgate_engine call> therm->mode = therm->suspend = -1; /* undefined */ > return 0; > } > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c > new file mode 100644 > index 0000000..c030ea9 > --- /dev/null > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c > @@ -0,0 +1,49 @@ > +/* > + * Copyright 2017 Red Hat Inc. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > + * OTHER DEALINGS IN THE SOFTWARE. 
> + * > + * Authors: Lyude Paul > + */ > +#include "priv.h" > + > +int > +nvkm_therm_clkgate_engine(struct nvkm_therm *therm, enum nvkm_devidx subdev) > +{ > + if (!therm->func->clkgate_engine) > + return -1; > + > + return therm->func->clkgate_engine(subdev); > +} > + > +void > +nvkm_therm_clkgate_set(struct nvkm_therm *therm, int gate_idx, bool enable) > +{ > + if (!therm->func->clkgate_set) > + return; > + > + if (enable) > + nvkm_trace(&therm->subdev, > + "Enabling clockgating for gate 0x%x\n", gate_idx); > + else > + nvkm_trace(&therm->subdev, > + "Disabling clockgating for gate 0x%x\n", gate_idx); > + > + therm->func->clkgate_set(therm, gate_idx, enable); > +} > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c > new file mode 100644 > index 0000000..820934f > --- /dev/null > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c > @@ -0,0 +1,77 @@ > +/* > + * Copyright 2017 Red Hat Inc. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > + * OTHER DEALINGS IN THE SOFTWARE. > + * > + * Authors: Lyude Paul > + */ > +#include <core/device.h> > + > +#include "priv.h" > + > +int > +gf100_clkgate_engine(enum nvkm_devidx subdev) > +{ > + switch (subdev) { > + case NVKM_ENGINE_GR: return 0x00; > + case NVKM_ENGINE_MSPDEC: return 0x04; > + case NVKM_ENGINE_MSPPP: return 0x08; > + case NVKM_ENGINE_MSVLD: return 0x0c; > + case NVKM_ENGINE_CE0: return 0x10; > + case NVKM_ENGINE_CE1: return 0x14; > + case NVKM_ENGINE_MSENC: return 0x18; > + case NVKM_ENGINE_CE2: return 0x1c; > + default: return -1; > + } > +} > + > +void > +gf100_clkgate_set(struct nvkm_therm *therm, int gate_idx, bool enable) > +{ > + u8 data; > + > + if (enable) /* ENG_CLK=auto, BLK_CLK=auto, ENG_PWR=run, BLK_PWR=auto */ > + data = 0x45; > + else /* ENG_CLK=run, BLK_CLK=run, ENG_PWR=run, BLK_PWR=run */ > + data = 0x0; I would rather use 0x44 here as Nvidia does? I don't think they disable it completely, maybe they only leave it on kepler? 
not quite sure.> + > + nvkm_mask(therm->subdev.device, 0x20200 + gate_idx, 0xff, data); > +} > + > +static const struct nvkm_therm_func > +gf100_therm = { > + .init = gt215_therm_init, > + .fini = g84_therm_fini, > + .pwm_ctrl = nv50_fan_pwm_ctrl, > + .pwm_get = nv50_fan_pwm_get, > + .pwm_set = nv50_fan_pwm_set, > + .pwm_clock = nv50_fan_pwm_clock, > + .temp_get = g84_temp_get, > + .fan_sense = gt215_therm_fan_sense, > + .program_alarms = nvkm_therm_program_alarms_polling, > + .clkgate_engine = gf100_clkgate_engine, > + .clkgate_set = gf100_clkgate_set, > +}; > + > +int > +gf100_therm_new(struct nvkm_device *device, int index, > + struct nvkm_therm **ptherm) > +{ > + return nvkm_therm_new_(&gf100_therm, device, index, ptherm); > +} > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c > index 06dcfd6..a2626fb 100644 > --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c > @@ -143,6 +143,8 @@ gf119_therm = { > .temp_get = g84_temp_get, > .fan_sense = gt215_therm_fan_sense, > .program_alarms = nvkm_therm_program_alarms_polling, > + .clkgate_engine = gf100_clkgate_engine, > + .clkgate_set = gf100_clkgate_set, > }; > > int > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c > index 86848ec..c580c39 100644 > --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c > @@ -65,6 +65,8 @@ gm107_therm = { > .temp_get = g84_temp_get, > .fan_sense = gt215_therm_fan_sense, > .program_alarms = nvkm_therm_program_alarms_polling, > + .clkgate_engine = gf100_clkgate_engine, > + .clkgate_set = gf100_clkgate_set, > }; > > int > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c > index c08097f..4caf401 100644 > --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c > +++ 
b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c > @@ -36,7 +36,7 @@ gt215_therm_fan_sense(struct nvkm_therm *therm) > return -ENODEV; > } > > -static void > +void > gt215_therm_init(struct nvkm_therm *therm) > { > struct nvkm_device *device = therm->subdev.device; > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h > index 235a5d8..80367a7 100644 > --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h > @@ -81,6 +81,9 @@ void nvkm_therm_sensor_event(struct nvkm_therm *, enum nvkm_therm_thrs, > enum nvkm_therm_thrs_direction); > void nvkm_therm_program_alarms_polling(struct nvkm_therm *); > > +int nvkm_therm_clkgate_engine(struct nvkm_therm *, enum nvkm_devidx); > +void nvkm_therm_clkgate_set(struct nvkm_therm *, int gate_idx, bool enable); > + > struct nvkm_therm_func { > void (*init)(struct nvkm_therm *); > void (*fini)(struct nvkm_therm *); > @@ -96,6 +99,9 @@ struct nvkm_therm_func { > int (*fan_sense)(struct nvkm_therm *); > > void (*program_alarms)(struct nvkm_therm *); > + > + int (*clkgate_engine)(enum nvkm_devidx); > + void (*clkgate_set)(struct nvkm_therm *, int, bool); > }; > > void nv40_therm_intr(struct nvkm_therm *); > @@ -110,6 +116,10 @@ void g84_sensor_setup(struct nvkm_therm *); > void g84_therm_fini(struct nvkm_therm *); > > int gt215_therm_fan_sense(struct nvkm_therm *); > +void gt215_therm_init(struct nvkm_therm *); > + > +int gf100_clkgate_engine(enum nvkm_devidx); > +void gf100_clkgate_set(struct nvkm_therm *, int, bool); > > void gf119_therm_init(struct nvkm_therm *); > > -- > 2.9.3 >
Lyude Paul
2017-Apr-26 18:31 UTC
[Nouveau] [PATCH] drm/nouveau: Add support for clockgating on Fermi+
On Wed, 2017-04-26 at 00:49 +0200, Karol Herbst wrote:> Hi Lyude, > > thanks for the great work. Just a view comments inline. > > 2017-04-25 20:38 GMT+02:00 Lyude <lyude at redhat.com>: > > This adds support for enabling automatic clockgating on nvidia GPUs > > for > > Fermi and later generations. This saves a little bit of power, > > bringing > > my fermi GPU's power consumption from ~28.3W on idle to ~27W, and > > my > > kepler's idle power consumption from ~23.6W to ~21.65W. > > > > Similar to how the nvidia driver seems to handle this, we enable > > clockgating for each engine that supports it after it's > > initialization. > > > > Signed-off-by: Lyude <lyude at redhat.com> > > --- > > .../gpu/drm/nouveau/include/nvkm/subdev/therm.h | 4 ++ > > drivers/gpu/drm/nouveau/nvkm/core/engine.c | 20 +++++- > > drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 14 ++-- > > drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild | 2 + > > drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c | 2 + > > .../gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c | 49 > > ++++++++++++++ > > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c | 77 > > ++++++++++++++++++++++ > > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c | 2 + > > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c | 2 + > > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c | 2 +- > > drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h | 10 +++ > > 11 files changed, 175 insertions(+), 9 deletions(-) > > create mode 100644 > > drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c > > create mode 100644 > > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c > > > > diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h > > b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h > > index b268b96..904aa56 100644 > > --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h > > +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h > > @@ -84,6 +84,9 @@ struct nvkm_therm { > > > > int (*attr_get)(struct nvkm_therm *, enum > > 
nvkm_therm_attr_type); > > int (*attr_set)(struct nvkm_therm *, enum > > nvkm_therm_attr_type, int); > > + > > + int (*clkgate_engine)(struct nvkm_therm *, enum > > nvkm_devidx); > > + void (*clkgate_set)(struct nvkm_therm *, int gate_idx, bool > > enable); > > remove those and have a simple "nvkm_therm_clkgate_engine" function > > This way you know that every user calls this function and don't have > to check for silly function pointers like you currently do in > engine.c > > > }; > > > > int nvkm_therm_temp_get(struct nvkm_therm *); > > @@ -94,6 +97,7 @@ int nv40_therm_new(struct nvkm_device *, int, > > struct nvkm_therm **); > > int nv50_therm_new(struct nvkm_device *, int, struct nvkm_therm > > **); > > int g84_therm_new(struct nvkm_device *, int, struct nvkm_therm > > **); > > int gt215_therm_new(struct nvkm_device *, int, struct nvkm_therm > > **); > > +int gf100_therm_new(struct nvkm_device *, int, struct nvkm_therm > > **); > > int gf119_therm_new(struct nvkm_device *, int, struct nvkm_therm > > **); > > int gm107_therm_new(struct nvkm_device *, int, struct nvkm_therm > > **); > > #endif > > diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c > > b/drivers/gpu/drm/nouveau/nvkm/core/engine.c > > index b6c9169..473ad3e 100644 > > --- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c > > +++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c > > @@ -26,6 +26,7 @@ > > #include <core/option.h> > > > > #include <subdev/fb.h> > > +#include <subdev/therm.h> > > > > bool > > nvkm_engine_chsw_load(struct nvkm_engine *engine) > > @@ -86,6 +87,13 @@ static int > > nvkm_engine_fini(struct nvkm_subdev *subdev, bool suspend) > > { > > struct nvkm_engine *engine = nvkm_engine(subdev); > > + struct nvkm_therm *therm = subdev->device->therm; > > + int gate_idx; > > + > > + gate_idx = therm->clkgate_engine(therm, subdev->index); > > + if (gate_idx != -1) > > + therm->clkgate_set(therm, gate_idx, false); > > + > > move this code inside "nvkm_therm_clkgate_engine". 
Nobody outside > nvkm_therm should even care about the index. > > > if (engine->func->fini) > > return engine->func->fini(engine, suspend); > > return 0; > > @@ -96,12 +104,13 @@ nvkm_engine_init(struct nvkm_subdev *subdev) > > { > > struct nvkm_engine *engine = nvkm_engine(subdev); > > struct nvkm_fb *fb = subdev->device->fb; > > + struct nvkm_therm *therm = subdev->device->therm; > > int ret = 0, i; > > s64 time; > > > > if (!engine->usecount) { > > nvkm_trace(subdev, "init skipped, engine has no > > users\n"); > > - return ret; > > + goto finish; > > } > > > > if (engine->func->oneinit && !engine->subdev.oneinit) { > > @@ -123,6 +132,15 @@ nvkm_engine_init(struct nvkm_subdev *subdev) > > > > for (i = 0; fb && i < fb->tile.regions; i++) > > nvkm_engine_tile(engine, i); > > + > > +finish: > > + if (!ret) { > > + int gate_idx = therm->clkgate_engine(therm, subdev- > > >index); > > + > > + if (gate_idx != -1) > > + therm->clkgate_set(therm, gate_idx, true); > > + } > > + > > same code as above. More code sharing! 
> > > return ret; > > } > > > > diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c > > b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c > > index b690bc1..d133016 100644 > > --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c > > +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c > > @@ -1355,7 +1355,7 @@ nvc0_chipset = { > > .mxm = nv50_mxm_new, > > .pci = gf100_pci_new, > > .pmu = gf100_pmu_new, > > - .therm = gt215_therm_new, > > + .therm = gf100_therm_new, > > .timer = nv41_timer_new, > > .volt = gf100_volt_new, > > .ce[0] = gf100_ce_new, > > @@ -1392,7 +1392,7 @@ nvc1_chipset = { > > .mxm = nv50_mxm_new, > > .pci = gf106_pci_new, > > .pmu = gf100_pmu_new, > > - .therm = gt215_therm_new, > > + .therm = gf100_therm_new, > > .timer = nv41_timer_new, > > .volt = gf100_volt_new, > > .ce[0] = gf100_ce_new, > > @@ -1428,7 +1428,7 @@ nvc3_chipset = { > > .mxm = nv50_mxm_new, > > .pci = gf106_pci_new, > > .pmu = gf100_pmu_new, > > - .therm = gt215_therm_new, > > + .therm = gf100_therm_new, > > .timer = nv41_timer_new, > > .volt = gf100_volt_new, > > .ce[0] = gf100_ce_new, > > @@ -1464,7 +1464,7 @@ nvc4_chipset = { > > .mxm = nv50_mxm_new, > > .pci = gf100_pci_new, > > .pmu = gf100_pmu_new, > > - .therm = gt215_therm_new, > > + .therm = gf100_therm_new, > > .timer = nv41_timer_new, > > .volt = gf100_volt_new, > > .ce[0] = gf100_ce_new, > > @@ -1501,7 +1501,7 @@ nvc8_chipset = { > > .mxm = nv50_mxm_new, > > .pci = gf100_pci_new, > > .pmu = gf100_pmu_new, > > - .therm = gt215_therm_new, > > + .therm = gf100_therm_new, > > .timer = nv41_timer_new, > > .volt = gf100_volt_new, > > .ce[0] = gf100_ce_new, > > @@ -1538,7 +1538,7 @@ nvce_chipset = { > > .mxm = nv50_mxm_new, > > .pci = gf100_pci_new, > > .pmu = gf100_pmu_new, > > - .therm = gt215_therm_new, > > + .therm = gf100_therm_new, > > .timer = nv41_timer_new, > > .volt = gf100_volt_new, > > .ce[0] = gf100_ce_new, > > @@ -1575,7 +1575,7 @@ nvcf_chipset = { > > .mxm = nv50_mxm_new, > > .pci = 
gf106_pci_new, > > .pmu = gf100_pmu_new, > > - .therm = gt215_therm_new, > > + .therm = gf100_therm_new, > > .timer = nv41_timer_new, > > .volt = gf100_volt_new, > > .ce[0] = gf100_ce_new, > > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild > > b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild > > index 135758b..cbb9465 100644 > > --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild > > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild > > @@ -1,4 +1,5 @@ > > nvkm-y += nvkm/subdev/therm/base.o > > +nvkm-y += nvkm/subdev/therm/clkgate.o > > nvkm-y += nvkm/subdev/therm/fan.o > > nvkm-y += nvkm/subdev/therm/fannil.o > > nvkm-y += nvkm/subdev/therm/fanpwm.o > > @@ -9,5 +10,6 @@ nvkm-y += nvkm/subdev/therm/nv40.o > > nvkm-y += nvkm/subdev/therm/nv50.o > > nvkm-y += nvkm/subdev/therm/g84.o > > nvkm-y += nvkm/subdev/therm/gt215.o > > +nvkm-y += nvkm/subdev/therm/gf100.o > > nvkm-y += nvkm/subdev/therm/gf119.o > > nvkm-y += nvkm/subdev/therm/gm107.o > > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c > > b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c > > index df949fa..723c0c1 100644 > > --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c > > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c > > @@ -393,6 +393,8 @@ nvkm_therm_new_(const struct nvkm_therm_func > > *func, struct nvkm_device *device, > > therm->fan_set = nvkm_therm_fan_user_set; > > therm->attr_get = nvkm_therm_attr_get; > > therm->attr_set = nvkm_therm_attr_set; > > + therm->clkgate_engine = nvkm_therm_clkgate_engine; > > + therm->clkgate_set = nvkm_therm_clkgate_set; > > remove those, because we should only have a nvkm_therm_clkgate_engine > call > > > therm->mode = therm->suspend = -1; /* undefined */ > > return 0; > > } > > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c > > b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c > > new file mode 100644 > > index 0000000..c030ea9 > > --- /dev/null > > +++ 
b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c > > @@ -0,0 +1,49 @@ > > +/* > > + * Copyright 2017 Red Hat Inc. > > + * > > + * Permission is hereby granted, free of charge, to any person > > obtaining a > > + * copy of this software and associated documentation files (the > > "Software"), > > + * to deal in the Software without restriction, including without > > limitation > > + * the rights to use, copy, modify, merge, publish, distribute, > > sublicense, > > + * and/or sell copies of the Software, and to permit persons to > > whom the > > + * Software is furnished to do so, subject to the following > > conditions: > > + * > > + * The above copyright notice and this permission notice shall be > > included in > > + * all copies or substantial portions of the Software. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > > EVENT SHALL > > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, > > DAMAGES OR > > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR > > OTHERWISE, > > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE > > USE OR > > + * OTHER DEALINGS IN THE SOFTWARE. 
> > + * > > + * Authors: Lyude Paul > > + */ > > +#include "priv.h" > > + > > +int > > +nvkm_therm_clkgate_engine(struct nvkm_therm *therm, enum > > nvkm_devidx subdev) > > +{ > > + if (!therm->func->clkgate_engine) > > + return -1; > > + > > + return therm->func->clkgate_engine(subdev); > > +} > > + > > +void > > +nvkm_therm_clkgate_set(struct nvkm_therm *therm, int gate_idx, > > bool enable) > > +{ > > + if (!therm->func->clkgate_set) > > + return; > > + > > + if (enable) > > + nvkm_trace(&therm->subdev, > > + "Enabling clockgating for gate 0x%x\n", > > gate_idx); > > + else > > + nvkm_trace(&therm->subdev, > > + "Disabling clockgating for gate 0x%x\n", > > gate_idx); > > + > > + therm->func->clkgate_set(therm, gate_idx, enable); > > +} > > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c > > b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c > > new file mode 100644 > > index 0000000..820934f > > --- /dev/null > > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c > > @@ -0,0 +1,77 @@ > > +/* > > + * Copyright 2017 Red Hat Inc. > > + * > > + * Permission is hereby granted, free of charge, to any person > > obtaining a > > + * copy of this software and associated documentation files (the > > "Software"), > > + * to deal in the Software without restriction, including without > > limitation > > + * the rights to use, copy, modify, merge, publish, distribute, > > sublicense, > > + * and/or sell copies of the Software, and to permit persons to > > whom the > > + * Software is furnished to do so, subject to the following > > conditions: > > + * > > + * The above copyright notice and this permission notice shall be > > included in > > + * all copies or substantial portions of the Software. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO > > EVENT SHALL > > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, > > DAMAGES OR > > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR > > OTHERWISE, > > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE > > USE OR > > + * OTHER DEALINGS IN THE SOFTWARE. > > + * > > + * Authors: Lyude Paul > > + */ > > +#include <core/device.h> > > + > > +#include "priv.h" > > + > > +int > > +gf100_clkgate_engine(enum nvkm_devidx subdev) > > +{ > > + switch (subdev) { > > + case NVKM_ENGINE_GR: return 0x00; > > + case NVKM_ENGINE_MSPDEC: return 0x04; > > + case NVKM_ENGINE_MSPPP: return 0x08; > > + case NVKM_ENGINE_MSVLD: return 0x0c; > > + case NVKM_ENGINE_CE0: return 0x10; > > + case NVKM_ENGINE_CE1: return 0x14; > > + case NVKM_ENGINE_MSENC: return 0x18; > > + case NVKM_ENGINE_CE2: return 0x1c; > > + default: return -1; > > + } > > +} > > + > > +void > > +gf100_clkgate_set(struct nvkm_therm *therm, int gate_idx, bool > > enable) > > +{ > > + u8 data; > > + > > + if (enable) /* ENG_CLK=auto, BLK_CLK=auto, ENG_PWR=run, > > BLK_PWR=auto */ > > + data = 0x45; > > + else /* ENG_CLK=run, BLK_CLK=run, ENG_PWR=run, > > BLK_PWR=run */ > > + data = 0x0; > > I would rather use 0x44 here as Nvidia does? I don't think they > disable it completly, maybe they only leave it on kepler? 
not quite > sure.JFYI: according to the vbios repo the nvidia blob actually uses 0x45 for everything except for PTHERM.UNK254_CG_CTRL: ./nv136/<REDACTED>/gp106_mmiotrace.xz: [1] 854.334687 MMIO32 W 0x020200 0x2772ed45 PTHERM.PGRAPH_CG_CTRL <= { ENG_CLK = AUTO | BLK_CLK = AUTO | ENG_PWR = RUN | BLK_PWR = AUTO | ENG_FILTER = 0xd | ENG_MANT = 0x7 | ENG_DLY_BEFORE = 0x2 | ENG_DLY_AFTER = 0x7 | BLK_DLY_BEFORE = 0x7 | BLK_DLY_AFTER = 0x2 } vs ./nv136/<REDACTED>/gp106_mmiotrace.xz: [1] 854.251848 MMIO32 W 0x020254 0x27722444 PTHERM.UNK254_CG_CTRL <= { ENG_CLK = RUN | BLK_CLK = AUTO | ENG_PWR = RUN | BLK_PWR = AUTO | ENG_FILTER = 0x4 | ENG_MANT = 0x1 | ENG_DLY_BEFORE = 0x2 | ENG_DLY_AFTER = 0x7 | BLK_DLY_BEFORE = 0x7 | BLK_DLY_AFTER = 0x2 }> > > + > > + nvkm_mask(therm->subdev.device, 0x20200 + gate_idx, 0xff, > > data); > > +} > > + > > +static const struct nvkm_therm_func > > +gf100_therm = { > > + .init = gt215_therm_init, > > + .fini = g84_therm_fini, > > + .pwm_ctrl = nv50_fan_pwm_ctrl, > > + .pwm_get = nv50_fan_pwm_get, > > + .pwm_set = nv50_fan_pwm_set, > > + .pwm_clock = nv50_fan_pwm_clock, > > + .temp_get = g84_temp_get, > > + .fan_sense = gt215_therm_fan_sense, > > + .program_alarms = nvkm_therm_program_alarms_polling, > > + .clkgate_engine = gf100_clkgate_engine, > > + .clkgate_set = gf100_clkgate_set, > > +}; > > + > > +int > > +gf100_therm_new(struct nvkm_device *device, int index, > > + struct nvkm_therm **ptherm) > > +{ > > + return nvkm_therm_new_(&gf100_therm, device, index, > > ptherm); > > +} > > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c > > b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c > > index 06dcfd6..a2626fb 100644 > > --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c > > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c > > @@ -143,6 +143,8 @@ gf119_therm = { > > .temp_get = g84_temp_get, > > .fan_sense = gt215_therm_fan_sense, > > .program_alarms = nvkm_therm_program_alarms_polling, > > + .clkgate_engine = 
gf100_clkgate_engine, > > + .clkgate_set = gf100_clkgate_set, > > }; > > > > int > > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c > > b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c > > index 86848ec..c580c39 100644 > > --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c > > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c > > @@ -65,6 +65,8 @@ gm107_therm = { > > .temp_get = g84_temp_get, > > .fan_sense = gt215_therm_fan_sense, > > .program_alarms = nvkm_therm_program_alarms_polling, > > + .clkgate_engine = gf100_clkgate_engine, > > + .clkgate_set = gf100_clkgate_set, > > }; > > > > int > > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c > > b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c > > index c08097f..4caf401 100644 > > --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c > > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c > > @@ -36,7 +36,7 @@ gt215_therm_fan_sense(struct nvkm_therm *therm) > > return -ENODEV; > > } > > > > -static void > > +void > > gt215_therm_init(struct nvkm_therm *therm) > > { > > struct nvkm_device *device = therm->subdev.device; > > diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h > > b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h > > index 235a5d8..80367a7 100644 > > --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h > > +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h > > @@ -81,6 +81,9 @@ void nvkm_therm_sensor_event(struct nvkm_therm *, > > enum nvkm_therm_thrs, > > enum nvkm_therm_thrs_direction); > > void nvkm_therm_program_alarms_polling(struct nvkm_therm *); > > > > +int nvkm_therm_clkgate_engine(struct nvkm_therm *, enum > > nvkm_devidx); > > +void nvkm_therm_clkgate_set(struct nvkm_therm *, int gate_idx, > > bool enable); > > + > > struct nvkm_therm_func { > > void (*init)(struct nvkm_therm *); > > void (*fini)(struct nvkm_therm *); > > @@ -96,6 +99,9 @@ struct nvkm_therm_func { > > int (*fan_sense)(struct nvkm_therm *); > > > > void 
(*program_alarms)(struct nvkm_therm *); > > + > > + int (*clkgate_engine)(enum nvkm_devidx); > > + void (*clkgate_set)(struct nvkm_therm *, int, bool); > > }; > > > > void nv40_therm_intr(struct nvkm_therm *); > > @@ -110,6 +116,10 @@ void g84_sensor_setup(struct nvkm_therm *); > > void g84_therm_fini(struct nvkm_therm *); > > > > int gt215_therm_fan_sense(struct nvkm_therm *); > > +void gt215_therm_init(struct nvkm_therm *); > > + > > +int gf100_clkgate_engine(enum nvkm_devidx); > > +void gf100_clkgate_set(struct nvkm_therm *, int, bool); > > > > void gf119_therm_init(struct nvkm_therm *); > > > > -- > > 2.9.3 > >-- Cheers, Lyude
Lyude
2017-Apr-26 23:09 UTC
[Nouveau] [PATCH v2] drm/nouveau: Add support for clockgating on Fermi+
This adds support for enabling automatic clockgating on nvidia GPUs for Fermi and later generations. This saves a little bit of power, bringing my fermi GPU's power consumption from ~28.3W on idle to ~27W, and my kepler's idle power consumption from ~23.6W to ~21.65W. Similar to how the nvidia driver seems to handle this, we enable clockgating for each engine that supports it after its initialization. Changes since v1: - Move function pointers for clockgating functions out of nvkm_therm, just expose one less complex function to callers: nvkm_therm_clkgate_engine() - Use 0x44 for disabling clockgating instead of just shutting all of nvidia's power management for each gate off, since that's what the nvidia blob does Signed-off-by: Lyude <lyude at redhat.com> --- .../gpu/drm/nouveau/include/nvkm/subdev/therm.h | 2 + drivers/gpu/drm/nouveau/nvkm/core/engine.c | 12 +++- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 14 ++-- drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild | 2 + .../gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c | 32 +++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c | 81 ++++++++++++++++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h | 5 ++ 10 files changed, 143 insertions(+), 9 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h index b268b96..0e2574d 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h @@ -89,11 +89,13 @@ struct nvkm_therm { int nvkm_therm_temp_get(struct nvkm_therm *); int nvkm_therm_fan_sense(struct nvkm_therm *); int nvkm_therm_cstate(struct nvkm_therm *, int, 
int); +void nvkm_therm_clkgate_engine(struct nvkm_therm *, enum nvkm_devidx, bool); int nv40_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int nv50_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int g84_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gt215_therm_new(struct nvkm_device *, int, struct nvkm_therm **); +int gf100_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gf119_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gm107_therm_new(struct nvkm_device *, int, struct nvkm_therm **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c b/drivers/gpu/drm/nouveau/nvkm/core/engine.c index b6c9169..e3d52c1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c @@ -26,6 +26,7 @@ #include <core/option.h> #include <subdev/fb.h> +#include <subdev/therm.h> bool nvkm_engine_chsw_load(struct nvkm_engine *engine) @@ -86,6 +87,9 @@ static int nvkm_engine_fini(struct nvkm_subdev *subdev, bool suspend) { struct nvkm_engine *engine = nvkm_engine(subdev); + + nvkm_therm_clkgate_engine(subdev->device->therm, subdev->index, true); + if (engine->func->fini) return engine->func->fini(engine, suspend); return 0; @@ -96,12 +100,13 @@ nvkm_engine_init(struct nvkm_subdev *subdev) { struct nvkm_engine *engine = nvkm_engine(subdev); struct nvkm_fb *fb = subdev->device->fb; + struct nvkm_therm *therm = subdev->device->therm; int ret = 0, i; s64 time; if (!engine->usecount) { nvkm_trace(subdev, "init skipped, engine has no users\n"); - return ret; + goto finish; } if (engine->func->oneinit && !engine->subdev.oneinit) { @@ -123,6 +128,11 @@ nvkm_engine_init(struct nvkm_subdev *subdev) for (i = 0; fb && i < fb->tile.regions; i++) nvkm_engine_tile(engine, i); + +finish: + if (!ret) + nvkm_therm_clkgate_engine(therm, subdev->index, true); + return ret; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c 
b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index b690bc1..d133016 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1355,7 +1355,7 @@ nvc0_chipset = { .mxm = nv50_mxm_new, .pci = gf100_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1392,7 +1392,7 @@ nvc1_chipset = { .mxm = nv50_mxm_new, .pci = gf106_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1428,7 +1428,7 @@ nvc3_chipset = { .mxm = nv50_mxm_new, .pci = gf106_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1464,7 +1464,7 @@ nvc4_chipset = { .mxm = nv50_mxm_new, .pci = gf100_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1501,7 +1501,7 @@ nvc8_chipset = { .mxm = nv50_mxm_new, .pci = gf100_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1538,7 +1538,7 @@ nvce_chipset = { .mxm = nv50_mxm_new, .pci = gf100_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1575,7 +1575,7 @@ nvcf_chipset = { .mxm = nv50_mxm_new, .pci = gf106_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild index 135758b..cbb9465 100644 --- 
a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild @@ -1,4 +1,5 @@ nvkm-y += nvkm/subdev/therm/base.o +nvkm-y += nvkm/subdev/therm/clkgate.o nvkm-y += nvkm/subdev/therm/fan.o nvkm-y += nvkm/subdev/therm/fannil.o nvkm-y += nvkm/subdev/therm/fanpwm.o @@ -9,5 +10,6 @@ nvkm-y += nvkm/subdev/therm/nv40.o nvkm-y += nvkm/subdev/therm/nv50.o nvkm-y += nvkm/subdev/therm/g84.o nvkm-y += nvkm/subdev/therm/gt215.o +nvkm-y += nvkm/subdev/therm/gf100.o nvkm-y += nvkm/subdev/therm/gf119.o nvkm-y += nvkm/subdev/therm/gm107.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c new file mode 100644 index 0000000..48494d0 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c @@ -0,0 +1,32 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Lyude Paul + */ +#include "priv.h" + +void +nvkm_therm_clkgate_engine(struct nvkm_therm *therm, enum nvkm_devidx engine, + bool enable) +{ + if (therm->func->clkgate_engine) + therm->func->clkgate_engine(therm, engine, enable); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c new file mode 100644 index 0000000..c31bd2c --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c @@ -0,0 +1,81 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Lyude Paul + */ +#include <core/device.h> + +#include "priv.h" + +static inline int +gf100_clkgate_engine_offset(enum nvkm_devidx subdev) +{ + switch (subdev) { + case NVKM_ENGINE_GR: return 0x00; + case NVKM_ENGINE_MSPDEC: return 0x04; + case NVKM_ENGINE_MSPPP: return 0x08; + case NVKM_ENGINE_MSVLD: return 0x0c; + case NVKM_ENGINE_CE0: return 0x10; + case NVKM_ENGINE_CE1: return 0x14; + case NVKM_ENGINE_MSENC: return 0x18; + case NVKM_ENGINE_CE2: return 0x1c; + default: return -1; + } +} + +void +gf100_clkgate_engine(struct nvkm_therm *therm, enum nvkm_devidx subdev, + bool enable) +{ + int offset = gf100_clkgate_engine_offset(subdev); + u8 data; + + if (offset == -1) + return; + + if (enable) /* ENG_CLK=auto, BLK_CLK=auto, ENG_PWR=run, BLK_PWR=auto */ + data = 0x45; + else /* ENG_CLK=run, BLK_CLK=auto, ENG_PWR=run, BLK_PWR=auto*/ + data = 0x44; + + nvkm_mask(therm->subdev.device, 0x20200 + offset, 0xff, data); +} + +static const struct nvkm_therm_func +gf100_therm = { + .init = gt215_therm_init, + .fini = g84_therm_fini, + .pwm_ctrl = nv50_fan_pwm_ctrl, + .pwm_get = nv50_fan_pwm_get, + .pwm_set = nv50_fan_pwm_set, + .pwm_clock = nv50_fan_pwm_clock, + .temp_get = g84_temp_get, + .fan_sense = gt215_therm_fan_sense, + .program_alarms = nvkm_therm_program_alarms_polling, + .clkgate_engine = gf100_clkgate_engine, +}; + +int +gf100_therm_new(struct nvkm_device *device, int index, + struct nvkm_therm **ptherm) +{ + return nvkm_therm_new_(&gf100_therm, device, index, ptherm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c index 06dcfd6..568dffa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c @@ -143,6 +143,7 @@ gf119_therm = { .temp_get = g84_temp_get, .fan_sense = gt215_therm_fan_sense, .program_alarms = nvkm_therm_program_alarms_polling, + .clkgate_engine = gf100_clkgate_engine, }; int diff --git 
a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c index 86848ec..afc4ff6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c @@ -65,6 +65,7 @@ gm107_therm = { .temp_get = g84_temp_get, .fan_sense = gt215_therm_fan_sense, .program_alarms = nvkm_therm_program_alarms_polling, + .clkgate_engine = gf100_clkgate_engine, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c index c08097f..4caf401 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c @@ -36,7 +36,7 @@ gt215_therm_fan_sense(struct nvkm_therm *therm) return -ENODEV; } -static void +void gt215_therm_init(struct nvkm_therm *therm) { struct nvkm_device *device = therm->subdev.device; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h index 235a5d8..32d9bce 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h @@ -96,6 +96,8 @@ struct nvkm_therm_func { int (*fan_sense)(struct nvkm_therm *); void (*program_alarms)(struct nvkm_therm *); + + void (*clkgate_engine)(struct nvkm_therm *, enum nvkm_devidx, bool); }; void nv40_therm_intr(struct nvkm_therm *); @@ -110,6 +112,9 @@ void g84_sensor_setup(struct nvkm_therm *); void g84_therm_fini(struct nvkm_therm *); int gt215_therm_fan_sense(struct nvkm_therm *); +void gt215_therm_init(struct nvkm_therm *); + +void gf100_clkgate_engine(struct nvkm_therm *, enum nvkm_devidx, bool); void gf119_therm_init(struct nvkm_therm *); -- 2.9.3
Lyude
2017-Apr-26 23:23 UTC
[Nouveau] [PATCH v3] drm/nouveau: Add support for clockgating on Fermi+
This adds support for enabling automatic clockgating on nvidia GPUs for Fermi and later generations. This saves a little bit of power, bringing my fermi GPU's power consumption from ~28.3W on idle to ~27W, and my kepler's idle power consumption from ~23.6W to ~21.65W. Similar to how the nvidia driver seems to handle this, we enable clockgating for each engine that supports it after its initialization. Changes since v1: - Move function pointers for clockgating functions out of nvkm_therm, just expose one less complex function to callers: nvkm_therm_clkgate_engine() - Use 0x44 for disabling clockgating instead of just shutting all of nvidia's power management for each gate off, since that's what the nvidia blob does Changes since v2: - Disable clockgating in nvkm_engine_fini, don't enable it! Signed-off-by: Lyude <lyude at redhat.com> --- .../gpu/drm/nouveau/include/nvkm/subdev/therm.h | 2 + drivers/gpu/drm/nouveau/nvkm/core/engine.c | 12 +++- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 14 ++-- drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild | 2 + .../gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c | 32 +++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c | 81 ++++++++++++++++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h | 5 ++ 10 files changed, 143 insertions(+), 9 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h index b268b96..0e2574d 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h @@ -89,11 +89,13 @@ struct nvkm_therm { int nvkm_therm_temp_get(struct nvkm_therm *); int 
nvkm_therm_fan_sense(struct nvkm_therm *); int nvkm_therm_cstate(struct nvkm_therm *, int, int); +void nvkm_therm_clkgate_engine(struct nvkm_therm *, enum nvkm_devidx, bool); int nv40_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int nv50_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int g84_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gt215_therm_new(struct nvkm_device *, int, struct nvkm_therm **); +int gf100_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gf119_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gm107_therm_new(struct nvkm_device *, int, struct nvkm_therm **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c b/drivers/gpu/drm/nouveau/nvkm/core/engine.c index b6c9169..38b4cd1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c @@ -26,6 +26,7 @@ #include <core/option.h> #include <subdev/fb.h> +#include <subdev/therm.h> bool nvkm_engine_chsw_load(struct nvkm_engine *engine) @@ -86,6 +87,9 @@ static int nvkm_engine_fini(struct nvkm_subdev *subdev, bool suspend) { struct nvkm_engine *engine = nvkm_engine(subdev); + + nvkm_therm_clkgate_engine(subdev->device->therm, subdev->index, false); + if (engine->func->fini) return engine->func->fini(engine, suspend); return 0; @@ -96,12 +100,13 @@ nvkm_engine_init(struct nvkm_subdev *subdev) { struct nvkm_engine *engine = nvkm_engine(subdev); struct nvkm_fb *fb = subdev->device->fb; + struct nvkm_therm *therm = subdev->device->therm; int ret = 0, i; s64 time; if (!engine->usecount) { nvkm_trace(subdev, "init skipped, engine has no users\n"); - return ret; + goto finish; } if (engine->func->oneinit && !engine->subdev.oneinit) { @@ -123,6 +128,11 @@ nvkm_engine_init(struct nvkm_subdev *subdev) for (i = 0; fb && i < fb->tile.regions; i++) nvkm_engine_tile(engine, i); + +finish: + if (!ret) + nvkm_therm_clkgate_engine(therm, subdev->index, true); + return ret; } diff --git 
a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index b690bc1..d133016 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1355,7 +1355,7 @@ nvc0_chipset = { .mxm = nv50_mxm_new, .pci = gf100_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1392,7 +1392,7 @@ nvc1_chipset = { .mxm = nv50_mxm_new, .pci = gf106_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1428,7 +1428,7 @@ nvc3_chipset = { .mxm = nv50_mxm_new, .pci = gf106_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1464,7 +1464,7 @@ nvc4_chipset = { .mxm = nv50_mxm_new, .pci = gf100_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1501,7 +1501,7 @@ nvc8_chipset = { .mxm = nv50_mxm_new, .pci = gf100_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1538,7 +1538,7 @@ nvce_chipset = { .mxm = nv50_mxm_new, .pci = gf100_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1575,7 +1575,7 @@ nvcf_chipset = { .mxm = nv50_mxm_new, .pci = gf106_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild index 
135758b..cbb9465 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild @@ -1,4 +1,5 @@ nvkm-y += nvkm/subdev/therm/base.o +nvkm-y += nvkm/subdev/therm/clkgate.o nvkm-y += nvkm/subdev/therm/fan.o nvkm-y += nvkm/subdev/therm/fannil.o nvkm-y += nvkm/subdev/therm/fanpwm.o @@ -9,5 +10,6 @@ nvkm-y += nvkm/subdev/therm/nv40.o nvkm-y += nvkm/subdev/therm/nv50.o nvkm-y += nvkm/subdev/therm/g84.o nvkm-y += nvkm/subdev/therm/gt215.o +nvkm-y += nvkm/subdev/therm/gf100.o nvkm-y += nvkm/subdev/therm/gf119.o nvkm-y += nvkm/subdev/therm/gm107.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c new file mode 100644 index 0000000..48494d0 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c @@ -0,0 +1,32 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Lyude Paul + */ +#include "priv.h" + +void +nvkm_therm_clkgate_engine(struct nvkm_therm *therm, enum nvkm_devidx engine, + bool enable) +{ + if (therm->func->clkgate_engine) + therm->func->clkgate_engine(therm, engine, enable); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c new file mode 100644 index 0000000..c31bd2c --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c @@ -0,0 +1,81 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Lyude Paul + */ +#include <core/device.h> + +#include "priv.h" + +static inline int +gf100_clkgate_engine_offset(enum nvkm_devidx subdev) +{ + switch (subdev) { + case NVKM_ENGINE_GR: return 0x00; + case NVKM_ENGINE_MSPDEC: return 0x04; + case NVKM_ENGINE_MSPPP: return 0x08; + case NVKM_ENGINE_MSVLD: return 0x0c; + case NVKM_ENGINE_CE0: return 0x10; + case NVKM_ENGINE_CE1: return 0x14; + case NVKM_ENGINE_MSENC: return 0x18; + case NVKM_ENGINE_CE2: return 0x1c; + default: return -1; + } +} + +void +gf100_clkgate_engine(struct nvkm_therm *therm, enum nvkm_devidx subdev, + bool enable) +{ + int offset = gf100_clkgate_engine_offset(subdev); + u8 data; + + if (offset == -1) + return; + + if (enable) /* ENG_CLK=auto, BLK_CLK=auto, ENG_PWR=run, BLK_PWR=auto */ + data = 0x45; + else /* ENG_CLK=run, BLK_CLK=auto, ENG_PWR=run, BLK_PWR=auto*/ + data = 0x44; + + nvkm_mask(therm->subdev.device, 0x20200 + offset, 0xff, data); +} + +static const struct nvkm_therm_func +gf100_therm = { + .init = gt215_therm_init, + .fini = g84_therm_fini, + .pwm_ctrl = nv50_fan_pwm_ctrl, + .pwm_get = nv50_fan_pwm_get, + .pwm_set = nv50_fan_pwm_set, + .pwm_clock = nv50_fan_pwm_clock, + .temp_get = g84_temp_get, + .fan_sense = gt215_therm_fan_sense, + .program_alarms = nvkm_therm_program_alarms_polling, + .clkgate_engine = gf100_clkgate_engine, +}; + +int +gf100_therm_new(struct nvkm_device *device, int index, + struct nvkm_therm **ptherm) +{ + return nvkm_therm_new_(&gf100_therm, device, index, ptherm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c index 06dcfd6..568dffa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c @@ -143,6 +143,7 @@ gf119_therm = { .temp_get = g84_temp_get, .fan_sense = gt215_therm_fan_sense, .program_alarms = nvkm_therm_program_alarms_polling, + .clkgate_engine = gf100_clkgate_engine, }; int diff --git 
a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c index 86848ec..afc4ff6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c @@ -65,6 +65,7 @@ gm107_therm = { .temp_get = g84_temp_get, .fan_sense = gt215_therm_fan_sense, .program_alarms = nvkm_therm_program_alarms_polling, + .clkgate_engine = gf100_clkgate_engine, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c index c08097f..4caf401 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c @@ -36,7 +36,7 @@ gt215_therm_fan_sense(struct nvkm_therm *therm) return -ENODEV; } -static void +void gt215_therm_init(struct nvkm_therm *therm) { struct nvkm_device *device = therm->subdev.device; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h index 235a5d8..32d9bce 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h @@ -96,6 +96,8 @@ struct nvkm_therm_func { int (*fan_sense)(struct nvkm_therm *); void (*program_alarms)(struct nvkm_therm *); + + void (*clkgate_engine)(struct nvkm_therm *, enum nvkm_devidx, bool); }; void nv40_therm_intr(struct nvkm_therm *); @@ -110,6 +112,9 @@ void g84_sensor_setup(struct nvkm_therm *); void g84_therm_fini(struct nvkm_therm *); int gt215_therm_fan_sense(struct nvkm_therm *); +void gt215_therm_init(struct nvkm_therm *); + +void gf100_clkgate_engine(struct nvkm_therm *, enum nvkm_devidx, bool); void gf119_therm_init(struct nvkm_therm *); -- 2.9.3
Possibly Parallel Threads
- [PATCH] drm/nouveau: Add support for clockgating on Fermi+
- [PATCH] drm/nouveau: Add support for clockgating on Fermi+
- [PATCH] drm/nouveau: Add support for clockgating on Fermi+
- [RFC v3 0/4] Implement full clockgating for Kepler1 and 2
- [RFC 0/4] Implement full clockgating for Kepler1 and 2