We should simply return errors while the GPU is turned off, because the sensors aren't accessable and setting any kind of value doesn't make any sense. Fixes sensors values reported by "sensors" Before: nouveau-pci-0100 Adapter: PCI adapter GPU core: +0.60 V (min = +0.60 V, max = +1.20 V) temp1: -0.0°C (high = +95.0°C, hyst = +3.0°C) (crit = +105.0°C, hyst = +5.0°C) (emerg = +135.0°C, hyst = +5.0°C) power1: -22.00 uW After: nouveau-pci-0100 Adapter: PCI adapter GPU core: N/A (min = +0.60 V, max = +1.20 V) temp1: N/A (high = +95.0°C, hyst = +3.0°C) (crit = +105.0°C, hyst = +5.0°C) (emerg = +135.0°C, hyst = +5.0°C) power1: N/A Karol Herbst (3): therm: split return code and value in nvkm_get_temp hwmon: properly check for errors subdev/volt/gk104: return error when read fails drm/nouveau/include/nvkm/subdev/therm.h | 2 +- drm/nouveau/nouveau_hwmon.c | 48 ++++++++++++++++++++++++--------- drm/nouveau/nvkm/subdev/therm/base.c | 19 +++++++++---- drm/nouveau/nvkm/subdev/therm/g84.c | 11 ++++---- drm/nouveau/nvkm/subdev/therm/nv40.c | 9 +++---- drm/nouveau/nvkm/subdev/therm/nv50.c | 9 +++---- drm/nouveau/nvkm/subdev/therm/priv.h | 4 +-- drm/nouveau/nvkm/subdev/therm/temp.c | 16 ++++++++--- drm/nouveau/nvkm/subdev/volt/gk104.c | 7 ++++- 9 files changed, 82 insertions(+), 43 deletions(-) -- 2.14.1
Karol Herbst
2017-Sep-02 23:54 UTC
[Nouveau] [PATCH 1/3] therm: split return code and value in nvkm_get_temp
The current hwmon code doesn't check if the returned value was actually an error. Since Kepler temperature sensors are able to report negative values. Since Pascal (and maybe earlier) we have sensors with improved precision. Adjust the nvkm_get_temp method to be able to deal with those changes and let hwmon return an error properly. Signed-off-by: Karol Herbst <karolherbst at gmail.com> --- drm/nouveau/include/nvkm/subdev/therm.h | 2 +- drm/nouveau/nouveau_hwmon.c | 15 +++++++++------ drm/nouveau/nvkm/subdev/therm/base.c | 19 ++++++++++++++----- drm/nouveau/nvkm/subdev/therm/g84.c | 11 ++++++----- drm/nouveau/nvkm/subdev/therm/nv40.c | 9 +++------ drm/nouveau/nvkm/subdev/therm/nv50.c | 9 +++------ drm/nouveau/nvkm/subdev/therm/priv.h | 4 ++-- drm/nouveau/nvkm/subdev/therm/temp.c | 16 ++++++++++++---- 8 files changed, 50 insertions(+), 35 deletions(-) diff --git a/drm/nouveau/include/nvkm/subdev/therm.h b/drm/nouveau/include/nvkm/subdev/therm.h index 1bfd93b8..192169f2 100644 --- a/drm/nouveau/include/nvkm/subdev/therm.h +++ b/drm/nouveau/include/nvkm/subdev/therm.h @@ -86,7 +86,7 @@ struct nvkm_therm { int (*attr_set)(struct nvkm_therm *, enum nvkm_therm_attr_type, int); }; -int nvkm_therm_temp_get(struct nvkm_therm *); +int nvkm_therm_temp_get(struct nvkm_therm *, int *); int nvkm_therm_fan_sense(struct nvkm_therm *); int nvkm_therm_cstate(struct nvkm_therm *, int, int); diff --git a/drm/nouveau/nouveau_hwmon.c b/drm/nouveau/nouveau_hwmon.c index 7c965648..f1914d9a 100644 --- a/drm/nouveau/nouveau_hwmon.c +++ b/drm/nouveau/nouveau_hwmon.c @@ -326,8 +326,9 @@ nouveau_temp_is_visible(const void *data, u32 attr, int channel) { struct nouveau_drm *drm = nouveau_drm((struct drm_device *)data); struct nvkm_therm *therm = nvxx_therm(&drm->client.device); + int val; - if (therm && therm->attr_get && nvkm_therm_temp_get(therm) < 0) + if (therm && therm->attr_get && nvkm_therm_temp_get(therm, &val) < 0) return 0; switch (attr) { @@ -421,15 +422,16 @@ nouveau_temp_read(struct device *dev, u32 attr, int channel, long *val) struct drm_device *drm_dev = dev_get_drvdata(dev); struct nouveau_drm *drm = nouveau_drm(drm_dev); struct nvkm_therm *therm = nvxx_therm(&drm->client.device); - int ret; + int ret = 0; + int temp; if (!therm || !therm->attr_get) return -EOPNOTSUPP; switch (attr) { case hwmon_temp_input: - ret = nvkm_therm_temp_get(therm); - *val = ret < 0 ? ret : (ret * 1000); + ret = nvkm_therm_temp_get(therm, &temp); + *val = temp * 1000; break; case hwmon_temp_max: *val = therm->attr_get(therm, NVKM_THERM_ATTR_THRS_DOWN_CLK) @@ -459,7 +461,7 @@ nouveau_temp_read(struct device *dev, u32 attr, int channel, long *val) return -EOPNOTSUPP; } - return 0; + return ret; } static int @@ -713,6 +715,7 @@ nouveau_hwmon_init(struct drm_device *dev) struct device *hwmon_dev; int ret = 0; int i = 0; + int val; hwmon = drm->hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL); if (!hwmon) @@ -720,7 +723,7 @@ nouveau_hwmon_init(struct drm_device *dev) hwmon->dev = dev; if (therm && therm->attr_get && therm->attr_set) { - if (nvkm_therm_temp_get(therm) >= 0) + if (nvkm_therm_temp_get(therm, &val) >= 0) special_groups[i++] = &temp1_auto_point_sensor_group; if (therm->fan_get && therm->fan_get(therm) >= 0) special_groups[i++] = &pwm_fan_sensor_group; diff --git a/drm/nouveau/nvkm/subdev/therm/base.c b/drm/nouveau/nvkm/subdev/therm/base.c index 952a7cb0..27a03c50 100644 --- a/drm/nouveau/nvkm/subdev/therm/base.c +++ b/drm/nouveau/nvkm/subdev/therm/base.c @@ -24,22 +24,26 @@ #include "priv.h" int -nvkm_therm_temp_get(struct nvkm_therm *therm) +nvkm_therm_temp_get(struct nvkm_therm *therm, int *val) { if (therm->func->temp_get) - return therm->func->temp_get(therm); + return therm->func->temp_get(therm, val); return -ENODEV; } static int nvkm_therm_update_trip(struct nvkm_therm *therm) { + int temp, ret; struct nvbios_therm_trip_point *trip = therm->fan->bios.trip, *cur_trip = NULL, *last_trip = therm->last_trip; - u8 temp = therm->func->temp_get(therm); u16 duty, i; + ret = therm->func->temp_get(therm, &temp); + if (ret < 0) + return ret; + /* look for the trip point corresponding to the current temperature */ cur_trip = NULL; for (i = 0; i < therm->fan->bios.nr_fan_trip; i++) { @@ -67,9 +71,13 @@ static int nvkm_therm_compute_linear_duty(struct nvkm_therm *therm, u8 linear_min_temp, u8 linear_max_temp) { - u8 temp = therm->func->temp_get(therm); + int temp, ret; u16 duty; + ret = therm->func->temp_get(therm, &temp); + if (ret < 0) + return ret; + /* handle the non-linear part first */ if (temp < linear_min_temp) return therm->fan->bios.min_duty; @@ -182,6 +190,7 @@ nvkm_therm_fan_mode(struct nvkm_therm *therm, int mode) { struct nvkm_subdev *subdev = &therm->subdev; struct nvkm_device *device = subdev->device; + int val; static const char *name[] = { "disabled", "manual", @@ -197,7 +206,7 @@ nvkm_therm_fan_mode(struct nvkm_therm *therm, int mode) /* do not allow automatic fan management if the thermal sensor is * not available */ if (mode == NVKM_THERM_CTRL_AUTO && - therm->func->temp_get(therm) < 0) + therm->func->temp_get(therm, &val) < 0) return -EINVAL; if (therm->mode == mode) diff --git a/drm/nouveau/nvkm/subdev/therm/g84.c b/drm/nouveau/nvkm/subdev/therm/g84.c index 96f8da40..3bb2d829 100644 --- a/drm/nouveau/nvkm/subdev/therm/g84.c +++ b/drm/nouveau/nvkm/subdev/therm/g84.c @@ -27,14 +27,15 @@ #include <subdev/fuse.h> int -g84_temp_get(struct nvkm_therm *therm) +g84_temp_get(struct nvkm_therm *therm, int *val) { struct nvkm_device *device = therm->subdev.device; - if (nvkm_fuse_read(device->fuse, 0x1a8) == 1) - return nvkm_rd32(device, 0x20400); - else + if (nvkm_fuse_read(device->fuse, 0x1a8) != 1) return -ENODEV; + + *val = nvkm_rd32(device, 0x20400); + return 0; } void @@ -115,7 +116,7 @@ g84_therm_threshold_hyst_emulation(struct nvkm_therm *therm, } /* fix the state (in case someone reprogrammed the alarms) */ - cur = therm->func->temp_get(therm); + therm->func->temp_get(therm, &cur); if (new_state == NVKM_THERM_THRS_LOWER && cur > thrs->temp) new_state = NVKM_THERM_THRS_HIGHER; else if (new_state == NVKM_THERM_THRS_HIGHER && diff --git a/drm/nouveau/nvkm/subdev/therm/nv40.c b/drm/nouveau/nvkm/subdev/therm/nv40.c index 2c92ffb5..cfd5b215 100644 --- a/drm/nouveau/nvkm/subdev/therm/nv40.c +++ b/drm/nouveau/nvkm/subdev/therm/nv40.c @@ -70,7 +70,7 @@ nv40_sensor_setup(struct nvkm_therm *therm) } static int -nv40_temp_get(struct nvkm_therm *therm) +nv40_temp_get(struct nvkm_therm *therm, int *val) { struct nvkm_device *device = therm->subdev.device; struct nvbios_therm_sensor *sensor = &therm->bios_sensor; @@ -95,11 +95,8 @@ nv40_temp_get(struct nvkm_therm *therm) core_temp = core_temp + sensor->offset_num / sensor->offset_den; core_temp = core_temp + sensor->offset_constant - 8; - /* reserve negative temperatures for errors */ - if (core_temp < 0) - core_temp = 0; - - return core_temp; + *val = core_temp; + return 0; } static int diff --git a/drm/nouveau/nvkm/subdev/therm/nv50.c b/drm/nouveau/nvkm/subdev/therm/nv50.c index 9b57b433..62ec4063 100644 --- a/drm/nouveau/nvkm/subdev/therm/nv50.c +++ b/drm/nouveau/nvkm/subdev/therm/nv50.c @@ -126,7 +126,7 @@ nv50_sensor_setup(struct nvkm_therm *therm) } static int -nv50_temp_get(struct nvkm_therm *therm) +nv50_temp_get(struct nvkm_therm *therm, int *val) { struct nvkm_device *device = therm->subdev.device; struct nvbios_therm_sensor *sensor = &therm->bios_sensor; @@ -143,11 +143,8 @@ nv50_temp_get(struct nvkm_therm *therm) core_temp = core_temp + sensor->offset_num / sensor->offset_den; core_temp = core_temp + sensor->offset_constant - 8; - /* reserve negative temperatures for errors */ - if (core_temp < 0) - core_temp = 0; - - return core_temp; + *val = core_temp; + return 0; } static void diff --git a/drm/nouveau/nvkm/subdev/therm/priv.h b/drm/nouveau/nvkm/subdev/therm/priv.h index 1f46e371..b325ec5f 100644 --- a/drm/nouveau/nvkm/subdev/therm/priv.h +++ b/drm/nouveau/nvkm/subdev/therm/priv.h @@ -91,7 +91,7 @@ struct nvkm_therm_func { int (*pwm_set)(struct nvkm_therm *, int line, u32, u32); int (*pwm_clock)(struct nvkm_therm *, int line); - int (*temp_get)(struct nvkm_therm *); + int (*temp_get)(struct nvkm_therm *, int *); int (*fan_sense)(struct nvkm_therm *); @@ -105,7 +105,7 @@ int nv50_fan_pwm_get(struct nvkm_therm *, int, u32 *, u32 *); int nv50_fan_pwm_set(struct nvkm_therm *, int, u32, u32); int nv50_fan_pwm_clock(struct nvkm_therm *, int); -int g84_temp_get(struct nvkm_therm *); +int g84_temp_get(struct nvkm_therm *, int *); void g84_sensor_setup(struct nvkm_therm *); void g84_therm_fini(struct nvkm_therm *); diff --git a/drm/nouveau/nvkm/subdev/therm/temp.c b/drm/nouveau/nvkm/subdev/therm/temp.c index ddb2b2c6..5ec2dfb3 100644 --- a/drm/nouveau/nvkm/subdev/therm/temp.c +++ b/drm/nouveau/nvkm/subdev/therm/temp.c @@ -86,7 +86,10 @@ nvkm_therm_sensor_event(struct nvkm_therm *therm, enum nvkm_therm_thrs thrs, static const char * const thresholds[] = { "fanboost", "downclock", "critical", "shutdown" }; - int temperature = therm->func->temp_get(therm); + int temperature; + + if (therm->func->temp_get(therm, &temperature) < 0) + return; if (thrs < 0 || thrs > 3) return; @@ -140,7 +143,10 @@ nvkm_therm_threshold_hyst_polling(struct nvkm_therm *therm, { enum nvkm_therm_thrs_direction direction; enum nvkm_therm_thrs_state prev_state, new_state; - int temp = therm->func->temp_get(therm); + int temp; + + if (therm->func->temp_get(therm, &temp) < 0) + return; prev_state = nvkm_therm_sensor_get_threshold_state(therm, thrs_name); @@ -166,6 +172,7 @@ alarm_timer_callback(struct nvkm_alarm *alarm) struct nvbios_therm_sensor *sensor = &therm->bios_sensor; struct nvkm_timer *tmr = therm->subdev.device->timer; unsigned long flags; + int val; spin_lock_irqsave(&therm->sensor.alarm_program_lock, flags); @@ -185,7 +192,7 @@ alarm_timer_callback(struct nvkm_alarm *alarm) spin_unlock_irqrestore(&therm->sensor.alarm_program_lock, flags); /* schedule the next poll in one second */ - if (therm->func->temp_get(therm) >= 0) + if (therm->func->temp_get(therm, &val) >= 0) nvkm_timer_alarm(tmr, 1000000000ULL, alarm); } @@ -227,9 +234,10 @@ nvkm_therm_sensor_fini(struct nvkm_therm *therm, bool suspend) void nvkm_therm_sensor_preinit(struct nvkm_therm *therm) { + int val; const char *sensor_avail = "yes"; - if (therm->func->temp_get(therm) < 0) + if (therm->func->temp_get(therm, &val) < 0) sensor_avail = "no"; nvkm_debug(&therm->subdev, "internal sensor: %s\n", sensor_avail); -- 2.14.1
Otherwise hwmon interprets error codes as real values. Signed-off-by: Karol Herbst <karolherbst at gmail.com> --- drm/nouveau/nouveau_hwmon.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/drm/nouveau/nouveau_hwmon.c b/drm/nouveau/nouveau_hwmon.c index f1914d9a..0d75c14d 100644 --- a/drm/nouveau/nouveau_hwmon.c +++ b/drm/nouveau/nouveau_hwmon.c @@ -470,18 +470,23 @@ nouveau_fan_read(struct device *dev, u32 attr, int channel, long *val) struct drm_device *drm_dev = dev_get_drvdata(dev); struct nouveau_drm *drm = nouveau_drm(drm_dev); struct nvkm_therm *therm = nvxx_therm(&drm->client.device); + int ret; if (!therm) return -EOPNOTSUPP; switch (attr) { case hwmon_fan_input: - *val = nvkm_therm_fan_sense(therm); + ret = nvkm_therm_fan_sense(therm); break; default: return -EOPNOTSUPP; } + if (ret < 0) + return ret; + + *val = ret; return 0; } @@ -491,7 +496,7 @@ nouveau_in_read(struct device *dev, u32 attr, int channel, long *val) struct drm_device *drm_dev = dev_get_drvdata(dev); struct nouveau_drm *drm = nouveau_drm(drm_dev); struct nvkm_volt *volt = nvxx_volt(&drm->client.device); - int ret; + int ret = 0; if (!volt) return -EOPNOTSUPP; @@ -499,7 +504,8 @@ nouveau_in_read(struct device *dev, u32 attr, int channel, long *val) switch (attr) { case hwmon_in_input: ret = nvkm_volt_get(volt); - *val = ret < 0 ? ret : (ret / 1000); + if (ret >= 0) + *val = ret / 1000; break; case hwmon_in_min: *val = volt->min_uv > 0 ? (volt->min_uv / 1000) : -ENODEV; @@ -511,7 +517,7 @@ nouveau_in_read(struct device *dev, u32 attr, int channel, long *val) return -EOPNOTSUPP; } - return 0; + return ret; } static int @@ -520,21 +526,26 @@ nouveau_pwm_read(struct device *dev, u32 attr, int channel, long *val) struct drm_device *drm_dev = dev_get_drvdata(dev); struct nouveau_drm *drm = nouveau_drm(drm_dev); struct nvkm_therm *therm = nvxx_therm(&drm->client.device); + int ret; if (!therm || !therm->attr_get || !therm->fan_get) return -EOPNOTSUPP; switch (attr) { case hwmon_pwm_enable: - *val = therm->attr_get(therm, NVKM_THERM_ATTR_FAN_MODE); + ret = therm->attr_get(therm, NVKM_THERM_ATTR_FAN_MODE); break; case hwmon_pwm_input: - *val = therm->fan_get(therm); + ret = therm->fan_get(therm); break; default: return -EOPNOTSUPP; } + if (ret < 0) + return ret; + + *val = ret; return 0; } @@ -544,18 +555,26 @@ nouveau_power_read(struct device *dev, u32 attr, int channel, long *val) struct drm_device *drm_dev = dev_get_drvdata(dev); struct nouveau_drm *drm = nouveau_drm(drm_dev); struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->client.device); + int ret; if (!iccsense) return -EOPNOTSUPP; switch (attr) { case hwmon_power_input: - *val = nvkm_iccsense_read_all(iccsense); + ret = nvkm_iccsense_read_all(iccsense); + if (ret < 0) + return ret; + *val = ret; break; case hwmon_power_max: + if (iccsense->power_w_max <= 0) + return -ENODEV; *val = iccsense->power_w_max; break; case hwmon_power_crit: + if (iccsense->power_w_crit <= 0) + return -ENODEV; *val = iccsense->power_w_crit; break; default: -- 2.14.1
Karol Herbst
2017-Sep-02 23:54 UTC
[Nouveau] [PATCH 3/3] subdev/volt/gk104: return error when read fails
While my gpu was powered off, hwmon returned 0.6V as the current voltage. If nvkm_rd32 fails for any reason, return the error. With that sensors will display a "N/A" instead of 0.6V. Signed-off-by: Karol Herbst <karolherbst at gmail.com> --- drm/nouveau/nvkm/subdev/volt/gk104.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drm/nouveau/nvkm/subdev/volt/gk104.c b/drm/nouveau/nvkm/subdev/volt/gk104.c index 1c744e02..53a7af9d 100644 --- a/drm/nouveau/nvkm/subdev/volt/gk104.c +++ b/drm/nouveau/nvkm/subdev/volt/gk104.c @@ -40,10 +40,15 @@ gk104_volt_get(struct nvkm_volt *base) { struct nvbios_volt *bios = &gk104_volt(base)->bios; struct nvkm_device *device = base->subdev.device; - u32 div, duty; + int div, duty; div = nvkm_rd32(device, 0x20340); + if (div < 0) + return div; + duty = nvkm_rd32(device, 0x20344); + if (duty < 0) + return duty; return bios->base + bios->pwm_range * duty / div; } -- 2.14.1
Reasonably Related Threads
- [PATCH v2 1/2] drm/nouveau: return temperatures in temp_get() via parameter
- [PATCH v2 1/2] drm/nouveau: return temperatures in temp_get() via parameter
- [PATCH] drm/nouveau: Add fine-grain temperature reporting
- [PATCH 03/32] therm: Split return code and value in nvkm_get_temp
- [RFC PATCH 01/29] therm: split return code and value in nvkm_get_temp