Paul Dufresne
2023-Dec-02 20:17 UTC
[Nouveau] How to set debug parameters to get the information I want?
I now have something like: GRUB_CMDLINE_LINUX="pcie_aspm=off nouveau.debug=\"info,PDISP=debug\"" in /etc/default/grub. In case you want to know why pcie_aspm=off is there: it removes the AER corrected errors on the PCIe device that my GTX 660 seems to generate. nouveau.debug=info does not give me the information I want, which is the state of the different engines, with faulted 1 or faulted 0, after: Apr 10 18:22:59 jenny kernel: nouveau 0000:02:00.0: fifo: SCHED_ERROR 0a [CTXSW_TIMEOUT] (I want this in order to add more useful info in https://gitlab.freedesktop.org/xorg/driver/xf86-video-nouveau/-/issues/339 ). nouveau.debug=debug produces so much "crap" that when I run "dmesg > somefile", somefile does not have all the messages from boot: there is so much unneeded info that only unneeded info is left in the file. The unneeded info looks like this: [ 338.951804] nouveau: DRM-master:00000000:8000900b: fini completed in 7us [ 338.951811] nouveau: DRM-master:00000000:8000900b: destroy children... [ 338.951818] nouveau: DRM-master:00000000:8000900b: destroy running... [ 338.951825] nouveau: DRM-master:00000000:8000900b: destroy completed in 6us... [ 338.954303] nouveau: DRM-master:00000000:8000900b: init running... [ 338.954313] nouveau: DRM-master:00000000:8000900b: init children... [ 338.954319] nouveau: DRM-master:00000000:8000900b: init completed in 7us [ 338.954911] nouveau: DRM-master:00000000:8000900b: init running... and it appears too often. Also unneeded, but appearing less often: [ 339.029792] nouveau 0000:01:00.0: therm: FAN target request: 31% [ 339.029805] nouveau 0000:01:00.0: therm: FAN target: 31 [ 339.029816] nouveau 0000:01:00.0: therm: FAN update: 31 I think the DRM-master messages should not be at debug level, but at a more "paranoia" level. Anyway, I need help to build a nouveau.debug= line that would let me achieve my goal... please!
Paul Dufresne
2023-Dec-02 23:45 UTC
[Nouveau] Ré : How to set debug parameters to get the information I want?
It seems I have been able to push the unneeded info
that looks like this:
[ 338.951804] nouveau: DRM-master:00000000:8000900b: fini completed in 7us
[ 338.951811] nouveau: DRM-master:00000000:8000900b: destroy children...
[ 338.951818] nouveau: DRM-master:00000000:8000900b: destroy running...
[ 338.951825] nouveau: DRM-master:00000000:8000900b: destroy completed in 6us...
[ 338.954303] nouveau: DRM-master:00000000:8000900b: init running...
to trace level, which makes the nouveau.debug=debug output clearer.
I found the macros that print these messages in
drivers/gpu/drm/nouveau/include/nvkm/core/client.h :
/* logging for client-facing objects */
#define nvif_printk(o,l,p,f,a...) do { \
const struct nvkm_object *_object = (o); \
const struct nvkm_client *_client = _object->client; \
if (_client->debug >= NV_DBG_##l) \
printk(KERN_##p "nouveau: %s:%08x:%08x: "f, _client->name, \
_object->handle, _object->oclass, ##a); \
} while(0)
#define nvif_fatal(o,f,a...) nvif_printk((o), FATAL, CRIT, f, ##a)
#define nvif_error(o,f,a...) nvif_printk((o), ERROR, ERR, f, ##a)
#define nvif_debug(o,f,a...) nvif_printk((o), DEBUG, INFO, f, ##a)
#define nvif_trace(o,f,a...) nvif_printk((o), TRACE, INFO, f, ##a)
#define nvif_info(o,f,a...) nvif_printk((o), INFO, INFO, f, ##a)
#define nvif_ioctl(o,f,a...) nvif_trace((o), "ioctl: "f, ##a)
#endif
So all I did was:
sed -i 's/nvif_debug/nvif_trace/g'
drivers/gpu/drm/nouveau/nvkm/core/object.c
resulting in a "patch":
paul@albukerk:~/linux/linux-6.7-rc3$ diff -u
drivers/gpu/drm/nouveau/nvkm/core/object.c.orig
drivers/gpu/drm/nouveau/nvkm/core/object.c
--- drivers/gpu/drm/nouveau/nvkm/core/object.c.orig 2023-12-02 17:47:01.298989857 -0500
+++ drivers/gpu/drm/nouveau/nvkm/core/object.c 2023-12-02 17:48:32.814997742 -0500
@@ -183,7 +183,7 @@
s64 time;
int ret;
- nvif_debug(object, "%s children...\n", action);
+ nvif_trace(object, "%s children...\n", action);
time = ktime_to_us(ktime_get());
list_for_each_entry_reverse(child, &object->tree, head) {
ret = nvkm_object_fini(child, suspend);
@@ -191,7 +191,7 @@
goto fail_child;
}
- nvif_debug(object, "%s running...\n", action);
+ nvif_trace(object, "%s running...\n", action);
if (object->func->fini) {
ret = object->func->fini(object, suspend);
if (ret) {
@@ -202,7 +202,7 @@
}
time = ktime_to_us(ktime_get()) - time;
- nvif_debug(object, "%s completed in %lldus\n", action, time);
+ nvif_trace(object, "%s completed in %lldus\n", action, time);
return 0;
fail:
@@ -225,7 +225,7 @@
s64 time;
int ret;
- nvif_debug(object, "init running...\n");
+ nvif_trace(object, "init running...\n");
time = ktime_to_us(ktime_get());
if (object->func->init) {
ret = object->func->init(object);
@@ -233,7 +233,7 @@
goto fail;
}
- nvif_debug(object, "init children...\n");
+ nvif_trace(object, "init children...\n");
list_for_each_entry(child, &object->tree, head) {
ret = nvkm_object_init(child);
if (ret)
@@ -241,7 +241,7 @@
}
time = ktime_to_us(ktime_get()) - time;
- nvif_debug(object, "init completed in %lldus\n", time);
+ nvif_trace(object, "init completed in %lldus\n", time);
return 0;
fail_child:
@@ -261,19 +261,19 @@
void *data = object;
s64 time;
- nvif_debug(object, "destroy children...\n");
+ nvif_trace(object, "destroy children...\n");
time = ktime_to_us(ktime_get());
list_for_each_entry_safe(child, ctemp, &object->tree, head) {
nvkm_object_del(&child);
}
- nvif_debug(object, "destroy running...\n");
+ nvif_trace(object, "destroy running...\n");
nvkm_object_unmap(object);
if (object->func->dtor)
data = object->func->dtor(object);
nvkm_engine_unref(&object->engine);
time = ktime_to_us(ktime_get()) - time;
- nvif_debug(object, "destroy completed in %lldus...\n", time);
+ nvif_trace(object, "destroy completed in %lldus...\n", time);
return data;
}
paul@albukerk:~/linux/linux-6.7-rc3$
Now I need to work on also removing these:
[ 339.029792] nouveau 0000:01:00.0: therm: FAN target request: 31%
[ 339.029805] nouveau 0000:01:00.0: therm: FAN target: 31
[ 339.029816] nouveau 0000:01:00.0: therm: FAN update: 31