Peter Wu
2016-May-24 22:52 UTC
[Nouveau] [PATCH 0/4] nouveau fixes for RPM/Optimus-related hangs
Hi, Here are two patches to fix an issue reported on kernel bugzilla (infinite loop due to unchecked function) and a more important fix to fix hanging Optimus machines when runtime PM is enabled (with pm/pci patches). An older (obsolete) patch for the first issue was tested by the reporter: https://bugzilla.kernel.org/show_bug.cgi?id=104791#c11 (it is replaced by "check for function 0x1B before using it"). The second issue will occur when: - A modern Optimus laptop is in use (designed for Windows 8 or newer). - nouveau runtime PM is enabled (1 or the default -1). - The patch "PCI: Add runtime PM support for PCIe ports" from Mika is pulled into v4.7 (or v4.8[1]?) via the pci/pm branch, https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/commit/?h=pci/pm&id=8b71f5652eeac561acf883da01ab4810f763ee42 (see also the discussion for "[PATCH] PCI: Power on bridges before scanning new devices" at http://article.gmane.org/gmane.linux.power-management.general/76411) The first two patches are just refactoring to reduce code duplication (and scratch an itch) and make the following patches possible. The next two patches fix the problems reported above. I intend to get these patches in 4.7 (or the first version where pci/pm gets merged) to avoid a lockup when runpm is enabled. Note: - If the fourth patch is merged before/without Mika's PCIe port patch, then those modern Optimus machines above will not be put into D3cold. - If the fourth patch is not merged (or merged after Mika's patch), then under the above conditions the affected machine can lock up. - The three other patches are unrelated to this issue and can safely be merged. Tested with: - Linux v4.6 + pci/pm + these four patches - Hardware: Clevo P651RA with acpi_osi="!Windows 2015" (the latter is a workaround for another PCIe issue). 
- Card is asleep, woke up with lspci, waited a bit and retried/suspended: - # lspci -xxxxnnvvvv >/dev/null; sleep 5 - # lspci -xxxxnnvvvv >/dev/null; sleep 5; systemctl suspend - # lspci -xxxxnnvvvv >/dev/null; systemctl suspend Kind regards, Peter [1]: https://lkml.kernel.org/r/20160524211309.GH1789 at lahna.fi.intel.com Peter Wu (4): drm/nouveau/acpi: ensure matching ACPI handle and supported functions drm/nouveau/acpi: return supported DSM functions drm/nouveau/acpi: check for function 0x1B before using it drm/nouveau/acpi: fix lockup with PCIe runtime PM drivers/gpu/drm/nouveau/nouveau_acpi.c | 100 +++++++++++++++++++++------------ 1 file changed, 63 insertions(+), 37 deletions(-) -- 2.8.2
Peter Wu
2016-May-24 22:52 UTC
[Nouveau] [PATCH 1/4] drm/nouveau/acpi: ensure matching ACPI handle and supported functions
Ensure that the returned set of supported DSM functions (MUX, Optimus)
matches the ACPI handle that is set in nouveau_dsm_pci_probe.
As there are no machines with a MUX function on just one PCI device and
an Optimus on another, there should not be a functional impact. This
change however makes this implicit assumption more obvious.
Convert int to bool and rename has_dsm to has_mux while at it.
Signed-off-by: Peter Wu <peter at lekensteyn.nl>
---
drivers/gpu/drm/nouveau/nouveau_acpi.c | 55 ++++++++++++++--------------------
1 file changed, 23 insertions(+), 32 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c
b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index cdf5227..45fa9b2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -57,9 +57,6 @@ bool nouveau_is_v1_dsm(void) {
return nouveau_dsm_priv.dsm_detected;
}
-#define NOUVEAU_DSM_HAS_MUX 0x1
-#define NOUVEAU_DSM_HAS_OPT 0x2
-
#ifdef CONFIG_VGA_SWITCHEROO
static const char nouveau_dsm_muid[] = {
0xA0, 0xA0, 0x95, 0x9D, 0x60, 0x00, 0x48, 0x4D,
@@ -212,26 +209,33 @@ static const struct vga_switcheroo_handler
nouveau_dsm_handler = {
.get_client_id = nouveau_dsm_get_client_id,
};
-static int nouveau_dsm_pci_probe(struct pci_dev *pdev)
+static void nouveau_dsm_pci_probe(struct pci_dev *pdev, bool *has_mux,
+ bool *has_opt)
{
acpi_handle dhandle;
- int retval = 0;
+ bool supports_mux;
+ bool supports_opt;
dhandle = ACPI_HANDLE(&pdev->dev);
if (!dhandle)
- return false;
+ return;
if (!acpi_has_method(dhandle, "_DSM"))
- return false;
+ return;
+
+ supports_mux = acpi_check_dsm(dhandle, nouveau_dsm_muid, 0x00000102,
+ 1 << NOUVEAU_DSM_POWER);
+ supports_opt = nouveau_check_optimus_dsm(dhandle);
- if (acpi_check_dsm(dhandle, nouveau_dsm_muid, 0x00000102,
- 1 << NOUVEAU_DSM_POWER))
- retval |= NOUVEAU_DSM_HAS_MUX;
+ /* Does not look like a Nvidia device. */
+ if (!supports_mux && !supports_opt)
+ return;
- if (nouveau_check_optimus_dsm(dhandle))
- retval |= NOUVEAU_DSM_HAS_OPT;
+ nouveau_dsm_priv.dhandle = dhandle;
+ *has_mux = supports_mux;
+ *has_opt = supports_opt;
- if (retval & NOUVEAU_DSM_HAS_OPT) {
+ if (supports_opt) {
uint32_t result;
nouveau_optimus_dsm(dhandle, NOUVEAU_DSM_OPTIMUS_CAPS, 0,
&result);
@@ -240,10 +244,6 @@ static int nouveau_dsm_pci_probe(struct pci_dev *pdev)
(result & OPTIMUS_DYNAMIC_PWR_CAP) ? "dynamic power, " :
"",
(result & OPTIMUS_HDA_CODEC_MASK) ? "hda bios codec
supported" : "");
}
- if (retval)
- nouveau_dsm_priv.dhandle = dhandle;
-
- return retval;
}
static bool nouveau_dsm_detect(void)
@@ -251,11 +251,10 @@ static bool nouveau_dsm_detect(void)
char acpi_method_name[255] = { 0 };
struct acpi_buffer buffer = {sizeof(acpi_method_name), acpi_method_name};
struct pci_dev *pdev = NULL;
- int has_dsm = 0;
- int has_optimus = 0;
+ bool has_mux = false;
+ bool has_optimus = false;
int vga_count = 0;
bool guid_valid;
- int retval;
bool ret = false;
/* lookup the MXM GUID */
@@ -268,32 +267,24 @@ static bool nouveau_dsm_detect(void)
while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL)
{
vga_count++;
- retval = nouveau_dsm_pci_probe(pdev);
- if (retval & NOUVEAU_DSM_HAS_MUX)
- has_dsm |= 1;
- if (retval & NOUVEAU_DSM_HAS_OPT)
- has_optimus = 1;
+ nouveau_dsm_pci_probe(pdev, &has_mux, &has_optimus);
}
while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_3D << 8, pdev)) != NULL)
{
vga_count++;
- retval = nouveau_dsm_pci_probe(pdev);
- if (retval & NOUVEAU_DSM_HAS_MUX)
- has_dsm |= 1;
- if (retval & NOUVEAU_DSM_HAS_OPT)
- has_optimus = 1;
+ nouveau_dsm_pci_probe(pdev, &has_mux, &has_optimus);
}
/* find the optimus DSM or the old v1 DSM */
- if (has_optimus == 1) {
+ if (has_optimus) {
acpi_get_name(nouveau_dsm_priv.dhandle, ACPI_FULL_PATHNAME,
&buffer);
printk(KERN_INFO "VGA switcheroo: detected Optimus DSM method %s
handle\n",
acpi_method_name);
nouveau_dsm_priv.optimus_detected = true;
ret = true;
- } else if (vga_count == 2 && has_dsm && guid_valid) {
+ } else if (vga_count == 2 && has_mux && guid_valid) {
acpi_get_name(nouveau_dsm_priv.dhandle, ACPI_FULL_PATHNAME,
&buffer);
printk(KERN_INFO "VGA switcheroo: detected DSM switching method %s
handle\n",
--
2.8.2
Peter Wu
2016-May-24 22:52 UTC
[Nouveau] [PATCH 2/4] drm/nouveau/acpi: return supported DSM functions
Return the set of supported functions to the caller. No functional
changes.
Signed-off-by: Peter Wu <peter at lekensteyn.nl>
---
drivers/gpu/drm/nouveau/nouveau_acpi.c | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c
b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index 45fa9b2..71d5e6a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -107,7 +107,7 @@ static int nouveau_optimus_dsm(acpi_handle handle, int func,
int arg, uint32_t *
* requirements on the fourth parameter, so a private implementation
* instead of using acpi_check_dsm().
*/
-static int nouveau_check_optimus_dsm(acpi_handle handle)
+static int nouveau_dsm_get_optimus_functions(acpi_handle handle)
{
int result;
@@ -122,7 +122,9 @@ static int nouveau_check_optimus_dsm(acpi_handle handle)
* ACPI Spec v4 9.14.1: if bit 0 is zero, no function is supported.
* If the n-th bit is enabled, function n is supported
*/
- return result & 1 && result & (1 <<
NOUVEAU_DSM_OPTIMUS_CAPS);
+ if (result & 1 && result & (1 <<
NOUVEAU_DSM_OPTIMUS_CAPS))
+ return result;
+ return 0;
}
static int nouveau_dsm(acpi_handle handle, int func, int arg)
@@ -214,7 +216,7 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev, bool
*has_mux,
{
acpi_handle dhandle;
bool supports_mux;
- bool supports_opt;
+ int optimus_funcs;
dhandle = ACPI_HANDLE(&pdev->dev);
if (!dhandle)
@@ -225,17 +227,17 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev,
bool *has_mux,
supports_mux = acpi_check_dsm(dhandle, nouveau_dsm_muid, 0x00000102,
1 << NOUVEAU_DSM_POWER);
- supports_opt = nouveau_check_optimus_dsm(dhandle);
+ optimus_funcs = nouveau_dsm_get_optimus_functions(dhandle);
/* Does not look like a Nvidia device. */
- if (!supports_mux && !supports_opt)
+ if (!supports_mux && !optimus_funcs)
return;
nouveau_dsm_priv.dhandle = dhandle;
*has_mux = supports_mux;
- *has_opt = supports_opt;
+ *has_opt = !!optimus_funcs;
- if (supports_opt) {
+ if (optimus_funcs) {
uint32_t result;
nouveau_optimus_dsm(dhandle, NOUVEAU_DSM_OPTIMUS_CAPS, 0,
&result);
--
2.8.2
Peter Wu
2016-May-24 22:53 UTC
[Nouveau] [PATCH 3/4] drm/nouveau/acpi: check for function 0x1B before using it
Do not unconditionally invoke function 0x1B without checking for its
availability; doing so leads to an infinite loop on some firmware.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=104791
Fixes: 5addcf0a5f0fad ("nouveau: add runtime PM support (v0.9)")
Signed-off-by: Peter Wu <peter at lekensteyn.nl>
---
drivers/gpu/drm/nouveau/nouveau_acpi.c | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c
b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index 71d5e6a..df9f73e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -45,6 +45,7 @@
static struct nouveau_dsm_priv {
bool dsm_detected;
bool optimus_detected;
+ bool optimus_flags_detected;
acpi_handle dhandle;
acpi_handle rom_handle;
} nouveau_dsm_priv;
@@ -212,7 +213,7 @@ static const struct vga_switcheroo_handler
nouveau_dsm_handler = {
};
static void nouveau_dsm_pci_probe(struct pci_dev *pdev, bool *has_mux,
- bool *has_opt)
+ bool *has_opt, bool *has_opt_flags)
{
acpi_handle dhandle;
bool supports_mux;
@@ -236,6 +237,7 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev, bool
*has_mux,
nouveau_dsm_priv.dhandle = dhandle;
*has_mux = supports_mux;
*has_opt = !!optimus_funcs;
+ *has_opt_flags = optimus_funcs & (1 << NOUVEAU_DSM_OPTIMUS_FLAGS);
if (optimus_funcs) {
uint32_t result;
@@ -255,6 +257,7 @@ static bool nouveau_dsm_detect(void)
struct pci_dev *pdev = NULL;
bool has_mux = false;
bool has_optimus = false;
+ bool has_optimus_flags = false;
int vga_count = 0;
bool guid_valid;
bool ret = false;
@@ -269,13 +272,15 @@ static bool nouveau_dsm_detect(void)
while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL)
{
vga_count++;
- nouveau_dsm_pci_probe(pdev, &has_mux, &has_optimus);
+ nouveau_dsm_pci_probe(pdev, &has_mux, &has_optimus,
+ &has_optimus_flags);
}
while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_3D << 8, pdev)) != NULL)
{
vga_count++;
- nouveau_dsm_pci_probe(pdev, &has_mux, &has_optimus);
+ nouveau_dsm_pci_probe(pdev, &has_mux, &has_optimus,
+ &has_optimus_flags);
}
/* find the optimus DSM or the old v1 DSM */
@@ -285,6 +290,7 @@ static bool nouveau_dsm_detect(void)
printk(KERN_INFO "VGA switcheroo: detected Optimus DSM method %s
handle\n",
acpi_method_name);
nouveau_dsm_priv.optimus_detected = true;
+ nouveau_dsm_priv.optimus_flags_detected = has_optimus_flags;
ret = true;
} else if (vga_count == 2 && has_mux && guid_valid) {
acpi_get_name(nouveau_dsm_priv.dhandle, ACPI_FULL_PATHNAME,
@@ -317,8 +323,9 @@ void nouveau_switcheroo_optimus_dsm(void)
if (!nouveau_dsm_priv.optimus_detected)
return;
- nouveau_optimus_dsm(nouveau_dsm_priv.dhandle, NOUVEAU_DSM_OPTIMUS_FLAGS,
- 0x3, &result);
+ if (nouveau_dsm_priv.optimus_flags_detected)
+ nouveau_optimus_dsm(nouveau_dsm_priv.dhandle, NOUVEAU_DSM_OPTIMUS_FLAGS,
+ 0x3, &result);
nouveau_optimus_dsm(nouveau_dsm_priv.dhandle, NOUVEAU_DSM_OPTIMUS_CAPS,
NOUVEAU_DSM_OPTIMUS_SET_POWERDOWN, &result);
--
2.8.2
Peter Wu
2016-May-24 22:53 UTC
[Nouveau] [PATCH 4/4] drm/nouveau/acpi: fix lockup with PCIe runtime PM
Since "PCI: Add runtime PM support for PCIe ports", the parent PCIe port
can be runtime-suspended, which disables power resources via ACPI. This
is incompatible with DSM, resulting in a GPU device which is still in D3
and locks up the kernel on resume.
Mirror the behavior of Windows 8 and newer[1] (as observed via an AMLi
debugger trace) and stop using the DSM functions for D3cold when power
resources are available on the parent PCIe port.
[1]:
https://msdn.microsoft.com/windows/hardware/drivers/bringup/firmware-requirements-for-d3cold
Signed-off-by: Peter Wu <peter at lekensteyn.nl>
---
drivers/gpu/drm/nouveau/nouveau_acpi.c | 34 ++++++++++++++++++++++++++++++----
1 file changed, 30 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c
b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index df9f73e..e469df7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -46,6 +46,7 @@ static struct nouveau_dsm_priv {
bool dsm_detected;
bool optimus_detected;
bool optimus_flags_detected;
+ bool optimus_skip_dsm;
acpi_handle dhandle;
acpi_handle rom_handle;
} nouveau_dsm_priv;
@@ -212,8 +213,26 @@ static const struct vga_switcheroo_handler
nouveau_dsm_handler = {
.get_client_id = nouveau_dsm_get_client_id,
};
+/* Firmware supporting Windows 8 or later do not use _DSM to put the device
into
+ * D3cold, they instead rely on disabling power resources on the parent. */
+static bool nouveau_pr3_present(struct pci_dev *pdev)
+{
+ struct pci_dev *parent_pdev = pci_upstream_bridge(pdev);
+ struct acpi_device *ad;
+
+ if (!parent_pdev)
+ return false;
+
+ ad = ACPI_COMPANION(&parent_pdev->dev);
+ if (!ad)
+ return false;
+
+ return ad->power.flags.power_resources;
+}
+
static void nouveau_dsm_pci_probe(struct pci_dev *pdev, bool *has_mux,
- bool *has_opt, bool *has_opt_flags)
+ bool *has_opt, bool *has_opt_flags,
+ bool *has_power_resources)
{
acpi_handle dhandle;
bool supports_mux;
@@ -238,6 +257,7 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev, bool
*has_mux,
*has_mux = supports_mux;
*has_opt = !!optimus_funcs;
*has_opt_flags = optimus_funcs & (1 << NOUVEAU_DSM_OPTIMUS_FLAGS);
+ *has_power_resources = false;
if (optimus_funcs) {
uint32_t result;
@@ -247,6 +267,8 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev, bool
*has_mux,
(result & OPTIMUS_ENABLED) ? "enabled" :
"disabled",
(result & OPTIMUS_DYNAMIC_PWR_CAP) ? "dynamic power, " :
"",
(result & OPTIMUS_HDA_CODEC_MASK) ? "hda bios codec
supported" : "");
+
+ *has_power_resources = nouveau_pr3_present(pdev);
}
}
@@ -258,6 +280,7 @@ static bool nouveau_dsm_detect(void)
bool has_mux = false;
bool has_optimus = false;
bool has_optimus_flags = false;
+ bool has_power_resources = false;
int vga_count = 0;
bool guid_valid;
bool ret = false;
@@ -273,14 +296,14 @@ static bool nouveau_dsm_detect(void)
vga_count++;
nouveau_dsm_pci_probe(pdev, &has_mux, &has_optimus,
- &has_optimus_flags);
+ &has_optimus_flags, &has_power_resources);
}
while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_3D << 8, pdev)) != NULL)
{
vga_count++;
nouveau_dsm_pci_probe(pdev, &has_mux, &has_optimus,
- &has_optimus_flags);
+ &has_optimus_flags, &has_power_resources);
}
/* find the optimus DSM or the old v1 DSM */
@@ -289,8 +312,11 @@ static bool nouveau_dsm_detect(void)
&buffer);
printk(KERN_INFO "VGA switcheroo: detected Optimus DSM method %s
handle\n",
acpi_method_name);
+ if (has_power_resources)
+ pr_info("nouveau: detected PR support, will not use DSM\n");
nouveau_dsm_priv.optimus_detected = true;
nouveau_dsm_priv.optimus_flags_detected = has_optimus_flags;
+ nouveau_dsm_priv.optimus_skip_dsm = has_power_resources;
ret = true;
} else if (vga_count == 2 && has_mux && guid_valid) {
acpi_get_name(nouveau_dsm_priv.dhandle, ACPI_FULL_PATHNAME,
@@ -320,7 +346,7 @@ void nouveau_register_dsm_handler(void)
void nouveau_switcheroo_optimus_dsm(void)
{
u32 result = 0;
- if (!nouveau_dsm_priv.optimus_detected)
+ if (!nouveau_dsm_priv.optimus_detected || nouveau_dsm_priv.optimus_skip_dsm)
return;
if (nouveau_dsm_priv.optimus_flags_detected)
--
2.8.2
Hans de Goede
2016-May-25 09:08 UTC
[Nouveau] [PATCH 0/4] nouveau fixes for RPM/Optimus-related hangs
Hi, On 25-05-16 00:52, Peter Wu wrote: > Hi, > > Here are two patches to fix an issue reported on kernel bugzilla (infinite loop > due to unchecked function) and a more important fix to fix hanging Optimus > machines when runtime PM is enabled (with pm/pci patches). > > An older (obsolete) patch for the first issue was tested by the reporter: > https://bugzilla.kernel.org/show_bug.cgi?id=104791#c11 > (it is replaced by "check for function 0x1B before using it"). > > The second issue will occur when: > - A modern Optimus laptop is in use (designed for Windows 8 or newer). > - nouveau runtime PM is enabled (1 or the default -1). > - The patch "PCI: Add runtime PM support for PCIe ports" from Mika is pulled > into v4.7 (or v4.8[1]?) via the pci/pm branch, > https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/commit/?h=pci/pm&id=8b71f5652eeac561acf883da01ab4810f763ee42 > (see also the discussion for "[PATCH] PCI: Power on bridges before scanning new > devices" at http://article.gmane.org/gmane.linux.power-management.general/76411) > > The first two patches are just refactoring to reduce code duplication (and > scratch an itch) and make the following patches possible. The next two patches > fix the problems reported above. > > I intend to get these patches in 4.7 (or the first version where pci/pm gets > merged) to avoid a lockup when runpm is enabled. Note: > - If the fourth patch is merged before/without Mika's PCIe port patch, then > those modern Optimus machines above will not be put into D3cold. > - If the fourth patch is not merged (or merged after Mika's patch), then under > the above conditions the affected machine can lock up. > - The three other patches are unrelated to this issue and can safely be merged. > > Tested with: > - Linux v4.6 + pci/pm + these four patches > - Hardware: Clevo P651RA with acpi_osi="!Windows 2015" (the latter is a > workaround for another PCIe issue). 
> - Card is asleep, woke up with lspci, waited a bit and retried/suspended: > - # lspci -xxxxnnvvvv >/dev/null; sleep 5 > - # lspci -xxxxnnvvvv >/dev/null; sleep 5; systemctl suspend > - # lspci -xxxxnnvvvv >/dev/null; systemctl suspend
Series looks good to me: Reviewed-by: Hans de Goede <hdegoede at redhat.com> Regards, Hans
> > Kind regards, > Peter > > [1]: https://lkml.kernel.org/r/20160524211309.GH1789 at lahna.fi.intel.com > > Peter Wu (4): > drm/nouveau/acpi: ensure matching ACPI handle and supported functions > drm/nouveau/acpi: return supported DSM functions > drm/nouveau/acpi: check for function 0x1B before using it > drm/nouveau/acpi: fix lockup with PCIe runtime PM > > drivers/gpu/drm/nouveau/nouveau_acpi.c | 100 +++++++++++++++++++++------------ > 1 file changed, 63 insertions(+), 37 deletions(-) >
Mika Westerberg
2016-May-25 13:55 UTC
[Nouveau] [PATCH 4/4] drm/nouveau/acpi: fix lockup with PCIe runtime PM
On Wed, May 25, 2016 at 12:53:01AM +0200, Peter Wu wrote: > Since "PCI: Add runtime PM support for PCIe ports", the parent PCIe port > can be runtime-suspended which disables power resources via ACPI. This > is incompatible with DSM, resulting in a GPU device which is still in D3 > and locks up the kernel on resume. > > Mirror the behavior of Windows 8 and newer[1] (as observed via an AMLi > debugger trace) and stop using the DSM functions for D3cold when power > resources are available on the parent PCIe port. > > [1]: https://msdn.microsoft.com/windows/hardware/drivers/bringup/firmware-requirements-for-d3cold > > Signed-off-by: Peter Wu <peter at lekensteyn.nl> > --- > drivers/gpu/drm/nouveau/nouveau_acpi.c | 34 ++++++++++++++++++++++++++++++---- > 1 file changed, 30 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c > index df9f73e..e469df7 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c > +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c > @@ -46,6 +46,7 @@ static struct nouveau_dsm_priv { > bool dsm_detected; > bool optimus_detected; > bool optimus_flags_detected; > + bool optimus_skip_dsm; > acpi_handle dhandle; > acpi_handle rom_handle; > } nouveau_dsm_priv; > @@ -212,8 +213,26 @@ static const struct vga_switcheroo_handler nouveau_dsm_handler = { > .get_client_id = nouveau_dsm_get_client_id, > }; > > +/* Firmware supporting Windows 8 or later do not use _DSM to put the device into > + * D3cold, they instead rely on disabling power resources on the parent. */ > +static bool nouveau_pr3_present(struct pci_dev *pdev) > +{ > + struct pci_dev *parent_pdev = pci_upstream_bridge(pdev); > + struct acpi_device *ad;
Nit: please call this adev instead of ad.
> + > + if (!parent_pdev) > + return false; > + > + ad = ACPI_COMPANION(&parent_pdev->dev); > + if (!ad) > + return false; > + > + return ad->power.flags.power_resources;
Is this sufficient to tell if the parent device has _PR3? I thought it returns true if it has power resources in general, not necessarily _PR3. Otherwise this looks okay to me.
Emil Velikov
2016-May-27 13:01 UTC
[Nouveau] [PATCH 4/4] drm/nouveau/acpi: fix lockup with PCIe runtime PM
Hi Peter, On 24 May 2016 at 23:53, Peter Wu <peter at lekensteyn.nl> wrote:> Since "PCI: Add runtime PM support for PCIe ports", the parent PCIe port > can be runtime-suspended which disables power resources via ACPI. This > is incompatible with DSM, resulting in a GPU device which is still in D3 > and locks up the kernel on resume. > > Mirror the behavior of Windows 8 and newer[1] (as observed via an AMLi > debugger trace) and stop using the DSM functions for D3cold when power > resources are available on the parent PCIe port. > > [1]: https://msdn.microsoft.com/windows/hardware/drivers/bringup/firmware-requirements-for-d3cold > > Signed-off-by: Peter Wu <peter at lekensteyn.nl> > --- > drivers/gpu/drm/nouveau/nouveau_acpi.c | 34 ++++++++++++++++++++++++++++++---- > 1 file changed, 30 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c > index df9f73e..e469df7 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c > +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c > @@ -46,6 +46,7 @@ static struct nouveau_dsm_priv { > bool dsm_detected; > bool optimus_detected; > bool optimus_flags_detected; > + bool optimus_skip_dsm; > acpi_handle dhandle; > acpi_handle rom_handle; > } nouveau_dsm_priv; > @@ -212,8 +213,26 @@ static const struct vga_switcheroo_handler nouveau_dsm_handler = { > .get_client_id = nouveau_dsm_get_client_id, > }; > > +/* Firmware supporting Windows 8 or later do not use _DSM to put the device into > + * D3cold, they instead rely on disabling power resources on the parent. 
*/ > +static bool nouveau_pr3_present(struct pci_dev *pdev) > +{ > + struct pci_dev *parent_pdev = pci_upstream_bridge(pdev); > + struct acpi_device *ad; > + > + if (!parent_pdev) > + return false; > + > + ad = ACPI_COMPANION(&parent_pdev->dev); > + if (!ad) > + return false; > + > + return ad->power.flags.power_resources; > +} > + > static void nouveau_dsm_pci_probe(struct pci_dev *pdev, bool *has_mux, > - bool *has_opt, bool *has_opt_flags) > + bool *has_opt, bool *has_opt_flags, > + bool *has_power_resources) > { > acpi_handle dhandle; > bool supports_mux; > @@ -238,6 +257,7 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev, bool *has_mux, > *has_mux = supports_mux; > *has_opt = !!optimus_funcs; > *has_opt_flags = optimus_funcs & (1 << NOUVEAU_DSM_OPTIMUS_FLAGS); > + *has_power_resources = false; > > if (optimus_funcs) { > uint32_t result; > @@ -247,6 +267,8 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev, bool *has_mux, > (result & OPTIMUS_ENABLED) ? "enabled" : "disabled", > (result & OPTIMUS_DYNAMIC_PWR_CAP) ? "dynamic power, " : "", > (result & OPTIMUS_HDA_CODEC_MASK) ? "hda bios codec supported" : ""); > + > + *has_power_resources = nouveau_pr3_present(pdev); > } > } > > @@ -258,6 +280,7 @@ static bool nouveau_dsm_detect(void) > bool has_mux = false; > bool has_optimus = false; > bool has_optimus_flags = false; > + bool has_power_resources = false; > int vga_count = 0; > bool guid_valid; > bool ret = false; > @@ -273,14 +296,14 @@ static bool nouveau_dsm_detect(void) > vga_count++; > > nouveau_dsm_pci_probe(pdev, &has_mux, &has_optimus, > - &has_optimus_flags); > + &has_optimus_flags, &has_power_resources); > } > > while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_3D << 8, pdev)) != NULL) { > vga_count++; > > nouveau_dsm_pci_probe(pdev, &has_mux, &has_optimus, > - &has_optimus_flags); > + &has_optimus_flags, &has_power_resources); > } >This and earlier patch break things in a subtle way. 
Namely: upon the second (and any later) call into the nouveau_dsm_pci_probe() function, the has_foo flags are reset. Thus only the specifics of the _final_ device are being used (at a later stage). IMHO one should change that to "_any_ device", which will match the original code and the actual intent further down in the file. Regards, Emil
Reasonably Related Threads
- [PATCH v2 0/4] nouveau RPM fixes for Optimus
- [PATCH v3 0/4] nouveau RPM fixes for Optimus (final)
- [PATCH 4/4] drm/nouveau/acpi: fix lockup with PCIe runtime PM
- [PATCH 4/4] drm/nouveau/acpi: fix lockup with PCIe runtime PM
- [PATCH 1/8] acpi: Rename v1 DSM to mux to avoid ambiguity