Hi All!

At long last, a respin of the cpuidle vs rcu cleanup patches.

v1: https://lkml.kernel.org/r/20220608142723.103523089 at infradead.org

These here patches clean up the mess that is cpuidle vs rcuidle.

At the end of the ride there's only one RCU_NONIDLE user left:

  arch/arm64/kernel/suspend.c: RCU_NONIDLE(__cpu_suspend_exit());

and 'one' trace_*_rcuidle() user:

  kernel/trace/trace_preemptirq.c: trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
  kernel/trace/trace_preemptirq.c: trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
  kernel/trace/trace_preemptirq.c: trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr);
  kernel/trace/trace_preemptirq.c: trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr);
  kernel/trace/trace_preemptirq.c: trace_preempt_enable_rcuidle(a0, a1);
  kernel/trace/trace_preemptirq.c: trace_preempt_disable_rcuidle(a0, a1);

However, this last is all in deprecated code that should be unused for GENERIC_ENTRY.

I've touched a lot of code that I can't test and I might've broken something by accident. In particular the whole ARM cpuidle stuff was quite involved. Please all, have a look where you haven't already.

New since v1:

 - rebase on top of Frederic's rcu-context-tracking rename fest
 - more omap goodness as per the last discussion (thanks Tony!)
 - removed one more RCU_NONIDLE() from arm64/risc-v perf code
 - ubsan/kasan fixes
 - intel_idle module-param for testing
 - a bunch of extra __always_inline, because compilers are silly.

---
 arch/alpha/kernel/process.c | 1 -
 arch/alpha/kernel/vmlinux.lds.S | 1 -
 arch/arc/kernel/process.c | 3 ++
 arch/arc/kernel/vmlinux.lds.S | 1 -
 arch/arm/include/asm/vmlinux.lds.h | 1 -
 arch/arm/kernel/process.c | 1 -
 arch/arm/kernel/smp.c | 6 +--
 arch/arm/mach-gemini/board-dt.c | 3 +-
 arch/arm/mach-imx/cpuidle-imx6q.c | 4 +-
 arch/arm/mach-imx/cpuidle-imx6sx.c | 5 ++-
 arch/arm/mach-omap2/common.h | 6 ++-
 arch/arm/mach-omap2/cpuidle34xx.c | 16 +++++++-
 arch/arm/mach-omap2/cpuidle44xx.c | 29 +++++++-------
 arch/arm/mach-omap2/omap-mpuss-lowpower.c | 12 +++++-
 arch/arm/mach-omap2/pm.h | 2 +-
 arch/arm/mach-omap2/pm24xx.c | 51 +-----------------------
 arch/arm/mach-omap2/pm34xx.c | 14 +++++--
 arch/arm/mach-omap2/pm44xx.c | 2 +-
 arch/arm/mach-omap2/powerdomain.c | 10 ++---
 arch/arm64/kernel/idle.c | 1 -
 arch/arm64/kernel/smp.c | 4 +-
 arch/arm64/kernel/vmlinux.lds.S | 1 -
 arch/csky/kernel/process.c | 1 -
 arch/csky/kernel/smp.c | 2 +-
 arch/csky/kernel/vmlinux.lds.S | 1 -
 arch/hexagon/kernel/process.c | 1 -
 arch/hexagon/kernel/vmlinux.lds.S | 1 -
 arch/ia64/kernel/process.c | 1 +
 arch/ia64/kernel/vmlinux.lds.S | 1 -
 arch/loongarch/kernel/idle.c | 1 +
 arch/loongarch/kernel/vmlinux.lds.S | 1 -
 arch/m68k/kernel/vmlinux-nommu.lds | 1 -
 arch/m68k/kernel/vmlinux-std.lds | 1 -
 arch/m68k/kernel/vmlinux-sun3.lds | 1 -
 arch/microblaze/kernel/process.c | 1 -
 arch/microblaze/kernel/vmlinux.lds.S | 1 -
 arch/mips/kernel/idle.c | 8 ++--
 arch/mips/kernel/vmlinux.lds.S | 1 -
 arch/nios2/kernel/process.c | 1 -
 arch/nios2/kernel/vmlinux.lds.S | 1 -
 arch/openrisc/kernel/process.c | 1 +
 arch/openrisc/kernel/vmlinux.lds.S | 1 -
 arch/parisc/kernel/process.c | 2 -
 arch/parisc/kernel/vmlinux.lds.S | 1 -
 arch/powerpc/kernel/idle.c | 5 +--
 arch/powerpc/kernel/vmlinux.lds.S | 1 -
 arch/riscv/kernel/process.c | 1 -
 arch/riscv/kernel/vmlinux-xip.lds.S | 1 -
 arch/riscv/kernel/vmlinux.lds.S | 1 -
 arch/s390/kernel/idle.c | 1 -
 arch/s390/kernel/vmlinux.lds.S | 1 -
 arch/sh/kernel/idle.c | 1 +
 arch/sh/kernel/vmlinux.lds.S | 1 -
 arch/sparc/kernel/leon_pmc.c | 4 ++
 arch/sparc/kernel/process_32.c | 1 -
 arch/sparc/kernel/process_64.c | 3 +-
 arch/sparc/kernel/vmlinux.lds.S | 1 -
 arch/um/kernel/dyn.lds.S | 1 -
 arch/um/kernel/process.c | 1 -
 arch/um/kernel/uml.lds.S | 1 -
 arch/x86/boot/compressed/vmlinux.lds.S | 1 +
 arch/x86/coco/tdx/tdcall.S | 15 +------
 arch/x86/coco/tdx/tdx.c | 25 ++++--------
 arch/x86/events/amd/brs.c | 13 +++----
 arch/x86/include/asm/fpu/xcr.h | 4 +-
 arch/x86/include/asm/irqflags.h | 11 ++----
 arch/x86/include/asm/mwait.h | 14 +++----
 arch/x86/include/asm/nospec-branch.h | 2 +-
 arch/x86/include/asm/paravirt.h | 6 ++-
 arch/x86/include/asm/perf_event.h | 2 +-
 arch/x86/include/asm/shared/io.h | 4 +-
 arch/x86/include/asm/shared/tdx.h | 1 -
 arch/x86/include/asm/special_insns.h | 8 ++--
 arch/x86/include/asm/xen/hypercall.h | 2 +-
 arch/x86/kernel/cpu/bugs.c | 2 +-
 arch/x86/kernel/fpu/core.c | 4 +-
 arch/x86/kernel/paravirt.c | 14 ++++++-
 arch/x86/kernel/process.c | 65 +++++++++++++++----------------
 arch/x86/kernel/vmlinux.lds.S | 1 -
 arch/x86/lib/memcpy_64.S | 5 +--
 arch/x86/lib/memmove_64.S | 4 +-
 arch/x86/lib/memset_64.S | 4 +-
 arch/x86/xen/enlighten_pv.c | 2 +-
 arch/x86/xen/irq.c | 2 +-
 arch/xtensa/kernel/process.c | 1 +
 arch/xtensa/kernel/vmlinux.lds.S | 1 -
 drivers/acpi/processor_idle.c | 36 ++++++++++-------
 drivers/base/power/runtime.c | 24 ++++++------
 drivers/clk/clk.c | 8 ++--
 drivers/cpuidle/cpuidle-arm.c | 1 +
 drivers/cpuidle/cpuidle-big_little.c | 8 +++-
 drivers/cpuidle/cpuidle-mvebu-v7.c | 7 ++++
 drivers/cpuidle/cpuidle-psci.c | 10 +++--
 drivers/cpuidle/cpuidle-qcom-spm.c | 1 +
 drivers/cpuidle/cpuidle-riscv-sbi.c | 10 +++--
 drivers/cpuidle/cpuidle-tegra.c | 21 +++++++---
 drivers/cpuidle/cpuidle.c | 21 +++++-----
 drivers/cpuidle/dt_idle_states.c | 2 +-
 drivers/cpuidle/poll_state.c | 10 ++++-
 drivers/idle/intel_idle.c | 19 +++++----
 drivers/perf/arm_pmu.c | 11 +----
 drivers/perf/riscv_pmu_sbi.c | 8 +---
 include/asm-generic/vmlinux.lds.h | 9 ++---
 include/linux/compiler_types.h | 8 +++-
 include/linux/cpu.h | 3 --
 include/linux/cpuidle.h | 34 ++++++++++++++++
 include/linux/cpumask.h | 4 +-
 include/linux/percpu-defs.h | 2 +-
 include/linux/sched/idle.h | 40 ++++++++++++++-----
 include/linux/thread_info.h | 18 ++++++++-
 include/linux/tracepoint.h | 13 ++++++-
 kernel/cpu_pm.c | 9 -----
 kernel/printk/printk.c | 2 +-
 kernel/sched/idle.c | 47 +++++++--------------
 kernel/time/tick-broadcast-hrtimer.c | 29 ++++++-------
 kernel/time/tick-broadcast.c | 6 ++-
 kernel/trace/trace.c | 3 ++
 lib/ubsan.c | 5 ++-
 mm/kasan/kasan.h | 4 ++
 mm/kasan/shadow.c | 38 ++++++++++++++++++
 tools/objtool/check.c | 17 ++++++++
 121 files changed, 511 insertions(+), 420 deletions(-)
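For readers joining late: RCU_NONIDLE() is the wrapper this series is trying to kill. It forces RCU to watch again around a single statement executed from the idle path. A rough sketch of its shape after the context-tracking rename (paraphrased, not the verbatim rcupdate.h definition):

	#define RCU_NONIDLE(a) \
		do { \
			ct_irq_enter_irqson();	/* make RCU watch again */ \
			do { a; } while (0); \
			ct_irq_exit_irqson();	/* and stop watching */ \
		} while (0)

The patches below instead move ct_idle_enter()/ct_idle_exit() into the cpuidle drivers themselves, so RCU is only disabled across the actual low-level idle sequence and such wrappers aren't needed.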
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 01/44] x86/perf/amd: Remove tracing from perf_lopwr_cb()
The perf_lopwr_cb() is called from the idle routines; there is no RCU
there; we must not enter tracing.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
arch/x86/events/amd/brs.c | 13 +++++--------
arch/x86/include/asm/perf_event.h | 2 +-
2 files changed, 6 insertions(+), 9 deletions(-)
--- a/arch/x86/events/amd/brs.c
+++ b/arch/x86/events/amd/brs.c
@@ -41,18 +41,15 @@ static inline unsigned int brs_to(int id
return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1;
}
-static inline void set_debug_extn_cfg(u64 val)
+static __always_inline void set_debug_extn_cfg(u64 val)
{
/* bits[4:3] must always be set to 11b */
- wrmsrl(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3);
+ __wrmsr(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3, val >> 32);
}
-static inline u64 get_debug_extn_cfg(void)
+static __always_inline u64 get_debug_extn_cfg(void)
{
- u64 val;
-
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, val);
- return val;
+ return __rdmsr(MSR_AMD_DBG_EXTN_CFG);
}
static bool __init amd_brs_detect(void)
@@ -338,7 +335,7 @@ void amd_pmu_brs_sched_task(struct perf_
* called from ACPI processor_idle.c or acpi_pad.c
* with interrupts disabled
*/
-void perf_amd_brs_lopwr_cb(bool lopwr_in)
+void noinstr perf_amd_brs_lopwr_cb(bool lopwr_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
union amd_debug_extn_cfg cfg;
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -554,7 +554,7 @@ extern void perf_amd_brs_lopwr_cb(bool l
DECLARE_STATIC_CALL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
-static inline void perf_lopwr_cb(bool lopwr_in)
+static __always_inline void perf_lopwr_cb(bool lopwr_in)
{
static_call_mod(perf_lopwr_cb)(lopwr_in);
}
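For context: rdmsrl()/wrmsrl() can bounce through the MSR tracepoints, which is exactly what must not happen without RCU; __rdmsr()/__wrmsr() are the raw, untraced accessors, with __wrmsr() taking the 64-bit value as two 32-bit halves. A sketch of the split (the helper name is made up for illustration):

	/* hypothetical helper; shows why 'val >> 32' appears above */
	static __always_inline void wrmsrl_untraced(u32 msr, u64 val)
	{
		__wrmsr(msr, (u32)val, (u32)(val >> 32));	/* lo, hi */
	}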
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 02/44] x86/idle: Replace x86_idle with a static_call
Typical boot time setup; no need to suffer an indirect call for that.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
Reviewed-by: Frederic Weisbecker <frederic at kernel.org>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki at intel.com>
---
arch/x86/kernel/process.c | 50 +++++++++++++++++++++++++---------------------
1 file changed, 28 insertions(+), 22 deletions(-)
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -24,6 +24,7 @@
#include <linux/cpuidle.h>
#include <linux/acpi.h>
#include <linux/elf-randomize.h>
+#include <linux/static_call.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
#include <asm/cpu.h>
@@ -692,7 +693,23 @@ void __switch_to_xtra(struct task_struct
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(boot_option_idle_override);
-static void (*x86_idle)(void);
+/*
+ * We use this if we don't have any better idle routine..
+ */
+void __cpuidle default_idle(void)
+{
+ raw_safe_halt();
+}
+#if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
+EXPORT_SYMBOL(default_idle);
+#endif
+
+DEFINE_STATIC_CALL_NULL(x86_idle, default_idle);
+
+static bool x86_idle_set(void)
+{
+ return !!static_call_query(x86_idle);
+}
#ifndef CONFIG_SMP
static inline void play_dead(void)
@@ -715,28 +732,17 @@ void arch_cpu_idle_dead(void)
/*
* Called from the generic idle code.
*/
-void arch_cpu_idle(void)
-{
- x86_idle();
-}
-
-/*
- * We use this if we don't have any better idle routine..
- */
-void __cpuidle default_idle(void)
+void __cpuidle arch_cpu_idle(void)
{
- raw_safe_halt();
+ static_call(x86_idle)();
}
-#if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
-EXPORT_SYMBOL(default_idle);
-#endif
#ifdef CONFIG_XEN
bool xen_set_default_idle(void)
{
- bool ret = !!x86_idle;
+ bool ret = x86_idle_set();
- x86_idle = default_idle;
+ static_call_update(x86_idle, default_idle);
return ret;
}
@@ -859,20 +865,20 @@ void select_idle_routine(const struct cp
if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1)
pr_warn_once("WARNING: polling idle and HT enabled, performance may
degrade\n");
#endif
- if (x86_idle || boot_option_idle_override == IDLE_POLL)
+ if (x86_idle_set() || boot_option_idle_override == IDLE_POLL)
return;
if (boot_cpu_has_bug(X86_BUG_AMD_E400)) {
pr_info("using AMD E400 aware idle routine\n");
- x86_idle = amd_e400_idle;
+ static_call_update(x86_idle, amd_e400_idle);
} else if (prefer_mwait_c1_over_halt(c)) {
pr_info("using mwait in idle threads\n");
- x86_idle = mwait_idle;
+ static_call_update(x86_idle, mwait_idle);
} else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
pr_info("using TDX aware idle routine\n");
- x86_idle = tdx_safe_halt;
+ static_call_update(x86_idle, tdx_safe_halt);
} else
- x86_idle = default_idle;
+ static_call_update(x86_idle, default_idle);
}
void amd_e400_c1e_apic_setup(void)
@@ -925,7 +931,7 @@ static int __init idle_setup(char *str)
* To continue to load the CPU idle driver, don't touch
* the boot_option_idle_override.
*/
- x86_idle = default_idle;
+ static_call_update(x86_idle, default_idle);
boot_option_idle_override = IDLE_HALT;
} else if (!strcmp(str, "nomwait")) {
/*
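For reference, the static_call pattern used above, reduced to a minimal sketch (names are illustrative, not from the patch): a NULL-initialized static call takes its type from the default target, stays a no-op until updated, and the call site is patched into a direct call:

	#include <linux/static_call.h>

	static void my_default(void) { }

	DEFINE_STATIC_CALL_NULL(my_op, my_default);	/* typed from my_default, target NULL */

	static void my_setup(void)
	{
		if (!static_call_query(my_op))		/* nothing set yet? */
			static_call_update(my_op, my_default);
	}

	static void my_run(void)
	{
		static_call(my_op)();	/* direct call; no function-pointer load */
	}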
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 03/44] cpuidle/poll: Ensure IRQ state is invariant
cpuidle_state::enter() methods should be IRQ invariant.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki at intel.com>
---
drivers/cpuidle/poll_state.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -17,7 +17,7 @@ static int __cpuidle poll_idle(struct cp
dev->poll_time_limit = false;
- local_irq_enable();
+ raw_local_irq_enable();
if (!current_set_polling_and_test()) {
unsigned int loop_count = 0;
u64 limit;
@@ -36,6 +36,8 @@ static int __cpuidle poll_idle(struct cp
}
}
}
+ raw_local_irq_disable();
+
current_clr_polling();
return index;
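Spelled out, the contract this patch establishes for ->enter() methods: called with IRQs disabled, allowed to enable them while polling, but required to return with them disabled again. An illustrative skeleton (not a real driver):

	static int my_poll_enter(struct cpuidle_device *dev,
				 struct cpuidle_driver *drv, int index)
	{
		/* entered with IRQs disabled */
		raw_local_irq_enable();		/* poll with IRQs on ... */
		while (!need_resched())
			cpu_relax();
		raw_local_irq_disable();	/* ... and restore the invariant */
		return index;
	}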
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 04/44] cpuidle: Move IRQ state validation
Make cpuidle_enter_state() consistent with the s2idle variant and
verify ->enter() always returns with interrupts disabled.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
drivers/cpuidle/cpuidle.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -236,7 +236,11 @@ int cpuidle_enter_state(struct cpuidle_d
stop_critical_timings();
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
ct_idle_enter();
+
entered_state = target_state->enter(dev, drv, index);
+ if (WARN_ONCE(!irqs_disabled(), "%ps leaked IRQ state", target_state->enter))
+ raw_local_irq_disable();
+
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
ct_idle_exit();
start_critical_timings();
@@ -248,12 +252,8 @@ int cpuidle_enter_state(struct cpuidle_d
/* The cpu is no longer idle or about to enter idle. */
sched_idle_set_state(NULL);
- if (broadcast) {
- if (WARN_ON_ONCE(!irqs_disabled()))
- local_irq_disable();
-
+ if (broadcast)
tick_broadcast_exit();
- }
if (!cpuidle_state_is_coupled(drv, index))
local_irq_enable();
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 05/44] cpuidle,riscv: Push RCU-idle into driver
Doing RCU-idle outside the driver, only to then temporarily enable it
again, at least twice, before going idle is daft.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
drivers/cpuidle/cpuidle-riscv-sbi.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
--- a/drivers/cpuidle/cpuidle-riscv-sbi.c
+++ b/drivers/cpuidle/cpuidle-riscv-sbi.c
@@ -116,12 +116,12 @@ static int __sbi_enter_domain_idle_state
return -1;
/* Do runtime PM to manage a hierarchical CPU toplogy. */
- ct_irq_enter_irqson();
if (s2idle)
dev_pm_genpd_suspend(pd_dev);
else
pm_runtime_put_sync_suspend(pd_dev);
- ct_irq_exit_irqson();
+
+ ct_idle_enter();
if (sbi_is_domain_state_available())
state = sbi_get_domain_state();
@@ -130,12 +130,12 @@ static int __sbi_enter_domain_idle_state
ret = sbi_suspend(state) ? -1 : idx;
- ct_irq_enter_irqson();
+ ct_idle_exit();
+
if (s2idle)
dev_pm_genpd_resume(pd_dev);
else
pm_runtime_get_sync(pd_dev);
- ct_irq_exit_irqson();
cpu_pm_exit();
@@ -246,6 +246,7 @@ static int sbi_dt_cpu_init_topology(stru
* of a shared state for the domain, assumes the domain states are all
* deeper states.
*/
+ drv->states[state_count - 1].flags |= CPUIDLE_FLAG_RCU_IDLE;
drv->states[state_count - 1].enter = sbi_enter_domain_idle_state;
drv->states[state_count - 1].enter_s2idle = sbi_enter_s2idle_domain_idle_state;
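This is the pattern all the "Push RCU-idle into driver" patches (05-12) follow; an ARM-flavoured sketch with made-up function names:

	static int my_enter(struct cpuidle_device *dev,
			    struct cpuidle_driver *drv, int index)
	{
		cpu_pm_enter();			/* notifiers run with RCU watching */

		ct_idle_enter();		/* RCU stops watching here ... */
		cpu_suspend(0, my_idle_finish);	/* ... only around the real idle */
		ct_idle_exit();

		cpu_pm_exit();
		return index;
	}

plus CPUIDLE_FLAG_RCU_IDLE on the state, which tells the cpuidle core not to do ct_idle_enter()/ct_idle_exit() around the whole ->enter() call itself.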
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 06/44] cpuidle,tegra: Push RCU-idle into driver
Doing RCU-idle outside the driver, only to then temporarily enable it
again, at least twice, before going idle is daft.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
drivers/cpuidle/cpuidle-tegra.c | 21 ++++++++++++++++-----
1 file changed, 16 insertions(+), 5 deletions(-)
--- a/drivers/cpuidle/cpuidle-tegra.c
+++ b/drivers/cpuidle/cpuidle-tegra.c
@@ -180,9 +180,11 @@ static int tegra_cpuidle_state_enter(str
}
local_fiq_disable();
- RCU_NONIDLE(tegra_pm_set_cpu_in_lp2());
+ tegra_pm_set_cpu_in_lp2();
cpu_pm_enter();
+ ct_idle_enter();
+
switch (index) {
case TEGRA_C7:
err = tegra_cpuidle_c7_enter();
@@ -197,8 +199,10 @@ static int tegra_cpuidle_state_enter(str
break;
}
+ ct_idle_exit();
+
cpu_pm_exit();
- RCU_NONIDLE(tegra_pm_clear_cpu_in_lp2());
+ tegra_pm_clear_cpu_in_lp2();
local_fiq_enable();
return err ?: index;
@@ -226,6 +230,7 @@ static int tegra_cpuidle_enter(struct cp
struct cpuidle_driver *drv,
int index)
{
+ bool do_rcu = drv->states[index].flags & CPUIDLE_FLAG_RCU_IDLE;
unsigned int cpu = cpu_logical_map(dev->cpu);
int ret;
@@ -233,9 +238,13 @@ static int tegra_cpuidle_enter(struct cp
if (dev->states_usage[index].disable)
return -1;
- if (index == TEGRA_C1)
+ if (index == TEGRA_C1) {
+ if (do_rcu)
+ ct_idle_enter();
ret = arm_cpuidle_simple_enter(dev, drv, index);
- else
+ if (do_rcu)
+ ct_idle_exit();
+ } else
ret = tegra_cpuidle_state_enter(dev, index, cpu);
if (ret < 0) {
@@ -285,7 +294,8 @@ static struct cpuidle_driver tegra_idle_
.exit_latency = 2000,
.target_residency = 2200,
.power_usage = 100,
- .flags = CPUIDLE_FLAG_TIMER_STOP,
+ .flags = CPUIDLE_FLAG_TIMER_STOP |
+ CPUIDLE_FLAG_RCU_IDLE,
.name = "C7",
.desc = "CPU core powered off",
},
@@ -295,6 +305,7 @@ static struct cpuidle_driver tegra_idle_
.target_residency = 10000,
.power_usage = 0,
.flags = CPUIDLE_FLAG_TIMER_STOP |
+ CPUIDLE_FLAG_RCU_IDLE |
CPUIDLE_FLAG_COUPLED,
.name = "CC6",
.desc = "CPU cluster powered off",
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 07/44] cpuidle,psci: Push RCU-idle into driver
Doing RCU-idle outside the driver, only to then temporarily enable it
again, at least twice, before going idle is daft.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
drivers/cpuidle/cpuidle-psci.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -69,12 +69,12 @@ static int __psci_enter_domain_idle_stat
return -1;
/* Do runtime PM to manage a hierarchical CPU toplogy. */
- ct_irq_enter_irqson();
if (s2idle)
dev_pm_genpd_suspend(pd_dev);
else
pm_runtime_put_sync_suspend(pd_dev);
- ct_irq_exit_irqson();
+
+ ct_idle_enter();
state = psci_get_domain_state();
if (!state)
@@ -82,12 +82,12 @@ static int __psci_enter_domain_idle_stat
ret = psci_cpu_suspend_enter(state) ? -1 : idx;
- ct_irq_enter_irqson();
+ ct_idle_exit();
+
if (s2idle)
dev_pm_genpd_resume(pd_dev);
else
pm_runtime_get_sync(pd_dev);
- ct_irq_exit_irqson();
cpu_pm_exit();
@@ -240,6 +240,7 @@ static int psci_dt_cpu_init_topology(str
* of a shared state for the domain, assumes the domain states are all
* deeper states.
*/
+ drv->states[state_count - 1].flags |= CPUIDLE_FLAG_RCU_IDLE;
drv->states[state_count - 1].enter = psci_enter_domain_idle_state;
drv->states[state_count - 1].enter_s2idle = psci_enter_s2idle_domain_idle_state;
psci_cpuidle_use_cpuhp = true;
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 08/44] cpuidle,imx6: Push RCU-idle into driver
Doing RCU-idle outside the driver, only to then temporarily enable it
again, at least twice, before going idle is daft.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
arch/arm/mach-imx/cpuidle-imx6sx.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/arch/arm/mach-imx/cpuidle-imx6sx.c
+++ b/arch/arm/mach-imx/cpuidle-imx6sx.c
@@ -47,7 +47,9 @@ static int imx6sx_enter_wait(struct cpui
cpu_pm_enter();
cpu_cluster_pm_enter();
+ ct_idle_enter();
cpu_suspend(0, imx6sx_idle_finish);
+ ct_idle_exit();
cpu_cluster_pm_exit();
cpu_pm_exit();
@@ -87,7 +89,8 @@ static struct cpuidle_driver imx6sx_cpui
*/
.exit_latency = 300,
.target_residency = 500,
- .flags = CPUIDLE_FLAG_TIMER_STOP,
+ .flags = CPUIDLE_FLAG_TIMER_STOP |
+ CPUIDLE_FLAG_RCU_IDLE,
.enter = imx6sx_enter_wait,
.name = "LOW-POWER-IDLE",
.desc = "ARM power off",
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 09/44] cpuidle,omap3: Push RCU-idle into driver
Doing RCU-idle outside the driver, only to then temporarily enable it
again before going idle is daft.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
Reviewed-by: Tony Lindgren <tony at atomide.com>
Tested-by: Tony Lindgren <tony at atomide.com>
---
arch/arm/mach-omap2/cpuidle34xx.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
--- a/arch/arm/mach-omap2/cpuidle34xx.c
+++ b/arch/arm/mach-omap2/cpuidle34xx.c
@@ -133,7 +133,9 @@ static int omap3_enter_idle(struct cpuid
}
/* Execute ARM wfi */
+ ct_idle_enter();
omap_sram_idle();
+ ct_idle_exit();
/*
* Call idle CPU PM enter notifier chain to restore
@@ -265,6 +267,7 @@ static struct cpuidle_driver omap3_idle_
.owner = THIS_MODULE,
.states = {
{
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = omap3_enter_idle_bm,
.exit_latency = 2 + 2,
.target_residency = 5,
@@ -272,6 +275,7 @@ static struct cpuidle_driver omap3_idle_
.desc = "MPU ON + CORE ON",
},
{
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = omap3_enter_idle_bm,
.exit_latency = 10 + 10,
.target_residency = 30,
@@ -279,6 +283,7 @@ static struct cpuidle_driver omap3_idle_
.desc = "MPU ON + CORE ON",
},
{
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = omap3_enter_idle_bm,
.exit_latency = 50 + 50,
.target_residency = 300,
@@ -286,6 +291,7 @@ static struct cpuidle_driver omap3_idle_
.desc = "MPU RET + CORE ON",
},
{
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = omap3_enter_idle_bm,
.exit_latency = 1500 + 1800,
.target_residency = 4000,
@@ -293,6 +299,7 @@ static struct cpuidle_driver omap3_idle_
.desc = "MPU OFF + CORE ON",
},
{
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = omap3_enter_idle_bm,
.exit_latency = 2500 + 7500,
.target_residency = 12000,
@@ -300,6 +307,7 @@ static struct cpuidle_driver omap3_idle_
.desc = "MPU RET + CORE RET",
},
{
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = omap3_enter_idle_bm,
.exit_latency = 3000 + 8500,
.target_residency = 15000,
@@ -307,6 +315,7 @@ static struct cpuidle_driver omap3_idle_
.desc = "MPU OFF + CORE RET",
},
{
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = omap3_enter_idle_bm,
.exit_latency = 10000 + 30000,
.target_residency = 30000,
@@ -328,6 +337,7 @@ static struct cpuidle_driver omap3430_id
.owner = THIS_MODULE,
.states = {
{
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = omap3_enter_idle_bm,
.exit_latency = 110 + 162,
.target_residency = 5,
@@ -335,6 +345,7 @@ static struct cpuidle_driver omap3430_id
.desc = "MPU ON + CORE ON",
},
{
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = omap3_enter_idle_bm,
.exit_latency = 106 + 180,
.target_residency = 309,
@@ -342,6 +353,7 @@ static struct cpuidle_driver omap3430_id
.desc = "MPU ON + CORE ON",
},
{
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = omap3_enter_idle_bm,
.exit_latency = 107 + 410,
.target_residency = 46057,
@@ -349,6 +361,7 @@ static struct cpuidle_driver omap3430_id
.desc = "MPU RET + CORE ON",
},
{
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = omap3_enter_idle_bm,
.exit_latency = 121 + 3374,
.target_residency = 46057,
@@ -356,6 +369,7 @@ static struct cpuidle_driver omap3430_id
.desc = "MPU OFF + CORE ON",
},
{
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = omap3_enter_idle_bm,
.exit_latency = 855 + 1146,
.target_residency = 46057,
@@ -363,6 +377,7 @@ static struct cpuidle_driver omap3430_id
.desc = "MPU RET + CORE RET",
},
{
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = omap3_enter_idle_bm,
.exit_latency = 7580 + 4134,
.target_residency = 484329,
@@ -370,6 +385,7 @@ static struct cpuidle_driver omap3430_id
.desc = "MPU OFF + CORE RET",
},
{
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = omap3_enter_idle_bm,
.exit_latency = 7505 + 15274,
.target_residency = 484329,
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 10/44] cpuidle,armada: Push RCU-idle into driver
Doing RCU-idle outside the driver, only to then temporarily enable it
again before going idle is daft.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
drivers/cpuidle/cpuidle-mvebu-v7.c | 7 +++++++
1 file changed, 7 insertions(+)
--- a/drivers/cpuidle/cpuidle-mvebu-v7.c
+++ b/drivers/cpuidle/cpuidle-mvebu-v7.c
@@ -36,7 +36,10 @@ static int mvebu_v7_enter_idle(struct cp
if (drv->states[index].flags & MVEBU_V7_FLAG_DEEP_IDLE)
deepidle = true;
+ ct_idle_enter();
ret = mvebu_v7_cpu_suspend(deepidle);
+ ct_idle_exit();
+
cpu_pm_exit();
if (ret)
@@ -49,6 +52,7 @@ static struct cpuidle_driver armadaxp_id
.name = "armada_xp_idle",
.states[0] = ARM_CPUIDLE_WFI_STATE,
.states[1] = {
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = mvebu_v7_enter_idle,
.exit_latency = 100,
.power_usage = 50,
@@ -57,6 +61,7 @@ static struct cpuidle_driver armadaxp_id
.desc = "CPU power down",
},
.states[2] = {
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = mvebu_v7_enter_idle,
.exit_latency = 1000,
.power_usage = 5,
@@ -72,6 +77,7 @@ static struct cpuidle_driver armada370_i
.name = "armada_370_idle",
.states[0] = ARM_CPUIDLE_WFI_STATE,
.states[1] = {
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = mvebu_v7_enter_idle,
.exit_latency = 100,
.power_usage = 5,
@@ -87,6 +93,7 @@ static struct cpuidle_driver armada38x_i
.name = "armada_38x_idle",
.states[0] = ARM_CPUIDLE_WFI_STATE,
.states[1] = {
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = mvebu_v7_enter_idle,
.exit_latency = 10,
.power_usage = 5,
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 11/44] cpuidle,omap4: Push RCU-idle into driver
Doing RCU-idle outside the driver, only to then temporarily enable it
again, some *four* times, before going idle is daft.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
Reviewed-by: Tony Lindgren <tony at atomide.com>
Tested-by: Tony Lindgren <tony at atomide.com>
---
arch/arm/mach-omap2/cpuidle44xx.c | 29 ++++++++++++++++++-----------
1 file changed, 18 insertions(+), 11 deletions(-)
--- a/arch/arm/mach-omap2/cpuidle44xx.c
+++ b/arch/arm/mach-omap2/cpuidle44xx.c
@@ -105,7 +105,9 @@ static int omap_enter_idle_smp(struct cp
}
raw_spin_unlock_irqrestore(&mpu_lock, flag);
+ ct_idle_enter();
omap4_enter_lowpower(dev->cpu, cx->cpu_state);
+ ct_idle_exit();
raw_spin_lock_irqsave(&mpu_lock, flag);
if (cx->mpu_state_vote == num_online_cpus())
@@ -151,10 +153,10 @@ static int omap_enter_idle_coupled(struc
(cx->mpu_logic_state == PWRDM_POWER_OFF);
/* Enter broadcast mode for periodic timers */
- RCU_NONIDLE(tick_broadcast_enable());
+ tick_broadcast_enable();
/* Enter broadcast mode for one-shot timers */
- RCU_NONIDLE(tick_broadcast_enter());
+ tick_broadcast_enter();
/*
* Call idle CPU PM enter notifier chain so that
@@ -166,7 +168,7 @@ static int omap_enter_idle_coupled(struc
if (dev->cpu == 0) {
pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state);
- RCU_NONIDLE(omap_set_pwrdm_state(mpu_pd, cx->mpu_state));
+ omap_set_pwrdm_state(mpu_pd, cx->mpu_state);
/*
* Call idle CPU cluster PM enter notifier chain
@@ -178,14 +180,16 @@ static int omap_enter_idle_coupled(struc
index = 0;
cx = state_ptr + index;
pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state);
- RCU_NONIDLE(omap_set_pwrdm_state(mpu_pd, cx->mpu_state));
+ omap_set_pwrdm_state(mpu_pd, cx->mpu_state);
mpuss_can_lose_context = 0;
}
}
}
+ ct_idle_enter();
omap4_enter_lowpower(dev->cpu, cx->cpu_state);
cpu_done[dev->cpu] = true;
+ ct_idle_exit();
/* Wakeup CPU1 only if it is not offlined */
if (dev->cpu == 0 && cpumask_test_cpu(1, cpu_online_mask)) {
@@ -194,9 +198,9 @@ static int omap_enter_idle_coupled(struc
mpuss_can_lose_context)
gic_dist_disable();
- RCU_NONIDLE(clkdm_deny_idle(cpu_clkdm[1]));
- RCU_NONIDLE(omap_set_pwrdm_state(cpu_pd[1], PWRDM_POWER_ON));
- RCU_NONIDLE(clkdm_allow_idle(cpu_clkdm[1]));
+ clkdm_deny_idle(cpu_clkdm[1]);
+ omap_set_pwrdm_state(cpu_pd[1], PWRDM_POWER_ON);
+ clkdm_allow_idle(cpu_clkdm[1]);
if (IS_PM44XX_ERRATUM(PM_OMAP4_ROM_SMP_BOOT_ERRATUM_GICD) &&
mpuss_can_lose_context) {
@@ -222,7 +226,7 @@ static int omap_enter_idle_coupled(struc
cpu_pm_exit();
cpu_pm_out:
- RCU_NONIDLE(tick_broadcast_exit());
+ tick_broadcast_exit();
fail:
cpuidle_coupled_parallel_barrier(dev, &abort_barrier);
@@ -247,7 +251,8 @@ static struct cpuidle_driver omap4_idle_
/* C2 - CPU0 OFF + CPU1 OFF + MPU CSWR */
.exit_latency = 328 + 440,
.target_residency = 960,
- .flags = CPUIDLE_FLAG_COUPLED,
+ .flags = CPUIDLE_FLAG_COUPLED |
+ CPUIDLE_FLAG_RCU_IDLE,
.enter = omap_enter_idle_coupled,
.name = "C2",
.desc = "CPUx OFF, MPUSS CSWR",
@@ -256,7 +261,8 @@ static struct cpuidle_driver omap4_idle_
/* C3 - CPU0 OFF + CPU1 OFF + MPU OSWR */
.exit_latency = 460 + 518,
.target_residency = 1100,
- .flags = CPUIDLE_FLAG_COUPLED,
+ .flags = CPUIDLE_FLAG_COUPLED |
+ CPUIDLE_FLAG_RCU_IDLE,
.enter = omap_enter_idle_coupled,
.name = "C3",
.desc = "CPUx OFF, MPUSS OSWR",
@@ -282,7 +288,8 @@ static struct cpuidle_driver omap5_idle_
/* C2 - CPU0 RET + CPU1 RET + MPU CSWR */
.exit_latency = 48 + 60,
.target_residency = 100,
- .flags = CPUIDLE_FLAG_TIMER_STOP,
+ .flags = CPUIDLE_FLAG_TIMER_STOP |
+ CPUIDLE_FLAG_RCU_IDLE,
.enter = omap_enter_idle_smp,
.name = "C2",
.desc = "CPUx CSWR, MPUSS CSWR",
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 12/44] cpuidle,dt: Push RCU-idle into driver
Doing RCU-idle outside the driver, only to then temporarily enable it
again before going idle is daft.
Notably: this converts all dt_init_idle_driver() and
__CPU_PM_CPU_IDLE_ENTER() users for they are inextricably intertwined.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
arch/arm/mach-omap2/cpuidle34xx.c | 4 ++--
drivers/acpi/processor_idle.c | 2 ++
drivers/cpuidle/cpuidle-arm.c | 1 +
drivers/cpuidle/cpuidle-big_little.c | 8 ++++++--
drivers/cpuidle/cpuidle-psci.c | 1 +
drivers/cpuidle/cpuidle-qcom-spm.c | 1 +
drivers/cpuidle/cpuidle-riscv-sbi.c | 1 +
drivers/cpuidle/dt_idle_states.c | 2 +-
include/linux/cpuidle.h | 4 ++++
9 files changed, 19 insertions(+), 5 deletions(-)
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -1200,6 +1200,8 @@ static int acpi_processor_setup_lpi_stat
state->target_residency = lpi->min_residency;
if (lpi->arch_flags)
state->flags |= CPUIDLE_FLAG_TIMER_STOP;
+ if (lpi->entry_method == ACPI_CSTATE_FFH)
+ state->flags |= CPUIDLE_FLAG_RCU_IDLE;
state->enter = acpi_idle_lpi_enter;
drv->safe_state_index = i;
}
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -53,6 +53,7 @@ static struct cpuidle_driver arm_idle_dr
* handler for idle state index 0.
*/
.states[0] = {
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.enter = arm_enter_idle_state,
.exit_latency = 1,
.target_residency = 1,
--- a/drivers/cpuidle/cpuidle-big_little.c
+++ b/drivers/cpuidle/cpuidle-big_little.c
@@ -64,7 +64,8 @@ static struct cpuidle_driver bl_idle_lit
.enter = bl_enter_powerdown,
.exit_latency = 700,
.target_residency = 2500,
- .flags = CPUIDLE_FLAG_TIMER_STOP,
+ .flags = CPUIDLE_FLAG_TIMER_STOP |
+ CPUIDLE_FLAG_RCU_IDLE,
.name = "C1",
.desc = "ARM little-cluster power down",
},
@@ -85,7 +86,8 @@ static struct cpuidle_driver bl_idle_big
.enter = bl_enter_powerdown,
.exit_latency = 500,
.target_residency = 2000,
- .flags = CPUIDLE_FLAG_TIMER_STOP,
+ .flags = CPUIDLE_FLAG_TIMER_STOP |
+ CPUIDLE_FLAG_RCU_IDLE,
.name = "C1",
.desc = "ARM big-cluster power down",
},
@@ -124,11 +126,13 @@ static int bl_enter_powerdown(struct cpu
struct cpuidle_driver *drv, int idx)
{
cpu_pm_enter();
+ ct_idle_enter();
cpu_suspend(0, bl_powerdown_finisher);
/* signals the MCPM core that CPU is out of low power state */
mcpm_cpu_powered_up();
+ ct_idle_exit();
cpu_pm_exit();
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -357,6 +357,7 @@ static int psci_idle_init_cpu(struct dev
* PSCI idle states relies on architectural WFI to be represented as
* state index 0.
*/
+ drv->states[0].flags = CPUIDLE_FLAG_RCU_IDLE;
drv->states[0].enter = psci_enter_idle_state;
drv->states[0].exit_latency = 1;
drv->states[0].target_residency = 1;
--- a/drivers/cpuidle/cpuidle-qcom-spm.c
+++ b/drivers/cpuidle/cpuidle-qcom-spm.c
@@ -72,6 +72,7 @@ static struct cpuidle_driver qcom_spm_id
.owner = THIS_MODULE,
.states[0] = {
.enter = spm_enter_idle_state,
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.exit_latency = 1,
.target_residency = 1,
.power_usage = UINT_MAX,
--- a/drivers/cpuidle/cpuidle-riscv-sbi.c
+++ b/drivers/cpuidle/cpuidle-riscv-sbi.c
@@ -332,6 +332,7 @@ static int sbi_cpuidle_init_cpu(struct d
drv->cpumask = (struct cpumask *)cpumask_of(cpu);
/* RISC-V architectural WFI to be represented as state index 0. */
+ drv->states[0].flags = CPUIDLE_FLAG_RCU_IDLE;
drv->states[0].enter = sbi_cpuidle_enter_state;
drv->states[0].exit_latency = 1;
drv->states[0].target_residency = 1;
--- a/drivers/cpuidle/dt_idle_states.c
+++ b/drivers/cpuidle/dt_idle_states.c
@@ -77,7 +77,7 @@ static int init_state_node(struct cpuidl
if (err)
desc = state_node->name;
- idle_state->flags = 0;
+ idle_state->flags = CPUIDLE_FLAG_RCU_IDLE;
if (of_property_read_bool(state_node, "local-timer-stop"))
idle_state->flags |= CPUIDLE_FLAG_TIMER_STOP;
/*
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -282,14 +282,18 @@ extern s64 cpuidle_governor_latency_req(
int __ret = 0; \
\
if (!idx) { \
+ ct_idle_enter(); \
cpu_do_idle(); \
+ ct_idle_exit(); \
return idx; \
} \
\
if (!is_retention) \
__ret = cpu_pm_enter(); \
if (!__ret) { \
+ ct_idle_enter(); \
__ret = low_level_idle_enter(state); \
+ ct_idle_exit(); \
if (!is_retention) \
cpu_pm_exit(); \
} \
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 13/44] cpuidle: Fix ct_idle_*() usage
The whole disable-RCU, enable-IRQS dance is very intricate since
changing IRQ state is traced, which depends on RCU.
Add two helpers for the cpuidle case that mirror the entry code.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
arch/arm/mach-imx/cpuidle-imx6q.c | 4 +--
arch/arm/mach-imx/cpuidle-imx6sx.c | 4 +--
arch/arm/mach-omap2/cpuidle34xx.c | 4 +--
arch/arm/mach-omap2/cpuidle44xx.c | 8 +++---
drivers/acpi/processor_idle.c | 8 ++++--
drivers/cpuidle/cpuidle-big_little.c | 4 +--
drivers/cpuidle/cpuidle-mvebu-v7.c | 4 +--
drivers/cpuidle/cpuidle-psci.c | 4 +--
drivers/cpuidle/cpuidle-riscv-sbi.c | 4 +--
drivers/cpuidle/cpuidle-tegra.c | 8 +++---
drivers/cpuidle/cpuidle.c | 11 ++++----
include/linux/cpuidle.h | 38 ++++++++++++++++++++++++++---
kernel/sched/idle.c | 45 ++++++++++-------------------------
kernel/time/tick-broadcast.c | 6 +++-
14 files changed, 86 insertions(+), 66 deletions(-)
--- a/arch/arm/mach-imx/cpuidle-imx6q.c
+++ b/arch/arm/mach-imx/cpuidle-imx6q.c
@@ -25,9 +25,9 @@ static int imx6q_enter_wait(struct cpuid
imx6_set_lpm(WAIT_UNCLOCKED);
raw_spin_unlock(&cpuidle_lock);
- ct_idle_enter();
+ ct_cpuidle_enter();
cpu_do_idle();
- ct_idle_exit();
+ ct_cpuidle_exit();
raw_spin_lock(&cpuidle_lock);
if (num_idle_cpus-- == num_online_cpus())
--- a/arch/arm/mach-imx/cpuidle-imx6sx.c
+++ b/arch/arm/mach-imx/cpuidle-imx6sx.c
@@ -47,9 +47,9 @@ static int imx6sx_enter_wait(struct cpui
cpu_pm_enter();
cpu_cluster_pm_enter();
- ct_idle_enter();
+ ct_cpuidle_enter();
cpu_suspend(0, imx6sx_idle_finish);
- ct_idle_exit();
+ ct_cpuidle_exit();
cpu_cluster_pm_exit();
cpu_pm_exit();
--- a/arch/arm/mach-omap2/cpuidle34xx.c
+++ b/arch/arm/mach-omap2/cpuidle34xx.c
@@ -133,9 +133,9 @@ static int omap3_enter_idle(struct cpuid
}
/* Execute ARM wfi */
- ct_idle_enter();
+ ct_cpuidle_enter();
omap_sram_idle();
- ct_idle_exit();
+ ct_cpuidle_exit();
/*
* Call idle CPU PM enter notifier chain to restore
--- a/arch/arm/mach-omap2/cpuidle44xx.c
+++ b/arch/arm/mach-omap2/cpuidle44xx.c
@@ -105,9 +105,9 @@ static int omap_enter_idle_smp(struct cp
}
raw_spin_unlock_irqrestore(&mpu_lock, flag);
- ct_idle_enter();
+ ct_cpuidle_enter();
omap4_enter_lowpower(dev->cpu, cx->cpu_state);
- ct_idle_exit();
+ ct_cpuidle_exit();
raw_spin_lock_irqsave(&mpu_lock, flag);
if (cx->mpu_state_vote == num_online_cpus())
@@ -186,10 +186,10 @@ static int omap_enter_idle_coupled(struc
}
}
- ct_idle_enter();
+ ct_cpuidle_enter();
omap4_enter_lowpower(dev->cpu, cx->cpu_state);
cpu_done[dev->cpu] = true;
- ct_idle_exit();
+ ct_cpuidle_exit();
/* Wakeup CPU1 only if it is not offlined */
if (dev->cpu == 0 && cpumask_test_cpu(1, cpu_online_mask)) {
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -627,6 +627,8 @@ static int __cpuidle acpi_idle_enter_bm(
*/
bool dis_bm = pr->flags.bm_control;
+ instrumentation_begin();
+
/* If we can skip BM, demote to a safe state. */
if (!cx->bm_sts_skip && acpi_idle_bm_check()) {
dis_bm = false;
@@ -648,11 +650,11 @@ static int __cpuidle acpi_idle_enter_bm(
raw_spin_unlock(&c3_lock);
}
- ct_idle_enter();
+ ct_cpuidle_enter();
acpi_idle_do_entry(cx);
- ct_idle_exit();
+ ct_cpuidle_exit();
/* Re-enable bus master arbitration */
if (dis_bm) {
@@ -662,6 +664,8 @@ static int __cpuidle acpi_idle_enter_bm(
raw_spin_unlock(&c3_lock);
}
+ instrumentation_end();
+
return index;
}
--- a/drivers/cpuidle/cpuidle-big_little.c
+++ b/drivers/cpuidle/cpuidle-big_little.c
@@ -126,13 +126,13 @@ static int bl_enter_powerdown(struct cpu
struct cpuidle_driver *drv, int idx)
{
cpu_pm_enter();
- ct_idle_enter();
+ ct_cpuidle_enter();
cpu_suspend(0, bl_powerdown_finisher);
/* signals the MCPM core that CPU is out of low power state */
mcpm_cpu_powered_up();
- ct_idle_exit();
+ ct_cpuidle_exit();
cpu_pm_exit();
--- a/drivers/cpuidle/cpuidle-mvebu-v7.c
+++ b/drivers/cpuidle/cpuidle-mvebu-v7.c
@@ -36,9 +36,9 @@ static int mvebu_v7_enter_idle(struct cp
if (drv->states[index].flags & MVEBU_V7_FLAG_DEEP_IDLE)
deepidle = true;
- ct_idle_enter();
+ ct_cpuidle_enter();
ret = mvebu_v7_cpu_suspend(deepidle);
- ct_idle_exit();
+ ct_cpuidle_exit();
cpu_pm_exit();
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -74,7 +74,7 @@ static int __psci_enter_domain_idle_stat
else
pm_runtime_put_sync_suspend(pd_dev);
- ct_idle_enter();
+ ct_cpuidle_enter();
state = psci_get_domain_state();
if (!state)
@@ -82,7 +82,7 @@ static int __psci_enter_domain_idle_stat
ret = psci_cpu_suspend_enter(state) ? -1 : idx;
- ct_idle_exit();
+ ct_cpuidle_exit();
if (s2idle)
dev_pm_genpd_resume(pd_dev);
--- a/drivers/cpuidle/cpuidle-riscv-sbi.c
+++ b/drivers/cpuidle/cpuidle-riscv-sbi.c
@@ -121,7 +121,7 @@ static int __sbi_enter_domain_idle_state
else
pm_runtime_put_sync_suspend(pd_dev);
- ct_idle_enter();
+ ct_cpuidle_enter();
if (sbi_is_domain_state_available())
state = sbi_get_domain_state();
@@ -130,7 +130,7 @@ static int __sbi_enter_domain_idle_state
ret = sbi_suspend(state) ? -1 : idx;
- ct_idle_exit();
+ ct_cpuidle_exit();
if (s2idle)
dev_pm_genpd_resume(pd_dev);
--- a/drivers/cpuidle/cpuidle-tegra.c
+++ b/drivers/cpuidle/cpuidle-tegra.c
@@ -183,7 +183,7 @@ static int tegra_cpuidle_state_enter(str
tegra_pm_set_cpu_in_lp2();
cpu_pm_enter();
- ct_idle_enter();
+ ct_cpuidle_enter();
switch (index) {
case TEGRA_C7:
@@ -199,7 +199,7 @@ static int tegra_cpuidle_state_enter(str
break;
}
- ct_idle_exit();
+ ct_cpuidle_exit();
cpu_pm_exit();
tegra_pm_clear_cpu_in_lp2();
@@ -240,10 +240,10 @@ static int tegra_cpuidle_enter(struct cp
if (index == TEGRA_C1) {
if (do_rcu)
- ct_idle_enter();
+ ct_cpuidle_enter();
ret = arm_cpuidle_simple_enter(dev, drv, index);
if (do_rcu)
- ct_idle_exit();
+ ct_cpuidle_exit();
} else
ret = tegra_cpuidle_state_enter(dev, index, cpu);
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -14,6 +14,7 @@
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
+#include <linux/sched/idle.h>
#include <linux/notifier.h>
#include <linux/pm_qos.h>
#include <linux/cpu.h>
@@ -152,12 +153,12 @@ static void enter_s2idle_proper(struct c
*/
stop_critical_timings();
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- ct_idle_enter();
+ ct_cpuidle_enter();
target_state->enter_s2idle(dev, drv, index);
if (WARN_ON_ONCE(!irqs_disabled()))
- local_irq_disable();
+ raw_local_irq_disable();
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- ct_idle_exit();
+ ct_cpuidle_exit();
tick_unfreeze();
start_critical_timings();
@@ -235,14 +236,14 @@ int cpuidle_enter_state(struct cpuidle_d
stop_critical_timings();
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- ct_idle_enter();
+ ct_cpuidle_enter();
entered_state = target_state->enter(dev, drv, index);
if (WARN_ONCE(!irqs_disabled(), "%ps leaked IRQ state", target_state->enter))
raw_local_irq_disable();
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- ct_idle_exit();
+ ct_cpuidle_exit();
start_critical_timings();
sched_clock_idle_wakeup_event();
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -14,6 +14,7 @@
#include <linux/percpu.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
+#include <linux/context_tracking.h>
#define CPUIDLE_STATE_MAX 10
#define CPUIDLE_NAME_LEN 16
@@ -115,6 +116,35 @@ struct cpuidle_device {
DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev);
+static __always_inline void ct_cpuidle_enter(void)
+{
+ lockdep_assert_irqs_disabled();
+ /*
+ * Idle is allowed to (temporarily) enable IRQs. It
+ * will return with IRQs disabled.
+ *
+ * Trace IRQs enable here, then switch off RCU, and have
+ * arch_cpu_idle() use raw_local_irq_enable(). Note that
+ * ct_idle_enter() relies on lockdep IRQ state, so switch that
+ * last -- this is very similar to the entry code.
+ */
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare();
+ instrumentation_end();
+ ct_idle_enter();
+ lockdep_hardirqs_on(_THIS_IP_);
+}
+
+static __always_inline void ct_cpuidle_exit(void)
+{
+ /*
+ * Carefully undo the above.
+ */
+ lockdep_hardirqs_off(_THIS_IP_);
+ ct_idle_exit();
+ instrumentation_begin();
+}
+
/****************************
* CPUIDLE DRIVER INTERFACE *
****************************/
@@ -282,18 +312,18 @@ extern s64 cpuidle_governor_latency_req(
int __ret = 0; \
\
if (!idx) { \
- ct_idle_enter(); \
+ ct_cpuidle_enter(); \
cpu_do_idle(); \
- ct_idle_exit(); \
+ ct_cpuidle_exit(); \
return idx; \
} \
\
if (!is_retention) \
__ret = cpu_pm_enter(); \
if (!__ret) { \
- ct_idle_enter(); \
+ ct_cpuidle_enter(); \
__ret = low_level_idle_enter(state); \
- ct_idle_exit(); \
+ ct_cpuidle_exit(); \
if (!is_retention) \
cpu_pm_exit(); \
} \
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -51,18 +51,22 @@ __setup("hlt", cpu_idle_nopoll_setup);
static noinline int __cpuidle cpu_idle_poll(void)
{
+ instrumentation_begin();
trace_cpu_idle(0, smp_processor_id());
stop_critical_timings();
- ct_idle_enter();
- local_irq_enable();
+ ct_cpuidle_enter();
+ raw_local_irq_enable();
while (!tif_need_resched() &&
(cpu_idle_force_poll || tick_check_broadcast_expired()))
cpu_relax();
+ raw_local_irq_disable();
- ct_idle_exit();
+ ct_cpuidle_exit();
start_critical_timings();
trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+ local_irq_enable();
+ instrumentation_end();
return 1;
}
@@ -85,44 +89,21 @@ void __weak arch_cpu_idle(void)
*/
void __cpuidle default_idle_call(void)
{
- if (current_clr_polling_and_test()) {
- local_irq_enable();
- } else {
-
+ instrumentation_begin();
+ if (!current_clr_polling_and_test()) {
trace_cpu_idle(1, smp_processor_id());
stop_critical_timings();
- /*
- * arch_cpu_idle() is supposed to enable IRQs, however
- * we can't do that because of RCU and tracing.
- *
- * Trace IRQs enable here, then switch off RCU, and have
- * arch_cpu_idle() use raw_local_irq_enable(). Note that
- * ct_idle_enter() relies on lockdep IRQ state, so switch that
- * last -- this is very similar to the entry code.
- */
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare();
- ct_idle_enter();
- lockdep_hardirqs_on(_THIS_IP_);
-
+ ct_cpuidle_enter();
arch_cpu_idle();
-
- /*
- * OK, so IRQs are enabled here, but RCU needs them disabled to
- * turn itself back on.. funny thing is that disabling IRQs
- * will cause tracing, which needs RCU. Jump through hoops to
- * make it 'work'.
- */
raw_local_irq_disable();
- lockdep_hardirqs_off(_THIS_IP_);
- ct_idle_exit();
- lockdep_hardirqs_on(_THIS_IP_);
- raw_local_irq_enable();
+ ct_cpuidle_exit();
start_critical_timings();
trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
}
+ local_irq_enable();
+ instrumentation_end();
}
static int call_cpuidle_s2idle(struct cpuidle_driver *drv,
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -622,9 +622,13 @@ struct cpumask *tick_get_broadcast_onesh
* to avoid a deep idle transition as we are about to get the
* broadcast IPI right away.
*/
-int tick_check_broadcast_expired(void)
+noinstr int tick_check_broadcast_expired(void)
{
+#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
+ return arch_test_bit(smp_processor_id(), cpumask_bits(tick_broadcast_force_mask));
+#else
return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
+#endif
}
/*
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 14/44] cpuidle, cpu_pm: Remove RCU fiddling from cpu_pm_{enter, exit}()
All callers should still have RCU enabled.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
Acked-by: Mark Rutland <mark.rutland at arm.com>
---
kernel/cpu_pm.c | 9 ---------
1 file changed, 9 deletions(-)
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -30,16 +30,9 @@ static int cpu_pm_notify(enum cpu_pm_eve
{
int ret;
- /*
- * This introduces a RCU read critical section, which could be
- * disfunctional in cpu idle. Copy RCU_NONIDLE code to let RCU know
- * this.
- */
- ct_irq_enter_irqson();
rcu_read_lock();
ret = raw_notifier_call_chain(&cpu_pm_notifier.chain, event, NULL);
rcu_read_unlock();
- ct_irq_exit_irqson();
return notifier_to_errno(ret);
}
@@ -49,11 +42,9 @@ static int cpu_pm_notify_robust(enum cpu
unsigned long flags;
int ret;
- ct_irq_enter_irqson();
raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
ret = raw_notifier_call_chain_robust(&cpu_pm_notifier.chain, event_up,
event_down, NULL);
raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
- ct_irq_exit_irqson();
return notifier_to_errno(ret);
}
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 15/44] acpi_idle: Remove tracing
All the idle routines are called with RCU disabled, as such there must
not be any tracing inside.
While there, clean up the io-port idle thing.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
drivers/acpi/processor_idle.c | 24 +++++++++++++-----------
1 file changed, 13 insertions(+), 11 deletions(-)
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -108,8 +108,8 @@ static const struct dmi_system_id proces
static void __cpuidle acpi_safe_halt(void)
{
if (!tif_need_resched()) {
- safe_halt();
- local_irq_disable();
+ raw_safe_halt();
+ raw_local_irq_disable();
}
}
@@ -524,16 +524,21 @@ static int acpi_idle_bm_check(void)
return bm_status;
}
-static void wait_for_freeze(void)
+static __cpuidle void io_idle(unsigned long addr)
{
+ /* IO port based C-state */
+ inb(addr);
+
#ifdef CONFIG_X86
/* No delay is needed if we are in guest */
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return;
#endif
- /* Dummy wait op - must do something useless after P_LVL2 read
- because chipsets cannot guarantee that STPCLK# signal
- gets asserted in time to freeze execution properly. */
+ /*
+ * Dummy wait op - must do something useless after P_LVL2 read
+ * because chipsets cannot guarantee that STPCLK# signal
+ * gets asserted in time to freeze execution properly.
+ */
inl(acpi_gbl_FADT.xpm_timer_block.address);
}
@@ -553,9 +558,7 @@ static void __cpuidle acpi_idle_do_entry
} else if (cx->entry_method == ACPI_CSTATE_HALT) {
acpi_safe_halt();
} else {
- /* IO port based C-state */
- inb(cx->address);
- wait_for_freeze();
+ io_idle(cx->address);
}
perf_lopwr_cb(false);
@@ -577,8 +580,7 @@ static int acpi_idle_play_dead(struct cp
if (cx->entry_method == ACPI_CSTATE_HALT)
safe_halt();
else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
- inb(cx->address);
- wait_for_freeze();
+ io_idle(cx->address);
} else
return -ENODEV;
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 16/44] cpuidle: Annotate poll_idle()
The __cpuidle functions will become a noinstr class, as such they need
explicit annotations.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki at intel.com>
---
drivers/cpuidle/poll_state.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -13,7 +13,10 @@
static int __cpuidle poll_idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
- u64 time_start = local_clock();
+ u64 time_start;
+
+ instrumentation_begin();
+ time_start = local_clock();
dev->poll_time_limit = false;
@@ -39,6 +42,7 @@ static int __cpuidle poll_idle(struct cp
raw_local_irq_disable();
current_clr_polling();
+ instrumentation_end();
return index;
}
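The rule these annotations follow, and which the next patch teaches objtool to enforce: inside a __cpuidle (soon noinstr) function, any call into instrumentable code must be explicitly bracketed. A minimal illustration (names are placeholders, not from the patch):

	static int __cpuidle my_idle(struct cpuidle_device *dev,
				     struct cpuidle_driver *drv, int index)
	{
		instrumentation_begin();	/* tracing etc. allowed here */
		trace_cpu_idle(index, smp_processor_id());
		instrumentation_end();		/* back under noinstr rules */

		raw_safe_halt();		/* only raw, untraced ops from here */
		raw_local_irq_disable();
		return index;
	}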
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 17/44] objtool/idle: Validate __cpuidle code as noinstr
Idle code is very much like entry code in that RCU isn't available. As
such, add a little validation.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
Acked-by: Geert Uytterhoeven <geert at linux-m68k.org>
---
arch/alpha/kernel/vmlinux.lds.S | 1 -
arch/arc/kernel/vmlinux.lds.S | 1 -
arch/arm/include/asm/vmlinux.lds.h | 1 -
arch/arm64/kernel/vmlinux.lds.S | 1 -
arch/csky/kernel/vmlinux.lds.S | 1 -
arch/hexagon/kernel/vmlinux.lds.S | 1 -
arch/ia64/kernel/vmlinux.lds.S | 1 -
arch/loongarch/kernel/vmlinux.lds.S | 1 -
arch/m68k/kernel/vmlinux-nommu.lds | 1 -
arch/m68k/kernel/vmlinux-std.lds | 1 -
arch/m68k/kernel/vmlinux-sun3.lds | 1 -
arch/microblaze/kernel/vmlinux.lds.S | 1 -
arch/mips/kernel/vmlinux.lds.S | 1 -
arch/nios2/kernel/vmlinux.lds.S | 1 -
arch/openrisc/kernel/vmlinux.lds.S | 1 -
arch/parisc/kernel/vmlinux.lds.S | 1 -
arch/powerpc/kernel/vmlinux.lds.S | 1 -
arch/riscv/kernel/vmlinux-xip.lds.S | 1 -
arch/riscv/kernel/vmlinux.lds.S | 1 -
arch/s390/kernel/vmlinux.lds.S | 1 -
arch/sh/kernel/vmlinux.lds.S | 1 -
arch/sparc/kernel/vmlinux.lds.S | 1 -
arch/um/kernel/dyn.lds.S | 1 -
arch/um/kernel/uml.lds.S | 1 -
arch/x86/include/asm/irqflags.h | 11 ++++-------
arch/x86/include/asm/mwait.h | 2 +-
arch/x86/kernel/vmlinux.lds.S | 1 -
arch/xtensa/kernel/vmlinux.lds.S | 1 -
include/asm-generic/vmlinux.lds.h | 9 +++------
include/linux/compiler_types.h | 8 ++++++--
include/linux/cpu.h | 3 ---
tools/objtool/check.c | 13 +++++++++++++
32 files changed, 27 insertions(+), 45 deletions(-)
--- a/arch/alpha/kernel/vmlinux.lds.S
+++ b/arch/alpha/kernel/vmlinux.lds.S
@@ -27,7 +27,6 @@ SECTIONS
HEAD_TEXT
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
*(.fixup)
*(.gnu.warning)
--- a/arch/arc/kernel/vmlinux.lds.S
+++ b/arch/arc/kernel/vmlinux.lds.S
@@ -85,7 +85,6 @@ SECTIONS
_stext = .;
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
--- a/arch/arm/include/asm/vmlinux.lds.h
+++ b/arch/arm/include/asm/vmlinux.lds.h
@@ -96,7 +96,6 @@
SOFTIRQENTRY_TEXT \
TEXT_TEXT \
SCHED_TEXT \
- CPUIDLE_TEXT \
LOCK_TEXT \
KPROBES_TEXT \
ARM_STUBS_TEXT \
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -163,7 +163,6 @@ SECTIONS
ENTRY_TEXT
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
HYPERVISOR_TEXT
--- a/arch/csky/kernel/vmlinux.lds.S
+++ b/arch/csky/kernel/vmlinux.lds.S
@@ -38,7 +38,6 @@ SECTIONS
SOFTIRQENTRY_TEXT
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
*(.fixup)
--- a/arch/hexagon/kernel/vmlinux.lds.S
+++ b/arch/hexagon/kernel/vmlinux.lds.S
@@ -41,7 +41,6 @@ SECTIONS
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
*(.fixup)
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -51,7 +51,6 @@ SECTIONS {
__end_ivt_text = .;
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
--- a/arch/loongarch/kernel/vmlinux.lds.S
+++ b/arch/loongarch/kernel/vmlinux.lds.S
@@ -41,7 +41,6 @@ SECTIONS
.text : {
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
--- a/arch/m68k/kernel/vmlinux-nommu.lds
+++ b/arch/m68k/kernel/vmlinux-nommu.lds
@@ -48,7 +48,6 @@ SECTIONS {
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
*(.fixup)
. = ALIGN(16);
--- a/arch/m68k/kernel/vmlinux-std.lds
+++ b/arch/m68k/kernel/vmlinux-std.lds
@@ -19,7 +19,6 @@ SECTIONS
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
*(.fixup)
*(.gnu.warning)
--- a/arch/m68k/kernel/vmlinux-sun3.lds
+++ b/arch/m68k/kernel/vmlinux-sun3.lds
@@ -19,7 +19,6 @@ SECTIONS
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
*(.fixup)
*(.gnu.warning)
--- a/arch/microblaze/kernel/vmlinux.lds.S
+++ b/arch/microblaze/kernel/vmlinux.lds.S
@@ -36,7 +36,6 @@ SECTIONS {
EXIT_TEXT
EXIT_CALL
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -61,7 +61,6 @@ SECTIONS
.text : {
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
--- a/arch/nios2/kernel/vmlinux.lds.S
+++ b/arch/nios2/kernel/vmlinux.lds.S
@@ -24,7 +24,6 @@ SECTIONS
.text : {
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
--- a/arch/openrisc/kernel/vmlinux.lds.S
+++ b/arch/openrisc/kernel/vmlinux.lds.S
@@ -52,7 +52,6 @@ SECTIONS
_stext = .;
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -86,7 +86,6 @@ SECTIONS
TEXT_TEXT
LOCK_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -107,7 +107,6 @@ SECTIONS
#endif
NOINSTR_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
--- a/arch/riscv/kernel/vmlinux-xip.lds.S
+++ b/arch/riscv/kernel/vmlinux-xip.lds.S
@@ -39,7 +39,6 @@ SECTIONS
_stext = .;
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
ENTRY_TEXT
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -42,7 +42,6 @@ SECTIONS
_stext = .;
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
ENTRY_TEXT
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -42,7 +42,6 @@ SECTIONS
HEAD_TEXT
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
--- a/arch/sh/kernel/vmlinux.lds.S
+++ b/arch/sh/kernel/vmlinux.lds.S
@@ -29,7 +29,6 @@ SECTIONS
HEAD_TEXT
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -50,7 +50,6 @@ SECTIONS
HEAD_TEXT
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
--- a/arch/um/kernel/dyn.lds.S
+++ b/arch/um/kernel/dyn.lds.S
@@ -74,7 +74,6 @@ SECTIONS
_stext = .;
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
--- a/arch/um/kernel/uml.lds.S
+++ b/arch/um/kernel/uml.lds.S
@@ -35,7 +35,6 @@ SECTIONS
_stext = .;
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -8,9 +8,6 @@
#include <asm/nospec-branch.h>
-/* Provide __cpuidle; we can't safely include <linux/cpu.h> */
-#define __cpuidle __section(".cpuidle.text")
-
/*
* Interrupt control:
*/
@@ -45,13 +42,13 @@ static __always_inline void native_irq_e
asm volatile("sti": : :"memory");
}
-static inline __cpuidle void native_safe_halt(void)
+static __always_inline void native_safe_halt(void)
{
mds_idle_clear_cpu_buffers();
asm volatile("sti; hlt": : :"memory");
}
-static inline __cpuidle void native_halt(void)
+static __always_inline void native_halt(void)
{
mds_idle_clear_cpu_buffers();
asm volatile("hlt": : :"memory");
@@ -84,7 +81,7 @@ static __always_inline void arch_local_i
* Used in the idle loop; sti takes one instruction cycle
* to complete:
*/
-static inline __cpuidle void arch_safe_halt(void)
+static __always_inline void arch_safe_halt(void)
{
native_safe_halt();
}
@@ -93,7 +90,7 @@ static inline __cpuidle void arch_safe_h
* Used when interrupts are already enabled or to
* shutdown the processor:
*/
-static inline __cpuidle void halt(void)
+static __always_inline void halt(void)
{
native_halt();
}
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -104,7 +104,7 @@ static inline void __sti_mwait(unsigned
* New with Core Duo processors, MWAIT can take some hints based on CPU
* capability.
*/
-static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
+static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
{
if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) {
if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) {
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -129,7 +129,6 @@ SECTIONS
HEAD_TEXT
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
ALIGN_ENTRY_TEXT_BEGIN
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -125,7 +125,6 @@ SECTIONS
ENTRY_TEXT
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
*(.fixup)
}
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -559,6 +559,9 @@
ALIGN_FUNCTION(); \
__noinstr_text_start = .; \
*(.noinstr.text) \
+ __cpuidle_text_start = .; \
+ *(.cpuidle.text) \
+ __cpuidle_text_end = .; \
__noinstr_text_end = .;
/*
@@ -600,12 +603,6 @@
*(.spinlock.text) \
__lock_text_end = .;
-#define CPUIDLE_TEXT \
- ALIGN_FUNCTION(); \
- __cpuidle_text_start = .; \
- *(.cpuidle.text) \
- __cpuidle_text_end = .;
-
#define KPROBES_TEXT \
ALIGN_FUNCTION(); \
__kprobes_text_start = .; \
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -227,10 +227,14 @@ struct ftrace_likely_data {
#endif
/* Section for code which can't be instrumented at all */
-#define noinstr \
- noinline notrace __attribute((__section__(".noinstr.text"))) \
+#define __noinstr_section(section) \
+ noinline notrace __attribute((__section__(section))) \
__no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage
+#define noinstr __noinstr_section(".noinstr.text")
+
+#define __cpuidle __noinstr_section(".cpuidle.text")
+
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -176,9 +176,6 @@ void __noreturn cpu_startup_entry(enum c
void cpu_idle_poll_ctrl(bool enable);
-/* Attach to any functions which should be considered cpuidle. */
-#define __cpuidle __section(".cpuidle.text")
-
bool cpu_in_idle(unsigned long pc);
void arch_cpu_idle(void);
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -377,6 +377,7 @@ static int decode_instructions(struct ob
if (!strcmp(sec->name, ".noinstr.text") ||
!strcmp(sec->name, ".entry.text") ||
+ !strcmp(sec->name, ".cpuidle.text") ||
!strncmp(sec->name, ".text.__x86.", 12))
sec->noinstr = true;
@@ -3187,6 +3188,12 @@ static inline bool noinstr_call_dest(str
return true;
/*
+ * If the symbol is a static_call trampoline, we can't tell.
+ */
+ if (func->static_call_tramp)
+ return true;
+
+ /*
* The __ubsan_handle_*() calls are like WARN(), they only happen when
* something 'BAD' happened. At the risk of taking the machine down,
* let them proceed to get the message out.
@@ -3932,6 +3939,12 @@ static int validate_noinstr_sections(str
if (sec) {
warnings += validate_section(file, sec);
warnings += validate_unwind_hints(file, sec);
+ }
+
+ sec = find_section_by_name(file->elf, ".cpuidle.text");
+ if (sec) {
+ warnings += validate_section(file, sec);
+ warnings += validate_unwind_hints(file, sec);
}
return warnings;
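To sketch the net effect (illustrative, not part of the patch): with __cpuidle now defined via __noinstr_section(), a cpuidle enter method is objtool-validated like any other noinstr code and must stick to raw/__always_inline helpers. The callback name below is made up:
static __cpuidle int my_idle_enter(struct cpuidle_device *dev,
                                   struct cpuidle_driver *drv, int index)
{
        raw_safe_halt();                /* raw helper: no tracing, OK here */
        raw_local_irq_disable();        /* ditto; local_irq_*() would splat */
        return index;
}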
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 18/44] cpuidle, intel_idle: Fix CPUIDLE_FLAG_IRQ_ENABLE *again*
vmlinux.o: warning: objtool: intel_idle_irq+0x10c: call to trace_hardirqs_off()
leaves .noinstr.text section
As per commit 32d4fd5751ea ("cpuidle,intel_idle: Fix
CPUIDLE_FLAG_IRQ_ENABLE"):
"must not have tracing in idle functions"
Clearly people can't read and tinker along until the splat disappears.
This straight up reverts commit d295ad34f236 ("intel_idle: Fix false
positive RCU splats due to incorrect hardirqs state").
It doesn't re-introduce the problem because preceding patches fixed it
properly.
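For reference, the difference being relied on, roughly (irqflags with CONFIG_TRACE_IRQFLAGS):
#define local_irq_disable() \
        do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
raw_local_irq_disable() is the bare architecture op with no tracing hook, which is what makes it legal inside a __cpuidle/.noinstr.text function.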
Fixes: d295ad34f236 ("intel_idle: Fix false positive RCU splats due to
incorrect hardirqs state")
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
drivers/idle/intel_idle.c | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -168,13 +168,7 @@ static __cpuidle int intel_idle_irq(stru
raw_local_irq_enable();
ret = __intel_idle(dev, drv, index);
-
- /*
- * The lockdep hardirqs state may be changed to 'on' with timer
- * tick interrupt followed by __do_softirq(). Use local_irq_disable()
- * to keep the hardirqs state correct.
- */
- local_irq_disable();
+ raw_local_irq_disable();
return ret;
}
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 19/44] cpuidle,intel_idle: Fix CPUIDLE_FLAG_INIT_XSTATE
vmlinux.o: warning: objtool: intel_idle_s2idle+0xd5: call to fpu_idle_fpregs()
leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle_xstate+0x11: call to fpu_idle_fpregs()
leaves .noinstr.text section
vmlinux.o: warning: objtool: fpu_idle_fpregs+0x9: call to xfeatures_in_use()
leaves .noinstr.text section
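For context: fpregs_deactivate() ends in a tracepoint (trace_x86_fpu_regs_deactivated()), which is why the hunk below open-codes the fpu_fpregs_owner_ctx clear instead (assuming the current x86/fpu internals; the tracepoint is the whole reason for the change).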
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
arch/x86/include/asm/fpu/xcr.h | 4 ++--
arch/x86/include/asm/special_insns.h | 2 +-
arch/x86/kernel/fpu/core.c | 4 ++--
3 files changed, 5 insertions(+), 5 deletions(-)
--- a/arch/x86/include/asm/fpu/xcr.h
+++ b/arch/x86/include/asm/fpu/xcr.h
@@ -5,7 +5,7 @@
#define XCR_XFEATURE_ENABLED_MASK 0x00000000
#define XCR_XFEATURE_IN_USE_MASK 0x00000001
-static inline u64 xgetbv(u32 index)
+static __always_inline u64 xgetbv(u32 index)
{
u32 eax, edx;
@@ -27,7 +27,7 @@ static inline void xsetbv(u32 index, u64
*
* Callers should check X86_FEATURE_XGETBV1.
*/
-static inline u64 xfeatures_in_use(void)
+static __always_inline u64 xfeatures_in_use(void)
{
return xgetbv(XCR_XFEATURE_IN_USE_MASK);
}
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -295,7 +295,7 @@ static inline int enqcmds(void __iomem *
return 0;
}
-static inline void tile_release(void)
+static __always_inline void tile_release(void)
{
/*
* Instruction opcode for TILERELEASE; supported in binutils
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -856,12 +856,12 @@ int fpu__exception_code(struct fpu *fpu,
* Initialize register state that may prevent from entering low-power idle.
* This function will be invoked from the cpuidle driver only when needed.
*/
-void fpu_idle_fpregs(void)
+noinstr void fpu_idle_fpregs(void)
{
/* Note: AMX_TILE being enabled implies XGETBV1 support */
if (cpu_feature_enabled(X86_FEATURE_AMX_TILE) &&
(xfeatures_in_use() & XFEATURE_MASK_XTILE)) {
tile_release();
- fpregs_deactivate(&current->thread.fpu);
+ __this_cpu_write(fpu_fpregs_owner_ctx, NULL);
}
}
Peter Zijlstra
2022-Sep-19 09:59 UTC
[PATCH v2 20/44] cpuidle,intel_idle: Fix CPUIDLE_FLAG_IBRS
vmlinux.o: warning: objtool: intel_idle_ibrs+0x17: call to spec_ctrl_current()
leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle_ibrs+0x27: call to wrmsrl.constprop.0()
leaves .noinstr.text section
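Same idea as the previous patch: wrmsrl() can route through the MSR tracepoints (and paravirt), while native_wrmsrl() is the bare WRMSR instruction, and spec_ctrl_current() gains noinstr so intel_idle_ibrs() stays inside the validated section (the paravirt detail is an assumption about the configs that trip the warning).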
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
arch/x86/kernel/cpu/bugs.c | 2 +-
drivers/idle/intel_idle.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -79,7 +79,7 @@ void write_spec_ctrl_current(u64 val, bo
wrmsrl(MSR_IA32_SPEC_CTRL, val);
}
-u64 spec_ctrl_current(void)
+noinstr u64 spec_ctrl_current(void)
{
return this_cpu_read(x86_spec_ctrl_current);
}
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -181,12 +181,12 @@ static __cpuidle int intel_idle_ibrs(str
int ret;
if (smt_active)
- wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
ret = __intel_idle(dev, drv, index);
if (smt_active)
- wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
return ret;
}
Peter Zijlstra
2022-Sep-19 10:00 UTC
[PATCH v2 21/44] arch/idle: Change arch_cpu_idle() IRQ behaviour
Current arch_cpu_idle() is called with IRQs disabled, but will return
with IRQs enabled.
However, the very first thing the generic code does after calling
arch_cpu_idle() is raw_local_irq_disable(). This means that
architectures that can idle with IRQs disabled end up doing a
pointless 'enable-disable' dance.
Therefore, push this IRQ disabling into the idle function, meaning
that those architectures can avoid the pointless IRQ state flipping.
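A minimal sketch of the new contract, mirroring what the x86 hunk for default_idle() below does:
void arch_cpu_idle(void)
{
        raw_safe_halt();                /* e.g. STI; HLT: wakes with IRQs on */
        raw_local_irq_disable();        /* new: return with IRQs off again */
}
Architectures that already idle with IRQs disabled simply lose their trailing raw_local_irq_enable().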
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy at amd.com>
Acked-by: Mark Rutland <mark.rutland at arm.com> [arm64]
Acked-by: Rafael J. Wysocki <rafael.j.wysocki at intel.com>
---
arch/alpha/kernel/process.c | 1 -
arch/arc/kernel/process.c | 3 +++
arch/arm/kernel/process.c | 1 -
arch/arm/mach-gemini/board-dt.c | 3 ++-
arch/arm64/kernel/idle.c | 1 -
arch/csky/kernel/process.c | 1 -
arch/csky/kernel/smp.c | 2 +-
arch/hexagon/kernel/process.c | 1 -
arch/ia64/kernel/process.c | 1 +
arch/loongarch/kernel/idle.c | 1 +
arch/microblaze/kernel/process.c | 1 -
arch/mips/kernel/idle.c | 8 +++-----
arch/nios2/kernel/process.c | 1 -
arch/openrisc/kernel/process.c | 1 +
arch/parisc/kernel/process.c | 2 --
arch/powerpc/kernel/idle.c | 5 ++---
arch/riscv/kernel/process.c | 1 -
arch/s390/kernel/idle.c | 1 -
arch/sh/kernel/idle.c | 1 +
arch/sparc/kernel/leon_pmc.c | 4 ++++
arch/sparc/kernel/process_32.c | 1 -
arch/sparc/kernel/process_64.c | 3 ++-
arch/um/kernel/process.c | 1 -
arch/x86/coco/tdx/tdx.c | 3 +++
arch/x86/kernel/process.c | 15 ++++-----------
arch/xtensa/kernel/process.c | 1 +
kernel/sched/idle.c | 2 --
27 files changed, 29 insertions(+), 37 deletions(-)
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -57,7 +57,6 @@ EXPORT_SYMBOL(pm_power_off);
void arch_cpu_idle(void)
{
wtint(0);
- raw_local_irq_enable();
}
void arch_cpu_idle_dead(void)
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -114,6 +114,8 @@ void arch_cpu_idle(void)
"sleep %0 \n"
:
:"I"(arg)); /* can't be "r" has to be embedded const
*/
+
+ raw_local_irq_disable();
}
#else /* ARC700 */
@@ -122,6 +124,7 @@ void arch_cpu_idle(void)
{
/* sleep, but enable both set E1/E2 (levels of interrupts) before committing */
__asm__ __volatile__("sleep 0x3 \n");
+ raw_local_irq_disable();
}
#endif
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -78,7 +78,6 @@ void arch_cpu_idle(void)
arm_pm_idle();
else
cpu_do_idle();
- raw_local_irq_enable();
}
void arch_cpu_idle_prepare(void)
--- a/arch/arm/mach-gemini/board-dt.c
+++ b/arch/arm/mach-gemini/board-dt.c
@@ -42,8 +42,9 @@ static void gemini_idle(void)
*/
/* FIXME: Enabling interrupts here is racy! */
- local_irq_enable();
+ raw_local_irq_enable();
cpu_do_idle();
+ raw_local_irq_disable();
}
static void __init gemini_init_machine(void)
--- a/arch/arm64/kernel/idle.c
+++ b/arch/arm64/kernel/idle.c
@@ -42,5 +42,4 @@ void noinstr arch_cpu_idle(void)
* tricks
*/
cpu_do_idle();
- raw_local_irq_enable();
}
--- a/arch/csky/kernel/process.c
+++ b/arch/csky/kernel/process.c
@@ -100,6 +100,5 @@ void arch_cpu_idle(void)
#ifdef CONFIG_CPU_PM_STOP
asm volatile("stop\n");
#endif
- raw_local_irq_enable();
}
#endif
--- a/arch/csky/kernel/smp.c
+++ b/arch/csky/kernel/smp.c
@@ -309,7 +309,7 @@ void arch_cpu_idle_dead(void)
while (!secondary_stack)
arch_cpu_idle();
- local_irq_disable();
+ raw_local_irq_disable();
asm volatile(
"mov sp, %0\n"
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -44,7 +44,6 @@ void arch_cpu_idle(void)
{
__vmwait();
/* interrupts wake us up, but irqs are still disabled */
- raw_local_irq_enable();
}
/*
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -242,6 +242,7 @@ void arch_cpu_idle(void)
(*mark_idle)(1);
raw_safe_halt();
+ raw_local_irq_disable();
if (mark_idle)
(*mark_idle)(0);
--- a/arch/loongarch/kernel/idle.c
+++ b/arch/loongarch/kernel/idle.c
@@ -13,4 +13,5 @@ void __cpuidle arch_cpu_idle(void)
{
raw_local_irq_enable();
__arch_cpu_idle(); /* idle instruction needs irq enabled */
+ raw_local_irq_disable();
}
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -140,5 +140,4 @@ int dump_fpu(struct pt_regs *regs, elf_f
void arch_cpu_idle(void)
{
- raw_local_irq_enable();
}
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c
@@ -33,13 +33,13 @@ static void __cpuidle r3081_wait(void)
{
unsigned long cfg = read_c0_conf();
write_c0_conf(cfg | R30XX_CONF_HALT);
- raw_local_irq_enable();
}
void __cpuidle r4k_wait(void)
{
raw_local_irq_enable();
__r4k_wait();
+ raw_local_irq_disable();
}
/*
@@ -57,7 +57,6 @@ void __cpuidle r4k_wait_irqoff(void)
" .set arch=r4000 \n"
" wait \n"
" .set pop \n");
- raw_local_irq_enable();
}
/*
@@ -77,7 +76,6 @@ static void __cpuidle rm7k_wait_irqoff(v
" wait \n"
" mtc0 $1, $12 # stalls until W stage \n"
" .set pop \n");
- raw_local_irq_enable();
}
/*
@@ -103,6 +101,8 @@ static void __cpuidle au1k_wait(void)
" nop \n"
" .set pop \n"
: : "r" (au1k_wait), "r" (c0status));
+
+ raw_local_irq_disable();
}
static int __initdata nowait;
@@ -245,8 +245,6 @@ void arch_cpu_idle(void)
{
if (cpu_wait)
cpu_wait();
- else
- raw_local_irq_enable();
}
#ifdef CONFIG_CPU_IDLE
--- a/arch/nios2/kernel/process.c
+++ b/arch/nios2/kernel/process.c
@@ -33,7 +33,6 @@ EXPORT_SYMBOL(pm_power_off);
void arch_cpu_idle(void)
{
- raw_local_irq_enable();
}
/*
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -102,6 +102,7 @@ void arch_cpu_idle(void)
raw_local_irq_enable();
if (mfspr(SPR_UPR) & SPR_UPR_PMP)
mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);
+ raw_local_irq_disable();
}
void (*pm_power_off)(void) = NULL;
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -187,8 +187,6 @@ void arch_cpu_idle_dead(void)
void __cpuidle arch_cpu_idle(void)
{
- raw_local_irq_enable();
-
/* nop on real hardware, qemu will idle sleep. */
asm volatile("or %%r10,%%r10,%%r10\n":::);
}
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -51,10 +51,9 @@ void arch_cpu_idle(void)
* Some power_save functions return with
* interrupts enabled, some don't.
*/
- if (irqs_disabled())
- raw_local_irq_enable();
+ if (!irqs_disabled())
+ raw_local_irq_disable();
} else {
- raw_local_irq_enable();
/*
* Go into low thread priority and possibly
* low power mode.
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -39,7 +39,6 @@ extern asmlinkage void ret_from_kernel_t
void arch_cpu_idle(void)
{
cpu_do_idle();
- raw_local_irq_enable();
}
void __show_regs(struct pt_regs *regs)
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -66,7 +66,6 @@ void arch_cpu_idle(void)
idle->idle_count++;
account_idle_time(cputime_to_nsecs(idle_time));
raw_write_seqcount_end(&idle->seqcount);
- raw_local_irq_enable();
}
static ssize_t show_idle_count(struct device *dev,
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -25,6 +25,7 @@ void default_idle(void)
raw_local_irq_enable();
/* Isn't this racy ? */
cpu_sleep();
+ raw_local_irq_disable();
clear_bl_bit();
}
--- a/arch/sparc/kernel/leon_pmc.c
+++ b/arch/sparc/kernel/leon_pmc.c
@@ -57,6 +57,8 @@ static void pmc_leon_idle_fixup(void)
"lda [%0] %1, %%g0\n"
:
: "r"(address), "i"(ASI_LEON_BYPASS));
+
+ raw_local_irq_disable();
}
/*
@@ -70,6 +72,8 @@ static void pmc_leon_idle(void)
/* For systems without power-down, this will be no-op */
__asm__ __volatile__ ("wr %g0, %asr19\n\t");
+
+ raw_local_irq_disable();
}
/* Install LEON Power Down function */
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -71,7 +71,6 @@ void arch_cpu_idle(void)
{
if (sparc_idle)
(*sparc_idle)();
- raw_local_irq_enable();
}
/* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. */
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -59,7 +59,6 @@ void arch_cpu_idle(void)
{
if (tlb_type != hypervisor) {
touch_nmi_watchdog();
- raw_local_irq_enable();
} else {
unsigned long pstate;
@@ -90,6 +89,8 @@ void arch_cpu_idle(void)
"wrpr %0, %%g0, %%pstate"
: "=&r" (pstate)
: "i" (PSTATE_IE));
+
+ raw_local_irq_disable();
}
}
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -217,7 +217,6 @@ void arch_cpu_idle(void)
{
cpu_tasks[current_thread_info()->cpu].pid = os_getpid();
um_idle_sleep();
- raw_local_irq_enable();
}
int __cant_sleep(void) {
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -223,6 +223,9 @@ void __cpuidle tdx_safe_halt(void)
*/
if (__halt(irq_disabled, do_sti))
WARN_ONCE(1, "HLT instruction emulation failed\n");
+
+ /* XXX I can't make sense of what @do_sti actually does */
+ raw_local_irq_disable();
}
static int read_msr(struct pt_regs *regs, struct ve_info *ve)
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -701,6 +701,7 @@ EXPORT_SYMBOL(boot_option_idle_override)
void __cpuidle default_idle(void)
{
raw_safe_halt();
+ raw_local_irq_disable();
}
#if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
EXPORT_SYMBOL(default_idle);
@@ -806,13 +807,7 @@ static void amd_e400_idle(void)
default_idle();
- /*
- * The switch back from broadcast mode needs to be called with
- * interrupts disabled.
- */
- raw_local_irq_disable();
tick_broadcast_exit();
- raw_local_irq_enable();
}
/*
@@ -870,12 +865,10 @@ static __cpuidle void mwait_idle(void)
}
__monitor((void *)&current_thread_info()->flags, 0, 0);
- if (!need_resched())
+ if (!need_resched()) {
__sti_mwait(0, 0);
- else
- raw_local_irq_enable();
- } else {
- raw_local_irq_enable();
+ raw_local_irq_disable();
+ }
}
__current_clr_polling();
}
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -183,6 +183,7 @@ void coprocessor_flush_release_all(struc
void arch_cpu_idle(void)
{
platform_idle();
+ raw_local_irq_disable();
}
/*
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -79,7 +79,6 @@ void __weak arch_cpu_idle_dead(void) { }
void __weak arch_cpu_idle(void)
{
cpu_idle_force_poll = 1;
- raw_local_irq_enable();
}
/**
@@ -96,7 +95,6 @@ void __cpuidle default_idle_call(void)
ct_cpuidle_enter();
arch_cpu_idle();
- raw_local_irq_disable();
ct_cpuidle_exit();
start_critical_timings();
Now that arch_cpu_idle() is expected to return with IRQs disabled,
avoid the useless STI/CLI dance.
Per the specs this is supposed to work, but nobody has yet relied upon
this behaviour, so broken implementations are possible.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
arch/x86/coco/tdx/tdcall.S | 13 -------------
arch/x86/coco/tdx/tdx.c | 23 ++++-------------------
arch/x86/include/asm/shared/tdx.h | 1 -
3 files changed, 4 insertions(+), 33 deletions(-)
--- a/arch/x86/coco/tdx/tdcall.S
+++ b/arch/x86/coco/tdx/tdcall.S
@@ -139,19 +139,6 @@ SYM_FUNC_START(__tdx_hypercall)
movl $TDVMCALL_EXPOSE_REGS_MASK, %ecx
- /*
- * For the idle loop STI needs to be called directly before the TDCALL
- * that enters idle (EXIT_REASON_HLT case). STI instruction enables
- * interrupts only one instruction later. If there is a window between
- * STI and the instruction that emulates the HALT state, there is a
- * chance for interrupts to happen in this window, which can delay the
- * HLT operation indefinitely. Since this is the not the desired
- * result, conditionally call STI before TDCALL.
- */
- testq $TDX_HCALL_ISSUE_STI, %rsi
- jz .Lskip_sti
- sti
-.Lskip_sti:
tdcall
/*
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -169,7 +169,7 @@ static int ve_instr_len(struct ve_info *
}
}
-static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
+static u64 __cpuidle __halt(const bool irq_disabled)
{
struct tdx_hypercall_args args = {
.r10 = TDX_HYPERCALL_STANDARD,
@@ -189,20 +189,14 @@ static u64 __cpuidle __halt(const bool i
* can keep the vCPU in virtual HLT, even if an IRQ is
* pending, without hanging/breaking the guest.
*/
- return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
+ return __tdx_hypercall(&args, 0);
}
static int handle_halt(struct ve_info *ve)
{
- /*
- * Since non safe halt is mainly used in CPU offlining
- * and the guest will always stay in the halt state, don't
- * call the STI instruction (set do_sti as false).
- */
const bool irq_disabled = irqs_disabled();
- const bool do_sti = false;
- if (__halt(irq_disabled, do_sti))
+ if (__halt(irq_disabled))
return -EIO;
return ve_instr_len(ve);
@@ -210,22 +204,13 @@ static int handle_halt(struct ve_info *v
void __cpuidle tdx_safe_halt(void)
{
- /*
- * For do_sti=true case, __tdx_hypercall() function enables
- * interrupts using the STI instruction before the TDCALL. So
- * set irq_disabled as false.
- */
const bool irq_disabled = false;
- const bool do_sti = true;
/*
* Use WARN_ONCE() to report the failure.
*/
- if (__halt(irq_disabled, do_sti))
+ if (__halt(irq_disabled))
WARN_ONCE(1, "HLT instruction emulation failed\n");
-
- /* XXX I can't make sense of what @do_sti actually does */
- raw_local_irq_disable();
}
static int read_msr(struct pt_regs *regs, struct ve_info *ve)
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -8,7 +8,6 @@
#define TDX_HYPERCALL_STANDARD 0
#define TDX_HCALL_HAS_OUTPUT BIT(0)
-#define TDX_HCALL_ISSUE_STI BIT(1)
#define TDX_CPUID_LEAF_ID 0x21
#define TDX_IDENT "IntelTDX "
Peter Zijlstra
2022-Sep-19 10:00 UTC
[PATCH v2 23/44] arm,smp: Remove trace_.*_rcuidle() usage
None of these functions should ever be run with RCU disabled anymore.
Specifically, do_handle_IPI() is only called from handle_IPI() which
explicitly does irq_enter()/irq_exit() which ensures RCU is watching.
The problem with smp_cross_call() was, per commit 7c64cc0531fa ("arm: Use
_rcuidle for smp_cross_call() tracepoints"), that
cpuidle_enter_state_coupled() already had RCU disabled, but that's
long been fixed by commit 1098582a0f6c ("sched,idle,rcu: Push rcu_idle
deeper into the idle path").
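For context, the only caller, paraphrased from arch/arm/kernel/smp.c:
void handle_IPI(int ipinr, struct pt_regs *regs)
{
        struct pt_regs *old_regs = set_irq_regs(regs);
        irq_enter();                    /* RCU is watching from here on */
        do_handle_IPI(ipinr);
        irq_exit();
        set_irq_regs(old_regs);
}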
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
arch/arm/kernel/smp.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -639,7 +639,7 @@ static void do_handle_IPI(int ipinr)
unsigned int cpu = smp_processor_id();
if ((unsigned)ipinr < NR_IPI)
- trace_ipi_entry_rcuidle(ipi_types[ipinr]);
+ trace_ipi_entry(ipi_types[ipinr]);
switch (ipinr) {
case IPI_WAKEUP:
@@ -686,7 +686,7 @@ static void do_handle_IPI(int ipinr)
}
if ((unsigned)ipinr < NR_IPI)
- trace_ipi_exit_rcuidle(ipi_types[ipinr]);
+ trace_ipi_exit(ipi_types[ipinr]);
}
/* Legacy version, should go away once all irqchips have been converted */
@@ -709,7 +709,7 @@ static irqreturn_t ipi_handler(int irq,
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
{
- trace_ipi_raise_rcuidle(target, ipi_types[ipinr]);
+ trace_ipi_raise(target, ipi_types[ipinr]);
__ipi_send_mask(ipi_desc[ipinr], target);
}
Peter Zijlstra
2022-Sep-19 10:00 UTC
[PATCH v2 24/44] arm64,smp: Remove trace_.*_rcuidle() usage
Ever since commit d3afc7f12987 ("arm64: Allow IPIs to be handled as
normal interrupts") this function is called in regular IRQ context.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
Acked-by: Mark Rutland <mark.rutland at arm.com>
Acked-by: Marc Zyngier <maz at kernel.org>
---
arch/arm64/kernel/smp.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -865,7 +865,7 @@ static void do_handle_IPI(int ipinr)
unsigned int cpu = smp_processor_id();
if ((unsigned)ipinr < NR_IPI)
- trace_ipi_entry_rcuidle(ipi_types[ipinr]);
+ trace_ipi_entry(ipi_types[ipinr]);
switch (ipinr) {
case IPI_RESCHEDULE:
@@ -914,7 +914,7 @@ static void do_handle_IPI(int ipinr)
}
if ((unsigned)ipinr < NR_IPI)
- trace_ipi_exit_rcuidle(ipi_types[ipinr]);
+ trace_ipi_exit(ipi_types[ipinr]);
}
static irqreturn_t ipi_handler(int irq, void *data)
Peter Zijlstra
2022-Sep-19 10:00 UTC
[PATCH v2 25/44] printk: Remove trace_.*_rcuidle() usage
The problem, per commit fc98c3c8c9dc ("printk: use rcuidle console
tracepoint"), was printk usage from the cpuidle path where RCU was
already disabled.
Per the patches earlier in this series, this is no longer the case.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
Reviewed-by: Sergey Senozhatsky <senozhatsky at chromium.org>
Acked-by: Petr Mladek <pmladek at suse.com>
---
kernel/printk/printk.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2238,7 +2238,7 @@ static u16 printk_sprint(char *text, u16
}
}
- trace_console_rcuidle(text, text_len);
+ trace_console(text, text_len);
return text_len;
}
Peter Zijlstra
2022-Sep-19 10:00 UTC
[PATCH v2 26/44] time/tick-broadcast: Remove RCU_NONIDLE usage
No callers left that have already disabled RCU.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
Acked-by: Mark Rutland <mark.rutland at arm.com>
---
kernel/time/tick-broadcast-hrtimer.c | 29 ++++++++++++-----------------
1 file changed, 12 insertions(+), 17 deletions(-)
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -56,25 +56,20 @@ static int bc_set_next(ktime_t expires,
* hrtimer callback function is currently running, then
* hrtimer_start() cannot move it and the timer stays on the CPU on
* which it is assigned at the moment.
+ */
+ hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED_HARD);
+ /*
+ * The core tick broadcast mode expects bc->bound_on to be set
+ * correctly to prevent a CPU which has the broadcast hrtimer
+ * armed from going deep idle.
*
- * As this can be called from idle code, the hrtimer_start()
- * invocation has to be wrapped with RCU_NONIDLE() as
- * hrtimer_start() can call into tracing.
+ * As tick_broadcast_lock is held, nothing can change the cpu
+ * base which was just established in hrtimer_start() above. So
+ * the below access is safe even without holding the hrtimer
+ * base lock.
*/
- RCU_NONIDLE( {
- hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED_HARD);
- /*
- * The core tick broadcast mode expects bc->bound_on to be set
- * correctly to prevent a CPU which has the broadcast hrtimer
- * armed from going deep idle.
- *
- * As tick_broadcast_lock is held, nothing can change the cpu
- * base which was just established in hrtimer_start() above. So
- * the below access is safe even without holding the hrtimer
- * base lock.
- */
- bc->bound_on = bctimer.base->cpu_base->cpu;
- } );
+ bc->bound_on = bctimer.base->cpu_base->cpu;
+
return 0;
}
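For reference, RCU_NONIDLE() roughly expands to (names per the context-tracking rename earlier in this series):
#define RCU_NONIDLE(a) \
        do { \
                ct_irq_enter_irqson(); \
                do { a; } while (0); \
                ct_irq_exit_irqson(); \
        } while (0)
i.e. it force-enables RCU around the statement; with no RCU-idle callers left, bc_set_next() can call hrtimer_start() directly.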
Peter Zijlstra
2022-Sep-19 10:00 UTC
[PATCH v2 27/44] cpuidle, sched: Remove annotations from TIF_{POLLING_NRFLAG, NEED_RESCHED}
vmlinux.o: warning: objtool: mwait_idle+0x5: call to
current_set_polling_and_test() leaves .noinstr.text section
vmlinux.o: warning: objtool: acpi_processor_ffh_cstate_enter+0xc5: call to
current_set_polling_and_test() leaves .noinstr.text section
vmlinux.o: warning: objtool: cpu_idle_poll.isra.0+0x73: call to
test_ti_thread_flag() leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle+0xbc: call to
current_set_polling_and_test() leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle_irq+0xea: call to
current_set_polling_and_test() leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle_s2idle+0xb4: call to
current_set_polling_and_test() leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle+0xa6: call to current_clr_polling()
leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle_irq+0xbf: call to current_clr_polling()
leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle_s2idle+0xa1: call to
current_clr_polling() leaves .noinstr.text section
vmlinux.o: warning: objtool: mwait_idle+0xe: call to __current_set_polling()
leaves .noinstr.text section
vmlinux.o: warning: objtool: acpi_processor_ffh_cstate_enter+0xc5: call to
__current_set_polling() leaves .noinstr.text section
vmlinux.o: warning: objtool: cpu_idle_poll.isra.0+0x73: call to
test_ti_thread_flag() leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle+0xbc: call to __current_set_polling()
leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle_irq+0xea: call to
__current_set_polling() leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle_s2idle+0xb4: call to
__current_set_polling() leaves .noinstr.text section
vmlinux.o: warning: objtool: cpu_idle_poll.isra.0+0x73: call to
test_ti_thread_flag() leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle_s2idle+0x73: call to
test_ti_thread_flag.constprop.0() leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle_irq+0x91: call to
test_ti_thread_flag.constprop.0() leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle+0x78: call to
test_ti_thread_flag.constprop.0() leaves .noinstr.text section
vmlinux.o: warning: objtool: acpi_safe_halt+0xf: call to
test_ti_thread_flag.constprop.0() leaves .noinstr.text section
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
include/linux/sched/idle.h | 40 ++++++++++++++++++++++++++++++----------
include/linux/thread_info.h | 18 +++++++++++++++++-
2 files changed, 47 insertions(+), 11 deletions(-)
--- a/include/linux/sched/idle.h
+++ b/include/linux/sched/idle.h
@@ -23,12 +23,37 @@ static inline void wake_up_if_idle(int c
*/
#ifdef TIF_POLLING_NRFLAG
-static inline void __current_set_polling(void)
+#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_ATOMIC_H
+
+static __always_inline void __current_set_polling(void)
{
- set_thread_flag(TIF_POLLING_NRFLAG);
+ arch_set_bit(TIF_POLLING_NRFLAG,
+ (unsigned long *)(&current_thread_info()->flags));
}
-static inline bool __must_check current_set_polling_and_test(void)
+static __always_inline void __current_clr_polling(void)
+{
+ arch_clear_bit(TIF_POLLING_NRFLAG,
+ (unsigned long *)(&current_thread_info()->flags));
+}
+
+#else
+
+static __always_inline void __current_set_polling(void)
+{
+ set_bit(TIF_POLLING_NRFLAG,
+ (unsigned long *)(&current_thread_info()->flags));
+}
+
+static __always_inline void __current_clr_polling(void)
+{
+ clear_bit(TIF_POLLING_NRFLAG,
+ (unsigned long *)(&current_thread_info()->flags));
+}
+
+#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_ATOMIC_H */
+
+static __always_inline bool __must_check current_set_polling_and_test(void)
{
__current_set_polling();
@@ -41,12 +66,7 @@ static inline bool __must_check current_
return unlikely(tif_need_resched());
}
-static inline void __current_clr_polling(void)
-{
- clear_thread_flag(TIF_POLLING_NRFLAG);
-}
-
-static inline bool __must_check current_clr_polling_and_test(void)
+static __always_inline bool __must_check current_clr_polling_and_test(void)
{
__current_clr_polling();
@@ -73,7 +93,7 @@ static inline bool __must_check current_
}
#endif
-static inline void current_clr_polling(void)
+static __always_inline void current_clr_polling(void)
{
__current_clr_polling();
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -177,7 +177,23 @@ static __always_inline unsigned long rea
clear_ti_thread_flag(task_thread_info(t), TIF_##fl)
#endif /* !CONFIG_GENERIC_ENTRY */
-#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
+#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
+
+static __always_inline bool tif_need_resched(void)
+{
+ return arch_test_bit(TIF_NEED_RESCHED,
+ (unsigned long *)(&current_thread_info()->flags));
+}
+
+#else
+
+static __always_inline bool tif_need_resched(void)
+{
+ return test_bit(TIF_NEED_RESCHED,
+ (unsigned long *)(&current_thread_info()->flags));
+}
+
+#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */
#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
static inline int arch_within_stack_frames(const void * const stack,
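The _ASM_GENERIC_BITOPS_INSTRUMENTED_* guards work because the instrumented wrappers are, roughly, the arch op plus a sanitizer hook:
static __always_inline bool test_bit(long nr, const volatile unsigned long *addr)
{
        instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long)); /* KASAN/KCSAN */
        return arch_test_bit(nr, addr); /* raw, uninstrumented op */
}
Calling arch_test_bit()/arch_set_bit()/arch_clear_bit() directly skips the hook, which is what makes these helpers safe to use from .noinstr.text.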
vmlinux.o: warning: objtool: intel_idle_s2idle+0x6e: call to
__monitor.constprop.0() leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle_irq+0x8c: call to
__monitor.constprop.0() leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle+0x73: call to __monitor.constprop.0()
leaves .noinstr.text section
vmlinux.o: warning: objtool: mwait_idle+0x88: call to clflush() leaves
.noinstr.text section
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
arch/x86/include/asm/mwait.h | 12 ++++++------
arch/x86/include/asm/special_insns.h | 2 +-
2 files changed, 7 insertions(+), 7 deletions(-)
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -25,7 +25,7 @@
#define TPAUSE_C01_STATE 1
#define TPAUSE_C02_STATE 0
-static inline void __monitor(const void *eax, unsigned long ecx,
+static __always_inline void __monitor(const void *eax, unsigned long ecx,
unsigned long edx)
{
/* "monitor %eax, %ecx, %edx;" */
@@ -33,7 +33,7 @@ static inline void __monitor(const void
:: "a" (eax), "c" (ecx), "d"(edx));
}
-static inline void __monitorx(const void *eax, unsigned long ecx,
+static __always_inline void __monitorx(const void *eax, unsigned long ecx,
unsigned long edx)
{
/* "monitorx %eax, %ecx, %edx;" */
@@ -41,7 +41,7 @@ static inline void __monitorx(const void
:: "a" (eax), "c" (ecx), "d"(edx));
}
-static inline void __mwait(unsigned long eax, unsigned long ecx)
+static __always_inline void __mwait(unsigned long eax, unsigned long ecx)
{
mds_idle_clear_cpu_buffers();
@@ -76,8 +76,8 @@ static inline void __mwait(unsigned long
* EAX (logical) address to monitor
* ECX #GP if not zero
*/
-static inline void __mwaitx(unsigned long eax, unsigned long ebx,
- unsigned long ecx)
+static __always_inline void __mwaitx(unsigned long eax, unsigned long ebx,
+ unsigned long ecx)
{
/* No MDS buffer clear as this is AMD/HYGON only */
@@ -86,7 +86,7 @@ static inline void __mwaitx(unsigned lon
:: "a" (eax), "b" (ebx), "c" (ecx));
}
-static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+static __always_inline void __sti_mwait(unsigned long eax, unsigned long ecx)
{
mds_idle_clear_cpu_buffers();
/* "mwait %eax, %ecx;" */
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -196,7 +196,7 @@ static inline void load_gs_index(unsigne
#endif /* CONFIG_PARAVIRT_XXL */
-static inline void clflush(volatile void *__p)
+static __always_inline void clflush(volatile void *__p)
{
asm volatile("clflush %0" : "+m" (*(volatile char __force
*)__p));
}
vmlinux.o: warning: objtool: __halt+0x2c: call to hcall_func.constprop.0()
leaves .noinstr.text section
vmlinux.o: warning: objtool: __halt+0x3f: call to __tdx_hypercall() leaves
.noinstr.text section
vmlinux.o: warning: objtool: __tdx_hypercall+0x66: call to
__tdx_hypercall_failed() leaves .noinstr.text section
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
arch/x86/boot/compressed/vmlinux.lds.S | 1 +
arch/x86/coco/tdx/tdcall.S | 2 ++
arch/x86/coco/tdx/tdx.c | 5 +++--
3 files changed, 6 insertions(+), 2 deletions(-)
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -34,6 +34,7 @@ SECTIONS
_text = .; /* Text */
*(.text)
*(.text.*)
+ *(.noinstr.text)
_etext = . ;
}
.rodata : {
--- a/arch/x86/coco/tdx/tdcall.S
+++ b/arch/x86/coco/tdx/tdcall.S
@@ -31,6 +31,8 @@
TDX_R12 | TDX_R13 | \
TDX_R14 | TDX_R15 )
+.section .noinstr.text, "ax"
+
/*
* __tdx_module_call() - Used by TDX guests to request services from
* the TDX module (does not include VMM services) using TDCALL instruction.
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -53,8 +53,9 @@ static inline u64 _tdx_hypercall(u64 fn,
}
/* Called from __tdx_hypercall() for unrecoverable failure */
-void __tdx_hypercall_failed(void)
+noinstr void __tdx_hypercall_failed(void)
{
+ instrumentation_begin();
panic("TDVMCALL failed. TDX module bug?");
}
@@ -64,7 +65,7 @@ void __tdx_hypercall_failed(void)
* Reusing the KVM EXIT_REASON macros makes it easier to connect the host and
* guest sides of these calls.
*/
-static u64 hcall_func(u64 exit_reason)
+static __always_inline u64 hcall_func(u64 exit_reason)
{
return exit_reason;
}
Peter Zijlstra
2022-Sep-19 10:00 UTC
[PATCH v2 30/44] cpuidle,xenpv: Make more PARAVIRT_XXL noinstr clean
vmlinux.o: warning: objtool: acpi_idle_enter_s2idle+0xde: call to wbinvd()
leaves .noinstr.text section
vmlinux.o: warning: objtool: default_idle+0x4: call to arch_safe_halt() leaves
.noinstr.text section
vmlinux.o: warning: objtool: xen_safe_halt+0xa: call to
HYPERVISOR_sched_op.constprop.0() leaves .noinstr.text section
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
Reviewed-by: Srivatsa S. Bhat (VMware) <srivatsa at csail.mit.edu>
---
arch/x86/include/asm/paravirt.h | 6 ++++--
arch/x86/include/asm/special_insns.h | 4 ++--
arch/x86/include/asm/xen/hypercall.h | 2 +-
arch/x86/kernel/paravirt.c | 14 ++++++++++++--
arch/x86/xen/enlighten_pv.c | 2 +-
arch/x86/xen/irq.c | 2 +-
6 files changed, 21 insertions(+), 9 deletions(-)
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -168,7 +168,7 @@ static inline void __write_cr4(unsigned
PVOP_VCALL1(cpu.write_cr4, x);
}
-static inline void arch_safe_halt(void)
+static __always_inline void arch_safe_halt(void)
{
PVOP_VCALL0(irq.safe_halt);
}
@@ -178,7 +178,9 @@ static inline void halt(void)
PVOP_VCALL0(irq.halt);
}
-static inline void wbinvd(void)
+extern noinstr void pv_native_wbinvd(void);
+
+static __always_inline void wbinvd(void)
{
PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV));
}
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -115,7 +115,7 @@ static inline void wrpkru(u32 pkru)
}
#endif
-static inline void native_wbinvd(void)
+static __always_inline void native_wbinvd(void)
{
asm volatile("wbinvd": : :"memory");
}
@@ -179,7 +179,7 @@ static inline void __write_cr4(unsigned
native_write_cr4(x);
}
-static inline void wbinvd(void)
+static __always_inline void wbinvd(void)
{
native_wbinvd();
}
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -382,7 +382,7 @@ MULTI_stack_switch(struct multicall_entr
}
#endif
-static inline int
+static __always_inline int
HYPERVISOR_sched_op(int cmd, void *arg)
{
return _hypercall2(int, sched_op, cmd, arg);
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -233,6 +233,11 @@ static noinstr void pv_native_set_debugr
native_set_debugreg(regno, val);
}
+noinstr void pv_native_wbinvd(void)
+{
+ native_wbinvd();
+}
+
static noinstr void pv_native_irq_enable(void)
{
native_irq_enable();
@@ -242,6 +247,11 @@ static noinstr void pv_native_irq_disabl
{
native_irq_disable();
}
+
+static noinstr void pv_native_safe_halt(void)
+{
+ native_safe_halt();
+}
#endif
enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
@@ -273,7 +283,7 @@ struct paravirt_patch_template pv_ops
.cpu.read_cr0 = native_read_cr0,
.cpu.write_cr0 = native_write_cr0,
.cpu.write_cr4 = native_write_cr4,
- .cpu.wbinvd = native_wbinvd,
+ .cpu.wbinvd = pv_native_wbinvd,
.cpu.read_msr = native_read_msr,
.cpu.write_msr = native_write_msr,
.cpu.read_msr_safe = native_read_msr_safe,
@@ -307,7 +317,7 @@ struct paravirt_patch_template pv_ops
.irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
.irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable),
.irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable),
- .irq.safe_halt = native_safe_halt,
+ .irq.safe_halt = pv_native_safe_halt,
.irq.halt = native_halt,
#endif /* CONFIG_PARAVIRT_XXL */
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1019,7 +1019,7 @@ static const typeof(pv_ops) xen_cpu_ops
.write_cr4 = xen_write_cr4,
- .wbinvd = native_wbinvd,
+ .wbinvd = pv_native_wbinvd,
.read_msr = xen_read_msr,
.write_msr = xen_write_msr,
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -24,7 +24,7 @@ noinstr void xen_force_evtchn_callback(v
(void)HYPERVISOR_xen_version(0, NULL);
}
-static void xen_safe_halt(void)
+static noinstr void xen_safe_halt(void)
{
/* Blocking includes an implicit local_irq_enable(). */
if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
vmlinux.o: warning: objtool: mwait_idle+0x47: call to
mds_idle_clear_cpu_buffers() leaves .noinstr.text section
vmlinux.o: warning: objtool: acpi_processor_ffh_cstate_enter+0xa2: call to
mds_idle_clear_cpu_buffers() leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle+0x91: call to
mds_idle_clear_cpu_buffers() leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle_s2idle+0x8c: call to
mds_idle_clear_cpu_buffers() leaves .noinstr.text section
vmlinux.o: warning: objtool: intel_idle_irq+0xaa: call to
mds_idle_clear_cpu_buffers() leaves .noinstr.text section
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
arch/x86/include/asm/nospec-branch.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -310,7 +310,7 @@ static __always_inline void mds_user_cle
*
* Clear CPU buffers if the corresponding static key is enabled
*/
-static inline void mds_idle_clear_cpu_buffers(void)
+static __always_inline void mds_idle_clear_cpu_buffers(void)
{
if (static_branch_likely(&mds_idle_clear))
mds_clear_cpu_buffers();
vmlinux.o: warning: objtool: io_idle+0xc: call to __inb.isra.0() leaves
.noinstr.text section
vmlinux.o: warning: objtool: acpi_idle_enter+0xfe: call to num_online_cpus()
leaves .noinstr.text section
vmlinux.o: warning: objtool: acpi_idle_enter+0x115: call to
acpi_idle_fallback_to_c1.isra.0() leaves .noinstr.text section
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki at intel.com>
---
arch/x86/include/asm/shared/io.h | 4 ++--
drivers/acpi/processor_idle.c | 2 +-
include/linux/cpumask.h | 4 ++--
3 files changed, 5 insertions(+), 5 deletions(-)
--- a/arch/x86/include/asm/shared/io.h
+++ b/arch/x86/include/asm/shared/io.h
@@ -5,13 +5,13 @@
#include <linux/types.h>
#define BUILDIO(bwl, bw, type) \
-static inline void __out##bwl(type value, u16 port) \
+static __always_inline void __out##bwl(type value, u16 port) \
{ \
asm volatile("out" #bwl " %" #bw "0, %w1" \
: : "a"(value), "Nd"(port)); \
} \
\
-static inline type __in##bwl(u16 port) \
+static __always_inline type __in##bwl(u16 port) \
{ \
type value; \
asm volatile("in" #bwl " %w1, %" #bw "0" \
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -593,7 +593,7 @@ static int acpi_idle_play_dead(struct cp
return 0;
}
-static bool acpi_idle_fallback_to_c1(struct acpi_processor *pr)
+static __always_inline bool acpi_idle_fallback_to_c1(struct acpi_processor *pr)
{
return IS_ENABLED(CONFIG_HOTPLUG_CPU) && !pr->flags.has_cst &&
!(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED);
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -908,9 +908,9 @@ static inline const struct cpumask *get_
* concurrent CPU hotplug operations unless invoked from a cpuhp_lock held
* region.
*/
-static inline unsigned int num_online_cpus(void)
+static __always_inline unsigned int num_online_cpus(void)
{
- return atomic_read(&__num_online_cpus);
+ return arch_atomic_read(&__num_online_cpus);
}
#define num_possible_cpus() cpumask_weight(cpu_possible_mask)
#define num_present_cpus() cpumask_weight(cpu_present_mask)
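Same trick as with the bitops earlier in the series: atomic_read() is the instrumented wrapper (KASAN/KCSAN hooks) around arch_atomic_read(), so num_online_cpus() switches to the raw op to stay noinstr-safe (assuming the generic instrumented-atomics layout).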
CONFIG_GENERIC_ENTRY disallows any and all tracing when RCU isn't
enabled.
XXX if s390 (the only other GENERIC_ENTRY user as of this writing)
isn't comfortable with this, we could switch to HAVE_NOINSTR_VALIDATION
which is x86_64 only atm.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
include/linux/tracepoint.h | 13 ++++++++++++-
kernel/trace/trace.c | 3 +++
2 files changed, 15 insertions(+), 1 deletion(-)
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -178,6 +178,16 @@ static inline struct tracepoint *tracepo
#endif /* CONFIG_HAVE_STATIC_CALL */
/*
+ * CONFIG_GENERIC_ENTRY archs are expected to have sanitized entry and idle
+ * code that disallow any/all tracing/instrumentation when RCU isn't watching.
+ */
+#ifdef CONFIG_GENERIC_ENTRY
+#define RCUIDLE_COND(rcuidle) (rcuidle)
+#else
+#define RCUIDLE_COND(rcuidle) (rcuidle && in_nmi())
+#endif
+
+/*
* it_func[0] is never NULL because there is at least one element in the array
* when the array itself is non NULL.
*/
@@ -189,7 +199,8 @@ static inline struct tracepoint *tracepo
return; \
\
/* srcu can't be used from NMI */ \
- WARN_ON_ONCE(rcuidle && in_nmi()); \
+ if (WARN_ON_ONCE(RCUIDLE_COND(rcuidle))) \
+ return; \
\
/* keep srcu and sched-rcu usage consistent */ \
preempt_disable_notrace(); \
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3104,6 +3104,9 @@ void __trace_stack(struct trace_array *t
return;
}
+ if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
+ return;
+
/*
* When an NMI triggers, RCU is enabled via ct_nmi_enter(),
* but if the above rcu_is_watching() failed, then the NMI
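Net effect: on GENERIC_ENTRY archs, any tracepoint invocation that actually asks for the rcuidle path now warns and bails instead of silently taking the SRCU fallback, and the NMI stack-trace fallback in __trace_stack() is likewise disallowed.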
Peter Zijlstra
2022-Sep-19 10:00 UTC
[PATCH v2 34/44] cpuidle,omap3: Use WFI for omap3_pm_idle()
arch_cpu_idle() is a very simple idle interface and exposes only a
single idle state and is expected to not require RCU and not do any
tracing/instrumentation.
As such, omap_sram_idle() is not a valid implementation. Replace it
with the simple (shallow) omap3_do_wfi() call, leaving the more
complicated idle states for the cpuidle driver.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
Acked-by: Tony Lindgren <tony at atomide.com>
---
arch/arm/mach-omap2/pm34xx.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -294,7 +294,7 @@ static void omap3_pm_idle(void)
if (omap_irq_pending())
return;
- omap_sram_idle();
+ omap3_do_wfi();
}
#ifdef CONFIG_SUSPEND
Peter Zijlstra
2022-Sep-19 10:00 UTC
[PATCH v2 35/44] cpuidle,omap3: Push RCU-idle into omap_sram_idle()
OMAP3 uses full SoC suspend modes as idle states, as such it needs the
whole power-domain and clock-domain code from the idle path.
All that code is not suitable to run with RCU disabled, as such push
RCU-idle deeper still.
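The resulting shape, as a sketch (omap_sram_idle() below is the real instance; the helper names here are illustrative):
void deep_idle(bool rcuidle)
{
        /* power/clock-domain bookkeeping: tracing still allowed */
        if (rcuidle)
                ct_cpuidle_enter();     /* RCU stops watching */
        do_wfi();                       /* no tracing/RCU usage in here */
        if (rcuidle)
                ct_cpuidle_exit();      /* RCU watching again */
        /* more bookkeeping */
}
The suspend path passes rcuidle=false because RCU is fully awake there.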
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
Reviewed-by: Tony Lindgren <tony at atomide.com>
Tested-by: Tony Lindgren <tony at atomide.com>
---
arch/arm/mach-omap2/cpuidle34xx.c | 4 +---
arch/arm/mach-omap2/pm.h | 2 +-
arch/arm/mach-omap2/pm34xx.c | 12 ++++++++++--
3 files changed, 12 insertions(+), 6 deletions(-)
--- a/arch/arm/mach-omap2/cpuidle34xx.c
+++ b/arch/arm/mach-omap2/cpuidle34xx.c
@@ -133,9 +133,7 @@ static int omap3_enter_idle(struct cpuid
}
/* Execute ARM wfi */
- ct_cpuidle_enter();
- omap_sram_idle();
- ct_cpuidle_exit();
+ omap_sram_idle(true);
/*
* Call idle CPU PM enter notifier chain to restore
--- a/arch/arm/mach-omap2/pm.h
+++ b/arch/arm/mach-omap2/pm.h
@@ -29,7 +29,7 @@ static inline int omap4_idle_init(void)
extern void *omap3_secure_ram_storage;
extern void omap3_pm_off_mode_enable(int);
-extern void omap_sram_idle(void);
+extern void omap_sram_idle(bool rcuidle);
extern int omap_pm_clkdms_setup(struct clockdomain *clkdm, void *unused);
#if defined(CONFIG_PM_OPP)
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -26,6 +26,7 @@
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/of.h>
+#include <linux/cpuidle.h>
#include <trace/events/power.h>
@@ -174,7 +175,7 @@ static int omap34xx_do_sram_idle(unsigne
return 0;
}
-void omap_sram_idle(void)
+void omap_sram_idle(bool rcuidle)
{
/* Variable to tell what needs to be saved and restored
* in omap_sram_idle*/
@@ -254,11 +255,18 @@ void omap_sram_idle(void)
*/
if (save_state)
omap34xx_save_context(omap3_arm_context);
+
+ if (rcuidle)
+ ct_cpuidle_enter();
+
if (save_state == 1 || save_state == 3)
cpu_suspend(save_state, omap34xx_do_sram_idle);
else
omap34xx_do_sram_idle(save_state);
+ if (rcuidle)
+ ct_cpuidle_exit();
+
/* Restore normal SDRC POWER settings */
if (cpu_is_omap3430() && omap_rev() >= OMAP3430_REV_ES3_0 &&
(omap_type() == OMAP2_DEVICE_TYPE_EMU ||
@@ -316,7 +324,7 @@ static int omap3_pm_suspend(void)
omap3_intc_suspend();
- omap_sram_idle();
+ omap_sram_idle(false);
restore:
/* Restore next_pwrsts */
Peter Zijlstra
2022-Sep-19 10:00 UTC
[PATCH v2 36/44] cpuidle, omap4: Push RCU-idle into omap4_enter_lowpower()
From: Tony Lindgren <tony at atomide.com>
OMAP4 uses full SoC suspend modes as idle states, as such it needs the
whole power-domain and clock-domain code from the idle path.
All that code is not suitable to run with RCU disabled, as such push
RCU-idle deeper still.
Signed-off-by: Tony Lindgren <tony at atomide.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
Link: https://lkml.kernel.org/r/Yqcv6crSNKuSWoTu at atomide.com
---
arch/arm/mach-omap2/common.h | 6 ++++--
arch/arm/mach-omap2/cpuidle44xx.c | 8 ++------
arch/arm/mach-omap2/omap-mpuss-lowpower.c | 12 +++++++++++-
arch/arm/mach-omap2/pm44xx.c | 2 +-
4 files changed, 18 insertions(+), 10 deletions(-)
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@@ -284,11 +284,13 @@ extern u32 omap4_get_cpu1_ns_pa_addr(voi
#if defined(CONFIG_SMP) && defined(CONFIG_PM)
extern int omap4_mpuss_init(void);
-extern int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state);
+extern int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state,
+ bool rcuidle);
extern int omap4_hotplug_cpu(unsigned int cpu, unsigned int power_state);
#else
static inline int omap4_enter_lowpower(unsigned int cpu,
- unsigned int power_state)
+ unsigned int power_state,
+ bool rcuidle)
{
cpu_do_idle();
return 0;
--- a/arch/arm/mach-omap2/cpuidle44xx.c
+++ b/arch/arm/mach-omap2/cpuidle44xx.c
@@ -105,9 +105,7 @@ static int omap_enter_idle_smp(struct cp
}
raw_spin_unlock_irqrestore(&mpu_lock, flag);
- ct_cpuidle_enter();
- omap4_enter_lowpower(dev->cpu, cx->cpu_state);
- ct_cpuidle_exit();
+ omap4_enter_lowpower(dev->cpu, cx->cpu_state, true);
raw_spin_lock_irqsave(&mpu_lock, flag);
if (cx->mpu_state_vote == num_online_cpus())
@@ -186,10 +184,8 @@ static int omap_enter_idle_coupled(struc
}
}
- ct_cpuidle_enter();
- omap4_enter_lowpower(dev->cpu, cx->cpu_state);
+ omap4_enter_lowpower(dev->cpu, cx->cpu_state, true);
cpu_done[dev->cpu] = true;
- ct_cpuidle_exit();
/* Wakeup CPU1 only if it is not offlined */
if (dev->cpu == 0 && cpumask_test_cpu(1, cpu_online_mask)) {
--- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c
+++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
@@ -33,6 +33,7 @@
* and first to wake-up when MPUSS low power states are excercised
*/
+#include <linux/cpuidle.h>
#include <linux/kernel.h>
#include <linux/io.h>
#include <linux/errno.h>
@@ -214,6 +215,7 @@ static void __init save_l2x0_context(voi
* of OMAP4 MPUSS subsystem
* @cpu : CPU ID
* @power_state: Low power state.
+ * @rcuidle: RCU needs to be idled
*
* MPUSS states for the context save:
* save_state
@@ -222,7 +224,8 @@ static void __init save_l2x0_context(voi
* 2 - CPUx L1 and logic lost + GIC lost: MPUSS OSWR
* 3 - CPUx L1 and logic lost + GIC + L2 lost: DEVICE OFF
*/
-int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state)
+int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state,
+ bool rcuidle)
{
struct omap4_cpu_pm_info *pm_info = &per_cpu(omap4_pm_info, cpu);
unsigned int save_state = 0, cpu_logic_state = PWRDM_POWER_RET;
@@ -268,6 +271,10 @@ int omap4_enter_lowpower(unsigned int cp
cpu_clear_prev_logic_pwrst(cpu);
pwrdm_set_next_pwrst(pm_info->pwrdm, power_state);
pwrdm_set_logic_retst(pm_info->pwrdm, cpu_logic_state);
+
+ if (rcuidle)
+ ct_cpuidle_enter();
+
set_cpu_wakeup_addr(cpu, __pa_symbol(omap_pm_ops.resume));
omap_pm_ops.scu_prepare(cpu, power_state);
l2x0_pwrst_prepare(cpu, save_state);
@@ -283,6 +290,9 @@ int omap4_enter_lowpower(unsigned int cp
if (IS_PM44XX_ERRATUM(PM_OMAP4_ROM_SMP_BOOT_ERRATUM_GICD) && cpu)
gic_dist_enable();
+ if (rcuidle)
+ ct_cpuidle_exit();
+
/*
* Restore the CPUx power state to ON otherwise CPUx
* power domain can transitions to programmed low power
--- a/arch/arm/mach-omap2/pm44xx.c
+++ b/arch/arm/mach-omap2/pm44xx.c
@@ -76,7 +76,7 @@ static int omap4_pm_suspend(void)
* domain CSWR is not supported by hardware.
* More details can be found in OMAP4430 TRM section 4.3.4.2.
*/
- omap4_enter_lowpower(cpu_id, cpu_suspend_state);
+ omap4_enter_lowpower(cpu_id, cpu_suspend_state, false);
/* Restore next powerdomain state */
list_for_each_entry(pwrst, &pwrst_list, node) {
Peter Zijlstra
2022-Sep-19 10:00 UTC
[PATCH v2 37/44] arm,omap2: Use WFI for omap2_pm_idle()
arch_cpu_idle() is a very simple idle interface and exposes only a
single idle state and is expected to not require RCU and not do any
tracing/instrumentation.
As such, omap2_pm_idle() is not a valid implementation. Replace it
with a simple (shallow) omap2_do_wfi() call.
Omap2 doesn't have a cpuidle driver; adding one would be the way to
(re)gain the other idle states.
Suggested-by: Tony Lindgren <tony at atomide.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
arch/arm/mach-omap2/pm24xx.c | 51 +------------------------------------------
1 file changed, 2 insertions(+), 49 deletions(-)
--- a/arch/arm/mach-omap2/pm24xx.c
+++ b/arch/arm/mach-omap2/pm24xx.c
@@ -116,50 +116,12 @@ static int omap2_enter_full_retention(vo
static int sti_console_enabled;
-static int omap2_allow_mpu_retention(void)
-{
- if (!omap2xxx_cm_mpu_retention_allowed())
- return 0;
- if (sti_console_enabled)
- return 0;
-
- return 1;
-}
-
-static void omap2_enter_mpu_retention(void)
+static void omap2_do_wfi(void)
{
const int zero = 0;
- /* The peripherals seem not to be able to wake up the MPU when
- * it is in retention mode. */
- if (omap2_allow_mpu_retention()) {
- /* REVISIT: These write to reserved bits? */
- omap_prm_clear_mod_irqs(CORE_MOD, PM_WKST1, ~0);
- omap_prm_clear_mod_irqs(CORE_MOD, OMAP24XX_PM_WKST2, ~0);
- omap_prm_clear_mod_irqs(WKUP_MOD, PM_WKST, ~0);
-
- /* Try to enter MPU retention */
- pwrdm_set_next_pwrst(mpu_pwrdm, PWRDM_POWER_RET);
-
- } else {
- /* Block MPU retention */
- pwrdm_set_next_pwrst(mpu_pwrdm, PWRDM_POWER_ON);
- }
-
/* WFI */
asm("mcr p15, 0, %0, c7, c0, 4" : : "r" (zero) :
"memory", "cc");
-
- pwrdm_set_next_pwrst(mpu_pwrdm, PWRDM_POWER_ON);
-}
-
-static int omap2_can_sleep(void)
-{
- if (omap2xxx_cm_fclks_active())
- return 0;
- if (__clk_is_enabled(osc_ck))
- return 0;
-
- return 1;
}
static void omap2_pm_idle(void)
@@ -169,16 +131,7 @@ static void omap2_pm_idle(void)
if (omap_irq_pending())
return;
- error = cpu_cluster_pm_enter();
- if (error || !omap2_can_sleep()) {
- omap2_enter_mpu_retention();
- goto out_cpu_cluster_pm;
- }
-
- omap2_enter_full_retention();
-
-out_cpu_cluster_pm:
- cpu_cluster_pm_exit();
+ omap2_do_wfi();
}
static void __init prcm_setup_regs(void)
Peter Zijlstra
2022-Sep-19 10:00 UTC
[PATCH v2 38/44] cpuidle,powerdomain: Remove trace_.*_rcuidle()
OMAP was the one and only user.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
arch/arm/mach-omap2/powerdomain.c | 10 +++++-----
drivers/base/power/runtime.c | 24 ++++++++++++------------
2 files changed, 17 insertions(+), 17 deletions(-)
--- a/arch/arm/mach-omap2/powerdomain.c
+++ b/arch/arm/mach-omap2/powerdomain.c
@@ -187,9 +187,9 @@ static int _pwrdm_state_switch(struct po
trace_state = (PWRDM_TRACE_STATES_FLAG |
((next & OMAP_POWERSTATE_MASK) << 8) |
((prev & OMAP_POWERSTATE_MASK) << 0));
- trace_power_domain_target_rcuidle(pwrdm->name,
- trace_state,
- raw_smp_processor_id());
+ trace_power_domain_target(pwrdm->name,
+ trace_state,
+ raw_smp_processor_id());
}
break;
default:
@@ -541,8 +541,8 @@ int pwrdm_set_next_pwrst(struct powerdom
if (arch_pwrdm && arch_pwrdm->pwrdm_set_next_pwrst) {
/* Trace the pwrdm desired target state */
- trace_power_domain_target_rcuidle(pwrdm->name, pwrst,
- raw_smp_processor_id());
+ trace_power_domain_target(pwrdm->name, pwrst,
+ raw_smp_processor_id());
/* Program the pwrdm desired target state */
ret = arch_pwrdm->pwrdm_set_next_pwrst(pwrdm, pwrst);
}
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -442,7 +442,7 @@ static int rpm_idle(struct device *dev,
int (*callback)(struct device *);
int retval;
- trace_rpm_idle_rcuidle(dev, rpmflags);
+ trace_rpm_idle(dev, rpmflags);
retval = rpm_check_suspend_allowed(dev);
if (retval < 0)
; /* Conditions are wrong. */
@@ -481,7 +481,7 @@ static int rpm_idle(struct device *dev,
dev->power.request_pending = true;
queue_work(pm_wq, &dev->power.work);
}
- trace_rpm_return_int_rcuidle(dev, _THIS_IP_, 0);
+ trace_rpm_return_int(dev, _THIS_IP_, 0);
return 0;
}
@@ -493,7 +493,7 @@ static int rpm_idle(struct device *dev,
wake_up_all(&dev->power.wait_queue);
out:
- trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
+ trace_rpm_return_int(dev, _THIS_IP_, retval);
return retval ? retval : rpm_suspend(dev, rpmflags | RPM_AUTO);
}
@@ -557,7 +557,7 @@ static int rpm_suspend(struct device *de
struct device *parent = NULL;
int retval;
- trace_rpm_suspend_rcuidle(dev, rpmflags);
+ trace_rpm_suspend(dev, rpmflags);
repeat:
retval = rpm_check_suspend_allowed(dev);
@@ -708,7 +708,7 @@ static int rpm_suspend(struct device *de
}
out:
- trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
+ trace_rpm_return_int(dev, _THIS_IP_, retval);
return retval;
@@ -760,7 +760,7 @@ static int rpm_resume(struct device *dev
struct device *parent = NULL;
int retval = 0;
- trace_rpm_resume_rcuidle(dev, rpmflags);
+ trace_rpm_resume(dev, rpmflags);
repeat:
if (dev->power.runtime_error) {
@@ -925,7 +925,7 @@ static int rpm_resume(struct device *dev
spin_lock_irq(&dev->power.lock);
}
- trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
+ trace_rpm_return_int(dev, _THIS_IP_, retval);
return retval;
}
@@ -1081,7 +1081,7 @@ int __pm_runtime_idle(struct device *dev
if (retval < 0) {
return retval;
} else if (retval > 0) {
- trace_rpm_usage_rcuidle(dev, rpmflags);
+ trace_rpm_usage(dev, rpmflags);
return 0;
}
}
@@ -1119,7 +1119,7 @@ int __pm_runtime_suspend(struct device *
if (retval < 0) {
return retval;
} else if (retval > 0) {
- trace_rpm_usage_rcuidle(dev, rpmflags);
+ trace_rpm_usage(dev, rpmflags);
return 0;
}
}
@@ -1202,7 +1202,7 @@ int pm_runtime_get_if_active(struct devi
} else {
retval = atomic_inc_not_zero(&dev->power.usage_count);
}
- trace_rpm_usage_rcuidle(dev, 0);
+ trace_rpm_usage(dev, 0);
spin_unlock_irqrestore(&dev->power.lock, flags);
return retval;
@@ -1566,7 +1566,7 @@ void pm_runtime_allow(struct device *dev
if (ret == 0)
rpm_idle(dev, RPM_AUTO | RPM_ASYNC);
else if (ret > 0)
- trace_rpm_usage_rcuidle(dev, RPM_AUTO | RPM_ASYNC);
+ trace_rpm_usage(dev, RPM_AUTO | RPM_ASYNC);
out:
spin_unlock_irq(&dev->power.lock);
@@ -1635,7 +1635,7 @@ static void update_autosuspend(struct de
atomic_inc(&dev->power.usage_count);
rpm_resume(dev, 0);
} else {
- trace_rpm_usage_rcuidle(dev, 0);
+ trace_rpm_usage(dev, 0);
}
}
Peter Zijlstra
2022-Sep-19 10:00 UTC
[PATCH v2 39/44] cpuidle,clk: Remove trace_.*_rcuidle()
OMAP was the one and only user.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
drivers/clk/clk.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -978,12 +978,12 @@ static void clk_core_disable(struct clk_
if (--core->enable_count > 0)
return;
- trace_clk_disable_rcuidle(core);
+ trace_clk_disable(core);
if (core->ops->disable)
core->ops->disable(core->hw);
- trace_clk_disable_complete_rcuidle(core);
+ trace_clk_disable_complete(core);
clk_core_disable(core->parent);
}
@@ -1037,12 +1037,12 @@ static int clk_core_enable(struct clk_co
if (ret)
return ret;
- trace_clk_enable_rcuidle(core);
+ trace_clk_enable(core);
if (core->ops->enable)
ret = core->ops->enable(core->hw);
- trace_clk_enable_complete_rcuidle(core);
+ trace_clk_enable_complete(core);
if (ret) {
clk_core_disable(core->parent);
Peter Zijlstra
2022-Sep-19 10:00 UTC
[PATCH v2 40/44] ubsan: Fix objtool UACCESS warns
clang-14 allyesconfig gives:
vmlinux.o: warning: objtool: emulator_cmpxchg_emulated+0x705: call to
__ubsan_handle_load_invalid_value() with UACCESS enabled
vmlinux.o: warning: objtool: paging64_update_accessed_dirty_bits+0x39e: call to
__ubsan_handle_load_invalid_value() with UACCESS enabled
vmlinux.o: warning: objtool: paging32_update_accessed_dirty_bits+0x390: call to
__ubsan_handle_load_invalid_value() with UACCESS enabled
vmlinux.o: warning: objtool: ept_update_accessed_dirty_bits+0x43f: call to
__ubsan_handle_load_invalid_value() with UACCESS enabled
Add the required eflags save/restore and whitelist the thing.
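For background, objtool's UACCESS check flags any call made between STAC and CLAC, i.e. while EFLAGS.AC allows the kernel to touch user memory, unless the callee is on the uaccess_safe_builtin[] whitelist; a whitelisted callee must in turn save and clear AC around anything non-trivial. A minimal sketch of that pattern, with illustrative names (instrumentation_hook/do_report are not from the patch):
	void instrumentation_hook(void *data)
	{
		unsigned long ua_flags = user_access_save();	/* pushf + clac on x86 */

		do_report(data);				/* may now call arbitrary code */

		user_access_restore(ua_flags);			/* restore EFLAGS.AC as it was */
	}
The lib/ubsan.c hunk below applies exactly this shape to __ubsan_handle_load_invalid_value().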
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
lib/ubsan.c | 5 ++++-
tools/objtool/check.c | 1 +
2 files changed, 5 insertions(+), 1 deletion(-)
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -340,9 +340,10 @@ void __ubsan_handle_load_invalid_value(v
{
struct invalid_value_data *data = _data;
char val_str[VALUE_LENGTH];
+ unsigned long ua_flags = user_access_save();
if (suppress_report(&data->location))
- return;
+ goto out;
ubsan_prologue(&data->location, "invalid-load");
@@ -352,6 +353,8 @@ void __ubsan_handle_load_invalid_value(v
val_str, data->type->type_name);
ubsan_epilogue();
+out:
+ user_access_restore(ua_flags);
}
EXPORT_SYMBOL(__ubsan_handle_load_invalid_value);
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1068,6 +1068,7 @@ static const char *uaccess_safe_builtin[
"__ubsan_handle_type_mismatch",
"__ubsan_handle_type_mismatch_v1",
"__ubsan_handle_shift_out_of_bounds",
+ "__ubsan_handle_load_invalid_value",
/* misc */
"csum_partial_copy_generic",
"copy_mc_fragile",
Peter Zijlstra
2022-Sep-19 10:00 UTC
[PATCH v2 41/44] intel_idle: Add force_irq_on module param
For testing purposes.
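The parameter is 0444, so it cannot be flipped at runtime; since intel_idle is built in, it would be set on the kernel command line, e.g.:
	intel_idle.force_irq_on=1
which routes every state through intel_idle_irq(), as if it had CPUIDLE_FLAG_IRQ_ENABLE set.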
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
drivers/idle/intel_idle.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1787,6 +1787,9 @@ static bool __init intel_idle_verify_cst
return true;
}
+static bool force_irq_on __read_mostly;
+module_param(force_irq_on, bool, 0444);
+
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
{
int cstate;
@@ -1838,8 +1841,10 @@ static void __init intel_idle_init_cstat
/* Structure copy. */
drv->states[drv->state_count] = cpuidle_state_table[cstate];
- if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE)
+ if ((cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE) || force_irq_on) {
+ printk("intel_idle: forced intel_idle_irq for state %d\n", cstate);
drv->states[drv->state_count].enter = intel_idle_irq;
+ }
if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
Peter Zijlstra
2022-Sep-19 10:00 UTC
[PATCH v2 42/44] entry, kasan, x86: Disallow overriding mem*() functions
KASAN cannot just hijack the mem*() functions, it needs to emit
__asan_mem*() variants if it wants instrumentation (other sanitizers
already do this).
vmlinux.o: warning: objtool: sync_regs+0x24: call to memcpy() leaves
.noinstr.text section
vmlinux.o: warning: objtool: vc_switch_off_ist+0xbe: call to memcpy() leaves
.noinstr.text section
vmlinux.o: warning: objtool: fixup_bad_iret+0x36: call to memset() leaves
.noinstr.text section
vmlinux.o: warning: objtool: __sev_get_ghcb+0xa0: call to memcpy() leaves
.noinstr.text section
vmlinux.o: warning: objtool: __sev_put_ghcb+0x35: call to memcpy() leaves
.noinstr.text section
Remove the weak aliases to ensure nobody hijacks these functions and
add them to the noinstr section.
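To spell out the mechanics (a sketch, not code from this patch): with KASAN enabled, mm/kasan/shadow.c used to supply a strong memcpy() that wins over the weak asm alias, so every caller, including .noinstr.text, landed in instrumented code:
	/* arch/x86/lib/memcpy_64.S, before */
	SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)	/* loses to any strong definition */

	/* mm/kasan/shadow.c, before -- the strong override every caller got */
	void *memcpy(void *dest, const void *src, size_t len)
	{
		if (!kasan_check_range((unsigned long)src, len, false, _RET_IP_) ||
		    !kasan_check_range((unsigned long)dest, len, true, _RET_IP_))
			return NULL;

		return __memcpy(dest, src, len);
	}
With the alias made strong, plain memcpy() stays uninstrumented; instrumented translation units instead get compiler-emitted __asan_memcpy() calls, which the new definitions below provide.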
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
arch/x86/lib/memcpy_64.S | 5 ++---
arch/x86/lib/memmove_64.S | 4 +++-
arch/x86/lib/memset_64.S | 4 +++-
mm/kasan/kasan.h | 4 ++++
mm/kasan/shadow.c | 38 ++++++++++++++++++++++++++++++++++++++
tools/objtool/check.c | 3 +++
6 files changed, 53 insertions(+), 5 deletions(-)
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -7,7 +7,7 @@
#include <asm/alternative.h>
#include <asm/export.h>
-.pushsection .noinstr.text, "ax"
+.section .noinstr.text, "ax"
/*
* We build a jump to memcpy_orig by default which gets NOPped out on
@@ -42,7 +42,7 @@ SYM_FUNC_START(__memcpy)
SYM_FUNC_END(__memcpy)
EXPORT_SYMBOL(__memcpy)
-SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
+SYM_FUNC_ALIAS(memcpy, __memcpy)
EXPORT_SYMBOL(memcpy)
/*
@@ -183,4 +183,3 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
RET
SYM_FUNC_END(memcpy_orig)
-.popsection
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -13,6 +13,8 @@
#undef memmove
+.section .noinstr.text, "ax"
+
/*
* Implement memmove(). This can handle overlap between src and dst.
*
@@ -213,5 +215,5 @@ SYM_FUNC_START(__memmove)
SYM_FUNC_END(__memmove)
EXPORT_SYMBOL(__memmove)
-SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
+SYM_FUNC_ALIAS(memmove, __memmove)
EXPORT_SYMBOL(memmove)
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -6,6 +6,8 @@
#include <asm/alternative.h>
#include <asm/export.h>
+.section .noinstr.text, "ax"
+
/*
* ISO C memset - set a memory block to a byte value. This function uses fast
* string to get better performance than the original function. The code is
@@ -43,7 +45,7 @@ SYM_FUNC_START(__memset)
SYM_FUNC_END(__memset)
EXPORT_SYMBOL(__memset)
-SYM_FUNC_ALIAS_WEAK(memset, __memset)
+SYM_FUNC_ALIAS(memset, __memset)
EXPORT_SYMBOL(memset)
/*
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -551,6 +551,10 @@ void __asan_set_shadow_f3(const void *ad
void __asan_set_shadow_f5(const void *addr, size_t size);
void __asan_set_shadow_f8(const void *addr, size_t size);
+void *__asan_memset(void *addr, int c, size_t len);
+void *__asan_memmove(void *dest, const void *src, size_t len);
+void *__asan_memcpy(void *dest, const void *src, size_t len);
+
void __hwasan_load1_noabort(unsigned long addr);
void __hwasan_store1_noabort(unsigned long addr);
void __hwasan_load2_noabort(unsigned long addr);
--- a/mm/kasan/shadow.c
+++ b/mm/kasan/shadow.c
@@ -38,6 +38,12 @@ bool __kasan_check_write(const volatile
}
EXPORT_SYMBOL(__kasan_check_write);
+#ifndef CONFIG_GENERIC_ENTRY
+/*
+ * CONFIG_GENERIC_ENTRY relies on compiler emitted mem*() calls to not be
+ * instrumented. KASAN enabled toolchains should emit __asan_mem*() functions
+ * for the sites they want to instrument.
+ */
#undef memset
void *memset(void *addr, int c, size_t len)
{
@@ -68,6 +74,38 @@ void *memcpy(void *dest, const void *src
return __memcpy(dest, src, len);
}
+#endif
+
+void *__asan_memset(void *addr, int c, size_t len)
+{
+ if (!kasan_check_range((unsigned long)addr, len, true, _RET_IP_))
+ return NULL;
+
+ return __memset(addr, c, len);
+}
+EXPORT_SYMBOL(__asan_memset);
+
+#ifdef __HAVE_ARCH_MEMMOVE
+void *__asan_memmove(void *dest, const void *src, size_t len)
+{
+ if (!kasan_check_range((unsigned long)src, len, false, _RET_IP_) ||
+ !kasan_check_range((unsigned long)dest, len, true, _RET_IP_))
+ return NULL;
+
+ return __memmove(dest, src, len);
+}
+EXPORT_SYMBOL(__asan_memmove);
+#endif
+
+void *__asan_memcpy(void *dest, const void *src, size_t len)
+{
+ if (!kasan_check_range((unsigned long)src, len, false, _RET_IP_) ||
+ !kasan_check_range((unsigned long)dest, len, true, _RET_IP_))
+ return NULL;
+
+ return __memcpy(dest, src, len);
+}
+EXPORT_SYMBOL(__asan_memcpy);
void kasan_poison(const void *addr, size_t size, u8 value, bool init)
{
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -956,6 +956,9 @@ static const char *uaccess_safe_builtin[
"__asan_store16_noabort",
"__kasan_check_read",
"__kasan_check_write",
+ "__asan_memset",
+ "__asan_memmove",
+ "__asan_memcpy",
/* KASAN in-line */
"__asan_report_load_n_noabort",
"__asan_report_load1_noabort",
Peter Zijlstra
2022-Sep-19 10:00 UTC
[PATCH v2 43/44] sched: Always inline __this_cpu_preempt_check()
vmlinux.o: warning: objtool: in_entry_stack+0x9: call to
__this_cpu_preempt_check() leaves .noinstr.text section
vmlinux.o: warning: objtool: default_do_nmi+0x10: call to
__this_cpu_preempt_check() leaves .noinstr.text section
vmlinux.o: warning: objtool: fpu_idle_fpregs+0x41: call to
__this_cpu_preempt_check() leaves .noinstr.text section
vmlinux.o: warning: objtool: kvm_read_and_reset_apf_flags+0x1: call to
__this_cpu_preempt_check() leaves .noinstr.text section
vmlinux.o: warning: objtool: lockdep_hardirqs_on+0xb0: call to
__this_cpu_preempt_check() leaves .noinstr.text section
vmlinux.o: warning: objtool: lockdep_hardirqs_off+0xae: call to
__this_cpu_preempt_check() leaves .noinstr.text section
vmlinux.o: warning: objtool: irqentry_nmi_enter+0x69: call to
__this_cpu_preempt_check() leaves .noinstr.text section
vmlinux.o: warning: objtool: irqentry_nmi_exit+0x32: call to
__this_cpu_preempt_check() leaves .noinstr.text section
vmlinux.o: warning: objtool: acpi_processor_ffh_cstate_enter+0x9: call to
__this_cpu_preempt_check() leaves .noinstr.text section
vmlinux.o: warning: objtool: acpi_idle_enter+0x43: call to
__this_cpu_preempt_check() leaves .noinstr.text section
vmlinux.o: warning: objtool: acpi_idle_enter_s2idle+0x45: call to
__this_cpu_preempt_check() leaves .noinstr.text section
The CONFIG_DEBUG_PREEMPT=n stub is empty, but a plain 'static inline' is
only a hint; when the compiler declines to inline it, noinstr code is
left with an out-of-line call. Force-inline the stub so that cannot
happen.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
include/linux/percpu-defs.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -310,7 +310,7 @@ extern void __bad_size_call_parameter(vo
#ifdef CONFIG_DEBUG_PREEMPT
extern void __this_cpu_preempt_check(const char *op);
#else
-static inline void __this_cpu_preempt_check(const char *op) { }
+static __always_inline void __this_cpu_preempt_check(const char *op) { }
#endif
#define __pcpu_size_call_return(stem, variable) \
Peter Zijlstra
2022-Sep-19 10:00 UTC
[PATCH v2 44/44] arm64,riscv,perf: Remove RCU_NONIDLE() usage
The PM notifiers should no longer be run with RCU disabled (per the
previous patches), so this hack is no longer required either.
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
---
drivers/perf/arm_pmu.c | 11 +----------
drivers/perf/riscv_pmu_sbi.c | 8 +-------
2 files changed, 2 insertions(+), 17 deletions(-)
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -762,17 +762,8 @@ static void cpu_pm_pmu_setup(struct arm_
case CPU_PM_ENTER_FAILED:
/*
* Restore and enable the counter.
- * armpmu_start() indirectly calls
- *
- * perf_event_update_userpage()
- *
- * that requires RCU read locking to be functional,
- * wrap the call within RCU_NONIDLE to make the
- * RCU subsystem aware this cpu is not idle from
- * an RCU perspective for the armpmu_start() call
- * duration.
*/
- RCU_NONIDLE(armpmu_start(event, PERF_EF_RELOAD));
+ armpmu_start(event, PERF_EF_RELOAD);
break;
default:
break;
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -747,14 +747,8 @@ static int riscv_pm_pmu_notify(struct no
case CPU_PM_ENTER_FAILED:
/*
* Restore and enable the counter.
- *
- * Requires RCU read locking to be functional,
- * wrap the call within RCU_NONIDLE to make the
- * RCU subsystem aware this cpu is not idle from
- * an RCU perspective for the riscv_pmu_start() call
- * duration.
*/
- RCU_NONIDLE(riscv_pmu_start(event, PERF_EF_RELOAD));
+ riscv_pmu_start(event, PERF_EF_RELOAD);
break;
default:
break;
On Mon, Sep 19, 2022 at 12:17 PM Peter Zijlstra <peterz at infradead.org> wrote:
> [full cover letter and diffstat snipped]

Acked-by: Rafael J. Wysocki <rafael.j.wysocki at intel.com>

for the whole set and let me know if you want me to merge any of these
through cpuidle.

Thanks!
Because Nadav asked about tracing/kprobing idle, I had another go around
and noticed not all functions calling ct_cpuidle_enter are __cpuidle.
Basically all cpuidle_driver::enter functions should be __cpuidle; I'll
do that audit shortly.
For now this is ct_cpuidle_enter / CPU_IDLE_ENTER users.
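For reference, __cpuidle has historically been just a section annotation; this series builds its objtool/noinstr validation on the same idea. A sketch of the existing pieces this rests on (simplified from include/linux/cpu.h and kernel/cpu.c):
	#define __cpuidle	__section(".cpuidle.text")

	/* e.g. the NMI backtrace code uses the section bounds to spot idle PCs */
	bool cpu_in_idle(unsigned long pc)
	{
		return pc >= (unsigned long)__cpuidle_text_start &&
		       pc < (unsigned long)__cpuidle_text_end;
	}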
---
--- a/arch/arm/mach-imx/cpuidle-imx6q.c
+++ b/arch/arm/mach-imx/cpuidle-imx6q.c
@@ -17,8 +17,8 @@
static int num_idle_cpus = 0;
static DEFINE_RAW_SPINLOCK(cpuidle_lock);
-static int imx6q_enter_wait(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int index)
+static __cpuidle int imx6q_enter_wait(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
{
raw_spin_lock(&cpuidle_lock);
if (++num_idle_cpus == num_online_cpus())
--- a/arch/arm/mach-imx/cpuidle-imx6sx.c
+++ b/arch/arm/mach-imx/cpuidle-imx6sx.c
@@ -30,8 +30,8 @@ static int imx6sx_idle_finish(unsigned l
return 0;
}
-static int imx6sx_enter_wait(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int index)
+static __cpuidle int imx6sx_enter_wait(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
{
imx6_set_lpm(WAIT_UNCLOCKED);
--- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c
+++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
@@ -224,8 +224,8 @@ static void __init save_l2x0_context(voi
* 2 - CPUx L1 and logic lost + GIC lost: MPUSS OSWR
* 3 - CPUx L1 and logic lost + GIC + L2 lost: DEVICE OFF
*/
-int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state,
- bool rcuidle)
+__cpuidle int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state,
+ bool rcuidle)
{
struct omap4_cpu_pm_info *pm_info = &per_cpu(omap4_pm_info, cpu);
unsigned int save_state = 0, cpu_logic_state = PWRDM_POWER_RET;
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -175,7 +175,7 @@ static int omap34xx_do_sram_idle(unsigne
return 0;
}
-void omap_sram_idle(bool rcuidle)
+__cpuidle void omap_sram_idle(bool rcuidle)
{
/* Variable to tell what needs to be saved and restored
* in omap_sram_idle*/
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -62,7 +62,7 @@ int acpi_processor_ffh_lpi_probe(unsigne
return psci_acpi_cpu_init_idle(cpu);
}
-int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
+__cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
{
u32 state = lpi->address;
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -31,8 +31,8 @@
* Called from the CPUidle framework to program the device to the
* specified target state selected by the governor.
*/
-static int arm_enter_idle_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx)
+static __cpuidle int arm_enter_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
{
/*
* Pass idle state index to arm_cpuidle_suspend which in turn
--- a/drivers/cpuidle/cpuidle-big_little.c
+++ b/drivers/cpuidle/cpuidle-big_little.c
@@ -122,8 +122,8 @@ static int notrace bl_powerdown_finisher
* Called from the CPUidle framework to program the device to the
* specified target state selected by the governor.
*/
-static int bl_enter_powerdown(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx)
+static __cpuidle int bl_enter_powerdown(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
{
cpu_pm_enter();
ct_cpuidle_enter();
--- a/drivers/cpuidle/cpuidle-mvebu-v7.c
+++ b/drivers/cpuidle/cpuidle-mvebu-v7.c
@@ -25,9 +25,9 @@
static int (*mvebu_v7_cpu_suspend)(int);
-static int mvebu_v7_enter_idle(struct cpuidle_device *dev,
- struct cpuidle_driver *drv,
- int index)
+static __cpuidle int mvebu_v7_enter_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
{
int ret;
bool deepidle = false;
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -49,14 +49,9 @@ static inline u32 psci_get_domain_state(
return __this_cpu_read(domain_state);
}
-static inline int psci_enter_state(int idx, u32 state)
-{
- return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter, idx, state);
-}
-
-static int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx,
- bool s2idle)
+static __cpuidle int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx,
+ bool s2idle)
{
struct psci_cpuidle_data *data = this_cpu_ptr(&psci_cpuidle_data);
u32 *states = data->psci_states;
@@ -192,12 +187,12 @@ static void psci_idle_init_cpuhp(void)
pr_warn("Failed %d while setup cpuhp state\n", err);
}
-static int psci_enter_idle_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx)
+static __cpuidle int psci_enter_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
{
u32 *state = __this_cpu_read(psci_cpuidle_data.psci_states);
- return psci_enter_state(idx, state[idx]);
+ return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter, idx, state[idx]);
}
static const struct of_device_id psci_idle_state_match[] = {
--- a/drivers/cpuidle/cpuidle-qcom-spm.c
+++ b/drivers/cpuidle/cpuidle-qcom-spm.c
@@ -58,8 +58,8 @@ static int qcom_cpu_spc(struct spm_drive
return ret;
}
-static int spm_enter_idle_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx)
+static __cpuidle int spm_enter_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
{
struct cpuidle_qcom_spm_data *data = container_of(drv, struct cpuidle_qcom_spm_data, cpuidle_driver);
--- a/drivers/cpuidle/cpuidle-riscv-sbi.c
+++ b/drivers/cpuidle/cpuidle-riscv-sbi.c
@@ -93,17 +93,17 @@ static int sbi_suspend(u32 state)
return sbi_suspend_finisher(state, 0, 0);
}
-static int sbi_cpuidle_enter_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx)
+static __cpuidle int sbi_cpuidle_enter_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
{
u32 *states = __this_cpu_read(sbi_cpuidle_data.states);
return CPU_PM_CPU_IDLE_ENTER_PARAM(sbi_suspend, idx, states[idx]);
}
-static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx,
- bool s2idle)
+static __cpuidle int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx,
+ bool s2idle)
{
struct sbi_cpuidle_data *data = this_cpu_ptr(&sbi_cpuidle_data);
u32 *states = data->states;
--- a/drivers/cpuidle/cpuidle-tegra.c
+++ b/drivers/cpuidle/cpuidle-tegra.c
@@ -160,8 +160,8 @@ static int tegra_cpuidle_coupled_barrier
return 0;
}
-static int tegra_cpuidle_state_enter(struct cpuidle_device *dev,
- int index, unsigned int cpu)
+static __cpuidle int tegra_cpuidle_state_enter(struct cpuidle_device *dev,
+ int index, unsigned int cpu)
{
int err;
@@ -226,9 +226,9 @@ static int tegra_cpuidle_adjust_state_in
return index;
}
-static int tegra_cpuidle_enter(struct cpuidle_device *dev,
- struct cpuidle_driver *drv,
- int index)
+static __cpuidle int tegra_cpuidle_enter(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
{
bool do_rcu = drv->states[index].flags & CPUIDLE_FLAG_RCU_IDLE;
unsigned int cpu = cpu_logical_map(dev->cpu);
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -137,11 +137,13 @@ int cpuidle_find_deepest_state(struct cp
}
#ifdef CONFIG_SUSPEND
-static void enter_s2idle_proper(struct cpuidle_driver *drv,
- struct cpuidle_device *dev, int index)
+static __cpuidle void enter_s2idle_proper(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev, int index)
{
- ktime_t time_start, time_end;
struct cpuidle_state *target_state = &drv->states[index];
+ ktime_t time_start, time_end;
+
+ instrumentation_begin();
time_start = ns_to_ktime(local_clock());
@@ -152,13 +154,18 @@ static void enter_s2idle_proper(struct c
* suspended is generally unsafe.
*/
stop_critical_timings();
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
ct_cpuidle_enter();
+ /* Annotate away the indirect call */
+ instrumentation_begin();
+ }
target_state->enter_s2idle(dev, drv, index);
if (WARN_ON_ONCE(!irqs_disabled()))
raw_local_irq_disable();
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+ instrumentation_end();
ct_cpuidle_exit();
+ }
tick_unfreeze();
start_critical_timings();
@@ -166,6 +173,7 @@ static void enter_s2idle_proper(struct c
dev->states_usage[index].s2idle_time += ktime_us_delta(time_end,
time_start);
dev->states_usage[index].s2idle_usage++;
+ instrumentation_end();
}
/**
@@ -200,8 +208,9 @@ int cpuidle_enter_s2idle(struct cpuidle_
* @drv: cpuidle driver for this cpu
* @index: index into the states table in @drv of the state to enter
*/
-int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
- int index)
+__cpuidle int cpuidle_enter_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
{
int entered_state;
@@ -209,6 +218,8 @@ int cpuidle_enter_state(struct cpuidle_d
bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
ktime_t time_start, time_end;
+ instrumentation_begin();
+
/*
* Tell the time framework to switch to a broadcast timer because our
* local timer will be shut down. If a local timer is used from another
@@ -235,15 +246,21 @@ int cpuidle_enter_state(struct cpuidle_d
time_start = ns_to_ktime(local_clock());
stop_critical_timings();
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
ct_cpuidle_enter();
+ /* Annotate away the indirect call */
+ instrumentation_begin();
+ }
entered_state = target_state->enter(dev, drv, index);
+
if (WARN_ONCE(!irqs_disabled(), "%ps leaked IRQ state",
target_state->enter))
raw_local_irq_disable();
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+ instrumentation_end();
ct_cpuidle_exit();
+ }
start_critical_timings();
sched_clock_idle_wakeup_event();
@@ -306,6 +323,8 @@ int cpuidle_enter_state(struct cpuidle_d
dev->states_usage[index].rejected++;
}
+ instrumentation_end();
+
return entered_state;
}
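Written out linearly, the shape both functions above converge on is this (a sketch of the rule, not literal kernel code; rcu_idle_state stands in for 'target_state->flags & CPUIDLE_FLAG_RCU_IDLE'): everything between ct_cpuidle_enter() and ct_cpuidle_exit() runs with RCU not watching and must behave like noinstr, while instrumentation_begin()/end() pairs mark the spots objtool should accept anyway, such as the indirect ->enter() call:
	instrumentation_begin();		/* normal, traceable code */
	...
	stop_critical_timings();
	if (!rcu_idle_state) {
		ct_cpuidle_enter();		/* RCU stops watching here */
		instrumentation_begin();	/* annotate the indirect call */
	}

	target_state->enter(dev, drv, index);	/* the low-level idle code */

	if (!rcu_idle_state) {
		instrumentation_end();
		ct_cpuidle_exit();		/* RCU watching again */
	}
	start_critical_timings();
	...
	instrumentation_end();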
On Mon, 19 Sept 2022 at 12:18, Peter Zijlstra <peterz at infradead.org> wrote:
> [full cover letter and diffstat snipped]

Thanks for cleaning up the situation!

I have applied this on a plain v6.0 (only one patch had a minor
conflict) and tested this on an ARM64 Dragonboard 410c, which uses
cpuidle-psci and the cpuidle-psci-domain. I didn't observe any
problems, so feel free to add:

Tested-by: Ulf Hansson <ulf.hansson at linaro.org>

Kind regards
Uffe