Jan Beulich
2013-Aug-29 13:04 UTC
[PATCH 0/3] x86: mwait_idle improvements ported from Linux
1: x86/mwait_idle: remove assumption of one C-state per MWAIT flag 2: x86/mwait_idle: export both C1 and C1E 3: x86/mwait_idle: initial C8, C9, C10 support Signed-off-by: Len Brown <len.brown@intel.com> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Jan Beulich
2013-Aug-29 13:11 UTC
[PATCH 1/3] x86/mwait_idle: remove assumption of one C-state per MWAIT flag
Remove the assumption that cstate_tables are indexed by MWAIT flag values. Each entry identifies itself via its own flags value. This change is needed to support multiple states that share the same MWAIT flags. Note that this can have an effect on what state is described by ''N'' on cmdline max_cstate=N on some systems. Signed-off-by: Len Brown <len.brown@intel.com> Avoided the effect of changing the meaning of "max_cstate=". Drop MWAIT_MAX_NUM_CSTATES (done differently in a prior patch on Linux). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/cpu/mwait-idle.c +++ b/xen/arch/x86/cpu/mwait-idle.c @@ -116,146 +116,146 @@ static const struct cpuidle_state { */ #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) #define MWAIT2flg(eax) ((eax & 0xFF) << 24) +#define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) +#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK) /* * States are indexed by the cstate number, * which is also the index into the MWAIT hint array. * Thus C0 is a dummy. */ -static const struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = { - { /* MWAIT C0 */ }, - { /* MWAIT C1 */ +static const struct cpuidle_state nehalem_cstates[] = { + { .name = "C1-NHM", .flags = MWAIT2flg(0x00), .exit_latency = 3, .target_residency = 6, }, - { /* MWAIT C2 */ + { .name = "C3-NHM", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 20, .target_residency = 80, }, - { /* MWAIT C3 */ + { .name = "C6-NHM", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, .target_residency = 800, - } + }, + {} }; -static const struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = { - { /* MWAIT C0 */ }, - { /* MWAIT C1 */ +static const struct cpuidle_state snb_cstates[] = { + { .name = "C1-SNB", .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 1, }, - { /* MWAIT C2 */ + { .name = "C3-SNB", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, .target_residency = 211, }, - { /* MWAIT C3 */ + { .name = "C6-SNB", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 104, .target_residency = 345, }, - { /* MWAIT C4 */ + { .name = "C7-SNB", .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 109, .target_residency = 345, - } + }, + {} }; -static const struct cpuidle_state ivb_cstates[MWAIT_MAX_NUM_CSTATES] = { - { /* MWAIT C0 */ }, - { /* MWAIT C1 */ +static const struct cpuidle_state ivb_cstates[] = { + { .name = "C1-IVB", .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 1, }, - { /* MWAIT C2 */ + { .name = "C3-IVB", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, .target_residency = 156, }, - { /* MWAIT C3 */ + { .name = "C6-IVB", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, .target_residency = 300, }, - { /* MWAIT C4 */ + { .name = "C7-IVB", .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 87, .target_residency = 300, - } + }, + {} }; -static const struct cpuidle_state hsw_cstates[MWAIT_MAX_NUM_CSTATES] = { - { /* MWAIT C0 */ }, - { /* MWAIT C1 */ +static const struct cpuidle_state hsw_cstates[] = { + { .name = "C1-HSW", .flags = MWAIT2flg(0x00), .exit_latency = 2, .target_residency = 2, }, - { /* MWAIT C2 */ + { .name = "C3-HSW", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 33, .target_residency = 100, }, - { /* MWAIT C3 */ + { .name = "C6-HSW", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, .target_residency = 400, }, - { /* MWAIT C4 */ + { .name = "C7s-HSW", .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, .target_residency = 500, }, + {} }; -static const struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { - { /* MWAIT C0 */ }, - { /* MWAIT C1 */ +static const struct cpuidle_state atom_cstates[] = { + { .name = "C1-ATM", .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 4, }, - { /* MWAIT C2 */ + { .name = "C2-ATM", .flags = MWAIT2flg(0x10), .exit_latency = 20, .target_residency = 80, }, - { /* MWAIT C3 */ }, - { /* MWAIT C4 */ + { .name = "C4-ATM", .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 100, .target_residency = 400, }, - { /* MWAIT C5 */ }, - { /* MWAIT C6 */ + { .name = "C6-ATM", .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 140, .target_residency = 560, - } + }, + {} }; static void mwait_idle(void) @@ -476,29 +476,25 @@ static int mwait_idle_cpu_init(struct no dev->count = 1; - for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) { - unsigned int num_substates; + for (cstate = 0; cpuidle_state_table[cstate].target_residency; ++cstate) { + unsigned int num_substates, hint, state, substate; struct acpi_processor_cx *cx; - if (cstate > max_cstate) { + hint = flg2MWAIT(cpuidle_state_table[cstate].flags); + state = MWAIT_HINT2CSTATE(hint) + 1; + substate = MWAIT_HINT2SUBSTATE(hint); + + if (state > max_cstate) { printk(PREFIX "max C-state %u reached\n", max_cstate); break; } /* Does the state exist in CPUID.MWAIT? */ - num_substates = (mwait_substates >> (cstate * 4)) - & MWAIT_SUBSTATE_MASK; - if (!num_substates) - continue; - /* Is the state not enabled? */ - if (!cpuidle_state_table[cstate].target_residency) { - /* does the driver not know about the state? */ - if (!pm_idle_save && !*cpuidle_state_table[cstate].name) - pr_debug(PREFIX "unaware of family %#x model %#x MWAIT %u\n", - boot_cpu_data.x86, - boot_cpu_data.x86_model, cstate); + num_substates = (mwait_substates >> (state * 4)) + & MWAIT_SUBSTATE_MASK; + /* if sub-state in table is not enumerated by CPUID */ + if (substate >= num_substates) continue; - } if (dev->count >= ACPI_PROCESSOR_MAX_POWER) { printk(PREFIX "max C-state count of %u reached\n", @@ -506,15 +502,13 @@ static int mwait_idle_cpu_init(struct no break; } - if (cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) { - if (pm_idle_save) - continue; + if (state > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) && + !pm_idle_save) setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE); - } cx = dev->states + dev->count; - cx->type = cstate; - cx->address = flg2MWAIT(cpuidle_state_table[cstate].flags); + cx->type = state; + cx->address = hint; cx->entry_method = ACPI_CSTATE_EM_FFH; cx->latency = cpuidle_state_table[cstate].exit_latency; cx->target_residency --- a/xen/include/asm-x86/mwait.h +++ b/xen/include/asm-x86/mwait.h @@ -4,7 +4,6 @@ #define MWAIT_SUBSTATE_MASK 0xf #define MWAIT_CSTATE_MASK 0xf #define MWAIT_SUBSTATE_SIZE 4 -#define MWAIT_MAX_NUM_CSTATES 8 #define CPUID_MWAIT_LEAF 5 #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Here we disable HW promotion of C1 to C1E and export both C1 and C1E as distinct C-states. This allows a cpuidle governor to choose a lower latency C-state than C1E when necessary to satisfy performance and QOS constraints -- and still save power versus polling. This also corrects the erroneous latency previously reported for C1E -- it is 10usec, not 1usec. Signed-off-by: Len Brown <len.brown@intel.com> Avoided the effect of changing the meaning of "max_cstate=". Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/cpu/mwait-idle.c +++ b/xen/arch/x86/cpu/mwait-idle.c @@ -88,6 +88,7 @@ struct idle_cpu { * Indicate which enable bits to clear here. */ unsigned long auto_demotion_disable_flags; + bool_t disable_promotion_to_c1e; }; static const struct idle_cpu *icpu; @@ -132,6 +133,12 @@ static const struct cpuidle_state nehale .target_residency = 6, }, { + .name = "C1E-NHM", + .flags = MWAIT2flg(0x01), + .exit_latency = 10, + .target_residency = 20, + }, + { .name = "C3-NHM", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 20, @@ -150,8 +157,14 @@ static const struct cpuidle_state snb_cs { .name = "C1-SNB", .flags = MWAIT2flg(0x00), - .exit_latency = 1, - .target_residency = 1, + .exit_latency = 2, + .target_residency = 2, + }, + { + .name = "C1E-SNB", + .flags = MWAIT2flg(0x01), + .exit_latency = 10, + .target_residency = 20, }, { .name = "C3-SNB", @@ -182,6 +195,12 @@ static const struct cpuidle_state ivb_cs .target_residency = 1, }, { + .name = "C1E-IVB", + .flags = MWAIT2flg(0x01), + .exit_latency = 10, + .target_residency = 20, + }, + { .name = "C3-IVB", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, @@ -210,6 +229,12 @@ static const struct cpuidle_state hsw_cs .target_residency = 2, }, { + .name = "C1E-HSW", + .flags = MWAIT2flg(0x01), + .exit_latency = 10, + .target_residency = 20, + }, + { .name = "C3-HSW", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 33, @@ -232,10 +257,10 @@ static const struct cpuidle_state hsw_cs static const struct cpuidle_state atom_cstates[] = { { - .name = "C1-ATM", + .name = "C1E-ATM", .flags = MWAIT2flg(0x00), - .exit_latency = 1, - .target_residency = 4, + .exit_latency = 10, + .target_residency = 20, }, { .name = "C2-ATM", @@ -354,9 +379,19 @@ static void auto_demotion_disable(void * wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); } +static void c1e_promotion_disable(void *dummy) +{ + u64 msr_bits; + + rdmsrl(MSR_IA32_POWER_CTL, msr_bits); + msr_bits &= ~0x2; + wrmsrl(MSR_IA32_POWER_CTL, msr_bits); +} + static const struct idle_cpu idle_cpu_nehalem = { .state_table = nehalem_cstates, .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, + .disable_promotion_to_c1e = 1, }; static const struct idle_cpu idle_cpu_atom = { @@ -370,14 +405,17 @@ static const struct idle_cpu idle_cpu_li static const struct idle_cpu idle_cpu_snb = { .state_table = snb_cstates, + .disable_promotion_to_c1e = 1, }; static const struct idle_cpu idle_cpu_ivb = { .state_table = ivb_cstates, + .disable_promotion_to_c1e = 1, }; static const struct idle_cpu idle_cpu_hsw = { .state_table = hsw_cstates, + .disable_promotion_to_c1e = 1, }; #define ICPU(model, cpu) { 6, model, &idle_cpu_##cpu } @@ -520,6 +558,9 @@ static int mwait_idle_cpu_init(struct no if (icpu->auto_demotion_disable_flags) on_selected_cpus(cpumask_of(cpu), auto_demotion_disable, NULL, 1); + if (icpu->disable_promotion_to_c1e) + on_selected_cpus(cpumask_of(cpu), c1e_promotion_disable, NULL, 1); + return NOTIFY_DONE; } --- a/xen/include/asm-x86/msr-index.h +++ b/xen/include/asm-x86/msr-index.h @@ -83,6 +83,8 @@ #define MSR_IA32_LASTINTFROMIP 0x000001dd #define MSR_IA32_LASTINTTOIP 0x000001de +#define MSR_IA32_POWER_CTL 0x000001fc + #define MSR_IA32_MTRR_PHYSBASE0 0x00000200 #define MSR_IA32_MTRR_PHYSMASK0 0x00000201 #define MSR_IA32_MTRR_PHYSBASE1 0x00000202 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Allow mwait_idle to utilize C8, C9, C10 when they are present on... "Fourth Generation Intel(R) Core(TM) Processors", which are based on Intel(R) microarchitecture code name Haswell. Signed-off-by: Len Brown <len.brown@intel.com> Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/cpu/mwait-idle.c +++ b/xen/arch/x86/cpu/mwait-idle.c @@ -252,6 +252,24 @@ static const struct cpuidle_state hsw_cs .exit_latency = 166, .target_residency = 500, }, + { + .name = "C8-HSW", + .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 300, + .target_residency = 900, + }, + { + .name = "C9-HSW", + .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 600, + .target_residency = 1800, + }, + { + .name = "C10-HSW", + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 2600, + .target_residency = 7700, + }, {} }; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Keir Fraser
2013-Aug-29 17:47 UTC
Re: [PATCH 0/3] x86: mwait_idle improvements ported from Linux
On 29/08/2013 14:04, "Jan Beulich" <JBeulich@suse.com> wrote:> 1: x86/mwait_idle: remove assumption of one C-state per MWAIT flag > 2: x86/mwait_idle: export both C1 and C1E > 3: x86/mwait_idle: initial C8, C9, C10 support > > Signed-off-by: Len Brown <len.brown@intel.com> > Signed-off-by: Jan Beulich <jbeulich@suse.com>Acked-by: Keir Fraser <keir@xen.org>> > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
Matt Wilson
2013-Aug-29 20:20 UTC
Re: [PATCH 0/3] x86: mwait_idle improvements ported from Linux
On Thu, Aug 29, 2013 at 02:04:54PM +0100, Jan Beulich wrote:> 1: x86/mwait_idle: remove assumption of one C-state per MWAIT flag > 2: x86/mwait_idle: export both C1 and C1E > 3: x86/mwait_idle: initial C8, C9, C10 support > > Signed-off-by: Len Brown <len.brown@intel.com> > Signed-off-by: Jan Beulich <jbeulich@suse.com>Some procedure questions: Should these commits mention the upstream Linux commits from which they are derived? Are we following the [] convention for modifications made when applying work? Should Len be the Author on these patches? --msw
Jan Beulich
2013-Aug-30 07:53 UTC
Re: [PATCH 0/3] x86: mwait_idle improvements ported from Linux
>>> On 29.08.13 at 22:20, Matt Wilson <msw@amazon.com> wrote: > On Thu, Aug 29, 2013 at 02:04:54PM +0100, Jan Beulich wrote: >> 1: x86/mwait_idle: remove assumption of one C-state per MWAIT flag >> 2: x86/mwait_idle: export both C1 and C1E >> 3: x86/mwait_idle: initial C8, C9, C10 support >> >> Signed-off-by: Len Brown <len.brown@intel.com> >> Signed-off-by: Jan Beulich <jbeulich@suse.com> > > Some procedure questions: > > Should these commits mention the upstream Linux commits from which > they are derived?That''s of questionable use imo.> Are we following the [] convention for modifications made when > applying work?For brief notes I do this; for more extended notes we''re used to insert text between the individual Signed-off-by''s (on the hypervisor side at least).> Should Len be the Author on these patches?He being the first one in the Signed-off-by sequence, he''ll end up being the author with the way my commit script works (yes, I could have added a From: line, but I generally consider this useful only when this doesn''t match the first Signed-off-by, as then it''ll be the person named by From: who''s going to be the author; in no case do I consider the From: line to be useful in a git commit''s description, as there it''s clearly redundant with git''s meta data). Albeit it''s always questionable whom to consider being the author when the original commit was significantly reworked. I''m trying to set some boundary when doing this: When the changes are too heavy, I drop the original Signed-off-by and name original authorship in the description instead. For the first two patches on this series, this was really on the edge between the two models (but when in doubt I prefer crediting the original author). Jan
Nakajima, Jun
2013-Sep-02 19:21 UTC
Re: [PATCH 3/3] x86/mwait_idle: initial C8, C9, C10 support
On Thu, Aug 29, 2013 at 6:12 AM, Jan Beulich <JBeulich@suse.com> wrote:> Allow mwait_idle to utilize C8, C9, C10 when they are present on... > "Fourth Generation Intel(R) Core(TM) Processors", which are based on > Intel(R) microarchitecture code name Haswell. > > Signed-off-by: Len Brown <len.brown@intel.com> > Signed-off-by: Jan Beulich <jbeulich@suse.com> > > --- a/xen/arch/x86/cpu/mwait-idle.c > +++ b/xen/arch/x86/cpu/mwait-idle.c > @@ -252,6 +252,24 @@ static const struct cpuidle_state hsw_cs > .exit_latency = 166, > .target_residency = 500, > }, > + { > + .name = "C8-HSW", > + .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, > + .exit_latency = 300, > + .target_residency = 900, > + }, > + { > + .name = "C9-HSW", > + .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, > + .exit_latency = 600, > + .target_residency = 1800, > + }, > + { > + .name = "C10-HSW", > + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, > + .exit_latency = 2600, > + .target_residency = 7700, > + }, > {} > }; > > > >Acked-by: Jun Nakajima <jun.nakajima@intel.com> -- Jun Intel Open Source Technology Center