Boris Ostrovsky
2012-Jun-04 18:47 UTC
[PATCH] xenpm, x86: Fix reporting of idle state average residency times
# HG changeset patch # User Boris Ostrovsky <boris.ostrovsky@amd.com> # Date 1338831484 -7200 # Node ID 390346181927d0350f83c4046470a0b9b28c1686 # Parent 6bea63e6c780de6303361e5f190d6925996cd2a2 xenpm, x86: Fix reporting of idle state average residency times If CPU stays in the same idle state for the full duration of xenpm sample then average residency may not be reported correctly since usage counter will not be incremented. In addition, in order to calculate averages correctly residence time and usage counter should be read and written atomically. Signed-off-by: Boris Ostrovsky <boris.ostrovsky@amd.com> diff -r 6bea63e6c780 -r 390346181927 tools/misc/xenpm.c --- a/tools/misc/xenpm.c Sat Jun 02 08:39:45 2012 +0100 +++ b/tools/misc/xenpm.c Mon Jun 04 19:38:04 2012 +0200 @@ -389,7 +389,14 @@ static void signal_int_handler(int signo res = ( diff >= 0 ) ? diff : 0; triggers = cxstat_end[i].triggers[j] - cxstat_start[i].triggers[j]; - avg_res = (triggers==0) ? 0: (double)res/triggers/1000000.0; + /* + * triggers may be zero if the CPU has been in this state for + * the whole sample or if it never entered the state + */ + if ( triggers == 0 && cxstat_end[i].last == j ) + avg_res = (double)sum_cx[i]/1000000.0; + else + avg_res = (triggers==0) ? 0: (double)res/triggers/1000000.0; printf(" C%d\t%"PRIu64"\t(%5.2f%%)\t%.2f\n", j, res/1000000UL, 100 * res / (double)sum_cx[i], avg_res ); } diff -r 6bea63e6c780 -r 390346181927 xen/arch/x86/acpi/cpu_idle.c --- a/xen/arch/x86/acpi/cpu_idle.c Sat Jun 02 08:39:45 2012 +0100 +++ b/xen/arch/x86/acpi/cpu_idle.c Mon Jun 04 19:38:04 2012 +0200 @@ -165,29 +165,35 @@ static void print_acpi_power(uint32_t cp { uint32_t i, idle_usage = 0; uint64_t res, idle_res = 0; + u32 usage; + u8 last_state_idx; printk("==cpu%d==\n", cpu); - printk("active state:\t\tC%d\n", - power->last_state ? power->last_state->idx : -1); + last_state_idx = power->last_state ? power->last_state->idx : -1; + printk("active state:\t\tC%d\n", last_state_idx); printk("max_cstate:\t\tC%d\n", max_cstate); printk("states:\n"); for ( i = 1; i < power->count; i++ ) { + spin_lock_irq(&power->stat_lock); res = tick_to_ns(power->states[i].time); - idle_usage += power->states[i].usage; + usage = power->states[i].usage; + spin_unlock_irq(&power->stat_lock); + + idle_usage += usage; idle_res += res; - printk((power->last_state && power->last_state->idx == i) ? - " *" : " "); + printk((last_state_idx == i) ? " *" : " "); printk("C%d:\t", i); printk("type[C%d] ", power->states[i].type); printk("latency[%03d] ", power->states[i].latency); - printk("usage[%08d] ", power->states[i].usage); + printk("usage[%08d] ", usage); printk("method[%5s] ", acpi_cstate_method_name[power->states[i].entry_method]); printk("duration[%"PRId64"]\n", res); } - printk(" C0:\tusage[%08d] duration[%"PRId64"]\n", + printk((last_state_idx == 0) ? " *" : " "); + printk("C0:\tusage[%08d] duration[%"PRId64"]\n", idle_usage, NOW() - idle_res); print_hw_residencies(cpu); @@ -384,12 +390,29 @@ bool_t errata_c6_eoi_workaround(void) return (fix_needed && cpu_has_pending_apic_eoi()); } +static inline void acpi_update_idle_stats(struct acpi_processor_power *power, + struct acpi_processor_cx *cx, + int64_t sleep_ticks) +{ + /* Interrupts are disabled */ + + spin_lock(&power->stat_lock); + + cx->usage++; + if ( sleep_ticks > 0 ) + { + power->last_residency = tick_to_ns(sleep_ticks) / 1000UL; + cx->time += sleep_ticks; + } + + spin_unlock(&power->stat_lock); +} + static void acpi_processor_idle(void) { struct acpi_processor_power *power = processor_powers[smp_processor_id()]; struct acpi_processor_cx *cx = NULL; int next_state; - int64_t sleep_ticks = 0; uint64_t t1, t2 = 0; u32 exp = 0, pred = 0; u32 irq_traced[4] = { 0 }; @@ -462,10 +485,10 @@ static void acpi_processor_idle(void) /* Trace cpu idle exit */ TRACE_6D(TRC_PM_IDLE_EXIT, cx->idx, t2, irq_traced[0], irq_traced[1], irq_traced[2], irq_traced[3]); + /* Update statistics */ + acpi_update_idle_stats(power, cx, ticks_elapsed(t1, t2)); /* Re-enable interrupts */ local_irq_enable(); - /* Compute time (ticks) that we were actually asleep */ - sleep_ticks = ticks_elapsed(t1, t2); break; } @@ -537,28 +560,26 @@ static void acpi_processor_idle(void) TRACE_6D(TRC_PM_IDLE_EXIT, cx->idx, t2, irq_traced[0], irq_traced[1], irq_traced[2], irq_traced[3]); + /* Update statistics */ + acpi_update_idle_stats(power, cx, ticks_elapsed(t1, t2)); /* Re-enable interrupts */ local_irq_enable(); /* recovering APIC */ lapic_timer_on(); - /* Compute time (ticks) that we were actually asleep */ - sleep_ticks = ticks_elapsed(t1, t2); break; default: + /* Now in C0 */ + power->last_state = &power->states[0]; local_irq_enable(); sched_tick_resume(); cpufreq_dbs_timer_resume(); return; } - cx->usage++; - if ( sleep_ticks > 0 ) - { - power->last_residency = tick_to_ns(sleep_ticks) / 1000UL; - cx->time += sleep_ticks; - } + /* Now in C0 */ + power->last_state = &power->states[0]; sched_tick_resume(); cpufreq_dbs_timer_resume(); @@ -662,6 +683,7 @@ static int cpuidle_init_cpu(int cpu) acpi_power->states[1].type = ACPI_STATE_C1; acpi_power->states[1].entry_method = ACPI_CSTATE_EM_HALT; acpi_power->safe_state = &acpi_power->states[1]; + spin_lock_init(&acpi_power->stat_lock); return 0; } @@ -1064,7 +1086,8 @@ uint32_t pmstat_get_cx_nr(uint32_t cpuid int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat) { struct acpi_processor_power *power = processor_powers[cpuid]; - uint64_t usage, res, idle_usage = 0, idle_res = 0; + uint64_t idle_usage = 0, idle_res = 0; + uint64_t usage[ACPI_PROCESSOR_MAX_POWER], res[ACPI_PROCESSOR_MAX_POWER]; int i; struct hw_residencies hw_res; @@ -1084,16 +1107,14 @@ int pmstat_get_cx_stat(uint32_t cpuid, s if ( pm_idle_save == NULL ) { /* C1 */ - usage = 1; - res = stat->idle_time; - if ( copy_to_guest_offset(stat->triggers, 1, &usage, 1) || - copy_to_guest_offset(stat->residencies, 1, &res, 1) ) - return -EFAULT; + usage[1] = 1; + res[1] = stat->idle_time; /* C0 */ - res = NOW() - res; - if ( copy_to_guest_offset(stat->triggers, 0, &usage, 1) || - copy_to_guest_offset(stat->residencies, 0, &res, 1) ) + res[0] = NOW() - res[1]; + + if ( copy_to_guest_offset(stat->triggers, 0, &usage[0], 2) || + copy_to_guest_offset(stat->residencies, 0, &res[0], 2) ) return -EFAULT; stat->pc2 = 0; @@ -1110,21 +1131,25 @@ int pmstat_get_cx_stat(uint32_t cpuid, s { if ( i != 0 ) { - usage = power->states[i].usage; - res = tick_to_ns(power->states[i].time); - idle_usage += usage; - idle_res += res; + spin_lock_irq(&power->stat_lock); + usage[i] = power->states[i].usage; + res[i] = tick_to_ns(power->states[i].time); + spin_unlock_irq(&power->stat_lock); + + idle_usage += usage[i]; + idle_res += res[i]; } else { - usage = idle_usage; - res = NOW() - idle_res; + usage[i] = idle_usage; + res[i] = NOW() - idle_res; } - if ( copy_to_guest_offset(stat->triggers, i, &usage, 1) || - copy_to_guest_offset(stat->residencies, i, &res, 1) ) - return -EFAULT; } + if ( copy_to_guest_offset(stat->triggers, 0, &usage[0], power->count) || + copy_to_guest_offset(stat->residencies, 0, &res[0], power->count) ) + return -EFAULT; + get_hw_residencies(cpuid, &hw_res); stat->pc2 = hw_res.pc2; diff -r 6bea63e6c780 -r 390346181927 xen/include/xen/cpuidle.h --- a/xen/include/xen/cpuidle.h Sat Jun 02 08:39:45 2012 +0100 +++ b/xen/include/xen/cpuidle.h Mon Jun 04 19:38:04 2012 +0200 @@ -28,6 +28,7 @@ #define _XEN_CPUIDLE_H #include <xen/cpumask.h> +#include <xen/spinlock.h> #define ACPI_PROCESSOR_MAX_POWER 8 #define CPUIDLE_NAME_LEN 16 @@ -69,6 +70,7 @@ struct acpi_processor_power void *gdata; /* governor specific data */ u32 last_residency; u32 count; + spinlock_t stat_lock; struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER]; };