Wei, Gang
2010-Apr-12 04:45 UTC
[Xen-devel] [PATCH]CPUFREQ: Fix two racing issues during cpu hotplug
CPUFREQ: Fix two racing issues during cpu hotplug Move cpufreq_del_cpu() call into __cpu_disable to eliminate racing with dbs timer handler on policy & drv_data. Put it after local_irq_enable because xmalloc/xfree in cpufreq_del_cpu assert for this. Change access to cpufreq_statistic_lock from spin_lock to spin_lock_irqsave to avoid statistic data access racing between cpufreq_statistic_exit and dbs timer handler. Signed-off-by: Wei Gang <gang.wei@intel.com> diff -r adce8bc43fcc xen/arch/x86/smpboot.c --- a/xen/arch/x86/smpboot.c Tue Apr 06 07:16:47 2010 +0100 +++ b/xen/arch/x86/smpboot.c Fri Apr 09 11:54:42 2010 +0800 @@ -1293,6 +1293,7 @@ int __cpu_disable(void) clear_local_APIC(); /* Allow any queued timer interrupts to get serviced */ local_irq_enable(); + cpufreq_del_cpu(cpu); mdelay(1); local_irq_disable(); @@ -1362,8 +1363,6 @@ int cpu_down(unsigned int cpu) } printk("Prepare to bring CPU%d down...\n", cpu); - - cpufreq_del_cpu(cpu); err = stop_machine_run(take_cpu_down, NULL, cpu); if (err < 0) diff -r adce8bc43fcc xen/drivers/acpi/pmstat.c --- a/xen/drivers/acpi/pmstat.c Tue Apr 06 07:16:47 2010 +0100 +++ b/xen/drivers/acpi/pmstat.c Fri Apr 09 11:39:27 2010 +0800 @@ -86,15 +86,17 @@ int do_get_pm_info(struct xen_sysctl_get case PMSTAT_get_pxstat: { uint32_t ct; - struct pm_px *pxpt = cpufreq_statistic_data[op->cpuid]; + struct pm_px *pxpt; spinlock_t *cpufreq_statistic_lock = &per_cpu(cpufreq_statistic_lock, op->cpuid); - - spin_lock(cpufreq_statistic_lock); - + unsigned long flags; + + spin_lock_irqsave(cpufreq_statistic_lock, flags); + + pxpt = cpufreq_statistic_data[op->cpuid]; if ( !pxpt || !pxpt->u.pt || !pxpt->u.trans_pt ) { - spin_unlock(cpufreq_statistic_lock); + spin_unlock_irqrestore(cpufreq_statistic_lock, flags); return -ENODATA; } @@ -105,14 +107,14 @@ int do_get_pm_info(struct xen_sysctl_get ct = pmpt->perf.state_count; if ( copy_to_guest(op->u.getpx.trans_pt, pxpt->u.trans_pt, ct*ct) ) { - spin_unlock(cpufreq_statistic_lock); + 
spin_unlock_irqrestore(cpufreq_statistic_lock, flags); ret = -EFAULT; break; } if ( copy_to_guest(op->u.getpx.pt, pxpt->u.pt, ct) ) { - spin_unlock(cpufreq_statistic_lock); + spin_unlock_irqrestore(cpufreq_statistic_lock, flags); ret = -EFAULT; break; } @@ -122,7 +124,7 @@ int do_get_pm_info(struct xen_sysctl_get op->u.getpx.last = pxpt->u.last; op->u.getpx.cur = pxpt->u.cur; - spin_unlock(cpufreq_statistic_lock); + spin_unlock_irqrestore(cpufreq_statistic_lock, flags); break; } diff -r adce8bc43fcc xen/drivers/cpufreq/utility.c --- a/xen/drivers/cpufreq/utility.c Tue Apr 06 07:16:47 2010 +0100 +++ b/xen/drivers/cpufreq/utility.c Fri Apr 09 11:39:27 2010 +0800 @@ -67,12 +67,13 @@ void cpufreq_statistic_update(unsigned i struct processor_pminfo *pmpt = processor_pminfo[cpu]; spinlock_t *cpufreq_statistic_lock = &per_cpu(cpufreq_statistic_lock, cpu); - - spin_lock(cpufreq_statistic_lock); + unsigned long flags; + + spin_lock_irqsave(cpufreq_statistic_lock, flags); pxpt = cpufreq_statistic_data[cpu]; if ( !pxpt || !pmpt ) { - spin_unlock(cpufreq_statistic_lock); + spin_unlock_irqrestore(cpufreq_statistic_lock, flags); return; } @@ -84,7 +85,7 @@ void cpufreq_statistic_update(unsigned i (*(pxpt->u.trans_pt + from * pmpt->perf.state_count + to))++; - spin_unlock(cpufreq_statistic_lock); + spin_unlock_irqrestore(cpufreq_statistic_lock, flags); } int cpufreq_statistic_init(unsigned int cpuid) @@ -94,15 +95,16 @@ int cpufreq_statistic_init(unsigned int const struct processor_pminfo *pmpt = processor_pminfo[cpuid]; spinlock_t *cpufreq_statistic_lock = &per_cpu(cpufreq_statistic_lock, cpuid); + unsigned long flags; if ( !pmpt ) return -EINVAL; - spin_lock(cpufreq_statistic_lock); - + spin_lock_irqsave(cpufreq_statistic_lock, flags); pxpt = cpufreq_statistic_data[cpuid]; + spin_unlock_irqrestore(cpufreq_statistic_lock, flags); + if ( pxpt ) { - spin_unlock(cpufreq_statistic_lock); return 0; } @@ -110,16 +112,13 @@ int cpufreq_statistic_init(unsigned int pxpt = xmalloc(struct 
pm_px); if ( !pxpt ) { - spin_unlock(cpufreq_statistic_lock); return -ENOMEM; } memset(pxpt, 0, sizeof(*pxpt)); - cpufreq_statistic_data[cpuid] = pxpt; pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count); if (!pxpt->u.trans_pt) { xfree(pxpt); - spin_unlock(cpufreq_statistic_lock); return -ENOMEM; } @@ -127,7 +126,6 @@ int cpufreq_statistic_init(unsigned int if (!pxpt->u.pt) { xfree(pxpt->u.trans_pt); xfree(pxpt); - spin_unlock(cpufreq_statistic_lock); return -ENOMEM; } @@ -143,8 +141,18 @@ int cpufreq_statistic_init(unsigned int pxpt->prev_state_wall = NOW(); pxpt->prev_idle_wall = get_cpu_idle_time(cpuid); - spin_unlock(cpufreq_statistic_lock); - + spin_lock_irqsave(cpufreq_statistic_lock, flags); + if ( !cpufreq_statistic_data[cpuid] ) { + cpufreq_statistic_data[cpuid] = pxpt; + pxpt = NULL; + } + spin_unlock_irqrestore(cpufreq_statistic_lock, flags); + + if (pxpt) { + xfree(pxpt->u.trans_pt); + xfree(pxpt->u.pt); + xfree(pxpt); + } return 0; } @@ -153,21 +161,18 @@ void cpufreq_statistic_exit(unsigned int struct pm_px *pxpt; spinlock_t *cpufreq_statistic_lock = &per_cpu(cpufreq_statistic_lock, cpuid); - - spin_lock(cpufreq_statistic_lock); - + unsigned long flags; + + spin_lock_irqsave(cpufreq_statistic_lock, flags); pxpt = cpufreq_statistic_data[cpuid]; - if (!pxpt) { - spin_unlock(cpufreq_statistic_lock); - return; - } - - xfree(pxpt->u.trans_pt); - xfree(pxpt->u.pt); - xfree(pxpt); cpufreq_statistic_data[cpuid] = NULL; - - spin_unlock(cpufreq_statistic_lock); + spin_unlock_irqrestore(cpufreq_statistic_lock, flags); + + if (pxpt) { + xfree(pxpt->u.trans_pt); + xfree(pxpt->u.pt); + xfree(pxpt); + } } void cpufreq_statistic_reset(unsigned int cpuid) @@ -177,12 +182,13 @@ void cpufreq_statistic_reset(unsigned in const struct processor_pminfo *pmpt = processor_pminfo[cpuid]; spinlock_t *cpufreq_statistic_lock = &per_cpu(cpufreq_statistic_lock, cpuid); - - spin_lock(cpufreq_statistic_lock); + unsigned long flags; + + spin_lock_irqsave(cpufreq_statistic_lock, 
flags); pxpt = cpufreq_statistic_data[cpuid]; if ( !pmpt || !pxpt || !pxpt->u.pt || !pxpt->u.trans_pt ) { - spin_unlock(cpufreq_statistic_lock); + spin_unlock_irqrestore(cpufreq_statistic_lock, flags); return; } @@ -199,7 +205,7 @@ void cpufreq_statistic_reset(unsigned in pxpt->prev_state_wall = NOW(); pxpt->prev_idle_wall = get_cpu_idle_time(cpuid); - spin_unlock(cpufreq_statistic_lock); + spin_unlock_irqrestore(cpufreq_statistic_lock, flags); } _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2010-Apr-12 06:09 UTC
[Xen-devel] Re: [PATCH]CPUFREQ: Fix two racing issues during cpu hotplug
On 12/04/2010 05:45, "Wei, Gang" <gang.wei@intel.com> wrote:

> Move cpufreq_del_cpu() call into __cpu_disable to eliminate racing with dbs
> timer handler on policy & drv_data. Put it after local_irq_enable because
> xmalloc/xfree in cpufreq_del_cpu assert for this.

Can't you just kill_timer()? Adding extra code into a stop_machine context is dangerous: e.g., xmalloc()->alloc_xenheap_pages()->memguard_unguard_range()->map_pages_to_xen()->flush_area_all() results in deadlock as other cpus are spinning with irqs disabled.

> Change access to cpufreq_statistic_lock from spin_lock to spin_lock_irqsave to
> avoid statistic data access racing between cpufreq_statistic_exit and dbs
> timer handler.

DBS timer handler is called in softirq context; cpu_freq_statistic_exit() appears also always to be called from non-irq context. I don't see what interrupt context you are protecting against.

 -- Keir

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Wei, Gang
2010-Apr-12 06:54 UTC
[Xen-devel] RE: [PATCH]CPUFREQ: Fix two racing issues during cpu hotplug
On Monday, 2010-4-12 2:10 PM, Keir Fraser wrote:

> On 12/04/2010 05:45, "Wei, Gang" <gang.wei@intel.com> wrote:
>
>> Move cpufreq_del_cpu() call into __cpu_disable to eliminate racing
>> with dbs timer handler on policy & drv_data. Put it after
>> local_irq_enable because xmalloc/xfree in cpufreq_del_cpu assert for
>> this.
>
> Can't you just kill_timer()? Adding extra code into a stop_machine
> context is dangerous: e.g.,
> xmalloc()->alloc_xenheap_pages()->memguard_unguard_range()->map_pages_to_xen
> ()->flush_area_all() results in deadlock as other cpus are spinning
> with irqs disabled.

You are right. kill_timer stops the timer and waits until the timer handler ends if this timer is currently running. I can switch to it. BTW, I may need to re-init the killed timer before set_timer on it, right?

>
>> Change access to cpufreq_statistic_lock from spin_lock to
>> spin_lock_irqsave to avoid statistic data access racing between
>> cpufreq_statistic_exit and dbs
>> timer handler.
>
> DBS timer handler is called in softirq context;
> cpu_freq_statistic_exit() appears also always to be called from
> non-irq context. I don't see what interrupt context you are
> protecting against.

Ok. It is my misunderstanding. I used to think do_softirq is also called before irq returns. I will rework another very simple patch soon.

Jimmy

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Keir Fraser
2010-Apr-12 06:59 UTC
[Xen-devel] Re: [PATCH]CPUFREQ: Fix two racing issues during cpu hotplug
On 12/04/2010 07:54, "Wei, Gang" <gang.wei@intel.com> wrote:

>> Can't you just kill_timer()? Adding extra code into a stop_machine
>> context is dangerous: e.g.,
>> xmalloc()->alloc_xenheap_pages()->memguard_unguard_range()->map_pages_to_xen
>> ()->flush_area_all() results in deadlock as other cpus are spinning
>> with irqs disabled.
>
> You are right. kill_timer stop timer and wait until timer handler end if this
> timer is current running. I can switch to it. BTW, I may need to re-init the
> killed timer before set_timer on it, right?

Yes, init_timer() must be called to be able to reuse a kill_timer()ed timer.

 -- Keir

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Wei, Gang
2010-Apr-12 07:40 UTC
[Xen-devel] RE: [PATCH]CPUFREQ: Fix two racing issues during cpu hotplug
Resend. CPUFREQ: fix racing issue for cpu hotplug To eliminate racing between dbs timer handler and cpufreq_del_cpu, using kill_timer instead of stop_timer to make sure timer handler execution finished before other stuff in cpufreq_del_cpu. BTW, fix a lost point of cpufreq_statistic_lock taking sequence. Signed-off-by: Wei Gang <gang.wei@intel.com> diff -r 824d35c72a8d xen/drivers/acpi/pmstat.c --- a/xen/drivers/acpi/pmstat.c Mon Apr 12 15:32:32 2010 +0800 +++ b/xen/drivers/acpi/pmstat.c Mon Apr 12 15:33:00 2010 +0800 @@ -86,12 +86,13 @@ int do_get_pm_info(struct xen_sysctl_get case PMSTAT_get_pxstat: { uint32_t ct; - struct pm_px *pxpt = cpufreq_statistic_data[op->cpuid]; + struct pm_px *pxpt; spinlock_t *cpufreq_statistic_lock = &per_cpu(cpufreq_statistic_lock, op->cpuid); spin_lock(cpufreq_statistic_lock); + pxpt = cpufreq_statistic_data[op->cpuid]; if ( !pxpt || !pxpt->u.pt || !pxpt->u.trans_pt ) { spin_unlock(cpufreq_statistic_lock); diff -r 824d35c72a8d xen/drivers/cpufreq/cpufreq_ondemand.c --- a/xen/drivers/cpufreq/cpufreq_ondemand.c Mon Apr 12 15:32:32 2010 +0800 +++ b/xen/drivers/cpufreq/cpufreq_ondemand.c Mon Apr 12 15:33:17 2010 +0800 @@ -196,9 +196,8 @@ static void dbs_timer_init(struct cpu_db { dbs_info->enable = 1; - if ( !dbs_timer[dbs_info->cpu].function ) - init_timer(&dbs_timer[dbs_info->cpu], do_dbs_timer, - (void *)dbs_info, dbs_info->cpu); + init_timer(&dbs_timer[dbs_info->cpu], do_dbs_timer, + (void *)dbs_info, dbs_info->cpu); set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate); @@ -213,7 +212,7 @@ static void dbs_timer_exit(struct cpu_db { dbs_info->enable = 0; dbs_info->stoppable = 0; - stop_timer(&dbs_timer[dbs_info->cpu]); + kill_timer(&dbs_timer[dbs_info->cpu]); } int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel