Rendezvous selected cpus in softirq

This is similar to the stop_machine_run stub from Linux: it pulls selected cpus into a rendezvous point and then does some batch work in a safe environment. The current sole usage is the S3 path, where individual cpus are pulled down and the related online footprints cleared. It's dangerous to have other cpus reading clobbered data structures, such as cpu_online_map, cpu_sibling_map, etc., in the middle of this.

Signed-off-by: Kevin Tian <kevin.tian@intel.com>

diff -r 667d1a73d2ca xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Sat Feb 02 11:06:02 2008 +0800
+++ b/xen/arch/x86/domain.c     Sat Feb 02 14:11:45 2008 +0800
@@ -82,7 +82,6 @@ static void default_idle(void)
 
 static void play_dead(void)
 {
-    __cpu_disable();
     /* This must be done before dead CPU ack */
     cpu_exit_clear();
     hvm_cpu_down();
diff -r 667d1a73d2ca xen/arch/x86/smp.c
--- a/xen/arch/x86/smp.c        Sat Feb 02 11:06:02 2008 +0800
+++ b/xen/arch/x86/smp.c        Sat Feb 02 14:11:30 2008 +0800
@@ -22,6 +22,7 @@
 #include <asm/ipi.h>
 #include <asm/hvm/support.h>
 #include <mach_apic.h>
+#include <xen/softirq.h>
 
 /*
  * Some notes on x86 processor bugs affecting SMP operation:
@@ -374,3 +375,151 @@ fastcall void smp_call_function_interrup
 
     irq_exit();
 }
+
+enum rendezvous_action {
+    RENDEZVOUS_START,
+    RENDEZVOUS_DISABLE_IRQ,
+    RENDEZVOUS_INVOKE,
+    RENDEZVOUS_EXIT,
+};
+
+struct rendezvous_data {
+    void (*fn) (void *);
+    void *data;
+    cpumask_t selected;
+    cpumask_t fn_mask;
+    atomic_t done;
+    enum rendezvous_action action;
+};
+
+static struct rendezvous_data *rdz_data;
+static spinlock_t rendezvous_lock = SPIN_LOCK_UNLOCKED;
+
+static void rendezvous_set_action(enum rendezvous_action action, int cpus)
+{
+    atomic_set(&rdz_data->done, 0);
+    smp_wmb();
+    rdz_data->action = action;
+    while ( atomic_read(&rdz_data->done) != cpus )
+        cpu_relax();
+}
+
+/* Rendezvous selected cpus in softirq context; cpus in fn_mask call (*fn). */
+int on_rendezvous_cpus(
+    cpumask_t selected,
+    cpumask_t fn_mask,
+    void (*fn) (void *),
+    void *data,
+    int disable_irq)
+{
+    struct rendezvous_data rdz;
+    unsigned int nr_cpus, nr_fn_cpus, self, cpu, cur = smp_processor_id();
+
+    ASSERT(local_irq_is_enabled());
+    ASSERT(cpus_subset(fn_mask, selected));
+
+    if ( cpus_weight(fn_mask) && !fn )
+        return -1;
+
+    /* The current cpu conducts the rendezvous process. */
+    cpu_clear(cur, selected);
+    self = cpu_test_and_clear(cur, fn_mask);
+    nr_cpus = cpus_weight(selected);
+    nr_fn_cpus = cpus_weight(fn_mask);
+
+    if ( nr_cpus == 0 )
+    {
+        if ( self )
+            (*fn)(data);
+        return 0;
+    }
+
+    rdz.fn = fn;
+    rdz.data = data;
+    rdz.selected = selected;
+    rdz.fn_mask = fn_mask;
+    atomic_set(&rdz.done, 0);
+    rdz.action = RENDEZVOUS_START;
+
+    /* Note: we must not spin on the lock when another cpu holds it, since
+     * the holder expects this cpu to enter softirq context; spinning here
+     * would deadlock.
+     */
+    if ( !spin_trylock(&rendezvous_lock) )
+        return -1;
+
+    rdz_data = &rdz;
+    smp_wmb();
+
+    for_each_cpu_mask(cpu, selected)
+        cpu_raise_softirq(cpu, RENDEZVOUS_SOFTIRQ);
+
+    /* Wait for all concerned cpus to enter the rendezvous point. */
+    while ( atomic_read(&rdz_data->done) != nr_cpus )
+        cpu_relax();
+
+    if ( disable_irq )
+        rendezvous_set_action(RENDEZVOUS_DISABLE_IRQ, nr_cpus);
+
+    if ( self )
+        (*fn)(data);
+
+    /* Wait for cpus to finish the work, if applicable. */
+    if ( nr_fn_cpus != 0 )
+        rendezvous_set_action(RENDEZVOUS_INVOKE, nr_fn_cpus);
+
+    rendezvous_set_action(RENDEZVOUS_EXIT, nr_cpus);
+    spin_unlock(&rendezvous_lock);
+    return 0;
+}
+
+static void rendezvous_softirq(void)
+{
+    int irq_disabled = 0;
+    int invoked = 0;
+    int required;
+
+    ASSERT(cpu_isset(smp_processor_id(), rdz_data->selected));
+
+    required = cpu_isset(smp_processor_id(), rdz_data->fn_mask);
+    smp_mb();
+    atomic_inc(&rdz_data->done);
+
+    while ( rdz_data->action != RENDEZVOUS_EXIT )
+    {
+        switch ( rdz_data->action )
+        {
+        case RENDEZVOUS_DISABLE_IRQ:
+            if ( !irq_disabled )
+            {
+                local_irq_disable();
+                irq_disabled = 1;
+                smp_mb();
+                atomic_inc(&rdz_data->done);
+            }
+            break;
+        case RENDEZVOUS_INVOKE:
+            if ( required && !invoked )
+            {
+                rdz_data->fn(rdz_data->data);
+                invoked = 1;
+                smp_mb();
+                atomic_inc(&rdz_data->done);
+            }
+            break;
+        default:
+            break;
+        }
+
+        cpu_relax();
+    }
+
+    smp_mb();
+    atomic_inc(&rdz_data->done);
+}
+
+static int __init cpu_rendezvous_init(void)
+{
+    open_softirq(RENDEZVOUS_SOFTIRQ, rendezvous_softirq);
+    return 0;
+}
+__initcall(cpu_rendezvous_init);
diff -r 667d1a73d2ca xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Sat Feb 02 11:06:02 2008 +0800
+++ b/xen/arch/x86/smpboot.c    Sat Feb 02 14:11:30 2008 +0800
@@ -1192,7 +1192,7 @@ remove_siblinginfo(int cpu)
 }
 
 extern void fixup_irqs(cpumask_t map);
-int __cpu_disable(void)
+void __cpu_disable(void *data)
 {
 	cpumask_t map = cpu_online_map;
 	int cpu = smp_processor_id();
@@ -1206,7 +1206,15 @@ int __cpu_disable(void)
 	 * Especially so if we're not using an IOAPIC -zwane
 	 */
 	if (cpu == 0)
-		return -EBUSY;
+		return;
+
+	/* So far only S3 uses this path, and thus only idle vcpus are
+	 * running on the APs when it is called. To support full cpu
+	 * hotplug, other notification mechanisms should be introduced,
+	 * e.g. to migrate vcpus off this cpu before the rendezvous point.
+	 */
+	if (!is_idle_vcpu(current))
+		return;
 
 	local_irq_disable();
 	clear_local_APIC();
@@ -1223,7 +1231,6 @@ int __cpu_disable(void)
 	fixup_irqs(map);
 	/* It's now safe to remove this processor from the online map */
 	cpu_clear(cpu, cpu_online_map);
-	return 0;
 }
 
 void __cpu_die(unsigned int cpu)
@@ -1269,7 +1276,8 @@ int cpu_down(unsigned int cpu)
 int cpu_down(unsigned int cpu)
 {
 	int err = 0;
-	cpumask_t mask;
+	cpumask_t rmask = cpu_online_map;
+	cpumask_t mask = CPU_MASK_NONE;
 
 	spin_lock(&cpu_add_remove_lock);
 	if (num_online_cpus() == 1) {
@@ -1283,11 +1291,9 @@ int cpu_down(unsigned int cpu)
 	}
 
 	printk("Prepare to bring CPU%d down...\n", cpu);
-	/* Send notification to remote idle vcpu */
-	cpus_clear(mask);
-	cpu_set(cpu, mask);
-	per_cpu(cpu_state, cpu) = CPU_DYING;
-	smp_send_event_check_mask(mask);
+	cpu_clear(smp_processor_id(), rmask); /* all except self */
+	cpu_set(cpu, mask);                   /* cpu to be brought down */
+	on_rendezvous_cpus(rmask, mask, __cpu_disable, NULL, 1);
 
 	__cpu_die(cpu);
 
@@ -1364,9 +1370,8 @@ void enable_nonboot_cpus(void)
 	cpus_clear(frozen_cpus);
 }
 #else /* ... !CONFIG_HOTPLUG_CPU */
-int __cpu_disable(void)
-{
-	return -ENOSYS;
+void __cpu_disable(void *data)
+{
 }
 
 void __cpu_die(unsigned int cpu)
diff -r 667d1a73d2ca xen/include/asm-x86/smp.h
--- a/xen/include/asm-x86/smp.h Sat Feb 02 11:06:02 2008 +0800
+++ b/xen/include/asm-x86/smp.h Sat Feb 02 14:11:30 2008 +0800
@@ -51,12 +51,11 @@ extern u8 x86_cpu_to_apicid[];
 
 /* State of each CPU. */
 #define CPU_ONLINE	0x0002	/* CPU is up */
-#define CPU_DYING	0x0003	/* CPU is requested to die */
 #define CPU_DEAD	0x0004	/* CPU is dead */
 DECLARE_PER_CPU(int, cpu_state);
 
 #ifdef CONFIG_HOTPLUG_CPU
-#define cpu_is_offline(cpu) unlikely(per_cpu(cpu_state,cpu) == CPU_DYING)
+#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
 extern int cpu_down(unsigned int cpu);
 extern int cpu_up(unsigned int cpu);
 extern void cpu_exit_clear(void);
@@ -102,8 +101,10 @@ static __inline int logical_smp_processo
 
 #endif
 
-extern int __cpu_disable(void);
+extern void __cpu_disable(void *data);
 extern void __cpu_die(unsigned int cpu);
+extern int on_rendezvous_cpus(cpumask_t selected, cpumask_t fn_mask,
+                              void (*fn) (void *), void *data, int disable_irq);
 #endif /* !__ASSEMBLY__ */
 
 #else /* CONFIG_SMP */
diff -r 667d1a73d2ca xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h Sat Feb 02 11:06:02 2008 +0800
+++ b/xen/include/xen/softirq.h Sat Feb 02 14:12:06 2008 +0800
@@ -10,8 +10,9 @@
 #define PAGE_SCRUB_SOFTIRQ 5
 #define TRACE_SOFTIRQ      6
 #define RCU_SOFTIRQ        7
+#define RENDEZVOUS_SOFTIRQ 8
 
-#define NR_COMMON_SOFTIRQS 8
+#define NR_COMMON_SOFTIRQS 9
 
 #include <asm/softirq.h>
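For illustration, a minimal sketch of how an S3-style caller might use the interface above to pull down all APs in one shot. This caller is hypothetical (the patch's only real user is the cpu_down() hunk shown in the diff), but it mirrors the mask handling there; the names all come from the patch:

/* Hypothetical S3-style caller; not part of the patch. Rendezvous every
 * other online cpu and have all of them run __cpu_disable() with IRQs
 * disabled. */
cpumask_t allbutself = cpu_online_map;

cpu_clear(smp_processor_id(), allbutself);   /* this cpu conducts */
/* selected == fn_mask: every rendezvoused cpu also runs the function. */
on_rendezvous_cpus(allbutself, allbutself, __cpu_disable, NULL, 1);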
This is a good thing to have, and my main comments are stylistic. Firstly, I think the name and interface change compared with Linux's stop_machine mechanism is gratuitous. I would prefer us to have the same interface (and also probably use the same or similar names throughout the implementation, where appropriate). I don't think the more flexible interface expressed here is useful -- certainly finding an application for rendezvousing a set of CPUs and running a function on a subset of those, with optional irq disable, is a bit of a brain bender. :-) So, stick to the stop_machine interface, semantics, and naming....

Also, I'd prefer this to be implemented in common/stop_machine.c if at all possible. It's not really x86 specific. Certainly I do not want it in smp.c, as that file is full enough already of random cruft with no other home.

Oh, also I think you are missing local_irq_disable() on the CPU that calls on_rendezvous_cpus(). Like the Linux implementation, you should probably do it at the same time you signal the other CPUs to do so.

Apart from that it's a good idea, and I'll look more closely at how you tie it in to CPU hotplug when you resubmit it.

Thanks,
Keir

On 2/2/08 08:11, "Tian, Kevin" <kevin.tian@intel.com> wrote:

> [...]
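To make the local_irq_disable() point concrete, here is a minimal sketch assuming the names from the posted patch; the exact placement inside on_rendezvous_cpus() is an assumption, not code from this thread:

/* Sketch only (hypothetical placement): the conducting cpu disables its
 * own IRQs in step with the cpus it has rendezvoused. */
if ( disable_irq )
{
    /* Command the rendezvoused cpus to disable IRQs, and wait for them. */
    rendezvous_set_action(RENDEZVOUS_DISABLE_IRQ, nr_cpus);
    /* ... then do the same locally, which the original patch misses. */
    local_irq_disable();
}

A matching local_irq_enable() would then be needed on the exit path, since the caller enters with IRQs enabled (the function ASSERTs as much).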
>From: Keir Fraser [mailto:Keir.Fraser@cl.cam.ac.uk]
>Sent: 4 February 2008 1:44
>
>This is a good thing to have, and my main comments are stylistic.
>Firstly, I think the name and interface change compared with Linux's
>stop_machine mechanism is gratuitous. I would prefer us to have the
>same interface (and also probably use the same or similar names
>throughout the implementation, where appropriate). I don't think the
>more flexible interface expressed here is useful -- certainly finding
>an application for rendezvousing a set of CPUs and running a function
>on a subset of those, with optional irq disable, is a bit of a brain
>bender. :-) So, stick to the stop_machine interface, semantics, and
>naming....

Here I'd like to explain why the Linux stop_machine semantics are a bit unwieldy for Xen, and why changing them took me further from the Linux logic, which led me to create a new name. :-)

Linux creates kernel threads and schedules them onto individual cpus to fulfill the synchronization process. It's the thread executing (*fn) that conducts the whole process. For example, suppose _stop_machine is called on cpu_i, with target cpu_j to execute (*fn). stop_machine first creates a kernel thread bound to cpu_j and then waits for completion on cpu_i, which causes a context switch. When the kernel thread is scheduled on cpu_j, it creates (max_cpus - 1) kernel threads on the remaining cpus and waits for them to be scheduled. Once stop threads are scheduled on all cpus, cpu_j first commands the other cpus to stop activity, then executes (*fn), and finally commands the others to resume. As a result, the earlier flow on cpu_i is woken up upon completion.

Xen has no dynamically created vcpus, so cpu_i can only send a notification to cpu_j and wait for cpu_j to check it at some point. It's natural to take a softirq bit, since do_softirq sits at a safe point just before resuming to the guest, or in idle_loop. But then it's a bit cumbersome to let cpu_j conduct the stop process. First, cpu_i needs to block itself after sending the notification to cpu_j, or else cpu_i would not handle softirqs later when cpu_j requests it to. Then block on what? Maybe an event channel... But the continuation stack mechanism in Xen means more dirty work would be needed, since previous frames are lost and the stack is reset to the bottom after resume. Alternatively, we could temporarily check softirqs on cpu_i in a loop, but that may still cause a context switch if a schedule softirq is triggered in between.

All of the above made me reluctant to follow the Linux semantics, and further thought led me to ask: why not let cpu_i conduct the stop process directly, and then let the concerned cpus call (*fn) via a new action, ***_INVOKE? This way, cpu_i doesn't need to be cut off from its current flow, and once stop_machine returns, all work that had to happen in a stopped environment has been done. We could even conduct cpu hotplug in one call, with all other APs invoking cpu_disable at the same time during S3.

Having reached that conclusion, I really thought it differed from the Linux stop_machine semantics, and thus rewrote the code, including the name, as differentiation. Though S3 is the only user of this interface so far, I do think it will be useful elsewhere, whenever one cpu wants to kick another into doing something at a safe point.

>
>Also, I'd prefer this to be implemented in common/stop_machine.c if at
>all possible. It's not really x86 specific. Certainly I do not want it
>in smp.c, as that file is full enough already of random cruft with no
>other home.

OK.

>
>Oh, also I think you are missing local_irq_disable() on the CPU that
>calls on_rendezvous_cpus(). Like the Linux implementation, you should
>probably do it at the same time you signal the other CPUs to do so.

Good point.

>
>Apart from that it's a good idea, and I'll look more closely at how you
>tie it in to CPU hotplug when you resubmit it.
>

Then before re-submission, I'll wait for your further comments on the explanation above.

Thanks,
Kevin
On 4/2/08 02:02, "Tian, Kevin" <kevin.tian@intel.com> wrote:

> All of the above made me reluctant to follow the Linux semantics, and
> further thought led me to ask: why not let cpu_i conduct the stop
> process directly, and then let the concerned cpus call (*fn) via a new
> action, ***_INVOKE? This way, cpu_i doesn't need to be cut off from
> its current flow, and once stop_machine returns, all work that had to
> happen in a stopped environment has been done.

Yes, I saw that, but it doesn't require a modified interface. The semantics of the call are still:
 1. Synchronize/rendezvous all CPUs (the caller is assumed already to be at a safe point and just needs to disable IRQs at the right time).
 2. Run (*fn) on one designated CPU (via your new bit of mechanism).
 3. All done.

This fits fine within the stop_machine_run(fn, data, cpu) interface, albeit that the underlying implementation is somewhat different (and you already have that pretty much correct!).

Really all you need to do is put your implementation in a different file, do some simple renaming of stuff, push some of your caller code (the bits that create the cpumasks) into your stop_machine_run() implementation, and give stop_machine_run() the simple Linux interface.

 -- Keir
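A minimal sketch of the wrapper Keir describes, assuming the rendezvous implementation from the posted patch underneath. Note that Linux's prototype takes int (*fn)(void *); the void signature here follows the patch's mechanism instead:

/* Sketch only: a Linux-style stop_machine_run() built on the posted
 * rendezvous mechanism. The cpumask construction moves from the caller
 * (as in cpu_down()) into the wrapper itself. */
int stop_machine_run(void (*fn)(void *), void *data, unsigned int cpu)
{
    cpumask_t allbutself = cpu_online_map;
    cpumask_t fn_mask = CPU_MASK_NONE;

    cpu_clear(smp_processor_id(), allbutself); /* the caller conducts */
    cpu_set(cpu, fn_mask);                     /* fn runs on this cpu only */

    /* Rendezvous every other online cpu, disable IRQs everywhere, run fn
     * on the designated cpu, then release everyone. */
    return on_rendezvous_cpus(allbutself, fn_mask, fn, data, 1);
}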
>From: Keir Fraser [mailto:Keir.Fraser@cl.cam.ac.uk]
>Sent: 4 February 2008 15:31
>
>[...]
>
>Really all you need to do is put your implementation in a different
>file, do some simple renaming of stuff, push some of your caller code
>(the bits that create the cpumasks) into your stop_machine_run()
>implementation, and give stop_machine_run() the simple Linux interface.
>
> -- Keir
>

Well, understood this time. Reshuffling the code now. :-)

Thanks,
Kevin
Then, attached is the updated version per your comments, with the Linux stop_machine semantics kept in common code. Build tested for x86, and ideally it should also work on other archs.

One small issue is the lock handling with cpu hotplug. For a cpu hotplug request from the control tools, there's a deadlock race between the pure cpu hotplug path and stop_machine if we still use a spinlock as the guard. For example, stop_machine may hold the lock on one cpu and wait for a response from another cpu, which happens to be spinning on the lock for a hotplug request. In that case, the other cpu never gets a chance to handle the softirq.

We may be able to use trylock for cpu_up/down; however, that also affects stop_machine when called, e.g., from the S3 path, for which try-and-give-up is too heavy and unexpected. Maybe some new cpu_tryup/trydown could be used. Not sure...

But anyway, there is still some way to go for full cpu hotplug support, and that doesn't prevent this feature from going in based on a spinlock at this stage. :-)

More comments?

Thanks,
Kevin

>From: Keir Fraser [mailto:Keir.Fraser@cl.cam.ac.uk]
>Sent: 4 February 2008 15:31
>
>[...]
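To illustrate the cpu_tryup/trydown idea, a hypothetical sketch of a non-blocking cpu_down() variant. Neither cpu_trydown() nor the __cpu_down_locked() helper exists in the posted code; only cpu_add_remove_lock does:

/* Hypothetical sketch: refuse rather than spin on cpu_add_remove_lock.
 * If another cpu holds the lock while conducting a stop_machine
 * rendezvous, it is waiting for this cpu to service the rendezvous
 * softirq, so spinning here would deadlock -- the race described above. */
int cpu_trydown(unsigned int cpu)
{
    int err;

    if ( !spin_trylock(&cpu_add_remove_lock) )
        return -EBUSY;

    err = __cpu_down_locked(cpu); /* hypothetical helper: the existing
                                   * cpu_down() body minus the locking */
    spin_unlock(&cpu_add_remove_lock);
    return err;
}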
Much better. I think this will go in, although I might rename some more stuff inside stop_machine.c towards Linux stop_machine-style names.

 -- Keir

On 4/2/08 09:51, "Tian, Kevin" <kevin.tian@intel.com> wrote:

> [...]
Applied, but I put the patch on a diet first. You'll want to check it still does what you expect... :-)

 -- Keir

On 4/2/08 09:51, "Tian, Kevin" <kevin.tian@intel.com> wrote:

> [...]
>From: Keir Fraser [mailto:Keir.Fraser@cl.cam.ac.uk]
>Sent: 12 February 2008 0:02
>
>Applied, but I put the patch on a diet first. You'll want to check it
>still does what you expect... :-)
>
> -- Keir
>

It's much cleaner, and my quick test confirmed its correctness. Thorough testing is ongoing...

Thanks,
Kevin