Rendezvous selected cpus in softirq

This is similar to the stop_machine_run stub from Linux: it pulls selected cpus into a rendezvous point and then does some batch work in a safe environment. The current sole usage is the S3 path, where individual cpus are pulled down and the related online footprints cleared. It's dangerous to have other cpus reading clobbered data structures, such as cpu_online_map, cpu_sibling_map, etc., in the middle of this.

Signed-off-by: Kevin Tian <kevin.tian@intel.com>

diff -r 667d1a73d2ca xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Sat Feb 02 11:06:02 2008 +0800
+++ b/xen/arch/x86/domain.c     Sat Feb 02 14:11:45 2008 +0800
@@ -82,7 +82,6 @@ static void default_idle(void)
 
 static void play_dead(void)
 {
-    __cpu_disable();
     /* This must be done before dead CPU ack */
     cpu_exit_clear();
     hvm_cpu_down();
diff -r 667d1a73d2ca xen/arch/x86/smp.c
--- a/xen/arch/x86/smp.c        Sat Feb 02 11:06:02 2008 +0800
+++ b/xen/arch/x86/smp.c        Sat Feb 02 14:11:30 2008 +0800
@@ -22,6 +22,7 @@
 #include <asm/ipi.h>
 #include <asm/hvm/support.h>
 #include <mach_apic.h>
+#include <xen/softirq.h>
 
 /*
  * Some notes on x86 processor bugs affecting SMP operation:
@@ -374,3 +375,151 @@ fastcall void smp_call_function_interrup
 
     irq_exit();
 }
+
+enum rendezvous_action {
+    RENDEZVOUS_START,
+    RENDEZVOUS_DISABLE_IRQ,
+    RENDEZVOUS_INVOKE,
+    RENDEZVOUS_EXIT,
+};
+
+struct rendezvous_data {
+    void (*fn) (void *);
+    void *data;
+    cpumask_t selected;
+    cpumask_t fn_mask;
+    atomic_t done;
+    enum rendezvous_action action;
+};
+
+static struct rendezvous_data *rdz_data;
+static spinlock_t rendezvous_lock = SPIN_LOCK_UNLOCKED;
+
+static void rendezvous_set_action(enum rendezvous_action action, int cpus)
+{
+    atomic_set(&rdz_data->done, 0);
+    smp_wmb();
+    rdz_data->action = action;
+    while ( atomic_read(&rdz_data->done) != cpus )
+        cpu_relax();
+}
+
+/* Rendezvous selected cpus in softirq context; cpus in fn_mask call (*fn). */
+int on_rendezvous_cpus(
+    cpumask_t selected,
+    cpumask_t fn_mask,
+    void (*fn) (void *),
+    void *data,
+    int disable_irq)
+{
+    struct rendezvous_data rdz;
+    unsigned int nr_cpus, nr_fn_cpus, self, cpu, cur = smp_processor_id();
+
+    ASSERT(local_irq_is_enabled());
+    ASSERT(cpus_subset(fn_mask, selected));
+
+    if ( cpus_weight(fn_mask) && !fn )
+        return -1;
+
+    /* The current cpu conducts the rendezvous process. */
+    cpu_clear(cur, selected);
+    self = cpu_test_and_clear(cur, fn_mask);
+    nr_cpus = cpus_weight(selected);
+    nr_fn_cpus = cpus_weight(fn_mask);
+
+    if ( nr_cpus == 0 )
+    {
+        if ( self )
+            (*fn)(data);
+        return 0;
+    }
+
+    rdz.fn = fn;
+    rdz.data = data;
+    rdz.selected = selected;
+    rdz.fn_mask = fn_mask;
+    atomic_set(&rdz.done, 0);
+    rdz.action = RENDEZVOUS_START;
+
+    /* Note: we must not spin on the lock when another cpu holds it, since
+     * the holder expects this cpu to enter softirq context; spinning here
+     * would deadlock.
+     */
+    if ( !spin_trylock(&rendezvous_lock) )
+        return -1;
+
+    rdz_data = &rdz;
+    smp_wmb();
+
+    for_each_cpu_mask(cpu, selected)
+        cpu_raise_softirq(cpu, RENDEZVOUS_SOFTIRQ);
+
+    /* Wait for all concerned cpus to enter the rendezvous point. */
+    while ( atomic_read(&rdz_data->done) != nr_cpus )
+        cpu_relax();
+
+    if ( disable_irq )
+        rendezvous_set_action(RENDEZVOUS_DISABLE_IRQ, nr_cpus);
+
+    if ( self )
+        (*fn)(data);
+
+    /* Wait for cpus to finish the work, if applicable. */
+    if ( nr_fn_cpus != 0 )
+        rendezvous_set_action(RENDEZVOUS_INVOKE, nr_fn_cpus);
+
+    rendezvous_set_action(RENDEZVOUS_EXIT, nr_cpus);
+    spin_unlock(&rendezvous_lock);
+    return 0;
+}
+
+static void rendezvous_softirq(void)
+{
+    int irq_disabled = 0;
+    int invoked = 0;
+    int required;
+
+    ASSERT(cpu_isset(smp_processor_id(), rdz_data->selected));
+
+    required = cpu_isset(smp_processor_id(), rdz_data->fn_mask);
+    smp_mb();
+    atomic_inc(&rdz_data->done);
+
+    while ( rdz_data->action != RENDEZVOUS_EXIT )
+    {
+        switch ( rdz_data->action )
+        {
+        case RENDEZVOUS_DISABLE_IRQ:
+            if ( !irq_disabled )
+            {
+                local_irq_disable();
+                irq_disabled = 1;
+                smp_mb();
+                atomic_inc(&rdz_data->done);
+            }
+            break;
+        case RENDEZVOUS_INVOKE:
+            if ( required && !invoked )
+            {
+                rdz_data->fn(rdz_data->data);
+                invoked = 1;
+                smp_mb();
+                atomic_inc(&rdz_data->done);
+            }
+            break;
+        default:
+            break;
+        }
+
+        cpu_relax();
+    }
+
+    smp_mb();
+    atomic_inc(&rdz_data->done);
+}
+
+static int __init cpu_rendezvous_init(void)
+{
+    open_softirq(RENDEZVOUS_SOFTIRQ, rendezvous_softirq);
+    return 0;
+}
+__initcall(cpu_rendezvous_init);
diff -r 667d1a73d2ca xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Sat Feb 02 11:06:02 2008 +0800
+++ b/xen/arch/x86/smpboot.c    Sat Feb 02 14:11:30 2008 +0800
@@ -1192,7 +1192,7 @@ remove_siblinginfo(int cpu)
 }
 
 extern void fixup_irqs(cpumask_t map);
-int __cpu_disable(void)
+void __cpu_disable(void *data)
 {
 	cpumask_t map = cpu_online_map;
 	int cpu = smp_processor_id();
@@ -1206,7 +1206,15 @@ int __cpu_disable(void)
 	 * Especially so if we're not using an IOAPIC -zwane
 	 */
 	if (cpu == 0)
-		return -EBUSY;
+		return;
+
+	/* So far only S3 uses this path, and thus only idle vcpus are
+	 * running on the APs when it is called. To support full cpu
+	 * hotplug, other notification mechanisms should be introduced,
+	 * e.g. to migrate vcpus off this cpu before the rendezvous point.
+	 */
+	if (!is_idle_vcpu(current))
+		return;
 
 	local_irq_disable();
 	clear_local_APIC();
@@ -1223,7 +1231,6 @@ int __cpu_disable(void)
 	fixup_irqs(map);
 	/* It's now safe to remove this processor from the online map */
 	cpu_clear(cpu, cpu_online_map);
-	return 0;
 }
 
 void __cpu_die(unsigned int cpu)
@@ -1269,7 +1276,8 @@ int cpu_down(unsigned int cpu)
 int cpu_down(unsigned int cpu)
 {
 	int err = 0;
-	cpumask_t mask;
+	cpumask_t rmask = cpu_online_map;
+	cpumask_t mask = CPU_MASK_NONE;
 
 	spin_lock(&cpu_add_remove_lock);
 	if (num_online_cpus() == 1) {
@@ -1283,11 +1291,9 @@ int cpu_down(unsigned int cpu)
 	}
 
 	printk("Prepare to bring CPU%d down...\n", cpu);
-	/* Send notification to remote idle vcpu */
-	cpus_clear(mask);
-	cpu_set(cpu, mask);
-	per_cpu(cpu_state, cpu) = CPU_DYING;
-	smp_send_event_check_mask(mask);
+	cpu_clear(smp_processor_id(), rmask); /* all except self */
+	cpu_set(cpu, mask);                   /* cpu to be brought down */
+	on_rendezvous_cpus(rmask, mask, __cpu_disable, NULL, 1);
 
 	__cpu_die(cpu);
 
@@ -1364,9 +1370,8 @@ void enable_nonboot_cpus(void)
 	cpus_clear(frozen_cpus);
 }
 #else /* ... !CONFIG_HOTPLUG_CPU */
-int __cpu_disable(void)
-{
-	return -ENOSYS;
+void __cpu_disable(void *data)
+{
 }
 
 void __cpu_die(unsigned int cpu)
diff -r 667d1a73d2ca xen/include/asm-x86/smp.h
--- a/xen/include/asm-x86/smp.h Sat Feb 02 11:06:02 2008 +0800
+++ b/xen/include/asm-x86/smp.h Sat Feb 02 14:11:30 2008 +0800
@@ -51,12 +51,11 @@ extern u8 x86_cpu_to_apicid[];
 
 /* State of each CPU. */
 #define CPU_ONLINE	0x0002	/* CPU is up */
-#define CPU_DYING	0x0003	/* CPU is requested to die */
 #define CPU_DEAD	0x0004	/* CPU is dead */
 DECLARE_PER_CPU(int, cpu_state);
 
 #ifdef CONFIG_HOTPLUG_CPU
-#define cpu_is_offline(cpu) unlikely(per_cpu(cpu_state,cpu) == CPU_DYING)
+#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
 extern int cpu_down(unsigned int cpu);
 extern int cpu_up(unsigned int cpu);
 extern void cpu_exit_clear(void);
@@ -102,8 +101,10 @@ static __inline int logical_smp_processo
 
 #endif
 
-extern int __cpu_disable(void);
+extern void __cpu_disable(void *data);
 extern void __cpu_die(unsigned int cpu);
+extern int on_rendezvous_cpus(cpumask_t selected, cpumask_t fn_mask,
+                              void (*fn) (void *), void *data, int disable_irq);
 #endif /* !__ASSEMBLY__ */
 
 #else /* CONFIG_SMP */
diff -r 667d1a73d2ca xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h Sat Feb 02 11:06:02 2008 +0800
+++ b/xen/include/xen/softirq.h Sat Feb 02 14:12:06 2008 +0800
@@ -10,8 +10,9 @@
 #define PAGE_SCRUB_SOFTIRQ 5
 #define TRACE_SOFTIRQ      6
 #define RCU_SOFTIRQ        7
+#define RENDEZVOUS_SOFTIRQ 8
 
-#define NR_COMMON_SOFTIRQS 8
+#define NR_COMMON_SOFTIRQS 9
 
 #include <asm/softirq.h>
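For illustration, a minimal sketch of how an S3-style caller might use the interface above to pull down all APs in one shot. This caller is hypothetical (the patch's only real user is the cpu_down() hunk shown in the diff), but it mirrors the mask handling there; the names all come from the patch:

/* Hypothetical S3-style caller; not part of the patch. Rendezvous every
 * other online cpu and have all of them run __cpu_disable() with IRQs
 * disabled. */
cpumask_t allbutself = cpu_online_map;

cpu_clear(smp_processor_id(), allbutself);   /* this cpu conducts */
/* selected == fn_mask: every rendezvoused cpu also runs the function. */
on_rendezvous_cpus(allbutself, allbutself, __cpu_disable, NULL, 1);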
This is a good thing to have, and my main comments are stylistic. Firstly, I think the name and interface change compared with Linux's stop_machine mechanism is gratuitous. I would prefer us to have the same interface (and also probably use the same or similar names throughout the implementation, where appropriate). I don't think the more flexible interface expressed here is useful -- certainly finding an application for rendezvousing a set of CPUs and running a function on a subset of those, with optional irq disable, is a bit of a brain bender. :-) So, stick to the stop_machine interface, semantics, and naming....

Also, I'd prefer this to be implemented in common/stop_machine.c if at all possible. It's not really x86 specific. Certainly I do not want it in smp.c, as that file is full enough already of random cruft with no other home.

Oh, also I think you are missing local_irq_disable() on the CPU that calls on_rendezvous_cpus(). Like the Linux implementation, you should probably do it at the same time you signal the other CPUs to do so.

Apart from that it's a good idea, and I'll look more closely at how you tie it in to CPU hotplug when you resubmit it.

Thanks,
Keir

On 2/2/08 08:11, "Tian, Kevin" <kevin.tian@intel.com> wrote:

> [...]
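To make the local_irq_disable() point concrete, here is a minimal sketch assuming the names from the posted patch; the exact placement inside on_rendezvous_cpus() is an assumption, not code from this thread:

/* Sketch only (hypothetical placement): the conducting cpu disables its
 * own IRQs in step with the cpus it has rendezvoused. */
if ( disable_irq )
{
    /* Command the rendezvoused cpus to disable IRQs, and wait for them. */
    rendezvous_set_action(RENDEZVOUS_DISABLE_IRQ, nr_cpus);
    /* ... then do the same locally, which the original patch misses. */
    local_irq_disable();
}

A matching local_irq_enable() would then be needed on the exit path, since the caller enters with IRQs enabled (the function ASSERTs as much).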
>From: Keir Fraser [mailto:Keir.Fraser@cl.cam.ac.uk]
>Sent: 4 February 2008 1:44
>
>This is a good thing to have, and my main comments are stylistic.
>Firstly, I think the name and interface change compared with Linux's
>stop_machine mechanism is gratuitous. I would prefer us to have the
>same interface (and also probably use the same or similar names
>throughout the implementation, where appropriate). I don't think the
>more flexible interface expressed here is useful -- certainly finding
>an application for rendezvousing a set of CPUs and running a function
>on a subset of those, with optional irq disable, is a bit of a brain
>bender. :-) So, stick to the stop_machine interface, semantics, and
>naming....

Here I'd like to explain why the Linux stop_machine semantics are a bit unwieldy for Xen, and why changing them took me further from the Linux logic, which led me to create a new name. :-)

Linux creates kernel threads and schedules them onto individual cpus to fulfill the synchronization process. It's the thread executing (*fn) that conducts the whole process. For example, suppose _stop_machine is called on cpu_i, with target cpu_j to execute (*fn). stop_machine first creates a kernel thread bound to cpu_j and then waits for completion on cpu_i, which causes a context switch. When the kernel thread is scheduled on cpu_j, it creates (max_cpus - 1) kernel threads on the remaining cpus and waits for them to be scheduled. Once stop threads are scheduled on all cpus, cpu_j first commands the other cpus to stop activity, then executes (*fn), and finally commands the others to resume. As a result, the earlier flow on cpu_i is woken up upon completion.

Xen has no dynamically created vcpus, so cpu_i can only send a notification to cpu_j and wait for cpu_j to check it at some point. It's natural to take a softirq bit, since do_softirq sits at a safe point just before resuming to the guest, or in idle_loop. But then it's a bit cumbersome to let cpu_j conduct the stop process. First, cpu_i needs to block itself after sending the notification to cpu_j, or else cpu_i would not handle softirqs later when cpu_j requests it to. Then block on what? Maybe an event channel... But the continuation stack mechanism in Xen means more dirty work would be needed, since previous frames are lost and the stack is reset to the bottom after resume. Alternatively, we could temporarily check softirqs on cpu_i in a loop, but that may still cause a context switch if a schedule softirq is triggered in between.

All of the above made me reluctant to follow the Linux semantics, and further thought led me to ask: why not let cpu_i conduct the stop process directly, and then let the concerned cpus call (*fn) via a new action, ***_INVOKE? This way, cpu_i doesn't need to be cut off from its current flow, and once stop_machine returns, all work that had to happen in a stopped environment has been done. We could even conduct cpu hotplug in one call, with all other APs invoking cpu_disable at the same time during S3.

Having reached that conclusion, I really thought it differed from the Linux stop_machine semantics, and thus rewrote the code, including the name, as differentiation. Though S3 is the only user of this interface so far, I do think it will be useful elsewhere, whenever one cpu wants to kick another into doing something at a safe point.

>
>Also, I'd prefer this to be implemented in common/stop_machine.c if at
>all possible. It's not really x86 specific. Certainly I do not want it
>in smp.c, as that file is full enough already of random cruft with no
>other home.

OK.

>
>Oh, also I think you are missing local_irq_disable() on the CPU that
>calls on_rendezvous_cpus(). Like the Linux implementation, you should
>probably do it at the same time you signal the other CPUs to do so.

Good point.

>
>Apart from that it's a good idea, and I'll look more closely at how you
>tie it in to CPU hotplug when you resubmit it.
>

Then before re-submission, I'll wait for your further comments on the explanation above.

Thanks,
Kevin
On 4/2/08 02:02, "Tian, Kevin" <kevin.tian@intel.com> wrote:

> All of the above made me reluctant to follow the Linux semantics, and
> further thought led me to ask: why not let cpu_i conduct the stop
> process directly, and then let the concerned cpus call (*fn) via a new
> action, ***_INVOKE? This way, cpu_i doesn't need to be cut off from
> its current flow, and once stop_machine returns, all work that had to
> happen in a stopped environment has been done.

Yes, I saw that, but it doesn't require a modified interface. The semantics of the call are still:
 1. Synchronize/rendezvous all CPUs (the caller is assumed already to be at a safe point and just needs to disable IRQs at the right time).
 2. Run (*fn) on one designated CPU (via your new bit of mechanism).
 3. All done.

This fits fine within the stop_machine_run(fn, data, cpu) interface, albeit that the underlying implementation is somewhat different (and you already have that pretty much correct!).

Really all you need to do is put your implementation in a different file, do some simple renaming of stuff, push some of your caller code (the bits that create the cpumasks) into your stop_machine_run() implementation, and give stop_machine_run() the simple Linux interface.

 -- Keir
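A minimal sketch of the wrapper Keir describes, assuming the rendezvous implementation from the posted patch underneath. Note that Linux's prototype takes int (*fn)(void *); the void signature here follows the patch's mechanism instead:

/* Sketch only: a Linux-style stop_machine_run() built on the posted
 * rendezvous mechanism. The cpumask construction moves from the caller
 * (as in cpu_down()) into the wrapper itself. */
int stop_machine_run(void (*fn)(void *), void *data, unsigned int cpu)
{
    cpumask_t allbutself = cpu_online_map;
    cpumask_t fn_mask = CPU_MASK_NONE;

    cpu_clear(smp_processor_id(), allbutself); /* the caller conducts */
    cpu_set(cpu, fn_mask);                     /* fn runs on this cpu only */

    /* Rendezvous every other online cpu, disable IRQs everywhere, run fn
     * on the designated cpu, then release everyone. */
    return on_rendezvous_cpus(allbutself, fn_mask, fn, data, 1);
}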
>From: Keir Fraser [mailto:Keir.Fraser@cl.cam.ac.uk]
>Sent: 4 February 2008 15:31
>
>[...]
>
>Really all you need to do is put your implementation in a different
>file, do some simple renaming of stuff, push some of your caller code
>(the bits that create the cpumasks) into your stop_machine_run()
>implementation, and give stop_machine_run() the simple Linux interface.
>
> -- Keir
>

Well, understood this time. Reshuffling the code now. :-)

Thanks,
Kevin
Then, attached is the updated version per your comments, with the Linux stop_machine semantics kept in common code. Build tested for x86, and ideally it should also work on other archs.

One small issue is the lock handling with cpu hotplug. For a cpu hotplug request from the control tools, there's a deadlock race between the pure cpu hotplug path and stop_machine if we still use a spinlock as the guard. For example, stop_machine may hold the lock on one cpu and wait for a response from another cpu, which happens to be spinning on the lock for a hotplug request. In that case, the other cpu never gets a chance to handle the softirq.

We may be able to use trylock for cpu_up/down; however, that also affects stop_machine when called, e.g., from the S3 path, for which try-and-give-up is too heavy and unexpected. Maybe some new cpu_tryup/trydown could be used. Not sure...

But anyway, there is still some way to go for full cpu hotplug support, and that doesn't prevent this feature from going in based on a spinlock at this stage. :-)

More comments?

Thanks,
Kevin

>From: Keir Fraser [mailto:Keir.Fraser@cl.cam.ac.uk]
>Sent: 4 February 2008 15:31
>
>[...]
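To illustrate the cpu_tryup/trydown idea, a hypothetical sketch of a non-blocking cpu_down() variant. Neither cpu_trydown() nor the __cpu_down_locked() helper exists in the posted code; only cpu_add_remove_lock does:

/* Hypothetical sketch: refuse rather than spin on cpu_add_remove_lock.
 * If another cpu holds the lock while conducting a stop_machine
 * rendezvous, it is waiting for this cpu to service the rendezvous
 * softirq, so spinning here would deadlock -- the race described above. */
int cpu_trydown(unsigned int cpu)
{
    int err;

    if ( !spin_trylock(&cpu_add_remove_lock) )
        return -EBUSY;

    err = __cpu_down_locked(cpu); /* hypothetical helper: the existing
                                   * cpu_down() body minus the locking */
    spin_unlock(&cpu_add_remove_lock);
    return err;
}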
Much better. I think this will go in, although I might rename some more stuff inside stop_machine.c towards Linux stop_machine-style names.

 -- Keir

On 4/2/08 09:51, "Tian, Kevin" <kevin.tian@intel.com> wrote:

> [...]
Applied, but I put the patch on a diet first. You'll want to check it still does what you expect... :-)

 -- Keir

On 4/2/08 09:51, "Tian, Kevin" <kevin.tian@intel.com> wrote:

> [...]
>From: Keir Fraser [mailto:Keir.Fraser@cl.cam.ac.uk]
>Sent: 12 February 2008 0:02
>
>Applied, but I put the patch on a diet first. You'll want to check it
>still does what you expect... :-)
>
> -- Keir
>

It's much cleaner, and my quick test confirmed its correctness. Thorough testing is ongoing...

Thanks,
Kevin