Make the locking functions return the lock pointers, so they can be passed to the unlocking functions (which in turn can check that the lock is still actually providing the intended protection, i.e. the parameters determining which lock is the right one didn''t change). Further use proper spin lock primitives rather than open coded local_irq_...() constructs, so that interrupts can be re-enabled as appropriate while spinning. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- Note that this implies David Vrabel''s patch titled "sched: fix race between sched_move_domain() and vcpu_wake()" to be applied already (as the change done there gets further adjusted here). --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -160,18 +160,16 @@ static inline void vcpu_runstate_change( void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate) { + spinlock_t *lock = likely(v == current) ? NULL : vcpu_schedule_lock_irq(v); s_time_t delta; - if ( unlikely(v != current) ) - vcpu_schedule_lock_irq(v); - memcpy(runstate, &v->runstate, sizeof(*runstate)); delta = NOW() - runstate->state_entry_time; if ( delta > 0 ) runstate->time[runstate->state] += delta; - if ( unlikely(v != current) ) - vcpu_schedule_unlock_irq(v); + if ( unlikely(lock != NULL) ) + vcpu_schedule_unlock_irq(lock, v); } uint64_t get_cpu_idle_time(unsigned int cpu) @@ -278,8 +276,7 @@ int sched_move_domain(struct domain *d, new_p = cpumask_first(c->cpu_valid); for_each_vcpu ( d, v ) { - spinlock_t *schedule_lock = per_cpu(schedule_data, - v->processor).schedule_lock; + spinlock_t *schedule_lock; vcpudata = v->sched_priv; @@ -289,8 +286,9 @@ int sched_move_domain(struct domain *d, cpumask_setall(v->cpu_affinity); - spin_lock_irq(schedule_lock); + schedule_lock = vcpu_schedule_lock_irq(v); v->processor = new_p; + /* _Not_ vcpu_schedule_unlock_irq(): v->processor changed! 
*/ spin_unlock_irq(schedule_lock); v->sched_priv = vcpu_priv[v->vcpu_id]; @@ -340,8 +338,7 @@ void sched_destroy_domain(struct domain void vcpu_sleep_nosync(struct vcpu *v) { unsigned long flags; - - vcpu_schedule_lock_irqsave(v, flags); + spinlock_t *lock = vcpu_schedule_lock_irqsave(v, &flags); if ( likely(!vcpu_runnable(v)) ) { @@ -351,7 +348,7 @@ void vcpu_sleep_nosync(struct vcpu *v) SCHED_OP(VCPU2OP(v), sleep, v); } - vcpu_schedule_unlock_irqrestore(v, flags); + vcpu_schedule_unlock_irqrestore(lock, flags, v); TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id); } @@ -369,8 +366,7 @@ void vcpu_sleep_sync(struct vcpu *v) void vcpu_wake(struct vcpu *v) { unsigned long flags; - - vcpu_schedule_lock_irqsave(v, flags); + spinlock_t *lock = vcpu_schedule_lock_irqsave(v, &flags); if ( likely(vcpu_runnable(v)) ) { @@ -384,7 +380,7 @@ void vcpu_wake(struct vcpu *v) vcpu_runstate_change(v, RUNSTATE_offline, NOW()); } - vcpu_schedule_unlock_irqrestore(v, flags); + vcpu_schedule_unlock_irqrestore(lock, flags, v); TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id); } @@ -535,10 +531,11 @@ static void vcpu_migrate(struct vcpu *v) */ void vcpu_force_reschedule(struct vcpu *v) { - vcpu_schedule_lock_irq(v); + spinlock_t *lock = vcpu_schedule_lock_irq(v); + if ( v->is_running ) set_bit(_VPF_migrating, &v->pause_flags); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); if ( test_bit(_VPF_migrating, &v->pause_flags) ) { @@ -553,7 +550,7 @@ void restore_vcpu_affinity(struct domain for_each_vcpu ( d, v ) { - vcpu_schedule_lock_irq(v); + spinlock_t *lock = vcpu_schedule_lock_irq(v); if ( v->affinity_broken ) { @@ -566,13 +563,13 @@ void restore_vcpu_affinity(struct domain if ( v->processor == smp_processor_id() ) { set_bit(_VPF_migrating, &v->pause_flags); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); vcpu_sleep_nosync(v); vcpu_migrate(v); } else { - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); } } @@ -599,7 +596,7 @@ int cpu_disable_scheduler(unsigned int c { for_each_vcpu ( d, v ) { - vcpu_schedule_lock_irq(v); + spinlock_t *lock = vcpu_schedule_lock_irq(v); cpumask_and(&online_affinity, v->cpu_affinity, c->cpu_valid); if ( cpumask_empty(&online_affinity) && @@ -620,13 +617,13 @@ int cpu_disable_scheduler(unsigned int c if ( v->processor == cpu ) { set_bit(_VPF_migrating, &v->pause_flags); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); vcpu_sleep_nosync(v); vcpu_migrate(v); } else { - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); } /* @@ -653,6 +650,7 @@ int vcpu_set_affinity(struct vcpu *v, co { cpumask_t online_affinity; cpumask_t *online; + spinlock_t *lock; if ( v->domain->is_pinned ) return -EINVAL; @@ -661,7 +659,7 @@ int vcpu_set_affinity(struct vcpu *v, co if ( cpumask_empty(&online_affinity) ) return -EINVAL; - vcpu_schedule_lock_irq(v); + lock = vcpu_schedule_lock_irq(v); cpumask_copy(v->cpu_affinity, affinity); @@ -669,7 +667,7 @@ int vcpu_set_affinity(struct vcpu *v, co * when changing the affinity */ set_bit(_VPF_migrating, &v->pause_flags); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); domain_update_node_affinity(v->domain); @@ -783,10 +781,10 @@ static long do_poll(struct sched_poll *s static long do_yield(void) { struct vcpu * v=current; + spinlock_t *lock = vcpu_schedule_lock_irq(v); - vcpu_schedule_lock_irq(v); SCHED_OP(VCPU2OP(v), yield, v); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); TRACE_2D(TRC_SCHED_YIELD, 
current->domain->domain_id, current->vcpu_id); raise_softirq(SCHEDULE_SOFTIRQ); @@ -1147,6 +1145,7 @@ static void schedule(void) unsigned long *tasklet_work = &this_cpu(tasklet_work_to_do); bool_t tasklet_work_scheduled = 0; struct schedule_data *sd; + spinlock_t *lock; struct task_slice next_slice; int cpu = smp_processor_id(); @@ -1173,7 +1172,7 @@ static void schedule(void) BUG(); } - pcpu_schedule_lock_irq(cpu); + lock = pcpu_schedule_lock_irq(cpu); stop_timer(&sd->s_timer); @@ -1190,7 +1189,7 @@ static void schedule(void) if ( unlikely(prev == next) ) { - pcpu_schedule_unlock_irq(cpu); + pcpu_schedule_unlock_irq(lock, cpu); trace_continue_running(next); return continue_running(prev); } @@ -1228,7 +1227,7 @@ static void schedule(void) ASSERT(!next->is_running); next->is_running = 1; - pcpu_schedule_unlock_irq(cpu); + pcpu_schedule_unlock_irq(lock, cpu); SCHED_STAT_CRANK(sched_ctx); @@ -1415,6 +1414,7 @@ int schedule_cpu_switch(unsigned int cpu { unsigned long flags; struct vcpu *idle; + spinlock_t *lock; void *ppriv, *ppriv_old, *vpriv, *vpriv_old; struct scheduler *old_ops = per_cpu(scheduler, cpu); struct scheduler *new_ops = (c == NULL) ? &ops : c->sched; @@ -1433,7 +1433,7 @@ int schedule_cpu_switch(unsigned int cpu return -ENOMEM; } - pcpu_schedule_lock_irqsave(cpu, flags); + lock = pcpu_schedule_lock_irqsave(cpu, &flags); SCHED_OP(old_ops, tick_suspend, cpu); vpriv_old = idle->sched_priv; @@ -1444,7 +1444,7 @@ int schedule_cpu_switch(unsigned int cpu SCHED_OP(new_ops, tick_resume, cpu); SCHED_OP(new_ops, insert_vcpu, idle); - pcpu_schedule_unlock_irqrestore(cpu, flags); + pcpu_schedule_unlock_irqrestore(lock, flags, cpu); SCHED_OP(old_ops, free_vdata, vpriv_old); SCHED_OP(old_ops, free_pdata, ppriv_old, cpu); @@ -1502,10 +1502,11 @@ void schedule_dump(struct cpupool *c) for_each_cpu (i, cpus) { - pcpu_schedule_lock(i); + spinlock_t *lock = pcpu_schedule_lock(i); + printk("CPU[%02d] ", i); SCHED_OP(sched, dump_cpu_state, i); - pcpu_schedule_unlock(i); + pcpu_schedule_unlock(lock, i); } } --- a/xen/common/sched_credit.c +++ b/xen/common/sched_credit.c @@ -1166,6 +1166,7 @@ csched_runq_sort(struct csched_private * struct csched_pcpu * const spc = CSCHED_PCPU(cpu); struct list_head *runq, *elem, *next, *last_under; struct csched_vcpu *svc_elem; + spinlock_t *lock; unsigned long flags; int sort_epoch; @@ -1175,7 +1176,7 @@ csched_runq_sort(struct csched_private * spc->runq_sort_last = sort_epoch; - pcpu_schedule_lock_irqsave(cpu, flags); + lock = pcpu_schedule_lock_irqsave(cpu, &flags); runq = &spc->runq; elem = runq->next; @@ -1200,7 +1201,7 @@ csched_runq_sort(struct csched_private * elem = next; } - pcpu_schedule_unlock_irqrestore(cpu, flags); + pcpu_schedule_unlock_irqrestore(lock, flags, cpu); } static void @@ -1563,7 +1564,9 @@ csched_load_balance(struct csched_privat * could cause a deadlock if the peer CPU is also load * balancing and trying to lock this CPU. */ - if ( !pcpu_schedule_trylock(peer_cpu) ) + spinlock_t *lock = pcpu_schedule_trylock(peer_cpu); + + if ( !lock ) { SCHED_STAT_CRANK(steal_trylock_failed); peer_cpu = cpumask_cycle(peer_cpu, &workers); @@ -1573,7 +1576,7 @@ csched_load_balance(struct csched_privat /* Any work over there to steal? */ speer = cpumask_test_cpu(peer_cpu, online) ? 
csched_runq_steal(peer_cpu, cpu, snext->pri, bstep) : NULL; - pcpu_schedule_unlock(peer_cpu); + pcpu_schedule_unlock(lock, peer_cpu); /* As soon as one vcpu is found, balancing ends */ if ( speer != NULL ) --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -882,15 +882,17 @@ csched_vcpu_insert(const struct schedule */ if ( ! is_idle_vcpu(vc) ) { + spinlock_t *lock; + /* FIXME: Do we need the private lock here? */ list_add_tail(&svc->sdom_elem, &svc->sdom->vcpu); /* Add vcpu to runqueue of initial processor */ - vcpu_schedule_lock_irq(vc); + lock = vcpu_schedule_lock_irq(vc); runq_assign(ops, vc); - vcpu_schedule_unlock_irq(vc); + vcpu_schedule_unlock_irq(lock, vc); sdom->nr_vcpus++; } @@ -917,14 +919,16 @@ csched_vcpu_remove(const struct schedule if ( ! is_idle_vcpu(vc) ) { + spinlock_t *lock; + SCHED_STAT_CRANK(vcpu_destroy); /* Remove from runqueue */ - vcpu_schedule_lock_irq(vc); + lock = vcpu_schedule_lock_irq(vc); runq_deassign(ops, vc); - vcpu_schedule_unlock_irq(vc); + vcpu_schedule_unlock_irq(lock, vc); /* Remove from sdom list. Don''t need a lock for this, as it''s called * syncronously when nothing else can happen. */ @@ -1011,8 +1015,7 @@ csched_context_saved(const struct schedu { struct csched_vcpu * const svc = CSCHED_VCPU(vc); s_time_t now = NOW(); - - vcpu_schedule_lock_irq(vc); + spinlock_t *lock = vcpu_schedule_lock_irq(vc); BUG_ON( !is_idle_vcpu(vc) && svc->rqd != RQD(ops, vc->processor)); @@ -1038,7 +1041,7 @@ csched_context_saved(const struct schedu else if ( !is_idle_vcpu(vc) ) update_load(ops, svc->rqd, svc, -1, now); - vcpu_schedule_unlock_irq(vc); + vcpu_schedule_unlock_irq(lock, vc); } #define MAX_LOAD (1ULL<<60); @@ -1456,14 +1459,14 @@ csched_dom_cntl( * must never lock csched_priv.lock if we''re holding a runqueue lock. * Also, calling vcpu_schedule_lock() is enough, since IRQs have already * been disabled. */ - vcpu_schedule_lock(svc->vcpu); + spinlock_t *lock = vcpu_schedule_lock(svc->vcpu); BUG_ON(svc->rqd != RQD(ops, svc->vcpu->processor)); svc->weight = sdom->weight; update_max_weight(svc->rqd, svc->weight, old_weight); - vcpu_schedule_unlock(svc->vcpu); + vcpu_schedule_unlock(lock, svc->vcpu); } } } @@ -1993,6 +1996,7 @@ static void init_pcpu(const struct sched cpumask_set_cpu(cpu, &rqd->idle); cpumask_set_cpu(cpu, &rqd->active); + /* _Not_ pcpu_schedule_unlock(): per_cpu().schedule_lock changed! */ spin_unlock(old_lock); cpumask_set_cpu(cpu, &prv->initialized); --- a/xen/common/sched_sedf.c +++ b/xen/common/sched_sedf.c @@ -1350,14 +1350,16 @@ static int sedf_adjust_weights(struct cp if ( EDOM_INFO(p)->weight ) { /* Interrupts already off */ - vcpu_schedule_lock(p); + spinlock_t *lock = vcpu_schedule_lock(p); + EDOM_INFO(p)->period_orig = EDOM_INFO(p)->period = WEIGHT_PERIOD; EDOM_INFO(p)->slice_orig EDOM_INFO(p)->slice = (EDOM_INFO(p)->weight * (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[cpu])) / sumw[cpu]; - vcpu_schedule_unlock(p); + + vcpu_schedule_unlock(lock, p); } } } @@ -1418,21 +1420,24 @@ static int sedf_adjust(const struct sche { /* (Here and everywhere in the following) IRQs are already off, * hence vcpu_spin_lock() is the one. 
*/ - vcpu_schedule_lock(v); + spinlock_t *lock = vcpu_schedule_lock(v); + EDOM_INFO(v)->extraweight = op->u.sedf.weight; EDOM_INFO(v)->weight = 0; EDOM_INFO(v)->slice = 0; EDOM_INFO(v)->period = WEIGHT_PERIOD; - vcpu_schedule_unlock(v); + vcpu_schedule_unlock(lock, v); } } else { /* Weight-driven domains with real-time execution */ - for_each_vcpu ( p, v ) { - vcpu_schedule_lock(v); + for_each_vcpu ( p, v ) + { + spinlock_t *lock = vcpu_schedule_lock(v); + EDOM_INFO(v)->weight = op->u.sedf.weight; - vcpu_schedule_unlock(v); + vcpu_schedule_unlock(lock, v); } } } @@ -1454,14 +1459,15 @@ static int sedf_adjust(const struct sche /* Time-driven domains */ for_each_vcpu ( p, v ) { - vcpu_schedule_lock(v); + spinlock_t *lock = vcpu_schedule_lock(v); + EDOM_INFO(v)->weight = 0; EDOM_INFO(v)->extraweight = 0; EDOM_INFO(v)->period_orig = EDOM_INFO(v)->period = op->u.sedf.period; EDOM_INFO(v)->slice_orig = EDOM_INFO(v)->slice = op->u.sedf.slice; - vcpu_schedule_unlock(v); + vcpu_schedule_unlock(lock, v); } } @@ -1471,13 +1477,14 @@ static int sedf_adjust(const struct sche for_each_vcpu ( p, v ) { - vcpu_schedule_lock(v); + spinlock_t *lock = vcpu_schedule_lock(v); + EDOM_INFO(v)->status = (EDOM_INFO(v)->status & ~EXTRA_AWARE) | (op->u.sedf.extratime & EXTRA_AWARE); EDOM_INFO(v)->latency = op->u.sedf.latency; extraq_check(v); - vcpu_schedule_unlock(v); + vcpu_schedule_unlock(lock, v); } } else if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo ) --- a/xen/include/xen/sched-if.h +++ b/xen/include/xen/sched-if.h @@ -47,96 +47,70 @@ DECLARE_PER_CPU(struct schedule_data, sc DECLARE_PER_CPU(struct scheduler *, scheduler); DECLARE_PER_CPU(struct cpupool *, cpupool); -static inline spinlock_t * pcpu_schedule_lock(int cpu) -{ - spinlock_t * lock=NULL; - - for ( ; ; ) - { - /* The per_cpu(v->processor) may also change, if changing - * cpu pool also changes the scheduler lock. Retry - * until they match. - */ - lock=per_cpu(schedule_data, cpu).schedule_lock; - - spin_lock(lock); - if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) - break; - spin_unlock(lock); - } - return lock; +#define sched_lock(kind, param, cpu, irq, arg...) \ +static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \ +{ \ + for ( ; ; ) \ + { \ + spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; \ + /* \ + * v->processor may change when grabbing the lock; but \ + * per_cpu(v->processor) may also change, if changing cpu pool \ + * also changes the scheduler lock. Retry until they match. \ + * \ + * It may also be the case that v->processor may change but the \ + * lock may be the same; this will succeed in that case. \ + */ \ + spin_lock##irq(lock, ## arg); \ + if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) \ + return lock; \ + spin_unlock##irq(lock, ## arg); \ + } \ } -static inline int pcpu_schedule_trylock(int cpu) -{ - spinlock_t * lock=NULL; - - lock=per_cpu(schedule_data, cpu).schedule_lock; - if ( ! spin_trylock(lock) ) - return 0; - if ( lock == per_cpu(schedule_data, cpu).schedule_lock ) - return 1; - else - { - spin_unlock(lock); - return 0; - } +#define sched_unlock(kind, param, cpu, irq, arg...) 
\ +static inline void kind##_schedule_unlock##irq(spinlock_t *lock \ + EXTRA_TYPE(arg), param) \ +{ \ + ASSERT(lock == per_cpu(schedule_data, cpu).schedule_lock); \ + spin_unlock##irq(lock, ## arg); \ } -#define pcpu_schedule_lock_irq(p) \ - do { local_irq_disable(); pcpu_schedule_lock(p); } while ( 0 ) -#define pcpu_schedule_lock_irqsave(p, flags) \ - do { local_irq_save(flags); pcpu_schedule_lock(p); } while ( 0 ) +#define EXTRA_TYPE(arg) +sched_lock(pcpu, unsigned int cpu, cpu, ) +sched_lock(vcpu, const struct vcpu *v, v->processor, ) +sched_lock(pcpu, unsigned int cpu, cpu, _irq) +sched_lock(vcpu, const struct vcpu *v, v->processor, _irq) +sched_unlock(pcpu, unsigned int cpu, cpu, ) +sched_unlock(vcpu, const struct vcpu *v, v->processor, ) +sched_unlock(pcpu, unsigned int cpu, cpu, _irq) +sched_unlock(vcpu, const struct vcpu *v, v->processor, _irq) +#undef EXTRA_TYPE + +#define EXTRA_TYPE(arg) , unsigned long arg +#define spin_unlock_irqsave spin_unlock_irqrestore +sched_lock(pcpu, unsigned int cpu, cpu, _irqsave, *flags) +sched_lock(vcpu, const struct vcpu *v, v->processor, _irqsave, *flags) +#undef spin_unlock_irqsave +sched_unlock(pcpu, unsigned int cpu, cpu, _irqrestore, flags) +sched_unlock(vcpu, const struct vcpu *v, v->processor, _irqrestore, flags) +#undef EXTRA_TYPE + +#undef sched_unlock +#undef sched_lock -static inline void pcpu_schedule_unlock(int cpu) +static inline spinlock_t *pcpu_schedule_trylock(unsigned int cpu) { - spin_unlock(per_cpu(schedule_data, cpu).schedule_lock); -} + spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; -#define pcpu_schedule_unlock_irq(p) \ - do { pcpu_schedule_unlock(p); local_irq_enable(); } while ( 0 ) -#define pcpu_schedule_unlock_irqrestore(p, flags) \ - do { pcpu_schedule_unlock(p); local_irq_restore(flags); } while ( 0 ) - -static inline void vcpu_schedule_lock(struct vcpu *v) -{ - spinlock_t * lock; - - for ( ; ; ) - { - /* v->processor may change when grabbing the lock; but - * per_cpu(v->processor) may also change, if changing - * cpu pool also changes the scheduler lock. Retry - * until they match. - * - * It may also be the case that v->processor may change - * but the lock may be the same; this will succeed - * in that case. - */ - lock=per_cpu(schedule_data, v->processor).schedule_lock; - - spin_lock(lock); - if ( likely(lock == per_cpu(schedule_data, v->processor).schedule_lock) ) - break; - spin_unlock(lock); - } -} - -#define vcpu_schedule_lock_irq(v) \ - do { local_irq_disable(); vcpu_schedule_lock(v); } while ( 0 ) -#define vcpu_schedule_lock_irqsave(v, flags) \ - do { local_irq_save(flags); vcpu_schedule_lock(v); } while ( 0 ) - -static inline void vcpu_schedule_unlock(struct vcpu *v) -{ - spin_unlock(per_cpu(schedule_data, v->processor).schedule_lock); + if ( !spin_trylock(lock) ) + return NULL; + if ( lock == per_cpu(schedule_data, cpu).schedule_lock ) + return lock; + spin_unlock(lock); + return NULL; } -#define vcpu_schedule_unlock_irq(v) \ - do { vcpu_schedule_unlock(v); local_irq_enable(); } while ( 0 ) -#define vcpu_schedule_unlock_irqrestore(v, flags) \ - do { vcpu_schedule_unlock(v); local_irq_restore(flags); } while ( 0 ) - struct task_slice { struct vcpu *task; s_time_t time; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
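The caller-side shape this change results in, shown as a minimal sketch rather than the literal Xen code (schedule_lock_of() is a stand-in for per_cpu(schedule_data, cpu).schedule_lock, and the function names are shortened):

static inline spinlock_t *vcpu_sched_lock_irqsave(const struct vcpu *v,
                                                  unsigned long *flags)
{
    for ( ; ; )
    {
        spinlock_t *lock = schedule_lock_of(v->processor);

        spin_lock_irqsave(lock, *flags);
        /* v->processor (or the pool's scheduler lock) may have changed. */
        if ( likely(lock == schedule_lock_of(v->processor)) )
            return lock;
        spin_unlock_irqrestore(lock, *flags);
    }
}

static inline void vcpu_sched_unlock_irqrestore(spinlock_t *lock,
                                                unsigned long flags,
                                                const struct vcpu *v)
{
    /* Catch callers whose lock no longer protects v. */
    ASSERT(lock == schedule_lock_of(v->processor));
    spin_unlock_irqrestore(lock, flags);
}

/* Hypothetical caller: the lock pointer flows from lock to unlock. */
void example_wake(const struct vcpu *v)
{
    unsigned long flags;
    spinlock_t *lock = vcpu_sched_lock_irqsave(v, &flags);

    /* ... operate on v under its scheduler lock ... */

    vcpu_sched_unlock_irqrestore(lock, flags, v);
}

Returning the lock pointer lets the unlock side assert that it still matches the scheduler lock currently responsible for v, instead of silently unlocking whatever lock v->processor happens to designate by then; and using the spin_lock_irq*() primitives instead of open-coded local_irq_*() constructs allows interrupts to be re-enabled as appropriate while spinning.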
Andrew Cooper
2013-Oct-11 14:29 UTC
Re: [PATCH] scheduler: adjust internal locking interface
On 11/10/13 15:14, Jan Beulich wrote:> --- a/xen/include/xen/sched-if.h > +++ b/xen/include/xen/sched-if.h > @@ -47,96 +47,70 @@ DECLARE_PER_CPU(struct schedule_data, sc > DECLARE_PER_CPU(struct scheduler *, scheduler); > DECLARE_PER_CPU(struct cpupool *, cpupool); > > -static inline spinlock_t * pcpu_schedule_lock(int cpu) > -{ > - spinlock_t * lock=NULL; > - > - for ( ; ; ) > - { > - /* The per_cpu(v->processor) may also change, if changing > - * cpu pool also changes the scheduler lock. Retry > - * until they match. > - */ > - lock=per_cpu(schedule_data, cpu).schedule_lock; > - > - spin_lock(lock); > - if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) > - break; > - spin_unlock(lock); > - } > - return lock; > +#define sched_lock(kind, param, cpu, irq, arg...) \ > +static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \ > +{ \ > + for ( ; ; ) \ > + { \ > + spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; \ > + /* \ > + * v->processor may change when grabbing the lock; but \ > + * per_cpu(v->processor) may also change, if changing cpu pool \ > + * also changes the scheduler lock. Retry until they match. \ > + * \ > + * It may also be the case that v->processor may change but the \ > + * lock may be the same; this will succeed in that case. \ > + */ \ > + spin_lock##irq(lock, ## arg); \ > + if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) \ > + return lock; \ > + spin_unlock##irq(lock, ## arg); \ > + } \ > }The readability of this (and others) would be much easier if the ''\'' were aligned on the RHS and out of view of the main body. ~Andrew> > -static inline int pcpu_schedule_trylock(int cpu) > -{ > - spinlock_t * lock=NULL; > - > - lock=per_cpu(schedule_data, cpu).schedule_lock; > - if ( ! spin_trylock(lock) ) > - return 0; > - if ( lock == per_cpu(schedule_data, cpu).schedule_lock ) > - return 1; > - else > - { > - spin_unlock(lock); > - return 0; > - } > +#define sched_unlock(kind, param, cpu, irq, arg...) 
\ > +static inline void kind##_schedule_unlock##irq(spinlock_t *lock \ > + EXTRA_TYPE(arg), param) \ > +{ \ > + ASSERT(lock == per_cpu(schedule_data, cpu).schedule_lock); \ > + spin_unlock##irq(lock, ## arg); \ > } > > -#define pcpu_schedule_lock_irq(p) \ > - do { local_irq_disable(); pcpu_schedule_lock(p); } while ( 0 ) > -#define pcpu_schedule_lock_irqsave(p, flags) \ > - do { local_irq_save(flags); pcpu_schedule_lock(p); } while ( 0 ) > +#define EXTRA_TYPE(arg) > +sched_lock(pcpu, unsigned int cpu, cpu, ) > +sched_lock(vcpu, const struct vcpu *v, v->processor, ) > +sched_lock(pcpu, unsigned int cpu, cpu, _irq) > +sched_lock(vcpu, const struct vcpu *v, v->processor, _irq) > +sched_unlock(pcpu, unsigned int cpu, cpu, ) > +sched_unlock(vcpu, const struct vcpu *v, v->processor, ) > +sched_unlock(pcpu, unsigned int cpu, cpu, _irq) > +sched_unlock(vcpu, const struct vcpu *v, v->processor, _irq) > +#undef EXTRA_TYPE > + > +#define EXTRA_TYPE(arg) , unsigned long arg > +#define spin_unlock_irqsave spin_unlock_irqrestore > +sched_lock(pcpu, unsigned int cpu, cpu, _irqsave, *flags) > +sched_lock(vcpu, const struct vcpu *v, v->processor, _irqsave, *flags) > +#undef spin_unlock_irqsave > +sched_unlock(pcpu, unsigned int cpu, cpu, _irqrestore, flags) > +sched_unlock(vcpu, const struct vcpu *v, v->processor, _irqrestore, flags) > +#undef EXTRA_TYPE > + > +#undef sched_unlock > +#undef sched_lock > > -static inline void pcpu_schedule_unlock(int cpu) > +static inline spinlock_t *pcpu_schedule_trylock(unsigned int cpu) > { > - spin_unlock(per_cpu(schedule_data, cpu).schedule_lock); > -} > + spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; > > -#define pcpu_schedule_unlock_irq(p) \ > - do { pcpu_schedule_unlock(p); local_irq_enable(); } while ( 0 ) > -#define pcpu_schedule_unlock_irqrestore(p, flags) \ > - do { pcpu_schedule_unlock(p); local_irq_restore(flags); } while ( 0 ) > - > -static inline void vcpu_schedule_lock(struct vcpu *v) > -{ > - spinlock_t * lock; > - > - for ( ; ; ) > - { > - /* v->processor may change when grabbing the lock; but > - * per_cpu(v->processor) may also change, if changing > - * cpu pool also changes the scheduler lock. Retry > - * until they match. > - * > - * It may also be the case that v->processor may change > - * but the lock may be the same; this will succeed > - * in that case. 
> - */ > - lock=per_cpu(schedule_data, v->processor).schedule_lock; > - > - spin_lock(lock); > - if ( likely(lock == per_cpu(schedule_data, v->processor).schedule_lock) ) > - break; > - spin_unlock(lock); > - } > -} > - > -#define vcpu_schedule_lock_irq(v) \ > - do { local_irq_disable(); vcpu_schedule_lock(v); } while ( 0 ) > -#define vcpu_schedule_lock_irqsave(v, flags) \ > - do { local_irq_save(flags); vcpu_schedule_lock(v); } while ( 0 ) > - > -static inline void vcpu_schedule_unlock(struct vcpu *v) > -{ > - spin_unlock(per_cpu(schedule_data, v->processor).schedule_lock); > + if ( !spin_trylock(lock) ) > + return NULL; > + if ( lock == per_cpu(schedule_data, cpu).schedule_lock ) > + return lock; > + spin_unlock(lock); > + return NULL; > } > > -#define vcpu_schedule_unlock_irq(v) \ > - do { vcpu_schedule_unlock(v); local_irq_enable(); } while ( 0 ) > -#define vcpu_schedule_unlock_irqrestore(v, flags) \ > - do { vcpu_schedule_unlock(v); local_irq_restore(flags); } while ( 0 ) > - > struct task_slice { > struct vcpu *task; > s_time_t time; > > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
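Concretely, the layout being suggested would pad the line continuations out to a common column, roughly as follows (comment block omitted; shown purely to illustrate the formatting, not as part of the patch):

#define sched_lock(kind, param, cpu, irq, arg...)                          \
static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
{                                                                          \
    for ( ; ; )                                                            \
    {                                                                      \
        spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock;      \
                                                                           \
        spin_lock##irq(lock, ## arg);                                      \
        if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) )   \
            return lock;                                                   \
        spin_unlock##irq(lock, ## arg);                                    \
    }                                                                      \
}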
Jan Beulich
2013-Oct-11 14:37 UTC
Re: [PATCH] scheduler: adjust internal locking interface
>>> On 11.10.13 at 16:29, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 11/10/13 15:14, Jan Beulich wrote:
>> +#define sched_lock(kind, param, cpu, irq, arg...) \
>> +static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
>> +{ \
>> +    for ( ; ; ) \
>> +    { \
>> +        spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; \
>> +        /* \
>> +         * v->processor may change when grabbing the lock; but \
>> +         * per_cpu(v->processor) may also change, if changing cpu pool \
>> +         * also changes the scheduler lock. Retry until they match. \
>> +         * \
>> +         * It may also be the case that v->processor may change but the \
>> +         * lock may be the same; this will succeed in that case. \
>> +         */ \
>> +        spin_lock##irq(lock, ## arg); \
>> +        if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) \
>> +            return lock; \
>> +        spin_unlock##irq(lock, ## arg); \
>> +    } \
>> }
>
> The readability of this (and others) would be much easier if the '\'
> were aligned on the RHS and out of view of the main body.

This depends on the fonts you use - in a mail reader using variable
width fonts it reads much better the way I coded it. And while I
realized that there are many cases where there is right alignment of
these line continuations, I'm personally not in favor of this style,
and since the coding style document doesn't say anything about it
I used my personal preference...

Jan
George Dunlap
2013-Oct-11 14:41 UTC
Re: [PATCH] scheduler: adjust internal locking interface
On 11/10/13 15:37, Jan Beulich wrote:
>>>> On 11.10.13 at 16:29, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 11/10/13 15:14, Jan Beulich wrote:
>>> +#define sched_lock(kind, param, cpu, irq, arg...) \
>>> +static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
>>> +{ \
>>> +    for ( ; ; ) \
>>> +    { \
>>> +        spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; \
>>> +        /* \
>>> +         * v->processor may change when grabbing the lock; but \
>>> +         * per_cpu(v->processor) may also change, if changing cpu pool \
>>> +         * also changes the scheduler lock. Retry until they match. \
>>> +         * \
>>> +         * It may also be the case that v->processor may change but the \
>>> +         * lock may be the same; this will succeed in that case. \
>>> +         */ \
>>> +        spin_lock##irq(lock, ## arg); \
>>> +        if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) \
>>> +            return lock; \
>>> +        spin_unlock##irq(lock, ## arg); \
>>> +    } \
>>> }
>> The readability of this (and others) would be much easier if the '\'
>> were aligned on the RHS and out of view of the main body.
> This depends on the fonts you use - in a mail reader using variable
> width fonts it reads much better the way I coded it. And while I
> realized that there are many cases where there is right alignment of
> these line continuations, I'm personally not in favor of this style,
> and since the coding style document doesn't say anything about it
> I used my personal preference...

But what we care about is how it looks in an editor. I presume when
you're actually coding you use a fixed-width font? :-)

 -George
Jan Beulich
2013-Oct-11 15:03 UTC
Re: [PATCH] scheduler: adjust internal locking interface
>>> On 11.10.13 at 16:41, George Dunlap <george.dunlap@eu.citrix.com> wrote:
> On 11/10/13 15:37, Jan Beulich wrote:
>>>>> On 11.10.13 at 16:29, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>>> On 11/10/13 15:14, Jan Beulich wrote:
>>>> +#define sched_lock(kind, param, cpu, irq, arg...) \
>>>> +static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
>>>> +{ \
>>>> +    for ( ; ; ) \
>>>> +    { \
>>>> +        spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; \
>>>> +        /* \
>>>> +         * v->processor may change when grabbing the lock; but \
>>>> +         * per_cpu(v->processor) may also change, if changing cpu pool \
>>>> +         * also changes the scheduler lock. Retry until they match. \
>>>> +         * \
>>>> +         * It may also be the case that v->processor may change but the \
>>>> +         * lock may be the same; this will succeed in that case. \
>>>> +         */ \
>>>> +        spin_lock##irq(lock, ## arg); \
>>>> +        if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) \
>>>> +            return lock; \
>>>> +        spin_unlock##irq(lock, ## arg); \
>>>> +    } \
>>>> }
>>> The readability of this (and others) would be much easier if the '\'
>>> were aligned on the RHS and out of view of the main body.
>> This depends on the fonts you use - in a mail reader using variable
>> width fonts it reads much better the way I coded it. And while I
>> realized that there are many cases where there is right alignment of
>> these line continuations, I'm personally not in favor of this style,
>> and since the coding style document doesn't say anything about it
>> I used my personal preference...
>
> But what we care about is how it looks in an editor. I presume when
> you're actually coding you use a fixed-width font? :-)

Of course. But I have no problem reading it with the non-padded
backslashes...

Jan
1: scheduler: adjust internal locking interface
2: sched: fix race between sched_move_domain() and vcpu_wake()

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Jan Beulich
2013-Oct-11 15:16 UTC
[PATCH v2 1/2] scheduler: adjust internal locking interface
Make the locking functions return the lock pointers, so they can be passed to the unlocking functions (which in turn can check that the lock is still actually providing the intended protection, i.e. the parameters determining which lock is the right one didn''t change). Further use proper spin lock primitives rather than open coded local_irq_...() constructs, so that interrupts can be re-enabled as appropriate while spinning. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -160,18 +160,16 @@ static inline void vcpu_runstate_change( void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate) { + spinlock_t *lock = likely(v == current) ? NULL : vcpu_schedule_lock_irq(v); s_time_t delta; - if ( unlikely(v != current) ) - vcpu_schedule_lock_irq(v); - memcpy(runstate, &v->runstate, sizeof(*runstate)); delta = NOW() - runstate->state_entry_time; if ( delta > 0 ) runstate->time[runstate->state] += delta; - if ( unlikely(v != current) ) - vcpu_schedule_unlock_irq(v); + if ( unlikely(lock != NULL) ) + vcpu_schedule_unlock_irq(lock, v); } uint64_t get_cpu_idle_time(unsigned int cpu) @@ -333,8 +331,7 @@ void sched_destroy_domain(struct domain void vcpu_sleep_nosync(struct vcpu *v) { unsigned long flags; - - vcpu_schedule_lock_irqsave(v, flags); + spinlock_t *lock = vcpu_schedule_lock_irqsave(v, &flags); if ( likely(!vcpu_runnable(v)) ) { @@ -344,7 +341,7 @@ void vcpu_sleep_nosync(struct vcpu *v) SCHED_OP(VCPU2OP(v), sleep, v); } - vcpu_schedule_unlock_irqrestore(v, flags); + vcpu_schedule_unlock_irqrestore(lock, flags, v); TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id); } @@ -362,8 +359,7 @@ void vcpu_sleep_sync(struct vcpu *v) void vcpu_wake(struct vcpu *v) { unsigned long flags; - - vcpu_schedule_lock_irqsave(v, flags); + spinlock_t *lock = vcpu_schedule_lock_irqsave(v, &flags); if ( likely(vcpu_runnable(v)) ) { @@ -377,7 +373,7 @@ void vcpu_wake(struct vcpu *v) vcpu_runstate_change(v, RUNSTATE_offline, NOW()); } - vcpu_schedule_unlock_irqrestore(v, flags); + vcpu_schedule_unlock_irqrestore(lock, flags, v); TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id); } @@ -528,10 +524,11 @@ static void vcpu_migrate(struct vcpu *v) */ void vcpu_force_reschedule(struct vcpu *v) { - vcpu_schedule_lock_irq(v); + spinlock_t *lock = vcpu_schedule_lock_irq(v); + if ( v->is_running ) set_bit(_VPF_migrating, &v->pause_flags); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); if ( test_bit(_VPF_migrating, &v->pause_flags) ) { @@ -546,7 +543,7 @@ void restore_vcpu_affinity(struct domain for_each_vcpu ( d, v ) { - vcpu_schedule_lock_irq(v); + spinlock_t *lock = vcpu_schedule_lock_irq(v); if ( v->affinity_broken ) { @@ -559,13 +556,13 @@ void restore_vcpu_affinity(struct domain if ( v->processor == smp_processor_id() ) { set_bit(_VPF_migrating, &v->pause_flags); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); vcpu_sleep_nosync(v); vcpu_migrate(v); } else { - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); } } @@ -592,7 +589,7 @@ int cpu_disable_scheduler(unsigned int c { for_each_vcpu ( d, v ) { - vcpu_schedule_lock_irq(v); + spinlock_t *lock = vcpu_schedule_lock_irq(v); cpumask_and(&online_affinity, v->cpu_affinity, c->cpu_valid); if ( cpumask_empty(&online_affinity) && @@ -613,13 +610,13 @@ int cpu_disable_scheduler(unsigned int c if ( v->processor == cpu ) { set_bit(_VPF_migrating, &v->pause_flags); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); 
vcpu_sleep_nosync(v); vcpu_migrate(v); } else { - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); } /* @@ -646,6 +643,7 @@ int vcpu_set_affinity(struct vcpu *v, co { cpumask_t online_affinity; cpumask_t *online; + spinlock_t *lock; if ( v->domain->is_pinned ) return -EINVAL; @@ -654,7 +652,7 @@ int vcpu_set_affinity(struct vcpu *v, co if ( cpumask_empty(&online_affinity) ) return -EINVAL; - vcpu_schedule_lock_irq(v); + lock = vcpu_schedule_lock_irq(v); cpumask_copy(v->cpu_affinity, affinity); @@ -662,7 +660,7 @@ int vcpu_set_affinity(struct vcpu *v, co * when changing the affinity */ set_bit(_VPF_migrating, &v->pause_flags); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); domain_update_node_affinity(v->domain); @@ -776,10 +774,10 @@ static long do_poll(struct sched_poll *s static long do_yield(void) { struct vcpu * v=current; + spinlock_t *lock = vcpu_schedule_lock_irq(v); - vcpu_schedule_lock_irq(v); SCHED_OP(VCPU2OP(v), yield, v); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); TRACE_2D(TRC_SCHED_YIELD, current->domain->domain_id, current->vcpu_id); raise_softirq(SCHEDULE_SOFTIRQ); @@ -1140,6 +1138,7 @@ static void schedule(void) unsigned long *tasklet_work = &this_cpu(tasklet_work_to_do); bool_t tasklet_work_scheduled = 0; struct schedule_data *sd; + spinlock_t *lock; struct task_slice next_slice; int cpu = smp_processor_id(); @@ -1166,7 +1165,7 @@ static void schedule(void) BUG(); } - pcpu_schedule_lock_irq(cpu); + lock = pcpu_schedule_lock_irq(cpu); stop_timer(&sd->s_timer); @@ -1183,7 +1182,7 @@ static void schedule(void) if ( unlikely(prev == next) ) { - pcpu_schedule_unlock_irq(cpu); + pcpu_schedule_unlock_irq(lock, cpu); trace_continue_running(next); return continue_running(prev); } @@ -1221,7 +1220,7 @@ static void schedule(void) ASSERT(!next->is_running); next->is_running = 1; - pcpu_schedule_unlock_irq(cpu); + pcpu_schedule_unlock_irq(lock, cpu); SCHED_STAT_CRANK(sched_ctx); @@ -1408,6 +1407,7 @@ int schedule_cpu_switch(unsigned int cpu { unsigned long flags; struct vcpu *idle; + spinlock_t *lock; void *ppriv, *ppriv_old, *vpriv, *vpriv_old; struct scheduler *old_ops = per_cpu(scheduler, cpu); struct scheduler *new_ops = (c == NULL) ? 
&ops : c->sched; @@ -1426,7 +1426,7 @@ int schedule_cpu_switch(unsigned int cpu return -ENOMEM; } - pcpu_schedule_lock_irqsave(cpu, flags); + lock = pcpu_schedule_lock_irqsave(cpu, &flags); SCHED_OP(old_ops, tick_suspend, cpu); vpriv_old = idle->sched_priv; @@ -1437,7 +1437,7 @@ int schedule_cpu_switch(unsigned int cpu SCHED_OP(new_ops, tick_resume, cpu); SCHED_OP(new_ops, insert_vcpu, idle); - pcpu_schedule_unlock_irqrestore(cpu, flags); + pcpu_schedule_unlock_irqrestore(lock, flags, cpu); SCHED_OP(old_ops, free_vdata, vpriv_old); SCHED_OP(old_ops, free_pdata, ppriv_old, cpu); @@ -1495,10 +1495,11 @@ void schedule_dump(struct cpupool *c) for_each_cpu (i, cpus) { - pcpu_schedule_lock(i); + spinlock_t *lock = pcpu_schedule_lock(i); + printk("CPU[%02d] ", i); SCHED_OP(sched, dump_cpu_state, i); - pcpu_schedule_unlock(i); + pcpu_schedule_unlock(lock, i); } } --- a/xen/common/sched_credit.c +++ b/xen/common/sched_credit.c @@ -1166,6 +1166,7 @@ csched_runq_sort(struct csched_private * struct csched_pcpu * const spc = CSCHED_PCPU(cpu); struct list_head *runq, *elem, *next, *last_under; struct csched_vcpu *svc_elem; + spinlock_t *lock; unsigned long flags; int sort_epoch; @@ -1175,7 +1176,7 @@ csched_runq_sort(struct csched_private * spc->runq_sort_last = sort_epoch; - pcpu_schedule_lock_irqsave(cpu, flags); + lock = pcpu_schedule_lock_irqsave(cpu, &flags); runq = &spc->runq; elem = runq->next; @@ -1200,7 +1201,7 @@ csched_runq_sort(struct csched_private * elem = next; } - pcpu_schedule_unlock_irqrestore(cpu, flags); + pcpu_schedule_unlock_irqrestore(lock, flags, cpu); } static void @@ -1563,7 +1564,9 @@ csched_load_balance(struct csched_privat * could cause a deadlock if the peer CPU is also load * balancing and trying to lock this CPU. */ - if ( !pcpu_schedule_trylock(peer_cpu) ) + spinlock_t *lock = pcpu_schedule_trylock(peer_cpu); + + if ( !lock ) { SCHED_STAT_CRANK(steal_trylock_failed); peer_cpu = cpumask_cycle(peer_cpu, &workers); @@ -1573,7 +1576,7 @@ csched_load_balance(struct csched_privat /* Any work over there to steal? */ speer = cpumask_test_cpu(peer_cpu, online) ? csched_runq_steal(peer_cpu, cpu, snext->pri, bstep) : NULL; - pcpu_schedule_unlock(peer_cpu); + pcpu_schedule_unlock(lock, peer_cpu); /* As soon as one vcpu is found, balancing ends */ if ( speer != NULL ) --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -882,15 +882,17 @@ csched_vcpu_insert(const struct schedule */ if ( ! is_idle_vcpu(vc) ) { + spinlock_t *lock; + /* FIXME: Do we need the private lock here? */ list_add_tail(&svc->sdom_elem, &svc->sdom->vcpu); /* Add vcpu to runqueue of initial processor */ - vcpu_schedule_lock_irq(vc); + lock = vcpu_schedule_lock_irq(vc); runq_assign(ops, vc); - vcpu_schedule_unlock_irq(vc); + vcpu_schedule_unlock_irq(lock, vc); sdom->nr_vcpus++; } @@ -917,14 +919,16 @@ csched_vcpu_remove(const struct schedule if ( ! is_idle_vcpu(vc) ) { + spinlock_t *lock; + SCHED_STAT_CRANK(vcpu_destroy); /* Remove from runqueue */ - vcpu_schedule_lock_irq(vc); + lock = vcpu_schedule_lock_irq(vc); runq_deassign(ops, vc); - vcpu_schedule_unlock_irq(vc); + vcpu_schedule_unlock_irq(lock, vc); /* Remove from sdom list. Don''t need a lock for this, as it''s called * syncronously when nothing else can happen. 
*/ @@ -1011,8 +1015,7 @@ csched_context_saved(const struct schedu { struct csched_vcpu * const svc = CSCHED_VCPU(vc); s_time_t now = NOW(); - - vcpu_schedule_lock_irq(vc); + spinlock_t *lock = vcpu_schedule_lock_irq(vc); BUG_ON( !is_idle_vcpu(vc) && svc->rqd != RQD(ops, vc->processor)); @@ -1038,7 +1041,7 @@ csched_context_saved(const struct schedu else if ( !is_idle_vcpu(vc) ) update_load(ops, svc->rqd, svc, -1, now); - vcpu_schedule_unlock_irq(vc); + vcpu_schedule_unlock_irq(lock, vc); } #define MAX_LOAD (1ULL<<60); @@ -1456,14 +1459,14 @@ csched_dom_cntl( * must never lock csched_priv.lock if we''re holding a runqueue lock. * Also, calling vcpu_schedule_lock() is enough, since IRQs have already * been disabled. */ - vcpu_schedule_lock(svc->vcpu); + spinlock_t *lock = vcpu_schedule_lock(svc->vcpu); BUG_ON(svc->rqd != RQD(ops, svc->vcpu->processor)); svc->weight = sdom->weight; update_max_weight(svc->rqd, svc->weight, old_weight); - vcpu_schedule_unlock(svc->vcpu); + vcpu_schedule_unlock(lock, svc->vcpu); } } } @@ -1993,6 +1996,7 @@ static void init_pcpu(const struct sched cpumask_set_cpu(cpu, &rqd->idle); cpumask_set_cpu(cpu, &rqd->active); + /* _Not_ pcpu_schedule_unlock(): per_cpu().schedule_lock changed! */ spin_unlock(old_lock); cpumask_set_cpu(cpu, &prv->initialized); --- a/xen/common/sched_sedf.c +++ b/xen/common/sched_sedf.c @@ -1350,14 +1350,16 @@ static int sedf_adjust_weights(struct cp if ( EDOM_INFO(p)->weight ) { /* Interrupts already off */ - vcpu_schedule_lock(p); + spinlock_t *lock = vcpu_schedule_lock(p); + EDOM_INFO(p)->period_orig = EDOM_INFO(p)->period = WEIGHT_PERIOD; EDOM_INFO(p)->slice_orig EDOM_INFO(p)->slice = (EDOM_INFO(p)->weight * (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[cpu])) / sumw[cpu]; - vcpu_schedule_unlock(p); + + vcpu_schedule_unlock(lock, p); } } } @@ -1418,21 +1420,24 @@ static int sedf_adjust(const struct sche { /* (Here and everywhere in the following) IRQs are already off, * hence vcpu_spin_lock() is the one. 
*/ - vcpu_schedule_lock(v); + spinlock_t *lock = vcpu_schedule_lock(v); + EDOM_INFO(v)->extraweight = op->u.sedf.weight; EDOM_INFO(v)->weight = 0; EDOM_INFO(v)->slice = 0; EDOM_INFO(v)->period = WEIGHT_PERIOD; - vcpu_schedule_unlock(v); + vcpu_schedule_unlock(lock, v); } } else { /* Weight-driven domains with real-time execution */ - for_each_vcpu ( p, v ) { - vcpu_schedule_lock(v); + for_each_vcpu ( p, v ) + { + spinlock_t *lock = vcpu_schedule_lock(v); + EDOM_INFO(v)->weight = op->u.sedf.weight; - vcpu_schedule_unlock(v); + vcpu_schedule_unlock(lock, v); } } } @@ -1454,14 +1459,15 @@ static int sedf_adjust(const struct sche /* Time-driven domains */ for_each_vcpu ( p, v ) { - vcpu_schedule_lock(v); + spinlock_t *lock = vcpu_schedule_lock(v); + EDOM_INFO(v)->weight = 0; EDOM_INFO(v)->extraweight = 0; EDOM_INFO(v)->period_orig = EDOM_INFO(v)->period = op->u.sedf.period; EDOM_INFO(v)->slice_orig = EDOM_INFO(v)->slice = op->u.sedf.slice; - vcpu_schedule_unlock(v); + vcpu_schedule_unlock(lock, v); } } @@ -1471,13 +1477,14 @@ static int sedf_adjust(const struct sche for_each_vcpu ( p, v ) { - vcpu_schedule_lock(v); + spinlock_t *lock = vcpu_schedule_lock(v); + EDOM_INFO(v)->status = (EDOM_INFO(v)->status & ~EXTRA_AWARE) | (op->u.sedf.extratime & EXTRA_AWARE); EDOM_INFO(v)->latency = op->u.sedf.latency; extraq_check(v); - vcpu_schedule_unlock(v); + vcpu_schedule_unlock(lock, v); } } else if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo ) --- a/xen/include/xen/sched-if.h +++ b/xen/include/xen/sched-if.h @@ -47,96 +47,70 @@ DECLARE_PER_CPU(struct schedule_data, sc DECLARE_PER_CPU(struct scheduler *, scheduler); DECLARE_PER_CPU(struct cpupool *, cpupool); -static inline spinlock_t * pcpu_schedule_lock(int cpu) -{ - spinlock_t * lock=NULL; - - for ( ; ; ) - { - /* The per_cpu(v->processor) may also change, if changing - * cpu pool also changes the scheduler lock. Retry - * until they match. - */ - lock=per_cpu(schedule_data, cpu).schedule_lock; - - spin_lock(lock); - if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) - break; - spin_unlock(lock); - } - return lock; +#define sched_lock(kind, param, cpu, irq, arg...) \ +static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \ +{ \ + for ( ; ; ) \ + { \ + spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; \ + /* \ + * v->processor may change when grabbing the lock; but \ + * per_cpu(v->processor) may also change, if changing cpu pool \ + * also changes the scheduler lock. Retry until they match. \ + * \ + * It may also be the case that v->processor may change but the \ + * lock may be the same; this will succeed in that case. \ + */ \ + spin_lock##irq(lock, ## arg); \ + if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) \ + return lock; \ + spin_unlock##irq(lock, ## arg); \ + } \ } -static inline int pcpu_schedule_trylock(int cpu) -{ - spinlock_t * lock=NULL; - - lock=per_cpu(schedule_data, cpu).schedule_lock; - if ( ! spin_trylock(lock) ) - return 0; - if ( lock == per_cpu(schedule_data, cpu).schedule_lock ) - return 1; - else - { - spin_unlock(lock); - return 0; - } +#define sched_unlock(kind, param, cpu, irq, arg...) 
\ +static inline void kind##_schedule_unlock##irq(spinlock_t *lock \ + EXTRA_TYPE(arg), param) \ +{ \ + ASSERT(lock == per_cpu(schedule_data, cpu).schedule_lock); \ + spin_unlock##irq(lock, ## arg); \ } -#define pcpu_schedule_lock_irq(p) \ - do { local_irq_disable(); pcpu_schedule_lock(p); } while ( 0 ) -#define pcpu_schedule_lock_irqsave(p, flags) \ - do { local_irq_save(flags); pcpu_schedule_lock(p); } while ( 0 ) +#define EXTRA_TYPE(arg) +sched_lock(pcpu, unsigned int cpu, cpu, ) +sched_lock(vcpu, const struct vcpu *v, v->processor, ) +sched_lock(pcpu, unsigned int cpu, cpu, _irq) +sched_lock(vcpu, const struct vcpu *v, v->processor, _irq) +sched_unlock(pcpu, unsigned int cpu, cpu, ) +sched_unlock(vcpu, const struct vcpu *v, v->processor, ) +sched_unlock(pcpu, unsigned int cpu, cpu, _irq) +sched_unlock(vcpu, const struct vcpu *v, v->processor, _irq) +#undef EXTRA_TYPE + +#define EXTRA_TYPE(arg) , unsigned long arg +#define spin_unlock_irqsave spin_unlock_irqrestore +sched_lock(pcpu, unsigned int cpu, cpu, _irqsave, *flags) +sched_lock(vcpu, const struct vcpu *v, v->processor, _irqsave, *flags) +#undef spin_unlock_irqsave +sched_unlock(pcpu, unsigned int cpu, cpu, _irqrestore, flags) +sched_unlock(vcpu, const struct vcpu *v, v->processor, _irqrestore, flags) +#undef EXTRA_TYPE + +#undef sched_unlock +#undef sched_lock -static inline void pcpu_schedule_unlock(int cpu) +static inline spinlock_t *pcpu_schedule_trylock(unsigned int cpu) { - spin_unlock(per_cpu(schedule_data, cpu).schedule_lock); -} + spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; -#define pcpu_schedule_unlock_irq(p) \ - do { pcpu_schedule_unlock(p); local_irq_enable(); } while ( 0 ) -#define pcpu_schedule_unlock_irqrestore(p, flags) \ - do { pcpu_schedule_unlock(p); local_irq_restore(flags); } while ( 0 ) - -static inline void vcpu_schedule_lock(struct vcpu *v) -{ - spinlock_t * lock; - - for ( ; ; ) - { - /* v->processor may change when grabbing the lock; but - * per_cpu(v->processor) may also change, if changing - * cpu pool also changes the scheduler lock. Retry - * until they match. - * - * It may also be the case that v->processor may change - * but the lock may be the same; this will succeed - * in that case. - */ - lock=per_cpu(schedule_data, v->processor).schedule_lock; - - spin_lock(lock); - if ( likely(lock == per_cpu(schedule_data, v->processor).schedule_lock) ) - break; - spin_unlock(lock); - } -} - -#define vcpu_schedule_lock_irq(v) \ - do { local_irq_disable(); vcpu_schedule_lock(v); } while ( 0 ) -#define vcpu_schedule_lock_irqsave(v, flags) \ - do { local_irq_save(flags); vcpu_schedule_lock(v); } while ( 0 ) - -static inline void vcpu_schedule_unlock(struct vcpu *v) -{ - spin_unlock(per_cpu(schedule_data, v->processor).schedule_lock); + if ( !spin_trylock(lock) ) + return NULL; + if ( lock == per_cpu(schedule_data, cpu).schedule_lock ) + return lock; + spin_unlock(lock); + return NULL; } -#define vcpu_schedule_unlock_irq(v) \ - do { vcpu_schedule_unlock(v); local_irq_enable(); } while ( 0 ) -#define vcpu_schedule_unlock_irqrestore(v, flags) \ - do { vcpu_schedule_unlock(v); local_irq_restore(flags); } while ( 0 ) - struct task_slice { struct vcpu *task; s_time_t time; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
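For reference, the inline functions generated by the sched_lock() / sched_unlock() invocations above (signatures reconstructed from the macro definitions; listed here purely as a reading aid):

spinlock_t *pcpu_schedule_lock(unsigned int cpu);
spinlock_t *pcpu_schedule_lock_irq(unsigned int cpu);
spinlock_t *pcpu_schedule_lock_irqsave(unsigned int cpu, unsigned long *flags);
spinlock_t *vcpu_schedule_lock(const struct vcpu *v);
spinlock_t *vcpu_schedule_lock_irq(const struct vcpu *v);
spinlock_t *vcpu_schedule_lock_irqsave(const struct vcpu *v, unsigned long *flags);
void pcpu_schedule_unlock(spinlock_t *lock, unsigned int cpu);
void pcpu_schedule_unlock_irq(spinlock_t *lock, unsigned int cpu);
void pcpu_schedule_unlock_irqrestore(spinlock_t *lock, unsigned long flags, unsigned int cpu);
void vcpu_schedule_unlock(spinlock_t *lock, const struct vcpu *v);
void vcpu_schedule_unlock_irq(spinlock_t *lock, const struct vcpu *v);
void vcpu_schedule_unlock_irqrestore(spinlock_t *lock, unsigned long flags, const struct vcpu *v);

All of these are static inline; each _lock variant returns the lock it actually acquired, and that pointer must be handed back to the matching _unlock variant, which ASSERT()s that it is still the scheduler lock for the given cpu/vcpu.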
Jan Beulich
2013-Oct-11 15:17 UTC
[PATCH v2 2/2] sched: fix race between sched_move_domain() and vcpu_wake()
From: David Vrabel <david.vrabel@citrix.com>

sched_move_domain() changes v->processor for all the domain's VCPUs.
If another domain, softirq etc. triggers a simultaneous call to
vcpu_wake() (e.g., by setting an event channel as pending), then
vcpu_wake() may lock one schedule lock and try to unlock another.

vcpu_schedule_lock() attempts to handle this but only does so for the
window between reading the schedule_lock from the per-CPU data and the
spin_lock() call. This does not help with sched_move_domain() changing
v->processor between the calls to vcpu_schedule_lock() and
vcpu_schedule_unlock().

Fix the race by taking the schedule_lock for v->processor in
sched_move_domain().

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Juergen Gross <juergen.gross@ts.fujitsu.com>
Acked-by: Keir Fraser <keir@xen.org>

Use vcpu_schedule_lock_irq() (which now returns the lock) to properly
retry the locking should the lock to be used have changed in the course
of acquiring it (issue pointed out by George Dunlap).

Add a comment explaining the state after the v->processor adjustment.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -276,6 +276,8 @@ int sched_move_domain(struct domain *d,
     new_p = cpumask_first(c->cpu_valid);
     for_each_vcpu ( d, v )
     {
+        spinlock_t *lock;
+
         vcpudata = v->sched_priv;

         migrate_timer(&v->periodic_timer, new_p);
@@ -283,7 +285,16 @@ int sched_move_domain(struct domain *d,
         migrate_timer(&v->poll_timer, new_p);

         cpumask_setall(v->cpu_affinity);
+
+        lock = vcpu_schedule_lock_irq(v);
         v->processor = new_p;
+        /*
+         * With v->processor modified we must not
+         * - make any further changes assuming we hold the scheduler lock,
+         * - use vcpu_schedule_unlock_irq().
+         */
+        spin_unlock_irq(lock);
+
         v->sched_priv = vcpu_priv[v->vcpu_id];

         evtchn_move_pirqs(v);

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
On 11/10/13 16:13, Jan Beulich wrote:
> 1: scheduler: adjust internal locking interface
> 2: sched: fix race between sched_move_domain() and vcpu_wake()
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Both Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

It might be kind to leave some grep/cscope/ctags-fodder around for the
macro-constructed functions, in the same way as mfn_t/gfn_t with
TYPE_SAFE, but the names of the functions are certainly less obscure
than the TYPE_SAFE ones.

~Andrew
On 11/10/2013 16:13, "Jan Beulich" <JBeulich@suse.com> wrote:> 1: scheduler: adjust internal locking interface > 2: sched: fix race between sched_move_domain() and vcpu_wake() > > Signed-off-by: Jan Beulich <jbeulich@suse.com>Acked-by: Keir Fraser <keir@xen.org>