Make the locking functions return the lock pointers, so they can be passed to the unlocking functions (which in turn can check that the lock is still actually providing the intended protection, i.e. the parameters determining which lock is the right one didn''t change). Further use proper spin lock primitives rather than open coded local_irq_...() constructs, so that interrupts can be re-enabled as appropriate while spinning. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- Note that this implies David Vrabel''s patch titled "sched: fix race between sched_move_domain() and vcpu_wake()" to be applied already (as the change done there gets further adjusted here). --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -160,18 +160,16 @@ static inline void vcpu_runstate_change( void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate) { + spinlock_t *lock = likely(v == current) ? NULL : vcpu_schedule_lock_irq(v); s_time_t delta; - if ( unlikely(v != current) ) - vcpu_schedule_lock_irq(v); - memcpy(runstate, &v->runstate, sizeof(*runstate)); delta = NOW() - runstate->state_entry_time; if ( delta > 0 ) runstate->time[runstate->state] += delta; - if ( unlikely(v != current) ) - vcpu_schedule_unlock_irq(v); + if ( unlikely(lock != NULL) ) + vcpu_schedule_unlock_irq(lock, v); } uint64_t get_cpu_idle_time(unsigned int cpu) @@ -278,8 +276,7 @@ int sched_move_domain(struct domain *d, new_p = cpumask_first(c->cpu_valid); for_each_vcpu ( d, v ) { - spinlock_t *schedule_lock = per_cpu(schedule_data, - v->processor).schedule_lock; + spinlock_t *schedule_lock; vcpudata = v->sched_priv; @@ -289,8 +286,9 @@ int sched_move_domain(struct domain *d, cpumask_setall(v->cpu_affinity); - spin_lock_irq(schedule_lock); + schedule_lock = vcpu_schedule_lock_irq(v); v->processor = new_p; + /* _Not_ vcpu_schedule_unlock_irq(): v->processor changed! 
*/ spin_unlock_irq(schedule_lock); v->sched_priv = vcpu_priv[v->vcpu_id]; @@ -340,8 +338,7 @@ void sched_destroy_domain(struct domain void vcpu_sleep_nosync(struct vcpu *v) { unsigned long flags; - - vcpu_schedule_lock_irqsave(v, flags); + spinlock_t *lock = vcpu_schedule_lock_irqsave(v, &flags); if ( likely(!vcpu_runnable(v)) ) { @@ -351,7 +348,7 @@ void vcpu_sleep_nosync(struct vcpu *v) SCHED_OP(VCPU2OP(v), sleep, v); } - vcpu_schedule_unlock_irqrestore(v, flags); + vcpu_schedule_unlock_irqrestore(lock, flags, v); TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id); } @@ -369,8 +366,7 @@ void vcpu_sleep_sync(struct vcpu *v) void vcpu_wake(struct vcpu *v) { unsigned long flags; - - vcpu_schedule_lock_irqsave(v, flags); + spinlock_t *lock = vcpu_schedule_lock_irqsave(v, &flags); if ( likely(vcpu_runnable(v)) ) { @@ -384,7 +380,7 @@ void vcpu_wake(struct vcpu *v) vcpu_runstate_change(v, RUNSTATE_offline, NOW()); } - vcpu_schedule_unlock_irqrestore(v, flags); + vcpu_schedule_unlock_irqrestore(lock, flags, v); TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id); } @@ -535,10 +531,11 @@ static void vcpu_migrate(struct vcpu *v) */ void vcpu_force_reschedule(struct vcpu *v) { - vcpu_schedule_lock_irq(v); + spinlock_t *lock = vcpu_schedule_lock_irq(v); + if ( v->is_running ) set_bit(_VPF_migrating, &v->pause_flags); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); if ( test_bit(_VPF_migrating, &v->pause_flags) ) { @@ -553,7 +550,7 @@ void restore_vcpu_affinity(struct domain for_each_vcpu ( d, v ) { - vcpu_schedule_lock_irq(v); + spinlock_t *lock = vcpu_schedule_lock_irq(v); if ( v->affinity_broken ) { @@ -566,13 +563,13 @@ void restore_vcpu_affinity(struct domain if ( v->processor == smp_processor_id() ) { set_bit(_VPF_migrating, &v->pause_flags); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); vcpu_sleep_nosync(v); vcpu_migrate(v); } else { - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); } } @@ -599,7 +596,7 @@ int cpu_disable_scheduler(unsigned int c { for_each_vcpu ( d, v ) { - vcpu_schedule_lock_irq(v); + spinlock_t *lock = vcpu_schedule_lock_irq(v); cpumask_and(&online_affinity, v->cpu_affinity, c->cpu_valid); if ( cpumask_empty(&online_affinity) && @@ -620,13 +617,13 @@ int cpu_disable_scheduler(unsigned int c if ( v->processor == cpu ) { set_bit(_VPF_migrating, &v->pause_flags); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); vcpu_sleep_nosync(v); vcpu_migrate(v); } else { - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); } /* @@ -653,6 +650,7 @@ int vcpu_set_affinity(struct vcpu *v, co { cpumask_t online_affinity; cpumask_t *online; + spinlock_t *lock; if ( v->domain->is_pinned ) return -EINVAL; @@ -661,7 +659,7 @@ int vcpu_set_affinity(struct vcpu *v, co if ( cpumask_empty(&online_affinity) ) return -EINVAL; - vcpu_schedule_lock_irq(v); + lock = vcpu_schedule_lock_irq(v); cpumask_copy(v->cpu_affinity, affinity); @@ -669,7 +667,7 @@ int vcpu_set_affinity(struct vcpu *v, co * when changing the affinity */ set_bit(_VPF_migrating, &v->pause_flags); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); domain_update_node_affinity(v->domain); @@ -783,10 +781,10 @@ static long do_poll(struct sched_poll *s static long do_yield(void) { struct vcpu * v=current; + spinlock_t *lock = vcpu_schedule_lock_irq(v); - vcpu_schedule_lock_irq(v); SCHED_OP(VCPU2OP(v), yield, v); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); TRACE_2D(TRC_SCHED_YIELD, 
current->domain->domain_id, current->vcpu_id); raise_softirq(SCHEDULE_SOFTIRQ); @@ -1147,6 +1145,7 @@ static void schedule(void) unsigned long *tasklet_work = &this_cpu(tasklet_work_to_do); bool_t tasklet_work_scheduled = 0; struct schedule_data *sd; + spinlock_t *lock; struct task_slice next_slice; int cpu = smp_processor_id(); @@ -1173,7 +1172,7 @@ static void schedule(void) BUG(); } - pcpu_schedule_lock_irq(cpu); + lock = pcpu_schedule_lock_irq(cpu); stop_timer(&sd->s_timer); @@ -1190,7 +1189,7 @@ static void schedule(void) if ( unlikely(prev == next) ) { - pcpu_schedule_unlock_irq(cpu); + pcpu_schedule_unlock_irq(lock, cpu); trace_continue_running(next); return continue_running(prev); } @@ -1228,7 +1227,7 @@ static void schedule(void) ASSERT(!next->is_running); next->is_running = 1; - pcpu_schedule_unlock_irq(cpu); + pcpu_schedule_unlock_irq(lock, cpu); SCHED_STAT_CRANK(sched_ctx); @@ -1415,6 +1414,7 @@ int schedule_cpu_switch(unsigned int cpu { unsigned long flags; struct vcpu *idle; + spinlock_t *lock; void *ppriv, *ppriv_old, *vpriv, *vpriv_old; struct scheduler *old_ops = per_cpu(scheduler, cpu); struct scheduler *new_ops = (c == NULL) ? &ops : c->sched; @@ -1433,7 +1433,7 @@ int schedule_cpu_switch(unsigned int cpu return -ENOMEM; } - pcpu_schedule_lock_irqsave(cpu, flags); + lock = pcpu_schedule_lock_irqsave(cpu, &flags); SCHED_OP(old_ops, tick_suspend, cpu); vpriv_old = idle->sched_priv; @@ -1444,7 +1444,7 @@ int schedule_cpu_switch(unsigned int cpu SCHED_OP(new_ops, tick_resume, cpu); SCHED_OP(new_ops, insert_vcpu, idle); - pcpu_schedule_unlock_irqrestore(cpu, flags); + pcpu_schedule_unlock_irqrestore(lock, flags, cpu); SCHED_OP(old_ops, free_vdata, vpriv_old); SCHED_OP(old_ops, free_pdata, ppriv_old, cpu); @@ -1502,10 +1502,11 @@ void schedule_dump(struct cpupool *c) for_each_cpu (i, cpus) { - pcpu_schedule_lock(i); + spinlock_t *lock = pcpu_schedule_lock(i); + printk("CPU[%02d] ", i); SCHED_OP(sched, dump_cpu_state, i); - pcpu_schedule_unlock(i); + pcpu_schedule_unlock(lock, i); } } --- a/xen/common/sched_credit.c +++ b/xen/common/sched_credit.c @@ -1166,6 +1166,7 @@ csched_runq_sort(struct csched_private * struct csched_pcpu * const spc = CSCHED_PCPU(cpu); struct list_head *runq, *elem, *next, *last_under; struct csched_vcpu *svc_elem; + spinlock_t *lock; unsigned long flags; int sort_epoch; @@ -1175,7 +1176,7 @@ csched_runq_sort(struct csched_private * spc->runq_sort_last = sort_epoch; - pcpu_schedule_lock_irqsave(cpu, flags); + lock = pcpu_schedule_lock_irqsave(cpu, &flags); runq = &spc->runq; elem = runq->next; @@ -1200,7 +1201,7 @@ csched_runq_sort(struct csched_private * elem = next; } - pcpu_schedule_unlock_irqrestore(cpu, flags); + pcpu_schedule_unlock_irqrestore(lock, flags, cpu); } static void @@ -1563,7 +1564,9 @@ csched_load_balance(struct csched_privat * could cause a deadlock if the peer CPU is also load * balancing and trying to lock this CPU. */ - if ( !pcpu_schedule_trylock(peer_cpu) ) + spinlock_t *lock = pcpu_schedule_trylock(peer_cpu); + + if ( !lock ) { SCHED_STAT_CRANK(steal_trylock_failed); peer_cpu = cpumask_cycle(peer_cpu, &workers); @@ -1573,7 +1576,7 @@ csched_load_balance(struct csched_privat /* Any work over there to steal? */ speer = cpumask_test_cpu(peer_cpu, online) ? 
csched_runq_steal(peer_cpu, cpu, snext->pri, bstep) : NULL; - pcpu_schedule_unlock(peer_cpu); + pcpu_schedule_unlock(lock, peer_cpu); /* As soon as one vcpu is found, balancing ends */ if ( speer != NULL ) --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -882,15 +882,17 @@ csched_vcpu_insert(const struct schedule */ if ( ! is_idle_vcpu(vc) ) { + spinlock_t *lock; + /* FIXME: Do we need the private lock here? */ list_add_tail(&svc->sdom_elem, &svc->sdom->vcpu); /* Add vcpu to runqueue of initial processor */ - vcpu_schedule_lock_irq(vc); + lock = vcpu_schedule_lock_irq(vc); runq_assign(ops, vc); - vcpu_schedule_unlock_irq(vc); + vcpu_schedule_unlock_irq(lock, vc); sdom->nr_vcpus++; } @@ -917,14 +919,16 @@ csched_vcpu_remove(const struct schedule if ( ! is_idle_vcpu(vc) ) { + spinlock_t *lock; + SCHED_STAT_CRANK(vcpu_destroy); /* Remove from runqueue */ - vcpu_schedule_lock_irq(vc); + lock = vcpu_schedule_lock_irq(vc); runq_deassign(ops, vc); - vcpu_schedule_unlock_irq(vc); + vcpu_schedule_unlock_irq(lock, vc); /* Remove from sdom list. Don''t need a lock for this, as it''s called * syncronously when nothing else can happen. */ @@ -1011,8 +1015,7 @@ csched_context_saved(const struct schedu { struct csched_vcpu * const svc = CSCHED_VCPU(vc); s_time_t now = NOW(); - - vcpu_schedule_lock_irq(vc); + spinlock_t *lock = vcpu_schedule_lock_irq(vc); BUG_ON( !is_idle_vcpu(vc) && svc->rqd != RQD(ops, vc->processor)); @@ -1038,7 +1041,7 @@ csched_context_saved(const struct schedu else if ( !is_idle_vcpu(vc) ) update_load(ops, svc->rqd, svc, -1, now); - vcpu_schedule_unlock_irq(vc); + vcpu_schedule_unlock_irq(lock, vc); } #define MAX_LOAD (1ULL<<60); @@ -1456,14 +1459,14 @@ csched_dom_cntl( * must never lock csched_priv.lock if we''re holding a runqueue lock. * Also, calling vcpu_schedule_lock() is enough, since IRQs have already * been disabled. */ - vcpu_schedule_lock(svc->vcpu); + spinlock_t *lock = vcpu_schedule_lock(svc->vcpu); BUG_ON(svc->rqd != RQD(ops, svc->vcpu->processor)); svc->weight = sdom->weight; update_max_weight(svc->rqd, svc->weight, old_weight); - vcpu_schedule_unlock(svc->vcpu); + vcpu_schedule_unlock(lock, svc->vcpu); } } } @@ -1993,6 +1996,7 @@ static void init_pcpu(const struct sched cpumask_set_cpu(cpu, &rqd->idle); cpumask_set_cpu(cpu, &rqd->active); + /* _Not_ pcpu_schedule_unlock(): per_cpu().schedule_lock changed! */ spin_unlock(old_lock); cpumask_set_cpu(cpu, &prv->initialized); --- a/xen/common/sched_sedf.c +++ b/xen/common/sched_sedf.c @@ -1350,14 +1350,16 @@ static int sedf_adjust_weights(struct cp if ( EDOM_INFO(p)->weight ) { /* Interrupts already off */ - vcpu_schedule_lock(p); + spinlock_t *lock = vcpu_schedule_lock(p); + EDOM_INFO(p)->period_orig = EDOM_INFO(p)->period = WEIGHT_PERIOD; EDOM_INFO(p)->slice_orig EDOM_INFO(p)->slice = (EDOM_INFO(p)->weight * (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[cpu])) / sumw[cpu]; - vcpu_schedule_unlock(p); + + vcpu_schedule_unlock(lock, p); } } } @@ -1418,21 +1420,24 @@ static int sedf_adjust(const struct sche { /* (Here and everywhere in the following) IRQs are already off, * hence vcpu_spin_lock() is the one. 
*/ - vcpu_schedule_lock(v); + spinlock_t *lock = vcpu_schedule_lock(v); + EDOM_INFO(v)->extraweight = op->u.sedf.weight; EDOM_INFO(v)->weight = 0; EDOM_INFO(v)->slice = 0; EDOM_INFO(v)->period = WEIGHT_PERIOD; - vcpu_schedule_unlock(v); + vcpu_schedule_unlock(lock, v); } } else { /* Weight-driven domains with real-time execution */ - for_each_vcpu ( p, v ) { - vcpu_schedule_lock(v); + for_each_vcpu ( p, v ) + { + spinlock_t *lock = vcpu_schedule_lock(v); + EDOM_INFO(v)->weight = op->u.sedf.weight; - vcpu_schedule_unlock(v); + vcpu_schedule_unlock(lock, v); } } } @@ -1454,14 +1459,15 @@ static int sedf_adjust(const struct sche /* Time-driven domains */ for_each_vcpu ( p, v ) { - vcpu_schedule_lock(v); + spinlock_t *lock = vcpu_schedule_lock(v); + EDOM_INFO(v)->weight = 0; EDOM_INFO(v)->extraweight = 0; EDOM_INFO(v)->period_orig = EDOM_INFO(v)->period = op->u.sedf.period; EDOM_INFO(v)->slice_orig = EDOM_INFO(v)->slice = op->u.sedf.slice; - vcpu_schedule_unlock(v); + vcpu_schedule_unlock(lock, v); } } @@ -1471,13 +1477,14 @@ static int sedf_adjust(const struct sche for_each_vcpu ( p, v ) { - vcpu_schedule_lock(v); + spinlock_t *lock = vcpu_schedule_lock(v); + EDOM_INFO(v)->status = (EDOM_INFO(v)->status & ~EXTRA_AWARE) | (op->u.sedf.extratime & EXTRA_AWARE); EDOM_INFO(v)->latency = op->u.sedf.latency; extraq_check(v); - vcpu_schedule_unlock(v); + vcpu_schedule_unlock(lock, v); } } else if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo ) --- a/xen/include/xen/sched-if.h +++ b/xen/include/xen/sched-if.h @@ -47,96 +47,70 @@ DECLARE_PER_CPU(struct schedule_data, sc DECLARE_PER_CPU(struct scheduler *, scheduler); DECLARE_PER_CPU(struct cpupool *, cpupool); -static inline spinlock_t * pcpu_schedule_lock(int cpu) -{ - spinlock_t * lock=NULL; - - for ( ; ; ) - { - /* The per_cpu(v->processor) may also change, if changing - * cpu pool also changes the scheduler lock. Retry - * until they match. - */ - lock=per_cpu(schedule_data, cpu).schedule_lock; - - spin_lock(lock); - if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) - break; - spin_unlock(lock); - } - return lock; +#define sched_lock(kind, param, cpu, irq, arg...) \ +static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \ +{ \ + for ( ; ; ) \ + { \ + spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; \ + /* \ + * v->processor may change when grabbing the lock; but \ + * per_cpu(v->processor) may also change, if changing cpu pool \ + * also changes the scheduler lock. Retry until they match. \ + * \ + * It may also be the case that v->processor may change but the \ + * lock may be the same; this will succeed in that case. \ + */ \ + spin_lock##irq(lock, ## arg); \ + if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) \ + return lock; \ + spin_unlock##irq(lock, ## arg); \ + } \ } -static inline int pcpu_schedule_trylock(int cpu) -{ - spinlock_t * lock=NULL; - - lock=per_cpu(schedule_data, cpu).schedule_lock; - if ( ! spin_trylock(lock) ) - return 0; - if ( lock == per_cpu(schedule_data, cpu).schedule_lock ) - return 1; - else - { - spin_unlock(lock); - return 0; - } +#define sched_unlock(kind, param, cpu, irq, arg...) 
\ +static inline void kind##_schedule_unlock##irq(spinlock_t *lock \ + EXTRA_TYPE(arg), param) \ +{ \ + ASSERT(lock == per_cpu(schedule_data, cpu).schedule_lock); \ + spin_unlock##irq(lock, ## arg); \ } -#define pcpu_schedule_lock_irq(p) \ - do { local_irq_disable(); pcpu_schedule_lock(p); } while ( 0 ) -#define pcpu_schedule_lock_irqsave(p, flags) \ - do { local_irq_save(flags); pcpu_schedule_lock(p); } while ( 0 ) +#define EXTRA_TYPE(arg) +sched_lock(pcpu, unsigned int cpu, cpu, ) +sched_lock(vcpu, const struct vcpu *v, v->processor, ) +sched_lock(pcpu, unsigned int cpu, cpu, _irq) +sched_lock(vcpu, const struct vcpu *v, v->processor, _irq) +sched_unlock(pcpu, unsigned int cpu, cpu, ) +sched_unlock(vcpu, const struct vcpu *v, v->processor, ) +sched_unlock(pcpu, unsigned int cpu, cpu, _irq) +sched_unlock(vcpu, const struct vcpu *v, v->processor, _irq) +#undef EXTRA_TYPE + +#define EXTRA_TYPE(arg) , unsigned long arg +#define spin_unlock_irqsave spin_unlock_irqrestore +sched_lock(pcpu, unsigned int cpu, cpu, _irqsave, *flags) +sched_lock(vcpu, const struct vcpu *v, v->processor, _irqsave, *flags) +#undef spin_unlock_irqsave +sched_unlock(pcpu, unsigned int cpu, cpu, _irqrestore, flags) +sched_unlock(vcpu, const struct vcpu *v, v->processor, _irqrestore, flags) +#undef EXTRA_TYPE + +#undef sched_unlock +#undef sched_lock -static inline void pcpu_schedule_unlock(int cpu) +static inline spinlock_t *pcpu_schedule_trylock(unsigned int cpu) { - spin_unlock(per_cpu(schedule_data, cpu).schedule_lock); -} + spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; -#define pcpu_schedule_unlock_irq(p) \ - do { pcpu_schedule_unlock(p); local_irq_enable(); } while ( 0 ) -#define pcpu_schedule_unlock_irqrestore(p, flags) \ - do { pcpu_schedule_unlock(p); local_irq_restore(flags); } while ( 0 ) - -static inline void vcpu_schedule_lock(struct vcpu *v) -{ - spinlock_t * lock; - - for ( ; ; ) - { - /* v->processor may change when grabbing the lock; but - * per_cpu(v->processor) may also change, if changing - * cpu pool also changes the scheduler lock. Retry - * until they match. - * - * It may also be the case that v->processor may change - * but the lock may be the same; this will succeed - * in that case. - */ - lock=per_cpu(schedule_data, v->processor).schedule_lock; - - spin_lock(lock); - if ( likely(lock == per_cpu(schedule_data, v->processor).schedule_lock) ) - break; - spin_unlock(lock); - } -} - -#define vcpu_schedule_lock_irq(v) \ - do { local_irq_disable(); vcpu_schedule_lock(v); } while ( 0 ) -#define vcpu_schedule_lock_irqsave(v, flags) \ - do { local_irq_save(flags); vcpu_schedule_lock(v); } while ( 0 ) - -static inline void vcpu_schedule_unlock(struct vcpu *v) -{ - spin_unlock(per_cpu(schedule_data, v->processor).schedule_lock); + if ( !spin_trylock(lock) ) + return NULL; + if ( lock == per_cpu(schedule_data, cpu).schedule_lock ) + return lock; + spin_unlock(lock); + return NULL; } -#define vcpu_schedule_unlock_irq(v) \ - do { vcpu_schedule_unlock(v); local_irq_enable(); } while ( 0 ) -#define vcpu_schedule_unlock_irqrestore(v, flags) \ - do { vcpu_schedule_unlock(v); local_irq_restore(flags); } while ( 0 ) - struct task_slice { struct vcpu *task; s_time_t time; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
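The caller-side shape this change results in, shown as a minimal sketch rather than the literal Xen code (schedule_lock_of() is a stand-in for per_cpu(schedule_data, cpu).schedule_lock, and the function names are shortened):

static inline spinlock_t *vcpu_sched_lock_irqsave(const struct vcpu *v,
                                                  unsigned long *flags)
{
    for ( ; ; )
    {
        spinlock_t *lock = schedule_lock_of(v->processor);

        spin_lock_irqsave(lock, *flags);
        /* v->processor (or the pool's scheduler lock) may have changed. */
        if ( likely(lock == schedule_lock_of(v->processor)) )
            return lock;
        spin_unlock_irqrestore(lock, *flags);
    }
}

static inline void vcpu_sched_unlock_irqrestore(spinlock_t *lock,
                                                unsigned long flags,
                                                const struct vcpu *v)
{
    /* Catch callers whose lock no longer protects v. */
    ASSERT(lock == schedule_lock_of(v->processor));
    spin_unlock_irqrestore(lock, flags);
}

/* Hypothetical caller: the lock pointer flows from lock to unlock. */
void example_wake(const struct vcpu *v)
{
    unsigned long flags;
    spinlock_t *lock = vcpu_sched_lock_irqsave(v, &flags);

    /* ... operate on v under its scheduler lock ... */

    vcpu_sched_unlock_irqrestore(lock, flags, v);
}

Returning the lock pointer lets the unlock side assert that it still matches the scheduler lock currently responsible for v, instead of silently unlocking whatever lock v->processor happens to designate by then; and using the spin_lock_irq*() primitives instead of open-coded local_irq_*() constructs allows interrupts to be re-enabled as appropriate while spinning.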
Andrew Cooper
2013-Oct-11 14:29 UTC
Re: [PATCH] scheduler: adjust internal locking interface
On 11/10/13 15:14, Jan Beulich wrote:> --- a/xen/include/xen/sched-if.h > +++ b/xen/include/xen/sched-if.h > @@ -47,96 +47,70 @@ DECLARE_PER_CPU(struct schedule_data, sc > DECLARE_PER_CPU(struct scheduler *, scheduler); > DECLARE_PER_CPU(struct cpupool *, cpupool); > > -static inline spinlock_t * pcpu_schedule_lock(int cpu) > -{ > - spinlock_t * lock=NULL; > - > - for ( ; ; ) > - { > - /* The per_cpu(v->processor) may also change, if changing > - * cpu pool also changes the scheduler lock. Retry > - * until they match. > - */ > - lock=per_cpu(schedule_data, cpu).schedule_lock; > - > - spin_lock(lock); > - if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) > - break; > - spin_unlock(lock); > - } > - return lock; > +#define sched_lock(kind, param, cpu, irq, arg...) \ > +static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \ > +{ \ > + for ( ; ; ) \ > + { \ > + spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; \ > + /* \ > + * v->processor may change when grabbing the lock; but \ > + * per_cpu(v->processor) may also change, if changing cpu pool \ > + * also changes the scheduler lock. Retry until they match. \ > + * \ > + * It may also be the case that v->processor may change but the \ > + * lock may be the same; this will succeed in that case. \ > + */ \ > + spin_lock##irq(lock, ## arg); \ > + if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) \ > + return lock; \ > + spin_unlock##irq(lock, ## arg); \ > + } \ > }The readability of this (and others) would be much easier if the ''\'' were aligned on the RHS and out of view of the main body. ~Andrew> > -static inline int pcpu_schedule_trylock(int cpu) > -{ > - spinlock_t * lock=NULL; > - > - lock=per_cpu(schedule_data, cpu).schedule_lock; > - if ( ! spin_trylock(lock) ) > - return 0; > - if ( lock == per_cpu(schedule_data, cpu).schedule_lock ) > - return 1; > - else > - { > - spin_unlock(lock); > - return 0; > - } > +#define sched_unlock(kind, param, cpu, irq, arg...) 
\ > +static inline void kind##_schedule_unlock##irq(spinlock_t *lock \ > + EXTRA_TYPE(arg), param) \ > +{ \ > + ASSERT(lock == per_cpu(schedule_data, cpu).schedule_lock); \ > + spin_unlock##irq(lock, ## arg); \ > } > > -#define pcpu_schedule_lock_irq(p) \ > - do { local_irq_disable(); pcpu_schedule_lock(p); } while ( 0 ) > -#define pcpu_schedule_lock_irqsave(p, flags) \ > - do { local_irq_save(flags); pcpu_schedule_lock(p); } while ( 0 ) > +#define EXTRA_TYPE(arg) > +sched_lock(pcpu, unsigned int cpu, cpu, ) > +sched_lock(vcpu, const struct vcpu *v, v->processor, ) > +sched_lock(pcpu, unsigned int cpu, cpu, _irq) > +sched_lock(vcpu, const struct vcpu *v, v->processor, _irq) > +sched_unlock(pcpu, unsigned int cpu, cpu, ) > +sched_unlock(vcpu, const struct vcpu *v, v->processor, ) > +sched_unlock(pcpu, unsigned int cpu, cpu, _irq) > +sched_unlock(vcpu, const struct vcpu *v, v->processor, _irq) > +#undef EXTRA_TYPE > + > +#define EXTRA_TYPE(arg) , unsigned long arg > +#define spin_unlock_irqsave spin_unlock_irqrestore > +sched_lock(pcpu, unsigned int cpu, cpu, _irqsave, *flags) > +sched_lock(vcpu, const struct vcpu *v, v->processor, _irqsave, *flags) > +#undef spin_unlock_irqsave > +sched_unlock(pcpu, unsigned int cpu, cpu, _irqrestore, flags) > +sched_unlock(vcpu, const struct vcpu *v, v->processor, _irqrestore, flags) > +#undef EXTRA_TYPE > + > +#undef sched_unlock > +#undef sched_lock > > -static inline void pcpu_schedule_unlock(int cpu) > +static inline spinlock_t *pcpu_schedule_trylock(unsigned int cpu) > { > - spin_unlock(per_cpu(schedule_data, cpu).schedule_lock); > -} > + spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; > > -#define pcpu_schedule_unlock_irq(p) \ > - do { pcpu_schedule_unlock(p); local_irq_enable(); } while ( 0 ) > -#define pcpu_schedule_unlock_irqrestore(p, flags) \ > - do { pcpu_schedule_unlock(p); local_irq_restore(flags); } while ( 0 ) > - > -static inline void vcpu_schedule_lock(struct vcpu *v) > -{ > - spinlock_t * lock; > - > - for ( ; ; ) > - { > - /* v->processor may change when grabbing the lock; but > - * per_cpu(v->processor) may also change, if changing > - * cpu pool also changes the scheduler lock. Retry > - * until they match. > - * > - * It may also be the case that v->processor may change > - * but the lock may be the same; this will succeed > - * in that case. 
> - */ > - lock=per_cpu(schedule_data, v->processor).schedule_lock; > - > - spin_lock(lock); > - if ( likely(lock == per_cpu(schedule_data, v->processor).schedule_lock) ) > - break; > - spin_unlock(lock); > - } > -} > - > -#define vcpu_schedule_lock_irq(v) \ > - do { local_irq_disable(); vcpu_schedule_lock(v); } while ( 0 ) > -#define vcpu_schedule_lock_irqsave(v, flags) \ > - do { local_irq_save(flags); vcpu_schedule_lock(v); } while ( 0 ) > - > -static inline void vcpu_schedule_unlock(struct vcpu *v) > -{ > - spin_unlock(per_cpu(schedule_data, v->processor).schedule_lock); > + if ( !spin_trylock(lock) ) > + return NULL; > + if ( lock == per_cpu(schedule_data, cpu).schedule_lock ) > + return lock; > + spin_unlock(lock); > + return NULL; > } > > -#define vcpu_schedule_unlock_irq(v) \ > - do { vcpu_schedule_unlock(v); local_irq_enable(); } while ( 0 ) > -#define vcpu_schedule_unlock_irqrestore(v, flags) \ > - do { vcpu_schedule_unlock(v); local_irq_restore(flags); } while ( 0 ) > - > struct task_slice { > struct vcpu *task; > s_time_t time; > > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
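Concretely, the layout being suggested would pad the line continuations out to a common column, roughly as follows (comment block omitted; shown purely to illustrate the formatting, not as part of the patch):

#define sched_lock(kind, param, cpu, irq, arg...)                          \
static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
{                                                                          \
    for ( ; ; )                                                            \
    {                                                                      \
        spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock;      \
                                                                           \
        spin_lock##irq(lock, ## arg);                                      \
        if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) )   \
            return lock;                                                   \
        spin_unlock##irq(lock, ## arg);                                    \
    }                                                                      \
}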
Jan Beulich
2013-Oct-11 14:37 UTC
Re: [PATCH] scheduler: adjust internal locking interface
>>> On 11.10.13 at 16:29, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 11/10/13 15:14, Jan Beulich wrote:
>> +#define sched_lock(kind, param, cpu, irq, arg...) \
>> +static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
>> +{ \
>> +    for ( ; ; ) \
>> +    { \
>> +        spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; \
>> +        /* \
>> +         * v->processor may change when grabbing the lock; but \
>> +         * per_cpu(v->processor) may also change, if changing cpu pool \
>> +         * also changes the scheduler lock. Retry until they match. \
>> +         * \
>> +         * It may also be the case that v->processor may change but the \
>> +         * lock may be the same; this will succeed in that case. \
>> +         */ \
>> +        spin_lock##irq(lock, ## arg); \
>> +        if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) \
>> +            return lock; \
>> +        spin_unlock##irq(lock, ## arg); \
>> +    } \
>> }
>
> The readability of this (and others) would be much easier if the '\'
> were aligned on the RHS and out of view of the main body.

This depends on the fonts you use - in a mail reader using variable
width fonts it reads much better the way I coded it. And while I
realized that there are many cases where there is right alignment of
these line continuations, I'm personally not in favor of this style,
and since the coding style document doesn't say anything about it
I used my personal preference...

Jan
George Dunlap
2013-Oct-11 14:41 UTC
Re: [PATCH] scheduler: adjust internal locking interface
On 11/10/13 15:37, Jan Beulich wrote:
>>>> On 11.10.13 at 16:29, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 11/10/13 15:14, Jan Beulich wrote:
>>> +#define sched_lock(kind, param, cpu, irq, arg...) \
>>> +static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
>>> +{ \
>>> +    for ( ; ; ) \
>>> +    { \
>>> +        spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; \
>>> +        /* \
>>> +         * v->processor may change when grabbing the lock; but \
>>> +         * per_cpu(v->processor) may also change, if changing cpu pool \
>>> +         * also changes the scheduler lock. Retry until they match. \
>>> +         * \
>>> +         * It may also be the case that v->processor may change but the \
>>> +         * lock may be the same; this will succeed in that case. \
>>> +         */ \
>>> +        spin_lock##irq(lock, ## arg); \
>>> +        if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) \
>>> +            return lock; \
>>> +        spin_unlock##irq(lock, ## arg); \
>>> +    } \
>>> }
>> The readability of this (and others) would be much easier if the '\'
>> were aligned on the RHS and out of view of the main body.
> This depends on the fonts you use - in a mail reader using variable
> width fonts it reads much better the way I coded it. And while I
> realized that there are many cases where there is right alignment of
> these line continuations, I'm personally not in favor of this style,
> and since the coding style document doesn't say anything about it
> I used my personal preference...

But what we care about is how it looks in an editor. I presume when
you're actually coding you use a fixed-width font? :-)

 -George
Jan Beulich
2013-Oct-11 15:03 UTC
Re: [PATCH] scheduler: adjust internal locking interface
>>> On 11.10.13 at 16:41, George Dunlap <george.dunlap@eu.citrix.com> wrote:
> On 11/10/13 15:37, Jan Beulich wrote:
>>>>> On 11.10.13 at 16:29, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>>> On 11/10/13 15:14, Jan Beulich wrote:
>>>> +#define sched_lock(kind, param, cpu, irq, arg...) \
>>>> +static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
>>>> +{ \
>>>> +    for ( ; ; ) \
>>>> +    { \
>>>> +        spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; \
>>>> +        /* \
>>>> +         * v->processor may change when grabbing the lock; but \
>>>> +         * per_cpu(v->processor) may also change, if changing cpu pool \
>>>> +         * also changes the scheduler lock. Retry until they match. \
>>>> +         * \
>>>> +         * It may also be the case that v->processor may change but the \
>>>> +         * lock may be the same; this will succeed in that case. \
>>>> +         */ \
>>>> +        spin_lock##irq(lock, ## arg); \
>>>> +        if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) \
>>>> +            return lock; \
>>>> +        spin_unlock##irq(lock, ## arg); \
>>>> +    } \
>>>> }
>>> The readability of this (and others) would be much easier if the '\'
>>> were aligned on the RHS and out of view of the main body.
>> This depends on the fonts you use - in a mail reader using variable
>> width fonts it reads much better the way I coded it. And while I
>> realized that there are many cases where there is right alignment of
>> these line continuations, I'm personally not in favor of this style,
>> and since the coding style document doesn't say anything about it
>> I used my personal preference...
>
> But what we care about is how it looks in an editor. I presume when
> you're actually coding you use a fixed-width font? :-)

Of course. But I have no problem reading it with the non-padded
backslashes...

Jan
1: scheduler: adjust internal locking interface
2: sched: fix race between sched_move_domain() and vcpu_wake()

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Jan Beulich
2013-Oct-11 15:16 UTC
[PATCH v2 1/2] scheduler: adjust internal locking interface
Make the locking functions return the lock pointers, so they can be passed to the unlocking functions (which in turn can check that the lock is still actually providing the intended protection, i.e. the parameters determining which lock is the right one didn''t change). Further use proper spin lock primitives rather than open coded local_irq_...() constructs, so that interrupts can be re-enabled as appropriate while spinning. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -160,18 +160,16 @@ static inline void vcpu_runstate_change( void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate) { + spinlock_t *lock = likely(v == current) ? NULL : vcpu_schedule_lock_irq(v); s_time_t delta; - if ( unlikely(v != current) ) - vcpu_schedule_lock_irq(v); - memcpy(runstate, &v->runstate, sizeof(*runstate)); delta = NOW() - runstate->state_entry_time; if ( delta > 0 ) runstate->time[runstate->state] += delta; - if ( unlikely(v != current) ) - vcpu_schedule_unlock_irq(v); + if ( unlikely(lock != NULL) ) + vcpu_schedule_unlock_irq(lock, v); } uint64_t get_cpu_idle_time(unsigned int cpu) @@ -333,8 +331,7 @@ void sched_destroy_domain(struct domain void vcpu_sleep_nosync(struct vcpu *v) { unsigned long flags; - - vcpu_schedule_lock_irqsave(v, flags); + spinlock_t *lock = vcpu_schedule_lock_irqsave(v, &flags); if ( likely(!vcpu_runnable(v)) ) { @@ -344,7 +341,7 @@ void vcpu_sleep_nosync(struct vcpu *v) SCHED_OP(VCPU2OP(v), sleep, v); } - vcpu_schedule_unlock_irqrestore(v, flags); + vcpu_schedule_unlock_irqrestore(lock, flags, v); TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id); } @@ -362,8 +359,7 @@ void vcpu_sleep_sync(struct vcpu *v) void vcpu_wake(struct vcpu *v) { unsigned long flags; - - vcpu_schedule_lock_irqsave(v, flags); + spinlock_t *lock = vcpu_schedule_lock_irqsave(v, &flags); if ( likely(vcpu_runnable(v)) ) { @@ -377,7 +373,7 @@ void vcpu_wake(struct vcpu *v) vcpu_runstate_change(v, RUNSTATE_offline, NOW()); } - vcpu_schedule_unlock_irqrestore(v, flags); + vcpu_schedule_unlock_irqrestore(lock, flags, v); TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id); } @@ -528,10 +524,11 @@ static void vcpu_migrate(struct vcpu *v) */ void vcpu_force_reschedule(struct vcpu *v) { - vcpu_schedule_lock_irq(v); + spinlock_t *lock = vcpu_schedule_lock_irq(v); + if ( v->is_running ) set_bit(_VPF_migrating, &v->pause_flags); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); if ( test_bit(_VPF_migrating, &v->pause_flags) ) { @@ -546,7 +543,7 @@ void restore_vcpu_affinity(struct domain for_each_vcpu ( d, v ) { - vcpu_schedule_lock_irq(v); + spinlock_t *lock = vcpu_schedule_lock_irq(v); if ( v->affinity_broken ) { @@ -559,13 +556,13 @@ void restore_vcpu_affinity(struct domain if ( v->processor == smp_processor_id() ) { set_bit(_VPF_migrating, &v->pause_flags); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); vcpu_sleep_nosync(v); vcpu_migrate(v); } else { - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); } } @@ -592,7 +589,7 @@ int cpu_disable_scheduler(unsigned int c { for_each_vcpu ( d, v ) { - vcpu_schedule_lock_irq(v); + spinlock_t *lock = vcpu_schedule_lock_irq(v); cpumask_and(&online_affinity, v->cpu_affinity, c->cpu_valid); if ( cpumask_empty(&online_affinity) && @@ -613,13 +610,13 @@ int cpu_disable_scheduler(unsigned int c if ( v->processor == cpu ) { set_bit(_VPF_migrating, &v->pause_flags); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); 
vcpu_sleep_nosync(v); vcpu_migrate(v); } else { - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); } /* @@ -646,6 +643,7 @@ int vcpu_set_affinity(struct vcpu *v, co { cpumask_t online_affinity; cpumask_t *online; + spinlock_t *lock; if ( v->domain->is_pinned ) return -EINVAL; @@ -654,7 +652,7 @@ int vcpu_set_affinity(struct vcpu *v, co if ( cpumask_empty(&online_affinity) ) return -EINVAL; - vcpu_schedule_lock_irq(v); + lock = vcpu_schedule_lock_irq(v); cpumask_copy(v->cpu_affinity, affinity); @@ -662,7 +660,7 @@ int vcpu_set_affinity(struct vcpu *v, co * when changing the affinity */ set_bit(_VPF_migrating, &v->pause_flags); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); domain_update_node_affinity(v->domain); @@ -776,10 +774,10 @@ static long do_poll(struct sched_poll *s static long do_yield(void) { struct vcpu * v=current; + spinlock_t *lock = vcpu_schedule_lock_irq(v); - vcpu_schedule_lock_irq(v); SCHED_OP(VCPU2OP(v), yield, v); - vcpu_schedule_unlock_irq(v); + vcpu_schedule_unlock_irq(lock, v); TRACE_2D(TRC_SCHED_YIELD, current->domain->domain_id, current->vcpu_id); raise_softirq(SCHEDULE_SOFTIRQ); @@ -1140,6 +1138,7 @@ static void schedule(void) unsigned long *tasklet_work = &this_cpu(tasklet_work_to_do); bool_t tasklet_work_scheduled = 0; struct schedule_data *sd; + spinlock_t *lock; struct task_slice next_slice; int cpu = smp_processor_id(); @@ -1166,7 +1165,7 @@ static void schedule(void) BUG(); } - pcpu_schedule_lock_irq(cpu); + lock = pcpu_schedule_lock_irq(cpu); stop_timer(&sd->s_timer); @@ -1183,7 +1182,7 @@ static void schedule(void) if ( unlikely(prev == next) ) { - pcpu_schedule_unlock_irq(cpu); + pcpu_schedule_unlock_irq(lock, cpu); trace_continue_running(next); return continue_running(prev); } @@ -1221,7 +1220,7 @@ static void schedule(void) ASSERT(!next->is_running); next->is_running = 1; - pcpu_schedule_unlock_irq(cpu); + pcpu_schedule_unlock_irq(lock, cpu); SCHED_STAT_CRANK(sched_ctx); @@ -1408,6 +1407,7 @@ int schedule_cpu_switch(unsigned int cpu { unsigned long flags; struct vcpu *idle; + spinlock_t *lock; void *ppriv, *ppriv_old, *vpriv, *vpriv_old; struct scheduler *old_ops = per_cpu(scheduler, cpu); struct scheduler *new_ops = (c == NULL) ? 
&ops : c->sched; @@ -1426,7 +1426,7 @@ int schedule_cpu_switch(unsigned int cpu return -ENOMEM; } - pcpu_schedule_lock_irqsave(cpu, flags); + lock = pcpu_schedule_lock_irqsave(cpu, &flags); SCHED_OP(old_ops, tick_suspend, cpu); vpriv_old = idle->sched_priv; @@ -1437,7 +1437,7 @@ int schedule_cpu_switch(unsigned int cpu SCHED_OP(new_ops, tick_resume, cpu); SCHED_OP(new_ops, insert_vcpu, idle); - pcpu_schedule_unlock_irqrestore(cpu, flags); + pcpu_schedule_unlock_irqrestore(lock, flags, cpu); SCHED_OP(old_ops, free_vdata, vpriv_old); SCHED_OP(old_ops, free_pdata, ppriv_old, cpu); @@ -1495,10 +1495,11 @@ void schedule_dump(struct cpupool *c) for_each_cpu (i, cpus) { - pcpu_schedule_lock(i); + spinlock_t *lock = pcpu_schedule_lock(i); + printk("CPU[%02d] ", i); SCHED_OP(sched, dump_cpu_state, i); - pcpu_schedule_unlock(i); + pcpu_schedule_unlock(lock, i); } } --- a/xen/common/sched_credit.c +++ b/xen/common/sched_credit.c @@ -1166,6 +1166,7 @@ csched_runq_sort(struct csched_private * struct csched_pcpu * const spc = CSCHED_PCPU(cpu); struct list_head *runq, *elem, *next, *last_under; struct csched_vcpu *svc_elem; + spinlock_t *lock; unsigned long flags; int sort_epoch; @@ -1175,7 +1176,7 @@ csched_runq_sort(struct csched_private * spc->runq_sort_last = sort_epoch; - pcpu_schedule_lock_irqsave(cpu, flags); + lock = pcpu_schedule_lock_irqsave(cpu, &flags); runq = &spc->runq; elem = runq->next; @@ -1200,7 +1201,7 @@ csched_runq_sort(struct csched_private * elem = next; } - pcpu_schedule_unlock_irqrestore(cpu, flags); + pcpu_schedule_unlock_irqrestore(lock, flags, cpu); } static void @@ -1563,7 +1564,9 @@ csched_load_balance(struct csched_privat * could cause a deadlock if the peer CPU is also load * balancing and trying to lock this CPU. */ - if ( !pcpu_schedule_trylock(peer_cpu) ) + spinlock_t *lock = pcpu_schedule_trylock(peer_cpu); + + if ( !lock ) { SCHED_STAT_CRANK(steal_trylock_failed); peer_cpu = cpumask_cycle(peer_cpu, &workers); @@ -1573,7 +1576,7 @@ csched_load_balance(struct csched_privat /* Any work over there to steal? */ speer = cpumask_test_cpu(peer_cpu, online) ? csched_runq_steal(peer_cpu, cpu, snext->pri, bstep) : NULL; - pcpu_schedule_unlock(peer_cpu); + pcpu_schedule_unlock(lock, peer_cpu); /* As soon as one vcpu is found, balancing ends */ if ( speer != NULL ) --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -882,15 +882,17 @@ csched_vcpu_insert(const struct schedule */ if ( ! is_idle_vcpu(vc) ) { + spinlock_t *lock; + /* FIXME: Do we need the private lock here? */ list_add_tail(&svc->sdom_elem, &svc->sdom->vcpu); /* Add vcpu to runqueue of initial processor */ - vcpu_schedule_lock_irq(vc); + lock = vcpu_schedule_lock_irq(vc); runq_assign(ops, vc); - vcpu_schedule_unlock_irq(vc); + vcpu_schedule_unlock_irq(lock, vc); sdom->nr_vcpus++; } @@ -917,14 +919,16 @@ csched_vcpu_remove(const struct schedule if ( ! is_idle_vcpu(vc) ) { + spinlock_t *lock; + SCHED_STAT_CRANK(vcpu_destroy); /* Remove from runqueue */ - vcpu_schedule_lock_irq(vc); + lock = vcpu_schedule_lock_irq(vc); runq_deassign(ops, vc); - vcpu_schedule_unlock_irq(vc); + vcpu_schedule_unlock_irq(lock, vc); /* Remove from sdom list. Don''t need a lock for this, as it''s called * syncronously when nothing else can happen. 
*/ @@ -1011,8 +1015,7 @@ csched_context_saved(const struct schedu { struct csched_vcpu * const svc = CSCHED_VCPU(vc); s_time_t now = NOW(); - - vcpu_schedule_lock_irq(vc); + spinlock_t *lock = vcpu_schedule_lock_irq(vc); BUG_ON( !is_idle_vcpu(vc) && svc->rqd != RQD(ops, vc->processor)); @@ -1038,7 +1041,7 @@ csched_context_saved(const struct schedu else if ( !is_idle_vcpu(vc) ) update_load(ops, svc->rqd, svc, -1, now); - vcpu_schedule_unlock_irq(vc); + vcpu_schedule_unlock_irq(lock, vc); } #define MAX_LOAD (1ULL<<60); @@ -1456,14 +1459,14 @@ csched_dom_cntl( * must never lock csched_priv.lock if we''re holding a runqueue lock. * Also, calling vcpu_schedule_lock() is enough, since IRQs have already * been disabled. */ - vcpu_schedule_lock(svc->vcpu); + spinlock_t *lock = vcpu_schedule_lock(svc->vcpu); BUG_ON(svc->rqd != RQD(ops, svc->vcpu->processor)); svc->weight = sdom->weight; update_max_weight(svc->rqd, svc->weight, old_weight); - vcpu_schedule_unlock(svc->vcpu); + vcpu_schedule_unlock(lock, svc->vcpu); } } } @@ -1993,6 +1996,7 @@ static void init_pcpu(const struct sched cpumask_set_cpu(cpu, &rqd->idle); cpumask_set_cpu(cpu, &rqd->active); + /* _Not_ pcpu_schedule_unlock(): per_cpu().schedule_lock changed! */ spin_unlock(old_lock); cpumask_set_cpu(cpu, &prv->initialized); --- a/xen/common/sched_sedf.c +++ b/xen/common/sched_sedf.c @@ -1350,14 +1350,16 @@ static int sedf_adjust_weights(struct cp if ( EDOM_INFO(p)->weight ) { /* Interrupts already off */ - vcpu_schedule_lock(p); + spinlock_t *lock = vcpu_schedule_lock(p); + EDOM_INFO(p)->period_orig = EDOM_INFO(p)->period = WEIGHT_PERIOD; EDOM_INFO(p)->slice_orig EDOM_INFO(p)->slice = (EDOM_INFO(p)->weight * (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[cpu])) / sumw[cpu]; - vcpu_schedule_unlock(p); + + vcpu_schedule_unlock(lock, p); } } } @@ -1418,21 +1420,24 @@ static int sedf_adjust(const struct sche { /* (Here and everywhere in the following) IRQs are already off, * hence vcpu_spin_lock() is the one. 
*/ - vcpu_schedule_lock(v); + spinlock_t *lock = vcpu_schedule_lock(v); + EDOM_INFO(v)->extraweight = op->u.sedf.weight; EDOM_INFO(v)->weight = 0; EDOM_INFO(v)->slice = 0; EDOM_INFO(v)->period = WEIGHT_PERIOD; - vcpu_schedule_unlock(v); + vcpu_schedule_unlock(lock, v); } } else { /* Weight-driven domains with real-time execution */ - for_each_vcpu ( p, v ) { - vcpu_schedule_lock(v); + for_each_vcpu ( p, v ) + { + spinlock_t *lock = vcpu_schedule_lock(v); + EDOM_INFO(v)->weight = op->u.sedf.weight; - vcpu_schedule_unlock(v); + vcpu_schedule_unlock(lock, v); } } } @@ -1454,14 +1459,15 @@ static int sedf_adjust(const struct sche /* Time-driven domains */ for_each_vcpu ( p, v ) { - vcpu_schedule_lock(v); + spinlock_t *lock = vcpu_schedule_lock(v); + EDOM_INFO(v)->weight = 0; EDOM_INFO(v)->extraweight = 0; EDOM_INFO(v)->period_orig = EDOM_INFO(v)->period = op->u.sedf.period; EDOM_INFO(v)->slice_orig = EDOM_INFO(v)->slice = op->u.sedf.slice; - vcpu_schedule_unlock(v); + vcpu_schedule_unlock(lock, v); } } @@ -1471,13 +1477,14 @@ static int sedf_adjust(const struct sche for_each_vcpu ( p, v ) { - vcpu_schedule_lock(v); + spinlock_t *lock = vcpu_schedule_lock(v); + EDOM_INFO(v)->status = (EDOM_INFO(v)->status & ~EXTRA_AWARE) | (op->u.sedf.extratime & EXTRA_AWARE); EDOM_INFO(v)->latency = op->u.sedf.latency; extraq_check(v); - vcpu_schedule_unlock(v); + vcpu_schedule_unlock(lock, v); } } else if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo ) --- a/xen/include/xen/sched-if.h +++ b/xen/include/xen/sched-if.h @@ -47,96 +47,70 @@ DECLARE_PER_CPU(struct schedule_data, sc DECLARE_PER_CPU(struct scheduler *, scheduler); DECLARE_PER_CPU(struct cpupool *, cpupool); -static inline spinlock_t * pcpu_schedule_lock(int cpu) -{ - spinlock_t * lock=NULL; - - for ( ; ; ) - { - /* The per_cpu(v->processor) may also change, if changing - * cpu pool also changes the scheduler lock. Retry - * until they match. - */ - lock=per_cpu(schedule_data, cpu).schedule_lock; - - spin_lock(lock); - if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) - break; - spin_unlock(lock); - } - return lock; +#define sched_lock(kind, param, cpu, irq, arg...) \ +static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \ +{ \ + for ( ; ; ) \ + { \ + spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; \ + /* \ + * v->processor may change when grabbing the lock; but \ + * per_cpu(v->processor) may also change, if changing cpu pool \ + * also changes the scheduler lock. Retry until they match. \ + * \ + * It may also be the case that v->processor may change but the \ + * lock may be the same; this will succeed in that case. \ + */ \ + spin_lock##irq(lock, ## arg); \ + if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) \ + return lock; \ + spin_unlock##irq(lock, ## arg); \ + } \ } -static inline int pcpu_schedule_trylock(int cpu) -{ - spinlock_t * lock=NULL; - - lock=per_cpu(schedule_data, cpu).schedule_lock; - if ( ! spin_trylock(lock) ) - return 0; - if ( lock == per_cpu(schedule_data, cpu).schedule_lock ) - return 1; - else - { - spin_unlock(lock); - return 0; - } +#define sched_unlock(kind, param, cpu, irq, arg...) 
\ +static inline void kind##_schedule_unlock##irq(spinlock_t *lock \ + EXTRA_TYPE(arg), param) \ +{ \ + ASSERT(lock == per_cpu(schedule_data, cpu).schedule_lock); \ + spin_unlock##irq(lock, ## arg); \ } -#define pcpu_schedule_lock_irq(p) \ - do { local_irq_disable(); pcpu_schedule_lock(p); } while ( 0 ) -#define pcpu_schedule_lock_irqsave(p, flags) \ - do { local_irq_save(flags); pcpu_schedule_lock(p); } while ( 0 ) +#define EXTRA_TYPE(arg) +sched_lock(pcpu, unsigned int cpu, cpu, ) +sched_lock(vcpu, const struct vcpu *v, v->processor, ) +sched_lock(pcpu, unsigned int cpu, cpu, _irq) +sched_lock(vcpu, const struct vcpu *v, v->processor, _irq) +sched_unlock(pcpu, unsigned int cpu, cpu, ) +sched_unlock(vcpu, const struct vcpu *v, v->processor, ) +sched_unlock(pcpu, unsigned int cpu, cpu, _irq) +sched_unlock(vcpu, const struct vcpu *v, v->processor, _irq) +#undef EXTRA_TYPE + +#define EXTRA_TYPE(arg) , unsigned long arg +#define spin_unlock_irqsave spin_unlock_irqrestore +sched_lock(pcpu, unsigned int cpu, cpu, _irqsave, *flags) +sched_lock(vcpu, const struct vcpu *v, v->processor, _irqsave, *flags) +#undef spin_unlock_irqsave +sched_unlock(pcpu, unsigned int cpu, cpu, _irqrestore, flags) +sched_unlock(vcpu, const struct vcpu *v, v->processor, _irqrestore, flags) +#undef EXTRA_TYPE + +#undef sched_unlock +#undef sched_lock -static inline void pcpu_schedule_unlock(int cpu) +static inline spinlock_t *pcpu_schedule_trylock(unsigned int cpu) { - spin_unlock(per_cpu(schedule_data, cpu).schedule_lock); -} + spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; -#define pcpu_schedule_unlock_irq(p) \ - do { pcpu_schedule_unlock(p); local_irq_enable(); } while ( 0 ) -#define pcpu_schedule_unlock_irqrestore(p, flags) \ - do { pcpu_schedule_unlock(p); local_irq_restore(flags); } while ( 0 ) - -static inline void vcpu_schedule_lock(struct vcpu *v) -{ - spinlock_t * lock; - - for ( ; ; ) - { - /* v->processor may change when grabbing the lock; but - * per_cpu(v->processor) may also change, if changing - * cpu pool also changes the scheduler lock. Retry - * until they match. - * - * It may also be the case that v->processor may change - * but the lock may be the same; this will succeed - * in that case. - */ - lock=per_cpu(schedule_data, v->processor).schedule_lock; - - spin_lock(lock); - if ( likely(lock == per_cpu(schedule_data, v->processor).schedule_lock) ) - break; - spin_unlock(lock); - } -} - -#define vcpu_schedule_lock_irq(v) \ - do { local_irq_disable(); vcpu_schedule_lock(v); } while ( 0 ) -#define vcpu_schedule_lock_irqsave(v, flags) \ - do { local_irq_save(flags); vcpu_schedule_lock(v); } while ( 0 ) - -static inline void vcpu_schedule_unlock(struct vcpu *v) -{ - spin_unlock(per_cpu(schedule_data, v->processor).schedule_lock); + if ( !spin_trylock(lock) ) + return NULL; + if ( lock == per_cpu(schedule_data, cpu).schedule_lock ) + return lock; + spin_unlock(lock); + return NULL; } -#define vcpu_schedule_unlock_irq(v) \ - do { vcpu_schedule_unlock(v); local_irq_enable(); } while ( 0 ) -#define vcpu_schedule_unlock_irqrestore(v, flags) \ - do { vcpu_schedule_unlock(v); local_irq_restore(flags); } while ( 0 ) - struct task_slice { struct vcpu *task; s_time_t time; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
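For reference, the inline functions generated by the sched_lock() / sched_unlock() invocations above (signatures reconstructed from the macro definitions; listed here purely as a reading aid):

spinlock_t *pcpu_schedule_lock(unsigned int cpu);
spinlock_t *pcpu_schedule_lock_irq(unsigned int cpu);
spinlock_t *pcpu_schedule_lock_irqsave(unsigned int cpu, unsigned long *flags);
spinlock_t *vcpu_schedule_lock(const struct vcpu *v);
spinlock_t *vcpu_schedule_lock_irq(const struct vcpu *v);
spinlock_t *vcpu_schedule_lock_irqsave(const struct vcpu *v, unsigned long *flags);
void pcpu_schedule_unlock(spinlock_t *lock, unsigned int cpu);
void pcpu_schedule_unlock_irq(spinlock_t *lock, unsigned int cpu);
void pcpu_schedule_unlock_irqrestore(spinlock_t *lock, unsigned long flags, unsigned int cpu);
void vcpu_schedule_unlock(spinlock_t *lock, const struct vcpu *v);
void vcpu_schedule_unlock_irq(spinlock_t *lock, const struct vcpu *v);
void vcpu_schedule_unlock_irqrestore(spinlock_t *lock, unsigned long flags, const struct vcpu *v);

All of these are static inline; each _lock variant returns the lock it actually acquired, and that pointer must be handed back to the matching _unlock variant, which ASSERT()s that it is still the scheduler lock for the given cpu/vcpu.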
Jan Beulich
2013-Oct-11 15:17 UTC
[PATCH v2 2/2] sched: fix race between sched_move_domain() and vcpu_wake()
From: David Vrabel <david.vrabel@citrix.com>

sched_move_domain() changes v->processor for all the domain's VCPUs.
If another domain, softirq etc. triggers a simultaneous call to
vcpu_wake() (e.g., by setting an event channel as pending), then
vcpu_wake() may lock one schedule lock and try to unlock another.

vcpu_schedule_lock() attempts to handle this but only does so for the
window between reading the schedule_lock from the per-CPU data and the
spin_lock() call. This does not help with sched_move_domain() changing
v->processor between the calls to vcpu_schedule_lock() and
vcpu_schedule_unlock().

Fix the race by taking the schedule_lock for v->processor in
sched_move_domain().

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Juergen Gross <juergen.gross@ts.fujitsu.com>
Acked-by: Keir Fraser <keir@xen.org>

Use vcpu_schedule_lock_irq() (which now returns the lock) to properly
retry the locking should the lock to be used have changed in the course
of acquiring it (issue pointed out by George Dunlap).

Add a comment explaining the state after the v->processor adjustment.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -276,6 +276,8 @@ int sched_move_domain(struct domain *d,
     new_p = cpumask_first(c->cpu_valid);
     for_each_vcpu ( d, v )
     {
+        spinlock_t *lock;
+
         vcpudata = v->sched_priv;

         migrate_timer(&v->periodic_timer, new_p);
@@ -283,7 +285,16 @@ int sched_move_domain(struct domain *d,
         migrate_timer(&v->poll_timer, new_p);

         cpumask_setall(v->cpu_affinity);
+
+        lock = vcpu_schedule_lock_irq(v);
         v->processor = new_p;
+        /*
+         * With v->processor modified we must not
+         * - make any further changes assuming we hold the scheduler lock,
+         * - use vcpu_schedule_unlock_irq().
+         */
+        spin_unlock_irq(lock);
+
         v->sched_priv = vcpu_priv[v->vcpu_id];

         evtchn_move_pirqs(v);

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
On 11/10/13 16:13, Jan Beulich wrote:
> 1: scheduler: adjust internal locking interface
> 2: sched: fix race between sched_move_domain() and vcpu_wake()
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Both Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

It might be kind to leave some grep/cscope/ctags-fodder around for the
macro-constructed functions, in the same way as mfn_t/gfn_t with
TYPE_SAFE, but the names of the functions are certainly less obscure
than the TYPE_SAFE ones.

~Andrew
On 11/10/2013 16:13, "Jan Beulich" <JBeulich@suse.com> wrote:> 1: scheduler: adjust internal locking interface > 2: sched: fix race between sched_move_domain() and vcpu_wake() > > Signed-off-by: Jan Beulich <jbeulich@suse.com>Acked-by: Keir Fraser <keir@xen.org>