Jan Beulich
2013-Aug-23 07:51 UTC
[PATCH 0/5] adjust usage of certain cpumask_*() operations
While originally the plan was only to have a distinct, random cpumask_any(), in the course of that I spotted a few other places having room for improvement. 1: un-alias cpumask_any() from cpumask_first() 2: x86: use cpumask_any() in mask-to-APIC-ID conversions 3: credit2: replace cpumask_first() uses 4: credit1: replace cpumask_empty() uses 5: domctl: replace cpumask_weight() uses Signed-off-by: Jan Beulich <jbeulich@suse.com>
In order to achieve more symmetric distribution of certain things, cpumask_any() shouldn't always pick the first CPU (which frequently will end up being CPU0). To facilitate that, introduce a library-like function to obtain random numbers. The per-architecture function is supposed to return zero if no valid random number can be obtained (implying that if occasionally zero got produced as random number, it wouldn't be considered such). As fallback this uses the trivial algorithm from the C standard, extended to produce "unsigned int" results. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/common/Makefile +++ b/xen/common/Makefile @@ -17,6 +17,7 @@ obj-y += multicall.o obj-y += notifier.o obj-y += page_alloc.o obj-y += preempt.o +obj-y += random.o obj-y += rangeset.o obj-y += sched_credit.o obj-y += sched_credit2.o --- /dev/null +++ b/xen/common/random.c @@ -0,0 +1,29 @@ +#include <xen/percpu.h> +#include <xen/random.h> +#include <xen/time.h> +#include <asm/random.h> + +static DEFINE_PER_CPU(unsigned int, seed); + +unsigned int get_random(void) +{ + unsigned int next = this_cpu(seed), val = arch_get_random(); + + if ( unlikely(!next) ) + next = val ?: NOW(); + + if ( !val ) + { + unsigned int i; + + for ( i = 0; i < sizeof(val) * 8; i += 11 ) + { + next = next * 1103515245 + 12345; + val |= ((next >> 16) & 0x7ff) << i; + } + } + + this_cpu(seed) = next; + + return val; +} --- a/xen/include/asm-arm/percpu.h +++ b/xen/include/asm-arm/percpu.h @@ -2,6 +2,17 @@ #define __ARM_PERCPU_H__ #ifndef __ASSEMBLY__ + +#include <xen/types.h> +#include <asm/cpregs.h> +#if defined(CONFIG_ARM_32) +# include <asm/arm32/processor.h> +#elif defined(CONFIG_ARM_64) +# include <asm/arm64/processor.h> +#else +# error "unknown ARM variant" +#endif + extern char __per_cpu_start[], __per_cpu_data_end[]; extern unsigned long __per_cpu_offset[NR_CPUS]; void percpu_init_areas(void); --- /dev/null +++ b/xen/include/asm-arm/random.h @@ -0,0 +1,9 @@ +#ifndef __ASM_RANDOM_H__ +#define __ASM_RANDOM_H__ + +static inline unsigned int arch_get_random(void) +{ + return 0; +} + +#endif /* __ASM_RANDOM_H__ */ --- /dev/null +++ b/xen/include/asm-x86/random.h @@ -0,0 +1,16 @@ +#ifndef __ASM_RANDOM_H__ +#define __ASM_RANDOM_H__ + +#include <asm/processor.h> + +static inline unsigned int arch_get_random(void) +{ + unsigned int val = 0; + + if ( cpu_has(&current_cpu_data, X86_FEATURE_RDRAND) ) + asm ( ".byte 0x0f,0xc7,0xf0" : "+a" (val) ); + + return val; +} + +#endif /* __ASM_RANDOM_H__ */ --- a/xen/include/xen/cpumask.h +++ b/xen/include/xen/cpumask.h @@ -77,6 +77,7 @@ #include <xen/bitmap.h> #include <xen/kernel.h> +#include <xen/random.h> typedef struct cpumask{ DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; @@ -245,7 +246,23 @@ static inline int cpumask_cycle(int n, c return nxt; } -#define cpumask_any(srcp) cpumask_first(srcp) +static inline unsigned int cpumask_any(const cpumask_t *srcp) +{ + unsigned int cpu = cpumask_first(srcp); + unsigned int w = cpumask_weight(srcp); + + if ( w > 1 && cpu < nr_cpu_ids ) + for ( w = get_random() % w; w--; ) + { + unsigned int next = cpumask_next(cpu, srcp); + + if ( next >= nr_cpu_ids ) + break; + cpu = next; + } + + return cpu; +} /* * Special-case data structure for "single bit set only" constant CPU masks. --- /dev/null +++ b/xen/include/xen/random.h @@ -0,0 +1,6 @@ +#ifndef __XEN_RANDOM_H__ +#define __XEN_RANDOM_H__ + +unsigned int get_random(void); + +#endif /* __XEN_RANDOM_H__ */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2013-Aug-23 07:53 UTC
[PATCH 2/5] x86: use cpumask_any() in mask-to-APIC-ID conversions
This is to avoid picking CPU0 for almost any such operation, resulting in very uneven distribution of interrupt load. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/genapic/delivery.c +++ b/xen/arch/x86/genapic/delivery.c @@ -67,5 +67,5 @@ const cpumask_t *vector_allocation_cpuma unsigned int cpu_mask_to_apicid_phys(const cpumask_t *cpumask) { /* As we are using single CPU as destination, pick only one CPU here */ - return cpu_physical_id(cpumask_first(cpumask)); + return cpu_physical_id(cpumask_any(cpumask)); } --- a/xen/arch/x86/genapic/x2apic.c +++ b/xen/arch/x86/genapic/x2apic.c @@ -81,7 +81,7 @@ static const cpumask_t *vector_allocatio static unsigned int cpu_mask_to_apicid_x2apic_cluster(const cpumask_t *cpumask) { - unsigned int cpu = cpumask_first(cpumask); + unsigned int cpu = cpumask_any(cpumask); unsigned int dest = per_cpu(cpu_2_logical_apicid, cpu); const cpumask_t *cluster_cpus = per_cpu(cluster_cpus, cpu); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
... with cpumask_any() or cpumask_cycle(). In one case this also allows elimination of a cpumask_empty() call, and while doing this I also spotted a redundant use of cpumask_weight(). (When running on big systems, operations on CPU masks aren't cheap enough to use them carelessly.) Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -515,9 +515,10 @@ runq_tickle(const struct scheduler *ops, cpumask_andnot(&mask, &rqd->idle, &rqd->tickled); /* If it's not empty, choose one */ - if ( !cpumask_empty(&mask) ) + i = cpumask_cycle(cpu, &mask); + if ( i < nr_cpu_ids ) { - ipid = cpumask_first(&mask); + ipid = i; goto tickle; } @@ -1091,7 +1092,7 @@ choose_cpu(const struct scheduler *ops, else { d2printk("d%dv%d +\n", svc->vcpu->domain->domain_id, svc->vcpu->vcpu_id); - new_cpu = cpumask_first(&svc->migrate_rqd->active); + new_cpu = cpumask_cycle(vc->processor, &svc->migrate_rqd->active); goto out_up; } } @@ -1138,8 +1139,8 @@ choose_cpu(const struct scheduler *ops, new_cpu = vc->processor; else { - BUG_ON(cpumask_empty(&prv->rqd[min_rqi].active)); - new_cpu = cpumask_first(&prv->rqd[min_rqi].active); + new_cpu = cpumask_cycle(vc->processor, &prv->rqd[min_rqi].active); + BUG_ON(new_cpu >= nr_cpu_ids); } out_up: @@ -1219,7 +1220,7 @@ void migrate(const struct scheduler *ops on_runq=1; } __runq_deassign(svc); - svc->vcpu->processor = cpumask_first(&trqd->active); + svc->vcpu->processor = cpumask_any(&trqd->active); __runq_assign(svc, trqd); if ( on_runq ) { @@ -1299,8 +1300,9 @@ retry: load_max = st.orqd->b_avgload; cpus_max = cpumask_weight(&st.lrqd->active); - if ( cpumask_weight(&st.orqd->active) > cpus_max ) - cpus_max = cpumask_weight(&st.orqd->active); + i = cpumask_weight(&st.orqd->active); + if ( i > cpus_max ) + cpus_max = i; /* If we're under 100% capacaty, only shift if load difference * is > 1. 
otherwise, shift if under 12.5% */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
In one case it was redundant with the operation it got combined with, and in the other it could easily be replaced by range checking the result of a subsequent operation. (When running on big systems, operations on CPU masks aren't cheap enough to use them carelessly.) Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/common/sched_credit.c +++ b/xen/common/sched_credit.c @@ -641,7 +641,7 @@ _csched_cpu_pick(const struct scheduler cpu = cpumask_test_cpu(vc->processor, &cpus) ? vc->processor : cpumask_cycle(vc->processor, &cpus); - ASSERT( !cpumask_empty(&cpus) && cpumask_test_cpu(cpu, &cpus) ); + ASSERT(cpumask_test_cpu(cpu, &cpus)); /* * Try to find an idle processor within the above constraints. @@ -1520,10 +1520,9 @@ csched_load_balance(struct csched_privat cpumask_and(&workers, &workers, &node_to_cpumask(peer_node)); cpumask_clear_cpu(cpu, &workers); - if ( cpumask_empty(&workers) ) - goto next_node; - peer_cpu = cpumask_first(&workers); + if ( peer_cpu >= nr_cpu_ids ) + goto next_node; do { /* _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
In one case it could easily be replaced by range checking the result of a subsequent operation, and in general cpumask_next(), not always needing to scan the whole bitmap, is more efficient than the specific uses of cpumask_weight() here. (When running on big systems, operations on CPU masks aren't cheap enough to use them carelessly.) Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/common/domctl.c +++ b/xen/common/domctl.c @@ -230,15 +230,15 @@ static unsigned int default_vcpu0_locati */ cpumask_copy(&cpu_exclude_map, per_cpu(cpu_sibling_mask, 0)); cpu = cpumask_first(&cpu_exclude_map); - if ( cpumask_weight(&cpu_exclude_map) > 1 ) - cpu = cpumask_next(cpu, &cpu_exclude_map); - ASSERT(cpu < nr_cpu_ids); + i = cpumask_next(cpu, &cpu_exclude_map); + if ( i < nr_cpu_ids ) + cpu = i; for_each_cpu(i, online) { if ( cpumask_test_cpu(i, &cpu_exclude_map) ) continue; if ( (i == cpumask_first(per_cpu(cpu_sibling_mask, i))) && - (cpumask_weight(per_cpu(cpu_sibling_mask, i)) > 1) ) + (cpumask_next(i, per_cpu(cpu_sibling_mask, i)) < nr_cpu_ids) ) continue; cpumask_or(&cpu_exclude_map, &cpu_exclude_map, per_cpu(cpu_sibling_mask, i)); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Keir Fraser
2013-Aug-23 08:43 UTC
Re: [PATCH 0/5] adjust usage of certain cpumask_*() operations
On 23/08/2013 08:51, "Jan Beulich" <JBeulich@suse.com> wrote:> While originally the plan was only to have a distinct, random > cpumask_any(), in the course of that I spotted a few other places > having room for improvement. > > 1: un-alias cpumask_any() from cpumask_first() > 2: x86: use cpumask_any() in mask-to-APIC-ID conversions > 3: credit2: replace cpumask_first() uses > 4: credit1: replace cpumask_empty() uses > 5: domctl: replace cpumask_weight() uses > > Signed-off-by: Jan Beulich <jbeulich@suse.com>Acked-by: Keir Fraser <keir@xen.org>
George Dunlap
2013-Aug-23 11:37 UTC
Re: [PATCH 1/5] un-alias cpumask_any() from cpumask_first()
On Fri, Aug 23, 2013 at 8:52 AM, Jan Beulich <JBeulich@suse.com> wrote:> In order to achieve more symmetric distribution of certain things, > cpumask_any() shouldn't always pick the first CPU (which frequently > will end up being CPU0). To facilitate that, introduce a library-like > function to obtain random numbers. > > The per-architecture function is supposed to return zero if no valid > random number can be obtained (implying that if occasionally zero got > produced as random number, it wouldn't be considered such). > > As fallback this uses the trivial algorithm from the C standard, > extended to produce "unsigned int" results. > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > > --- a/xen/common/Makefile > +++ b/xen/common/Makefile > @@ -17,6 +17,7 @@ obj-y += multicall.o > obj-y += notifier.o > obj-y += page_alloc.o > obj-y += preempt.o > +obj-y += random.o > obj-y += rangeset.o > obj-y += sched_credit.o > obj-y += sched_credit2.o > --- /dev/null > +++ b/xen/common/random.c > @@ -0,0 +1,29 @@ > +#include <xen/percpu.h> > +#include <xen/random.h> > +#include <xen/time.h> > +#include <asm/random.h> > + > +static DEFINE_PER_CPU(unsigned int, seed); > + > +unsigned int get_random(void) > +{ > + unsigned int next = this_cpu(seed), val = arch_get_random();arch_get_random() isn't a random "fill in the value" call (like this_cpu(seed) is), it's actually part of the algorithm. I think it's easier to read if it's separated from the variable declarations. But that's a minor issue I'll leave to you. Other than that: Reviewed-by: George Dunlap <george.dunlap@eu.citrix.com> -George
On Fri, Aug 23, 2013 at 8:54 AM, Jan Beulich <JBeulich@suse.com> wrote:> ... with cpumask_any() or cpumask_cycle(). > > In one case this also allows elimination of a cpumask_empty() call, > and while doing this I also spotted a redundant use of > cpumask_weight(). (When running on big systems, operations on CPU masks > aren't cheap enough to use them carelessly.) > > Signed-off-by: Jan Beulich <jbeulich@suse.com>Reviewed-by: George Dunlap <george.dunlap@eu.citrix.com>
On Fri, Aug 23, 2013 at 8:54 AM, Jan Beulich <JBeulich@suse.com> wrote:> In one case it was redundant with the operation it got combined with, > and in the other it could easily be replaced by range checking the > result of a subsequent operation. (When running on big systems, > operations on CPU masks aren't cheap enough to use them carelessly.) > > Signed-off-by: Jan Beulich <jbeulich@suse.com>Reviewed-by: George Dunlap <george.dunlap@eu.citrix.com>