Use the pincpu dom0_op to set the domain allocation map. After the map is set, all vcpu allocations are restricted to the cpus in that map, providing finer-grained control over which physical cpus the vcpus use. This also works well with build-time vcpu creation, which helps NUMA-aware memory allocators by providing a vcpu-to-cpu mapping prior to memory allocation. -- Ryan Harper Software Engineer; Linux Technology Center IBM Corp., Austin, Tx (512) 838-9253 T/L: 678-9253 ryanh@us.ibm.com diffstat output: tools/examples/xmexample1 | 4 + tools/examples/xmexample2 | 3 + tools/python/xen/lowlevel/xc/xc.c | 2 tools/python/xen/xend/XendDomainInfo.py | 16 ++++- tools/python/xen/xm/create.py | 6 ++ xen/common/dom0_ops.c | 90 +++++++++++++++++--------------- xen/common/domain.c | 28 +++++++++ xen/common/schedule.c | 11 ++- xen/include/public/dom0_ops.h | 2 xen/include/xen/domain.h | 2 xen/include/xen/sched.h | 2 11 files changed, 114 insertions(+), 52 deletions(-) Signed-off-by: Ryan Harper <ryanh@us.ibm.com> --- diff -r b9dbc22e9929 tools/examples/xmexample1 --- a/tools/examples/xmexample1 Mon Oct 10 14:01:37 2005 +++ b/tools/examples/xmexample1 Mon Oct 10 09:58:46 2005 @@ -27,6 +27,10 @@ # Number of Virtual CPUS to use, default is 1 #vcpus = 1 + +# A bitmap of which physical cpus are vcpus allowed to use. +# ex1: 0x2 <-- bit 1 set means all vcpus will be created on CPU1 +#allocmap = 0xffffffff # default value, vcpus can run on any cpu. #---------------------------------------------------------------------------- # Define network interfaces. diff -r b9dbc22e9929 tools/examples/xmexample2 --- a/tools/examples/xmexample2 Mon Oct 10 14:01:37 2005 +++ b/tools/examples/xmexample2 Mon Oct 10 09:58:46 2005 @@ -58,6 +58,9 @@ # Number of Virtual CPUS to use, default is 1 #vcpus = 1 vcpus = 4 # make your domain a 4-way + +# A bitmap of which physical cpus are vcpus allowed to use. 
+allocmap = 0x2 # start all of your VCPUs on CPU1 #---------------------------------------------------------------------------- # Define network interfaces. diff -r b9dbc22e9929 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Mon Oct 10 14:01:37 2005 +++ b/tools/python/xen/lowlevel/xc/xc.c Mon Oct 10 09:58:46 2005 @@ -205,7 +205,7 @@ static char *kwd_list[] = { "dom", "vcpu", "cpumap", NULL }; - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|ii", kwd_list, + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|iL", kwd_list, &dom, &vcpu, &cpumap) ) return NULL; diff -r b9dbc22e9929 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Mon Oct 10 14:01:37 2005 +++ b/tools/python/xen/xend/XendDomainInfo.py Mon Oct 10 09:58:46 2005 @@ -266,6 +266,7 @@ result[''maxmem''] = get_cfg(''maxmem'', int) result[''maxmem_kb''] = get_cfg(''maxmem_kb'', int) result[''cpu''] = get_cfg(''cpu'', int) + result[''allocmap''] = get_cfg(''allocmap'', int) result[''image''] = get_cfg(''image'') try: @@ -438,6 +439,7 @@ defaultInfo(''cpu_weight'', lambda: 1.0) defaultInfo(''vcpus'', lambda: 1) defaultInfo(''vcpu_avail'', lambda: (1 << self.info[''vcpus'']) - 1) + defaultInfo(''allocmap'' , lambda: None) defaultInfo(''bootloader'', lambda: None) defaultInfo(''backend'', lambda: []) defaultInfo(''device'', lambda: []) @@ -1018,6 +1020,16 @@ self.image.handleBootloading() xc.domain_setcpuweight(self.domid, self.info[''cpu_weight'']) + cpu = self.info[''cpu''] + if cpu is not None and cpu != -1: + xc.domain_pincpu(self.domid, 0, 1 << cpu) + + # set the domain alloc map for future vcpus, + # repin VCPU0 according to the alloc map + allocmap = self.info[''allocmap''] + if self.domid and allocmap: + xc.domain_pincpu(self.domid, -1, allocmap) # domain allocmap + xc.domain_pincpu(self.domid, 0, allocmap) # repin VCPU0 # set the max, and allocate additional vcpus xc.set_max_vcpus(self.domid, self.info[''vcpus'']); @@ -1028,10 +1040,6 @@ m = 
self.image.getDomainMemory(self.info[''memory_KiB'']) xc.domain_setmaxmem(self.domid, m) xc.domain_memory_increase_reservation(self.domid, m, 0, 0) - - cpu = self.info[''cpu''] - if cpu is not None and cpu != -1: - xc.domain_pincpu(self.domid, 0, 1 << cpu) self.info[''start_time''] = time.time() diff -r b9dbc22e9929 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Mon Oct 10 14:01:37 2005 +++ b/tools/python/xen/xm/create.py Mon Oct 10 09:58:46 2005 @@ -151,6 +151,10 @@ gopts.var(''cpu'', val=''CPU'', fn=set_int, default=None, use="CPU to run the domain on.") + +gopts.var(''allocmap'', val=''ALLOCMAP'', + fn=set_int, default=None, + use="Set default cpumap used for allocating vcpus.") gopts.var(''vcpus'', val=''VCPUS'', fn=set_int, default=1, @@ -562,6 +566,8 @@ config.append([''maxmem'', vals.maxmem]) if vals.cpu is not None: config.append([''cpu'', vals.cpu]) + if vals.allocmap is not None: + config.append([''allocmap'', vals.allocmap]) if vals.cpu_weight is not None: config.append([''cpu_weight'', vals.cpu_weight]) if vals.blkif: diff -r b9dbc22e9929 xen/common/dom0_ops.c --- a/xen/common/dom0_ops.c Mon Oct 10 14:01:37 2005 +++ b/xen/common/dom0_ops.c Mon Oct 10 09:58:46 2005 @@ -248,7 +248,7 @@ { domid_t dom = op->u.pincpudomain.domain; struct domain *d = find_domain_by_id(dom); - struct vcpu *v; + struct vcpu *v = NULL; cpumap_t cpumap; @@ -258,27 +258,33 @@ break; } - if ( (op->u.pincpudomain.vcpu >= MAX_VIRT_CPUS) || - !d->vcpu[op->u.pincpudomain.vcpu] ) - { - ret = -EINVAL; - put_domain(d); - break; - } - - v = d->vcpu[op->u.pincpudomain.vcpu]; - if ( v == NULL ) - { - ret = -ESRCH; - put_domain(d); - break; - } - - if ( v == current ) - { - ret = -EINVAL; - put_domain(d); - break; + /* don''t bail on vcpu = -1 as that sets domain cpumap */ + if ( (op->u.pincpudomain.vcpu != -1) && + ((op->u.pincpudomain.vcpu >= MAX_VIRT_CPUS) || + !d->vcpu[op->u.pincpudomain.vcpu]) + ) + { + ret = -EINVAL; + put_domain(d); + break; + } + + /* don''t get a 
struct vcpu pointer for -1 op */ + if (op->u.pincpudomain.vcpu != -1) { + v = d->vcpu[op->u.pincpudomain.vcpu]; + if ( v == NULL ) + { + ret = -ESRCH; + put_domain(d); + break; + } + + if ( v == current ) + { + ret = -EINVAL; + put_domain(d); + break; + } } if ( copy_from_user(&cpumap, op->u.pincpudomain.cpumap, @@ -289,24 +295,28 @@ break; } - /* update cpumap for this vcpu */ - v->cpumap = cpumap; - - if ( cpumap == CPUMAP_RUNANYWHERE ) - { - clear_bit(_VCPUF_cpu_pinned, &v->vcpu_flags); - } - else - { - /* pick a new cpu from the usable map */ - int new_cpu = (int)find_first_set_bit(cpumap) % num_online_cpus(); - - vcpu_pause(v); - vcpu_migrate_cpu(v, new_cpu); - set_bit(_VCPUF_cpu_pinned, &v->vcpu_flags); - vcpu_unpause(v); - } - + /* update domain vcpu alloc map */ + if ( v == NULL ) { + d->allocmap = cpumap; + } else { + /* update cpumap for this vcpu */ + v->cpumap = cpumap; + + if ( cpumap == CPUMAP_RUNANYWHERE ) + { + clear_bit(_VCPUF_cpu_pinned, &v->vcpu_flags); + } + else + { + /* pick a new cpu from the usable map */ + int new_cpu = get_next_processor(d, v, &cpumap); + + vcpu_pause(v); + vcpu_migrate_cpu(v, new_cpu); + set_bit(_VCPUF_cpu_pinned, &v->vcpu_flags); + vcpu_unpause(v); + } + } put_domain(d); } break; diff -r b9dbc22e9929 xen/common/domain.c --- a/xen/common/domain.c Mon Oct 10 14:01:37 2005 +++ b/xen/common/domain.c Mon Oct 10 09:58:46 2005 @@ -41,6 +41,7 @@ atomic_set(&d->refcnt, 1); atomic_set(&v->pausecnt, 0); + d->allocmap = CPUMAP_RUNANYWHERE; d->domain_id = dom_id; v->processor = cpu; @@ -384,7 +385,7 @@ v = d->vcpu[vcpuid]; atomic_set(&v->pausecnt, 0); - v->cpumap = CPUMAP_RUNANYWHERE; + v->cpumap = d->allocmap; memcpy(&v->arch, &idle0_vcpu.arch, sizeof(v->arch)); @@ -489,6 +490,31 @@ return -ENOSYS; } + +/* find the least loaded processor , ignorning vcpu v, in cpumap_t *map */ +int get_next_processor(struct domain* d, struct vcpu *v, cpumap_t *map) { + struct vcpu *vc = NULL; + int pro, i, cnt[NR_CPUS] = { 0 }; + + /* count the 
processor layout for this dom, except for vcpu v + * whose processor field may not have been set yet. */ + for_each_vcpu( d, vc ) { + if (vc->vcpu_id != v->vcpu_id) + cnt[vc->processor]++; + } + + /* start from the first allowable cpu, guard against bogus cpus */ + pro = (int)find_first_set_bit(*map) % num_online_cpus(); + + /* pick least loaded processor in the map */ + for ( i = pro; i < num_online_cpus(); i++ ) { + if ( test_bit(i, &*map) && (cnt[i] <= cnt[pro]) ) + pro = i; + } + + return pro; +} + /* * Local variables: diff -r b9dbc22e9929 xen/common/schedule.c --- a/xen/common/schedule.c Mon Oct 10 14:01:37 2005 +++ b/xen/common/schedule.c Mon Oct 10 09:58:46 2005 @@ -125,12 +125,13 @@ v->next_in_list = vc->next_in_list; vc->next_in_list = v; - if (test_bit(_VCPUF_cpu_pinned, &vc->vcpu_flags)) { - v->processor = (vc->processor + 1) % num_online_cpus(); + + /* XXX: if previous vcpu was pinned, mark new vcpu as pinned why? */ + if (test_bit(_VCPUF_cpu_pinned, &vc->vcpu_flags)) set_bit(_VCPUF_cpu_pinned, &v->vcpu_flags); - } else { - v->processor = (vc->processor + 1) % num_online_cpus(); - } + + v->processor = get_next_processor(d, v, &d->allocmap); + } return v; diff -r b9dbc22e9929 xen/include/public/dom0_ops.h --- a/xen/include/public/dom0_ops.h Mon Oct 10 14:01:37 2005 +++ b/xen/include/public/dom0_ops.h Mon Oct 10 09:58:46 2005 @@ -181,7 +181,7 @@ typedef struct { /* IN variables. 
*/ domid_t domain; - u16 vcpu; + s16 vcpu; cpumap_t *cpumap; } dom0_pincpudomain_t; diff -r b9dbc22e9929 xen/include/xen/domain.h --- a/xen/include/xen/domain.h Mon Oct 10 14:01:37 2005 +++ b/xen/include/xen/domain.h Mon Oct 10 09:58:46 2005 @@ -27,4 +27,6 @@ extern void dump_pageframe_info(struct domain *d); +int get_next_processor(struct domain *d, struct vcpu *v, cpumap_t *map); + #endif /* __XEN_DOMAIN_H__ */ diff -r b9dbc22e9929 xen/include/xen/sched.h --- a/xen/include/xen/sched.h Mon Oct 10 14:01:37 2005 +++ b/xen/include/xen/sched.h Mon Oct 10 09:58:46 2005 @@ -136,6 +136,8 @@ /* Bitmask of CPUs which are holding onto this domain''s state. */ cpumask_t cpumask; + + cpumap_t allocmap; /* vcpu allocation bitmap */ struct arch_domain arch; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel