Parses VM vNUMA related config, verifies and sets default values for erroneous parameters. Config options are: vnodes = 2 vnumamem = [2048, 2048] vdistance = [10, 40, 40, 10] vnuma_vcpumap = "1, 0, 1, 0" vnuma_vnodemap = [1, 0] TODO: - change to python list vnuma_vcpumap; - add an option to specify vdistance as half matrix; Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com> --- Changes since RFC v2: - vnuma memory areas are specified in MBytes now; - added parsing for vnode to pnode map; - added support for python lists parsing; - added simplified form for vdistance config - [10, 20]; the first one is same node distance, the other is for the rest; - added memory release for unexpected exit; - remove overriding config parameters; - added default vNUMA memory sizes construction in case the option is omitted or incorrect; --- tools/libxl/xl_cmdimpl.c | 294 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 293 insertions(+), 1 deletion(-) diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c index a8261be..1a03367 100644 --- a/tools/libxl/xl_cmdimpl.c +++ b/tools/libxl/xl_cmdimpl.c @@ -572,6 +572,89 @@ vcpp_out: return rc; } +static void vnuma_info_release(libxl_domain_build_info *info) +{ + info->nr_vnodes = 0; + if ( info->vnuma_memszs ) free(info->vnuma_memszs); + if ( info->vdistance ) free(info->vdistance); + if ( info->vcpu_to_vnode ) free(info->vcpu_to_vnode); + if ( info->vnode_to_pnode ) free(info->vnode_to_pnode); +} +static int get_list_item_uint(XLU_ConfigList *list, unsigned int i) +{ + const char *buf; + char *ep; + unsigned long ul; + int rc = -EINVAL; + buf = xlu_cfg_get_listitem(list, i); + if (!buf) + return rc; + ul = strtoul(buf, &ep, 10); + if (ep == buf) + return rc; + if (ul >= UINT16_MAX) + return rc; + return (int)ul; +} + +static void vdistance_default(unsigned int *vdistance, + unsigned int nr_vnodes, + unsigned int samenode, + unsigned int othernode) +{ + int i, j; + for (i = 0; i < nr_vnodes; i++) + for (j = 0; j < 
nr_vnodes; j++) + *(vdistance + j * nr_vnodes + i) = i == j ? samenode : othernode; +} + +static void vcputovnode_default(unsigned int *vcpu_to_vnode, + unsigned int nr_vnodes, + unsigned int max_vcpus) +{ + int i; + if (vcpu_to_vnode == NULL) + return; + for(i = 0; i < max_vcpus; i++) + vcpu_to_vnode[i] = i % nr_vnodes; +} + +static int vcputovnode_parse(char *cfg, unsigned int *vmap, + unsigned int nr_vnodes, + unsigned int nr_vcpus) +{ + char *toka, *endptr, *saveptra = NULL; + unsigned int *vmap_tmp = NULL, nodemap = 0, smask; + + int rc = 0; + int i; + rc = -EINVAL; + i = 0; + smask = ~(~0 << nr_vnodes); + if(vmap == NULL) + return rc; + vmap_tmp = (unsigned int *)malloc(sizeof(*vmap) * nr_vcpus); + memset(vmap_tmp, 0, sizeof(*vmap) * nr_vcpus); + for (toka = strtok_r(cfg, " ", &saveptra); toka; + toka = strtok_r(NULL, " ", &saveptra)) { + if (i >= nr_vcpus) goto vmap_parse_out; + vmap_tmp[i] = strtoul(toka, &endptr, 10); + nodemap |= (1 << vmap_tmp[i]); + if( endptr == toka) + goto vmap_parse_out; + i++; + } + memcpy(vmap, vmap_tmp, sizeof(*vmap) * nr_vcpus); + if( ((nodemap & smask) + 1) == (1 << nr_vnodes) ) + rc = i; + else + /* Not all nodes have vcpus, will use default map */ + rc = -EINVAL; +vmap_parse_out: + if (vmap_tmp != NULL) free(vmap_tmp); + return rc; +} + static void parse_config_data(const char *config_source, const char *config_data, int config_len, @@ -906,7 +989,12 @@ static void parse_config_data(const char *config_source, { char *cmdline = NULL; const char *root = NULL, *extra = ""; - + XLU_ConfigList *vnumamemcfg, *vdistancecfg, *vnodemap; + int nr_vnuma_regions, nr_vdist, nr_vnodemap; + unsigned long long vnuma_memparsed = 0; + unsigned int dist; + unsigned long ul; + xlu_cfg_replace_string (config, "kernel", &b_info->u.pv.kernel, 0); xlu_cfg_get_string (config, "root", &root, 0); @@ -924,6 +1012,210 @@ static void parse_config_data(const char *config_source, exit(1); } + if (!xlu_cfg_get_long (config, "vnodes", &l, 0)) { + 
b_info->nr_vnodes = l; + if (b_info->nr_vnodes != 0 && b_info->max_vcpus >= b_info->nr_vnodes) { + if (!xlu_cfg_get_list(config, "vnumamem", &vnumamemcfg, &nr_vnuma_regions, 0)) { + /* + * If number of regions parsed != number of nodes, check + * the memory configuration anyways and if its ok we adjust total + * number of nodes. The memory parsed is in MBytes. + */ + b_info->vnuma_memszs = calloc(b_info->nr_vnodes, + sizeof(*b_info->vnuma_memszs)); + if (b_info->vnuma_memszs == NULL) { + fprintf(stderr, "unable to allocate memory for vnuma ranges\n"); + exit(1); + } + char *ep; + for (i = 0; i < nr_vnuma_regions; i++) { + buf = xlu_cfg_get_listitem(vnumamemcfg, i); + if (!buf) { + fprintf(stderr, + "xl: Unable to get element %d in vnuma memroy list\n", i); + break; + } + ul = strtoul(buf, &ep, 10); + if (ep == buf) { + fprintf(stderr, + "xl: Invalid argument parsing vnumamem: %s\n", buf); + break; + } + if (ul >= UINT32_MAX) { + fprintf(stderr, "xl: vnuma memory range %lx is too big\n", ul); + break; + } + /* memory in MBytes */ + b_info->vnuma_memszs[i] = ul; + } + for(i = 0; i < nr_vnuma_regions; i++) + vnuma_memparsed = vnuma_memparsed + (b_info->vnuma_memszs[i]); + /* Now we have all inputs. Check for misconfigurations and adjust if needed */ + /* Amount of memory for vnodes same as total? */ + if((vnuma_memparsed << 10) == (b_info->max_memkb)) { + if(b_info->nr_vnodes != nr_vnuma_regions) + { + fprintf(stderr, "xl: vnuma nr_vnodes looks incorrect, adjusting to %d \n", nr_vnuma_regions); + b_info->nr_vnodes = nr_vnuma_regions; + } + } + else + { + fprintf(stderr, "WARNING: vNUMA memory %llu Kbytes is %s than total memory.\ + Each vnode will get equal memory size of %lu Kbytes\n", + vnuma_memparsed << 10, + (vnuma_memparsed << 10)> b_info->max_memkb ? 
"more" : "less", + b_info->max_memkb / b_info->nr_vnodes); + vnuma_memparsed = (b_info->max_memkb >> 10) / b_info->nr_vnodes; + unsigned long n; + /* reminder in MBytes */ + n = (b_info->max_memkb >> 10) % b_info->nr_vnodes; + /* get final sizes in MBytes */ + for(i = 0; i < (b_info->nr_vnodes - 1); i++) + b_info->vnuma_memszs[i] = vnuma_memparsed; + /* add the reminder to the last node */ + b_info->vnuma_memszs[i] = vnuma_memparsed + n; + } + } + else + { + b_info->vnuma_memszs = calloc(b_info->nr_vnodes, + sizeof(*b_info->vnuma_memszs)); + if (b_info->vnuma_memszs == NULL) { + fprintf(stderr, "unable to allocate memory for vnuma ranges\n"); + exit(1); + } + fprintf(stderr, "WARNING: vNUMA memory ranges were not specified,\ + using default equal vnode memory size %lu Kbytes to cover %lu Kbytes.\n", + b_info->max_memkb / b_info->nr_vnodes, + b_info->max_memkb); + vnuma_memparsed = b_info->max_memkb / b_info->nr_vnodes; + unsigned long n; + vnuma_memparsed = (b_info->max_memkb >> 10) / b_info->nr_vnodes; + /* reminder in MBytes */ + n = (b_info->max_memkb >> 10) % b_info->nr_vnodes; + /* get final sizes in MBytes */ + for(i = 0; i < (b_info->nr_vnodes - 1); i++) + b_info->vnuma_memszs[i] = vnuma_memparsed; + b_info->vnuma_memszs[i] = vnuma_memparsed + n; + } + if(!xlu_cfg_get_list(config, "vdistance", &vdistancecfg, &nr_vdist, 0)) { + b_info->vdistance = calloc(b_info->nr_vnodes * b_info->nr_vnodes, + sizeof(*b_info->vdistance)); + if (b_info->vdistance == NULL) { + vnuma_info_release(b_info); + exit(1); + } + vdistance_default(b_info->vdistance, b_info->nr_vnodes, 10, 20); + /* + * If only two elements are in the list, consider first as value for + * same node distance, the second as the rest of distances. 
+ */ + if (nr_vdist == 2) { + int d1, d2; + d1 = get_list_item_uint(vdistancecfg, 0); + d2 = get_list_item_uint(vdistancecfg, 1); + if (d1 >= 0 && d2 >= 0) + vdistance_default(b_info->vdistance, b_info->nr_vnodes, d1, d2); + else + vdistance_default(b_info->vdistance, b_info->nr_vnodes, 10, 20); + } + else { + if(nr_vdist < (b_info->nr_vnodes * b_info->nr_vnodes)) { + /* + * not all distances were specified, use default values and set the ones, + * user did specify + */ + vdistance_default(b_info->vdistance, b_info->nr_vnodes, 10, 20); + for (i = 0; i < nr_vdist; i++) { + dist = get_list_item_uint(vdistancecfg, i); + if (dist >= 0) + b_info->vdistance[i] = dist; + else { + fprintf(stderr, "WARNING: vNUMA distance was not correctly specified\n"); + vnuma_info_release(b_info); + exit(1); + } + } + } + else { + for (i = 0; i < b_info->nr_vnodes * b_info->nr_vnodes; i++) { + dist = get_list_item_uint(vdistancecfg, i); + if (dist > 0) + b_info->vdistance[i] = dist; + else { + fprintf(stderr, "WARNING: vNUMA distance was not correctly specified\n"); + vnuma_info_release(b_info); + exit(1); + } + } + } + } + } + else { + b_info->vdistance = (unsigned int *)calloc(b_info->nr_vnodes * b_info->nr_vnodes, + sizeof(*b_info->vdistance)); + if (b_info->vdistance == NULL) { + vnuma_info_release(b_info); + exit(1); + } + vdistance_default(b_info->vdistance, b_info->nr_vnodes, 10, 20); + } + + b_info->vcpu_to_vnode = (unsigned int *)calloc(b_info->max_vcpus, + sizeof(*b_info->vcpu_to_vnode)); + if (b_info->vcpu_to_vnode == NULL) + exit(1); + if (!xlu_cfg_get_string(config, "vnuma_vcpumap", &buf, 0)) { + char *buf2 = strdup(buf); + if (vcputovnode_parse(buf2, b_info->vcpu_to_vnode, + b_info->nr_vnodes, b_info->max_vcpus) < 0) { + vcputovnode_default(b_info->vcpu_to_vnode, + b_info->nr_vnodes, + b_info->max_vcpus); + } + if(buf2) free(buf2); + } + else + vcputovnode_default(b_info->vcpu_to_vnode, b_info->nr_vnodes, b_info->max_vcpus); + + if (!xlu_cfg_get_list(config, 
"vnuma_vnodemap", &vnodemap, &nr_vnodemap, 0)) { + /* + * In case vnode to pnode is not correct, the map will defined later + * based on the machine architecture, configuration and memory availble + */ + if (nr_vnodemap == b_info->nr_vnodes) { + b_info->vnode_to_pnode = (unsigned int *)calloc(b_info->nr_vnodes, + sizeof(*b_info->vnode_to_pnode)); + if (b_info->vnode_to_pnode == NULL) { + vnuma_info_release(b_info); + exit(1); + } + + unsigned int vnodemask = 0, pnode, smask; + smask = ~(~0 << nr_vnodemap); + for (i = 0; i < nr_vnodemap; i++) { + pnode = get_list_item_uint(vnodemap, i); + if (pnode >= 0) { + vnodemask |= (1 << i); + b_info->vnode_to_pnode[i] = pnode; + } + } + /* Did it covered all vnodes in the mask? */ + if ( !((vnodemask & smask) + 1) == (1 << nr_vnodemap) ) { + fprintf(stderr, "WARNING: Not all vnodes were covered in vnodemap\n"); + free(b_info->vnode_to_pnode); + b_info->vnode_to_pnode = NULL; + } + } + } + } + else + b_info->nr_vnodes=0; + } + else + b_info->nr_vnodes = 0; + xlu_cfg_replace_string (config, "bootloader", &b_info->u.pv.bootloader, 0); switch (xlu_cfg_get_list_as_string_list(config, "bootloader_args", &b_info->u.pv.bootloader_args, 1)) -- 1.7.10.4