Parses the VM vNUMA related config, verifies it and
sets default values for erroneous parameters.
config options are:
vnodes = 2
vnumamem = [2048, 2048]
vdistance = [10, 40, 40, 10]
vnuma_vcpumap ="1, 0, 1, 0"
vnuma_vnodemap = [1, 0]
TODO:
- change to python list vnuma_vcpumap;
- add an option to specify vdistance as half matrix;
Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
---
Changes since RFC v2:
- vnuma memory areas are specified in MBytes now;
- added parsing for vnode to pnode map;
- added support for python lists parsing;
- added simplified form for vdistance config - [10, 20];
the first one is same node distance, the other is for the rest;
- added memory release for unexpected exit;
- remove overriding config parameters;
- added default vNUMA memory sizes construction in
case the option is omitted or incorrect;
---
tools/libxl/xl_cmdimpl.c | 294 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 293 insertions(+), 1 deletion(-)
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index a8261be..1a03367 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -572,6 +572,89 @@ vcpp_out:
return rc;
}
+/*
+ * Release every vNUMA array held by @info and reset the node count to 0.
+ *
+ * Each pointer is set back to NULL once freed, so calling this twice (or
+ * running any later cleanup on @info) cannot double-free or touch stale
+ * memory.  free(NULL) is a no-op, so no NULL guards are needed.
+ */
+static void vnuma_info_release(libxl_domain_build_info *info)
+{
+    info->nr_vnodes = 0;
+
+    free(info->vnuma_memszs);
+    info->vnuma_memszs = NULL;
+
+    free(info->vdistance);
+    info->vdistance = NULL;
+
+    free(info->vcpu_to_vnode);
+    info->vcpu_to_vnode = NULL;
+
+    free(info->vnode_to_pnode);
+    info->vnode_to_pnode = NULL;
+}
+/*
+ * Read element @i of @list as a base-10 unsigned number.
+ *
+ * Returns the parsed value, which is always below UINT16_MAX, on success.
+ * Returns -EINVAL when the element cannot be fetched, when no number could
+ * be parsed from it, or when the value is UINT16_MAX or larger.  The error
+ * is a negative int, so the result has to be kept in a signed variable for
+ * the failure to be detectable.
+ */
+static int get_list_item_uint(XLU_ConfigList *list, unsigned int i)
+{
+    char *end = NULL;
+    const char *item = xlu_cfg_get_listitem(list, i);
+    unsigned long value;
+
+    if (item == NULL)
+        return -EINVAL;
+
+    value = strtoul(item, &end, 10);
+    /* end == item means strtoul() consumed nothing: not a number. */
+    if (end == item || value >= UINT16_MAX)
+        return -EINVAL;
+
+    return (int)value;
+}
+
+/*
+ * Fill the square distance table @vdistance for @nr_vnodes nodes.
+ *
+ * The table is a flat array of nr_vnodes * nr_vnodes entries.  Entries on
+ * the diagonal (distance of a node to itself) are set to @samenode, all
+ * remaining entries to @othernode.  @vdistance is not checked for NULL.
+ */
+static void vdistance_default(unsigned int *vdistance,
+                              unsigned int nr_vnodes,
+                              unsigned int samenode,
+                              unsigned int othernode)
+{
+    unsigned int row, col;
+
+    for (row = 0; row < nr_vnodes; row++) {
+        unsigned int *line = vdistance + row * nr_vnodes;
+
+        for (col = 0; col < nr_vnodes; col++) {
+            if (row == col)
+                line[col] = samenode;
+            else
+                line[col] = othernode;
+        }
+    }
+}
+
+/*
+ * Build the default vcpu-to-vnode map: vcpu N is assigned to vnode
+ * N % nr_vnodes, i.e. vcpus are spread round-robin over the vnodes.
+ *
+ * @vcpu_to_vnode must have room for @max_vcpus entries.  Nothing is written
+ * when @vcpu_to_vnode is NULL or when @nr_vnodes is 0 (the modulo would
+ * otherwise be a division by zero).
+ */
+static void vcputovnode_default(unsigned int *vcpu_to_vnode,
+                                unsigned int nr_vnodes,
+                                unsigned int max_vcpus)
+{
+    unsigned int vcpu;
+
+    if (vcpu_to_vnode == NULL || nr_vnodes == 0)
+        return;
+
+    for (vcpu = 0; vcpu < max_vcpus; vcpu++)
+        vcpu_to_vnode[vcpu] = vcpu % nr_vnodes;
+}
+
+/*
+ * Parse a vcpu-to-vnode map from the string @cfg into @vmap.
+ *
+ * @cfg lists one decimal vnode number per vcpu, separated by spaces, tabs
+ * and/or commas (e.g. "1, 0, 1, 0").  It is modified in place by
+ * strtok_r().  @vmap must have room for @nr_vcpus entries; vcpus for which
+ * no entry is listed are mapped to vnode 0.
+ *
+ * Returns the number of entries parsed on success.  On failure @vmap is
+ * left untouched and a negative value is returned:
+ *   -ENOMEM  the temporary buffers could not be allocated;
+ *   -EINVAL  @cfg or @vmap is NULL, @nr_vnodes or @nr_vcpus is 0, an entry
+ *            is not a plain number or names a vnode >= @nr_vnodes, more
+ *            than @nr_vcpus entries are listed, or at least one vnode is
+ *            not named by any listed entry.
+ */
+static int vcputovnode_parse(char *cfg, unsigned int *vmap,
+                             unsigned int nr_vnodes,
+                             unsigned int nr_vcpus)
+{
+    static const char delims[] = " ,\t";
+    char *tok, *endptr, *saveptr = NULL;
+    unsigned int *vmap_tmp = NULL;
+    unsigned char *seen = NULL;
+    unsigned int count = 0, covered = 0;
+    unsigned long node;
+    int rc = -EINVAL;
+
+    if (cfg == NULL || vmap == NULL || nr_vnodes == 0 || nr_vcpus == 0)
+        return rc;
+
+    /* Parse into a scratch map so @vmap is only written on success. */
+    vmap_tmp = calloc(nr_vcpus, sizeof(*vmap_tmp));
+    /* One flag per vnode; unlike a bitmask this has no width limit. */
+    seen = calloc(nr_vnodes, sizeof(*seen));
+    if (vmap_tmp == NULL || seen == NULL) {
+        rc = -ENOMEM;
+        goto vmap_parse_out;
+    }
+
+    for (tok = strtok_r(cfg, delims, &saveptr); tok != NULL;
+         tok = strtok_r(NULL, delims, &saveptr)) {
+        if (count >= nr_vcpus)
+            goto vmap_parse_out;
+
+        node = strtoul(tok, &endptr, 10);
+        /* Validate before use: whole token numeric and a known vnode. */
+        if (endptr == tok || *endptr != '\0' || node >= nr_vnodes)
+            goto vmap_parse_out;
+
+        vmap_tmp[count++] = (unsigned int)node;
+        if (!seen[node]) {
+            seen[node] = 1;
+            covered++;
+        }
+    }
+
+    /* Every vnode must be named by at least one listed entry. */
+    if (covered == nr_vnodes) {
+        memcpy(vmap, vmap_tmp, sizeof(*vmap) * nr_vcpus);
+        rc = (int)count;
+    }
+
+vmap_parse_out:
+    free(seen);
+    free(vmap_tmp);
+    return rc;
+}
+
static void parse_config_data(const char *config_source,
const char *config_data,
int config_len,
@@ -906,7 +989,12 @@ static void parse_config_data(const char *config_source,
{
char *cmdline = NULL;
const char *root = NULL, *extra = "";
-
+ XLU_ConfigList *vnumamemcfg, *vdistancecfg, *vnodemap;
+ int nr_vnuma_regions, nr_vdist, nr_vnodemap;
+ unsigned long long vnuma_memparsed = 0;
+ unsigned int dist;
+ unsigned long ul;
+
xlu_cfg_replace_string (config, "kernel",
&b_info->u.pv.kernel, 0);
xlu_cfg_get_string (config, "root", &root, 0);
@@ -924,6 +1012,210 @@ static void parse_config_data(const char *config_source,
exit(1);
}
+ if (!xlu_cfg_get_long (config, "vnodes", &l, 0)) {
+ b_info->nr_vnodes = l;
+ if (b_info->nr_vnodes != 0 && b_info->max_vcpus >=
b_info->nr_vnodes) {
+ if (!xlu_cfg_get_list(config, "vnumamem",
&vnumamemcfg, &nr_vnuma_regions, 0)) {
+ /*
+ * If number of regions parsed != number of nodes, check
+ * the memory configuration anyways and if its ok we adjust
total
+ * number of nodes. The memory parsed is in MBytes.
+ */
+ b_info->vnuma_memszs = calloc(b_info->nr_vnodes,
+
sizeof(*b_info->vnuma_memszs));
+ if (b_info->vnuma_memszs == NULL) {
+ fprintf(stderr, "unable to allocate memory for
vnuma ranges\n");
+ exit(1);
+ }
+ char *ep;
+ for (i = 0; i < nr_vnuma_regions; i++) {
+ buf = xlu_cfg_get_listitem(vnumamemcfg, i);
+ if (!buf) {
+ fprintf(stderr,
+ "xl: Unable to get element %d in vnuma
memroy list\n", i);
+ break;
+ }
+ ul = strtoul(buf, &ep, 10);
+ if (ep == buf) {
+ fprintf(stderr,
+ "xl: Invalid argument parsing
vnumamem: %s\n", buf);
+ break;
+ }
+ if (ul >= UINT32_MAX) {
+ fprintf(stderr, "xl: vnuma memory range %lx is
too big\n", ul);
+ break;
+ }
+ /* memory in MBytes */
+ b_info->vnuma_memszs[i] = ul;
+ }
+ for(i = 0; i < nr_vnuma_regions; i++)
+ vnuma_memparsed = vnuma_memparsed +
(b_info->vnuma_memszs[i]);
+ /* Now we have all inputs. Check for misconfigurations and
adjust if needed */
+ /* Amount of memory for vnodes same as total? */
+ if((vnuma_memparsed << 10) == (b_info->max_memkb))
{
+ if(b_info->nr_vnodes != nr_vnuma_regions)
+ {
+ fprintf(stderr, "xl: vnuma nr_vnodes looks
incorrect, adjusting to %d \n", nr_vnuma_regions);
+ b_info->nr_vnodes = nr_vnuma_regions;
+ }
+ }
+ else
+ {
+ fprintf(stderr, "WARNING: vNUMA memory %llu Kbytes
is %s than total memory.\
+ Each vnode will get equal memory size of %lu
Kbytes\n",
+ vnuma_memparsed << 10,
+ (vnuma_memparsed << 10)>
b_info->max_memkb ? "more" : "less",
+ b_info->max_memkb / b_info->nr_vnodes);
+ vnuma_memparsed = (b_info->max_memkb >> 10) /
b_info->nr_vnodes;
+ unsigned long n;
+ /* reminder in MBytes */
+ n = (b_info->max_memkb >> 10) %
b_info->nr_vnodes;
+ /* get final sizes in MBytes */
+ for(i = 0; i < (b_info->nr_vnodes - 1); i++)
+ b_info->vnuma_memszs[i] = vnuma_memparsed;
+ /* add the reminder to the last node */
+ b_info->vnuma_memszs[i] = vnuma_memparsed + n;
+ }
+ }
+ else
+ {
+ b_info->vnuma_memszs = calloc(b_info->nr_vnodes,
+
sizeof(*b_info->vnuma_memszs));
+ if (b_info->vnuma_memszs == NULL) {
+ fprintf(stderr, "unable to allocate memory for
vnuma ranges\n");
+ exit(1);
+ }
+ fprintf(stderr, "WARNING: vNUMA memory ranges were not
specified,\
+ using default equal vnode memory size %lu Kbytes to
cover %lu Kbytes.\n",
+ b_info->max_memkb / b_info->nr_vnodes,
+ b_info->max_memkb);
+ vnuma_memparsed = b_info->max_memkb /
b_info->nr_vnodes;
+ unsigned long n;
+ vnuma_memparsed = (b_info->max_memkb >> 10) /
b_info->nr_vnodes;
+ /* reminder in MBytes */
+ n = (b_info->max_memkb >> 10) %
b_info->nr_vnodes;
+ /* get final sizes in MBytes */
+ for(i = 0; i < (b_info->nr_vnodes - 1); i++)
+ b_info->vnuma_memszs[i] = vnuma_memparsed;
+ b_info->vnuma_memszs[i] = vnuma_memparsed + n;
+ }
+ if(!xlu_cfg_get_list(config, "vdistance",
&vdistancecfg, &nr_vdist, 0)) {
+ b_info->vdistance = calloc(b_info->nr_vnodes *
b_info->nr_vnodes,
+ sizeof(*b_info->vdistance));
+ if (b_info->vdistance == NULL) {
+ vnuma_info_release(b_info);
+ exit(1);
+ }
+ vdistance_default(b_info->vdistance,
b_info->nr_vnodes, 10, 20);
+ /*
+ * If only two elements are in the list, consider first as
value for
+ * same node distance, the second as the rest of distances.
+ */
+ if (nr_vdist == 2) {
+ int d1, d2;
+ d1 = get_list_item_uint(vdistancecfg, 0);
+ d2 = get_list_item_uint(vdistancecfg, 1);
+ if (d1 >= 0 && d2 >= 0)
+ vdistance_default(b_info->vdistance,
b_info->nr_vnodes, d1, d2);
+ else
+ vdistance_default(b_info->vdistance,
b_info->nr_vnodes, 10, 20);
+ }
+ else {
+ if(nr_vdist < (b_info->nr_vnodes *
b_info->nr_vnodes)) {
+ /*
+ * not all distances were specified, use default
values and set the ones,
+ * user did specify
+ */
+ vdistance_default(b_info->vdistance,
b_info->nr_vnodes, 10, 20);
+ for (i = 0; i < nr_vdist; i++) {
+ dist = get_list_item_uint(vdistancecfg, i);
+ if (dist >= 0)
+ b_info->vdistance[i] = dist;
+ else {
+ fprintf(stderr, "WARNING: vNUMA
distance was not correctly specified\n");
+ vnuma_info_release(b_info);
+ exit(1);
+ }
+ }
+ }
+ else {
+ for (i = 0; i < b_info->nr_vnodes *
b_info->nr_vnodes; i++) {
+ dist = get_list_item_uint(vdistancecfg, i);
+ if (dist > 0)
+ b_info->vdistance[i] = dist;
+ else {
+ fprintf(stderr, "WARNING: vNUMA
distance was not correctly specified\n");
+ vnuma_info_release(b_info);
+ exit(1);
+ }
+ }
+ }
+ }
+ }
+ else {
+ b_info->vdistance = (unsigned int
*)calloc(b_info->nr_vnodes * b_info->nr_vnodes,
+ sizeof(*b_info->vdistance));
+ if (b_info->vdistance == NULL) {
+ vnuma_info_release(b_info);
+ exit(1);
+ }
+ vdistance_default(b_info->vdistance,
b_info->nr_vnodes, 10, 20);
+ }
+
+ b_info->vcpu_to_vnode = (unsigned int
*)calloc(b_info->max_vcpus,
+
sizeof(*b_info->vcpu_to_vnode));
+ if (b_info->vcpu_to_vnode == NULL)
+ exit(1);
+ if (!xlu_cfg_get_string(config, "vnuma_vcpumap",
&buf, 0)) {
+ char *buf2 = strdup(buf);
+ if (vcputovnode_parse(buf2, b_info->vcpu_to_vnode,
+ b_info->nr_vnodes,
b_info->max_vcpus) < 0) {
+ vcputovnode_default(b_info->vcpu_to_vnode,
+ b_info->nr_vnodes,
+ b_info->max_vcpus);
+ }
+ if(buf2) free(buf2);
+ }
+ else
+ vcputovnode_default(b_info->vcpu_to_vnode,
b_info->nr_vnodes, b_info->max_vcpus);
+
+ if (!xlu_cfg_get_list(config, "vnuma_vnodemap",
&vnodemap, &nr_vnodemap, 0)) {
+ /*
+ * In case vnode to pnode is not correct, the map will
defined later
+ * based on the machine architecture, configuration and
memory availble
+ */
+ if (nr_vnodemap == b_info->nr_vnodes) {
+ b_info->vnode_to_pnode = (unsigned int
*)calloc(b_info->nr_vnodes,
+
sizeof(*b_info->vnode_to_pnode));
+ if (b_info->vnode_to_pnode == NULL) {
+ vnuma_info_release(b_info);
+ exit(1);
+ }
+
+ unsigned int vnodemask = 0, pnode, smask;
+ smask = ~(~0 << nr_vnodemap);
+ for (i = 0; i < nr_vnodemap; i++) {
+ pnode = get_list_item_uint(vnodemap, i);
+ if (pnode >= 0) {
+ vnodemask |= (1 << i);
+ b_info->vnode_to_pnode[i] = pnode;
+ }
+ }
+ /* Did it covered all vnodes in the mask? */
+ if ( !((vnodemask & smask) + 1) == (1 <<
nr_vnodemap) ) {
+ fprintf(stderr, "WARNING: Not all vnodes were
covered in vnodemap\n");
+ free(b_info->vnode_to_pnode);
+ b_info->vnode_to_pnode = NULL;
+ }
+ }
+ }
+ }
+ else
+ b_info->nr_vnodes=0;
+ }
+ else
+ b_info->nr_vnodes = 0;
+
xlu_cfg_replace_string (config, "bootloader",
&b_info->u.pv.bootloader, 0);
switch (xlu_cfg_get_list_as_string_list(config,
"bootloader_args",
&b_info->u.pv.bootloader_args, 1))
--
1.7.10.4