# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1329378376 -3600
# Node ID d368cf36d66c1e8df60bd0a4868c171b6a929edc
# Parent  bf0a7c205687857a8f8d3bd3841654ed61828193
RFC: initial libxl support for xenpaging

After the previous discussion about integration of xenpaging into xl/libxl it
was not clear to me whether my proposal as a whole or only parts of it were
rejected. So here is my current understanding of the comments I received.

Add initial support to libxl to start xenpaging for a HVM guest.
These are the considerations:
- a knob in domU.cfg is needed to start xenpaging
- xenpaging needs a target in KiB in "memory/target-tot_pages"
  -> the knob should be the target value in MiB: mem_target_paging=NUM
     if the value is 0, xenpaging is not started
- a cmdline interface is needed to adjust "memory/target-tot_pages" at runtime
  -> it was suggested to use 'xl mem-set' which should adjust both
     "memory/target" and "memory/target-tot_pages" at the same time
  -> maybe another cmdline interface should be 'xl mem-target-paging' which
     adjusts "memory/target-tot_pages", and maybe 'xl mem-target-balloon'
     which adjusts "memory/target"
- libxl starts xenpaging with at least two cmdline options which specify the
  pagefile to use and the dom_id. An optional "xenpaging_file=path" specifies
  the pagefile name. Optional additional cmdline options for xenpaging can be
  specified with a domU.cfg option "xenpaging_extra=[ 'opt', 'opt' ]"
- currently maxmem= + memory= and mem_target_paging= can not be used together
  because paging for a PoD guest is not implemented. This does not affect
  ballooning within the guest.
- I have some ideas to add runtime tuneables for xenpaging. Should there be a
  "xl xenpaging_ctrl tuneable_name value" command, or should it be done with
  a new tool xenpaging_ctrl? If the latter, the proposed 'xl mem-target-*'
  commands are not needed and this new helper could also adjust
  "memory/target-tot_pages".

The patch below is just a forward port of my previous version.
It adds three new config options, no xl or other changes:

  mem_target_paging=<int>, the amount of memory in MiB for the guest
  xenpaging_file=<string>, pagefile to use (optional)
  xenpaging_extra=[ 'string', 'string' ], optional cmdline args for xenpaging

If 'mem_target_paging=' is not specified in the config file, xenpaging will
not start. If 'xenpaging_file=' is not specified in the config file,
/var/lib/xen/xenpaging/<domain_name>.<domain_id>.paging is used.
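For illustration only, a domU.cfg fragment using these options might look
like this (the values and the pagefile path are made up for the example):

  memory = 1024
  mem_target_paging = 512
  xenpaging_file = "/var/lib/xen/xenpaging/example.paging"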
Signed-off-by: Olaf Hering <olaf@aepfle.de>

diff -r bf0a7c205687 -r d368cf36d66c tools/libxl/libxl.h
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -322,6 +322,7 @@ int libxl_init_build_info(libxl_ctx *ctx
 typedef int (*libxl_console_ready)(libxl_ctx *ctx, uint32_t domid, void *priv);
 int libxl_domain_create_new(libxl_ctx *ctx, libxl_domain_config *d_config, libxl_console_ready cb, void *priv, uint32_t *domid);
 int libxl_domain_create_restore(libxl_ctx *ctx, libxl_domain_config *d_config, libxl_console_ready cb, void *priv, uint32_t *domid, int restore_fd);
+int libxl__create_xenpaging(libxl_ctx *ctx, libxl_domain_config *d_config, uint32_t domid, char *path);
 void libxl_domain_config_dispose(libxl_domain_config *d_config);
 int libxl_domain_suspend(libxl_ctx *ctx, libxl_domain_suspend_info *info, uint32_t domid, int fd);

diff -r bf0a7c205687 -r d368cf36d66c tools/libxl/libxl_create.c
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -457,6 +457,132 @@ static int store_libxl_entry(libxl__gc *
                         libxl_device_model_version_to_string(b_info->device_model_version));
 }
 
+static int create_xenpaging(libxl__gc *gc, char *dom_name, uint32_t domid,
+                            libxl_domain_build_info *b_info)
+{
+    libxl__spawner_starting *buf_starting;
+    libxl_string_list xpe = b_info->u.hvm.xenpaging_extra;
+    int i, rc;
+    char *logfile;
+    int logfile_w, null, need_pagefile;
+    char *path, *dom_path, *value;
+    char **args;
+    char *xp;
+    flexarray_t *xp_args;
+    libxl_ctx *ctx = libxl__gc_owner(gc);
+
+    /* Nothing to do */
+    if (!b_info->tot_memkb)
+        return 0;
+
+    /* Check if paging is already enabled */
+    dom_path = libxl__xs_get_dompath(gc, domid);
+    if (!dom_path) {
+        rc = ERROR_NOMEM;
+        goto out;
+    }
+    path = libxl__sprintf(gc, "%s/xenpaging/state", dom_path);
+    if (!path) {
+        rc = ERROR_NOMEM;
+        goto out;
+    }
+    value = xs_read(ctx->xsh, XBT_NULL, path, NULL);
+    rc = value && strcmp(value, "running") == 0;
+    free(value);
+    /* Already running, nothing to do */
+    if (rc)
+        return 0;
+
+    /* Check if xenpaging is present */
+    xp = libxl__abs_path(gc, "xenpaging", libxl_libexec_path());
+    if (access(xp, X_OK) < 0) {
+        LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "%s is not executable", xp);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    /* Initialise settings for child */
+    buf_starting = calloc(sizeof(*buf_starting), 1);
+    if (!buf_starting) {
+        rc = ERROR_NOMEM;
+        goto out;
+    }
+    buf_starting->domid = domid;
+    buf_starting->dom_path = dom_path;
+    buf_starting->pid_path = "xenpaging/xenpaging-pid";
+    buf_starting->for_spawn = calloc(sizeof(libxl__spawn_starting), 1);
+    if (!buf_starting->for_spawn) {
+        rc = ERROR_NOMEM;
+        goto out;
+    }
+
+    /* Assemble arguments for xenpaging */
+    xp_args = flexarray_make(8, 1);
+    if (!xp_args) {
+        rc = ERROR_NOMEM;
+        goto out;
+    }
+    /* Set executable path */
+    flexarray_append(xp_args, xp);
+
+    /* Search pagefile option in extra flags */
+    need_pagefile = 1;
+    for (i = 0; xpe && xpe[i]; i++) {
+        if (strcmp(xpe[i], "-f") == 0) {
+            need_pagefile = 0;
+            break;
+        }
+    }
+    /* Append pagefile option if it's not in extra flags */
+    if (need_pagefile) {
+        flexarray_append(xp_args, "-f");
+        if (b_info->u.hvm.xenpaging_file)
+            flexarray_append(xp_args, b_info->u.hvm.xenpaging_file);
+        else
+            flexarray_append(xp_args, libxl__sprintf(gc, "%s/%s.%u.paging",
+                             libxl_xenpaging_dir_path(), dom_name, domid));
+    }
+
+    /* Set maximum amount of memory xenpaging should handle */
+    flexarray_append(xp_args, "-m");
+    flexarray_append(xp_args, libxl__sprintf(gc, "%d", b_info->max_memkb));
+
+    /* Append extra args for pager */
+    for (i = 0; xpe && xpe[i]; i++)
+        flexarray_append(xp_args, xpe[i]);
+    /* Append domid for pager */
+    flexarray_append(xp_args, "-d");
+    flexarray_append(xp_args, libxl__sprintf(gc, "%u", domid));
+    flexarray_append(xp_args, NULL);
+    args = (char **) flexarray_contents(xp_args);
+
+    /* Initialise logfile */
+    libxl_create_logfile(ctx, libxl__sprintf(gc, "xenpaging-%s", dom_name),
+                         &logfile);
+    logfile_w = open(logfile, O_WRONLY|O_CREAT, 0644);
+    free(logfile);
+    null = open("/dev/null", O_RDONLY);
+
+    /* Spawn the child */
+    rc = libxl__spawn_spawn(gc, buf_starting->for_spawn, "xenpaging",
+                            libxl_spawner_record_pid, buf_starting);
+    if (rc < 0)
+        goto out_close;
+    if (!rc) { /* inner child */
+        setsid();
+        /* Finally run xenpaging */
+        libxl__exec(null, logfile_w, logfile_w, xp, args);
+    }
+    rc = libxl__spawn_confirm_offspring_startup(gc, 5, "xenpaging", path,
+                                                "running", buf_starting);
+out_close:
+    close(null);
+    close(logfile_w);
+    free(args);
+out:
+    return rc;
+}
+
 static int do_domain_create(libxl__gc *gc, libxl_domain_config *d_config,
                             libxl_console_ready cb, void *priv,
                             uint32_t *domid_out, int restore_fd)
@@ -633,6 +759,16 @@ static int do_domain_create(libxl__gc *g
         goto error_out;
     }
 
+    if (d_config->c_info.type == LIBXL_DOMAIN_TYPE_HVM) {
+        ret = create_xenpaging(gc, d_config->c_info.name, domid,
+                               &d_config->b_info);
+        if (ret) {
+            LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR,
+                             "Failed to start xenpaging.\n");
+            goto error_out;
+        }
+    }
+
     *domid_out = domid;
     return 0;

diff -r bf0a7c205687 -r d368cf36d66c tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -127,7 +127,7 @@ int libxl__build_post(libxl__gc *gc, uin
     if (info->cpuid != NULL)
         libxl_cpuid_set(ctx, domid, info->cpuid);
 
-    ents = libxl__calloc(gc, 12 + (info->max_vcpus * 2) + 2, sizeof(char *));
+    ents = libxl__calloc(gc, 14 + (info->max_vcpus * 2) + 2, sizeof(char *));
     ents[0] = "memory/static-max";
     ents[1] = libxl__sprintf(gc, "%d", info->max_memkb);
     ents[2] = "memory/target";
@@ -140,9 +140,11 @@ int libxl__build_post(libxl__gc *gc, uin
     ents[9] = libxl__sprintf(gc, "%"PRIu32, state->store_port);
     ents[10] = "store/ring-ref";
     ents[11] = libxl__sprintf(gc, "%lu", state->store_mfn);
+    ents[12] = "memory/target-tot_pages";
+    ents[13] = libxl__sprintf(gc, "%d", info->tot_memkb);
     for (i = 0; i < info->max_vcpus; i++) {
-        ents[12+(i*2)] = libxl__sprintf(gc, "cpu/%d/availability", i);
-        ents[12+(i*2)+1] = (i && info->cur_vcpus && !(info->cur_vcpus & (1 << i)))
+        ents[14+(i*2)] = libxl__sprintf(gc, "cpu/%d/availability", i);
+        ents[14+(i*2)+1] = (i && info->cur_vcpus && !(info->cur_vcpus & (1 << i)))
             ? "offline" : "online";
     }

diff -r bf0a7c205687 -r d368cf36d66c tools/libxl/libxl_memory.txt
--- a/tools/libxl/libxl_memory.txt
+++ b/tools/libxl/libxl_memory.txt
@@ -1,28 +1,28 @@
 /* === Domain memory breakdown: HVM guests =================================
 
-           +  +----------+                                +
-           |  | shadow   |                                |
-           |  +----------+                                |
-  overhead |  | extra    |                                |
-           |  | external |                                |
-           |  +----------+                       +        |
-           |  | extra    |                       |        |
-           |  | internal |                       |        |
-           +  +----------+              +        |        |  footprint
-           |  | video    |              |        |        |
-           |  +----------+  +  +        |        |  xen   |
-           |  |          |  |  |        | actual | maximum|
-           |  |          |  |  |        | target |        |
-           |  | guest    |  |  | build  |        |        |
-           |  |          |  |  | start  |        |        |
-    static |  |          |  |  |        |        |        |
-   maximum |  +----------+  |  +        +        +        +
-           |  |          |  |
-           |  |          |  |
-           |  | balloon  |  | build
-           |  |          |  | maximum
-           |  |          |  |
-           +  +----------+  +
+           +  +----------+                                +
+           |  | shadow   |                                |
+           |  +----------+                                |
+  overhead |  | extra    |                                |
+           |  | external |                                |
+           |  +----------+                       +        |
+           |  | extra    |                       |        |
+           |  | internal |                       |        |
+           +  +----------+              +        |        |  footprint
+           |  | video    |              |        |        |
+           |  +----------+  +  +  +     |        |  xen   |
+           |  |          |  | guest OS  |  |     | actual | maximum|
+           |  | guest    |  | real RAM  |  |     | target |        |
+           |  |          |  |    |  | build |    |        |
+           |  |----------+  +    |  | start  +   |        |
+    static |  | paging   |       |  |        |   |
+   maximum |  +----------+  |    +  +        +   +        +
+           |  |          |  |
+           |  |          |  |
+           |  | balloon  |  | build
+           |  |          |  | maximum
+           |  |          |  |
+           +  +----------+  +
 
 extra internal = LIBXL_MAXMEM_CONSTANT
@@ -34,6 +34,17 @@
 libxl_domain_setmaxmem -> xen maximum
 libxl_set_memory_target -> actual target
 
+build maximum = RAM as seen inside the virtual machine
+    Guest OS has to configure itself for this amount of memory
+    Increase/Decrease via memory hotplug of virtual hardware.
+    xl mem-max
+build start = RAM usable by the guest OS
+    Guest OS sees balloon driver as memory hog
+    Increase/Decrease via commands to the balloon driver
+    xl mem-set
+actual target = RAM allocated for the guest
+    Increase/Decrease via commands to paging daemon
+    xl mem-paging_target (?)
 
 === Domain memory breakdown: PV guests =================================

diff -r bf0a7c205687 -r d368cf36d66c tools/libxl/libxl_types.idl
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -207,6 +207,7 @@ libxl_domain_build_info = Struct("domain
     ("tsc_mode",        libxl_tsc_mode),
     ("max_memkb",       uint32),
     ("target_memkb",    uint32),
+    ("tot_memkb",       uint32),
     ("video_memkb",     uint32),
     ("shadow_memkb",    uint32),
     ("disable_migrate", bool),
@@ -240,6 +241,8 @@ libxl_domain_build_info = Struct("domain
     ("vpt_align",       bool),
     ("timer_mode",      libxl_timer_mode),
     ("nested_hvm",      bool),
+    ("xenpaging_file",  string),
+    ("xenpaging_extra", libxl_string_list),
     ("no_incr_generationid", bool),
     ("nographic",       bool),
     ("stdvga",          bool),

diff -r bf0a7c205687 -r d368cf36d66c tools/libxl/xl_cmdimpl.c
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -508,6 +508,28 @@ vcpp_out:
     return rc;
 }
 
+static void parse_xenpaging_extra(const XLU_Config *config, libxl_string_list *xpe)
+{
+    XLU_ConfigList *args;
+    libxl_string_list l;
+    const char *val;
+    int nr_args = 0, i;
+
+    if (xlu_cfg_get_list(config, "xenpaging_extra", &args, &nr_args, 1))
+        return;
+
+    l = xmalloc(sizeof(char*)*(nr_args + 1));
+    if (!l)
+        return;
+
+    l[nr_args] = NULL;
+    for (i = 0; i < nr_args; i++) {
+        val = xlu_cfg_get_listitem(args, i);
+        l[i] = val ? strdup(val) : NULL;
+    }
+    *xpe = l;
+}
+
 static void parse_config_data(const char *configfile_filename_report,
                               const char *configfile_data,
                               int configfile_len,
@@ -629,6 +651,9 @@ static void parse_config_data(const char
     if (!xlu_cfg_get_long (config, "maxmem", &l, 0))
         b_info->max_memkb = l * 1024;
 
+    if (!xlu_cfg_get_long (config, "mem_target_paging", &l, 0))
+        b_info->tot_memkb = l * 1024;
+
     if (xlu_cfg_get_string (config, "on_poweroff", &buf, 0))
         buf = "destroy";
     if (!parse_action_on_shutdown(buf, &d_config->on_poweroff)) {
@@ -747,6 +772,10 @@ static void parse_config_data(const char
         if (!xlu_cfg_get_long (config, "nestedhvm", &l, 0))
             b_info->u.hvm.nested_hvm = l;
+
+        xlu_cfg_replace_string (config, "xenpaging_file", &b_info->u.hvm.xenpaging_file, 0);
+        parse_xenpaging_extra(config, &b_info->u.hvm.xenpaging_extra);
+
         break;
     case LIBXL_DOMAIN_TYPE_PV:
     {

diff -r bf0a7c205687 -r d368cf36d66c tools/xenpaging/xenpaging.c
--- a/tools/xenpaging/xenpaging.c
+++ b/tools/xenpaging/xenpaging.c
@@ -39,6 +39,8 @@
 /* Defines number of mfns a guest should use at a time, in KiB */
 #define WATCH_TARGETPAGES "memory/target-tot_pages"
+/* Defines path to startup confirmation */
+#define WATCH_STARTUP "xenpaging/state"
 static char *watch_target_tot_pages;
 static char *dom_path;
 static char watch_token[16];
@@ -845,6 +847,20 @@ static int evict_pages(struct xenpaging
     return num;
 }
 
+static void xenpaging_confirm_startup(struct xenpaging *paging)
+{
+    xc_interface *xch = paging->xc_handle;
+    char *path;
+    int len;
+
+    len = asprintf(&path, "%s/%s", dom_path, WATCH_STARTUP);
+    if ( len < 0 )
+        return;
+    DPRINTF("confirming startup in %s\n", path);
+    xs_write(paging->xs_handle, XBT_NULL, path, "running", strlen("running"));
+    free(path);
+}
+
 int main(int argc, char *argv[])
 {
     struct sigaction act;
@@ -880,6 +896,9 @@ int main(int argc, char *argv[])
     /* listen for page-in events to stop pager */
     create_page_in_thread(paging);
 
+    /* Confirm startup to caller */
+    xenpaging_confirm_startup(paging);
+
     /* Swap pages in and out */
     while ( 1 )
     {
On Thu, 2012-02-16 at 07:47 +0000, Olaf Hering wrote:
> # HG changeset patch
> # User Olaf Hering <olaf@aepfle.de>
> # Date 1329378376 -3600
> # Node ID d368cf36d66c1e8df60bd0a4868c171b6a929edc
> # Parent  bf0a7c205687857a8f8d3bd3841654ed61828193
> RFC: initial libxl support for xenpaging
>
> After the previous discussion about integration of xenpaging into xl/libxl it
> was not clear to me whether my proposal as a whole or only parts of it were
> rejected. So here is my current understanding of the comments I received.
>
> Add initial support to libxl to start xenpaging for a HVM guest.
> These are the considerations:
> - a knob in domU.cfg is needed to start xenpaging
> - xenpaging needs a target in KiB in "memory/target-tot_pages"
>   -> the knob should be the target value in MiB: mem_target_paging=NUM
>      if the value is 0, xenpaging is not started

Wasn't the plan that the knob exported by xl should be a boolean and that
libxl should have the full value in its API, or am I misremembering?

IOW at the libxl layer we have the full semantics available to callers but
at the xl layer we only expose one "target memory" value to users which we
expect the guest to use ballooning to reach but which we "enforce" with
paging if they don't comply.

[...]

> - I have some ideas to add runtime tuneables for xenpaging. Should there be a
>   "xl xenpaging_ctrl tuneable_name value" command, or should it be done with
>   a new tool xenpaging_ctrl? If the latter, the proposed 'xl mem-target-*'
>   commands are not needed and this new helper could also adjust
>   "memory/target-tot_pages".

I think users would reasonably expect to always interact with the paging
daemon via the toolstack, at least for normal non-debug operations, and so
the paging daemon should not have any public interface.

Ian.
On Fri, Feb 17, Ian Campbell wrote:

> On Thu, 2012-02-16 at 07:47 +0000, Olaf Hering wrote:
> > After the previous discussion about integration of xenpaging into xl/libxl
> > it was not clear to me whether my proposal as a whole or only parts of it
> > were rejected. So here is my current understanding of the comments I
> > received.
> >
> > Add initial support to libxl to start xenpaging for a HVM guest.
> > These are the considerations:
> > - a knob in domU.cfg is needed to start xenpaging
> > - xenpaging needs a target in KiB in "memory/target-tot_pages"
> >   -> the knob should be the target value in MiB: mem_target_paging=NUM
> >      if the value is 0, xenpaging is not started
>
> Wasn't the plan that the knob exported by xl should be a boolean and
> that libxl should have the full value in its API, or am I
> misremembering?

That was not clear to me, that's why I'm asking again.

> IOW at the libxl layer we have the full semantics available to callers
> but at the xl layer we only expose one "target memory" value to users
> which we expect the guest to use ballooning to reach but which we
> "enforce" with paging if they don't comply.

So if I understand that right there should be a new boolean, like
xenpaging=yes/no?

In the .cfg it can be either like this, which appears to mean the pager is
started but has no target (or the target is N itself):

  memory=N
  xenpaging=yes

Or it could be like this (in which case the pager currently can't start due
to lack of PoD support):

  memory=N
  maxmem=N+X
  xenpaging=yes

In both cases "xl mem-set" will adjust both ballooning and paging values?
Should there be xl commands to adjust just ballooning and/or paging?

Regarding the tuning knobs, right now I can only think of the policy mru
size and the number of evicts before checking for new events.
So you propose to have something like "xl xenpaging domid knob value"?

Olaf
On Fri, 2012-02-17 at 14:25 +0000, Olaf Hering wrote:
> On Fri, Feb 17, Ian Campbell wrote:
> > IOW at the libxl layer we have the full semantics available to callers
> > but at the xl layer we only expose one "target memory" value to users
> > which we expect the guest to use ballooning to reach but which we
> > "enforce" with paging if they don't comply.
>
> So if I understand that right there should be a new boolean, like
> xenpaging=yes/no?

Yes. Although you can probably omit the "xen" prefix in a Xen
configuration file.

[...]

> Or it could be like this (in which case the pager currently can't start
> due to lack of PoD support):
>
>   memory=N
>   maxmem=N+X
>   xenpaging=yes

This is the style I was thinking of. Paging needs to either be compatible
with or replace PoD IMHO (or perhaps simply be mutually exclusive with it
as an interim solution, i.e. paging=yes disables pod).

> In both cases "xl mem-set" will adjust both ballooning and paging values?

Yes.

> Should there be xl commands to adjust just ballooning and/or paging?

Perhaps as debugging aids or something but I wouldn't expect these to be
commands which we would suggest end users needed to touch. The
interactions between these different knobs and the semantics of changing
just one or the other would be pretty complex.

The only additional knob I can see being useful would be a minmem option
which is the smallest amount of memory which the guest should think it
has, i.e. it would be a lower bound on the ballooning target but not the
paging target. The default would be some fraction of maxmem (similar to
the minimum_target stuff in linux-2.6.18-xen.hg:drivers/xen/balloon/).
This would be used to reduce the memory used by a domain past the point
at which it would start OOMing etc.

> Regarding the tuning knobs, right now I can only think of the policy mru
> size and the number of evicts before checking for new events.
> So you propose to have something like "xl xenpaging domid knob value"?

The other option would be

  xl paging-knob-set domid value

We seem to have commands in both forms already.

Ian.
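For illustration, a minimal C sketch of the minmem clamp described above;
every name here is invented, this is not from any patch. The requested
target is clamped for the balloon but passed through unchanged to the
pager:

  #include <stdint.h>

  struct mem_targets {
      uint64_t balloon_kb; /* what the guest is asked to balloon down to */
      uint64_t paging_kb;  /* what the pager enforces as the footprint */
  };

  static struct mem_targets compute_targets(uint64_t requested_kb,
                                            uint64_t minmem_kb)
  {
      struct mem_targets t;

      /* never ask the guest to shrink below minmem, to avoid OOMing it... */
      t.balloon_kb = requested_kb < minmem_kb ? minmem_kb : requested_kb;
      /* ...but the real footprint may still be pushed all the way down */
      t.paging_kb = requested_kb;
      return t;
  }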
On Fri, Feb 17, Ian Campbell wrote:

> Paging needs to either be compatible with or replace PoD IMHO (or
> perhaps simply be mutually exclusive with it as an interim solution,
> i.e. paging=yes disables pod).

This has to be fixed in the hypervisor at some point. PoD as such is very
useful, and I expect adding the cooperation with paging is not very hard.
It just takes time.

> > Should there be xl commands to adjust just ballooning and/or paging?
>
> Perhaps as debugging aids or something but I wouldn't expect these to be
> commands which we would suggest end users needed to touch. The
> interactions between these different knobs and the semantics of changing
> just one or the other would be pretty complex.

Where do you see the complexity? The balloon driver does not notice that
the guest is paged, and xenpaging itself can cope with ballooning.

Giving the host admin the choice to page more (at the expense of some IO
and slight slowdown) without caring about the current memory constraints
within the guest sounds useful to me.

> The only additional knob I can see being useful would be a minmem option
> which is the smallest amount of memory which the guest should think it
> has, i.e. it would be a lower bound on the ballooning target but not the
> paging target. The default would be some fraction of maxmem (similar to
> the minimum_target stuff in linux-2.6.18-xen.hg:drivers/xen/balloon/).
> This would be used to reduce the memory used by a domain past the point
> at which it would start OOMing etc.

Sounds useful to prevent accidents.

Olaf
On Fri, 2012-02-17 at 15:24 +0000, Olaf Hering wrote:
> On Fri, Feb 17, Ian Campbell wrote:
>
> > Paging needs to either be compatible with or replace PoD IMHO (or
> > perhaps simply be mutually exclusive with it as an interim solution,
> > i.e. paging=yes disables pod).
>
> This has to be fixed in the hypervisor at some point. PoD as such is very
> useful, and I expect adding the cooperation with paging is not very
> hard. It just takes time.
>
> > > Should there be xl commands to adjust just ballooning and/or paging?
> >
> > Perhaps as debugging aids or something but I wouldn't expect these to be
> > commands which we would suggest end users needed to touch. The
> > interactions between these different knobs and the semantics of changing
> > just one or the other would be pretty complex.
>
> Where do you see the complexity?

For users in determining what changing a given value will actually do,
what values they need to use to get the behaviour which they want, what
happens if they change first one value and then the other, how do they
interact with mem-set and mem-max, what are the various invalid or
meaningless combinations etc.

There's also complexity for us in trying to decide what the right answer
is to each of those questions and trying to implement it in a consistent
way and explain it all to users.

> The balloon driver does not notice that
> the guest is paged, and xenpaging itself can cope with ballooning.
>
> Giving the host admin the choice to page more (at the expense of some IO
> and slight slowdown) without caring about the current memory constraints
> within the guest sounds useful to me.

The "minmem" suggestion allows exactly this without exposing the user to
too much of the underlying implementation details.
On Fri, Feb 17, Ian Campbell wrote:

> On Fri, 2012-02-17 at 15:24 +0000, Olaf Hering wrote:
> > Where do you see the complexity?
>
> For users in determining what changing a given value will actually do,
> what values they need to use to get the behaviour which they want, what
> happens if they change first one value and then the other, how do they
> interact with mem-set and mem-max, what are the various invalid or
> meaningless combinations etc.
>
> There's also complexity for us in trying to decide what the right answer
> is to each of those questions and trying to implement it in a consistent
> way and explain it all to users.

A figure like the one in tools/libxl/libxl_memory.txt will explain it
very well, if done right. Perhaps I should write up something based on
that figure.

Olaf
On Fri, 2012-02-17 at 15:43 +0000, Olaf Hering wrote:
> On Fri, Feb 17, Ian Campbell wrote:
>
> > There's also complexity for us in trying to decide what the right answer
> > is to each of those questions and trying to implement it in a consistent
> > way and explain it all to users.
>
> A figure like the one in tools/libxl/libxl_memory.txt will explain it
> very well, if done right.

That's exactly the sort of complexity we should not be exposing to the
end user! It's bad enough us developers trying to keep all the moving
parts straight in our heads...

> Perhaps I should write up something based on that figure.
>
> Olaf
On Fri, Feb 17, Ian Campbell wrote:

> That's exactly the sort of complexity we should not be exposing to the
> end user!

Of course not as is!
Right now one has to understand two values, maxmem and the size of the
memhog called "guest balloon driver". Paging adds a third one.

Olaf
On Fri, 2012-02-17 at 16:03 +0000, Olaf Hering wrote:
> On Fri, Feb 17, Ian Campbell wrote:
>
> > That's exactly the sort of complexity we should not be exposing to the
> > end user!
>
> Of course not as is!
> Right now one has to understand two values, maxmem and the size of the
> memhog called "guest balloon driver". Paging adds a third one.

Right, but that third one should not be "paging size" or anything like
that, just like they should not really have to understand what "a memhog
called "guest balloon driver"" is. Even today the thing we actually
expose is just called "memory" in the configuration.

The user cares about three things:
 1. The maximum amount of memory a guest can use.
 2. The amount of memory which the guest thinks it has.
 3. The actual amount of memory the guest has.

The fact that these are implemented by some combination of paging and/or
ballooning is not really of interest to the user. They only need to be
aware that if #3 < #2 then there is a performance cost and that even if
#2 == #3 a guest which tries to use more memory than that will suffer a
performance penalty.

My point is that the memory knobs should be exposed to the user of xl as
semantically meaningful options without the need to refer to "the paging
target" or "the balloon target", which is why there should not IMHO be
"xl commands to adjust just ballooning and/or paging".

Ian.
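As a sketch, the three values above map onto the underlying knobs roughly
like this; the struct and field names are invented for illustration only:

  #include <stdint.h>

  struct guest_memory_view {
      uint64_t max_kb;      /* 1. maxmem=: hard upper bound               */
      uint64_t believed_kb; /* 2. balloon target: what the guest thinks   */
      uint64_t actual_kb;   /* 3. paging target: what it actually has     */
  };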
Andres Lagar-Cavilla wrote on 2012-Feb-17 16:55 UTC (Re: [PATCH] RFC: initial libxl support for xenpaging):
> Date: Fri, 17 Feb 2012 16:43:18 +0000
> From: Ian Campbell <Ian.Campbell@citrix.com>
> To: Olaf Hering <olaf@aepfle.de>
> Cc: "xen-devel@lists.xensource.com" <xen-devel@lists.xensource.com>
> Subject: Re: [Xen-devel] [PATCH] RFC: initial libxl support for xenpaging
> Message-ID: <1329496998.3131.131.camel@zakaz.uk.xensource.com>
> Content-Type: text/plain; charset="UTF-8"
>
> On Fri, 2012-02-17 at 16:03 +0000, Olaf Hering wrote:
>> On Fri, Feb 17, Ian Campbell wrote:
>>
>> > That's exactly the sort of complexity we should not be exposing to the
>> > end user!
>>
>> Of course not as is!
>> Right now one has to understand two values, maxmem and the size of the
>> memhog called "guest balloon driver". Paging adds a third one.
>
> Right, but that third one should not be "paging size" or anything like
> that, just like they should not really have to understand what "a memhog
> called "guest balloon driver"" is. Even today the thing we actually
> expose is just called "memory" in the configuration.
>
> The user cares about three things:
>  1. The maximum amount of memory a guest can use.
>  2. The amount of memory which the guest thinks it has.
>  3. The actual amount of memory the guest has.
>
> The fact that these are implemented by some combination of paging and/or
> ballooning is not really of interest to the user. They only need to be
> aware that if #3 < #2 then there is a performance cost and that even if
> #2 == #3 a guest which tries to use more memory than that will suffer a
> performance penalty.
>
> My point is that the memory knobs should be exposed to the user of xl as
> semantically meaningful options without the need to refer to "the paging
> target" or "the balloon target", which is why there should not IMHO be
> "xl commands to adjust just ballooning and/or paging".

May I suggest you call these things 'memory' and 'footprint'? Users can
adjust the memory the guest thinks it has (#2 above), and the footprint
the domain will actually occupy (#3).

Then footprint can be adjusted by paging or something else. I don't want
to begin to think about how PoD would jive there. It only seems to affect
'footprint' during bootup.

Andres

> Ian.
On Fri, 2012-02-17 at 16:55 +0000, Andres Lagar-Cavilla wrote:
> May I suggest you call these things 'memory' and 'footprint'? Users can
> adjust the memory the guest thinks it has (#2 above), and the footprint
> the domain will actually occupy (#3).

Footprint isn't a bad idea for that name.

> Then footprint can be adjusted by paging or something else. I don't want
> to begin to think about how PoD would jive there. It only seems to affect
> 'footprint' during bootup.

Yeah, it's basically a special case pager.

Ian.
On Fri, Feb 17, 2012 at 4:43 PM, Ian Campbell <Ian.Campbell@citrix.com> wrote:

> My point is that the memory knobs should be exposed to the user of xl as
> semantically meaningful options without the need to refer to "the paging
> target" or "the balloon target", which is why there should not IMHO be
> "xl commands to adjust just ballooning and/or paging".

I disagree with this. I agree completely that the default interface
should just be, "How much memory is my VM using", and balloon drivers
or paging shouldn't have to be of concern to them. But there are times
when admins will need to dig in deeper and play with specific values.
We should make the common case simple, and the complicated case
possible.

What about the following interface:

 maxmem=X
 memory=M
 xenpaging=[off|on|delay]
 pagingdelay=60
 pagingdelayboot=180

 xl mem-set domain M
  xenpaging off: Set balloon target to M
  xenpaging on: Set paging target to M
  xenpaging delay: Set balloon target to M, and wait for actual memory
   to reach M. If it hasn't reached it by $paging_delay seconds, set
   balloon target to M.

 xl mem-balloon-set domain M
  Set balloon target to M
 xl mem-paging-set domain M
  Set paging target to M

Start-of-day:
 xenpaging off: Set balloon target to M, use PoD
 xenpaging on: ??
 xenpaging delay: Set balloon target to M, use PoD. Wait
  $pagingdelayboot seconds, if target not reached, set paging?

 -George
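A sketch of the mem-set dispatch described above, in C. All helper names
are invented stand-ins, and the delay branch sets the *paging* target as
the fallback, per the correction agreed later in the thread:

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <unistd.h>

  /* stand-ins for writing the balloon and paging targets */
  static void set_balloon_target(uint64_t kb)
  { printf("balloon target -> %llu KiB\n", (unsigned long long)kb); }
  static void set_paging_target(uint64_t kb)
  { printf("paging target  -> %llu KiB\n", (unsigned long long)kb); }
  static bool target_reached(uint64_t kb)
  { (void)kb; return false; /* would query the domain's actual memory */ }

  enum paging_mode { PAGING_OFF, PAGING_ON, PAGING_DELAY };

  static void mem_set(enum paging_mode mode, uint64_t m_kb, unsigned delay_s)
  {
      switch (mode) {
      case PAGING_OFF:
          set_balloon_target(m_kb);
          break;
      case PAGING_ON:
          set_paging_target(m_kb);
          break;
      case PAGING_DELAY:
          set_balloon_target(m_kb);
          sleep(delay_s);              /* give the balloon driver a chance */
          if (!target_reached(m_kb))
              set_paging_target(m_kb); /* enforce the footprint */
          break;
      }
  }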
On Mon, Feb 20, George Dunlap wrote:

> Start-of-day:
>  xenpaging off: Set balloon target to M, use PoD
>  xenpaging on: ??
>  xenpaging delay: Set balloon target to M, use PoD. Wait
>   $pagingdelayboot seconds, if target not reached, set paging?

Is the delay required?
If paging and PoD target is M, xenpaging will do nothing because the
guest can not exceed M (it will crash with OOM).

Olaf
On Mon, Feb 20, 2012 at 11:12 AM, Olaf Hering <olaf@aepfle.de> wrote:
> On Mon, Feb 20, George Dunlap wrote:
>
>> Start-of-day:
>>  xenpaging off: Set balloon target to M, use PoD
>>  xenpaging on: ??
>>  xenpaging delay: Set balloon target to M, use PoD. Wait
>>   $pagingdelayboot seconds, if target not reached, set paging?
>
> Is the delay required?
> If paging and PoD target is M, xenpaging will do nothing because the
> guest can not exceed M (it will crash with OOM).

Ah, of course -- you don't need paging because it already has M memory.
Forgot about that.

It would be nice, of course, if the pager could act as a back-fill for
these PoD pages; but that's another project, I think.

So that leaves us with:

 maxmem=X
 memory=M
 xenpaging=[off|on|delay]
 pagingdelay=60

 xl mem-set domain M
  xenpaging off: Set balloon target to M
  xenpaging on: Set paging target to M
  xenpaging delay: Set balloon target to M, and wait for actual memory
   to reach M. If it hasn't reached it by $paging_delay seconds, set
   balloon target to M.

 xl mem-balloon-set domain M
  Set balloon target to M
 xl mem-paging-set domain M
  Set paging target to M

Start-of-day:
 xenpaging off/delay: Set balloon target to M, use PoD
 xenpaging on: ??

Olaf, what do you do right now for booting a guest in paging mode with
memory < maxmem?

 -George
On Mon, Feb 20, George Dunlap wrote:

> Olaf, what do you do right now for booting a guest in paging mode with
> memory < maxmem?

xenpaging exits because XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE returns
-EXDEV when PoD is detected.

Olaf
On Mon, Feb 20, George Dunlap wrote:

> xl mem-set domain M
>  xenpaging off: Set balloon target to M
>  xenpaging on: Set paging target to M
>  xenpaging delay: Set balloon target to M, and wait for actual memory
>   to reach M. If it hasn't reached it by $paging_delay seconds, set
>   balloon target to M.

The tristate instead of a boolean is not really needed as well.

Right now a reduction of the balloon target depends on how fast the
guest's balloon driver can claim memory from the guest OS. This means it
could take an infinite amount of time to reach the requested target.
With paging the request to reduce the "footprint" can be reached as fast
as xenpaging can page-out gfns (and page-in busy gfns).

So having a delay or not seems to depend on how mem-set is supposed to
react, either now or at an (kind of) undefined time in the future.

If "now" is not the desired mode then xenpaging can slowly work toward a
new lower target by calling its evict_pages() with low numbers and an
adjustable delay between calls.

Olaf
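A sketch of the slow approach described above. Here evict_batch(),
pages_in_use() and struct pager are placeholders standing in for
xenpaging's real evict_pages() machinery, whose actual signature differs:

  #include <unistd.h>

  struct pager;  /* placeholder for xenpaging's struct xenpaging */
  extern unsigned long pages_in_use(struct pager *p);
  extern int evict_batch(struct pager *p, int num);

  static void approach_target(struct pager *p, unsigned long target_pages,
                              int batch, unsigned delay_s)
  {
      while (pages_in_use(p) > target_pages) {
          if (evict_batch(p, batch) <= 0)
              break;          /* nothing evictable right now */
          sleep(delay_s);     /* adjustable pause between calls */
      }
  }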
On Mon, 2012-02-20 at 15:38 +0000, Olaf Hering wrote:
> On Mon, Feb 20, George Dunlap wrote:
>
> > xl mem-set domain M
> >  xenpaging off: Set balloon target to M
> >  xenpaging on: Set paging target to M
> >  xenpaging delay: Set balloon target to M, and wait for actual memory
> >   to reach M. If it hasn't reached it by $paging_delay seconds, set
> >   balloon target to M.
>
> The tristate instead of a boolean is not really needed as well.
>
> Right now a reduction of the balloon target depends on how fast the
> guest's balloon driver can claim memory from the guest OS. This means it
> could take an infinite amount of time to reach the requested target.
> With paging the request to reduce the "footprint" can be reached as fast
> as xenpaging can page-out gfns (and page-in busy gfns).
>
> So having a delay or not seems to depend on how mem-set is supposed to
> react, either now or at an (kind of) undefined time in the future.
>
> If "now" is not the desired mode then xenpaging can slowly work toward a
> new lower target by calling its evict_pages() with low numbers and an
> adjustable delay between calls.

I don't think this is a good option. Consider the following cases:
 1. The guest is cooperative and the balloon driver is active, but it
    will take 30s to reach the target.
 2. The guest is uncooperative, or the balloon driver is inactive /
    broken / stuck / unable to get pages.

In case 1, the paging driver will end up paging a bunch of pages out for
30s, only to page them back in again after the balloon driver reaches
its target. Also, you're basically guaranteed to hit the double-paging
problem: the pager will find an old page and page it out; then in the
very near future, the guest OS will decide to page *that very page* out,
touch it (causing the pager to page it back in), then write it out to
disk itself -- causing at least 3 page writes per page paged out. This
will unnecessarily slow the system down (and the balloon driver down)
and waste IO bandwidth. The degree to which the pager messes things up
will depend on how fast you make it. Fast => messes up a lot, slow =>
doesn't mess much up. So slow is good in this case.

In case 2, the time it takes to reach the target will depend entirely on
the rate at which you page things out. Slow => takes a long time, fast
=> happens relatively quickly. So slow is bad in this case.

So no matter what we choose, something is really bad. Under the "delay"
option, 1 will behave optimally -- no wasted I/O, no double paging; and
2 will take a fixed amount of time before going full-bore; so it is, I
believe, better in both cases.

You don't have to implement the "delay" option if you don't want to; as
long as we expose the low-level controls, the administrator can do the
"fall-back" himself, and then someone (possibly me) can implement
something like that in the future (either before or after the 4.2
release). However, there should be a way that paging can be on but "xl
mem-set" will set the balloon target. If we have "paging=manual", that
could mean, "I'll start the pager daemon, but you'll have to call the xl
mem-set-paging-target yourself."

 -George
On Mon, Feb 20, 2012 at 3:19 PM, Olaf Hering <olaf@aepfle.de> wrote:
> On Mon, Feb 20, George Dunlap wrote:
>
>> Olaf, what do you do right now for booting a guest in paging mode with
>> memory < maxmem?
>
> xenpaging exits because XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE returns
> -EXDEV when PoD is detected.

OK. So we have two options, as I see it, if someone starts a guest with
both xenpaging=yes and memory<maxmem:
 * Refuse to build the domain and return an error.
 * Figure out how to boot with memory<maxmem only with the pager. I
   guess this would involve building with half the memory, and telling
   the pager that some pages are pre-paged out...?

If the second option is too much work before the release, we should go
with the first option.

 -George
On Mon, 2012-02-20 at 10:44 +0000, George Dunlap wrote:
> On Fri, Feb 17, 2012 at 4:43 PM, Ian Campbell <Ian.Campbell@citrix.com> wrote:
>
> > My point is that the memory knobs should be exposed to the user of xl as
> > semantically meaningful options without the need to refer to "the paging
> > target" or "the balloon target", which is why there should not IMHO be
> > "xl commands to adjust just ballooning and/or paging".
>
> I disagree with this. I agree completely that the default interface
> should just be, "How much memory is my VM using", and balloon drivers
> or paging shouldn't have to be of concern to them. But there are times
> when admins will need to dig in deeper and play with specific values.
> We should make the common case simple, and the complicated case
> possible.

Yes, that seems reasonable. My main concern was that we appeared to be
about to expose the complicated case as the primary interface that we
would recommend people use in the common case / by default.

I guess that's what I was really trying to say when I said these advanced
options should be debug only (i.e. for debug read "advanced" too).

Ian.
On Mon, 2012-02-20 at 14:48 +0000, George Dunlap wrote:
> On Mon, Feb 20, 2012 at 11:12 AM, Olaf Hering <olaf@aepfle.de> wrote:
> > On Mon, Feb 20, George Dunlap wrote:
> >
> >> Start-of-day:
> >>  xenpaging off: Set balloon target to M, use PoD
> >>  xenpaging on: ??
> >>  xenpaging delay: Set balloon target to M, use PoD. Wait
> >>   $pagingdelayboot seconds, if target not reached, set paging?
> >
> > Is the delay required?
> > If paging and PoD target is M, xenpaging will do nothing because the
> > guest can not exceed M (it will crash with OOM).
>
> Ah, of course -- you don't need paging because it already has M memory.
> Forgot about that.
>
> It would be nice, of course, if the pager could act as a back-fill for
> these PoD pages; but that's another project, I think.
>
> So that leaves us with:
>
>  maxmem=X
>  memory=M
>  xenpaging=[off|on|delay]
>  pagingdelay=60

FWIW these two can be expressed as:
 xenpaging=[off|on]
 pagingdelay=[0|60]

(and let's drop the "xen" prefix)

[...]

> xl mem-set domain M
>  xenpaging off: Set balloon target to M
>  xenpaging on: Set paging target to M
>  xenpaging delay: Set balloon target to M, and wait for actual memory
>   to reach M. If it hasn't reached it by $paging_delay seconds, set
>   balloon target to M.

Did you mean "paging target" the second time you said "balloon target"
in this one? I'll assume so. I would also suggest
 s/If it hasn't reached it by/After/
since I think that will simplify things somewhat and setting page target
to M makes no odds if the guest has ballooned to M.

I don't really like mem-set having such completely different behaviour
depending on whether paging is on or off.

As you described before, having paging on == set paging and balloon
target to M results in fairly suboptimal behaviour and the name would
also lead to people thinking it is the one they should use.

So why not make the "on" case the same as your "delay" case and do away
with the distinction? If advanced users really want what you describe as
"on" then they can set the delay to 0.

If the paging daemon could be started/stopped on demand (rather than
being a domain build time choice) we could even consider making paging
the default.

> xl mem-balloon-set domain M
>  Set balloon target to M
> xl mem-paging-set domain M
>  Set paging target to M

How do these interact with mem-set, especially in the delay case?
e.g. would mem-paging-set disable the after-delay behaviour of mem-set?
Should we have "mem-paging-set domain auto" to turn that back on?

We also need to consider the behaviour of mem-set to increase things.
Obviously you don't want to leave the paging target set to the smaller
value for a minute after setting the balloon target. I think we want to
set it straight away in that case, if not before setting the balloon.

How about the following? I've tried to include the "user facing"
description as well as the actual implementation.
I think the "user facing" portion is actually where we disagree but I also suspect that we may not actually disagree -- it''s just that we are talking in terms of implementation so we don''t see that the user facing interface is the same in what we are each thinking of ;-) maxmem=X # maximum RAM the domain can ever see memory=M # current amount of RAM seen by the domain paging=[off|on] # allow the amount of memory a guest # thinks it has to differ from the # amount actually available to it (its # "footprint") pagingauto=[off|on] (dflt=on) # enable automatic enforcement of # "footprint" for guests which do not # voluntarily obey changes to memory=M pagingdelay=60 # amount of time to give a guest to # voluntarily comply before enforcing a # footprint xl mem-set domain M Sets the amount of RAM which the guest believes it has available to M. The guest should arrange to use only that much RAM and return the rest to the hypervisor (e.g. by using a balloon driver). If the guest does not do so then the host may use technical means to enforce the guest''s footprint of M. The guest may suffer a performance penalty for this enforcement. paging off: set balloon target to M. paging on: set balloon target to M. if pagingauto: wait delay IFF new target < old set paging target to M support -t <delay> to override default? xl mem-paging-set domain N Overrides the amount of RAM which the guest actually has available (its "footprint") to N. The host will use technical means to continue to provide the illusion to the guest that it has memory=M (as adjusted by mem-set). There may be a performance penalty for this. paging off: error paging on: set paging target set pagingauto=off xl mem-paging-set domain auto Automatically manage paging. Request that the guest uses memory=M (current value of memory, as adjusted by mem-set) enforced when the guest is uncooperative (as described in "mem-set") paging off: error paging on: set paging target to M set pagingauto=on No need for a separate balloon-set since that == mem-set with pagingauto=off. Perhaps a separate "mem-paging-set domain manual" would be handy to enable that mode without having to remember M so you can use it as N We could consider making "mem-paging-set domain N" fail with an error unless you previously set manual, to prevent users accidentally disabling the recommended automatic behaviour e.g. by typing mem-paging-set when they mean mem-set. I liked Andres'' suggestions of footprint as a term here BTW so I would prefer "mem-footprint-set" to "mem-paging-set" (at least I think so, I''m not 100% on that). If we don''t have balloon-set then avoiding the name paging seems like a good idea too. Other possible names might be "mem-override-set" or something. I don''t really like the extra configuration option for pagingauto but I think pagingauto and mem-{paging,footprint}-set should be considered advanced options and by default we would recommend that folks just set "paging=on" and use mem-set. It should be reasonably clear to users that if they disable auto mode then they are expected to understand what is happening sufficiently to make their own choices about paging targets etc. We can probably think of more useful algorithms than raw pagingdelay (i.e. based on rate of progress or something) which might be useful for larger domains making large changes to the balloon -- lets leave that aside for now though. Likewise "auto" mode allows scope for us to implement improved algorithms in the future. Ian.
On Tue, Feb 21, Ian Campbell wrote:

> If the paging daemon could be started/stopped on demand (rather than
> being a domain build time choice) we could even consider making paging
> the default.

It would be nice to allow starting paging on demand by xl mem-paging-set.
xl could watch memory/target-tot_pages; if it changes and the pager
wasn't started during creation, it could start it on request.

Olaf
On Tue, 2012-02-21 at 11:27 +0000, Olaf Hering wrote:
> On Tue, Feb 21, Ian Campbell wrote:
>
> > If the paging daemon could be started/stopped on demand (rather than
> > being a domain build time choice) we could even consider making paging
> > the default.
>
> It would be nice to allow starting paging on demand by xl mem-paging-set.
> xl could watch memory/target-tot_pages; if it changes and the pager
> wasn't started during creation, it could start it on request.

Does it need the watch? Can't the xl which is doing the "mem-set" (or
whichever) just start the pager?

Ian.
On Tue, 2012-02-21 at 09:59 +0000, Ian Campbell wrote:
> On Mon, 2012-02-20 at 14:48 +0000, George Dunlap wrote:
> > So that leaves us with:
> >
> >  maxmem=X
> >  memory=M
> >  xenpaging=[off|on|delay]
> >  pagingdelay=60
>
> FWIW these two can be expressed as:
>  xenpaging=[off|on]
>  pagingdelay=[0|60]
>
> (and let's drop the "xen" prefix)
>
> [...]
> > xl mem-set domain M
> >  xenpaging off: Set balloon target to M
> >  xenpaging on: Set paging target to M
> >  xenpaging delay: Set balloon target to M, and wait for actual memory
> >   to reach M. If it hasn't reached it by $paging_delay seconds, set
> >   balloon target to M.
>
> Did you mean "paging target" the second time you said "balloon target"
> in this one? I'll assume so.

Er, yes, that's what I meant. :-)

> I would also suggest
>  s/If it hasn't reached it by/After/
> since I think that will simplify things somewhat and setting page target
> to M makes no odds if the guest has ballooned to M.
>
> I don't really like mem-set having such completely different behaviour
> depending on whether paging is on or off.
>
> As you described before, having paging on == set paging and balloon
> target to M results in fairly suboptimal behaviour and the name would
> also lead to people thinking it is the one they should use.
>
> So why not make the "on" case the same as your "delay" case and do away
> with the distinction? If advanced users really want what you describe as
> "on" then they can set the delay to 0.

The only thing with this is that then the command will by default pause
until we reach the target, which may be several seconds. We should make
sure to print a message saying that's what we're doing, so users don't
get confused.

> If the paging daemon could be started/stopped on demand (rather than
> being a domain build time choice) we could even consider making paging
> the default.
>
> > xl mem-balloon-set domain M
> >  Set balloon target to M
> > xl mem-paging-set domain M
> >  Set paging target to M
>
> How do these interact with mem-set, especially in the delay case?

My idea was that "xl mem-paging-set domain M" would basically be
equivalent to xenstore-write /local/[whatever]/[whatever]: that is, no
attempt at synchronization. If I do "xl mem-set" with a delay in one
window, then do "xl mem-paging-set" in another window, the first one
will take effect immediately, then the other one will take effect after
the delay. If that's not what you want, either Ctrl-C the first one or
wait for it to finish. :-)

> e.g. would mem-paging-set disable the after-delay behaviour of mem-set?
> Should we have "mem-paging-set domain auto" to turn that back on?

That's one of the reasons I conceived of having mem-balloon-set: if you
want to switch to full-manual, you just switch to using full-manual
commands.
If/when you have stuff in a state that the simpler command will work
again, you can switch back.

> We also need to consider the behaviour of mem-set to increase things.
> Obviously you don't want to leave the paging target set to the smaller
> value for a minute after setting the balloon target. I think we want to
> set it straight away in that case, if not before setting the balloon.

Yes; if we're increasing the target, we should set the paging target
immediately.

> How about the following? I've tried to include the "user facing"
> description as well as the actual implementation. I think the "user
> facing" portion is actually where we disagree, but I also suspect that
> we may not actually disagree -- it's just that we are talking in terms
> of implementation so we don't see that the user facing interface is the
> same in what we are each thinking of ;-)
>
>  maxmem=X                       # maximum RAM the domain can ever see
>  memory=M                       # current amount of RAM seen by the domain
>  paging=[off|on]                # allow the amount of memory a guest
>                                 # thinks it has to differ from the
>                                 # amount actually available to it (its
>                                 # "footprint")
>  pagingauto=[off|on] (dflt=on)  # enable automatic enforcement of
>                                 # "footprint" for guests which do not
>                                 # voluntarily obey changes to memory=M
>  pagingdelay=60                 # amount of time to give a guest to
>                                 # voluntarily comply before enforcing a
>                                 # footprint
>
>  xl mem-set domain M
>   Sets the amount of RAM which the guest believes it has available
>   to M. The guest should arrange to use only that much RAM and
>   return the rest to the hypervisor (e.g. by using a balloon
>   driver). If the guest does not do so then the host may use
>   technical means to enforce the guest's footprint of M. The guest
>   may suffer a performance penalty for this enforcement.
>
>   paging off: set balloon target to M.
>   paging on:  set balloon target to M.
>               if pagingauto:
>                   wait delay IFF new target < old
>                   set paging target to M
>               support -t <delay> to override default?
>
>  xl mem-paging-set domain N
>   Overrides the amount of RAM which the guest actually has
>   available (its "footprint") to N. The host will use technical
>   means to continue to provide the illusion to the guest that it
>   has memory=M (as adjusted by mem-set). There may be a
>   performance penalty for this.
>
>   paging off: error
>   paging on:  set paging target
>               set pagingauto=off
>
>  xl mem-paging-set domain auto
>   Automatically manage paging. Request that the guest uses
>   memory=M (current value of memory, as adjusted by mem-set)
>   enforced when the guest is uncooperative (as described in
>   "mem-set")
>
>   paging off: error
>   paging on:  set paging target to M
>               set pagingauto=on
>
> No need for a separate balloon-set since that == mem-set with
> pagingauto=off.
>
> Perhaps a separate "mem-paging-set domain manual" would be handy to
> enable that mode without having to remember M so you can use it as N.

I'd be OK with this.

> We could consider making "mem-paging-set domain N" fail with an error
> unless you previously set manual, to prevent users accidentally
> disabling the recommended automatic behaviour e.g. by typing
> mem-paging-set when they mean mem-set.
>
> I liked Andres' suggestion of footprint as a term here BTW so I would
> prefer "mem-footprint-set" to "mem-paging-set" (at least I think so, I'm
> not 100% on that). If we don't have balloon-set then avoiding the name
> paging seems like a good idea too. Other possible names might be
> "mem-override-set" or something.
Well for one, "footprint" to me would imply "I don't care how you got
there, just make it take this much memory". So saying in the docs that
"xl mem-set" would attempt to set the memory footprint would be OK. But
I definitely don't think we should use "footprint" to mean "paging
target". Even apart from the fact that the name to me means something
else, the config options are called "paging". In any case, if it's
supposed to be an "advanced feature", it should be OK to expect the user
to know (or find out) what paging means.

> I don't really like the extra configuration option for pagingauto but I
> think pagingauto and mem-{paging,footprint}-set should be considered
> advanced options and by default we would recommend that folks just set
> "paging=on" and use mem-set. It should be reasonably clear to users that
> if they disable auto mode then they are expected to understand what is
> happening sufficiently to make their own choices about paging targets
> etc.
>
> We can probably think of more useful algorithms than raw pagingdelay
> (i.e. based on rate of progress or something) which might be useful for
> larger domains making large changes to the balloon -- let's leave that
> aside for now though. Likewise "auto" mode allows scope for us to
> implement improved algorithms in the future.
On Tue, Feb 21, Ian Campbell wrote:
> On Tue, 2012-02-21 at 11:27 +0000, Olaf Hering wrote:
> > On Tue, Feb 21, Ian Campbell wrote:
> >
> > > If the paging daemon could be started/stopped on demand (rather than
> > > being a domain build time choice) we could even consider making
> > > paging the default.
> >
> > It would be nice to allow starting paging on demand by xl
> > mem-paging-set. xl could watch memory/target-tot_pages; if it changes
> > and the pager wasn't started during creation, it could start it on
> > request.
>
> Does it need the watch? Can't the xl which is doing the "mem-set" (or
> whichever) just start the pager?

Then xenpaging would belong to the xl monitoring process. But yes, either
way is fine with me.

Olaf
On Tue, 2012-02-21 at 12:20 +0000, George Dunlap wrote:
> On Tue, 2012-02-21 at 09:59 +0000, Ian Campbell wrote:
> > So why not make the "on" case the same as your "delay" case and do away
> > with the distinction? If advanced users really want what you describe as
> > "on" then they can set the delay to 0.
>
> The only thing with this is that then the command will by default pause
> until we reach the target, which may be several seconds. We should make
> sure to print a message saying that's what we're doing, so users don't
> get confused.

I'm not sure this isn't actually a feature...

 $ xl mem-set dom 128
 xl: setting domain `dom' memory target to 128M
 xl: waiting for domain to reach target: 60... 55... 50... (etc) 5... timeout!
 xl: domain `dom' did not reach target (reached 156M)
 xl: enabling paging, setting domain footprint to 128M.

> > > xl mem-balloon-set domain M
> > >  Set balloon target to M
> > > xl mem-paging-set domain M
> > >  Set paging target to M
> >
> > How do these interact with mem-set, especially in the delay case?
>
> My idea was that "xl mem-paging-set domain M" would basically be
> equivalent to xenstore-write /local/[whatever]/[whatever]: that is, no
> attempt at synchronization. If I do "xl mem-set" with a delay in one
> window, then do "xl mem-paging-set" in another window, the first one
> will take effect immediately, then the other one will take effect after
> the delay. If that's not what you want, either Ctrl-C the first one or
> wait for it to finish. :-)
>
> > e.g. would mem-paging-set disable the after-delay behaviour of mem-set?
> > Should we have "mem-paging-set domain auto" to turn that back on?
>
> That's one of the reasons I conceived of having mem-balloon-set: if you
> want to switch to full-manual, you just switch to using full-manual
> commands. If/when you have stuff in a state that the simpler command
> will work again, you can switch back.

What if you switch back without making sure you are in such a state? I
think switching between the two is where the potential for unexpected
behaviour is most likely.

> > How about the following? I've tried to include the "user facing"
> > description as well as the actual implementation. I think the "user
> > facing" portion is actually where we disagree, but I also suspect that
> > we may not actually disagree -- it's just that we are talking in terms
> > of implementation so we don't see that the user facing interface is
> > the same in what we are each thinking of ;-)
> >
> >  maxmem=X                       # maximum RAM the domain can ever see
> >  memory=M                       # current amount of RAM seen by the domain
> >  paging=[off|on]                # allow the amount of memory a guest
> >                                 # thinks it has to differ from the
> >                                 # amount actually available to it (its
> >                                 # "footprint")
> >  pagingauto=[off|on] (dflt=on)  # enable automatic enforcement of
> >                                 # "footprint" for guests which do not
> >                                 # voluntarily obey changes to memory=M
> >  pagingdelay=60                 # amount of time to give a guest to
> >                                 # voluntarily comply before enforcing a
> >                                 # footprint
> >
> >  xl mem-set domain M
> >   Sets the amount of RAM which the guest believes it has available
> >   to M. The guest should arrange to use only that much RAM and
> >   return the rest to the hypervisor (e.g. by using a balloon
> >   driver). If the guest does not do so then the host may use
> >   technical means to enforce the guest's footprint of M. The guest
> >   may suffer a performance penalty for this enforcement.
> >
> >   paging off: set balloon target to M.
> >   paging on:  set balloon target to M.
> > if pagingauto: > > wait delay IFF new target < old > > set paging target to M > > support -t <delay> to override default? > > > > xl mem-paging-set domain N > > Overrides the amount of RAM which the guest actually has > > available (its "footprint") to N. The host will use technical > > means to continue to provide the illusion to the guest that it > > has memory=M (as adjusted by mem-set). There may be a > > performance penalty for this. > > > > paging off: error > > paging on: set paging target > > set pagingauto=off > > > > xl mem-paging-set domain auto > > Automatically manage paging. Request that the guest uses > > memory=M (current value of memory, as adjusted by mem-set) > > enforced when the guest is uncooperative (as described in > > "mem-set") > > > > paging off: error > > paging on: set paging target to M > > set pagingauto=on > > > > No need for a separate balloon-set since that == mem-set with > > pagingauto=off. > > > > Perhaps a separate "mem-paging-set domain manual" would be handy to > > enable that mode without having to remember M so you can use it as N > > I''d be OK with this.Great. I like that you have to explicitly ask for the safety wheels to come off and explicitly put them back on again. It avoids the corner cases I alluded to above (at least I hope so).> > > We could consider making "mem-paging-set domain N" fail with an error > > unless you previously set manual, to prevent users accidentally > > disabling the recommended automatic behaviour e.g. by typing > > mem-paging-set when they mean mem-set. > > > > I liked Andres'' suggestions of footprint as a term here BTW so I would > > prefer "mem-footprint-set" to "mem-paging-set" (at least I think so, I''m > > not 100% on that). If we don''t have balloon-set then avoiding the name > > paging seems like a good idea too. Other possible names might be > > "mem-override-set" or something. > > Well for one, "footprint" to me would imply "I don''t care how you got > there, just make it take this much memory". So saying in the docs that > "xl mem-set" would attempt to set the memory footprint would be OK. But > I definitely don''t think we should use "footprint" to mean "paging > target". Even apart from the fact that the name to me means something > else, the config options are called "paging". In any case, if it''s > supposed to be an "advanced feature", it should be OK to expect the user > to know (or find out) what paging means.I see what you mean, yes. Without wishing to put words in Andres'' mouth I expect that he intended "footprint" to cover other technical means than paging too -- specifically I expect he was thinking of page sharing. (I suppose it also covers PoD to some extent too, although that is something of a special case) While I don''t expect there will be a knob to control number of shared pages (either you can share some pages or not, the settings would be more about how aggressively you search for sharable pages) it might be useful to consider the interaction between paging and sharing, I expect that most sharing configurations would want to have paging on at the same time (for safety). It seems valid to me to want to say "make the guest use this amount of actual RAM" and to achieve that by sharing what you can and then paging the rest. Ian.
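For concreteness, a minimal sketch of the proposed mem-set sequence
under paging=on/pagingauto=on. The set_balloon_target() and
set_paging_target() helpers are hypothetical (libxl has no such calls;
they stand for the writes to memory/target and memory/target-tot_pages),
while libxl_domain_info() and the current_memkb field are real; a real
version would also only wait when the new target is below the old one:

    #include <stdio.h>
    #include <unistd.h>
    #include <inttypes.h>
    #include <libxl.h>

    /* Hypothetical helpers standing in for the xenstore writes to
     * memory/target and memory/target-tot_pages. */
    static void set_balloon_target(libxl_ctx *ctx, uint32_t domid,
                                   uint64_t kb)
    { (void)ctx; (void)domid; (void)kb; }
    static void set_paging_target(libxl_ctx *ctx, uint32_t domid,
                                  uint64_t kb)
    { (void)ctx; (void)domid; (void)kb; }

    /* Sketch of "xl mem-set D M": balloon first, give the guest
     * `delay` seconds to comply, then enforce the footprint via the
     * paging target. */
    int mem_set(libxl_ctx *ctx, uint32_t domid, uint64_t target_kb,
                int delay)
    {
        libxl_dominfo info;
        int left;

        set_balloon_target(ctx, domid, target_kb);

        for (left = delay; left > 0; left -= 5) {
            if (libxl_domain_info(ctx, &info, domid))
                return 1;
            if (info.current_memkb <= target_kb)
                return 0;          /* guest complied, no enforcement */
            fprintf(stderr,
                    "xl: waiting for domain to reach target: %d...\n",
                    left);
            sleep(5);
        }

        if (libxl_domain_info(ctx, &info, domid))
            return 1;
        fprintf(stderr,
                "xl: domain did not reach target (reached %"PRIu64"K), "
                "setting paging target\n", info.current_memkb);
        set_paging_target(ctx, domid, target_kb);
        return 0;
    }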
On Thu, 2012-02-23 at 10:42 +0000, Ian Campbell wrote:
> What if you switch back without making sure you are in such a state?
> I think switching between the two is where the potential for
> unexpected behaviour is most likely.

Yeah, correctly predicting what would happen requires understanding
what mem-set does under the hood.

> I like that you have to explicitly ask for the safety wheels to come
> off and explicitly put them back on again. It avoids the corner cases
> I alluded to above (at least I hope so).

Yes, I think your suggestion sounds more like driving a car with a
proper hood, and less like driving a go-kart with the engine
exposed. :-)

> Without wishing to put words in Andres' mouth I expect that he
> intended "footprint" to cover other technical means than paging too
> -- specifically I expect he was thinking of page sharing. (I suppose
> it also covers PoD to some extent too, although that is something of
> a special case.)
>
> While I don't expect there will be a knob to control the number of
> shared pages (either you can share some pages or not; the settings
> would be more about how aggressively you search for sharable pages)
> it might be useful to consider the interaction between paging and
> sharing. I expect that most sharing configurations would want to have
> paging on at the same time (for safety). It seems valid to me to want
> to say "make the guest use this amount of actual RAM" and to achieve
> that by sharing what you can and then paging the rest.

Yes, it's worth thinking about, as long as it doesn't stall the paging
UI too long. :-)

The thing is, you can't actually control how much sharing happens. That
depends largely on whether the guests create and maintain pages which
are share-able, and whether the sharing detection algorithm can find
such pages. Even if two guests are sharing 95% of their pages, at any
point one of the guests may simply go wild and change them all. So it
seems to me that shared pages need to be treated like sunny days in the
UK: enjoy them while they're there, but don't count on them. :-)

Given that, I think that each VM should have a "guaranteed minimum
memory footprint", which would be the amount of actual host RAM it will
have if suddenly no shared pages become available. After that, there
should be a policy for how to use the "windfall" or "bonus" pages
generated by sharing.

One sensible default policy would be "givers gain": every guest which
creates a page which happens to be shared by another VM gets a share of
the pages freed up by the sharing. Another policy might be "communism",
where the freed-up pages are shared among all VMs, regardless of whose
pages made the benefit possible. (In fact, if shared pages come from
zero pages, they should probably be given to VMs with no zero pages,
regardless of the policy.)

However, I'd say the main public "knobs" should just consist of two
things:
 * xl mem-set memory-target. This is the minimum amount of physical RAM
   a guest can get; we make sure that the sum of these for all VMs does
   not exceed the host capacity.
 * xl sharing-policy [policy]. This tells the sharing system how to use
   the "windfall" pages gathered from page sharing.

Then internally, the sharing system should combine the "minimum
footprint" with the number of extra pages and the policy to set the
amount of memory actually used (via balloon driver or paging).

You could imagine a manual mode, where the administrator shares out the
extra pages manually to VMs that he thinks need them; but because those
extra pages may go away at any time, that needs to be a separate knob
(and preferably one which most admins don't ever touch).

Andres, what do you think?

 -George
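As a sketch of how such a windfall policy might be computed -- purely
illustrative, since per-domain shared-page accounting is not currently
exposed (as discussed later in the thread) and grant_pages_to_domain()
is a made-up helper:

    #include <stdint.h>

    struct dom_share {
        uint32_t domid;
        uint64_t shared_pages;  /* pages this domain contributed to sharing */
    };

    enum policy { GIVERS_GAIN, COMMUNISM };

    /* Hypothetical: hand `pages` of windfall back to a domain. */
    static void grant_pages_to_domain(uint32_t domid, uint64_t pages)
    { (void)domid; (void)pages; }

    /* Distribute `windfall` pages freed by sharing among n domains.
     * "givers gain": prorate by contribution; "communism": equal
     * shares for everyone. */
    void distribute_windfall(const struct dom_share *doms, int n,
                             uint64_t windfall, enum policy p)
    {
        uint64_t total_shared = 0;
        int i;

        for (i = 0; i < n; i++)
            total_shared += doms[i].shared_pages;

        for (i = 0; i < n; i++) {
            uint64_t bonus;
            if (p == GIVERS_GAIN && total_shared)
                bonus = windfall * doms[i].shared_pages / total_shared;
            else
                bonus = n ? windfall / n : 0;
            grant_pages_to_domain(doms[i].domid, bonus);
        }
    }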
Andres Lagar-Cavilla
2012-Feb-23 16:22 UTC
Re: [PATCH] RFC: initial libxl support for xenpaging
> On Thu, 2012-02-23 at 10:42 +0000, Ian Campbell wrote:
> [...]
> You could imagine a manual mode, where the administrator shares out
> the extra pages manually to VMs that he thinks need them; but because
> those extra pages may go away at any time, that needs to be a
> separate knob (and preferably one which most admins don't ever
> touch).
>
> Andres, what do you think?

I think it's a lot to process :) I will issue a few statements in no
particular order.

How about we have a BoF/powwow on this at the Hackathon?

For the sake of expediency we need a simple UI, with two/three obvious
commands doing things, and then a full arsenal of knob-ery as a
separate entity. I agree with the general sentiment here.

I actually intended 'footprint' to convey a human-understandable name
for what paging is doing. I think if we try to combine under
'footprint' all possible means of trimming pages from the guest, *in
libxl*, we'll end up pleasing nobody.

Taking a few steps back, Olaf's purpose is to be able to control the
*one* knob xenpaging has, with its linear sweep policy, via libxl. (I
guess you have a second knob, throttling how fast you try to page
things back in.)

Somebody has to ask this: are you really sure you want to bake policies
into libxl? What will toolstacks be left with? I think it's great to
wire some straightforward control of xenpaging into libxl -- as
straightforward control of the balloon and PoD is already in place. But
when the conversation starts escalating, the complexity of libxl grows
exponentially, and I get all kinds of shivers.

The original stated goal of libxl is to be a common substrate for
toolstacks. Let toolstacks decide if they want fancier paging or
Marxist sharing, or what not :)

My two cents,
Andres
On Thu, 2012-02-23 at 16:22 +0000, Andres Lagar-Cavilla wrote:
> I think it's a lot to process :) I will issue a few statements in no
> particular order.
>
> How about we have a BoF/powwow on this at the Hackathon?
>
> [...]
>
> The original stated goal of libxl is to be a common substrate for
> toolstacks. Let toolstacks decide if they want fancier paging or
> Marxist sharing, or what not :)

Just a quick comment for clarification: we're talking now about xl, not
libxl. Libxl, as you say, will expose all the knobs to the toolstack,
and allow the toolstack to do what it wishes. But a large number of our
customers will be using xl, which is, in fact, a toolstack built on
libxl. :-) It's the interface to that toolstack we're discussing.

I'll answer more in a bit.

 -George
On Thu, 2012-02-23 at 12:18 +0000, George Dunlap wrote:
> On Thu, 2012-02-23 at 10:42 +0000, Ian Campbell wrote:
> > What if you switch back without making sure you are in such a
> > state? I think switching between the two is where the potential for
> > unexpected behaviour is most likely.
>
> Yeah, correctly predicting what would happen requires understanding
> what mem-set does under the hood.
>
> > I like that you have to explicitly ask for the safety wheels to
> > come off and explicitly put them back on again. It avoids the
> > corner cases I alluded to above (at least I hope so).
>
> Yes, I think your suggestion sounds more like driving a car with a
> proper hood, and less like driving a go-kart with the engine
> exposed. :-)

Yeah, I'm way past "live fast die young" ;-)

> > [...]
>
> Yes, it's worth thinking about, as long as it doesn't stall the
> paging UI too long. :-)

Right. I think the only issue here is whether we make the control
called "paging-foo" or "footprint-foo".

I think your point that this control doesn't actually control sharing
is a good one. In reality it controls paging, and if sharing is enabled
then sharing is a best-effort thing which simply alleviates the need to
do some amount of paging to reach the paging target.

> The thing is, you can't actually control how much sharing happens.
> [...]
>
> Given that, I think that each VM should have a "guaranteed minimum
> memory footprint", which would be the amount of actual host RAM it
> will have if suddenly no shared pages become available. After that,
> there should be a policy for how to use the "windfall" or "bonus"
> pages generated by sharing.
>
> One sensible default policy would be "givers gain": every guest which
> creates a page which happens to be shared by another VM gets a share
> of the pages freed up by the sharing. Another policy might be
> "communism", where the freed-up pages are shared among all VMs,
> regardless of whose pages made the benefit possible. (In fact, if
> shared pages come from zero pages, they should probably be given to
> VMs with no zero pages, regardless of the policy.)

An easy policy to implement initially would be "do nothing and use
tmem".

> However, I'd say the main public "knobs" should just consist of two
> things:
>  * xl mem-set memory-target. This is the minimum amount of physical
>    RAM a guest can get; we make sure that the sum of these for all
>    VMs does not exceed the host capacity.

Isn't this what we've previously called mem-paging-set? We defined
mem-set earlier as controlling the amount of RAM the guest _thinks_ it
has, which is different.

>  * xl sharing-policy [policy]. This tells the sharing system how to
>    use the "windfall" pages gathered from page sharing.
>
> Then internally, the sharing system should combine the "minimum
> footprint" with the number of extra pages and the policy to set the
> amount of memory actually used (via balloon driver or paging).

This is an argument in favour of mem-footprint-set rather than
mem-paging-set?

Here is an updated version of my proposed interface which includes
sharing, I think as you described (modulo the use of mem-paging-set
where you said mem-set above).

I also included "mem-paging-set manual" as an explicit thing, with an
error on "mem-paging-set N" if you don't switch to manual mode. This
might be too draconian -- I'm not wedded to it.

    maxmem=X                      # maximum RAM the domain can ever see
    memory=M                      # current amount of RAM seen by the
                                  # domain
    paging=[off|on]               # allow the amount of memory a guest
                                  # thinks it has to differ from the
                                  # amount actually available to it
                                  # (its "footprint")
    pagingauto=[off|on] (dflt=on) # enable automatic enforcement of
                                  # "footprint" for guests which do not
                                  # voluntarily obey changes to memory=M
    pagingdelay=60                # amount of time to give a guest to
                                  # voluntarily comply before enforcing
                                  # a footprint
    pagesharing=[off|on]          # cause this guest to share pages
                                  # with other similarly enabled guests
                                  # where possible. Requires paging=on.
    pageextrapolicy=...           # controls what happens to extra
                                  # pages gained via sharing (could be
                                  # combined with the pagesharing
                                  # option: [off|policy|...])

Open question -- does pagesharing=on require paging=on? I've tried to
specify things below such that it does not, but it might simplify
things to require this.

xl mem-set domain M
    Sets the amount of RAM which the guest believes it has available
    to M. The guest should arrange to use only that much RAM and
    return the rest to the hypervisor (e.g. by using a balloon
    driver). If the guest does not do so then the host may use
    technical means to enforce the guest's footprint of M. The guest
    may suffer a performance penalty for this enforcement.

    paging off: set balloon target to M.
    paging on:  set balloon target to M.
                if pagingauto:
                    wait delay IFF new target < old
                    set paging target to M
                support -t <delay> to override default?

    Open question -- if a domain balloons to M as requested should it
    still be subject to sharing? There is a performance hit associated
    with sharing (far less than paging though?) but presumably the
    admin would not have enabled sharing if they didn't want this, so
    I think it is right for sharing=on to allow the guest to actually
    have <M assigned to it. Might be a function of the individual
    sharing policy?

xl mem-paging-set domain manual
    Enables manual control of the paging target.

    paging off:  error
    paging on:   set pagingauto=off
    sharing on:  same as paging on.

xl mem-paging-set domain N
    Overrides the amount of RAM which the guest actually has available
    (its "footprint") to N. The host will use technical means to
    continue to provide the illusion to the guest that it has memory=M
    (as adjusted by mem-set). There may be a performance penalty for
    this.

    paging off: error
    paging on:  if pagingauto=on: error
                set paging target
                set pagingauto=off

xl mem-paging-set domain auto
    Automatically manage paging. Request that the guest uses memory=M
    (current value of memory, as adjusted by mem-set), enforced when
    the guest is uncooperative (as described in "mem-set").

    paging off: error
    paging on:  set paging target to M
                set pagingauto=on

xl mem-sharing-policy-set domain [policy]
    Configures the policy for use of extra pages.

    if !paging || pagingauto:
        If the guest's actual usage drops below M due to sharing then
        the extra pages are distributed per the sharing policy.
    else:
        If the guest's actual usage drops below N due to sharing then
        the extra pages are distributed per the sharing policy.

    TBD potential policies.

NB: shared pages reduce a domain's actual usage. Therefore it is
possible that sharing reduces the usage to less than the paging target.
In this case no pages will be paged out.

We should ensure that the sum over all domains of:
    pagingauto(D) ? M : N
does not exceed the amount of host memory.

Ian.
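The closing invariant lends itself to a simple check. A minimal sketch,
assuming a hypothetical get_mem_policy() lookup for each domain's
configured M/N/pagingauto settings; libxl_list_domain() and
libxl_get_physinfo() are existing libxl calls, but the field names and
the 4 KiB page assumption should be checked against the tree:

    #include <stdbool.h>
    #include <stdlib.h>
    #include <libxl.h>

    struct mem_policy { bool pagingauto; uint64_t m_kb, n_kb; };

    /* Hypothetical: look up the domain's configured M, N and
     * pagingauto; in reality this would come from the domain config. */
    static void get_mem_policy(uint32_t domid, struct mem_policy *p)
    {
        (void)domid;
        p->pagingauto = true;
        p->m_kb = p->n_kb = 0;
    }

    /* Check that the sum over all domains of (pagingauto ? M : N)
     * fits in host RAM. */
    bool host_can_honour_guarantees(libxl_ctx *ctx)
    {
        libxl_physinfo phys;
        libxl_dominfo *doms;
        int i, nb = 0;
        uint64_t sum_kb = 0;

        if (libxl_get_physinfo(ctx, &phys))
            return false;
        doms = libxl_list_domain(ctx, &nb);
        if (!doms)
            return false;

        for (i = 0; i < nb; i++) {
            struct mem_policy p;
            get_mem_policy(doms[i].domid, &p);
            sum_kb += p.pagingauto ? p.m_kb : p.n_kb;
        }
        free(doms);

        /* total_pages is in machine pages; assume 4KiB pages here. */
        return sum_kb <= phys.total_pages * 4;
    }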
On Fri, Feb 24, Ian Campbell wrote:
> Here is an updated version of my proposed interface which includes
> sharing, I think as you described (modulo the use of mem-paging-set
> where you said mem-set above).
>
> [...]
>
> xl mem-set domain M
>     [...]
>
>     paging off: set balloon target to M.
>     paging on:  set balloon target to M.
>                 if pagingauto:
>                     wait delay IFF new target < old
>                     set paging target to M
>                 support -t <delay> to override default?

Instead of having two new config options pagingauto= and pagingdelay=,
what about 'xl mem-set -t <seconds>' to adjust the fixed internal value
pagingdelay=? Then '-t 0' could mean pagingauto=off, which means use
both ballooning and paging to reach the "footprint" M.

> [...]
>
> xl mem-paging-set domain N
>     Overrides the amount of RAM which the guest actually has
>     available (its "footprint") to N. The host will use technical
>     means to continue to provide the illusion to the guest that it
>     has memory=M (as adjusted by mem-set). There may be a performance
>     penalty for this.
>
>     paging off: error

Could this be the time to start the pager and give it target N?
On Fri, 2012-02-24 at 15:38 +0000, Olaf Hering wrote:
> On Fri, Feb 24, Ian Campbell wrote:
> > [...]
>
> Instead of having two new config options pagingauto= and pagingdelay=,
> what about 'xl mem-set -t <seconds>' to adjust the fixed internal
> value pagingdelay=? Then '-t 0' could mean pagingauto=off, which
> means use both ballooning and paging to reach the "footprint" M.

So you mean:

    paging on: set balloon target to M.
               if pagingdelay > 0:
                   wait delay IFF new target < old
               else:
                   pagingauto=off
               set paging target to M

or

    paging on: set balloon target to M.
               if pagingdelay > 0:
                   wait delay IFF new target < old
                   set paging target to M
               else:
                   pagingauto=off

? (The difference being whether or not we set the paging target at all
if delay == 0.)

I don't think I like overloading "-t 0" to also turn off auto mode like
that. It makes it less explicit when you are disabling auto mode and
entering "you have to know what you are doing" territory.

With my original proposal you can do

    xl mem-set -t 0 D N

and that will run with both paging and ballooning enabled but will stay
in auto mode, if that's what you want.

If you really want to also turn off auto mode then with my N-1th
proposal you would do:

    xl mem-set -t 0 D N && xl mem-paging-set D N

but that is more explicit about turning off auto mode. In my most
recent proposal you'd have to do:

    xl mem-set -t 0 D N && xl mem-paging-set D manual && xl mem-paging-set D N

which is a little _too_ explicit perhaps. I suspect the previous
proposal was preferable in this regard?

> > [...]
> >
> > xl mem-paging-set domain N
> >     Overrides the amount of RAM which the guest actually has
> >     available (its "footprint") to N. [...]
> >
> >     paging off: error
>
> Could this be the time to start the pager and give it target N?

Here, and at the appropriate time when auto=true.

Ian.
Andres Lagar-Cavilla
2012-Feb-24 17:12 UTC
Re: [PATCH] RFC: initial libxl support for xenpaging
> On Thu, 2012-02-23 at 12:18 +0000, George Dunlap wrote:
> [...]
> An easy policy to implement initially would be "do nothing and use
> tmem".
>
> [...]
>
> Open question -- does pagesharing=on require paging=on? I've tried to
> specify things below such that it does not, but it might simplify
> things to require this.

No it doesn't, from the point of view of the hypervisor & libxc. It
would be strictly an xl constraint. Note that the pager won't be able
to evict shared pages, so it will choose a non-shared victim.

A word of caution: it is not easy to account for shared pages per
domain. Right now the dominfo struct does not report back the count of
"shared" pages for the domain -- although that's trivially fixable.
Even then, the problem is that a page counting as shared does not
strictly mean "gone from the domain footprint". A shared page with two
references (from dom A and dom B) that cow-breaks due to a write from
dom A will still be a shared page with a single reference from dom B,
and count as a shared page for dom B. Also, all shared pages, whether
they are referenced by one, two, or more domains, belong to dom_cow.

What you could do is xc_sharing_freed_pages(), and split the result
across all domains, for a rough estimate of how much each domain is
saving in terms of sharing (divide in equal parts, or prorate by the
domain's shared count). Another tool is xc_memshr_debug_gfn, which will
tell you how many refs to the shared page backing the gfn there are (if
the gfn is indeed shared).

Or, we could also have a "sharing_saved" count per domain to mean
exactly what you're looking for here.

> xl mem-set domain M
>     [...]
>
>     Open question -- if a domain balloons to M as requested should it
>     still be subject to sharing? There is a performance hit
>     associated with sharing (far less than paging though?) but
>     presumably the admin would not have enabled sharing if they
>     didn't want this, so I think it is right for sharing=on to allow
>     the guest to actually have <M assigned to it. Might be a function
>     of the individual sharing policy?

Sharing + balloon is mostly a bad idea. It's not forbidden or broken
from the hypervisor angle, but it's a lose-lose game. Ballooning a
shared page gains you nothing. The ballooner can't know what is shared
and what isn't, in order to make an educated decision. And the sharer
can't foresee what the balloon will victimize. I would make those two
mutually exclusive in xl.

> [...]
>
> xl mem-sharing-policy-set domain [policy]
>     Configures the policy for use of extra pages.
>
>     if !paging || pagingauto:
>         If the guest's actual usage drops below M due to sharing then
>         the extra pages are distributed per the sharing policy.
>     else:
>         If the guest's actual usage drops below N due to sharing then
>         the extra pages are distributed per the sharing policy.

See the above note on determining shared pages per domain.

Andres
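As a rough illustration of the prorating estimate described above,
assuming the xc_sharing_freed_pages() call Andres mentions and taking
the per-domain shared-page counts as an input (the accounting that, as
noted, dominfo does not yet expose):

    #include <stdio.h>
    #include <stdint.h>
    #include <xenctrl.h>

    /* Prorate the host-wide freed-page count by each domain's
     * shared-page count; shared_pages[] stands for the per-domain
     * accounting discussed above. */
    void report_sharing_savings(xc_interface *xch,
                                const uint32_t *domids,
                                const uint64_t *shared_pages, int n)
    {
        long freed = xc_sharing_freed_pages(xch);
        uint64_t total = 0;
        int i;

        if (freed < 0 || n <= 0)
            return;
        for (i = 0; i < n; i++)
            total += shared_pages[i];

        for (i = 0; i < n; i++) {
            uint64_t est = total
                ? (uint64_t)freed * shared_pages[i] / total /* prorated */
                : (uint64_t)freed / n;                      /* equal split */
            printf("dom%u: ~%llu pages saved by sharing\n",
                   domids[i], (unsigned long long)est);
        }
    }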
On Fri, Feb 24, Ian Campbell wrote:
> On Fri, 2012-02-24 at 15:38 +0000, Olaf Hering wrote:
> > [...]
> >
> > Instead of having two new config options pagingauto= and
> > pagingdelay=, what about 'xl mem-set -t <seconds>' to adjust the
> > fixed internal value pagingdelay=? Then '-t 0' could mean
> > pagingauto=off, which means use both ballooning and paging to reach
> > the "footprint" M.
>
> [...]
>
> I don't think I like overloading "-t 0" to also turn off auto mode
> like that. It makes it less explicit when you are disabling auto mode
> and entering "you have to know what you are doing" territory.

I misunderstood what pagingauto= is supposed to do. So 'xl mem-set -t
<sec> D N' could be:

    set balloon target to N
    if paging && pagingauto:
        if pagingdelay > 0:
            wait pagingdelay
        set paging target to N

And -t 0 would just set the paging target right away so that the
footprint is reached fast, at the expense of concurrent
ballooning+paging.

Is an extra pagingdelay= .cfg option really helpful if the -t option
exists? I think it would be part of libxl then, and other libxl users
like libvirt may need to implement a knob similar to -t <sec> in their
mem-set implementation. Or should the actual waiting of <sec> be done
in libxl itself? Perhaps pagingdelay= could be some sort of
recommendation for those who implement the "mem-set" function. I'm just
wondering where the actual waiting should be done; it's not yet part of
the proposal. I think the actual memory change is an asynchronous
event, so the xl monitoring process could do the actual work and the
mem-set command is just the trigger. Other libxl users would need their
own monitoring.

> With my original proposal you can do
>
>     xl mem-set -t 0 D N
>
> and that will run with both paging and ballooning enabled but will
> stay in auto mode, if that's what you want.
>
> [...]
>
> I suspect the previous proposal was preferable in this regard?

About the 'xl mem-paging-set D manual' part I'm not sure yet. Should
'xl mem-paging-set D N && xl mem-set D M' undo the paging target from
mem-paging-set? If yes, the manual mode is not needed. If no, then a
manual/auto mode is needed.

Olaf
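Olaf's "mem-set is just the trigger" model would amount to writing the
two xenstore keys and returning, leaving any waiting and enforcement to
a monitoring process. A minimal sketch with libxenstore, using the key
names and KiB units from this thread; the trigger/monitor split shown
here is one possible design, not something settled in the discussion:

    #include <stdio.h>
    #include <string.h>
    #include <xenstore.h>

    /* Sketch: "xl mem-set" as a pure trigger -- write the targets and
     * return; waiting is left to a separate monitoring process. */
    int trigger_mem_set(int domid, unsigned long target_kb, int enforce_now)
    {
        struct xs_handle *xsh = xs_open(0);
        char path[80], val[32];
        int ok;

        if (!xsh)
            return 1;
        snprintf(val, sizeof(val), "%lu", target_kb);

        /* Balloon target: what the guest should shrink itself to. */
        snprintf(path, sizeof(path),
                 "/local/domain/%d/memory/target", domid);
        ok = xs_write(xsh, XBT_NULL, path, val, strlen(val));

        /* Paging target: written immediately only for "-t 0";
         * otherwise the monitor writes it after the delay expires. */
        if (ok && enforce_now) {
            snprintf(path, sizeof(path),
                     "/local/domain/%d/memory/target-tot_pages", domid);
            ok = xs_write(xsh, XBT_NULL, path, val, strlen(val));
        }
        xs_close(xsh);
        return ok ? 0 : 1;
    }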
On Fri, 2012-02-24 at 17:12 +0000, Andres Lagar-Cavilla wrote:
> Sharing + balloon is mostly a bad idea. It's not forbidden or broken
> from the hypervisor angle, but it's a lose-lose game. Ballooning a
> shared page gains you nothing. The ballooner can't know what is
> shared and what isn't, in order to make an educated decision. And the
> sharer can't foresee what the balloon will victimize.

Doesn't ballooning generally evict unused pages, because it allocates
new _free_ pages? Even if they were shared, is there any benefit to
doing so?

Or is it more of a second-order effect, e.g. ballooning can cause the
guest to swap out stuff which could have been shared?

Ian.
Andres Lagar-Cavilla
2012-Feb-27 14:45 UTC
Re: [PATCH] RFC: initial libxl support for xenpaging
> On Fri, 2012-02-24 at 17:12 +0000, Andres Lagar-Cavilla wrote:
>> Sharing + balloon is mostly a bad idea. [...]
>
> Doesn't ballooning generally evict unused pages, because it allocates
> new _free_ pages? Even if they were shared, is there any benefit to
> doing so?

Certainly the balloon will pick free pages. The sharing daemon should
not have shared those, but it's not unlikely that it will have.

It's a classic semantic gap problem, and we were discussing this in the
context of paging ("the pager should not have paged out page table
pages").

Beyond free pages, a prime target for sharing are read-only disk
buffers in the page cache. Those are victim #2 for the balloon.

> Or is it more of a second-order effect, e.g. ballooning can cause the
> guest to swap out stuff which could have been shared?

Absolutely.

Andres
On Mon, Feb 27, 2012 at 2:45 PM, Andres Lagar-Cavilla
<andres@lagarcavilla.org> wrote:
> [...]
>
> Certainly the balloon will pick free pages. The sharing daemon should
> not have shared those, but it's not unlikely that it will have.
>
> It's a classic semantic gap problem, and we were discussing this in
> the context of paging ("the pager should not have paged out page
> table pages").
>
> Beyond free pages, a prime target for sharing are read-only disk
> buffers in the page cache. Those are victim #2 for the balloon.

Not exactly -- victim #2 would be read-only disk buffers *which have
not been read recently*. Buffers which are in active use will not be
evicted. So although evicting these pages from the guests' cache
doesn't buy the system any more memory, it doesn't have a major impact
on the guest either.

In any case, if the guest experiences its own internal memory pressure,
these pages will be the first to go anyway. After that, it will try to
evict infrequently-used read-write pages -- which, if the pager is
active, will already have been paged out to disk; thus we'll end up
with the double-paging problem. This will have a much larger impact on
performance than uselessly evicting little-used read-only pages.

So I think that even though sharing+balloon will lead to some
occasional sub-optimal behavior, it's still a lot better than
sharing+paging and no ballooning. Remember that ballooning was a
technique introduced in the paper by VMware that talked about page
sharing -- they obviously thought sharing+ballooning was better than
sharing+paging.

 -George
Andres Lagar-Cavilla
2012-Feb-28 15:25 UTC
Re: [PATCH] RFC: initial libxl support for xenpaging
> On Mon, Feb 27, 2012 at 2:45 PM, Andres Lagar-Cavilla
> <andres@lagarcavilla.org> wrote:
>> [...]
>>
>> Beyond free pages, a prime target for sharing are read-only disk
>> buffers in the page cache. Those are victim #2 for the balloon.
>
> Not exactly -- victim #2 would be read-only disk buffers *which have
> not been read recently*.

Everything's been read recently in Windows. Seriously ;)

> Buffers which are in active use will not be evicted. So although
> evicting these pages from the guests' cache doesn't buy the system
> any more memory, it doesn't have a major impact on the guest either.

That's debatable. Maybe guests shouldn't have a page cache then. Or a
really small one.

I'm not saying you're wrong, I'm saying that the answer is, as with
many things, "it depends".

> In any case, if the guest experiences its own internal memory
> pressure, these pages will be the first to go anyway. After that, it
> will try to evict infrequently-used read-write pages -- which, if the
> pager is active, will already have been paged out to disk; thus we'll
> end up with the double-paging problem. This will have a much larger
> impact on performance than uselessly evicting little-used read-only
> pages.
>
> So I think that even though sharing+balloon will lead to some
> occasional sub-optimal behavior, it's still a lot better than
> sharing+paging and no ballooning. Remember that ballooning was a
> technique introduced in the paper by VMware that talked about page
> sharing -- they obviously thought sharing+ballooning was better than
> sharing+paging.

Ties in with Dan's thread. Depends on how much effort you spend
choosing sharing and paging candidates, and how hard you inflate the
balloon. I'm not in favor of any such overarching statement (even
though I made one myself!!).

Andres
Oops, forgot to reply-to-all...

---------- Forwarded message ----------
From: George Dunlap <George.Dunlap@eu.citrix.com>
Date: Tue, Feb 28, 2012 at 1:17 PM
Subject: Re: [Xen-devel] [PATCH] RFC: initial libxl support for xenpaging
To: Ian Campbell <Ian.Campbell@citrix.com>

On Fri, Feb 24, 2012 at 10:11 AM, Ian Campbell
<Ian.Campbell@citrix.com> wrote:
>> However, I'd say the main public "knobs" should just consist of two
>> things:
>>  * xl mem-set memory-target. This is the minimum amount of physical
>>    RAM a guest can get; we make sure that the sum of these for all
>>    VMs does not exceed the host capacity.
>
> Isn't this what we've previously called mem-paging-set? We defined
> mem-set earlier as controlling the amount of RAM the guest _thinks_
> it has, which is different.

No, I thought mem-set was supposed to be the Simple Knob, the one the
user turns to say, "I don't care how you do it, just make the guest
take X amount of RAM". The whole thing with the pagingdelay and all
that was about how long, and whether, that Simple Knob would set the
balloon target first, before resorting to paging.

Since the user can't really control how much sharing happens, it makes
sense to me for this Simple Knob to also be the "minimum memory this VM
should get if all extra pages from sharing suddenly disappear".

>> [...]
>
> Here is an updated version of my proposed interface which includes
> sharing, I think as you described (modulo the use of mem-paging-set
> where you said mem-set above).
>
> [...]
>
> maxmem=X   # maximum RAM the domain can ever see
> memory=M   # current amount of RAM seen by the domain

What do you mean "seen by the domain"? If you mean "pages which aren't
ballooned", then it looks an awful lot to me like you're (perhaps
unintentionally) smuggling back into the interface "balloon target" and
"paging target" (since "memory seen by the domain" would then always be
equal to "balloon target", and "memory actually available" would always
equal "paging target"). I thought the whole point was to hide all this
complexity from the user, unless she wants to see it? Or am I
misunderstanding something?

> [...]
Once again, didn't reply-to-all. (Hmm, I'm pretty sure I actually did...)
 -George

---------- Forwarded message ----------
From: George Dunlap <George.Dunlap@eu.citrix.com>
Date: Tue, Feb 28, 2012 at 3:43 PM
Subject: Re: [Xen-devel] [PATCH] RFC: initial libxl support for xenpaging
To: andres@lagarcavilla.org

On Tue, Feb 28, 2012 at 3:25 PM, Andres Lagar-Cavilla <andres@lagarcavilla.org> wrote:
>> On Mon, Feb 27, 2012 at 2:45 PM, Andres Lagar-Cavilla
>> <andres@lagarcavilla.org> wrote:
>>>> On Fri, 2012-02-24 at 17:12 +0000, Andres Lagar-Cavilla wrote:
>>>>> Sharing + balloon is mostly a bad idea. It's not forbidden or broken
>>>>> from the hypervisor angle, but it's a lose-lose game. Ballooning a
>>>>> shared page gains you nothing. The ballooner can't know what is shared
>>>>> and what isn't, in order to make an educated decision. And the sharer
>>>>> can't foresee what the balloon will victimize.
>>>>
>>>> Doesn't ballooning generally evict unused pages, because it allocates
>>>> new _free_ pages? Even if they were shared, is there any benefit to
>>>> doing so?
>>>
>>> Certainly the balloon will pick free pages. The sharing daemon should
>>> not have shared those, but it's not unlikely that it will have.
>>>
>>> It's a classic semantic gap problem, and we were discussing this in the
>>> context of paging ("the pager should not have paged out page table
>>> pages").
>>>
>>> Beyond free pages, a prime target for sharing are read-only disk buffers
>>> in the page cache. Those are victim #2 for the balloon.
>>
>> Not exactly -- victim #2 would be read-only disk buffers *which have
>> not been read recently*.
>
> Everything's been read recently in Windows. Seriously ;)

Uum, do you really mean to say that Windows doesn't use the accessed bit
to try to find which pages are in active use? That's rather surprising.

>> Buffers which are in active use will not be
>> evicted. So although evicting these pages from the guest's cache
>> doesn't buy the system any more memory, it doesn't have a major impact
>> on the guest either.
>
> That's debatable. Maybe guests shouldn't have a page cache then. Or a
> really small one.
>
> I'm not saying you're wrong, I'm saying that the answer is, as with many
> things, "it depends".

Sure, it depends. If there's lots of free memory, then sure, keep the
stuff around; maybe it will come in handy. If memory is tight, then the
ideal is to have the pages which are actually used frequently -- whether
they're in the page cache or elsewhere -- kept in memory, and the rest
paged out to disk / handed back to Xen.

>> In any case, if the guest experiences its own internal memory
>> pressure, these pages will be the first to go anyway. After that, it
>> will go for trying to evict infrequently-used read-write pages --
>> which, if the pager is active, will already have been paged out to
>> disk; thus we'll end up with the double-paging problem. This will
>> have a much larger impact on performance than uselessly evicting
>> little-used read-only pages.
>>
>> So I think that even though sharing+balloon will lead to some
>> occasional sub-optimal behavior, it's still a lot better than
>> sharing+paging and no ballooning. Remember that ballooning was a
>> technique introduced in the paper by VMware that talked about page
>> sharing -- they obviously thought sharing+ballooning was better than
>> sharing+paging.
>
> Ties in with Dan's thread. Depends on how much effort you spend choosing
> sharing and paging candidates, and how hard you inflate the balloon.
> I'm not in favor of any such overarching statement (even though I made
> one myself!!).

Yes, but at some point, if we want people to use this stuff, we have to
have a simple answer for xl, and recommendations for more complicated
answers.

Re the "simple answer" possibilities for xl and sharing, I think we have:
 * Take whatever advantage of sharing we can; if we need to reduce
   guest footprint (i.e., actual memory used), use ballooning first,
   and paging only afterwards.
 * Take whatever advantage of sharing we can; if we need to reduce
   guest footprint, use only paging.

The first, as you say, has the disadvantage that early ballooning will
reduce sharing without getting memory back, but later ballooning will
evict pages properly without risking double paging. The second has the
advantage that early paging will keep shared pages longer, but the
disadvantage that when the guest starts paging, you'll run into the
double-paging problem. As a cost-benefit analysis, I think the second
comes out worse. (Of course, someone running some actual numbers would
be the final answer for this one, but I'm not sure anyone has the time
or inclination to do that at this point.)

Now, a really clever toolstack could use paging first, and then switch
to ballooning if it detects significant double-paging. But I think
that's more than we want from xl.

 -George
Ian Campbell
2012-Feb-28 17:12 UTC
Re: Fwd: [PATCH] RFC: initial libxl support for xenpaging
On Tue, 2012-02-28 at 13:17 +0000, George Dunlap wrote:
> On Fri, Feb 24, 2012 at 10:11 AM, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> >> However, I'd say the main public "knobs" should just consist of two
> >> things:
> >> * xl mem-set memory-target. This is the minimum amount of physical RAM
> >> a guest can get; we make sure that the sum of these for all VMs does not
> >> exceed the host capacity.
> >
> > Isn't this what we've previously called mem-paging-set? We defined
> > mem-set earlier as controlling the amount of RAM the guest _thinks_ it
> > has, which is different.
>
> No, I thought mem-set was supposed to be the Simple Knob, that the
> user turned to say, "I don't care how you do it, just make the guest
> take X amount of RAM". The whole thing with the pagingdelay and all
> that was how long, and whether, that Simple Knob would set the balloon
> target first before resorting to sharing. Since the user can't really
> control how much sharing happens, it makes sense to me for this Simple
> Knob to also be the "minimum memory this VM should get if all extra
> pages from sharing suddenly disappear".

I think you might be correct. I suspect I wrote the above before I had
fully integrated sharing into my understanding/proposal (I took a few
iterations locally to get it "right"). I don't think this filtered into
the actual interface proposal, or do you see somewhere which it did
(modulo the discussion below)?

> >> * xl sharing-policy [policy]. This tells the sharing system how to use
> >> the "windfall" pages gathered from page sharing.
> >>
> >> Then internally, the sharing system should combine the "minimum
> >> footprint" with the number of extra pages and the policy to set the
> >> amount of memory actually used (via balloon driver or paging).
> >
> > This is an argument in favour of mem-footprint-set rather than
> > mem-paging-set?
> >
> > Here is an updated version of my proposed interface which includes
> > sharing, I think as you described (modulo the use of mem-paging-set
> > where you said mem-set above).
> >
> > I also included "mem-paging-set manual" as an explicit thing with an
> > error on "mem-paging-set N" if you don't switch to manual mode. This
> > might be too draconian -- I'm not wedded to it.
> >
> > maxmem=X   # maximum RAM the domain can ever see
> > memory=M   # current amount of RAM seen by the
> >            # domain
>
> What do you mean "seen by the domain"?
>
> If you mean "pages which aren't ballooned", then it looks an awful lot
> to me like you're (perhaps unintentionally) smuggling back into the
> interface "balloon target" and "paging target" (since "memory seen by
> the domain" would then always be equal to "balloon target", and
> "memory actually available" would always equal "paging target"). I
> thought the whole point was to hide all this complexity from the user,
> unless she wants to see it?
>
> Or am I misunderstanding something?

Pages which the guest sees is not the same as the ballooning target if
the guest has not met the target. So e.g. if a guest is using 10M and
we do "mem-set 6M" but the guest only balloons to 8M, then the amount
of RAM "currently seen" by the guest is 8M, not 6M.

In this situation we would eventually decide to use paging, at which
point the actual RAM used by the guest would drop to 6M, but as far as
the guest knows it is still using 8M; e.g. it "currently sees" 8M while
"memory actually available" is 6M. Also the balloon target remains 6M
because we expect the guest to keep on trying.
You are right though that for a well behaved guest there will be no
practical difference between the balloon target and the amount of RAM
seen by the guest, at least so long as ballooning is the mechanism by
which we expect guests to meet these targets.

I don't mention sharing in the above because all sharing does is reduce
the memory the guest "actually" has below what it "thinks" it has. So it
might be that we do "mem-set 6M" and that the guest makes it to 8M but
that 1M is shared. At that point we have "guest sees" == 8M and "actual"
== 8-1 == 7M. Eventually we would enable paging to reach the desired
"actual" == 6M, presumably by paging out an additional 1M. If at this
point all of the guest's pages suddenly become unshared then the pager
will kick in and the amount of paged memory will presumably grow to 2M,
maintaining the actual target of 6M.

Another scenario would be where we mem-set 6M and the guest does
actually meet that target, so "guest sees" == "actual" == 6M and there
is no sharing or paging. If at this point we detect 1M worth of sharable
pages, then "guest sees" == 6M but "actual" == 5M, and we have 1M of
spare memory to distribute as per the sharing policy.

If the policy is such that we need to guarantee to be able to give the
guest 6M again if it ends up unsharing everything, then the sharing
policy would only allow us to use that memory for "ephemeral" purposes.
If the sharing policy does not guarantee that we can get that memory
back, then we may find ourselves in a situation where "guest sees" == 6M
but "actual" == 5M, with the slack made up for by paging and not
sharing, despite having done "mem-set 6M". IMHO the user was effectively
asking for this (or at least acknowledging the possibility) when they
chose that sharing policy. In this case the paging target would still be
6M and the pager would, I presume, be actively trying to reduce the
amount of paged RAM, such that if some RAM becomes available it would
suck it up and move closer to "actual" == 6M.

Ian.
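The accounting in these scenarios reduces to a one-line relation; the
following toy model (not toolstack code) just replays Ian's numbers,
with all quantities in MiB:

    #include <stdio.h>

    /* The guest "sees" some amount of RAM; sharing and paging both
     * reduce what it "actually" uses. The pager pages out exactly
     * enough to bring "actual" down to the target. */
    static long to_page_out(long seen, long shared, long target)
    {
        long paged = seen - shared - target;
        return paged > 0 ? paged : 0;   /* sharing may already suffice */
    }

    int main(void)
    {
        /* mem-set 6M, guest balloons only to 8M, 1M shared: page out
         * 1M; if the shared 1M suddenly unshares, page out 2M. */
        printf("%ld\n", to_page_out(8, 1, 6));   /* 1 */
        printf("%ld\n", to_page_out(8, 0, 6));   /* 2 */
        /* Guest meets the 6M target, then 1M becomes shared: actual
         * drops to 5M and nothing needs to be paged out. */
        printf("%ld\n", to_page_out(6, 1, 6));   /* 0 */
        return 0;
    }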
Andres Lagar-Cavilla
2012-Mar-06 23:07 UTC
Re: [PATCH] RFC: initial libxl support for xenpaging
> On Mon, Feb 27, 2012 at 2:45 PM, Andres Lagar-Cavilla
> <andres@lagarcavilla.org> wrote:
>>> On Fri, 2012-02-24 at 17:12 +0000, Andres Lagar-Cavilla wrote:

I'm gonna top-post (gasp!).

At the Hackathon, Ian C, Adin and I got together and held a brief
pow-wow on the memory management interface. This is a rough summary.
Comments highly welcome!

Please keep in mind that this is a sketch of ideas, not an edict on
naming and syntax rules. So pretty much everything below is changeable.
Also note that xl is used liberally below, without distinction between
xl and libxl -- partly due to a 1-1 match between the two in terms of
the functionality explained, and partly due to ignorance :)

The new interface has a master key for the domain memory footprint, and
a policy key that allows fine-grained manipulation of how to achieve
this footprint.

xl mem-set foo
    This is a value in KiB. It will translate to writing the
    /local/domain/X/memory-policy/target key. *Note it does not write
    the traditional memory/target key. The reason will become clear
    later.*

xl mem-policy foo
    This is a string. It will translate to writing the
    /local/domain/X/memory-policy/actor key.

The per-domain xl daemon watches memory-policy/target. When this is
written to, it wakes up, reads the actor key, and if the actor key does
not refer to xl, it simply does not care. Multiple actors can have the
same behavior, and in this way only the right actor is awakened.

By default, the actor key points to xl. But it could point to an
external actor. For example, xenpaging itself could watch this, and get
to act if the specified actor is 'xenpaging'.

The xl policy can then be controlled with "power user" xl knobs. We
won't detail the specific syntax here. By default, the policy is "first
try to achieve the target via ballooning; if it can't get there fast
enough, resort to paging". The "power user" xl-specific knobs are:

- policy: an enum of known policies
  -- first balloon then paging (default)
  -- only balloon
  -- only paging
  -- first paging then balloon
- primary timeout: how long to wait before triggering the second option.

Note that libxl will then be in charge of writing memory/target (to
trigger the balloon) and memory/target-tot_pages (xenpaging), launching
xenpaging, etc. -- all internal bits that no one outside of libxl has
to care about.

Note that this interface does not preclude sharing or tmem or "other",
but does not incorporate them right now. The aim of the interface is to
be extensible, so that after release 4.2 tmem/sharing/foo can be added
without breaking anything. Another goal is for global host memory
controllers to get to manage domains' footprints via this interface
with minimal effort. Something like squeezed/xenballoond could set
per-domain actor values (to match today's behavior, the equivalent of
"xl:balloon"), and then just call libxl mem-set.

So, this is a bit of a 10K-foot view in order to get the primary
guidelines "right", before diving down again to hammer out the
fine-grained details.

Thanks,
Andres
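A minimal sketch, in C, of what the per-domain daemon side of this
proposal could look like, using the libxenstore watch API. The
memory-policy/target and memory-policy/actor paths follow the keys
proposed above, but the actor string and the policy plumbing are
assumptions, not agreed syntax:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <xenstore.h>   /* <xs.h> on older trees */

    enum mem_policy {
        BALLOON_THEN_PAGING,    /* default */
        BALLOON_ONLY,
        PAGING_ONLY,
        PAGING_THEN_BALLOON,
    };

    /* Watch memory-policy/target for one domain and act only when the
     * actor key names us; other actors' updates are simply ignored. */
    static void watch_policy_target(struct xs_handle *xsh, unsigned domid,
                                    const char *me /* e.g. "xl" */)
    {
        char tpath[64], apath[64];
        unsigned int num, len;

        snprintf(tpath, sizeof(tpath),
                 "/local/domain/%u/memory-policy/target", domid);
        snprintf(apath, sizeof(apath),
                 "/local/domain/%u/memory-policy/actor", domid);
        if (!xs_watch(xsh, tpath, "memory-policy"))
            return;

        for (;;) {
            char **ev = xs_read_watch(xsh, &num);  /* blocks until fired */
            if (!ev)
                break;
            free(ev);
            char *actor = xs_read(xsh, XBT_NULL, apath, &len);
            if (!actor || strcmp(actor, me) != 0) {
                free(actor);                /* someone else's job */
                continue;
            }
            char *target = xs_read(xsh, XBT_NULL, tpath, &len);
            /* ...apply the configured enum mem_policy to reach the
             * requested target (KiB), e.g. balloon first, then page... */
            free(target);
            free(actor);
        }
    }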