Olaf Hering
2013-Mar-18 16:40 UTC
[PATCH 0 of 6] set migrate constraints from cmdline, better xend.log logging
This series improves logging capabilities of xm migrate. It also adds new options to xm/xl migrate to tweak the max_iter/max_factor options passed to xc_domain_save. Being able to tweak those knobs helps with migrating large and busy guests. v2: - update "tools: set migration constraints from cmdline" v6 -> v7 - add "tools/xend: move assert to exception block" - this iteration is not runtime tested, yet Changes: tools/xc: print messages from xc_save with xc_report tools/xc: document printf calls in xc_restore tools/xc: rework xc_save.c:switch_qemu_logdirty tools: set migration constraints from cmdline tools: add xm migrate --log_progress option tools/xend: move assert to exception block docs/man/xl.pod.1 | 15 ++++ tools/libxc/xc_domain_save.c | 13 +++ tools/libxc/xc_private.h | 1 tools/libxc/xenguest.h | 2 tools/libxl/Makefile | 2 tools/libxl/libxl.c | 12 ++- tools/libxl/libxl.h | 21 +++++- tools/libxl/libxl_dom.c | 1 tools/libxl/libxl_internal.h | 3 tools/libxl/libxl_save_callout.c | 4 - tools/libxl/xl_cmdimpl.c | 34 ++++++++- tools/libxl/xl_cmdtable.c | 20 +++-- tools/python/xen/xend/XendCheckpoint.py | 23 +++++- tools/python/xen/xend/XendDomain.py | 13 +++ tools/python/xen/xend/XendDomainInfo.py | 14 ++++ tools/python/xen/xm/migrate.py | 21 ++++++ tools/xcutils/xc_restore.c | 1 tools/xcutils/xc_save.c | 112 ++++++++++++++++++-------------- 18 files changed, 236 insertions(+), 76 deletions(-)
Olaf Hering
2013-Mar-18 16:40 UTC
[PATCH 1 of 6] tools/xc: print messages from xc_save with xc_report
# HG changeset patch # User Olaf Hering <olaf@aepfle.de> # Date 1363620577 -3600 # Node ID d703fbcb7d94bf18205b7f963f45f0101ef59c6e # Parent f9f358b376a5d7426f2b25d952989d757bb620ff tools/xc: print messages from xc_save with xc_report Make use of xc_report in xc_save to log also pid if some error occoured. Signed-off-by: Olaf Hering <olaf@aepfle.de> diff -r f9f358b376a5 -r d703fbcb7d94 tools/libxc/xc_private.h --- a/tools/libxc/xc_private.h +++ b/tools/libxc/xc_private.h @@ -119,6 +119,7 @@ void xc_report_progress_step(xc_interfac /* anamorphic macros: struct xc_interface *xch must be in scope */ +#define WPRINTF(_f, _a...) xc_report(xch, xch->error_handler, XTL_WARN,0, _f , ## _a) #define IPRINTF(_f, _a...) xc_report(xch, xch->error_handler, XTL_INFO,0, _f , ## _a) #define DPRINTF(_f, _a...) xc_report(xch, xch->error_handler, XTL_DETAIL,0, _f , ## _a) #define DBGPRINTF(_f, _a...) xc_report(xch, xch->error_handler, XTL_DEBUG,0, _f , ## _a) diff -r f9f358b376a5 -r d703fbcb7d94 tools/xcutils/xc_save.c --- a/tools/xcutils/xc_save.c +++ b/tools/xcutils/xc_save.c @@ -7,6 +7,7 @@ * */ +#include <unistd.h> #include <err.h> #include <stdlib.h> #include <stdint.h> @@ -19,6 +20,7 @@ #include <fcntl.h> #include <err.h> +#include <xc_private.h> #include <xenstore.h> #include <xenctrl.h> #include <xenguest.h> @@ -51,16 +53,17 @@ static int compat_suspend(void) * receive the acknowledgement from the subscribe event channel. */ static int evtchn_suspend(void) { + xc_interface *xch = si.xch; int rc; rc = xc_evtchn_notify(si.xce, si.suspend_evtchn); if (rc < 0) { - warnx("failed to notify suspend request channel: %d", rc); + WPRINTF("failed to notify suspend request channel: %d", rc); return 0; } - if (xc_await_suspend(si.xch, si.xce, si.suspend_evtchn) < 0) { - warnx("suspend failed"); + if (xc_await_suspend(xch, si.xce, si.suspend_evtchn) < 0) { + WPRINTF("suspend failed"); return 0; } @@ -104,20 +107,27 @@ static int suspend(void* data) static int switch_qemu_logdirty(int domid, unsigned int enable, void *data) { + xc_interface *xch = si.xch; struct xs_handle *xs; char *path, *p, *ret_str, *cmd_str, **watch; unsigned int len; struct timeval tv; fd_set fdset; - if ((xs = xs_daemon_open()) == NULL) - errx(1, "Couldn''t contact xenstore"); - if (!(path = strdup("/local/domain/0/device-model/"))) - errx(1, "can''t get domain path in store"); + if ((xs = xs_daemon_open()) == NULL) { + PERROR("Couldn''t contact xenstore"); + exit(1); + } + if (!(path = strdup("/local/domain/0/device-model/"))) { + PERROR("can''t get domain path in store"); + exit(1); + } if (!(path = realloc(path, strlen(path) + 10 - + strlen("/logdirty/cmd") + 1))) - errx(1, "no memory for constructing xenstore path"); + + strlen("/logdirty/cmd") + 1))) { + PERROR("no memory for constructing xenstore path"); + exit(1); + } snprintf(path + strlen(path), 11, "%i", domid); strcat(path, "/logdirty/"); p = path + strlen(path); @@ -126,16 +136,22 @@ static int switch_qemu_logdirty(int domi /* Watch for qemu''s return value */ strcpy(p, "ret"); if (!xs_watch(xs, path, "qemu-logdirty-ret")) - errx(1, "can''t set watch in store (%s)\n", path); + { + ERROR("can''t set watch in store (%s)\n", path); + exit(1); + } - if (!(cmd_str = strdup( enable == 0 ? "disable" : "enable"))) - errx(1, "can''t get logdirty cmd path in store"); + if (!(cmd_str = strdup( enable == 0 ? "disable" : "enable"))) { + PERROR("can''t get logdirty cmd path in store"); + exit(1); + } /* Tell qemu that we want it to start logging dirty page to Xen */ strcpy(p, "cmd"); - if (!xs_write(xs, XBT_NULL, path, cmd_str, strlen(cmd_str))) - errx(1, "can''t write to store path (%s)\n", - path); + if (!xs_write(xs, XBT_NULL, path, cmd_str, strlen(cmd_str))) { + PERROR("can''t write to store path (%s)\n", path); + exit(1); + } /* Wait a while for qemu to signal that it has service logdirty command */ read_again: @@ -144,8 +160,10 @@ static int switch_qemu_logdirty(int domi FD_ZERO(&fdset); FD_SET(xs_fileno(xs), &fdset); - if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1) - errx(1, "timed out waiting for qemu logdirty response.\n"); + if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1) { + PERROR("timed out waiting for qemu logdirty response.\n"); + exit(1); + } watch = xs_read_watch(xs, &len); free(watch); @@ -166,6 +184,7 @@ static int switch_qemu_logdirty(int domi int main(int argc, char **argv) { + xc_interface *xch; unsigned int maxit, max_f, lflags; int io_fd, ret, port; struct save_callbacks callbacks; @@ -186,26 +205,26 @@ main(int argc, char **argv) lvl = si.flags & XCFLAGS_DEBUG ? XTL_DEBUG: XTL_DETAIL; lflags = XTL_STDIOSTREAM_SHOW_PID | XTL_STDIOSTREAM_HIDE_PROGRESS; l = (xentoollog_logger *)xtl_createlogger_stdiostream(stderr, lvl, lflags); - si.xch = xc_interface_open(l, 0, 0); + xch = si.xch = xc_interface_open(l, 0, 0); if (!si.xch) - errx(1, "failed to open control interface"); + errx(1, "[%lu] failed to open control interface", (unsigned long)getpid()); si.xce = xc_evtchn_open(NULL, 0); if (si.xce == NULL) - warnx("failed to open event channel handle"); + WPRINTF("failed to open event channel handle"); else { port = xs_suspend_evtchn_port(si.domid); if (port < 0) - warnx("failed to get the suspend evtchn port\n"); + WPRINTF("failed to get the suspend evtchn port\n"); else { si.suspend_evtchn xc_suspend_evtchn_init(si.xch, si.xce, si.domid, port); if (si.suspend_evtchn < 0) - warnx("suspend event channel initialization failed, " + WPRINTF("suspend event channel initialization failed, " "using slow path"); } }
Olaf Hering
2013-Mar-18 16:40 UTC
[PATCH 2 of 6] tools/xc: document printf calls in xc_restore
# HG changeset patch # User Olaf Hering <olaf@aepfle.de> # Date 1363620616 -3600 # Node ID 515929f13a2f1e95ca174847f4956284e43657fb # Parent d703fbcb7d94bf18205b7f963f45f0101ef59c6e tools/xc: document printf calls in xc_restore ... to prevent conversion to DPRINTF. Signed-off-by: Olaf Hering <olaf@aepfle.de> diff -r d703fbcb7d94 -r 515929f13a2f tools/xcutils/xc_restore.c --- a/tools/xcutils/xc_restore.c +++ b/tools/xcutils/xc_restore.c @@ -56,6 +56,7 @@ main(int argc, char **argv) if ( ret == 0 ) { + /* xend expects this output, part of protocol */ printf("store-mfn %li\n", store_mfn); if ( !hvm ) printf("console-mfn %li\n", console_mfn);
Olaf Hering
2013-Mar-18 16:40 UTC
[PATCH 3 of 6] tools/xc: rework xc_save.c:switch_qemu_logdirty
# HG changeset patch # User Olaf Hering <olaf@aepfle.de> # Date 1363620618 -3600 # Node ID b3ed5c4b5078e5bf90d23f7b09eda727af0d7e74 # Parent 515929f13a2f1e95ca174847f4956284e43657fb tools/xc: rework xc_save.c:switch_qemu_logdirty Rework code in switch_qemu_logdirty, fix also memleak. Signed-off-by: Olaf Hering <olaf@aepfle.de> diff -r 515929f13a2f -r b3ed5c4b5078 tools/xcutils/xc_save.c --- a/tools/xcutils/xc_save.c +++ b/tools/xcutils/xc_save.c @@ -7,6 +7,7 @@ * */ +#define _GNU_SOURCE #include <unistd.h> #include <err.h> #include <stdlib.h> @@ -109,8 +110,10 @@ static int switch_qemu_logdirty(int domi { xc_interface *xch = si.xch; struct xs_handle *xs; - char *path, *p, *ret_str, *cmd_str, **watch; + char *path, *dir_p, *ret_str, **watch; + const char *cmd_str; unsigned int len; + int ret, again; struct timeval tv; fd_set fdset; @@ -118,65 +121,56 @@ static int switch_qemu_logdirty(int domi PERROR("Couldn''t contact xenstore"); exit(1); } - if (!(path = strdup("/local/domain/0/device-model/"))) { - PERROR("can''t get domain path in store"); + + ret = asprintf(&path, "/local/domain/0/device-model/%i/logdirty/ret", domid); + if (ret < 0) { + ERROR("Couldn''t construct xenstore path"); exit(1); } - if (!(path = realloc(path, strlen(path) - + 10 - + strlen("/logdirty/cmd") + 1))) { - PERROR("no memory for constructing xenstore path"); - exit(1); - } - snprintf(path + strlen(path), 11, "%i", domid); - strcat(path, "/logdirty/"); - p = path + strlen(path); - + /* Pointer to directory */ + dir_p = path + ret - 3; /* Watch for qemu''s return value */ - strcpy(p, "ret"); - if (!xs_watch(xs, path, "qemu-logdirty-ret")) - { - ERROR("can''t set watch in store (%s)\n", path); + if (!xs_watch(xs, path, "qemu-logdirty-ret")) { + PERROR("can''t set watch in store (%s)", path); exit(1); } - if (!(cmd_str = strdup( enable == 0 ? "disable" : "enable"))) { - PERROR("can''t get logdirty cmd path in store"); + cmd_str = enable ? "enable" : "disable"; + + /* Tell qemu that we want it to start logging dirty pages to Xen */ + strcpy(dir_p, "cmd"); + if (!xs_write(xs, XBT_NULL, path, cmd_str, strlen(cmd_str))) { + PERROR("can''t write to store path (%s)", path); exit(1); } - /* Tell qemu that we want it to start logging dirty page to Xen */ - strcpy(p, "cmd"); - if (!xs_write(xs, XBT_NULL, path, cmd_str, strlen(cmd_str))) { - PERROR("can''t write to store path (%s)\n", path); - exit(1); - } + /* Restore initial path */ + strcpy(dir_p, "ret"); + /* Wait a while for qemu to signal that it has serviced logdirty command */ + do { + tv.tv_sec = 5; + tv.tv_usec = 0; + FD_ZERO(&fdset); + FD_SET(xs_fileno(xs), &fdset); + errno = 0; - /* Wait a while for qemu to signal that it has service logdirty command */ - read_again: - tv.tv_sec = 5; - tv.tv_usec = 0; - FD_ZERO(&fdset); - FD_SET(xs_fileno(xs), &fdset); - - if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1) { - PERROR("timed out waiting for qemu logdirty response.\n"); - exit(1); - } - - watch = xs_read_watch(xs, &len); - free(watch); - - strcpy(p, "ret"); - ret_str = xs_read(xs, XBT_NULL, path, &len); - if (ret_str == NULL || strcmp(ret_str, cmd_str)) + if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1) { + PERROR("timed out waiting for qemu logdirty response."); + exit(1); + } + + watch = xs_read_watch(xs, &len); + free(watch); + + ret_str = xs_read(xs, XBT_NULL, path, &len); + again = ret_str == NULL || strcmp(ret_str, cmd_str); + WPRINTF("Got ''%s'' from logdirty%s.\n", ret_str, again ? ", retrying" : ""); + free(ret_str); /* Watch fired but value is not yet right */ - goto read_again; + } while (again); free(path); - free(cmd_str); - free(ret_str); return 0; }
Olaf Hering
2013-Mar-18 16:40 UTC
[PATCH 4 of 6] tools: set migration constraints from cmdline
# HG changeset patch # User Olaf Hering <olaf@aepfle.de> # Date 1363623457 -3600 # Node ID a897276ed24d434daf5a5d3d8543343a23df2f8a # Parent b3ed5c4b5078e5bf90d23f7b09eda727af0d7e74 tools: set migration constraints from cmdline Add new options to xm/xl migrate to control the process of migration. The intention is to optionally abort the migration if it takes too long to migrate a busy guest due to the high number of new dirty pages. Currently the guest is suspended to transfer the remaining dirty pages. This transfer can take a long time, which can confuse the guest if its suspended for too long. The new options allow to override the built-in default values, which are not changed by this patch. --max_iters <number> Number of iterations before final suspend (default: 30) --max_factor <factor> Max amount of memory to transfer before final suspend (default: 3*RAM) --abort_if_busy Abort migration instead of doing final suspend. The changes to libxl change the API, handle LIBXL_API_VERSION == 0x040200. TODO: eventually add also --min_remaining (default value 50) in a seperate patch v7: - remove short options - update description of --abort_if_busy in xl.1 - extend description of --abort_if_busy in xl help - add comment to libxl_domain_suspend declaration, props is optional v6: - update the LIBXL_API_VERSION handling for libxl_domain_suspend change it to an inline function if LIBXL_API_VERSION is defined to 4.2.0 - rename libxl_save_properties to libxl_domain_suspend_properties - rename ->xlflags to ->flags within that struct v5: - adjust libxl_domain_suspend prototype, move flags, max_iters, max_factor into a new, optional struct libxl_save_properties - rename XCFLAGS_DOMSAVE_NOSUSPEND to XCFLAGS_DOMSAVE_ABORT_IF_BUSY - rename LIBXL_SUSPEND_NO_FINAL_SUSPEND to LIBXL_SUSPEND_ABORT_IF_BUSY - rename variables no_suspend to abort_if_busy - rename option -N/--no_suspend to -A/--abort_if_busy - update xl.1, extend description of -A option v4: - update default for no_suspend from None to 0 in XendCheckpoint.py:save - update logoutput in setMigrateConstraints - change xm migrate defaults from None to 0 - add new options to xl.1 - fix syntax error in XendDomain.py:domain_migrate_constraints_set - fix xm migrate -N option name to match xl migrate v3: - move logic errors in libxl__domain_suspend and fixed help text in cmd_table to separate patches - fix syntax error in XendCheckpoint.py - really pass max_iters and max_factor in libxl__xc_domain_save - make libxl_domain_suspend_0x040200 declaration globally visible - bump libxenlight.so SONAME from 2.0 to 2.1 due to changed libxl_domain_suspend v2: - use LIBXL_API_VERSION and define libxl_domain_suspend_0x040200 - fix logic error in min_reached check in xc_domain_save - add longopts - update --help text - correct description of migrate --help text Signed-off-by: Olaf Hering <olaf@aepfle.de> diff -r b3ed5c4b5078 -r a897276ed24d docs/man/xl.pod.1 --- a/docs/man/xl.pod.1 +++ b/docs/man/xl.pod.1 @@ -391,6 +391,21 @@ Send <config> instead of config file fro Print huge (!) amount of debug during the migration process. +=item B<--max_iters> I<number> + +Number of iterations before final suspend (default: 30) + +=item B<--max_factor> I<factor> + +Max amount of memory to transfer before final suspend (default: 3*RAM) + +=item B<--abort_if_busy> + +Abort migration instead of doing final suspend/transfer/resume if the +guest has still dirty pages after the number of iterations and/or the +amount of RAM transferred. This avoids long periods of time where the +guest is suspended. + =back =item B<remus> [I<OPTIONS>] I<domain-id> I<host> diff -r b3ed5c4b5078 -r a897276ed24d tools/libxc/xc_domain_save.c --- a/tools/libxc/xc_domain_save.c +++ b/tools/libxc/xc_domain_save.c @@ -804,6 +804,7 @@ int xc_domain_save(xc_interface *xch, in int rc = 1, frc, i, j, last_iter = 0, iter = 0; int live = (flags & XCFLAGS_LIVE); int debug = (flags & XCFLAGS_DEBUG); + int abort_if_busy = (flags & XCFLAGS_DOMSAVE_ABORT_IF_BUSY); int superpages = !!hvm; int race = 0, sent_last_iter, skip_this_iter = 0; unsigned int sent_this_iter = 0; @@ -1527,10 +1528,20 @@ int xc_domain_save(xc_interface *xch, in if ( live ) { + int min_reached = sent_this_iter + skip_this_iter < 50; if ( (iter >= max_iters) || - (sent_this_iter+skip_this_iter < 50) || + min_reached || (total_sent > dinfo->p2m_size*max_factor) ) { + if ( !min_reached && abort_if_busy ) + { + ERROR("Live migration aborted, as requested. (guest too busy?)" + " total_sent %lu iter %d, max_iters %u max_factor %u", + total_sent, iter, max_iters, max_factor); + rc = 1; + goto out; + } + DPRINTF("Start last iteration\n"); last_iter = 1; diff -r b3ed5c4b5078 -r a897276ed24d tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h +++ b/tools/libxc/xenguest.h @@ -28,6 +28,7 @@ #define XCFLAGS_HVM (1 << 2) #define XCFLAGS_STDVGA (1 << 3) #define XCFLAGS_CHECKPOINT_COMPRESS (1 << 4) +#define XCFLAGS_DOMSAVE_ABORT_IF_BUSY (1 << 5) #define X86_64_B_SIZE 64 #define X86_32_B_SIZE 32 diff -r b3ed5c4b5078 -r a897276ed24d tools/libxl/Makefile --- a/tools/libxl/Makefile +++ b/tools/libxl/Makefile @@ -5,7 +5,7 @@ XEN_ROOT = $(CURDIR)/../.. include $(XEN_ROOT)/tools/Rules.mk -MAJOR = 2.0 +MAJOR = 2.1 MINOR = 0 XLUMAJOR = 1.0 diff -r b3ed5c4b5078 -r a897276ed24d tools/libxl/libxl.c --- a/tools/libxl/libxl.c +++ b/tools/libxl/libxl.c @@ -756,7 +756,8 @@ static void domain_suspend_cb(libxl__egc } -int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, int flags, +int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, + const libxl_domain_suspend_properties *props, const libxl_asyncop_how *ao_how) { AO_CREATE(ctx, domid, ao_how); @@ -777,8 +778,13 @@ int libxl_domain_suspend(libxl_ctx *ctx, dss->domid = domid; dss->fd = fd; dss->type = type; - dss->live = flags & LIBXL_SUSPEND_LIVE; - dss->debug = flags & LIBXL_SUSPEND_DEBUG; + if (props) { + dss->live = props->flags & LIBXL_SUSPEND_LIVE; + dss->debug = props->flags & LIBXL_SUSPEND_DEBUG; + dss->max_iters = props->max_iters; + dss->max_factor = props->max_factor; + dss->xlflags = props->flags; + } libxl__domain_suspend(egc, dss); return AO_INPROGRESS; diff -r b3ed5c4b5078 -r a897276ed24d tools/libxl/libxl.h --- a/tools/libxl/libxl.h +++ b/tools/libxl/libxl.h @@ -507,12 +507,31 @@ int libxl_domain_create_restore(libxl_ct void libxl_domain_config_init(libxl_domain_config *d_config); void libxl_domain_config_dispose(libxl_domain_config *d_config); +typedef struct { + int flags; /* LIBXL_SUSPEND_* */ + int max_iters; + int max_factor; +} libxl_domain_suspend_properties; + int libxl_domain_suspend(libxl_ctx *ctx, uint32_t domid, int fd, - int flags, /* LIBXL_SUSPEND_* */ + const libxl_domain_suspend_properties *props, /* optional */ const libxl_asyncop_how *ao_how) LIBXL_EXTERNAL_CALLERS_ONLY; +#ifdef LIBXL_API_VERSION +#if LIBXL_API_VERSION == 0x040200 +static inline int libxl_domain_suspend_0x040200(libxl_ctx *ctx, uint32_t domid, int fd, + int flags, const libxl_asyncop_how *ao_how) +{ + libxl_domain_suspend_properties props = { .flags = flags }; + return libxl_domain_suspend(ctx, domid, fd, &props, ao_how); +} +#define libxl_domain_suspend libxl_domain_suspend_0x040200 +#endif +#endif + #define LIBXL_SUSPEND_DEBUG 1 #define LIBXL_SUSPEND_LIVE 2 +#define LIBXL_SUSPEND_ABORT_IF_BUSY 4 /* @param suspend_cancel [from xenctrl.h:xc_domain_resume( @param fast )] * If this parameter is true, use co-operative resume. The guest diff -r b3ed5c4b5078 -r a897276ed24d tools/libxl/libxl_dom.c --- a/tools/libxl/libxl_dom.c +++ b/tools/libxl/libxl_dom.c @@ -1297,6 +1297,7 @@ void libxl__domain_suspend(libxl__egc *e dss->xcflags = (live ? XCFLAGS_LIVE : 0) | (debug ? XCFLAGS_DEBUG : 0) + | (dss->xlflags & LIBXL_SUSPEND_ABORT_IF_BUSY ? XCFLAGS_DOMSAVE_ABORT_IF_BUSY : 0) | (dss->hvm ? XCFLAGS_HVM : 0); dss->suspend_eventchn = -1; diff -r b3ed5c4b5078 -r a897276ed24d tools/libxl/libxl_internal.h --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -2249,6 +2249,9 @@ struct libxl__domain_suspend_state { xc_evtchn *xce; /* event channel handle */ int suspend_eventchn; int hvm; + int max_iters; + int max_factor; + int xlflags; int xcflags; int guest_responded; const char *dm_savefile; diff -r b3ed5c4b5078 -r a897276ed24d tools/libxl/libxl_save_callout.c --- a/tools/libxl/libxl_save_callout.c +++ b/tools/libxl/libxl_save_callout.c @@ -108,8 +108,8 @@ void libxl__xc_domain_save(libxl__egc *e } const unsigned long argnums[] = { - dss->domid, 0, 0, dss->xcflags, dss->hvm, vm_generationid_addr, - toolstack_data_fd, toolstack_data_len, + dss->domid, dss->max_iters, dss->max_factor, dss->xcflags, dss->hvm, + vm_generationid_addr, toolstack_data_fd, toolstack_data_len, cbflags, }; diff -r b3ed5c4b5078 -r a897276ed24d tools/libxl/xl_cmdimpl.c --- a/tools/libxl/xl_cmdimpl.c +++ b/tools/libxl/xl_cmdimpl.c @@ -3201,7 +3201,7 @@ static int save_domain(uint32_t domid, c save_domain_core_writeconfig(fd, filename, config_data, config_len); - int rc = libxl_domain_suspend(ctx, domid, fd, 0, NULL); + int rc = libxl_domain_suspend(ctx, domid, fd, NULL, NULL); close(fd); if (rc < 0) @@ -3361,6 +3361,7 @@ static void migrate_do_preamble(int send } static void migrate_domain(uint32_t domid, const char *rune, int debug, + int max_iters, int max_factor, int abort_if_busy, const char *override_config_file) { pid_t child = -1; @@ -3369,7 +3370,12 @@ static void migrate_domain(uint32_t domi char *away_domname; char rc_buf; uint8_t *config_data; - int config_len, flags = LIBXL_SUSPEND_LIVE; + int config_len; + libxl_domain_suspend_properties props = { + .flags = LIBXL_SUSPEND_LIVE, + .max_iters = max_iters, + .max_factor = max_factor, + }; save_domain_core_begin(domid, override_config_file, &config_data, &config_len); @@ -3388,8 +3394,11 @@ static void migrate_domain(uint32_t domi xtl_stdiostream_adjust_flags(logger, XTL_STDIOSTREAM_HIDE_PROGRESS, 0); if (debug) - flags |= LIBXL_SUSPEND_DEBUG; - rc = libxl_domain_suspend(ctx, domid, send_fd, flags, NULL); + props.flags |= LIBXL_SUSPEND_DEBUG; + if (abort_if_busy) + props.flags |= LIBXL_SUSPEND_ABORT_IF_BUSY; + + rc = libxl_domain_suspend(ctx, domid, send_fd, &props, NULL); if (rc) { fprintf(stderr, "migration sender: libxl_domain_suspend failed" " (rc=%d)\n", rc); @@ -3782,13 +3791,17 @@ int main_migrate(int argc, char **argv) char *rune = NULL; char *host; int opt, daemonize = 1, monitor = 1, debug = 0; + int max_iters = 0, max_factor = 0, abort_if_busy = 0; static struct option opts[] = { {"debug", 0, 0, 0x100}, + {"max_iters", 1, 0, 0x101}, + {"max_factor", 1, 0, 0x102}, + {"abort_if_busy", 0, 0, 0x103}, COMMON_LONG_OPTS, {0, 0, 0, 0} }; - SWITCH_FOREACH_OPT(opt, "FC:s:e", opts, "migrate", 2) { + SWITCH_FOREACH_OPT(opt, "FC:s:eM:m:A", opts, "migrate", 2) { case ''C'': config_filename = optarg; break; @@ -3805,6 +3818,15 @@ int main_migrate(int argc, char **argv) case 0x100: debug = 1; break; + case 0x101: + max_iters = atoi(optarg); + break; + case 0x102: + max_factor = atoi(optarg); + break; + case 0x103: + abort_if_busy = 1; + break; } domid = find_domain(argv[optind]); @@ -3820,7 +3842,7 @@ int main_migrate(int argc, char **argv) return 1; } - migrate_domain(domid, rune, debug, config_filename); + migrate_domain(domid, rune, debug, max_iters, max_factor, abort_if_busy, config_filename); return 0; } diff -r b3ed5c4b5078 -r a897276ed24d tools/libxl/xl_cmdtable.c --- a/tools/libxl/xl_cmdtable.c +++ b/tools/libxl/xl_cmdtable.c @@ -147,14 +147,18 @@ struct cmd_spec cmd_table[] = { &main_migrate, 0, 1, "Migrate a domain to another host", "[options] <Domain> <host>", - "-h Print this help.\n" - "-C <config> Send <config> instead of config file from creation.\n" - "-s <sshcommand> Use <sshcommand> instead of ssh. String will be passed\n" - " to sh. If empty, run <host> instead of ssh <host> xl\n" - " migrate-receive [-d -e]\n" - "-e Do not wait in the background (on <host>) for the death\n" - " of the domain.\n" - "--debug Print huge (!) amount of debug during the migration process." + "-h Print this help.\n" + "-C <config> Send <config> instead of config file from creation.\n" + "-s <sshcommand> Use <sshcommand> instead of ssh. String will be passed\n" + " to sh. If empty, run <host> instead of ssh <host> xl\n" + " migrate-receive [-d -e]\n" + "-e Do not wait in the background (on <host>) for the death\n" + " of the domain.\n" + "--debug Print huge (!) amount of debug during the migration process.\n" + "--max_iters <number> Number of iterations before final suspend (default: 30)\n" + "--max_factor <factor> Max amount of memory to transfer before final suspend (default: 3*RAM).\n" + "--abort_if_busy Abort migration instead of doing final suspend, if number\n" + " of iterations or amount of transfered memory is exceeded." }, { "dump-core", &main_dump_core, 0, 1, diff -r b3ed5c4b5078 -r a897276ed24d tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py +++ b/tools/python/xen/xend/XendCheckpoint.py @@ -118,9 +118,19 @@ def save(fd, dominfo, network, live, dst # enabled. Passing "0" simply uses the defaults compiled into # libxenguest; see the comments and/or code in xc_linux_save() for # more information. + max_iters = dominfo.info.get(''max_iters'', "0") + max_factor = dominfo.info.get(''max_factor'', "0") + abort_if_busy = dominfo.info.get(''abort_if_busy'', "0") + if max_iters == "None": + max_iters = "0" + if max_factor == "None": + max_factor = "0" + if abort_if_busy == "None": + abort_if_busy = "0" cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd), - str(dominfo.getDomid()), "0", "0", - str(int(live) | (int(hvm) << 2)) ] + str(dominfo.getDomid()), + max_iters, max_factor, + str( int(live) | (int(hvm) << 2) | (int(abort_if_busy) << 5) ) ] log.debug("[xc_save]: %s", string.join(cmd)) def saveInputHandler(line, tochild): diff -r b3ed5c4b5078 -r a897276ed24d tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py +++ b/tools/python/xen/xend/XendDomain.py @@ -1832,6 +1832,18 @@ class XendDomain: log.exception(ex) raise XendError(str(ex)) + def domain_migrate_constraints_set(self, domid, max_iters, max_factor, abort_if_busy): + """Set the Migrate Constraints of this domain. + @param domid: Domain ID or Name + @param max_iters: Number of iterations before final suspend + @param max_factor: Max amount of memory to transfer before final suspend + @param abort_if_busy: Abort migration instead of doing final suspend + """ + dominfo = self.domain_lookup_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) + dominfo.setMigrateConstraints(max_iters, max_factor, abort_if_busy) + def domain_maxmem_set(self, domid, mem): """Set the memory limit for a domain. diff -r b3ed5c4b5078 -r a897276ed24d tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py +++ b/tools/python/xen/xend/XendDomainInfo.py @@ -1459,6 +1459,18 @@ class XendDomainInfo: pci_conf = self.info[''devices''][dev_uuid][1] return map(pci_dict_to_bdf_str, pci_conf[''devs'']) + def setMigrateConstraints(self, max_iters, max_factor, abort_if_busy): + """Set the Migrate Constraints of this domain. + @param max_iters: Number of iterations before final suspend + @param max_factor: Max amount of memory to transfer before final suspend + @param abort_if_busy: Abort migration instead of doing final suspend + """ + log.debug("Setting migration constraints of domain %s (%s) to ''%s'' ''%s'' ''%s''.", + self.info[''name_label''], str(self.domid), max_iters, max_factor, abort_if_busy) + self.info[''max_iters''] = str(max_iters) + self.info[''max_factor''] = str(max_factor) + self.info[''abort_if_busy''] = str(abort_if_busy) + def setMemoryTarget(self, target): """Set the memory target of this domain. @param target: In MiB. diff -r b3ed5c4b5078 -r a897276ed24d tools/python/xen/xm/migrate.py --- a/tools/python/xen/xm/migrate.py +++ b/tools/python/xen/xm/migrate.py @@ -55,6 +55,18 @@ gopts.opt(''change_home_server'', short=''c fn=set_true, default=0, use="Change home server for managed domains.") +gopts.opt(''max_iters'',val=''max_iters'', + fn=set_int, default=0, + use="Number of iterations before final suspend (default: 30).") + +gopts.opt(''max_factor'', val=''max_factor'', + fn=set_int, default=0, + use="Max amount of memory to transfer before final suspend (default: 3*RAM).") + +gopts.opt(''abort_if_busy'', + fn=set_true, default=0, + use="Abort migration instead of doing final suspend.") + def help(): return str(gopts) @@ -80,6 +92,10 @@ def main(argv): server.xenapi.VM.migrate(vm_ref, dst, bool(opts.vals.live), other_config) else: + server.xend.domain.migrate_constraints_set(dom, + opts.vals.max_iters, + opts.vals.max_factor, + opts.vals.abort_if_busy) server.xend.domain.migrate(dom, dst, opts.vals.live, opts.vals.port, opts.vals.node,
Olaf Hering
2013-Mar-18 16:40 UTC
[PATCH 5 of 6] tools: add xm migrate --log_progress option
# HG changeset patch # User Olaf Hering <olaf@aepfle.de> # Date 1363623591 -3600 # Node ID 9e5bec95fd04a05d0b009adcc3f10f635ab638be # Parent a897276ed24d434daf5a5d3d8543343a23df2f8a tools: add xm migrate --log_progress option xc_domain_save does print progress messages. These verbose messages are disabled per default to avoid flood in xend.log. Sometimes it is helpful to see progress when migrating large and busy guests. So add a new option to xm migrate to actually enable the printing of progress messsages. xl migrate is not modified with this change because it does not use the stdio logger. Signed-off-by: Olaf Hering <olaf@aepfle.de> diff -r a897276ed24d -r 9e5bec95fd04 tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h +++ b/tools/libxc/xenguest.h @@ -29,6 +29,7 @@ #define XCFLAGS_STDVGA (1 << 3) #define XCFLAGS_CHECKPOINT_COMPRESS (1 << 4) #define XCFLAGS_DOMSAVE_ABORT_IF_BUSY (1 << 5) +#define XCFLAGS_PROGRESS (1 << 6) #define X86_64_B_SIZE 64 #define X86_32_B_SIZE 32 diff -r a897276ed24d -r 9e5bec95fd04 tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py +++ b/tools/python/xen/xend/XendCheckpoint.py @@ -121,16 +121,19 @@ def save(fd, dominfo, network, live, dst max_iters = dominfo.info.get(''max_iters'', "0") max_factor = dominfo.info.get(''max_factor'', "0") abort_if_busy = dominfo.info.get(''abort_if_busy'', "0") + log_save_progress = dominfo.info.get(''log_save_progress'', "0") if max_iters == "None": max_iters = "0" if max_factor == "None": max_factor = "0" if abort_if_busy == "None": abort_if_busy = "0" + if log_save_progress == "None": + log_save_progress = "0" cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd), str(dominfo.getDomid()), max_iters, max_factor, - str( int(live) | (int(hvm) << 2) | (int(abort_if_busy) << 5) ) ] + str( int(live) | (int(hvm) << 2) | (int(abort_if_busy) << 5) | (int(log_save_progress) << 6) ) ] log.debug("[xc_save]: %s", string.join(cmd)) def saveInputHandler(line, tochild): diff -r a897276ed24d -r 9e5bec95fd04 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py +++ b/tools/python/xen/xend/XendDomain.py @@ -1832,17 +1832,18 @@ class XendDomain: log.exception(ex) raise XendError(str(ex)) - def domain_migrate_constraints_set(self, domid, max_iters, max_factor, abort_if_busy): + def domain_migrate_constraints_set(self, domid, max_iters, max_factor, abort_if_busy, log_save_progress): """Set the Migrate Constraints of this domain. @param domid: Domain ID or Name @param max_iters: Number of iterations before final suspend @param max_factor: Max amount of memory to transfer before final suspend @param abort_if_busy: Abort migration instead of doing final suspend + @param log_save_progress: Log progress of migrate to xend.log """ dominfo = self.domain_lookup_nr(domid) if not dominfo: raise XendInvalidDomain(str(domid)) - dominfo.setMigrateConstraints(max_iters, max_factor, abort_if_busy) + dominfo.setMigrateConstraints(max_iters, max_factor, abort_if_busy, log_save_progress) def domain_maxmem_set(self, domid, mem): """Set the memory limit for a domain. diff -r a897276ed24d -r 9e5bec95fd04 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py +++ b/tools/python/xen/xend/XendDomainInfo.py @@ -1459,17 +1459,19 @@ class XendDomainInfo: pci_conf = self.info[''devices''][dev_uuid][1] return map(pci_dict_to_bdf_str, pci_conf[''devs'']) - def setMigrateConstraints(self, max_iters, max_factor, abort_if_busy): + def setMigrateConstraints(self, max_iters, max_factor, abort_if_busy, log_save_progress): """Set the Migrate Constraints of this domain. @param max_iters: Number of iterations before final suspend @param max_factor: Max amount of memory to transfer before final suspend @param abort_if_busy: Abort migration instead of doing final suspend + @param log_save_progress: Log progress of migrate to xend.log """ log.debug("Setting migration constraints of domain %s (%s) to ''%s'' ''%s'' ''%s''.", self.info[''name_label''], str(self.domid), max_iters, max_factor, abort_if_busy) self.info[''max_iters''] = str(max_iters) self.info[''max_factor''] = str(max_factor) self.info[''abort_if_busy''] = str(abort_if_busy) + self.info[''log_save_progress''] = str(log_save_progress) def setMemoryTarget(self, target): """Set the memory target of this domain. diff -r a897276ed24d -r 9e5bec95fd04 tools/python/xen/xm/migrate.py --- a/tools/python/xen/xm/migrate.py +++ b/tools/python/xen/xm/migrate.py @@ -67,6 +67,10 @@ gopts.opt(''abort_if_busy'', fn=set_true, default=0, use="Abort migration instead of doing final suspend.") +gopts.opt(''log_progress'', + fn=set_true, default=0, + use="Log progress of migration to xend.log") + def help(): return str(gopts) @@ -95,7 +99,8 @@ def main(argv): server.xend.domain.migrate_constraints_set(dom, opts.vals.max_iters, opts.vals.max_factor, - opts.vals.abort_if_busy) + opts.vals.abort_if_busy, + opts.vals.log_progress) server.xend.domain.migrate(dom, dst, opts.vals.live, opts.vals.port, opts.vals.node, diff -r a897276ed24d -r 9e5bec95fd04 tools/xcutils/xc_save.c --- a/tools/xcutils/xc_save.c +++ b/tools/xcutils/xc_save.c @@ -197,7 +197,8 @@ main(int argc, char **argv) si.suspend_evtchn = -1; lvl = si.flags & XCFLAGS_DEBUG ? XTL_DEBUG: XTL_DETAIL; - lflags = XTL_STDIOSTREAM_SHOW_PID | XTL_STDIOSTREAM_HIDE_PROGRESS; + lflags = XTL_STDIOSTREAM_SHOW_PID; + lflags |= si.flags & XCFLAGS_PROGRESS ? 0 : XTL_STDIOSTREAM_HIDE_PROGRESS; l = (xentoollog_logger *)xtl_createlogger_stdiostream(stderr, lvl, lflags); xch = si.xch = xc_interface_open(l, 0, 0); if (!si.xch)
Olaf Hering
2013-Mar-18 16:40 UTC
[PATCH 6 of 6] tools/xend: move assert to exception block
# HG changeset patch # User Olaf Hering <olaf@aepfle.de> # Date 1363624025 -3600 # Node ID 07a436dc7cbd710f219f9728704b5e8de8de0a39 # Parent 9e5bec95fd04a05d0b009adcc3f10f635ab638be tools/xend: move assert to exception block The two assert in restore trigger sometimes after hundreds of migrations. If they trigger the destination host will not destroy the newly created, still empty guest. After a second migration attempt to this host there will be two guets with the same name and uuid. This situation is poorly handled by the xm tools. With this change the guest will be destroyed. Signed-off-by: Olaf Hering <olaf@aepfle.de> diff -r 9e5bec95fd04 -r 07a436dc7cbd tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py +++ b/tools/python/xen/xend/XendCheckpoint.py @@ -262,9 +262,6 @@ def restore(xd, fd, dominfo = None, paus store_port = dominfo.getStorePort() console_port = dominfo.getConsolePort() - assert store_port - assert console_port - # if hvm, pass mem size to calculate the store_mfn if is_hvm: apic = int(dominfo.info[''platform''].get(''apic'', 0)) @@ -276,6 +273,9 @@ def restore(xd, fd, dominfo = None, paus pae = 0 try: + assert store_port + assert console_port + restore_image = image.create(dominfo, dominfo.info) memory = restore_image.getRequiredAvailableMemory( dominfo.info[''memory_dynamic_max''] / 1024)