Andrew Cooper
2013-Oct-03 16:11 UTC
[PATCH v7] tools/migrate: Fix regression when migrating from older version of Xen
Commit 00a4b65f8534c9e6521eab2e6ce796ae36037774 Sep 7 2010 "libxc: provide notification of final checkpoint to restore end" broke migration from any version of Xen using tools from prior to that commit Older tools have no idea about an XC_SAVE_ID_LAST_CHECKPOINT, causing newer tools xc_domain_restore() to start reading the qemu save record, as ctx->last_checkpoint is 0. The failure looks like: xc: error: Max batch size exceeded (1970103633). Giving up. where 1970103633 = 0x756d6551 = *(uint32_t*)"Qemu" With this fix in place, the behaviour for normal migrations is reverted to how it was before the regression; the migration is considered non-checkpointed right from the start. A XC_SAVE_ID_LAST_CHECKPOINT chunk seen in the migration stream is a nop. For checkpointed migrations the behaviour is unchanged. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> CC: Ian Campbell <Ian.Campbell@citrix.com> CC: Ian Jackson <Ian.Jackson@eu.citrix.com> CC: Shriram Rajagopalan <rshriram@cs.ubc.ca> --- Changes in v7: * Introduce the notion of LIBXL_ABI_VERSION 0x040400 * s/_FLAGS/_PARAMS/ for changes in libxl.h --- tools/libxc/xc_domain_restore.c | 3 ++- tools/libxc/xc_nomigrate.c | 2 +- tools/libxc/xenguest.h | 3 ++- tools/libxl/libxl.h | 33 ++++++++++++++++++++++++++++++- tools/libxl/libxl_create.c | 11 +++++++---- tools/libxl/libxl_internal.h | 1 + tools/libxl/libxl_save_callout.c | 2 +- tools/libxl/libxl_save_helper.c | 3 ++- tools/libxl/libxl_types.idl | 4 ++++ tools/libxl/xl_cmdimpl.c | 5 +++++ tools/python/xen/xend/XendCheckpoint.py | 2 +- tools/xcutils/xc_restore.c | 14 ++++++++----- 12 files changed, 67 insertions(+), 16 deletions(-) diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c index 939a76b..ecaf25d 100644 --- a/tools/libxc/xc_domain_restore.c +++ b/tools/libxc/xc_domain_restore.c @@ -1402,7 +1402,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, domid_t store_domid, unsigned int console_evtchn, unsigned long 
*console_mfn, domid_t console_domid, unsigned int hvm, unsigned int pae, int superpages, - int no_incr_generationid, + int no_incr_generationid, int checkpointed_stream, unsigned long *vm_generationid_addr, struct restore_callbacks *callbacks) { @@ -1474,6 +1474,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, ctx->superpages = superpages; ctx->hvm = hvm; + ctx->last_checkpoint = !checkpointed_stream; ctxt = xc_hypercall_buffer_alloc(xch, ctxt, sizeof(*ctxt)); diff --git a/tools/libxc/xc_nomigrate.c b/tools/libxc/xc_nomigrate.c index 73e7566..fb6d53e 100644 --- a/tools/libxc/xc_nomigrate.c +++ b/tools/libxc/xc_nomigrate.c @@ -35,7 +35,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, domid_t store_domid, unsigned int console_evtchn, unsigned long *console_mfn, domid_t console_domid, unsigned int hvm, unsigned int pae, int superpages, - int no_incr_generationid, + int no_incr_generationid, int checkpointed_stream, unsigned long *vm_generationid_addr, struct restore_callbacks *callbacks) { diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h index c12091f..a0e30e1 100644 --- a/tools/libxc/xenguest.h +++ b/tools/libxc/xenguest.h @@ -114,6 +114,7 @@ struct restore_callbacks { * @parm pae non-zero if this HVM domain has PAE support enabled * @parm superpages non-zero to allocate guest memory with superpages * @parm no_incr_generationid non-zero if generation id is NOT to be incremented + * @parm checkpointed_stream non-zero if the far end of the stream is using checkpointing * @parm vm_generationid_addr returned with the address of the generation id buffer * @parm callbacks non-NULL to receive a callback to restore toolstack * specific data @@ -124,7 +125,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, domid_t store_domid, unsigned int console_evtchn, unsigned long *console_mfn, domid_t console_domid, unsigned int hvm, unsigned int pae, int superpages, - int no_incr_generationid, + int 
no_incr_generationid, int checkpointed_stream, unsigned long *vm_generationid_addr, struct restore_callbacks *callbacks); /** diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h index 4cab294..0678e15 100644 --- a/tools/libxl/libxl.h +++ b/tools/libxl/libxl.h @@ -283,7 +283,8 @@ /* API compatibility. */ #ifdef LIBXL_API_VERSION -#if LIBXL_API_VERSION != 0x040200 && LIBXL_API_VERSION != 0x040300 +#if LIBXL_API_VERSION != 0x040200 && LIBXL_API_VERSION != 0x040300 && \ + LIBXL_API_VERSION != 0x040400 #error Unknown LIBXL_API_VERSION #endif #endif @@ -355,6 +356,14 @@ */ #define LIBXL_HAVE_SPICE_VDAGENT 1 +/* + * LIBXL_HAVE_DOMAIN_CREATE_RESTORE_PARAMS 1 + * + * If this is defined, libxl_domain_create_restore()''s API has changed to + * include a params structure. + */ +#define LIBXL_HAVE_DOMAIN_CREATE_RESTORE_PARAMS 1 + /* Functions annotated with LIBXL_EXTERNAL_CALLERS_ONLY may not be * called from within libxl itself. Callers outside libxl, who * do not #include libxl_internal.h, are fine. 
*/ @@ -578,9 +587,31 @@ int libxl_domain_create_new(libxl_ctx *ctx, libxl_domain_config *d_config, LIBXL_EXTERNAL_CALLERS_ONLY; int libxl_domain_create_restore(libxl_ctx *ctx, libxl_domain_config *d_config, uint32_t *domid, int restore_fd, + const libxl_domain_restore_flags *flags, const libxl_asyncop_how *ao_how, const libxl_asyncprogress_how *aop_console_how) LIBXL_EXTERNAL_CALLERS_ONLY; + +#if defined(LIBXL_API_VERSION) && LIBXL_API_VERSION < 0x040400 + +int static inline libxl_domain_create_restore_0x040200( + libxl_ctx *ctx, libxl_domain_config *d_config, + uint32_t *domid, int restore_fd, + const libxl_asyncop_how *ao_how, + const libxl_asyncprogress_how *aop_console_how) + LIBXL_EXTERNAL_CALLERS_ONLY +{ + libxl_domain_restore_flags flags; + flags.checkpointed_stream = 0; + + return libxl_domain_create_restore( + ctx, d_config, domid, restore_fd, &flags, ao_how, aop_console_how); +} + +#define libxl_domain_create_restore libxl_domain_create_restore_0x040200 + +#endif + /* A progress report will be made via ao_console_how, of type * domain_create_console_available, when the domain''s primary * console is available and can be connected to. 
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c index 7567238..4a6d4f6 100644 --- a/tools/libxl/libxl_create.c +++ b/tools/libxl/libxl_create.c @@ -1227,7 +1227,8 @@ static void domain_create_cb(libxl__egc *egc, static int do_domain_create(libxl_ctx *ctx, libxl_domain_config *d_config, uint32_t *domid, - int restore_fd, const libxl_asyncop_how *ao_how, + int restore_fd, int checkpointed_stream, + const libxl_asyncop_how *ao_how, const libxl_asyncprogress_how *aop_console_how) { AO_CREATE(ctx, 0, ao_how); @@ -1238,6 +1239,7 @@ static int do_domain_create(libxl_ctx *ctx, libxl_domain_config *d_config, cdcs->dcs.guest_config = d_config; cdcs->dcs.restore_fd = restore_fd; cdcs->dcs.callback = domain_create_cb; + cdcs->dcs.checkpointed_stream = checkpointed_stream; libxl__ao_progress_gethow(&cdcs->dcs.aop_console_how, aop_console_how); cdcs->domid_out = domid; @@ -1264,17 +1266,18 @@ int libxl_domain_create_new(libxl_ctx *ctx, libxl_domain_config *d_config, const libxl_asyncop_how *ao_how, const libxl_asyncprogress_how *aop_console_how) { - return do_domain_create(ctx, d_config, domid, -1, + return do_domain_create(ctx, d_config, domid, -1, 0, ao_how, aop_console_how); } int libxl_domain_create_restore(libxl_ctx *ctx, libxl_domain_config *d_config, uint32_t *domid, int restore_fd, + const libxl_domain_restore_flags *flags, const libxl_asyncop_how *ao_how, - const libxl_asyncprogress_how *aop_console_how) + const libxl_asyncprogress_how *aop_console_how) { return do_domain_create(ctx, d_config, domid, restore_fd, - ao_how, aop_console_how); + flags->checkpointed_stream, ao_how, aop_console_how); } /* diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index f051d91..4e15055 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -2514,6 +2514,7 @@ struct libxl__domain_create_state { libxl_asyncprogress_how aop_console_how; /* private to domain_create */ int guest_domid; + int checkpointed_stream; 
libxl__domain_build_state build_state; libxl__bootloader_state bl; libxl__stub_dm_spawn_state dmss; diff --git a/tools/libxl/libxl_save_callout.c b/tools/libxl/libxl_save_callout.c index f164e98..6e45b2f 100644 --- a/tools/libxl/libxl_save_callout.c +++ b/tools/libxl/libxl_save_callout.c @@ -60,7 +60,7 @@ void libxl__xc_domain_restore(libxl__egc *egc, libxl__domain_create_state *dcs, state->store_domid, state->console_port, state->console_domid, hvm, pae, superpages, no_incr_generationid, - cbflags, + cbflags, dcs->checkpointed_stream, }; dcs->shs.ao = ao; diff --git a/tools/libxl/libxl_save_helper.c b/tools/libxl/libxl_save_helper.c index 772251a..880565e 100644 --- a/tools/libxl/libxl_save_helper.c +++ b/tools/libxl/libxl_save_helper.c @@ -252,6 +252,7 @@ int main(int argc, char **argv) int superpages = strtoul(NEXTARG,0,10); int no_incr_genidad = strtoul(NEXTARG,0,10); unsigned cbflags = strtoul(NEXTARG,0,10); + int checkpointed = strtoul(NEXTARG,0,10); assert(!*++argv); helper_setcallbacks_restore(&helper_restore_callbacks, cbflags); @@ -264,7 +265,7 @@ int main(int argc, char **argv) r = xc_domain_restore(xch, io_fd, dom, store_evtchn, &store_mfn, store_domid, console_evtchn, &console_mfn, console_domid, hvm, pae, superpages, - no_incr_genidad, &genidad, + no_incr_genidad, checkpointed, &genidad, &helper_restore_callbacks); helper_stub_restore_results(store_mfn,console_mfn,genidad,0); complete(r); diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl index 049dbb5..f9ad571 100644 --- a/tools/libxl/libxl_types.idl +++ b/tools/libxl/libxl_types.idl @@ -259,6 +259,10 @@ libxl_domain_create_info = Struct("domain_create_info",[ ("run_hotplug_scripts",libxl_defbool), ], dir=DIR_IN) +libxl_domain_restore_flags = Struct("domain_restore_flags", [ + ("checkpointed_stream", integer), + ]) + MemKB = UInt(64, init_val = "LIBXL_MEMKB_DEFAULT") libxl_domain_sched_params = Struct("domain_sched_params",[ diff --git a/tools/libxl/xl_cmdimpl.c 
b/tools/libxl/xl_cmdimpl.c index a91b427..3ace31c 100644 --- a/tools/libxl/xl_cmdimpl.c +++ b/tools/libxl/xl_cmdimpl.c @@ -132,6 +132,7 @@ struct domain_create { int vnc; int vncautopass; int console_autoconnect; + int checkpointed_stream; const char *config_file; const char *extra_config; /* extra config string */ const char *restore_file; @@ -2064,8 +2065,11 @@ start: } if ( restoring ) { + libxl_domain_restore_flags flags; + flags.checkpointed_stream = dom_info->checkpointed_stream; ret = libxl_domain_create_restore(ctx, &d_config, &domid, restore_fd, + &flags, 0, autoconnect_console_how); /* * On subsequent reboot etc we should create the domain, not @@ -3679,6 +3683,7 @@ static void migrate_receive(int debug, int daemonize, int monitor, dom_info.paused = 1; dom_info.migrate_fd = recv_fd; dom_info.migration_domname_r = &migration_domname; + dom_info.checkpointed_stream = remus; rc = create_domain(&dom_info); if (rc < 0) { diff --git a/tools/python/xen/xend/XendCheckpoint.py b/tools/python/xen/xend/XendCheckpoint.py index fa09757..a433ffa 100644 --- a/tools/python/xen/xend/XendCheckpoint.py +++ b/tools/python/xen/xend/XendCheckpoint.py @@ -301,7 +301,7 @@ def restore(xd, fd, dominfo = None, paused = False, relocating = False): cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE), fd, dominfo.getDomid(), - store_port, console_port, int(is_hvm), pae, apic, superpages]) + store_port, console_port, int(is_hvm), pae, apic, superpages, 1]) log.debug("[xc_restore]: %s", string.join(cmd)) handler = RestoreInputHandler() diff --git a/tools/xcutils/xc_restore.c b/tools/xcutils/xc_restore.c index 35d725c..5ec90ac 100644 --- a/tools/xcutils/xc_restore.c +++ b/tools/xcutils/xc_restore.c @@ -19,7 +19,7 @@ int main(int argc, char **argv) { unsigned int domid, store_evtchn, console_evtchn; - unsigned int hvm, pae, apic, lflags; + unsigned int hvm, pae, apic, lflags, checkpointed; xc_interface *xch; int io_fd, ret; int superpages; @@ -27,9 +27,9 @@ main(int argc, char **argv) 
xentoollog_level lvl; xentoollog_logger *l; - if ( (argc != 8) && (argc != 9) ) + if ( !( argc >= 8 && argc <= 10) ) errx(1, "usage: %s iofd domid store_evtchn " - "console_evtchn hvm pae apic [superpages]", argv[0]); + "console_evtchn hvm pae apic [superpages [checkpointed]]", argv[0]); lvl = XTL_DETAIL; lflags = XTL_STDIOSTREAM_SHOW_PID | XTL_STDIOSTREAM_HIDE_PROGRESS; @@ -45,14 +45,18 @@ main(int argc, char **argv) hvm = atoi(argv[5]); pae = atoi(argv[6]); apic = atoi(argv[7]); - if ( argc == 9 ) + if ( argc >= 9 ) superpages = atoi(argv[8]); else superpages = !!hvm; + if ( argc >= 10 ) + checkpointed = atoi(argv[9]); + else + checkpointed = 0; ret = xc_domain_restore(xch, io_fd, domid, store_evtchn, &store_mfn, 0, console_evtchn, &console_mfn, 0, hvm, pae, superpages, - 0, NULL, NULL); + 0, checkpointed, NULL, NULL); if ( ret == 0 ) { -- 1.7.10.4
Shriram Rajagopalan
2013-Oct-07 03:00 UTC
Re: [PATCH v7] tools/migrate: Fix regression when migrating from older version of Xen
On Thu, Oct 3, 2013 at 9:11 AM, Andrew Cooper <andrew.cooper3@citrix.com> wrote: > Commit 00a4b65f8534c9e6521eab2e6ce796ae36037774 Sep 7 2010 > "libxc: provide notification of final checkpoint to restore end" > broke migration from any version of Xen using tools from prior to that > commit > > Older tools have no idea about an XC_SAVE_ID_LAST_CHECKPOINT, causing newer > tools xc_domain_restore() to start reading the qemu save record, as > ctx->last_checkpoint is 0. > > The failure looks like: > xc: error: Max batch size exceeded (1970103633). Giving up. > where 1970103633 = 0x756d6551 = *(uint32_t*)"Qemu" > > With this fix in place, the behaviour for normal migrations is reverted to > how > it was before the regression; the migration is considered non-checkpointed > right from the start. A XC_SAVE_ID_LAST_CHECKPOINT chunk seen in the > migration stream is a nop. For checkpointed migrations the behaviour is > unchanged. > > Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> > CC: Ian Campbell <Ian.Campbell@citrix.com> > CC: Ian Jackson <Ian.Jackson@eu.citrix.com> > CC: Shriram Rajagopalan <rshriram@cs.ubc.ca> > > Ack (as far as the Remus bits are concerned). Thanks, Shriram _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Ian Campbell
2013-Oct-10 10:26 UTC
Re: [PATCH v7] tools/migrate: Fix regression when migrating from older version of Xen
On Thu, 2013-10-03 at 17:11 +0100, Andrew Cooper wrote: > + #define LIBXL_HAVE_DOMAIN_CREATE_RESTORE_PARAMS 1 [...] > + const libxl_domain_restore_flags *flags, Looks like you forgot to change the type name...