Andrew Cooper
2013-Sep-26 13:45 UTC
[PATCH v5 RFC] tools/migrate: Fix regression when migrating from older version of Xen
Commit 00a4b65f8534c9e6521eab2e6ce796ae36037774 Sep 7 2010 "libxc: provide notification of final checkpoint to restore end" broke migration from any version of Xen using tools from prior to that commit Older tools have no idea about an XC_SAVE_ID_LAST_CHECKPOINT, causing newer tools xc_domain_restore() to start reading the qemu save record, as ctx->last_checkpoint is 0. The failure looks like: xc: error: Max batch size exceeded (1970103633). Giving up. where 1970103633 = 0x756d6551 = *(uint32_t*)"Qemu" With this fix in place, the behaviour for normal migrations is reverted to how it was before the regression; the migration is considered non-checkpointed right from the start. A XC_SAVE_ID_LAST_CHECKPOINT chunk seen in the migration stream is a nop. For checkpointed migrations the behaviour is unchanged. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> CC: Ian Campbell <Ian.Campbell@citrix.com> CC: Ian Jackson <Ian.Jackson@eu.citrix.com> --- Changes in v5: * Demoted to RFC, due to playing with LIBXL_ABI_VERSION. * This should be functionally similar to v4, but changes the way in which backwards compatibility is obtained --- tools/libxc/xc_domain_restore.c | 3 ++- tools/libxc/xc_nomigrate.c | 2 +- tools/libxc/xenguest.h | 3 ++- tools/libxl/libxl.h | 32 ++++++++++++++++++++++++++----- tools/libxl/libxl_create.c | 28 ++++++++++++++++++++------- tools/libxl/libxl_internal.h | 1 + tools/libxl/libxl_save_callout.c | 2 +- tools/libxl/libxl_save_helper.c | 3 ++- tools/libxl/xl_cmdimpl.c | 6 +++++- tools/python/xen/xend/XendCheckpoint.py | 2 +- tools/xcutils/xc_restore.c | 14 +++++++++----- 11 files changed, 72 insertions(+), 24 deletions(-) diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c index 939a76b..ecaf25d 100644 --- a/tools/libxc/xc_domain_restore.c +++ b/tools/libxc/xc_domain_restore.c @@ -1402,7 +1402,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, domid_t store_domid, unsigned int console_evtchn, unsigned long *console_mfn, domid_t console_domid, unsigned int hvm, unsigned int pae, int superpages, - int no_incr_generationid, + int no_incr_generationid, int checkpointed_stream, unsigned long *vm_generationid_addr, struct restore_callbacks *callbacks) { @@ -1474,6 +1474,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, ctx->superpages = superpages; ctx->hvm = hvm; + ctx->last_checkpoint = !checkpointed_stream; ctxt = xc_hypercall_buffer_alloc(xch, ctxt, sizeof(*ctxt)); diff --git a/tools/libxc/xc_nomigrate.c b/tools/libxc/xc_nomigrate.c index 73e7566..fb6d53e 100644 --- a/tools/libxc/xc_nomigrate.c +++ b/tools/libxc/xc_nomigrate.c @@ -35,7 +35,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, domid_t store_domid, unsigned int console_evtchn, unsigned long *console_mfn, domid_t console_domid, unsigned int hvm, unsigned int pae, int superpages, - int no_incr_generationid, + int no_incr_generationid, int checkpointed_stream, unsigned long *vm_generationid_addr, struct restore_callbacks *callbacks) { diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h index c12091f..a0e30e1 100644 --- a/tools/libxc/xenguest.h +++ b/tools/libxc/xenguest.h @@ -114,6 +114,7 @@ struct restore_callbacks { * @parm pae non-zero if this HVM domain has PAE support enabled * @parm superpages non-zero to allocate guest memory with superpages * @parm no_incr_generationid non-zero if generation id is NOT to be incremented + * @parm checkpointed_stream non-zero if the far end of the stream is using checkpointing * @parm vm_generationid_addr returned with the address of the generation id buffer * @parm callbacks non-NULL to receive a callback to restore toolstack * specific data @@ -124,7 +125,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, domid_t store_domid, unsigned int console_evtchn, unsigned long *console_mfn, domid_t console_domid, unsigned int hvm, unsigned int pae, int superpages, - int no_incr_generationid, + int no_incr_generationid, int checkpointed_stream, unsigned long *vm_generationid_addr, struct restore_callbacks *callbacks); /** diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h index 4cab294..28af9c2 100644 --- a/tools/libxl/libxl.h +++ b/tools/libxl/libxl.h @@ -355,6 +355,20 @@ */ #define LIBXL_HAVE_SPICE_VDAGENT 1 +/* + * LIBXL_HAVE_LIBXL_DOMAIN_CREATE_RESTORE_CHECKPOINTED 1 + * + * If this is defined, libxl_domain_create_restore()''s API has changed to + * include a checkpointed_stream flag which gets passed down to libxc. + */ +#define LIBXL_HAVE_LIBXL_DOMAIN_CREATE_RESTORE_CHECKPOINTED 1 +#if LIBXL_API_VERSION == 0x040200 || LIBXL_API_VERSION == 0x040300 +# define libxl_domain_create_restore libxl_domain_create_restore_V040200 +#else +# define libxl_domain_create_restore libxl_domain_create_restore_V040400 +#endif + + /* Functions annotated with LIBXL_EXTERNAL_CALLERS_ONLY may not be * called from within libxl itself. Callers outside libxl, who * do not #include libxl_internal.h, are fine. */ @@ -576,11 +590,19 @@ int libxl_domain_create_new(libxl_ctx *ctx, libxl_domain_config *d_config, const libxl_asyncop_how *ao_how, const libxl_asyncprogress_how *aop_console_how) LIBXL_EXTERNAL_CALLERS_ONLY; -int libxl_domain_create_restore(libxl_ctx *ctx, libxl_domain_config *d_config, - uint32_t *domid, int restore_fd, - const libxl_asyncop_how *ao_how, - const libxl_asyncprogress_how *aop_console_how) - LIBXL_EXTERNAL_CALLERS_ONLY; +int libxl_domain_create_restore_V040200( + libxl_ctx *ctx, libxl_domain_config *d_config, + uint32_t *domid, int restore_fd, + const libxl_asyncop_how *ao_how, + const libxl_asyncprogress_how *aop_console_how) + LIBXL_EXTERNAL_CALLERS_ONLY; +int libxl_domain_create_restore_V040400( + libxl_ctx *ctx, libxl_domain_config *d_config, + uint32_t *domid, int restore_fd, + const libxl_asyncop_how *ao_how, + const libxl_asyncprogress_how *aop_console_how, + int checkpointed_stream) + LIBXL_EXTERNAL_CALLERS_ONLY; /* A progress report will be made via ao_console_how, of type * domain_create_console_available, when the domain''s primary * console is available and can be connected to. diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c index 7567238..647fa58 100644 --- a/tools/libxl/libxl_create.c +++ b/tools/libxl/libxl_create.c @@ -1228,7 +1228,8 @@ static void domain_create_cb(libxl__egc *egc, static int do_domain_create(libxl_ctx *ctx, libxl_domain_config *d_config, uint32_t *domid, int restore_fd, const libxl_asyncop_how *ao_how, - const libxl_asyncprogress_how *aop_console_how) + const libxl_asyncprogress_how *aop_console_how, + int checkpointed_stream) { AO_CREATE(ctx, 0, ao_how); libxl__app_domain_create_state *cdcs; @@ -1238,6 +1239,7 @@ static int do_domain_create(libxl_ctx *ctx, libxl_domain_config *d_config, cdcs->dcs.guest_config = d_config; cdcs->dcs.restore_fd = restore_fd; cdcs->dcs.callback = domain_create_cb; + cdcs->dcs.checkpointed_stream = checkpointed_stream; libxl__ao_progress_gethow(&cdcs->dcs.aop_console_how, aop_console_how); cdcs->domid_out = domid; @@ -1265,16 +1267,28 @@ int libxl_domain_create_new(libxl_ctx *ctx, libxl_domain_config *d_config, const libxl_asyncprogress_how *aop_console_how) { return do_domain_create(ctx, d_config, domid, -1, - ao_how, aop_console_how); + ao_how, aop_console_how, 0); } -int libxl_domain_create_restore(libxl_ctx *ctx, libxl_domain_config *d_config, - uint32_t *domid, int restore_fd, - const libxl_asyncop_how *ao_how, - const libxl_asyncprogress_how *aop_console_how) +int libxl_domain_create_restore_V040200( + libxl_ctx *ctx, libxl_domain_config *d_config, + uint32_t *domid, int restore_fd, + const libxl_asyncop_how *ao_how, + const libxl_asyncprogress_how *aop_console_how) +{ + return do_domain_create(ctx, d_config, domid, restore_fd, + ao_how, aop_console_how, 0); +} + +int libxl_domain_create_restore_V040400( + libxl_ctx *ctx, libxl_domain_config *d_config, + uint32_t *domid, int restore_fd, + const libxl_asyncop_how *ao_how, + const libxl_asyncprogress_how *aop_console_how, + int checkpointed_stream) { return do_domain_create(ctx, d_config, domid, restore_fd, - ao_how, aop_console_how); + ao_how, aop_console_how, checkpointed_stream); } /* diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index f051d91..4e15055 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -2514,6 +2514,7 @@ struct libxl__domain_create_state { libxl_asyncprogress_how aop_console_how; /* private to domain_create */ int guest_domid; + int checkpointed_stream; libxl__domain_build_state build_state; libxl__bootloader_state bl; libxl__stub_dm_spawn_state dmss; diff --git a/tools/libxl/libxl_save_callout.c b/tools/libxl/libxl_save_callout.c index f164e98..6e45b2f 100644 --- a/tools/libxl/libxl_save_callout.c +++ b/tools/libxl/libxl_save_callout.c @@ -60,7 +60,7 @@ void libxl__xc_domain_restore(libxl__egc *egc, libxl__domain_create_state *dcs, state->store_domid, state->console_port, state->console_domid, hvm, pae, superpages, no_incr_generationid, - cbflags, + cbflags, dcs->checkpointed_stream, }; dcs->shs.ao = ao; diff --git a/tools/libxl/libxl_save_helper.c b/tools/libxl/libxl_save_helper.c index 772251a..880565e 100644 --- a/tools/libxl/libxl_save_helper.c +++ b/tools/libxl/libxl_save_helper.c @@ -252,6 +252,7 @@ int main(int argc, char **argv) int superpages = strtoul(NEXTARG,0,10); int no_incr_genidad = strtoul(NEXTARG,0,10); unsigned cbflags = strtoul(NEXTARG,0,10); + int checkpointed = strtoul(NEXTARG,0,10); assert(!*++argv); helper_setcallbacks_restore(&helper_restore_callbacks, cbflags); @@ -264,7 +265,7 @@ int main(int argc, char **argv) r = xc_domain_restore(xch, io_fd, dom, store_evtchn, &store_mfn, store_domid, console_evtchn, &console_mfn, console_domid, hvm, pae, superpages, - no_incr_genidad, &genidad, + no_incr_genidad, checkpointed, &genidad, &helper_restore_callbacks); helper_stub_restore_results(store_mfn,console_mfn,genidad,0); complete(r); diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c index 642b130..95b7c5d 100644 --- a/tools/libxl/xl_cmdimpl.c +++ b/tools/libxl/xl_cmdimpl.c @@ -132,6 +132,7 @@ struct domain_create { int vnc; int vncautopass; int console_autoconnect; + int checkpointed_stream; const char *config_file; const char *extra_config; /* extra config string */ const char *restore_file; @@ -2035,7 +2036,9 @@ start: if ( restoring ) { ret = libxl_domain_create_restore(ctx, &d_config, &domid, restore_fd, - 0, autoconnect_console_how); + 0, autoconnect_console_how, + dom_info->checkpointed_stream); + /* * On subsequent reboot etc we should create the domain, not * restore/migrate-receive it again. @@ -3648,6 +3651,7 @@ static void migrate_receive(int debug, int daemonize, int monitor, dom_info.paused = 1; dom_info.migrate_fd = recv_fd; dom_info.migration_domname_r = &migration_domname; + dom_info.checkpointed_stream = remus; rc = create_domain(&dom_info); if (rc < 0) { diff --git a/tools/python/xen/xend/XendCheckpoint.py b/tools/python/xen/xend/XendCheckpoint.py index fa09757..a433ffa 100644 --- a/tools/python/xen/xend/XendCheckpoint.py +++ b/tools/python/xen/xend/XendCheckpoint.py @@ -301,7 +301,7 @@ def restore(xd, fd, dominfo = None, paused = False, relocating = False): cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE), fd, dominfo.getDomid(), - store_port, console_port, int(is_hvm), pae, apic, superpages]) + store_port, console_port, int(is_hvm), pae, apic, superpages, 1]) log.debug("[xc_restore]: %s", string.join(cmd)) handler = RestoreInputHandler() diff --git a/tools/xcutils/xc_restore.c b/tools/xcutils/xc_restore.c index 35d725c..5ec90ac 100644 --- a/tools/xcutils/xc_restore.c +++ b/tools/xcutils/xc_restore.c @@ -19,7 +19,7 @@ int main(int argc, char **argv) { unsigned int domid, store_evtchn, console_evtchn; - unsigned int hvm, pae, apic, lflags; + unsigned int hvm, pae, apic, lflags, checkpointed; xc_interface *xch; int io_fd, ret; int superpages; @@ -27,9 +27,9 @@ main(int argc, char **argv) xentoollog_level lvl; xentoollog_logger *l; - if ( (argc != 8) && (argc != 9) ) + if ( !( argc >= 8 && argc <= 10) ) errx(1, "usage: %s iofd domid store_evtchn " - "console_evtchn hvm pae apic [superpages]", argv[0]); + "console_evtchn hvm pae apic [superpages [checkpointed]]", argv[0]); lvl = XTL_DETAIL; lflags = XTL_STDIOSTREAM_SHOW_PID | XTL_STDIOSTREAM_HIDE_PROGRESS; @@ -45,14 +45,18 @@ main(int argc, char **argv) hvm = atoi(argv[5]); pae = atoi(argv[6]); apic = atoi(argv[7]); - if ( argc == 9 ) + if ( argc >= 9 ) superpages = atoi(argv[8]); else superpages = !!hvm; + if ( argc >= 10 ) + checkpointed = atoi(argv[9]); + else + checkpointed = 0; ret = xc_domain_restore(xch, io_fd, domid, store_evtchn, &store_mfn, 0, console_evtchn, &console_mfn, 0, hvm, pae, superpages, - 0, NULL, NULL); + 0, checkpointed, NULL, NULL); if ( ret == 0 ) { -- 1.7.10.4
Ian Campbell
2013-Sep-26 15:11 UTC
Re: [PATCH v5 RFC] tools/migrate: Fix regression when migrating from older version of Xen
Just commenting on the API compatibility bits here. Can you CC Shriram (remus guy) in the next iteration please. On Thu, 2013-09-26 at 14:45 +0100, Andrew Cooper wrote:> diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h > index 4cab294..28af9c2 100644 > --- a/tools/libxl/libxl.h > +++ b/tools/libxl/libxl.h > @@ -355,6 +355,20 @@ > */ > #define LIBXL_HAVE_SPICE_VDAGENT 1 > > +/* > + * LIBXL_HAVE_LIBXL_DOMAIN_CREATE_RESTORE_CHECKPOINTED 1LIBXL_HAVE_DOMAIN_CREATE... (no need to repeat the LIBXL, or at least we don''t elsewhere)> + * If this is defined, libxl_domain_create_restore()''s API has changed to > + * include a checkpointed_stream flag which gets passed down to libxc. > + */ > +#define LIBXL_HAVE_LIBXL_DOMAIN_CREATE_RESTORE_CHECKPOINTED 1 > +#if LIBXL_API_VERSION == 0x040200 || LIBXL_API_VERSION == 0x040300<= 0x040300 is fine I think.> +# define libxl_domain_create_restore libxl_domain_create_restore_V040200I prefer "...restore_0x040200" since it looks like the #define.> +#else > +# define libxl_domain_create_restore libxl_domain_create_restore_V040400 > +#endifI think you can omit the #else clause and just call the new function libxl_domain_create_restore. libxl will never set LIBXL_API_VERSION when building itself.> +int libxl_domain_create_restore_V040200( > + libxl_ctx *ctx, libxl_domain_config *d_config, > + uint32_t *domid, int restore_fd, > + const libxl_asyncop_how *ao_how, > + const libxl_asyncprogress_how *aop_console_how) > + LIBXL_EXTERNAL_CALLERS_ONLY; > +int libxl_domain_create_restore_V040400( > + libxl_ctx *ctx, libxl_domain_config *d_config, > + uint32_t *domid, int restore_fd, > + const libxl_asyncop_how *ao_how, > + const libxl_asyncprogress_how *aop_console_how, > + int checkpointed_stream) > + LIBXL_EXTERNAL_CALLERS_ONLY;We seem to habitually put the ao_how and aop_ stuff last, after the actual "arguments". Did we consider adding a libxl_domain_restore_params struct to contain the checkpointed flag? That would make this interface more easily extensible in the future.> +int libxl_domain_create_restore_V040200( > + libxl_ctx *ctx, libxl_domain_config *d_config, > + uint32_t *domid, int restore_fd, > + const libxl_asyncop_how *ao_how, > + const libxl_asyncprogress_how *aop_console_how) > +{ > + return do_domain_create(ctx, d_config, domid, restore_fd, > + ao_how, aop_console_how, 0); > +}Could static inline this as a call to the non suffixed version I think?
Andrew Cooper
2013-Sep-26 16:11 UTC
Re: [PATCH v5 RFC] tools/migrate: Fix regression when migrating from older version of Xen
On 26/09/13 16:11, Ian Campbell wrote:> Just commenting on the API compatibility bits here. > > Can you CC Shriram (remus guy) in the next iteration please. > > On Thu, 2013-09-26 at 14:45 +0100, Andrew Cooper wrote: >> diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h >> index 4cab294..28af9c2 100644 >> --- a/tools/libxl/libxl.h >> +++ b/tools/libxl/libxl.h >> @@ -355,6 +355,20 @@ >> */ >> #define LIBXL_HAVE_SPICE_VDAGENT 1 >> >> +/* >> + * LIBXL_HAVE_LIBXL_DOMAIN_CREATE_RESTORE_CHECKPOINTED 1 > LIBXL_HAVE_DOMAIN_CREATE... (no need to repeat the LIBXL, or at least we > don''t elsewhere) > >> + * If this is defined, libxl_domain_create_restore()''s API has changed to >> + * include a checkpointed_stream flag which gets passed down to libxc. >> + */ >> +#define LIBXL_HAVE_LIBXL_DOMAIN_CREATE_RESTORE_CHECKPOINTED 1 >> +#if LIBXL_API_VERSION == 0x040200 || LIBXL_API_VERSION == 0x040300 > <= 0x040300 is fine I think. > >> +# define libxl_domain_create_restore libxl_domain_create_restore_V040200 > I prefer "...restore_0x040200" since it looks like the #define. > >> +#else >> +# define libxl_domain_create_restore libxl_domain_create_restore_V040400 >> +#endif > I think you can omit the #else clause and just call the new function > libxl_domain_create_restore. libxl will never set LIBXL_API_VERSION when > building itself. > >> +int libxl_domain_create_restore_V040200( >> + libxl_ctx *ctx, libxl_domain_config *d_config, >> + uint32_t *domid, int restore_fd, >> + const libxl_asyncop_how *ao_how, >> + const libxl_asyncprogress_how *aop_console_how) >> + LIBXL_EXTERNAL_CALLERS_ONLY; >> +int libxl_domain_create_restore_V040400( >> + libxl_ctx *ctx, libxl_domain_config *d_config, >> + uint32_t *domid, int restore_fd, >> + const libxl_asyncop_how *ao_how, >> + const libxl_asyncprogress_how *aop_console_how, >> + int checkpointed_stream) >> + LIBXL_EXTERNAL_CALLERS_ONLY; > We seem to habitually put the ao_how and aop_ stuff last, after the > actual "arguments". > > Did we consider adding a libxl_domain_restore_params struct to contain > the checkpointed flag? That would make this interface more easily > extensible in the future.That was considered, but it would involve playing with the IDL (with which I have no experience), and involve substantially more changes for both in and out-of-tree consumers of the function.> >> +int libxl_domain_create_restore_V040200( >> + libxl_ctx *ctx, libxl_domain_config *d_config, >> + uint32_t *domid, int restore_fd, >> + const libxl_asyncop_how *ao_how, >> + const libxl_asyncprogress_how *aop_console_how) >> +{ >> + return do_domain_create(ctx, d_config, domid, restore_fd, >> + ao_how, aop_console_how, 0); >> +} > Could static inline this as a call to the non suffixed version I think? > >Cannot do both this and drop the #else clause. (unless I have missed something obvious) ~Andrew