Andrew Cooper
2013-Jul-22 18:48 UTC
[Patch v3] tools/migrate: Fix regression when migrating from older version of Xen
Commit 00a4b65f8534c9e6521eab2e6ce796ae36037774 Sep 7 2010 "libxc: provide notification of final checkpoint to restore end" broke migration from any version of Xen using tools from prior to that commit Older tools have no idea about an XC_SAVE_ID_LAST_CHECKPOINT, causing newer tools xc_domain_restore() to start reading the qemu save record, as ctx->last_checkpoint is 0. The failure looks like: xc: error: Max batch size exceeded (1970103633). Giving up. where 1970103633 = 0x756d6551 = *(uint32_t*)"Qemu" In the case of a regular migrate, the checkpointed_stream parameter can be always cleared. This will cause ctx->last_checkpoint to be unconditionally set. It doesn''t matter if whe find a XC_SAVE_ID_LAST_CHECKPOINT chunk. For a checkpointed migration, the parameter should be set, causing ctx->last_checkpoint to start clear and can be set at an appropriate point in the checkpointed stream. Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> CC: Ian Campbell <Ian.Campbell@citrix.com> CC: Ian Jackson <Ian.Jackson@eu.citrix.com> CC: Shriram Rajagopalan <rshriram@cs.ubc.ca> --- Changes since v2: * Correct logic due to the inverted name, and plub the correct information through from remus Changes since v1: * Rename "last_checkpoint_unaware" to "checkpointed_stream" to be more generic yet still accurate as to the meaning and fix for the issue --- This has been compile tested, and functionally tested with XenServer making correct use of the new parameter in our use case requiring the bugfix in the first place. I am nowever not able to test xend or remus, but the code certainly should work. --- tools/libxc/xc_domain_restore.c | 3 ++- tools/libxc/xc_nomigrate.c | 2 +- tools/libxc/xenguest.h | 3 ++- tools/libxl/libxl_save_helper.c | 2 +- tools/python/xen/xend/XendCheckpoint.py | 2 +- tools/xcutils/xc_restore.c | 14 +++++++++----- 6 files changed, 16 insertions(+), 10 deletions(-) diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c index 63d36cd..297546c 100644 --- a/tools/libxc/xc_domain_restore.c +++ b/tools/libxc/xc_domain_restore.c @@ -1401,7 +1401,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, domid_t store_domid, unsigned int console_evtchn, unsigned long *console_mfn, domid_t console_domid, unsigned int hvm, unsigned int pae, int superpages, - int no_incr_generationid, + int no_incr_generationid, int checkpointed_stream, unsigned long *vm_generationid_addr, struct restore_callbacks *callbacks) { @@ -1472,6 +1472,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, ctx->superpages = superpages; ctx->hvm = hvm; + ctx->last_checkpoint = !checkpointed_stream; ctxt = xc_hypercall_buffer_alloc(xch, ctxt, sizeof(*ctxt)); diff --git a/tools/libxc/xc_nomigrate.c b/tools/libxc/xc_nomigrate.c index 73e7566..fb6d53e 100644 --- a/tools/libxc/xc_nomigrate.c +++ b/tools/libxc/xc_nomigrate.c @@ -35,7 +35,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, domid_t store_domid, unsigned int console_evtchn, unsigned long *console_mfn, domid_t console_domid, unsigned int hvm, unsigned int pae, int superpages, - int no_incr_generationid, + int no_incr_generationid, int checkpointed_stream, unsigned long *vm_generationid_addr, struct restore_callbacks *callbacks) { diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h index 4714bd2..2101810 100644 --- a/tools/libxc/xenguest.h +++ b/tools/libxc/xenguest.h @@ -114,6 +114,7 @@ struct restore_callbacks { * @parm pae non-zero if this HVM domain has PAE support enabled * @parm superpages non-zero to allocate guest memory with superpages * @parm no_incr_generationid non-zero if generation id is NOT to be incremented + * @parm checkpointed_stream non-zero if the far end of the stream is using checkpointing * @parm vm_generationid_addr returned with the address of the generation id buffer * @parm callbacks non-NULL to receive a callback to restore toolstack * specific data @@ -124,7 +125,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, domid_t store_domid, unsigned int console_evtchn, unsigned long *console_mfn, domid_t console_domid, unsigned int hvm, unsigned int pae, int superpages, - int no_incr_generationid, + int no_incr_generationid, int checkpointed_stream, unsigned long *vm_generationid_addr, struct restore_callbacks *callbacks); /** diff --git a/tools/libxl/libxl_save_helper.c b/tools/libxl/libxl_save_helper.c index 772251a..8f14b8b 100644 --- a/tools/libxl/libxl_save_helper.c +++ b/tools/libxl/libxl_save_helper.c @@ -264,7 +264,7 @@ int main(int argc, char **argv) r = xc_domain_restore(xch, io_fd, dom, store_evtchn, &store_mfn, store_domid, console_evtchn, &console_mfn, console_domid, hvm, pae, superpages, - no_incr_genidad, &genidad, + no_incr_genidad, 0, &genidad, &helper_restore_callbacks); helper_stub_restore_results(store_mfn,console_mfn,genidad,0); complete(r); diff --git a/tools/python/xen/xend/XendCheckpoint.py b/tools/python/xen/xend/XendCheckpoint.py index fa09757..a433ffa 100644 --- a/tools/python/xen/xend/XendCheckpoint.py +++ b/tools/python/xen/xend/XendCheckpoint.py @@ -301,7 +301,7 @@ def restore(xd, fd, dominfo = None, paused = False, relocating = False): cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE), fd, dominfo.getDomid(), - store_port, console_port, int(is_hvm), pae, apic, superpages]) + store_port, console_port, int(is_hvm), pae, apic, superpages, 1]) log.debug("[xc_restore]: %s", string.join(cmd)) handler = RestoreInputHandler() diff --git a/tools/xcutils/xc_restore.c b/tools/xcutils/xc_restore.c index 35d725c..5ec90ac 100644 --- a/tools/xcutils/xc_restore.c +++ b/tools/xcutils/xc_restore.c @@ -19,7 +19,7 @@ int main(int argc, char **argv) { unsigned int domid, store_evtchn, console_evtchn; - unsigned int hvm, pae, apic, lflags; + unsigned int hvm, pae, apic, lflags, checkpointed; xc_interface *xch; int io_fd, ret; int superpages; @@ -27,9 +27,9 @@ main(int argc, char **argv) xentoollog_level lvl; xentoollog_logger *l; - if ( (argc != 8) && (argc != 9) ) + if ( !( argc >= 8 && argc <= 10) ) errx(1, "usage: %s iofd domid store_evtchn " - "console_evtchn hvm pae apic [superpages]", argv[0]); + "console_evtchn hvm pae apic [superpages [checkpointed]]", argv[0]); lvl = XTL_DETAIL; lflags = XTL_STDIOSTREAM_SHOW_PID | XTL_STDIOSTREAM_HIDE_PROGRESS; @@ -45,14 +45,18 @@ main(int argc, char **argv) hvm = atoi(argv[5]); pae = atoi(argv[6]); apic = atoi(argv[7]); - if ( argc == 9 ) + if ( argc >= 9 ) superpages = atoi(argv[8]); else superpages = !!hvm; + if ( argc >= 10 ) + checkpointed = atoi(argv[9]); + else + checkpointed = 0; ret = xc_domain_restore(xch, io_fd, domid, store_evtchn, &store_mfn, 0, console_evtchn, &console_mfn, 0, hvm, pae, superpages, - 0, NULL, NULL); + 0, checkpointed, NULL, NULL); if ( ret == 0 ) { -- 1.7.10.4
Shriram Rajagopalan
2013-Jul-22 20:17 UTC
Re: [Patch v3] tools/migrate: Fix regression when migrating from older version of Xen
On Mon, Jul 22, 2013 at 2:48 PM, Andrew Cooper <andrew.cooper3@citrix.com> wrote:> Commit 00a4b65f8534c9e6521eab2e6ce796ae36037774 Sep 7 2010 > "libxc: provide notification of final checkpoint to restore end" > broke migration from any version of Xen using tools from prior to that commit > > Older tools have no idea about an XC_SAVE_ID_LAST_CHECKPOINT, causing newer > tools xc_domain_restore() to start reading the qemu save record, as > ctx->last_checkpoint is 0. > > The failure looks like: > xc: error: Max batch size exceeded (1970103633). Giving up. > where 1970103633 = 0x756d6551 = *(uint32_t*)"Qemu" > > In the case of a regular migrate, the checkpointed_stream parameter can be > always cleared. This will cause ctx->last_checkpoint to be unconditionally > set. It doesn''t matter if whe find a XC_SAVE_ID_LAST_CHECKPOINT chunk. > > For a checkpointed migration, the parameter should be set, causing > ctx->last_checkpoint to start clear and can be set at an appropriate point in > the checkpointed stream. > > Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> > CC: Ian Campbell <Ian.Campbell@citrix.com> > CC: Ian Jackson <Ian.Jackson@eu.citrix.com> > CC: Shriram Rajagopalan <rshriram@cs.ubc.ca> > > --- > Changes since v2: > * Correct logic due to the inverted name, and plub the correct information > through from remus > Changes since v1: > * Rename "last_checkpoint_unaware" to "checkpointed_stream" to be more > generic yet still accurate as to the meaning and fix for the issue > > --- > > This has been compile tested, and functionally tested with XenServer making > correct use of the new parameter in our use case requiring the bugfix in the > first place. I am nowever not able to test xend or remus, but the code > certainly should work. > --- > tools/libxc/xc_domain_restore.c | 3 ++- > tools/libxc/xc_nomigrate.c | 2 +- > tools/libxc/xenguest.h | 3 ++- > tools/libxl/libxl_save_helper.c | 2 +- > tools/python/xen/xend/XendCheckpoint.py | 2 +- > tools/xcutils/xc_restore.c | 14 +++++++++----- > 6 files changed, 16 insertions(+), 10 deletions(-) > > diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c > index 63d36cd..297546c 100644 > --- a/tools/libxc/xc_domain_restore.c > +++ b/tools/libxc/xc_domain_restore.c > @@ -1401,7 +1401,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, > domid_t store_domid, unsigned int console_evtchn, > unsigned long *console_mfn, domid_t console_domid, > unsigned int hvm, unsigned int pae, int superpages, > - int no_incr_generationid, > + int no_incr_generationid, int checkpointed_stream, > unsigned long *vm_generationid_addr, > struct restore_callbacks *callbacks) > { > @@ -1472,6 +1472,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, > > ctx->superpages = superpages; > ctx->hvm = hvm; > + ctx->last_checkpoint = !checkpointed_stream; > > ctxt = xc_hypercall_buffer_alloc(xch, ctxt, sizeof(*ctxt)); > > diff --git a/tools/libxc/xc_nomigrate.c b/tools/libxc/xc_nomigrate.c > index 73e7566..fb6d53e 100644 > --- a/tools/libxc/xc_nomigrate.c > +++ b/tools/libxc/xc_nomigrate.c > @@ -35,7 +35,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, > domid_t store_domid, unsigned int console_evtchn, > unsigned long *console_mfn, domid_t console_domid, > unsigned int hvm, unsigned int pae, int superpages, > - int no_incr_generationid, > + int no_incr_generationid, int checkpointed_stream, > unsigned long *vm_generationid_addr, > struct restore_callbacks *callbacks) > { > diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h > index 4714bd2..2101810 100644 > --- a/tools/libxc/xenguest.h > +++ b/tools/libxc/xenguest.h > @@ -114,6 +114,7 @@ struct restore_callbacks { > * @parm pae non-zero if this HVM domain has PAE support enabled > * @parm superpages non-zero to allocate guest memory with superpages > * @parm no_incr_generationid non-zero if generation id is NOT to be incremented > + * @parm checkpointed_stream non-zero if the far end of the stream is using checkpointing > * @parm vm_generationid_addr returned with the address of the generation id buffer > * @parm callbacks non-NULL to receive a callback to restore toolstack > * specific data > @@ -124,7 +125,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, > domid_t store_domid, unsigned int console_evtchn, > unsigned long *console_mfn, domid_t console_domid, > unsigned int hvm, unsigned int pae, int superpages, > - int no_incr_generationid, > + int no_incr_generationid, int checkpointed_stream, > unsigned long *vm_generationid_addr, > struct restore_callbacks *callbacks); > /** > diff --git a/tools/libxl/libxl_save_helper.c b/tools/libxl/libxl_save_helper.c > index 772251a..8f14b8b 100644 > --- a/tools/libxl/libxl_save_helper.c > +++ b/tools/libxl/libxl_save_helper.c > @@ -264,7 +264,7 @@ int main(int argc, char **argv) > r = xc_domain_restore(xch, io_fd, dom, store_evtchn, &store_mfn, > store_domid, console_evtchn, &console_mfn, > console_domid, hvm, pae, superpages, > - no_incr_genidad, &genidad, > + no_incr_genidad, 0, &genidad, > &helper_restore_callbacks); > helper_stub_restore_results(store_mfn,console_mfn,genidad,0); > complete(r); > diff --git a/tools/python/xen/xend/XendCheckpoint.py b/tools/python/xen/xend/XendCheckpoint.py > index fa09757..a433ffa 100644 > --- a/tools/python/xen/xend/XendCheckpoint.py > +++ b/tools/python/xen/xend/XendCheckpoint.py > @@ -301,7 +301,7 @@ def restore(xd, fd, dominfo = None, paused = False, relocating = False): > > cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE), > fd, dominfo.getDomid(), > - store_port, console_port, int(is_hvm), pae, apic, superpages]) > + store_port, console_port, int(is_hvm), pae, apic, superpages, 1]) > log.debug("[xc_restore]: %s", string.join(cmd)) > > handler = RestoreInputHandler() > diff --git a/tools/xcutils/xc_restore.c b/tools/xcutils/xc_restore.c > index 35d725c..5ec90ac 100644 > --- a/tools/xcutils/xc_restore.c > +++ b/tools/xcutils/xc_restore.c > @@ -19,7 +19,7 @@ int > main(int argc, char **argv) > { > unsigned int domid, store_evtchn, console_evtchn; > - unsigned int hvm, pae, apic, lflags; > + unsigned int hvm, pae, apic, lflags, checkpointed; > xc_interface *xch; > int io_fd, ret; > int superpages; > @@ -27,9 +27,9 @@ main(int argc, char **argv) > xentoollog_level lvl; > xentoollog_logger *l; > > - if ( (argc != 8) && (argc != 9) ) > + if ( !( argc >= 8 && argc <= 10) ) > errx(1, "usage: %s iofd domid store_evtchn " > - "console_evtchn hvm pae apic [superpages]", argv[0]); > + "console_evtchn hvm pae apic [superpages [checkpointed]]", argv[0]); > > lvl = XTL_DETAIL; > lflags = XTL_STDIOSTREAM_SHOW_PID | XTL_STDIOSTREAM_HIDE_PROGRESS; > @@ -45,14 +45,18 @@ main(int argc, char **argv) > hvm = atoi(argv[5]); > pae = atoi(argv[6]); > apic = atoi(argv[7]); > - if ( argc == 9 ) > + if ( argc >= 9 ) > superpages = atoi(argv[8]); > else > superpages = !!hvm; > + if ( argc >= 10 ) > + checkpointed = atoi(argv[9]); > + else > + checkpointed = 0; > > ret = xc_domain_restore(xch, io_fd, domid, store_evtchn, &store_mfn, 0, > console_evtchn, &console_mfn, 0, hvm, pae, superpages, > - 0, NULL, NULL); > + 0, checkpointed, NULL, NULL); > > if ( ret == 0 ) > { > -- > 1.7.10.4 >Yep, that plumbing logic via xc_restore should work. The code is fine by me. Acked-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>
Ian Campbell
2013-Jul-22 20:31 UTC
Re: [Patch v3] tools/migrate: Fix regression when migrating from older version of Xen
On Mon, 2013-07-22 at 19:48 +0100, Andrew Cooper wrote:> diff --git a/tools/libxl/libxl_save_helper.c b/tools/libxl/libxl_save_helper.c > index 772251a..8f14b8b 100644 > --- a/tools/libxl/libxl_save_helper.c > +++ b/tools/libxl/libxl_save_helper.c > @@ -264,7 +264,7 @@ int main(int argc, char **argv) > r = xc_domain_restore(xch, io_fd, dom, store_evtchn, &store_mfn, > store_domid, console_evtchn, &console_mfn, > console_domid, hvm, pae, superpages, > - no_incr_genidad, &genidad, > + no_incr_genidad, 0, &genidad, > &helper_restore_callbacks); > helper_stub_restore_results(store_mfn,console_mfn,genidad,0); > complete(r);Unfortunately I think you may need to plumb this one through as well, to handle xl_cmdimpl.c:migrate_receive called with remus = 1. This probably means adding a flags argument to libxl_domain_create_restore. Bit tricky from a libxl API stability PoV, we''ll probably need to take advantage of the LIBXL_API_VERSION thing. Bit of a faff but doable, Olaf had a reasonable looking scheme for something in <f6fedb087442fb54e31d.1364481811@probook.site>. Ian.