This patch series introduces a basic framework to incorporate Remus into the libxl toolstack. The only functionality currently implemented is memory checkpointing. These patches depend on "V3 libxl: refactor suspend/resume code" patch series. and also on Stefano's "V4 libxl: save/restore qemu physmap" Changes in V3: * Rebased w.r.t Stefano's patches. Changes in V2: * Move libxl_domain_remus_start into the save_callbacks implementation patch * return proper error codes instead of -1. * Add documentation to docs/man/xl.pod.1 Shriram _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
rshriram@cs.ubc.ca
2012-Feb-03 07:00 UTC
[PATCH 1 of 2 V3] libxl: Remus - suspend/postflush/commit callbacks
# HG changeset patch # User Shriram Rajagopalan <rshriram@cs.ubc.ca> # Date 1328251593 28800 # Node ID 90e59c643c00c079996e13b75f89d1f0cd931a02 # Parent c7abecc14cceb18140335ebe20faad826282cd1f libxl: Remus - suspend/postflush/commit callbacks * Add libxl callback functions for Remus checkpoint suspend, postflush (aka resume) and checkpoint commit callbacks. * suspend callback is a stub that just bounces off libxl__domain_suspend_common_callback - which suspends the domain and saves the devices model state to a file. * resume callback currently just resumes the domain (and the device model). * commit callback just writes out the saved device model state to the network and sleeps for the checkpoint interval. * Introduce a new public API, libxl_domain_remus_start (currently a stub) that sets up the network and disk buffer and initiates continuous checkpointing. * Future patches will augument these callbacks/functions with more functionalities like issuing network buffer plug/unplug commands, disk checkpoint commands, etc. Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> diff -r c7abecc14cce -r 90e59c643c00 tools/libxl/libxl.c --- a/tools/libxl/libxl.c Thu Feb 02 22:46:33 2012 -0800 +++ b/tools/libxl/libxl.c Thu Feb 02 22:46:33 2012 -0800 @@ -471,6 +471,41 @@ libxl_vminfo * libxl_list_vm(libxl_ctx * return ptr; } +/* TODO: Explicit Checkpoint acknowledgements via recv_fd. */ +int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, + uint32_t domid, int send_fd, int recv_fd) +{ + GC_INIT(ctx); + libxl_domain_type type = libxl__domain_type(gc, domid); + int rc = 0; + + if (info == NULL) { + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, + "No remus_info structure supplied for domain %d", domid); + rc = ERROR_INVAL; + goto remus_fail; + } + + /* TBD: Remus setup - i.e. attach qdisc, enable disk buffering, etc */ + + /* Point of no return */ + rc = libxl__domain_suspend_common(gc, domid, send_fd, type, /* live */ 1, + /* debug */ 0, info); + + /* + * With Remus, if we reach this point, it means either + * backup died or some network error occurred preventing us + * from sending checkpoints. + */ + + /* TBD: Remus cleanup - i.e. detach qdisc, release other + * resources. + */ + remus_fail: + GC_FREE; + return rc; +} + int libxl_domain_suspend(libxl_ctx *ctx, libxl_domain_suspend_info *info, uint32_t domid, int fd) { @@ -480,7 +515,9 @@ int libxl_domain_suspend(libxl_ctx *ctx, int debug = info != NULL && info->flags & XL_SUSPEND_DEBUG; int rc = 0; - rc = libxl__domain_suspend_common(gc, domid, fd, type, live, debug); + rc = libxl__domain_suspend_common(gc, domid, fd, type, live, debug, + /* No Remus */ NULL); + if (!rc && type == LIBXL_DOMAIN_TYPE_HVM) rc = libxl__domain_save_device_model(gc, domid, fd); GC_FREE; diff -r c7abecc14cce -r 90e59c643c00 tools/libxl/libxl.h --- a/tools/libxl/libxl.h Thu Feb 02 22:46:33 2012 -0800 +++ b/tools/libxl/libxl.h Thu Feb 02 22:46:33 2012 -0800 @@ -266,6 +266,8 @@ typedef int (*libxl_console_ready)(libxl int libxl_domain_create_new(libxl_ctx *ctx, libxl_domain_config *d_config, libxl_console_ready cb, void *priv, uint32_t *domid); int libxl_domain_create_restore(libxl_ctx *ctx, libxl_domain_config *d_config, libxl_console_ready cb, void *priv, uint32_t *domid, int restore_fd); void libxl_domain_config_dispose(libxl_domain_config *d_config); +int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, + uint32_t domid, int send_fd, int recv_fd); int libxl_domain_suspend(libxl_ctx *ctx, libxl_domain_suspend_info *info, uint32_t domid, int fd); diff -r c7abecc14cce -r 90e59c643c00 tools/libxl/libxl_dom.c --- a/tools/libxl/libxl_dom.c Thu Feb 02 22:46:33 2012 -0800 +++ b/tools/libxl/libxl_dom.c Thu Feb 02 22:46:33 2012 -0800 @@ -467,6 +467,8 @@ struct suspendinfo { int hvm; unsigned int flags; int guest_responded; + int save_fd; /* Migration stream fd (for Remus) */ + int interval; /* checkpoint interval (for Remus) */ }; static int libxl__domain_suspend_common_switch_qemu_logdirty(int domid, unsigned int enable, void *data) @@ -731,9 +733,43 @@ static int libxl__toolstack_save(uint32_ return 0; } +static int libxl__remus_domain_suspend_callback(void *data) +{ + /* TODO: Issue disk and network checkpoint reqs. */ + return libxl__domain_suspend_common_callback(data); +} + +static int libxl__remus_domain_resume_callback(void *data) +{ + struct suspendinfo *si = data; + libxl_ctx *ctx = libxl__gc_owner(si->gc); + + /* Resumes the domain and the device model */ + if (libxl_domain_resume(ctx, si->domid, /* Fast Suspend */1)) + return 0; + + /* TODO: Deal with disk. Start a new network output buffer */ + return 1; +} + +static int libxl__remus_domain_checkpoint_callback(void *data) +{ + struct suspendinfo *si = data; + + /* This would go into tailbuf. */ + if (si->hvm && + libxl__domain_save_device_model(si->gc, si->domid, si->save_fd)) + return 0; + + /* TODO: Wait for disk and memory ack, release network buffer */ + usleep(si->interval * 1000); + return 1; +} + int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int fd, libxl_domain_type type, - int live, int debug) + int live, int debug, + const libxl_domain_remus_info *r_info) { libxl_ctx *ctx = libxl__gc_owner(gc); int flags; @@ -764,10 +800,20 @@ int libxl__domain_suspend_common(libxl__ return ERROR_INVAL; } + memset(&si, 0, sizeof(si)); flags = (live) ? XCFLAGS_LIVE : 0 | (debug) ? XCFLAGS_DEBUG : 0 | (hvm) ? XCFLAGS_HVM : 0; + if (r_info != NULL) { + si.interval = r_info->interval; + if (r_info->compression) + flags |= XCFLAGS_CHECKPOINT_COMPRESS; + si.save_fd = fd; + } + else + si.save_fd = -1; + si.domid = domid; si.flags = flags; si.hvm = hvm; @@ -791,7 +837,27 @@ int libxl__domain_suspend_common(libxl__ } memset(&callbacks, 0, sizeof(callbacks)); - callbacks.suspend = libxl__domain_suspend_common_callback; + if (r_info != NULL) { + /* save_callbacks: + * suspend - called after expiration of checkpoint interval, + * to *suspend* the domain. + * + * postcopy - called after the domain''s dirty pages have been + * copied into an output buffer. We *resume* the domain + * & the device model, return to the caller. Caller then + * flushes the output buffer, while the domain continues to run. + * + * checkpoint - called after the memory checkpoint has been flushed out + * into the network. Send the saved device state, *wait* + * for checkpoint ack and *release* the network buffer (TBD). + * Then *sleep* for the checkpoint interval. + */ + callbacks.suspend = libxl__remus_domain_suspend_callback; + callbacks.postcopy = libxl__remus_domain_resume_callback; + callbacks.checkpoint = libxl__remus_domain_checkpoint_callback; + } else + callbacks.suspend = libxl__domain_suspend_common_callback; + callbacks.switch_qemu_logdirty = libxl__domain_suspend_common_switch_qemu_logdirty; callbacks.toolstack_save = libxl__toolstack_save; callbacks.data = &si; diff -r c7abecc14cce -r 90e59c643c00 tools/libxl/libxl_internal.h --- a/tools/libxl/libxl_internal.h Thu Feb 02 22:46:33 2012 -0800 +++ b/tools/libxl/libxl_internal.h Thu Feb 02 22:46:33 2012 -0800 @@ -275,7 +275,8 @@ _hidden int libxl__domain_restore_common int fd); _hidden int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int fd, libxl_domain_type type, - int live, int debug); + int live, int debug, + const libxl_domain_remus_info *r_info); _hidden const char *libxl__device_model_savefile(libxl__gc *gc, uint32_t domid); _hidden int libxl__domain_suspend_device_model(libxl__gc *gc, uint32_t domid); _hidden int libxl__domain_resume_device_model(libxl__gc *gc, uint32_t domid); diff -r c7abecc14cce -r 90e59c643c00 tools/libxl/libxl_types.idl --- a/tools/libxl/libxl_types.idl Thu Feb 02 22:46:33 2012 -0800 +++ b/tools/libxl/libxl_types.idl Thu Feb 02 22:46:33 2012 -0800 @@ -397,3 +397,9 @@ libxl_sched_sedf = Struct("sched_sedf", ("extratime", integer), ("weight", integer), ], dispose_fn=None) + +libxl_domain_remus_info = Struct("domain_remus_info",[ + ("interval", integer), + ("blackhole", bool), + ("compression", bool), + ])
rshriram@cs.ubc.ca
2012-Feb-03 07:00 UTC
[PATCH 2 of 2 V3] libxl: Remus - xl remus command
# HG changeset patch # User Shriram Rajagopalan <rshriram@cs.ubc.ca> # Date 1328251593 28800 # Node ID 2965835b0c0062a359ac04c715d7af52c6ba60ee # Parent 90e59c643c00c079996e13b75f89d1f0cd931a02 libxl: Remus - xl remus command xl remus acts as a frontend to enable remus for a given domain. * At the moment, only memory checkpointing and blackhole replication is supported. Support for disk checkpointing and network buffering will be added in future. * Replication is done over ssh connection currently (like live migration with xl). Future versions will have an option to use simple tcp socket based replication channel (for both Remus & live migration). Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> diff -r 90e59c643c00 -r 2965835b0c00 docs/man/xl.pod.1 --- a/docs/man/xl.pod.1 Thu Feb 02 22:46:33 2012 -0800 +++ b/docs/man/xl.pod.1 Thu Feb 02 22:46:33 2012 -0800 @@ -348,6 +348,39 @@ Send <config> instead of config file fro =back +=item B<remus> [I<OPTIONS>] I<domain-id> I<host> + +Enable Remus HA for domain. By default B<xl> relies on ssh as a transport mechanism +between the two hosts. + +B<OPTIONS> + +=over 4 + +=item B<-i> I<MS> + +Checkpoint domain memory every MS milliseconds (default 200ms). + +=item B<-b> + +Replicate memory checkpoints to /dev/null (blackhole). + +=item B<-u> + +Disable memory checkpoint compression. + +=item B<-s> I<sshcommand> + +Use <sshcommand> instead of ssh. String will be passed to sh. If empty, run +<host> instead of ssh <host> xl migrate-receive -r [-e]. + +=item B<-e> + +On the new host, do not wait in the background (on <host>) for the death of the +domain. See the corresponding option of the I<create> subcommand. + +=back + =item B<pause> I<domain-id> Pause a domain. When in a paused state the domain will still consume diff -r 90e59c643c00 -r 2965835b0c00 tools/libxl/xl.h --- a/tools/libxl/xl.h Thu Feb 02 22:46:33 2012 -0800 +++ b/tools/libxl/xl.h Thu Feb 02 22:46:33 2012 -0800 @@ -94,6 +94,7 @@ int main_cpupoolnumasplit(int argc, char int main_getenforce(int argc, char **argv); int main_setenforce(int argc, char **argv); int main_loadpolicy(int argc, char **argv); +int main_remus(int argc, char **argv); void help(const char *command); diff -r 90e59c643c00 -r 2965835b0c00 tools/libxl/xl_cmdimpl.c --- a/tools/libxl/xl_cmdimpl.c Thu Feb 02 22:46:33 2012 -0800 +++ b/tools/libxl/xl_cmdimpl.c Thu Feb 02 22:46:33 2012 -0800 @@ -2828,7 +2828,7 @@ static void core_dump_domain(const char } static void migrate_receive(int debug, int daemonize, int monitor, - int send_fd, int recv_fd) + int send_fd, int recv_fd, int remus) { int rc, rc2; char rc_buf; @@ -2863,6 +2863,41 @@ static void migrate_receive(int debug, i exit(-rc); } + if (remus) { + /* If we are here, it means that the sender (primary) has crashed. + * TODO: Split-Brain Check. + */ + fprintf(stderr, "migration target: Remus Failover for domain %u\n", + domid); + + /* + * If domain renaming fails, lets just continue (as we need the domain + * to be up & dom names may not matter much, as long as its reachable + * over network). + * + * If domain unpausing fails, destroy domain ? Or is it better to have + * a consistent copy of the domain (memory, cpu state, disk) + * on atleast one physical host ? Right now, lets just leave the domain + * as is and let the Administrator decide (or troubleshoot). + */ + if (migration_domname) { + rc = libxl_domain_rename(ctx, domid, migration_domname, + common_domname); + if (rc) + fprintf(stderr, "migration target (Remus): " + "Failed to rename domain from %s to %s:%d\n", + migration_domname, common_domname, rc); + } + + rc = libxl_domain_unpause(ctx, domid); + if (rc) + fprintf(stderr, "migration target (Remus): " + "Failed to unpause domain %s (id: %u):%d\n", + common_domname, domid, rc); + + exit(rc ? -ERROR_FAIL: 0); + } + fprintf(stderr, "migration target: Transfer complete," " requesting permission to start domain.\n"); @@ -2989,10 +3024,10 @@ int main_restore(int argc, char **argv) int main_migrate_receive(int argc, char **argv) { - int debug = 0, daemonize = 1, monitor = 1; + int debug = 0, daemonize = 1, monitor = 1, remus = 0; int opt; - while ((opt = def_getopt(argc, argv, "Fed", "migrate-receive", 0)) != -1) { + while ((opt = def_getopt(argc, argv, "Fedr", "migrate-receive", 0)) != -1) { switch (opt) { case 0: case 2: return opt; @@ -3006,6 +3041,9 @@ int main_migrate_receive(int argc, char case ''d'': debug = 1; break; + case ''r'': + remus = 1; + break; } } @@ -3014,7 +3052,8 @@ int main_migrate_receive(int argc, char return 2; } migrate_receive(debug, daemonize, monitor, - STDOUT_FILENO, STDIN_FILENO); + STDOUT_FILENO, STDIN_FILENO, + remus); return 0; } @@ -5951,6 +5990,106 @@ done: return ret; } +int main_remus(int argc, char **argv) +{ + int opt, rc, daemonize = 1; + const char *ssh_command = "ssh"; + char *host = NULL, *rune = NULL, *domain = NULL; + libxl_domain_remus_info r_info; + int send_fd = -1, recv_fd = -1; + pid_t child = -1; + uint8_t *config_data; + int config_len; + + memset(&r_info, 0, sizeof(libxl_domain_remus_info)); + /* Defaults */ + r_info.interval = 200; + r_info.blackhole = 0; + r_info.compression = 1; + + while ((opt = def_getopt(argc, argv, "bui:s:e", "remus", 2)) != -1) { + switch (opt) { + case 0: case 2: + return opt; + + case ''i'': + r_info.interval = atoi(optarg); + break; + case ''b'': + r_info.blackhole = 1; + break; + case ''u'': + r_info.compression = 0; + break; + case ''s'': + ssh_command = optarg; + break; + case ''e'': + daemonize = 0; + break; + } + } + + domain = argv[optind]; + host = argv[optind + 1]; + + if (r_info.blackhole) { + find_domain(domain); + send_fd = open("/dev/null", O_RDWR, 0644); + if (send_fd < 0) { + perror("failed to open /dev/null"); + exit(-1); + } + } else { + + /* + * TODO: change this to plain TCP socket based channel + * instead of SSH. For both live-migration and Remus. + */ + if (!ssh_command[0]) { + rune = host; + } else { + if (asprintf(&rune, "exec %s %s xl migrate-receive -r %s", + ssh_command, host, + daemonize ? "" : " -e") < 0) + return 1; + } + + save_domain_core_begin(domain, NULL, &config_data, &config_len); + + if (!config_len) { + fprintf(stderr, "No config file stored for running domain and " + "none supplied - cannot start remus.\n"); + exit(1); + } + + child = create_migration_child(rune, &send_fd, &recv_fd); + + migrate_do_preamble(send_fd, recv_fd, child, config_data, config_len, + rune); + } + + /* Point of no return */ + rc = libxl_domain_remus_start(ctx, &r_info, domid, send_fd, recv_fd); + + /* If we are here, it means backup has failed/domain suspend failed. + * Try to resume the domain and exit gracefully. + * TODO: Split-Brain check. + */ + fprintf(stderr, "remus sender: libxl_domain_suspend failed" + " (rc=%d)\n", rc); + + if (rc == ERROR_GUEST_TIMEDOUT) + fprintf(stderr, "Failed to suspend domain at primary.\n"); + else { + fprintf(stderr, "Remus: Backup failed? resuming domain at primary.\n"); + libxl_domain_resume(ctx, domid, 1); + } + + close(send_fd); + return -ERROR_FAIL; +} + /* * Local variables: * mode: C diff -r 90e59c643c00 -r 2965835b0c00 tools/libxl/xl_cmdtable.c --- a/tools/libxl/xl_cmdtable.c Thu Feb 02 22:46:33 2012 -0800 +++ b/tools/libxl/xl_cmdtable.c Thu Feb 02 22:46:33 2012 -0800 @@ -412,6 +412,20 @@ struct cmd_spec cmd_table[] = { "Loads a new policy int the Flask Xen security module", "<policy file>", }, + { "remus", + &main_remus, 0, + "Enable Remus HA for domain", + "[options] <Domain> [<host>]", + "-i MS Checkpoint domain memory every MS milliseconds (def. 200ms).\n" + "-b Replicate memory checkpoints to /dev/null (blackhole)\n" + "-u Disable memory checkpoint compression.\n" + "-s <sshcommand> Use <sshcommand> instead of ssh. String will be passed\n" + " to sh. If empty, run <host> instead of \n" + " ssh <host> xl migrate-receive -r [-e]\n" + "-e Do not wait in the background (on <host>) for the death\n" + " of the domain." + + }, }; int cmdtable_len = sizeof(cmd_table)/sizeof(struct cmd_spec);
Ian Campbell
2012-Feb-09 12:38 UTC
Re: [PATCH 1 of 2 V3] libxl: Remus - suspend/postflush/commit callbacks
On Fri, 2012-02-03 at 07:00 +0000, rshriram@cs.ubc.ca wrote:> # HG changeset patch > # User Shriram Rajagopalan <rshriram@cs.ubc.ca> > # Date 1328251593 28800 > # Node ID 90e59c643c00c079996e13b75f89d1f0cd931a02 > # Parent c7abecc14cceb18140335ebe20faad826282cd1f > libxl: Remus - suspend/postflush/commit callbacks > > * Add libxl callback functions for Remus checkpoint suspend, postflush > (aka resume) and checkpoint commit callbacks. > * suspend callback is a stub that just bounces off > libxl__domain_suspend_common_callback - which suspends the domain and > saves the devices model state to a file. > * resume callback currently just resumes the domain (and the device model). > * commit callback just writes out the saved device model state to the > network and sleeps for the checkpoint interval. > * Introduce a new public API, libxl_domain_remus_start (currently a stub) > that sets up the network and disk buffer and initiates continuous > checkpointing. > > * Future patches will augument these callbacks/functions with more functionalities"augment"> like issuing network buffer plug/unplug commands, disk checkpoint commands, etc. > > Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> > > diff -r c7abecc14cce -r 90e59c643c00 tools/libxl/libxl.c > --- a/tools/libxl/libxl.c Thu Feb 02 22:46:33 2012 -0800 > +++ b/tools/libxl/libxl.c Thu Feb 02 22:46:33 2012 -0800 > @@ -471,6 +471,41 @@ libxl_vminfo * libxl_list_vm(libxl_ctx * > return ptr; > } > > +/* TODO: Explicit Checkpoint acknowledgements via recv_fd. */ > +int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, > + uint32_t domid, int send_fd, int recv_fd) > +{ > + GC_INIT(ctx); > + libxl_domain_type type = libxl__domain_type(gc, domid); > + int rc = 0; > + > + if (info == NULL) { > + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, > + "No remus_info structure supplied for domain %d", domid); > + rc = ERROR_INVAL; > + goto remus_fail; > + } > + > + /* TBD: Remus setup - i.e. attach qdisc, enable disk buffering, etc */Is it worth checking that the domain has no disks or network (IOW is this dangerous if they do?) [...]> @@ -791,7 +837,27 @@ int libxl__domain_suspend_common(libxl__ > } > > memset(&callbacks, 0, sizeof(callbacks)); > - callbacks.suspend = libxl__domain_suspend_common_callback; > + if (r_info != NULL) { > + /* save_callbacks: > + * suspend - called after expiration of checkpoint interval, > + * to *suspend* the domain. > + * > + * postcopy - called after the domain''s dirty pages have been > + * copied into an output buffer. We *resume* the domain > + * & the device model, return to the caller. Caller then > + * flushes the output buffer, while the domain continues to run. > + * > + * checkpoint - called after the memory checkpoint has been flushed out > + * into the network. Send the saved device state, *wait* > + * for checkpoint ack and *release* the network buffer (TBD). > + * Then *sleep* for the checkpoint interval. > + */I think this comment would be more useful in xenguest.h next to the callback struct. Otherwise the patch looks good. Ian.
On Fri, 2012-02-03 at 07:00 +0000, rshriram@cs.ubc.ca wrote:> # HG changeset patch > # User Shriram Rajagopalan <rshriram@cs.ubc.ca> > # Date 1328251593 28800 > # Node ID 2965835b0c0062a359ac04c715d7af52c6ba60ee > # Parent 90e59c643c00c079996e13b75f89d1f0cd931a02 > libxl: Remus - xl remus command > > xl remus acts as a frontend to enable remus for a given domain. > * At the moment, only memory checkpointing and blackhole replication is > supported. Support for disk checkpointing and network buffering will > be added in future.> * Replication is done over ssh connection currently (like live migration > with xl). Future versions will have an option to use simple tcp socket > based replication channel (for both Remus & live migration). > > Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> > > diff -r 90e59c643c00 -r 2965835b0c00 docs/man/xl.pod.1 > --- a/docs/man/xl.pod.1 Thu Feb 02 22:46:33 2012 -0800 > +++ b/docs/man/xl.pod.1 Thu Feb 02 22:46:33 2012 -0800 > @@ -348,6 +348,39 @@ Send <config> instead of config file fro > > =back > > +=item B<remus> [I<OPTIONS>] I<domain-id> I<host> > + > +Enable Remus HA for domain. By default B<xl> relies on ssh as a transport mechanism > +between the two hosts. > + > +B<OPTIONS> > + > +=over 4 > + > +=item B<-i> I<MS> > + > +Checkpoint domain memory every MS milliseconds (default 200ms). > + > +=item B<-b> > + > +Replicate memory checkpoints to /dev/null (blackhole).Is blackhole replication just a debug thing? If so would be useful to say so, if not then explaining when/why I might use this option would be helpful.> + /* > + * TODO: change this to plain TCP socket based channel > + * instead of SSH. For both live-migration and Remus. > + */IMHO comments like "TODO: split brain" are OK, because they highlight fairly critical missing pieces, but in general I don''t think we need to track everything which might potentially happen to the code in the future as TODO comments. Just add it to your own TODO list. o/w looks good, thanks. Ian.
Shriram Rajagopalan
2012-Feb-09 18:04 UTC
Re: [PATCH 1 of 2 V3] libxl: Remus - suspend/postflush/commit callbacks
On 2012-02-09, at 4:38 AM, Ian Campbell <Ian.Campbell@citrix.com> wrote:> On Fri, 2012-02-03 at 07:00 +0000, rshriram@cs.ubc.ca wrote: >> # >> >> +/* TODO: Explicit Checkpoint acknowledgements via recv_fd. */ >> +int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, >> + uint32_t domid, int send_fd, int recv_fd) >> +{ >> + GC_INIT(ctx); >> + libxl_domain_type type = libxl__domain_type(gc, domid); >> + int rc = 0; >> + >> + if (info == NULL) { >> + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, >> + "No remus_info structure supplied for domain %d", domid); >> + rc = ERROR_INVAL; >> + goto remus_fail; >> + } >> + >> + /* TBD: Remus setup - i.e. attach qdisc, enable disk buffering, etc */ > > Is it worth checking that the domain has no disks or network (IOW is > this dangerous if they do?) >A domain with no disks or network wouldnt be of much use though. Is it dangerous if they do but are not checkpointed ? Yes. Dangerous to what extent depends on how critical the application is. But then, this patch intends to put the framework in place so that people can at least play around with memory check pointing.> [...] >> @@ -791,7 +837,27 @@ int libxl__domain_suspend_common(libxl__ >> } >> >> memset(&callbacks, 0, sizeof(callbacks)); >> - callbacks.suspend = libxl__domain_suspend_common_callback; >> + if (r_info != NULL) { >> + /* save_callbacks: >> + * suspend - called after expiration of checkpoint interval, >> + * to *suspend* the domain. >> + * >> + * postcopy - called after the domain''s dirty pages have been >> + * copied into an output buffer. We *resume* the domain >> + * & the device model, return to the caller. Caller then >> + * flushes the output buffer, while the domain continues to run. >> + * >> + * checkpoint - called after the memory checkpoint has been flushed out >> + * into the network. Send the saved device state, *wait* >> + * for checkpoint ack and *release* the network buffer (TBD). >> + * Then *sleep* for the checkpoint interval. >> + */ > > I think this comment would be more useful in xenguest.h next to the > callback struct. > > Otherwise the patch looks good. > > Ian. > >