Shriram Rajagopalan
2012-Mar-22 19:48 UTC
[PATCH 0 of 4 RESEND V6] libxl: refactor suspend/resume code
This patch series refactors the suspend/resume code in libxl to minimize Remus-specific code in libxl. There are a couple of trivial bug fixes too. Changes in V6: Rebase patches to the latest tip. Depends on Stefano's "V6: libxl: save/restore qemu physmap". Changes in V5: Rebase patches to the latest tip. Depends on Stefano's "V5: libxl: save/restore qemu physmap". Changes in V4: 1. Incorporated Ian Campbell's comments on the suspend_cancel support patch. Changes in V3: 1. rebase patches based on Stefano's patches; use qmp_save instead of qmp_migrate 2. check if qemu moves to "running" state after resuming the device model 3. Moved comments on the co-operative suspend to libxl.h Changes in V2: 1. migrate code is refactored as save_config, create child, do_preamble instead of coalescing them all into one single function. 2. More documentation for suspend_cancel parameter in domain_resume 3. Minor nits Shriram
Shriram Rajagopalan
2012-Mar-22 19:48 UTC
[PATCH 1 of 4 RESEND V6] libxl: QMP stop/resume & refactor QEMU suspend/resume/save
# HG changeset patch # User Shriram Rajagopalan <rshriram@cs.ubc.ca> # Date 1332445125 25200 # Node ID 50fbdc5a27cb09806c77c916e55adad81201043e # Parent 26b26ad3679a6ff995440de9b7cee5406f21aa7f libxl: QMP stop/resume & refactor QEMU suspend/resume/save Implement QMP stop and resume functionality and split device model save into 3 parts: suspend_dm(domid) save_dm(domid, fd) resume_dm(domid) Integrate Device model suspend into suspend_common_callback Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> Acked-by: Ian Campbell <ian.campbell@citrix.com> diff -r 26b26ad3679a -r 50fbdc5a27cb tools/libxl/libxl_dom.c --- a/tools/libxl/libxl_dom.c Thu Mar 22 12:38:40 2012 -0700 +++ b/tools/libxl/libxl_dom.c Thu Mar 22 12:38:45 2012 -0700 @@ -522,6 +522,54 @@ static int libxl__domain_suspend_common_ return rc ? 0 : 1; } +int libxl__domain_suspend_device_model(libxl__gc *gc, uint32_t domid) +{ + libxl_ctx *ctx = libxl__gc_owner(gc); + int ret = 0; + const char *filename = libxl__device_model_savefile(gc, domid); + + switch (libxl__device_model_version_running(gc, domid)) { + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: { + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, + "Saving device model state to %s", filename); + libxl__qemu_traditional_cmd(gc, domid, "save"); + libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL); + break; + } + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN: + if (libxl__qmp_stop(gc, domid)) + return ERROR_FAIL; + /* Save DM state into filename */ + ret = libxl__qmp_save(gc, domid, filename); + if (ret) + unlink(filename); + break; + default: + return ERROR_INVAL; + } + + return ret; +} + +int libxl__domain_resume_device_model(libxl__gc *gc, uint32_t domid) +{ + + switch (libxl__device_model_version_running(gc, domid)) { + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: { + libxl__qemu_traditional_cmd(gc, domid, "continue"); + libxl__wait_for_device_model(gc, domid, "running", NULL, NULL, NULL); + break; + } + case 
LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN: + if (libxl__qmp_resume(gc, domid)) + return ERROR_FAIL; + default: + return ERROR_INVAL; + } + + return 0; +} + static int libxl__domain_suspend_common_callback(void *data) { struct suspendinfo *si = data; @@ -551,7 +599,7 @@ static int libxl__domain_suspend_common_ return 0; } si->guest_responded = 1; - return 1; + goto guest_suspended; } if (si->hvm && (!hvm_pvdrv || hvm_s_state)) { @@ -629,7 +677,7 @@ static int libxl__domain_suspend_common_ shutdown_reason = (info.flags >> XEN_DOMINF_shutdownshift) & XEN_DOMINF_shutdownmask; if (shutdown_reason == SHUTDOWN_suspend) { LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "guest has suspended"); - return 1; + goto guest_suspended; } } @@ -638,6 +686,17 @@ static int libxl__domain_suspend_common_ LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest did not suspend"); return 0; + + guest_suspended: + if (si->hvm) { + ret = libxl__domain_suspend_device_model(si->gc, si->domid); + if (ret) { + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, + "libxl__domain_suspend_device_model failed ret=%d", ret); + return 0; + } + } + return 1; } static int libxl__toolstack_save(uint32_t domid, uint8_t **buf, @@ -802,23 +861,6 @@ int libxl__domain_save_device_model(libx struct stat st; uint32_t qemu_state_len; - switch (libxl__device_model_version_running(gc, domid)) { - case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: { - LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, - "Saving device model state to %s", filename); - libxl__qemu_traditional_cmd(gc, domid, "save"); - libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL); - break; - } - case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN: - ret = libxl__qmp_save(gc, domid, (char *)filename); - if (ret) - goto out; - break; - default: - return ERROR_INVAL; - } - if (stat(filename, &st) < 0) { LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Unable to stat qemu save file\n"); diff -r 26b26ad3679a -r 50fbdc5a27cb tools/libxl/libxl_internal.h --- a/tools/libxl/libxl_internal.h Thu Mar 22 12:38:40 2012 -0700 +++ 
b/tools/libxl/libxl_internal.h Thu Mar 22 12:38:45 2012 -0700 @@ -643,6 +643,8 @@ _hidden int libxl__domain_suspend_common libxl_domain_type type, int live, int debug); _hidden const char *libxl__device_model_savefile(libxl__gc *gc, uint32_t domid); +_hidden int libxl__domain_suspend_device_model(libxl__gc *gc, uint32_t domid); +_hidden int libxl__domain_resume_device_model(libxl__gc *gc, uint32_t domid); _hidden int libxl__domain_save_device_model(libxl__gc *gc, uint32_t domid, int fd); _hidden void libxl__userdata_destroyall(libxl__gc *gc, uint32_t domid); @@ -1027,6 +1029,10 @@ _hidden int libxl__qmp_query_serial(libx _hidden int libxl__qmp_pci_add(libxl__gc *gc, int d, libxl_device_pci *pcidev); _hidden int libxl__qmp_pci_del(libxl__gc *gc, int domid, libxl_device_pci *pcidev); +/* Suspend QEMU. */ +_hidden int libxl__qmp_stop(libxl__gc *gc, int domid); +/* Resume QEMU. */ +_hidden int libxl__qmp_resume(libxl__gc *gc, int domid); /* Save current QEMU state into fd. */ _hidden int libxl__qmp_save(libxl__gc *gc, int domid, const char *filename); /* close and free the QMP handler */ diff -r 26b26ad3679a -r 50fbdc5a27cb tools/libxl/libxl_qmp.c --- a/tools/libxl/libxl_qmp.c Thu Mar 22 12:38:40 2012 -0700 +++ b/tools/libxl/libxl_qmp.c Thu Mar 22 12:38:45 2012 -0700 @@ -827,6 +827,38 @@ static int qmp_change(libxl__gc *gc, lib return rc; } +int libxl__qmp_stop(libxl__gc *gc, int domid) +{ + libxl__qmp_handler *qmp = NULL; + int rc = 0; + + qmp = libxl__qmp_initialize(gc, domid); + if (!qmp) + return ERROR_FAIL; + + rc = qmp_synchronous_send(qmp, "stop", NULL, + NULL, NULL, qmp->timeout); + + libxl__qmp_close(qmp); + return rc; +} + +int libxl__qmp_resume(libxl__gc *gc, int domid) +{ + libxl__qmp_handler *qmp = NULL; + int rc = 0; + + qmp = libxl__qmp_initialize(gc, domid); + if (!qmp) + return ERROR_FAIL; + + rc = qmp_synchronous_send(qmp, "cont", NULL, + NULL, NULL, qmp->timeout); + + libxl__qmp_close(qmp); + return rc; +} + int libxl__qmp_initializations(libxl__gc 
*gc, uint32_t domid, const libxl_domain_config *guest_config) {
Shriram Rajagopalan
2012-Mar-22 19:48 UTC
[PATCH 2 of 4 RESEND V6] libxl: support suspend_cancel in domain_resume
# HG changeset patch # User Shriram Rajagopalan <rshriram@cs.ubc.ca> # Date 1332445129 25200 # Node ID 3b1ee06570c9a535667f69797084bae1b50852e4 # Parent 50fbdc5a27cb09806c77c916e55adad81201043e libxl: support suspend_cancel in domain_resume Add an extra parameter to libxl_domain_resume indicating if the caller wishes to use the SUSPEND_CANCEL style resume instead of the normal resume. Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> Acked-by: Ian Campbell <ian.campbell@citrix.com> diff -r 50fbdc5a27cb -r 3b1ee06570c9 tools/libxl/libxl.c --- a/tools/libxl/libxl.c Thu Mar 22 12:38:45 2012 -0700 +++ b/tools/libxl/libxl.c Thu Mar 22 12:38:49 2012 -0700 @@ -339,24 +339,29 @@ int libxl_domain_rename(libxl_ctx *ctx, return rc; } -int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid) +int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid, int suspend_cancel) { GC_INIT(ctx); int rc = 0; - if (LIBXL__DOMAIN_IS_TYPE(gc, domid, HVM)) { - LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Called domain_resume on " - "non-cooperative hvm domain %u", domid); - rc = ERROR_NI; - goto out; - } - if (xc_domain_resume(ctx->xch, domid, 0)) { + if (xc_domain_resume(ctx->xch, domid, suspend_cancel)) { LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xc_domain_resume failed for domain %u", domid); rc = ERROR_FAIL; goto out; } + + if (LIBXL__DOMAIN_IS_TYPE(gc, domid, HVM)) { + rc = libxl__domain_resume_device_model(gc, domid); + if (rc) { + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, + "failed to resume device model for domain %u:%d", + domid, rc); + goto out; + } + } + if (!xs_resume_domain(ctx->xsh, domid)) { LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xs_resume_domain failed for domain %u", diff -r 50fbdc5a27cb -r 3b1ee06570c9 tools/libxl/libxl.h --- a/tools/libxl/libxl.h Thu Mar 22 12:38:45 2012 -0700 +++ b/tools/libxl/libxl.h Thu Mar 22 12:38:49 2012 -0700 @@ -387,7 +387,12 @@ int libxl_domain_create_restore(libxl_ct void libxl_domain_config_dispose(libxl_domain_config *d_config); int 
libxl_domain_suspend(libxl_ctx *ctx, libxl_domain_suspend_info *info, uint32_t domid, int fd); -int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid); + +/* @param suspend_cancel [from xenctrl.h:xc_domain_resume( @param fast )] + * If this parameter is true, use co-operative resume. The guest + * must support this. + */ +int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid, int suspend_cancel); int libxl_domain_shutdown(libxl_ctx *ctx, uint32_t domid); int libxl_domain_reboot(libxl_ctx *ctx, uint32_t domid); int libxl_domain_destroy(libxl_ctx *ctx, uint32_t domid); diff -r 50fbdc5a27cb -r 3b1ee06570c9 tools/libxl/xl_cmdimpl.c --- a/tools/libxl/xl_cmdimpl.c Thu Mar 22 12:38:45 2012 -0700 +++ b/tools/libxl/xl_cmdimpl.c Thu Mar 22 12:38:49 2012 -0700 @@ -2806,7 +2806,7 @@ static void migrate_domain(const char *d if (common_domname) { libxl_domain_rename(ctx, domid, away_domname, common_domname); } - rc = libxl_domain_resume(ctx, domid); + rc = libxl_domain_resume(ctx, domid, 0); if (!rc) fprintf(stderr, "migration sender: Resumed OK.\n"); fprintf(stderr, "Migration failed due to problems at target.\n"); @@ -2828,7 +2828,7 @@ static void migrate_domain(const char *d close(send_fd); migration_child_report(child, recv_fd); fprintf(stderr, "Migration failed, resuming at sender.\n"); - libxl_domain_resume(ctx, domid); + libxl_domain_resume(ctx, domid, 0); exit(-ERROR_FAIL); failed_badly:
Shriram Rajagopalan
2012-Mar-22 19:48 UTC
[PATCH 3 of 4 RESEND V6] libxl: refactor migrate_domain and generalize migrate_receive
# HG changeset patch # User Shriram Rajagopalan <rshriram@cs.ubc.ca> # Date 1332445130 25200 # Node ID 17e745b79fb13378b2cc6a575c488cf902007285 # Parent 3b1ee06570c9a535667f69797084bae1b50852e4 libxl: refactor migrate_domain and generalize migrate_receive Refactor some tasks like establishing the migration channel, initial migration protocol exchange into separate functions, to facilitate re-use, when remus support is introduced. Also, make migrate_receive generic (instead of resorting to stdin and stdout as the file descriptors for communication). Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> Acked-by: Ian Campbell <ian.campbell@citrix.com> diff -r 3b1ee06570c9 -r 17e745b79fb1 tools/libxl/xl_cmdimpl.c --- a/tools/libxl/xl_cmdimpl.c Thu Mar 22 12:38:49 2012 -0700 +++ b/tools/libxl/xl_cmdimpl.c Thu Mar 22 12:38:50 2012 -0700 @@ -2586,6 +2586,43 @@ static int save_domain(const char *p, co exit(0); } +static pid_t create_migration_child(const char *rune, int *send_fd, + int *recv_fd) +{ + int sendpipe[2], recvpipe[2]; + pid_t child = -1; + + if (!rune || !send_fd || !recv_fd) + return -1; + + MUST( libxl_pipe(ctx, sendpipe) ); + MUST( libxl_pipe(ctx, recvpipe) ); + + child = libxl_fork(ctx); + if (child==-1) exit(1); + + if (!child) { + dup2(sendpipe[0], 0); + dup2(recvpipe[1], 1); + close(sendpipe[0]); close(sendpipe[1]); + close(recvpipe[0]); close(recvpipe[1]); + execlp("sh","sh","-c",rune,(char*)0); + perror("failed to exec sh"); + exit(-1); + } + + close(sendpipe[0]); + close(recvpipe[1]); + *send_fd = sendpipe[1]; + *recv_fd = recvpipe[0]; + + /* if receiver dies, we get an error and can clean up + rather than just dying */ + signal(SIGPIPE, SIG_IGN); + + return child; +} + static int migrate_read_fixedmessage(int fd, const void *msg, int msgsz, const char *what, const char *rune) { char buf[msgsz]; @@ -2671,53 +2708,17 @@ static void migration_child_report(pid_t migration_child = 0; } -static void migrate_domain(const char *domain_spec, const char 
*rune, - const char *override_config_file) +static void migrate_do_preamble(int send_fd, int recv_fd, pid_t child, + uint8_t *config_data, int config_len, + const char *rune) { - pid_t child = -1; - int rc; - int sendpipe[2], recvpipe[2]; - int send_fd, recv_fd; - libxl_domain_suspend_info suspinfo; - char *away_domname; - char rc_buf; - uint8_t *config_data; - int config_len; - - save_domain_core_begin(domain_spec, override_config_file, - &config_data, &config_len); - - if (!config_len) { - fprintf(stderr, "No config file stored for running domain and " - "none supplied - cannot migrate.\n"); + int rc = 0; + + if (send_fd < 0 || recv_fd < 0) { + fprintf(stderr, "migrate_do_preamble: invalid file descriptors\n"); exit(1); } - MUST( libxl_pipe(ctx, sendpipe) ); - MUST( libxl_pipe(ctx, recvpipe) ); - - child = libxl_fork(ctx); - if (child==-1) exit(1); - - if (!child) { - dup2(sendpipe[0], 0); - dup2(recvpipe[1], 1); - close(sendpipe[0]); close(sendpipe[1]); - close(recvpipe[0]); close(recvpipe[1]); - execlp("sh","sh","-c",rune,(char*)0); - perror("failed to exec sh"); - exit(-1); - } - - close(sendpipe[0]); - close(recvpipe[1]); - send_fd = sendpipe[1]; - recv_fd = recvpipe[0]; - - signal(SIGPIPE, SIG_IGN); - /* if receiver dies, we get an error and can clean up - rather than just dying */ - rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_banner, sizeof(migrate_receiver_banner)-1, "banner", rune); @@ -2730,6 +2731,34 @@ static void migrate_domain(const char *d save_domain_core_writeconfig(send_fd, "migration stream", config_data, config_len); +} + +static void migrate_domain(const char *domain_spec, const char *rune, + const char *override_config_file) +{ + pid_t child = -1; + int rc; + int send_fd = -1, recv_fd = -1; + libxl_domain_suspend_info suspinfo; + char *away_domname; + char rc_buf; + uint8_t *config_data; + int config_len; + + save_domain_core_begin(domain_spec, override_config_file, + &config_data, &config_len); + + if (!config_len) { + 
fprintf(stderr, "No config file stored for running domain and " + "none supplied - cannot migrate.\n"); + exit(1); + } + + child = create_migration_child(rune, &send_fd, &recv_fd); + + migrate_do_preamble(send_fd, recv_fd, child, config_data, config_len, + rune); + xtl_stdiostream_adjust_flags(logger, XTL_STDIOSTREAM_HIDE_PROGRESS, 0); memset(&suspinfo, 0, sizeof(suspinfo)); @@ -2853,7 +2882,8 @@ static void core_dump_domain(const char if (rc) { fprintf(stderr,"core dump failed (rc=%d)\n",rc);exit(-1); } } -static void migrate_receive(int debug, int daemonize, int monitor) +static void migrate_receive(int debug, int daemonize, int monitor, + int send_fd, int recv_fd) { int rc, rc2; char rc_buf; @@ -2865,7 +2895,7 @@ static void migrate_receive(int debug, i fprintf(stderr, "migration target: Ready to receive domain.\n"); - CHK_ERRNO( libxl_write_exactly(ctx, 1, + CHK_ERRNO( libxl_write_exactly(ctx, send_fd, migrate_receiver_banner, sizeof(migrate_receiver_banner)-1, "migration ack stream", @@ -2877,7 +2907,7 @@ static void migrate_receive(int debug, i dom_info.monitor = monitor; dom_info.paused = 1; dom_info.restore_file = "incoming migration stream"; - dom_info.migrate_fd = 0; /* stdin */ + dom_info.migrate_fd = recv_fd; dom_info.migration_domname_r = &migration_domname; dom_info.incr_generationid = 0; @@ -2891,13 +2921,13 @@ static void migrate_receive(int debug, i fprintf(stderr, "migration target: Transfer complete," " requesting permission to start domain.\n"); - rc = libxl_write_exactly(ctx, 1, + rc = libxl_write_exactly(ctx, send_fd, migrate_receiver_ready, sizeof(migrate_receiver_ready), "migration ack stream", "ready message"); if (rc) exit(-rc); - rc = migrate_read_fixedmessage(0, migrate_permission_to_go, + rc = migrate_read_fixedmessage(recv_fd, migrate_permission_to_go, sizeof(migrate_permission_to_go), "GO message", 0); if (rc) goto perhaps_destroy_notify_rc; @@ -2916,7 +2946,7 @@ static void migrate_receive(int debug, i rc = 0; 
perhaps_destroy_notify_rc: - rc2 = libxl_write_exactly(ctx, 1, + rc2 = libxl_write_exactly(ctx, send_fd, migrate_report, sizeof(migrate_report), "migration ack stream", "success/failure report"); @@ -2924,7 +2954,7 @@ static void migrate_receive(int debug, i rc_buf = -rc; assert(!!rc_buf == !!rc); - rc2 = libxl_write_exactly(ctx, 1, &rc_buf, 1, + rc2 = libxl_write_exactly(ctx, send_fd, &rc_buf, 1, "migration ack stream", "success/failure code"); if (rc2) exit(-ERROR_BADFAIL); @@ -2942,7 +2972,7 @@ static void migrate_receive(int debug, i fprintf(stderr, "migration target: Cleanup OK, granting sender" " permission to resume.\n"); - rc2 = libxl_write_exactly(ctx, 1, + rc2 = libxl_write_exactly(ctx, send_fd, migrate_permission_to_go, sizeof(migrate_permission_to_go), "migration ack stream", @@ -3039,7 +3069,9 @@ int main_migrate_receive(int argc, char help("migrate-receive"); return 2; } - migrate_receive(debug, daemonize, monitor); + migrate_receive(debug, daemonize, monitor, + STDOUT_FILENO, STDIN_FILENO); + return 0; }
Shriram Rajagopalan
2012-Mar-22 19:48 UTC
[PATCH 4 of 4 RESEND V6] libxl: resume instead of unpause on xl save -c
# HG changeset patch # User Shriram Rajagopalan <rshriram@cs.ubc.ca> # Date 1332445132 25200 # Node ID 3a794805a04655b3d73eac00ccc1a51a00b0dbfa # Parent 17e745b79fb13378b2cc6a575c488cf902007285 libxl: resume instead of unpause on xl save -c The guest is "suspended" via libxl_domain_suspend when taking a snapshot. So call libxl_domain_resume instead of libxl_domain_unpause, when taking a checkpoint of the domain (using xl save -c). Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> Acked-by: Ian Campbell <ian.campbell@citrix.com> diff -r 17e745b79fb1 -r 3a794805a046 tools/libxl/xl_cmdimpl.c --- a/tools/libxl/xl_cmdimpl.c Thu Mar 22 12:38:50 2012 -0700 +++ b/tools/libxl/xl_cmdimpl.c Thu Mar 22 12:38:52 2012 -0700 @@ -2579,7 +2579,7 @@ static int save_domain(const char *p, co close(fd); if (checkpoint) - libxl_domain_unpause(ctx, domid); + libxl_domain_resume(ctx, domid, 1); else libxl_domain_destroy(ctx, domid);