rshriram@cs.ubc.ca
2012-Feb-01 04:38 UTC
[PATCH 0 of 6 V2] libxl: refactor suspend/resume code
This patch series refactors the suspend/resume code to minimize Remus specific code in libxl. There are a couple of trivial bug fixes too. Changes since the previous version: 1. migrate code is refactored as save_config , create child, do_preamble instead of coaelscing them all into one single function. 2. More documentation for suspend_cancel parameter in domain_resume 3. Minor nits Shriram
rshriram@cs.ubc.ca
2012-Feb-01 04:38 UTC
[PATCH 1 of 6 V2] libxl: helper function to send commands to traditional qemu
# HG changeset patch # User Shriram Rajagopalan <rshriram@cs.ubc.ca> # Date 1328070813 28800 # Node ID 79ebe623f3bef4803ce20670642ea513c69712ea # Parent e2722b24dc0962de37215320b05d1bb7c4c42864 libxl: helper function to send commands to traditional qemu Introduce a helper function to send commands to traditional qemu. qemu_pci_add_xenstore, qemu_pci_remove_xenstore, libxl__domain_save_device_model and libxl_domain_unpause have been refactored to use this function. Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> Acked-by: Ian Campbell <ian.campbell@citrix.com> diff -r e2722b24dc09 -r 79ebe623f3be tools/libxl/libxl.c --- a/tools/libxl/libxl.c Thu Jan 26 17:43:31 2012 +0000 +++ b/tools/libxl/libxl.c Tue Jan 31 20:33:33 2012 -0800 @@ -517,7 +517,7 @@ int libxl_domain_unpause(libxl_ctx *ctx, path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", domid); state = libxl__xs_read(gc, XBT_NULL, path); if (state != NULL && !strcmp(state, "paused")) { - libxl__xs_write(gc, XBT_NULL, libxl__sprintf(gc, "/local/domain/0/device-model/%d/command", domid), "continue"); + libxl__qemu_traditional_cmd(gc, domid, "continue"); libxl__wait_for_device_model(gc, domid, "running", NULL, NULL, NULL); } diff -r e2722b24dc09 -r 79ebe623f3be tools/libxl/libxl_dom.c --- a/tools/libxl/libxl_dom.c Thu Jan 26 17:43:31 2012 +0000 +++ b/tools/libxl/libxl_dom.c Tue Jan 31 20:33:33 2012 -0800 @@ -349,6 +349,15 @@ out: return rc; } +int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid, + const char *cmd) +{ + char *path = NULL; + path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command", + domid); + return libxl__xs_write(gc, XBT_NULL, path, "%s", cmd); +} + int libxl__domain_restore_common(libxl__gc *gc, uint32_t domid, libxl_domain_build_info *info, libxl__domain_build_state *state, @@ -631,12 +640,9 @@ int libxl__domain_save_device_model(libx switch (libxl__device_model_version_running(gc, domid)) { case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: { - char *path = NULL; LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Saving device model state to %s", filename); - path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command", - domid); - libxl__xs_write(gc, XBT_NULL, path, "save"); + libxl__qemu_traditional_cmd(gc, domid, "save"); libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL); break; } diff -r e2722b24dc09 -r 79ebe623f3be tools/libxl/libxl_internal.h --- a/tools/libxl/libxl_internal.h Thu Jan 26 17:43:31 2012 +0000 +++ b/tools/libxl/libxl_internal.h Tue Jan 31 20:33:33 2012 -0800 @@ -263,6 +263,8 @@ _hidden int libxl__build_hvm(libxl__gc * libxl_device_model_info *dm_info, libxl__domain_build_state *state); +_hidden int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid, + const char *cmd); _hidden int libxl__domain_rename(libxl__gc *gc, uint32_t domid, const char *old_name, const char *new_name, xs_transaction_t trans); diff -r e2722b24dc09 -r 79ebe623f3be tools/libxl/libxl_pci.c --- a/tools/libxl/libxl_pci.c Thu Jan 26 17:43:31 2012 +0000 +++ b/tools/libxl/libxl_pci.c Tue Jan 31 20:33:33 2012 -0800 @@ -602,9 +602,8 @@ static int qemu_pci_add_xenstore(libxl__ libxl__xs_write(gc, XBT_NULL, path, PCI_BDF, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); } - path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command", - domid); - xs_write(ctx->xsh, XBT_NULL, path, "pci-ins", strlen("pci-ins")); + + libxl__qemu_traditional_cmd(gc, domid, "pci-ins"); rc = libxl__wait_for_device_model(gc, domid, NULL, NULL, pci_ins_check, state); path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/parameter", @@ -857,12 +856,11 @@ static int qemu_pci_remove_xenstore(libx path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/parameter", domid); libxl__xs_write(gc, XBT_NULL, path, PCI_BDF, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); - path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command", domid); /* Remove all functions at once atomically by only signalling * device-model for function 0 */ if ( !force && (pcidev->vdevfn & 0x7) == 0 ) { - xs_write(ctx->xsh, XBT_NULL, path, "pci-rem", strlen("pci-rem")); + libxl__qemu_traditional_cmd(gc, domid, "pci-rem"); if (libxl__wait_for_device_model(gc, domid, "pci-removed", NULL, NULL, NULL) < 0) { LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Device Model didn''t respond in time");
rshriram@cs.ubc.ca
2012-Feb-01 04:38 UTC
[PATCH 2 of 6 V2] libxl: bugfix: create_domain() return to caller if !daemonize
# HG changeset patch # User Shriram Rajagopalan <rshriram@cs.ubc.ca> # Date 1328070813 28800 # Node ID 9f0a67bd54db89a23078913db578df72c5dba2e3 # Parent 79ebe623f3bef4803ce20670642ea513c69712ea libxl: bugfix: create_domain() return to caller if !daemonize Currently the create_domain function does not honor the daemonize flag properly. It exits irrespective of the value of the flag. This patch fixes the issue. Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> Acked-by: Ian Campbell <ian.campbell@citrix.com> diff -r 79ebe623f3be -r 9f0a67bd54db tools/libxl/xl_cmdimpl.c --- a/tools/libxl/xl_cmdimpl.c Tue Jan 31 20:33:33 2012 -0800 +++ b/tools/libxl/xl_cmdimpl.c Tue Jan 31 20:33:33 2012 -0800 @@ -1814,7 +1814,7 @@ waitpid_out: * If we have daemonized then do not return to the caller -- this has * already happened in the parent. */ - if ( !need_daemon ) + if ( daemonize && !need_daemon ) exit(ret); return ret;
rshriram@cs.ubc.ca
2012-Feb-01 04:38 UTC
[PATCH 3 of 6 V2] libxl: QMP stop/resume & refactor QEMU suspend/resume/save
# HG changeset patch # User Shriram Rajagopalan <rshriram@cs.ubc.ca> # Date 1328070813 28800 # Node ID 3ca830009da79443bb445d983a34f5f78664cdf4 # Parent 9f0a67bd54db89a23078913db578df72c5dba2e3 libxl: QMP stop/resume & refactor QEMU suspend/resume/save Implement QMP stop and resume functionality and split device model save into 3 parts: suspend_dm(domid) save_dm(domid, fd) resume_dm(domid) Integrate Device model suspend into suspend_common_callback Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> diff -r 9f0a67bd54db -r 3ca830009da7 tools/libxl/libxl_dom.c --- a/tools/libxl/libxl_dom.c Tue Jan 31 20:33:33 2012 -0800 +++ b/tools/libxl/libxl_dom.c Tue Jan 31 20:33:33 2012 -0800 @@ -425,6 +425,61 @@ static int libxl__domain_suspend_common_ return rc ? 0 : 1; } +int libxl__domain_suspend_device_model(libxl__gc *gc, uint32_t domid) +{ + libxl_ctx *ctx = libxl__gc_owner(gc); + int ret = 0, fd2 = -1; + const char *filename = libxl__device_model_savefile(gc, domid); + + switch (libxl__device_model_version_running(gc, domid)) { + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: { + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, + "Saving device model state to %s", filename); + libxl__qemu_traditional_cmd(gc, domid, "save"); + libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL); + break; + } + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN: + if (libxl__qmp_stop(gc, domid)) + return ERROR_FAIL; + fd2 = open(filename, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); + if (fd2 < 0) { + LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, + "Unable to create a QEMU save file\n"); + return ERROR_FAIL; + } + /* Save DM state into fd2 */ + ret = libxl__qmp_migrate(gc, domid, fd2); + if (ret) + unlink(filename); + close(fd2); + fd2 = -1; + break; + default: + return ERROR_INVAL; + } + + return ret; +} + +int libxl__domain_resume_device_model(libxl__gc *gc, uint32_t domid) +{ + + switch (libxl__device_model_version_running(gc, domid)) { + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: { + libxl__qemu_traditional_cmd(gc, domid, "continue"); + break; + } + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN: + if (libxl__qmp_resume(gc, domid)) + return ERROR_FAIL; + default: + return ERROR_INVAL; + } + + return 0; +} + static int libxl__domain_suspend_common_callback(void *data) { struct suspendinfo *si = data; @@ -454,7 +509,7 @@ static int libxl__domain_suspend_common_ return 0; } si->guest_responded = 1; - return 1; + goto suspend_dm; } if (si->hvm && (!hvm_pvdrv || hvm_s_state)) { @@ -532,7 +587,7 @@ static int libxl__domain_suspend_common_ shutdown_reason = (info.flags >> XEN_DOMINF_shutdownshift) & XEN_DOMINF_shutdownmask; if (shutdown_reason == SHUTDOWN_suspend) { LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "guest has suspended"); - return 1; + goto suspend_dm; } } @@ -541,6 +596,17 @@ static int libxl__domain_suspend_common_ LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest did not suspend"); return 0; + + suspend_dm: + if (si->hvm) { + ret = libxl__domain_suspend_device_model(si->gc, si->domid); + if (ret) { + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, + "libxl__domain_suspend_device_model failed ret=%d", ret); + return 0; + } + } + return 1; } int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int fd, @@ -638,32 +704,6 @@ int libxl__domain_save_device_model(libx struct stat st; uint32_t qemu_state_len; - switch (libxl__device_model_version_running(gc, domid)) { - case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: { - LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, - "Saving device model state to %s", filename); - libxl__qemu_traditional_cmd(gc, domid, "save"); - libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL); - break; - } - case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN: - fd2 = open(filename, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); - if (fd2 < 0) { - LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, - "Unable to create a QEMU save file\n"); - return ERROR_FAIL; - } - /* Save DM state into fd2 */ - ret = libxl__qmp_migrate(gc, domid, fd2); - if (ret) - goto out; - close(fd2); - fd2 = -1; - break; - default: - return ERROR_INVAL; - } - if (stat(filename, &st) < 0) { LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Unable to stat qemu save file\n"); diff -r 9f0a67bd54db -r 3ca830009da7 tools/libxl/libxl_internal.h --- a/tools/libxl/libxl_internal.h Tue Jan 31 20:33:33 2012 -0800 +++ b/tools/libxl/libxl_internal.h Tue Jan 31 20:33:33 2012 -0800 @@ -277,6 +277,8 @@ _hidden int libxl__domain_suspend_common libxl_domain_type type, int live, int debug); _hidden const char *libxl__device_model_savefile(libxl__gc *gc, uint32_t domid); +_hidden int libxl__domain_suspend_device_model(libxl__gc *gc, uint32_t domid); +_hidden int libxl__domain_resume_device_model(libxl__gc *gc, uint32_t domid); _hidden int libxl__domain_save_device_model(libxl__gc *gc, uint32_t domid, int fd); _hidden void libxl__userdata_destroyall(libxl__gc *gc, uint32_t domid); @@ -620,6 +622,10 @@ _hidden int libxl__qmp_query_serial(libx _hidden int libxl__qmp_pci_add(libxl__gc *gc, int d, libxl_device_pci *pcidev); _hidden int libxl__qmp_pci_del(libxl__gc *gc, int domid, libxl_device_pci *pcidev); +/* Suspend QEMU. */ +_hidden int libxl__qmp_stop(libxl__gc *gc, int domid); +/* Resume QEMU. */ +_hidden int libxl__qmp_resume(libxl__gc *gc, int domid); /* Save current QEMU state into fd. */ _hidden int libxl__qmp_migrate(libxl__gc *gc, int domid, int fd); /* close and free the QMP handler */ diff -r 9f0a67bd54db -r 3ca830009da7 tools/libxl/libxl_qmp.c --- a/tools/libxl/libxl_qmp.c Tue Jan 31 20:33:33 2012 -0800 +++ b/tools/libxl/libxl_qmp.c Tue Jan 31 20:33:33 2012 -0800 @@ -878,6 +878,38 @@ out: return rc; } +int libxl__qmp_stop(libxl__gc *gc, int domid) +{ + libxl__qmp_handler *qmp = NULL; + int rc = 0; + + qmp = libxl__qmp_initialize(libxl__gc_owner(gc), domid); + if (!qmp) + return ERROR_FAIL; + + rc = qmp_synchronous_send(qmp, "stop", NULL, + NULL, NULL, qmp->timeout); + + libxl__qmp_close(qmp); + return rc; +} + +int libxl__qmp_resume(libxl__gc *gc, int domid) +{ + libxl__qmp_handler *qmp = NULL; + int rc = 0; + + qmp = libxl__qmp_initialize(libxl__gc_owner(gc), domid); + if (!qmp) + return ERROR_FAIL; + + rc = qmp_synchronous_send(qmp, "cont", NULL, + NULL, NULL, qmp->timeout); + + libxl__qmp_close(qmp); + return rc; +} + int libxl__qmp_initializations(libxl_ctx *ctx, uint32_t domid) { libxl__qmp_handler *qmp = NULL;
rshriram@cs.ubc.ca
2012-Feb-01 04:38 UTC
[PATCH 4 of 6 V2] libxl: support suspend_cancel in domain_resume
# HG changeset patch # User Shriram Rajagopalan <rshriram@cs.ubc.ca> # Date 1328070814 28800 # Node ID 77041ffdf5c73d91b12e8367d0199e8431dec813 # Parent 3ca830009da79443bb445d983a34f5f78664cdf4 libxl: support suspend_cancel in domain_resume Add an extra parameter to libxl_domain_resume indicating if the caller wishes to use the SUSPEND_CANCEL style resume instead of the normal resume. Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> diff -r 3ca830009da7 -r 77041ffdf5c7 tools/libxl/libxl.c --- a/tools/libxl/libxl.c Tue Jan 31 20:33:33 2012 -0800 +++ b/tools/libxl/libxl.c Tue Jan 31 20:33:34 2012 -0800 @@ -229,24 +229,38 @@ int libxl_domain_rename(libxl_ctx *ctx, return rc; } -int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid) +/* From xc_resume.c .. + * Resuming execution of a domain after suspend can + * happen in one of two ways: + * 1. Resume with special return code. + * 2. Reset guest environment so it believes it is resumed in a new + * domain context. + * (2) should be used only for guests which cannot handle the special + * new return code. (1) is always safe (but slower). + */ +int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid, int suspend_cancel) { GC_INIT(ctx); int rc = 0; - if (LIBXL__DOMAIN_IS_TYPE(gc, domid, HVM)) { - LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Called domain_resume on " - "non-cooperative hvm domain %u", domid); - rc = ERROR_NI; - goto out; - } - if (xc_domain_resume(ctx->xch, domid, 0)) { + if (xc_domain_resume(ctx->xch, domid, suspend_cancel)) { LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xc_domain_resume failed for domain %u", domid); rc = ERROR_FAIL; goto out; } + + if (LIBXL__DOMAIN_IS_TYPE(gc, domid, HVM)) { + rc = libxl__domain_resume_device_model(gc, domid); + if (rc) { + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, + "failed to resume device model for domain %u:%d", + domid, rc); + goto out; + } + } + if (!xs_resume_domain(ctx->xsh, domid)) { LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xs_resume_domain failed for domain %u", diff -r 3ca830009da7 -r 77041ffdf5c7 tools/libxl/libxl.h --- a/tools/libxl/libxl.h Tue Jan 31 20:33:33 2012 -0800 +++ b/tools/libxl/libxl.h Tue Jan 31 20:33:34 2012 -0800 @@ -268,7 +268,7 @@ int libxl_domain_create_restore(libxl_ct void libxl_domain_config_dispose(libxl_domain_config *d_config); int libxl_domain_suspend(libxl_ctx *ctx, libxl_domain_suspend_info *info, uint32_t domid, int fd); -int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid); +int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid, int suspend_cancel); int libxl_domain_shutdown(libxl_ctx *ctx, uint32_t domid); int libxl_domain_reboot(libxl_ctx *ctx, uint32_t domid); int libxl_domain_destroy(libxl_ctx *ctx, uint32_t domid); diff -r 3ca830009da7 -r 77041ffdf5c7 tools/libxl/xl_cmdimpl.c --- a/tools/libxl/xl_cmdimpl.c Tue Jan 31 20:33:33 2012 -0800 +++ b/tools/libxl/xl_cmdimpl.c Tue Jan 31 20:33:34 2012 -0800 @@ -2751,7 +2751,7 @@ static void migrate_domain(const char *d if (common_domname) { libxl_domain_rename(ctx, domid, away_domname, common_domname); } - rc = libxl_domain_resume(ctx, domid); + rc = libxl_domain_resume(ctx, domid, 1); if (!rc) fprintf(stderr, "migration sender: Resumed OK.\n"); fprintf(stderr, "Migration failed due to problems at target.\n"); @@ -2773,7 +2773,7 @@ static void migrate_domain(const char *d close(send_fd); migration_child_report(child, recv_fd); fprintf(stderr, "Migration failed, resuming at sender.\n"); - libxl_domain_resume(ctx, domid); + libxl_domain_resume(ctx, domid, 1); exit(-ERROR_FAIL); failed_badly:
rshriram@cs.ubc.ca
2012-Feb-01 04:38 UTC
[PATCH 5 of 6 V2] libxl: refactor migrate_domain and generalize migrate_receive
# HG changeset patch # User Shriram Rajagopalan <rshriram@cs.ubc.ca> # Date 1328070814 28800 # Node ID 68ca0e9ea59c4c67d3a10ce968cca7aff50184fe # Parent 77041ffdf5c73d91b12e8367d0199e8431dec813 libxl: refactor migrate_domain and generalize migrate_receive Refactor some tasks like establishing the migration channel, initial migration protocol exchange into separate functions, to facilitate re-use, when remus support is introduced. Also, make migrate_receive generic (instead of resorting to stdin and stdout as the file descriptors for communication). Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> diff -r 77041ffdf5c7 -r 68ca0e9ea59c tools/libxl/xl_cmdimpl.c --- a/tools/libxl/xl_cmdimpl.c Tue Jan 31 20:33:34 2012 -0800 +++ b/tools/libxl/xl_cmdimpl.c Tue Jan 31 20:33:34 2012 -0800 @@ -2531,6 +2531,43 @@ static int save_domain(const char *p, co exit(0); } +static pid_t create_migration_child(const char *rune, int *send_fd, + int *recv_fd) +{ + int sendpipe[2], recvpipe[2]; + pid_t child = -1; + + if (!rune || !send_fd || !recv_fd) + return -1; + + MUST( libxl_pipe(ctx, sendpipe) ); + MUST( libxl_pipe(ctx, recvpipe) ); + + child = libxl_fork(ctx); + if (child==-1) exit(1); + + if (!child) { + dup2(sendpipe[0], 0); + dup2(recvpipe[1], 1); + close(sendpipe[0]); close(sendpipe[1]); + close(recvpipe[0]); close(recvpipe[1]); + execlp("sh","sh","-c",rune,(char*)0); + perror("failed to exec sh"); + exit(-1); + } + + close(sendpipe[0]); + close(recvpipe[1]); + *send_fd = sendpipe[1]; + *recv_fd = recvpipe[0]; + + /* if receiver dies, we get an error and can clean up + rather than just dying */ + signal(SIGPIPE, SIG_IGN); + + return child; +} + static int migrate_read_fixedmessage(int fd, const void *msg, int msgsz, const char *what, const char *rune) { char buf[msgsz]; @@ -2616,53 +2653,17 @@ static void migration_child_report(pid_t migration_child = 0; } -static void migrate_domain(const char *domain_spec, const char *rune, - const char *override_config_file) +static void migrate_do_preamble(int send_fd, int recv_fd, pid_t child, + uint8_t *config_data, int config_len, + const char *rune) { - pid_t child = -1; - int rc; - int sendpipe[2], recvpipe[2]; - int send_fd, recv_fd; - libxl_domain_suspend_info suspinfo; - char *away_domname; - char rc_buf; - uint8_t *config_data; - int config_len; - - save_domain_core_begin(domain_spec, override_config_file, - &config_data, &config_len); - - if (!config_len) { - fprintf(stderr, "No config file stored for running domain and " - "none supplied - cannot migrate.\n"); + int rc = 0; + + if (send_fd < 0 || recv_fd < 0) { + fprintf(stderr, "migrate_do_preamble: invalid file descriptors\n"); exit(1); } - MUST( libxl_pipe(ctx, sendpipe) ); - MUST( libxl_pipe(ctx, recvpipe) ); - - child = libxl_fork(ctx); - if (child==-1) exit(1); - - if (!child) { - dup2(sendpipe[0], 0); - dup2(recvpipe[1], 1); - close(sendpipe[0]); close(sendpipe[1]); - close(recvpipe[0]); close(recvpipe[1]); - execlp("sh","sh","-c",rune,(char*)0); - perror("failed to exec sh"); - exit(-1); - } - - close(sendpipe[0]); - close(recvpipe[1]); - send_fd = sendpipe[1]; - recv_fd = recvpipe[0]; - - signal(SIGPIPE, SIG_IGN); - /* if receiver dies, we get an error and can clean up - rather than just dying */ - rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_banner, sizeof(migrate_receiver_banner)-1, "banner", rune); @@ -2675,6 +2676,34 @@ static void migrate_domain(const char *d save_domain_core_writeconfig(send_fd, "migration stream", config_data, config_len); +} + +static void migrate_domain(const char *domain_spec, const char *rune, + const char *override_config_file) +{ + pid_t child = -1; + int rc; + int send_fd = -1, recv_fd = -1; + libxl_domain_suspend_info suspinfo; + char *away_domname; + char rc_buf; + uint8_t *config_data; + int config_len; + + save_domain_core_begin(domain_spec, override_config_file, + &config_data, &config_len); + + if (!config_len) { + fprintf(stderr, "No config file stored for running domain and " + "none supplied - cannot migrate.\n"); + exit(1); + } + + child = create_migration_child(rune, &send_fd, &recv_fd); + + migrate_do_preamble(send_fd, recv_fd, child, config_data, config_len, + rune); + xtl_stdiostream_adjust_flags(logger, XTL_STDIOSTREAM_HIDE_PROGRESS, 0); memset(&suspinfo, 0, sizeof(suspinfo)); @@ -2798,7 +2827,8 @@ static void core_dump_domain(const char if (rc) { fprintf(stderr,"core dump failed (rc=%d)\n",rc);exit(-1); } } -static void migrate_receive(int debug, int daemonize, int monitor) +static void migrate_receive(int debug, int daemonize, int monitor, + int send_fd, int recv_fd) { int rc, rc2; char rc_buf; @@ -2810,7 +2840,7 @@ static void migrate_receive(int debug, i fprintf(stderr, "migration target: Ready to receive domain.\n"); - CHK_ERRNO( libxl_write_exactly(ctx, 1, + CHK_ERRNO( libxl_write_exactly(ctx, send_fd, migrate_receiver_banner, sizeof(migrate_receiver_banner)-1, "migration ack stream", @@ -2822,7 +2852,7 @@ static void migrate_receive(int debug, i dom_info.monitor = monitor; dom_info.paused = 1; dom_info.restore_file = "incoming migration stream"; - dom_info.migrate_fd = 0; /* stdin */ + dom_info.migrate_fd = recv_fd; dom_info.migration_domname_r = &migration_domname; dom_info.no_incr_generationid = 1; @@ -2836,13 +2866,13 @@ static void migrate_receive(int debug, i fprintf(stderr, "migration target: Transfer complete," " requesting permission to start domain.\n"); - rc = libxl_write_exactly(ctx, 1, + rc = libxl_write_exactly(ctx, send_fd, migrate_receiver_ready, sizeof(migrate_receiver_ready), "migration ack stream", "ready message"); if (rc) exit(-rc); - rc = migrate_read_fixedmessage(0, migrate_permission_to_go, + rc = migrate_read_fixedmessage(recv_fd, migrate_permission_to_go, sizeof(migrate_permission_to_go), "GO message", 0); if (rc) goto perhaps_destroy_notify_rc; @@ -2861,7 +2891,7 @@ static void migrate_receive(int debug, i rc = 0; perhaps_destroy_notify_rc: - rc2 = libxl_write_exactly(ctx, 1, + rc2 = libxl_write_exactly(ctx, send_fd, migrate_report, sizeof(migrate_report), "migration ack stream", "success/failure report"); @@ -2869,7 +2899,7 @@ static void migrate_receive(int debug, i rc_buf = -rc; assert(!!rc_buf == !!rc); - rc2 = libxl_write_exactly(ctx, 1, &rc_buf, 1, + rc2 = libxl_write_exactly(ctx, send_fd, &rc_buf, 1, "migration ack stream", "success/failure code"); if (rc2) exit(-ERROR_BADFAIL); @@ -2887,7 +2917,7 @@ static void migrate_receive(int debug, i fprintf(stderr, "migration target: Cleanup OK, granting sender" " permission to resume.\n"); - rc2 = libxl_write_exactly(ctx, 1, + rc2 = libxl_write_exactly(ctx, send_fd, migrate_permission_to_go, sizeof(migrate_permission_to_go), "migration ack stream", @@ -2983,7 +3013,9 @@ int main_migrate_receive(int argc, char help("migrate-receive"); return 2; } - migrate_receive(debug, daemonize, monitor); + migrate_receive(debug, daemonize, monitor, + STDOUT_FILENO, STDIN_FILENO); + return 0; }
rshriram@cs.ubc.ca
2012-Feb-01 04:38 UTC
[PATCH 6 of 6 V2] libxl: resume instead of unpause on xl save -c
# HG changeset patch # User Shriram Rajagopalan <rshriram@cs.ubc.ca> # Date 1328070814 28800 # Node ID 545f88c2bc73b063a5d0e7adbd95baed4d93790f # Parent 68ca0e9ea59c4c67d3a10ce968cca7aff50184fe libxl: resume instead of unpause on xl save -c The guest is "suspended" via libxl_domain_suspend when taking a snapshot. So call libxl_domain_resume instead of libxl_domain_unpause, when taking a checkpoint of the domain (using xl save -c). Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> diff -r 68ca0e9ea59c -r 545f88c2bc73 tools/libxl/xl_cmdimpl.c --- a/tools/libxl/xl_cmdimpl.c Tue Jan 31 20:33:34 2012 -0800 +++ b/tools/libxl/xl_cmdimpl.c Tue Jan 31 20:33:34 2012 -0800 @@ -2524,7 +2524,7 @@ static int save_domain(const char *p, co close(fd); if (checkpoint) - libxl_domain_unpause(ctx, domid); + libxl_domain_resume(ctx, domid, 1); else libxl_domain_destroy(ctx, domid);
Stefano Stabellini
2012-Feb-01 14:14 UTC
Re: [PATCH 3 of 6 V2] libxl: QMP stop/resume & refactor QEMU suspend/resume/save
On Wed, 1 Feb 2012, rshriram@cs.ubc.ca wrote:> # HG changeset patch > # User Shriram Rajagopalan <rshriram@cs.ubc.ca> > # Date 1328070813 28800 > # Node ID 3ca830009da79443bb445d983a34f5f78664cdf4 > # Parent 9f0a67bd54db89a23078913db578df72c5dba2e3 > libxl: QMP stop/resume & refactor QEMU suspend/resume/save > > Implement QMP stop and resume functionality and split > device model save into 3 parts: > suspend_dm(domid) > save_dm(domid, fd) > resume_dm(domid) > > Integrate Device model suspend into suspend_common_callback > > Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> > > diff -r 9f0a67bd54db -r 3ca830009da7 tools/libxl/libxl_dom.c > --- a/tools/libxl/libxl_dom.c Tue Jan 31 20:33:33 2012 -0800 > +++ b/tools/libxl/libxl_dom.c Tue Jan 31 20:33:33 2012 -0800 > @@ -425,6 +425,61 @@ static int libxl__domain_suspend_common_ > return rc ? 0 : 1; > } > > +int libxl__domain_suspend_device_model(libxl__gc *gc, uint32_t domid) > +{ > + libxl_ctx *ctx = libxl__gc_owner(gc); > + int ret = 0, fd2 = -1; > + const char *filename = libxl__device_model_savefile(gc, domid); > + > + switch (libxl__device_model_version_running(gc, domid)) { > + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: { > + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, > + "Saving device model state to %s", filename); > + libxl__qemu_traditional_cmd(gc, domid, "save"); > + libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL); > + break; > + } > + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN: > + if (libxl__qmp_stop(gc, domid)) > + return ERROR_FAIL; > + fd2 = open(filename, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); > + if (fd2 < 0) { > + LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, > + "Unable to create a QEMU save file\n"); > + return ERROR_FAIL; > + } > + /* Save DM state into fd2 */ > + ret = libxl__qmp_migrate(gc, domid, fd2); > + if (ret) > + unlink(filename); > + close(fd2); > + fd2 = -1; > + break; > + default: > + return ERROR_INVAL; > + } > +Why do you need to introduce libxl__qmp_stop and libxl__qmp_resume? Also keep in mind that I am about to change the command sent to save the state of devices to "save_devices" because "migrate" is not working properly with Xen.
Shriram Rajagopalan
2012-Feb-01 16:59 UTC
Re: [PATCH 3 of 6 V2] libxl: QMP stop/resume & refactor QEMU suspend/resume/save
On 2012-02-01, at 6:14 AM, Stefano Stabellini <stefano.stabellini@eu.citrix.com> wrote:> On Wed, 1 Feb 2012, rshriram@cs.ubc.ca wrote: >> # HG changeset patch >> # User Shriram Rajagopalan <rshriram@cs.ubc.ca> >> # Date 1328070813 28800 >> # Node ID 3ca830009da79443bb445d983a34f5f78664cdf4 >> # Parent 9f0a67bd54db89a23078913db578df72c5dba2e3 >> libxl: QMP stop/resume & refactor QEMU suspend/resume/save >> >> Implement QMP stop and resume functionality and split >> device model save into 3 parts: >> suspend_dm(domid) >> save_dm(domid, fd) >> resume_dm(domid) >> >> Integrate Device model suspend into suspend_common_callback >> >> Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> >> >> diff -r 9f0a67bd54db -r 3ca830009da7 tools/libxl/libxl_dom.c >> --- a/tools/libxl/libxl_dom.c Tue Jan 31 20:33:33 2012 -0800 >> +++ b/tools/libxl/libxl_dom.c Tue Jan 31 20:33:33 2012 -0800 >> @@ -425,6 +425,61 @@ static int libxl__domain_suspend_common_ >> return rc ? 0 : 1; >> } >> >> +int libxl__domain_suspend_device_model(libxl__gc *gc, uint32_t domid) >> +{ >> + libxl_ctx *ctx = libxl__gc_owner(gc); >> + int ret = 0, fd2 = -1; >> + const char *filename = libxl__device_model_savefile(gc, domid); >> + >> + switch (libxl__device_model_version_running(gc, domid)) { >> + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: { >> + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, >> + "Saving device model state to %s", filename); >> + libxl__qemu_traditional_cmd(gc, domid, "save"); >> + libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL); >> + break; >> + } >> + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN: >> + if (libxl__qmp_stop(gc, domid)) >> + return ERROR_FAIL; >> + fd2 = open(filename, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); >> + if (fd2 < 0) { >> + LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, >> + "Unable to create a QEMU save file\n"); >> + return ERROR_FAIL; >> + } >> + /* Save DM state into fd2 */ >> + ret = libxl__qmp_migrate(gc, domid, fd2); >> + if (ret) >> + unlink(filename); >> + close(fd2); >> + fd2 = -1; >> + break; >> + default: >> + return ERROR_INVAL; >> + } >> + > > Why do you need to introduce libxl__qmp_stop and libxl__qmp_resume? > > Also keep in mind that I am about to change the command sent to save the > state of devices to "save_devices" because "migrate" is not working > properly with Xen. >I was modeling the qmp stuff similar to the old style qemu. Please correct me if I am wrong. Until qmp_migrate, qemu could still be writing data to the guest''s memory right? So, I thought qmp_stop was a sure shot way to ensure such stuff is not happening under the hood. With qmp_stop came qmp_resume.
Stefano Stabellini
2012-Feb-01 17:38 UTC
Re: [PATCH 3 of 6 V2] libxl: QMP stop/resume & refactor QEMU suspend/resume/save
On Wed, 1 Feb 2012, Shriram Rajagopalan wrote:> On 2012-02-01, at 6:14 AM, Stefano Stabellini <stefano.stabellini@eu.citrix.com> wrote: > > > On Wed, 1 Feb 2012, rshriram@cs.ubc.ca wrote: > >> # HG changeset patch > >> # User Shriram Rajagopalan <rshriram@cs.ubc.ca> > >> # Date 1328070813 28800 > >> # Node ID 3ca830009da79443bb445d983a34f5f78664cdf4 > >> # Parent 9f0a67bd54db89a23078913db578df72c5dba2e3 > >> libxl: QMP stop/resume & refactor QEMU suspend/resume/save > >> > >> Implement QMP stop and resume functionality and split > >> device model save into 3 parts: > >> suspend_dm(domid) > >> save_dm(domid, fd) > >> resume_dm(domid) > >> > >> Integrate Device model suspend into suspend_common_callback > >> > >> Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> > >> > >> diff -r 9f0a67bd54db -r 3ca830009da7 tools/libxl/libxl_dom.c > >> --- a/tools/libxl/libxl_dom.c Tue Jan 31 20:33:33 2012 -0800 > >> +++ b/tools/libxl/libxl_dom.c Tue Jan 31 20:33:33 2012 -0800 > >> @@ -425,6 +425,61 @@ static int libxl__domain_suspend_common_ > >> return rc ? 0 : 1; > >> } > >> > >> +int libxl__domain_suspend_device_model(libxl__gc *gc, uint32_t domid) > >> +{ > >> + libxl_ctx *ctx = libxl__gc_owner(gc); > >> + int ret = 0, fd2 = -1; > >> + const char *filename = libxl__device_model_savefile(gc, domid); > >> + > >> + switch (libxl__device_model_version_running(gc, domid)) { > >> + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: { > >> + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, > >> + "Saving device model state to %s", filename); > >> + libxl__qemu_traditional_cmd(gc, domid, "save"); > >> + libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL); > >> + break; > >> + } > >> + case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN: > >> + if (libxl__qmp_stop(gc, domid)) > >> + return ERROR_FAIL; > >> + fd2 = open(filename, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); > >> + if (fd2 < 0) { > >> + LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, > >> + "Unable to create a QEMU save file\n"); > >> + return ERROR_FAIL; > >> + } > >> + /* Save DM state into fd2 */ > >> + ret = libxl__qmp_migrate(gc, domid, fd2); > >> + if (ret) > >> + unlink(filename); > >> + close(fd2); > >> + fd2 = -1; > >> + break; > >> + default: > >> + return ERROR_INVAL; > >> + } > >> + > > > > Why do you need to introduce libxl__qmp_stop and libxl__qmp_resume? > > > > Also keep in mind that I am about to change the command sent to save the > > state of devices to "save_devices" because "migrate" is not working > > properly with Xen. > > > > I was modeling the qmp stuff similar to the old style qemu. > > Please correct me if I am wrong. Until qmp_migrate, qemu could still be writing data to the guest''s memory right? > > So, I thought qmp_stop was a sure shot way to ensure such stuff is not happening under the hood.That is correct, however qemu is synchronous, so if the VM is paused qemu is not going to do anything because it is not receiving any ioreqs from xen. Moreover during the device state save we temporarily stop qemu to make sure that the device state we save is consistent. That said, explicitly stopping and resuming qemu should be harmless.> With qmp_stop came qmp_resume.