# HG changeset patch # User Ian Campbell <ian.campbell@citrix.com> # Date 1297679825 0 # Node ID 7648e0e731992c7d5d8b7e0d1c8615cc37a978e4 # Parent cfb1ab79e11d24ed3703b490a81d24c570cd2d91 libxl: fix migrate for HVM guests Prior to 22909:6868f7f3ab3f libxl would loop waiting simultaneously for the domain the acknowledge a PV suspend request (by clearing the XenStore node) and for the domain to actually suspend. For HVM guests without PV drivers this same loop was simply waiting for the domain to suspend. In 22909:6868f7f3ab3f the original loop was split into two loops (first waiting for the acknowledgement and then for the actual suspend). This caused libxl to incorrectly wait for an HVM guest without PV drivers to acknowledge the XenStore request, which is not something it would ever do. Fix this by only waiting for an acknowledgement from a guest which contains PV drivers. Previously we were also making the request regardless of whether the guest had PV drivers, change that to only make the request if the guest has PV drivers. Lastly there is no need to sample HVM_PARAM_ACPI_S_STATE twice and not doing so simplifies the test for PVHVM vs. normal HVM guests. Tested with: Windows with GPL PV drivers (event channel suspend mode) Windows without PV drivers (xc_domain_shutdown mode) Linux PV (PV with XenBus control node mode) Linux HVM (PVHVM with XenBus control node mode (*)) Linux HVM (xc_domain_shutdown mode) (*) In this case the kernel didn''t actually suspend, due to: PM: Device input1 failed to suspend: error -22 xen suspend: dpm_suspend_start -22 which may be a misconfiguration in my setup or may be a kernel bug, but the libxl side dealt with this as gracefully as it could. Signed-off-by: Ian Campbell <ian.campbell@citrix.com> diff -r cfb1ab79e11d -r 7648e0e73199 tools/libxl/libxl_dom.c --- a/tools/libxl/libxl_dom.c Mon Feb 14 09:25:44 2011 +0000 +++ b/tools/libxl/libxl_dom.c Mon Feb 14 10:37:05 2011 +0000 @@ -345,16 +345,21 @@ static int libxl__domain_suspend_common_ static int libxl__domain_suspend_common_callback(void *data) { struct suspendinfo *si = data; - unsigned long s_state = 0; + unsigned long hvm_s_state = 0, hvm_pvdrv = 0; int ret; char *path, *state = "suspend"; int watchdog; libxl_ctx *ctx = libxl__gc_owner(si->gc); xs_transaction_t t; - if (si->hvm) - xc_get_hvm_param(ctx->xch, si->domid, HVM_PARAM_ACPI_S_STATE, &s_state); - if ((s_state == 0) && (si->suspend_eventchn >= 0)) { + if (si->hvm) { + xc_get_hvm_param(ctx->xch, si->domid, HVM_PARAM_CALLBACK_IRQ, &hvm_pvdrv); + xc_get_hvm_param(ctx->xch, si->domid, HVM_PARAM_ACPI_S_STATE, &hvm_s_state); + } + + if ((hvm_s_state == 0) && (si->suspend_eventchn >= 0)) { + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "issuing %s suspend request via event channel", + si->hvm ? "PVHVM" : "PV"); ret = xc_evtchn_notify(si->xce, si->suspend_eventchn); if (ret < 0) { LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "xc_evtchn_notify failed ret=%d", ret); @@ -368,63 +373,67 @@ static int libxl__domain_suspend_common_ si->guest_responded = 1; return 1; } - path = libxl__sprintf(si->gc, "%s/control/shutdown", libxl__xs_get_dompath(si->gc, si->domid)); - libxl__xs_write(si->gc, XBT_NULL, path, "suspend"); - if (si->hvm) { - unsigned long hvm_pvdrv, hvm_s_state; - xc_get_hvm_param(ctx->xch, si->domid, HVM_PARAM_CALLBACK_IRQ, &hvm_pvdrv); - xc_get_hvm_param(ctx->xch, si->domid, HVM_PARAM_ACPI_S_STATE, &hvm_s_state); - if (!hvm_pvdrv || hvm_s_state) { - LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Calling xc_domain_shutdown on the domain"); - xc_domain_shutdown(ctx->xch, si->domid, SHUTDOWN_suspend); + + if (si->hvm && (!hvm_pvdrv || hvm_s_state)) { + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Calling xc_domain_shutdown on HVM domain"); + xc_domain_shutdown(ctx->xch, si->domid, SHUTDOWN_suspend); + /* The guest does not (need to) respond to this sort of request. */ + si->guest_responded = 1; + } else { + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "issuing %s suspend request via XenBus control node", + si->hvm ? "PVHVM" : "PV"); + + path = libxl__sprintf(si->gc, "%s/control/shutdown", libxl__xs_get_dompath(si->gc, si->domid)); + libxl__xs_write(si->gc, XBT_NULL, path, "suspend"); + + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "wait for the guest to acknowledge suspend request"); + watchdog = 60; + while (!strcmp(state, "suspend") && watchdog > 0) { + usleep(100000); + + state = libxl__xs_read(si->gc, XBT_NULL, path); + + watchdog--; } + + /* + * Guest appears to not be responding. Cancel the suspend + * request. + * + * We re-read the suspend node and clear it within a + * transaction in order to handle the case where we race + * against the guest catching up and acknowledging the request + * at the last minute. + */ + if (!strcmp(state, "suspend")) { + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest didn''t acknowledge suspend, cancelling request"); + retry_transaction: + t = xs_transaction_start(ctx->xsh); + + state = libxl__xs_read(si->gc, t, path); + + if (!strcmp(state, "suspend")) + libxl__xs_write(si->gc, t, path, ""); + + if (!xs_transaction_end(ctx->xsh, t, 0)) + if (errno == EAGAIN) + goto retry_transaction; + + } + + /* + * Final check for guest acknowledgement. The guest may have + * acknowledged while we were cancelling the request in which + * case we lost the race while cancelling and should continue. + */ + if (!strcmp(state, "suspend")) { + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest didn''t acknowledge suspend, request cancelled"); + return 0; + } + + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "guest acknowledged suspend request"); + si->guest_responded = 1; } - - LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "wait for the guest to acknowledge suspend request"); - watchdog = 60; - while (!strcmp(state, "suspend") && watchdog > 0) { - usleep(100000); - - state = libxl__xs_read(si->gc, XBT_NULL, path); - - watchdog--; - } - - /* - * Guest appears to not be responding. Cancel the suspend request. - * - * We re-read the suspend node and clear it within a transaction - * in order to handle the case where we race against the guest - * catching up and acknowledging the request at the last minute. - */ - if (!strcmp(state, "suspend")) { - LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest didn''t acknowledge suspend, cancelling request"); - retry_transaction: - t = xs_transaction_start(ctx->xsh); - - state = libxl__xs_read(si->gc, t, path); - - if (!strcmp(state, "suspend")) - libxl__xs_write(si->gc, t, path, ""); - - if (!xs_transaction_end(ctx->xsh, t, 0)) - if (errno == EAGAIN) - goto retry_transaction; - - } - - /* - * Final check for guest acknowledgement. The guest may have - * acknowledged while we were cancelling the request in which case - * we lost the race while cancelling and should continue. - */ - if (!strcmp(state, "suspend")) { - LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest didn''t acknowledge suspend, request cancelled"); - return 0; - } - - LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "guest acknowledged suspend request"); - si->guest_responded = 1; LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "wait for the guest to suspend"); watchdog = 60; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Stefano Stabellini
2011-Feb-14 11:43 UTC
Re: [Xen-devel] [PATCH] libxl: fix migrate for HVM guests
On Mon, 14 Feb 2011, Ian Campbell wrote:> # HG changeset patch > # User Ian Campbell <ian.campbell@citrix.com> > # Date 1297679825 0 > # Node ID 7648e0e731992c7d5d8b7e0d1c8615cc37a978e4 > # Parent cfb1ab79e11d24ed3703b490a81d24c570cd2d91 > libxl: fix migrate for HVM guests > > Prior to 22909:6868f7f3ab3f libxl would loop waiting simultaneously > for the domain the acknowledge a PV suspend request (by clearing the > XenStore node) and for the domain to actually suspend. For HVM guests > without PV drivers this same loop was simply waiting for the domain to > suspend. > > In 22909:6868f7f3ab3f the original loop was split into two loops > (first waiting for the acknowledgement and then for the actual > suspend). This caused libxl to incorrectly wait for an HVM guest > without PV drivers to acknowledge the XenStore request, which is not > something it would ever do. > > Fix this by only waiting for an acknowledgement from a guest which > contains PV drivers. > > Previously we were also making the request regardless of whether the > guest had PV drivers, change that to only make the request if the > guest has PV drivers. > > Lastly there is no need to sample HVM_PARAM_ACPI_S_STATE twice and not > doing so simplifies the test for PVHVM vs. normal HVM guests. > > Tested with: > Windows with GPL PV drivers (event channel suspend mode) > Windows without PV drivers (xc_domain_shutdown mode) > Linux PV (PV with XenBus control node mode) > Linux HVM (PVHVM with XenBus control node mode (*)) > Linux HVM (xc_domain_shutdown mode) > > (*) In this case the kernel didn''t actually suspend, due to: > PM: Device input1 failed to suspend: error -22 > xen suspend: dpm_suspend_start -22 > which may be a misconfiguration in my setup or may be a kernel > bug, but the libxl side dealt with this as gracefully as it could. > > Signed-off-by: Ian Campbell <ian.campbell@citrix.com>Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Jackson
2011-Feb-14 17:03 UTC
Re: [Xen-devel] [PATCH] libxl: fix migrate for HVM guests
Ian Campbell writes ("[Xen-devel] [PATCH] libxl: fix migrate for HVM guests"):> libxl: fix migrate for HVM guestsThanks, that looks sane to me. I have applied it. Ian. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel