# HG changeset patch
# User Ian Campbell <ian.campbell@citrix.com>
# Date 1297679825 0
# Node ID 7648e0e731992c7d5d8b7e0d1c8615cc37a978e4
# Parent cfb1ab79e11d24ed3703b490a81d24c570cd2d91
libxl: fix migrate for HVM guests
Prior to 22909:6868f7f3ab3f libxl would loop waiting simultaneously
for the domain the acknowledge a PV suspend request (by clearing the
XenStore node) and for the domain to actually suspend. For HVM guests
without PV drivers this same loop was simply waiting for the domain to
suspend.
In 22909:6868f7f3ab3f the original loop was split into two loops
(first waiting for the acknowledgement and then for the actual
suspend). This caused libxl to incorrectly wait for an HVM guest
without PV drivers to acknowledge the XenStore request, which is not
something it would ever do.
Fix this by only waiting for an acknowledgement from a guest which
contains PV drivers.
Previously we were also making the request regardless of whether the
guest had PV drivers, change that to only make the request if the
guest has PV drivers.
Lastly there is no need to sample HVM_PARAM_ACPI_S_STATE twice and not
doing so simplifies the test for PVHVM vs. normal HVM guests.
Tested with:
Windows with GPL PV drivers (event channel suspend mode)
Windows without PV drivers (xc_domain_shutdown mode)
Linux PV (PV with XenBus control node mode)
Linux HVM (PVHVM with XenBus control node mode (*))
Linux HVM (xc_domain_shutdown mode)
(*) In this case the kernel didn''t actually suspend, due to:
PM: Device input1 failed to suspend: error -22
xen suspend: dpm_suspend_start -22
which may be a misconfiguration in my setup or may be a kernel
bug, but the libxl side dealt with this as gracefully as it could.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
diff -r cfb1ab79e11d -r 7648e0e73199 tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c Mon Feb 14 09:25:44 2011 +0000
+++ b/tools/libxl/libxl_dom.c Mon Feb 14 10:37:05 2011 +0000
@@ -345,16 +345,21 @@ static int libxl__domain_suspend_common_
static int libxl__domain_suspend_common_callback(void *data)
{
struct suspendinfo *si = data;
- unsigned long s_state = 0;
+ unsigned long hvm_s_state = 0, hvm_pvdrv = 0;
int ret;
char *path, *state = "suspend";
int watchdog;
libxl_ctx *ctx = libxl__gc_owner(si->gc);
xs_transaction_t t;
- if (si->hvm)
- xc_get_hvm_param(ctx->xch, si->domid, HVM_PARAM_ACPI_S_STATE,
&s_state);
- if ((s_state == 0) && (si->suspend_eventchn >= 0)) {
+ if (si->hvm) {
+ xc_get_hvm_param(ctx->xch, si->domid, HVM_PARAM_CALLBACK_IRQ,
&hvm_pvdrv);
+ xc_get_hvm_param(ctx->xch, si->domid, HVM_PARAM_ACPI_S_STATE,
&hvm_s_state);
+ }
+
+ if ((hvm_s_state == 0) && (si->suspend_eventchn >= 0)) {
+ LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "issuing %s suspend request via
event channel",
+ si->hvm ? "PVHVM" : "PV");
ret = xc_evtchn_notify(si->xce, si->suspend_eventchn);
if (ret < 0) {
LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "xc_evtchn_notify failed
ret=%d", ret);
@@ -368,63 +373,67 @@ static int libxl__domain_suspend_common_
si->guest_responded = 1;
return 1;
}
- path = libxl__sprintf(si->gc, "%s/control/shutdown",
libxl__xs_get_dompath(si->gc, si->domid));
- libxl__xs_write(si->gc, XBT_NULL, path, "suspend");
- if (si->hvm) {
- unsigned long hvm_pvdrv, hvm_s_state;
- xc_get_hvm_param(ctx->xch, si->domid, HVM_PARAM_CALLBACK_IRQ,
&hvm_pvdrv);
- xc_get_hvm_param(ctx->xch, si->domid, HVM_PARAM_ACPI_S_STATE,
&hvm_s_state);
- if (!hvm_pvdrv || hvm_s_state) {
- LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Calling xc_domain_shutdown
on the domain");
- xc_domain_shutdown(ctx->xch, si->domid, SHUTDOWN_suspend);
+
+ if (si->hvm && (!hvm_pvdrv || hvm_s_state)) {
+ LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Calling xc_domain_shutdown on
HVM domain");
+ xc_domain_shutdown(ctx->xch, si->domid, SHUTDOWN_suspend);
+ /* The guest does not (need to) respond to this sort of request. */
+ si->guest_responded = 1;
+ } else {
+ LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "issuing %s suspend request via
XenBus control node",
+ si->hvm ? "PVHVM" : "PV");
+
+ path = libxl__sprintf(si->gc, "%s/control/shutdown",
libxl__xs_get_dompath(si->gc, si->domid));
+ libxl__xs_write(si->gc, XBT_NULL, path, "suspend");
+
+ LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "wait for the guest to
acknowledge suspend request");
+ watchdog = 60;
+ while (!strcmp(state, "suspend") && watchdog > 0)
{
+ usleep(100000);
+
+ state = libxl__xs_read(si->gc, XBT_NULL, path);
+
+ watchdog--;
}
+
+ /*
+ * Guest appears to not be responding. Cancel the suspend
+ * request.
+ *
+ * We re-read the suspend node and clear it within a
+ * transaction in order to handle the case where we race
+ * against the guest catching up and acknowledging the request
+ * at the last minute.
+ */
+ if (!strcmp(state, "suspend")) {
+ LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest didn''t
acknowledge suspend, cancelling request");
+ retry_transaction:
+ t = xs_transaction_start(ctx->xsh);
+
+ state = libxl__xs_read(si->gc, t, path);
+
+ if (!strcmp(state, "suspend"))
+ libxl__xs_write(si->gc, t, path, "");
+
+ if (!xs_transaction_end(ctx->xsh, t, 0))
+ if (errno == EAGAIN)
+ goto retry_transaction;
+
+ }
+
+ /*
+ * Final check for guest acknowledgement. The guest may have
+ * acknowledged while we were cancelling the request in which
+ * case we lost the race while cancelling and should continue.
+ */
+ if (!strcmp(state, "suspend")) {
+ LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest didn''t
acknowledge suspend, request cancelled");
+ return 0;
+ }
+
+ LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "guest acknowledged suspend
request");
+ si->guest_responded = 1;
}
-
- LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "wait for the guest to acknowledge
suspend request");
- watchdog = 60;
- while (!strcmp(state, "suspend") && watchdog > 0) {
- usleep(100000);
-
- state = libxl__xs_read(si->gc, XBT_NULL, path);
-
- watchdog--;
- }
-
- /*
- * Guest appears to not be responding. Cancel the suspend request.
- *
- * We re-read the suspend node and clear it within a transaction
- * in order to handle the case where we race against the guest
- * catching up and acknowledging the request at the last minute.
- */
- if (!strcmp(state, "suspend")) {
- LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest didn''t
acknowledge suspend, cancelling request");
- retry_transaction:
- t = xs_transaction_start(ctx->xsh);
-
- state = libxl__xs_read(si->gc, t, path);
-
- if (!strcmp(state, "suspend"))
- libxl__xs_write(si->gc, t, path, "");
-
- if (!xs_transaction_end(ctx->xsh, t, 0))
- if (errno == EAGAIN)
- goto retry_transaction;
-
- }
-
- /*
- * Final check for guest acknowledgement. The guest may have
- * acknowledged while we were cancelling the request in which case
- * we lost the race while cancelling and should continue.
- */
- if (!strcmp(state, "suspend")) {
- LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest didn''t
acknowledge suspend, request cancelled");
- return 0;
- }
-
- LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "guest acknowledged suspend
request");
- si->guest_responded = 1;
LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "wait for the guest to
suspend");
watchdog = 60;
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Stefano Stabellini
2011-Feb-14 11:43 UTC
Re: [Xen-devel] [PATCH] libxl: fix migrate for HVM guests
On Mon, 14 Feb 2011, Ian Campbell wrote:> # HG changeset patch > # User Ian Campbell <ian.campbell@citrix.com> > # Date 1297679825 0 > # Node ID 7648e0e731992c7d5d8b7e0d1c8615cc37a978e4 > # Parent cfb1ab79e11d24ed3703b490a81d24c570cd2d91 > libxl: fix migrate for HVM guests > > Prior to 22909:6868f7f3ab3f libxl would loop waiting simultaneously > for the domain the acknowledge a PV suspend request (by clearing the > XenStore node) and for the domain to actually suspend. For HVM guests > without PV drivers this same loop was simply waiting for the domain to > suspend. > > In 22909:6868f7f3ab3f the original loop was split into two loops > (first waiting for the acknowledgement and then for the actual > suspend). This caused libxl to incorrectly wait for an HVM guest > without PV drivers to acknowledge the XenStore request, which is not > something it would ever do. > > Fix this by only waiting for an acknowledgement from a guest which > contains PV drivers. > > Previously we were also making the request regardless of whether the > guest had PV drivers, change that to only make the request if the > guest has PV drivers. > > Lastly there is no need to sample HVM_PARAM_ACPI_S_STATE twice and not > doing so simplifies the test for PVHVM vs. normal HVM guests. > > Tested with: > Windows with GPL PV drivers (event channel suspend mode) > Windows without PV drivers (xc_domain_shutdown mode) > Linux PV (PV with XenBus control node mode) > Linux HVM (PVHVM with XenBus control node mode (*)) > Linux HVM (xc_domain_shutdown mode) > > (*) In this case the kernel didn''t actually suspend, due to: > PM: Device input1 failed to suspend: error -22 > xen suspend: dpm_suspend_start -22 > which may be a misconfiguration in my setup or may be a kernel > bug, but the libxl side dealt with this as gracefully as it could. > > Signed-off-by: Ian Campbell <ian.campbell@citrix.com>Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Jackson
2011-Feb-14 17:03 UTC
Re: [Xen-devel] [PATCH] libxl: fix migrate for HVM guests
Ian Campbell writes ("[Xen-devel] [PATCH] libxl: fix migrate for HVM
guests"):> libxl: fix migrate for HVM guests
Thanks, that looks sane to me. I have applied it.
Ian.
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel