Ian Campbell
2012-Aug-02 09:38 UTC
[PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy
# HG changeset patch
# User Ian Campbell <ian.campbell@citrix.com>
# Date 1343900058 -3600
# Node ID f345fbfa8f50975b5c327669ceaf68c1d098da8f
# Parent 075da4778b0a1a84680ef0acd26fcd3b01adeee4
libxl: fix cleanup of tap devices in libxl__device_destroy
We pass be_path to tapdisk_destroy but we''ve already deleted it so it
fails to
read tapdisk-params. However it appears that we need to destroy the tap device
after tearing down xenstore, to avoid the leak reported by Greg Wettstein in
<201207312141.q6VLfJje012656@wind.enjellic.com>.
So read the tapdisk-params in the cleanup transaction, before the remove, and
pass that down to destroy_tapdisk instead. tapdisk-params may of course be NULL
if the device isn''t a tap device.
There is no need to tear down the tap device from libxl__initiate_device_remove
since this ultimately calls libxl__device_destroy.
Propagate and log errors from libxl__device_destroy_tapdisk.
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
This patch depends on Ian Jacksons "libxl: unify libxl__device_destroy and
device_hotplug_done" and my "libxl: const correctness for
libxl__xs_path_cleanup"
diff -r 075da4778b0a -r f345fbfa8f50 tools/libxl/libxl_blktap2.c
--- a/tools/libxl/libxl_blktap2.c Thu Aug 02 10:22:28 2012 +0100
+++ b/tools/libxl/libxl_blktap2.c Thu Aug 02 10:34:18 2012 +0100
@@ -51,28 +51,36 @@ char *libxl__blktap_devpath(libxl__gc *g
}
-void libxl__device_destroy_tapdisk(libxl__gc *gc, char *be_path)
+int libxl__device_destroy_tapdisk(libxl__gc *gc, char *params)
{
- char *path, *params, *type, *disk;
+ char *type, *disk;
int err;
tap_list_t tap;
- path = libxl__sprintf(gc, "%s/tapdisk-params", be_path);
- if (!path) return;
-
- params = libxl__xs_read(gc, XBT_NULL, path);
- if (!params) return;
-
type = params;
disk = strchr(params, '':'');
- if (!disk) return;
+ if (!disk) {
+ LOG(ERROR, "Unable to parse params %s", params);
+ return ERROR_INVAL;
+ }
*disk++ = ''\0'';
err = tap_ctl_find(type, disk, &tap);
- if (err < 0) return;
+ if (err < 0) {
+ /* returns -errno */
+ LOGEV(ERROR, -err, "Unable to find type %s disk %s", type,
disk);
+ return ERROR_FAIL;
+ }
- tap_ctl_destroy(tap.id, tap.minor);
+ err = tap_ctl_destroy(tap.id, tap.minor);
+ if (err < 0) {
+ LOGEV(ERROR, -err, "Failed to destroy tap device id %d minor
%d",
+ tap.id, tap.minor);
+ return ERROR_FAIL;
+ }
+
+ return 0;
}
/*
diff -r 075da4778b0a -r f345fbfa8f50 tools/libxl/libxl_device.c
--- a/tools/libxl/libxl_device.c Thu Aug 02 10:22:28 2012 +0100
+++ b/tools/libxl/libxl_device.c Thu Aug 02 10:34:18 2012 +0100
@@ -522,8 +522,10 @@ DEFINE_DEVICES_ADD(nic)
int libxl__device_destroy(libxl__gc *gc, libxl__device *dev)
{
- char *be_path = libxl__device_backend_path(gc, dev);
+ const char *be_path = libxl__device_backend_path(gc, dev);
const char *fe_path = libxl__device_frontend_path(gc, dev);
+ const char *tapdisk_path = GCSPRINTF("%s/%s", be_path,
"tapdisk-params");
+ char *tapdisk_params;
xs_transaction_t t = 0;
int rc;
@@ -531,6 +533,9 @@ int libxl__device_destroy(libxl__gc *gc,
rc = libxl__xs_transaction_start(gc, &t);
if (rc) goto out;
+ /* May not exist if this is not a tap device */
+ tapdisk_params = libxl__xs_read(gc, t, tapdisk_path);
+
libxl__xs_path_cleanup(gc, t, fe_path);
libxl__xs_path_cleanup(gc, t, be_path);
@@ -539,7 +544,8 @@ int libxl__device_destroy(libxl__gc *gc,
if (rc < 0) goto out;
}
- libxl__device_destroy_tapdisk(gc, be_path);
+ if (tapdisk_params)
+ rc = libxl__device_destroy_tapdisk(gc, tapdisk_params);
out:
return rc;
@@ -789,8 +795,6 @@ void libxl__initiate_device_remove(libxl
if (rc < 0) goto out;
}
- libxl__device_destroy_tapdisk(gc, be_path);
-
rc = libxl__ev_devstate_wait(gc, &aodev->backend_ds,
device_backend_callback,
state_path, XenbusStateClosed,
diff -r 075da4778b0a -r f345fbfa8f50 tools/libxl/libxl_internal.h
--- a/tools/libxl/libxl_internal.h Thu Aug 02 10:22:28 2012 +0100
+++ b/tools/libxl/libxl_internal.h Thu Aug 02 10:34:18 2012 +0100
@@ -1344,8 +1344,9 @@ _hidden char *libxl__blktap_devpath(libx
/* libxl__device_destroy_tapdisk:
* Destroys any tapdisk process associated with the backend represented
* by be_path.
+ * Always logs on failure.
*/
-_hidden void libxl__device_destroy_tapdisk(libxl__gc *gc, char *be_path);
+_hidden int libxl__device_destroy_tapdisk(libxl__gc *gc, char *params);
_hidden int libxl__device_from_disk(libxl__gc *gc, uint32_t domid,
libxl_device_disk *disk,
diff -r 075da4778b0a -r f345fbfa8f50 tools/libxl/libxl_noblktap2.c
--- a/tools/libxl/libxl_noblktap2.c Thu Aug 02 10:22:28 2012 +0100
+++ b/tools/libxl/libxl_noblktap2.c Thu Aug 02 10:34:18 2012 +0100
@@ -28,8 +28,9 @@ char *libxl__blktap_devpath(libxl__gc *g
return NULL;
}
-void libxl__device_destroy_tapdisk(libxl__gc *gc, char *be_path)
+int libxl__device_destroy_tapdisk(libxl__gc *gc, char *params)
{
+ return 0;
}
/*
Ian Campbell
2012-Aug-02 09:45 UTC
Re: [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy
> rc = libxl__xs_transaction_start(gc, &t); > if (rc) goto out; > > + /* May not exist if this is not a tap device */ > + tapdisk_params = libxl__xs_read(gc, t, tapdisk_path); > + > libxl__xs_path_cleanup(gc, t, fe_path); > libxl__xs_path_cleanup(gc, t, be_path);Do we deliberate ignore the error codes from these two? Ian.
Ian Jackson
2012-Aug-02 14:41 UTC
Re: [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy
Ian Campbell writes ("[PATCH] libxl: fix cleanup of tap devices in
libxl__device_destroy"):> libxl: fix cleanup of tap devices in libxl__device_destroy
>
> We pass be_path to tapdisk_destroy but we''ve already deleted it so
it fails to
> read tapdisk-params. However it appears that we need to destroy the tap
device
> after tearing down xenstore, to avoid the leak reported by Greg Wettstein
in
> <201207312141.q6VLfJje012656@wind.enjellic.com>.
>
> So read the tapdisk-params in the cleanup transaction, before the remove,
and
> pass that down to destroy_tapdisk instead. tapdisk-params may of course be
NULL
> if the device isn''t a tap device.
>
> There is no need to tear down the tap device from
libxl__initiate_device_remove
> since this ultimately calls libxl__device_destroy.
>
> Propagate and log errors from libxl__device_destroy_tapdisk.
Can you please wrap your commit messages to 70ish rather than 80 ?
Here is a screenshot of my email client:
http://www.chiark.greenend.org.uk/~ijackson/volatile/2012/wrap-damage.png
The code all looks good. Just one comment:
> int libxl__device_destroy(libxl__gc *gc, libxl__device *dev)
> {
...> @@ -531,6 +533,9 @@ int libxl__device_destroy(libxl__gc *gc,
> rc = libxl__xs_transaction_start(gc, &t);
> if (rc) goto out;
>
> + /* May not exist if this is not a tap device */
> + tapdisk_params = libxl__xs_read(gc, t, tapdisk_path);
You can still use libxl__xs_read_checked. It considers ENOENT a
success (and therefore doesn''t log about it).
Ian.
Ian Jackson
2012-Aug-02 14:42 UTC
Re: [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy
Ian Campbell writes ("Re: [Xen-devel] [PATCH] libxl: fix cleanup of tap
devices in libxl__device_destroy"):>
> > rc = libxl__xs_transaction_start(gc, &t);
> > if (rc) goto out;
> >
> > + /* May not exist if this is not a tap device */
> > + tapdisk_params = libxl__xs_read(gc, t, tapdisk_path);
> > +
> > libxl__xs_path_cleanup(gc, t, fe_path);
> > libxl__xs_path_cleanup(gc, t, be_path);
>
> Do we deliberate ignore the error codes from these two?
I don''t think so.
In general in this destroy path we should consider whether, on
failure, we should abandon the cleanup or note the error and carry on.
Ian.
Ian Campbell
2012-Aug-02 14:43 UTC
Re: [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy
On Thu, 2012-08-02 at 15:41 +0100, Ian Jackson wrote:> Ian Campbell writes ("[PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy"): > > libxl: fix cleanup of tap devices in libxl__device_destroy > > > > We pass be_path to tapdisk_destroy but we''ve already deleted it so it fails to > > read tapdisk-params. However it appears that we need to destroy the tap device > > after tearing down xenstore, to avoid the leak reported by Greg Wettstein in > > <201207312141.q6VLfJje012656@wind.enjellic.com>. > > > > So read the tapdisk-params in the cleanup transaction, before the remove, and > > pass that down to destroy_tapdisk instead. tapdisk-params may of course be NULL > > if the device isn''t a tap device. > > > > There is no need to tear down the tap device from libxl__initiate_device_remove > > since this ultimately calls libxl__device_destroy. > > > > Propagate and log errors from libxl__device_destroy_tapdisk. > > Can you please wrap your commit messages to 70ish rather than 80 ? > Here is a screenshot of my email client: > http://www.chiark.greenend.org.uk/~ijackson/volatile/2012/wrap-damage.pngIf someone tells me the rune to put into vimrc such that "gqj" does this then sure.> > The code all looks good. Just one comment: > > > int libxl__device_destroy(libxl__gc *gc, libxl__device *dev) > > { > ... > > @@ -531,6 +533,9 @@ int libxl__device_destroy(libxl__gc *gc, > > rc = libxl__xs_transaction_start(gc, &t); > > if (rc) goto out; > > > > + /* May not exist if this is not a tap device */ > > + tapdisk_params = libxl__xs_read(gc, t, tapdisk_path); > > You can still use libxl__xs_read_checked. It considers ENOENT a > success (and therefore doesn''t log about it).OK.
Ian Campbell
2012-Aug-02 14:45 UTC
Re: [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy
On Thu, 2012-08-02 at 15:42 +0100, Ian Jackson wrote:> Ian Campbell writes ("Re: [Xen-devel] [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy"): > > > > > rc = libxl__xs_transaction_start(gc, &t); > > > if (rc) goto out; > > > > > > + /* May not exist if this is not a tap device */ > > > + tapdisk_params = libxl__xs_read(gc, t, tapdisk_path); > > > + > > > libxl__xs_path_cleanup(gc, t, fe_path); > > > libxl__xs_path_cleanup(gc, t, be_path); > > > > Do we deliberate ignore the error codes from these two? > > I don''t think so. > > In general in this destroy path we should consider whether, on > failure, we should abandon the cleanup or note the error and carry on.Since this is a destroy operation note it and carry on I think, so as to clean up as much as we are able. BTW, is there a libxl__xs_transaction_abort missing in this function too?
Ian Jackson
2012-Aug-02 14:48 UTC
linewrapping commit messages (was Re: [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy)
Ian Campbell writes ("Re: [PATCH] libxl: fix cleanup of tap devices in
libxl__device_destroy"):> On Thu, 2012-08-02 at 15:41 +0100, Ian Jackson wrote:
> > Can you please wrap your commit messages to 70ish rather than 80 ?
> > Here is a screenshot of my email client:
> >
http://www.chiark.greenend.org.uk/~ijackson/volatile/2012/wrap-damage.png
>
> If someone tells me the rune to put into vimrc such that "gqj"
does this
> then sure.
I asked IRC and people said:
:q! emacs
and
:set wm=10
Take your pick :-).
Ian.
Ian Jackson
2012-Aug-02 14:54 UTC
Re: [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy
Ian Campbell writes ("Re: [Xen-devel] [PATCH] libxl: fix cleanup of tap
devices in libxl__device_destroy"):> On Thu, 2012-08-02 at 15:42 +0100, Ian Jackson wrote:
> > In general in this destroy path we should consider whether, on
> > failure, we should abandon the cleanup or note the error and carry on.
>
> Since this is a destroy operation note it and carry on I think, so as to
> clean up as much as we are able.
Right. Good then I guess it is OK. (We do risk leaking some stuff in
xenstore which you''d have to use low-level tools to remove but
that''s
probably acceptable if things are so bad you can''t remove stuff from
xenstore.)
> BTW, is there a libxl__xs_transaction_abort missing in this function
> too?
Yes. I have edited my patch to fix this. (See below; I will repost
it with v5 of my series, later today I think.)
Ian.
From: Ian Jackson <ian.jackson@eu.citrix.com>
Subject: [PATCH] libxl: unify libxl__device_destroy and device_hotplug_done
device_hotplug_done contains an open-coded but improved version of
libxl__device_destroy. So move the contents of device_hotplug_done
into libxl__device_destroy, deleting the old code, and replace it at
its old location with a function call.
Add the missing call to libxl__xs_transaction_abort (which was present
in neither version and technically speaking is always a no-op with
this code as it stands at the moment because no-one does "goto out"
other than after libxl__xs_transaction_start or _commit).
Also fix the error handling: the rc from the destroy should be
propagated into the aodev.
Reported-by: Ian Campbell <Ian.Campbell@citrix.com>
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
-
Changes in v5 of series:
* Also add missing xs abort.
---
tools/libxl/libxl_device.c | 36 +++++++++++++-----------------------
1 files changed, 13 insertions(+), 23 deletions(-)
diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c
index da0c3ea..95b169e 100644
--- a/tools/libxl/libxl_device.c
+++ b/tools/libxl/libxl_device.c
@@ -513,22 +513,24 @@ int libxl__device_destroy(libxl__gc *gc, libxl__device
*dev)
char *be_path = libxl__device_backend_path(gc, dev);
char *fe_path = libxl__device_frontend_path(gc, dev);
xs_transaction_t t = 0;
- int rc = 0;
+ int rc;
+
+ for (;;) {
+ rc = libxl__xs_transaction_start(gc, &t);
+ if (rc) goto out;
- do {
- t = xs_transaction_start(CTX->xsh);
libxl__xs_path_cleanup(gc, t, fe_path);
libxl__xs_path_cleanup(gc, t, be_path);
- rc = !xs_transaction_end(CTX->xsh, t, 0);
- } while (rc && errno == EAGAIN);
- if (rc) {
- LOGE(ERROR, "unable to finish transaction");
- goto out;
+
+ rc = libxl__xs_transaction_commit(gc, &t);
+ if (!rc) break;
+ if (rc < 0) goto out;
}
libxl__device_destroy_tapdisk(gc, be_path);
out:
+ libxl__xs_transaction_abort(gc, &t);
return rc;
}
@@ -993,29 +995,17 @@ error:
static void device_hotplug_done(libxl__egc *egc, libxl__ao_device *aodev)
{
STATE_AO_GC(aodev->ao);
- char *be_path = libxl__device_backend_path(gc, aodev->dev);
- char *fe_path = libxl__device_frontend_path(gc, aodev->dev);
- xs_transaction_t t = 0;
int rc;
device_hotplug_clean(gc, aodev);
/* Clean xenstore if it''s a disconnection */
if (aodev->action == DEVICE_DISCONNECT) {
- for (;;) {
- rc = libxl__xs_transaction_start(gc, &t);
- if (rc) goto out;
-
- libxl__xs_path_cleanup(gc, t, fe_path);
- libxl__xs_path_cleanup(gc, t, be_path);
-
- rc = libxl__xs_transaction_commit(gc, &t);
- if (!rc) break;
- if (rc < 0) goto out;
- }
+ rc = libxl__device_destroy(gc, aodev->dev);
+ if (!aodev->rc)
+ aodev->rc = rc;
}
-out:
aodev->callback(egc, aodev);
return;
}
--
tg: (7fc019d..) t/xen/xl.device-destroy-unify (depends on: t/xen/gitignore)
Ian Campbell
2012-Aug-02 14:55 UTC
Re: [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy
On Thu, 2012-08-02 at 15:41 +0100, Ian Jackson wrote:> Ian Campbell writes ("[PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy"): > > libxl: fix cleanup of tap devices in libxl__device_destroy > > > > We pass be_path to tapdisk_destroy but we''ve already deleted it so it fails to > > read tapdisk-params. However it appears that we need to destroy the tap device > > after tearing down xenstore, to avoid the leak reported by Greg Wettstein in > > <201207312141.q6VLfJje012656@wind.enjellic.com>. > > > > So read the tapdisk-params in the cleanup transaction, before the remove, and > > pass that down to destroy_tapdisk instead. tapdisk-params may of course be NULL > > if the device isn''t a tap device. > > > > There is no need to tear down the tap device from libxl__initiate_device_remove > > since this ultimately calls libxl__device_destroy. > > > > Propagate and log errors from libxl__device_destroy_tapdisk. > > Can you please wrap your commit messages to 70ish rather than 80 ? > Here is a screenshot of my email client: > http://www.chiark.greenend.org.uk/~ijackson/volatile/2012/wrap-damage.png > > The code all looks good. Just one comment: > > > int libxl__device_destroy(libxl__gc *gc, libxl__device *dev) > > { > ... > > @@ -531,6 +533,9 @@ int libxl__device_destroy(libxl__gc *gc, > > rc = libxl__xs_transaction_start(gc, &t); > > if (rc) goto out; > > > > + /* May not exist if this is not a tap device */ > > + tapdisk_params = libxl__xs_read(gc, t, tapdisk_path); > > You can still use libxl__xs_read_checked. It considers ENOENT a > success (and therefore doesn''t log about it).tapdisk_params cannot be const as read_checked requires because tapdisk_destroy modified the string. I suppose I could add a strdup inside.> > Ian.