Ian Campbell
2012-Aug-02 09:38 UTC
[PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy
# HG changeset patch # User Ian Campbell <ian.campbell@citrix.com> # Date 1343900058 -3600 # Node ID f345fbfa8f50975b5c327669ceaf68c1d098da8f # Parent 075da4778b0a1a84680ef0acd26fcd3b01adeee4 libxl: fix cleanup of tap devices in libxl__device_destroy We pass be_path to tapdisk_destroy but we''ve already deleted it so it fails to read tapdisk-params. However it appears that we need to destroy the tap device after tearing down xenstore, to avoid the leak reported by Greg Wettstein in <201207312141.q6VLfJje012656@wind.enjellic.com>. So read the tapdisk-params in the cleanup transaction, before the remove, and pass that down to destroy_tapdisk instead. tapdisk-params may of course be NULL if the device isn''t a tap device. There is no need to tear down the tap device from libxl__initiate_device_remove since this ultimately calls libxl__device_destroy. Propagate and log errors from libxl__device_destroy_tapdisk. Signed-off-by: Ian Campbell <ian.campbell@citrix.com> --- This patch depends on Ian Jacksons "libxl: unify libxl__device_destroy and device_hotplug_done" and my "libxl: const correctness for libxl__xs_path_cleanup" diff -r 075da4778b0a -r f345fbfa8f50 tools/libxl/libxl_blktap2.c --- a/tools/libxl/libxl_blktap2.c Thu Aug 02 10:22:28 2012 +0100 +++ b/tools/libxl/libxl_blktap2.c Thu Aug 02 10:34:18 2012 +0100 @@ -51,28 +51,36 @@ char *libxl__blktap_devpath(libxl__gc *g } -void libxl__device_destroy_tapdisk(libxl__gc *gc, char *be_path) +int libxl__device_destroy_tapdisk(libxl__gc *gc, char *params) { - char *path, *params, *type, *disk; + char *type, *disk; int err; tap_list_t tap; - path = libxl__sprintf(gc, "%s/tapdisk-params", be_path); - if (!path) return; - - params = libxl__xs_read(gc, XBT_NULL, path); - if (!params) return; - type = params; disk = strchr(params, '':''); - if (!disk) return; + if (!disk) { + LOG(ERROR, "Unable to parse params %s", params); + return ERROR_INVAL; + } *disk++ = ''\0''; err = tap_ctl_find(type, disk, &tap); - if (err < 0) return; + if (err < 0) { + /* returns -errno */ + LOGEV(ERROR, -err, "Unable to find type %s disk %s", type, disk); + return ERROR_FAIL; + } - tap_ctl_destroy(tap.id, tap.minor); + err = tap_ctl_destroy(tap.id, tap.minor); + if (err < 0) { + LOGEV(ERROR, -err, "Failed to destroy tap device id %d minor %d", + tap.id, tap.minor); + return ERROR_FAIL; + } + + return 0; } /* diff -r 075da4778b0a -r f345fbfa8f50 tools/libxl/libxl_device.c --- a/tools/libxl/libxl_device.c Thu Aug 02 10:22:28 2012 +0100 +++ b/tools/libxl/libxl_device.c Thu Aug 02 10:34:18 2012 +0100 @@ -522,8 +522,10 @@ DEFINE_DEVICES_ADD(nic) int libxl__device_destroy(libxl__gc *gc, libxl__device *dev) { - char *be_path = libxl__device_backend_path(gc, dev); + const char *be_path = libxl__device_backend_path(gc, dev); const char *fe_path = libxl__device_frontend_path(gc, dev); + const char *tapdisk_path = GCSPRINTF("%s/%s", be_path, "tapdisk-params"); + char *tapdisk_params; xs_transaction_t t = 0; int rc; @@ -531,6 +533,9 @@ int libxl__device_destroy(libxl__gc *gc, rc = libxl__xs_transaction_start(gc, &t); if (rc) goto out; + /* May not exist if this is not a tap device */ + tapdisk_params = libxl__xs_read(gc, t, tapdisk_path); + libxl__xs_path_cleanup(gc, t, fe_path); libxl__xs_path_cleanup(gc, t, be_path); @@ -539,7 +544,8 @@ int libxl__device_destroy(libxl__gc *gc, if (rc < 0) goto out; } - libxl__device_destroy_tapdisk(gc, be_path); + if (tapdisk_params) + rc = libxl__device_destroy_tapdisk(gc, tapdisk_params); out: return rc; @@ -789,8 +795,6 @@ void libxl__initiate_device_remove(libxl if (rc < 0) goto out; } - libxl__device_destroy_tapdisk(gc, be_path); - rc = libxl__ev_devstate_wait(gc, &aodev->backend_ds, device_backend_callback, state_path, XenbusStateClosed, diff -r 075da4778b0a -r f345fbfa8f50 tools/libxl/libxl_internal.h --- a/tools/libxl/libxl_internal.h Thu Aug 02 10:22:28 2012 +0100 +++ b/tools/libxl/libxl_internal.h Thu Aug 02 10:34:18 2012 +0100 @@ -1344,8 +1344,9 @@ _hidden char *libxl__blktap_devpath(libx /* libxl__device_destroy_tapdisk: * Destroys any tapdisk process associated with the backend represented * by be_path. + * Always logs on failure. */ -_hidden void libxl__device_destroy_tapdisk(libxl__gc *gc, char *be_path); +_hidden int libxl__device_destroy_tapdisk(libxl__gc *gc, char *params); _hidden int libxl__device_from_disk(libxl__gc *gc, uint32_t domid, libxl_device_disk *disk, diff -r 075da4778b0a -r f345fbfa8f50 tools/libxl/libxl_noblktap2.c --- a/tools/libxl/libxl_noblktap2.c Thu Aug 02 10:22:28 2012 +0100 +++ b/tools/libxl/libxl_noblktap2.c Thu Aug 02 10:34:18 2012 +0100 @@ -28,8 +28,9 @@ char *libxl__blktap_devpath(libxl__gc *g return NULL; } -void libxl__device_destroy_tapdisk(libxl__gc *gc, char *be_path) +int libxl__device_destroy_tapdisk(libxl__gc *gc, char *params) { + return 0; } /*
Ian Campbell
2012-Aug-02 09:45 UTC
Re: [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy
> rc = libxl__xs_transaction_start(gc, &t); > if (rc) goto out; > > + /* May not exist if this is not a tap device */ > + tapdisk_params = libxl__xs_read(gc, t, tapdisk_path); > + > libxl__xs_path_cleanup(gc, t, fe_path); > libxl__xs_path_cleanup(gc, t, be_path);Do we deliberate ignore the error codes from these two? Ian.
Ian Jackson
2012-Aug-02 14:41 UTC
Re: [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy
Ian Campbell writes ("[PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy"):> libxl: fix cleanup of tap devices in libxl__device_destroy > > We pass be_path to tapdisk_destroy but we''ve already deleted it so it fails to > read tapdisk-params. However it appears that we need to destroy the tap device > after tearing down xenstore, to avoid the leak reported by Greg Wettstein in > <201207312141.q6VLfJje012656@wind.enjellic.com>. > > So read the tapdisk-params in the cleanup transaction, before the remove, and > pass that down to destroy_tapdisk instead. tapdisk-params may of course be NULL > if the device isn''t a tap device. > > There is no need to tear down the tap device from libxl__initiate_device_remove > since this ultimately calls libxl__device_destroy. > > Propagate and log errors from libxl__device_destroy_tapdisk.Can you please wrap your commit messages to 70ish rather than 80 ? Here is a screenshot of my email client: http://www.chiark.greenend.org.uk/~ijackson/volatile/2012/wrap-damage.png The code all looks good. Just one comment:> int libxl__device_destroy(libxl__gc *gc, libxl__device *dev) > {...> @@ -531,6 +533,9 @@ int libxl__device_destroy(libxl__gc *gc, > rc = libxl__xs_transaction_start(gc, &t); > if (rc) goto out; > > + /* May not exist if this is not a tap device */ > + tapdisk_params = libxl__xs_read(gc, t, tapdisk_path);You can still use libxl__xs_read_checked. It considers ENOENT a success (and therefore doesn''t log about it). Ian.
Ian Jackson
2012-Aug-02 14:42 UTC
Re: [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy
Ian Campbell writes ("Re: [Xen-devel] [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy"):> > > rc = libxl__xs_transaction_start(gc, &t); > > if (rc) goto out; > > > > + /* May not exist if this is not a tap device */ > > + tapdisk_params = libxl__xs_read(gc, t, tapdisk_path); > > + > > libxl__xs_path_cleanup(gc, t, fe_path); > > libxl__xs_path_cleanup(gc, t, be_path); > > Do we deliberate ignore the error codes from these two?I don''t think so. In general in this destroy path we should consider whether, on failure, we should abandon the cleanup or note the error and carry on. Ian.
Ian Campbell
2012-Aug-02 14:43 UTC
Re: [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy
On Thu, 2012-08-02 at 15:41 +0100, Ian Jackson wrote:> Ian Campbell writes ("[PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy"): > > libxl: fix cleanup of tap devices in libxl__device_destroy > > > > We pass be_path to tapdisk_destroy but we''ve already deleted it so it fails to > > read tapdisk-params. However it appears that we need to destroy the tap device > > after tearing down xenstore, to avoid the leak reported by Greg Wettstein in > > <201207312141.q6VLfJje012656@wind.enjellic.com>. > > > > So read the tapdisk-params in the cleanup transaction, before the remove, and > > pass that down to destroy_tapdisk instead. tapdisk-params may of course be NULL > > if the device isn''t a tap device. > > > > There is no need to tear down the tap device from libxl__initiate_device_remove > > since this ultimately calls libxl__device_destroy. > > > > Propagate and log errors from libxl__device_destroy_tapdisk. > > Can you please wrap your commit messages to 70ish rather than 80 ? > Here is a screenshot of my email client: > http://www.chiark.greenend.org.uk/~ijackson/volatile/2012/wrap-damage.pngIf someone tells me the rune to put into vimrc such that "gqj" does this then sure.> > The code all looks good. Just one comment: > > > int libxl__device_destroy(libxl__gc *gc, libxl__device *dev) > > { > ... > > @@ -531,6 +533,9 @@ int libxl__device_destroy(libxl__gc *gc, > > rc = libxl__xs_transaction_start(gc, &t); > > if (rc) goto out; > > > > + /* May not exist if this is not a tap device */ > > + tapdisk_params = libxl__xs_read(gc, t, tapdisk_path); > > You can still use libxl__xs_read_checked. It considers ENOENT a > success (and therefore doesn''t log about it).OK.
Ian Campbell
2012-Aug-02 14:45 UTC
Re: [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy
On Thu, 2012-08-02 at 15:42 +0100, Ian Jackson wrote:> Ian Campbell writes ("Re: [Xen-devel] [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy"): > > > > > rc = libxl__xs_transaction_start(gc, &t); > > > if (rc) goto out; > > > > > > + /* May not exist if this is not a tap device */ > > > + tapdisk_params = libxl__xs_read(gc, t, tapdisk_path); > > > + > > > libxl__xs_path_cleanup(gc, t, fe_path); > > > libxl__xs_path_cleanup(gc, t, be_path); > > > > Do we deliberate ignore the error codes from these two? > > I don''t think so. > > In general in this destroy path we should consider whether, on > failure, we should abandon the cleanup or note the error and carry on.Since this is a destroy operation note it and carry on I think, so as to clean up as much as we are able. BTW, is there a libxl__xs_transaction_abort missing in this function too?
Ian Jackson
2012-Aug-02 14:48 UTC
linewrapping commit messages (was Re: [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy)
Ian Campbell writes ("Re: [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy"):> On Thu, 2012-08-02 at 15:41 +0100, Ian Jackson wrote: > > Can you please wrap your commit messages to 70ish rather than 80 ? > > Here is a screenshot of my email client: > > http://www.chiark.greenend.org.uk/~ijackson/volatile/2012/wrap-damage.png > > If someone tells me the rune to put into vimrc such that "gqj" does this > then sure.I asked IRC and people said: :q! emacs and :set wm=10 Take your pick :-). Ian.
Ian Jackson
2012-Aug-02 14:54 UTC
Re: [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy
Ian Campbell writes ("Re: [Xen-devel] [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy"):> On Thu, 2012-08-02 at 15:42 +0100, Ian Jackson wrote: > > In general in this destroy path we should consider whether, on > > failure, we should abandon the cleanup or note the error and carry on. > > Since this is a destroy operation note it and carry on I think, so as to > clean up as much as we are able.Right. Good then I guess it is OK. (We do risk leaking some stuff in xenstore which you''d have to use low-level tools to remove but that''s probably acceptable if things are so bad you can''t remove stuff from xenstore.)> BTW, is there a libxl__xs_transaction_abort missing in this function > too?Yes. I have edited my patch to fix this. (See below; I will repost it with v5 of my series, later today I think.) Ian. From: Ian Jackson <ian.jackson@eu.citrix.com> Subject: [PATCH] libxl: unify libxl__device_destroy and device_hotplug_done device_hotplug_done contains an open-coded but improved version of libxl__device_destroy. So move the contents of device_hotplug_done into libxl__device_destroy, deleting the old code, and replace it at its old location with a function call. Add the missing call to libxl__xs_transaction_abort (which was present in neither version and technically speaking is always a no-op with this code as it stands at the moment because no-one does "goto out" other than after libxl__xs_transaction_start or _commit). Also fix the error handling: the rc from the destroy should be propagated into the aodev. Reported-by: Ian Campbell <Ian.Campbell@citrix.com> Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com> Acked-by: Ian Campbell <ian.campbell@citrix.com> - Changes in v5 of series: * Also add missing xs abort. --- tools/libxl/libxl_device.c | 36 +++++++++++++----------------------- 1 files changed, 13 insertions(+), 23 deletions(-) diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c index da0c3ea..95b169e 100644 --- a/tools/libxl/libxl_device.c +++ b/tools/libxl/libxl_device.c @@ -513,22 +513,24 @@ int libxl__device_destroy(libxl__gc *gc, libxl__device *dev) char *be_path = libxl__device_backend_path(gc, dev); char *fe_path = libxl__device_frontend_path(gc, dev); xs_transaction_t t = 0; - int rc = 0; + int rc; + + for (;;) { + rc = libxl__xs_transaction_start(gc, &t); + if (rc) goto out; - do { - t = xs_transaction_start(CTX->xsh); libxl__xs_path_cleanup(gc, t, fe_path); libxl__xs_path_cleanup(gc, t, be_path); - rc = !xs_transaction_end(CTX->xsh, t, 0); - } while (rc && errno == EAGAIN); - if (rc) { - LOGE(ERROR, "unable to finish transaction"); - goto out; + + rc = libxl__xs_transaction_commit(gc, &t); + if (!rc) break; + if (rc < 0) goto out; } libxl__device_destroy_tapdisk(gc, be_path); out: + libxl__xs_transaction_abort(gc, &t); return rc; } @@ -993,29 +995,17 @@ error: static void device_hotplug_done(libxl__egc *egc, libxl__ao_device *aodev) { STATE_AO_GC(aodev->ao); - char *be_path = libxl__device_backend_path(gc, aodev->dev); - char *fe_path = libxl__device_frontend_path(gc, aodev->dev); - xs_transaction_t t = 0; int rc; device_hotplug_clean(gc, aodev); /* Clean xenstore if it''s a disconnection */ if (aodev->action == DEVICE_DISCONNECT) { - for (;;) { - rc = libxl__xs_transaction_start(gc, &t); - if (rc) goto out; - - libxl__xs_path_cleanup(gc, t, fe_path); - libxl__xs_path_cleanup(gc, t, be_path); - - rc = libxl__xs_transaction_commit(gc, &t); - if (!rc) break; - if (rc < 0) goto out; - } + rc = libxl__device_destroy(gc, aodev->dev); + if (!aodev->rc) + aodev->rc = rc; } -out: aodev->callback(egc, aodev); return; } -- tg: (7fc019d..) t/xen/xl.device-destroy-unify (depends on: t/xen/gitignore)
Ian Campbell
2012-Aug-02 14:55 UTC
Re: [PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy
On Thu, 2012-08-02 at 15:41 +0100, Ian Jackson wrote:> Ian Campbell writes ("[PATCH] libxl: fix cleanup of tap devices in libxl__device_destroy"): > > libxl: fix cleanup of tap devices in libxl__device_destroy > > > > We pass be_path to tapdisk_destroy but we''ve already deleted it so it fails to > > read tapdisk-params. However it appears that we need to destroy the tap device > > after tearing down xenstore, to avoid the leak reported by Greg Wettstein in > > <201207312141.q6VLfJje012656@wind.enjellic.com>. > > > > So read the tapdisk-params in the cleanup transaction, before the remove, and > > pass that down to destroy_tapdisk instead. tapdisk-params may of course be NULL > > if the device isn''t a tap device. > > > > There is no need to tear down the tap device from libxl__initiate_device_remove > > since this ultimately calls libxl__device_destroy. > > > > Propagate and log errors from libxl__device_destroy_tapdisk. > > Can you please wrap your commit messages to 70ish rather than 80 ? > Here is a screenshot of my email client: > http://www.chiark.greenend.org.uk/~ijackson/volatile/2012/wrap-damage.png > > The code all looks good. Just one comment: > > > int libxl__device_destroy(libxl__gc *gc, libxl__device *dev) > > { > ... > > @@ -531,6 +533,9 @@ int libxl__device_destroy(libxl__gc *gc, > > rc = libxl__xs_transaction_start(gc, &t); > > if (rc) goto out; > > > > + /* May not exist if this is not a tap device */ > > + tapdisk_params = libxl__xs_read(gc, t, tapdisk_path); > > You can still use libxl__xs_read_checked. It considers ENOENT a > success (and therefore doesn''t log about it).tapdisk_params cannot be const as read_checked requires because tapdisk_destroy modified the string. I suppose I could add a strdup inside.> > Ian.