Roger Pau Monne
2012-Jul-26 19:12 UTC
[PATCH 0/2] libxl: fixes for device addition/removal
Fixes for two regresions introduced by the hotplug series. [PATCH 1/2] libxl: fix race in libxl__devices_destroy [PATCH 2/2] libxl: allow to create a domain without disks
Roger Pau Monne
2012-Jul-26 19:12 UTC
[PATCH 1/2] libxl: fix race in libxl__devices_destroy
Don''t have a fixed number of devices in the aodevs array, and instead size it depending on the devices present in xenstore. Also change the console destroy path to a switch instead of an if. Signed-off-by: Roger Pau Monne <roger.pau@citrix.com> Cc: Ian Jackson <ian.jackson@eu.citrix.com> Cc: Ian Campbell <ian.campbell@eu.citrix.com> --- tools/libxl/libxl_device.c | 123 ++++++++++++++++++------------------------ tools/libxl/libxl_internal.h | 10 +++- 2 files changed, 62 insertions(+), 71 deletions(-) diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c index da0c3ea..4667261 100644 --- a/tools/libxl/libxl_device.c +++ b/tools/libxl/libxl_device.c @@ -58,50 +58,6 @@ int libxl__parse_backend_path(libxl__gc *gc, return libxl__device_kind_from_string(strkind, &dev->backend_kind); } -static int libxl__num_devices(libxl__gc *gc, uint32_t domid) -{ - char *path; - unsigned int num_kinds, num_devs; - char **kinds = NULL, **devs = NULL; - int i, j, rc = 0; - libxl__device dev; - libxl__device_kind kind; - int numdevs = 0; - - path = GCSPRINTF("/local/domain/%d/device", domid); - kinds = libxl__xs_directory(gc, XBT_NULL, path, &num_kinds); - if (!kinds) { - if (errno != ENOENT) { - LOGE(ERROR, "unable to get xenstore device listing %s", path); - rc = ERROR_FAIL; - goto out; - } - num_kinds = 0; - } - for (i = 0; i < num_kinds; i++) { - if (libxl__device_kind_from_string(kinds[i], &kind)) - continue; - if (kind == LIBXL__DEVICE_KIND_CONSOLE) - continue; - - path = GCSPRINTF("/local/domain/%d/device/%s", domid, kinds[i]); - devs = libxl__xs_directory(gc, XBT_NULL, path, &num_devs); - if (!devs) - continue; - for (j = 0; j < num_devs; j++) { - path = GCSPRINTF("/local/domain/%d/device/%s/%s/backend", - domid, kinds[i], devs[j]); - path = libxl__xs_read(gc, XBT_NULL, path); - if (path && libxl__parse_backend_path(gc, path, &dev) == 0) { - numdevs++; - } - } - } -out: - if (rc) return rc; - return numdevs; -} - int libxl__nic_type(libxl__gc *gc, libxl__device *dev, libxl_nic_type *nictype) { char *snictype, *be_path; @@ -451,9 +407,15 @@ void libxl__prepare_ao_devices(libxl__ao *ao, libxl__ao_devices *aodevs) GCNEW_ARRAY(aodevs->array, aodevs->size); for (int i = 0; i < aodevs->size; i++) { - aodevs->array[i].aodevs = aodevs; - libxl__prepare_ao_device(ao, &aodevs->array[i]); + GCNEW(aodevs->array[i]); + aodevs->array[i]->aodevs = aodevs; + libxl__prepare_ao_device(ao, aodevs->array[i]); } + /* + * Since we have already added all the aodevs, and marked them + * as active, we can mark the addition operation as finished. + */ + aodevs->in_addition = 0; } void libxl__ao_devices_callback(libxl__egc *egc, libxl__ao_device *aodev) @@ -463,12 +425,15 @@ void libxl__ao_devices_callback(libxl__egc *egc, libxl__ao_device *aodev) int i, error = 0; aodev->active = 0; + if (aodevs->in_addition) + return; + for (i = 0; i < aodevs->size; i++) { - if (aodevs->array[i].active) + if (aodevs->array[i]->active) return; - if (aodevs->array[i].rc) - error = aodevs->array[i].rc; + if (aodevs->array[i]->rc) + error = aodevs->array[i]->rc; } aodevs->callback(egc, aodevs, error); @@ -495,9 +460,9 @@ void libxl__ao_devices_callback(libxl__egc *egc, libxl__ao_device *aodev) int i; \ int end = start + d_config->num_##type##s; \ for (i = start; i < end; i++) { \ - aodevs->array[i].callback = libxl__ao_devices_callback; \ + aodevs->array[i]->callback = libxl__ao_devices_callback; \ libxl__device_##type##_add(egc, domid, &d_config->type##s[i-start],\ - &aodevs->array[i]); \ + aodevs->array[i]); \ } \ } @@ -545,20 +510,20 @@ void libxl__devices_destroy(libxl__egc *egc, libxl__devices_remove_state *drs) char *path; unsigned int num_kinds, num_dev_xsentries; char **kinds = NULL, **devs = NULL; - int i, j, numdev = 0, rc = 0; + int i, j, rc = 0; libxl__device *dev; libxl__ao_devices *aodevs = &drs->aodevs; libxl__ao_device *aodev; libxl__device_kind kind; - aodevs->size = libxl__num_devices(gc, drs->domid); - if (aodevs->size < 0) { - LOG(ERROR, "unable to get number of devices for domain %u", drs->domid); - rc = aodevs->size; - goto out; - } - - libxl__prepare_ao_devices(drs->ao, aodevs); + /* + * Initialize values of the aodevs structure manually, + * since we don''t know the number of devices we are going + * to unplug yet. + */ + aodevs->in_addition = 1; + aodevs->size = 0; + aodevs->array = NULL; aodevs->callback = devices_remove_callback; path = libxl__sprintf(gc, "/local/domain/%d/device", domid); @@ -588,21 +553,28 @@ void libxl__devices_destroy(libxl__egc *egc, libxl__devices_remove_state *drs) dev->domid = domid; dev->kind = kind; dev->devid = atoi(devs[j]); - if (dev->backend_kind == LIBXL__DEVICE_KIND_CONSOLE) { + switch (dev->backend_kind) { + case LIBXL__DEVICE_KIND_CONSOLE: /* Currently console devices can be destroyed * synchronously by just removing xenstore entries, * this is what libxl__device_destroy does. */ libxl__device_destroy(gc, dev); - continue; + break; + default: + GCREALLOC_ARRAY(aodevs->array, aodevs->size + 1); + GCNEW(aodev); + aodevs->array[aodevs->size] = aodev; + libxl__prepare_ao_device(ao, aodev); + aodev->aodevs = aodevs; + aodev->action = DEVICE_DISCONNECT; + aodev->dev = dev; + aodev->callback = libxl__ao_devices_callback; + aodev->force = drs->force; + libxl__initiate_device_remove(egc, aodev); + aodevs->size++; + break; } - aodev = &aodevs->array[numdev]; - aodev->action = DEVICE_DISCONNECT; - aodev->dev = dev; - aodev->callback = libxl__ao_devices_callback; - aodev->force = drs->force; - libxl__initiate_device_remove(egc, aodev); - numdev++; } } } @@ -624,7 +596,18 @@ void libxl__devices_destroy(libxl__egc *egc, libxl__devices_remove_state *drs) } out: - if (!numdev) drs->callback(egc, drs, rc); + if (!aodevs->size) drs->callback(egc, drs, rc); + else { + /* + * Create a dummy aodev to check if all the other aodevs are finished, + * and call the callback. + */ + GCNEW(aodev); + libxl__prepare_ao_device(ao, aodev); + aodev->aodevs = aodevs; + aodevs->in_addition = 0; + libxl__ao_devices_callback(egc, aodev); + } return; } diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 3e91529..3ee3a09 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -1808,6 +1808,8 @@ _hidden void libxl__prepare_ao_device(libxl__ao *ao, libxl__ao_device *aodev); /* Prepare a bunch of devices for addition/removal. Every ao_device in * ao_devices is set to ''active'', and the ao_device ''base'' field is set to * the one pointed by aodevs. + * Once this function is called, the aodevs->size value cannot be changed, + * and no further aodevs can be added to the array. */ _hidden void libxl__prepare_ao_devices(libxl__ao *ao, libxl__ao_devices *aodevs); @@ -1849,9 +1851,15 @@ struct libxl__ao_device { */ typedef void libxl__devices_callback(libxl__egc*, libxl__ao_devices*, int rc); struct libxl__ao_devices { - libxl__ao_device *array; + libxl__ao_device **array; int size; libxl__devices_callback *callback; + /* + * if in_addition == 1 means we are currently adding devices + * to the array, if in_addition == 0, means we have added all + * the devices. + */ + int in_addition; }; /* -- 1.7.7.5 (Apple Git-26)
Roger Pau Monne
2012-Jul-26 19:12 UTC
[PATCH 2/2] libxl: allow to create a domain without disks
Without this patch, the process gets stuck waiting for someone to call the callback, since no disk devices are added. Signed-off-by: Roger Pau Monne <roger.pau@citrix.com> Cc: Ian Jackson <ian.jackson@eu.citrix.com> Cc: Ian Campbell <ian.campbell@eu.citrix.com> --- tools/libxl/libxl_create.c | 12 ++++++++---- tools/libxl/libxl_dm.c | 13 +++++++++---- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c index 5859c09..3f129bf 100644 --- a/tools/libxl/libxl_create.c +++ b/tools/libxl/libxl_create.c @@ -907,11 +907,15 @@ static void domcreate_rebuild_done(libxl__egc *egc, store_libxl_entry(gc, domid, &d_config->b_info); - dcs->aodevs.size = d_config->num_disks; - dcs->aodevs.callback = domcreate_launch_dm; - libxl__prepare_ao_devices(ao, &dcs->aodevs); - libxl__add_disks(egc, ao, domid, 0, d_config, &dcs->aodevs); + if (d_config->num_disks > 0) { + dcs->aodevs.size = d_config->num_disks; + dcs->aodevs.callback = domcreate_launch_dm; + libxl__prepare_ao_devices(ao, &dcs->aodevs); + libxl__add_disks(egc, ao, domid, 0, d_config, &dcs->aodevs); + return; + } + domcreate_launch_dm(egc, &dcs->aodevs, 0); return; error_out: diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c index f2e9572..667aec7 100644 --- a/tools/libxl/libxl_dm.c +++ b/tools/libxl/libxl_dm.c @@ -856,11 +856,16 @@ retry_transaction: if (errno == EAGAIN) goto retry_transaction; - sdss->aodevs.size = dm_config->num_disks; - sdss->aodevs.callback = spawn_stub_launch_dm; - libxl__prepare_ao_devices(ao, &sdss->aodevs); - libxl__add_disks(egc, ao, dm_domid, 0, dm_config, &sdss->aodevs); + if (dm_config->num_disks > 0) { + sdss->aodevs.size = dm_config->num_disks; + sdss->aodevs.callback = spawn_stub_launch_dm; + libxl__prepare_ao_devices(ao, &sdss->aodevs); + libxl__add_disks(egc, ao, dm_domid, 0, dm_config, &sdss->aodevs); + free(args); + return; + } + spawn_stub_launch_dm(egc, &sdss->aodevs, 0); free(args); return; -- 1.7.7.5 (Apple Git-26)
Roger Pau Monne
2012-Jul-26 19:22 UTC
Re: [PATCH 0/2] libxl: fixes for device addition/removal
Roger Pau Monne wrote:> Fixes for two regresions introduced by the hotplug series. > > [PATCH 1/2] libxl: fix race in libxl__devices_destroy > [PATCH 2/2] libxl: allow to create a domain without disksForgot to say, but the changes can also be found on: git://xenbits.xen.org/people/royger/xen.git fixes_for_aodev Thanks.
Ian Campbell
2012-Jul-27 08:15 UTC
Re: [PATCH 1/2] libxl: fix race in libxl__devices_destroy
On Thu, 2012-07-26 at 20:12 +0100, Roger Pau Monne wrote:> Don''t have a fixed number of devices in the aodevs array, and instead > size it depending on the devices present in xenstore. Also change the > console destroy path to a switch instead of an if. > > Signed-off-by: Roger Pau Monne <roger.pau@citrix.com> > Cc: Ian Jackson <ian.jackson@eu.citrix.com> > Cc: Ian Campbell <ian.campbell@eu.citrix.com> > --- > tools/libxl/libxl_device.c | 123 ++++++++++++++++++------------------------ > tools/libxl/libxl_internal.h | 10 +++- > 2 files changed, 62 insertions(+), 71 deletions(-) > > diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c > index da0c3ea..4667261 100644 > --- a/tools/libxl/libxl_device.c > +++ b/tools/libxl/libxl_device.c > int libxl__nic_type(libxl__gc *gc, libxl__device *dev, libxl_nic_type *nictype) > { > char *snictype, *be_path; > @@ -451,9 +407,15 @@ void libxl__prepare_ao_devices(libxl__ao *ao, libxl__ao_devices *aodevs) > > GCNEW_ARRAY(aodevs->array, aodevs->size); > for (int i = 0; i < aodevs->size; i++) { > - aodevs->array[i].aodevs = aodevs; > - libxl__prepare_ao_device(ao, &aodevs->array[i]); > + GCNEW(aodevs->array[i]); > + aodevs->array[i]->aodevs = aodevs; > + libxl__prepare_ao_device(ao, aodevs->array[i]); > } > + /* > + * Since we have already added all the aodevs, and marked them > + * as active, we can mark the addition operation as finished. > + */ > + aodevs->in_addition = 0; > } > > void libxl__ao_devices_callback(libxl__egc *egc, libxl__ao_device *aodev) > > - aodevs->size = libxl__num_devices(gc, drs->domid); > - if (aodevs->size < 0) { > - LOG(ERROR, "unable to get number of devices for domain %u", drs->domid); > - rc = aodevs->size; > - goto out; > - } > - > - libxl__prepare_ao_devices(drs->ao, aodevs); > + /* > + * Initialize values of the aodevs structure manually, > + * since we don''t know the number of devices we are going > + * to unplug yet. > + */ > + aodevs->in_addition = 1; > + aodevs->size = 0; > + aodevs->array = NULL;This suggests to me that libxl__prepare_ao_devices should take the size as an argument and initialise that too, setting the array to NULL or allocating it as appropriate.> aodevs->callback = devices_remove_callback; > > path = libxl__sprintf(gc, "/local/domain/%d/device", domid); > @@ -588,21 +553,28 @@ void libxl__devices_destroy(libxl__egc *egc, libxl__devices_remove_state *drs) > dev->domid = domid; > dev->kind = kind; > dev->devid = atoi(devs[j]); > - if (dev->backend_kind == LIBXL__DEVICE_KIND_CONSOLE) { > + switch (dev->backend_kind) { > + case LIBXL__DEVICE_KIND_CONSOLE: > /* Currently console devices can be destroyed > * synchronously by just removing xenstore entries, > * this is what libxl__device_destroy does. > */ > libxl__device_destroy(gc, dev); > - continue; > + break; > + default: > + GCREALLOC_ARRAY(aodevs->array, aodevs->size + 1); > + GCNEW(aodev); > + aodevs->array[aodevs->size] = aodev; > + libxl__prepare_ao_device(ao, aodev);You could make this return a new aodev, aodev = libxl__prepare_ao_device(ao); ? You might even encapsulate the array realloc into aodev = libxl__ao_devices_append(aodevs) ?> + aodev->aodevs = aodevs; > + aodev->action = DEVICE_DISCONNECT; > + aodev->dev = dev; > + aodev->callback = libxl__ao_devices_callback; > + aodev->force = drs->force; > + libxl__initiate_device_remove(egc, aodev); > + aodevs->size++; > + break; > } > - aodev = &aodevs->array[numdev]; > - aodev->action = DEVICE_DISCONNECT; > - aodev->dev = dev; > - aodev->callback = libxl__ao_devices_callback; > - aodev->force = drs->force; > - libxl__initiate_device_remove(egc, aodev); > - numdev++; > } > } > } > @@ -624,7 +596,18 @@ void libxl__devices_destroy(libxl__egc *egc, libxl__devices_remove_state *drs) > } > > out: > - if (!numdev) drs->callback(egc, drs, rc); > + if (!aodevs->size) drs->callback(egc, drs, rc); > + else { > + /* > + * Create a dummy aodev to check if all the other aodevs are finished, > + * and call the callback. > + */ > + GCNEW(aodev); > + libxl__prepare_ao_device(ao, aodev); > + aodev->aodevs = aodevs; > + aodevs->in_addition = 0; > + libxl__ao_devices_callback(egc, aodev); > + }This dummy event strikes me as a bit odd. Would it not be possible to loop over xenstore creating the aodevs array and only then have a second loop which calls libxl__initiate_device_remove(egc, aodev) for each in turn? Probably you would encapsulate this into a helper libxl__as_devices_initiate which would call the callback if size == 0. That would do away with this whole "in_addition" phase since you wouldn''t actually initiate anything until you had created the entire list.> return; > } > > diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h > index 3e91529..3ee3a09 100644 > --- a/tools/libxl/libxl_internal.h > +++ b/tools/libxl/libxl_internal.h > @@ -1808,6 +1808,8 @@ _hidden void libxl__prepare_ao_device(libxl__ao *ao, libxl__ao_device *aodev); > /* Prepare a bunch of devices for addition/removal. Every ao_device in > * ao_devices is set to ''active'', and the ao_device ''base'' field is set to > * the one pointed by aodevs. > + * Once this function is called, the aodevs->size value cannot be changed, > + * and no further aodevs can be added to the array. > */ > _hidden void libxl__prepare_ao_devices(libxl__ao *ao, > libxl__ao_devices *aodevs); > @@ -1849,9 +1851,15 @@ struct libxl__ao_device { > */ > typedef void libxl__devices_callback(libxl__egc*, libxl__ao_devices*, int rc); > struct libxl__ao_devices { > - libxl__ao_device *array; > + libxl__ao_device **array; > int size; > libxl__devices_callback *callback; > + /* > + * if in_addition == 1 means we are currently adding devices > + * to the array, if in_addition == 0, means we have added all > + * the devices. > + */ > + int in_addition; > }; > > /*
Ian Campbell
2012-Jul-27 08:22 UTC
Re: [PATCH 2/2] libxl: allow to create a domain without disks
On Thu, 2012-07-26 at 20:12 +0100, Roger Pau Monne wrote:> Without this patch, the process gets stuck waiting for someone to call > the callback, since no disk devices are added. > > Signed-off-by: Roger Pau Monne <roger.pau@citrix.com> > Cc: Ian Jackson <ian.jackson@eu.citrix.com> > Cc: Ian Campbell <ian.campbell@eu.citrix.com> > --- > tools/libxl/libxl_create.c | 12 ++++++++---- > tools/libxl/libxl_dm.c | 13 +++++++++---- > 2 files changed, 17 insertions(+), 8 deletions(-) > > diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c > index 5859c09..3f129bf 100644 > --- a/tools/libxl/libxl_create.c > +++ b/tools/libxl/libxl_create.c > @@ -907,11 +907,15 @@ static void domcreate_rebuild_done(libxl__egc *egc, > > store_libxl_entry(gc, domid, &d_config->b_info); > > - dcs->aodevs.size = d_config->num_disks; > - dcs->aodevs.callback = domcreate_launch_dm; > - libxl__prepare_ao_devices(ao, &dcs->aodevs); > - libxl__add_disks(egc, ao, domid, 0, d_config, &dcs->aodevs); > + if (d_config->num_disks > 0) { > + dcs->aodevs.size = d_config->num_disks; > + dcs->aodevs.callback = domcreate_launch_dm; > + libxl__prepare_ao_devices(ao, &dcs->aodevs); > + libxl__add_disks(egc, ao, domid, 0, d_config, &dcs->aodevs); > + return; > + } > > + domcreate_launch_dm(egc, &dcs->aodevs, 0);Would it make sense for libxl__add_FOO to detect aodevs.size == 0 and call the callback direct? Rather than open coding this pattern here, in the stub dom disk case and in both the NIC cases too?> return; > > error_out: > diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c > index f2e9572..667aec7 100644 > --- a/tools/libxl/libxl_dm.c > +++ b/tools/libxl/libxl_dm.c > @@ -856,11 +856,16 @@ retry_transaction: > if (errno == EAGAIN) > goto retry_transaction; > > - sdss->aodevs.size = dm_config->num_disks; > - sdss->aodevs.callback = spawn_stub_launch_dm; > - libxl__prepare_ao_devices(ao, &sdss->aodevs); > - libxl__add_disks(egc, ao, dm_domid, 0, dm_config, &sdss->aodevs); > + if (dm_config->num_disks > 0) { > + sdss->aodevs.size = dm_config->num_disks; > + sdss->aodevs.callback = spawn_stub_launch_dm; > + libxl__prepare_ao_devices(ao, &sdss->aodevs); > + libxl__add_disks(egc, ao, dm_domid, 0, dm_config, &sdss->aodevs); > + free(args); > + return; > + } > > + spawn_stub_launch_dm(egc, &sdss->aodevs, 0); > free(args); > return; >
Ian Jackson
2012-Jul-27 11:06 UTC
Re: [PATCH 1/2] libxl: fix race in libxl__devices_destroy
Ian Campbell writes ("Re: [PATCH 1/2] libxl: fix race in libxl__devices_destroy"):> On Thu, 2012-07-26 at 20:12 +0100, Roger Pau Monne wrote:...> > out: > > - if (!numdev) drs->callback(egc, drs, rc); > > + if (!aodevs->size) drs->callback(egc, drs, rc); > > + else { > > + /* > > + * Create a dummy aodev to check if all the other aodevs are finished, > > + * and call the callback. > > + */ > > + GCNEW(aodev); > > + libxl__prepare_ao_device(ao, aodev); > > + aodev->aodevs = aodevs; > > + aodevs->in_addition = 0; > > + libxl__ao_devices_callback(egc, aodev); > > + } > > This dummy event strikes me as a bit odd.It was my suggestion. However Roger hasn''t done quite what I intended to suggest. If we were to _first_ create an aodev which represents the scan for devices, then when we complete the scan we can simply complete that aodev in the entirely normal way and everything will work correctly.> Would it not be possible to loop over xenstore creating the aodevs array > and only then have a second loop which calls > libxl__initiate_device_remove(egc, aodev) for each in turn? Probably you > would encapsulate this into a helper libxl__as_devices_initiate which > would call the callback if size == 0.IMO it''s generally better to do everything at once, rather than multiple parallel loops. That avoids (a) races (b) mistakes arising from changes in one place but not the other.> That would do away with this whole "in_addition" phase since you > wouldn''t actually initiate anything until you had created the entire > list.Using an aodev to represent this, instead of an ad hoc "in_addition", would regularise this too. I''ll reply more fully to Roger''s patches later. Ian.
Ian Jackson
2012-Jul-27 13:44 UTC
Re: [PATCH 1/2] libxl: fix race in libxl__devices_destroy
Roger Pau Monne writes ("[PATCH 1/2] libxl: fix race in libxl__devices_destroy"):> Don''t have a fixed number of devices in the aodevs array, and instead > size it depending on the devices present in xenstore. Also change the > console destroy path to a switch instead of an if....> GCNEW_ARRAY(aodevs->array, aodevs->size); > for (int i = 0; i < aodevs->size; i++) { > - aodevs->array[i].aodevs = aodevs; > - libxl__prepare_ao_device(ao, &aodevs->array[i]); > + GCNEW(aodevs->array[i]); > + aodevs->array[i]->aodevs = aodevs; > + libxl__prepare_ao_device(ao, aodevs->array[i]);I agree with Ian C''s comments on this.> + /* > + * Since we have already added all the aodevs, and marked them > + * as active, we can mark the addition operation as finished. > + */ > + aodevs->in_addition = 0;And this is rather messy; it requires thinking about to make sure everything is correct. How about making libxl__prepare_ao_devices automatically create the "we are still searching for stuff to do" aodev ? You might want a libxl__finished_preparing_ao_devices to hide the "searching" ao entirely. If you do it this way then the "no disks" corner case comes out in the wash too.> @@ -588,21 +553,28 @@ void libxl__devices_destroy(libxl__egc *egc, libxl__devices_remove_state *drs)...> + default: > + GCREALLOC_ARRAY(aodevs->array, aodevs->size + 1); > + GCNEW(aodev); > + aodevs->array[aodevs->size] = aodev;Open coding this is pretty ugly. I agree with Ian C that libxl__prepare_ao_device should do this. Ian.
Ian Jackson
2012-Jul-27 18:26 UTC
Re: [PATCH 1/2] libxl: fix race in libxl__devices_destroy
Ian Jackson writes ("Re: [PATCH 1/2] libxl: fix race in libxl__devices_destroy"):> How about making libxl__prepare_ao_devices automatically create the > "we are still searching for stuff to do" aodev ?Here is what I came up with. I have compiled it but not tested it yet. Despite me introducing some new functions the number of lines of code is unchanged - indeed, it shrinks if you don''t count comments :-). Ian. From: Ian Jackson <ian.jackson@eu.citrix.com> Subject: [PATCH RFC] libxl: fix device counting race in libxl__devices_destroy Don''t have a fixed number of devices in the aodevs array, and instead size it depending on the devices present in xenstore. Somewhat formalise the multiple device addition/removal machinery to make this clearer and easier to do. As a side-effect we fix a few "lost thread of control" bug which would occur if there were no devices of a particular kind. (Various if statements which checked for there being no devices have become redundant, but are retained to avoid making the patch bigger.) Specifically: * Users of libxl__ao_devices are no longer expected to know in advance how many device operations they are going to do. Instead they can initiate them one at a time, between bracketing calls to "begin" and "prepared". * The array of aodevs used for this is dynamically sized; to support this it''s an array of pointers rather than of structs. * Users of libxl__ao_devices are presented with a more opaque interface. They are are no longer expected to, themselves, - look into the array of aodevs (this is now private) - know that the individual addition/removal completions are handled by libxl__ao_devices_callback (this callback function is now a private function for the multidev machinery) - ever deal with populating the contents of an aodevs * The doc comments relating to some of the members of libxl__ao_device are clarified. (And the member `aodevs'' is moved to put it with the other members with the same status.) * The multidev machinery allocates an aodev to represent the operation of preparing all of the other operations. See the comment in libxl__multidev_begin. A wrinkle is that the functions are called "multidev" but the structs are called "libxl__ao_devices" and "aodevs". I have given these functions this name to distinguish them from "libxl__ao_device" and "aodev" and so forth by more than just the use of the plural "s" suffix. Perhaps eventually we will rename the structs. Signed-off-by: Roger Pau Monne <roger.pau@citrix.com> Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com> Cc: Ian Campbell <ian.campbell@eu.citrix.com> - Changes in v3: * New multidev interfaces - extensive changes. --- tools/libxl/libxl_create.c | 8 +- tools/libxl/libxl_device.c | 128 +++++++++++++++++++----------------------- tools/libxl/libxl_dm.c | 8 +- tools/libxl/libxl_internal.h | 66 +++++++++++++--------- 4 files changed, 105 insertions(+), 105 deletions(-) diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c index aafacd8..3265d69 100644 --- a/tools/libxl/libxl_create.c +++ b/tools/libxl/libxl_create.c @@ -909,10 +909,10 @@ static void domcreate_rebuild_done(libxl__egc *egc, store_libxl_entry(gc, domid, &d_config->b_info); - dcs->aodevs.size = d_config->num_disks; + libxl__multidev_begin(ao, &dcs->aodevs); dcs->aodevs.callback = domcreate_launch_dm; - libxl__prepare_ao_devices(ao, &dcs->aodevs); libxl__add_disks(egc, ao, domid, 0, d_config, &dcs->aodevs); + libxl__multidev_prepared(egc, &dcs->aodevs, 0); return; @@ -1039,10 +1039,10 @@ static void domcreate_devmodel_started(libxl__egc *egc, /* Plug nic interfaces */ if (d_config->num_nics > 0) { /* Attach nics */ - dcs->aodevs.size = d_config->num_nics; + libxl__multidev_begin(ao, &dcs->aodevs); dcs->aodevs.callback = domcreate_attach_pci; - libxl__prepare_ao_devices(ao, &dcs->aodevs); libxl__add_nics(egc, ao, domid, 0, d_config, &dcs->aodevs); + libxl__multidev_prepared(egc, &dcs->aodevs, 0); return; } diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c index da0c3ea..d3dfdbc 100644 --- a/tools/libxl/libxl_device.c +++ b/tools/libxl/libxl_device.c @@ -58,50 +58,6 @@ int libxl__parse_backend_path(libxl__gc *gc, return libxl__device_kind_from_string(strkind, &dev->backend_kind); } -static int libxl__num_devices(libxl__gc *gc, uint32_t domid) -{ - char *path; - unsigned int num_kinds, num_devs; - char **kinds = NULL, **devs = NULL; - int i, j, rc = 0; - libxl__device dev; - libxl__device_kind kind; - int numdevs = 0; - - path = GCSPRINTF("/local/domain/%d/device", domid); - kinds = libxl__xs_directory(gc, XBT_NULL, path, &num_kinds); - if (!kinds) { - if (errno != ENOENT) { - LOGE(ERROR, "unable to get xenstore device listing %s", path); - rc = ERROR_FAIL; - goto out; - } - num_kinds = 0; - } - for (i = 0; i < num_kinds; i++) { - if (libxl__device_kind_from_string(kinds[i], &kind)) - continue; - if (kind == LIBXL__DEVICE_KIND_CONSOLE) - continue; - - path = GCSPRINTF("/local/domain/%d/device/%s", domid, kinds[i]); - devs = libxl__xs_directory(gc, XBT_NULL, path, &num_devs); - if (!devs) - continue; - for (j = 0; j < num_devs; j++) { - path = GCSPRINTF("/local/domain/%d/device/%s/%s/backend", - domid, kinds[i], devs[j]); - path = libxl__xs_read(gc, XBT_NULL, path); - if (path && libxl__parse_backend_path(gc, path, &dev) == 0) { - numdevs++; - } - } - } -out: - if (rc) return rc; - return numdevs; -} - int libxl__nic_type(libxl__gc *gc, libxl__device *dev, libxl_nic_type *nictype) { char *snictype, *be_path; @@ -445,40 +401,80 @@ void libxl__prepare_ao_device(libxl__ao *ao, libxl__ao_device *aodev) libxl__ev_child_init(&aodev->child); } -void libxl__prepare_ao_devices(libxl__ao *ao, libxl__ao_devices *aodevs) +/* multidev */ + +void libxl__multidev_begin(libxl__ao *ao, libxl__ao_devices *aodevs) { AO_GC; - GCNEW_ARRAY(aodevs->array, aodevs->size); - for (int i = 0; i < aodevs->size; i++) { - aodevs->array[i].aodevs = aodevs; - libxl__prepare_ao_device(ao, &aodevs->array[i]); + aodevs->ao = ao; + aodevs->array = 0; + aodevs->used = aodevs->allocd = 0; + + /* We allocate an aodev to represent the operation of preparing + * all of the other operations. This operation is completed when + * we have started all the others (ie, when the user calls + * _prepared). That arranges automatically that + * (i) we do not think we have finished even if one of the + * operations completes while we are still preparing + * (ii) if we are starting zero operations, we do still + * make the callback as soon as we know this fact + * (iii) we have a nice consistent way to deal with any + * error that might occur while deciding what to initiate + */ + aodevs->preparation = libxl__multidev_prepare(aodevs); +} + +static void multidev_one_callback(libxl__egc *egc, libxl__ao_device *aodev); + +libxl__ao_device *libxl__multidev_prepare(libxl__ao_devices *aodevs) { + STATE_AO_GC(aodevs->ao); + libxl__ao_device *aodev; + + GCNEW(aodev); + aodev->aodevs = aodevs; + aodev->callback = multidev_one_callback; + libxl__prepare_ao_device(ao, aodev); + + if (aodevs->used >= aodevs->allocd) { + aodevs->allocd = aodevs->used * 2 + 5; + GCREALLOC_ARRAY(aodevs->array, aodevs->allocd); } + aodevs->array[aodevs->used++] = aodev; + + return aodev; } -void libxl__ao_devices_callback(libxl__egc *egc, libxl__ao_device *aodev) +static void multidev_one_callback(libxl__egc *egc, libxl__ao_device *aodev) { STATE_AO_GC(aodev->ao); libxl__ao_devices *aodevs = aodev->aodevs; int i, error = 0; aodev->active = 0; - for (i = 0; i < aodevs->size; i++) { - if (aodevs->array[i].active) + + for (i = 0; i < aodevs->used; i++) { + if (aodevs->array[i]->active) return; - if (aodevs->array[i].rc) - error = aodevs->array[i].rc; + if (aodevs->array[i]->rc) + error = aodevs->array[i]->rc; } aodevs->callback(egc, aodevs, error); return; } +void libxl__multidev_prepared(libxl__egc *egc, libxl__ao_devices *aodevs, + int rc) +{ + multidev_one_callback(egc, aodevs->preparation); +} + /******************************************************************************/ /* Macro for defining the functions that will add a bunch of disks when - * inside an async op. + * inside an async op with multidev. * This macro is added to prevent repetition of code. * * The following functions are defined: @@ -495,9 +491,9 @@ void libxl__ao_devices_callback(libxl__egc *egc, libxl__ao_device *aodev) int i; \ int end = start + d_config->num_##type##s; \ for (i = start; i < end; i++) { \ - aodevs->array[i].callback = libxl__ao_devices_callback; \ + libxl__ao_device *aodev = libxl__multidev_prepare(aodevs); \ libxl__device_##type##_add(egc, domid, &d_config->type##s[i-start],\ - &aodevs->array[i]); \ + aodev); \ } \ } @@ -545,20 +541,13 @@ void libxl__devices_destroy(libxl__egc *egc, libxl__devices_remove_state *drs) char *path; unsigned int num_kinds, num_dev_xsentries; char **kinds = NULL, **devs = NULL; - int i, j, numdev = 0, rc = 0; + int i, j, rc = 0; libxl__device *dev; libxl__ao_devices *aodevs = &drs->aodevs; libxl__ao_device *aodev; libxl__device_kind kind; - aodevs->size = libxl__num_devices(gc, drs->domid); - if (aodevs->size < 0) { - LOG(ERROR, "unable to get number of devices for domain %u", drs->domid); - rc = aodevs->size; - goto out; - } - - libxl__prepare_ao_devices(drs->ao, aodevs); + libxl__multidev_begin(ao, aodevs); aodevs->callback = devices_remove_callback; path = libxl__sprintf(gc, "/local/domain/%d/device", domid); @@ -596,13 +585,11 @@ void libxl__devices_destroy(libxl__egc *egc, libxl__devices_remove_state *drs) libxl__device_destroy(gc, dev); continue; } - aodev = &aodevs->array[numdev]; + aodev = libxl__multidev_prepare(aodevs); aodev->action = DEVICE_DISCONNECT; aodev->dev = dev; - aodev->callback = libxl__ao_devices_callback; aodev->force = drs->force; libxl__initiate_device_remove(egc, aodev); - numdev++; } } } @@ -624,8 +611,7 @@ void libxl__devices_destroy(libxl__egc *egc, libxl__devices_remove_state *drs) } out: - if (!numdev) drs->callback(egc, drs, rc); - return; + libxl__multidev_prepared(egc, aodevs, rc); } /* Callbacks for device related operations */ diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c index f2e9572..177642b 100644 --- a/tools/libxl/libxl_dm.c +++ b/tools/libxl/libxl_dm.c @@ -856,10 +856,10 @@ retry_transaction: if (errno == EAGAIN) goto retry_transaction; - sdss->aodevs.size = dm_config->num_disks; + libxl__multidev_begin(ao, &sdss->aodevs); sdss->aodevs.callback = spawn_stub_launch_dm; - libxl__prepare_ao_devices(ao, &sdss->aodevs); libxl__add_disks(egc, ao, dm_domid, 0, dm_config, &sdss->aodevs); + libxl__multidev_prepared(egc, &sdss->aodevs, 0); free(args); return; @@ -982,10 +982,10 @@ static void spawn_stubdom_pvqemu_cb(libxl__egc *egc, if (rc) goto out; if (d_config->num_nics > 0) { - sdss->aodevs.size = d_config->num_nics; + libxl__multidev_begin(ao, &sdss->aodevs); sdss->aodevs.callback = stubdom_pvqemu_cb; - libxl__prepare_ao_devices(ao, &sdss->aodevs); libxl__add_nics(egc, ao, dm_domid, 0, d_config, &sdss->aodevs); + libxl__multidev_prepared(egc, &sdss->aodevs, 0); return; } diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index f0c94e8..7072b09 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -1810,20 +1810,6 @@ typedef void libxl__device_callback(libxl__egc*, libxl__ao_device*); */ _hidden void libxl__prepare_ao_device(libxl__ao *ao, libxl__ao_device *aodev); -/* Prepare a bunch of devices for addition/removal. Every ao_device in - * ao_devices is set to ''active'', and the ao_device ''base'' field is set to - * the one pointed by aodevs. - */ -_hidden void libxl__prepare_ao_devices(libxl__ao *ao, - libxl__ao_devices *aodevs); - -/* Generic callback to use when adding/removing several devices, this will - * check if the given aodev is the last one, and call the callback in the - * parent libxl__ao_devices struct, passing the appropriate error if found. - */ -_hidden void libxl__ao_devices_callback(libxl__egc *egc, - libxl__ao_device *aodev); - struct libxl__ao_device { /* filled in by user */ libxl__ao *ao; @@ -1831,32 +1817,60 @@ struct libxl__ao_device { libxl__device *dev; int force; libxl__device_callback *callback; - /* private for implementation */ - int active; + /* return value, zeroed by user on entry, is valid on callback */ int rc; + /* private for multidevs */ + int active; + libxl__ao_devices *aodevs; /* reference to the containing multidev */ + /* private for add/remove implementation */ libxl__ev_devstate backend_ds; /* Bodge for Qemu devices, also used for timeout of hotplug execution */ libxl__ev_time timeout; - /* Used internally to have a reference to the upper libxl__ao_devices - * struct when present */ - libxl__ao_devices *aodevs; /* device hotplug execution */ const char *what; int num_exec; libxl__ev_child child; }; -/* Helper struct to simply the plug/unplug of multiple devices at the same - * time. - * - * This structure holds several devices, and the callback is only called - * when all the devices inside of the array have finished. - */ +/* + * Multiple devices "multidevs" handling. + * + * Firstly, you should + * libxl__multidev_begin + * multidevs->callback = ... + * Then zero or more times + * libxl__multidev_prepare + * libal__initiate_device_{remove/addition}. + * Finally, once + * libxl__multidev_prepared + * which will result (perhaps reentrantly) in one call to callback(). + */ + +/* Starts preparing to add/remove a bunch of devices. */ +_hidden void libxl__multidev_begin(libxl__ao *ao, libxl__ao_devices*); + +/* Prepares to add/remove one of many devices. Returns a libxl__ao_device + * which has had libxl__prepare_ao_device called, and which has also + * had ->callback set. The user should not mess with aodev->callback. */ +_hidden libxl__ao_device *libxl__multidev_prepare(libxl__ao_devices*); + +/* Notifies the multidev machinery that we have now finished preparing + * and initiating devices. multidev->callback may then be called as + * soon as there are no prepared but not completed operations + * outstanding, perhaps reentrantly. If rc!=0 (error should have been + * logged) multidev->callback will get a non-zero rc. + * callback may be set by the user at any point before prepared. */ +_hidden void libxl__multidev_prepared(libxl__egc*, libxl__ao_devices*, int rc); + typedef void libxl__devices_callback(libxl__egc*, libxl__ao_devices*, int rc); struct libxl__ao_devices { - libxl__ao_device *array; - int size; + /* set by user: */ libxl__devices_callback *callback; + /* for private use by libxl__...ao_devices... machinery: */ + libxl__ao *ao; + libxl__ao_device **array; + int used, allocd; + libxl__ao_device *preparation; }; /* -- tg: (45b79a7..) t/xen/xl.devnum-race (depends on: t/xen/xl.pty-pollhup)
Ian Campbell
2012-Jul-31 13:26 UTC
Re: [PATCH 1/2] libxl: fix race in libxl__devices_destroy
On Fri, 2012-07-27 at 19:26 +0100, Ian Jackson wrote:> +void libxl__multidev_prepared(libxl__egc *egc, libxl__ao_devices *aodevs, > + int rc) > +{ > + multidev_one_callback(egc, aodevs->preparation);Don''t we need to propagate rc here? Perhaps with aodevs->preparation->rc= rc ?> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h > index f0c94e8..7072b09 100644 > --- a/tools/libxl/libxl_internal.h > +++ b/tools/libxl/libxl_internal.h > @@ -1810,20 +1810,6 @@ typedef void libxl__device_callback(libxl__egc*, libxl__ao_device*);[...]> +/* > + * Multiple devices "multidevs" handling. > + * > + * Firstly, you should > + * libxl__multidev_begin > + * multidevs->callback = ... > + * Then zero or more times > + * libxl__multidev_prepare > + * libal__initiate_device_{remove/addition}. > + * Finally, once > + * libxl__multidev_prepared > + * which will result (perhaps reentrantly) in one call to callback().Briefly mention the multidev vs ao_devices naming? Or just do the rename now in a new patch. Ian.
Ian Jackson
2012-Jul-31 14:44 UTC
Re: [PATCH 1/2] libxl: fix race in libxl__devices_destroy
Ian Campbell writes ("Re: [PATCH 1/2] libxl: fix race in libxl__devices_destroy"):> On Fri, 2012-07-27 at 19:26 +0100, Ian Jackson wrote: > > > +void libxl__multidev_prepared(libxl__egc *egc, libxl__ao_devices *aodevs, > > + int rc) > > +{ > > + multidev_one_callback(egc, aodevs->preparation); > > Don''t we need to propagate rc here? Perhaps with > aodevs->preparation->rc= rc > ?Yes, good catch. This pattern where we have to explicitly assign to the rc in the aodev is perhaps less than ideal. I spotted a similar bug in one of Roger''s patches. I wonder if there''s a way to get around this.> > diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h > > index f0c94e8..7072b09 100644 > > --- a/tools/libxl/libxl_internal.h > > +++ b/tools/libxl/libxl_internal.h > > @@ -1810,20 +1810,6 @@ typedef void libxl__device_callback(libxl__egc*, libxl__ao_device*); > [...] > > +/* > > + * Multiple devices "multidevs" handling. > > + * > > + * Firstly, you should > > + * libxl__multidev_begin > > + * multidevs->callback = ... > > + * Then zero or more times > > + * libxl__multidev_prepare > > + * libal__initiate_device_{remove/addition}. > > + * Finally, once > > + * libxl__multidev_prepared > > + * which will result (perhaps reentrantly) in one call to callback(). > > Briefly mention the multidev vs ao_devices naming? Or just do the rename > now in a new patch.If you''re happy with that renaming now then I will do the latter. Ian.
Ian Campbell
2012-Jul-31 14:48 UTC
Re: [PATCH 1/2] libxl: fix race in libxl__devices_destroy
On Tue, 2012-07-31 at 15:44 +0100, Ian Jackson wrote:> Ian Campbell writes ("Re: [PATCH 1/2] libxl: fix race in libxl__devices_destroy"): > > On Fri, 2012-07-27 at 19:26 +0100, Ian Jackson wrote: > > > > > +void libxl__multidev_prepared(libxl__egc *egc, libxl__ao_devices *aodevs, > > > + int rc) > > > +{ > > > + multidev_one_callback(egc, aodevs->preparation); > > > > Don''t we need to propagate rc here? Perhaps with > > aodevs->preparation->rc= rc > > ? > > Yes, good catch. This pattern where we have to explicitly assign to > the rc in the aodev is perhaps less than ideal. I spotted a similar > bug in one of Roger''s patches. > > I wonder if there''s a way to get around this. > > > > diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h > > > index f0c94e8..7072b09 100644 > > > --- a/tools/libxl/libxl_internal.h > > > +++ b/tools/libxl/libxl_internal.h > > > @@ -1810,20 +1810,6 @@ typedef void libxl__device_callback(libxl__egc*, libxl__ao_device*); > > [...] > > > +/* > > > + * Multiple devices "multidevs" handling. > > > + * > > > + * Firstly, you should > > > + * libxl__multidev_begin > > > + * multidevs->callback = ... > > > + * Then zero or more times > > > + * libxl__multidev_prepare > > > + * libal__initiate_device_{remove/addition}. > > > + * Finally, once > > > + * libxl__multidev_prepared > > > + * which will result (perhaps reentrantly) in one call to callback(). > > > > Briefly mention the multidev vs ao_devices naming? Or just do the rename > > now in a new patch. > > If you''re happy with that renaming now then I will do the latter.It''s mostly mechanical so its pretty safe. Lets do it.