Hi all, this patch implements local_attach support for QDISK: that means that you can use qcow2 as disk format for your PV guests disks and use pygrub to retrieve the kernel and initrd in dom0. The idea is that we start a QEMU instance at boot time to listen to local_attach requests. When libxl_device_disk_local_attach is called on a QDISK images, libxl sets up a backend/frontend pair in dom0 for the disk so that blkfront in dom0 will create a new xvd device for it. Then pygrub can be pointed at this device to retrieve kernel and initrd. Stefano Stabellini (6): libxl: libxl_device_disk_local_attach return a new libxl_device_disk libxl: introduce libxl__device_generic_add_t libxl: add an xs_transaction_t parameter to two libxl functions libxl: introduce libxl__device_disk_add_t libxl: introduce libxl__alloc_vdev xl/libxl: implement QDISK libxl_device_disk_local_attach tools/hotplug/Linux/init.d/sysconfig.xencommons | 4 + tools/hotplug/Linux/init.d/xencommons | 4 + tools/libxl/libxl.c | 195 ++++++++++++++++++----- tools/libxl/libxl.h | 3 +- tools/libxl/libxl_bootloader.c | 9 +- tools/libxl/libxl_device.c | 36 +++-- tools/libxl/libxl_internal.h | 2 + 7 files changed, 196 insertions(+), 57 deletions(-) Cheers, Stefano
Stefano Stabellini
2012-Mar-27 13:59 UTC
[PATCH 1/6] libxl: libxl_device_disk_local_attach return a new libxl_device_disk
The caller passes an additional pre-allocated libxl_device_disk struct to libxl_device_disk_local_attach, that it is going to fill it with informations about the new locally attached disk. The new libxl_device_disk should be passed to libxl_device_disk_local_detach afterwards. This patch is just about adding the new parameter and updating the caller. Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> --- tools/libxl/libxl.c | 46 ++++++++++++++++++++++++++-------------- tools/libxl/libxl.h | 3 +- tools/libxl/libxl_bootloader.c | 9 +++++-- 3 files changed, 38 insertions(+), 20 deletions(-) diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c index 5344366..d33fbdf 100644 --- a/tools/libxl/libxl.c +++ b/tools/libxl/libxl.c @@ -1644,63 +1644,77 @@ out: return ret; } -char * libxl_device_disk_local_attach(libxl_ctx *ctx, libxl_device_disk *disk) +char * libxl_device_disk_local_attach(libxl_ctx *ctx, const libxl_device_disk *disk, + libxl_device_disk **new_disk) { GC_INIT(ctx); char *dev = NULL; char *ret = NULL; int rc; - - rc = libxl__device_disk_setdefault(gc, disk); + libxl_device_disk *tmpdisk = (libxl_device_disk *) + malloc(sizeof(libxl_device_disk)); + if (tmpdisk == NULL) goto out; + + *new_disk = tmpdisk; + memcpy(tmpdisk, disk, sizeof(libxl_device_disk)); + if (tmpdisk->pdev_path != NULL) + tmpdisk->pdev_path = strdup(tmpdisk->pdev_path); + if (tmpdisk->script != NULL) + tmpdisk->script = strdup(tmpdisk->script); + tmpdisk->vdev = NULL; + + rc = libxl__device_disk_setdefault(gc, tmpdisk); if (rc) goto out; - switch (disk->backend) { + switch (tmpdisk->backend) { case LIBXL_DISK_BACKEND_PHY: LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "locally attaching PHY disk %s", - disk->pdev_path); - dev = disk->pdev_path; + tmpdisk->pdev_path); + dev = tmpdisk->pdev_path; break; case LIBXL_DISK_BACKEND_TAP: - switch (disk->format) { + switch (tmpdisk->format) { case LIBXL_DISK_FORMAT_RAW: /* optimise away the early tapdisk attach in this case */ LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "locally attaching" " tap disk %s directly (ie without using blktap)", - disk->pdev_path); - dev = disk->pdev_path; + tmpdisk->pdev_path); + dev = tmpdisk->pdev_path; break; case LIBXL_DISK_FORMAT_VHD: - dev = libxl__blktap_devpath(gc, disk->pdev_path, - disk->format); + dev = libxl__blktap_devpath(gc, tmpdisk->pdev_path, + tmpdisk->format); break; case LIBXL_DISK_FORMAT_QCOW: case LIBXL_DISK_FORMAT_QCOW2: abort(); /* prevented by libxl__device_disk_set_backend */ default: LIBXL__LOG(ctx, LIBXL__LOG_ERROR, - "unrecognized disk format: %d", disk->format); + "unrecognized disk format: %d", tmpdisk->format); break; } break; case LIBXL_DISK_BACKEND_QDISK: - if (disk->format != LIBXL_DISK_FORMAT_RAW) { + if (tmpdisk->format != LIBXL_DISK_FORMAT_RAW) { LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "cannot locally" " attach a qdisk image if the format is not raw"); break; } LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "locally attaching qdisk %s\n", disk->pdev_path); - dev = disk->pdev_path; + dev = tmpdisk->pdev_path; break; default: LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "unrecognized disk backend " - "type: %d", disk->backend); + "type: %d", tmpdisk->backend); break; } out: - if (dev != NULL) + if (dev != NULL) { ret = strdup(dev); + tmpdisk->vdev = strdup(tmpdisk->vdev); + } GC_FREE; return ret; } diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h index 6b69030..d297b04 100644 --- a/tools/libxl/libxl.h +++ b/tools/libxl/libxl.h @@ -540,7 +540,8 @@ int libxl_cdrom_insert(libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk); * Make a disk available in this (the control) domain. Returns path to * a device. */ -char * libxl_device_disk_local_attach(libxl_ctx *ctx, libxl_device_disk *disk); +char * libxl_device_disk_local_attach(libxl_ctx *ctx, const libxl_device_disk *disk, + libxl_device_disk **new_disk); int libxl_device_disk_local_detach(libxl_ctx *ctx, libxl_device_disk *disk); /* Network Interfaces */ diff --git a/tools/libxl/libxl_bootloader.c b/tools/libxl/libxl_bootloader.c index 2774062..0abc31f 100644 --- a/tools/libxl/libxl_bootloader.c +++ b/tools/libxl/libxl_bootloader.c @@ -330,6 +330,7 @@ int libxl_run_bootloader(libxl_ctx *ctx, char *fifo = NULL; char *diskpath = NULL; char **args = NULL; + libxl_device_disk *tmpdisk = NULL; char tempdir_template[] = "/var/run/libxl/bl.XXXXXX"; char *tempdir; @@ -386,7 +387,7 @@ int libxl_run_bootloader(libxl_ctx *ctx, goto out_close; } - diskpath = libxl_device_disk_local_attach(ctx, disk); + diskpath = libxl_device_disk_local_attach(ctx, disk, &tmpdisk); if (!diskpath) { goto out_close; } @@ -452,9 +453,11 @@ int libxl_run_bootloader(libxl_ctx *ctx, rc = 0; out_close: - if (diskpath) { - libxl_device_disk_local_detach(ctx, disk); + if (diskpath) free(diskpath); + if (tmpdisk) { + libxl_device_disk_local_detach(ctx, tmpdisk); + free(tmpdisk); } if (fifo_fd > -1) close(fifo_fd); -- 1.7.2.5
Stefano Stabellini
2012-Mar-27 13:59 UTC
[PATCH 2/6] libxl: introduce libxl__device_generic_add_t
Introduce libxl__device_generic_add_t that takes an xs_transaction_t as parameter. Use libxl__device_generic_add_t to implement libxl__device_generic_add. Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> --- tools/libxl/libxl_device.c | 36 ++++++++++++++++++++++++++---------- tools/libxl/libxl_internal.h | 2 ++ 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c index c2880e0..c6f9044 100644 --- a/tools/libxl/libxl_device.c +++ b/tools/libxl/libxl_device.c @@ -61,12 +61,37 @@ int libxl__parse_backend_path(libxl__gc *gc, int libxl__device_generic_add(libxl__gc *gc, libxl__device *device, char **bents, char **fents) { + int rc = 0; + xs_transaction_t t; + libxl_ctx *ctx = libxl__gc_owner(gc); + +retry_transaction: + t = xs_transaction_start(ctx->xsh); + + rc = libxl__device_generic_add_t(gc, t, device, bents, fents); + + if (!xs_transaction_end(ctx->xsh, t, 0)) { + if (errno == EAGAIN) + goto retry_transaction; + else + LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xs transaction failed"); + } + return rc; +} + +int libxl__device_generic_add_t(libxl__gc *gc, xs_transaction_t t, + libxl__device *device, char **bents, char **fents) +{ libxl_ctx *ctx = libxl__gc_owner(gc); char *frontend_path, *backend_path; - xs_transaction_t t; struct xs_permissions frontend_perms[2]; struct xs_permissions backend_perms[2]; + if (t == XBT_NULL) + /* we need a valid transaction: call libxl__device_generic_add + * to create one for us */ + return libxl__device_generic_add(gc, device, bents, fents); + frontend_path = libxl__device_frontend_path(gc, device); backend_path = libxl__device_backend_path(gc, device); @@ -80,8 +105,6 @@ int libxl__device_generic_add(libxl__gc *gc, libxl__device *device, backend_perms[1].id = device->domid; backend_perms[1].perms = XS_PERM_READ; -retry_transaction: - t = xs_transaction_start(ctx->xsh); /* FIXME: read frontend_path and check state before removing stuff */ if (fents) { @@ -100,13 +123,6 @@ retry_transaction: libxl__xs_writev(gc, t, backend_path, bents); } - if (!xs_transaction_end(ctx->xsh, t, 0)) { - if (errno == EAGAIN) - goto retry_transaction; - else - LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xs transaction failed"); - } - return 0; } diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index e0a1070..f97213f 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -667,6 +667,8 @@ _hidden int libxl__device_console_add(libxl__gc *gc, uint32_t domid, _hidden int libxl__device_generic_add(libxl__gc *gc, libxl__device *device, char **bents, char **fents); +_hidden int libxl__device_generic_add_t(libxl__gc *gc, xs_transaction_t t, + libxl__device *device, char **bents, char **fents); _hidden char *libxl__device_backend_path(libxl__gc *gc, libxl__device *device); _hidden char *libxl__device_frontend_path(libxl__gc *gc, libxl__device *device); _hidden int libxl__parse_backend_path(libxl__gc *gc, const char *path, -- 1.7.2.5
Stefano Stabellini
2012-Mar-27 13:59 UTC
[PATCH 3/6] libxl: add an xs_transaction_t parameter to two libxl functions
Add an additional xs_transaction_t parameter to libxl__device_disk_from_xs_be and libxl__append_disk_list_of_type. Update all the callers. Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> --- tools/libxl/libxl.c | 26 ++++++++++++++------------ 1 files changed, 14 insertions(+), 12 deletions(-) diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c index d33fbdf..ce5e7be 100644 --- a/tools/libxl/libxl.c +++ b/tools/libxl/libxl.c @@ -1432,6 +1432,7 @@ out: } static void libxl__device_disk_from_xs_be(libxl__gc *gc, + xs_transaction_t t, const char *be_path, libxl_device_disk *disk) { @@ -1441,7 +1442,7 @@ static void libxl__device_disk_from_xs_be(libxl__gc *gc, libxl_device_disk_init(disk); - tmp = xs_read(ctx->xsh, XBT_NULL, + tmp = xs_read(ctx->xsh, t, libxl__sprintf(gc, "%s/params", be_path), &len); if (tmp && strchr(tmp, '':'')) { disk->pdev_path = strdup(strchr(tmp, '':'') + 1); @@ -1450,12 +1451,12 @@ static void libxl__device_disk_from_xs_be(libxl__gc *gc, disk->pdev_path = tmp; } libxl_string_to_backend(ctx, - libxl__xs_read(gc, XBT_NULL, + libxl__xs_read(gc, t, libxl__sprintf(gc, "%s/type", be_path)), &(disk->backend)); - disk->vdev = xs_read(ctx->xsh, XBT_NULL, + disk->vdev = xs_read(ctx->xsh, t, libxl__sprintf(gc, "%s/dev", be_path), &len); - tmp = libxl__xs_read(gc, XBT_NULL, libxl__sprintf + tmp = libxl__xs_read(gc, t, libxl__sprintf (gc, "%s/removable", be_path)); if (tmp) @@ -1463,13 +1464,13 @@ static void libxl__device_disk_from_xs_be(libxl__gc *gc, else disk->removable = 0; - tmp = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/mode", be_path)); + tmp = libxl__xs_read(gc, t, libxl__sprintf(gc, "%s/mode", be_path)); if (!strcmp(tmp, "w")) disk->readwrite = 1; else disk->readwrite = 0; - tmp = libxl__xs_read(gc, XBT_NULL, + tmp = libxl__xs_read(gc, t, libxl__sprintf(gc, "%s/device-type", be_path)); disk->is_cdrom = !strcmp(tmp, "cdrom"); @@ -1499,7 +1500,7 @@ int libxl_vdev_to_device_disk(libxl_ctx *ctx, uint32_t domid, if (!path) goto out; - libxl__device_disk_from_xs_be(gc, path, disk); + libxl__device_disk_from_xs_be(gc, XBT_NULL, path, disk); rc = 0; out: @@ -1510,6 +1511,7 @@ out: static int libxl__append_disk_list_of_type(libxl__gc *gc, uint32_t domid, + xs_transaction_t t, const char *type, libxl_device_disk **disks, int *ndisks) @@ -1521,7 +1523,7 @@ static int libxl__append_disk_list_of_type(libxl__gc *gc, be_path = libxl__sprintf(gc, "%s/backend/%s/%d", libxl__xs_get_dompath(gc, 0), type, domid); - dir = libxl__xs_directory(gc, XBT_NULL, be_path, &n); + dir = libxl__xs_directory(gc, t, be_path, &n); if (dir) { libxl_device_disk *tmp; tmp = realloc(*disks, sizeof (libxl_device_disk) * (*ndisks + n)); @@ -1534,7 +1536,7 @@ static int libxl__append_disk_list_of_type(libxl__gc *gc, for (; pdisk < pdisk_end; pdisk++, dir++) { const char *p; p = libxl__sprintf(gc, "%s/%s", be_path, *dir); - libxl__device_disk_from_xs_be(gc, p, pdisk); + libxl__device_disk_from_xs_be(gc, t, p, pdisk); pdisk->backend_domid = 0; } } @@ -1549,13 +1551,13 @@ libxl_device_disk *libxl_device_disk_list(libxl_ctx *ctx, uint32_t domid, int *n *num = 0; - rc = libxl__append_disk_list_of_type(gc, domid, "vbd", &disks, num); + rc = libxl__append_disk_list_of_type(gc, domid, XBT_NULL, "vbd", &disks, num); if (rc) goto out_err; - rc = libxl__append_disk_list_of_type(gc, domid, "tap", &disks, num); + rc = libxl__append_disk_list_of_type(gc, domid, XBT_NULL, "tap", &disks, num); if (rc) goto out_err; - rc = libxl__append_disk_list_of_type(gc, domid, "qdisk", &disks, num); + rc = libxl__append_disk_list_of_type(gc, domid, XBT_NULL, "qdisk", &disks, num); if (rc) goto out_err; GC_FREE; -- 1.7.2.5
Stefano Stabellini
2012-Mar-27 13:59 UTC
[PATCH 4/6] libxl: introduce libxl__device_disk_add_t
Introduce libxl__device_disk_add_t that takes an additional xs_transaction_t paramter. Implement libxl_device_disk_add using libxl__device_disk_add_t. Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> --- tools/libxl/libxl.c | 19 ++++++++++++++----- 1 files changed, 14 insertions(+), 5 deletions(-) diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c index ce5e7be..fe63fd5 100644 --- a/tools/libxl/libxl.c +++ b/tools/libxl/libxl.c @@ -1278,14 +1278,16 @@ static int libxl__device_from_disk(libxl__gc *gc, uint32_t domid, return 0; } -int libxl_device_disk_add(libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk) + +static int libxl__device_disk_add_t(libxl__gc *gc, uint32_t domid, xs_transaction_t t, + libxl_device_disk *disk) { - GC_INIT(ctx); flexarray_t *front; flexarray_t *back; char *dev; libxl__device device; int major, minor, rc; + libxl_ctx *ctx = gc->owner; rc = libxl__device_disk_setdefault(gc, disk); if (rc) goto out; @@ -1381,9 +1383,9 @@ int libxl_device_disk_add(libxl_ctx *ctx, uint32_t domid, libxl_device_disk *dis flexarray_append(front, "device-type"); flexarray_append(front, disk->is_cdrom ? "cdrom" : "disk"); - libxl__device_generic_add(gc, &device, - libxl__xs_kvs_of_flexarray(gc, back, back->count), - libxl__xs_kvs_of_flexarray(gc, front, front->count)); + libxl__device_generic_add_t(gc, t, &device, + libxl__xs_kvs_of_flexarray(gc, back, back->count), + libxl__xs_kvs_of_flexarray(gc, front, front->count)); rc = 0; @@ -1391,6 +1393,13 @@ out_free: flexarray_free(back); flexarray_free(front); out: + return rc; +} + +int libxl_device_disk_add(libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk) +{ + GC_INIT(ctx); + int rc = libxl__device_disk_add_t(gc, domid, XBT_NULL, disk); GC_FREE; return rc; } -- 1.7.2.5
Introduce libxl__alloc_vdev: find a spare virtual block device in the domain passed as argument. Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> --- tools/libxl/libxl.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 60 insertions(+), 0 deletions(-) diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c index fe63fd5..fa7898a 100644 --- a/tools/libxl/libxl.c +++ b/tools/libxl/libxl.c @@ -1228,6 +1228,13 @@ int libxl_vncviewer_exec(libxl_ctx *ctx, uint32_t domid, int autopass) /******************************************************************************/ +static int libxl__append_disk_list_of_type(libxl__gc *gc, + uint32_t domid, + xs_transaction_t t, + const char *type, + libxl_device_disk **disks, + int *ndisks); + int libxl__device_disk_setdefault(libxl__gc *gc, libxl_device_disk *disk) { int rc; @@ -1278,6 +1285,59 @@ static int libxl__device_from_disk(libxl__gc *gc, uint32_t domid, return 0; } +static int libxl__vdev_to_index(libxl__gc *gc, char *vdev) +{ + if (vdev == NULL) + return 0; + if (strlen(vdev) > 4 || strlen(vdev) < 4) + return 0; + /* assume xvdz is the last one available */ + if (vdev[3] >= ''z'') + return -1; + return vdev[3] - ''a''; +} + +static char* libxl__index_to_vdev(libxl__gc *gc, int idx) +{ + if (idx < 0) + return NULL; + return libxl__sprintf(gc, "xvd%c", ''a'' + idx); +} + +static char * libxl__alloc_vdev(libxl__gc *gc, uint32_t domid, xs_transaction_t t, + libxl_device_disk *disk) +{ + int rc = 0; + libxl_device_disk *disks = NULL; + int num = 0, idx = -1, max_idx = -1, i = 0; + + rc = libxl__append_disk_list_of_type(gc, domid, t, "vbd", &disks, &num); + if (rc) goto out; + + rc = libxl__append_disk_list_of_type(gc, domid, t, "tap", &disks, &num); + if (rc) goto out; + + rc = libxl__append_disk_list_of_type(gc, domid, t, "qdisk", &disks, &num); + if (rc) goto out; + + for (i = 0; i < num; i++) { + idx = libxl__vdev_to_index(gc, disks[i].vdev); + if (idx < 0) { + max_idx = -1; + goto out; + } + if (idx > max_idx) + max_idx = idx; + } + max_idx++; +out: + while (disks && num) { + num--; + libxl_device_disk_dispose(&disks[num]); + } + free(disks); + return libxl__index_to_vdev(gc, max_idx); +} static int libxl__device_disk_add_t(libxl__gc *gc, uint32_t domid, xs_transaction_t t, libxl_device_disk *disk) -- 1.7.2.5
Stefano Stabellini
2012-Mar-27 13:59 UTC
[PATCH 6/6] xl/libxl: implement QDISK libxl_device_disk_local_attach
- Spawn a QEMU instance at boot time to handle disk local attach requests. - Implement libxl_device_disk_local_attach for QDISKs in terms of a regular disk attach with the frontend and backend running in the same domain. Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> --- tools/hotplug/Linux/init.d/sysconfig.xencommons | 4 ++ tools/hotplug/Linux/init.d/xencommons | 4 ++ tools/libxl/libxl.c | 46 +++++++++++++++++----- 3 files changed, 43 insertions(+), 11 deletions(-) diff --git a/tools/hotplug/Linux/init.d/sysconfig.xencommons b/tools/hotplug/Linux/init.d/sysconfig.xencommons index 6543204..28cf2eb 100644 --- a/tools/hotplug/Linux/init.d/sysconfig.xencommons +++ b/tools/hotplug/Linux/init.d/sysconfig.xencommons @@ -12,3 +12,7 @@ # Running xenbackendd in debug mode #XENBACKENDD_DEBUG=[yes|on|1] + +# qemu path and log file +#QEMU_XEN=/usr/lib/xen/bin/qemu-system-i386 +#QEMU_XEN_LOG=/var/log/xen/qemu-dm-dom0.log diff --git a/tools/hotplug/Linux/init.d/xencommons b/tools/hotplug/Linux/init.d/xencommons index 6c72dd8..f328492 100644 --- a/tools/hotplug/Linux/init.d/xencommons +++ b/tools/hotplug/Linux/init.d/xencommons @@ -103,6 +103,10 @@ do_start () { xenconsoled --pid-file=$XENCONSOLED_PIDFILE $XENCONSOLED_ARGS test -z "$XENBACKENDD_DEBUG" || XENBACKENDD_ARGS="-d" test "`uname`" != "NetBSD" || xenbackendd $XENBACKENDD_ARGS + echo Starting QEMU as disk backend for dom0 + test -z "$QEMU_XEN" && QEMU_XEN=/usr/lib/xen/bin/qemu-system-i386 + test -z "$QEMU_XEN_LOG" && QEMU_XEN_LOG=/var/log/xen/qemu-dm-dom0.log + $QEMU_XEN -xen-domid 0 -xen-attach -name dom0 -nographic -M xenpv -daemonize &>$QEMU_XEN_LOG } do_stop () { echo Stopping xenconsoled diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c index fa7898a..4c89a56 100644 --- a/tools/libxl/libxl.c +++ b/tools/libxl/libxl.c @@ -1767,13 +1767,30 @@ char * libxl_device_disk_local_attach(libxl_ctx *ctx, const libxl_device_disk *d break; case LIBXL_DISK_BACKEND_QDISK: if (tmpdisk->format != LIBXL_DISK_FORMAT_RAW) { - LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "cannot locally" - " attach a qdisk image if the format is not raw"); - break; + xs_transaction_t t; +retry_transaction: + t = xs_transaction_start(ctx->xsh); + /* use 0 as the domid of the toolstack domain for now */ + tmpdisk->vdev = libxl__alloc_vdev(gc, 0, t, tmpdisk); + if (tmpdisk->vdev == NULL) { + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "libxl__alloc_vdev failed\n"); + xs_transaction_end(ctx->xsh, t, 1); + goto out; + } + if (libxl__device_disk_add_t(gc, 0, t, tmpdisk)) { + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "libxl_device_disk_add failed\n"); + xs_transaction_end(ctx->xsh, t, 1); + goto out; + } + if (!xs_transaction_end(ctx->xsh, t, 0)) + if (errno == EAGAIN) + goto retry_transaction; + dev = libxl__sprintf(gc, "/dev/%s", tmpdisk->vdev); + } else { + dev = tmpdisk->pdev_path; } LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "locally attaching qdisk %s\n", - disk->pdev_path); - dev = tmpdisk->pdev_path; + dev); break; default: LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "unrecognized disk backend " @@ -1792,12 +1809,19 @@ char * libxl_device_disk_local_attach(libxl_ctx *ctx, const libxl_device_disk *d int libxl_device_disk_local_detach(libxl_ctx *ctx, libxl_device_disk *disk) { - /* Nothing to do for PHYSTYPE_PHY. */ - - /* - * For other device types assume that the blktap2 process is - * needed by the soon to be started domain and do nothing. - */ + switch (disk->backend) { + case LIBXL_DISK_BACKEND_QDISK: + if (disk->format != LIBXL_DISK_FORMAT_RAW) + return libxl_device_disk_destroy(ctx, 0, disk); + break; + default: + /* + * Nothing to do for PHYSTYPE_PHY. + * For other device types assume that the blktap2 process is + * needed by the soon to be started domain and do nothing. + */ + break; + } return 0; } -- 1.7.2.5
Ian Campbell
2012-Mar-27 14:04 UTC
Re: [PATCH 2/6] libxl: introduce libxl__device_generic_add_t
On Tue, 2012-03-27 at 14:59 +0100, Stefano Stabellini wrote:> Introduce libxl__device_generic_add_t that takes an xs_transaction_t as > parameter. > Use libxl__device_generic_add_t to implement libxl__device_generic_add.I think it would be better to just change the existing API to add a transaction.> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> > --- > tools/libxl/libxl_device.c | 36 ++++++++++++++++++++++++++---------- > tools/libxl/libxl_internal.h | 2 ++ > 2 files changed, 28 insertions(+), 10 deletions(-) > > diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c > index c2880e0..c6f9044 100644 > --- a/tools/libxl/libxl_device.c > +++ b/tools/libxl/libxl_device.c > @@ -61,12 +61,37 @@ int libxl__parse_backend_path(libxl__gc *gc, > int libxl__device_generic_add(libxl__gc *gc, libxl__device *device, > char **bents, char **fents) > { > + int rc = 0; > + xs_transaction_t t; > + libxl_ctx *ctx = libxl__gc_owner(gc); > + > +retry_transaction: > + t = xs_transaction_start(ctx->xsh); > + > + rc = libxl__device_generic_add_t(gc, t, device, bents, fents); > + > + if (!xs_transaction_end(ctx->xsh, t, 0)) { > + if (errno == EAGAIN) > + goto retry_transaction; > + else > + LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xs transaction failed"); > + } > + return rc; > +} > + > +int libxl__device_generic_add_t(libxl__gc *gc, xs_transaction_t t, > + libxl__device *device, char **bents, char **fents) > +{ > libxl_ctx *ctx = libxl__gc_owner(gc); > char *frontend_path, *backend_path; > - xs_transaction_t t; > struct xs_permissions frontend_perms[2]; > struct xs_permissions backend_perms[2]; > > + if (t == XBT_NULL) > + /* we need a valid transaction: call libxl__device_generic_add > + * to create one for us */ > + return libxl__device_generic_add(gc, device, bents, fents);This function will in turn start a new transaction and then call us straight back again, which is all a bit of a roundabout way to do things. Can''t this just be a t = xs_transaction_start?> + > frontend_path = libxl__device_frontend_path(gc, device); > backend_path = libxl__device_backend_path(gc, device); > > @@ -80,8 +105,6 @@ int libxl__device_generic_add(libxl__gc *gc, libxl__device *device, > backend_perms[1].id = device->domid; > backend_perms[1].perms = XS_PERM_READ; > > -retry_transaction: > - t = xs_transaction_start(ctx->xsh); > /* FIXME: read frontend_path and check state before removing stuff */ > > if (fents) { > @@ -100,13 +123,6 @@ retry_transaction: > libxl__xs_writev(gc, t, backend_path, bents); > } > > - if (!xs_transaction_end(ctx->xsh, t, 0)) { > - if (errno == EAGAIN) > - goto retry_transaction; > - else > - LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xs transaction failed"); > - } > - > return 0; > } > > diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h > index e0a1070..f97213f 100644 > --- a/tools/libxl/libxl_internal.h > +++ b/tools/libxl/libxl_internal.h > @@ -667,6 +667,8 @@ _hidden int libxl__device_console_add(libxl__gc *gc, uint32_t domid, > > _hidden int libxl__device_generic_add(libxl__gc *gc, libxl__device *device, > char **bents, char **fents); > +_hidden int libxl__device_generic_add_t(libxl__gc *gc, xs_transaction_t t, > + libxl__device *device, char **bents, char **fents); > _hidden char *libxl__device_backend_path(libxl__gc *gc, libxl__device *device); > _hidden char *libxl__device_frontend_path(libxl__gc *gc, libxl__device *device); > _hidden int libxl__parse_backend_path(libxl__gc *gc, const char *path,
Ian Campbell
2012-Mar-27 14:07 UTC
Re: [PATCH 1/6] libxl: libxl_device_disk_local_attach return a new libxl_device_disk
On Tue, 2012-03-27 at 14:59 +0100, Stefano Stabellini wrote:> The caller passes an additional pre-allocated libxl_device_disk struct > to libxl_device_disk_local_attach, that it is going to fill it with > informations about the new locally attached disk. > The new libxl_device_disk should be passed to > libxl_device_disk_local_detach afterwards. > > This patch is just about adding the new parameter and updating the > caller. > > Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> > --- > tools/libxl/libxl.c | 46 ++++++++++++++++++++++++++-------------- > tools/libxl/libxl.h | 3 +- > tools/libxl/libxl_bootloader.c | 9 +++++-- > 3 files changed, 38 insertions(+), 20 deletions(-) > > diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c > index 5344366..d33fbdf 100644 > --- a/tools/libxl/libxl.c > +++ b/tools/libxl/libxl.c > @@ -1644,63 +1644,77 @@ out: > return ret; > } > > -char * libxl_device_disk_local_attach(libxl_ctx *ctx, libxl_device_disk *disk) > +char * libxl_device_disk_local_attach(libxl_ctx *ctx, const libxl_device_disk *disk, > + libxl_device_disk **new_disk)I think this would be better as a caller allocated libxl_device_disk *. That''s much simpler since the caller can just put it on the stack or in an existing datastructure (I expect a suitable one comes into being with IanJ''s async patch series). Do we need this as a public facing API? Does anything use it? I think it should be an implementation detail of libxl_device_disk_add where domid == SELF? That would also allow you to use the gc here.> { > GC_INIT(ctx); > char *dev = NULL; > char *ret = NULL; > int rc; > - > - rc = libxl__device_disk_setdefault(gc, disk); > + libxl_device_disk *tmpdisk = (libxl_device_disk *) > + malloc(sizeof(libxl_device_disk)); > + if (tmpdisk == NULL) goto out; > + > + *new_disk = tmpdisk; > + memcpy(tmpdisk, disk, sizeof(libxl_device_disk)); > + if (tmpdisk->pdev_path != NULL) > + tmpdisk->pdev_path = strdup(tmpdisk->pdev_path); > + if (tmpdisk->script != NULL) > + tmpdisk->script = strdup(tmpdisk->script); > + tmpdisk->vdev = NULL; > + > + rc = libxl__device_disk_setdefault(gc, tmpdisk); > if (rc) goto out; > > - switch (disk->backend) { > + switch (tmpdisk->backend) { > case LIBXL_DISK_BACKEND_PHY: > LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "locally attaching PHY disk %s", > - disk->pdev_path); > - dev = disk->pdev_path; > + tmpdisk->pdev_path); > + dev = tmpdisk->pdev_path; > break; > case LIBXL_DISK_BACKEND_TAP: > - switch (disk->format) { > + switch (tmpdisk->format) { > case LIBXL_DISK_FORMAT_RAW: > /* optimise away the early tapdisk attach in this case */ > LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "locally attaching" > " tap disk %s directly (ie without using blktap)", > - disk->pdev_path); > - dev = disk->pdev_path; > + tmpdisk->pdev_path); > + dev = tmpdisk->pdev_path; > break; > case LIBXL_DISK_FORMAT_VHD: > - dev = libxl__blktap_devpath(gc, disk->pdev_path, > - disk->format); > + dev = libxl__blktap_devpath(gc, tmpdisk->pdev_path, > + tmpdisk->format); > break; > case LIBXL_DISK_FORMAT_QCOW: > case LIBXL_DISK_FORMAT_QCOW2: > abort(); /* prevented by libxl__device_disk_set_backend */ > default: > LIBXL__LOG(ctx, LIBXL__LOG_ERROR, > - "unrecognized disk format: %d", disk->format); > + "unrecognized disk format: %d", tmpdisk->format); > break; > } > break; > case LIBXL_DISK_BACKEND_QDISK: > - if (disk->format != LIBXL_DISK_FORMAT_RAW) { > + if (tmpdisk->format != LIBXL_DISK_FORMAT_RAW) { > LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "cannot locally" > " attach a qdisk image if the format is not raw"); > break; > } > LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "locally attaching qdisk %s\n", > disk->pdev_path); > - dev = disk->pdev_path; > + dev = tmpdisk->pdev_path; > break; > default: > LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "unrecognized disk backend " > - "type: %d", disk->backend); > + "type: %d", tmpdisk->backend); > break; > } > > out: > - if (dev != NULL) > + if (dev != NULL) { > ret = strdup(dev); > + tmpdisk->vdev = strdup(tmpdisk->vdev); > + } > GC_FREE; > return ret; > } > diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h > index 6b69030..d297b04 100644 > --- a/tools/libxl/libxl.h > +++ b/tools/libxl/libxl.h > @@ -540,7 +540,8 @@ int libxl_cdrom_insert(libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk); > * Make a disk available in this (the control) domain. Returns path to > * a device. > */ > -char * libxl_device_disk_local_attach(libxl_ctx *ctx, libxl_device_disk *disk); > +char * libxl_device_disk_local_attach(libxl_ctx *ctx, const libxl_device_disk *disk, > + libxl_device_disk **new_disk); > int libxl_device_disk_local_detach(libxl_ctx *ctx, libxl_device_disk *disk); > > /* Network Interfaces */ > diff --git a/tools/libxl/libxl_bootloader.c b/tools/libxl/libxl_bootloader.c > index 2774062..0abc31f 100644 > --- a/tools/libxl/libxl_bootloader.c > +++ b/tools/libxl/libxl_bootloader.c > @@ -330,6 +330,7 @@ int libxl_run_bootloader(libxl_ctx *ctx, > char *fifo = NULL; > char *diskpath = NULL; > char **args = NULL; > + libxl_device_disk *tmpdisk = NULL; > > char tempdir_template[] = "/var/run/libxl/bl.XXXXXX"; > char *tempdir; > @@ -386,7 +387,7 @@ int libxl_run_bootloader(libxl_ctx *ctx, > goto out_close; > } > > - diskpath = libxl_device_disk_local_attach(ctx, disk); > + diskpath = libxl_device_disk_local_attach(ctx, disk, &tmpdisk); > if (!diskpath) { > goto out_close; > } > @@ -452,9 +453,11 @@ int libxl_run_bootloader(libxl_ctx *ctx, > > rc = 0; > out_close: > - if (diskpath) { > - libxl_device_disk_local_detach(ctx, disk); > + if (diskpath) > free(diskpath); > + if (tmpdisk) { > + libxl_device_disk_local_detach(ctx, tmpdisk); > + free(tmpdisk); > } > if (fifo_fd > -1) > close(fifo_fd);
Ian Campbell
2012-Mar-27 14:17 UTC
Re: [PATCH 3/6] libxl: add an xs_transaction_t parameter to two libxl functions
On Tue, 2012-03-27 at 14:59 +0100, Stefano Stabellini wrote:> Add an additional xs_transaction_t parameter to > libxl__device_disk_from_xs_be and libxl__append_disk_list_of_type. > > Update all the callers. > > Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>Acked-by: Ian Campbell <ian.campbell@citrix.com>> --- > tools/libxl/libxl.c | 26 ++++++++++++++------------ > 1 files changed, 14 insertions(+), 12 deletions(-) > > diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c > index d33fbdf..ce5e7be 100644 > --- a/tools/libxl/libxl.c > +++ b/tools/libxl/libxl.c > @@ -1432,6 +1432,7 @@ out: > } > > static void libxl__device_disk_from_xs_be(libxl__gc *gc, > + xs_transaction_t t, > const char *be_path, > libxl_device_disk *disk) > { > @@ -1441,7 +1442,7 @@ static void libxl__device_disk_from_xs_be(libxl__gc *gc, > > libxl_device_disk_init(disk); > > - tmp = xs_read(ctx->xsh, XBT_NULL, > + tmp = xs_read(ctx->xsh, t, > libxl__sprintf(gc, "%s/params", be_path), &len); > if (tmp && strchr(tmp, '':'')) { > disk->pdev_path = strdup(strchr(tmp, '':'') + 1); > @@ -1450,12 +1451,12 @@ static void libxl__device_disk_from_xs_be(libxl__gc *gc, > disk->pdev_path = tmp; > } > libxl_string_to_backend(ctx, > - libxl__xs_read(gc, XBT_NULL, > + libxl__xs_read(gc, t, > libxl__sprintf(gc, "%s/type", be_path)), > &(disk->backend)); > - disk->vdev = xs_read(ctx->xsh, XBT_NULL, > + disk->vdev = xs_read(ctx->xsh, t, > libxl__sprintf(gc, "%s/dev", be_path), &len); > - tmp = libxl__xs_read(gc, XBT_NULL, libxl__sprintf > + tmp = libxl__xs_read(gc, t, libxl__sprintf > (gc, "%s/removable", be_path)); > > if (tmp) > @@ -1463,13 +1464,13 @@ static void libxl__device_disk_from_xs_be(libxl__gc *gc, > else > disk->removable = 0; > > - tmp = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/mode", be_path)); > + tmp = libxl__xs_read(gc, t, libxl__sprintf(gc, "%s/mode", be_path)); > if (!strcmp(tmp, "w")) > disk->readwrite = 1; > else > disk->readwrite = 0; > > - tmp = libxl__xs_read(gc, XBT_NULL, > + tmp = libxl__xs_read(gc, t, > libxl__sprintf(gc, "%s/device-type", be_path)); > disk->is_cdrom = !strcmp(tmp, "cdrom"); > > @@ -1499,7 +1500,7 @@ int libxl_vdev_to_device_disk(libxl_ctx *ctx, uint32_t domid, > if (!path) > goto out; > > - libxl__device_disk_from_xs_be(gc, path, disk); > + libxl__device_disk_from_xs_be(gc, XBT_NULL, path, disk); > > rc = 0; > out: > @@ -1510,6 +1511,7 @@ out: > > static int libxl__append_disk_list_of_type(libxl__gc *gc, > uint32_t domid, > + xs_transaction_t t, > const char *type, > libxl_device_disk **disks, > int *ndisks) > @@ -1521,7 +1523,7 @@ static int libxl__append_disk_list_of_type(libxl__gc *gc, > > be_path = libxl__sprintf(gc, "%s/backend/%s/%d", > libxl__xs_get_dompath(gc, 0), type, domid); > - dir = libxl__xs_directory(gc, XBT_NULL, be_path, &n); > + dir = libxl__xs_directory(gc, t, be_path, &n); > if (dir) { > libxl_device_disk *tmp; > tmp = realloc(*disks, sizeof (libxl_device_disk) * (*ndisks + n)); > @@ -1534,7 +1536,7 @@ static int libxl__append_disk_list_of_type(libxl__gc *gc, > for (; pdisk < pdisk_end; pdisk++, dir++) { > const char *p; > p = libxl__sprintf(gc, "%s/%s", be_path, *dir); > - libxl__device_disk_from_xs_be(gc, p, pdisk); > + libxl__device_disk_from_xs_be(gc, t, p, pdisk); > pdisk->backend_domid = 0; > } > } > @@ -1549,13 +1551,13 @@ libxl_device_disk *libxl_device_disk_list(libxl_ctx *ctx, uint32_t domid, int *n > > *num = 0; > > - rc = libxl__append_disk_list_of_type(gc, domid, "vbd", &disks, num); > + rc = libxl__append_disk_list_of_type(gc, domid, XBT_NULL, "vbd", &disks, num); > if (rc) goto out_err; > > - rc = libxl__append_disk_list_of_type(gc, domid, "tap", &disks, num); > + rc = libxl__append_disk_list_of_type(gc, domid, XBT_NULL, "tap", &disks, num); > if (rc) goto out_err; > > - rc = libxl__append_disk_list_of_type(gc, domid, "qdisk", &disks, num); > + rc = libxl__append_disk_list_of_type(gc, domid, XBT_NULL, "qdisk", &disks, num); > if (rc) goto out_err; > > GC_FREE;
On Tue, 2012-03-27 at 14:59 +0100, Stefano Stabellini wrote:> Introduce libxl__alloc_vdev: find a spare virtual block device in the > domain passed as argument. > > Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> > --- > tools/libxl/libxl.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 files changed, 60 insertions(+), 0 deletions(-) > > diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c > index fe63fd5..fa7898a 100644 > --- a/tools/libxl/libxl.c > +++ b/tools/libxl/libxl.c > @@ -1228,6 +1228,13 @@ int libxl_vncviewer_exec(libxl_ctx *ctx, uint32_t domid, int autopass) > > /******************************************************************************/ > > +static int libxl__append_disk_list_of_type(libxl__gc *gc, > + uint32_t domid, > + xs_transaction_t t, > + const char *type, > + libxl_device_disk **disks, > + int *ndisks); > + > int libxl__device_disk_setdefault(libxl__gc *gc, libxl_device_disk *disk) > { > int rc; > @@ -1278,6 +1285,59 @@ static int libxl__device_from_disk(libxl__gc *gc, uint32_t domid, > return 0; > } > > +static int libxl__vdev_to_index(libxl__gc *gc, char *vdev) > +{ > + if (vdev == NULL) > + return 0; > + if (strlen(vdev) > 4 || strlen(vdev) < 4) > + return 0; > + /* assume xvdz is the last one available */ > + if (vdev[3] >= ''z'') > + return -1; > + return vdev[3] - ''a''; > +} > + > +static char* libxl__index_to_vdev(libxl__gc *gc, int idx) > +{ > + if (idx < 0) > + return NULL; > + return libxl__sprintf(gc, "xvd%c", ''a'' + idx); > +} > + > +static char * libxl__alloc_vdev(libxl__gc *gc, uint32_t domid, xs_transaction_t t, > + libxl_device_disk *disk) > +{ > + int rc = 0; > + libxl_device_disk *disks = NULL; > + int num = 0, idx = -1, max_idx = -1, i = 0; > + > + rc = libxl__append_disk_list_of_type(gc, domid, t, "vbd", &disks, &num); > + if (rc) goto out; > + > + rc = libxl__append_disk_list_of_type(gc, domid, t, "tap", &disks, &num); > + if (rc) goto out; > + > + rc = libxl__append_disk_list_of_type(gc, domid, t, "qdisk", &disks, &num); > + if (rc) goto out;This is basically an open-coded version of libxl_disk_list, isn''t it? For this use though wouldn''t it be as easy to simply iterate over idx checking if a frontend dir exists?> + > + for (i = 0; i < num; i++) { > + idx = libxl__vdev_to_index(gc, disks[i].vdev); > + if (idx < 0) { > + max_idx = -1; > + goto out; > + } > + if (idx > max_idx) > + max_idx = idx; > + } > + max_idx++; > +out: > + while (disks && num) { > + num--; > + libxl_device_disk_dispose(&disks[num]); > + } > + free(disks); > + return libxl__index_to_vdev(gc, max_idx); > +} > > static int libxl__device_disk_add_t(libxl__gc *gc, uint32_t domid, xs_transaction_t t, > libxl_device_disk *disk)
Ian Jackson
2012-Mar-27 16:21 UTC
Re: [PATCH 1/6] libxl: libxl_device_disk_local_attach return a new libxl_device_disk
Stefano Stabellini writes ("[PATCH 1/6] libxl: libxl_device_disk_local_attach return a new libxl_device_disk"):> -char * libxl_device_disk_local_attach(libxl_ctx *ctx, libxl_device_disk *disk) > +char * libxl_device_disk_local_attach(libxl_ctx *ctx, const libxl_device_disk *disk, > + libxl_device_disk **new_disk)Long line. Shouldn''t we make this a libxl-internal function ? If we do that we can allocate the new libxl_device_disk from the gc.> + if (tmpdisk->pdev_path != NULL) > + tmpdisk->pdev_path = strdup(tmpdisk->pdev_path); > + if (tmpdisk->script != NULL) > + tmpdisk->script = strdup(tmpdisk->script); > + tmpdisk->vdev = NULL;Perhaps you should put my "malloc always fails" patch on the bottom of your series ? That means you can write tmpdisk->pdev_path = libxl__strdup(0, tmpdisk->pdev_path); which will crash more sanely if malloc fails.> - switch (disk->backend) { > + switch (tmpdisk->backend) {If you renamed the formal parameter rather than introducing a new variable this would all be a lot easier to read (both the patch, and the resulting code).> out: > - if (dev != NULL) > + if (dev != NULL) { > ret = strdup(dev); > + tmpdisk->vdev = strdup(tmpdisk->vdev); > + } > GC_FREE; > return ret;This leaks tmpdisk if the function fails. Or, alternatively, you expect the caller to always free *new_disk even if _attach fails, which is very counterintuitive. This would be solved if you made this an internal function and used the gc. Ian.
Ian Jackson
2012-Mar-27 16:50 UTC
Re: [PATCH 2/6] libxl: introduce libxl__device_generic_add_t
Stefano Stabellini writes ("[PATCH 2/6] libxl: introduce libxl__device_generic_add_t"):> Introduce libxl__device_generic_add_t that takes an xs_transaction_t as > parameter. > Use libxl__device_generic_add_t to implement libxl__device_generic_add.Wait, what ? When I suggested these wrappers I thought we were talking about externally-facing functions which aren''t allowed to have libxenstore types in their signatures. For internal functions why not just add the parameter always and allow the callers to pass 0 ?> +retry_transaction: > + t = xs_transaction_start(ctx->xsh); > + > + rc = libxl__device_generic_add_t(gc, t, device, bents, fents); > + > + if (!xs_transaction_end(ctx->xsh, t, 0)) { > + if (errno == EAGAIN) > + goto retry_transaction; > + else > + LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xs transaction failed"); > + } > + return rc;Can we avoid introducing more of these transaction loops using goto ? How about we invent some helper functions: int libxl__xs_transaction_init(libxl__gc *gc, xs_transaction_t **my_out, xs_transaction_t *from_caller) { if (from_caller) { *my_out = from_caller; return 0; } *my_out = get new transaction or log error, set rc; return rc; } int libxl__xs_transaction_done(libxl__gc *gc, xs_transaction_t **my_io) { xs_transaction_t *from_caller) { if (from_caller) { *my_out = 0; return 0; } r = xs_transaction_end blah; if (!r) { *my_io = 0; return 0; } if (errno != EAGAIN) { log; report error; } return libxl__xs_transaction_get(gc, my_io, from_caller); } void libxl__xs_transaction_cleanup(libxl__gc *gc, xs_transaction_t **my) { xs_transaction_end(,,1) plus some error logging; } #define LOOP_WITH_XS_TRANSACTION(my_t, from_caller) \ for (({ \ (rc) = libxl__xs_transaction_get((gc),(&my_t),(from_caller)) \ if ((rc)) goto error_out; \ }); \ (my_t); \ ({ \ rc = libxl__xs_transaction_done(gc,&(my_t),(from_caller)); \ if (rc) goto out; \ }) Then this:> +retry_transaction: > + t = xs_transaction_start(ctx->xsh); > + > + STUFF > + > + if (!xs_transaction_end(ctx->xsh, t, 0)) { > + if (errno == EAGAIN) > + goto retry_transaction; > + else > + LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xs transaction failed"); > + } > + return rc;Turns into: LOOP_WITH_XS_TRANSACTION(t, caller_t) { STUFF } ... error_out: libxl__xs_transaction_cleanup(gc,t); And as a bonus you can declare the function to take an xs_transaction_t so that it can be called either from within such a loop, or standalone. If we were feeling really advanced we could do away with the cleanup if we put the transaction in the gc. Ian.
Ian Jackson
2012-Mar-27 16:50 UTC
Re: [PATCH 3/6] libxl: add an xs_transaction_t parameter to two libxl functions
Stefano Stabellini writes ("[PATCH 3/6] libxl: add an xs_transaction_t parameter to two libxl functions"):> Add an additional xs_transaction_t parameter to > libxl__device_disk_from_xs_be and libxl__append_disk_list_of_type.Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
Stefano Stabellini writes ("[PATCH 5/6] libxl: introduce libxl__alloc_vdev"):> Introduce libxl__alloc_vdev: find a spare virtual block device in the > domain passed as argument....> +static int libxl__vdev_to_index(libxl__gc *gc, char *vdev) > +{ > + if (vdev == NULL) > + return 0; > + if (strlen(vdev) > 4 || strlen(vdev) < 4) > + return 0; > + /* assume xvdz is the last one available */This is a broken half-reimplementation of libxl__device_disk_dev_number.> + free(disks); > + return libxl__index_to_vdev(gc, max_idx);And you don''t need this function because you can use the disk number directly (converted to a string of decimal digits) as the vdev. Of course you can''t then using the resulting vdev as a pathname in /dev/ but that''s nonportable anyway; different guest OSs (and here we are playing the role of the guest) may have different conventions. You need a separate function to convert vdev numbers (as returned from _disk_dev_number) to pathnames in /dev.> + for (i = 0; i < num; i++) { > + idx = libxl__vdev_to_index(gc, disks[i].vdev);Furthermore, for the purpose for which you''re using this, you should not start at zero (xvda). You should start at at least 26 (xvdA) and possibly later. That way you are less likely to clash with other statically-allocated vbds. Ian.
Ian Jackson
2012-Mar-27 17:20 UTC
Re: [PATCH 6/6] xl/libxl: implement QDISK libxl_device_disk_local_attach
Stefano Stabellini writes ("[PATCH 6/6] xl/libxl: implement QDISK libxl_device_disk_local_attach"):> - Spawn a QEMU instance at boot time to handle disk local attach > requests.This is a bit strange. Why does this need to be a single daemon ? Can''t we have one qemu per disk ?> - Implement libxl_device_disk_local_attach for QDISKs in terms of a > regular disk attach with the frontend and backend running in the same > domain....> + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "libxl__alloc_vdev failed\n");Too many long lines for me to comfortably read I''m afraid. Ian.
Ian Campbell writes ("Re: [PATCH 5/6] libxl: introduce libxl__alloc_vdev"):> For this use though wouldn''t it be as easy to simply iterate over idx > checking if a frontend dir exists?Yes, I agree, and I think that might well be better. Ian.
Stefano Stabellini
2012-Mar-30 10:42 UTC
Re: [PATCH 1/6] libxl: libxl_device_disk_local_attach return a new libxl_device_disk
On Tue, 27 Mar 2012, Ian Campbell wrote:> On Tue, 2012-03-27 at 14:59 +0100, Stefano Stabellini wrote: > > The caller passes an additional pre-allocated libxl_device_disk struct > > to libxl_device_disk_local_attach, that it is going to fill it with > > informations about the new locally attached disk. > > The new libxl_device_disk should be passed to > > libxl_device_disk_local_detach afterwards. > > > > This patch is just about adding the new parameter and updating the > > caller. > > > > Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> > > --- > > tools/libxl/libxl.c | 46 ++++++++++++++++++++++++++-------------- > > tools/libxl/libxl.h | 3 +- > > tools/libxl/libxl_bootloader.c | 9 +++++-- > > 3 files changed, 38 insertions(+), 20 deletions(-) > > > > diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c > > index 5344366..d33fbdf 100644 > > --- a/tools/libxl/libxl.c > > +++ b/tools/libxl/libxl.c > > @@ -1644,63 +1644,77 @@ out: > > return ret; > > } > > > > -char * libxl_device_disk_local_attach(libxl_ctx *ctx, libxl_device_disk *disk) > > +char * libxl_device_disk_local_attach(libxl_ctx *ctx, const libxl_device_disk *disk, > > + libxl_device_disk **new_disk) > > I think this would be better as a caller allocated libxl_device_disk *. > That''s much simpler since the caller can just put it on the stack or in > an existing datastructure (I expect a suitable one comes into being with > IanJ''s async patch series). > > Do we need this as a public facing API? > Does anything use it? I think it > should be an implementation detail of libxl_device_disk_add where domid > == SELF? That would also allow you to use the gc here.Yes, you are right, it is better as an internal function. In that case we can just as easily allocate the new disk on the gc because the caller doesn''t need to care about deallocating it. I am not sure about making local_attach an implementation detail of libxl_device_disk, especially considering that it is not yet clear how to get "SELF" right now so I would avoid doing it in this patch.
Stefano Stabellini
2012-Mar-30 10:43 UTC
Re: [PATCH 1/6] libxl: libxl_device_disk_local_attach return a new libxl_device_disk
On Tue, 27 Mar 2012, Ian Jackson wrote:> Stefano Stabellini writes ("[PATCH 1/6] libxl: libxl_device_disk_local_attach return a new libxl_device_disk"): > > -char * libxl_device_disk_local_attach(libxl_ctx *ctx, libxl_device_disk *disk) > > +char * libxl_device_disk_local_attach(libxl_ctx *ctx, const libxl_device_disk *disk, > > + libxl_device_disk **new_disk) > > Long line. > > Shouldn''t we make this a libxl-internal function ? If we do that we > can allocate the new libxl_device_disk from the gc. > > > + if (tmpdisk->pdev_path != NULL) > > + tmpdisk->pdev_path = strdup(tmpdisk->pdev_path); > > + if (tmpdisk->script != NULL) > > + tmpdisk->script = strdup(tmpdisk->script); > > + tmpdisk->vdev = NULL; > > Perhaps you should put my "malloc always fails" patch on the bottom of > your series ? That means you can write > tmpdisk->pdev_path = libxl__strdup(0, tmpdisk->pdev_path); > which will crash more sanely if malloc fails. > > > - switch (disk->backend) { > > + switch (tmpdisk->backend) { > > If you renamed the formal parameter rather than introducing a new > variable this would all be a lot easier to read (both the patch, and > the resulting code). > > > out: > > - if (dev != NULL) > > + if (dev != NULL) { > > ret = strdup(dev); > > + tmpdisk->vdev = strdup(tmpdisk->vdev); > > + } > > GC_FREE; > > return ret; > > This leaks tmpdisk if the function fails. Or, alternatively, you > expect the caller to always free *new_disk even if _attach fails, > which is very counterintuitive. > > This would be solved if you made this an internal function and used > the gc.Right, I''ll do that.
Stefano Stabellini
2012-Mar-30 10:44 UTC
Re: [PATCH 2/6] libxl: introduce libxl__device_generic_add_t
On Tue, 27 Mar 2012, Ian Campbell wrote:> On Tue, 2012-03-27 at 14:59 +0100, Stefano Stabellini wrote: > > Introduce libxl__device_generic_add_t that takes an xs_transaction_t as > > parameter. > > Use libxl__device_generic_add_t to implement libxl__device_generic_add. > > I think it would be better to just change the existing API to add a > transaction.OK> > Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> > > --- > > tools/libxl/libxl_device.c | 36 ++++++++++++++++++++++++++---------- > > tools/libxl/libxl_internal.h | 2 ++ > > 2 files changed, 28 insertions(+), 10 deletions(-) > > > > diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c > > index c2880e0..c6f9044 100644 > > --- a/tools/libxl/libxl_device.c > > +++ b/tools/libxl/libxl_device.c > > @@ -61,12 +61,37 @@ int libxl__parse_backend_path(libxl__gc *gc, > > int libxl__device_generic_add(libxl__gc *gc, libxl__device *device, > > char **bents, char **fents) > > { > > + int rc = 0; > > + xs_transaction_t t; > > + libxl_ctx *ctx = libxl__gc_owner(gc); > > + > > +retry_transaction: > > + t = xs_transaction_start(ctx->xsh); > > + > > + rc = libxl__device_generic_add_t(gc, t, device, bents, fents); > > + > > + if (!xs_transaction_end(ctx->xsh, t, 0)) { > > + if (errno == EAGAIN) > > + goto retry_transaction; > > + else > > + LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xs transaction failed"); > > + } > > + return rc; > > +} > > + > > +int libxl__device_generic_add_t(libxl__gc *gc, xs_transaction_t t, > > + libxl__device *device, char **bents, char **fents) > > +{ > > libxl_ctx *ctx = libxl__gc_owner(gc); > > char *frontend_path, *backend_path; > > - xs_transaction_t t; > > struct xs_permissions frontend_perms[2]; > > struct xs_permissions backend_perms[2]; > > > > + if (t == XBT_NULL) > > + /* we need a valid transaction: call libxl__device_generic_add > > + * to create one for us */ > > + return libxl__device_generic_add(gc, device, bents, fents); > > This function will in turn start a new transaction and then call us > straight back again, which is all a bit of a roundabout way to do > things. Can''t this just be a t = xs_transaction_start?this goes away if we just have libxl__device_generic_add with the additional paramter
Stefano Stabellini
2012-Mar-30 10:46 UTC
Re: [PATCH 2/6] libxl: introduce libxl__device_generic_add_t
On Tue, 27 Mar 2012, Ian Jackson wrote:> Stefano Stabellini writes ("[PATCH 2/6] libxl: introduce libxl__device_generic_add_t"): > > Introduce libxl__device_generic_add_t that takes an xs_transaction_t as > > parameter. > > Use libxl__device_generic_add_t to implement libxl__device_generic_add. > > Wait, what ? When I suggested these wrappers I thought we were > talking about externally-facing functions which aren''t allowed to have > libxenstore types in their signatures. > > For internal functions why not just add the parameter always and allow > the callers to pass 0 ?OK, I''ll do that.> > +retry_transaction: > > + t = xs_transaction_start(ctx->xsh); > > + > > + rc = libxl__device_generic_add_t(gc, t, device, bents, fents); > > + > > + if (!xs_transaction_end(ctx->xsh, t, 0)) { > > + if (errno == EAGAIN) > > + goto retry_transaction; > > + else > > + LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xs transaction failed"); > > + } > > + return rc; > > Can we avoid introducing more of these transaction loops using goto ?I am afraid I actually prefer the simple goto scheme than the convoluted code below.> How about we invent some helper functions: > > int libxl__xs_transaction_init(libxl__gc *gc, > xs_transaction_t **my_out, > xs_transaction_t *from_caller) { > if (from_caller) { *my_out = from_caller; return 0; } > > *my_out = get new transaction or log error, set rc; > return rc; > } > > int libxl__xs_transaction_done(libxl__gc *gc, > xs_transaction_t **my_io) { > xs_transaction_t *from_caller) { > if (from_caller) { *my_out = 0; return 0; } > > r = xs_transaction_end blah; > if (!r) { *my_io = 0; return 0; } > if (errno != EAGAIN) { log; report error; } > return libxl__xs_transaction_get(gc, my_io, from_caller); > } > > void libxl__xs_transaction_cleanup(libxl__gc *gc, xs_transaction_t **my) { > xs_transaction_end(,,1) plus some error logging; > } > > #define LOOP_WITH_XS_TRANSACTION(my_t, from_caller) \ > for (({ \ > (rc) = libxl__xs_transaction_get((gc),(&my_t),(from_caller)) \ > if ((rc)) goto error_out; \ > }); \ > (my_t); \ > ({ \ > rc = libxl__xs_transaction_done(gc,&(my_t),(from_caller)); \ > if (rc) goto out; \ > }) > > Then this: > > > +retry_transaction: > > + t = xs_transaction_start(ctx->xsh); > > + > > + STUFF > > + > > + if (!xs_transaction_end(ctx->xsh, t, 0)) { > > + if (errno == EAGAIN) > > + goto retry_transaction; > > + else > > + LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "xs transaction failed"); > > + } > > + return rc; > > Turns into: > > LOOP_WITH_XS_TRANSACTION(t, caller_t) { > STUFF > } > ... > > error_out: > libxl__xs_transaction_cleanup(gc,t); > > And as a bonus you can declare the function to take an > xs_transaction_t so that it can be called either from within such a > loop, or standalone. > > If we were feeling really advanced we could do away with the cleanup > if we put the transaction in the gc. > > > Ian. >
Stefano Stabellini
2012-Mar-30 10:50 UTC
Re: [PATCH 6/6] xl/libxl: implement QDISK libxl_device_disk_local_attach
On Tue, 27 Mar 2012, Ian Jackson wrote:> Stefano Stabellini writes ("[PATCH 6/6] xl/libxl: implement QDISK libxl_device_disk_local_attach"): > > - Spawn a QEMU instance at boot time to handle disk local attach > > requests. > > This is a bit strange. Why does this need to be a single daemon ? > Can''t we have one qemu per disk ?Why is a bit strange? It has always been the case that QEMU PV would take care of all the PV backends for a single domain. Moreover why would you want more QEMUs when you can handle everything you need with just one and a single thread (except the inevitable xenstore thread)?> > - Implement libxl_device_disk_local_attach for QDISKs in terms of a > > regular disk attach with the frontend and backend running in the same > > domain. > ... > > + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "libxl__alloc_vdev failed\n"); > > Too many long lines for me to comfortably read I''m afraid.I''ll fix all the long lines I can find in the series, sorry about that.
Stefano Stabellini
2012-Mar-30 11:43 UTC
Re: [PATCH 5/6] libxl: introduce libxl__alloc_vdev
On Tue, 27 Mar 2012, Ian Campbell wrote:> > +static char * libxl__alloc_vdev(libxl__gc *gc, uint32_t domid, xs_transaction_t t, > > + libxl_device_disk *disk) > > +{ > > + int rc = 0; > > + libxl_device_disk *disks = NULL; > > + int num = 0, idx = -1, max_idx = -1, i = 0; > > + > > + rc = libxl__append_disk_list_of_type(gc, domid, t, "vbd", &disks, &num); > > + if (rc) goto out; > > + > > + rc = libxl__append_disk_list_of_type(gc, domid, t, "tap", &disks, &num); > > + if (rc) goto out; > > + > > + rc = libxl__append_disk_list_of_type(gc, domid, t, "qdisk", &disks, &num); > > + if (rc) goto out; > > This is basically an open-coded version of libxl_disk_list, isn''t it?yes it is similar, but as you can see the "smart" bit is in libxl__append_disk_list_of_type anyway> For this use though wouldn''t it be as easy to simply iterate over idx > checking if a frontend dir exists?We could try one at a time: xvd(a + idx) -> dev_number -> xs_read * 3 this could work, even though we would still need to read 3 possible paths (tap, qdisk, vbd) for each one. It would be more efficient than calling libxl__append_disk_list_of_type three times, but is it actually simpler?
Stefano Stabellini
2012-Mar-30 11:57 UTC
Re: [PATCH 5/6] libxl: introduce libxl__alloc_vdev
On Tue, 27 Mar 2012, Ian Jackson wrote:> Stefano Stabellini writes ("[PATCH 5/6] libxl: introduce libxl__alloc_vdev"): > > Introduce libxl__alloc_vdev: find a spare virtual block device in the > > domain passed as argument. > ... > > +static int libxl__vdev_to_index(libxl__gc *gc, char *vdev) > > +{ > > + if (vdev == NULL) > > + return 0; > > + if (strlen(vdev) > 4 || strlen(vdev) < 4) > > + return 0; > > + /* assume xvdz is the last one available */ > > This is a broken half-reimplementation of > libxl__device_disk_dev_number.No it is not the same thing. This function doesn''t return any meaningful number representing the disk in a unique way, it basically returns the last letter of the disk name as an integer (minus offset).> > + free(disks); > > + return libxl__index_to_vdev(gc, max_idx); > > And you don''t need this function because you can use the disk number > directly (converted to a string of decimal digits) as the vdev.These two functions are not the same as libxl__device_disk_dev_number and friends. They are just helper functions for libxl__alloc_vdev. I could rename them libxl__vdev_helper_idx and libxl__vdev_helper_name to avoid confusion.> Of course you can''t then using the resulting vdev as a pathname in > /dev/ but that''s nonportable anyway; different guest OSs (and here we > are playing the role of the guest) may have different conventions. > You need a separate function to convert vdev numbers (as returned from > _disk_dev_number) to pathnames in /dev.This is a good point and the key issue here. The frontend is actually free to choose whatever name it wants for the device it is going to create from the dev number. We don''t have a proper way to get this device name, the closest thing we have is libxl__device_disk_dev_number that from a vdev returns the dev number. So I think that the best thing we can do is rely on it to find out the device name, either doing: [1] xvd(a + i) -> libxl__device_disk_dev_number -> xs_read or reading the "dev" node under the xenstore backend path, that is generated using the same libxl function (this is what I am doing now). I fail to see why one of these two approaches would be better than the other, but if you think that [1] is the best, I can change my code.> > + for (i = 0; i < num; i++) { > > + idx = libxl__vdev_to_index(gc, disks[i].vdev); > > Furthermore, for the purpose for which you''re using this, you should > not start at zero (xvda). You should start at at least 26 (xvdA) and > possibly later. That way you are less likely to clash with other > statically-allocated vbds.Another interesting observation but I disagree: why should we expect "statically-allocated vbds" below xvdA? How many do you think we are going to find? Why 26? I think it is very arbitrary. We should just make sure that our allocation policy works well and start from 0.
On Fri, 2012-03-30 at 12:43 +0100, Stefano Stabellini wrote:> On Tue, 27 Mar 2012, Ian Campbell wrote: > > > +static char * libxl__alloc_vdev(libxl__gc *gc, uint32_t domid, xs_transaction_t t, > > > + libxl_device_disk *disk) > > > +{ > > > + int rc = 0; > > > + libxl_device_disk *disks = NULL; > > > + int num = 0, idx = -1, max_idx = -1, i = 0; > > > + > > > + rc = libxl__append_disk_list_of_type(gc, domid, t, "vbd", &disks, &num); > > > + if (rc) goto out; > > > + > > > + rc = libxl__append_disk_list_of_type(gc, domid, t, "tap", &disks, &num); > > > + if (rc) goto out; > > > + > > > + rc = libxl__append_disk_list_of_type(gc, domid, t, "qdisk", &disks, &num); > > > + if (rc) goto out; > > > > This is basically an open-coded version of libxl_disk_list, isn''t it? > > yes it is similar, but as you can see the "smart" bit is in > libxl__append_disk_list_of_type anywayIt''s also in the list "vbd", "tap", "qdisk" which now need to be sync''d in more than one place if it changes.> > For this use though wouldn''t it be as easy to simply iterate over idx > > checking if a frontend dir exists? > > We could try one at a time: > > xvd(a + idx) -> dev_number -> xs_read * 3 > > this could work, even though we would still need to read 3 possible > paths (tap, qdisk, vbd) for each one.You can check for the existence of the frontend dir here. I think this is safe since only the content of the dir is(/should be) writeable by the guest, the existence ofd that node is completely controlled by the toolstack. Ian.> It would be more efficient than calling libxl__append_disk_list_of_type > three times, but is it actually simpler?
Stefano Stabellini
2012-Mar-30 12:57 UTC
Re: [PATCH 5/6] libxl: introduce libxl__alloc_vdev
On Fri, 30 Mar 2012, Ian Campbell wrote:> On Fri, 2012-03-30 at 12:43 +0100, Stefano Stabellini wrote: > > On Tue, 27 Mar 2012, Ian Campbell wrote: > > > > +static char * libxl__alloc_vdev(libxl__gc *gc, uint32_t domid, xs_transaction_t t, > > > > + libxl_device_disk *disk) > > > > +{ > > > > + int rc = 0; > > > > + libxl_device_disk *disks = NULL; > > > > + int num = 0, idx = -1, max_idx = -1, i = 0; > > > > + > > > > + rc = libxl__append_disk_list_of_type(gc, domid, t, "vbd", &disks, &num); > > > > + if (rc) goto out; > > > > + > > > > + rc = libxl__append_disk_list_of_type(gc, domid, t, "tap", &disks, &num); > > > > + if (rc) goto out; > > > > + > > > > + rc = libxl__append_disk_list_of_type(gc, domid, t, "qdisk", &disks, &num); > > > > + if (rc) goto out; > > > > > > This is basically an open-coded version of libxl_disk_list, isn''t it? > > > > yes it is similar, but as you can see the "smart" bit is in > > libxl__append_disk_list_of_type anyway > > It''s also in the list "vbd", "tap", "qdisk" which now need to be sync''d > in more than one place if it changes. > > > > For this use though wouldn''t it be as easy to simply iterate over idx > > > checking if a frontend dir exists? > > > > We could try one at a time: > > > > xvd(a + idx) -> dev_number -> xs_read * 3 > > > > this could work, even though we would still need to read 3 possible > > paths (tap, qdisk, vbd) for each one. > > You can check for the existence of the frontend dir here. I think this > is safe since only the content of the dir is(/should be) writeable by > the guest, the existence ofd that node is completely controlled by the > toolstack.I like it, I''ll do that
Ian Jackson
2012-Mar-30 13:47 UTC
Re: [PATCH 6/6] xl/libxl: implement QDISK libxl_device_disk_local_attach
Stefano Stabellini writes ("Re: [PATCH 6/6] xl/libxl: implement QDISK libxl_device_disk_local_attach"):> On Tue, 27 Mar 2012, Ian Jackson wrote: > > Stefano Stabellini writes ("[PATCH 6/6] xl/libxl: implement QDISK libxl_device_disk_local_attach"): > > > - Spawn a QEMU instance at boot time to handle disk local attach > > > requests. > > > > This is a bit strange. Why does this need to be a single daemon ? > > Can''t we have one qemu per disk ? > > Why is a bit strange? It has always been the case that QEMU PV would > take care of all the PV backends for a single domain. Moreover why would > you want more QEMUs when you can handle everything you need with just > one and a single thread (except the inevitable xenstore thread)?Offhand I can think of at least two reasons to prever separate qemus (at least one per domain): Firstly, the performance scalability will be improved if we don''t have to funnel all the IO through a single process. Secondly, it avoids propagating failures of any kind from one domain to another. Thirdly, it will make it easier to do disaggregation later if we feel like it. Ian.
Stefano Stabellini writes ("Re: [PATCH 5/6] libxl: introduce libxl__alloc_vdev"):> On Tue, 27 Mar 2012, Ian Jackson wrote: > > Furthermore, for the purpose for which you''re using this, you should > > not start at zero (xvda). You should start at at least 26 (xvdA) and > > possibly later. That way you are less likely to clash with other > > statically-allocated vbds. > > Another interesting observation but I disagree: why should we expect > "statically-allocated vbds" below xvdA? How many do you think we are > going to find? Why 26? I think it is very arbitrary. > We should just make sure that our allocation policy works well and start > from 0.Your design assumes that the system administrator has not configured, for themselves, any disks to be exposed to dom0 (eg by a driver domain). If they have (eg, run "xl block-attach 0 vdev=xvda ...") then they will expect it to work, and they will expect to be able to control the device name that this appears as in dom0. Therefore the namespace of xvd* names in dom0 is not freely available to us as libxl programmers; we need to avoid trampling all over it. Since we need to be able to allocate some vbds dynamically, the right approach is to decree that some portion of the dom0 vbd space is reserved for static allocations by the administrator, and that the remainder is for dynamic allocations by software which picks a free vbd. Naturally the static space should come first.> > Of course you can''t then using the resulting vdev as a pathname in > > /dev/ but that''s nonportable anyway; different guest OSs (and here we > > are playing the role of the guest) may have different conventions. > > You need a separate function to convert vdev numbers (as returned from > > _disk_dev_number) to pathnames in /dev. > > This is a good point and the key issue here. > > The frontend is actually free to choose whatever name it wants for the > device it is going to create from the dev number.I don''t think that is true. On each individual guest platform, the relationship between vbd numbers (the actual interface between frontend and backend), and whatever that guest has for device names, needs to be statically defined. If we are assuming in this case, as we are, a vaguely unixy guest (which we must do as libxl because we do not support libxl on non-unix guests), we are free to assume that vbds which are presented to dom0[*] appear with names in /dev/. ([*] Strictly, to the domain running this particular libxl code. This might be dom0 or it might be some kind of disaggregated builder/toolstack domain.) We are also free to make an assumption, for each individual dom0 platform which we support, about exactly how these vbd numbers map to device names - at least, we are free to make this assumption for dom0 platforms which make suitable promises. If there are dom0 platforms which do not make suitable promises then for this whole scheme to work these platforms need to provide a way for us to find the /dev/ name of a particular vbd exposed to this guest. In the case of Linux, the vbd number to /dev/ name mapping is reasonably fixed. It needs to remain that way in general because changing the device names inside guests, when the host configuration remains the same, disrupts the operation and administration of the guest. So we can write a function which converts vbd numbers to Linux (pvops) device names. Or if we prefer we can probably go grobbling in sysfs to find the same information but that''s likely to be more painful. But this same code needs to be different on BSD (say) because the BSD frontend driver has a different scheme for assigning names to vbds based on their numbers.> So I think that the best thing we can do is rely on it to find out the > device name, either doing: > > [1] xvd(a + i) -> libxl__device_disk_dev_number -> xs_readThis is not correct, because the device name in the guest is not written to xenstore anywhere. It is private to the guest. (In this case the "guest" is actually dom0, but the point stands.)> or reading the "dev" node under the xenstore backend path, that is > generated using the same libxl function (this is what I am doing now). > I fail to see why one of these two approaches would be better than the > other, but if you think that [1] is the best, I can change my code.Again, you are confusing the virtual name given in the domain config file with the actual device name in the guest''s /dev, which may not be the same.> > This is a broken half-reimplementation of > > libxl__device_disk_dev_number. > > No it is not the same thing. This function doesn''t return any meaningful > number representing the disk in a unique way, it basically returns the last > letter of the disk name as an integer (minus offset).But you haven''t explained why that is correct.> > > + free(disks); > > > + return libxl__index_to_vdev(gc, max_idx); > > > > And you don''t need this function because you can use the disk number > > directly (converted to a string of decimal digits) as the vdev. > > These two functions are not the same as libxl__device_disk_dev_number > and friends. They are just helper functions for libxl__alloc_vdev. > I could rename them libxl__vdev_helper_idx and libxl__vdev_helper_name > to avoid confusion.The code would still be wrong. Ian.
Stefano Stabellini
2012-Mar-30 14:55 UTC
Re: [PATCH 6/6] xl/libxl: implement QDISK libxl_device_disk_local_attach
On Fri, 30 Mar 2012, Ian Jackson wrote:> Stefano Stabellini writes ("Re: [PATCH 6/6] xl/libxl: implement QDISK libxl_device_disk_local_attach"): > > On Tue, 27 Mar 2012, Ian Jackson wrote: > > > Stefano Stabellini writes ("[PATCH 6/6] xl/libxl: implement QDISK libxl_device_disk_local_attach"): > > > > - Spawn a QEMU instance at boot time to handle disk local attach > > > > requests. > > > > > > This is a bit strange. Why does this need to be a single daemon ? > > > Can''t we have one qemu per disk ? > > > > Why is a bit strange? It has always been the case that QEMU PV would > > take care of all the PV backends for a single domain. Moreover why would > > you want more QEMUs when you can handle everything you need with just > > one and a single thread (except the inevitable xenstore thread)? > > Offhand I can think of at least two reasons to prever separate qemus > (at least one per domain):We have one per domain, in this case one for dom0. Keep in mind that the destination here is domain 0.> Firstly, the performance scalability will > be improved if we don''t have to funnel all the IO through a single > process.Given that the process in question is using AIO, it doesn'' matter much.> Secondly, it avoids propagating failures of any kind from > one domain to another.There are no two domains involved here, only one: domain 0. Also, should we spawn a new Linux kernel for each domain we want a backend for then? Even with our disaggregated architecture we never thought of having a backend/driver domain per guest domain.> Thirdly, it will make it easier to do > disaggregation later if we feel like it.Disaggregation is ortogonal to this: we are going to have a qemu disk backend in each domain we want to be able to do local_attach.
Stefano Stabellini
2012-Mar-30 15:23 UTC
Re: [PATCH 5/6] libxl: introduce libxl__alloc_vdev
On Fri, 30 Mar 2012, Ian Jackson wrote:> Stefano Stabellini writes ("Re: [PATCH 5/6] libxl: introduce libxl__alloc_vdev"): > > On Tue, 27 Mar 2012, Ian Jackson wrote: > > > Furthermore, for the purpose for which you''re using this, you should > > > not start at zero (xvda). You should start at at least 26 (xvdA) and > > > possibly later. That way you are less likely to clash with other > > > statically-allocated vbds. > > > > Another interesting observation but I disagree: why should we expect > > "statically-allocated vbds" below xvdA? How many do you think we are > > going to find? Why 26? I think it is very arbitrary. > > We should just make sure that our allocation policy works well and start > > from 0. > > Your design assumes that the system administrator has not configured, > for themselves, any disks to be exposed to dom0 (eg by a driver > domain). If they have (eg, run "xl block-attach 0 vdev=xvda ...") > then they will expect it to work, and they will expect to be able to > control the device name that this appears as in dom0. Therefore the > namespace of xvd* names in dom0 is not freely available to us as libxl > programmers; we need to avoid trampling all over it. > > Since we need to be able to allocate some vbds dynamically, the right > approach is to decree that some portion of the dom0 vbd space is > reserved for static allocations by the administrator, and that the > remainder is for dynamic allocations by software which picks a free > vbd. Naturally the static space should come first.When you hotplug a new disk on your system, for example a new USB disk to your native Linux box, usually Linux chooses the device name for you. I don''t see why this should be any different. The admin is going to call instead: xl block-attach 0 and then checkout dmesg.> > > Of course you can''t then using the resulting vdev as a pathname in > > > /dev/ but that''s nonportable anyway; different guest OSs (and here we > > > are playing the role of the guest) may have different conventions. > > > You need a separate function to convert vdev numbers (as returned from > > > _disk_dev_number) to pathnames in /dev. > > > > This is a good point and the key issue here. > > > > The frontend is actually free to choose whatever name it wants for the > > device it is going to create from the dev number. > > I don''t think that is true. On each individual guest platform, the > relationship between vbd numbers (the actual interface between > frontend and backend), and whatever that guest has for device names, > needs to be statically defined.I disagree: when we introduced docs/txt/misc/vbd-interface.txt we never specified what names the guest kernel is allowed to choose for a particular vbd encoding. See the following quote: "The Xen interface does not specify what name a device should have in the guest (nor what major/minor device number it should have in the guest, if the guest has such a concept)."> > So I think that the best thing we can do is rely on it to find out the > > device name, either doing: > > > > [1] xvd(a + i) -> libxl__device_disk_dev_number -> xs_read > > This is not correct, because the device name in the guest is not > written to xenstore anywhere. It is private to the guest. > > (In this case the "guest" is actually dom0, but the point stands.) > > > or reading the "dev" node under the xenstore backend path, that is > > generated using the same libxl function (this is what I am doing now). > > I fail to see why one of these two approaches would be better than the > > other, but if you think that [1] is the best, I can change my code. > > Again, you are confusing the virtual name given in the domain config > file with the actual device name in the guest''s /dev, which may not be > the same.What I find confusing is that before you say that "the relationship between vbd numbers and whatever that guest has for device names needs to be statically defined", but then you say that "the device name in the guest is not written to xenstore anywhere. It is private to the guest." In any case that was a conscious decision: considering that we have no way to know the device name in the guest (see above quote), the best thing we can do is guessing. The best guess we can have is using the mapping implemented in libxl__device_disk_dev_number. Maybe we can slightly improve the situation moving libxl__alloc_vdev to an OS specific file so that we can have a Linux and a BSD implementation. Both are going to be "guessing", in particular the Linux version is just going to use libxl__device_disk_dev_number, but the BSD version might want to do something different.
Stefano Stabellini writes ("Re: [Xen-devel] [PATCH 5/6] libxl: introduce libxl__alloc_vdev"):> On Fri, 30 Mar 2012, Ian Jackson wrote: > > Since we need to be able to allocate some vbds dynamically, the right > > approach is to decree that some portion of the dom0 vbd space is > > reserved for static allocations by the administrator, and that the > > remainder is for dynamic allocations by software which picks a free > > vbd. Naturally the static space should come first. > > When you hotplug a new disk on your system, for example a new USB disk > to your native Linux box, usually Linux chooses the device name for > you. I don''t see why this should be any different.It is different because Xen vbds do in practice appear in dom0 with a stable name. So this is something that people have reasonably come to rely on.> The admin is going to call instead: > xl block-attach 0 > and then checkout dmesg.This is hardly automatable. Doing the same thing with udev rules is quite hard work.> > I don''t think that is true. On each individual guest platform, the > > relationship between vbd numbers (the actual interface between > > frontend and backend), and whatever that guest has for device names, > > needs to be statically defined. > > I disagree: when we introduced docs/txt/misc/vbd-interface.txt we never > specified what names the guest kernel is allowed to choose for a > particular vbd encoding. See the following quote: > > "The Xen interface does not specify what name a device should have in the > guest (nor what major/minor device number it should have in the guest, > if the guest has such a concept)."This refers to the promises made by each side of the Xen VBD interface to the corresponding other side. The host''s environment is not allowed to assume things about the guest''s device naming conventions. However, that does not mean that the guest should not document its naming conventions. Perhaps this needs to be clarified in the document.> What I find confusing is that before you say that "the relationship > between vbd numbers and whatever that guest has for device names needs > to be statically defined", but then you say that "the device name in the > guest is not written to xenstore anywhere. It is private to the guest."It is private to the guest and the guest administrator and the guest documentation. The guest is not required to notify the host of what name it has chosen. (And if it does choose a name that name might be some crazy thing; imagine if the guest is MVS/370 or something.)> Maybe we can slightly improve the situation moving libxl__alloc_vdev to > an OS specific file so that we can have a Linux and a BSD implementation.We only need a specific implementation of the final mapping from vbd number to guest-specific device name. Ian.
Ian Jackson
2012-Apr-02 16:18 UTC
Re: [PATCH 2/6] libxl: introduce libxl__device_generic_add_t
Stefano Stabellini writes ("Re: [Xen-devel] [PATCH 2/6] libxl: introduce libxl__device_generic_add_t"):> On Tue, 27 Mar 2012, Ian Jackson wrote: > > Can we avoid introducing more of these transaction loops using goto ? > > I am afraid I actually prefer the simple goto scheme than the convoluted > code below.I really can''t stomach any more of these gotos. If you don''t want to abstract it away, can''t you at least write the loop as a loop with for() or while() ? Ian.
Stefano Stabellini
2012-Apr-03 12:03 UTC
Re: [PATCH 2/6] libxl: introduce libxl__device_generic_add_t
On Mon, 2 Apr 2012, Ian Jackson wrote:> Stefano Stabellini writes ("Re: [Xen-devel] [PATCH 2/6] libxl: introduce libxl__device_generic_add_t"): > > On Tue, 27 Mar 2012, Ian Jackson wrote: > > > Can we avoid introducing more of these transaction loops using goto ? > > > > I am afraid I actually prefer the simple goto scheme than the convoluted > > code below. > > I really can''t stomach any more of these gotos. If you don''t want to > abstract it away, can''t you at least write the loop as a loop with > for() or while() ?Yes, I can do that.
Stefano Stabellini
2012-Apr-03 13:15 UTC
Re: [PATCH 5/6] libxl: introduce libxl__alloc_vdev
On Mon, 2 Apr 2012, Ian Jackson wrote:> Stefano Stabellini writes ("Re: [Xen-devel] [PATCH 5/6] libxl: introduce libxl__alloc_vdev"): > > On Fri, 30 Mar 2012, Ian Jackson wrote: > > > Since we need to be able to allocate some vbds dynamically, the right > > > approach is to decree that some portion of the dom0 vbd space is > > > reserved for static allocations by the administrator, and that the > > > remainder is for dynamic allocations by software which picks a free > > > vbd. Naturally the static space should come first. > > > > When you hotplug a new disk on your system, for example a new USB disk > > to your native Linux box, usually Linux chooses the device name for > > you. I don''t see why this should be any different. > > It is different because Xen vbds do in practice appear in dom0 with a > stable name. So this is something that people have reasonably come to > rely on. > > > The admin is going to call instead: > > xl block-attach 0 > > and then checkout dmesg. > > This is hardly automatable. Doing the same thing with udev rules is > quite hard work.I don''t feel that strongly about this, but given that the naming scheme used in the guest is actually arbitrary at the moment (as in not document or well defined anywhere), I would start the dynamic space before the 26th device: I have the feeling that after "xvdz" the behavior is going to be even less "defined". I suggest "xvdm" as a starting point.> > > I don''t think that is true. On each individual guest platform, the > > > relationship between vbd numbers (the actual interface between > > > frontend and backend), and whatever that guest has for device names, > > > needs to be statically defined. > > > > I disagree: when we introduced docs/txt/misc/vbd-interface.txt we never > > specified what names the guest kernel is allowed to choose for a > > particular vbd encoding. See the following quote: > > > > "The Xen interface does not specify what name a device should have in the > > guest (nor what major/minor device number it should have in the guest, > > if the guest has such a concept)." > > This refers to the promises made by each side of the Xen VBD interface > to the corresponding other side. The host''s environment is not > allowed to assume things about the guest''s device naming conventions. > > However, that does not mean that the guest should not document its > naming conventions. Perhaps this needs to be clarified in the > document.Right, but unless I am missing something there isn''t such a thing at the moment, at least in Linux. I''ll try to come up with a linux devid to vdev function true to the original.
Stefano Stabellini writes ("Re: [Xen-devel] [PATCH 5/6] libxl: introduce libxl__alloc_vdev"):> I don''t feel that strongly about this, but given that the naming scheme > used in the guest is actually arbitrary at the moment (as in not document > or well defined anywhere), I would start the dynamic space before the > 26th device: I have the feeling that after "xvdz" the behavior is going > to be even less "defined". I suggest "xvdm" as a starting point.That''s fine, if the starting point is configurable.> > However, that does not mean that the guest should not document its > > naming conventions. Perhaps this needs to be clarified in the > > document. > > Right, but unless I am missing something there isn''t such a thing at the > moment, at least in Linux. > > I''ll try to come up with a linux devid to vdev function true to the > original.That would be good. If you felt like documenting the Linux behaviour properly that would be good. The section "Notes on Linux as a guest" in docs/misc/vbd-interface.text. TBH I think Linux /should/ use the same numbering scheme as the name supplied in the host config file, just for sanity''s sake. Ian.
Stefano Stabellini
2012-Apr-03 14:19 UTC
Re: [PATCH 5/6] libxl: introduce libxl__alloc_vdev
On Tue, 3 Apr 2012, Ian Jackson wrote:> Stefano Stabellini writes ("Re: [Xen-devel] [PATCH 5/6] libxl: introduce libxl__alloc_vdev"): > > I don''t feel that strongly about this, but given that the naming scheme > > used in the guest is actually arbitrary at the moment (as in not document > > or well defined anywhere), I would start the dynamic space before the > > 26th device: I have the feeling that after "xvdz" the behavior is going > > to be even less "defined". I suggest "xvdm" as a starting point. > > That''s fine, if the starting point is configurable.I don''t think it is worth adding one more option for something like this: nobody should be using this option anyway. I am still unconvinced there is a valid use case for this. Can''t we just agree on a starting point, any really?> > > However, that does not mean that the guest should not document its > > > naming conventions. Perhaps this needs to be clarified in the > > > document. > > > > Right, but unless I am missing something there isn''t such a thing at the > > moment, at least in Linux. > > > > I''ll try to come up with a linux devid to vdev function true to the > > original. > > That would be good. If you felt like documenting the Linux behaviour > properly that would be good. The section "Notes on Linux as a guest" > in docs/misc/vbd-interface.text.the best place for this would be in the Linux Documentation
Stefano Stabellini writes ("Re: [Xen-devel] [PATCH 5/6] libxl: introduce libxl__alloc_vdev"): ...> I don''t think it is worth adding one more option for something like > this: nobody should be using this option anyway. I am still unconvinced > there is a valid use case for this.I think it''s important. An additional config option is IMO necessary to allow the admin to impose their vbd allocation strategy.> > That would be good. If you felt like documenting the Linux behaviour > > properly that would be good. The section "Notes on Linux as a guest" > > in docs/misc/vbd-interface.text. > > the best place for this would be in the Linux DocumentationOK... Ian.
Stefano Stabellini
2012-Apr-10 17:37 UTC
Re: [PATCH 5/6] libxl: introduce libxl__alloc_vdev
On Tue, 3 Apr 2012, Ian Jackson wrote:> Stefano Stabellini writes ("Re: [Xen-devel] [PATCH 5/6] libxl: introduce libxl__alloc_vdev"): > ... > > I don''t think it is worth adding one more option for something like > > this: nobody should be using this option anyway. I am still unconvinced > > there is a valid use case for this. > > I think it''s important. An additional config option is IMO necessary > to allow the admin to impose their vbd allocation strategy.It shouldn''t be a per-VM paramater (part of libxl_domain_config, for example) and I don''t think it is a good idea to plumb a new parameter through from XL to libxl_create for this purpose. What did you have in mind when you thought about having a new paramater for this?
Stefano Stabellini
2012-Apr-11 15:49 UTC
Re: [PATCH 5/6] libxl: introduce libxl__alloc_vdev
On Tue, 10 Apr 2012, Stefano Stabellini wrote:> On Tue, 3 Apr 2012, Ian Jackson wrote: > > Stefano Stabellini writes ("Re: [Xen-devel] [PATCH 5/6] libxl: introduce libxl__alloc_vdev"): > > ... > > > I don''t think it is worth adding one more option for something like > > > this: nobody should be using this option anyway. I am still unconvinced > > > there is a valid use case for this. > > > > I think it''s important. An additional config option is IMO necessary > > to allow the admin to impose their vbd allocation strategy. > > It shouldn''t be a per-VM paramater (part of libxl_domain_config, for > example) and I don''t think it is a good idea to plumb a new parameter > through from XL to libxl_create for this purpose. > > What did you have in mind when you thought about having a new paramater > for this?In case it wasn''t obvious I decided to go for modifying libxl_create, see the new version of the series.