This is a patch to forward-port a Xend behaviour. Xend writes IO ABI used for all frontends. Blkfront before 2.6.26 relies on this behaviour otherwise guest cannot boot when running in 32-on-64 mode. Blkfront after 2.6.26 writes that node itself, in which case it''s just an overwrite to an existing node which should be OK. In fact Xend writes the ABI for all frontends including console and vif. But nowadays only old disk frontends rely on that behaviour so that we only write the ABI for disk frontends in libxl, minimizing the impact. Signed-off-by: Wei Liu <wei.liu2@citrix.com> Cc: Ian Campbell <ian.campbell@citrix.com> Cc: Roger Pau Monne <roger.pau@citrix.com> Cc: Valtteri Kiviniemi <kiviniemi.valtteri@gmail.com> --- tools/libxc/xc_dom_arm.c | 13 +++++++++++++ tools/libxc/xc_dom_x86.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ tools/libxc/xenctrl.h | 12 ++++++++++++ tools/libxl/libxl.c | 25 +++++++++++++++++++++++++ 4 files changed, 94 insertions(+) diff --git a/tools/libxc/xc_dom_arm.c b/tools/libxc/xc_dom_arm.c index 041832e..acdac9e 100644 --- a/tools/libxc/xc_dom_arm.c +++ b/tools/libxc/xc_dom_arm.c @@ -29,6 +29,19 @@ #define CONSOLE_PFN_OFFSET 0 #define XENSTORE_PFN_OFFSET 1 +/* get guest IO ABI */ +int xc_domain_get_native_protocol(xc_interface *xch, + uint32_t domid, + char **guest_abi); +{ + *guest_abi = strdup(XEN_IO_PROTO_ABI_ARM); + + if ( *guest_abi == NULL ) + return -ENOMEM; + + return 0; +} + /* ------------------------------------------------------------------------ */ /* * arm guests are hybrid and start off with paging disabled, therefore no diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c index eb9ac07..3745d54 100644 --- a/tools/libxc/xc_dom_x86.c +++ b/tools/libxc/xc_dom_x86.c @@ -47,6 +47,50 @@ #define round_down(addr, mask) ((addr) & ~(mask)) #define round_up(addr, mask) ((addr) | (mask)) +/* get guest IO ABI */ +int xc_domain_get_native_protocol(xc_interface *xch, + uint32_t domid, + char **guest_abi) +{ + int ret; + uint32_t guest_width; + const char *protocol = NULL; + DECLARE_DOMCTL; + + *guest_abi = NULL; + + memset(&domctl, 0, sizeof(domctl)); + domctl.domain = domid; + domctl.cmd = XEN_DOMCTL_get_address_size; + + ret = do_domctl(xch, &domctl); + + if ( ret ) + goto out; + + guest_width = domctl.u.address_size.size; + + switch (guest_width) { + case 32: /* 32 bit guest */ + protocol = XEN_IO_PROTO_ABI_X86_32; + break; + case 64: /* 64 bit guest */ + protocol = XEN_IO_PROTO_ABI_X86_64; + break; + default: + ret = -EINVAL; + goto out; + } + + *guest_abi = strdup(protocol); + + if ( *guest_abi == NULL ) + ret = -ENOMEM; + + out: + return ret; +} + static unsigned long nr_page_tables(struct xc_dom_image *dom, xen_vaddr_t start, xen_vaddr_t end, unsigned long bits) diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h index 50853af..83cca1f 100644 --- a/tools/libxc/xenctrl.h +++ b/tools/libxc/xenctrl.h @@ -637,6 +637,18 @@ int xc_domain_hvm_setcontext(xc_interface *xch, uint32_t size); /** + * This function will return the guest word width + * + * @parm xch a handle to an open hypervisor interface + * @parm domid the domain to get address width for + * @parm guest_abi guest''s navtive IO ABI string + * @return 0 on success, -ev on failure + */ +int xc_domain_get_native_protocol(xc_interface *xch, + uint32_t domid, + char **guest_abi); + +/** * This function returns information about the execution context of a * particular vcpu of a domain. * diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c index 572c2c6..3f6afad 100644 --- a/tools/libxl/libxl.c +++ b/tools/libxl/libxl.c @@ -2037,6 +2037,13 @@ static void device_disk_add(libxl__egc *egc, uint32_t domid, int rc; libxl_ctx *ctx = gc->owner; xs_transaction_t t = XBT_NULL; + char *protocol = NULL; + + libxl_domain_type type = libxl__domain_type(gc, domid); + if (type == LIBXL_DOMAIN_TYPE_INVALID) { + rc = ERROR_FAIL; + goto out; + } for (;;) { rc = libxl__xs_transaction_start(gc, &t); @@ -2156,10 +2163,28 @@ static void device_disk_add(libxl__egc *egc, uint32_t domid, flexarray_append(front, "device-type"); flexarray_append(front, disk->is_cdrom ? "cdrom" : "disk"); + /* + * Old PV kernels before 2.6.26 rely on tool stack to write + * disk native protocol to frontend node. Xend does this, port + * this behaviour to xl. + * + * New kernels write this node themselves. In that case it just + * overwrites an existing node which is OK. + */ + if (type == LIBXL_DOMAIN_TYPE_PV) { + if (!xc_domain_get_native_protocol(ctx->xch, domid, &protocol)) { + flexarray_append(front, "protocol"); + flexarray_append(front, protocol); + } + } + libxl__device_generic_add(gc, t, device, libxl__xs_kvs_of_flexarray(gc, back, back->count), libxl__xs_kvs_of_flexarray(gc, front, front->count)); + if (protocol) + free(protocol); + rc = libxl__xs_transaction_commit(gc, &t); if (!rc) break; if (rc < 0) goto out; -- 1.7.10.4
On Wed, 2013-04-24 at 19:20 +0100, Wei Liu wrote:> This is a patch to forward-port a Xend behaviour. Xend writes IO ABI used for > all frontends. Blkfront before 2.6.26 relies on this behaviour otherwise guest > cannot boot when running in 32-on-64 mode. Blkfront after 2.6.26 writes that > node itself, in which case it''s just an overwrite to an existing node which > should be OK. > > In fact Xend writes the ABI for all frontends including console and vif. But > nowadays only old disk frontends rely on that behaviour so that we only write > the ABI for disk frontends in libxl, minimizing the impact. > > Signed-off-by: Wei Liu <wei.liu2@citrix.com> > Cc: Ian Campbell <ian.campbell@citrix.com> > Cc: Roger Pau Monne <roger.pau@citrix.com> > Cc: Valtteri Kiviniemi <kiviniemi.valtteri@gmail.com> > --- > tools/libxc/xc_dom_arm.c | 13 +++++++++++++ > tools/libxc/xc_dom_x86.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ > tools/libxc/xenctrl.h | 12 ++++++++++++ > tools/libxl/libxl.c | 25 +++++++++++++++++++++++++ > 4 files changed, 94 insertions(+) > > diff --git a/tools/libxc/xc_dom_arm.c b/tools/libxc/xc_dom_arm.c > index 041832e..acdac9e 100644 > --- a/tools/libxc/xc_dom_arm.c > +++ b/tools/libxc/xc_dom_arm.c > @@ -29,6 +29,19 @@ > #define CONSOLE_PFN_OFFSET 0 > #define XENSTORE_PFN_OFFSET 1 > > +/* get guest IO ABI */ > +int xc_domain_get_native_protocol(xc_interface *xch, > + uint32_t domid, > + char **guest_abi);With the correct placement of const in here I think you can avoid the potential for memory allocation failure as well as the need for the caller to free by just: *guest_abi = XEN_IO_PROTO_ABI_xxx Perhaps even: const char *xc_domain_get_native_protocol(xch, domid) ... return XEN_IO_PROTO... In the X86 case where the domctl can fail we can PERROR and return NULL, since the caller doesn''t really care why the ABI isn''t available. Or instead of NULL we can return the protocol of whichever bit width we are compiler for. Probably NULL is safer though.> diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h > index 50853af..83cca1f 100644 > --- a/tools/libxc/xenctrl.h > +++ b/tools/libxc/xenctrl.h > @@ -637,6 +637,18 @@ int xc_domain_hvm_setcontext(xc_interface *xch, > uint32_t size); > > /** > + * This function will return the guest word width > + * > + * @parm xch a handle to an open hypervisor interface > + * @parm domid the domain to get address width for > + * @parm guest_abi guest''s navtive IO ABI string > + * @return 0 on success, -ev on failure > + */ > +int xc_domain_get_native_protocol(xc_interface *xch, > + uint32_t domid, > + char **guest_abi); > + > +/** > * This function returns information about the execution context of a > * particular vcpu of a domain. > * > diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c > index 572c2c6..3f6afad 100644 > --- a/tools/libxl/libxl.c > +++ b/tools/libxl/libxl.c > @@ -2037,6 +2037,13 @@ static void device_disk_add(libxl__egc *egc, uint32_t domid, > int rc; > libxl_ctx *ctx = gc->owner; > xs_transaction_t t = XBT_NULL; > + char *protocol = NULL; > + > + libxl_domain_type type = libxl__domain_type(gc, domid);Did we decide that Xend only does this for PV guests? Would there be any harm in doing this for all PV disks, whether attached to PV or HVM domains? Would remove some code ;-)> + if (type == LIBXL_DOMAIN_TYPE_INVALID) { > + rc = ERROR_FAIL; > + goto out; > + } > > for (;;) { > rc = libxl__xs_transaction_start(gc, &t); > @@ -2156,10 +2163,28 @@ static void device_disk_add(libxl__egc *egc, uint32_t domid, > flexarray_append(front, "device-type"); > flexarray_append(front, disk->is_cdrom ? "cdrom" : "disk"); > > + /* > + * Old PV kernels before 2.6.26 rely on tool stack to write > + * disk native protocol to frontend node. Xend does this, port > + * this behaviour to xl. > + * > + * New kernels write this node themselves. In that case it just > + * overwrites an existing node which is OK. > + */ > + if (type == LIBXL_DOMAIN_TYPE_PV) { > + if (!xc_domain_get_native_protocol(ctx->xch, domid, &protocol)) { > + flexarray_append(front, "protocol"); > + flexarray_append(front, protocol); > + } > + } > + > libxl__device_generic_add(gc, t, device, > libxl__xs_kvs_of_flexarray(gc, back, back->count), > libxl__xs_kvs_of_flexarray(gc, front, front->count)); > > + if (protocol) > + free(protocol); > + > rc = libxl__xs_transaction_commit(gc, &t); > if (!rc) break; > if (rc < 0) goto out;
On Thu, Apr 25, 2013 at 09:33:23AM +0100, Ian Campbell wrote: [...]> > +/* get guest IO ABI */ > > +int xc_domain_get_native_protocol(xc_interface *xch, > > + uint32_t domid, > > + char **guest_abi); > > With the correct placement of const in here I think you can avoid the > potential for memory allocation failure as well as the need for the > caller to free by just: > *guest_abi = XEN_IO_PROTO_ABI_xxx > > Perhaps even: > const char *xc_domain_get_native_protocol(xch, domid) > ... > return XEN_IO_PROTO... > > In the X86 case where the domctl can fail we can PERROR and return NULL, > since the caller doesn''t really care why the ABI isn''t available. Or > instead of NULL we can return the protocol of whichever bit width we are > compiler for. Probably NULL is safer though. >OK.> > diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h > > index 50853af..83cca1f 100644 > > --- a/tools/libxc/xenctrl.h > > +++ b/tools/libxc/xenctrl.h > > @@ -637,6 +637,18 @@ int xc_domain_hvm_setcontext(xc_interface *xch, > > uint32_t size); > > > > /** > > + * This function will return the guest word width > > + * > > + * @parm xch a handle to an open hypervisor interface > > + * @parm domid the domain to get address width for > > + * @parm guest_abi guest''s navtive IO ABI string > > + * @return 0 on success, -ev on failure > > + */ > > +int xc_domain_get_native_protocol(xc_interface *xch, > > + uint32_t domid, > > + char **guest_abi); > > + > > +/** > > * This function returns information about the execution context of a > > * particular vcpu of a domain. > > * > > diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c > > index 572c2c6..3f6afad 100644 > > --- a/tools/libxl/libxl.c > > +++ b/tools/libxl/libxl.c > > @@ -2037,6 +2037,13 @@ static void device_disk_add(libxl__egc *egc, uint32_t domid, > > int rc; > > libxl_ctx *ctx = gc->owner; > > xs_transaction_t t = XBT_NULL; > > + char *protocol = NULL; > > + > > + libxl_domain_type type = libxl__domain_type(gc, domid); > > Did we decide that Xend only does this for PV guests? Would there be any > harm in doing this for all PV disks, whether attached to PV or HVM > domains? Would remove some code ;-) >This string is propogated from Python libxc binding, and it is only placed in pyxc_linux_build. So unless I''m mistaken, Xend only does this for PV. Removing the type check and write the node for each type of domain should do no harm. For newer frontends, they just blindly write that node whether it pre-exists or not. Wei.
On Thu, 2013-04-25 at 15:57 +0100, Wei Liu wrote:> This string is propogated from Python libxc binding, and it is only > placed in pyxc_linux_build. So unless I''m mistaken, Xend only does this > for PV. > > Removing the type check and write the node for each type of domain > should do no harm. For newer frontends, they just blindly write that > node whether it pre-exists or not.We should probably stick with the xend behaviour, even if it is slightly more code. Ian.