Gianni Tedesco
2010-Aug-02 14:58 UTC
[Xen-devel] [PATCH 0 of 6] xl PCI passthrough updates v3
my current libxl PCI-passthrough patch-queue rebased modulo what has alread been applied. Changes since v1: 1. Move pci FLR function in to libxl_pci.c - got it all in now. 2. Properly initialize pci device structs in new code 3. Incorporate suggestions from Stefano wrt. API and putting sysfs paths in macros. 4. Rename libxl_device_pci_list to libxl_device_pci_list_assigned due to change in parameters for consistency with the rest of libxl PCI API. 5. Also introduced a patch to centralise parsing of PCI BDF''s and allow omission of the PCI domain as a short-hand for both config files and hot-plug command parameters. This also fixes an infinite loop in xl create if there is a parse error in the pci config. Changes since v2: 1. Use SYS_PCI_DEV macro in libxl_device_pci_reset 2. Fix error in libxl_device_pci_list_assigned() as pointed out by Ian Campbell 3. Get rid of scan_sys_pcidir() and implement inside libxl_device_pci_list_assignable() since we''re not supporting pcistub driver now 4. New patch: prevent attempting removal of non-attached device 5. New patch: implement pci attach to explicitly defined virtual PCI slot 6. New patch: corresponding to qemu-dm patch, detect pci insertion errors which are otherwise undetectable and cause a hang 7. New patch: implement PCI passthrough for multi-function _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
tools/libxl/libxl_pci.c | 115 ++++++++++++++++++++++------------------------- 1 files changed, 55 insertions(+), 60 deletions(-) # HG changeset patch # User Gianni Tedesco <gianni.tedesco@citrix.com> # Date 1280759953 -3600 # Branch pci-patches-v3 # Node ID 4a5c274586acb410a8c46a14b9451d8acd60ac1c # Parent 9f49667fec7197c935c822f7caea64b98007c605 xl: PCI code cleanups Get rid of scan_sys_pcidir() and open-code it inside libxl_device_pci_list_assignable() since it''s not a generically re-useable function and we''re not supporting pcistub driver now. Also use macros for sysfs dirs in libxl_device_pci_reset Signed-off-by: Gianni Tedesco <gianni.tedesco@citrix.com> diff -r 9f49667fec71 -r 4a5c274586ac tools/libxl/libxl_pci.c --- a/tools/libxl/libxl_pci.c Fri Jul 30 15:22:39 2010 +0100 +++ b/tools/libxl/libxl_pci.c Mon Aug 02 15:39:13 2010 +0100 @@ -325,6 +325,57 @@ static int is_assigned(libxl_device_pci return 0; } +int libxl_device_pci_list_assignable(libxl_ctx *ctx, libxl_device_pci **list, int *num) +{ + libxl_device_pci *pcidevs = NULL, *new, *assigned; + struct dirent *de; + DIR *dir; + int rc, num_assigned; + + *num = 0; + *list = NULL; + + rc = get_all_assigned_devices(ctx, &assigned, &num_assigned); + if ( rc ) + return rc; + + dir = opendir(SYSFS_PCIBACK_DRIVER); + if ( NULL == dir ) { + if ( errno == ENOENT ) { + XL_LOG(ctx, XL_LOG_ERROR, "Looks like pciback driver not loaded"); + }else{ + XL_LOG_ERRNO(ctx, XL_LOG_ERROR, "Couldn''t open %s", SYSFS_PCIBACK_DRIVER); + } + free(assigned); + return ERROR_FAIL; + } + + while( (de = readdir(dir)) ) { + unsigned dom, bus, dev, func; + if ( sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4 ) + continue; + + if ( is_assigned(assigned, num_assigned, dom, bus, dev, func) ) + continue; + + new = realloc(pcidevs, ((*num) + 1) * sizeof(*new)); + if ( NULL == new ) + continue; + + pcidevs = new; + new = pcidevs + *num; + + memset(new, 0, sizeof(*new)); + libxl_device_pci_init(new, dom, bus, dev, func, 0); + (*num)++; + } + + closedir(dir); + free(assigned); + *list = pcidevs; + return 0; +} + static int do_pci_add(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) { char *path; @@ -554,63 +605,6 @@ out: return 0; } -static libxl_device_pci *scan_sys_pcidir(libxl_device_pci *assigned, - int num_assigned, const char *path, int *num) -{ - libxl_device_pci *pcidevs = NULL, *new; - struct dirent *de; - DIR *dir; - - dir = opendir(path); - if ( NULL == dir ) - return pcidevs; - - while( (de = readdir(dir)) ) { - unsigned dom, bus, dev, func; - if ( sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4 ) - continue; - - if ( is_assigned(assigned, num_assigned, dom, bus, dev, func) ) - continue; - - new = realloc(pcidevs, ((*num) + 1) * sizeof(*new)); - if ( NULL == new ) - continue; - - pcidevs = new; - new = pcidevs + *num; - - memset(new, 0, sizeof(*new)); - libxl_device_pci_init(new, dom, bus, dev, func, 0); - (*num)++; - } - - closedir(dir); - return pcidevs; -} - -int libxl_device_pci_list_assignable(libxl_ctx *ctx, libxl_device_pci **list, int *num) -{ - libxl_device_pci *pcidevs = NULL; - libxl_device_pci *assigned; - int num_assigned, rc; - - *num = 0; - *list = NULL; - - rc = get_all_assigned_devices(ctx, &assigned, &num_assigned); - if ( rc ) - return rc; - - pcidevs = scan_sys_pcidir(assigned, num_assigned, - SYSFS_PCIBACK_DRIVER, num); - - free(assigned); - if ( *num ) - *list = pcidevs; - return 0; -} - int libxl_device_pci_list_assigned(libxl_ctx *ctx, libxl_device_pci **list, uint32_t domid, int *num) { char *be_path, *num_devs, *xsdev, *xsvdevfn, *xsopts; @@ -623,7 +617,7 @@ int libxl_device_pci_list_assigned(libxl if (!num_devs) { *num = 0; *list = NULL; - return ERROR_FAIL; + return 0; } n = atoi(num_devs); pcidevs = calloc(n, sizeof(libxl_device_pci)); @@ -689,9 +683,10 @@ int libxl_device_pci_init(libxl_device_p int libxl_device_pci_reset(libxl_ctx *ctx, unsigned int domain, unsigned int bus, unsigned int dev, unsigned int func) { - char *reset = "/sys/bus/pci/drivers/pciback/do_flr"; + char *reset; int fd, rc; + reset = libxl_sprintf(ctx, "%s/pciback/do_flr", SYSFS_PCI_DEV); fd = open(reset, O_WRONLY); if (fd > 0) { char *buf = libxl_sprintf(ctx, PCI_BDF, domain, bus, dev, func); @@ -703,7 +698,7 @@ int libxl_device_pci_reset(libxl_ctx *ct } if (errno != ENOENT) XL_LOG_ERRNO(ctx, XL_LOG_ERROR, "Failed to access pciback path %s", reset); - reset = libxl_sprintf(ctx, "/sys/bus/pci/devices/"PCI_BDF"/reset", domain, bus, dev, func); + reset = libxl_sprintf(ctx, "%s/"PCI_BDF"/reset", SYSFS_PCI_DEV, domain, bus, dev, func); fd = open(reset, O_WRONLY); if (fd > 0) { rc = write(fd, "1", 1); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Gianni Tedesco
2010-Aug-02 14:58 UTC
[Xen-devel] [PATCH 2 of 6] xl: centralize BDF parsing in to libxl
tools/libxl/libxl.h | 6 +--- tools/libxl/libxl_pci.c | 71 +++++++++++++++++++++++++++++++++++++---------- tools/libxl/xl_cmdimpl.c | 44 ++++------------------------- 3 files changed, 64 insertions(+), 57 deletions(-) # HG changeset patch # User Gianni Tedesco <gianni.tedesco@citrix.com> # Date 1280760048 -3600 # Branch pci-patches-v3 # Node ID d6e7e75ccb48a29eecb895613f017043ae267318 # Parent 4a5c274586acb410a8c46a14b9451d8acd60ac1c xl: centralize BDF parsing in to libxl Introduce a new libxl call libxl_device_pci_parse_bdf() and use it consistently. This patch also fixes an infinite loop bug in xl create. If there is a parse-error on any pci config file entry then xl will attempt to skip it, but since num_pcidevs is used to index the config list and is not incremented when skipping, this caused an infinite loop. Solve that problem by introducing a new loop counter variable. Note that virtual PCI slots are not parsed by the new code. However this is a step towards support for virtual slots and multi-function device assignment since only one location in the code will need to be changed now. Signed-off-by: Gianni Tedesco <gianni.tedesco@citrix.com> diff -r 4a5c274586ac -r d6e7e75ccb48 tools/libxl/libxl.h --- a/tools/libxl/libxl.h Mon Aug 02 15:39:13 2010 +0100 +++ b/tools/libxl/libxl.h Mon Aug 02 15:40:48 2010 +0100 @@ -516,16 +516,12 @@ int libxl_device_vfb_add(libxl_ctx *ctx, int libxl_device_vfb_clean_shutdown(libxl_ctx *ctx, uint32_t domid); int libxl_device_vfb_hard_shutdown(libxl_ctx *ctx, uint32_t domid); -#define PCI_BDF "%04x:%02x:%02x.%01x" -#define PCI_BDF_VDEVFN "%04x:%02x:%02x.%01x@%02x" int libxl_device_pci_add(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev); int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev); int libxl_device_pci_shutdown(libxl_ctx *ctx, uint32_t domid); int libxl_device_pci_list_assigned(libxl_ctx *ctx, libxl_device_pci **list, uint32_t domid, int *num); int libxl_device_pci_list_assignable(libxl_ctx *ctx, libxl_device_pci **list, int *num); -int libxl_device_pci_init(libxl_device_pci *pcidev, unsigned int domain, - unsigned int bus, unsigned int dev, - unsigned int func, unsigned int vdevfn); +int libxl_device_pci_parse_bdf(libxl_device_pci *pcidev, const char *str); /* * Functions for allowing users of libxl to store private data diff -r 4a5c274586ac -r d6e7e75ccb48 tools/libxl/libxl_pci.c --- a/tools/libxl/libxl_pci.c Mon Aug 02 15:39:13 2010 +0100 +++ b/tools/libxl/libxl_pci.c Mon Aug 02 15:40:48 2010 +0100 @@ -36,6 +36,59 @@ #include "libxl_internal.h" #include "flexarray.h" +#define PCI_BDF "%04x:%02x:%02x.%01x" +#define PCI_BDF_SHORT "%02x:%02x.%01x" +#define PCI_BDF_VDEVFN "%04x:%02x:%02x.%01x@%02x" + +static int pcidev_init(libxl_device_pci *pcidev, unsigned int domain, + unsigned int bus, unsigned int dev, + unsigned int func, unsigned int vdevfn) +{ + pcidev->domain = domain; + pcidev->bus = bus; + pcidev->dev = dev; + pcidev->func = func; + pcidev->vdevfn = vdevfn; + return 0; +} + +int libxl_device_pci_parse_bdf(libxl_device_pci *pcidev, const char *str) +{ + unsigned dom, bus, dev, func; + char *p, *buf2; + int rc; + + if ( NULL == (buf2 = strdup(str)) ) + return ERROR_NOMEM; + + p = strtok(buf2, ","); + + if ( sscanf(str, PCI_BDF, &dom, &bus, &dev, &func) != 4 ) { + dom = 0; + if ( sscanf(str, PCI_BDF_SHORT, &bus, &dev, &func) != 3 ) { + rc = ERROR_FAIL; + goto out; + } + } + + rc = pcidev_init(pcidev, dom, bus, dev, func, 0); + + while ((p = strtok(NULL, ",=")) != NULL) { + while (*p == '' '') + p++; + if (!strcmp(p, "msitranslate")) { + p = strtok(NULL, ",="); + pcidev->msitranslate = atoi(p); + } else if (!strcmp(p, "power_mgmt")) { + p = strtok(NULL, ",="); + pcidev->power_mgmt = atoi(p); + } + } +out: + free(buf2); + return rc; +} + static int libxl_create_pci_backend(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev, int num) { flexarray_t *front; @@ -288,7 +341,7 @@ static int get_all_assigned_devices(libx if ( sscanf(bdf, PCI_BDF, &dom, &bus, &dev, &func) != 4 ) continue; - libxl_device_pci_init(pcidevs + *num, dom, bus, dev, func, 0); + pcidev_init(pcidevs + *num, dom, bus, dev, func, 0); (*num)++; } } @@ -366,7 +419,7 @@ int libxl_device_pci_list_assignable(lib new = pcidevs + *num; memset(new, 0, sizeof(*new)); - libxl_device_pci_init(new, dom, bus, dev, func, 0); + pcidev_init(new, dom, bus, dev, func, 0); (*num)++; } @@ -629,7 +682,7 @@ int libxl_device_pci_list_assigned(libxl xsvdevfn = libxl_xs_read(ctx, XBT_NULL, libxl_sprintf(ctx, "%s/vdevfn-%d", be_path, i)); if (xsvdevfn) vdevfn = strtol(xsvdevfn, (char **) NULL, 16); - libxl_device_pci_init(pcidevs + i, domain, bus, dev, func, vdevfn); + pcidev_init(pcidevs + i, domain, bus, dev, func, vdevfn); xsopts = libxl_xs_read(ctx, XBT_NULL, libxl_sprintf(ctx, "%s/opts-%d", be_path, i)); if (xsopts) { char *saveptr; @@ -668,18 +721,6 @@ int libxl_device_pci_shutdown(libxl_ctx return 0; } -int libxl_device_pci_init(libxl_device_pci *pcidev, unsigned int domain, - unsigned int bus, unsigned int dev, - unsigned int func, unsigned int vdevfn) -{ - pcidev->domain = domain; - pcidev->bus = bus; - pcidev->dev = dev; - pcidev->func = func; - pcidev->vdevfn = vdevfn; - return 0; -} - int libxl_device_pci_reset(libxl_ctx *ctx, unsigned int domain, unsigned int bus, unsigned int dev, unsigned int func) { diff -r 4a5c274586ac -r d6e7e75ccb48 tools/libxl/xl_cmdimpl.c --- a/tools/libxl/xl_cmdimpl.c Mon Aug 02 15:39:13 2010 +0100 +++ b/tools/libxl/xl_cmdimpl.c Mon Aug 02 15:40:48 2010 +0100 @@ -485,7 +485,7 @@ static void printf_info(int domid, for (i = 0; i < d_config->num_pcidevs; i++) { printf("\t(device\n"); printf("\t\t(pci\n"); - printf("\t\t\t(pci dev "PCI_BDF_VDEVFN")\n", + printf("\t\t\t(pci dev %04x:%02x:%02x.%01x@%02x)\n", d_config->pcidevs[i].domain, d_config->pcidevs[i].bus, d_config->pcidevs[i].dev, d_config->pcidevs[i].func, d_config->pcidevs[i].vdevfn); @@ -943,46 +943,20 @@ skip_vfb: pci_power_mgmt = l; if (!xlu_cfg_get_list (config, "pci", &pcis, 0)) { + int i; d_config->num_pcidevs = 0; d_config->pcidevs = NULL; - while ((buf = xlu_cfg_get_listitem (pcis, d_config->num_pcidevs)) != NULL) { + for(i = 0; (buf = xlu_cfg_get_listitem (pcis, i)) != NULL; i++) { libxl_device_pci *pcidev; - unsigned int domain = 0, bus = 0, dev = 0, func = 0, vdevfn = 0; - char *buf2 = strdup(buf); - char *p; d_config->pcidevs = (libxl_device_pci *) realloc(d_config->pcidevs, sizeof (libxl_device_pci) * (d_config->num_pcidevs + 1)); pcidev = d_config->pcidevs + d_config->num_pcidevs; memset(pcidev, 0x00, sizeof(libxl_device_pci)); - p = strtok(buf2, ","); - if (!p) - goto skip_pci; - if (sscanf(p, PCI_BDF_VDEVFN, &domain, &bus, &dev, &func, &vdevfn) < 4) { - domain = 0; - if (sscanf(p, "%02x:%02x.%01x@%02x", &bus, &dev, &func, &vdevfn) < 3) { - fprintf(stderr,"xl: Unable to parse pci bdf (%s)\n", p); - goto skip_pci; - } - } - - libxl_device_pci_init(pcidev, domain, bus, dev, func, vdevfn); pcidev->msitranslate = pci_msitranslate; pcidev->power_mgmt = pci_power_mgmt; - while ((p = strtok(NULL, ",=")) != NULL) { - while (*p == '' '') - p++; - if (!strcmp(p, "msitranslate")) { - p = strtok(NULL, ",="); - pcidev->msitranslate = atoi(p); - } else if (!strcmp(p, "power_mgmt")) { - p = strtok(NULL, ",="); - pcidev->power_mgmt = atoi(p); - } - } - d_config->num_pcidevs++; -skip_pci: - free(buf2); + if (!libxl_device_pci_parse_bdf(pcidev, buf)) + d_config->num_pcidevs++; } } @@ -2001,16 +1975,14 @@ int main_pcilist(int argc, char **argv) void pcidetach(char *dom, char *bdf) { libxl_device_pci pcidev; - unsigned int domain, bus, dev, func; find_domain(dom); memset(&pcidev, 0x00, sizeof(pcidev)); - if (sscanf(bdf, PCI_BDF, &domain, &bus, &dev, &func) != 4) { + if (libxl_device_pci_parse_bdf(&pcidev, bdf)) { fprintf(stderr, "pci-detach: malformed BDF specification \"%s\"\n", bdf); exit(2); } - libxl_device_pci_init(&pcidev, domain, bus, dev, func, 0); libxl_device_pci_remove(&ctx, domid, &pcidev); } @@ -2043,16 +2015,14 @@ int main_pcidetach(int argc, char **argv void pciattach(char *dom, char *bdf, char *vs) { libxl_device_pci pcidev; - unsigned int domain, bus, dev, func; find_domain(dom); memset(&pcidev, 0x00, sizeof(pcidev)); - if (sscanf(bdf, PCI_BDF, &domain, &bus, &dev, &func) != 4) { + if (libxl_device_pci_parse_bdf(&pcidev, bdf)) { fprintf(stderr, "pci-attach: malformed BDF specification \"%s\"\n", bdf); exit(2); } - libxl_device_pci_init(&pcidev, domain, bus, dev, func, 0); libxl_device_pci_add(&ctx, domid, &pcidev); } _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Gianni Tedesco
2010-Aug-02 14:58 UTC
[Xen-devel] [PATCH 3 of 6] xl: prevent attempts to remove non-attached pci pass-through devices
tools/libxl/libxl_pci.c | 12 ++++++++++-- 1 files changed, 10 insertions(+), 2 deletions(-) # HG changeset patch # User Gianni Tedesco <gianni.tedesco@citrix.com> # Date 1280760101 -3600 # Branch pci-patches-v3 # Node ID a993021324d0c7d23a16290b7a722c2878506c2e # Parent d6e7e75ccb48a29eecb895613f017043ae267318 xl: prevent attempts to remove non-attached pci pass-through devices Signed-off-by: Gianni Tedesco <gianni.tedesco@citrix.com> diff -r d6e7e75ccb48 -r a993021324d0 tools/libxl/libxl_pci.c --- a/tools/libxl/libxl_pci.c Mon Aug 02 15:40:48 2010 +0100 +++ b/tools/libxl/libxl_pci.c Mon Aug 02 15:41:41 2010 +0100 @@ -564,12 +564,20 @@ int libxl_device_pci_add(libxl_ctx *ctx, int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) { + libxl_device_pci *assigned; char *path; char *state; - int hvm, rc; + int hvm, rc, num; int stubdomid = 0; - /* TODO: check if the device can be detached */ + if ( !libxl_device_pci_list_assigned(ctx, &assigned, domid, &num) ) { + if ( !is_assigned(assigned, num, pcidev->domain, + pcidev->bus, pcidev->dev, pcidev->func) ) { + XL_LOG(ctx, XL_LOG_ERROR, "PCI device not attached to this domain"); + return ERROR_INVAL; + } + } + libxl_device_pci_remove_xenstore(ctx, domid, pcidev); hvm = is_hvm(ctx, domid); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Gianni Tedesco
2010-Aug-02 14:58 UTC
[Xen-devel] [PATCH 4 of 6] xl: implement pci attach to explicitly defined virtual PCI slot
tools/libxl/libxl.h | 2 +- tools/libxl/libxl_pci.c | 156 ++++++++++++++++++++++++++++++++++++++-------- tools/libxl/xl_cmdimpl.c | 6 +- 3 files changed, 133 insertions(+), 31 deletions(-) # HG changeset patch # User Gianni Tedesco <gianni.tedesco@citrix.com> # Date 1280760240 -3600 # Branch pci-patches-v3 # Node ID 4aea113e2ce2a46c908e62cace4bf9a710ae7c51 # Parent a993021324d0c7d23a16290b7a722c2878506c2e xl: implement pci attach to explicitly defined virtual PCI slot Move to state-machine parser for PCI BDF''s now that the number of possible sscanf expressions is sufficiently large to make it worth it. Also implement parsing of virtual PCI slot numbers. Signed-off-by: Gianni Tedesco <gianni.tedesco@citrix.com> diff -r a993021324d0 -r 4aea113e2ce2 tools/libxl/libxl.h --- a/tools/libxl/libxl.h Mon Aug 02 15:41:41 2010 +0100 +++ b/tools/libxl/libxl.h Mon Aug 02 15:44:00 2010 +0100 @@ -521,7 +521,7 @@ int libxl_device_pci_remove(libxl_ctx *c int libxl_device_pci_shutdown(libxl_ctx *ctx, uint32_t domid); int libxl_device_pci_list_assigned(libxl_ctx *ctx, libxl_device_pci **list, uint32_t domid, int *num); int libxl_device_pci_list_assignable(libxl_ctx *ctx, libxl_device_pci **list, int *num); -int libxl_device_pci_parse_bdf(libxl_device_pci *pcidev, const char *str); +int libxl_device_pci_parse_bdf(libxl_ctx *ctx, libxl_device_pci *pcidev, const char *str); /* * Functions for allowing users of libxl to store private data diff -r a993021324d0 -r 4aea113e2ce2 tools/libxl/libxl_pci.c --- a/tools/libxl/libxl_pci.c Mon Aug 02 15:41:41 2010 +0100 +++ b/tools/libxl/libxl_pci.c Mon Aug 02 15:44:00 2010 +0100 @@ -52,41 +52,143 @@ static int pcidev_init(libxl_device_pci return 0; } -int libxl_device_pci_parse_bdf(libxl_device_pci *pcidev, const char *str) +static int hex_convert(const char *str, unsigned int *val, unsigned int mask) { - unsigned dom, bus, dev, func; - char *p, *buf2; - int rc; + unsigned long ret; + char *end; - if ( NULL == (buf2 = strdup(str)) ) + ret = strtoul(str, &end, 16); + if ( end == str || *end != ''\0'' ) + return -1; + if ( ret & ~mask ) + return -1; + *val = (unsigned int)ret & mask; + return 0; +} + +#define STATE_DOMAIN 0 +#define STATE_BUS 1 +#define STATE_DEV 2 +#define STATE_FUNC 3 +#define STATE_VSLOT 4 +#define STATE_OPTIONS_K 6 +#define STATE_OPTIONS_V 7 +#define STATE_TERMINAL 8 +int libxl_device_pci_parse_bdf(libxl_ctx *ctx, libxl_device_pci *pcidev, const char *str) +{ + unsigned state = STATE_DOMAIN; + unsigned dom, bus, dev, func, vslot = 0; + char *buf2, *tok, *ptr, *end, *optkey = NULL; + + if ( NULL == (buf2 = ptr = strdup(str)) ) return ERROR_NOMEM; - p = strtok(buf2, ","); - - if ( sscanf(str, PCI_BDF, &dom, &bus, &dev, &func) != 4 ) { - dom = 0; - if ( sscanf(str, PCI_BDF_SHORT, &bus, &dev, &func) != 3 ) { - rc = ERROR_FAIL; - goto out; + for(tok = ptr, end = ptr + strlen(ptr) + 1; ptr < end; ptr++) { + switch(state) { + case STATE_DOMAIN: + if ( *ptr == '':'' ) { + state = STATE_BUS; + *ptr = ''\0''; + if ( hex_convert(tok, &dom, 0xffff) ) + goto parse_error; + tok = ptr + 1; + } + break; + case STATE_BUS: + if ( *ptr == '':'' ) { + state = STATE_DEV; + *ptr = ''\0''; + if ( hex_convert(tok, &bus, 0xff) ) + goto parse_error; + tok = ptr + 1; + }else if ( *ptr == ''.'' ) { + state = STATE_FUNC; + *ptr = ''\0''; + if ( dom & ~0xff ) + goto parse_error; + bus = dom; + dom = 0; + if ( hex_convert(tok, &dev, 0xff) ) + goto parse_error; + tok = ptr + 1; + } + break; + case STATE_DEV: + if ( *ptr == ''.'' ) { + state = STATE_FUNC; + *ptr = ''\0''; + if ( hex_convert(tok, &dev, 0xff) ) + goto parse_error; + tok = ptr + 1; + } + break; + case STATE_FUNC: + if ( *ptr == ''\0'' || *ptr == ''@'' || *ptr == '','' ) { + switch( *ptr ) { + case ''\0'': + state = STATE_TERMINAL; + break; + case ''@'': + state = STATE_VSLOT; + break; + case '','': + state = STATE_OPTIONS_K; + break; + } + *ptr = ''\0''; + if ( hex_convert(tok, &func, 0x7) ) + goto parse_error; + tok = ptr + 1; + } + break; + case STATE_VSLOT: + if ( *ptr == ''\0'' || *ptr == '','' ) { + state = ( *ptr == '','' ) ? STATE_OPTIONS_K : STATE_TERMINAL; + *ptr = ''\0''; + if ( hex_convert(tok, &vslot, 0xff) ) + goto parse_error; + tok = ptr + 1; + } + break; + case STATE_OPTIONS_K: + if ( *ptr == ''='' ) { + state = STATE_OPTIONS_V; + *ptr = ''\0''; + optkey = tok; + tok = ptr + 1; + } + break; + case STATE_OPTIONS_V: + if ( *ptr == '','' || *ptr == ''\0'' ) { + state = (*ptr == '','') ? STATE_OPTIONS_K : STATE_TERMINAL; + *ptr = ''\0''; + if ( !strcmp(optkey, "msitranslate") ) { + pcidev->msitranslate = atoi(tok); + }else if ( !strcmp(optkey, "power_mgmt") ) { + pcidev->power_mgmt = atoi(tok); + }else{ + XL_LOG(ctx, XL_LOG_WARNING, + "Unknown PCI BDF option: %s", optkey); + } + tok = ptr + 1; + } + default: + break; } } - rc = pcidev_init(pcidev, dom, bus, dev, func, 0); + free(buf2); - while ((p = strtok(NULL, ",=")) != NULL) { - while (*p == '' '') - p++; - if (!strcmp(p, "msitranslate")) { - p = strtok(NULL, ",="); - pcidev->msitranslate = atoi(p); - } else if (!strcmp(p, "power_mgmt")) { - p = strtok(NULL, ",="); - pcidev->power_mgmt = atoi(p); - } - } -out: - free(buf2); - return rc; + if ( tok != ptr || state != STATE_TERMINAL ) + goto parse_error; + + pcidev_init(pcidev, dom, bus, dev, func, vslot << 3); + + return 0; + +parse_error: + printf("parse error: %s\n", str); + return ERROR_INVAL; } static int libxl_create_pci_backend(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev, int num) diff -r a993021324d0 -r 4aea113e2ce2 tools/libxl/xl_cmdimpl.c --- a/tools/libxl/xl_cmdimpl.c Mon Aug 02 15:41:41 2010 +0100 +++ b/tools/libxl/xl_cmdimpl.c Mon Aug 02 15:44:00 2010 +0100 @@ -955,7 +955,7 @@ skip_vfb: pcidev->msitranslate = pci_msitranslate; pcidev->power_mgmt = pci_power_mgmt; - if (!libxl_device_pci_parse_bdf(pcidev, buf)) + if (!libxl_device_pci_parse_bdf(&ctx, pcidev, buf)) d_config->num_pcidevs++; } } @@ -1979,7 +1979,7 @@ void pcidetach(char *dom, char *bdf) find_domain(dom); memset(&pcidev, 0x00, sizeof(pcidev)); - if (libxl_device_pci_parse_bdf(&pcidev, bdf)) { + if (libxl_device_pci_parse_bdf(&ctx, &pcidev, bdf)) { fprintf(stderr, "pci-detach: malformed BDF specification \"%s\"\n", bdf); exit(2); } @@ -2019,7 +2019,7 @@ void pciattach(char *dom, char *bdf, cha find_domain(dom); memset(&pcidev, 0x00, sizeof(pcidev)); - if (libxl_device_pci_parse_bdf(&pcidev, bdf)) { + if (libxl_device_pci_parse_bdf(&ctx, &pcidev, bdf)) { fprintf(stderr, "pci-attach: malformed BDF specification \"%s\"\n", bdf); exit(2); } _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Gianni Tedesco
2010-Aug-02 14:58 UTC
[Xen-devel] [PATCH 5 of 6] xl: detect pci-insert-failed dm status on pci-passthrough
tools/libxl/libxl.c | 10 +++++----- tools/libxl/libxl_device.c | 26 +++++++++++++++++--------- tools/libxl/libxl_internal.h | 2 ++ tools/libxl/libxl_pci.c | 24 +++++++++++++++++++++--- 4 files changed, 45 insertions(+), 17 deletions(-) # HG changeset patch # User Gianni Tedesco <gianni.tedesco@citrix.com> # Date 1280760458 -3600 # Branch pci-patches-v3 # Node ID eb8ee481bc063b18b39feaa76d9b757331ed3a9f # Parent 4aea113e2ce2a46c908e62cace4bf9a710ae7c51 xl: detect pci-insert-failed dm status on pci-passthrough NOTE: This functionality depends on a corresponding qemu-dm patch to work as expected. Should be safe to use with an un-patched qemu-dm as before. libxl_wait_for_device_model can only wait for one status value, re-work the API so that a callback function can chose between several different possible status values for qemu-dm and fix up all callers appropriately. In the case of PCI device insert we succeed if qemu-dm reports "pci-device-inserted" and error out instead of hanging forever if it fails since qemu-dm now reports a status of "pci-insert-failed". Signed-off-by: Gianni Tedesco <gianni.tedesco@citrix.com> diff -r 4aea113e2ce2 -r eb8ee481bc06 tools/libxl/libxl.c --- a/tools/libxl/libxl.c Mon Aug 02 15:44:00 2010 +0100 +++ b/tools/libxl/libxl.c Mon Aug 02 15:47:38 2010 +0100 @@ -1420,12 +1420,12 @@ int libxl_detach_device_model(libxl_ctx int libxl_confirm_device_model_startup(libxl_ctx *ctx, libxl_device_model_starting *starting) { - int problem = libxl_wait_for_device_model(ctx, starting->domid, "running", - libxl_spawn_check, - starting->for_spawn); - int detach = libxl_detach_device_model(ctx, starting); + int problem = libxl_wait_for_device_model(ctx, starting->domid, "running", NULL, NULL); + int detach; + if ( !problem ) + problem = libxl_spawn_check(ctx, starting->for_spawn); + detach = libxl_detach_device_model(ctx, starting); return problem ? problem : detach; - return 0; } diff -r 4aea113e2ce2 -r eb8ee481bc06 tools/libxl/libxl_device.c --- a/tools/libxl/libxl_device.c Mon Aug 02 15:44:00 2010 +0100 +++ b/tools/libxl/libxl_device.c Mon Aug 02 15:47:38 2010 +0100 @@ -380,6 +380,8 @@ int libxl_device_del(libxl_ctx *ctx, lib int libxl_wait_for_device_model(libxl_ctx *ctx, uint32_t domid, char *state, int (*check_callback)(libxl_ctx *ctx, + uint32_t domid, + const char *state, void *userdata), void *check_callback_userdata) { @@ -402,18 +404,24 @@ int libxl_wait_for_device_model(libxl_ct nfds = xs_fileno(xsh) + 1; while (rc > 0 || (!rc && tv.tv_sec > 0)) { p = xs_read(xsh, XBT_NULL, path, &len); - if (p && (!state || !strcmp(state, p))) { - free(p); - xs_unwatch(xsh, path, path); - xs_daemon_close(xsh); - if (check_callback) { - rc = check_callback(ctx, check_callback_userdata); - if (rc) return rc; - } - return 0; + if ( NULL == p ) + goto again; + + if ( NULL != state && strcmp(p, state) ) + goto again; + + if ( NULL != check_callback ) { + rc = (*check_callback)(ctx, domid, p, check_callback_userdata); + if ( rc > 0 ) + goto again; } + free(p); + xs_unwatch(xsh, path, path); + xs_daemon_close(xsh); + return rc; again: + free(p); FD_ZERO(&rfds); FD_SET(xs_fileno(xsh), &rfds); rc = select(nfds, &rfds, NULL, NULL, &tv); diff -r 4aea113e2ce2 -r eb8ee481bc06 tools/libxl/libxl_internal.h --- a/tools/libxl/libxl_internal.h Mon Aug 02 15:44:00 2010 +0100 +++ b/tools/libxl/libxl_internal.h Mon Aug 02 15:47:38 2010 +0100 @@ -162,6 +162,8 @@ int libxl_devices_destroy(libxl_ctx *ctx int libxl_wait_for_device_model(libxl_ctx *ctx, uint32_t domid, char *state, int (*check_callback)(libxl_ctx *ctx, + uint32_t domid, + const char *state, void *userdata), void *check_callback_userdata); int libxl_wait_for_backend(libxl_ctx *ctx, char *be_path, char *state); diff -r 4aea113e2ce2 -r eb8ee481bc06 tools/libxl/libxl_pci.c --- a/tools/libxl/libxl_pci.c Mon Aug 02 15:44:00 2010 +0100 +++ b/tools/libxl/libxl_pci.c Mon Aug 02 15:47:38 2010 +0100 @@ -531,6 +531,20 @@ int libxl_device_pci_list_assignable(lib return 0; } +static int pci_ins_check(libxl_ctx *ctx, uint32_t domid, const char *state, void *priv) +{ + char *orig_state = priv; + + if ( !strcmp(state, "pci-insert-failed") ) + return -1; + if ( !strcmp(state, "pci-inserted") ) + return 0; + if ( !strcmp(state, orig_state) ) + return 1; + + return 1; +} + static int do_pci_add(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) { char *path; @@ -553,13 +567,17 @@ static int do_pci_add(libxl_ctx *ctx, ui pcidev->bus, pcidev->dev, pcidev->func); path = libxl_sprintf(ctx, "/local/domain/0/device-model/%d/command", domid); xs_write(ctx->xsh, XBT_NULL, path, "pci-ins", strlen("pci-ins")); - if (libxl_wait_for_device_model(ctx, domid, "pci-inserted", NULL, NULL) < 0) - XL_LOG(ctx, XL_LOG_ERROR, "Device Model didn''t respond in time"); + rc = libxl_wait_for_device_model(ctx, domid, NULL, pci_ins_check, state); path = libxl_sprintf(ctx, "/local/domain/0/device-model/%d/parameter", domid); vdevfn = libxl_xs_read(ctx, XBT_NULL, path); - sscanf(vdevfn + 2, "%x", &pcidev->vdevfn); path = libxl_sprintf(ctx, "/local/domain/0/device-model/%d/state", domid); + if ( rc < 0 ) + XL_LOG(ctx, XL_LOG_ERROR, "qemu refused to add device: %s", vdevfn); + else if ( sscanf(vdevfn, "0x%x", &pcidev->vdevfn) != 1 ) + rc = -1; xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state)); + if ( rc ) + return ERROR_FAIL; } else { char *sysfs_path = libxl_sprintf(ctx, SYSFS_PCI_DEV"/"PCI_BDF"/resource", pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Gianni Tedesco
2010-Aug-02 14:58 UTC
[Xen-devel] [PATCH 6 of 6] xl: implement multifunction device PCI pass-through
tools/libxl/libxl.h | 1 + tools/libxl/libxl_pci.c | 118 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 111 insertions(+), 8 deletions(-) # HG changeset patch # User Gianni Tedesco <gianni.tedesco@citrix.com> # Date 1280760698 -3600 # Branch pci-patches-v3 # Node ID 6379343447e58cc7b83e27beba9c35e62c232d9d # Parent eb8ee481bc063b18b39feaa76d9b757331ed3a9f xl: implement multifunction device PCI pass-through Implement PCI pass-through for multi-function devices. The supported BDF notation is: BB:DD.* - therefore passing-through a subset of functions or remapping the function numbers is not supported. Largely because I don''t see how that can ever be safe or sane (perhaps for SR-IOV cards?) Letting qemu automatically select the virtual slot is not supported since I don''t believe that qemu-dm can actually handle that case. Signed-off-by: Gianni Tedesco <gianni.tedesco@citrix.com> diff -r eb8ee481bc06 -r 6379343447e5 tools/libxl/libxl.h --- a/tools/libxl/libxl.h Mon Aug 02 15:47:38 2010 +0100 +++ b/tools/libxl/libxl.h Mon Aug 02 15:51:38 2010 +0100 @@ -308,6 +308,7 @@ typedef struct { unsigned int vdevfn; bool msitranslate; bool power_mgmt; + bool multifunction; } libxl_device_pci; enum { diff -r eb8ee481bc06 -r 6379343447e5 tools/libxl/libxl_pci.c --- a/tools/libxl/libxl_pci.c Mon Aug 02 15:47:38 2010 +0100 +++ b/tools/libxl/libxl_pci.c Mon Aug 02 15:51:38 2010 +0100 @@ -136,8 +136,13 @@ int libxl_device_pci_parse_bdf(libxl_ctx break; } *ptr = ''\0''; - if ( hex_convert(tok, &func, 0x7) ) - goto parse_error; + if ( !strcmp(tok, "*") ) { + pcidev->multifunction = 1; + }else{ + if ( hex_convert(tok, &func, 0x7) ) + goto parse_error; + pcidev->multifunction = 0; + } tok = ptr + 1; } break; @@ -531,6 +536,50 @@ int libxl_device_pci_list_assignable(lib return 0; } +static int pci_multifunction_check(libxl_ctx *ctx, libxl_device_pci *pcidev, unsigned int *func_mask) +{ + struct dirent *de; + DIR *dir; + + *func_mask = 0; + + dir = opendir(SYSFS_PCI_DEV); + if ( NULL == dir ) { + XL_LOG_ERRNO(ctx, XL_LOG_ERROR, "Couldn''t open %s", SYSFS_PCI_DEV); + return -1; + } + + while( (de = readdir(dir)) ) { + unsigned dom, bus, dev, func; + struct stat st; + char *path; + + if ( sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4 ) + continue; + if ( pcidev->domain != dom ) + continue; + if ( pcidev->bus != bus ) + continue; + if ( pcidev->dev != dev ) + continue; + + path = libxl_sprintf(ctx, "%s/" PCI_BDF, SYSFS_PCIBACK_DRIVER, dom, bus, dev, func); + if ( lstat(path, &st) ) { + if ( errno == ENOENT ) + XL_LOG(ctx, XL_LOG_ERROR, PCI_BDF " is not assigned to pciback driver", + dom, bus, dev, func); + else + XL_LOG_ERRNO(ctx, XL_LOG_ERROR, "Couldn''t lstat %s", path); + closedir(dir); + return -1; + } + (*func_mask) |= (1 << func); + } + + closedir(dir); + return 0; +} + static int pci_ins_check(libxl_ctx *ctx, uint32_t domid, const char *state, void *priv) { char *orig_state = priv; @@ -679,10 +728,32 @@ int libxl_device_pci_add(libxl_ctx *ctx, return rc; } + if ( pcidev->multifunction ) { + unsigned int i, orig_vdev, func_mask, rc = 0; + orig_vdev = pcidev->vdevfn & ~7U; + + if ( pci_multifunction_check(ctx, pcidev, &func_mask) ) + return ERROR_FAIL; + if ( !(pcidev->vdevfn >> 3) ) { + XL_LOG(ctx, XL_LOG_ERROR, "Must specify a v-slot for multi-function devices"); + return ERROR_FAIL; + } + + for(i = 7; i < 8; --i) { + if ( (1 << i) & func_mask ) { + pcidev->func = i; + pcidev->vdevfn = orig_vdev | i; + if ( do_pci_add(ctx, domid, pcidev) ) + rc = ERROR_FAIL; + } + } + return rc; + } + return do_pci_add(ctx, domid, pcidev); } -int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) +static int do_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) { libxl_device_pci *assigned; char *path; @@ -711,10 +782,15 @@ int libxl_device_pci_remove(libxl_ctx *c libxl_xs_write(ctx, XBT_NULL, path, PCI_BDF, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); path = libxl_sprintf(ctx, "/local/domain/0/device-model/%d/command", domid); - xs_write(ctx->xsh, XBT_NULL, path, "pci-rem", strlen("pci-rem")); - if (libxl_wait_for_device_model(ctx, domid, "pci-removed", NULL, NULL) < 0) { - XL_LOG(ctx, XL_LOG_ERROR, "Device Model didn''t respond in time"); - return ERROR_FAIL; + + /* Remove all functions at once atomically by only signalling + * device-model for function 0 */ + if ( !pcidev->multifunction || pcidev->func == 0 ) { + xs_write(ctx->xsh, XBT_NULL, path, "pci-rem", strlen("pci-rem")); + if (libxl_wait_for_device_model(ctx, domid, "pci-removed", NULL, NULL) < 0) { + XL_LOG(ctx, XL_LOG_ERROR, "Device Model didn''t respond in time"); + return ERROR_FAIL; + } } path = libxl_sprintf(ctx, "/local/domain/0/device-model/%d/state", domid); xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state)); @@ -769,7 +845,10 @@ skip1: fclose(f); } out: - libxl_device_pci_reset(ctx, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); + /* don''t do multiple resets while some functions are still passed through */ + if ( !pcidev->multifunction || pcidev->func == 0 ) { + libxl_device_pci_reset(ctx, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); + } if (!libxl_is_stubdom(ctx, domid, NULL)) { rc = xc_deassign_device(ctx->xch, domid, pcidev->value); @@ -786,6 +865,29 @@ out: return 0; } +int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) +{ + if ( pcidev->multifunction ) { + unsigned int i, orig_vdev, func_mask, rc; + orig_vdev = pcidev->vdevfn & ~7U; + + if ( pci_multifunction_check(ctx, pcidev, &func_mask) ) + return ERROR_FAIL; + + for(i = 7; i < 8; --i) { + if ( (1 << i) & func_mask ) { + pcidev->func = i; + pcidev->vdevfn = orig_vdev | i; + if ( do_pci_remove(ctx, domid, pcidev) ) + rc = ERROR_FAIL; + } + } + return rc; + } + + return do_pci_remove(ctx, domid, pcidev); +} + int libxl_device_pci_list_assigned(libxl_ctx *ctx, libxl_device_pci **list, uint32_t domid, int *num) { char *be_path, *num_devs, *xsdev, *xsvdevfn, *xsopts; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Stefano Stabellini
2010-Aug-04 14:52 UTC
[Xen-devel] Re: [PATCH 6 of 6] xl: implement multifunction device PCI pass-through
On Mon, 2 Aug 2010, Gianni Tedesco (3P) wrote:> tools/libxl/libxl.h | 1 + > tools/libxl/libxl_pci.c | 118 ++++++++++++++++++++++++++++++++++++++++++++--- > 2 files changed, 111 insertions(+), 8 deletions(-) > > > # HG changeset patch > # User Gianni Tedesco <gianni.tedesco@citrix.com> > # Date 1280760698 -3600 > # Branch pci-patches-v3 > # Node ID 6379343447e58cc7b83e27beba9c35e62c232d9d > # Parent eb8ee481bc063b18b39feaa76d9b757331ed3a9f > xl: implement multifunction device PCI pass-through > > Implement PCI pass-through for multi-function devices. The supported BDF > notation is: BB:DD.* - therefore passing-through a subset of functions or > remapping the function numbers is not supported. Largely because I don''t see > how that can ever be safe or sane (perhaps for SR-IOV cards?) > > Letting qemu automatically select the virtual slot is not supported since I > don''t believe that qemu-dm can actually handle that case. > > Signed-off-by: Gianni Tedesco <gianni.tedesco@citrix.com> > > diff -r eb8ee481bc06 -r 6379343447e5 tools/libxl/libxl.h > --- a/tools/libxl/libxl.h Mon Aug 02 15:47:38 2010 +0100 > +++ b/tools/libxl/libxl.h Mon Aug 02 15:51:38 2010 +0100 > @@ -308,6 +308,7 @@ typedef struct { > unsigned int vdevfn; > bool msitranslate; > bool power_mgmt; > + bool multifunction; > } libxl_device_pci; > > enum { > diff -r eb8ee481bc06 -r 6379343447e5 tools/libxl/libxl_pci.c > --- a/tools/libxl/libxl_pci.c Mon Aug 02 15:47:38 2010 +0100 > +++ b/tools/libxl/libxl_pci.c Mon Aug 02 15:51:38 2010 +0100 > @@ -136,8 +136,13 @@ int libxl_device_pci_parse_bdf(libxl_ctx > break; > } > *ptr = ''\0''; > - if ( hex_convert(tok, &func, 0x7) ) > - goto parse_error; > + if ( !strcmp(tok, "*") ) { > + pcidev->multifunction = 1; > + }else{ > + if ( hex_convert(tok, &func, 0x7) ) > + goto parse_error; > + pcidev->multifunction = 0; > + } > tok = ptr + 1; > } > break;Couldn''t you add the func_mask somewhere in libxl_device_pci, instead of calling pci_multifunction_check multilple times? Would be possible to make the normal passthrough case just a subset of the general multifunction passthrough case to simplify the code?> @@ -531,6 +536,50 @@ int libxl_device_pci_list_assignable(lib > return 0; > } > > +static int pci_multifunction_check(libxl_ctx *ctx, libxl_device_pci *pcidev, unsigned int *func_mask) > +{a comment on top of this function to explain what it is actually checking would be nice> + struct dirent *de; > + DIR *dir; > + > + *func_mask = 0; > + > + dir = opendir(SYSFS_PCI_DEV); > + if ( NULL == dir ) { > + XL_LOG_ERRNO(ctx, XL_LOG_ERROR, "Couldn''t open %s", SYSFS_PCI_DEV); > + return -1; > + } > + > + while( (de = readdir(dir)) ) { > + unsigned dom, bus, dev, func; > + struct stat st; > + char *path; > + > + if ( sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4 ) > + continue; > + if ( pcidev->domain != dom ) > + continue; > + if ( pcidev->bus != bus ) > + continue; > + if ( pcidev->dev != dev ) > + continue; > + > + path = libxl_sprintf(ctx, "%s/" PCI_BDF, SYSFS_PCIBACK_DRIVER, dom, bus, dev, func); > + if ( lstat(path, &st) ) { > + if ( errno == ENOENT ) > + XL_LOG(ctx, XL_LOG_ERROR, PCI_BDF " is not assigned to pciback driver", > + dom, bus, dev, func); > + else > + XL_LOG_ERRNO(ctx, XL_LOG_ERROR, "Couldn''t lstat %s", path); > + closedir(dir); > + return -1; > + } > + (*func_mask) |= (1 << func); > + } > + > + closedir(dir); > + return 0; > +} > + > static int pci_ins_check(libxl_ctx *ctx, uint32_t domid, const char *state, void *priv) > { > char *orig_state = priv; > @@ -679,10 +728,32 @@ int libxl_device_pci_add(libxl_ctx *ctx, > return rc; > } > > + if ( pcidev->multifunction ) { > + unsigned int i, orig_vdev, func_mask, rc = 0; > + orig_vdev = pcidev->vdevfn & ~7U; > + > + if ( pci_multifunction_check(ctx, pcidev, &func_mask) ) > + return ERROR_FAIL; > + if ( !(pcidev->vdevfn >> 3) ) { > + XL_LOG(ctx, XL_LOG_ERROR, "Must specify a v-slot for multi-function devices"); > + return ERROR_FAIL; > + } > + > + for(i = 7; i < 8; --i) {shouldn''t this be for(i = 7; i >= 0; --i)?> + if ( (1 << i) & func_mask ) { > + pcidev->func = i; > + pcidev->vdevfn = orig_vdev | i; > + if ( do_pci_add(ctx, domid, pcidev) ) > + rc = ERROR_FAIL; > + } > + } > + return rc; > + } > + > return do_pci_add(ctx, domid, pcidev); > } > > -int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) > +static int do_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) > { > libxl_device_pci *assigned; > char *path; > @@ -711,10 +782,15 @@ int libxl_device_pci_remove(libxl_ctx *c > libxl_xs_write(ctx, XBT_NULL, path, PCI_BDF, pcidev->domain, > pcidev->bus, pcidev->dev, pcidev->func); > path = libxl_sprintf(ctx, "/local/domain/0/device-model/%d/command", domid); > - xs_write(ctx->xsh, XBT_NULL, path, "pci-rem", strlen("pci-rem")); > - if (libxl_wait_for_device_model(ctx, domid, "pci-removed", NULL, NULL) < 0) { > - XL_LOG(ctx, XL_LOG_ERROR, "Device Model didn''t respond in time"); > - return ERROR_FAIL; > + > + /* Remove all functions at once atomically by only signalling > + * device-model for function 0 */ > + if ( !pcidev->multifunction || pcidev->func == 0 ) { > + xs_write(ctx->xsh, XBT_NULL, path, "pci-rem", strlen("pci-rem")); > + if (libxl_wait_for_device_model(ctx, domid, "pci-removed", NULL, NULL) < 0) { > + XL_LOG(ctx, XL_LOG_ERROR, "Device Model didn''t respond in time"); > + return ERROR_FAIL; > + } > } > path = libxl_sprintf(ctx, "/local/domain/0/device-model/%d/state", domid); > xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state)); > @@ -769,7 +845,10 @@ skip1: > fclose(f); > } > out: > - libxl_device_pci_reset(ctx, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); > + /* don''t do multiple resets while some functions are still passed through */ > + if ( !pcidev->multifunction || pcidev->func == 0 ) { > + libxl_device_pci_reset(ctx, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); > + } > > if (!libxl_is_stubdom(ctx, domid, NULL)) { > rc = xc_deassign_device(ctx->xch, domid, pcidev->value); > @@ -786,6 +865,29 @@ out: > return 0; > } > > +int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) > +{ > + if ( pcidev->multifunction ) { > + unsigned int i, orig_vdev, func_mask, rc; > + orig_vdev = pcidev->vdevfn & ~7U; > + > + if ( pci_multifunction_check(ctx, pcidev, &func_mask) ) > + return ERROR_FAIL; > + > + for(i = 7; i < 8; --i) {same here> + if ( (1 << i) & func_mask ) { > + pcidev->func = i; > + pcidev->vdevfn = orig_vdev | i; > + if ( do_pci_remove(ctx, domid, pcidev) ) > + rc = ERROR_FAIL; > + } > + } > + return rc; > + } > + > + return do_pci_remove(ctx, domid, pcidev); > +} > + > int libxl_device_pci_list_assigned(libxl_ctx *ctx, libxl_device_pci **list, uint32_t domid, int *num) > { > char *be_path, *num_devs, *xsdev, *xsvdevfn, *xsopts; >applied all the other patches in the series _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Gianni Tedesco
2010-Aug-04 15:07 UTC
[Xen-devel] Re: [PATCH 6 of 6] xl: implement multifunction device PCI pass-through
On Wed, 2010-08-04 at 15:52 +0100, Stefano Stabellini wrote:> On Mon, 2 Aug 2010, Gianni Tedesco (3P) wrote: > > tools/libxl/libxl.h | 1 + > > tools/libxl/libxl_pci.c | 118 ++++++++++++++++++++++++++++++++++++++++++++--- > > 2 files changed, 111 insertions(+), 8 deletions(-) > > > > > > # HG changeset patch > > # User Gianni Tedesco <gianni.tedesco@citrix.com> > > # Date 1280760698 -3600 > > # Branch pci-patches-v3 > > # Node ID 6379343447e58cc7b83e27beba9c35e62c232d9d > > # Parent eb8ee481bc063b18b39feaa76d9b757331ed3a9f > > xl: implement multifunction device PCI pass-through > > > > Implement PCI pass-through for multi-function devices. The supported BDF > > notation is: BB:DD.* - therefore passing-through a subset of functions or > > remapping the function numbers is not supported. Largely because I don''t see > > how that can ever be safe or sane (perhaps for SR-IOV cards?) > > > > Letting qemu automatically select the virtual slot is not supported since I > > don''t believe that qemu-dm can actually handle that case. > > > > Signed-off-by: Gianni Tedesco <gianni.tedesco@citrix.com> > > > > diff -r eb8ee481bc06 -r 6379343447e5 tools/libxl/libxl.h > > --- a/tools/libxl/libxl.h Mon Aug 02 15:47:38 2010 +0100 > > +++ b/tools/libxl/libxl.h Mon Aug 02 15:51:38 2010 +0100 > > @@ -308,6 +308,7 @@ typedef struct { > > unsigned int vdevfn; > > bool msitranslate; > > bool power_mgmt; > > + bool multifunction; > > } libxl_device_pci; > > > > enum { > > diff -r eb8ee481bc06 -r 6379343447e5 tools/libxl/libxl_pci.c > > --- a/tools/libxl/libxl_pci.c Mon Aug 02 15:47:38 2010 +0100 > > +++ b/tools/libxl/libxl_pci.c Mon Aug 02 15:51:38 2010 +0100 > > @@ -136,8 +136,13 @@ int libxl_device_pci_parse_bdf(libxl_ctx > > break; > > } > > *ptr = ''\0''; > > - if ( hex_convert(tok, &func, 0x7) ) > > - goto parse_error; > > + if ( !strcmp(tok, "*") ) { > > + pcidev->multifunction = 1; > > + }else{ > > + if ( hex_convert(tok, &func, 0x7) ) > > + goto parse_error; > > + pcidev->multifunction = 0; > > + } > > tok = ptr + 1; > > } > > break; > > Couldn''t you add the func_mask somewhere in libxl_device_pci, instead of > calling pci_multifunction_check multilple times? > Would be possible to make the normal passthrough case just a subset of > the general multifunction passthrough case to simplify the code?Not sure what you mean by this, the parsing parts don''t (and oughtn''t) frob about with sysfs nodes etc. The pci_multifunction_check function is called once per device add and once per device remove iff the parser had seen XX:YY.* notation indicating multifunction devices.> > > @@ -531,6 +536,50 @@ int libxl_device_pci_list_assignable(lib > > return 0; > > } > > > > +static int pci_multifunction_check(libxl_ctx *ctx, libxl_device_pci *pcidev, unsigned int *func_mask) > > +{ > > a comment on top of this function to explain what it is actually checking would be niceThis function checks that all functions of a device are bound to pciback driver. It also initialises a bit-mask of which function numbers are present on that device. I shall add the comment in next rev.> > + struct dirent *de; > > + DIR *dir; > > + > > + *func_mask = 0; > > + > > + dir = opendir(SYSFS_PCI_DEV); > > + if ( NULL == dir ) { > > + XL_LOG_ERRNO(ctx, XL_LOG_ERROR, "Couldn''t open %s", SYSFS_PCI_DEV); > > + return -1; > > + } > > + > > + while( (de = readdir(dir)) ) { > > + unsigned dom, bus, dev, func; > > + struct stat st; > > + char *path; > > + > > + if ( sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4 ) > > + continue; > > + if ( pcidev->domain != dom ) > > + continue; > > + if ( pcidev->bus != bus ) > > + continue; > > + if ( pcidev->dev != dev ) > > + continue; > > + > > + path = libxl_sprintf(ctx, "%s/" PCI_BDF, SYSFS_PCIBACK_DRIVER, dom, bus, dev, func); > > + if ( lstat(path, &st) ) { > > + if ( errno == ENOENT ) > > + XL_LOG(ctx, XL_LOG_ERROR, PCI_BDF " is not assigned to pciback driver", > > + dom, bus, dev, func); > > + else > > + XL_LOG_ERRNO(ctx, XL_LOG_ERROR, "Couldn''t lstat %s", path); > > + closedir(dir); > > + return -1; > > + } > > + (*func_mask) |= (1 << func); > > + } > > + > > + closedir(dir); > > + return 0; > > +} > > + > > static int pci_ins_check(libxl_ctx *ctx, uint32_t domid, const char *state, void *priv) > > { > > char *orig_state = priv; > > @@ -679,10 +728,32 @@ int libxl_device_pci_add(libxl_ctx *ctx, > > return rc; > > } > > > > + if ( pcidev->multifunction ) { > > + unsigned int i, orig_vdev, func_mask, rc = 0; > > + orig_vdev = pcidev->vdevfn & ~7U; > > + > > + if ( pci_multifunction_check(ctx, pcidev, &func_mask) ) > > + return ERROR_FAIL; > > + if ( !(pcidev->vdevfn >> 3) ) { > > + XL_LOG(ctx, XL_LOG_ERROR, "Must specify a v-slot for multi-function devices"); > > + return ERROR_FAIL; > > + } > > + > > + for(i = 7; i < 8; --i) { > > shouldn''t this be for(i = 7; i >= 0; --i)?That will generate a compiler warning since unsigned int''s are always >0. The code relies on an underflow condition. However, I could change it to int if you think that makes it clearer.> > + if ( (1 << i) & func_mask ) { > > + pcidev->func = i; > > + pcidev->vdevfn = orig_vdev | i; > > + if ( do_pci_add(ctx, domid, pcidev) ) > > + rc = ERROR_FAIL; > > + } > > + } > > + return rc; > > + } > > + > > return do_pci_add(ctx, domid, pcidev); > > } > > > > -int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) > > +static int do_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) > > { > > libxl_device_pci *assigned; > > char *path; > > @@ -711,10 +782,15 @@ int libxl_device_pci_remove(libxl_ctx *c > > libxl_xs_write(ctx, XBT_NULL, path, PCI_BDF, pcidev->domain, > > pcidev->bus, pcidev->dev, pcidev->func); > > path = libxl_sprintf(ctx, "/local/domain/0/device-model/%d/command", domid); > > - xs_write(ctx->xsh, XBT_NULL, path, "pci-rem", strlen("pci-rem")); > > - if (libxl_wait_for_device_model(ctx, domid, "pci-removed", NULL, NULL) < 0) { > > - XL_LOG(ctx, XL_LOG_ERROR, "Device Model didn''t respond in time"); > > - return ERROR_FAIL; > > + > > + /* Remove all functions at once atomically by only signalling > > + * device-model for function 0 */ > > + if ( !pcidev->multifunction || pcidev->func == 0 ) { > > + xs_write(ctx->xsh, XBT_NULL, path, "pci-rem", strlen("pci-rem")); > > + if (libxl_wait_for_device_model(ctx, domid, "pci-removed", NULL, NULL) < 0) { > > + XL_LOG(ctx, XL_LOG_ERROR, "Device Model didn''t respond in time"); > > + return ERROR_FAIL; > > + } > > } > > path = libxl_sprintf(ctx, "/local/domain/0/device-model/%d/state", domid); > > xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state)); > > @@ -769,7 +845,10 @@ skip1: > > fclose(f); > > } > > out: > > - libxl_device_pci_reset(ctx, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); > > + /* don''t do multiple resets while some functions are still passed through */ > > + if ( !pcidev->multifunction || pcidev->func == 0 ) { > > + libxl_device_pci_reset(ctx, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); > > + } > > > > if (!libxl_is_stubdom(ctx, domid, NULL)) { > > rc = xc_deassign_device(ctx->xch, domid, pcidev->value); > > @@ -786,6 +865,29 @@ out: > > return 0; > > } > > > > +int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) > > +{ > > + if ( pcidev->multifunction ) { > > + unsigned int i, orig_vdev, func_mask, rc; > > + orig_vdev = pcidev->vdevfn & ~7U; > > + > > + if ( pci_multifunction_check(ctx, pcidev, &func_mask) ) > > + return ERROR_FAIL; > > + > > + for(i = 7; i < 8; --i) { > > same here > > > + if ( (1 << i) & func_mask ) { > > + pcidev->func = i; > > + pcidev->vdevfn = orig_vdev | i; > > + if ( do_pci_remove(ctx, domid, pcidev) ) > > + rc = ERROR_FAIL; > > + } > > + } > > + return rc; > > + } > > + > > + return do_pci_remove(ctx, domid, pcidev); > > +} > > + > > int libxl_device_pci_list_assigned(libxl_ctx *ctx, libxl_device_pci **list, uint32_t domid, int *num) > > { > > char *be_path, *num_devs, *xsdev, *xsvdevfn, *xsopts; > > > > > applied all the other patches in the seriesOK thanks. I will also need to handle cleanup in the error path of multifunction device addition as discussed. This can actually happen quite easily if function 0 alone is assigned to another domain because pci_multifunction_check() doesn''t look in xenstore and also because of missing safety checks. Specifically, when adding a non-multifunction device we need a check to make sure that the physical device really has only one function. I am thinking that we probably ought to scrap the XX:YY.* notation and enforce that function is ALWAYS set to 0. Then when adding/removing the device we should check for additional functions and handle them transparently to the user. As I said in the commit message I am unaware of any valid uses for separating functions but I need to look at how SR-IOV works. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Stefano Stabellini
2010-Aug-04 15:47 UTC
[Xen-devel] Re: [PATCH 6 of 6] xl: implement multifunction device PCI pass-through
On Wed, 4 Aug 2010, Gianni Tedesco (3P) wrote:> On Wed, 2010-08-04 at 15:52 +0100, Stefano Stabellini wrote: > > On Mon, 2 Aug 2010, Gianni Tedesco (3P) wrote: > > > tools/libxl/libxl.h | 1 + > > > tools/libxl/libxl_pci.c | 118 ++++++++++++++++++++++++++++++++++++++++++++--- > > > 2 files changed, 111 insertions(+), 8 deletions(-) > > > > > > > > > # HG changeset patch > > > # User Gianni Tedesco <gianni.tedesco@citrix.com> > > > # Date 1280760698 -3600 > > > # Branch pci-patches-v3 > > > # Node ID 6379343447e58cc7b83e27beba9c35e62c232d9d > > > # Parent eb8ee481bc063b18b39feaa76d9b757331ed3a9f > > > xl: implement multifunction device PCI pass-through > > > > > > Implement PCI pass-through for multi-function devices. The supported BDF > > > notation is: BB:DD.* - therefore passing-through a subset of functions or > > > remapping the function numbers is not supported. Largely because I don''t see > > > how that can ever be safe or sane (perhaps for SR-IOV cards?) > > > > > > Letting qemu automatically select the virtual slot is not supported since I > > > don''t believe that qemu-dm can actually handle that case. > > > > > > Signed-off-by: Gianni Tedesco <gianni.tedesco@citrix.com> > > > > > > diff -r eb8ee481bc06 -r 6379343447e5 tools/libxl/libxl.h > > > --- a/tools/libxl/libxl.h Mon Aug 02 15:47:38 2010 +0100 > > > +++ b/tools/libxl/libxl.h Mon Aug 02 15:51:38 2010 +0100 > > > @@ -308,6 +308,7 @@ typedef struct { > > > unsigned int vdevfn; > > > bool msitranslate; > > > bool power_mgmt; > > > + bool multifunction; > > > } libxl_device_pci; > > > > > > enum { > > > diff -r eb8ee481bc06 -r 6379343447e5 tools/libxl/libxl_pci.c > > > --- a/tools/libxl/libxl_pci.c Mon Aug 02 15:47:38 2010 +0100 > > > +++ b/tools/libxl/libxl_pci.c Mon Aug 02 15:51:38 2010 +0100 > > > @@ -136,8 +136,13 @@ int libxl_device_pci_parse_bdf(libxl_ctx > > > break; > > > } > > > *ptr = ''\0''; > > > - if ( hex_convert(tok, &func, 0x7) ) > > > - goto parse_error; > > > + if ( !strcmp(tok, "*") ) { > > > + pcidev->multifunction = 1; > > > + }else{ > > > + if ( hex_convert(tok, &func, 0x7) ) > > > + goto parse_error; > > > + pcidev->multifunction = 0; > > > + } > > > tok = ptr + 1; > > > } > > > break; > > > > Couldn''t you add the func_mask somewhere in libxl_device_pci, instead of > > calling pci_multifunction_check multilple times? > > Would be possible to make the normal passthrough case just a subset of > > the general multifunction passthrough case to simplify the code? > > Not sure what you mean by this, the parsing parts don''t (and oughtn''t) > frob about with sysfs nodes etc. The pci_multifunction_check function is > called once per device add and once per device remove iff the parser had > seen XX:YY.* notation indicating multifunction devices. >I mean we could have a mask instead of an interger in libxl_device_pci to specify which functions should be assigned and in the single function case the mask will have only one bit set. At this point single function passthrough become just a case of multifunction passthrough. I am not entirely sure that this approach would produce better code though.> > > > > @@ -531,6 +536,50 @@ int libxl_device_pci_list_assignable(lib > > > return 0; > > > } > > > > > > +static int pci_multifunction_check(libxl_ctx *ctx, libxl_device_pci *pcidev, unsigned int *func_mask) > > > +{ > > > > a comment on top of this function to explain what it is actually checking would be nice > > This function checks that all functions of a device are bound to pciback > driver. It also initialises a bit-mask of which function numbers are > present on that device. > > I shall add the comment in next rev. > > > > + struct dirent *de; > > > + DIR *dir; > > > + > > > + *func_mask = 0; > > > + > > > + dir = opendir(SYSFS_PCI_DEV); > > > + if ( NULL == dir ) { > > > + XL_LOG_ERRNO(ctx, XL_LOG_ERROR, "Couldn''t open %s", SYSFS_PCI_DEV); > > > + return -1; > > > + } > > > + > > > + while( (de = readdir(dir)) ) { > > > + unsigned dom, bus, dev, func; > > > + struct stat st; > > > + char *path; > > > + > > > + if ( sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4 ) > > > + continue; > > > + if ( pcidev->domain != dom ) > > > + continue; > > > + if ( pcidev->bus != bus ) > > > + continue; > > > + if ( pcidev->dev != dev ) > > > + continue; > > > + > > > + path = libxl_sprintf(ctx, "%s/" PCI_BDF, SYSFS_PCIBACK_DRIVER, dom, bus, dev, func); > > > + if ( lstat(path, &st) ) { > > > + if ( errno == ENOENT ) > > > + XL_LOG(ctx, XL_LOG_ERROR, PCI_BDF " is not assigned to pciback driver", > > > + dom, bus, dev, func); > > > + else > > > + XL_LOG_ERRNO(ctx, XL_LOG_ERROR, "Couldn''t lstat %s", path); > > > + closedir(dir); > > > + return -1; > > > + } > > > + (*func_mask) |= (1 << func); > > > + } > > > + > > > + closedir(dir); > > > + return 0; > > > +} > > > + > > > static int pci_ins_check(libxl_ctx *ctx, uint32_t domid, const char *state, void *priv) > > > { > > > char *orig_state = priv; > > > @@ -679,10 +728,32 @@ int libxl_device_pci_add(libxl_ctx *ctx, > > > return rc; > > > } > > > > > > + if ( pcidev->multifunction ) { > > > + unsigned int i, orig_vdev, func_mask, rc = 0; > > > + orig_vdev = pcidev->vdevfn & ~7U; > > > + > > > + if ( pci_multifunction_check(ctx, pcidev, &func_mask) ) > > > + return ERROR_FAIL; > > > + if ( !(pcidev->vdevfn >> 3) ) { > > > + XL_LOG(ctx, XL_LOG_ERROR, "Must specify a v-slot for multi-function devices"); > > > + return ERROR_FAIL; > > > + } > > > + > > > + for(i = 7; i < 8; --i) { > > > > shouldn''t this be for(i = 7; i >= 0; --i)? > > That will generate a compiler warning since unsigned int''s are always >> 0. The code relies on an underflow condition. However, I could change it > to int if you think that makes it clearer. >yeah please do that> > > + if ( (1 << i) & func_mask ) { > > > + pcidev->func = i; > > > + pcidev->vdevfn = orig_vdev | i; > > > + if ( do_pci_add(ctx, domid, pcidev) ) > > > + rc = ERROR_FAIL; > > > + } > > > + } > > > + return rc; > > > + } > > > + > > > return do_pci_add(ctx, domid, pcidev); > > > } > > > > > > -int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) > > > +static int do_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) > > > { > > > libxl_device_pci *assigned; > > > char *path; > > > @@ -711,10 +782,15 @@ int libxl_device_pci_remove(libxl_ctx *c > > > libxl_xs_write(ctx, XBT_NULL, path, PCI_BDF, pcidev->domain, > > > pcidev->bus, pcidev->dev, pcidev->func); > > > path = libxl_sprintf(ctx, "/local/domain/0/device-model/%d/command", domid); > > > - xs_write(ctx->xsh, XBT_NULL, path, "pci-rem", strlen("pci-rem")); > > > - if (libxl_wait_for_device_model(ctx, domid, "pci-removed", NULL, NULL) < 0) { > > > - XL_LOG(ctx, XL_LOG_ERROR, "Device Model didn''t respond in time"); > > > - return ERROR_FAIL; > > > + > > > + /* Remove all functions at once atomically by only signalling > > > + * device-model for function 0 */ > > > + if ( !pcidev->multifunction || pcidev->func == 0 ) { > > > + xs_write(ctx->xsh, XBT_NULL, path, "pci-rem", strlen("pci-rem")); > > > + if (libxl_wait_for_device_model(ctx, domid, "pci-removed", NULL, NULL) < 0) { > > > + XL_LOG(ctx, XL_LOG_ERROR, "Device Model didn''t respond in time"); > > > + return ERROR_FAIL; > > > + } > > > } > > > path = libxl_sprintf(ctx, "/local/domain/0/device-model/%d/state", domid); > > > xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state)); > > > @@ -769,7 +845,10 @@ skip1: > > > fclose(f); > > > } > > > out: > > > - libxl_device_pci_reset(ctx, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); > > > + /* don''t do multiple resets while some functions are still passed through */ > > > + if ( !pcidev->multifunction || pcidev->func == 0 ) { > > > + libxl_device_pci_reset(ctx, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); > > > + } > > > > > > if (!libxl_is_stubdom(ctx, domid, NULL)) { > > > rc = xc_deassign_device(ctx->xch, domid, pcidev->value); > > > @@ -786,6 +865,29 @@ out: > > > return 0; > > > } > > > > > > +int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) > > > +{ > > > + if ( pcidev->multifunction ) { > > > + unsigned int i, orig_vdev, func_mask, rc; > > > + orig_vdev = pcidev->vdevfn & ~7U; > > > + > > > + if ( pci_multifunction_check(ctx, pcidev, &func_mask) ) > > > + return ERROR_FAIL; > > > + > > > + for(i = 7; i < 8; --i) { > > > > same here > > > > > + if ( (1 << i) & func_mask ) { > > > + pcidev->func = i; > > > + pcidev->vdevfn = orig_vdev | i; > > > + if ( do_pci_remove(ctx, domid, pcidev) ) > > > + rc = ERROR_FAIL; > > > + } > > > + } > > > + return rc; > > > + } > > > + > > > + return do_pci_remove(ctx, domid, pcidev); > > > +} > > > + > > > int libxl_device_pci_list_assigned(libxl_ctx *ctx, libxl_device_pci **list, uint32_t domid, int *num) > > > { > > > char *be_path, *num_devs, *xsdev, *xsvdevfn, *xsopts; > > > > > > > > > applied all the other patches in the series > > OK thanks. > > I will also need to handle cleanup in the error path of multifunction > device addition as discussed. This can actually happen quite easily if > function 0 alone is assigned to another domain because > pci_multifunction_check() doesn''t look in xenstore and also because of > missing safety checks. Specifically, when adding a non-multifunction > device we need a check to make sure that the physical device really has > only one function. > > I am thinking that we probably ought to scrap the XX:YY.* notation and > enforce that function is ALWAYS set to 0. Then when adding/removing the > device we should check for additional functions and handle them > transparently to the user. As I said in the commit message I am unaware > of any valid uses for separating functions but I need to look at how > SR-IOV works. > >With SR-IOV devices you can easily have valid BDFs like 0000:05:1c.2, so enforcing that function is always 0 is not a good idea. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Gianni Tedesco
2010-Aug-04 15:51 UTC
[Xen-devel] Re: [PATCH 6 of 6] xl: implement multifunction device PCI pass-through
On Wed, 2010-08-04 at 16:47 +0100, Stefano Stabellini wrote:> On Wed, 4 Aug 2010, Gianni Tedesco (3P) wrote: > > On Wed, 2010-08-04 at 15:52 +0100, Stefano Stabellini wrote: > > > On Mon, 2 Aug 2010, Gianni Tedesco (3P) wrote: > > > > tools/libxl/libxl.h | 1 + > > > > tools/libxl/libxl_pci.c | 118 ++++++++++++++++++++++++++++++++++++++++++++--- > > > > 2 files changed, 111 insertions(+), 8 deletions(-) > > > > > > > > > > > > # HG changeset patch > > > > # User Gianni Tedesco <gianni.tedesco@citrix.com> > > > > # Date 1280760698 -3600 > > > > # Branch pci-patches-v3 > > > > # Node ID 6379343447e58cc7b83e27beba9c35e62c232d9d > > > > # Parent eb8ee481bc063b18b39feaa76d9b757331ed3a9f > > > > xl: implement multifunction device PCI pass-through > > > > > > > > Implement PCI pass-through for multi-function devices. The supported BDF > > > > notation is: BB:DD.* - therefore passing-through a subset of functions or > > > > remapping the function numbers is not supported. Largely because I don''t see > > > > how that can ever be safe or sane (perhaps for SR-IOV cards?) > > > > > > > > Letting qemu automatically select the virtual slot is not supported since I > > > > don''t believe that qemu-dm can actually handle that case. > > > > > > > > Signed-off-by: Gianni Tedesco <gianni.tedesco@citrix.com> > > > > > > > > diff -r eb8ee481bc06 -r 6379343447e5 tools/libxl/libxl.h > > > > --- a/tools/libxl/libxl.h Mon Aug 02 15:47:38 2010 +0100 > > > > +++ b/tools/libxl/libxl.h Mon Aug 02 15:51:38 2010 +0100 > > > > @@ -308,6 +308,7 @@ typedef struct { > > > > unsigned int vdevfn; > > > > bool msitranslate; > > > > bool power_mgmt; > > > > + bool multifunction; > > > > } libxl_device_pci; > > > > > > > > enum { > > > > diff -r eb8ee481bc06 -r 6379343447e5 tools/libxl/libxl_pci.c > > > > --- a/tools/libxl/libxl_pci.c Mon Aug 02 15:47:38 2010 +0100 > > > > +++ b/tools/libxl/libxl_pci.c Mon Aug 02 15:51:38 2010 +0100 > > > > @@ -136,8 +136,13 @@ int libxl_device_pci_parse_bdf(libxl_ctx > > > > break; > > > > } > > > > *ptr = ''\0''; > > > > - if ( hex_convert(tok, &func, 0x7) ) > > > > - goto parse_error; > > > > + if ( !strcmp(tok, "*") ) { > > > > + pcidev->multifunction = 1; > > > > + }else{ > > > > + if ( hex_convert(tok, &func, 0x7) ) > > > > + goto parse_error; > > > > + pcidev->multifunction = 0; > > > > + } > > > > tok = ptr + 1; > > > > } > > > > break; > > > > > > Couldn''t you add the func_mask somewhere in libxl_device_pci, instead of > > > calling pci_multifunction_check multilple times? > > > Would be possible to make the normal passthrough case just a subset of > > > the general multifunction passthrough case to simplify the code? > > > > Not sure what you mean by this, the parsing parts don''t (and oughtn''t) > > frob about with sysfs nodes etc. The pci_multifunction_check function is > > called once per device add and once per device remove iff the parser had > > seen XX:YY.* notation indicating multifunction devices. > > > > I mean we could have a mask instead of an interger in libxl_device_pci > to specify which functions should be assigned and in the single function > case the mask will have only one bit set. > At this point single function passthrough become just a case of > multifunction passthrough. > I am not entirely sure that this approach would produce better code > though.I see what you mean. I could set it to #define PCI_FUNCTION_EVERYTHING ~0 at parse-time then in multifunction_check unset all the non-existent functions.> > > > > > > > @@ -531,6 +536,50 @@ int libxl_device_pci_list_assignable(lib > > > > return 0; > > > > } > > > > > > > > +static int pci_multifunction_check(libxl_ctx *ctx, libxl_device_pci *pcidev, unsigned int *func_mask) > > > > +{ > > > > > > a comment on top of this function to explain what it is actually checking would be nice > > > > This function checks that all functions of a device are bound to pciback > > driver. It also initialises a bit-mask of which function numbers are > > present on that device. > > > > I shall add the comment in next rev. > > > > > > + struct dirent *de; > > > > + DIR *dir; > > > > + > > > > + *func_mask = 0; > > > > + > > > > + dir = opendir(SYSFS_PCI_DEV); > > > > + if ( NULL == dir ) { > > > > + XL_LOG_ERRNO(ctx, XL_LOG_ERROR, "Couldn''t open %s", SYSFS_PCI_DEV); > > > > + return -1; > > > > + } > > > > + > > > > + while( (de = readdir(dir)) ) { > > > > + unsigned dom, bus, dev, func; > > > > + struct stat st; > > > > + char *path; > > > > + > > > > + if ( sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4 ) > > > > + continue; > > > > + if ( pcidev->domain != dom ) > > > > + continue; > > > > + if ( pcidev->bus != bus ) > > > > + continue; > > > > + if ( pcidev->dev != dev ) > > > > + continue; > > > > + > > > > + path = libxl_sprintf(ctx, "%s/" PCI_BDF, SYSFS_PCIBACK_DRIVER, dom, bus, dev, func); > > > > + if ( lstat(path, &st) ) { > > > > + if ( errno == ENOENT ) > > > > + XL_LOG(ctx, XL_LOG_ERROR, PCI_BDF " is not assigned to pciback driver", > > > > + dom, bus, dev, func); > > > > + else > > > > + XL_LOG_ERRNO(ctx, XL_LOG_ERROR, "Couldn''t lstat %s", path); > > > > + closedir(dir); > > > > + return -1; > > > > + } > > > > + (*func_mask) |= (1 << func); > > > > + } > > > > + > > > > + closedir(dir); > > > > + return 0; > > > > +} > > > > + > > > > static int pci_ins_check(libxl_ctx *ctx, uint32_t domid, const char *state, void *priv) > > > > { > > > > char *orig_state = priv; > > > > @@ -679,10 +728,32 @@ int libxl_device_pci_add(libxl_ctx *ctx, > > > > return rc; > > > > } > > > > > > > > + if ( pcidev->multifunction ) { > > > > + unsigned int i, orig_vdev, func_mask, rc = 0; > > > > + orig_vdev = pcidev->vdevfn & ~7U; > > > > + > > > > + if ( pci_multifunction_check(ctx, pcidev, &func_mask) ) > > > > + return ERROR_FAIL; > > > > + if ( !(pcidev->vdevfn >> 3) ) { > > > > + XL_LOG(ctx, XL_LOG_ERROR, "Must specify a v-slot for multi-function devices"); > > > > + return ERROR_FAIL; > > > > + } > > > > + > > > > + for(i = 7; i < 8; --i) { > > > > > > shouldn''t this be for(i = 7; i >= 0; --i)? > > > > That will generate a compiler warning since unsigned int''s are always >> > 0. The code relies on an underflow condition. However, I could change it > > to int if you think that makes it clearer. > > > > yeah please do thatOK> > > > + if ( (1 << i) & func_mask ) { > > > > + pcidev->func = i; > > > > + pcidev->vdevfn = orig_vdev | i; > > > > + if ( do_pci_add(ctx, domid, pcidev) ) > > > > + rc = ERROR_FAIL; > > > > + } > > > > + } > > > > + return rc; > > > > + } > > > > + > > > > return do_pci_add(ctx, domid, pcidev); > > > > } > > > > > > > > -int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) > > > > +static int do_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) > > > > { > > > > libxl_device_pci *assigned; > > > > char *path; > > > > @@ -711,10 +782,15 @@ int libxl_device_pci_remove(libxl_ctx *c > > > > libxl_xs_write(ctx, XBT_NULL, path, PCI_BDF, pcidev->domain, > > > > pcidev->bus, pcidev->dev, pcidev->func); > > > > path = libxl_sprintf(ctx, "/local/domain/0/device-model/%d/command", domid); > > > > - xs_write(ctx->xsh, XBT_NULL, path, "pci-rem", strlen("pci-rem")); > > > > - if (libxl_wait_for_device_model(ctx, domid, "pci-removed", NULL, NULL) < 0) { > > > > - XL_LOG(ctx, XL_LOG_ERROR, "Device Model didn''t respond in time"); > > > > - return ERROR_FAIL; > > > > + > > > > + /* Remove all functions at once atomically by only signalling > > > > + * device-model for function 0 */ > > > > + if ( !pcidev->multifunction || pcidev->func == 0 ) { > > > > + xs_write(ctx->xsh, XBT_NULL, path, "pci-rem", strlen("pci-rem")); > > > > + if (libxl_wait_for_device_model(ctx, domid, "pci-removed", NULL, NULL) < 0) { > > > > + XL_LOG(ctx, XL_LOG_ERROR, "Device Model didn''t respond in time"); > > > > + return ERROR_FAIL; > > > > + } > > > > } > > > > path = libxl_sprintf(ctx, "/local/domain/0/device-model/%d/state", domid); > > > > xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state)); > > > > @@ -769,7 +845,10 @@ skip1: > > > > fclose(f); > > > > } > > > > out: > > > > - libxl_device_pci_reset(ctx, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); > > > > + /* don''t do multiple resets while some functions are still passed through */ > > > > + if ( !pcidev->multifunction || pcidev->func == 0 ) { > > > > + libxl_device_pci_reset(ctx, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func); > > > > + } > > > > > > > > if (!libxl_is_stubdom(ctx, domid, NULL)) { > > > > rc = xc_deassign_device(ctx->xch, domid, pcidev->value); > > > > @@ -786,6 +865,29 @@ out: > > > > return 0; > > > > } > > > > > > > > +int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) > > > > +{ > > > > + if ( pcidev->multifunction ) { > > > > + unsigned int i, orig_vdev, func_mask, rc; > > > > + orig_vdev = pcidev->vdevfn & ~7U; > > > > + > > > > + if ( pci_multifunction_check(ctx, pcidev, &func_mask) ) > > > > + return ERROR_FAIL; > > > > + > > > > + for(i = 7; i < 8; --i) { > > > > > > same here > > > > > > > + if ( (1 << i) & func_mask ) { > > > > + pcidev->func = i; > > > > + pcidev->vdevfn = orig_vdev | i; > > > > + if ( do_pci_remove(ctx, domid, pcidev) ) > > > > + rc = ERROR_FAIL; > > > > + } > > > > + } > > > > + return rc; > > > > + } > > > > + > > > > + return do_pci_remove(ctx, domid, pcidev); > > > > +} > > > > + > > > > int libxl_device_pci_list_assigned(libxl_ctx *ctx, libxl_device_pci **list, uint32_t domid, int *num) > > > > { > > > > char *be_path, *num_devs, *xsdev, *xsvdevfn, *xsopts; > > > > > > > > > > > > > applied all the other patches in the series > > > > OK thanks. > > > > I will also need to handle cleanup in the error path of multifunction > > device addition as discussed. This can actually happen quite easily if > > function 0 alone is assigned to another domain because > > pci_multifunction_check() doesn''t look in xenstore and also because of > > missing safety checks. Specifically, when adding a non-multifunction > > device we need a check to make sure that the physical device really has > > only one function. > > > > I am thinking that we probably ought to scrap the XX:YY.* notation and > > enforce that function is ALWAYS set to 0. Then when adding/removing the > > device we should check for additional functions and handle them > > transparently to the user. As I said in the commit message I am unaware > > of any valid uses for separating functions but I need to look at how > > SR-IOV works. > > > > > > With SR-IOV devices you can easily have valid BDFs like 0000:05:1c.2, so > enforcing that function is always 0 is not a good idea.I will have to look in to that and try come up something that is safe in all such cases. For now I will just re-work the patch along the lines you mentioned. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel