Zhao, Yu
2008-Sep-27 08:59 UTC
[Xen-devel] [PATCH 4/9] dom0 PCI: support SR-IOV capability
Add Single Root I/O Virtualization (SR-IOV) support. Signed-off-by: Yu Zhao <yu.zhao@intel.com> diff -r 040046b91eb7 -r 75504b97c0ab drivers/pci/Kconfig --- a/drivers/pci/Kconfig Sat Sep 27 01:25:31 2008 -0400 +++ b/drivers/pci/Kconfig Sat Sep 27 01:27:01 2008 -0400 @@ -27,3 +27,14 @@ When in doubt, say N. +config PCI_IOV + bool "PCI SR-IOV support" + depends on PCI + select PCI_MSI + default n + help + This option allows device drivers to enable Single Root I/O + Virtualization. Each Virtual Function's PCI configuration + space can be accessed using its own Bus, Device and Function + Number (Routing ID). Each Virtual Function also has PCI Memory + Space, which is used to map its own register set. diff -r 040046b91eb7 -r 75504b97c0ab drivers/pci/Makefile --- a/drivers/pci/Makefile Sat Sep 27 01:25:31 2008 -0400 +++ b/drivers/pci/Makefile Sat Sep 27 01:27:01 2008 -0400 @@ -47,3 +47,5 @@ ifeq ($(CONFIG_PCI_DEBUG),y) EXTRA_CFLAGS += -DDEBUG endif + +obj-$(CONFIG_PCI_IOV) += iov.o diff -r 040046b91eb7 -r 75504b97c0ab drivers/pci/iov.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/drivers/pci/iov.c Sat Sep 27 01:27:01 2008 -0400 @@ -0,0 +1,832 @@ +/* + * drivers/pci/iov.c + * + * Copyright (C) 2008 Intel Corporation + * + * PCI Express Single Root I/O Virtualization capability support. 
+ */ + +#include <linux/ctype.h> +#include <linux/string.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <asm/page.h> +#include "pci.h" + +#define VF_NAME_LEN 8 + + +struct iov_attr { + struct attribute attr; + ssize_t (*show)(struct kobject *, + struct iov_attr *, char *); + ssize_t (*store)(struct kobject *, + struct iov_attr *, const char *, size_t); +}; + +#define iov_config_attr(field) \ +static ssize_t field##_show(struct kobject *kobj, \ + struct iov_attr *attr, char *buf) \ +{ \ + struct pci_iov *iov = container_of(kobj, struct pci_iov, kobj); \ + \ + return sprintf(buf, "%d\n", iov->field); \ +} + +iov_config_attr(is_enabled); +iov_config_attr(totalvfs); +iov_config_attr(initialvfs); +iov_config_attr(numvfs); + +struct vf_entry { + int vfn; + struct kobject kobj; + struct pci_iov *iov; + struct iov_attr *attr; + char name[VF_NAME_LEN]; + char (*param)[PCI_IOV_PARAM_LEN]; +}; + +static ssize_t iov_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct iov_attr *ia = container_of(attr, struct iov_attr, attr); + + return ia->show ? ia->show(kobj, ia, buf) : -EIO; +} + +static ssize_t iov_attr_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t len) +{ + struct iov_attr *ia = container_of(attr, struct iov_attr, attr); + + return ia->store ? 
ia->store(kobj, ia, buf, len) : -EIO; +} + +static struct sysfs_ops iov_attr_ops = { + .show = iov_attr_show, + .store = iov_attr_store, +}; + +static struct kobj_type iov_ktype = { + .sysfs_ops = &iov_attr_ops, +}; + +static inline void vf_rid(struct pci_dev *dev, int vfn, u8 *busnr, u8 *devfn) +{ + u16 rid; + + rid = (dev->bus->number << 8) + dev->devfn + + dev->iov->offset + dev->iov->stride * vfn; + *busnr = rid >> 8; + *devfn = rid & 0xff; +} + +static int vf_add(struct pci_dev *dev, int vfn) +{ + int i; + int rc; + u8 busnr, devfn; + unsigned long size; + struct pci_dev *new; + struct pci_bus *bus; + struct resource *res; + + vf_rid(dev, vfn, &busnr, &devfn); + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return -ENOMEM; + + list_for_each_entry(bus, &dev->bus->children, node) + if (bus->number == busnr) { + new->bus = bus; + break; + } + + BUG_ON(!new->bus); + new->sysdata = bus->sysdata; + new->dev.parent = dev->dev.parent; + new->dev.bus = dev->dev.bus; + new->devfn = devfn; + new->hdr_type = PCI_HEADER_TYPE_NORMAL; + new->multifunction = 0; + new->vendor = dev->vendor; + pci_read_config_word(dev, dev->iov->cap + PCI_IOV_VF_DID, &new->device); + new->cfg_size = PCI_CFG_SPACE_EXP_SIZE; + new->error_state = pci_channel_io_normal; + new->dma_mask = 0xffffffff; + + sprintf(pci_name(new), "%04x:%02x:%02x.%d", pci_domain_nr(bus), + busnr, PCI_SLOT(devfn), PCI_FUNC(devfn)); + + new->class = dev->class; + new->current_state = PCI_UNKNOWN; + new->irq = 0; + + for (i = 0; i < PCI_IOV_NUM_BAR; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + if (!res->parent) + continue; + new->resource[i].name = pci_name(new); + new->resource[i].flags = res->flags; + size = (res->end - res->start + 1) / dev->iov->totalvfs; + new->resource[i].start = res->start + size * vfn; + new->resource[i].end = new->resource[i].start + size - 1; + rc = request_resource(res, &new->resource[i]); + BUG_ON(rc); + } + + new->subsystem_vendor = dev->subsystem_vendor; + 
pci_read_config_word(new, PCI_SUBSYSTEM_ID, &new->subsystem_device); + + pci_device_add(new, bus); + pci_bus_add_device(new); + return 0; +} + +static void vf_remove(struct pci_dev *dev, int vfn) +{ + u8 busnr, devfn; + struct pci_dev *tmp; + + vf_rid(dev, vfn, &busnr, &devfn); + + tmp = pci_find_slot(busnr, devfn); + if (!tmp) + return; + pci_remove_bus_device(tmp); +} + +static int iov_enable(struct pci_iov *iov) +{ + int rc; + int i, j; + u16 ctrl; + + if (!iov->notify) + return -ENODEV; + + if (iov->is_enabled) + return 0; + + iov->notify(iov->dev, iov->numvfs | PCI_IOV_ENABLE); + pci_read_config_word(iov->dev, iov->cap + PCI_IOV_CTRL, &ctrl); + ctrl |= (PCI_IOV_CTRL_VFE | PCI_IOV_CTRL_MSE); + pci_write_config_word(iov->dev, iov->cap + PCI_IOV_CTRL, ctrl); + ssleep(1); + + for (i = 0; i < iov->numvfs; i++) { + rc = vf_add(iov->dev, i); + if (rc) + goto failed; + } + + iov->notify(iov->dev, iov->numvfs | + PCI_IOV_ENABLE | PCI_IOV_POST_EVENT); + iov->is_enabled = 1; + return 0; + +failed: + for (j = 0; j < i; j++) + vf_remove(iov->dev, j); + + pci_read_config_word(iov->dev, iov->cap + PCI_IOV_CTRL, &ctrl); + ctrl &= ~(PCI_IOV_CTRL_VFE | PCI_IOV_CTRL_MSE); + pci_write_config_word(iov->dev, iov->cap + PCI_IOV_CTRL, ctrl); + ssleep(1); + + return rc; +} + +static int iov_disable(struct pci_iov *iov) +{ + int i; + u16 ctrl; + + if (!iov->notify) + return -ENODEV; + + if (!iov->is_enabled) + return 0; + + iov->notify(iov->dev, PCI_IOV_DISABLE); + for (i = 0; i < iov->numvfs; i++) + vf_remove(iov->dev, i); + + pci_read_config_word(iov->dev, iov->cap + PCI_IOV_CTRL, &ctrl); + ctrl &= ~(PCI_IOV_CTRL_VFE | PCI_IOV_CTRL_MSE); + pci_write_config_word(iov->dev, iov->cap + PCI_IOV_CTRL, ctrl); + ssleep(1); + + iov->notify(iov->dev, PCI_IOV_DISABLE | PCI_IOV_POST_EVENT); + iov->is_enabled = 0; + return 0; +} + +static int iov_set_numvfs(struct pci_iov *iov, int numvfs) +{ + u16 offset, stride; + + if (!iov->notify) + return -ENODEV; + + if (numvfs == iov->numvfs) + return 0; 
+ + if (numvfs < 0 || numvfs > iov->initialvfs || iov->is_enabled) + return -EINVAL; + + pci_write_config_word(iov->dev, iov->cap + PCI_IOV_NUM_VF, numvfs); + pci_read_config_word(iov->dev, iov->cap + PCI_IOV_VF_OFFSET, &offset); + pci_read_config_word(iov->dev, iov->cap + PCI_IOV_VF_STRIDE, &stride); + if ((numvfs && !offset) || (numvfs > 1 && !stride)) + return -EIO; + + iov->offset = offset; + iov->stride = stride; + iov->numvfs = numvfs; + return 0; +} + +static ssize_t is_enabled_store(struct kobject *kobj, struct iov_attr *attr, + const char *buf, size_t count) +{ + int rc; + long enable; + struct pci_iov *iov = container_of(kobj, struct pci_iov, kobj); + + enable = simple_strtoll(buf, NULL, 0); + + mutex_lock(&iov->mutex); + switch (enable) { + case 0: + rc = iov_disable(iov); + break; + case 1: + rc = iov_enable(iov); + break; + default: + rc = -EINVAL; + } + mutex_unlock(&iov->mutex); + + return rc ? rc : count; +} + +static ssize_t numvfs_store(struct kobject *kobj, struct iov_attr *attr, + const char *buf, size_t count) +{ + int rc; + long numvfs; + struct pci_iov *iov = container_of(kobj, struct pci_iov, kobj); + + numvfs = simple_strtoll(buf, NULL, 0); + + mutex_lock(&iov->mutex); + rc = iov_set_numvfs(iov, numvfs); + mutex_unlock(&iov->mutex); + + return rc ? 
rc : count; +} + + +static struct iov_attr iov_attr[] = { + __ATTR_RO(totalvfs), + __ATTR_RO(initialvfs), + __ATTR(numvfs, S_IWUSR | S_IRUGO, numvfs_show, numvfs_store), + __ATTR(enable, S_IWUSR | S_IRUGO, is_enabled_show, is_enabled_store), +}; + +static ssize_t vf_show(struct kobject *kobj, struct iov_attr *attr, + char *buf) +{ + int vfn; + struct vf_entry *ve = container_of(kobj, struct vf_entry, kobj); + + vfn = attr - ve->attr; + ve->iov->notify(ve->iov->dev, vfn | PCI_IOV_RD_CONF); + + return sprintf(buf, "%s\n", ve->param[vfn]); +} + +static ssize_t vf_store(struct kobject *kobj, struct iov_attr *attr, + const char *buf, size_t count) +{ + int vfn; + struct vf_entry *ve = container_of(kobj, struct vf_entry, kobj); + + vfn = attr - ve->attr; + sscanf(buf, "%63s", ve->param[vfn]); + ve->iov->notify(ve->iov->dev, vfn | PCI_IOV_WR_CONF); + + return count; +} + +static ssize_t rid_show(struct kobject *kobj, struct iov_attr *attr, + char *buf) +{ + u8 busnr, devfn; + struct vf_entry *ve = container_of(kobj, struct vf_entry, kobj); + + vf_rid(ve->iov->dev, ve->vfn, &busnr, &devfn); + + return sprintf(buf, "%04x:%02x:%02x.%d\n", + pci_domain_nr(ve->iov->dev->bus), + busnr, PCI_SLOT(devfn), PCI_FUNC(devfn)); +} + +static struct iov_attr vf_attr = __ATTR_RO(rid); + +int iov_alloc_bus(struct pci_bus *bus, int busnr) +{ + int i; + int rc = 0; + struct pci_bus *child, *next; + struct list_head head; + + INIT_LIST_HEAD(&head); + + down_write(&pci_bus_sem); + + for (i = bus->number + 1; i <= busnr; i++) { + list_for_each_entry(child, &bus->children, node) + if (child->number == i) + break; + if (child->number == i) + continue; + child = pci_alloc_child_bus(bus, NULL, i); + if (!child) { + rc = -ENOMEM; + break; + } + child->subordinate = i; + list_add_tail(&child->node, &head); + } + + if (rc) + list_for_each_entry_safe(child, next, &head, node) + kfree(child); + else + list_for_each_entry_safe(child, next, &head, node) + list_move_tail(&child->node, &bus->children); + + 
up_write(&pci_bus_sem); + + return rc; +} + +void iov_release_bus(struct pci_bus *bus) +{ + struct pci_dev *dev; + struct pci_bus *child, *next; + struct list_head head; + + INIT_LIST_HEAD(&head); + + down_write(&pci_bus_sem); + + list_for_each_entry(dev, &bus->devices, bus_list) + if (dev->iov && dev->iov->notify) + goto done; + + list_for_each_entry_safe(child, next, &bus->children, node) + if (!child->bridge) + list_move(&child->node, &head); +done: + up_write(&pci_bus_sem); + + list_for_each_entry_safe(child, next, &head, node) + pci_remove_bus(child); +} + +/** + * pci_iov_init - initialize device's SR-IOV capability + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + * + * The major differences between Virtual Function and PCI device are: + * 1) the device with multiple bus numbers uses internal routing, so + * there is no explicit bridge device in this case. + * 2) Virtual Function memory spaces are designated by BARs encapsulated + * in the capability structure, and the BARs in Virtual Function PCI + * configuration space are read-only zero. + */ +int pci_iov_init(struct pci_dev *dev) +{ + int i; + int pos; + u32 pgsz; + u16 ctrl, total, initial, offset, stride; + struct pci_iov *iov; + struct resource *res; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_IOV); + if (!pos) + return -ENODEV; + + ctrl = pci_ari_enabled(dev) ? PCI_IOV_CTRL_ARI : 0; + pci_write_config_word(dev, pos + PCI_IOV_CTRL, ctrl); + ssleep(1); + + pci_read_config_word(dev, pos + PCI_IOV_TOTAL_VF, &total); + pci_read_config_word(dev, pos + PCI_IOV_INITIAL_VF, &initial); + pci_write_config_word(dev, pos + PCI_IOV_NUM_VF, initial); + pci_read_config_word(dev, pos + PCI_IOV_VF_OFFSET, &offset); + pci_read_config_word(dev, pos + PCI_IOV_VF_STRIDE, &stride); + if (!total || initial > total || (initial && !offset) || + (initial > 1 && !stride)) + return -EIO; + + pci_read_config_dword(dev, pos + PCI_IOV_SUP_PGSIZE, &pgsz); + i = PAGE_SHIFT > 12 ? 
PAGE_SHIFT - 12 : 0; + pgsz &= ~((1 << i) - 1); + if (!pgsz) + return -EIO; + + pgsz &= ~(pgsz - 1); + pci_write_config_dword(dev, pos + PCI_IOV_SYS_PGSIZE, pgsz); + + iov = kzalloc(sizeof(*iov), GFP_KERNEL); + if (!iov) + return -ENOMEM; + + iov->dev = dev; + iov->cap = pos; + iov->totalvfs = total; + iov->initialvfs = initial; + iov->offset = offset; + iov->stride = stride; + iov->align = pgsz << 12; + mutex_init(&iov->mutex); + + for (i = 0; i < PCI_IOV_NUM_BAR; i++) { + res = dev->resource + PCI_IOV_RESOURCES + i; + pos = iov->cap + PCI_IOV_BAR_0 + i * 4; + i += pci_read_base(dev, pci_bar_unknown, res, pos); + if (!res->flags) + continue; + res->end = res->start + (res->end - res->start + 1) * total - 1; + } + + dev->iov = iov; + dev_info(&dev->dev, "SR-IOV capability is initialized\n"); + + return 0; +} + +/** + * pci_iov_release - release resources used by SR-IOV capability + * @dev: the PCI device + */ +void pci_iov_release(struct pci_dev *dev) +{ + if (!dev->iov) + return; + + mutex_destroy(&dev->iov->mutex); + kfree(dev->iov); + dev->iov = NULL; +} + +/** + * pci_iov_create_sysfs - create sysfs for SR-IOV capability + * @dev: the PCI device + */ +void pci_iov_create_sysfs(struct pci_dev *dev) +{ + int rc; + int i, j; + struct pci_iov *iov = dev->iov; + + if (!iov) + return; + + iov->ve = kzalloc(sizeof(*iov->ve) * iov->totalvfs, GFP_KERNEL); + if (!iov->ve) + return; + + for (i = 0; i < iov->totalvfs; i++) { + iov->ve[i].vfn = i; + iov->ve[i].iov = iov; + } + + iov->kobj.ktype = &iov_ktype; + iov->kobj.parent = &dev->dev.kobj; + kobject_set_name(&iov->kobj, "iov"); + rc = kobject_register(&iov->kobj); + if (rc) + goto failed1; + + for (i = 0; i < ARRAY_SIZE(iov_attr); i++) { + rc = sysfs_create_file(&iov->kobj, &iov_attr[i].attr); + if (rc) + goto failed2; + } + + for (i = 0; i < iov->totalvfs; i++) { + iov->ve[i].kobj.ktype = &iov_ktype; + iov->ve[i].kobj.parent = &iov->kobj; + sprintf(iov->ve[i].name, "%d", i); + kobject_set_name(&iov->ve[i].kobj, 
iov->ve[i].name); + rc = kobject_register(&iov->ve[i].kobj); + if (rc) + goto failed3; + rc = sysfs_create_file(&iov->ve[i].kobj, &vf_attr.attr); + if (rc) { + kobject_unregister(&iov->ve[i].kobj); + goto failed3; + } + } + + return; + +failed3: + for (j = 0; j < i; j++) { + sysfs_remove_file(&iov->ve[j].kobj, &vf_attr.attr); + kobject_unregister(&iov->ve[j].kobj); + } +failed2: + for (j = 0; j < i; j++) + sysfs_remove_file(&dev->iov->kobj, &iov_attr[j].attr); + kobject_unregister(&iov->kobj); +failed1: + kfree(iov->ve); + iov->ve = NULL; + + dev_err(&dev->dev, "can''t create sysfs for SR-IOV.\n"); +} + +/** + * pci_iov_remove_sysfs - remove sysfs of SR-IOV capability + * @dev: the PCI device + */ +void pci_iov_remove_sysfs(struct pci_dev *dev) +{ + int i; + struct pci_iov *iov = dev->iov; + + if (!iov || !iov->ve) + return; + + for (i = 0; i < iov->totalvfs; i++) { + sysfs_remove_file(&iov->ve[i].kobj, &vf_attr.attr); + kobject_unregister(&iov->ve[i].kobj); + } + + for (i = 0; i < ARRAY_SIZE(iov_attr); i++) + sysfs_remove_file(&dev->iov->kobj, &iov_attr[i].attr); + + kobject_unregister(&iov->kobj); + kfree(iov->ve); +} + +int pci_iov_resource_align(struct pci_dev *dev, int resno) +{ + if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCES_END) + return 0; + + BUG_ON(!dev->iov); + + return dev->iov->align; +} + +int pci_iov_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type) +{ + if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCES_END) + return 0; + + BUG_ON(!dev->iov); + + *type = pci_bar_unknown; + return dev->iov->cap + PCI_IOV_BAR_0 + + 4 * (resno - PCI_IOV_RESOURCES); +} + +/** + * pci_iov_register - register SR-IOV service + * @dev: the PCI device + * @notify: callback function for SR-IOV events + * @entries: sysfs entries used by Physical Function driver + * + * Returns 0 on success, or negative on failure. 
+ */ +int pci_iov_register(struct pci_dev *dev, int (*notify)(struct pci_dev *, u32), + char **entries) +{ + int rc; + int n, i, j, k; + u8 busnr, devfn; + struct iov_attr *attr; + struct pci_iov *iov = dev->iov; + + if (!iov || !iov->ve) + return -ENODEV; + + if (!notify) + return -EINVAL; + + vf_rid(dev, iov->totalvfs - 1, &busnr, &devfn); + if (busnr > dev->bus->subordinate) + return -EIO; + + iov->notify = notify; + rc = iov_alloc_bus(dev->bus, busnr); + if (rc) + return rc; + + for (n = 0; entries && entries[n] && *entries[n]; n++) + ; + if (!n) + return 0; + + for (i = 0; i < iov->totalvfs; i++) { + rc = -ENOMEM; + iov->ve[i].param = kzalloc(PCI_IOV_PARAM_LEN * n, GFP_KERNEL); + if (!iov->ve[i].param) + goto failed; + attr = kzalloc(sizeof(*attr) * n, GFP_KERNEL); + if (!attr) { + kfree(iov->ve[i].param); + goto failed; + } + iov->ve[i].attr = attr; + for (j = 0; j < n; j++) { + attr[j].attr.name = entries[j]; + attr[j].attr.mode = S_IWUSR | S_IRUGO; + attr[j].show = vf_show; + attr[j].store = vf_store; + rc = sysfs_create_file(&iov->ve[i].kobj, &attr[j].attr); + if (rc) { + while (j--) + sysfs_remove_file(&iov->ve[i].kobj, + &attr[j].attr); + kfree(iov->ve[i].attr); + kfree(iov->ve[i].param); + goto failed; + } + } + } + + iov->nentries = n; + return 0; + +failed: + for (k = 0; k < i; k++) { + for (j = 0; j < n; j++) + sysfs_remove_file(&iov->ve[k].kobj, + &iov->ve[k].attr[j].attr); + kfree(iov->ve[k].attr); + kfree(iov->ve[k].param); + } + + return rc; +} +EXPORT_SYMBOL_GPL(pci_iov_register); + +/** + * pci_iov_unregister - unregister SR-IOV service + * @dev: the PCI device + */ +void pci_iov_unregister(struct pci_dev *dev) +{ + int i, j; + struct pci_iov *iov = dev->iov; + + BUG_ON(!iov || !iov->notify); + + if (!iov->nentries) + return; + + for (i = 0; i < iov->totalvfs; i++) { + for (j = 0; j < iov->nentries; j++) + sysfs_remove_file(&iov->ve[i].kobj, + &iov->ve[i].attr[j].attr); + kfree(iov->ve[i].attr); + kfree(iov->ve[i].param); + } + iov->notify = 
NULL; + iov_release_bus(dev->bus); +} +EXPORT_SYMBOL_GPL(pci_iov_unregister); + +/** + * pci_iov_enable - enable SR-IOV capability + * @dev: the PCI device + * @numvfs: number of VFs to be available + * + * Returns 0 on success, or negative on failure. + */ +int pci_iov_enable(struct pci_dev *dev, int numvfs) +{ + int rc; + struct pci_iov *iov = dev->iov; + + if (!iov) + return -ENODEV; + + if (!iov->notify) + return -EINVAL; + + mutex_lock(&iov->mutex); + rc = iov_set_numvfs(iov, numvfs); + if (rc) + goto done; + rc = iov_enable(iov); +done: + mutex_unlock(&iov->mutex); + + return rc; +} +EXPORT_SYMBOL_GPL(pci_iov_enable); + +/** + * pci_iov_disable - disable SR-IOV capability + * @dev: the PCI device + * + * Should be called upon Physical Function driver removal, and power + * state change. All previous allocated Virtual Functions are reclaimed. + */ +void pci_iov_disable(struct pci_dev *dev) +{ + struct pci_iov *iov = dev->iov; + + BUG_ON(!iov || !iov->notify); + mutex_lock(&iov->mutex); + iov_disable(iov); + mutex_unlock(&iov->mutex); +} +EXPORT_SYMBOL_GPL(pci_iov_disable); + +/** + * pci_iov_read_config - read SR-IOV configurations + * @dev: the PCI device + * @vfn: Virtual Function Number + * @entry: the entry to be read + * @buf: the buffer to be filled + * @size: size of the buffer + * + * Returns 0 on success, or negative on failure. 
+ */ +int pci_iov_read_config(struct pci_dev *dev, int vfn, + char *entry, char *buf, int size) +{ + int i; + struct pci_iov *iov = dev->iov; + + if (!iov) + return -ENODEV; + + if (!iov->notify || !iov->ve || !iov->nentries) + return -EINVAL; + + if (vfn < 0 || vfn >= iov->totalvfs) + return -EINVAL; + + for (i = 0; i < iov->nentries; i++) + if (!strcmp(iov->ve[vfn].attr[i].attr.name, entry)) { + strncpy(buf, iov->ve[vfn].param[i], size); + buf[size - 1] = ''\0''; + return 0; + } + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(pci_iov_read_config); + +/** + * pci_iov_write_config - write SR-IOV configurations + * @dev: the PCI device + * @vfn: Virtual Function Number + * @entry: the entry to be written + * @buf: the buffer contains configurations + * + * Returns 0 on success, or negative on failure. + */ +int pci_iov_write_config(struct pci_dev *dev, int vfn, + char *entry, char *buf) +{ + int i; + struct pci_iov *iov = dev->iov; + + if (!iov) + return -ENODEV; + + if (!iov->notify || !iov->ve || !iov->nentries) + return -EINVAL; + + if (vfn < 0 || vfn >= iov->totalvfs) + return -EINVAL; + + for (i = 0; i < iov->nentries; i++) + if (!strcmp(iov->ve[vfn].attr[i].attr.name, entry)) { + strncpy(iov->ve[vfn].param[i], buf, PCI_IOV_PARAM_LEN); + iov->ve[vfn].param[i][PCI_IOV_PARAM_LEN - 1] = ''\0''; + return 0; + } + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(pci_iov_write_config); diff -r 040046b91eb7 -r 75504b97c0ab drivers/pci/pci-sysfs.c --- a/drivers/pci/pci-sysfs.c Sat Sep 27 01:25:31 2008 -0400 +++ b/drivers/pci/pci-sysfs.c Sat Sep 27 01:27:01 2008 -0400 @@ -559,6 +559,9 @@ } /* add platform-specific attributes */ pcibios_add_platform_entries(pdev); + + /* Single Root I/O Virtualization */ + pci_iov_create_sysfs(pdev); return 0; } @@ -587,6 +590,8 @@ kfree(pdev->rom_attr); } } + + pci_iov_remove_sysfs(pdev); } static int __init pci_sysfs_init(void) diff -r 040046b91eb7 -r 75504b97c0ab drivers/pci/pci.c --- a/drivers/pci/pci.c Sat Sep 27 01:25:31 2008 -0400 +++ 
b/drivers/pci/pci.c Sat Sep 27 01:27:01 2008 -0400 @@ -921,11 +921,17 @@ */ int pci_resource_alignment(struct pci_dev *dev, int resno) { + resource_size_t align; struct resource *res = dev->resource + resno; if (resno <= PCI_ROM_RESOURCE) return res->end - res->start + 1; - else if (resno <= PCI_BRIDGE_RES_END) + else if (resno < PCI_BRIDGE_RESOURCES) { + /* may be device specific resource */ + align = pci_iov_resource_align(dev, resno); + if (align) + return align; + } else if (resno <= PCI_BRIDGE_RES_END) return res->start; dev_err(&dev->dev, "alignment: invalid resource #%d\n", resno); @@ -942,12 +948,19 @@ */ int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type) { + int reg; + if (resno < PCI_ROM_RESOURCE) { *type = pci_bar_unknown; return PCI_BASE_ADDRESS_0 + 4 * resno; } else if (resno == PCI_ROM_RESOURCE) { *type = pci_bar_rom; return dev->rom_base_reg; + } else if (resno < PCI_BRIDGE_RESOURCES) { + /* may be device specific resource */ + reg = pci_iov_resource_bar(dev, resno, type); + if (reg) + return reg; } dev_err(&dev->dev, "BAR: invalid resource #%d\n", resno); diff -r 040046b91eb7 -r 75504b97c0ab drivers/pci/pci.h --- a/drivers/pci/pci.h Sat Sep 27 01:25:31 2008 -0400 +++ b/drivers/pci/pci.h Sat Sep 27 01:27:01 2008 -0400 @@ -133,4 +133,59 @@ return dev->ari_enabled; } +/* Single Root I/O Virtualization */ +#define PCI_IOV_PARAM_LEN 64 + +struct vf_entry; + +struct pci_iov { + int cap; /* capability position */ + int align; /* page size used to map memory space */ + int is_enabled; /* status of SR-IOV */ + int nentries; /* number of sysfs entries used by PF driver */ + u16 totalvfs; /* total VFs associated with the PF */ + u16 initialvfs; /* initial VFs associated with the PF */ + u16 numvfs; /* number of VFs available */ + u16 offset; /* first VF Routing ID offset */ + u16 stride; /* following VF stride */ + struct mutex mutex; /* lock for SR-IOV */ + struct kobject kobj; /* kobject for IOV */ + struct pci_dev *dev; /* Physical 
Function */ + struct vf_entry *ve; /* Virtual Function related */ + int (*notify)(struct pci_dev *, u32); /* event callback function */ +}; + +#ifdef CONFIG_PCI_IOV +extern int pci_iov_init(struct pci_dev *dev); +extern void pci_iov_release(struct pci_dev *dev); +void pci_iov_create_sysfs(struct pci_dev *dev); +void pci_iov_remove_sysfs(struct pci_dev *dev); +extern int pci_iov_resource_align(struct pci_dev *dev, int resno); +extern int pci_iov_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type); +#else +static inline int pci_iov_init(struct pci_dev *dev) +{ + return -EIO; +} +static inline void pci_iov_release(struct pci_dev *dev) +{ +} +static inline void pci_iov_create_sysfs(struct pci_dev *dev) +{ +} +static inline void pci_iov_remove_sysfs(struct pci_dev *dev) +{ +} +static inline int pci_iov_resource_align(struct pci_dev *dev, int resno) +{ + return 0; +} +static inline int pci_iov_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type) +{ + return 0; +} +#endif /* CONFIG_PCI_IOV */ + #endif /* DRIVERS_PCI_H */ diff -r 040046b91eb7 -r 75504b97c0ab drivers/pci/probe.c --- a/drivers/pci/probe.c Sat Sep 27 01:25:31 2008 -0400 +++ b/drivers/pci/probe.c Sat Sep 27 01:27:01 2008 -0400 @@ -760,6 +760,7 @@ struct pci_dev *pci_dev; pci_dev = to_pci_dev(dev); + pci_iov_release(pci_dev); kfree(pci_dev); } @@ -886,6 +887,9 @@ /* Alternative Routing-ID Forwarding */ pci_ari_init(dev); + + /* Single Root I/O Virtualization */ + pci_iov_init(dev); /* * Add the device to our list of discovered devices diff -r 040046b91eb7 -r 75504b97c0ab include/linux/pci.h --- a/include/linux/pci.h Sat Sep 27 01:25:31 2008 -0400 +++ b/include/linux/pci.h Sat Sep 27 01:27:01 2008 -0400 @@ -77,6 +77,12 @@ /* #6: expansion ROM */ PCI_ROM_RESOURCE, + /* device specific resources */ +#ifdef CONFIG_PCI_IOV + PCI_IOV_RESOURCES, + PCI_IOV_RESOURCES_END = PCI_IOV_RESOURCES + PCI_IOV_NUM_BAR - 1, +#endif + /* address space assigned to buses behind the bridge */ 
#ifndef PCI_BRIDGE_RES_NUM #define PCI_BRIDGE_RES_NUM 4 @@ -128,6 +134,8 @@ char cap_nr; u32 data[0]; }; + +struct pci_iov; /* * The pci_dev structure is used to describe PCI devices. @@ -200,6 +208,7 @@ struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */ int rom_attr_enabled; /* has display of the rom attribute been enabled? */ struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ + struct pci_iov *iov; }; #define pci_dev_g(n) list_entry(n, struct pci_dev, global_list) @@ -811,5 +820,54 @@ #define PCIPCI_VSFX 16 #define PCIPCI_ALIMAGIK 32 +/* SR-IOV events masks */ +#define PCI_IOV_VIRTFN_ID 0x0000FFFFU /* Virtual Function Number */ +#define PCI_IOV_NUM_VIRTFN 0x0000FFFFU /* num of Virtual Functions */ +#define PCI_IOV_EVENT_TYPE 0x80000000U /* event type (pre/post) */ +/* SR-IOV events values */ +#define PCI_IOV_ENABLE 0x00010000U /* SR-IOV enable request */ +#define PCI_IOV_DISABLE 0x00020000U /* SR-IOV disable request */ +#define PCI_IOV_RD_CONF 0x00040000U /* read configuration */ +#define PCI_IOV_WR_CONF 0x00080000U /* write configuration */ +#define PCI_IOV_POST_EVENT 0x80000000U /* post event */ + +#ifdef CONFIG_PCI_IOV +extern int pci_iov_enable(struct pci_dev *dev, int numvfs); +extern void pci_iov_disable(struct pci_dev *dev); +extern int pci_iov_register(struct pci_dev *dev, + int (*notify)(struct pci_dev *dev, u32 event), char **entries); +extern void pci_iov_unregister(struct pci_dev *dev); +extern int pci_iov_read_config(struct pci_dev *dev, int id, + char *entry, char *buf, int size); +extern int pci_iov_write_config(struct pci_dev *dev, int id, + char *entry, char *buf); +#else +static inline int pci_iov_enable(struct pci_dev *dev, int numvfs) +{ + return -EIO; +} +static inline void pci_iov_disable(struct pci_dev *dev) +{ +} +static inline int pci_iov_register(struct pci_dev *dev, + int (*notify)(struct pci_dev *dev, u32 event), char **entries) +{ + return -EIO; +} +static inline void 
pci_iov_unregister(struct pci_dev *dev) +{ +} +static inline int pci_iov_read_config(struct pci_dev *dev, int id, + char *entry, char *buf, int size) +{ + return -EIO; +} +static inline int pci_iov_write_config(struct pci_dev *dev, int id, + char *entry, char *buf) +{ + return -EIO; +} +#endif /* CONFIG_PCI_IOV */ + #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ diff -r 040046b91eb7 -r 75504b97c0ab include/linux/pci_regs.h --- a/include/linux/pci_regs.h Sat Sep 27 01:25:31 2008 -0400 +++ b/include/linux/pci_regs.h Sat Sep 27 01:27:01 2008 -0400 @@ -332,6 +332,7 @@ #define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */ #define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ #define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */ +#define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */ #define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ #define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ #define PCI_EXP_DEVCAP 4 /* Device capabilities */ @@ -393,6 +394,7 @@ #define PCI_EXT_CAP_ID_DSN 3 #define PCI_EXT_CAP_ID_PWR 4 #define PCI_EXT_CAP_ID_ARI 14 +#define PCI_EXT_CAP_ID_IOV 16 /* Advanced Error Reporting */ #define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ @@ -478,4 +480,23 @@ #define PCI_ARI_CTRL_ACS 0x0002 /* ACS Function Groups Enable */ #define PCI_ARI_CTRL_FG(x) (((x) >> 4) & 7) /* Function Group */ +/* Single Root I/O Virtualization */ +#define PCI_IOV_CAP 0x04 /* SR-IOV Capabilities */ +#define PCI_IOV_CTRL 0x08 /* SR-IOV Control */ +#define PCI_IOV_CTRL_VFE 0x01 /* VF Enable */ +#define PCI_IOV_CTRL_MSE 0x08 /* VF Memory Space Enable */ +#define PCI_IOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy */ +#define PCI_IOV_STATUS 0x0a /* SR-IOV Status */ +#define PCI_IOV_INITIAL_VF 0x0c /* Initial VFs */ +#define PCI_IOV_TOTAL_VF 0x0e /* Total VFs */ +#define PCI_IOV_NUM_VF 0x10 /* Number of VFs */ +#define PCI_IOV_FUNC_LINK 0x12 /* Function Dependency Link */ +#define PCI_IOV_VF_OFFSET 0x14 /* First VF Offset */ +#define 
PCI_IOV_VF_STRIDE 0x16 /* Following VF Stride */ +#define PCI_IOV_VF_DID 0x1a /* VF Device ID */ +#define PCI_IOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */ +#define PCI_IOV_SYS_PGSIZE 0x20 /* System Page Size */ +#define PCI_IOV_BAR_0 0x24 /* VF BAR0 */ +#define PCI_IOV_NUM_BAR 6 /* Number of VF BARs */ + #endif /* LINUX_PCI_REGS_H */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel