Isaku Yamahata
2009-Apr-02 04:01 UTC
[Xen-devel] [PATCH 1/3] PCIe IO space multiplexing: Linux part
PCI pass through: PCIe IO space multiplexing This patch is for PCIe IO space multiplexing. This is required for more than 16 HVM domains to boot from PCIe pass-through devices. Linux as dom0 exclusively assigns IO space to downstream PCI bridges, and the assignment unit of PCI bridge IO space is 4K. So only up to 16 PCIe devices can be accessed via IO space within the 64K IO ports. PCI expansion ROM BIOSes often use IO port access to boot from the device, so in a virtualized environment this means that only up to 16 guest domains can boot from a pass-through device. This patch allows pass-through devices to share PCIe IO space. It consists of two parts: - Reassign the IO space of the PCIe devices specified by "guestiomuldev=[<segment>:]<bus>:<dev>[,[<segment>:]<bus>:<dev>][,...]" so that it is shared. This is implemented as a Linux PCI quirk fixup. The sharing unit is a PCIe switch, i.e. the IO space of the endpoint devices under the same switch is shared. If there is more than one switch, one area of IO space is used per switch (e.g. two switches use two areas of IO space). - A driver which arbitrates accesses to the multiplexed PCIe IO space. qemu-dm will use this later. Limitation: the IO ports of IO-shared devices can't be accessed from dom0 Linux device drivers. This shouldn't be a big issue, because the PCIe specification discourages the use of IO space and recommends that IO space be used only for bootable devices with ROM code. OS device drivers should work without IO space access. Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp> diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -37,6 +37,12 @@ config PCI_GUESTDEV help Say Y here if you want to reserve PCI device for passthrough. +config PCI_IOMULTI + bool "PCI Device IO Multiplex for Passthrough" + depends on PCI && ACPI + default y + help + Say Y here if you need io multiplexing. 
config PCI_IOV bool "PCI IOV support" depends on PCI diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -4,6 +4,7 @@ obj-y += access.o bus.o probe.o remove.o pci.o quirks.o \ pci-driver.o search.o pci-sysfs.o rom.o setup-res.o +obj-$(CONFIG_PCI_IOMULTI) += iomulti.o obj-$(CONFIG_PCI_REASSIGN) += reassigndev.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_PCI_GUESTDEV) += guestdev.o diff --git a/drivers/pci/iomulti.c b/drivers/pci/iomulti.c new file mode 100644 --- /dev/null +++ b/drivers/pci/iomulti.c @@ -0,0 +1,1121 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (c) 2009 Isaku Yamahata + * VA Linux Systems Japan K.K. 
+ * + */ + +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/miscdevice.h> +#include <linux/pci.h> +#include <linux/sort.h> + +#include <asm/setup.h> +#include <asm/uaccess.h> + +#include "iomulti.h" + +#define PCI_NUM_BARS 6 +#define PCI_BUS_MAX 255 +#define PCI_DEV_MAX 31 +#define PCI_FUNC_MAX 7 +#define PCI_NUM_FUNC 8 + +/* see pci_resource_len */ +static inline resource_size_t pci_iomul_len(const struct resource* r) +{ + if (r->start == 0 && r->start == r->end) + return 0; + return r->end - r->start + 1; +} + +#define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1)) +/* stolen from pbus_size_io() */ +static unsigned long pdev_size_io(struct pci_dev *pdev) +{ + unsigned long size = 0, size1 = 0; + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *r = &pdev->resource[i]; + unsigned long r_size; + + if (!(r->flags & IORESOURCE_IO)) + continue; + + r_size = r->end - r->start + 1; + + if (r_size < 0x400) + /* Might be re-aligned for ISA */ + size += r_size; + else + size1 += r_size; + } + +/* To be fixed in 2.5: we should have sort of HAVE_ISA + flag in the struct pci_bus. */ +#if defined(CONFIG_ISA) || defined(CONFIG_EISA) + size = (size & 0xff) + ((size & ~0xffUL) << 2); +#endif + size = ROUND_UP(size + size1, 4096); + return size; +} + +/* + * primary bus number of PCI-PCI bridge in switch on which + * this slots sits. + * i.e. the primary bus number of PCI-PCI bridge of downstream port + * or root port in switch. + * the secondary bus number of PCI-PCI bridge of upstream port + * in switch. 
+ */ +static inline unsigned char pci_dev_switch_busnr(struct pci_dev *pdev) +{ + if (pci_find_capability(pdev, PCI_CAP_ID_EXP)) + return pdev->bus->primary; + return pdev->bus->number; +} + +struct pci_iomul_func { + int segment; + uint8_t bus; + uint8_t devfn; + + /* only start and end are used */ + unsigned long io_size; + uint8_t io_bar; + struct resource resource[PCI_NUM_BARS]; +}; + +struct pci_iomul_switch { + struct list_head list; /* bus_list_lock protects */ + + /* + * This lock the following entry and following + * pci_iomul_slot/pci_iomul_func. + */ + struct mutex lock; + + struct resource *io_region; + unsigned int count; + struct pci_dev *current_pdev; + + int segment; + uint8_t bus; + + uint32_t io_base; + uint32_t io_limit; + + /* func which has the largeset io size*/ + struct pci_iomul_func *func; + + struct list_head slots; +}; + +struct pci_iomul_slot { + struct list_head sibling; + + /* + * busnr + * when pcie, the primary busnr of the PCI-PCI bridge on which + * this devices sits. 
+ */ + uint8_t switch_busnr; + + /* device */ + int segment; + uint8_t bus; + uint8_t dev; + + struct pci_iomul_func *func[PCI_NUM_FUNC]; +}; + +static LIST_HEAD(switch_list); +static DEFINE_MUTEX(switch_list_lock); + +/*****************************************************************************/ +struct pci_iomul_switch *__pci_iomul_find_switch_locked(int segment, + uint8_t bus) +{ + struct pci_iomul_switch *sw; + + list_for_each_entry(sw, &switch_list, list) { + if (sw->segment == segment && sw->bus == bus) + return sw; + } + return NULL; +} + +struct pci_iomul_switch *__pci_iomul_find_switch(int segment, uint8_t bus) +{ + struct pci_iomul_switch *found = NULL; + + mutex_lock(&switch_list_lock); + found = __pci_iomul_find_switch_locked(segment, bus); + mutex_unlock(&switch_list_lock); + return found; +} + +struct pci_iomul_switch *pci_iomul_find_switch(int segment, uint8_t bus) +{ + struct pci_iomul_switch *tmp; + struct pci_iomul_switch *found = NULL; + + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + + mutex_lock(&switch_list_lock); + found = __pci_iomul_find_switch_locked(segment, bus); + if (found != NULL) { + mutex_unlock(&switch_list_lock); + kfree(tmp); + return found; + } + if (tmp != NULL) { + tmp->segment = segment; + tmp->bus = bus; + tmp->func = NULL; + mutex_init(&tmp->lock); + INIT_LIST_HEAD(&tmp->slots); + list_add(&tmp->list, &switch_list); + } + mutex_unlock(&switch_list_lock); + return tmp; +} + +struct pci_iomul_slot *pci_iomul_find_slot_locked(struct pci_iomul_switch *sw, + uint8_t busnr, uint8_t dev) +{ + struct pci_iomul_slot *slot; + + BUG_ON(!mutex_is_locked(&sw->lock)); + list_for_each_entry(slot, &sw->slots, sibling) { + if (slot->bus == busnr && slot->dev == dev) + return slot; + } + return NULL; +} + +struct pci_iomul_slot *pci_iomul_find_slot(struct pci_iomul_switch *sw, + uint8_t busnr, uint8_t dev) +{ + struct pci_iomul_slot *found; + + mutex_lock(&sw->lock); + found = pci_iomul_find_slot_locked(sw, busnr, dev); + 
mutex_unlock(&sw->lock); + return found; +} + +void pci_iomul_find_switch_slot(int segment, uint8_t bus, uint8_t dev, + struct pci_iomul_switch **swp, + struct pci_iomul_slot **slot) +{ + struct pci_iomul_switch *sw; + struct pci_iomul_slot *s; + + *swp = NULL; + *slot = NULL; + + mutex_lock(&switch_list_lock); + list_for_each_entry(sw, &switch_list, list) { + if (sw->segment != segment) + continue; + s = pci_iomul_find_slot(sw, bus, dev); + if (s != NULL) { + *swp = sw; + *slot = s; + break; + } + } + mutex_unlock(&switch_list_lock); +} + +static int __init pci_iomul_slot_init(struct pci_dev *pdev, + struct pci_iomul_slot *slot) +{ + u16 rpcap; + u16 cap; + struct pci_iomul_switch *sw; + + rpcap = pci_find_capability(pdev, PCI_CAP_ID_EXP); + if (!rpcap) { + /* pci device isn''t supported */ + printk(KERN_INFO + "PCI: sharing io port of non PCIe device %s " + "isn''t supported. ignoring.\n", + pci_name(pdev)); + return -ENOSYS; + } + + pci_read_config_word(pdev, rpcap + PCI_CAP_FLAGS, &cap); + switch ((cap & PCI_EXP_FLAGS_TYPE) >> 4) { + case PCI_EXP_TYPE_RC_END: + printk(KERN_INFO + "PCI: io port sharing of root complex integrated " + "endpoint %s isn''t supported. ignoring.\n", + pci_name(pdev)); + return -ENOSYS; + case PCI_EXP_TYPE_ENDPOINT: + case PCI_EXP_TYPE_LEG_END: + break; + default: + printk(KERN_INFO + "PCI: io port sharing of non endpoint %s " + "doesn''t make sense. 
ignoring.\n", + pci_name(pdev)); + return -EINVAL; + } + + slot->segment = pci_domain_nr(pdev->bus); + slot->bus = pdev->bus->number; + slot->dev = PCI_SLOT(pdev->devfn); + slot->switch_busnr = pci_dev_switch_busnr(pdev); + + sw = pci_iomul_find_switch(slot->segment, slot->switch_busnr); + if (sw == NULL) + return -ENOMEM; + + mutex_lock(&sw->lock); + if (pci_iomul_find_slot_locked(sw, slot->bus, slot->dev)) { + mutex_unlock(&sw->lock); + return -EEXIST; + } else { + list_add(&slot->sibling, &sw->slots); + } + mutex_unlock(&sw->lock); + return 0; +} + +static struct pci_iomul_slot *pci_iomul_slot_alloc(struct pci_dev *pdev) +{ + struct pci_iomul_slot *slot; + + slot = kzalloc(sizeof(*slot), GFP_KERNEL); + if (slot == NULL) + return NULL; + + if (pci_iomul_slot_init(pdev, slot) != 0) { + kfree(slot); + return NULL; + } + return slot; +} + +/*****************************************************************************/ +static int __init pci_get_sbd(const char *str, + int *segment__, uint8_t *bus__, uint8_t *dev__) +{ + int segment; + int bus; + int dev; + + if (sscanf(str, "%x:%x:%x", &segment, &bus, &dev) != 3) { + if (sscanf(str, "%x:%x", &bus, &dev) == 2) + segment = 0; + else + return -EINVAL; + } + + if (segment < 0 || INT_MAX <= segment) + return -EINVAL; + if (bus < 0 || PCI_BUS_MAX < bus) + return -EINVAL; + if (dev < 0 || PCI_DEV_MAX < dev) + return -EINVAL; + + *segment__ = segment; + *bus__ = bus; + *dev__ = dev; + return 0; +} + +static char iomul_param[COMMAND_LINE_SIZE]; +#define TOKEN_MAX 10 /* SSSS:BB:DD length is 10 */ +static int pci_is_iomul_dev_param(struct pci_dev *pdev) +{ + int len; + char *p; + char *next_str; + + for (p = &iomul_param[0]; *p != ''\0''; p = next_str + 1) { + next_str = strchr(p, '',''); + if (next_str != NULL) + len = next_str - p; + else + len = strlen(p); + + if (len > 0 && len <= TOKEN_MAX) { + char tmp[TOKEN_MAX+1]; + int seg; + uint8_t bus; + uint8_t dev; + + strncpy(tmp, p, len); + *(tmp + len) = ''\0''; + if 
(pci_get_sbd(tmp, &seg, &bus, &dev) == 0 && + pci_domain_nr(pdev->bus) == seg && + pdev->bus->number == bus && + PCI_SLOT(pdev->devfn) == dev) + return 1; + } + if (next_str == NULL) + break; + } + + return 0; +} + +/* + * Format: [<segment>:]<bus>:<dev>[,[<segment>:]<bus>:<dev>[,...] + */ +static int __init pci_iomul_param_setup(char *str) +{ + if (strlen(str) >= COMMAND_LINE_SIZE) + return 0; + + /* parse it after pci bus scanning */ + strncpy(iomul_param, str, sizeof(iomul_param)); + return 1; +} +__setup("guestiomuldev=", pci_iomul_param_setup); + +/*****************************************************************************/ +static void pci_iomul_set_bridge_io_window(struct pci_dev *bridge, + uint32_t io_base, uint32_t io_limit) +{ + uint16_t l; + uint32_t upper16; + + io_base >>= 12; + io_base <<= 4; + io_limit >>= 12; + io_limit <<= 4; + l = (io_base & 0xff) | ((io_limit & 0xff) << 8); + upper16 = ((io_base & 0xffff00) >> 8) | + (((io_limit & 0xffff00) >> 8) << 16); + + /* Temporarily disable the I/O range before updating PCI_IO_BASE. */ + pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, 0x0000ffff); + /* Update lower 16 bits of I/O base/limit. */ + pci_write_config_word(bridge, PCI_IO_BASE, l); + /* Update upper 16 bits of I/O base/limit. 
*/ + pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, upper16); +} + +static void pci_disable_bridge_io_window(struct pci_dev *bridge) +{ + /* set base = 0xffffff limit = 0x0 */ + pci_iomul_set_bridge_io_window(bridge, 0xffffff, 0); +} + +static int __devinit pci_iomul_func_scan(struct pci_dev *pdev, + struct pci_iomul_slot *slot, + uint8_t func) +{ + struct pci_iomul_func *f; + unsigned int i; + + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (f == NULL) + return -ENOMEM; + + f->segment = slot->segment; + f->bus = slot->bus; + f->devfn = PCI_DEVFN(slot->dev, func); + f->io_size = pdev_size_io(pdev); + + for (i = 0; i < PCI_NUM_BARS; i++) { + if (!(pci_resource_flags(pdev, i) & IORESOURCE_IO)) + continue; + if (pci_resource_len(pdev, i) == 0) + continue; + + f->io_bar |= 1 << i; + f->resource[i] = pdev->resource[i]; + } + + if (f->io_bar) + slot->func[func] = f; + else + kfree(f); + return 0; +} + +static void __devinit pci_iomul_fixup_ioresource(struct pci_dev *pdev, + struct pci_iomul_func *func, + int reassign, int dealloc) +{ + uint8_t i; + struct resource *r; + + printk(KERN_INFO "PCI: deallocating io resource[%s]. 
io size 0x%lx\n", + pci_name(pdev), func->io_size); + for (i = 0; i < PCI_NUM_BARS; i++) { + r = &pdev->resource[i]; + if (!(func->io_bar & (1 << i))) + continue; + + if (reassign) { + r->end -= r->start; + r->start = 0; + pci_update_resource(pdev, i); + func->resource[i] = *r; + } + + if (dealloc) + r->flags = 0; /* don''t allocate this resource */ + } + + /* parent PCI-PCI bridge */ + if (!reassign) + return; + pdev = pdev->bus->self; + if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST) + return; + pci_disable_bridge_io_window(pdev); + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + r = &pdev->resource[i]; + if (!(r->flags & IORESOURCE_IO)) + continue; + + r->end -= r->start; + r->start = 0; + if (i < PCI_BRIDGE_RESOURCES) + pci_update_resource(pdev, i); + } +} + +static void __devinit quirk_iomul_dealloc_ioresource(struct pci_dev *pdev) +{ + struct pci_iomul_switch *sw; + struct pci_iomul_slot *slot; + struct pci_iomul_func *f; + struct pci_iomul_func *__f; + + if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) + return; + if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST) + return; /* PCI Host Bridge isn''t a target device */ + if (!pci_is_iomul_dev_param(pdev)) + return; + + pci_iomul_find_switch_slot(pci_domain_nr(pdev->bus), pdev->bus->number, + PCI_SLOT(pdev->devfn), &sw, &slot); + if (sw == NULL || slot == NULL) { + slot = pci_iomul_slot_alloc(pdev); + if (slot == NULL) + return; + sw = __pci_iomul_find_switch(pci_domain_nr(pdev->bus), + pci_dev_switch_busnr(pdev)); + BUG_ON(sw == NULL); + } + + printk(KERN_INFO "PCI: disable device and release io resource[%s].\n", + pci_name(pdev)); + pci_disable_device(pdev); + + if (pci_iomul_func_scan(pdev, slot, PCI_FUNC(pdev->devfn)) != 0) + return; + + f = slot->func[PCI_FUNC(pdev->devfn)]; + if (f == NULL) + return; + + __f = sw->func; + if (__f == NULL || __f->io_size < f->io_size) { + if (__f != NULL) { + struct pci_bus *__pbus; + struct pci_dev *__pdev; + + __pbus = pci_find_bus(__f->segment, __f->bus); + BUG_ON(__pbus == 
NULL); + __pdev = pci_get_slot(__pbus, __f->devfn); + BUG_ON(__pdev == NULL); + pci_iomul_fixup_ioresource(__pdev, __f, 0, 1); + pci_dev_put(__pdev); + } + + pci_iomul_fixup_ioresource(pdev, f, 1, 0); + sw->func = f; + } else { + pci_iomul_fixup_ioresource(pdev, f, 1, 1); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, + quirk_iomul_dealloc_ioresource); + + +static void pci_iomul_read_bridge_io(struct pci_iomul_switch *sw) +{ + struct pci_iomul_func *f = sw->func; + + struct pci_bus *pbus; + struct pci_dev *pdev; + struct pci_dev *bridge; + + uint16_t l; + uint16_t base_upper16; + uint16_t limit_upper16; + uint32_t io_base; + uint32_t io_limit; + + pbus = pci_find_bus(f->segment, f->bus); + BUG_ON(pbus == NULL); + + pdev = pci_get_slot(pbus, f->devfn); + BUG_ON(pdev == NULL); + + bridge = pdev->bus->self; + pci_read_config_word(bridge, PCI_IO_BASE, &l); + pci_read_config_word(bridge, PCI_IO_BASE_UPPER16, &base_upper16); + pci_read_config_word(bridge, PCI_IO_LIMIT_UPPER16, &limit_upper16); + + io_base = (l & 0xf0) | ((uint32_t)base_upper16 << 8); + io_base <<= 8; + io_limit = (l >> 8) | ((uint32_t)limit_upper16 << 8); + io_limit <<= 8; + io_limit |= 0xfff; + + sw->io_base = io_base; + sw->io_limit = io_limit; + + pci_dev_put(pdev); + printk(KERN_INFO "PCI: bridge %s base 0x%x limit 0x%x\n", + pci_name(bridge), sw->io_base, sw->io_limit); +} + +static void pci_iomul_setup_brige(struct pci_dev *bridge, + uint32_t io_base, uint32_t io_limit) +{ + uint16_t cmd; + + if ((bridge->class >> 8) == PCI_CLASS_BRIDGE_HOST) + return; + + pci_iomul_set_bridge_io_window(bridge, io_base, io_limit); + + /* and forcibly enables IO */ + pci_read_config_word(bridge, PCI_COMMAND, &cmd); + if (!(cmd & PCI_COMMAND_IO)) { + cmd |= PCI_COMMAND_IO; + printk(KERN_INFO "PCI: Forcibly Enabling IO %s\n", + pci_name(bridge)); + pci_write_config_word(bridge, PCI_COMMAND, cmd); + } +} + +struct __bar { + unsigned long size; + uint8_t bar; +}; + +/* decending order */ +static int 
pci_iomul_bar_cmp(const void *lhs__, const void *rhs__) +{ + const struct __bar *lhs = (struct __bar*)lhs__; + const struct __bar *rhs = (struct __bar*)rhs__; + return - (lhs->size - rhs->size); +} + +static void pci_iomul_setup_dev(struct pci_dev *pdev, + struct pci_iomul_func *f, + uint32_t io_base) +{ + struct __bar bars[PCI_NUM_BARS]; + int i; + uint8_t num_bars = 0; + struct resource *r; + + printk(KERN_INFO "PCI: Forcibly assign IO %s from 0x%x\n", + pci_name(pdev), io_base); + + for (i = 0; i < PCI_NUM_BARS; i++) { + if (!(f->io_bar & (1 << i))) + continue; + + r = &f->resource[i]; + bars[num_bars].size = pci_iomul_len(r); + bars[num_bars].bar = i; + + num_bars++; + } + + sort(bars, num_bars, sizeof(bars[0]), &pci_iomul_bar_cmp, NULL); + + for (i = 0; i < num_bars; i++) { + struct resource *fr = &f->resource[bars[i].bar]; + r = &pdev->resource[bars[i].bar]; + + BUG_ON(r->start != 0); + r->start += io_base; + r->end += io_base; + r->flags = fr->flags; + + fr->start = r->start; + fr->end = r->end; + + /* allocate the io region */ + pci_update_resource(pdev, bars[i].bar); + + io_base += bars[i].size; + } +} + +static void __devinit pci_iomul_release_io_resource(struct pci_dev *pdev, + struct pci_iomul_func *f) +{ + int i; + for (i = 0; i < PCI_NUM_BARS; i++) { + if (pci_resource_flags(pdev, i) & IORESOURCE_IO && + pdev->resource[i].parent != NULL) { + f->resource[i] = pdev->resource[i]; + release_resource(&pdev->resource[i]); + } + } + + /* parent PCI-PCI bridge */ + pdev = pdev->bus->self; + if ((pdev->class >> 8) != PCI_CLASS_BRIDGE_HOST) { + for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) { + if (pci_resource_flags(pdev, i) & IORESOURCE_IO && + pdev->resource[i].parent != NULL) { + release_resource(&pdev->resource[i]); + } + } + } +} + +static void __devinit quirk_iomul_reassign_ioresource(struct pci_dev *pdev) +{ + struct pci_iomul_switch *sw; + struct pci_iomul_slot *slot; + struct pci_iomul_func *sf; + struct pci_iomul_func *f; + + 
pci_iomul_find_switch_slot(pci_domain_nr(pdev->bus), pdev->bus->number, + PCI_SLOT(pdev->devfn), &sw, &slot); + if (sw == NULL || slot == NULL) + return; + + if (sw->io_base == 0) + pci_iomul_read_bridge_io(sw); + if (sw->io_base == 0 || sw->io_base > sw->io_limit) + return; + + sf = sw->func; + f = slot->func[PCI_FUNC(pdev->devfn)]; + if (sf == NULL || f == NULL) + /* + * this case can happen when all the specified devices + * don''t have io space + */ + return; + + if ((pci_domain_nr(pdev->bus) != sf->segment || + pdev->bus->number != sf->bus || + PCI_SLOT(pdev->devfn) != PCI_SLOT(sf->devfn)) && + PCI_FUNC(pdev->devfn) == 0) { + pci_iomul_setup_brige(pdev->bus->self, + sw->io_base, sw->io_limit); + } + + BUG_ON(f->io_size > sw->io_limit - sw->io_base + 1); + if (/* f == sf */ + pci_domain_nr(pdev->bus) == sf->segment && + pdev->bus->number == sf->bus && + pdev->devfn == sf->devfn) + pci_iomul_release_io_resource(pdev, f); + else + pci_iomul_setup_dev(pdev, f, sw->io_base); +} + +DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, + quirk_iomul_reassign_ioresource); + +/*****************************************************************************/ +struct pci_iomul_data { + struct mutex lock; + + struct pci_dev *pdev; + struct pci_iomul_switch *sw; + struct pci_iomul_slot *slot; + struct pci_iomul_func *func; +}; + +static int pci_iomul_func_ioport(struct pci_iomul_func *func, + uint8_t bar, uint64_t offset, int *port) +{ + if (!(func->io_bar & (1 << bar))) + return -EINVAL; + + *port = func->resource[bar].start + offset; + if (*port < func->resource[bar].start || + *port > func->resource[bar].end) + return -EINVAL; + + return 0; +} + +static void __pci_iomul_enable_io(struct pci_dev *pdev) +{ + uint16_t cmd; + pci_dev_get(pdev); + pci_read_config_word(pdev, PCI_COMMAND, &cmd); + cmd |= PCI_COMMAND_IO; + pci_write_config_word(pdev, PCI_COMMAND, cmd); +} + +static void __pci_iomul_disable_io(struct pci_dev *pdev) +{ + uint16_t cmd; + pci_read_config_word(pdev, 
PCI_COMMAND, &cmd); + cmd &= ~PCI_COMMAND_IO; + pci_write_config_word(pdev, PCI_COMMAND, cmd); + pci_dev_put(pdev); +} + +static int pci_iomul_open(struct inode *inode, struct file *filp) +{ + struct pci_iomul_data *iomul; + iomul = kmalloc(sizeof(*iomul), GFP_KERNEL); + if (iomul == NULL) + return -ENOMEM; + + mutex_init(&iomul->lock); + iomul->pdev = NULL; + iomul->sw = NULL; + iomul->func = NULL; + filp->private_data = (void*)iomul; + + return 0; +} + +static int pci_iomul_release(struct inode *inode, struct file *filp) +{ + struct pci_iomul_data *iomul + (struct pci_iomul_data*)filp->private_data; + struct pci_iomul_switch *sw; + + mutex_lock(&iomul->lock); + sw = iomul->sw; + if (iomul->pdev != NULL) { + if (sw != NULL) { + mutex_lock(&sw->lock); + if (sw->current_pdev == iomul->pdev) { + __pci_iomul_disable_io(sw->current_pdev); + sw->current_pdev = NULL; + } + sw->count--; + if (sw->count == 0) { + release_region(sw->io_region->start, sw->io_region->end - sw->io_region->start + 1); + sw->io_region = NULL; + } + mutex_unlock(&sw->lock); + } + pci_dev_put(iomul->pdev); + } + mutex_unlock(&iomul->lock); + + kfree(iomul); + return 0; +} + +static long pci_iomul_setup(struct pci_iomul_data *iomul, + struct pci_iomul_setup __user *arg) +{ + long error = 0; + struct pci_iomul_setup setup; + struct pci_iomul_switch *sw; + struct pci_iomul_slot *slot; + struct pci_bus *pbus; + struct pci_dev *pdev = NULL; + + if (copy_from_user(&setup, arg, sizeof(setup))) + return -EFAULT; + + mutex_lock(&iomul->lock); + if (iomul->sw != NULL) { + error = -EBUSY; + goto out; + } + + pci_iomul_find_switch_slot(setup.segment, setup.bus, setup.dev, + &sw, &slot); + if (sw == NULL || slot == NULL) { + error = -ENODEV; + goto out; + } + + if (slot->func[setup.func] == NULL) { + error = -ENODEV; + goto out; + } + + pbus = pci_find_bus(slot->segment, slot->bus); + if (pbus == NULL) { + error = -ENODEV; + goto out; + } + + pdev = pci_get_slot(pbus, PCI_DEVFN(setup.dev, setup.func)); + if 
(pdev == NULL) { + error = -ENODEV; + goto out; + } + + mutex_lock(&sw->lock); + if (sw->count == 0) { + BUG_ON(sw->io_region != NULL); + sw->io_region + request_region(sw->io_base, + sw->io_limit - sw->io_base + 1, + "PCI IO multiplexer"); + if (sw->io_region == NULL) { + mutex_unlock(&sw->lock); + error = -EBUSY; + goto out; + } + } + sw->count++; + mutex_unlock(&sw->lock); + + iomul->pdev = pdev; + iomul->sw = sw; + iomul->slot = slot; + iomul->func = slot->func[setup.func]; + +out: + mutex_unlock(&iomul->lock); + if (error != 0 && pdev != NULL) + pci_dev_put(pdev); + return error; +} + +static long pci_iomul_disable_io(struct pci_iomul_data *iomul) +{ + long error = 0; + struct pci_iomul_switch *sw; + struct pci_dev *pdev; + + mutex_lock(&iomul->lock); + sw = iomul->sw; + pdev = iomul->pdev; + if (sw == NULL || pdev == NULL) { + mutex_unlock(&iomul->lock); + return -ENODEV; + } + mutex_lock(&sw->lock); + + if (sw->current_pdev == pdev) { + __pci_iomul_disable_io(pdev); + sw->current_pdev = NULL; + } + + mutex_unlock(&sw->lock); + mutex_unlock(&iomul->lock); + return error; +} + +static void pci_iomul_switch(struct pci_iomul_switch *sw, + struct pci_dev *next_pdev) +{ + if (sw->current_pdev == next_pdev) + /* nothing to do */ + return; + + if (sw->current_pdev != NULL) + __pci_iomul_disable_io(sw->current_pdev); + + __pci_iomul_enable_io(next_pdev); + sw->current_pdev = next_pdev; +} + +static long pci_iomul_in(struct pci_iomul_data *iomul, + struct pci_iomul_in __user *arg) +{ + struct pci_iomul_in in; + struct pci_iomul_switch *sw; + struct pci_iomul_func *func; + + long error = 0; + int port; + uint32_t value = 0; + + if (copy_from_user(&in, arg, sizeof(in))) + return -EFAULT; + + mutex_lock(&iomul->lock); + sw = iomul->sw; + func = iomul->func; + if (sw == NULL || func == NULL) { + mutex_unlock(&iomul->lock); + return -ENODEV; + } + + mutex_lock(&sw->lock); + + error = pci_iomul_func_ioport(func, in.bar, in.offset, &port); + if (error) + goto out; + + 
pci_iomul_switch(sw, iomul->pdev); + switch (in.size) { + case 4: + value = inl(port); + break; + case 2: + value = inw(port); + break; + case 1: + value = inb(port); + break; + default: + error = -EINVAL; + break; + } + +out: + mutex_unlock(&sw->lock); + mutex_unlock(&iomul->lock); + + if (error == 0 && put_user(value, &arg->value)) + return -EFAULT; + return error; +} + +static long pci_iomul_out(struct pci_iomul_data *iomul, + struct pci_iomul_out __user *arg) +{ + struct pci_iomul_in out; + struct pci_iomul_switch *sw; + struct pci_iomul_func *func; + + long error = 0; + int port; + + if (copy_from_user(&out, arg, sizeof(out))) + return -EFAULT; + + mutex_lock(&iomul->lock); + sw = iomul->sw; + func = iomul->func; + if (sw == NULL || func == NULL) { + mutex_unlock(&iomul->lock); + return -ENODEV; + } + + mutex_lock(&sw->lock); + + error = pci_iomul_func_ioport(func, out.bar, out.offset, &port); + if (error) + goto out; + + pci_iomul_switch(sw, iomul->pdev); + switch (out.size) { + case 4: + outl(out.value, port); + break; + case 2: + outw(out.value, port); + break; + case 1: + outb(out.value, port); + break; + default: + error = -EINVAL; + break; + } + +out: + mutex_unlock(&sw->lock); + mutex_unlock(&iomul->lock); + return error; +} + +static long pci_iomul_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + long error; + struct pci_iomul_data *iomul + (struct pci_iomul_data*)filp->private_data; + + switch (cmd) { + case PCI_IOMUL_SETUP: + error = pci_iomul_setup(iomul, + (struct pci_iomul_setup __user *)arg); + break; + case PCI_IOMUL_DISABLE_IO: + error = pci_iomul_disable_io(iomul); + break; + case PCI_IOMUL_IN: + error = pci_iomul_in(iomul, (struct pci_iomul_in __user *)arg); + break; + case PCI_IOMUL_OUT: + error = pci_iomul_out(iomul, + (struct pci_iomul_out __user *)arg); + break; + default: + error = -ENOSYS; + break; + } + + return error; +} + +static const struct file_operations pci_iomul_fops = { + .owner = THIS_MODULE, + + .open = 
pci_iomul_open, /* nonseekable_open */ + .release = pci_iomul_release, + + .unlocked_ioctl = pci_iomul_ioctl, +}; + +static struct miscdevice pci_iomul_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "pci_iomul", + .fops = &pci_iomul_fops, +}; + +static int pci_iomul_init(void) +{ + int error; + error = misc_register(&pci_iomul_miscdev); + if (error != 0) { + printk(KERN_ALERT "Couldn''t register /dev/misc/pci_iomul"); + return error; + } + printk("PCI IO multiplexer device installed.\n"); + return 0; +} + +#if 0 +static void pci_iomul_cleanup(void) +{ + misc_deregister(&pci_iomul_miscdev); +} +#endif + +/* + * This must be called after pci fixup final which is called by + * device_initcall(pci_init). + */ +late_initcall(pci_iomul_init); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Isaku Yamahata <yamahata@valinux.co.jp>"); +MODULE_DESCRIPTION("PCI IO space multiplexing driver"); diff --git a/drivers/pci/iomulti.h b/drivers/pci/iomulti.h new file mode 100644 --- /dev/null +++ b/drivers/pci/iomulti.h @@ -0,0 +1,51 @@ +#ifndef PCI_IOMULTI_H +#define PCI_IOMULTI_H +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (c) 2009 Isaku Yamahata + * VA Linux Systems Japan K.K. 
+ * + */ + +struct pci_iomul_setup { + uint16_t segment; + uint8_t bus; + uint8_t dev; + uint8_t func; +}; + +struct pci_iomul_in { + uint8_t bar; + uint64_t offset; + + uint8_t size; + uint32_t value; +}; + +struct pci_iomul_out { + uint8_t bar; + uint64_t offset; + + uint8_t size; + uint32_t value; +}; + +#define PCI_IOMUL_SETUP _IOW (''P'', 0, struct pci_iomul_setup) +#define PCI_IOMUL_DISABLE_IO _IO (''P'', 1) +#define PCI_IOMUL_IN _IOWR(''P'', 2, struct pci_iomul_in) +#define PCI_IOMUL_OUT _IOW (''P'', 3, struct pci_iomul_out) + +#endif /* PCI_IOMULTI_H */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel