Espen Skoglund
2008-Jul-04 16:31 UTC
[Xen-devel] [PATCH 0/7] PCI device register/unregister + pci_dev cleanups
Here's a set of patches that adds a PCI device register/unregister hypercall to Xen. The patches add support for SR-IOV, ARI, and hot-pluggable PCI devices. There's also a bunch of patches that clean up some of the Xen internal handling of pci_dev structures. The first three patches only deal with cleanups of pci_dev structure handling. 1 - xen: Move pci_dev lists from hvm to arch_domain 2 - xen: Restructure VT-d device scope and PCI bridge handling 3 - xen: Add management and locking of PCI device structures The next two patches add the actual hypercall. 4 - xen: Add hypercall for adding and removing PCI devices 5 - xenlinux: Add hypercall for adding and removing PCI devices The next patch comes in two options. 6a - xenlinux: Option 1: Hook Linux's PCI probe and remove callbacks 6b - xenlinux: Option 2: Add PCI device add/remove guards to Linux The final patch removes the probing of the PCI bus in the VT-d code. 7 - Remove PCI device enumeration in VT-d code I have not tested the code with AMD machines. I've just added dummy callbacks for the AMD IOMMU. With these patches the PCI device parsing is pretty much taken completely out of Xen itself. eSk _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Espen Skoglund
2008-Jul-04 16:35 UTC
[Xen-devel] [PATCH 1/7] PCI device register/unregister + pci_dev cleanups
Move pci_dev lists from hvm to arch_domain Move the pci_dev list from hvm to arch_domain since PCI devs are no longer hvm specific. Also removed locking for pci_dev lists. Will reintroduce them later. Signed-off-by: Espen Skoglund <espen.skoglund@netronome.com> -- arch/x86/domain.c | 2 ++ arch/x86/hvm/hvm.c | 2 +- arch/x86/hvm/svm/svm.c | 2 +- arch/x86/hvm/vmx/vmcs.c | 3 +-- arch/x86/hvm/vmx/vmx.c | 2 +- arch/x86/mm/shadow/multi.c | 3 +-- drivers/passthrough/amd/pci_amd_iommu.c | 17 ++++------------- drivers/passthrough/iommu.c | 9 +++------ drivers/passthrough/vtd/dmar.h | 4 ---- drivers/passthrough/vtd/iommu.c | 17 ++++------------- include/asm-x86/domain.h | 4 ++++ include/asm-x86/hvm/svm/amd-iommu-proto.h | 4 ---- include/xen/hvm/iommu.h | 5 ----- include/xen/pci.h | 7 ++++++- 14 files changed, 28 insertions(+), 53 deletions(-) -- diff -r d826a1479fec xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Mon Jun 30 19:52:08 2008 +0100 +++ b/xen/arch/x86/domain.c Tue Jul 01 17:10:44 2008 +0100 @@ -349,6 +349,8 @@ is_hvm_domain(d) && hvm_funcs.hap_supported && (domcr_flags & DOMCRF_hap); + + INIT_LIST_HEAD(&d->arch.pdev_list); d->arch.relmem = RELMEM_not_started; INIT_LIST_HEAD(&d->arch.relmem_list); diff -r d826a1479fec xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Mon Jun 30 19:52:08 2008 +0100 +++ b/xen/arch/x86/hvm/hvm.c Tue Jul 01 17:10:44 2008 +0100 @@ -903,7 +903,7 @@ } } - if ( !list_empty(&domain_hvm_iommu(v->domain)->pdev_list) ) + if ( has_arch_pdevs(v->domain) ) { if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) ) { diff -r d826a1479fec xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Mon Jun 30 19:52:08 2008 +0100 +++ b/xen/arch/x86/hvm/svm/svm.c Tue Jul 01 17:10:44 2008 +0100 @@ -1132,7 +1132,7 @@ static void svm_wbinvd_intercept(void) { - if ( !list_empty(&(domain_hvm_iommu(current->domain)->pdev_list)) ) + if ( has_arch_pdevs(current->domain) ) on_each_cpu(wbinvd_ipi, NULL, 1, 1); } diff -r d826a1479fec xen/arch/x86/hvm/vmx/vmcs.c 
--- a/xen/arch/x86/hvm/vmx/vmcs.c Mon Jun 30 19:52:08 2008 +0100 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Tue Jul 01 17:10:44 2008 +0100 @@ -849,8 +849,7 @@ * there is no wbinvd exit, or * 2: execute wbinvd on all dirty pCPUs when guest wbinvd exits. */ - if ( !list_empty(&(domain_hvm_iommu(v->domain)->pdev_list)) && - !cpu_has_wbinvd_exiting ) + if ( has_arch_pdevs(v->domain) && !cpu_has_wbinvd_exiting ) { int cpu = v->arch.hvm_vmx.active_cpu; if ( cpu != -1 ) diff -r d826a1479fec xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Mon Jun 30 19:52:08 2008 +0100 +++ b/xen/arch/x86/hvm/vmx/vmx.c Tue Jul 01 17:10:44 2008 +0100 @@ -1926,7 +1926,7 @@ static void vmx_wbinvd_intercept(void) { - if ( list_empty(&(domain_hvm_iommu(current->domain)->pdev_list)) ) + if ( !has_arch_pdevs(current->domain) ) return; if ( cpu_has_wbinvd_exiting ) diff -r d826a1479fec xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Mon Jun 30 19:52:08 2008 +0100 +++ b/xen/arch/x86/mm/shadow/multi.c Tue Jul 01 17:10:44 2008 +0100 @@ -840,8 +840,7 @@ * For HVM domains with direct access to MMIO areas, set the correct * caching attributes in the shadows to match what was asked for. 
*/ - if ( (level == 1) && is_hvm_domain(d) && - !list_empty(&(domain_hvm_iommu(d)->pdev_list)) && + if ( (level == 1) && is_hvm_domain(d) && has_arch_pdevs(d) && !is_xen_heap_mfn(mfn_x(target_mfn)) ) { unsigned int type; diff -r d826a1479fec xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Mon Jun 30 19:52:08 2008 +0100 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Tue Jul 01 17:10:44 2008 +0100 @@ -292,7 +292,6 @@ static void amd_iommu_setup_dom0_devices(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); struct amd_iommu *iommu; struct pci_dev *pdev; int bus, dev, func; @@ -314,7 +313,7 @@ pdev = xmalloc(struct pci_dev); pdev->bus = bus; pdev->devfn = PCI_DEVFN(dev, func); - list_add_tail(&pdev->list, &hd->pdev_list); + list_add_tail(&pdev->domain_list, &d->arch.pdev_list); bdf = (bus << 8) | pdev->devfn; /* supported device? */ @@ -490,12 +489,9 @@ static int reassign_device( struct domain *source, struct domain *target, u8 bus, u8 devfn) { - struct hvm_iommu *source_hd = domain_hvm_iommu(source); - struct hvm_iommu *target_hd = domain_hvm_iommu(target); struct pci_dev *pdev; struct amd_iommu *iommu; int bdf; - unsigned long flags; for_each_pdev ( source, pdev ) { @@ -520,11 +516,7 @@ amd_iommu_disable_domain_device(source, iommu, bdf); /* Move pci device from the source domain to target domain. 
*/ - spin_lock_irqsave(&source_hd->iommu_list_lock, flags); - spin_lock_irqsave(&target_hd->iommu_list_lock, flags); - list_move(&pdev->list, &target_hd->pdev_list); - spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags); - spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags); + list_move(&pdev->domain_list, &target->arch.pdev_list); amd_iommu_setup_domain_device(target, iommu, bdf); amd_iov_info("reassign %x:%x.%x domain %d -> domain %d\n", @@ -559,12 +551,11 @@ static void release_domain_devices(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); struct pci_dev *pdev; - while ( !list_empty(&hd->pdev_list) ) + while ( has_arch_pdevs(d) ) { - pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list); + pdev = list_entry(d->arch.pdev_list.next, typeof(*pdev), domain_list); pdev_flr(pdev->bus, pdev->devfn); amd_iov_info("release domain %d devices %x:%x.%x\n", d->domain_id, pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); diff -r d826a1479fec xen/drivers/passthrough/iommu.c --- a/xen/drivers/passthrough/iommu.c Mon Jun 30 19:52:08 2008 +0100 +++ b/xen/drivers/passthrough/iommu.c Tue Jul 01 17:10:44 2008 +0100 @@ -35,8 +35,6 @@ struct hvm_iommu *hd = domain_hvm_iommu(domain); spin_lock_init(&hd->mapping_lock); - spin_lock_init(&hd->iommu_list_lock); - INIT_LIST_HEAD(&hd->pdev_list); INIT_LIST_HEAD(&hd->g2m_ioport_list); if ( !iommu_enabled ) @@ -68,7 +66,7 @@ if ( (rc = hd->platform_ops->assign_device(d, bus, devfn)) ) return rc; - if ( has_iommu_pdevs(d) && !is_hvm_domain(d) && !need_iommu(d) ) + if ( has_arch_pdevs(d) && !is_hvm_domain(d) && !need_iommu(d) ) { d->need_iommu = 1; return iommu_populate_page_table(d); @@ -190,7 +188,7 @@ hd->platform_ops->reassign_device(d, dom0, bus, devfn); - if ( !has_iommu_pdevs(d) && need_iommu(d) ) + if ( !has_arch_pdevs(d) && need_iommu(d) ) { d->need_iommu = 0; hd->platform_ops->teardown(d); @@ -242,8 +240,7 @@ group_id = ops->get_device_group_id(bus, devfn); - list_for_each_entry(pdev, - 
&(dom0->arch.hvm_domain.hvm_iommu.pdev_list), list) + for_each_pdev( d, pdev ) { if ( (pdev->bus == bus) && (pdev->devfn == devfn) ) continue; diff -r d826a1479fec xen/drivers/passthrough/vtd/dmar.h --- a/xen/drivers/passthrough/vtd/dmar.h Mon Jun 30 19:52:08 2008 +0100 +++ b/xen/drivers/passthrough/vtd/dmar.h Tue Jul 01 17:10:44 2008 +0100 @@ -70,10 +70,6 @@ list_for_each_entry(iommu, \ &(domain->arch.hvm_domain.hvm_iommu.iommu_list), list) -#define for_each_pdev(domain, pdev) \ - list_for_each_entry(pdev, \ - &(domain->arch.hvm_domain.hvm_iommu.pdev_list), list) - #define for_each_drhd_unit(drhd) \ list_for_each_entry(drhd, &acpi_drhd_units, list) #define for_each_rmrr_device(rmrr, pdev) \ diff -r d826a1479fec xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Mon Jun 30 19:52:08 2008 +0100 +++ b/xen/drivers/passthrough/vtd/iommu.c Tue Jul 01 17:10:44 2008 +0100 @@ -1023,8 +1023,6 @@ u64 i; struct acpi_drhd_unit *drhd; - INIT_LIST_HEAD(&hd->pdev_list); - drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list); iommu = drhd->iommu; @@ -1366,12 +1364,10 @@ u8 bus, u8 devfn) { struct hvm_iommu *source_hd = domain_hvm_iommu(source); - struct hvm_iommu *target_hd = domain_hvm_iommu(target); struct pci_dev *pdev, *pdev2; struct acpi_drhd_unit *drhd; struct iommu *iommu; int status; - unsigned long flags; int found = 0; pdev_flr(bus, devfn); @@ -1388,11 +1384,7 @@ domain_context_unmap(iommu, pdev); /* Move pci device from the source domain to target domain. 
*/ - spin_lock_irqsave(&source_hd->iommu_list_lock, flags); - spin_lock_irqsave(&target_hd->iommu_list_lock, flags); - list_move(&pdev->list, &target_hd->pdev_list); - spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags); - spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags); + list_move(&pdev->domain_list, &target->arch.pdev_list); for_each_pdev ( source, pdev2 ) { @@ -1413,12 +1405,11 @@ void return_devices_to_dom0(struct domain *d) { - struct hvm_iommu *hd = domain_hvm_iommu(d); struct pci_dev *pdev; - while ( !list_empty(&hd->pdev_list) ) + while ( has_arch_pdevs(d) ) { - pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list); + pdev = list_entry(d->arch.pdev_list.next, typeof(*pdev), domain_list); pci_cleanup_msi(pdev->bus, pdev->devfn); reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn); } @@ -1631,7 +1622,7 @@ pdev = xmalloc(struct pci_dev); pdev->bus = bus; pdev->devfn = PCI_DEVFN(dev, func); - list_add_tail(&pdev->list, &hd->pdev_list); + list_add_tail(&pdev->domain_list, &d->arch.pdev_list); drhd = acpi_find_matched_drhd_unit(pdev); ret = domain_context_mapping(d, drhd->iommu, pdev); diff -r d826a1479fec xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Mon Jun 30 19:52:08 2008 +0100 +++ b/xen/include/asm-x86/domain.h Tue Jul 01 17:10:44 2008 +0100 @@ -228,6 +228,7 @@ struct rangeset *ioport_caps; uint32_t pci_cf8; + struct list_head pdev_list; struct hvm_domain hvm_domain; struct paging_domain paging; @@ -265,6 +266,9 @@ cpuid_input_t cpuids[MAX_CPUID_INPUT]; } __cacheline_aligned; + +#define has_arch_pdevs(d) (!list_empty(&(d)->arch.pdev_list)) + #ifdef __i386__ struct pae_l3_cache { diff -r d826a1479fec xen/include/asm-x86/hvm/svm/amd-iommu-proto.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Mon Jun 30 19:52:08 2008 +0100 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Tue Jul 01 17:10:44 2008 +0100 @@ -27,10 +27,6 @@ #define for_each_amd_iommu(amd_iommu) \ list_for_each_entry(amd_iommu, \ 
&amd_iommu_head, list) - -#define for_each_pdev(domain, pdev) \ - list_for_each_entry(pdev, \ - &(domain->arch.hvm_domain.hvm_iommu.pdev_list), list) #define DMA_32BIT_MASK 0x00000000ffffffffULL #define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK) diff -r d826a1479fec xen/include/xen/hvm/iommu.h --- a/xen/include/xen/hvm/iommu.h Mon Jun 30 19:52:08 2008 +0100 +++ b/xen/include/xen/hvm/iommu.h Tue Jul 01 17:10:44 2008 +0100 @@ -36,8 +36,6 @@ }; struct hvm_iommu { - spinlock_t iommu_list_lock; /* protect iommu specific lists */ - struct list_head pdev_list; /* direct accessed pci devices */ u64 pgd_maddr; /* io page directory machine address */ spinlock_t mapping_lock; /* io page table lock */ int agaw; /* adjusted guest address width, 0 is level 2 30-bit */ @@ -55,7 +53,4 @@ struct iommu_ops *platform_ops; }; -#define has_iommu_pdevs(domain) \ - (!list_empty(&(domain->arch.hvm_domain.hvm_iommu.pdev_list))) - #endif /* __ASM_X86_HVM_IOMMU_H__ */ diff -r d826a1479fec xen/include/xen/pci.h --- a/xen/include/xen/pci.h Mon Jun 30 19:52:08 2008 +0100 +++ b/xen/include/xen/pci.h Tue Jul 01 17:10:44 2008 +0100 @@ -25,7 +25,7 @@ #define PCI_FUNC(devfn) ((devfn) & 0x07) struct pci_dev { - struct list_head list; + struct list_head domain_list; struct list_head msi_dev_list; u8 bus; u8 devfn; @@ -50,4 +50,9 @@ int pci_find_cap_offset(u8 bus, u8 dev, u8 func, u8 cap); int pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap); + +#define for_each_pdev(domain, pdev) \ + list_for_each_entry(pdev, &(domain->arch.pdev_list), domain_list) + + #endif /* __XEN_PCI_H__ */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Espen Skoglund
2008-Jul-04 16:35 UTC
[Xen-devel] [PATCH 2/7] PCI device register/unregister + pci_dev cleanups
Restructure VT-d device scope and PCI bridge handling Create a bitmap for each device scope indicating which buses are covered by the scope. Upon mapping PCI-PCI bridges we now detect whether we have a bridge to a non-PCIe bus. If so, all devices mapped on that bus are squashed to the requester-id of the bridge. Bridges to PCIe busses are ignored. The requester-id squashing also determines the iommu device group id for the device. Signed-off-by: Espen Skoglund <espen.skoglund@netronome.com> -- drivers/passthrough/vtd/dmar.c | 338 +++++++++++++------------------------ drivers/passthrough/vtd/dmar.h | 40 ++-- drivers/passthrough/vtd/intremap.c | 4 drivers/passthrough/vtd/iommu.c | 319 ++++++++++++++++------------------ drivers/passthrough/vtd/utils.c | 8 include/xen/pci.h | 19 +- 6 files changed, 312 insertions(+), 416 deletions(-) -- diff -r b03d6bcc0178 xen/drivers/passthrough/vtd/dmar.c --- a/xen/drivers/passthrough/vtd/dmar.c Fri Jul 04 16:12:44 2008 +0100 +++ b/xen/drivers/passthrough/vtd/dmar.c Fri Jul 04 16:27:43 2008 +0100 @@ -44,6 +44,26 @@ LIST_HEAD(acpi_atsr_units); u8 dmar_host_address_width; + +void dmar_scope_add_buses(struct dmar_scope *scope, u16 sec_bus, u16 sub_bus) +{ + sub_bus &= 0xff; + if (sec_bus > sub_bus) + return; + + while ( sec_bus <= sub_bus ) + set_bit(sec_bus++, scope->buses); +} + +void dmar_scope_remove_buses(struct dmar_scope *scope, u16 sec_bus, u16 sub_bus) +{ + sub_bus &= 0xff; + if (sec_bus > sub_bus) + return; + + while ( sec_bus <= sub_bus ) + clear_bit(sec_bus++, scope->buses); +} static int __init acpi_register_drhd_unit(struct acpi_drhd_unit *drhd) { @@ -94,21 +114,6 @@ return NULL; } -static int acpi_pci_device_match(struct pci_dev *devices, int cnt, - struct pci_dev *dev) -{ - int i; - - for ( i = 0; i < cnt; i++ ) - { - if ( (dev->bus == devices->bus) && - (dev->devfn == devices->devfn) ) - return 1; - devices++; - } - return 0; -} - static int __init acpi_register_atsr_unit(struct acpi_atsr_unit *atsr) { /* @@ -122,39 
+127,36 @@ return 0; } -struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *dev) +struct acpi_drhd_unit * acpi_find_matched_drhd_unit(u8 bus, u8 devfn) { struct acpi_drhd_unit *drhd; - struct acpi_drhd_unit *include_all_drhd; + struct acpi_drhd_unit *found = NULL, *include_all = NULL; + int i; - include_all_drhd = NULL; list_for_each_entry ( drhd, &acpi_drhd_units, list ) { + for (i = 0; i < drhd->scope.devices_cnt; i++) + if ( drhd->scope.devices[i] == PCI_BDF2(bus, devfn) ) + return drhd; + + if ( test_bit(bus, drhd->scope.buses) ) + found = drhd; + if ( drhd->include_all ) - { - include_all_drhd = drhd; - continue; - } - - if ( acpi_pci_device_match(drhd->devices, - drhd->devices_cnt, dev) ) - return drhd; + include_all = drhd; } - if ( include_all_drhd ) - return include_all_drhd; - - return NULL; + return found ? found : include_all; } +/* + * Count number of devices in device scope. Do not include PCI sub + * hierarchies. + */ static int scope_device_count(void *start, void *end) { struct acpi_dev_scope *scope; - u16 bus, sub_bus, sec_bus; - struct acpi_pci_path *path; - int depth, count = 0; - u8 dev, func; - u32 l; + int count = 0; while ( start < end ) { @@ -162,73 +164,14 @@ if ( (scope->length < MIN_SCOPE_LEN) || (scope->dev_type >= ACPI_DEV_ENTRY_COUNT) ) { - dprintk(XENLOG_WARNING VTDPREFIX, "Invalid device scope\n"); + dprintk(XENLOG_WARNING VTDPREFIX, "Invalid device scope.\n"); return -EINVAL; } - path = (struct acpi_pci_path *)(scope + 1); - bus = scope->start_bus; - depth = (scope->length - sizeof(struct acpi_dev_scope)) - / sizeof(struct acpi_pci_path); - while ( --depth > 0 ) - { - bus = pci_conf_read8( - bus, path->dev, path->fn, PCI_SECONDARY_BUS); - path++; - } - - if ( scope->dev_type == ACPI_DEV_ENDPOINT ) - { - dprintk(XENLOG_INFO VTDPREFIX, - "found endpoint: bdf = %x:%x:%x\n", - bus, path->dev, path->fn); + if ( scope->dev_type == ACPI_DEV_ENDPOINT || + scope->dev_type == ACPI_DEV_IOAPIC || + scope->dev_type == 
ACPI_DEV_MSI_HPET ) count++; - } - else if ( scope->dev_type == ACPI_DEV_P2PBRIDGE ) - { - dprintk(XENLOG_INFO VTDPREFIX, - "found bridge: bdf = %x:%x:%x\n", - bus, path->dev, path->fn); - sec_bus = pci_conf_read8( - bus, path->dev, path->fn, PCI_SECONDARY_BUS); - sub_bus = pci_conf_read8( - bus, path->dev, path->fn, PCI_SUBORDINATE_BUS); - - while ( sec_bus <= sub_bus ) - { - for ( dev = 0; dev < 32; dev++ ) - { - for ( func = 0; func < 8; func++ ) - { - l = pci_conf_read32( - sec_bus, dev, func, PCI_VENDOR_ID); - - /* some broken boards return 0 or - * ~0 if a slot is empty - */ - if ( l == 0xffffffff || l == 0x00000000 || - l == 0x0000ffff || l == 0xffff0000 ) - break; - count++; - } - } - sec_bus++; - } - } - else if ( scope->dev_type == ACPI_DEV_IOAPIC ) - { - dprintk(XENLOG_INFO VTDPREFIX, - "found IOAPIC: bdf = %x:%x:%x\n", - bus, path->dev, path->fn); - count++; - } - else - { - dprintk(XENLOG_INFO VTDPREFIX, - "found MSI HPET: bdf = %x:%x:%x\n", - bus, path->dev, path->fn); - count++; - } start += scope->length; } @@ -236,132 +179,96 @@ return count; } -static int __init acpi_parse_dev_scope( - void *start, void *end, void *acpi_entry, int type) + +static int __init acpi_parse_dev_scope(void *start, void *end, + void *acpi_entry, int type) { - struct acpi_dev_scope *scope; + struct dmar_scope *scope = acpi_entry; + struct acpi_ioapic_unit *acpi_ioapic_unit; + struct acpi_dev_scope *acpi_scope; u16 bus, sub_bus, sec_bus; struct acpi_pci_path *path; - struct acpi_ioapic_unit *acpi_ioapic_unit = NULL; - int depth; - struct pci_dev *pdev; - u8 dev, func; - u32 l; + int depth, cnt, didx = 0; - int *cnt = NULL; - struct pci_dev **devices = NULL; - struct acpi_drhd_unit *dmaru = (struct acpi_drhd_unit *) acpi_entry; - struct acpi_rmrr_unit *rmrru = (struct acpi_rmrr_unit *) acpi_entry; - struct acpi_atsr_unit *atsru = (struct acpi_atsr_unit *) acpi_entry; + if ( (cnt = scope_device_count(start, end)) < 0 ) + return cnt; - switch (type) { - case DMAR_TYPE: - cnt = 
&(dmaru->devices_cnt); - devices = &(dmaru->devices); - break; - case RMRR_TYPE: - cnt = &(rmrru->devices_cnt); - devices = &(rmrru->devices); - break; - case ATSR_TYPE: - cnt = &(atsru->devices_cnt); - devices = &(atsru->devices); - break; - default: - dprintk(XENLOG_ERR VTDPREFIX, "invalid vt-d acpi entry type\n"); + scope->devices_cnt = cnt; + if ( cnt > 0 ) + { + scope->devices = xmalloc_array(u16, cnt); + if ( !scope->devices ) + return -ENOMEM; + memset(scope->devices, 0, sizeof(u16) * cnt); } - *cnt = scope_device_count(start, end); - if ( *cnt == 0 ) - { - dprintk(XENLOG_INFO VTDPREFIX, "acpi_parse_dev_scope: no device\n"); - return 0; - } - - *devices = xmalloc_array(struct pci_dev, *cnt); - if ( !*devices ) - return -ENOMEM; - memset(*devices, 0, sizeof(struct pci_dev) * (*cnt)); - - pdev = *devices; while ( start < end ) { - scope = start; - path = (struct acpi_pci_path *)(scope + 1); - depth = (scope->length - sizeof(struct acpi_dev_scope)) + acpi_scope = start; + path = (struct acpi_pci_path *)(acpi_scope + 1); + depth = (acpi_scope->length - sizeof(struct acpi_dev_scope)) / sizeof(struct acpi_pci_path); - bus = scope->start_bus; + bus = acpi_scope->start_bus; while ( --depth > 0 ) { - bus = pci_conf_read8( - bus, path->dev, path->fn, PCI_SECONDARY_BUS); + bus = pci_conf_read8(bus, path->dev, path->fn, PCI_SECONDARY_BUS); path++; } + + switch ( acpi_scope->dev_type ) + { + case ACPI_DEV_P2PBRIDGE: + { + sec_bus = pci_conf_read8( + bus, path->dev, path->fn, PCI_SECONDARY_BUS); + sub_bus = pci_conf_read8( + bus, path->dev, path->fn, PCI_SUBORDINATE_BUS); + dprintk(XENLOG_INFO VTDPREFIX, + "found bridge: bdf = %x:%x.%x sec = %x sub = %x\n", + bus, path->dev, path->fn, sec_bus, sub_bus); - if ( scope->dev_type == ACPI_DEV_ENDPOINT ) + dmar_scope_add_buses(scope, sec_bus, sub_bus); + break; + } + + case ACPI_DEV_MSI_HPET: + dprintk(XENLOG_INFO VTDPREFIX, "found MSI HPET: bdf = %x:%x.%x\n", + bus, path->dev, path->fn); + scope->devices[didx++] = PCI_BDF(bus, 
path->dev, path->fn); + break; + + case ACPI_DEV_ENDPOINT: + dprintk(XENLOG_INFO VTDPREFIX, "found endpoint: bdf = %x:%x.%x\n", + bus, path->dev, path->fn); + scope->devices[didx++] = PCI_BDF(bus, path->dev, path->fn); + break; + + case ACPI_DEV_IOAPIC: { - dprintk(XENLOG_INFO VTDPREFIX, - "found endpoint: bdf = %x:%x:%x\n", + dprintk(XENLOG_INFO VTDPREFIX, "found IOAPIC: bdf = %x:%x.%x\n", bus, path->dev, path->fn); - pdev->bus = bus; - pdev->devfn = PCI_DEVFN(path->dev, path->fn); - pdev++; + + if ( type == DMAR_TYPE ) + { + struct acpi_drhd_unit *drhd = acpi_entry; + acpi_ioapic_unit = xmalloc(struct acpi_ioapic_unit); + if ( !acpi_ioapic_unit ) + return -ENOMEM; + acpi_ioapic_unit->apic_id = acpi_scope->enum_id; + acpi_ioapic_unit->ioapic.bdf.bus = bus; + acpi_ioapic_unit->ioapic.bdf.dev = path->dev; + acpi_ioapic_unit->ioapic.bdf.func = path->fn; + list_add(&acpi_ioapic_unit->list, &drhd->ioapic_list); + } + + scope->devices[didx++] = PCI_BDF(bus, path->dev, path->fn); + break; } - else if ( scope->dev_type == ACPI_DEV_P2PBRIDGE ) - { - dprintk(XENLOG_INFO VTDPREFIX, - "found bridge: bus = %x dev = %x func = %x\n", - bus, path->dev, path->fn); - sec_bus = pci_conf_read8( - bus, path->dev, path->fn, PCI_SECONDARY_BUS); - sub_bus = pci_conf_read8( - bus, path->dev, path->fn, PCI_SUBORDINATE_BUS); + } - while ( sec_bus <= sub_bus ) - { - for ( dev = 0; dev < 32; dev++ ) - { - for ( func = 0; func < 8; func++ ) - { - l = pci_conf_read32( - sec_bus, dev, func, PCI_VENDOR_ID); - - /* some broken boards return 0 or - * ~0 if a slot is empty - */ - if ( l == 0xffffffff || l == 0x00000000 || - l == 0x0000ffff || l == 0xffff0000 ) - break; - - pdev->bus = sec_bus; - pdev->devfn = PCI_DEVFN(dev, func); - pdev++; - } - } - sec_bus++; - } - } - else if ( scope->dev_type == ACPI_DEV_IOAPIC ) - { - acpi_ioapic_unit = xmalloc(struct acpi_ioapic_unit); - if ( !acpi_ioapic_unit ) - return -ENOMEM; - acpi_ioapic_unit->apic_id = scope->enum_id; - acpi_ioapic_unit->ioapic.bdf.bus 
= bus; - acpi_ioapic_unit->ioapic.bdf.dev = path->dev; - acpi_ioapic_unit->ioapic.bdf.func = path->fn; - list_add(&acpi_ioapic_unit->list, &dmaru->ioapic_list); - dprintk(XENLOG_INFO VTDPREFIX, - "found IOAPIC: bus = %x dev = %x func = %x\n", - bus, path->dev, path->fn); - } - else - dprintk(XENLOG_INFO VTDPREFIX, - "found MSI HPET: bus = %x dev = %x func = %x\n", - bus, path->dev, path->fn); - start += scope->length; - } + start += acpi_scope->length; + } return 0; } @@ -370,10 +277,17 @@ acpi_parse_one_drhd(struct acpi_dmar_entry_header *header) { struct acpi_table_drhd * drhd = (struct acpi_table_drhd *)header; + void *dev_scope_start, *dev_scope_end; struct acpi_drhd_unit *dmaru; int ret = 0; - static int include_all; - void *dev_scope_start, *dev_scope_end; + static int include_all = 0; + + if ( include_all ) + { + dprintk(XENLOG_WARNING VTDPREFIX, + "DMAR unit with INCLUDE_ALL is not not the last unit.\n"); + return -EINVAL; + } dmaru = xmalloc(struct acpi_drhd_unit); if ( !dmaru ) @@ -387,20 +301,13 @@ dmaru->address); dev_scope_start = (void *)(drhd + 1); - dev_scope_end = ((void *)drhd) + header->length; + dev_scope_end = ((void *)drhd) + header->length; ret = acpi_parse_dev_scope(dev_scope_start, dev_scope_end, dmaru, DMAR_TYPE); if ( dmaru->include_all ) { dprintk(XENLOG_INFO VTDPREFIX, "found INCLUDE_ALL\n"); - /* Only allow one INCLUDE_ALL */ - if ( include_all ) - { - dprintk(XENLOG_WARNING VTDPREFIX, - "Only one INCLUDE_ALL device scope is allowed\n"); - ret = -EINVAL; - } include_all = 1; } @@ -430,7 +337,8 @@ dev_scope_end = ((void *)rmrr) + header->length; ret = acpi_parse_dev_scope(dev_scope_start, dev_scope_end, rmrru, RMRR_TYPE); - if ( ret || (rmrru->devices_cnt == 0) ) + + if ( ret || (rmrru->scope.devices_cnt == 0) ) xfree(rmrru); else acpi_register_rmrr_unit(rmrru); diff -r b03d6bcc0178 xen/drivers/passthrough/vtd/dmar.h --- a/xen/drivers/passthrough/vtd/dmar.h Fri Jul 04 16:12:44 2008 +0100 +++ b/xen/drivers/passthrough/vtd/dmar.h Fri Jul 
04 16:27:43 2008 +0100 @@ -40,48 +40,48 @@ }ioapic; }; +struct dmar_scope { + DECLARE_BITMAP(buses, 256); /* buses owned by this unit */ + u16 *devices; /* devices owned by this unit */ + int devices_cnt; +}; + struct acpi_drhd_unit { + struct dmar_scope scope; /* must be first member of struct */ struct list_head list; - u64 address; /* register base address of the unit */ - struct pci_dev *devices; /* target devices */ - int devices_cnt; + u64 address; /* register base address of the unit */ u8 include_all:1; struct iommu *iommu; struct list_head ioapic_list; }; struct acpi_rmrr_unit { + struct dmar_scope scope; /* must be first member of struct */ struct list_head list; u64 base_address; u64 end_address; - struct pci_dev *devices; /* target devices */ - int devices_cnt; u8 allow_all:1; }; struct acpi_atsr_unit { + struct dmar_scope scope; /* must be first member of struct */ struct list_head list; - struct pci_dev *devices; /* target devices */ - int devices_cnt; u8 all_ports:1; }; -#define for_each_iommu(domain, iommu) \ - list_for_each_entry(iommu, \ - &(domain->arch.hvm_domain.hvm_iommu.iommu_list), list) #define for_each_drhd_unit(drhd) \ list_for_each_entry(drhd, &acpi_drhd_units, list) -#define for_each_rmrr_device(rmrr, pdev) \ - list_for_each_entry(rmrr, &acpi_rmrr_units, list) { \ - int _i; \ - for (_i = 0; _i < rmrr->devices_cnt; _i++) { \ - pdev = &(rmrr->devices[_i]); -#define end_for_each_rmrr_device(rmrr, pdev) \ - } \ - } -struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *dev); +#define for_each_rmrr_device(rmrr, bdf, idx) \ + list_for_each_entry(rmrr, &acpi_rmrr_units, list) \ + /* assume there never is a bdf == 0 */ \ + for (idx = 0; (bdf = rmrr->scope.devices[i]) && \ + idx < rmrr->scope.devices_cnt; idx++) + +struct acpi_drhd_unit * acpi_find_matched_drhd_unit(u8 bus, u8 devfn); +void dmar_scope_add_buses(struct dmar_scope *scope, u16 sec, u16 sub); +void dmar_scope_remove_buses(struct dmar_scope *scope, u16 sec, u16 sub); 
#define DMAR_TYPE 1 #define RMRR_TYPE 2 @@ -91,6 +91,6 @@ int vtd_hw_check(void); void disable_pmr(struct iommu *iommu); -int is_usb_device(struct pci_dev *pdev); +int is_usb_device(u8 bus, u8 devfn); #endif /* _DMAR_H_ */ diff -r b03d6bcc0178 xen/drivers/passthrough/vtd/intremap.c --- a/xen/drivers/passthrough/vtd/intremap.c Fri Jul 04 16:12:44 2008 +0100 +++ b/xen/drivers/passthrough/vtd/intremap.c Fri Jul 04 16:27:43 2008 +0100 @@ -394,7 +394,7 @@ struct iommu *iommu = NULL; struct ir_ctrl *ir_ctrl; - drhd = acpi_find_matched_drhd_unit(pdev); + drhd = acpi_find_matched_drhd_unit(pdev->bus, pdev->devfn); iommu = drhd->iommu; ir_ctrl = iommu_ir_ctrl(iommu); @@ -412,7 +412,7 @@ struct iommu *iommu = NULL; struct ir_ctrl *ir_ctrl; - drhd = acpi_find_matched_drhd_unit(msi_desc->dev); + drhd = acpi_find_matched_drhd_unit(pdev->bus, pdev->devfn); iommu = drhd->iommu; ir_ctrl = iommu_ir_ctrl(iommu); diff -r b03d6bcc0178 xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Fri Jul 04 16:12:44 2008 +0100 +++ b/xen/drivers/passthrough/vtd/iommu.c Fri Jul 04 16:27:43 2008 +0100 @@ -1089,8 +1089,8 @@ if ( ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) ) context_set_translation_type(*context, CONTEXT_TT_PASS_THRU); else +#endif { -#endif /* Ensure we have pagetables allocated down to leaf PTE. 
*/ if ( hd->pgd_maddr == 0 ) { @@ -1119,9 +1119,7 @@ context_set_address_root(*context, pgd_maddr); context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL); -#ifdef CONTEXT_PASSTHRU } -#endif /* * domain_id 0 is not valid on Intel's IOMMU, force domain_id to @@ -1150,115 +1148,128 @@ #define PCI_BASE_CLASS_BRIDGE 0x06 #define PCI_CLASS_BRIDGE_PCI 0x0604 -#define DEV_TYPE_PCIe_ENDPOINT 1 -#define DEV_TYPE_PCI_BRIDGE 2 -#define DEV_TYPE_PCI 3 +enum { + DEV_TYPE_PCIe_ENDPOINT, + DEV_TYPE_PCIe_BRIDGE, + DEV_TYPE_PCI_BRIDGE, + DEV_TYPE_PCI, +}; -int pdev_type(struct pci_dev *dev) +int pdev_type(u8 bus, u8 devfn) { u16 class_device; - u16 status; + u16 status, creg; + int pos; + u8 d = PCI_SLOT(devfn), f = PCI_FUNC(devfn); - class_device = pci_conf_read16(dev->bus, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), PCI_CLASS_DEVICE); + class_device = pci_conf_read16(bus, d, f, PCI_CLASS_DEVICE); if ( class_device == PCI_CLASS_BRIDGE_PCI ) - return DEV_TYPE_PCI_BRIDGE; + { + pos = pci_find_next_cap(bus, devfn, PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP); + if ( !pos ) + return DEV_TYPE_PCI_BRIDGE; + creg = pci_conf_read16(bus, d, f, pos + PCI_EXP_FLAGS); + return ((creg & PCI_EXP_FLAGS_TYPE) >> 4) == PCI_EXP_TYPE_PCI_BRIDGE ? 
+ DEV_TYPE_PCI_BRIDGE : DEV_TYPE_PCIe_BRIDGE; + } - status = pci_conf_read16(dev->bus, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), PCI_STATUS); - + status = pci_conf_read16(bus, d, f, PCI_STATUS); if ( !(status & PCI_STATUS_CAP_LIST) ) return DEV_TYPE_PCI; - if ( pci_find_next_cap(dev->bus, dev->devfn, - PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP) ) + if ( pci_find_next_cap(bus, devfn, PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP) ) return DEV_TYPE_PCIe_ENDPOINT; return DEV_TYPE_PCI; } #define MAX_BUSES 256 -struct pci_dev bus2bridge[MAX_BUSES]; +static struct { u8 map, bus, devfn; } bus2bridge[MAX_BUSES]; -static int domain_context_mapping( - struct domain *domain, - struct iommu *iommu, - struct pci_dev *pdev) +static int find_pcie_endpoint(u8 *bus, u8 *devfn) { + int cnt = 0; + + if ( *bus == 0 ) + /* assume integrated PCI devices in RC have valid requester-id */ + return 1; + + if ( !bus2bridge[*bus].map ) + return 0; + + while ( bus2bridge[*bus].map ) + { + *devfn = bus2bridge[*bus].devfn; + *bus = bus2bridge[*bus].bus; + if ( cnt++ >= MAX_BUSES ) + return 0; + } + + return 1; +} + +static int domain_context_mapping(struct domain *domain, u8 bus, u8 devfn) +{ + struct acpi_drhd_unit *drhd; int ret = 0; - int dev, func, sec_bus, sub_bus; + u16 sec_bus, sub_bus, ob, odf; u32 type; - type = pdev_type(pdev); + drhd = acpi_find_matched_drhd_unit(bus, devfn); + if ( !drhd ) + return -ENODEV; + + type = pdev_type(bus, devfn); switch ( type ) { + case DEV_TYPE_PCIe_BRIDGE: + break; + case DEV_TYPE_PCI_BRIDGE: - sec_bus = pci_conf_read8( - pdev->bus, PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS); + sec_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + PCI_SECONDARY_BUS); + sub_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + PCI_SUBORDINATE_BUS); - if ( bus2bridge[sec_bus].bus == 0 ) + for ( sub_bus &= 0xff; sec_bus <= sub_bus; sec_bus++ ) { - bus2bridge[sec_bus].bus = pdev->bus; - bus2bridge[sec_bus].devfn = pdev->devfn; + 
bus2bridge[sec_bus].map = 1; + bus2bridge[sec_bus].bus = bus; + bus2bridge[sec_bus].devfn = devfn; + } + break; + + case DEV_TYPE_PCIe_ENDPOINT: + gdprintk(XENLOG_INFO VTDPREFIX, + "domain_context_mapping:PCIe: bdf = %x:%x.%x\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn); + break; + + case DEV_TYPE_PCI: + gdprintk(XENLOG_INFO VTDPREFIX, + "domain_context_mapping:PCI: bdf = %x:%x.%x\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + + ob = bus; odf = devfn; + if ( !find_pcie_endpoint(&bus, &devfn) ) + { + gdprintk(XENLOG_WARNING VTDPREFIX, "domain_context_mapping:invalid"); + break; } - sub_bus = pci_conf_read8( - pdev->bus, PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS); + if ( ob != bus || odf != devfn ) + gdprintk(XENLOG_INFO VTDPREFIX, + "domain_context_mapping:map: bdf = %x:%x.%x -> %x:%x.%x\n", + ob, PCI_SLOT(odf), PCI_FUNC(odf), + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn); + break; - if ( sec_bus != sub_bus ) - gdprintk(XENLOG_WARNING VTDPREFIX, - "context_context_mapping: nested PCI bridge not " - "supported: bdf = %x:%x:%x sec_bus = %x sub_bus = %x\n", - pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), - sec_bus, sub_bus); - break; - case DEV_TYPE_PCIe_ENDPOINT: - gdprintk(XENLOG_INFO VTDPREFIX, - "domain_context_mapping:PCIe : bdf = %x:%x:%x\n", - pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); - ret = domain_context_mapping_one(domain, iommu, - (u8)(pdev->bus), (u8)(pdev->devfn)); - break; - case DEV_TYPE_PCI: - gdprintk(XENLOG_INFO VTDPREFIX, - "domain_context_mapping:PCI: bdf = %x:%x:%x\n", - pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); - - if ( pdev->bus == 0 ) - ret = domain_context_mapping_one( - domain, iommu, (u8)(pdev->bus), (u8)(pdev->devfn)); - else - { - if ( bus2bridge[pdev->bus].bus != 0 ) - gdprintk(XENLOG_WARNING VTDPREFIX, - 
"domain_context_mapping:bus2bridge" - "[%d].bus != 0\n", pdev->bus); - - ret = domain_context_mapping_one( - domain, iommu, - (u8)(bus2bridge[pdev->bus].bus), - (u8)(bus2bridge[pdev->bus].devfn)); - - /* now map everything behind the PCI bridge */ - for ( dev = 0; dev < 32; dev++ ) - { - for ( func = 0; func < 8; func++ ) - { - ret = domain_context_mapping_one( - domain, iommu, - pdev->bus, (u8)PCI_DEVFN(dev, func)); - if ( ret ) - return ret; - } - } - } - break; default: gdprintk(XENLOG_ERR VTDPREFIX, - "domain_context_mapping:unknown type : bdf = %x:%x:%x\n", - pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + "domain_context_mapping:unknown type : bdf = %x:%x.%x\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); ret = -EINVAL; break; } @@ -1266,9 +1277,7 @@ return ret; } -static int domain_context_unmap_one( - struct iommu *iommu, - u8 bus, u8 devfn) +static int domain_context_unmap_one(struct iommu *iommu, u8 bus, u8 devfn) { struct context_entry *context, *context_entries; unsigned long flags; @@ -1296,61 +1305,39 @@ return 0; } -static int domain_context_unmap( - struct iommu *iommu, - struct pci_dev *pdev) +static int domain_context_unmap(u8 bus, u8 devfn) { + struct acpi_drhd_unit *drhd; int ret = 0; - int dev, func, sec_bus, sub_bus; u32 type; - type = pdev_type(pdev); + drhd = acpi_find_matched_drhd_unit(bus, devfn); + if ( !drhd ) + return -ENODEV; + + type = pdev_type(bus, devfn); switch ( type ) { + case DEV_TYPE_PCIe_BRIDGE: + break; + case DEV_TYPE_PCI_BRIDGE: - sec_bus = pci_conf_read8( - pdev->bus, PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS); - sub_bus = pci_conf_read8( - pdev->bus, PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS); + ret = domain_context_unmap_one(drhd->iommu, bus, devfn); break; + case DEV_TYPE_PCIe_ENDPOINT: - ret = domain_context_unmap_one(iommu, - (u8)(pdev->bus), (u8)(pdev->devfn)); + ret = domain_context_unmap_one(drhd->iommu, bus, devfn); break; + case DEV_TYPE_PCI: - if ( 
pdev->bus == 0 ) - ret = domain_context_unmap_one( - iommu, (u8)(pdev->bus), (u8)(pdev->devfn)); - else - { - if ( bus2bridge[pdev->bus].bus != 0 ) - gdprintk(XENLOG_WARNING VTDPREFIX, - "domain_context_unmap:" - "bus2bridge[%d].bus != 0\n", pdev->bus); + if ( find_pcie_endpoint(&bus, &devfn) ) + ret = domain_context_unmap_one(drhd->iommu, bus, devfn); + break; - ret = domain_context_unmap_one(iommu, - (u8)(bus2bridge[pdev->bus].bus), - (u8)(bus2bridge[pdev->bus].devfn)); - - /* Unmap everything behind the PCI bridge */ - for ( dev = 0; dev < 32; dev++ ) - { - for ( func = 0; func < 8; func++ ) - { - ret = domain_context_unmap_one( - iommu, pdev->bus, (u8)PCI_DEVFN(dev, func)); - if ( ret ) - return ret; - } - } - } - break; default: gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_unmap:unknown type: bdf = %x:%x:%x\n", - pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); ret = -EINVAL; break; } @@ -1364,7 +1351,7 @@ u8 bus, u8 devfn) { struct hvm_iommu *source_hd = domain_hvm_iommu(source); - struct pci_dev *pdev, *pdev2; + struct pci_dev *pdev; struct acpi_drhd_unit *drhd; struct iommu *iommu; int status; @@ -1378,27 +1365,28 @@ return; - found: - drhd = acpi_find_matched_drhd_unit(pdev); +found: + drhd = acpi_find_matched_drhd_unit(bus, devfn); iommu = drhd->iommu; - domain_context_unmap(iommu, pdev); + domain_context_unmap(bus, devfn); /* Move pci device from the source domain to target domain. 
*/ list_move(&pdev->domain_list, &target->arch.pdev_list); - for_each_pdev ( source, pdev2 ) + for_each_pdev ( source, pdev ) { - drhd = acpi_find_matched_drhd_unit(pdev2); + drhd = acpi_find_matched_drhd_unit(pdev->bus, pdev->devfn); if ( drhd->iommu == iommu ) { found = 1; break; } } + if ( !found ) clear_bit(iommu->index, &source_hd->iommu_bitmap); - status = domain_context_mapping(target, iommu, pdev); + status = domain_context_mapping(target, bus, devfn); if ( status != 0 ) gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n"); } @@ -1436,19 +1424,13 @@ iommu_domid_release(d); } -static int domain_context_mapped(struct pci_dev *pdev) +static int domain_context_mapped(u8 bus, u8 devfn) { struct acpi_drhd_unit *drhd; - struct iommu *iommu; - int ret; for_each_drhd_unit ( drhd ) - { - iommu = drhd->iommu; - ret = device_context_mapped(iommu, pdev->bus, pdev->devfn); - if ( ret ) - return ret; - } + if ( device_context_mapped(drhd->iommu, bus, devfn) ) + return 1; return 0; } @@ -1570,12 +1552,10 @@ return 0; } -static int iommu_prepare_rmrr_dev( - struct domain *d, - struct acpi_rmrr_unit *rmrr, - struct pci_dev *pdev) +static int iommu_prepare_rmrr_dev(struct domain *d, + struct acpi_rmrr_unit *rmrr, + u8 bus, u8 devfn) { - struct acpi_drhd_unit *drhd; u64 size; int ret; @@ -1587,10 +1567,9 @@ if ( ret ) return ret; - if ( domain_context_mapped(pdev) == 0 ) + if ( domain_context_mapped(bus, devfn) == 0 ) { - drhd = acpi_find_matched_drhd_unit(pdev); - ret = domain_context_mapping(d, drhd->iommu, pdev); + ret = domain_context_mapping(d, bus, devfn); if ( !ret ) return 0; } @@ -1601,7 +1580,6 @@ static void setup_dom0_devices(struct domain *d) { struct hvm_iommu *hd; - struct acpi_drhd_unit *drhd; struct pci_dev *pdev; int bus, dev, func, ret; u32 l; @@ -1624,8 +1602,7 @@ pdev->devfn = PCI_DEVFN(dev, func); list_add_tail(&pdev->domain_list, &d->arch.pdev_list); - drhd = acpi_find_matched_drhd_unit(pdev); - ret = domain_context_mapping(d, drhd->iommu, 
pdev); + ret = domain_context_mapping(d, pdev->bus, pdev->devfn); if ( ret != 0 ) gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n"); @@ -1701,15 +1678,16 @@ static void setup_dom0_rmrr(struct domain *d) { struct acpi_rmrr_unit *rmrr; - struct pci_dev *pdev; - int ret; + u16 bdf; + int ret, i; - for_each_rmrr_device ( rmrr, pdev ) - ret = iommu_prepare_rmrr_dev(d, rmrr, pdev); + for_each_rmrr_device ( rmrr, bdf, i ) + { + ret = iommu_prepare_rmrr_dev(d, rmrr, PCI_BUS(bdf), PCI_DEVFN2(bdf)); if ( ret ) gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: mapping reserved region failed\n"); - end_for_each_rmrr_device ( rmrr, pdev ) + } } int intel_vtd_setup(void) @@ -1769,25 +1747,26 @@ int intel_iommu_assign_device(struct domain *d, u8 bus, u8 devfn) { struct acpi_rmrr_unit *rmrr; - struct pci_dev *pdev; - int ret = 0; + int ret = 0, i; + u16 bdf; if ( list_empty(&acpi_drhd_units) ) return ret; reassign_device_ownership(dom0, d, bus, devfn); - /* Setup rmrr identify mapping */ - for_each_rmrr_device( rmrr, pdev ) - if ( pdev->bus == bus && pdev->devfn == devfn ) + /* Setup rmrr identity mapping */ + for_each_rmrr_device( rmrr, bdf, i ) + { + if ( PCI_BUS(bdf) == bus && PCI_DEVFN2(bdf) == devfn ) { /* FIXME: Because USB RMRR conflicts with guest bios region, * ignore USB RMRR temporarily. 
*/ - if ( is_usb_device(pdev) ) + if ( is_usb_device(bus, devfn) ) return 0; - ret = iommu_prepare_rmrr_dev(d, rmrr, pdev); + ret = iommu_prepare_rmrr_dev(d, rmrr, bus, devfn); if ( ret ) { gdprintk(XENLOG_ERR VTDPREFIX, @@ -1795,9 +1774,17 @@ return ret; } } - end_for_each_rmrr_device(rmrr, pdev) + } return ret; +} + +static int intel_iommu_group_id(u8 bus, u8 devfn) +{ + if ( !bus2bridge[bus].map || find_pcie_endpoint(&bus, &devfn) ) + return PCI_BDF2(bus, devfn); + else + return -1; } u8 iommu_state[MAX_IOMMU_REGS * MAX_IOMMUS]; @@ -1881,7 +1868,7 @@ .map_page = intel_iommu_map_page, .unmap_page = intel_iommu_unmap_page, .reassign_device = reassign_device_ownership, - .get_device_group_id = NULL, + .get_device_group_id = intel_iommu_group_id, }; /* diff -r b03d6bcc0178 xen/drivers/passthrough/vtd/utils.c --- a/xen/drivers/passthrough/vtd/utils.c Fri Jul 04 16:12:44 2008 +0100 +++ b/xen/drivers/passthrough/vtd/utils.c Fri Jul 04 16:27:43 2008 +0100 @@ -32,12 +32,10 @@ #define SEABURG 0x4000 #define C_STEP 2 -int is_usb_device(struct pci_dev *pdev) +int is_usb_device(u8 bus, u8 devfn) { - u8 bus = pdev->bus; - u8 dev = PCI_SLOT(pdev->devfn); - u8 func = PCI_FUNC(pdev->devfn); - u16 class = pci_conf_read16(bus, dev, func, PCI_CLASS_DEVICE); + u16 class = pci_conf_read16(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + PCI_CLASS_DEVICE); return (class == 0xc03); } diff -r b03d6bcc0178 xen/include/xen/pci.h --- a/xen/include/xen/pci.h Fri Jul 04 16:12:44 2008 +0100 +++ b/xen/include/xen/pci.h Fri Jul 04 16:27:43 2008 +0100 @@ -20,9 +20,13 @@ * 7:3 = slot * 2:0 = function */ -#define PCI_DEVFN(slot,func) (((slot & 0x1f) << 3) | (func & 0x07)) -#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) -#define PCI_FUNC(devfn) ((devfn) & 0x07) +#define PCI_BUS(bdf) (((bdf) >> 8) & 0xff) +#define PCI_SLOT(bdf) (((bdf) >> 3) & 0x1f) +#define PCI_FUNC(bdf) ((bdf) & 0x07) +#define PCI_DEVFN(d,f) (((d & 0x1f) << 3) | (f & 0x07)) +#define PCI_DEVFN2(bdf) ((bdf) & 0xff) +#define PCI_BDF(b,d,f) 
(((b * 0xff) << 8) | PCI_DEVFN(d,f)) +#define PCI_BDF2(b,df) (((b & 0xff) << 8) | (df & 0xff)) struct pci_dev { struct list_head domain_list; @@ -31,6 +35,10 @@ u8 devfn; struct list_head msi_list; }; + +#define for_each_pdev(domain, pdev) \ + list_for_each_entry(pdev, &(domain->arch.pdev_list), domain_list) + uint8_t pci_conf_read8( unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg); @@ -50,9 +58,4 @@ int pci_find_cap_offset(u8 bus, u8 dev, u8 func, u8 cap); int pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap); - -#define for_each_pdev(domain, pdev) \ - list_for_each_entry(pdev, &(domain->arch.pdev_list), domain_list) - - #endif /* __XEN_PCI_H__ */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Espen Skoglund
2008-Jul-04 16:36 UTC
[Xen-devel] [PATCH 3/7] PCI device register/unregister + pci_dev cleanups
Add management and locking of PCI device structures Add functions for managing pci_dev structures. Create a list containing all current pci_devs. Remove msi_pdev_list. Create a read-write lock protecting all pci_dev lists. Add spinlocks for pci_dev access. Do necessary modifications to MSI code. Signed-off-by: Espen Skoglund <espen.skoglund@netronome.com> -- b/xen/drivers/passthrough/pci.c | 124 ++++++++++++++++++++ xen/arch/x86/i8259.c | 3 xen/arch/x86/msi.c | 172 +++++++++++----------------- xen/arch/x86/physdev.c | 4 xen/drivers/passthrough/Makefile | 1 xen/drivers/passthrough/amd/pci_amd_iommu.c | 72 ++++++----- xen/drivers/passthrough/iommu.c | 5 xen/drivers/passthrough/vtd/iommu.c | 73 ++++++----- xen/include/asm-x86/msi.h | 4 xen/include/xen/iommu.h | 9 - xen/include/xen/pci.h | 23 +++ 11 files changed, 304 insertions(+), 186 deletions(-) -- diff -r 5b0699fb81a5 xen/arch/x86/i8259.c --- a/xen/arch/x86/i8259.c Fri Jul 04 17:04:47 2008 +0100 +++ b/xen/arch/x86/i8259.c Fri Jul 04 17:19:39 2008 +0100 @@ -382,7 +382,6 @@ static struct irqaction cascade = { no_action, "cascade", NULL}; -extern struct list_head msi_pdev_list; void __init init_IRQ(void) { int i; @@ -419,7 +418,5 @@ outb(LATCH >> 8, PIT_CH0); /* MSB */ setup_irq(2, &cascade); - - INIT_LIST_HEAD(&msi_pdev_list); } diff -r 5b0699fb81a5 xen/arch/x86/msi.c --- a/xen/arch/x86/msi.c Fri Jul 04 17:04:47 2008 +0100 +++ b/xen/arch/x86/msi.c Fri Jul 04 17:19:39 2008 +0100 @@ -28,21 +28,6 @@ #include <xen/iommu.h> extern int msi_irq_enable; - -/* PCI-dev list with MSI/MSIX capabilities */ -DEFINE_SPINLOCK(msi_pdev_lock); -struct list_head msi_pdev_list; - -struct pci_dev *get_msi_pdev(u8 bus, u8 devfn) -{ - struct pci_dev *pdev = NULL; - - list_for_each_entry(pdev, &msi_pdev_list, msi_dev_list) - if ( pdev->bus == bus && pdev->devfn == devfn ) - return pdev; - - return NULL; -} /* bitmap indicate which fixed map is free */ DEFINE_SPINLOCK(msix_fixmap_lock); @@ -112,10 +97,8 @@ } } -void read_msi_msg(unsigned 
int irq, struct msi_msg *msg) +static void read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) { - struct msi_desc *entry = irq_desc[irq].msi_desc; - switch ( entry->msi_attrib.type ) { case PCI_CAP_ID_MSI: @@ -147,7 +130,7 @@ { void __iomem *base; base = entry->mask_base + - entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET); msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET); @@ -164,9 +147,6 @@ static int set_vector_msi(struct msi_desc *entry) { - irq_desc_t *desc; - unsigned long flags; - if ( entry->vector >= NR_VECTORS ) { dprintk(XENLOG_ERR, "Trying to install msi data for Vector %d\n", @@ -174,19 +154,12 @@ return -EINVAL; } - desc = &irq_desc[entry->vector]; - spin_lock_irqsave(&desc->lock, flags); - desc->msi_desc = entry; - spin_unlock_irqrestore(&desc->lock, flags); - + irq_desc[entry->vector].msi_desc = entry; return 0; } static int unset_vector_msi(int vector) { - irq_desc_t *desc; - unsigned long flags; - if ( vector >= NR_VECTORS ) { dprintk(XENLOG_ERR, "Trying to uninstall msi data for Vector %d\n", @@ -194,18 +167,12 @@ return -EINVAL; } - desc = &irq_desc[vector]; - spin_lock_irqsave(&desc->lock, flags); - desc->msi_desc = NULL; - spin_unlock_irqrestore(&desc->lock, flags); - + irq_desc[vector].msi_desc = NULL; return 0; } -void write_msi_msg(unsigned int irq, struct msi_msg *msg) +static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) { - struct msi_desc *entry = irq_desc[irq].msi_desc; - if ( vtd_enabled ) msi_msg_write_remap_rte(entry, msg); @@ -254,6 +221,7 @@ void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { + struct msi_desc *desc = irq_desc[irq].msi_desc; struct msi_msg msg; unsigned int dest; @@ -263,12 +231,18 @@ mask = TARGET_CPUS; dest = cpu_mask_to_apicid(mask); - read_msi_msg(irq, &msg); + if ( !desc ) + return; + + ASSERT(spin_is_locked(&irq_desc[vector].lock)); + 
spin_lock(&desc->dev->lock); + read_msi_msg(desc, &msg); msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; msg.address_lo |= MSI_ADDR_DEST_ID(dest); - write_msi_msg(irq, &msg); + write_msi_msg(desc, &msg); + spin_unlock(&desc->dev->lock); } static void msi_set_enable(struct pci_dev *dev, int enable) @@ -290,7 +264,7 @@ } } -void msix_set_enable(struct pci_dev *dev, int enable) +static void msix_set_enable(struct pci_dev *dev, int enable) { int pos; u16 control; @@ -335,6 +309,7 @@ { struct msi_desc *entry = irq_desc[irq].msi_desc; + ASSERT(spin_is_locked(&irq_desc[vector].lock)); BUG_ON(!entry || !entry->dev); switch (entry->msi_attrib.type) { case PCI_CAP_ID_MSI: @@ -401,7 +376,7 @@ msi_compose_msg(dev, desc->vector, &msg); set_vector_msi(desc); - write_msi_msg(desc->vector, &msg); + write_msi_msg(irq_desc[desc->vector].msi_desc, &msg); return 0; } @@ -415,8 +390,8 @@ { struct msi_desc *entry; + ASSERT(spin_is_locked(&irq_desc[vector].lock)); entry = irq_desc[vector].msi_desc; - teardown_msi_vector(vector); if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX ) @@ -619,35 +594,22 @@ static int __pci_enable_msi(u8 bus, u8 devfn, int vector) { int status; - struct pci_dev *dev; + struct pci_dev *pdev; - dev = get_msi_pdev(bus, devfn); - if ( !dev ) + pdev = pci_lock_pdev(bus, devfn); + if ( !pdev ) + return -ENODEV; + + if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSI) ) { - dev = xmalloc(struct pci_dev); - if ( !dev ) - return -ENOMEM; - dev->bus = bus; - dev->devfn = devfn; - INIT_LIST_HEAD(&dev->msi_list); - } - - if ( find_msi_entry(dev, vector, PCI_CAP_ID_MSI) ) - { + spin_unlock(&pdev->lock); dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on device \ %02x:%02x.%01x.\n", vector, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); return 0; } - status = msi_capability_init(dev, vector); - - if ( dev != get_msi_pdev(bus, devfn) ) - { - spin_lock(&msi_pdev_lock); - list_add_tail(&dev->msi_dev_list, &msi_pdev_list); - spin_unlock(&msi_pdev_lock); - } - + status = 
msi_capability_init(pdev, vector); + spin_unlock(&pdev->lock); return status; } @@ -660,6 +622,13 @@ u8 bus, slot, func; entry = irq_desc[vector].msi_desc; + if ( !entry ) + return; + /* + * Lock here is safe. msi_desc can not be removed without holding + * both irq_desc[].lock (which we do) and pdev->lock. + */ + spin_lock(&entry->dev->lock); dev = entry->dev; bus = dev->bus; slot = PCI_SLOT(dev->devfn); @@ -674,6 +643,7 @@ msi_free_vector(vector); pci_conf_write16(bus, slot, func, msi_control_reg(pos), control); + spin_unlock(&dev->lock); } /** @@ -694,47 +664,35 @@ static int __pci_enable_msix(u8 bus, u8 devfn, int vector, int entry_nr) { int status, pos, nr_entries; - struct pci_dev *dev; + struct pci_dev *pdev; u16 control; u8 slot = PCI_SLOT(devfn); u8 func = PCI_FUNC(devfn); + + pdev = pci_lock_pdev(bus, devfn); + if ( !pdev ) + return -ENODEV; pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX); control = pci_conf_read16(bus, slot, func, msi_control_reg(pos)); nr_entries = multi_msix_capable(control); if (entry_nr > nr_entries) + { + spin_unlock(&pdev->lock); return -EINVAL; - - /* Check whether driver already requested for MSI-X irqs */ - dev = get_msi_pdev(bus, devfn); - - if ( !dev ) - { - dev = xmalloc(struct pci_dev); - if ( !dev ) - return -ENOMEM; - dev->bus = bus; - dev->devfn = devfn; - INIT_LIST_HEAD(&dev->msi_list); } - if ( find_msi_entry(dev, vector, PCI_CAP_ID_MSIX) ) + if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSIX) ) { + spin_unlock(&pdev->lock); dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on \ device %02x:%02x.%01x.\n", vector, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); return 0; } - status = msix_capability_init(dev, vector, entry_nr); - - if ( dev != get_msi_pdev(bus, devfn) ) - { - spin_lock(&msi_pdev_lock); - list_add_tail(&dev->msi_dev_list, &msi_pdev_list); - spin_unlock(&msi_pdev_lock); - } - + status = msix_capability_init(pdev, vector, entry_nr); + spin_unlock(&pdev->lock); return status; } @@ -747,6 
+705,13 @@ u8 bus, slot, func; entry = irq_desc[vector].msi_desc; + if ( !entry ) + return; + /* + * Lock here is safe. msi_desc can not be removed without holding + * both irq_desc[].lock (which we do) and pdev->lock. + */ + spin_lock(&entry->dev->lock); dev = entry->dev; bus = dev->bus; slot = PCI_SLOT(dev->devfn); @@ -761,10 +726,12 @@ msi_free_vector(vector); pci_conf_write16(bus, slot, func, msix_control_reg(pos), control); + spin_unlock(&dev->lock); } int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi) { + ASSERT(spin_is_locked(&irq_desc[vector].lock)); if ( msi ) return __pci_enable_msi(bus, devfn, vector); else @@ -773,9 +740,11 @@ void pci_disable_msi(int vector) { - irq_desc_t *desc; + irq_desc_t *desc = &irq_desc[vector]; + ASSERT(spin_is_locked(&desc->lock)); + if ( !desc->msi_desc ) + return; - desc = &irq_desc[vector]; if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSI ) __pci_disable_msi(vector); else if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX ) @@ -789,9 +758,17 @@ irq_desc_t *desc; unsigned long flags; +retry: list_for_each_entry_safe( entry, tmp, &dev->msi_list, list ) { desc = &irq_desc[entry->vector]; + + local_irq_save(flags); + if ( !spin_trylock(&desc->lock) ) + { + local_irq_restore(flags); + goto retry; + } spin_lock_irqsave(&desc->lock, flags); if ( desc->handler == &pci_msi_type ) @@ -800,22 +777,17 @@ BUG_ON(desc->status & IRQ_GUEST); desc->handler = &no_irq_type; } - spin_unlock_irqrestore(&desc->lock, flags); msi_free_vector(entry->vector); + spin_unlock_irqrestore(&desc->lock, flags); } } -void pci_cleanup_msi(u8 bus, u8 devfn) +void pci_cleanup_msi(struct pci_dev *pdev) { - struct pci_dev *dev = get_msi_pdev(bus, devfn); - - if ( !dev ) - return; - /* Disable MSI and/or MSI-X */ - msi_set_enable(dev, 0); - msix_set_enable(dev, 0); - msi_free_vectors(dev); + msi_set_enable(pdev, 0); + msix_set_enable(pdev, 0); + msi_free_vectors(pdev); } diff -r 5b0699fb81a5 xen/arch/x86/physdev.c --- 
a/xen/arch/x86/physdev.c Fri Jul 04 17:04:47 2008 +0100 +++ b/xen/arch/x86/physdev.c Fri Jul 04 17:19:39 2008 +0100 @@ -114,12 +114,12 @@ gdprintk(XENLOG_G_ERR, "Map vector %x to msi while it is in use\n", vector); desc->handler = &pci_msi_type; - spin_unlock_irqrestore(&desc->lock, flags); ret = pci_enable_msi(map->msi_info.bus, map->msi_info.devfn, vector, map->msi_info.entry_nr, map->msi_info.msi); + spin_unlock_irqrestore(&desc->lock, flags); if ( ret ) goto done; } @@ -161,10 +161,10 @@ irq_desc_t *desc; desc = &irq_desc[vector]; + spin_lock_irqsave(&desc->lock, flags); if ( desc->msi_desc ) pci_disable_msi(vector); - spin_lock_irqsave(&desc->lock, flags); if ( desc->handler == &pci_msi_type ) { /* MSI is not shared, so should be released already */ diff -r 5b0699fb81a5 xen/drivers/passthrough/Makefile --- a/xen/drivers/passthrough/Makefile Fri Jul 04 17:04:47 2008 +0100 +++ b/xen/drivers/passthrough/Makefile Fri Jul 04 17:19:39 2008 +0100 @@ -3,3 +3,4 @@ obj-y += iommu.o obj-y += io.o +obj-y += pci.o diff -r 5b0699fb81a5 xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Jul 04 17:04:47 2008 +0100 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Jul 04 17:19:39 2008 +0100 @@ -298,6 +298,7 @@ u32 l; int bdf; + write_lock(&pcidevs_lock); for ( bus = 0; bus < 256; bus++ ) { for ( dev = 0; dev < 32; dev++ ) @@ -310,10 +311,9 @@ (l == 0x0000ffff) || (l == 0xffff0000) ) continue; - pdev = xmalloc(struct pci_dev); - pdev->bus = bus; - pdev->devfn = PCI_DEVFN(dev, func); - list_add_tail(&pdev->domain_list, &d->arch.pdev_list); + pdev = alloc_pdev(bus, PCI_DEVFN(dev, func)); + pdev->domain = d; + list_add(&pdev->domain_list, &d->arch.pdev_list); bdf = (bus << 8) | pdev->devfn; /* supported device? 
*/ @@ -325,6 +325,7 @@ } } } + write_unlock(&pcidevs_lock); } int amd_iov_detect(void) @@ -493,38 +494,37 @@ struct amd_iommu *iommu; int bdf; - for_each_pdev ( source, pdev ) + pdev = pci_lock_domain_pdev(source, bus, devfn); + if ( !pdev ) + return -ENODEV; + + bdf = (bus << 8) | devfn; + /* supported device? */ + iommu = (bdf < ivrs_bdf_entries) ? + find_iommu_for_device(bus, pdev->devfn) : NULL; + + if ( !iommu ) { - if ( (pdev->bus != bus) || (pdev->devfn != devfn) ) - continue; + spin_unlock(&pdev->lock); + amd_iov_error("Fail to find iommu." + " %x:%x.%x cannot be assigned to domain %d\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id); + return -ENODEV; + } - pdev->bus = bus; - pdev->devfn = devfn; + amd_iommu_disable_domain_device(source, iommu, bdf); - bdf = (bus << 8) | devfn; - /* supported device? */ - iommu = (bdf < ivrs_bdf_entries) ? - find_iommu_for_device(bus, pdev->devfn) : NULL; + write_lock(&pcidevs_lock); + list_move(&pdev->domain_list, &target->arch.pdev_list); + write_unlock(&pcidevs_lock); + pdev->domain = target; - if ( !iommu ) - { - amd_iov_error("Fail to find iommu." - " %x:%x.%x cannot be assigned to domain %d\n", - bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id); - return -ENODEV; - } - - amd_iommu_disable_domain_device(source, iommu, bdf); - /* Move pci device from the source domain to target domain. 
*/ - list_move(&pdev->domain_list, &target->arch.pdev_list); - - amd_iommu_setup_domain_device(target, iommu, bdf); - amd_iov_info("reassign %x:%x.%x domain %d -> domain %d\n", + amd_iommu_setup_domain_device(target, iommu, bdf); + amd_iov_info("reassign %x:%x.%x domain %d -> domain %d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn), source->domain_id, target->domain_id); - break; - } + spin_unlock(&pdev->lock); return 0; } @@ -552,14 +552,16 @@ static void release_domain_devices(struct domain *d) { struct pci_dev *pdev; + u8 bus, devfn; - while ( has_arch_pdevs(d) ) + while ( (pdev = pci_lock_domain_pdev(d, -1, -1)) ) { - pdev = list_entry(d->arch.pdev_list.next, typeof(*pdev), domain_list); pdev_flr(pdev->bus, pdev->devfn); + bus = pdev->bus; devfn = pdev->devfn; + spin_unlock(&pdev->lock); amd_iov_info("release domain %d devices %x:%x.%x\n", d->domain_id, - pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); - reassign_device(d, dom0, pdev->bus, pdev->devfn); + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + reassign_device(d, dom0, bus, devfn); } } @@ -619,11 +621,11 @@ release_domain_devices(d); } -static void amd_iommu_return_device( +static int amd_iommu_return_device( struct domain *s, struct domain *t, u8 bus, u8 devfn) { pdev_flr(bus, devfn); - reassign_device(s, t, bus, devfn); + return reassign_device(s, t, bus, devfn); } static int amd_iommu_group_id(u8 bus, u8 devfn) diff -r 5b0699fb81a5 xen/drivers/passthrough/iommu.c --- a/xen/drivers/passthrough/iommu.c Fri Jul 04 17:04:47 2008 +0100 +++ b/xen/drivers/passthrough/iommu.c Fri Jul 04 17:19:39 2008 +0100 @@ -240,6 +240,7 @@ group_id = ops->get_device_group_id(bus, devfn); + read_lock(&pcidevs_lock); for_each_pdev( d, pdev ) { if ( (pdev->bus == bus) && (pdev->devfn == devfn) ) @@ -252,10 +253,14 @@ bdf |= (pdev->bus & 0xff) << 16; bdf |= (pdev->devfn & 0xff) << 8; if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) ) + { + read_unlock(&pcidevs_lock); return -1; + } i++; } } + read_unlock(&pcidevs_lock); 
return i; } diff -r 5b0699fb81a5 xen/drivers/passthrough/pci.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/drivers/passthrough/pci.c Fri Jul 04 17:19:39 2008 +0100 @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2008, Netronome Systems, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + +#include <xen/sched.h> +#include <xen/pci.h> +#include <xen/list.h> +#include <xen/prefetch.h> +#include <xen/keyhandler.h> + + +LIST_HEAD(alldevs_list); +rwlock_t pcidevs_lock = RW_LOCK_UNLOCKED; + +struct pci_dev *alloc_pdev(u8 bus, u8 devfn) +{ + struct pci_dev *pdev; + + list_for_each_entry ( pdev, &alldevs_list, alldevs_list ) + if ( pdev->bus == bus && pdev->devfn == devfn ) + return pdev; + + pdev = xmalloc(struct pci_dev); + if ( !pdev ) + return NULL; + + *((u8*) &pdev->bus) = bus; + *((u8*) &pdev->devfn) = devfn; + pdev->domain = NULL; + spin_lock_init(&pdev->lock); + INIT_LIST_HEAD(&pdev->msi_list); + list_add(&pdev->alldevs_list, &alldevs_list); + + return pdev; +} + +void free_pdev(struct pci_dev *pdev) +{ + list_del(&pdev->alldevs_list); + xfree(pdev); +} + +struct pci_dev *pci_lock_pdev(int bus, int devfn) +{ + struct pci_dev *pdev; + + read_lock(&pcidevs_lock); + list_for_each_entry ( pdev, &alldevs_list, alldevs_list ) + if ( (pdev->bus == bus || bus == -1) && + (pdev->devfn == devfn || devfn == -1) ) + { + spin_lock(&pdev->lock); + 
read_unlock(&pcidevs_lock); + return pdev; + } + read_unlock(&pcidevs_lock); + + return NULL; +} + +struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn) +{ + struct pci_dev *pdev; + + read_lock(&pcidevs_lock); + list_for_each_entry ( pdev, &d->arch.pdev_list, domain_list ) + { + spin_lock(&pdev->lock); + if ( (pdev->bus == bus || bus == -1) && + (pdev->devfn == devfn || devfn == -1) && + (pdev->domain == d) ) + { + read_unlock(&pcidevs_lock); + return pdev; + } + spin_unlock(&pdev->lock); + } + read_unlock(&pcidevs_lock); + + return NULL; +} + +static void dump_pci_devices(unsigned char ch) +{ + struct pci_dev *pdev; + struct msi_desc *msi; + + printk("==== PCI devices ====\n"); + read_lock(&pcidevs_lock); + + list_for_each_entry ( pdev, &alldevs_list, alldevs_list ) + { + spin_lock(&pdev->lock); + printk("%02x:%02x.%x - dom %-3d - MSIs < ", + pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), + pdev->domain ? pdev->domain->domain_id : -1); + list_for_each_entry ( msi, &pdev->msi_list, list ) + printk("%d ", msi->vector); + printk(">\n"); + spin_unlock(&pdev->lock); + } + + read_unlock(&pcidevs_lock); +} + +static int __init setup_dump_pcidevs(void) +{ + register_keyhandler(''P'', dump_pci_devices, "dump PCI devices"); + return 0; +} +__initcall(setup_dump_pcidevs); diff -r 5b0699fb81a5 xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Fri Jul 04 17:04:47 2008 +0100 +++ b/xen/drivers/passthrough/vtd/iommu.c Fri Jul 04 17:19:39 2008 +0100 @@ -1345,7 +1345,7 @@ return ret; } -void reassign_device_ownership( +static int reassign_device_ownership( struct domain *source, struct domain *target, u8 bus, u8 devfn) @@ -1353,61 +1353,62 @@ struct hvm_iommu *source_hd = domain_hvm_iommu(source); struct pci_dev *pdev; struct acpi_drhd_unit *drhd; - struct iommu *iommu; - int status; - int found = 0; + struct iommu *pdev_iommu; + int ret, found = 0; + + if ( !(pdev = pci_lock_domain_pdev(source, bus, devfn)) ) + return 
-ENODEV; pdev_flr(bus, devfn); - - for_each_pdev( source, pdev ) - if ( (pdev->bus == bus) && (pdev->devfn == devfn) ) - goto found; - - return; - -found: drhd = acpi_find_matched_drhd_unit(bus, devfn); - iommu = drhd->iommu; + pdev_iommu = drhd->iommu; domain_context_unmap(bus, devfn); - /* Move pci device from the source domain to target domain. */ + write_lock(&pcidevs_lock); list_move(&pdev->domain_list, &target->arch.pdev_list); + write_unlock(&pcidevs_lock); + pdev->domain = target; + ret = domain_context_mapping(target, bus, devfn); + spin_unlock(&pdev->lock); + + read_lock(&pcidevs_lock); for_each_pdev ( source, pdev ) { drhd = acpi_find_matched_drhd_unit(pdev->bus, pdev->devfn); - if ( drhd->iommu == iommu ) + if ( drhd->iommu == pdev_iommu ) { found = 1; break; } } + read_unlock(&pcidevs_lock); if ( !found ) - clear_bit(iommu->index, &source_hd->iommu_bitmap); + clear_bit(pdev_iommu->index, &source_hd->iommu_bitmap); - status = domain_context_mapping(target, bus, devfn); - if ( status != 0 ) - gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n"); + return ret; } void return_devices_to_dom0(struct domain *d) { struct pci_dev *pdev; - while ( has_arch_pdevs(d) ) + while ( (pdev = pci_lock_domain_pdev(d, -1, -1)) ) { - pdev = list_entry(d->arch.pdev_list.next, typeof(*pdev), domain_list); - pci_cleanup_msi(pdev->bus, pdev->devfn); + pci_cleanup_msi(pdev); + spin_unlock(&pdev->lock); reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn); } #ifdef VTD_DEBUG + read_lock(&pcidevs_lock); for_each_pdev ( dom0, pdev ) dprintk(XENLOG_INFO VTDPREFIX, "return_devices_to_dom0:%x: bdf = %x:%x:%x\n", dom0->domain_id, pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + read_unlock(&pcidevs_lock); #endif } @@ -1568,11 +1569,7 @@ return ret; if ( domain_context_mapped(bus, devfn) == 0 ) - { ret = domain_context_mapping(d, bus, devfn); - if ( !ret ) - return 0; - } return ret; } @@ -1586,6 +1583,7 @@ hd = domain_hvm_iommu(d); + 
write_lock(&pcidevs_lock); for ( bus = 0; bus < 256; bus++ ) { for ( dev = 0; dev < 32; dev++ ) @@ -1597,10 +1595,10 @@ if ( (l == 0xffffffff) || (l == 0x00000000) || (l == 0x0000ffff) || (l == 0xffff0000) ) continue; - pdev = xmalloc(struct pci_dev); - pdev->bus = bus; - pdev->devfn = PCI_DEVFN(dev, func); - list_add_tail(&pdev->domain_list, &d->arch.pdev_list); + + pdev = alloc_pdev(bus, PCI_DEVFN(dev, func)); + pdev->domain = d; + list_add(&pdev->domain_list, &d->arch.pdev_list); ret = domain_context_mapping(d, pdev->bus, pdev->devfn); if ( ret != 0 ) @@ -1609,6 +1607,7 @@ } } } + write_unlock(&pcidevs_lock); } void clear_fault_bits(struct iommu *iommu) @@ -1737,9 +1736,11 @@ { struct pci_dev *pdev; - for_each_pdev( dom0, pdev ) - if ( (pdev->bus == bus ) && (pdev->devfn == devfn) ) - return 0; + if ( (pdev = pci_lock_domain_pdev(dom0, bus, devfn)) ) + { + spin_unlock(&pdev->lock); + return 0; + } return 1; } @@ -1751,9 +1752,11 @@ u16 bdf; if ( list_empty(&acpi_drhd_units) ) + return -ENODEV; + + ret = reassign_device_ownership(dom0, d, bus, devfn); + if ( ret ) return ret; - - reassign_device_ownership(dom0, d, bus, devfn); /* Setup rmrr identity mapping */ for_each_rmrr_device( rmrr, bdf, i ) diff -r 5b0699fb81a5 xen/include/asm-x86/msi.h --- a/xen/include/asm-x86/msi.h Fri Jul 04 17:04:47 2008 +0100 +++ b/xen/include/asm-x86/msi.h Fri Jul 04 17:19:39 2008 +0100 @@ -63,12 +63,10 @@ /* Helper functions */ extern void mask_msi_irq(unsigned int irq); extern void unmask_msi_irq(unsigned int irq); -extern void read_msi_msg(unsigned int irq, struct msi_msg *msg); -extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); extern void set_msi_irq_affinity(unsigned int irq, cpumask_t mask); extern int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi); extern void pci_disable_msi(int vector); -extern void pci_cleanup_msi(u8 bus, u8 devfn); +extern void pci_cleanup_msi(struct pci_dev *pdev); struct msi_desc { struct { diff -r 5b0699fb81a5 
xen/include/xen/iommu.h --- a/xen/include/xen/iommu.h Fri Jul 04 17:04:47 2008 +0100 +++ b/xen/include/xen/iommu.h Fri Jul 04 17:19:39 2008 +0100 @@ -56,6 +56,8 @@ struct intel_iommu *intel; }; +int iommu_add_device(u8 bus, u8 devfn); +void iommu_remove_device(u8 bus, u8 devfn); int iommu_domain_init(struct domain *d); void iommu_domain_destroy(struct domain *d); int device_assigned(u8 bus, u8 devfn); @@ -63,9 +65,6 @@ void deassign_device(struct domain *d, u8 bus, u8 devfn); int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs); -void reassign_device_ownership(struct domain *source, - struct domain *target, - u8 bus, u8 devfn); int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn); int iommu_unmap_page(struct domain *d, unsigned long gfn); void iommu_domain_teardown(struct domain *d); @@ -99,8 +98,8 @@ void (*teardown)(struct domain *d); int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn); int (*unmap_page)(struct domain *d, unsigned long gfn); - void (*reassign_device)(struct domain *s, struct domain *t, - u8 bus, u8 devfn); + int (*reassign_device)(struct domain *s, struct domain *t, + u8 bus, u8 devfn); int (*get_device_group_id)(u8 bus, u8 devfn); }; diff -r 5b0699fb81a5 xen/include/xen/pci.h --- a/xen/include/xen/pci.h Fri Jul 04 17:04:47 2008 +0100 +++ b/xen/include/xen/pci.h Fri Jul 04 17:19:39 2008 +0100 @@ -10,6 +10,7 @@ #include <xen/config.h> #include <xen/types.h> #include <xen/list.h> +#include <xen/spinlock.h> /* * The PCI interface treats multi-function devices as independent @@ -29,15 +30,31 @@ #define PCI_BDF2(b,df) (((b & 0xff) << 8) | (df & 0xff)) struct pci_dev { + struct list_head alldevs_list; struct list_head domain_list; - struct list_head msi_dev_list; - u8 bus; - u8 devfn; struct list_head msi_list; + struct domain *domain; + const u8 bus; + const u8 devfn; + spinlock_t lock; }; #define for_each_pdev(domain, pdev) \ list_for_each_entry(pdev, 
&(domain->arch.pdev_list), domain_list) + +/* + * The pcidevs_lock write-lock must be held when doing alloc_pdev() or + * free_pdev(). Never de-reference pdev without holding pdev->lock or + * pcidevs_lock. Always aquire pcidevs_lock before pdev->lock when + * doing free_pdev(). + */ + +extern rwlock_t pcidevs_lock; + +struct pci_dev *alloc_pdev(u8 bus, u8 devfn); +void free_pdev(struct pci_dev *pdev); +struct pci_dev *pci_lock_pdev(int bus, int devfn); +struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn); uint8_t pci_conf_read8( _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Espen Skoglund
2008-Jul-04 16:37 UTC
[Xen-devel] [PATCH 4/7] PCI device register/unregister + pci_dev cleanups
xen-pci-addremove.patch Add hypercall for adding and removing PCI devices The add hypercall will add a new PCI device and register it. The remove hypercall will remove the pci_dev structure for the device. The IOMMU hardware (if present) will be notified as well. Signed-off-by: Espen Skoglund <espen.skoglund@netronome.com> Signed-off-by: Joshua LeVasseur <joshua.levasseur@netronome.com> -- arch/x86/physdev.c | 26 ++++++++++++++++ drivers/passthrough/amd/pci_amd_iommu.c | 12 +++++++ drivers/passthrough/iommu.c | 26 ++++++++++++++++ drivers/passthrough/pci.c | 52 ++++++++++++++++++++++++++++++++ drivers/passthrough/vtd/iommu.c | 39 +++++++++++++++++------- include/public/physdev.h | 11 ++++++ include/xen/iommu.h | 6 ++- include/xen/pci.h | 2 + 8 files changed, 161 insertions(+), 13 deletions(-) -- diff -r 795e85588ded xen/arch/x86/physdev.c --- a/xen/arch/x86/physdev.c Fri Jul 04 16:27:50 2008 +0100 +++ b/xen/arch/x86/physdev.c Fri Jul 04 16:31:03 2008 +0100 @@ -500,6 +500,32 @@ break; } + case PHYSDEVOP_manage_pci_add: { + struct physdev_manage_pci manage_pci; + ret = -EPERM; + if ( !IS_PRIV(v->domain) ) + break; + ret = -EFAULT; + if ( copy_from_guest(&manage_pci, arg, 1) != 0 ) + break; + + ret = pci_add_device(manage_pci.bus, manage_pci.devfn); + break; + } + + case PHYSDEVOP_manage_pci_remove: { + struct physdev_manage_pci manage_pci; + ret = -EPERM; + if ( !IS_PRIV(v->domain) ) + break; + ret = -EFAULT; + if ( copy_from_guest(&manage_pci, arg, 1) != 0 ) + break; + + ret = pci_remove_device(manage_pci.bus, manage_pci.devfn); + break; + } + default: ret = -ENOSYS; break; diff -r 795e85588ded xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Jul 04 16:27:50 2008 +0100 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Jul 04 16:31:03 2008 +0100 @@ -628,6 +628,16 @@ return reassign_device(s, t, bus, devfn); } +static int amd_iommu_add_device(struct pci_dev *pdev) +{ + return 0; +} + +static int 
amd_iommu_remove_device(struct pci_dev *pdev) +{ + return 0; +} + static int amd_iommu_group_id(u8 bus, u8 devfn) { int rt; @@ -640,6 +650,8 @@ struct iommu_ops amd_iommu_ops = { .init = amd_iommu_domain_init, + .add_device = amd_iommu_add_device, + .remove_device = amd_iommu_remove_device, .assign_device = amd_iommu_assign_device, .teardown = amd_iommu_domain_destroy, .map_page = amd_iommu_map_page, diff -r 795e85588ded xen/drivers/passthrough/iommu.c --- a/xen/drivers/passthrough/iommu.c Fri Jul 04 16:27:50 2008 +0100 +++ b/xen/drivers/passthrough/iommu.c Fri Jul 04 16:31:03 2008 +0100 @@ -53,6 +53,32 @@ } return hd->platform_ops->init(domain); +} + +int iommu_add_device(struct pci_dev *pdev) +{ + struct hvm_iommu *hd; + if ( !pdev->domain ) + return -EINVAL; + + hd = domain_hvm_iommu(pdev->domain); + if ( !iommu_enabled || !hd->platform_ops ) + return 0; + + return hd->platform_ops->add_device(pdev); +} + +int iommu_remove_device(struct pci_dev *pdev) +{ + struct hvm_iommu *hd; + if ( !pdev->domain ) + return -EINVAL; + + hd = domain_hvm_iommu(pdev->domain); + if ( !iommu_enabled || !hd->platform_ops ) + return 0; + + return hd->platform_ops->remove_device(pdev); } int assign_device(struct domain *d, u8 bus, u8 devfn) diff -r 795e85588ded xen/drivers/passthrough/pci.c --- a/xen/drivers/passthrough/pci.c Fri Jul 04 16:27:50 2008 +0100 +++ b/xen/drivers/passthrough/pci.c Fri Jul 04 16:31:03 2008 +0100 @@ -19,6 +19,7 @@ #include <xen/pci.h> #include <xen/list.h> #include <xen/prefetch.h> +#include <xen/iommu.h> #include <xen/keyhandler.h> @@ -93,6 +94,57 @@ return NULL; } +int pci_add_device(u8 bus, u8 devfn) +{ + struct pci_dev *pdev; + int ret = -ENOMEM; + + write_lock(&pcidevs_lock); + pdev = alloc_pdev(bus, devfn); + if ( !pdev ) + goto out; + + ret = 0; + spin_lock(&pdev->lock); + if ( !pdev->domain ) + { + pdev->domain = dom0; + list_add(&pdev->domain_list, &dom0->arch.pdev_list); + ret = iommu_add_device(pdev); + } + spin_unlock(&pdev->lock); + 
printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", bus, + PCI_SLOT(devfn), PCI_FUNC(devfn)); + +out: + write_unlock(&pcidevs_lock); + return ret; +} + +int pci_remove_device(u8 bus, u8 devfn) +{ + struct pci_dev *pdev; + int ret = -ENODEV;; + + write_lock(&pcidevs_lock); + list_for_each_entry ( pdev, &alldevs_list, alldevs_list ) + if ( pdev->bus == bus && pdev->devfn == devfn ) + { + spin_lock(&pdev->lock); + ret = iommu_remove_device(pdev); + if ( pdev->domain ) + list_del(&pdev->domain_list); + pci_cleanup_msi(pdev); + free_pdev(pdev); + printk(XENLOG_DEBUG "PCI remove device %02x:%02x.%x\n", bus, + PCI_SLOT(devfn), PCI_FUNC(devfn)); + break; + } + + write_unlock(&pcidevs_lock); + return ret; +} + static void dump_pci_devices(unsigned char ch) { struct pci_dev *pdev; diff -r 795e85588ded xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Fri Jul 04 16:27:50 2008 +0100 +++ b/xen/drivers/passthrough/vtd/iommu.c Fri Jul 04 16:31:03 2008 +0100 @@ -1223,13 +1223,15 @@ switch ( type ) { case DEV_TYPE_PCIe_BRIDGE: - break; - case DEV_TYPE_PCI_BRIDGE: sec_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), PCI_SECONDARY_BUS); sub_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), PCI_SUBORDINATE_BUS); + /*dmar_scope_add_buses(&drhd->scope, sec_bus, sub_bus);*/ + + if ( type == DEV_TYPE_PCIe_BRIDGE ) + break; for ( sub_bus &= 0xff; sec_bus <= sub_bus; sec_bus++ ) { @@ -1308,6 +1310,7 @@ static int domain_context_unmap(u8 bus, u8 devfn) { struct acpi_drhd_unit *drhd; + u16 sec_bus, sub_bus; int ret = 0; u32 type; @@ -1319,10 +1322,14 @@ switch ( type ) { case DEV_TYPE_PCIe_BRIDGE: - break; - case DEV_TYPE_PCI_BRIDGE: - ret = domain_context_unmap_one(drhd->iommu, bus, devfn); + sec_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + PCI_SECONDARY_BUS); + sub_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + PCI_SUBORDINATE_BUS); + /*dmar_scope_remove_buses(&drhd->scope, sec_bus, sub_bus);*/ + if ( 
DEV_TYPE_PCI_BRIDGE ) + ret = domain_context_unmap_one(drhd->iommu, bus, devfn); break; case DEV_TYPE_PCIe_ENDPOINT: @@ -1574,11 +1581,23 @@ return ret; } +static int intel_iommu_add_device(struct pci_dev *pdev) +{ + if ( !pdev->domain ) + return -EINVAL; + return domain_context_mapping(pdev->domain, pdev->bus, pdev->devfn); +} + +static int intel_iommu_remove_device(struct pci_dev *pdev) +{ + return domain_context_unmap(pdev->bus, pdev->devfn); +} + static void setup_dom0_devices(struct domain *d) { struct hvm_iommu *hd; struct pci_dev *pdev; - int bus, dev, func, ret; + int bus, dev, func; u32 l; hd = domain_hvm_iommu(d); @@ -1599,11 +1618,7 @@ pdev = alloc_pdev(bus, PCI_DEVFN(dev, func)); pdev->domain = d; list_add(&pdev->domain_list, &d->arch.pdev_list); - - ret = domain_context_mapping(d, pdev->bus, pdev->devfn); - if ( ret != 0 ) - gdprintk(XENLOG_ERR VTDPREFIX, - "domain_context_mapping failed\n"); + domain_context_mapping(d, pdev->bus, pdev->devfn); } } } @@ -1866,6 +1881,8 @@ struct iommu_ops intel_iommu_ops = { .init = intel_iommu_domain_init, + .add_device = intel_iommu_add_device, + .remove_device = intel_iommu_remove_device, .assign_device = intel_iommu_assign_device, .teardown = iommu_domain_teardown, .map_page = intel_iommu_map_page, diff -r 795e85588ded xen/include/public/physdev.h --- a/xen/include/public/physdev.h Fri Jul 04 16:27:50 2008 +0100 +++ b/xen/include/public/physdev.h Fri Jul 04 16:31:03 2008 +0100 @@ -154,6 +154,17 @@ typedef struct physdev_unmap_pirq physdev_unmap_pirq_t; DEFINE_XEN_GUEST_HANDLE(physdev_unmap_pirq_t); +#define PHYSDEVOP_manage_pci_add 15 +#define PHYSDEVOP_manage_pci_remove 16 +struct physdev_manage_pci { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; + +typedef struct physdev_manage_pci physdev_manage_pci_t; +DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t); + /* * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op() * hypercall since 0x00030202. 
diff -r 795e85588ded xen/include/xen/iommu.h --- a/xen/include/xen/iommu.h Fri Jul 04 16:27:50 2008 +0100 +++ b/xen/include/xen/iommu.h Fri Jul 04 16:31:03 2008 +0100 @@ -56,8 +56,8 @@ struct intel_iommu *intel; }; -int iommu_add_device(u8 bus, u8 devfn); -void iommu_remove_device(u8 bus, u8 devfn); +int iommu_add_device(struct pci_dev *pdev); +int iommu_remove_device(struct pci_dev *pdev); int iommu_domain_init(struct domain *d); void iommu_domain_destroy(struct domain *d); int device_assigned(u8 bus, u8 devfn); @@ -94,6 +94,8 @@ struct iommu_ops { int (*init)(struct domain *d); + int (*add_device)(struct pci_dev *pdev); + int (*remove_device)(struct pci_dev *pdev); int (*assign_device)(struct domain *d, u8 bus, u8 devfn); void (*teardown)(struct domain *d); int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn); diff -r 795e85588ded xen/include/xen/pci.h --- a/xen/include/xen/pci.h Fri Jul 04 16:27:50 2008 +0100 +++ b/xen/include/xen/pci.h Fri Jul 04 16:31:03 2008 +0100 @@ -56,6 +56,8 @@ struct pci_dev *pci_lock_pdev(int bus, int devfn); struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn); +int pci_add_device(u8 bus, u8 devfn); +int pci_remove_device(u8 bus, u8 devfn); uint8_t pci_conf_read8( unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Espen Skoglund
2008-Jul-04 16:39 UTC
[Xen-devel] [PATCH 5/7] PCI device register/unregister + pci_dev cleanups
Add hypercall for adding and removing PCI devices Signed-off-by: Joshua LeVasseur <joshua.levasseur@netronome.com> -- physdev.h | 11 +++++++++++ 1 file changed, 11 insertions(+) -- --- a/include/xen/interface/physdev.h Fri Jun 27 16:07:56 2008 +0100 +++ b/include/xen/interface/physdev.h Thu Jul 03 18:02:39 2008 -0400 @@ -154,6 +154,17 @@ typedef struct physdev_unmap_pirq physdev_unmap_pirq_t; DEFINE_XEN_GUEST_HANDLE(physdev_unmap_pirq_t); +#define PHYSDEVOP_manage_pci_add 15 +#define PHYSDEVOP_manage_pci_remove 16 +struct physdev_manage_pci { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; + +typedef struct physdev_manage_pci physdev_manage_pci_t; +DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t); + /* * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op() * hypercall since 0x00030202. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Espen Skoglund
2008-Jul-04 16:40 UTC
[Xen-devel] [PATCH 6a/7] PCI device register/unregister + pci_dev cleanups
Option 1: Hook Linux's PCI probe and remove callbacks Hijack the pci_bus_type probe and remove callbacks. This option only requires modification to the Xen specific part of Linux. Signed-off-by: Joshua LeVasseur <joshua.levasseur@netronome.com> -- b/drivers/xen/core/pci.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/xen/core/Makefile | 1 2 files changed, 62 insertions(+) -- --- a/drivers/xen/core/Makefile Fri Jul 04 14:52:40 2008 +0100 +++ b/drivers/xen/core/Makefile Fri Jul 04 14:54:57 2008 +0100 @@ -4,6 +4,7 @@ obj-y := evtchn.o gnttab.o features.o reboot.o machine_reboot.o firmware.o +obj-$(CONFIG_PCI) += pci.o obj-$(CONFIG_PROC_FS) += xen_proc.o obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor_sysfs.o obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o diff -r 535aecec5599 drivers/xen/core/pci.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/drivers/xen/core/pci.c Fri Jul 04 14:54:57 2008 +0100 @@ -0,0 +1,61 @@ +/* + * vim:shiftwidth=8:noexpandtab + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <xen/interface/physdev.h> + +static int (*pci_bus_probe)(struct device *dev); +static int (*pci_bus_remove)(struct device *dev); + +static int pci_bus_probe_wrapper(struct device *dev) +{ + int r; + struct pci_dev *pci_dev = to_pci_dev(dev); + struct physdev_manage_pci manage_pci; + manage_pci.bus = pci_dev->bus->number; + manage_pci.devfn = pci_dev->devfn; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, &manage_pci); + if (r) + return r; + + r = pci_bus_probe(dev); + if (r) + HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, &manage_pci); + + return r; +} + +static int pci_bus_remove_wrapper(struct device *dev) +{ + int r; + struct pci_dev *pci_dev = to_pci_dev(dev); + struct physdev_manage_pci manage_pci; + manage_pci.bus = pci_dev->bus->number; + manage_pci.devfn = pci_dev->devfn; + + r = pci_bus_remove(dev); + /* dev and pci_dev are no longer valid!! 
*/ + + HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, &manage_pci); + return r; +} + +static int __init hook_pci_bus(void) +{ + if (!is_running_on_xen() || !is_initial_xendomain()) + return 0; + + pci_bus_probe = pci_bus_type.probe; + pci_bus_type.probe = pci_bus_probe_wrapper; + + pci_bus_remove = pci_bus_type.remove; + pci_bus_type.remove = pci_bus_remove_wrapper; + + return 0; +} + +core_initcall(hook_pci_bus); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Espen Skoglund
2008-Jul-04 16:41 UTC
[Xen-devel] [PATCH 6b/7] PCI device register/unregister + pci_dev cleanups
Option 2: Add PCI device add/remove guards to Linux Before calling the device probe function and after calling the device remove function invoke guard callbacks if so registered. This option may be viewed as cleaner. It also proves helpful if someone wants to implement coarse grained IOMMU protection in native Linux. Signed-off-by: Espen Skoglund <espen.skoglund@netronome.com> -- b/drivers/xen/core/pci_guard.c | 47 +++++++++++++++++++++++++++++++++++++++++ drivers/pci/pci-driver.c | 11 +++++++-- drivers/xen/core/Makefile | 1 include/linux/pci.h | 23 ++++++++++++++++++++ 4 files changed, 80 insertions(+), 2 deletions(-) -- diff -r ef74eb78b86c drivers/pci/pci-driver.c --- a/drivers/pci/pci-driver.c Fri Jul 04 14:54:57 2008 +0100 +++ b/drivers/pci/pci-driver.c Fri Jul 04 14:57:12 2008 +0100 @@ -16,6 +16,8 @@ /* * Registration of PCI drivers and handling of hot-pluggable devices. */ + +struct pci_dev_guard *pci_dev_guard = NULL; /* * Dynamic device IDs are disabled for !CONFIG_HOTPLUG @@ -233,9 +235,13 @@ drv = to_pci_driver(dev->driver); pci_dev = to_pci_dev(dev); pci_dev_get(pci_dev); - error = __pci_device_probe(drv, pci_dev); - if (error) + error = pci_dev_guard_enable(pci_dev); + if (!error) + error = __pci_device_probe(drv, pci_dev); + if (error) { + pci_dev_guard_disable(pci_dev); pci_dev_put(pci_dev); + } return error; } @@ -260,6 +266,7 @@ * horrible the crap we have to deal with is when we are awake... 
*/ + pci_dev_guard_disable(pci_dev); pci_dev_put(pci_dev); return 0; } diff -r ef74eb78b86c drivers/xen/core/Makefile --- a/drivers/xen/core/Makefile Fri Jul 04 14:54:57 2008 +0100 +++ b/drivers/xen/core/Makefile Fri Jul 04 14:57:12 2008 +0100 @@ -12,3 +12,4 @@ obj-$(CONFIG_XEN_SMPBOOT) += smpboot.o obj-$(CONFIG_KEXEC) += machine_kexec.o obj-$(CONFIG_XEN_XENCOMM) += xencomm.o +obj-$(CONFIG_PCI) += pci_guard.o diff -r ef74eb78b86c drivers/xen/core/pci_guard.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/drivers/xen/core/pci_guard.c Fri Jul 04 14:57:12 2008 +0100 @@ -0,0 +1,47 @@ +/* + * vim:shiftwidth=8:noexpandtab + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <xen/interface/physdev.h> + +static int xen_pci_dev_guard_enable(struct pci_dev *pci_dev) +{ + struct physdev_manage_pci manage_pci; + manage_pci.bus = pci_dev->bus->number; + manage_pci.devfn = pci_dev->devfn; + + return HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, &manage_pci); +} + +static int xen_pci_dev_guard_disable(struct pci_dev *pci_dev) +{ + struct physdev_manage_pci manage_pci; + manage_pci.bus = pci_dev->bus->number; + manage_pci.devfn = pci_dev->devfn; + + return HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, &manage_pci); +} + +static struct pci_dev_guard xen_pci_dev_guard = { + .enable = xen_pci_dev_guard_enable, + .disable = xen_pci_dev_guard_disable, +}; + +static int __init init_pci_guard(void) +{ + if (!is_running_on_xen() || !is_initial_xendomain()) + return 0; + + if (pci_dev_guard) { + printk(KERN_ERR "Can''t use pci_dev_guard\n"); + return 0; + } + + pci_dev_guard = &xen_pci_dev_guard; + return 0; +} +core_initcall(init_pci_guard); + diff -r ef74eb78b86c include/linux/pci.h --- a/include/linux/pci.h Fri Jul 04 14:54:57 2008 +0100 +++ b/include/linux/pci.h Fri Jul 04 14:57:12 2008 +0100 @@ -338,6 +338,29 @@ /* Device driver may resume normal operations */ void (*resume)(struct pci_dev *dev); }; + +/* 
---------------------------------------------------------------- */ + +struct pci_dev_guard { + int (*enable)(struct pci_dev *dev); + int (*disable)(struct pci_dev *dev); +}; + +extern struct pci_dev_guard *pci_dev_guard; + +static inline int pci_dev_guard_enable(struct pci_dev *pci_dev) +{ + if (pci_dev_guard && pci_dev_guard->enable) + return pci_dev_guard->enable(pci_dev); + return 0; +} + +static inline int pci_dev_guard_disable(struct pci_dev *pci_dev) +{ + if (pci_dev_guard && pci_dev_guard->disable) + return pci_dev_guard->disable(pci_dev); + return 0; +} /* ---------------------------------------------------------------- */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Espen Skoglund
2008-Jul-04 16:43 UTC
[Xen-devel] [PATCH 7/7] PCI device register/unregister + pci_dev cleanups
Remove PCI device enumeration in VT-d code Signed-off-by: Espen Skoglund <espen.skoglund@netronome.com> -- iommu.c | 60 ++++++------------------------------------------------------ 1 file changed, 6 insertions(+), 54 deletions(-) -- diff -r 82f38dc16ce2 xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Thu Jul 03 22:46:26 2008 +0100 +++ b/xen/drivers/passthrough/vtd/iommu.c Fri Jul 04 15:57:53 2008 +0100 @@ -40,9 +40,6 @@ static spinlock_t domid_bitmap_lock; /* protect domain id bitmap */ static int domid_bitmap_size; /* domain id bitmap size in bits */ static unsigned long *domid_bitmap; /* iommu domain id bitmap */ - -static void setup_dom0_devices(struct domain *d); -static void setup_dom0_rmrr(struct domain *d); #define DID_FIELD_WIDTH 16 #define DID_HIGH_OFFSET 8 @@ -1045,10 +1042,6 @@ iommu_map_page(d, i, i); } - - setup_dom0_devices(d); - setup_dom0_rmrr(d); - iommu_flush_all(); for_each_drhd_unit ( drhd ) @@ -1333,12 +1326,18 @@ break; case DEV_TYPE_PCIe_ENDPOINT: + gdprintk(XENLOG_INFO VTDPREFIX, "domain_context_unmap:PCIe: " + "bdf = %x:%x.%x\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); ret = domain_context_unmap_one(drhd->iommu, bus, devfn); break; case DEV_TYPE_PCI: if ( find_pcie_endpoint(&bus, &devfn) ) + { + gdprintk(XENLOG_INFO VTDPREFIX, "domain_context_unmap:PCI: " + "bdf = %x:%x.%x\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); ret = domain_context_unmap_one(drhd->iommu, bus, devfn); + } break; default: @@ -1593,38 +1592,6 @@ return domain_context_unmap(pdev->bus, pdev->devfn); } -static void setup_dom0_devices(struct domain *d) -{ - struct hvm_iommu *hd; - struct pci_dev *pdev; - int bus, dev, func; - u32 l; - - hd = domain_hvm_iommu(d); - - write_lock(&pcidevs_lock); - for ( bus = 0; bus < 256; bus++ ) - { - for ( dev = 0; dev < 32; dev++ ) - { - for ( func = 0; func < 8; func++ ) - { - l = pci_conf_read32(bus, dev, func, PCI_VENDOR_ID); - /* some broken boards return 0 or ~0 if a slot is empty: */ - if ( (l == 
0xffffffff) || (l == 0x00000000) || - (l == 0x0000ffff) || (l == 0xffff0000) ) - continue; - - pdev = alloc_pdev(bus, PCI_DEVFN(dev, func)); - pdev->domain = d; - list_add(&pdev->domain_list, &d->arch.pdev_list); - domain_context_mapping(d, pdev->bus, pdev->devfn); - } - } - } - write_unlock(&pcidevs_lock); -} - void clear_fault_bits(struct iommu *iommu) { u64 val; @@ -1687,21 +1654,6 @@ } return 0; -} - -static void setup_dom0_rmrr(struct domain *d) -{ - struct acpi_rmrr_unit *rmrr; - u16 bdf; - int ret, i; - - for_each_rmrr_device ( rmrr, bdf, i ) - { - ret = iommu_prepare_rmrr_dev(d, rmrr, PCI_BUS(bdf), PCI_DEVFN2(bdf)); - if ( ret ) - gdprintk(XENLOG_ERR VTDPREFIX, - "IOMMU: mapping reserved region failed\n"); - } } int intel_vtd_setup(void) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel