While I'm unaware of devices making use of this functionality in proper ways, the goal of this patch set is to leverage the enabling of the specified behavior as a workaround for devices that behave as if they made use of this functionality _without_ advertising so in the PCIe capability structure. While it would have been possible to leave the generic IOMMU code untouched, and deal with the creation of the necessary device context entries in the individual IOMMUs' implementations, I felt that it was cleaner to have as much of the necessary abstraction in the generic layer. The adjustments in particular imply that for the relevant operations, (PCI-dev, devfn) tuples get passed, with the PCI device referring to the real device and devfn representing either the real device or the phantom function. Consequently, for any operation intended to deal with the real device, the devfn of the device itself must be used, whereas for anything targeting the phantom function the passed-in value is the correct one to pass on. 1: IOMMU: adjust (re)assign operation parameters 2: IOMMU: adjust add/remove operation parameters 3: VT-d: adjust context map/unmap parameters 4: AMD IOMMU: adjust flush function parameters 5: IOMMU: consolidate pdev_type() and cache its result for a given device 6: IOMMU: add phantom function support 7: VT-d: relax source qualifier for MSI of phantom functions 8: IOMMU: add option to specify devices behaving like ones using phantom functions The patch set meanwhile got tested on the affected systems. Signed-off-by: Jan Beulich <jbeulich@suse.com>
Jan Beulich
2012-Dec-06 14:10 UTC
[PATCH 1/8] IOMMU: adjust (re)assign operation parameters
... to use a (struct pci_dev *, devfn) pair. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c @@ -332,34 +332,31 @@ void amd_iommu_disable_domain_device(str disable_ats_device(iommu->seg, bus, devfn); } -static int reassign_device( struct domain *source, struct domain *target, - u16 seg, u8 bus, u8 devfn) +static int reassign_device(struct domain *source, struct domain *target, + u8 devfn, struct pci_dev *pdev) { - struct pci_dev *pdev; struct amd_iommu *iommu; int bdf; struct hvm_iommu *t = domain_hvm_iommu(target); - ASSERT(spin_is_locked(&pcidevs_lock)); - pdev = pci_get_pdev_by_domain(source, seg, bus, devfn); - if ( !pdev ) - return -ENODEV; - - bdf = PCI_BDF2(bus, devfn); - iommu = find_iommu_for_device(seg, bdf); + bdf = PCI_BDF2(pdev->bus, pdev->devfn); + iommu = find_iommu_for_device(pdev->seg, bdf); if ( !iommu ) { AMD_IOMMU_DEBUG("Fail to find iommu." " %04x:%02x:%x02.%x cannot be assigned to dom%d\n", - seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id); return -ENODEV; } amd_iommu_disable_domain_device(source, iommu, bdf); - list_move(&pdev->domain_list, &target->arch.pdev_list); - pdev->domain = target; + if ( devfn == pdev->devfn ) + { + list_move(&pdev->domain_list, &target->arch.pdev_list); + pdev->domain = target; + } /* IO page tables might be destroyed after pci-detach the last device * In this case, we have to re-allocate root table for next pci-attach.*/ @@ -368,17 +365,18 @@ static int reassign_device( struct domai amd_iommu_setup_domain_device(target, iommu, bdf); AMD_IOMMU_DEBUG("Re-assign %04x:%02x:%02x.%u from dom%d to dom%d\n", - seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), source->domain_id, target->domain_id); return 0; } -static int amd_iommu_assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn) +static int 
amd_iommu_assign_device(struct domain *d, u8 devfn, + struct pci_dev *pdev) { - struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg); - int bdf = PCI_BDF2(bus, devfn); - int req_id = get_dma_requestor_id(seg, bdf); + struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(pdev->seg); + int bdf = PCI_BDF2(pdev->bus, devfn); + int req_id = get_dma_requestor_id(pdev->seg, bdf); if ( ivrs_mappings[req_id].unity_map_enable ) { @@ -390,7 +388,7 @@ static int amd_iommu_assign_device(struc ivrs_mappings[req_id].read_permission); } - return reassign_device(dom0, d, seg, bus, devfn); + return reassign_device(dom0, d, devfn, pdev); } static void deallocate_next_page_table(struct page_info* pg, int level) @@ -451,12 +449,6 @@ static void amd_iommu_domain_destroy(str amd_iommu_flush_all_pages(d); } -static int amd_iommu_return_device( - struct domain *s, struct domain *t, u16 seg, u8 bus, u8 devfn) -{ - return reassign_device(s, t, seg, bus, devfn); -} - static int amd_iommu_add_device(struct pci_dev *pdev) { struct amd_iommu *iommu; @@ -593,7 +585,7 @@ const struct iommu_ops amd_iommu_ops = { .teardown = amd_iommu_domain_destroy, .map_page = amd_iommu_map_page, .unmap_page = amd_iommu_unmap_page, - .reassign_device = amd_iommu_return_device, + .reassign_device = reassign_device, .get_device_group_id = amd_iommu_group_id, .update_ire_from_apic = amd_iommu_ioapic_update_ire, .update_ire_from_msi = amd_iommu_msi_msg_update_ire, --- a/xen/drivers/passthrough/iommu.c +++ b/xen/drivers/passthrough/iommu.c @@ -233,11 +233,16 @@ static int assign_device(struct domain * return -EXDEV; spin_lock(&pcidevs_lock); - pdev = pci_get_pdev(seg, bus, devfn); - if ( pdev ) - pdev->fault.count = 0; + pdev = pci_get_pdev_by_domain(dom0, seg, bus, devfn); + if ( !pdev ) + { + rc = pci_get_pdev(seg, bus, devfn) ? 
-EBUSY : -ENODEV; + goto done; + } + + pdev->fault.count = 0; - if ( (rc = hd->platform_ops->assign_device(d, seg, bus, devfn)) ) + if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) ) goto done; if ( has_arch_pdevs(d) && !need_iommu(d) ) @@ -368,18 +373,11 @@ int deassign_device(struct domain *d, u1 return -EINVAL; ASSERT(spin_is_locked(&pcidevs_lock)); - pdev = pci_get_pdev(seg, bus, devfn); + pdev = pci_get_pdev_by_domain(d, seg, bus, devfn); if ( !pdev ) return -ENODEV; - if ( pdev->domain != d ) - { - dprintk(XENLOG_G_ERR, - "d%d: deassign a device not owned\n", d->domain_id); - return -EINVAL; - } - - ret = hd->platform_ops->reassign_device(d, dom0, seg, bus, devfn); + ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev); if ( ret ) { dprintk(XENLOG_G_ERR, --- a/xen/drivers/passthrough/vtd/iommu.c +++ b/xen/drivers/passthrough/vtd/iommu.c @@ -1658,17 +1658,10 @@ out: static int reassign_device_ownership( struct domain *source, struct domain *target, - u16 seg, u8 bus, u8 devfn) + u8 devfn, struct pci_dev *pdev) { - struct pci_dev *pdev; int ret; - ASSERT(spin_is_locked(&pcidevs_lock)); - pdev = pci_get_pdev_by_domain(source, seg, bus, devfn); - - if (!pdev) - return -ENODEV; - /* * Devices assigned to untrusted domains (here assumed to be any domU) * can attempt to send arbitrary LAPIC/MSI messages. 
We are unprotected @@ -1677,16 +1670,19 @@ static int reassign_device_ownership( if ( (target != dom0) && !iommu_intremap ) untrusted_msi = 1; - ret = domain_context_unmap(source, seg, bus, devfn); + ret = domain_context_unmap(source, pdev->seg, pdev->bus, devfn); if ( ret ) return ret; - ret = domain_context_mapping(target, seg, bus, devfn); + ret = domain_context_mapping(target, pdev->seg, pdev->bus, devfn); if ( ret ) return ret; - list_move(&pdev->domain_list, &target->arch.pdev_list); - pdev->domain = target; + if ( devfn == pdev->devfn ) + { + list_move(&pdev->domain_list, &target->arch.pdev_list); + pdev->domain = target; + } return ret; } @@ -2202,36 +2198,26 @@ int __init intel_vtd_setup(void) } static int intel_iommu_assign_device( - struct domain *d, u16 seg, u8 bus, u8 devfn) + struct domain *d, u8 devfn, struct pci_dev *pdev) { struct acpi_rmrr_unit *rmrr; int ret = 0, i; - struct pci_dev *pdev; - u16 bdf; + u16 bdf, seg; + u8 bus; if ( list_empty(&acpi_drhd_units) ) return -ENODEV; - ASSERT(spin_is_locked(&pcidevs_lock)); - pdev = pci_get_pdev(seg, bus, devfn); - if (!pdev) - return -ENODEV; - - if (pdev->domain != dom0) - { - dprintk(XENLOG_ERR VTDPREFIX, - "IOMMU: assign a assigned device\n"); - return -EBUSY; - } - - ret = reassign_device_ownership(dom0, d, seg, bus, devfn); + ret = reassign_device_ownership(dom0, d, devfn, pdev); if ( ret ) goto done; /* FIXME: Because USB RMRR conflicts with guest bios region, * ignore USB RMRR temporarily. 
*/ - if ( is_usb_device(seg, bus, devfn) ) + seg = pdev->seg; + bus = pdev->bus; + if ( is_usb_device(seg, bus, pdev->devfn) ) { ret = 0; goto done; --- a/xen/include/xen/iommu.h +++ b/xen/include/xen/iommu.h @@ -97,13 +97,13 @@ struct iommu_ops { int (*add_device)(struct pci_dev *pdev); int (*enable_device)(struct pci_dev *pdev); int (*remove_device)(struct pci_dev *pdev); - int (*assign_device)(struct domain *d, u16 seg, u8 bus, u8 devfn); + int (*assign_device)(struct domain *, u8 devfn, struct pci_dev *); void (*teardown)(struct domain *d); int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn, unsigned int flags); int (*unmap_page)(struct domain *d, unsigned long gfn); int (*reassign_device)(struct domain *s, struct domain *t, - u16 seg, u8 bus, u8 devfn); + u8 devfn, struct pci_dev *); int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn); void (*update_ire_from_apic)(unsigned int apic, unsigned int reg, unsigned int value); void (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2012-Dec-06 14:11 UTC
[PATCH 2/8] IOMMU: adjust add/remove operation parameters
... to use a (struct pci_dev *, devfn) pair. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c @@ -83,14 +83,14 @@ static void disable_translation(u32 *dte } static void amd_iommu_setup_domain_device( - struct domain *domain, struct amd_iommu *iommu, int bdf) + struct domain *domain, struct amd_iommu *iommu, + u8 devfn, struct pci_dev *pdev) { void *dte; unsigned long flags; int req_id, valid = 1; int dte_i = 0; - u8 bus = PCI_BUS(bdf); - u8 devfn = PCI_DEVFN2(bdf); + u8 bus = pdev->bus; struct hvm_iommu *hd = domain_hvm_iommu(domain); @@ -103,7 +103,7 @@ static void amd_iommu_setup_domain_devic dte_i = 1; /* get device-table entry */ - req_id = get_dma_requestor_id(iommu->seg, bdf); + req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn)); dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE); spin_lock_irqsave(&iommu->lock, flags); @@ -115,7 +115,7 @@ static void amd_iommu_setup_domain_devic (u32 *)dte, page_to_maddr(hd->root_table), hd->domain_id, hd->paging_mode, valid); - if ( pci_ats_device(iommu->seg, bus, devfn) && + if ( pci_ats_device(iommu->seg, bus, pdev->devfn) && iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) ) iommu_dte_set_iotlb((u32 *)dte, dte_i); @@ -132,32 +132,31 @@ static void amd_iommu_setup_domain_devic ASSERT(spin_is_locked(&pcidevs_lock)); - if ( pci_ats_device(iommu->seg, bus, devfn) && - !pci_ats_enabled(iommu->seg, bus, devfn) ) + if ( pci_ats_device(iommu->seg, bus, pdev->devfn) && + !pci_ats_enabled(iommu->seg, bus, pdev->devfn) ) { - struct pci_dev *pdev; + if ( devfn == pdev->devfn ) + enable_ats_device(iommu->seg, bus, devfn); - enable_ats_device(iommu->seg, bus, devfn); - - ASSERT(spin_is_locked(&pcidevs_lock)); - pdev = pci_get_pdev(iommu->seg, bus, devfn); - - ASSERT( pdev != NULL ); amd_iommu_flush_iotlb(pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0); } } -static void __init amd_iommu_setup_dom0_device(struct pci_dev 
*pdev) +static int __init amd_iommu_setup_dom0_device(u8 devfn, struct pci_dev *pdev) { int bdf = PCI_BDF2(pdev->bus, pdev->devfn); struct amd_iommu *iommu = find_iommu_for_device(pdev->seg, bdf); - if ( likely(iommu != NULL) ) - amd_iommu_setup_domain_device(pdev->domain, iommu, bdf); - else + if ( unlikely(!iommu) ) + { AMD_IOMMU_DEBUG("No iommu for device %04x:%02x:%02x.%u\n", pdev->seg, pdev->bus, - PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + PCI_SLOT(devfn), PCI_FUNC(devfn)); + return -ENODEV; + } + + amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev); + return 0; } int __init amd_iov_detect(void) @@ -295,16 +294,16 @@ static void __init amd_iommu_dom0_init(s } void amd_iommu_disable_domain_device(struct domain *domain, - struct amd_iommu *iommu, int bdf) + struct amd_iommu *iommu, + u8 devfn, struct pci_dev *pdev) { void *dte; unsigned long flags; int req_id; - u8 bus = PCI_BUS(bdf); - u8 devfn = PCI_DEVFN2(bdf); + u8 bus = pdev->bus; BUG_ON ( iommu->dev_table.buffer == NULL ); - req_id = get_dma_requestor_id(iommu->seg, bdf); + req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn)); dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE); spin_lock_irqsave(&iommu->lock, flags); @@ -312,7 +311,7 @@ void amd_iommu_disable_domain_device(str { disable_translation((u32 *)dte); - if ( pci_ats_device(iommu->seg, bus, devfn) && + if ( pci_ats_device(iommu->seg, bus, pdev->devfn) && iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) ) iommu_dte_set_iotlb((u32 *)dte, 0); @@ -327,7 +326,8 @@ void amd_iommu_disable_domain_device(str ASSERT(spin_is_locked(&pcidevs_lock)); - if ( pci_ats_device(iommu->seg, bus, devfn) && + if ( devfn == pdev->devfn && + pci_ats_device(iommu->seg, bus, devfn) && pci_ats_enabled(iommu->seg, bus, devfn) ) disable_ats_device(iommu->seg, bus, devfn); } @@ -350,7 +350,7 @@ static int reassign_device(struct domain return -ENODEV; } - amd_iommu_disable_domain_device(source, iommu, bdf); + 
amd_iommu_disable_domain_device(source, iommu, devfn, pdev); if ( devfn == pdev->devfn ) { @@ -363,7 +363,7 @@ static int reassign_device(struct domain if ( t->root_table == NULL ) allocate_domain_resources(t); - amd_iommu_setup_domain_device(target, iommu, bdf); + amd_iommu_setup_domain_device(target, iommu, devfn, pdev); AMD_IOMMU_DEBUG("Re-assign %04x:%02x:%02x.%u from dom%d to dom%d\n", pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), source->domain_id, target->domain_id); @@ -449,7 +449,7 @@ static void amd_iommu_domain_destroy(str amd_iommu_flush_all_pages(d); } -static int amd_iommu_add_device(struct pci_dev *pdev) +static int amd_iommu_add_device(u8 devfn, struct pci_dev *pdev) { struct amd_iommu *iommu; u16 bdf; @@ -462,16 +462,16 @@ static int amd_iommu_add_device(struct p { AMD_IOMMU_DEBUG("Fail to find iommu." " %04x:%02x:%02x.%u cannot be assigned to dom%d\n", - pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn), pdev->domain->domain_id); + pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + pdev->domain->domain_id); return -ENODEV; } - amd_iommu_setup_domain_device(pdev->domain, iommu, bdf); + amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev); return 0; } -static int amd_iommu_remove_device(struct pci_dev *pdev) +static int amd_iommu_remove_device(u8 devfn, struct pci_dev *pdev) { struct amd_iommu *iommu; u16 bdf; @@ -484,12 +484,12 @@ static int amd_iommu_remove_device(struc { AMD_IOMMU_DEBUG("Fail to find iommu." 
" %04x:%02x:%02x.%u cannot be removed from dom%d\n", - pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn), pdev->domain->domain_id); + pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + pdev->domain->domain_id); return -ENODEV; } - amd_iommu_disable_domain_device(pdev->domain, iommu, bdf); + amd_iommu_disable_domain_device(pdev->domain, iommu, devfn, pdev); return 0; } --- a/xen/drivers/passthrough/iommu.c +++ b/xen/drivers/passthrough/iommu.c @@ -168,7 +168,7 @@ int iommu_add_device(struct pci_dev *pde if ( !iommu_enabled || !hd->platform_ops ) return 0; - return hd->platform_ops->add_device(pdev); + return hd->platform_ops->add_device(pdev->devfn, pdev); } int iommu_enable_device(struct pci_dev *pdev) @@ -198,7 +198,7 @@ int iommu_remove_device(struct pci_dev * if ( !iommu_enabled || !hd->platform_ops ) return 0; - return hd->platform_ops->remove_device(pdev); + return hd->platform_ops->remove_device(pdev->devfn, pdev); } /* --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -743,7 +743,7 @@ int __init scan_pci_devices(void) struct setup_dom0 { struct domain *d; - void (*handler)(struct pci_dev *); + int (*handler)(u8 devfn, struct pci_dev *); }; static int __init _setup_dom0_pci_devices(struct pci_seg *pseg, void *arg) @@ -764,12 +764,12 @@ static int __init _setup_dom0_pci_device { pdev->domain = ctxt->d; list_add(&pdev->domain_list, &ctxt->d->arch.pdev_list); - ctxt->handler(pdev); + ctxt->handler(devfn, pdev); } else if ( pdev->domain == dom_xen ) { pdev->domain = ctxt->d; - ctxt->handler(pdev); + ctxt->handler(devfn, pdev); pdev->domain = dom_xen; } else if ( pdev->domain != ctxt->d ) @@ -783,7 +783,7 @@ static int __init _setup_dom0_pci_device } void __init setup_dom0_pci_devices( - struct domain *d, void (*handler)(struct pci_dev *)) + struct domain *d, int (*handler)(u8 devfn, struct pci_dev *)) { struct setup_dom0 ctxt = { .d = d, .handler = handler }; --- a/xen/drivers/passthrough/vtd/iommu.c +++ 
b/xen/drivers/passthrough/vtd/iommu.c @@ -50,7 +50,7 @@ int nr_iommus; static struct tasklet vtd_fault_tasklet; -static void setup_dom0_device(struct pci_dev *); +static int setup_dom0_device(u8 devfn, struct pci_dev *); static void setup_dom0_rmrr(struct domain *d); static int domain_iommu_domid(struct domain *d, @@ -1873,7 +1873,7 @@ static int rmrr_identity_mapping(struct return 0; } -static int intel_iommu_add_device(struct pci_dev *pdev) +static int intel_iommu_add_device(u8 devfn, struct pci_dev *pdev) { struct acpi_rmrr_unit *rmrr; u16 bdf; @@ -1884,8 +1884,7 @@ static int intel_iommu_add_device(struct if ( !pdev->domain ) return -EINVAL; - ret = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, - pdev->devfn); + ret = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, devfn); if ( ret ) { dprintk(XENLOG_ERR VTDPREFIX, "d%d: context mapping failed\n", @@ -1897,7 +1896,7 @@ static int intel_iommu_add_device(struct { if ( rmrr->segment == pdev->seg && PCI_BUS(bdf) == pdev->bus && - PCI_DEVFN2(bdf) == pdev->devfn ) + PCI_DEVFN2(bdf) == devfn ) { ret = rmrr_identity_mapping(pdev->domain, rmrr); if ( ret ) @@ -1922,7 +1921,7 @@ static int intel_iommu_enable_device(str return ret >= 0 ? 
0 : ret; } -static int intel_iommu_remove_device(struct pci_dev *pdev) +static int intel_iommu_remove_device(u8 devfn, struct pci_dev *pdev) { struct acpi_rmrr_unit *rmrr; u16 bdf; @@ -1940,19 +1939,22 @@ static int intel_iommu_remove_device(str { if ( rmrr->segment == pdev->seg && PCI_BUS(bdf) == pdev->bus && - PCI_DEVFN2(bdf) == pdev->devfn ) + PCI_DEVFN2(bdf) == devfn ) return 0; } } - return domain_context_unmap(pdev->domain, pdev->seg, pdev->bus, - pdev->devfn); + return domain_context_unmap(pdev->domain, pdev->seg, pdev->bus, devfn); } -static void __init setup_dom0_device(struct pci_dev *pdev) +static int __init setup_dom0_device(u8 devfn, struct pci_dev *pdev) { - domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, pdev->devfn); - pci_vtd_quirk(pdev); + int err; + + err = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, devfn); + if ( !err && devfn == pdev->devfn ) + pci_vtd_quirk(pdev); + return err; } void clear_fault_bits(struct iommu *iommu) --- a/xen/include/xen/iommu.h +++ b/xen/include/xen/iommu.h @@ -94,9 +94,9 @@ struct msi_msg; struct iommu_ops { int (*init)(struct domain *d); void (*dom0_init)(struct domain *d); - int (*add_device)(struct pci_dev *pdev); + int (*add_device)(u8 devfn, struct pci_dev *); int (*enable_device)(struct pci_dev *pdev); - int (*remove_device)(struct pci_dev *pdev); + int (*remove_device)(u8 devfn, struct pci_dev *); int (*assign_device)(struct domain *, u8 devfn, struct pci_dev *); void (*teardown)(struct domain *d); int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn, --- a/xen/include/xen/pci.h +++ b/xen/include/xen/pci.h @@ -100,7 +100,8 @@ struct pci_dev *pci_lock_pdev(int seg, i struct pci_dev *pci_lock_domain_pdev( struct domain *, int seg, int bus, int devfn); -void setup_dom0_pci_devices(struct domain *, void (*)(struct pci_dev *)); +void setup_dom0_pci_devices(struct domain *, + int (*)(u8 devfn, struct pci_dev *)); void pci_release_devices(struct domain *d); int 
pci_add_segment(u16 seg); const unsigned long *pci_get_ro_map(u16 seg); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
... to use a (struct pci_dev *, devfn) pair. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/drivers/passthrough/vtd/extern.h +++ b/xen/drivers/passthrough/vtd/extern.h @@ -79,7 +79,7 @@ void free_pgtable_maddr(u64 maddr); void *map_vtd_domain_page(u64 maddr); void unmap_vtd_domain_page(void *va); int domain_context_mapping_one(struct domain *domain, struct iommu *iommu, - u8 bus, u8 devfn); + u8 bus, u8 devfn, const struct pci_dev *); int domain_context_unmap_one(struct domain *domain, struct iommu *iommu, u8 bus, u8 devfn); --- a/xen/drivers/passthrough/vtd/iommu.c +++ b/xen/drivers/passthrough/vtd/iommu.c @@ -1275,7 +1275,7 @@ static void __init intel_iommu_dom0_init int domain_context_mapping_one( struct domain *domain, struct iommu *iommu, - u8 bus, u8 devfn) + u8 bus, u8 devfn, const struct pci_dev *pdev) { struct hvm_iommu *hd = domain_hvm_iommu(domain); struct context_entry *context, *context_entries; @@ -1292,11 +1292,9 @@ int domain_context_mapping_one( if ( context_present(*context) ) { int res = 0; - struct pci_dev *pdev = NULL; - /* First try to get domain ownership from device structure. If that''s + /* Try to get domain ownership from device structure. If that''s * not available, try to read it from the context itself. 
*/ - pdev = pci_get_pdev(seg, bus, devfn); if ( pdev ) { if ( pdev->domain != domain ) @@ -1417,13 +1415,12 @@ int domain_context_mapping_one( } static int domain_context_mapping( - struct domain *domain, u16 seg, u8 bus, u8 devfn) + struct domain *domain, u8 devfn, const struct pci_dev *pdev) { struct acpi_drhd_unit *drhd; int ret = 0; u32 type; - u8 secbus; - struct pci_dev *pdev = pci_get_pdev(seg, bus, devfn); + u8 seg = pdev->seg, bus = pdev->bus, secbus; drhd = acpi_find_matched_drhd_unit(pdev); if ( !drhd ) @@ -1444,8 +1441,9 @@ static int domain_context_mapping( dprintk(VTDPREFIX, "d%d:PCIe: map %04x:%02x:%02x.%u\n", domain->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); - ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn); - if ( !ret && ats_device(pdev, drhd) > 0 ) + ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, + pdev); + if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 ) enable_ats_device(seg, bus, devfn); break; @@ -1456,14 +1454,16 @@ static int domain_context_mapping( domain->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); - ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn); + ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, + pdev); if ( ret ) break; if ( find_upstream_bridge(seg, &bus, &devfn, &secbus) < 1 ) break; - ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn); + ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, + pci_get_pdev(seg, bus, devfn)); /* * Devices behind PCIe-to-PCI/PCIx bridge may generate different @@ -1472,7 +1472,8 @@ static int domain_context_mapping( */ if ( !ret && pdev_type(seg, bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE && (secbus != pdev->bus || pdev->devfn != 0) ) - ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0); + ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0, + pci_get_pdev(seg, secbus, 0)); break; @@ -1545,18 +1546,15 @@ int domain_context_unmap_one( } 
static int domain_context_unmap( - struct domain *domain, u16 seg, u8 bus, u8 devfn) + struct domain *domain, u8 devfn, const struct pci_dev *pdev) { struct acpi_drhd_unit *drhd; struct iommu *iommu; int ret = 0; u32 type; - u8 tmp_bus, tmp_devfn, secbus; - struct pci_dev *pdev = pci_get_pdev(seg, bus, devfn); + u8 seg = pdev->seg, bus = pdev->bus, tmp_bus, tmp_devfn, secbus; int found = 0; - BUG_ON(!pdev); - drhd = acpi_find_matched_drhd_unit(pdev); if ( !drhd ) return -ENODEV; @@ -1576,7 +1574,7 @@ static int domain_context_unmap( domain->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); ret = domain_context_unmap_one(domain, iommu, bus, devfn); - if ( !ret && ats_device(pdev, drhd) > 0 ) + if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 ) disable_ats_device(seg, bus, devfn); break; @@ -1670,11 +1668,11 @@ static int reassign_device_ownership( if ( (target != dom0) && !iommu_intremap ) untrusted_msi = 1; - ret = domain_context_unmap(source, pdev->seg, pdev->bus, devfn); + ret = domain_context_unmap(source, devfn, pdev); if ( ret ) return ret; - ret = domain_context_mapping(target, pdev->seg, pdev->bus, devfn); + ret = domain_context_mapping(target, devfn, pdev); if ( ret ) return ret; @@ -1884,7 +1882,7 @@ static int intel_iommu_add_device(u8 dev if ( !pdev->domain ) return -EINVAL; - ret = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, devfn); + ret = domain_context_mapping(pdev->domain, devfn, pdev); if ( ret ) { dprintk(XENLOG_ERR VTDPREFIX, "d%d: context mapping failed\n", @@ -1944,14 +1942,14 @@ static int intel_iommu_remove_device(u8 } } - return domain_context_unmap(pdev->domain, pdev->seg, pdev->bus, devfn); + return domain_context_unmap(pdev->domain, devfn, pdev); } static int __init setup_dom0_device(u8 devfn, struct pci_dev *pdev) { int err; - err = domain_context_mapping(pdev->domain, pdev->seg, pdev->bus, devfn); + err = domain_context_mapping(pdev->domain, devfn, pdev); if ( !err && devfn == pdev->devfn ) 
pci_vtd_quirk(pdev); return err; --- a/xen/drivers/passthrough/vtd/quirks.c +++ b/xen/drivers/passthrough/vtd/quirks.c @@ -288,7 +288,7 @@ static void map_me_phantom_function(stru /* map or unmap ME phantom function */ if ( map ) domain_context_mapping_one(domain, drhd->iommu, 0, - PCI_DEVFN(dev, 7)); + PCI_DEVFN(dev, 7), NULL); else domain_context_unmap_one(domain, drhd->iommu, 0, PCI_DEVFN(dev, 7)); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
... to use a (struct pci_dev *, devfn) pair. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/drivers/passthrough/amd/iommu_cmd.c +++ b/xen/drivers/passthrough/amd/iommu_cmd.c @@ -287,12 +287,12 @@ void invalidate_iommu_all(struct amd_iom send_iommu_command(iommu, cmd); } -void amd_iommu_flush_iotlb(struct pci_dev *pdev, +void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev, uint64_t gaddr, unsigned int order) { unsigned long flags; struct amd_iommu *iommu; - unsigned int bdf, req_id, queueid, maxpend; + unsigned int req_id, queueid, maxpend; struct pci_ats_dev *ats_pdev; if ( !ats_enabled ) @@ -305,8 +305,8 @@ void amd_iommu_flush_iotlb(struct pci_de if ( !pci_ats_enabled(ats_pdev->seg, ats_pdev->bus, ats_pdev->devfn) ) return; - bdf = PCI_BDF2(ats_pdev->bus, ats_pdev->devfn); - iommu = find_iommu_for_device(ats_pdev->seg, bdf); + iommu = find_iommu_for_device(ats_pdev->seg, + PCI_BDF2(ats_pdev->bus, ats_pdev->devfn)); if ( !iommu ) { @@ -319,7 +319,7 @@ void amd_iommu_flush_iotlb(struct pci_de if ( !iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) ) return; - req_id = get_dma_requestor_id(iommu->seg, bdf); + req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(ats_pdev->bus, devfn)); queueid = req_id; maxpend = ats_pdev->ats_queue_depth & 0xff; @@ -339,7 +339,7 @@ static void amd_iommu_flush_all_iotlbs(s return; for_each_pdev( d, pdev ) - amd_iommu_flush_iotlb(pdev, gaddr, order); + amd_iommu_flush_iotlb(pdev->devfn, pdev, gaddr, order); } /* Flush iommu cache after p2m changes. 
*/ --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c @@ -138,7 +138,7 @@ static void amd_iommu_setup_domain_devic if ( devfn == pdev->devfn ) enable_ats_device(iommu->seg, bus, devfn); - amd_iommu_flush_iotlb(pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0); + amd_iommu_flush_iotlb(devfn, pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0); } } --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h @@ -78,8 +78,8 @@ void iommu_dte_set_guest_cr3(u32 *dte, u void amd_iommu_flush_all_pages(struct domain *d); void amd_iommu_flush_pages(struct domain *d, unsigned long gfn, unsigned int order); -void amd_iommu_flush_iotlb(struct pci_dev *pdev, uint64_t gaddr, - unsigned int order); +void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev, + uint64_t gaddr, unsigned int order); void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf); void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf); void amd_iommu_flush_all_caches(struct amd_iommu *iommu); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2012-Dec-06 14:13 UTC
[PATCH 5/8] IOMMU/PCI: consolidate pdev_type() and cache its result for a given device
Add an "unknown" device type as well as one for PCI-to-PCIe bridges (the latter of which other IOMMU code with or without this patch doesn't appear to handle properly). Make sure we don't mistake a device for which we can't access its config space for a legacy PCI device (after all we in fact don't know how to deal with such a device, and hence shouldn't try to). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -142,7 +142,7 @@ static struct pci_dev *alloc_pdev(struct spin_lock_init(&pdev->msix_table_lock); /* update bus2bridge */ - switch ( pdev_type(pseg->nr, bus, devfn) ) + switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) ) { u8 sec_bus, sub_bus; @@ -182,7 +182,7 @@ static struct pci_dev *alloc_pdev(struct static void free_pdev(struct pci_seg *pseg, struct pci_dev *pdev) { /* update bus2bridge */ - switch ( pdev_type(pseg->nr, pdev->bus, pdev->devfn) ) + switch ( pdev->type ) { u8 dev, func, sec_bus, sub_bus; @@ -200,6 +200,9 @@ static void free_pdev(struct pci_seg *ps pseg->bus2bridge[sec_bus] = pseg->bus2bridge[pdev->bus]; spin_unlock(&pseg->bus2bridge_lock); break; + + default: + break; } list_del(&pdev->alldevs_list); @@ -587,20 +590,30 @@ void pci_release_devices(struct domain * #define PCI_CLASS_BRIDGE_PCI 0x0604 -int pdev_type(u16 seg, u8 bus, u8 devfn) +enum pdev_type pdev_type(u16 seg, u8 bus, u8 devfn) { u16 class_device, creg; u8 d = PCI_SLOT(devfn), f = PCI_FUNC(devfn); int pos = pci_find_cap_offset(seg, bus, d, f, PCI_CAP_ID_EXP); class_device = pci_conf_read16(seg, bus, d, f, PCI_CLASS_DEVICE); - if ( class_device == PCI_CLASS_BRIDGE_PCI ) + switch ( class_device ) { + case PCI_CLASS_BRIDGE_PCI: if ( !pos ) return DEV_TYPE_LEGACY_PCI_BRIDGE; creg = pci_conf_read16(seg, bus, d, f, pos + PCI_EXP_FLAGS); - return ((creg & PCI_EXP_FLAGS_TYPE) >> 4) == PCI_EXP_TYPE_PCI_BRIDGE ? 
- DEV_TYPE_PCIe2PCI_BRIDGE : DEV_TYPE_PCIe_BRIDGE; + switch ( (creg & PCI_EXP_FLAGS_TYPE) >> 4 ) + { + case PCI_EXP_TYPE_PCI_BRIDGE: + return DEV_TYPE_PCIe2PCI_BRIDGE; + case PCI_EXP_TYPE_PCIE_BRIDGE: + return DEV_TYPE_PCI2PCIe_BRIDGE; + } + return DEV_TYPE_PCIe_BRIDGE; + + case 0x0000: case 0xffff: + return DEV_TYPE_PCI_UNKNOWN; } return pos ? DEV_TYPE_PCIe_ENDPOINT : DEV_TYPE_PCI; --- a/xen/drivers/passthrough/vtd/intremap.c +++ b/xen/drivers/passthrough/vtd/intremap.c @@ -430,7 +430,6 @@ void io_apic_write_remap_rte( static void set_msi_source_id(struct pci_dev *pdev, struct iremap_entry *ire) { - int type; u16 seg; u8 bus, devfn, secbus; int ret; @@ -441,8 +440,7 @@ static void set_msi_source_id(struct pci seg = pdev->seg; bus = pdev->bus; devfn = pdev->devfn; - type = pdev_type(seg, bus, devfn); - switch ( type ) + switch ( pdev->type ) { case DEV_TYPE_PCIe_BRIDGE: case DEV_TYPE_PCIe2PCI_BRIDGE: @@ -474,7 +472,7 @@ static void set_msi_source_id(struct pci default: dprintk(XENLOG_WARNING VTDPREFIX, "d%d: unknown(%u): %04x:%02x:%02x.%u\n", - pdev->domain->domain_id, type, + pdev->domain->domain_id, pdev->type, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); break; } --- a/xen/drivers/passthrough/vtd/iommu.c +++ b/xen/drivers/passthrough/vtd/iommu.c @@ -1419,7 +1419,6 @@ static int domain_context_mapping( { struct acpi_drhd_unit *drhd; int ret = 0; - u32 type; u8 seg = pdev->seg, bus = pdev->bus, secbus; drhd = acpi_find_matched_drhd_unit(pdev); @@ -1428,8 +1427,7 @@ static int domain_context_mapping( ASSERT(spin_is_locked(&pcidevs_lock)); - type = pdev_type(seg, bus, devfn); - switch ( type ) + switch ( pdev->type ) { case DEV_TYPE_PCIe_BRIDGE: case DEV_TYPE_PCIe2PCI_BRIDGE: @@ -1479,7 +1477,7 @@ static int domain_context_mapping( default: dprintk(XENLOG_ERR VTDPREFIX, "d%d:unknown(%u): %04x:%02x:%02x.%u\n", - domain->domain_id, type, + domain->domain_id, pdev->type, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); ret = -EINVAL; break; @@ -1551,7 +1549,6 @@ static int 
domain_context_unmap( struct acpi_drhd_unit *drhd; struct iommu *iommu; int ret = 0; - u32 type; u8 seg = pdev->seg, bus = pdev->bus, tmp_bus, tmp_devfn, secbus; int found = 0; @@ -1560,8 +1557,7 @@ static int domain_context_unmap( return -ENODEV; iommu = drhd->iommu; - type = pdev_type(seg, bus, devfn); - switch ( type ) + switch ( pdev->type ) { case DEV_TYPE_PCIe_BRIDGE: case DEV_TYPE_PCIe2PCI_BRIDGE: @@ -1608,7 +1604,7 @@ static int domain_context_unmap( default: dprintk(XENLOG_ERR VTDPREFIX, "d%d:unknown(%u): %04x:%02x:%02x.%u\n", - domain->domain_id, type, + domain->domain_id, pdev->type, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); ret = -EINVAL; goto out; --- a/xen/include/xen/pci.h +++ b/xen/include/xen/pci.h @@ -62,6 +62,17 @@ struct pci_dev { const u16 seg; const u8 bus; const u8 devfn; + + enum pdev_type { + DEV_TYPE_PCI_UNKNOWN, + DEV_TYPE_PCIe_ENDPOINT, + DEV_TYPE_PCIe_BRIDGE, // PCIe root port, switch + DEV_TYPE_PCIe2PCI_BRIDGE, // PCIe-to-PCI/PCIx bridge + DEV_TYPE_PCI2PCIe_BRIDGE, // PCI/PCIx-to-PCIe bridge + DEV_TYPE_LEGACY_PCI_BRIDGE, // Legacy PCI bridge + DEV_TYPE_PCI, + } type; + struct pci_dev_info info; struct arch_pci_dev arch; struct { @@ -83,18 +94,10 @@ struct pci_dev { extern spinlock_t pcidevs_lock; -enum { - DEV_TYPE_PCIe_ENDPOINT, - DEV_TYPE_PCIe_BRIDGE, // PCIe root port, switch - DEV_TYPE_PCIe2PCI_BRIDGE, // PCIe-to-PCI/PCIx bridge - DEV_TYPE_LEGACY_PCI_BRIDGE, // Legacy PCI bridge - DEV_TYPE_PCI, -}; - bool_t pci_known_segment(u16 seg); int pci_device_detect(u16 seg, u8 bus, u8 dev, u8 func); int scan_pci_devices(void); -int pdev_type(u16 seg, u8 bus, u8 devfn); +enum pdev_type pdev_type(u16 seg, u8 bus, u8 devfn); int find_upstream_bridge(u16 seg, u8 *bus, u8 *devfn, u8 *secbus); struct pci_dev *pci_lock_pdev(int seg, int bus, int devfn); struct pci_dev *pci_lock_domain_pdev( --- a/xen/include/xen/pci_regs.h +++ b/xen/include/xen/pci_regs.h @@ -371,6 +371,9 @@ #define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */ #define 
PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ #define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */ +#define PCI_EXP_TYPE_PCIE_BRIDGE 0x8 /* PCI/PCI-X to PCIE Bridge */ +#define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */ +#define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */ #define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ #define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ #define PCI_EXP_DEVCAP 4 /* Device capabilities */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Apart from generating device context entries for the base function, all phantom functions also need context entries to be generated for them. In order to distinguish different use cases, a variant of pci_get_pdev() is being introduced that, even when passed a phantom function number, would return the underlying actual device. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/drivers/passthrough/amd/iommu_cmd.c +++ b/xen/drivers/passthrough/amd/iommu_cmd.c @@ -339,7 +339,15 @@ static void amd_iommu_flush_all_iotlbs(s return; for_each_pdev( d, pdev ) - amd_iommu_flush_iotlb(pdev->devfn, pdev, gaddr, order); + { + u8 devfn = pdev->devfn; + + do { + amd_iommu_flush_iotlb(devfn, pdev, gaddr, order); + devfn += pdev->phantom_stride; + } while ( devfn != pdev->devfn && + PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) ); + } } /* Flush iommu cache after p2m changes. */ --- a/xen/drivers/passthrough/amd/iommu_init.c +++ b/xen/drivers/passthrough/amd/iommu_init.c @@ -667,7 +667,7 @@ void parse_ppr_log_entry(struct amd_iomm devfn = PCI_DEVFN2(device_id); spin_lock(&pcidevs_lock); - pdev = pci_get_pdev(iommu->seg, bus, devfn); + pdev = pci_get_real_pdev(iommu->seg, bus, devfn); spin_unlock(&pcidevs_lock); if ( pdev ) --- a/xen/drivers/passthrough/amd/iommu_map.c +++ b/xen/drivers/passthrough/amd/iommu_map.c @@ -598,7 +598,6 @@ static int update_paging_mode(struct dom for_each_pdev( d, pdev ) { bdf = PCI_BDF2(pdev->bus, pdev->devfn); - req_id = get_dma_requestor_id(pdev->seg, bdf); iommu = find_iommu_for_device(pdev->seg, bdf); if ( !iommu ) { @@ -607,16 +606,21 @@ static int update_paging_mode(struct dom } spin_lock_irqsave(&iommu->lock, flags); - device_entry = iommu->dev_table.buffer + - (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE); - - /* valid = 0 only works for dom0 passthrough mode */ - amd_iommu_set_root_page_table((u32 *)device_entry, - page_to_maddr(hd->root_table), - hd->domain_id, - hd->paging_mode, 1); - - amd_iommu_flush_device(iommu, req_id); + do { + req_id = 
get_dma_requestor_id(pdev->seg, bdf); + device_entry = iommu->dev_table.buffer + + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE); + + /* valid = 0 only works for dom0 passthrough mode */ + amd_iommu_set_root_page_table((u32 *)device_entry, + page_to_maddr(hd->root_table), + hd->domain_id, + hd->paging_mode, 1); + + amd_iommu_flush_device(iommu, req_id); + bdf += pdev->phantom_stride; + } while ( PCI_DEVFN2(bdf) != pdev->devfn && + PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) ); spin_unlock_irqrestore(&iommu->lock, flags); } --- a/xen/drivers/passthrough/iommu.c +++ b/xen/drivers/passthrough/iommu.c @@ -158,6 +158,8 @@ void __init iommu_dom0_init(struct domai int iommu_add_device(struct pci_dev *pdev) { struct hvm_iommu *hd; + int rc; + u8 devfn; if ( !pdev->domain ) return -EINVAL; @@ -168,7 +170,20 @@ int iommu_add_device(struct pci_dev *pde if ( !iommu_enabled || !hd->platform_ops ) return 0; - return hd->platform_ops->add_device(pdev->devfn, pdev); + rc = hd->platform_ops->add_device(pdev->devfn, pdev); + if ( rc || !pdev->phantom_stride ) + return rc; + + for ( devfn = pdev->devfn ; ; ) + { + devfn += pdev->phantom_stride; + if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) + return 0; + rc = hd->platform_ops->add_device(devfn, pdev); + if ( rc ) + printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n", + pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc); + } } int iommu_enable_device(struct pci_dev *pdev) @@ -191,6 +206,8 @@ int iommu_enable_device(struct pci_dev * int iommu_remove_device(struct pci_dev *pdev) { struct hvm_iommu *hd; + u8 devfn; + if ( !pdev->domain ) return -EINVAL; @@ -198,6 +215,22 @@ int iommu_remove_device(struct pci_dev * if ( !iommu_enabled || !hd->platform_ops ) return 0; + for ( devfn = pdev->devfn ; pdev->phantom_stride; ) + { + int rc; + + devfn += pdev->phantom_stride; + if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) + break; + rc = hd->platform_ops->remove_device(devfn, pdev); + if ( !rc ) + continue; + + printk(XENLOG_ERR 
"IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n", + pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc); + return rc; + } + return hd->platform_ops->remove_device(pdev->devfn, pdev); } @@ -245,6 +278,18 @@ static int assign_device(struct domain * if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) ) goto done; + for ( ; pdev->phantom_stride; rc = 0 ) + { + devfn += pdev->phantom_stride; + if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) + break; + rc = hd->platform_ops->assign_device(d, devfn, pdev); + if ( rc ) + printk(XENLOG_G_WARNING "d%d: assign %04x:%02x:%02x.%u failed (%d)\n", + d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + rc); + } + if ( has_arch_pdevs(d) && !need_iommu(d) ) { d->need_iommu = 1; @@ -377,6 +422,21 @@ int deassign_device(struct domain *d, u1 if ( !pdev ) return -ENODEV; + while ( pdev->phantom_stride ) + { + devfn += pdev->phantom_stride; + if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) + break; + ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev); + if ( !ret ) + continue; + + printk(XENLOG_G_ERR "d%d: deassign %04x:%02x:%02x.%u failed (%d)\n", + d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret); + return ret; + } + + devfn = pdev->devfn; ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev); if ( ret ) { --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -144,6 +144,8 @@ static struct pci_dev *alloc_pdev(struct /* update bus2bridge */ switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) ) { + int pos; + u16 cap; u8 sec_bus, sub_bus; case DEV_TYPE_PCIe_BRIDGE: @@ -167,6 +169,20 @@ static struct pci_dev *alloc_pdev(struct break; case DEV_TYPE_PCIe_ENDPOINT: + pos = pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn), + PCI_FUNC(devfn), PCI_CAP_ID_EXP); + BUG_ON(!pos); + cap = pci_conf_read16(pseg->nr, bus, PCI_SLOT(devfn), + PCI_FUNC(devfn), pos + PCI_EXP_DEVCAP); + if ( cap & PCI_EXP_DEVCAP_PHANTOM ) + { + pdev->phantom_stride = 8 >> MASK_EXTR(cap, + 
PCI_EXP_DEVCAP_PHANTOM); + if ( PCI_FUNC(devfn) >= pdev->phantom_stride ) + pdev->phantom_stride = 0; + } + break; + case DEV_TYPE_PCI: break; @@ -290,6 +306,27 @@ struct pci_dev *pci_get_pdev(int seg, in return NULL; } +struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn) +{ + struct pci_dev *pdev; + int stride; + + if ( seg < 0 || bus < 0 || devfn < 0 ) + return NULL; + + for ( pdev = pci_get_pdev(seg, bus, devfn), stride = 4; + !pdev && stride; stride >>= 1 ) + { + if ( !(devfn & (8 - stride)) ) + continue; + pdev = pci_get_pdev(seg, bus, devfn & ~(8 - stride)); + if ( pdev && stride != pdev->phantom_stride ) + pdev = NULL; + } + + return pdev; +} + struct pci_dev *pci_get_pdev_by_domain( struct domain *d, int seg, int bus, int devfn) { @@ -488,8 +525,19 @@ int pci_add_device(u16 seg, u8 bus, u8 d out: spin_unlock(&pcidevs_lock); - printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type, - seg, bus, slot, func); + if ( !ret ) + { + printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type, + seg, bus, slot, func); + while ( pdev->phantom_stride ) + { + func += pdev->phantom_stride; + if ( PCI_SLOT(func) ) + break; + printk(XENLOG_DEBUG "PCI phantom %04x:%02x:%02x.%u\n", + seg, bus, slot, func); + } + } return ret; } @@ -681,7 +729,7 @@ void pci_check_disable_device(u16 seg, u u16 cword; spin_lock(&pcidevs_lock); - pdev = pci_get_pdev(seg, bus, devfn); + pdev = pci_get_real_pdev(seg, bus, devfn); if ( pdev ) { if ( now < pdev->fault.time || @@ -698,6 +746,7 @@ void pci_check_disable_device(u16 seg, u /* Tell the device to stop DMAing; we can't rely on the guest to * control it for us. 
*/ + devfn = pdev->devfn; cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), PCI_COMMAND); pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), @@ -759,6 +808,27 @@ struct setup_dom0 { int (*handler)(u8 devfn, struct pci_dev *); }; +static void setup_one_dom0_device(const struct setup_dom0 *ctxt, + struct pci_dev *pdev) +{ + u8 devfn = pdev->devfn; + + do { + int err = ctxt->handler(devfn, pdev); + + if ( err ) + { + printk(XENLOG_ERR "setup %04x:%02x:%02x.%u for d%d failed (%d)\n", + pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + ctxt->d->domain_id, err); + if ( devfn == pdev->devfn ) + return; + } + devfn += pdev->phantom_stride; + } while ( devfn != pdev->devfn && + PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) ); +} + static int __init _setup_dom0_pci_devices(struct pci_seg *pseg, void *arg) { struct setup_dom0 *ctxt = arg; @@ -777,12 +847,12 @@ static int __init _setup_dom0_pci_device { pdev->domain = ctxt->d; list_add(&pdev->domain_list, &ctxt->d->arch.pdev_list); - ctxt->handler(devfn, pdev); + setup_one_dom0_device(ctxt, pdev); } else if ( pdev->domain == dom_xen ) { pdev->domain = ctxt->d; - ctxt->handler(devfn, pdev); + setup_one_dom0_device(ctxt, pdev); pdev->domain = dom_xen; } else if ( pdev->domain != ctxt->d ) --- a/xen/include/xen/lib.h +++ b/xen/include/xen/lib.h @@ -58,6 +58,9 @@ do { #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]) + __must_be_array(x)) +#define MASK_EXTR(v, m) (((v) & (m)) / ((m) & -(m))) +#define MASK_INSR(v, m) (((v) * ((m) & -(m))) & (m)) + #define reserve_bootmem(_p,_l) ((void)0) struct domain; --- a/xen/include/xen/pci.h +++ b/xen/include/xen/pci.h @@ -63,6 +63,8 @@ struct pci_dev { const u8 bus; const u8 devfn; + u8 phantom_stride; + enum pdev_type { DEV_TYPE_PCI_UNKNOWN, DEV_TYPE_PCIe_ENDPOINT, @@ -114,6 +116,7 @@ int pci_ro_device(int seg, int bus, int void arch_pci_ro_device(int seg, int bdf); int pci_hide_device(int bus, int devfn); struct pci_dev *pci_get_pdev(int seg, int bus, int 
devfn); +struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn); struct pci_dev *pci_get_pdev_by_domain( struct domain *, int seg, int bus, int devfn); void pci_check_disable_device(u16 seg, u8 bus, u8 devfn); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2012-Dec-06 14:15 UTC
[PATCH 7/8] VT-d: relax source qualifier for MSI of phantom functions
With ordinary requests allowed to come from phantom functions, the remapping tables ought to be set up to allow for MSI triggers to come from other than the "real" device too. Signed-off-by: Jan Beulich <jbeulich@suse.com> Acked-by: "Zhang, Xiantao" <xiantao.zhang@intel.com> --- a/xen/drivers/passthrough/vtd/intremap.c +++ b/xen/drivers/passthrough/vtd/intremap.c @@ -442,13 +442,22 @@ static void set_msi_source_id(struct pci devfn = pdev->devfn; switch ( pdev->type ) { + unsigned int sq; + case DEV_TYPE_PCIe_BRIDGE: case DEV_TYPE_PCIe2PCI_BRIDGE: case DEV_TYPE_LEGACY_PCI_BRIDGE: break; case DEV_TYPE_PCIe_ENDPOINT: - set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16, PCI_BDF2(bus, devfn)); + switch ( pdev->phantom_stride ) + { + case 1: sq = SQ_13_IGNORE_3; break; + case 2: sq = SQ_13_IGNORE_2; break; + case 4: sq = SQ_13_IGNORE_1; break; + default: sq = SQ_ALL_16; break; + } + set_ire_sid(ire, SVT_VERIFY_SID_SQ, sq, PCI_BDF2(bus, devfn)); break; case DEV_TYPE_PCI: _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2012-Dec-06 14:15 UTC
[PATCH 8/8] IOMMU: add option to specify devices behaving like ones using phantom functions
At least certain Marvell SATA controllers are known to issue bus master requests with a non-zero function as origin, despite themselves being single function devices. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/docs/misc/xen-command-line.markdown +++ b/docs/misc/xen-command-line.markdown @@ -698,6 +698,16 @@ Defaults to booting secondary processors Default: `on` +### pci-phantom +> `=[<seg>:]<bus>:<device>,<stride>` + +Mark a group of PCI devices as using phantom functions without actually +advertising so, so the IOMMU can create translation contexts for them. + +All numbers specified must be hexadecimal ones. + +This option can be specified more than once (up to 8 times at present). + ### ple\_gap > `= <integer>` --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -121,6 +121,49 @@ const unsigned long *pci_get_ro_map(u16 return pseg ? pseg->ro_map : NULL; } +static struct phantom_dev { + u16 seg; + u8 bus, slot, stride; +} phantom_devs[8]; +static unsigned int nr_phantom_devs; + +static void __init parse_phantom_dev(char *str) { + const char *s = str; + struct phantom_dev phantom; + + if ( !s || !*s || nr_phantom_devs >= ARRAY_SIZE(phantom_devs) ) + return; + + phantom.seg = simple_strtol(s, &s, 16); + if ( *s != ':' ) + return; + + phantom.bus = simple_strtol(s + 1, &s, 16); + if ( *s == ',' ) + { + phantom.slot = phantom.bus; + phantom.bus = phantom.seg; + phantom.seg = 0; + } + else if ( *s == ':' ) + phantom.slot = simple_strtol(s + 1, &s, 16); + else + return; + + if ( *s != ',' ) + return; + switch ( phantom.stride = simple_strtol(s + 1, &s, 0) ) + { + case 1: case 2: case 4: + if ( *s ) + default: + return; + } + + phantom_devs[nr_phantom_devs++] = phantom; +} +custom_param("pci-phantom", parse_phantom_dev); + static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn) { struct pci_dev *pdev; @@ -181,6 +224,20 @@ static struct pci_dev *alloc_pdev(struct if ( PCI_FUNC(devfn) >= pdev->phantom_stride ) 
pdev->phantom_stride = 0; } + else + { + unsigned int i; + + for ( i = 0; i < nr_phantom_devs; ++i ) + if ( phantom_devs[i].seg == pseg->nr && + phantom_devs[i].bus == bus && + phantom_devs[i].slot == PCI_SLOT(devfn) && + phantom_devs[i].stride > PCI_FUNC(devfn) ) + { + pdev->phantom_stride = phantom_devs[i].stride; + break; + } + } break; case DEV_TYPE_PCI: _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Looks fine to me. Thanks, Jan! Acked for this series of the patch.
Xiantao

> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Thursday, December 06, 2012 10:05 PM
> To: xen-devel
> Cc: Wei Huang; Wei Wang; Zhang, Xiantao
> Subject: [PATCH 0/8] IOMMU: add phantom function support
>
> While I'm unaware of devices making use of this functionality in
> proper ways, the goal of this patch set is to leverage the enabling
> of the specified behavior as a workaround for devices that behave
> as if they made use of this functionality _without_ advertising so
> in the PCIe capability structure.
>
> While it would have been possible to leave the generic IOMMU
> code untouched, and deal with the creation of the necessary
> device context entries in the individual IOMMUs' implementations,
> I felt that it was cleaner to have as much of the necessary
> abstraction in the generic layer.
>
> The adjustments in particular imply that for the relevant
> operations, (PCI-dev, devfn) tuples get passed, with the PCI
> device referring to the real device and devfn representing
> either the real device or the phantom function. Consequently,
> for any operation intended to deal with the real device, the
> devfn of the device itself must be used, whereas for anything
> targeting the phantom function the passed in value is the
> correct one to pass on.
>
> 1: IOMMU: adjust (re)assign operation parameters
> 2: IOMMU: adjust add/remove operation parameters
> 3: VT-d: adjust context map/unmap parameters
> 4: AMD IOMMU: adjust flush function parameters
> 5: IOMMU: consolidate pdev_type() and cache its result for a given device
> 6: IOMMU: add phantom function support
> 7: VT-d: relax source qualifier for MSI of phantom functions
> 8: IOMMU: add option to specify devices behaving like ones using phantom
> functions
>
> The patch set meanwhile got tested on the affected systems.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>