Add a driver for the ACPI VIOT table, which provides topology
information for para-virtual IOMMUs. Enable virtio-iommu on
non-devicetree platforms, including x86.

Since v3 [1] I fixed a build bug for !CONFIG_IOMMU_API. Joerg offered to
take this series through the IOMMU tree, which requires Acks for patches
1-3.

You can find a QEMU implementation at [2], with extra support for
testing all VIOT nodes including MMIO-based endpoints and IOMMU.
This series is at [3].

[1] https://lore.kernel.org/linux-iommu/20210602154444.1077006-1-jean-philippe at linaro.org/
[2] https://jpbrucker.net/git/qemu/log/?h=virtio-iommu/acpi
[3] https://jpbrucker.net/git/linux/log/?h=virtio-iommu/acpi


Jean-Philippe Brucker (6):
  ACPI: arm64: Move DMA setup operations out of IORT
  ACPI: Move IOMMU setup code out of IORT
  ACPI: Add driver for the VIOT table
  iommu/dma: Pass address limit rather than size to
    iommu_setup_dma_ops()
  iommu/dma: Simplify calls to iommu_setup_dma_ops()
  iommu/virtio: Enable x86 support

 drivers/acpi/Kconfig         |   3 +
 drivers/iommu/Kconfig        |   4 +-
 drivers/acpi/Makefile        |   2 +
 drivers/acpi/arm64/Makefile  |   1 +
 include/acpi/acpi_bus.h      |   3 +
 include/linux/acpi.h         |   3 +
 include/linux/acpi_iort.h    |  14 +-
 include/linux/acpi_viot.h    |  19 ++
 include/linux/dma-iommu.h    |   4 +-
 arch/arm64/mm/dma-mapping.c  |   2 +-
 drivers/acpi/arm64/dma.c     |  50 +++++
 drivers/acpi/arm64/iort.c    | 129 ++-----------
 drivers/acpi/bus.c           |   2 +
 drivers/acpi/scan.c          |  78 +++++++-
 drivers/acpi/viot.c          | 364 +++++++++++++++++++++++++++++++++++
 drivers/iommu/amd/iommu.c    |   9 +-
 drivers/iommu/dma-iommu.c    |  17 +-
 drivers/iommu/intel/iommu.c  |  10 +-
 drivers/iommu/virtio-iommu.c |   8 +
 MAINTAINERS                  |   8 +
 20 files changed, 580 insertions(+), 150 deletions(-)
 create mode 100644 include/linux/acpi_viot.h
 create mode 100644 drivers/acpi/arm64/dma.c
 create mode 100644 drivers/acpi/viot.c

-- 
2.31.1
Jean-Philippe Brucker
2021-Jun-10 07:51 UTC
[PATCH v4 1/6] ACPI: arm64: Move DMA setup operations out of IORT
Extract generic DMA setup code out of IORT, so it can be reused by VIOT.
Keep it in drivers/acpi/arm64 for now, since it could break x86
platforms that haven't run this code so far, if they have invalid
tables.

Signed-off-by: Jean-Philippe Brucker <jean-philippe at linaro.org>
---
 drivers/acpi/arm64/Makefile |  1 +
 include/linux/acpi.h        |  3 +++
 include/linux/acpi_iort.h   |  6 ++---
 drivers/acpi/arm64/dma.c    | 50 ++++++++++++++++++++++++++++++++++
 drivers/acpi/arm64/iort.c   | 54 ++++++------------------------------
 drivers/acpi/scan.c         |  2 +-
 6 files changed, 66 insertions(+), 50 deletions(-)
 create mode 100644 drivers/acpi/arm64/dma.c

diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
index 6ff50f4ed947..66acbe77f46e 100644
--- a/drivers/acpi/arm64/Makefile
+++ b/drivers/acpi/arm64/Makefile
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_ACPI_IORT) += iort.o
 obj-$(CONFIG_ACPI_GTDT) += gtdt.o
+obj-y                   += dma.o
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c60745f657e9..7aaa9559cc19 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -259,9 +259,12 @@ void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa);
 
 #ifdef CONFIG_ARM64
 void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa);
+void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size);
 #else
 static inline void
 acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { }
+static inline void
+acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) { }
 #endif
 
 int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma);
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 1a12baa58e40..f7f054833afd 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -34,7 +34,7 @@ struct irq_domain *iort_get_device_domain(struct device *dev, u32 id,
 void acpi_configure_pmsi_domain(struct device *dev);
 int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id);
 /* IOMMU interface */
-void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size);
+int iort_dma_get_ranges(struct device *dev, u64 *size);
 const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
                                                 const u32 *id_in);
 int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head);
@@ -48,8 +48,8 @@ static inline struct irq_domain *iort_get_device_domain(
 { return NULL; }
 static inline void acpi_configure_pmsi_domain(struct device *dev) { }
 /* IOMMU interface */
-static inline void iort_dma_setup(struct device *dev, u64 *dma_addr,
-                                  u64 *size) { }
+static inline int iort_dma_get_ranges(struct device *dev, u64 *size)
+{ return -ENODEV; }
 static inline const struct iommu_ops *iort_iommu_configure_id(
                         struct device *dev, const u32 *id_in)
 { return NULL; }
diff --git a/drivers/acpi/arm64/dma.c b/drivers/acpi/arm64/dma.c
new file mode 100644
index 000000000000..f16739ad3cc0
--- /dev/null
+++ b/drivers/acpi/arm64/dma.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/acpi.h>
+#include <linux/acpi_iort.h>
+#include <linux/device.h>
+#include <linux/dma-direct.h>
+
+void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size)
+{
+        int ret;
+        u64 end, mask;
+        u64 dmaaddr = 0, size = 0, offset = 0;
+
+        /*
+         * If @dev is expected to be DMA-capable then the bus code that created
+         * it should have initialised its dma_mask pointer by this point. For
+         * now, we'll continue the legacy behaviour of coercing it to the
+         * coherent mask if not, but we'll no longer do so quietly.
+         */
+        if (!dev->dma_mask) {
+                dev_warn(dev, "DMA mask not set\n");
+                dev->dma_mask = &dev->coherent_dma_mask;
+        }
+
+        if (dev->coherent_dma_mask)
+                size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);
+        else
+                size = 1ULL << 32;
+
+        ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size);
+        if (ret == -ENODEV)
+                ret = iort_dma_get_ranges(dev, &size);
+        if (!ret) {
+                /*
+                 * Limit coherent and dma mask based on size retrieved from
+                 * firmware.
+                 */
+                end = dmaaddr + size - 1;
+                mask = DMA_BIT_MASK(ilog2(end) + 1);
+                dev->bus_dma_limit = end;
+                dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask);
+                *dev->dma_mask = min(*dev->dma_mask, mask);
+        }
+
+        *dma_addr = dmaaddr;
+        *dma_size = size;
+
+        ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size);
+
+        dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : "");
+}
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 3912a1f6058e..a940be1cf2af 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -1144,56 +1144,18 @@ static int rc_dma_get_range(struct device *dev, u64 *size)
 }
 
 /**
- * iort_dma_setup() - Set-up device DMA parameters.
+ * iort_dma_get_ranges() - Look up DMA addressing limit for the device
+ * @dev: device to lookup
+ * @size: DMA range size result pointer
  *
- * @dev: device to configure
- * @dma_addr: device DMA address result pointer
- * @dma_size: DMA range size result pointer
+ * Return: 0 on success, an error otherwise.
  */
-void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size)
+int iort_dma_get_ranges(struct device *dev, u64 *size)
 {
-        u64 end, mask, dmaaddr = 0, size = 0, offset = 0;
-        int ret;
-
-        /*
-         * If @dev is expected to be DMA-capable then the bus code that created
-         * it should have initialised its dma_mask pointer by this point. For
-         * now, we'll continue the legacy behaviour of coercing it to the
-         * coherent mask if not, but we'll no longer do so quietly.
-         */
-        if (!dev->dma_mask) {
-                dev_warn(dev, "DMA mask not set\n");
-                dev->dma_mask = &dev->coherent_dma_mask;
-        }
-
-        if (dev->coherent_dma_mask)
-                size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);
+        if (dev_is_pci(dev))
+                return rc_dma_get_range(dev, size);
         else
-                size = 1ULL << 32;
-
-        ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size);
-        if (ret == -ENODEV)
-                ret = dev_is_pci(dev) ? rc_dma_get_range(dev, &size)
-                                      : nc_dma_get_range(dev, &size);
-
-        if (!ret) {
-                /*
-                 * Limit coherent and dma mask based on size retrieved from
-                 * firmware.
-                 */
-                end = dmaaddr + size - 1;
-                mask = DMA_BIT_MASK(ilog2(end) + 1);
-                dev->bus_dma_limit = end;
-                dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask);
-                *dev->dma_mask = min(*dev->dma_mask, mask);
-        }
-
-        *dma_addr = dmaaddr;
-        *dma_size = size;
-
-        ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size);
-
-        dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : "");
+                return nc_dma_get_range(dev, size);
 }
 
 static void __init acpi_iort_register_irq(int hwirq, const char *name,
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index e10d38ac7cf2..ea613df8f913 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1537,7 +1537,7 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr,
                 return 0;
         }
 
-        iort_dma_setup(dev, &dma_addr, &size);
+        acpi_arch_dma_setup(dev, &dma_addr, &size);
 
         iommu = iort_iommu_configure_id(dev, input_id);
         if (PTR_ERR(iommu) == -EPROBE_DEFER)
-- 
2.31.1
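[Editor's illustration] The mask arithmetic in acpi_arch_dma_setup() clamps the
device's masks to the smallest power-of-two mask that covers the end of the
firmware-described window [dmaaddr; dmaaddr + size - 1]. A minimal userspace
sketch of that computation, with the kernel's DMA_BIT_MASK() and ilog2()
replaced by portable equivalents and a made-up 4GB window:

    #include <stdint.h>
    #include <stdio.h>

    /* Userspace stand-in for the kernel's DMA_BIT_MASK() */
    #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

    /* Userspace stand-in for the kernel's ilog2() */
    static unsigned int ilog2_u64(uint64_t v)
    {
            return 63 - __builtin_clzll(v);
    }

    int main(void)
    {
            /* Hypothetical firmware window: 4GB starting at address 0 */
            uint64_t dmaaddr = 0, size = 1ULL << 32;

            uint64_t end = dmaaddr + size - 1;
            uint64_t mask = DMA_BIT_MASK(ilog2_u64(end) + 1);

            /* end = 0xffffffff -> ilog2(end) = 31 -> a 32-bit DMA mask */
            printf("bus_dma_limit=%#llx mask=%#llx\n",
                   (unsigned long long)end, (unsigned long long)mask);
            return 0;
    }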
Jean-Philippe Brucker
2021-Jun-10 07:51 UTC
[PATCH v4 2/6] ACPI: Move IOMMU setup code out of IORT
Extract the code that sets up the IOMMU infrastructure from IORT, since
it can be reused by VIOT. Move it one level up into a new
acpi_iommu_configure_id() function, which calls the IORT parsing
function which in turn calls the acpi_iommu_fwspec_init() helper.

Signed-off-by: Jean-Philippe Brucker <jean-philippe at linaro.org>
---
 include/acpi/acpi_bus.h   |  3 ++
 include/linux/acpi_iort.h |  8 ++---
 drivers/acpi/arm64/iort.c | 75 +++++----------------------------------
 drivers/acpi/scan.c       | 73 ++++++++++++++++++++++++++++++++++++-
 4 files changed, 87 insertions(+), 72 deletions(-)

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 3a82faac5767..41f092a269f6 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -588,6 +588,9 @@ struct acpi_pci_root {
 
 bool acpi_dma_supported(struct acpi_device *adev);
 enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev);
+int acpi_iommu_fwspec_init(struct device *dev, u32 id,
+                           struct fwnode_handle *fwnode,
+                           const struct iommu_ops *ops);
 int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset,
                        u64 *size);
 int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr,
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index f7f054833afd..f1f0842a2cb2 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -35,8 +35,7 @@ void acpi_configure_pmsi_domain(struct device *dev);
 int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id);
 /* IOMMU interface */
 int iort_dma_get_ranges(struct device *dev, u64 *size);
-const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
-                                                const u32 *id_in);
+int iort_iommu_configure_id(struct device *dev, const u32 *id_in);
 int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head);
 phys_addr_t acpi_iort_dma_get_max_cpu_address(void);
 #else
@@ -50,9 +49,8 @@ static inline void acpi_configure_pmsi_domain(struct device *dev) { }
 /* IOMMU interface */
 static inline int iort_dma_get_ranges(struct device *dev, u64 *size)
 { return -ENODEV; }
-static inline const struct iommu_ops *iort_iommu_configure_id(
-                        struct device *dev, const u32 *id_in)
-{ return NULL; }
+static inline int iort_iommu_configure_id(struct device *dev, const u32 *id_in)
+{ return -ENODEV; }
 static inline int iort_iommu_msi_get_resv_regions(struct device *dev,
                                                   struct list_head *head)
 { return 0; }
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index a940be1cf2af..b5b021e064b6 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -806,23 +806,6 @@ static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev)
         return NULL;
 }
 
-static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev)
-{
-        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
-
-        return (fwspec && fwspec->ops) ? fwspec->ops : NULL;
-}
-
-static inline int iort_add_device_replay(struct device *dev)
-{
-        int err = 0;
-
-        if (dev->bus && !device_iommu_mapped(dev))
-                err = iommu_probe_device(dev);
-
-        return err;
-}
-
 /**
  * iort_iommu_msi_get_resv_regions - Reserved region driver helper
  * @dev: Device from iommu_get_resv_regions()
@@ -900,18 +883,6 @@ static inline bool iort_iommu_driver_enabled(u8 type)
         }
 }
 
-static int arm_smmu_iort_xlate(struct device *dev, u32 streamid,
-                               struct fwnode_handle *fwnode,
-                               const struct iommu_ops *ops)
-{
-        int ret = iommu_fwspec_init(dev, fwnode, ops);
-
-        if (!ret)
-                ret = iommu_fwspec_add_ids(dev, &streamid, 1);
-
-        return ret;
-}
-
 static bool iort_pci_rc_supports_ats(struct acpi_iort_node *node)
 {
         struct acpi_iort_root_complex *pci_rc;
@@ -946,7 +917,7 @@ static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node,
                 return iort_iommu_driver_enabled(node->type) ?
                        -EPROBE_DEFER : -ENODEV;
 
-        return arm_smmu_iort_xlate(dev, streamid, iort_fwnode, ops);
+        return acpi_iommu_fwspec_init(dev, streamid, iort_fwnode, ops);
 }
 
 struct iort_pci_alias_info {
@@ -1020,24 +991,14 @@ static int iort_nc_iommu_map_id(struct device *dev,
  * @dev: device to configure
  * @id_in: optional input id const value pointer
  *
- * Returns: iommu_ops pointer on configuration success
- *          NULL on configuration failure
+ * Returns: 0 on success, <0 on failure
  */
-const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
-                                                const u32 *id_in)
+int iort_iommu_configure_id(struct device *dev, const u32 *id_in)
 {
         struct acpi_iort_node *node;
-        const struct iommu_ops *ops;
+        const struct iommu_ops *ops = NULL;
         int err = -ENODEV;
 
-        /*
-         * If we already translated the fwspec there
-         * is nothing left to do, return the iommu_ops.
-         */
-        ops = iort_fwspec_iommu_ops(dev);
-        if (ops)
-                return ops;
-
         if (dev_is_pci(dev)) {
                 struct iommu_fwspec *fwspec;
                 struct pci_bus *bus = to_pci_dev(dev)->bus;
@@ -1046,7 +1007,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
                 node = iort_scan_node(ACPI_IORT_NODE_PCI_ROOT_COMPLEX,
                                       iort_match_node_callback, &bus->dev);
                 if (!node)
-                        return NULL;
+                        return -ENODEV;
 
                 info.node = node;
                 err = pci_for_each_dma_alias(to_pci_dev(dev),
@@ -1059,7 +1020,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
                 node = iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT,
                                       iort_match_node_callback, dev);
                 if (!node)
-                        return NULL;
+                        return -ENODEV;
 
                 err = id_in ? iort_nc_iommu_map_id(dev, node, id_in) :
                               iort_nc_iommu_map(dev, node);
@@ -1068,32 +1029,14 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
                         iort_named_component_init(dev, node);
         }
 
-        /*
-         * If we have reason to believe the IOMMU driver missed the initial
-         * add_device callback for dev, replay it to get things in order.
-         */
-        if (!err) {
-                ops = iort_fwspec_iommu_ops(dev);
-                err = iort_add_device_replay(dev);
-        }
-
-        /* Ignore all other errors apart from EPROBE_DEFER */
-        if (err == -EPROBE_DEFER) {
-                ops = ERR_PTR(err);
-        } else if (err) {
-                dev_dbg(dev, "Adding to IOMMU failed: %d\n", err);
-                ops = NULL;
-        }
-
-        return ops;
+        return err;
 }
 
 #else
 int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
 { return 0; }
-const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
-                                                const u32 *input_id)
-{ return NULL; }
+int iort_iommu_configure_id(struct device *dev, const u32 *input_id)
+{ return -ENODEV; }
 #endif
 
 static int nc_dma_get_range(struct device *dev, u64 *size)
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index ea613df8f913..0c53c8533300 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -9,6 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/acpi.h>
 #include <linux/acpi_iort.h>
+#include <linux/iommu.h>
 #include <linux/signal.h>
 #include <linux/kthread.h>
 #include <linux/dmi.h>
@@ -1520,6 +1521,76 @@ int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset,
         return ret >= 0 ? 0 : ret;
 }
 
+#ifdef CONFIG_IOMMU_API
+int acpi_iommu_fwspec_init(struct device *dev, u32 id,
+                           struct fwnode_handle *fwnode,
+                           const struct iommu_ops *ops)
+{
+        int ret = iommu_fwspec_init(dev, fwnode, ops);
+
+        if (!ret)
+                ret = iommu_fwspec_add_ids(dev, &id, 1);
+
+        return ret;
+}
+
+static inline const struct iommu_ops *acpi_iommu_fwspec_ops(struct device *dev)
+{
+        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+
+        return fwspec ? fwspec->ops : NULL;
+}
+
+static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev,
+                                                       const u32 *id_in)
+{
+        int err;
+        const struct iommu_ops *ops;
+
+        /*
+         * If we already translated the fwspec there is nothing left to do,
+         * return the iommu_ops.
+         */
+        ops = acpi_iommu_fwspec_ops(dev);
+        if (ops)
+                return ops;
+
+        err = iort_iommu_configure_id(dev, id_in);
+
+        /*
+         * If we have reason to believe the IOMMU driver missed the initial
+         * add_device callback for dev, replay it to get things in order.
+         */
+        if (!err && dev->bus && !device_iommu_mapped(dev))
+                err = iommu_probe_device(dev);
+
+        /* Ignore all other errors apart from EPROBE_DEFER */
+        if (err == -EPROBE_DEFER) {
+                return ERR_PTR(err);
+        } else if (err) {
+                dev_dbg(dev, "Adding to IOMMU failed: %d\n", err);
+                return NULL;
+        }
+        return acpi_iommu_fwspec_ops(dev);
+}
+
+#else /* !CONFIG_IOMMU_API */
+
+int acpi_iommu_fwspec_init(struct device *dev, u32 id,
+                           struct fwnode_handle *fwnode,
+                           const struct iommu_ops *ops)
+{
+        return -ENODEV;
+}
+
+static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev,
+                                                       const u32 *id_in)
+{
+        return NULL;
+}
+
+#endif /* !CONFIG_IOMMU_API */
+
 /**
  * acpi_dma_configure_id - Set-up DMA configuration for the device.
  * @dev: The pointer to the device
@@ -1539,7 +1610,7 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr,
 
         acpi_arch_dma_setup(dev, &dma_addr, &size);
 
-        iommu = iort_iommu_configure_id(dev, input_id);
+        iommu = acpi_iommu_configure_id(dev, input_id);
         if (PTR_ERR(iommu) == -EPROBE_DEFER)
                 return -EPROBE_DEFER;
-- 
2.31.1
Jean-Philippe Brucker
2021-Jun-10 07:51 UTC
[PATCH v4 3/6] ACPI: Add driver for the VIOT table
The ACPI Virtual I/O Translation Table describes the topology of
para-virtual platforms, similarly to vendor tables DMAR, IVRS and IORT.
For now it describes the relation between virtio-iommu and the
endpoints it manages.

Three steps are needed to configure DMA of endpoints:

(1) acpi_viot_init(): parse the VIOT table, find or create the fwnode
    associated to each vIOMMU device.

(2) When probing the vIOMMU device, the driver registers its IOMMU ops
    within the IOMMU subsystem. This step doesn't require any
    intervention from the VIOT driver.

(3) viot_iommu_configure(): before binding the endpoint to a driver,
    find the associated IOMMU ops. Register them, along with the
    endpoint ID, into the device's iommu_fwspec.

If step (3) happens before step (2), it is deferred until the IOMMU is
initialized, then retried.

Signed-off-by: Jean-Philippe Brucker <jean-philippe at linaro.org>
---
 drivers/acpi/Kconfig      |   3 +
 drivers/iommu/Kconfig     |   1 +
 drivers/acpi/Makefile     |   2 +
 include/linux/acpi_viot.h |  19 ++
 drivers/acpi/bus.c        |   2 +
 drivers/acpi/scan.c       |   3 +
 drivers/acpi/viot.c       | 364 ++++++++++++++++++++++++++++++++++++++
 MAINTAINERS               |   8 +
 8 files changed, 402 insertions(+)
 create mode 100644 include/linux/acpi_viot.h
 create mode 100644 drivers/acpi/viot.c

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index eedec61e3476..3758c6940ed7 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -526,6 +526,9 @@ endif
 
 source "drivers/acpi/pmic/Kconfig"
 
+config ACPI_VIOT
+        bool
+
 endif # ACPI
 
 config X86_PM_TIMER
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 1f111b399bca..aff8a4830dd1 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -403,6 +403,7 @@ config VIRTIO_IOMMU
         depends on ARM64
         select IOMMU_API
         select INTERVAL_TREE
+        select ACPI_VIOT if ACPI
         help
           Para-virtualised IOMMU driver with virtio.
 
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 700b41adf2db..a6e644c48987 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -118,3 +118,5 @@ video-objs                      += acpi_video.o video_detect.o
 obj-y                           += dptf/
 
 obj-$(CONFIG_ARM64)             += arm64/
+
+obj-$(CONFIG_ACPI_VIOT)         += viot.o
diff --git a/include/linux/acpi_viot.h b/include/linux/acpi_viot.h
new file mode 100644
index 000000000000..1eb8ee5b0e5f
--- /dev/null
+++ b/include/linux/acpi_viot.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ACPI_VIOT_H__
+#define __ACPI_VIOT_H__
+
+#include <linux/acpi.h>
+
+#ifdef CONFIG_ACPI_VIOT
+void __init acpi_viot_init(void);
+int viot_iommu_configure(struct device *dev);
+#else
+static inline void acpi_viot_init(void) {}
+static inline int viot_iommu_configure(struct device *dev)
+{
+        return -ENODEV;
+}
+#endif
+
+#endif /* __ACPI_VIOT_H__ */
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index be7da23fad76..b835ca702ff0 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -27,6 +27,7 @@
 #include <linux/dmi.h>
 #endif
 #include <linux/acpi_iort.h>
+#include <linux/acpi_viot.h>
 #include <linux/pci.h>
 #include <acpi/apei.h>
 #include <linux/suspend.h>
@@ -1339,6 +1340,7 @@ static int __init acpi_init(void)
         pci_mmcfg_late_init();
         acpi_iort_init();
         acpi_scan_init();
+        acpi_viot_init();
         acpi_ec_init();
         acpi_debugfs_init();
         acpi_sleep_proc_init();
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 0c53c8533300..4fa684fdfda8 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -9,6 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/acpi.h>
 #include <linux/acpi_iort.h>
+#include <linux/acpi_viot.h>
 #include <linux/iommu.h>
 #include <linux/signal.h>
 #include <linux/kthread.h>
@@ -1556,6 +1557,8 @@ static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev,
                 return ops;
 
         err = iort_iommu_configure_id(dev, id_in);
+        if (err && err != -EPROBE_DEFER)
+                err = viot_iommu_configure(dev);
 
         /*
          * If we have reason to believe the IOMMU driver missed the initial
diff --git a/drivers/acpi/viot.c b/drivers/acpi/viot.c
new file mode 100644
index 000000000000..892cd9fa7b6d
--- /dev/null
+++ b/drivers/acpi/viot.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Virtual I/O topology
+ *
+ * The Virtual I/O Translation Table (VIOT) describes the topology of
+ * para-virtual IOMMUs and the endpoints they manage. The OS uses it to
+ * initialize devices in the right order, preventing endpoints from issuing DMA
+ * before their IOMMU is ready.
+ *
+ * When binding a driver to a device, before calling the device driver's probe()
+ * method, the driver infrastructure calls dma_configure(). At that point the
+ * VIOT driver looks for an IOMMU associated to the device in the VIOT table.
+ * If an IOMMU exists and has been initialized, the VIOT driver initializes the
+ * device's IOMMU fwspec, allowing the DMA infrastructure to invoke the IOMMU
+ * ops when the device driver configures DMA mappings. If an IOMMU exists and
+ * hasn't yet been initialized, VIOT returns -EPROBE_DEFER to postpone probing
+ * the device until the IOMMU is available.
+ */
+#define pr_fmt(fmt) "ACPI: VIOT: " fmt
+
+#include <linux/acpi_viot.h>
+#include <linux/dma-iommu.h>
+#include <linux/fwnode.h>
+#include <linux/iommu.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+
+struct viot_iommu {
+        /* Node offset within the table */
+        unsigned int offset;
+        struct fwnode_handle *fwnode;
+        struct list_head list;
+};
+
+struct viot_endpoint {
+        union {
+                /* PCI range */
+                struct {
+                        u16 segment_start;
+                        u16 segment_end;
+                        u16 bdf_start;
+                        u16 bdf_end;
+                };
+                /* MMIO */
+                u64 address;
+        };
+        u32 endpoint_id;
+        struct viot_iommu *viommu;
+        struct list_head list;
+};
+
+static struct acpi_table_viot *viot;
+static LIST_HEAD(viot_iommus);
+static LIST_HEAD(viot_pci_ranges);
+static LIST_HEAD(viot_mmio_endpoints);
+
+static int __init viot_check_bounds(const struct acpi_viot_header *hdr)
+{
+        struct acpi_viot_header *start, *end, *hdr_end;
+
+        start = ACPI_ADD_PTR(struct acpi_viot_header, viot,
+                             max_t(size_t, sizeof(*viot), viot->node_offset));
+        end = ACPI_ADD_PTR(struct acpi_viot_header, viot, viot->header.length);
+        hdr_end = ACPI_ADD_PTR(struct acpi_viot_header, hdr, sizeof(*hdr));
+
+        if (hdr < start || hdr_end > end) {
+                pr_err(FW_BUG "Node pointer overflows\n");
+                return -EOVERFLOW;
+        }
+        if (hdr->length < sizeof(*hdr)) {
+                pr_err(FW_BUG "Empty node\n");
+                return -EINVAL;
+        }
+        return 0;
+}
+
+static int __init viot_get_pci_iommu_fwnode(struct viot_iommu *viommu,
+                                            u16 segment, u16 bdf)
+{
+        struct pci_dev *pdev;
+        struct fwnode_handle *fwnode;
+
+        pdev = pci_get_domain_bus_and_slot(segment, PCI_BUS_NUM(bdf),
+                                           bdf & 0xff);
+        if (!pdev) {
+                pr_err("Could not find PCI IOMMU\n");
+                return -ENODEV;
+        }
+
+        fwnode = pdev->dev.fwnode;
+        if (!fwnode) {
+                /*
+                 * PCI devices aren't necessarily described by ACPI. Create a
+                 * fwnode so the IOMMU subsystem can identify this device.
+                 */
+                fwnode = acpi_alloc_fwnode_static();
+                if (!fwnode) {
+                        pci_dev_put(pdev);
+                        return -ENOMEM;
+                }
+                set_primary_fwnode(&pdev->dev, fwnode);
+        }
+        viommu->fwnode = pdev->dev.fwnode;
+        pci_dev_put(pdev);
+        return 0;
+}
+
+static int __init viot_get_mmio_iommu_fwnode(struct viot_iommu *viommu,
+                                             u64 address)
+{
+        struct acpi_device *adev;
+        struct resource res = {
+                .start = address,
+                .end = address,
+                .flags = IORESOURCE_MEM,
+        };
+
+        adev = acpi_resource_consumer(&res);
+        if (!adev) {
+                pr_err("Could not find MMIO IOMMU\n");
+                return -EINVAL;
+        }
+        viommu->fwnode = &adev->fwnode;
+        return 0;
+}
+
+static struct viot_iommu * __init viot_get_iommu(unsigned int offset)
+{
+        int ret;
+        struct viot_iommu *viommu;
+        struct acpi_viot_header *hdr = ACPI_ADD_PTR(struct acpi_viot_header,
+                                                    viot, offset);
+        union {
+                struct acpi_viot_virtio_iommu_pci pci;
+                struct acpi_viot_virtio_iommu_mmio mmio;
+        } *node = (void *)hdr;
+
+        list_for_each_entry(viommu, &viot_iommus, list)
+                if (viommu->offset == offset)
+                        return viommu;
+
+        if (viot_check_bounds(hdr))
+                return NULL;
+
+        viommu = kzalloc(sizeof(*viommu), GFP_KERNEL);
+        if (!viommu)
+                return NULL;
+
+        viommu->offset = offset;
+        switch (hdr->type) {
+        case ACPI_VIOT_NODE_VIRTIO_IOMMU_PCI:
+                if (hdr->length < sizeof(node->pci))
+                        goto err_free;
+
+                ret = viot_get_pci_iommu_fwnode(viommu, node->pci.segment,
+                                                node->pci.bdf);
+                break;
+        case ACPI_VIOT_NODE_VIRTIO_IOMMU_MMIO:
+                if (hdr->length < sizeof(node->mmio))
+                        goto err_free;
+
+                ret = viot_get_mmio_iommu_fwnode(viommu,
+                                                 node->mmio.base_address);
+                break;
+        default:
+                ret = -EINVAL;
+        }
+        if (ret)
+                goto err_free;
+
+        list_add(&viommu->list, &viot_iommus);
+        return viommu;
+
+err_free:
+        kfree(viommu);
+        return NULL;
+}
+
+static int __init viot_parse_node(const struct acpi_viot_header *hdr)
+{
+        int ret = -EINVAL;
+        struct list_head *list;
+        struct viot_endpoint *ep;
+        union {
+                struct acpi_viot_mmio mmio;
+                struct acpi_viot_pci_range pci;
+        } *node = (void *)hdr;
+
+        if (viot_check_bounds(hdr))
+                return -EINVAL;
+
+        if (hdr->type == ACPI_VIOT_NODE_VIRTIO_IOMMU_PCI ||
+            hdr->type == ACPI_VIOT_NODE_VIRTIO_IOMMU_MMIO)
+                return 0;
+
+        ep = kzalloc(sizeof(*ep), GFP_KERNEL);
+        if (!ep)
+                return -ENOMEM;
+
+        switch (hdr->type) {
+        case ACPI_VIOT_NODE_PCI_RANGE:
+                if (hdr->length < sizeof(node->pci)) {
+                        pr_err(FW_BUG "Invalid PCI node size\n");
+                        goto err_free;
+                }
+
+                ep->segment_start = node->pci.segment_start;
+                ep->segment_end = node->pci.segment_end;
+                ep->bdf_start = node->pci.bdf_start;
+                ep->bdf_end = node->pci.bdf_end;
+                ep->endpoint_id = node->pci.endpoint_start;
+                ep->viommu = viot_get_iommu(node->pci.output_node);
+                list = &viot_pci_ranges;
+                break;
+        case ACPI_VIOT_NODE_MMIO:
+                if (hdr->length < sizeof(node->mmio)) {
+                        pr_err(FW_BUG "Invalid MMIO node size\n");
+                        goto err_free;
+                }
+
+                ep->address = node->mmio.base_address;
+                ep->endpoint_id = node->mmio.endpoint;
+                ep->viommu = viot_get_iommu(node->mmio.output_node);
+                list = &viot_mmio_endpoints;
+                break;
+        default:
+                pr_warn("Unsupported node %x\n", hdr->type);
+                ret = 0;
+                goto err_free;
+        }
+
+        /*
+         * To be compatible with future versions of the table which may include
+         * other node types, keep parsing.
+         */
+        if (!ep->viommu) {
+                pr_warn("No IOMMU node found\n");
+                ret = 0;
+                goto err_free;
+        }
+
+        list_add(&ep->list, list);
+        return 0;
+
+err_free:
+        kfree(ep);
+        return ret;
+}
+
+/**
+ * acpi_viot_init - Parse the VIOT table
+ *
+ * Parse the VIOT table, prepare the list of endpoints to be used during DMA
+ * setup of devices.
+ */
+void __init acpi_viot_init(void)
+{
+        int i;
+        acpi_status status;
+        struct acpi_table_header *hdr;
+        struct acpi_viot_header *node;
+
+        status = acpi_get_table(ACPI_SIG_VIOT, 0, &hdr);
+        if (ACPI_FAILURE(status)) {
+                if (status != AE_NOT_FOUND) {
+                        const char *msg = acpi_format_exception(status);
+
+                        pr_err("Failed to get table, %s\n", msg);
+                }
+                return;
+        }
+
+        viot = (void *)hdr;
+
+        node = ACPI_ADD_PTR(struct acpi_viot_header, viot, viot->node_offset);
+        for (i = 0; i < viot->node_count; i++) {
+                if (viot_parse_node(node))
+                        return;
+
+                node = ACPI_ADD_PTR(struct acpi_viot_header, node,
+                                    node->length);
+        }
+}
+
+static int viot_dev_iommu_init(struct device *dev, struct viot_iommu *viommu,
+                               u32 epid)
+{
+        const struct iommu_ops *ops;
+
+        if (!viommu)
+                return -ENODEV;
+
+        /* We're not translating ourself */
+        if (viommu->fwnode == dev->fwnode)
+                return -EINVAL;
+
+        ops = iommu_ops_from_fwnode(viommu->fwnode);
+        if (!ops)
+                return IS_ENABLED(CONFIG_VIRTIO_IOMMU) ?
+                        -EPROBE_DEFER : -ENODEV;
+
+        return acpi_iommu_fwspec_init(dev, epid, viommu->fwnode, ops);
+}
+
+static int viot_pci_dev_iommu_init(struct pci_dev *pdev, u16 dev_id,
+                                   void *data)
+{
+        u32 epid;
+        struct viot_endpoint *ep;
+        u32 domain_nr = pci_domain_nr(pdev->bus);
+
+        list_for_each_entry(ep, &viot_pci_ranges, list) {
+                if (domain_nr >= ep->segment_start &&
+                    domain_nr <= ep->segment_end &&
+                    dev_id >= ep->bdf_start &&
+                    dev_id <= ep->bdf_end) {
+                        epid = ((domain_nr - ep->segment_start) << 16) +
+                                dev_id - ep->bdf_start + ep->endpoint_id;
+
+                        /*
+                         * If we found a PCI range managed by the viommu, we're
+                         * the one that has to request ACS.
+                         */
+                        pci_request_acs();
+
+                        return viot_dev_iommu_init(&pdev->dev, ep->viommu,
+                                                   epid);
+                }
+        }
+        return -ENODEV;
+}
+
+static int viot_mmio_dev_iommu_init(struct platform_device *pdev)
+{
+        struct resource *mem;
+        struct viot_endpoint *ep;
+
+        mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+        if (!mem)
+                return -ENODEV;
+
+        list_for_each_entry(ep, &viot_mmio_endpoints, list) {
+                if (ep->address == mem->start)
+                        return viot_dev_iommu_init(&pdev->dev, ep->viommu,
+                                                   ep->endpoint_id);
+        }
+        return -ENODEV;
+}
+
+/**
+ * viot_iommu_configure - Setup IOMMU ops for an endpoint described by VIOT
+ * @dev: the endpoint
+ *
+ * Return: 0 on success, <0 on failure
+ */
+int viot_iommu_configure(struct device *dev)
+{
+        if (dev_is_pci(dev))
+                return pci_for_each_dma_alias(to_pci_dev(dev),
+                                              viot_pci_dev_iommu_init, NULL);
+        else if (dev_is_platform(dev))
+                return viot_mmio_dev_iommu_init(to_platform_device(dev));
+        return -ENODEV;
+}
diff --git a/MAINTAINERS b/MAINTAINERS
index b706dd20ff2b..8d71591f979a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -431,6 +431,14 @@ W:      https://01.org/linux-acpi
 B:      https://bugzilla.kernel.org
 F:      drivers/acpi/acpi_video.c
 
+ACPI VIOT DRIVER
+M:      Jean-Philippe Brucker <jean-philippe at linaro.org>
+L:      linux-acpi at vger.kernel.org
+L:      iommu at lists.linux-foundation.org
+S:      Maintained
+F:      drivers/acpi/viot.c
+F:      include/linux/acpi_viot.h
+
 ACPI WMI DRIVER
 L:      platform-driver-x86 at vger.kernel.org
 S:      Orphan
-- 
2.31.1
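[Editor's illustration] To make the PCI-range lookup in viot_pci_dev_iommu_init()
concrete, here is a standalone sketch of the endpoint-ID computation the driver
performs for a matching range. The struct mirrors only the fields the driver
uses; the sample range and BDF values are made up, not taken from the spec:

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors the VIOT PCI-range node fields used by the driver */
    struct pci_range {
            uint16_t segment_start, segment_end;
            uint16_t bdf_start, bdf_end;
            uint32_t endpoint_start;
    };

    /* Same computation as viot_pci_dev_iommu_init(); -1 if no match */
    static int64_t viot_epid(const struct pci_range *r, uint32_t segment,
                             uint16_t bdf)
    {
            if (segment < r->segment_start || segment > r->segment_end ||
                bdf < r->bdf_start || bdf > r->bdf_end)
                    return -1;
            return ((segment - r->segment_start) << 16) +
                    bdf - r->bdf_start + r->endpoint_start;
    }

    int main(void)
    {
            /* Hypothetical range on segment 0, endpoint IDs starting at 8 */
            struct pci_range r = {
                    .segment_start = 0, .segment_end = 0,
                    .bdf_start = 0x0008, .bdf_end = 0x00ff,
                    .endpoint_start = 8,
            };

            /* BDF 00:02.0 (0x0010) -> endpoint ID 8 + 0x10 - 0x8 = 16 */
            printf("epid=%lld\n", (long long)viot_epid(&r, 0, 0x0010));
            return 0;
    }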
Jean-Philippe Brucker
2021-Jun-10 07:51 UTC
[PATCH v4 4/6] iommu/dma: Pass address limit rather than size to iommu_setup_dma_ops()
Passing a 64-bit address width to iommu_setup_dma_ops() is valid on
virtual platforms, but isn't currently possible. The overflow check in
iommu_dma_init_domain() prevents this even when @dma_base isn't 0. Pass
a limit address instead of a size, so callers don't have to fake a size
to work around the check.

Signed-off-by: Jean-Philippe Brucker <jean-philippe at linaro.org>
---
 include/linux/dma-iommu.h   |  4 ++--
 arch/arm64/mm/dma-mapping.c |  2 +-
 drivers/iommu/amd/iommu.c   |  2 +-
 drivers/iommu/dma-iommu.c   | 12 ++++++------
 drivers/iommu/intel/iommu.c |  2 +-
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 6e75a2d689b4..758ca4694257 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -19,7 +19,7 @@ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
 void iommu_put_dma_cookie(struct iommu_domain *domain);
 
 /* Setup call for arch DMA mapping code */
-void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size);
+void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit);
 
 /* The DMA API isn't _quite_ the whole story, though... */
 
@@ -50,7 +50,7 @@ struct msi_msg;
 struct device;
 
 static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base,
-                                       u64 size)
+                                       u64 dma_limit)
 {
 }
 
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 4bf1dd3eb041..7bd1d2199141 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -50,7 +50,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 
         dev->dma_coherent = coherent;
         if (iommu)
-                iommu_setup_dma_ops(dev, dma_base, size);
+                iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1);
 
 #ifdef CONFIG_XEN
         if (xen_swiotlb_detect())
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 3ac42bbdefc6..94b96d81fcfd 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1713,7 +1713,7 @@ static void amd_iommu_probe_finalize(struct device *dev)
         /* Domains are initialized for this device - have a look what we ended up with */
         domain = iommu_get_domain_for_dev(dev);
         if (domain->type == IOMMU_DOMAIN_DMA)
-                iommu_setup_dma_ops(dev, IOVA_START_PFN << PAGE_SHIFT, 0);
+                iommu_setup_dma_ops(dev, IOVA_START_PFN << PAGE_SHIFT, U64_MAX);
         else
                 set_dma_ops(dev, NULL);
 }
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 7bcdd1205535..c62e19bed302 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -319,16 +319,16 @@ static bool dev_is_untrusted(struct device *dev)
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
  * @base: IOVA at which the mappable address space starts
- * @size: Size of IOVA space
+ * @limit: Last address of the IOVA space
  * @dev: Device the domain is being initialised for
  *
- * @base and @size should be exact multiples of IOMMU page granularity to
+ * @base and @limit + 1 should be exact multiples of IOMMU page granularity to
  * avoid rounding surprises. If necessary, we reserve the page at address 0
  * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
  * any change which could make prior IOVAs invalid will fail.
  */
 static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
-                                 u64 size, struct device *dev)
+                                 dma_addr_t limit, struct device *dev)
 {
         struct iommu_dma_cookie *cookie = domain->iova_cookie;
         unsigned long order, base_pfn;
@@ -346,7 +346,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
         /* Check the domain allows at least some access to the device... */
         if (domain->geometry.force_aperture) {
                 if (base > domain->geometry.aperture_end ||
-                    base + size <= domain->geometry.aperture_start) {
+                    limit < domain->geometry.aperture_start) {
                         pr_warn("specified DMA range outside IOMMU capability\n");
                         return -EFAULT;
                 }
@@ -1308,7 +1308,7 @@ static const struct dma_map_ops iommu_dma_ops = {
  * The IOMMU core code allocates the default DMA domain, which the underlying
  * IOMMU driver needs to support via the dma-iommu layer.
  */
-void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size)
+void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
 {
         struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
 
@@ -1320,7 +1320,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size)
          * underlying IOMMU driver needs to support via the dma-iommu layer.
          */
         if (domain->type == IOMMU_DOMAIN_DMA) {
-                if (iommu_dma_init_domain(domain, dma_base, size, dev))
+                if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev))
                         goto out_err;
                 dev->dma_ops = &iommu_dma_ops;
         }
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index be35284a2016..85f18342603c 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -5171,7 +5171,7 @@ static void intel_iommu_probe_finalize(struct device *dev)
 
         if (domain && domain->type == IOMMU_DOMAIN_DMA)
                 iommu_setup_dma_ops(dev, base,
-                                    __DOMAIN_MAX_ADDR(dmar_domain->gaw) - base);
+                                    __DOMAIN_MAX_ADDR(dmar_domain->gaw));
         else
                 set_dma_ops(dev, NULL);
 }
-- 
2.31.1
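[Editor's illustration] A quick userspace sketch of the wraparound that
motivates the limit form. With a full 64-bit aperture, the size would have to
be 2^64, which doesn't fit in a u64; the old size-based check then spuriously
rejects the range, while the limit-based check does not. The aperture_start of
0 here mimics iommu_dma_init_domain()'s force_aperture check on a hypothetical
viommu geometry:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* A viommu may report a full 64-bit aperture: [0; U64_MAX] */
            uint64_t base = 0, limit = UINT64_MAX;
            uint64_t aperture_start = 0;

            /* Size-based check: size = 2^64 wraps to 0 in u64 arithmetic,
             * so "base + size <= aperture_start" rejects the range. */
            uint64_t size = limit - base + 1;       /* wraps to 0 */
            printf("size-based check rejects: %d\n",
                   base + size <= aperture_start);  /* prints 1 */

            /* Limit-based check: no wraparound, the range is accepted */
            printf("limit-based check rejects: %d\n",
                   limit < aperture_start);         /* prints 0 */
            return 0;
    }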
Jean-Philippe Brucker
2021-Jun-10 07:51 UTC
[PATCH v4 5/6] iommu/dma: Simplify calls to iommu_setup_dma_ops()
dma-iommu uses the address bounds described in domain->geometry during
IOVA allocation. The address size parameters of iommu_setup_dma_ops()
are useful for describing additional limits set by the platform
firmware, but aren't needed for drivers that call this function from
probe_finalize(). The base parameter can be zero because dma-iommu
already removes the first IOVA page, and the limit parameter can be
U64_MAX because it's only checked against the domain geometry. Simplify
calls to iommu_setup_dma_ops().

Signed-off-by: Jean-Philippe Brucker <jean-philippe at linaro.org>
---
 drivers/iommu/amd/iommu.c   |  9 +--------
 drivers/iommu/dma-iommu.c   |  4 +++-
 drivers/iommu/intel/iommu.c | 10 +---------
 3 files changed, 5 insertions(+), 18 deletions(-)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 94b96d81fcfd..d3123bc05c08 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1708,14 +1708,7 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
 
 static void amd_iommu_probe_finalize(struct device *dev)
 {
-        struct iommu_domain *domain;
-
-        /* Domains are initialized for this device - have a look what we ended up with */
-        domain = iommu_get_domain_for_dev(dev);
-        if (domain->type == IOMMU_DOMAIN_DMA)
-                iommu_setup_dma_ops(dev, IOVA_START_PFN << PAGE_SHIFT, U64_MAX);
-        else
-                set_dma_ops(dev, NULL);
+        iommu_setup_dma_ops(dev, 0, U64_MAX);
 }
 
 static void amd_iommu_release_device(struct device *dev)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index c62e19bed302..175f8eaeb5b3 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1322,7 +1322,9 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
         if (domain->type == IOMMU_DOMAIN_DMA) {
                 if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev))
                         goto out_err;
-                dev->dma_ops = &iommu_dma_ops;
+                set_dma_ops(dev, &iommu_dma_ops);
+        } else {
+                set_dma_ops(dev, NULL);
         }
 
         return;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 85f18342603c..8d866940692a 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -5165,15 +5165,7 @@ static void intel_iommu_release_device(struct device *dev)
 
 static void intel_iommu_probe_finalize(struct device *dev)
 {
-        dma_addr_t base = IOVA_START_PFN << VTD_PAGE_SHIFT;
-        struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-        struct dmar_domain *dmar_domain = to_dmar_domain(domain);
-
-        if (domain && domain->type == IOMMU_DOMAIN_DMA)
-                iommu_setup_dma_ops(dev, base,
-                                    __DOMAIN_MAX_ADDR(dmar_domain->gaw));
-        else
-                set_dma_ops(dev, NULL);
+        iommu_setup_dma_ops(dev, 0, U64_MAX);
 }
 
 static void intel_iommu_get_resv_regions(struct device *device,
-- 
2.31.1
Jean-Philippe Brucker
2021-Jun-10 07:51 UTC
[PATCH v4 6/6] iommu/virtio: Enable x86 support
With the VIOT support in place, x86 platforms can now use the
virtio-iommu.

Because the other x86 IOMMU drivers aren't yet ready to use the
acpi_dma_setup() path, x86 doesn't implement arch_setup_dma_ops() at
the moment. Similarly to VT-d and AMD IOMMU, call iommu_setup_dma_ops()
from probe_finalize().

Acked-by: Joerg Roedel <jroedel at suse.de>
Acked-by: Michael S. Tsirkin <mst at redhat.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe at linaro.org>
---
 drivers/iommu/Kconfig        | 3 ++-
 drivers/iommu/dma-iommu.c    | 1 +
 drivers/iommu/virtio-iommu.c | 8 ++++++++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index aff8a4830dd1..07b7c25cbed8 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -400,8 +400,9 @@ config HYPERV_IOMMU
 config VIRTIO_IOMMU
         tristate "Virtio IOMMU driver"
         depends on VIRTIO
-        depends on ARM64
+        depends on (ARM64 || X86)
         select IOMMU_API
+        select IOMMU_DMA
         select INTERVAL_TREE
         select ACPI_VIOT if ACPI
         help
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 175f8eaeb5b3..46ed43c400cf 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1332,6 +1332,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
         pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
                  dev_name(dev));
 }
+EXPORT_SYMBOL_GPL(iommu_setup_dma_ops);
 
 static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
                 phys_addr_t msi_addr, struct iommu_domain *domain)
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 218fe8560e8d..77aee1207ced 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -1026,6 +1026,13 @@ static struct iommu_device *viommu_probe_device(struct device *dev)
         return ERR_PTR(ret);
 }
 
+static void viommu_probe_finalize(struct device *dev)
+{
+#ifndef CONFIG_ARCH_HAS_SETUP_DMA_OPS
+        iommu_setup_dma_ops(dev, 0, U64_MAX);
+#endif
+}
+
 static void viommu_release_device(struct device *dev)
 {
         struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
@@ -1062,6 +1069,7 @@ static struct iommu_ops viommu_ops = {
         .iova_to_phys           = viommu_iova_to_phys,
         .iotlb_sync             = viommu_iotlb_sync,
         .probe_device           = viommu_probe_device,
+        .probe_finalize         = viommu_probe_finalize,
         .release_device         = viommu_release_device,
         .device_group           = viommu_device_group,
         .get_resv_regions       = viommu_get_resv_regions,
-- 
2.31.1
Hi Rafael,

On Thu, Jun 10, 2021 at 09:51:27AM +0200, Jean-Philippe Brucker wrote:
> Add a driver for the ACPI VIOT table, which provides topology
> information for para-virtual IOMMUs. Enable virtio-iommu on
> non-devicetree platforms, including x86.
> 
> Since v3 [1] I fixed a build bug for !CONFIG_IOMMU_API. Joerg offered to
> take this series through the IOMMU tree, which requires Acks for patches
> 1-3.

I was wondering if you could take a look at patches 1-3, otherwise we'll
miss the mark for 5.14 since I won't be able to resend next week. The
series adds support for virtio-iommu on QEMU and cloud hypervisor.

Thanks,
Jean

> 
> You can find a QEMU implementation at [2], with extra support for
> testing all VIOT nodes including MMIO-based endpoints and IOMMU.
> This series is at [3].
> 
> [1] https://lore.kernel.org/linux-iommu/20210602154444.1077006-1-jean-philippe at linaro.org/
> [2] https://jpbrucker.net/git/qemu/log/?h=virtio-iommu/acpi
> [3] https://jpbrucker.net/git/linux/log/?h=virtio-iommu/acpi
> 
> 
> Jean-Philippe Brucker (6):
>   ACPI: arm64: Move DMA setup operations out of IORT
>   ACPI: Move IOMMU setup code out of IORT
>   ACPI: Add driver for the VIOT table
>   iommu/dma: Pass address limit rather than size to
>     iommu_setup_dma_ops()
>   iommu/dma: Simplify calls to iommu_setup_dma_ops()
>   iommu/virtio: Enable x86 support
> 
>  drivers/acpi/Kconfig         |   3 +
>  drivers/iommu/Kconfig        |   4 +-
>  drivers/acpi/Makefile        |   2 +
>  drivers/acpi/arm64/Makefile  |   1 +
>  include/acpi/acpi_bus.h      |   3 +
>  include/linux/acpi.h         |   3 +
>  include/linux/acpi_iort.h    |  14 +-
>  include/linux/acpi_viot.h    |  19 ++
>  include/linux/dma-iommu.h    |   4 +-
>  arch/arm64/mm/dma-mapping.c  |   2 +-
>  drivers/acpi/arm64/dma.c     |  50 +++++
>  drivers/acpi/arm64/iort.c    | 129 ++-----------
>  drivers/acpi/bus.c           |   2 +
>  drivers/acpi/scan.c          |  78 +++++++-
>  drivers/acpi/viot.c          | 364 +++++++++++++++++++++++++++++++++++
>  drivers/iommu/amd/iommu.c    |   9 +-
>  drivers/iommu/dma-iommu.c    |  17 +-
>  drivers/iommu/intel/iommu.c  |  10 +-
>  drivers/iommu/virtio-iommu.c |   8 +
>  MAINTAINERS                  |   8 +
>  20 files changed, 580 insertions(+), 150 deletions(-)
>  create mode 100644 include/linux/acpi_viot.h
>  create mode 100644 drivers/acpi/arm64/dma.c
>  create mode 100644 drivers/acpi/viot.c
> 
> -- 
> 2.31.1
> 
Hi Jean,

On 6/10/21 9:51 AM, Jean-Philippe Brucker wrote:
> Add a driver for the ACPI VIOT table, which provides topology
> information for para-virtual IOMMUs. Enable virtio-iommu on
> non-devicetree platforms, including x86.
> 
> Since v3 [1] I fixed a build bug for !CONFIG_IOMMU_API. Joerg offered to
> take this series through the IOMMU tree, which requires Acks for patches
> 1-3.
> 
> You can find a QEMU implementation at [2], with extra support for
> testing all VIOT nodes including MMIO-based endpoints and IOMMU.
> This series is at [3].
> 
> [1] https://lore.kernel.org/linux-iommu/20210602154444.1077006-1-jean-philippe at linaro.org/
> [2] https://jpbrucker.net/git/qemu/log/?h=virtio-iommu/acpi
> [3] https://jpbrucker.net/git/linux/log/?h=virtio-iommu/acpi

I tested the series on both aarch64 and x86_64 with qemu. It works for
me. Feel free to add my T-b.

Tested-by: Eric Auger <eric.auger at redhat.com>

Thanks

Eric

> 
> 
> Jean-Philippe Brucker (6):
>   ACPI: arm64: Move DMA setup operations out of IORT
>   ACPI: Move IOMMU setup code out of IORT
>   ACPI: Add driver for the VIOT table
>   iommu/dma: Pass address limit rather than size to
>     iommu_setup_dma_ops()
>   iommu/dma: Simplify calls to iommu_setup_dma_ops()
>   iommu/virtio: Enable x86 support
> 
>  drivers/acpi/Kconfig         |   3 +
>  drivers/iommu/Kconfig        |   4 +-
>  drivers/acpi/Makefile        |   2 +
>  drivers/acpi/arm64/Makefile  |   1 +
>  include/acpi/acpi_bus.h      |   3 +
>  include/linux/acpi.h         |   3 +
>  include/linux/acpi_iort.h    |  14 +-
>  include/linux/acpi_viot.h    |  19 ++
>  include/linux/dma-iommu.h    |   4 +-
>  arch/arm64/mm/dma-mapping.c  |   2 +-
>  drivers/acpi/arm64/dma.c     |  50 +++++
>  drivers/acpi/arm64/iort.c    | 129 ++-----------
>  drivers/acpi/bus.c           |   2 +
>  drivers/acpi/scan.c          |  78 +++++++-
>  drivers/acpi/viot.c          | 364 +++++++++++++++++++++++++++++++++++
>  drivers/iommu/amd/iommu.c    |   9 +-
>  drivers/iommu/dma-iommu.c    |  17 +-
>  drivers/iommu/intel/iommu.c  |  10 +-
>  drivers/iommu/virtio-iommu.c |   8 +
>  MAINTAINERS                  |   8 +
>  20 files changed, 580 insertions(+), 150 deletions(-)
>  create mode 100644 include/linux/acpi_viot.h
>  create mode 100644 drivers/acpi/arm64/dma.c
>  create mode 100644 drivers/acpi/viot.c
> 