The proposal for virtio-iommu to support attaching page tables is being discussed on the
virtio-comment mailing list[1]. This patch-set, based on Jean's virtio-iommu/pgtables
branch[2], follows the proposal and adds basic VT-d IO page table support to virtio-iommu.

On Intel platforms with VT-d nested translation enabled, letting the virtual IOMMU use
VT-d format IO page tables brings two main benefits:
1) Enabling vSVM (aka vSVA). Virtual Shared Virtual Addressing (vSVA) allows a virtual
   processor and a virtual device to share the same virtual addresses.
2) Accelerating DMA buffer map operations for the vIOVA case by removing the context
   switch on each DMA buffer map operation.

(Note: this patch-set doesn't include the whole series for enabling vSVM on virtio-iommu;
it only covers the vIOVA case. The vSVM enabling series will be based on this one.)

There are three changes in this patch-set:
1) The first patch fixes an issue where the IOTLB invalidation request could carry an
   incorrect page size.
2) The next three patches add generic IO page table support to the VT-d driver.
3) The last one introduces the VT-d page table format to the virtio-iommu driver.

The patch-set is also available at github:
https://github.com/TinaZhangZW/linux/tree/vt-d-pgtable

The QEMU part is available here:
https://github.com/TinaZhangZW/qemu/tree/virtio-iommu/vt-d-pgtable

[1]: https://lists.oasis-open.org/archives/virtio-comment/202310/msg00018.html
[2]: https://jpbrucker.net/git/linux/log/?h=virtio-iommu/pgtables

Tina Zhang (5):
  iommu/virtio-iommu: Correct the values of granule and nr_pages
  iommu/vt-d: Add generic IO page table support
  iommu/io-pgtable: Introduce struct vtd_cfg
  iommu/vt-d: Adapt alloc_pgtable interface to be used by others
  iommu/virtio-iommu: Support attaching VT-d IO pgtable

 drivers/iommu/intel/Kconfig       |   1 +
 drivers/iommu/intel/iommu.c       | 157 ++++++++++++++++++++++++++++++
 drivers/iommu/intel/iommu.h       |   7 ++
 drivers/iommu/io-pgtable.c        |   3 +
 drivers/iommu/virtio-iommu.c      |  27 ++++-
 include/linux/io-pgtable.h        |   7 ++
 include/uapi/linux/virtio_iommu.h |  26 +++++
 7 files changed, 226 insertions(+), 2 deletions(-)

--
2.39.3
Tina Zhang
2023-Nov-06 07:12 UTC
[RFC PATCH 1/5] iommu/virtio-iommu: Correct the values of granule and nr_pages
The granule is computed as ilog2(pgsize). When pgsize isn't a power of two, the
resulting granule encodes a size smaller than the actual pgsize. E.g., with
pgsize = 0x6000, granule = ilog2(gather->pgsize) = 0xe, and 2^0xe = 0x4000 is
smaller than the actual 0x6000. Invalidating the IOTLB with a smaller range than
was unmapped leads to cache incoherence.

So round pgsize up to the nearest power of two to make sure the granule never
describes less than the actual size. The value of
"gather->end - gather->start + 1" needs a similar adjustment.

Signed-off-by: Tina Zhang <tina.zhang at intel.com>
---
 drivers/iommu/virtio-iommu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 08e310672e57..b1ceaac974e2 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -1289,8 +1289,8 @@ static void viommu_iotlb_sync(struct iommu_domain *domain,
 	if (!gather->pgsize)
 		return;
 
-	granule = ilog2(gather->pgsize);
-	nr_pages = (gather->end - gather->start + 1) >> granule;
+	granule = ilog2(__roundup_pow_of_two(gather->pgsize));
+	nr_pages = __roundup_pow_of_two(gather->end - gather->start + 1) >> granule;
 	req = (struct virtio_iommu_req_invalidate) {
 		.head.type	= VIRTIO_IOMMU_T_INVALIDATE,
 		.inv_gran	= cpu_to_le16(VIRTIO_IOMMU_INVAL_G_VA),
--
2.39.3
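To make the arithmetic above concrete, here is a standalone userspace sketch (not kernel
code; ilog2() and __roundup_pow_of_two() are re-implemented locally) that reproduces the
0x6000 example from the commit message:

#include <stdio.h>
#include <stdint.h>

/* Local stand-ins for the kernel's ilog2() and __roundup_pow_of_two(). */
static unsigned int ilog2_u64(uint64_t v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

static uint64_t roundup_pow_of_two_u64(uint64_t v)
{
	uint64_t p = 1;

	while (p < v)
		p <<= 1;
	return p;
}

int main(void)
{
	uint64_t pgsize = 0x6000;	/* not a power of two */
	uint64_t range = 0x6000;	/* gather->end - gather->start + 1 */

	/* Old code: granule = ilog2(0x6000) = 0xe, so 1 << granule = 0x4000 < 0x6000. */
	unsigned int old_granule = ilog2_u64(pgsize);
	/* Fixed code: round up first, granule = 0xf, 1 << granule = 0x8000 >= 0x6000. */
	unsigned int new_granule = ilog2_u64(roundup_pow_of_two_u64(pgsize));
	uint64_t nr_pages = roundup_pow_of_two_u64(range) >> new_granule;

	printf("old: 1 << %u = %#llx (undercovers)\n", old_granule,
	       (unsigned long long)(1ULL << old_granule));
	printf("new: 1 << %u = %#llx, nr_pages = %llu\n", new_granule,
	       (unsigned long long)(1ULL << new_granule),
	       (unsigned long long)nr_pages);
	return 0;
}

Built with a plain cc, it prints "old: 1 << 14 = 0x4000 (undercovers)" and
"new: 1 << 15 = 0x8000, nr_pages = 1", i.e. the fixed invalidation covers at least the
range that was actually unmapped.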
Tina Zhang
2023-Nov-06 07:12 UTC
[RFC PATCH 2/5] iommu/vt-d: Add generic IO page table support
Add basic hook up code to implement generic IO page table framework.

Signed-off-by: Tina Zhang <tina.zhang at intel.com>
---
 drivers/iommu/intel/Kconfig |  1 +
 drivers/iommu/intel/iommu.c | 94 +++++++++++++++++++++++++++++++++++++
 drivers/iommu/intel/iommu.h |  7 +++
 drivers/iommu/io-pgtable.c  |  3 ++
 include/linux/io-pgtable.h  |  2 +
 5 files changed, 107 insertions(+)

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 2e56bd79f589..8334e7e50e69 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -15,6 +15,7 @@ config INTEL_IOMMU
 	select DMA_OPS
 	select IOMMU_API
 	select IOMMU_IOVA
+	select IOMMU_IO_PGTABLE
 	select NEED_DMA_MAP_STATE
 	select DMAR_TABLE
 	select SWIOTLB
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index dbcdf7b95b9f..80bd1993861c 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -23,6 +23,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/tboot.h>
 #include <uapi/linux/iommufd.h>
+#include <linux/io-pgtable.h>
 
 #include "iommu.h"
 #include "../dma-iommu.h"
@@ -67,6 +68,20 @@
 #define LEVEL_STRIDE		(9)
 #define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
 
+#define io_pgtable_cfg_to_dmar_pgtable(x) \
+	container_of((x), struct dmar_io_pgtable, pgtbl_cfg)
+
+#define io_pgtable_to_dmar_pgtable(x) \
+	container_of((x), struct dmar_io_pgtable, iop)
+
+#define io_pgtable_to_dmar_domain(x) \
+	container_of(io_pgtable_to_dmar_pgtable(x), \
+		     struct dmar_domain, dmar_iop)
+
+#define io_pgtable_ops_to_dmar_domain(x) \
+	container_of(io_pgtable_to_dmar_pgtable(io_pgtable_ops_to_pgtable(x)), \
+		     struct dmar_domain, dmar_iop)
+
 static inline int agaw_to_level(int agaw)
 {
 	return agaw + 2;
@@ -5171,3 +5186,82 @@ int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob)
 
 	return ret;
 }
+
+static void flush_all(void *cookie)
+{
+}
+
+static void flush_walk(unsigned long iova, size_t size,
+		       size_t granule, void *cookie)
+{
+}
+
+static void add_page(struct iommu_iotlb_gather *gather,
+		     unsigned long iova, size_t granule,
+		     void *cookie)
+{
+}
+
+static const struct iommu_flush_ops flush_ops = {
+	.tlb_flush_all = flush_all,
+	.tlb_flush_walk = flush_walk,
+	.tlb_add_page = add_page,
+};
+
+static void free_pgtable(struct io_pgtable *iop)
+{
+	struct dmar_domain *dmar_domain = io_pgtable_to_dmar_domain(iop);
+
+	if (dmar_domain->pgd) {
+		LIST_HEAD(freelist);
+
+		domain_unmap(dmar_domain, 0, DOMAIN_MAX_PFN(dmar_domain->gaw), &freelist);
+		put_pages_list(&freelist);
+	}
+}
+
+static int pgtable_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
+			     phys_addr_t paddr, size_t pgsize, size_t pgcount,
+			     int iommu_prot, gfp_t gfp, size_t *mapped)
+{
+	struct dmar_domain *dmar_domain = io_pgtable_ops_to_dmar_domain(ops);
+
+	return intel_iommu_map_pages(&dmar_domain->domain, iova, paddr, pgsize,
+				     pgcount, iommu_prot, gfp, mapped);
+}
+
+static size_t pgtable_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
+				  size_t pgsize, size_t pgcount,
+				  struct iommu_iotlb_gather *gather)
+{
+	struct dmar_domain *dmar_domain = io_pgtable_ops_to_dmar_domain(ops);
+
+	return intel_iommu_unmap_pages(&dmar_domain->domain, iova, pgsize,
+				       pgcount, gather);
+}
+
+static phys_addr_t pgtable_iova_to_phys(struct io_pgtable_ops *ops,
+					unsigned long iova)
+{
+	struct dmar_domain *dmar_domain = io_pgtable_ops_to_dmar_domain(ops);
+
+	return intel_iommu_iova_to_phys(&dmar_domain->domain, iova);
+}
+
+static struct io_pgtable *alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
+{
+	struct dmar_io_pgtable *pgtable = io_pgtable_cfg_to_dmar_pgtable(cfg);
+
+	pgtable->iop.ops.map_pages = pgtable_map_pages;
+	pgtable->iop.ops.unmap_pages = pgtable_unmap_pages;
+	pgtable->iop.ops.iova_to_phys = pgtable_iova_to_phys;
+
+	cfg->tlb = &flush_ops;
+
+	return &pgtable->iop;
+}
+
+struct io_pgtable_init_fns io_pgtable_intel_iommu_init_fns = {
+	.alloc = alloc_pgtable,
+	.free = free_pgtable,
+};
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 8d0aac71c135..5207fea6477a 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -18,6 +18,7 @@
 #include <linux/list.h>
 #include <linux/iommu.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/io-pgtable.h>
 #include <linux/dmar.h>
 #include <linux/bitfield.h>
 #include <linux/xarray.h>
@@ -579,6 +580,11 @@ struct iommu_domain_info {
 					 * to VT-d spec, section 9.3 */
 };
 
+struct dmar_io_pgtable {
+	struct io_pgtable_cfg	pgtbl_cfg;
+	struct io_pgtable	iop;
+};
+
 struct dmar_domain {
 	int	nid;			/* node id */
 	struct xarray iommu_array;	/* Attached IOMMU array */
@@ -633,6 +639,7 @@ struct dmar_domain {
 
 	struct iommu_domain domain;	/* generic domain data structure for
 					   iommu core */
+	struct dmar_io_pgtable dmar_iop;
 };
 
 /*
diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
index 5755dee96a68..533b27557290 100644
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -35,6 +35,9 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
 #ifdef CONFIG_IOMMU_IO_PGTABLE_VIRT
 	[VIRT_IO_PGTABLE] = &io_pgtable_virt_init_fns,
 #endif
+#ifdef CONFIG_INTEL_IOMMU
+	[INTEL_IOMMU] = &io_pgtable_intel_iommu_init_fns,
+#endif
 };
 
 struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index bdcade2c4844..b2857c18f963 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -20,6 +20,7 @@ enum io_pgtable_fmt {
 	APPLE_DART,
 	APPLE_DART2,
 	VIRT_IO_PGTABLE,
+	INTEL_IOMMU,
 	IO_PGTABLE_NUM_FMTS,
 };
 
@@ -281,5 +282,6 @@ extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_virt_init_fns;
+extern struct io_pgtable_init_fns io_pgtable_intel_iommu_init_fns;
 
 #endif /* __IO_PGTABLE_H */
--
2.39.3
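For readers unfamiliar with the io-pgtable framework, here is a rough sketch of how a
consumer driver is expected to request a VT-d format table through the existing framework
entry points once the series is complete (patch 4 makes alloc_pgtable() allocate its own
dmar_domain, so a caller-provided cfg is enough). The helper name and the size values are
illustrative only, not part of the series:

#include <linux/io-pgtable.h>
#include <linux/sizes.h>

/*
 * Illustrative only: ask the generic framework for a VT-d format page table.
 * alloc_io_pgtable_ops() dispatches to io_pgtable_intel_iommu_init_fns.alloc()
 * registered by this patch.
 */
static struct io_pgtable_ops *example_alloc_vtd_pgtable(void *cookie)
{
	struct io_pgtable_cfg cfg = {
		.pgsize_bitmap	= SZ_4K,
		.ias		= 48,	/* input (IOVA) address width */
		.oas		= 48,	/* output (physical) address width */
		.coherent_walk	= true,
	};

	return alloc_io_pgtable_ops(INTEL_IOMMU, &cfg, cookie);
}

The matching free_io_pgtable_ops() call tears the table down again through
io_pgtable_intel_iommu_init_fns.free().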
Tina Zhang
2023-Nov-06 07:12 UTC
[RFC PATCH 3/5] iommu/io-pgtable: Introduce struct vtd_cfg
VT-d hardware cap/ecap register information is needed by the driver to generate a
VT-d format IO page table. Add struct vtd_cfg to hold that information.

Signed-off-by: Tina Zhang <tina.zhang at intel.com>
---
 include/linux/io-pgtable.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index b2857c18f963..ae6a2e44b027 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -147,6 +147,11 @@ struct io_pgtable_cfg {
 			u32	n_ttbrs;
 		} apple_dart_cfg;
 
+		struct {
+			u64	cap_reg;
+			u64	ecap_reg;
+		} vtd_cfg;
+
 		struct {
 			dma_addr_t	pgd;
 		} virt;
--
2.39.3
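A minimal sketch of how these fields are meant to be used (hypothetical helper; in this
series the values ultimately come from a virtio-iommu probe property, see patch 5): the
caller simply carries the raw register values in the shared config so the VT-d backend
can derive capabilities such as supported page-table levels from them.

#include <linux/io-pgtable.h>

/* Illustrative only: stash the (v)IOMMU cap/ecap values before allocating ops. */
static void example_fill_vtd_cfg(struct io_pgtable_cfg *cfg,
				 u64 cap_reg, u64 ecap_reg)
{
	cfg->vtd_cfg.cap_reg = cap_reg;
	cfg->vtd_cfg.ecap_reg = ecap_reg;
}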
Tina Zhang
2023-Nov-06 07:12 UTC
[RFC PATCH 4/5] iommu/vt-d: Adapt alloc_pgtable interface to be used by others
The generic IO page table framework provides a set of interfaces for invoking IO page
table operations. Other entities (e.g., the virtio-iommu driver) can use these
interfaces to ask the VT-d driver to generate a VT-d format IO page table. This patch
adds that support.

Signed-off-by: Tina Zhang <tina.zhang at intel.com>
---
 drivers/iommu/intel/iommu.c | 69 +++++++++++++++++++++++++++++++++++--
 1 file changed, 66 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 80bd1993861c..d714e780a031 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -5248,17 +5248,80 @@ static phys_addr_t pgtable_iova_to_phys(struct io_pgtable_ops *ops,
 	return intel_iommu_iova_to_phys(&dmar_domain->domain, iova);
 }
 
+static void __iommu_calculate_cfg(struct io_pgtable_cfg *cfg)
+{
+	unsigned long fl_sagaw, sl_sagaw, sagaw;
+	int agaw, addr_width;
+
+	fl_sagaw = BIT(2) | (cap_fl5lp_support(cfg->vtd_cfg.cap_reg) ? BIT(3) : 0);
+	sl_sagaw = cap_sagaw(cfg->vtd_cfg.cap_reg);
+	sagaw = fl_sagaw & sl_sagaw;
+
+	for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); agaw >= 0; agaw--) {
+		if (test_bit(agaw, &sagaw))
+			break;
+	}
+
+	addr_width = agaw_to_width(agaw);
+	if (cfg->ias > addr_width)
+		cfg->ias = addr_width;
+	if (cfg->oas != addr_width)
+		cfg->oas = addr_width;
+}
+
 static struct io_pgtable *alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
 {
-	struct dmar_io_pgtable *pgtable = io_pgtable_cfg_to_dmar_pgtable(cfg);
+	struct dmar_io_pgtable *pgtable;
+	struct dmar_domain *domain;
+	int adjust_width;
+
+	/* Platform must have nested translation support */
+	if (!ecap_nest(cfg->vtd_cfg.ecap_reg))
+		return NULL;
+
+	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+	if (!domain)
+		return NULL;
+
+	domain->nid = NUMA_NO_NODE;
+	domain->use_first_level = true;
+	domain->has_iotlb_device = false;
+	INIT_LIST_HEAD(&domain->devices);
+	spin_lock_init(&domain->lock);
+	xa_init(&domain->iommu_array);
+
+	/* calculate AGAW */
+	__iommu_calculate_cfg(cfg);
+	domain->gaw = cfg->ias;
+	adjust_width = guestwidth_to_adjustwidth(domain->gaw);
+	domain->agaw = width_to_agaw(adjust_width);
+
+	domain->iommu_coherency = ecap_smpwc(cfg->vtd_cfg.ecap_reg);
+	domain->force_snooping = true;
+	domain->iommu_superpage = cap_fl1gp_support(cfg->vtd_cfg.ecap_reg) ? 2 : 1;
+	domain->max_addr = 0;
+
+	cfg->coherent_walk = domain->iommu_coherency;
+
+	pgtable = &domain->dmar_iop;
+	/* always allocate the top pgd */
+	domain->pgd = alloc_pgtable_page(domain->nid, GFP_KERNEL);
+	if (!domain->pgd)
+		goto out_free_domain;
+	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
+
+	cfg->virt.pgd = virt_to_phys(domain->pgd);
+	cfg->tlb = &flush_ops;
 
 	pgtable->iop.ops.map_pages = pgtable_map_pages;
 	pgtable->iop.ops.unmap_pages = pgtable_unmap_pages;
 	pgtable->iop.ops.iova_to_phys = pgtable_iova_to_phys;
 
-	cfg->tlb = &flush_ops;
-
 	return &pgtable->iop;
+
+out_free_domain:
+	kfree(domain);
+	return NULL;
 }
 
 struct io_pgtable_init_fns io_pgtable_intel_iommu_init_fns = {
--
2.39.3
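The AGAW ("adjusted guest address width") handling in __iommu_calculate_cfg() is easier
to follow with the width arithmetic spelled out. Below is a standalone sketch with
simplified re-implementations of the driver's agaw_to_width()/width_to_agaw() (no
clamping to the maximum width) and the usual 48-bit default domain width assumed:

#include <stdio.h>

#define LEVEL_STRIDE	9	/* 9 IOVA bits are consumed per page-table level */

/* AGAW n corresponds to an (n + 2)-level table covering 30 + 9*n bits of IOVA:
 * AGAW 1 = 39-bit (3-level), 2 = 48-bit (4-level), 3 = 57-bit (5-level). */
static int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static int width_to_agaw(int width)
{
	return (width - 30 + LEVEL_STRIDE - 1) / LEVEL_STRIDE;
}

int main(void)
{
	/* e.g. hardware advertises 4-level (bit 2) and 5-level (bit 3) support */
	unsigned long sagaw = (1UL << 2) | (1UL << 3);
	int agaw;

	/* Same walk as __iommu_calculate_cfg(): pick the widest supported AGAW
	 * that does not exceed the default 48-bit domain address width. */
	for (agaw = width_to_agaw(48); agaw >= 0; agaw--)
		if (sagaw & (1UL << agaw))
			break;

	printf("chosen agaw %d -> address width %d bits\n",
	       agaw, agaw_to_width(agaw));
	return 0;
}

With the inputs above this picks AGAW 2, i.e. a 4-level, 48-bit first-level table, and
ias/oas are clamped to that width before the pgd is allocated.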
Tina Zhang
2023-Nov-06 07:12 UTC
[RFC PATCH 5/5] iommu/virtio-iommu: Support attaching VT-d IO pgtable
Add VT-d IO page table support to ATTACH_TABLE request.

Signed-off-by: Tina Zhang <tina.zhang at intel.com>
---
 drivers/iommu/virtio-iommu.c      | 23 +++++++++++++++++++++++
 include/uapi/linux/virtio_iommu.h | 26 ++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index b1ceaac974e2..b02eeb1d27a4 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -991,12 +991,25 @@ static int viommu_attach_pgtable(struct viommu_domain *vdomain,
 	};
 
 	/* TODO: bypass flag? */
+	if (vdomain->bypass == true)
+		return 0;
 
 	switch (fmt) {
 	case VIRT_IO_PGTABLE:
 		req.format = cpu_to_le16(VIRTIO_IOMMU_FORMAT_PGTF_VIRT);
 		req.pgd = cpu_to_le64((u64)cfg->virt.pgd);
 		break;
+	case INTEL_IOMMU: {
+		struct virtio_iommu_req_attach_pgt_vtd *vtd_req =
+			(struct virtio_iommu_req_attach_pgt_vtd *)&req;
+
+		vtd_req->format = cpu_to_le16(VIRTIO_IOMMU_FORMAT_PGTF_VTD);
+		vtd_req->pgd = cpu_to_le64((u64)cfg->virt.pgd);
+		vtd_req->addr_width = cpu_to_le32(cfg->oas);
+		vtd_req->pasid = IOMMU_NO_PASID;
+		break;
+	}
+
 	default:
 		return -EINVAL;
 	};
@@ -1034,6 +1047,16 @@ static int viommu_setup_pgtable(struct viommu_domain *vdomain,
 	case VIRTIO_IOMMU_FORMAT_PGTF_VIRT:
 		fmt = VIRT_IO_PGTABLE;
 		break;
+	case VIRTIO_IOMMU_FORMAT_PGTF_VTD:
+	{
+		struct virtio_iommu_probe_pgt_vtd *vtd_desc =
+			(struct virtio_iommu_probe_pgt_vtd *)desc;
+
+		cfg.vtd_cfg.cap_reg = le64_to_cpu(vtd_desc->cap_reg);
+		cfg.vtd_cfg.ecap_reg = le64_to_cpu(vtd_desc->ecap_reg);
+		fmt = INTEL_IOMMU;
+		break;
+	}
 	default:
 		dev_warn(vdev->dev, "unsupported page table format 0x%x\n",
 			 le16_to_cpu(desc->format));
diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
index 656be1f3d926..17e0d5fcdd54 100644
--- a/include/uapi/linux/virtio_iommu.h
+++ b/include/uapi/linux/virtio_iommu.h
@@ -139,6 +139,22 @@ struct virtio_iommu_req_attach_pgt_virt {
 	struct virtio_iommu_req_tail		tail;
 };
 
+/* VT-d I/O Page Table Descriptor */
+struct virtio_iommu_req_attach_pgt_vtd {
+	struct virtio_iommu_req_head		head;
+	__le32					domain;
+	__le32					endpoint;
+	__le32					flags;
+	__le16					format;
+	__u8					reserved[2];
+	__le32					pasid;
+	__le64					pgd;
+	__le64					fl_flags;
+	__le32					addr_width;
+	__u8					reserved2[36];
+	struct virtio_iommu_req_tail		tail;
+};
+
 #define VIRTIO_IOMMU_MAP_F_READ			(1 << 0)
 #define VIRTIO_IOMMU_MAP_F_WRITE		(1 << 1)
 #define VIRTIO_IOMMU_MAP_F_MMIO			(1 << 2)
@@ -224,6 +240,8 @@ struct virtio_iommu_probe_pasid_size {
 #define VIRTIO_IOMMU_FORMAT_PSTF_ARM_SMMU_V3	2
 /* Virt I/O page table format */
 #define VIRTIO_IOMMU_FORMAT_PGTF_VIRT		3
+/* VT-d I/O page table format */
+#define VIRTIO_IOMMU_FORMAT_PGTF_VTD		4
 
 struct virtio_iommu_probe_table_format {
 	struct virtio_iommu_probe_property	head;
 	__u8					reserved[2];
 };
 
@@ -231,6 +249,14 @@ struct virtio_iommu_probe_table_format {
 	__u8					reserved[2];
 };
 
+struct virtio_iommu_probe_pgt_vtd {
+	struct virtio_iommu_probe_property	head;
+	__le16					format;
+	__u8					reserved[2];
+	__le64					cap_reg;
+	__le64					ecap_reg;
+};
+
 struct virtio_iommu_req_probe {
 	struct virtio_iommu_req_head		head;
 	__le32					endpoint;
--
2.39.3
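Putting the series together, the guest-side sequence looks roughly like the sketch below.
This is illustrative only: the function and variable names are invented, error handling
and the virtio request plumbing are omitted, and the byte-order handling follows the
types in the new UAPI structures rather than the exact code in the patch.

#include <linux/io-pgtable.h>
#include <linux/iommu.h>
#include <uapi/linux/virtio_iommu.h>

/*
 * Condensed guest-side flow:
 * 1. a PROBE property carries the host's cap/ecap registers (patches 3 and 5),
 * 2. the VT-d backend builds a first-level table (patches 2 and 4),
 * 3. ATTACH_TABLE hands the pgd and address width to the device.
 */
static struct io_pgtable_ops *
example_setup_vtd_pgtable(struct virtio_iommu_probe_pgt_vtd *vtd_desc,
			  struct virtio_iommu_req_attach_pgt_vtd *req,
			  void *cookie)
{
	struct io_pgtable_cfg cfg = {};
	struct io_pgtable_ops *ops;

	/* 1. Capability info from the probe property. */
	cfg.vtd_cfg.cap_reg = le64_to_cpu(vtd_desc->cap_reg);
	cfg.vtd_cfg.ecap_reg = le64_to_cpu(vtd_desc->ecap_reg);

	/* 2. Allocate the VT-d format table; cfg.virt.pgd and cfg.oas are
	 *    filled in by the VT-d backend on success. */
	ops = alloc_io_pgtable_ops(INTEL_IOMMU, &cfg, cookie);
	if (!ops)
		return NULL;

	/* 3. Fill the ATTACH_TABLE request for the device. */
	req->format = cpu_to_le16(VIRTIO_IOMMU_FORMAT_PGTF_VTD);
	req->pgd = cpu_to_le64(cfg.virt.pgd);
	req->addr_width = cpu_to_le32(cfg.oas);
	req->pasid = cpu_to_le32(IOMMU_NO_PASID);

	return ops;
}

After step 3 the device walks the guest-managed VT-d table directly, so map and unmap go
straight through ops->map_pages()/ops->unmap_pages() and only IOTLB invalidations still
need to reach the device, which is the acceleration described in the cover letter.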