Dongli Zhang
2021-Feb-03 23:37 UTC
[Nouveau] [PATCH RFC v1 5/6] xen-swiotlb: convert variables to arrays
This patch converts several xen-swiotlb related variables to arrays, in
order to maintain state/status for different swiotlb buffers. Here are
the variables involved:

- xen_io_tlb_start and xen_io_tlb_end
- xen_io_tlb_nslabs
- MAX_DMA_BITS

There is no functional change and this is to prepare to enable 64-bit
xen-swiotlb.

Cc: Joe Jin <joe.jin at oracle.com>
Signed-off-by: Dongli Zhang <dongli.zhang at oracle.com>
---
 drivers/xen/swiotlb-xen.c | 75 +++++++++++++++++++++------------------
 1 file changed, 40 insertions(+), 35 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 662638093542..e18cae693cdc 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -39,15 +39,17 @@
 #include <asm/xen/page-coherent.h>
 #include <trace/events/swiotlb.h>

-#define MAX_DMA_BITS 32
 /*
  * Used to do a quick range check in swiotlb_tbl_unmap_single and
  * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
  * API.
  */
-static char *xen_io_tlb_start, *xen_io_tlb_end;
-static unsigned long xen_io_tlb_nslabs;
+static char *xen_io_tlb_start[SWIOTLB_MAX], *xen_io_tlb_end[SWIOTLB_MAX];
+static unsigned long xen_io_tlb_nslabs[SWIOTLB_MAX];
+
+static int max_dma_bits[] = {32, 64};
+
 /*
  * Quick lookup value of the bus address of the IOTLB.
  */
@@ -112,8 +114,8 @@ static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
 	 * in our domain. Therefore _only_ check address within our domain.
 	 */
 	if (pfn_valid(PFN_DOWN(paddr))) {
-		return paddr >= virt_to_phys(xen_io_tlb_start) &&
-		       paddr < virt_to_phys(xen_io_tlb_end);
+		return paddr >= virt_to_phys(xen_io_tlb_start[SWIOTLB_LO]) &&
+		       paddr < virt_to_phys(xen_io_tlb_end[SWIOTLB_LO]);
 	}
 	return 0;
 }
@@ -137,7 +139,7 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
 				p + (i << IO_TLB_SHIFT),
 				get_order(slabs << IO_TLB_SHIFT),
 				dma_bits, &dma_handle);
-	} while (rc && dma_bits++ < MAX_DMA_BITS);
+	} while (rc && dma_bits++ < max_dma_bits[SWIOTLB_LO]);
 	if (rc)
 		return rc;

@@ -148,12 +150,13 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
 static unsigned long xen_set_nslabs(unsigned long nr_tbl)
 {
 	if (!nr_tbl) {
-		xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
-		xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE);
+		xen_io_tlb_nslabs[SWIOTLB_LO] = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
+		xen_io_tlb_nslabs[SWIOTLB_LO] = ALIGN(xen_io_tlb_nslabs[SWIOTLB_LO],
+						      IO_TLB_SEGSIZE);
 	} else
-		xen_io_tlb_nslabs = nr_tbl;
+		xen_io_tlb_nslabs[SWIOTLB_LO] = nr_tbl;

-	return xen_io_tlb_nslabs << IO_TLB_SHIFT;
+	return xen_io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT;
 }

 enum xen_swiotlb_err {
@@ -184,16 +187,16 @@ int __ref xen_swiotlb_init(int verbose, bool early)
 	enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN;
 	unsigned int repeat = 3;

-	xen_io_tlb_nslabs = swiotlb_nr_tbl(SWIOTLB_LO);
+	xen_io_tlb_nslabs[SWIOTLB_LO] = swiotlb_nr_tbl(SWIOTLB_LO);
 retry:
-	bytes = xen_set_nslabs(xen_io_tlb_nslabs);
-	order = get_order(xen_io_tlb_nslabs << IO_TLB_SHIFT);
+	bytes = xen_set_nslabs(xen_io_tlb_nslabs[SWIOTLB_LO]);
+	order = get_order(xen_io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT);

 	/*
 	 * IO TLB memory already allocated. Just use it.
 	 */
 	if (io_tlb_start[SWIOTLB_LO] != 0) {
-		xen_io_tlb_start = phys_to_virt(io_tlb_start[SWIOTLB_LO]);
+		xen_io_tlb_start[SWIOTLB_LO] = phys_to_virt(io_tlb_start[SWIOTLB_LO]);
 		goto end;
 	}

@@ -201,76 +204,78 @@ int __ref xen_swiotlb_init(int verbose, bool early)
 	 * Get IO TLB memory from any location.
 	 */
 	if (early) {
-		xen_io_tlb_start = memblock_alloc(PAGE_ALIGN(bytes),
+		xen_io_tlb_start[SWIOTLB_LO] = memblock_alloc(PAGE_ALIGN(bytes),
 						  PAGE_SIZE);
-		if (!xen_io_tlb_start)
+		if (!xen_io_tlb_start[SWIOTLB_LO])
 			panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
 			      __func__, PAGE_ALIGN(bytes), PAGE_SIZE);
 	} else {
 #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
 		while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-			xen_io_tlb_start = (void *)xen_get_swiotlb_free_pages(order);
-			if (xen_io_tlb_start)
+			xen_io_tlb_start[SWIOTLB_LO] = (void *)xen_get_swiotlb_free_pages(order);
+			if (xen_io_tlb_start[SWIOTLB_LO])
 				break;
 			order--;
 		}
 		if (order != get_order(bytes)) {
 			pr_warn("Warning: only able to allocate %ld MB for software IO TLB\n",
 				(PAGE_SIZE << order) >> 20);
-			xen_io_tlb_nslabs = SLABS_PER_PAGE << order;
-			bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
+			xen_io_tlb_nslabs[SWIOTLB_LO] = SLABS_PER_PAGE << order;
+			bytes = xen_io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT;
 		}
 	}
-	if (!xen_io_tlb_start) {
+	if (!xen_io_tlb_start[SWIOTLB_LO]) {
 		m_ret = XEN_SWIOTLB_ENOMEM;
 		goto error;
 	}
 	/*
 	 * And replace that memory with pages under 4GB.
 	 */
-	rc = xen_swiotlb_fixup(xen_io_tlb_start,
+	rc = xen_swiotlb_fixup(xen_io_tlb_start[SWIOTLB_LO],
 			       bytes,
-			       xen_io_tlb_nslabs);
+			       xen_io_tlb_nslabs[SWIOTLB_LO]);
 	if (rc) {
 		if (early)
-			memblock_free(__pa(xen_io_tlb_start),
+			memblock_free(__pa(xen_io_tlb_start[SWIOTLB_LO]),
 				      PAGE_ALIGN(bytes));
 		else {
-			free_pages((unsigned long)xen_io_tlb_start, order);
-			xen_io_tlb_start = NULL;
+			free_pages((unsigned long)xen_io_tlb_start[SWIOTLB_LO], order);
+			xen_io_tlb_start[SWIOTLB_LO] = NULL;
 		}
 		m_ret = XEN_SWIOTLB_EFIXUP;
 		goto error;
 	}
 	if (early) {
-		if (swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs,
+		if (swiotlb_init_with_tbl(xen_io_tlb_start[SWIOTLB_LO],
+					  xen_io_tlb_nslabs[SWIOTLB_LO],
 					  SWIOTLB_LO, verbose))
 			panic("Cannot allocate SWIOTLB buffer");
 		rc = 0;
 	} else
-		rc = swiotlb_late_init_with_tbl(xen_io_tlb_start,
-						xen_io_tlb_nslabs, SWIOTLB_LO);
+		rc = swiotlb_late_init_with_tbl(xen_io_tlb_start[SWIOTLB_LO],
+						xen_io_tlb_nslabs[SWIOTLB_LO],
+						SWIOTLB_LO);

 end:
-	xen_io_tlb_end = xen_io_tlb_start + bytes;
+	xen_io_tlb_end[SWIOTLB_LO] = xen_io_tlb_start[SWIOTLB_LO] + bytes;
 	if (!rc)
 		swiotlb_set_max_segment(PAGE_SIZE, SWIOTLB_LO);

 	return rc;
 error:
 	if (repeat--) {
-		xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */
-					(xen_io_tlb_nslabs >> 1));
+		xen_io_tlb_nslabs[SWIOTLB_LO] = max(1024UL, /* Min is 2MB */
+						    (xen_io_tlb_nslabs[SWIOTLB_LO] >> 1));
 		pr_info("Lowering to %luMB\n",
-			(xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
+			(xen_io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT) >> 20);
 		goto retry;
 	}
 	pr_err("%s (rc:%d)\n", xen_swiotlb_error(m_ret), rc);
 	if (early)
 		panic("%s (rc:%d)", xen_swiotlb_error(m_ret), rc);
 	else
-		free_pages((unsigned long)xen_io_tlb_start, order);
+		free_pages((unsigned long)xen_io_tlb_start[SWIOTLB_LO], order);
 	return rc;
 }

@@ -561,7 +566,7 @@ xen_swiotlb_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
 static int
 xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
-	return xen_virt_to_bus(hwdev, xen_io_tlb_end - 1) <= mask;
+	return xen_virt_to_bus(hwdev, xen_io_tlb_end[SWIOTLB_LO] - 1) <= mask;
 }

 const struct dma_map_ops xen_swiotlb_dma_ops = {
--
2.17.1
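The core of the change is mechanical: each piece of per-buffer bookkeeping that used to be a scalar becomes an array indexed by a zone enum, with SWIOTLB_LO (the existing 32-bit buffer) as index 0, so a later patch can add a 64-bit SWIOTLB_HI entry without reworking the call sites. The standalone sketch below illustrates only that indexing pattern. The enum names and the max_dma_bits values mirror the series; everything else (the local IO_TLB_SHIFT constant, io_tlb_nslabs sizing, main()) is illustrative and not kernel code.

#include <stdio.h>

#define IO_TLB_SHIFT 11			/* slab size shift, for illustration only */

/* Zone indices, mirroring the SWIOTLB_LO/SWIOTLB_HI split of the series. */
enum swiotlb_zone {
	SWIOTLB_LO,			/* buffer restricted to 32-bit DMA addresses */
	SWIOTLB_HI,			/* buffer allowed to sit above 4GB */
	SWIOTLB_MAX
};

/* Formerly scalar state, now one slot per zone. */
static unsigned long io_tlb_nslabs[SWIOTLB_MAX];
static int max_dma_bits[SWIOTLB_MAX] = { 32, 64 };

int main(void)
{
	int zone;

	/* Default sizing for the low zone, as the patch does for SWIOTLB_LO. */
	io_tlb_nslabs[SWIOTLB_LO] = (64 * 1024 * 1024) >> IO_TLB_SHIFT;
	io_tlb_nslabs[SWIOTLB_HI] = 0;	/* high zone not enabled in this sketch */

	for (zone = SWIOTLB_LO; zone < SWIOTLB_MAX; zone++)
		printf("zone %d: %lu slabs, DMA mask up to %d bits\n",
		       zone, io_tlb_nslabs[zone], max_dma_bits[zone]);
	return 0;
}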
Christoph Hellwig
2021-Feb-04 08:40 UTC
[Nouveau] [PATCH RFC v1 5/6] xen-swiotlb: convert variables to arrays
So one thing that has been on my mind for a while: I'd really like to kill
the separate dma ops in Xen swiotlb. If we compare xen-swiotlb to swiotlb,
the main differences seem to be:

 - additional reasons to bounce I/O beyond the plain DMA-capable check
 - the possibility to do a hypercall on arm/arm64
 - an extra translation layer before doing the phys_to_dma and vice versa
 - a special memory allocator

I wonder if, between a few jump labels or other no-overhead enablement
options and possibly better use of the dma_range_map, we could kill off
most of swiotlb-xen instead of maintaining all this code duplication?
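To make the shape of that suggestion concrete: instead of a separate set of dma_map_ops, the generic mapping path would carry an optional Xen hook that is compiled in but disabled at run time, for example behind a static key (jump label) so the check costs nothing when not running under Xen. The fragment below is only a userspace sketch of that structure; the boolean flag, the placeholder translation, and the function names are hypothetical stand-ins for a real static_branch_unlikely() check and Xen's pfn/mfn translation, not an actual kernel change.

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

typedef uint64_t phys_addr_t;
typedef uint64_t dma_addr_t;

/*
 * Stand-in for a jump label: in the kernel this would be a static key
 * (DEFINE_STATIC_KEY_FALSE + static_branch_unlikely()) so the extra
 * branch is free when Xen translation is not in use.
 */
static bool xen_translation_active;

/* Hypothetical placeholder for Xen's extra phys-to-bus translation layer. */
static dma_addr_t xen_phys_to_dma(phys_addr_t paddr)
{
	return paddr ^ 0x8000000000ULL;	/* fake offset, illustration only */
}

/* One common mapping path instead of a separate set of dma_map_ops. */
static dma_addr_t map_page(phys_addr_t paddr)
{
	if (xen_translation_active)	/* would be a jump label in the kernel */
		return xen_phys_to_dma(paddr);
	return paddr;			/* trivial native phys_to_dma stand-in */
}

int main(void)
{
	printf("native: %#llx\n", (unsigned long long)map_page(0x1000));
	xen_translation_active = true;
	printf("xen:    %#llx\n", (unsigned long long)map_page(0x1000));
	return 0;
}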