Dongli Zhang
2021-Feb-03  23:37 UTC
[Nouveau] [PATCH RFC v1 0/6] swiotlb: 64-bit DMA buffer
This RFC is to introduce the 2nd swiotlb buffer for 64-bit DMA access.  The
prototype is based on v5.11-rc6.
The state of the art swiotlb pre-allocates <=32-bit memory in order to meet
the DMA mask requirement for some 32-bit legacy device. Considering most
devices nowadays support 64-bit DMA and IOMMU is available, the swiotlb is
not used for most of times, except:
1. The xen PVM domain requires the DMA addresses to both (1) <= the device
dma mask, and (2) continuous in machine address. Therefore, the 64-bit
device may still require swiotlb on PVM domain.
2. From source code the AMD SME/SEV will enable SWIOTLB_FORCE. As a result
it is always required to allocate from swiotlb buffer even the device dma
mask is 64-bit.
sme_early_init()
-> if (sev_active())
       swiotlb_force = SWIOTLB_FORCE;
Therefore, this RFC introduces the 2nd swiotlb buffer for 64-bit DMA
access.  For instance, the swiotlb_tbl_map_single() allocates from the 2nd
64-bit buffer if the device DMA mask min_not_zero(*hwdev->dma_mask,
hwdev->bus_dma_limit) is 64-bit.  With the RFC, the Xen/AMD will be able to
allocate >4GB swiotlb buffer.
With it being 64-bit, you can (not in this patch set but certainly
possible) allocate this at runtime. Meaning the size could change depending
on the device MMIO buffers, etc.
I have tested the patch set on Xen PVM dom0 boot via QEMU. The dom0 is boot
via:
qemu-system-x86_64 -smp 8 -m 20G -enable-kvm -vnc :9 \
-net nic -net user,hostfwd=tcp::5029-:22 \
-hda disk.img \
-device nvme,drive=nvme0,serial=deudbeaf1,max_ioqpairs=16 \
-drive file=test.qcow2,if=none,id=nvme0 \
-serial stdio
The "swiotlb=65536,1048576,force" is to configure 32-bit swiotlb as
128MB
and 64-bit swiotlb as 2048MB. The swiotlb is enforced.
vm# cat /proc/cmdline 
placeholder root=UUID=4e942d60-c228-4caf-b98e-f41c365d9703 ro text
swiotlb=65536,1048576,force quiet splash
[    5.119877] Booting paravirtualized kernel on Xen
... ...
[    5.190423] software IO TLB: Low Mem mapped [mem
0x0000000234e00000-0x000000023ce00000] (128MB)
[    6.276161] software IO TLB: High Mem mapped [mem
0x0000000166f33000-0x00000001e6f33000] (2048MB)
0x0000000234e00000 is mapped to 0x00000000001c0000 (32-bit machine address)
0x000000023ce00000-1 is mapped to 0x000000000ff3ffff (32-bit machine address)
0x0000000166f33000 is mapped to 0x00000004b7280000 (64-bit machine address)
0x00000001e6f33000-1 is mapped to 0x000000033a07ffff (64-bit machine address)
While running fio for emulated-NVMe, the swiotlb is allocating from 64-bit
io_tlb_used-highmem.
vm# cat /sys/kernel/debug/swiotlb/io_tlb_nslabs
65536
vm# cat /sys/kernel/debug/swiotlb/io_tlb_used
258
vm# cat /sys/kernel/debug/swiotlb/io_tlb_nslabs-highmem
1048576
vm# cat /sys/kernel/debug/swiotlb/io_tlb_used-highmem
58880
I also tested virtio-scsi (with
"disable-legacy=on,iommu_platform=true") on
VM with AMD SEV enabled.
qemu-system-x86_64 -enable-kvm -machine q35 -smp 36 -m 20G \
-drive if=pflash,format=raw,unit=0,file=OVMF_CODE.pure-efi.fd,readonly \
-drive if=pflash,format=raw,unit=1,file=OVMF_VARS.fd \
-hda ol7-uefi.qcow2 -serial stdio -vnc :9 \
-net nic -net user,hostfwd=tcp::5029-:22 \
-cpu EPYC -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=1 \
-machine memory-encryption=sev0 \
-device virtio-scsi-pci,id=scsi,disable-legacy=on,iommu_platform=true \
-device scsi-hd,drive=disk0 \
-drive file=test.qcow2,if=none,id=disk0,format=qcow2
The "swiotlb=65536,1048576" is to configure 32-bit swiotlb as 128MB
and
64-bit swiotlb as 2048MB. We do not need to force swiotlb because AMD SEV
will set SWIOTLB_FORCE.
# cat /proc/cmdline
BOOT_IMAGE=/vmlinuz-5.11.0-rc6swiotlb+ root=/dev/mapper/ol-root ro
crashkernel=auto rd.lvm.lv=ol/root rd.lvm.lv=ol/swap rhgb quiet
LANG=en_US.UTF-8 swiotlb=65536,1048576
[    0.729790] AMD Memory Encryption Features active: SEV
... ...
[    2.113147] software IO TLB: Low Mem mapped [mem
0x0000000073e1e000-0x000000007be1e000] (128MB)
[    2.113151] software IO TLB: High Mem mapped [mem
0x00000004e8400000-0x0000000568400000] (2048MB)
While running fio for virtio-scsi, the swiotlb is allocating from 64-bit
io_tlb_used-highmem.
vm# cat /sys/kernel/debug/swiotlb/io_tlb_nslabs
65536
vm# cat /sys/kernel/debug/swiotlb/io_tlb_used
0
vm# cat /sys/kernel/debug/swiotlb/io_tlb_nslabs-highmem
1048576
vm# cat /sys/kernel/debug/swiotlb/io_tlb_used-highmem
64647
Please let me know if there is any feedback for this idea and RFC.
Dongli Zhang (6):
   swiotlb: define new enumerated type
   swiotlb: convert variables to arrays
   swiotlb: introduce swiotlb_get_type() to calculate swiotlb buffer type
   swiotlb: enable 64-bit swiotlb
   xen-swiotlb: convert variables to arrays
   xen-swiotlb: enable 64-bit xen-swiotlb
 arch/mips/cavium-octeon/dma-octeon.c         |   3 +-
 arch/powerpc/kernel/dma-swiotlb.c            |   2 +-
 arch/powerpc/platforms/pseries/svm.c         |   8 +-
 arch/x86/kernel/pci-swiotlb.c                |   5 +-
 arch/x86/pci/sta2x11-fixup.c                 |   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_internal.c |   4 +-
 drivers/gpu/drm/i915/i915_scatterlist.h      |   2 +-
 drivers/gpu/drm/nouveau/nouveau_ttm.c        |   2 +-
 drivers/mmc/host/sdhci.c                     |   2 +-
 drivers/pci/xen-pcifront.c                   |   2 +-
 drivers/xen/swiotlb-xen.c                    | 123 ++++---
 include/linux/swiotlb.h                      |  49 ++-
 kernel/dma/swiotlb.c                         | 382 +++++++++++++---------
 13 files changed, 363 insertions(+), 223 deletions(-)
Thank you very much!
Dongli Zhang
Dongli Zhang
2021-Feb-03  23:37 UTC
[Nouveau] [PATCH RFC v1 1/6] swiotlb: define new enumerated type
This is just to define new enumerated type without functional change.
The 'SWIOTLB_LO' is to index legacy 32-bit swiotlb buffer, while the
'SWIOTLB_HI' is to index the 64-bit buffer.
This is to prepare to enable 64-bit swiotlb.
Cc: Joe Jin <joe.jin at oracle.com>
Signed-off-by: Dongli Zhang <dongli.zhang at oracle.com>
---
 include/linux/swiotlb.h | 6 ++++++
 1 file changed, 6 insertions(+)
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index d9c9fc9ca5d2..ca125c1b1281 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -17,6 +17,12 @@ enum swiotlb_force {
 	SWIOTLB_NO_FORCE,	/* swiotlb=noforce */
 };
 
+enum swiotlb_t {
+	SWIOTLB_LO,
+	SWIOTLB_HI,
+	SWIOTLB_MAX,
+};
+
 /*
  * Maximum allowable number of contiguous slabs to map,
  * must be a power of 2.  What is the appropriate value ?
-- 
2.17.1
Dongli Zhang
2021-Feb-03  23:37 UTC
[Nouveau] [PATCH RFC v1 2/6] swiotlb: convert variables to arrays
This patch converts several swiotlb related variables to arrays, in
order to maintain stat/status for different swiotlb buffers. Here are
variables involved:
- io_tlb_start and io_tlb_end
- io_tlb_nslabs and io_tlb_used
- io_tlb_list
- io_tlb_index
- max_segment
- io_tlb_orig_addr
- no_iotlb_memory
There is no functional change and this is to prepare to enable 64-bit
swiotlb.
Cc: Joe Jin <joe.jin at oracle.com>
Signed-off-by: Dongli Zhang <dongli.zhang at oracle.com>
---
 arch/powerpc/platforms/pseries/svm.c |   6 +-
 drivers/xen/swiotlb-xen.c            |   4 +-
 include/linux/swiotlb.h              |   5 +-
 kernel/dma/swiotlb.c                 | 257 ++++++++++++++-------------
 4 files changed, 140 insertions(+), 132 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/svm.c
b/arch/powerpc/platforms/pseries/svm.c
index 7b739cc7a8a9..9f8842d0da1f 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -55,9 +55,9 @@ void __init svm_swiotlb_init(void)
 	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false))
 		return;
 
-	if (io_tlb_start)
-		memblock_free_early(io_tlb_start,
-				    PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+	if (io_tlb_start[SWIOTLB_LO])
+		memblock_free_early(io_tlb_start[SWIOTLB_LO],
+				    PAGE_ALIGN(io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT));
 	panic("SVM: Cannot allocate SWIOTLB buffer");
 }
 
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 2b385c1b4a99..3261880ad859 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -192,8 +192,8 @@ int __ref xen_swiotlb_init(int verbose, bool early)
 	/*
 	 * IO TLB memory already allocated. Just use it.
 	 */
-	if (io_tlb_start != 0) {
-		xen_io_tlb_start = phys_to_virt(io_tlb_start);
+	if (io_tlb_start[SWIOTLB_LO] != 0) {
+		xen_io_tlb_start = phys_to_virt(io_tlb_start[SWIOTLB_LO]);
 		goto end;
 	}
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index ca125c1b1281..777046cd4d1b 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -76,11 +76,12 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
 
 #ifdef CONFIG_SWIOTLB
 extern enum swiotlb_force swiotlb_force;
-extern phys_addr_t io_tlb_start, io_tlb_end;
+extern phys_addr_t io_tlb_start[], io_tlb_end[];
 
 static inline bool is_swiotlb_buffer(phys_addr_t paddr)
 {
-	return paddr >= io_tlb_start && paddr < io_tlb_end;
+	return paddr >= io_tlb_start[SWIOTLB_LO] &&
+	       paddr < io_tlb_end[SWIOTLB_LO];
 }
 
 void __init swiotlb_exit(void);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 7c42df6e6100..1fbb65daa2dd 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -69,38 +69,38 @@ enum swiotlb_force swiotlb_force;
  * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by
this
  * API.
  */
-phys_addr_t io_tlb_start, io_tlb_end;
+phys_addr_t io_tlb_start[SWIOTLB_MAX], io_tlb_end[SWIOTLB_MAX];
 
 /*
  * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
  * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
  */
-static unsigned long io_tlb_nslabs;
+static unsigned long io_tlb_nslabs[SWIOTLB_MAX];
 
 /*
  * The number of used IO TLB block
  */
-static unsigned long io_tlb_used;
+static unsigned long io_tlb_used[SWIOTLB_MAX];
 
 /*
  * This is a free list describing the number of free entries available from
  * each index
  */
-static unsigned int *io_tlb_list;
-static unsigned int io_tlb_index;
+static unsigned int *io_tlb_list[SWIOTLB_MAX];
+static unsigned int io_tlb_index[SWIOTLB_MAX];
 
 /*
  * Max segment that we can provide which (if pages are contingous) will
  * not be bounced (unless SWIOTLB_FORCE is set).
  */
-static unsigned int max_segment;
+static unsigned int max_segment[SWIOTLB_MAX];
 
 /*
  * We need to save away the original address corresponding to a mapped entry
  * for the sync operations.
  */
 #define INVALID_PHYS_ADDR (~(phys_addr_t)0)
-static phys_addr_t *io_tlb_orig_addr;
+static phys_addr_t *io_tlb_orig_addr[SWIOTLB_MAX];
 
 /*
  * Protect the above data structures in the map and unmap calls
@@ -113,9 +113,9 @@ static int __init
 setup_io_tlb_npages(char *str)
 {
 	if (isdigit(*str)) {
-		io_tlb_nslabs = simple_strtoul(str, &str, 0);
+		io_tlb_nslabs[SWIOTLB_LO] = simple_strtoul(str, &str, 0);
 		/* avoid tail segment of size < IO_TLB_SEGSIZE */
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+		io_tlb_nslabs[SWIOTLB_LO] = ALIGN(io_tlb_nslabs[SWIOTLB_LO], IO_TLB_SEGSIZE);
 	}
 	if (*str == ',')
 		++str;
@@ -123,40 +123,40 @@ setup_io_tlb_npages(char *str)
 		swiotlb_force = SWIOTLB_FORCE;
 	} else if (!strcmp(str, "noforce")) {
 		swiotlb_force = SWIOTLB_NO_FORCE;
-		io_tlb_nslabs = 1;
+		io_tlb_nslabs[SWIOTLB_LO] = 1;
 	}
 
 	return 0;
 }
 early_param("swiotlb", setup_io_tlb_npages);
 
-static bool no_iotlb_memory;
+static bool no_iotlb_memory[SWIOTLB_MAX];
 
 unsigned long swiotlb_nr_tbl(void)
 {
-	return unlikely(no_iotlb_memory) ? 0 : io_tlb_nslabs;
+	return unlikely(no_iotlb_memory[SWIOTLB_LO]) ? 0 : io_tlb_nslabs[SWIOTLB_LO];
 }
 EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
 
 unsigned int swiotlb_max_segment(void)
 {
-	return unlikely(no_iotlb_memory) ? 0 : max_segment;
+	return unlikely(no_iotlb_memory[SWIOTLB_LO]) ? 0 : max_segment[SWIOTLB_LO];
 }
 EXPORT_SYMBOL_GPL(swiotlb_max_segment);
 
 void swiotlb_set_max_segment(unsigned int val)
 {
 	if (swiotlb_force == SWIOTLB_FORCE)
-		max_segment = 1;
+		max_segment[SWIOTLB_LO] = 1;
 	else
-		max_segment = rounddown(val, PAGE_SIZE);
+		max_segment[SWIOTLB_LO] = rounddown(val, PAGE_SIZE);
 }
 
 unsigned long swiotlb_size_or_default(void)
 {
 	unsigned long size;
 
-	size = io_tlb_nslabs << IO_TLB_SHIFT;
+	size = io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT;
 
 	return size ? size : (IO_TLB_DEFAULT_SIZE);
 }
@@ -170,10 +170,10 @@ void __init swiotlb_adjust_size(unsigned long new_size)
 	 * architectures such as those supporting memory encryption to
 	 * adjust/expand SWIOTLB size for their use.
 	 */
-	if (!io_tlb_nslabs) {
+	if (!io_tlb_nslabs[SWIOTLB_LO]) {
 		size = ALIGN(new_size, 1 << IO_TLB_SHIFT);
-		io_tlb_nslabs = size >> IO_TLB_SHIFT;
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+		io_tlb_nslabs[SWIOTLB_LO] = size >> IO_TLB_SHIFT;
+		io_tlb_nslabs[SWIOTLB_LO] = ALIGN(io_tlb_nslabs[SWIOTLB_LO], IO_TLB_SEGSIZE);
 
 		pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size
>> 20);
 	}
@@ -181,15 +181,16 @@ void __init swiotlb_adjust_size(unsigned long new_size)
 
 void swiotlb_print_info(void)
 {
-	unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+	unsigned long bytes = io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT;
 
-	if (no_iotlb_memory) {
+	if (no_iotlb_memory[SWIOTLB_LO]) {
 		pr_warn("No low mem\n");
 		return;
 	}
 
-	pr_info("mapped [mem %pa-%pa] (%luMB)\n", &io_tlb_start,
&io_tlb_end,
-	       bytes >> 20);
+	pr_info("mapped [mem %pa-%pa] (%luMB)\n",
+		&io_tlb_start[SWIOTLB_LO], &io_tlb_end[SWIOTLB_LO],
+		bytes >> 20);
 }
 
 /*
@@ -203,11 +204,11 @@ void __init swiotlb_update_mem_attributes(void)
 	void *vaddr;
 	unsigned long bytes;
 
-	if (no_iotlb_memory || late_alloc)
+	if (no_iotlb_memory[SWIOTLB_LO] || late_alloc)
 		return;
 
-	vaddr = phys_to_virt(io_tlb_start);
-	bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
+	vaddr = phys_to_virt(io_tlb_start[SWIOTLB_LO]);
+	bytes = PAGE_ALIGN(io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT);
 	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
 	memset(vaddr, 0, bytes);
 }
@@ -219,38 +220,38 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long
nslabs, int verbose)
 
 	bytes = nslabs << IO_TLB_SHIFT;
 
-	io_tlb_nslabs = nslabs;
-	io_tlb_start = __pa(tlb);
-	io_tlb_end = io_tlb_start + bytes;
+	io_tlb_nslabs[SWIOTLB_LO] = nslabs;
+	io_tlb_start[SWIOTLB_LO] = __pa(tlb);
+	io_tlb_end[SWIOTLB_LO] = io_tlb_start[SWIOTLB_LO] + bytes;
 
 	/*
 	 * Allocate and initialize the free list array.  This array is used
 	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
 	 * between io_tlb_start and io_tlb_end.
 	 */
-	alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(int));
-	io_tlb_list = memblock_alloc(alloc_size, PAGE_SIZE);
-	if (!io_tlb_list)
+	alloc_size = PAGE_ALIGN(io_tlb_nslabs[SWIOTLB_LO] * sizeof(int));
+	io_tlb_list[SWIOTLB_LO] = memblock_alloc(alloc_size, PAGE_SIZE);
+	if (!io_tlb_list[SWIOTLB_LO])
 		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
 		      __func__, alloc_size, PAGE_SIZE);
 
-	alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t));
-	io_tlb_orig_addr = memblock_alloc(alloc_size, PAGE_SIZE);
-	if (!io_tlb_orig_addr)
+	alloc_size = PAGE_ALIGN(io_tlb_nslabs[SWIOTLB_LO] * sizeof(phys_addr_t));
+	io_tlb_orig_addr[SWIOTLB_LO] = memblock_alloc(alloc_size, PAGE_SIZE);
+	if (!io_tlb_orig_addr[SWIOTLB_LO])
 		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
 		      __func__, alloc_size, PAGE_SIZE);
 
-	for (i = 0; i < io_tlb_nslabs; i++) {
-		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+	for (i = 0; i < io_tlb_nslabs[SWIOTLB_LO]; i++) {
+		io_tlb_list[SWIOTLB_LO][i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+		io_tlb_orig_addr[SWIOTLB_LO][i] = INVALID_PHYS_ADDR;
 	}
-	io_tlb_index = 0;
-	no_iotlb_memory = false;
+	io_tlb_index[SWIOTLB_LO] = 0;
+	no_iotlb_memory[SWIOTLB_LO] = false;
 
 	if (verbose)
 		swiotlb_print_info();
 
-	swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
+	swiotlb_set_max_segment(io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT);
 	return 0;
 }
 
@@ -265,25 +266,25 @@ swiotlb_init(int verbose)
 	unsigned char *vstart;
 	unsigned long bytes;
 
-	if (!io_tlb_nslabs) {
-		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+	if (!io_tlb_nslabs[SWIOTLB_LO]) {
+		io_tlb_nslabs[SWIOTLB_LO] = (default_size >> IO_TLB_SHIFT);
+		io_tlb_nslabs[SWIOTLB_LO] = ALIGN(io_tlb_nslabs[SWIOTLB_LO], IO_TLB_SEGSIZE);
 	}
 
-	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+	bytes = io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT;
 
 	/* Get IO TLB memory from the low pages */
 	vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE);
-	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
+	if (vstart && !swiotlb_init_with_tbl(vstart,
io_tlb_nslabs[SWIOTLB_LO], verbose))
 		return;
 
-	if (io_tlb_start) {
-		memblock_free_early(io_tlb_start,
-				    PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
-		io_tlb_start = 0;
+	if (io_tlb_start[SWIOTLB_LO]) {
+		memblock_free_early(io_tlb_start[SWIOTLB_LO],
+				    PAGE_ALIGN(io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT));
+		io_tlb_start[SWIOTLB_LO] = 0;
 	}
 	pr_warn("Cannot allocate buffer");
-	no_iotlb_memory = true;
+	no_iotlb_memory[SWIOTLB_LO] = true;
 }
 
 /*
@@ -294,22 +295,22 @@ swiotlb_init(int verbose)
 int
 swiotlb_late_init_with_default_size(size_t default_size)
 {
-	unsigned long bytes, req_nslabs = io_tlb_nslabs;
+	unsigned long bytes, req_nslabs = io_tlb_nslabs[SWIOTLB_LO];
 	unsigned char *vstart = NULL;
 	unsigned int order;
 	int rc = 0;
 
-	if (!io_tlb_nslabs) {
-		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+	if (!io_tlb_nslabs[SWIOTLB_LO]) {
+		io_tlb_nslabs[SWIOTLB_LO] = (default_size >> IO_TLB_SHIFT);
+		io_tlb_nslabs[SWIOTLB_LO] = ALIGN(io_tlb_nslabs[SWIOTLB_LO], IO_TLB_SEGSIZE);
 	}
 
 	/*
 	 * Get IO TLB memory from the low pages
 	 */
-	order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
-	io_tlb_nslabs = SLABS_PER_PAGE << order;
-	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+	order = get_order(io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT);
+	io_tlb_nslabs[SWIOTLB_LO] = SLABS_PER_PAGE << order;
+	bytes = io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT;
 
 	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
 		vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
@@ -320,15 +321,15 @@ swiotlb_late_init_with_default_size(size_t default_size)
 	}
 
 	if (!vstart) {
-		io_tlb_nslabs = req_nslabs;
+		io_tlb_nslabs[SWIOTLB_LO] = req_nslabs;
 		return -ENOMEM;
 	}
 	if (order != get_order(bytes)) {
 		pr_warn("only able to allocate %ld MB\n",
 			(PAGE_SIZE << order) >> 20);
-		io_tlb_nslabs = SLABS_PER_PAGE << order;
+		io_tlb_nslabs[SWIOTLB_LO] = SLABS_PER_PAGE << order;
 	}
-	rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
+	rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs[SWIOTLB_LO]);
 	if (rc)
 		free_pages((unsigned long)vstart, order);
 
@@ -337,10 +338,10 @@ swiotlb_late_init_with_default_size(size_t default_size)
 
 static void swiotlb_cleanup(void)
 {
-	io_tlb_end = 0;
-	io_tlb_start = 0;
-	io_tlb_nslabs = 0;
-	max_segment = 0;
+	io_tlb_end[SWIOTLB_LO] = 0;
+	io_tlb_start[SWIOTLB_LO] = 0;
+	io_tlb_nslabs[SWIOTLB_LO] = 0;
+	max_segment[SWIOTLB_LO] = 0;
 }
 
 int
@@ -350,9 +351,9 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 
 	bytes = nslabs << IO_TLB_SHIFT;
 
-	io_tlb_nslabs = nslabs;
-	io_tlb_start = virt_to_phys(tlb);
-	io_tlb_end = io_tlb_start + bytes;
+	io_tlb_nslabs[SWIOTLB_LO] = nslabs;
+	io_tlb_start[SWIOTLB_LO] = virt_to_phys(tlb);
+	io_tlb_end[SWIOTLB_LO] = io_tlb_start[SWIOTLB_LO] + bytes;
 
 	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
 	memset(tlb, 0, bytes);
@@ -362,37 +363,37 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long
nslabs)
 	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
 	 * between io_tlb_start and io_tlb_end.
 	 */
-	io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
-	                              get_order(io_tlb_nslabs * sizeof(int)));
-	if (!io_tlb_list)
+	io_tlb_list[SWIOTLB_LO] = (unsigned int *)__get_free_pages(GFP_KERNEL,
+			get_order(io_tlb_nslabs[SWIOTLB_LO] * sizeof(int)));
+	if (!io_tlb_list[SWIOTLB_LO])
 		goto cleanup3;
 
-	io_tlb_orig_addr = (phys_addr_t *)
+	io_tlb_orig_addr[SWIOTLB_LO] = (phys_addr_t *)
 		__get_free_pages(GFP_KERNEL,
-				 get_order(io_tlb_nslabs *
+				 get_order(io_tlb_nslabs[SWIOTLB_LO] *
 					   sizeof(phys_addr_t)));
-	if (!io_tlb_orig_addr)
+	if (!io_tlb_orig_addr[SWIOTLB_LO])
 		goto cleanup4;
 
-	for (i = 0; i < io_tlb_nslabs; i++) {
-		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+	for (i = 0; i < io_tlb_nslabs[SWIOTLB_LO]; i++) {
+		io_tlb_list[SWIOTLB_LO][i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+		io_tlb_orig_addr[SWIOTLB_LO][i] = INVALID_PHYS_ADDR;
 	}
-	io_tlb_index = 0;
-	no_iotlb_memory = false;
+	io_tlb_index[SWIOTLB_LO] = 0;
+	no_iotlb_memory[SWIOTLB_LO] = false;
 
 	swiotlb_print_info();
 
 	late_alloc = 1;
 
-	swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
+	swiotlb_set_max_segment(io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT);
 
 	return 0;
 
 cleanup4:
-	free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
+	free_pages((unsigned long)io_tlb_list[SWIOTLB_LO],
get_order(io_tlb_nslabs[SWIOTLB_LO] *
 	                                                 sizeof(int)));
-	io_tlb_list = NULL;
+	io_tlb_list[SWIOTLB_LO] = NULL;
 cleanup3:
 	swiotlb_cleanup();
 	return -ENOMEM;
@@ -400,23 +401,23 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long
nslabs)
 
 void __init swiotlb_exit(void)
 {
-	if (!io_tlb_orig_addr)
+	if (!io_tlb_orig_addr[SWIOTLB_LO])
 		return;
 
 	if (late_alloc) {
-		free_pages((unsigned long)io_tlb_orig_addr,
-			   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
-		free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
-								 sizeof(int)));
-		free_pages((unsigned long)phys_to_virt(io_tlb_start),
-			   get_order(io_tlb_nslabs << IO_TLB_SHIFT));
+		free_pages((unsigned long)io_tlb_orig_addr[SWIOTLB_LO],
+			   get_order(io_tlb_nslabs[SWIOTLB_LO] * sizeof(phys_addr_t)));
+		free_pages((unsigned long)io_tlb_list[SWIOTLB_LO],
+			   get_order(io_tlb_nslabs[SWIOTLB_LO] * sizeof(int)));
+		free_pages((unsigned long)phys_to_virt(io_tlb_start[SWIOTLB_LO]),
+			   get_order(io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT));
 	} else {
-		memblock_free_late(__pa(io_tlb_orig_addr),
-				   PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
-		memblock_free_late(__pa(io_tlb_list),
-				   PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
-		memblock_free_late(io_tlb_start,
-				   PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+		memblock_free_late(__pa(io_tlb_orig_addr[SWIOTLB_LO]),
+				   PAGE_ALIGN(io_tlb_nslabs[SWIOTLB_LO] * sizeof(phys_addr_t)));
+		memblock_free_late(__pa(io_tlb_list[SWIOTLB_LO]),
+				   PAGE_ALIGN(io_tlb_nslabs[SWIOTLB_LO] * sizeof(int)));
+		memblock_free_late(io_tlb_start[SWIOTLB_LO],
+				   PAGE_ALIGN(io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT));
 	}
 	swiotlb_cleanup();
 }
@@ -465,7 +466,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
phys_addr_t orig_addr,
 		size_t mapping_size, size_t alloc_size,
 		enum dma_data_direction dir, unsigned long attrs)
 {
-	dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start);
+	dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev,
io_tlb_start[SWIOTLB_LO]);
 	unsigned long flags;
 	phys_addr_t tlb_addr;
 	unsigned int nslots, stride, index, wrap;
@@ -475,7 +476,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
phys_addr_t orig_addr,
 	unsigned long max_slots;
 	unsigned long tmp_io_tlb_used;
 
-	if (no_iotlb_memory)
+	if (no_iotlb_memory[SWIOTLB_LO])
 		panic("Can not allocate SWIOTLB buffer earlier and can't now provide
you with the DMA bounce buffer");
 
 	if (mem_encrypt_active())
@@ -518,11 +519,11 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
phys_addr_t orig_addr,
 	 */
 	spin_lock_irqsave(&io_tlb_lock, flags);
 
-	if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
+	if (unlikely(nslots > io_tlb_nslabs[SWIOTLB_LO] - io_tlb_used[SWIOTLB_LO]))
 		goto not_found;
 
-	index = ALIGN(io_tlb_index, stride);
-	if (index >= io_tlb_nslabs)
+	index = ALIGN(io_tlb_index[SWIOTLB_LO], stride);
+	if (index >= io_tlb_nslabs[SWIOTLB_LO])
 		index = 0;
 	wrap = index;
 
@@ -530,7 +531,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
phys_addr_t orig_addr,
 		while (iommu_is_span_boundary(index, nslots, offset_slots,
 					      max_slots)) {
 			index += stride;
-			if (index >= io_tlb_nslabs)
+			if (index >= io_tlb_nslabs[SWIOTLB_LO])
 				index = 0;
 			if (index == wrap)
 				goto not_found;
@@ -541,39 +542,42 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
phys_addr_t orig_addr,
 		 * contiguous buffers, we allocate the buffers from that slot
 		 * and mark the entries as '0' indicating unavailable.
 		 */
-		if (io_tlb_list[index] >= nslots) {
+		if (io_tlb_list[SWIOTLB_LO][index] >= nslots) {
 			int count = 0;
 
 			for (i = index; i < (int) (index + nslots); i++)
-				io_tlb_list[i] = 0;
-			for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1)
&& io_tlb_list[i]; i--)
-				io_tlb_list[i] = ++count;
-			tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
+				io_tlb_list[SWIOTLB_LO][i] = 0;
+			for (i = index - 1;
+			     (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) &&
+			     io_tlb_list[SWIOTLB_LO][i];
+			     i--)
+				io_tlb_list[SWIOTLB_LO][i] = ++count;
+			tlb_addr = io_tlb_start[SWIOTLB_LO] + (index << IO_TLB_SHIFT);
 
 			/*
 			 * Update the indices to avoid searching in the next
 			 * round.
 			 */
-			io_tlb_index = ((index + nslots) < io_tlb_nslabs
+			io_tlb_index[SWIOTLB_LO] = ((index + nslots) < io_tlb_nslabs[SWIOTLB_LO]
 					? (index + nslots) : 0);
 
 			goto found;
 		}
 		index += stride;
-		if (index >= io_tlb_nslabs)
+		if (index >= io_tlb_nslabs[SWIOTLB_LO])
 			index = 0;
 	} while (index != wrap);
 
 not_found:
-	tmp_io_tlb_used = io_tlb_used;
+	tmp_io_tlb_used = io_tlb_used[SWIOTLB_LO];
 
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
 	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
 		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu
(slots), used %lu (slots)\n",
-			 alloc_size, io_tlb_nslabs, tmp_io_tlb_used);
+			 alloc_size, io_tlb_nslabs[SWIOTLB_LO], tmp_io_tlb_used);
 	return (phys_addr_t)DMA_MAPPING_ERROR;
 found:
-	io_tlb_used += nslots;
+	io_tlb_used[SWIOTLB_LO] += nslots;
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
 
 	/*
@@ -582,7 +586,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
phys_addr_t orig_addr,
 	 * needed.
 	 */
 	for (i = 0; i < nslots; i++)
-		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
+		io_tlb_orig_addr[SWIOTLB_LO][index+i] = orig_addr + (i <<
IO_TLB_SHIFT);
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
 	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
 		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
@@ -599,8 +603,8 @@ void swiotlb_tbl_unmap_single(struct device *hwdev,
phys_addr_t tlb_addr,
 {
 	unsigned long flags;
 	int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >>
IO_TLB_SHIFT;
-	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	phys_addr_t orig_addr = io_tlb_orig_addr[index];
+	int index = (tlb_addr - io_tlb_start[SWIOTLB_LO]) >> IO_TLB_SHIFT;
+	phys_addr_t orig_addr = io_tlb_orig_addr[SWIOTLB_LO][index];
 
 	/*
 	 * First, sync the memory before unmapping the entry
@@ -619,23 +623,26 @@ void swiotlb_tbl_unmap_single(struct device *hwdev,
phys_addr_t tlb_addr,
 	spin_lock_irqsave(&io_tlb_lock, flags);
 	{
 		count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
-			 io_tlb_list[index + nslots] : 0);
+			 io_tlb_list[SWIOTLB_LO][index + nslots] : 0);
 		/*
 		 * Step 1: return the slots to the free list, merging the
 		 * slots with superceeding slots
 		 */
 		for (i = index + nslots - 1; i >= index; i--) {
-			io_tlb_list[i] = ++count;
-			io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+			io_tlb_list[SWIOTLB_LO][i] = ++count;
+			io_tlb_orig_addr[SWIOTLB_LO][i] = INVALID_PHYS_ADDR;
 		}
 		/*
 		 * Step 2: merge the returned slots with the preceding slots,
 		 * if available (non zero)
 		 */
-		for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1)
&& io_tlb_list[i]; i--)
-			io_tlb_list[i] = ++count;
+		for (i = index - 1;
+		     (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) &&
+		     io_tlb_list[SWIOTLB_LO][i];
+		     i--)
+			io_tlb_list[SWIOTLB_LO][i] = ++count;
 
-		io_tlb_used -= nslots;
+		io_tlb_used[SWIOTLB_LO] -= nslots;
 	}
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
 }
@@ -644,8 +651,8 @@ void swiotlb_tbl_sync_single(struct device *hwdev,
phys_addr_t tlb_addr,
 			     size_t size, enum dma_data_direction dir,
 			     enum dma_sync_target target)
 {
-	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	phys_addr_t orig_addr = io_tlb_orig_addr[index];
+	int index = (tlb_addr - io_tlb_start[SWIOTLB_LO]) >> IO_TLB_SHIFT;
+	phys_addr_t orig_addr = io_tlb_orig_addr[SWIOTLB_LO][index];
 
 	if (orig_addr == INVALID_PHYS_ADDR)
 		return;
@@ -716,7 +723,7 @@ bool is_swiotlb_active(void)
 	 * When SWIOTLB is initialized, even if io_tlb_start points to physical
 	 * address zero, io_tlb_end surely doesn't.
 	 */
-	return io_tlb_end != 0;
+	return io_tlb_end[SWIOTLB_LO] != 0;
 }
 
 #ifdef CONFIG_DEBUG_FS
@@ -726,8 +733,8 @@ static int __init swiotlb_create_debugfs(void)
 	struct dentry *root;
 
 	root = debugfs_create_dir("swiotlb", NULL);
-	debugfs_create_ulong("io_tlb_nslabs", 0400, root,
&io_tlb_nslabs);
-	debugfs_create_ulong("io_tlb_used", 0400, root, &io_tlb_used);
+	debugfs_create_ulong("io_tlb_nslabs", 0400, root,
&io_tlb_nslabs[SWIOTLB_LO]);
+	debugfs_create_ulong("io_tlb_used", 0400, root,
&io_tlb_used[SWIOTLB_LO]);
 	return 0;
 }
 
-- 
2.17.1
Dongli Zhang
2021-Feb-03  23:37 UTC
[Nouveau] [PATCH RFC v1 3/6] swiotlb: introduce swiotlb_get_type() to calculate swiotlb buffer type
This patch introduces swiotlb_get_type() in order to calculate which
swiotlb buffer the given DMA address is belong to.
This is to prepare to enable 64-bit swiotlb.
Cc: Joe Jin <joe.jin at oracle.com>
Signed-off-by: Dongli Zhang <dongli.zhang at oracle.com>
---
 include/linux/swiotlb.h | 14 ++++++++++++++
 kernel/dma/swiotlb.c    |  2 ++
 2 files changed, 16 insertions(+)
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 777046cd4d1b..3d5980d77810 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -3,6 +3,7 @@
 #define __LINUX_SWIOTLB_H
 
 #include <linux/dma-direction.h>
+#include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/limits.h>
@@ -23,6 +24,8 @@ enum swiotlb_t {
 	SWIOTLB_MAX,
 };
 
+extern int swiotlb_nr;
+
 /*
  * Maximum allowable number of contiguous slabs to map,
  * must be a power of 2.  What is the appropriate value ?
@@ -84,6 +87,17 @@ static inline bool is_swiotlb_buffer(phys_addr_t paddr)
 	       paddr < io_tlb_end[SWIOTLB_LO];
 }
 
+static inline int swiotlb_get_type(phys_addr_t paddr)
+{
+	int i;
+
+	for (i = 0; i < swiotlb_nr; i++)
+		if (paddr >= io_tlb_start[i] && paddr < io_tlb_end[i])
+			return i;
+
+	return -ENOENT;
+}
+
 void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 1fbb65daa2dd..c91d3d2c3936 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -109,6 +109,8 @@ static DEFINE_SPINLOCK(io_tlb_lock);
 
 static int late_alloc;
 
+int swiotlb_nr = 1;
+
 static int __init
 setup_io_tlb_npages(char *str)
 {
-- 
2.17.1
Dongli Zhang
2021-Feb-03  23:37 UTC
[Nouveau] [PATCH RFC v1 4/6] swiotlb: enable 64-bit swiotlb
This patch is to enable the 64-bit swiotlb buffer.
The state of the art swiotlb pre-allocates <=32-bit memory in order to meet
the DMA mask requirement for some 32-bit legacy device. Considering most
devices nowadays support 64-bit DMA and IOMMU is available, the swiotlb is
not used for most of the times, except:
1. The xen PVM domain requires the DMA addresses to both (1) less than the
dma mask, and (2) continuous in machine address. Therefore, the 64-bit
device may still require swiotlb on PVM domain.
2. From source code the AMD SME/SEV will enable SWIOTLB_FORCE. As a result
it is always required to allocate from swiotlb buffer even the device dma
mask is 64-bit.
sme_early_init()
-> if (sev_active())
   swiotlb_force = SWIOTLB_FORCE;
Therefore, this patch introduces the 2nd swiotlb buffer for 64-bit DMA
access. For instance, the swiotlb_tbl_map_single() allocates from the 2nd
64-bit buffer if the device DMA mask is
min_not_zero(*hwdev->dma_mask, hwdev->bus_dma_limit).
The example to configure 64-bit swiotlb is
"swiotlb=65536,524288,force"
or "swiotlb=,524288,force", where 524288 is the size of 64-bit buffer.
With the patch, the kernel will be able to allocate >4GB swiotlb buffer.
This patch is only for swiotlb, not including xen-swiotlb.
Cc: Joe Jin <joe.jin at oracle.com>
Signed-off-by: Dongli Zhang <dongli.zhang at oracle.com>
---
 arch/mips/cavium-octeon/dma-octeon.c         |   3 +-
 arch/powerpc/kernel/dma-swiotlb.c            |   2 +-
 arch/powerpc/platforms/pseries/svm.c         |   2 +-
 arch/x86/kernel/pci-swiotlb.c                |   5 +-
 arch/x86/pci/sta2x11-fixup.c                 |   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_internal.c |   4 +-
 drivers/gpu/drm/i915/i915_scatterlist.h      |   2 +-
 drivers/gpu/drm/nouveau/nouveau_ttm.c        |   2 +-
 drivers/mmc/host/sdhci.c                     |   2 +-
 drivers/pci/xen-pcifront.c                   |   2 +-
 drivers/xen/swiotlb-xen.c                    |   9 +-
 include/linux/swiotlb.h                      |  28 +-
 kernel/dma/swiotlb.c                         | 339 +++++++++++--------
 13 files changed, 238 insertions(+), 164 deletions(-)
diff --git a/arch/mips/cavium-octeon/dma-octeon.c
b/arch/mips/cavium-octeon/dma-octeon.c
index df70308db0e6..3480555d908a 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -245,6 +245,7 @@ void __init plat_swiotlb_setup(void)
 		panic("%s: Failed to allocate %zu bytes align=%lx\n",
 		      __func__, swiotlbsize, PAGE_SIZE);
 
-	if (swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1) == -ENOMEM)
+	if (swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs,
+				  SWIOTLB_LO, 1) == -ENOMEM)
 		panic("Cannot allocate SWIOTLB buffer");
 }
diff --git a/arch/powerpc/kernel/dma-swiotlb.c
b/arch/powerpc/kernel/dma-swiotlb.c
index fc7816126a40..88113318c53f 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -20,7 +20,7 @@ void __init swiotlb_detect_4g(void)
 static int __init check_swiotlb_enabled(void)
 {
 	if (ppc_swiotlb_enable)
-		swiotlb_print_info();
+		swiotlb_print_info(SWIOTLB_LO);
 	else
 		swiotlb_exit();
 
diff --git a/arch/powerpc/platforms/pseries/svm.c
b/arch/powerpc/platforms/pseries/svm.c
index 9f8842d0da1f..77910e4ffad8 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -52,7 +52,7 @@ void __init svm_swiotlb_init(void)
 	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 
 	vstart = memblock_alloc(PAGE_ALIGN(bytes), PAGE_SIZE);
-	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false))
+	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs,
SWIOTLB_LO, false))
 		return;
 
 	if (io_tlb_start[SWIOTLB_LO])
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index c2cfa5e7c152..950624fd95a4 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -67,12 +67,15 @@ void __init pci_swiotlb_init(void)
 
 void __init pci_swiotlb_late_init(void)
 {
+	int i;
+
 	/* An IOMMU turned us off. */
 	if (!swiotlb)
 		swiotlb_exit();
 	else {
 		printk(KERN_INFO "PCI-DMA: "
 		       "Using software bounce buffering for IO (SWIOTLB)\n");
-		swiotlb_print_info();
+		for (i = 0; i < swiotlb_nr; i++)
+			swiotlb_print_info(i);
 	}
 }
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 7d2525691854..c440520b2055 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -57,7 +57,7 @@ static void sta2x11_new_instance(struct pci_dev *pdev)
 		int size = STA2X11_SWIOTLB_SIZE;
 		/* First instance: register your own swiotlb area */
 		dev_info(&pdev->dev, "Using SWIOTLB (size %i)\n", size);
-		if (swiotlb_late_init_with_default_size(size))
+		if (swiotlb_late_init_with_default_size(size), SWIOTLB_LO)
 			dev_emerg(&pdev->dev, "init swiotlb failed\n");
 	}
 	list_add(&instance->list, &sta2x11_instance_list);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index ad22f42541bd..947683f2e568 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -42,10 +42,10 @@ static int i915_gem_object_get_pages_internal(struct
drm_i915_gem_object *obj)
 
 	max_order = MAX_ORDER;
 #ifdef CONFIG_SWIOTLB
-	if (swiotlb_nr_tbl()) {
+	if (swiotlb_nr_tbl(SWIOTLB_LO)) {
 		unsigned int max_segment;
 
-		max_segment = swiotlb_max_segment();
+		max_segment = swiotlb_max_segment(SWIOTLB_LO);
 		if (max_segment) {
 			max_segment = max_t(unsigned int, max_segment,
 					    PAGE_SIZE) >> PAGE_SHIFT;
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h
b/drivers/gpu/drm/i915/i915_scatterlist.h
index 9cb26a224034..c63c7f6941f6 100644
--- a/drivers/gpu/drm/i915/i915_scatterlist.h
+++ b/drivers/gpu/drm/i915/i915_scatterlist.h
@@ -118,7 +118,7 @@ static inline unsigned int i915_sg_page_sizes(struct
scatterlist *sg)
 
 static inline unsigned int i915_sg_segment_size(void)
 {
-	unsigned int size = swiotlb_max_segment();
+	unsigned int size = swiotlb_max_segment(SWIOTLB_LO);
 
 	if (size == 0)
 		size = UINT_MAX;
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c
b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index a37bc3d7b38b..0919b207ac47 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -321,7 +321,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 	}
 
 #if IS_ENABLED(CONFIG_SWIOTLB) && IS_ENABLED(CONFIG_X86)
-	need_swiotlb = !!swiotlb_nr_tbl();
+	need_swiotlb = !!swiotlb_nr_tbl(SWIOTLB_LO);
 #endif
 
 	ret = ttm_bo_device_init(&drm->ttm.bdev, &nouveau_bo_driver,
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 646823ddd317..1f7fb912d5a9 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -4582,7 +4582,7 @@ int sdhci_setup_host(struct sdhci_host *host)
 		mmc->max_segs = SDHCI_MAX_SEGS;
 	} else if (host->flags & SDHCI_USE_SDMA) {
 		mmc->max_segs = 1;
-		if (swiotlb_max_segment()) {
+		if (swiotlb_max_segment(SWIOTLB_LO)) {
 			unsigned int max_req_size = (1 << IO_TLB_SHIFT) *
 						IO_TLB_SEGSIZE;
 			mmc->max_req_size = min(mmc->max_req_size,
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index c6fe0cfec0f6..9509ed9b4126 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -693,7 +693,7 @@ static int pcifront_connect_and_init_dma(struct
pcifront_device *pdev)
 
 	spin_unlock(&pcifront_dev_lock);
 
-	if (!err && !swiotlb_nr_tbl()) {
+	if (!err && !swiotlb_nr_tbl(SWIOTLB_LO)) {
 		err = pci_xen_swiotlb_init_late();
 		if (err)
 			dev_err(&pdev->xdev->dev, "Could not setup SWIOTLB!\n");
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 3261880ad859..662638093542 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -184,7 +184,7 @@ int __ref xen_swiotlb_init(int verbose, bool early)
 	enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN;
 	unsigned int repeat = 3;
 
-	xen_io_tlb_nslabs = swiotlb_nr_tbl();
+	xen_io_tlb_nslabs = swiotlb_nr_tbl(SWIOTLB_LO);
 retry:
 	bytes = xen_set_nslabs(xen_io_tlb_nslabs);
 	order = get_order(xen_io_tlb_nslabs << IO_TLB_SHIFT);
@@ -245,16 +245,17 @@ int __ref xen_swiotlb_init(int verbose, bool early)
 	}
 	if (early) {
 		if (swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs,
-			 verbose))
+					  SWIOTLB_LO, verbose))
 			panic("Cannot allocate SWIOTLB buffer");
 		rc = 0;
 	} else
-		rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs);
+		rc = swiotlb_late_init_with_tbl(xen_io_tlb_start,
+						xen_io_tlb_nslabs, SWIOTLB_LO);
 
 end:
 	xen_io_tlb_end = xen_io_tlb_start + bytes;
 	if (!rc)
-		swiotlb_set_max_segment(PAGE_SIZE);
+		swiotlb_set_max_segment(PAGE_SIZE, SWIOTLB_LO);
 
 	return rc;
 error:
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 3d5980d77810..8ba45fddfb14 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -43,11 +43,14 @@ extern int swiotlb_nr;
 #define IO_TLB_DEFAULT_SIZE (64UL<<20)
 
 extern void swiotlb_init(int verbose);
-int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
-extern unsigned long swiotlb_nr_tbl(void);
+int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs,
+			  enum swiotlb_t type, int verbose);
+extern unsigned long swiotlb_nr_tbl(enum swiotlb_t type);
 unsigned long swiotlb_size_or_default(void);
-extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
-extern int swiotlb_late_init_with_default_size(size_t default_size);
+extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs,
+				      enum swiotlb_t type);
+extern int swiotlb_late_init_with_default_size(size_t default_size,
+					       enum swiotlb_t type);
 extern void __init swiotlb_update_mem_attributes(void);
 
 /*
@@ -83,8 +86,13 @@ extern phys_addr_t io_tlb_start[], io_tlb_end[];
 
 static inline bool is_swiotlb_buffer(phys_addr_t paddr)
 {
-	return paddr >= io_tlb_start[SWIOTLB_LO] &&
-	       paddr < io_tlb_end[SWIOTLB_LO];
+	int i;
+
+	for (i = 0; i < swiotlb_nr; i++)
+		if (paddr >= io_tlb_start[i] && paddr < io_tlb_end[i])
+			return true;
+
+	return false;
 }
 
 static inline int swiotlb_get_type(phys_addr_t paddr)
@@ -99,7 +107,7 @@ static inline int swiotlb_get_type(phys_addr_t paddr)
 }
 
 void __init swiotlb_exit(void);
-unsigned int swiotlb_max_segment(void);
+unsigned int swiotlb_max_segment(enum swiotlb_t type);
 size_t swiotlb_max_mapping_size(struct device *dev);
 bool is_swiotlb_active(void);
 void __init swiotlb_adjust_size(unsigned long new_size);
@@ -112,7 +120,7 @@ static inline bool is_swiotlb_buffer(phys_addr_t paddr)
 static inline void swiotlb_exit(void)
 {
 }
-static inline unsigned int swiotlb_max_segment(void)
+static inline unsigned int swiotlb_max_segment(enum swiotlb_t type)
 {
 	return 0;
 }
@@ -131,7 +139,7 @@ static inline void swiotlb_adjust_size(unsigned long
new_size)
 }
 #endif /* CONFIG_SWIOTLB */
 
-extern void swiotlb_print_info(void);
-extern void swiotlb_set_max_segment(unsigned int);
+extern void swiotlb_print_info(enum swiotlb_t type);
+extern void swiotlb_set_max_segment(unsigned int val, enum swiotlb_t type);
 
 #endif /* __LINUX_SWIOTLB_H */
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index c91d3d2c3936..cd28db5b016a 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -111,6 +111,11 @@ static int late_alloc;
 
 int swiotlb_nr = 1;
 
+static const char * const swiotlb_name[] = {
+	"Low Mem",
+	"High Mem"
+};
+
 static int __init
 setup_io_tlb_npages(char *str)
 {
@@ -121,11 +126,25 @@ setup_io_tlb_npages(char *str)
 	}
 	if (*str == ',')
 		++str;
+
+	if (isdigit(*str)) {
+		io_tlb_nslabs[SWIOTLB_HI] = simple_strtoul(str, &str, 0);
+		/* avoid tail segment of size < IO_TLB_SEGSIZE */
+		io_tlb_nslabs[SWIOTLB_HI] = ALIGN(io_tlb_nslabs[SWIOTLB_HI], IO_TLB_SEGSIZE);
+
+		swiotlb_nr = 2;
+	}
+
+	if (*str == ',')
+		++str;
+
 	if (!strcmp(str, "force")) {
 		swiotlb_force = SWIOTLB_FORCE;
 	} else if (!strcmp(str, "noforce")) {
 		swiotlb_force = SWIOTLB_NO_FORCE;
 		io_tlb_nslabs[SWIOTLB_LO] = 1;
+		if (swiotlb_nr > 1)
+			io_tlb_nslabs[SWIOTLB_HI] = 1;
 	}
 
 	return 0;
@@ -134,24 +153,24 @@ early_param("swiotlb", setup_io_tlb_npages);
 
 static bool no_iotlb_memory[SWIOTLB_MAX];
 
-unsigned long swiotlb_nr_tbl(void)
+unsigned long swiotlb_nr_tbl(enum swiotlb_t type)
 {
-	return unlikely(no_iotlb_memory[SWIOTLB_LO]) ? 0 : io_tlb_nslabs[SWIOTLB_LO];
+	return unlikely(no_iotlb_memory[type]) ? 0 : io_tlb_nslabs[type];
 }
 EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
 
-unsigned int swiotlb_max_segment(void)
+unsigned int swiotlb_max_segment(enum swiotlb_t type)
 {
-	return unlikely(no_iotlb_memory[SWIOTLB_LO]) ? 0 : max_segment[SWIOTLB_LO];
+	return unlikely(no_iotlb_memory[type]) ? 0 : max_segment[type];
 }
 EXPORT_SYMBOL_GPL(swiotlb_max_segment);
 
-void swiotlb_set_max_segment(unsigned int val)
+void swiotlb_set_max_segment(unsigned int val, enum swiotlb_t type)
 {
 	if (swiotlb_force == SWIOTLB_FORCE)
-		max_segment[SWIOTLB_LO] = 1;
+		max_segment[type] = 1;
 	else
-		max_segment[SWIOTLB_LO] = rounddown(val, PAGE_SIZE);
+		max_segment[type] = rounddown(val, PAGE_SIZE);
 }
 
 unsigned long swiotlb_size_or_default(void)
@@ -181,18 +200,18 @@ void __init swiotlb_adjust_size(unsigned long new_size)
 	}
 }
 
-void swiotlb_print_info(void)
+void swiotlb_print_info(enum swiotlb_t type)
 {
-	unsigned long bytes = io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT;
+	unsigned long bytes = io_tlb_nslabs[type] << IO_TLB_SHIFT;
 
-	if (no_iotlb_memory[SWIOTLB_LO]) {
-		pr_warn("No low mem\n");
+	if (no_iotlb_memory[type]) {
+		pr_warn("No low mem with %s\n", swiotlb_name[type]);
 		return;
 	}
 
-	pr_info("mapped [mem %pa-%pa] (%luMB)\n",
-		&io_tlb_start[SWIOTLB_LO], &io_tlb_end[SWIOTLB_LO],
-		bytes >> 20);
+	pr_info("%s mapped [mem %pa-%pa] (%luMB)\n",
+		swiotlb_name[type],
+		&io_tlb_start[type], &io_tlb_end[type], bytes >> 20);
 }
 
 /*
@@ -205,88 +224,104 @@ void __init swiotlb_update_mem_attributes(void)
 {
 	void *vaddr;
 	unsigned long bytes;
+	int i;
 
-	if (no_iotlb_memory[SWIOTLB_LO] || late_alloc)
-		return;
+	for (i = 0; i < swiotlb_nr; i++) {
+		if (no_iotlb_memory[i] || late_alloc)
+			continue;
 
-	vaddr = phys_to_virt(io_tlb_start[SWIOTLB_LO]);
-	bytes = PAGE_ALIGN(io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT);
-	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
-	memset(vaddr, 0, bytes);
+		vaddr = phys_to_virt(io_tlb_start[i]);
+		bytes = PAGE_ALIGN(io_tlb_nslabs[i] << IO_TLB_SHIFT);
+		set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
+		memset(vaddr, 0, bytes);
+	}
 }
 
-int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
+int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs,
+				 enum swiotlb_t type, int verbose)
 {
 	unsigned long i, bytes;
 	size_t alloc_size;
 
 	bytes = nslabs << IO_TLB_SHIFT;
 
-	io_tlb_nslabs[SWIOTLB_LO] = nslabs;
-	io_tlb_start[SWIOTLB_LO] = __pa(tlb);
-	io_tlb_end[SWIOTLB_LO] = io_tlb_start[SWIOTLB_LO] + bytes;
+	io_tlb_nslabs[type] = nslabs;
+	io_tlb_start[type] = __pa(tlb);
+	io_tlb_end[type] = io_tlb_start[type] + bytes;
 
 	/*
 	 * Allocate and initialize the free list array.  This array is used
 	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
 	 * between io_tlb_start and io_tlb_end.
 	 */
-	alloc_size = PAGE_ALIGN(io_tlb_nslabs[SWIOTLB_LO] * sizeof(int));
-	io_tlb_list[SWIOTLB_LO] = memblock_alloc(alloc_size, PAGE_SIZE);
-	if (!io_tlb_list[SWIOTLB_LO])
+	alloc_size = PAGE_ALIGN(io_tlb_nslabs[type] * sizeof(int));
+	io_tlb_list[type] = memblock_alloc(alloc_size, PAGE_SIZE);
+	if (!io_tlb_list[type])
 		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
 		      __func__, alloc_size, PAGE_SIZE);
 
-	alloc_size = PAGE_ALIGN(io_tlb_nslabs[SWIOTLB_LO] * sizeof(phys_addr_t));
-	io_tlb_orig_addr[SWIOTLB_LO] = memblock_alloc(alloc_size, PAGE_SIZE);
-	if (!io_tlb_orig_addr[SWIOTLB_LO])
+	alloc_size = PAGE_ALIGN(io_tlb_nslabs[type] * sizeof(phys_addr_t));
+	io_tlb_orig_addr[type] = memblock_alloc(alloc_size, PAGE_SIZE);
+	if (!io_tlb_orig_addr[type])
 		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
 		      __func__, alloc_size, PAGE_SIZE);
 
-	for (i = 0; i < io_tlb_nslabs[SWIOTLB_LO]; i++) {
-		io_tlb_list[SWIOTLB_LO][i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-		io_tlb_orig_addr[SWIOTLB_LO][i] = INVALID_PHYS_ADDR;
+	for (i = 0; i < io_tlb_nslabs[type]; i++) {
+		io_tlb_list[type][i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+		io_tlb_orig_addr[type][i] = INVALID_PHYS_ADDR;
 	}
-	io_tlb_index[SWIOTLB_LO] = 0;
-	no_iotlb_memory[SWIOTLB_LO] = false;
+	io_tlb_index[type] = 0;
+	no_iotlb_memory[type] = false;
 
 	if (verbose)
-		swiotlb_print_info();
+		swiotlb_print_info(type);
 
-	swiotlb_set_max_segment(io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT);
+	swiotlb_set_max_segment(io_tlb_nslabs[type] << IO_TLB_SHIFT, type);
 	return 0;
 }
 
-/*
- * Statically reserve bounce buffer space and initialize bounce buffer data
- * structures for the software IO TLB used to implement the DMA API.
- */
-void  __init
-swiotlb_init(int verbose)
+static void __init
+swiotlb_init_type(enum swiotlb_t type, int verbose)
 {
 	size_t default_size = IO_TLB_DEFAULT_SIZE;
 	unsigned char *vstart;
 	unsigned long bytes;
 
-	if (!io_tlb_nslabs[SWIOTLB_LO]) {
-		io_tlb_nslabs[SWIOTLB_LO] = (default_size >> IO_TLB_SHIFT);
-		io_tlb_nslabs[SWIOTLB_LO] = ALIGN(io_tlb_nslabs[SWIOTLB_LO], IO_TLB_SEGSIZE);
+	if (!io_tlb_nslabs[type]) {
+		io_tlb_nslabs[type] = (default_size >> IO_TLB_SHIFT);
+		io_tlb_nslabs[type] = ALIGN(io_tlb_nslabs[type], IO_TLB_SEGSIZE);
 	}
 
-	bytes = io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT;
+	bytes = io_tlb_nslabs[type] << IO_TLB_SHIFT;
+
+	if (type == SWIOTLB_LO)
+		vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE);
+	else
+		vstart = memblock_alloc(PAGE_ALIGN(bytes), PAGE_SIZE);
 
-	/* Get IO TLB memory from the low pages */
-	vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE);
-	if (vstart && !swiotlb_init_with_tbl(vstart,
io_tlb_nslabs[SWIOTLB_LO], verbose))
+	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs[type],
type, verbose))
 		return;
 
-	if (io_tlb_start[SWIOTLB_LO]) {
-		memblock_free_early(io_tlb_start[SWIOTLB_LO],
-				    PAGE_ALIGN(io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT));
-		io_tlb_start[SWIOTLB_LO] = 0;
+	if (io_tlb_start[type]) {
+		memblock_free_early(io_tlb_start[type],
+				    PAGE_ALIGN(io_tlb_nslabs[type] << IO_TLB_SHIFT));
+		io_tlb_start[type] = 0;
 	}
-	pr_warn("Cannot allocate buffer");
-	no_iotlb_memory[SWIOTLB_LO] = true;
+	pr_warn("Cannot allocate buffer %s\n", swiotlb_name[type]);
+	no_iotlb_memory[type] = true;
+}
+
+/*
+ * Statically reserve bounce buffer space and initialize bounce buffer data
+ * structures for the software IO TLB used to implement the DMA API.
+ */
+void  __init
+swiotlb_init(int verbose)
+{
+	int i;
+
+	for (i = 0; i < swiotlb_nr; i++)
+		swiotlb_init_type(i, verbose);
 }
 
 /*
@@ -295,67 +330,68 @@ swiotlb_init(int verbose)
  * This should be just like above, but with some error catching.
  */
 int
-swiotlb_late_init_with_default_size(size_t default_size)
+swiotlb_late_init_with_default_size(size_t default_size, enum swiotlb_t type)
 {
-	unsigned long bytes, req_nslabs = io_tlb_nslabs[SWIOTLB_LO];
+	unsigned long bytes, req_nslabs = io_tlb_nslabs[type];
 	unsigned char *vstart = NULL;
 	unsigned int order;
 	int rc = 0;
+	gfp_t gfp_mask = (type == SWIOTLB_LO) ? GFP_DMA | __GFP_NOWARN :
+						GFP_KERNEL | __GFP_NOWARN;
 
-	if (!io_tlb_nslabs[SWIOTLB_LO]) {
-		io_tlb_nslabs[SWIOTLB_LO] = (default_size >> IO_TLB_SHIFT);
-		io_tlb_nslabs[SWIOTLB_LO] = ALIGN(io_tlb_nslabs[SWIOTLB_LO], IO_TLB_SEGSIZE);
+	if (!io_tlb_nslabs[type]) {
+		io_tlb_nslabs[type] = (default_size >> IO_TLB_SHIFT);
+		io_tlb_nslabs[type] = ALIGN(io_tlb_nslabs[type], IO_TLB_SEGSIZE);
 	}
 
 	/*
 	 * Get IO TLB memory from the low pages
 	 */
-	order = get_order(io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT);
-	io_tlb_nslabs[SWIOTLB_LO] = SLABS_PER_PAGE << order;
-	bytes = io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT;
+	order = get_order(io_tlb_nslabs[type] << IO_TLB_SHIFT);
+	io_tlb_nslabs[type] = SLABS_PER_PAGE << order;
+	bytes = io_tlb_nslabs[type] << IO_TLB_SHIFT;
 
 	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-		vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
-						  order);
+		vstart = (void *)__get_free_pages(gfp_mask, order);
 		if (vstart)
 			break;
 		order--;
 	}
 
 	if (!vstart) {
-		io_tlb_nslabs[SWIOTLB_LO] = req_nslabs;
+		io_tlb_nslabs[type] = req_nslabs;
 		return -ENOMEM;
 	}
 	if (order != get_order(bytes)) {
 		pr_warn("only able to allocate %ld MB\n",
 			(PAGE_SIZE << order) >> 20);
-		io_tlb_nslabs[SWIOTLB_LO] = SLABS_PER_PAGE << order;
+		io_tlb_nslabs[type] = SLABS_PER_PAGE << order;
 	}
-	rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs[SWIOTLB_LO]);
+	rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs[type], type);
 	if (rc)
 		free_pages((unsigned long)vstart, order);
 
 	return rc;
 }
 
-static void swiotlb_cleanup(void)
+static void swiotlb_cleanup(enum swiotlb_t type)
 {
-	io_tlb_end[SWIOTLB_LO] = 0;
-	io_tlb_start[SWIOTLB_LO] = 0;
-	io_tlb_nslabs[SWIOTLB_LO] = 0;
-	max_segment[SWIOTLB_LO] = 0;
+	io_tlb_end[type] = 0;
+	io_tlb_start[type] = 0;
+	io_tlb_nslabs[type] = 0;
+	max_segment[type] = 0;
 }
 
 int
-swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
+swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs, enum swiotlb_t
type)
 {
 	unsigned long i, bytes;
 
 	bytes = nslabs << IO_TLB_SHIFT;
 
-	io_tlb_nslabs[SWIOTLB_LO] = nslabs;
-	io_tlb_start[SWIOTLB_LO] = virt_to_phys(tlb);
-	io_tlb_end[SWIOTLB_LO] = io_tlb_start[SWIOTLB_LO] + bytes;
+	io_tlb_nslabs[type] = nslabs;
+	io_tlb_start[type] = virt_to_phys(tlb);
+	io_tlb_end[type] = io_tlb_start[type] + bytes;
 
 	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
 	memset(tlb, 0, bytes);
@@ -365,63 +401,67 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long
nslabs)
 	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
 	 * between io_tlb_start and io_tlb_end.
 	 */
-	io_tlb_list[SWIOTLB_LO] = (unsigned int *)__get_free_pages(GFP_KERNEL,
-			get_order(io_tlb_nslabs[SWIOTLB_LO] * sizeof(int)));
-	if (!io_tlb_list[SWIOTLB_LO])
+	io_tlb_list[type] = (unsigned int *)__get_free_pages(GFP_KERNEL,
+			get_order(io_tlb_nslabs[type] * sizeof(int)));
+	if (!io_tlb_list[type])
 		goto cleanup3;
 
-	io_tlb_orig_addr[SWIOTLB_LO] = (phys_addr_t *)
+	io_tlb_orig_addr[type] = (phys_addr_t *)
 		__get_free_pages(GFP_KERNEL,
-				 get_order(io_tlb_nslabs[SWIOTLB_LO] *
+				 get_order(io_tlb_nslabs[type] *
 					   sizeof(phys_addr_t)));
-	if (!io_tlb_orig_addr[SWIOTLB_LO])
+	if (!io_tlb_orig_addr[type])
 		goto cleanup4;
 
-	for (i = 0; i < io_tlb_nslabs[SWIOTLB_LO]; i++) {
-		io_tlb_list[SWIOTLB_LO][i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-		io_tlb_orig_addr[SWIOTLB_LO][i] = INVALID_PHYS_ADDR;
+	for (i = 0; i < io_tlb_nslabs[type]; i++) {
+		io_tlb_list[type][i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+		io_tlb_orig_addr[type][i] = INVALID_PHYS_ADDR;
 	}
-	io_tlb_index[SWIOTLB_LO] = 0;
-	no_iotlb_memory[SWIOTLB_LO] = false;
+	io_tlb_index[type] = 0;
+	no_iotlb_memory[type] = false;
 
-	swiotlb_print_info();
+	swiotlb_print_info(type);
 
 	late_alloc = 1;
 
-	swiotlb_set_max_segment(io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT);
+	swiotlb_set_max_segment(io_tlb_nslabs[type] << IO_TLB_SHIFT, type);
 
 	return 0;
 
 cleanup4:
-	free_pages((unsigned long)io_tlb_list[SWIOTLB_LO],
get_order(io_tlb_nslabs[SWIOTLB_LO] *
+	free_pages((unsigned long)io_tlb_list[type], get_order(io_tlb_nslabs[type] *
 	                                                 sizeof(int)));
-	io_tlb_list[SWIOTLB_LO] = NULL;
+	io_tlb_list[type] = NULL;
 cleanup3:
-	swiotlb_cleanup();
+	swiotlb_cleanup(type);
 	return -ENOMEM;
 }
 
 void __init swiotlb_exit(void)
 {
-	if (!io_tlb_orig_addr[SWIOTLB_LO])
-		return;
+	int i;
 
-	if (late_alloc) {
-		free_pages((unsigned long)io_tlb_orig_addr[SWIOTLB_LO],
-			   get_order(io_tlb_nslabs[SWIOTLB_LO] * sizeof(phys_addr_t)));
-		free_pages((unsigned long)io_tlb_list[SWIOTLB_LO],
-			   get_order(io_tlb_nslabs[SWIOTLB_LO] * sizeof(int)));
-		free_pages((unsigned long)phys_to_virt(io_tlb_start[SWIOTLB_LO]),
-			   get_order(io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT));
-	} else {
-		memblock_free_late(__pa(io_tlb_orig_addr[SWIOTLB_LO]),
-				   PAGE_ALIGN(io_tlb_nslabs[SWIOTLB_LO] * sizeof(phys_addr_t)));
-		memblock_free_late(__pa(io_tlb_list[SWIOTLB_LO]),
-				   PAGE_ALIGN(io_tlb_nslabs[SWIOTLB_LO] * sizeof(int)));
-		memblock_free_late(io_tlb_start[SWIOTLB_LO],
-				   PAGE_ALIGN(io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT));
+	for (i = 0; i < swiotlb_nr; i++) {
+		if (!io_tlb_orig_addr[i])
+			continue;
+
+		if (late_alloc) {
+			free_pages((unsigned long)io_tlb_orig_addr[i],
+				   get_order(io_tlb_nslabs[i] * sizeof(phys_addr_t)));
+			free_pages((unsigned long)io_tlb_list[i],
+				   get_order(io_tlb_nslabs[i] * sizeof(int)));
+			free_pages((unsigned long)phys_to_virt(io_tlb_start[i]),
+				   get_order(io_tlb_nslabs[i] << IO_TLB_SHIFT));
+		} else {
+			memblock_free_late(__pa(io_tlb_orig_addr[i]),
+					   PAGE_ALIGN(io_tlb_nslabs[i] * sizeof(phys_addr_t)));
+			memblock_free_late(__pa(io_tlb_list[i]),
+					   PAGE_ALIGN(io_tlb_nslabs[i] * sizeof(int)));
+			memblock_free_late(io_tlb_start[i],
+					   PAGE_ALIGN(io_tlb_nslabs[i] << IO_TLB_SHIFT));
+		}
+		swiotlb_cleanup(i);
 	}
-	swiotlb_cleanup();
 }
 
 /*
@@ -468,7 +508,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
phys_addr_t orig_addr,
 		size_t mapping_size, size_t alloc_size,
 		enum dma_data_direction dir, unsigned long attrs)
 {
-	dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev,
io_tlb_start[SWIOTLB_LO]);
+	dma_addr_t tbl_dma_addr;
 	unsigned long flags;
 	phys_addr_t tlb_addr;
 	unsigned int nslots, stride, index, wrap;
@@ -477,8 +517,16 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
phys_addr_t orig_addr,
 	unsigned long offset_slots;
 	unsigned long max_slots;
 	unsigned long tmp_io_tlb_used;
+	unsigned long dma_mask = min_not_zero(*hwdev->dma_mask,
+					      hwdev->bus_dma_limit);
+	int type;
 
-	if (no_iotlb_memory[SWIOTLB_LO])
+	if (swiotlb_nr > 1 && dma_mask == DMA_BIT_MASK(64))
+		type = SWIOTLB_HI;
+	else
+		type = SWIOTLB_LO;
+
+	if (no_iotlb_memory[type])
 		panic("Can not allocate SWIOTLB buffer earlier and can't now provide
you with the DMA bounce buffer");
 
 	if (mem_encrypt_active())
@@ -490,6 +538,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
phys_addr_t orig_addr,
 		return (phys_addr_t)DMA_MAPPING_ERROR;
 	}
 
+	tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start[type]);
+
 	mask = dma_get_seg_boundary(hwdev);
 
 	tbl_dma_addr &= mask;
@@ -521,11 +571,11 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
phys_addr_t orig_addr,
 	 */
 	spin_lock_irqsave(&io_tlb_lock, flags);
 
-	if (unlikely(nslots > io_tlb_nslabs[SWIOTLB_LO] - io_tlb_used[SWIOTLB_LO]))
+	if (unlikely(nslots > io_tlb_nslabs[type] - io_tlb_used[type]))
 		goto not_found;
 
-	index = ALIGN(io_tlb_index[SWIOTLB_LO], stride);
-	if (index >= io_tlb_nslabs[SWIOTLB_LO])
+	index = ALIGN(io_tlb_index[type], stride);
+	if (index >= io_tlb_nslabs[type])
 		index = 0;
 	wrap = index;
 
@@ -533,7 +583,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
phys_addr_t orig_addr,
 		while (iommu_is_span_boundary(index, nslots, offset_slots,
 					      max_slots)) {
 			index += stride;
-			if (index >= io_tlb_nslabs[SWIOTLB_LO])
+			if (index >= io_tlb_nslabs[type])
 				index = 0;
 			if (index == wrap)
 				goto not_found;
@@ -544,42 +594,42 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
phys_addr_t orig_addr,
 		 * contiguous buffers, we allocate the buffers from that slot
 		 * and mark the entries as '0' indicating unavailable.
 		 */
-		if (io_tlb_list[SWIOTLB_LO][index] >= nslots) {
+		if (io_tlb_list[type][index] >= nslots) {
 			int count = 0;
 
 			for (i = index; i < (int) (index + nslots); i++)
-				io_tlb_list[SWIOTLB_LO][i] = 0;
+				io_tlb_list[type][i] = 0;
 			for (i = index - 1;
 			     (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) &&
-			     io_tlb_list[SWIOTLB_LO][i];
+			     io_tlb_list[type][i];
 			     i--)
-				io_tlb_list[SWIOTLB_LO][i] = ++count;
-			tlb_addr = io_tlb_start[SWIOTLB_LO] + (index << IO_TLB_SHIFT);
+				io_tlb_list[type][i] = ++count;
+			tlb_addr = io_tlb_start[type] + (index << IO_TLB_SHIFT);
 
 			/*
 			 * Update the indices to avoid searching in the next
 			 * round.
 			 */
-			io_tlb_index[SWIOTLB_LO] = ((index + nslots) < io_tlb_nslabs[SWIOTLB_LO]
+			io_tlb_index[type] = ((index + nslots) < io_tlb_nslabs[type]
 					? (index + nslots) : 0);
 
 			goto found;
 		}
 		index += stride;
-		if (index >= io_tlb_nslabs[SWIOTLB_LO])
+		if (index >= io_tlb_nslabs[type])
 			index = 0;
 	} while (index != wrap);
 
 not_found:
-	tmp_io_tlb_used = io_tlb_used[SWIOTLB_LO];
+	tmp_io_tlb_used = io_tlb_used[type];
 
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
 	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
-		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu
(slots), used %lu (slots)\n",
-			 alloc_size, io_tlb_nslabs[SWIOTLB_LO], tmp_io_tlb_used);
+		dev_warn(hwdev, "%s swiotlb buffer is full (sz: %zd bytes), total %lu
(slots), used %lu (slots)\n",
+			 swiotlb_name[type], alloc_size, io_tlb_nslabs[type], tmp_io_tlb_used);
 	return (phys_addr_t)DMA_MAPPING_ERROR;
 found:
-	io_tlb_used[SWIOTLB_LO] += nslots;
+	io_tlb_used[type] += nslots;
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
 
 	/*
@@ -588,7 +638,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
phys_addr_t orig_addr,
 	 * needed.
 	 */
 	for (i = 0; i < nslots; i++)
-		io_tlb_orig_addr[SWIOTLB_LO][index+i] = orig_addr + (i <<
IO_TLB_SHIFT);
+		io_tlb_orig_addr[type][index+i] = orig_addr + (i << IO_TLB_SHIFT);
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
 	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
 		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
@@ -605,8 +655,9 @@ void swiotlb_tbl_unmap_single(struct device *hwdev,
phys_addr_t tlb_addr,
 {
 	unsigned long flags;
 	int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >>
IO_TLB_SHIFT;
-	int index = (tlb_addr - io_tlb_start[SWIOTLB_LO]) >> IO_TLB_SHIFT;
-	phys_addr_t orig_addr = io_tlb_orig_addr[SWIOTLB_LO][index];
+	int type = swiotlb_get_type(tlb_addr);
+	int index = (tlb_addr - io_tlb_start[type]) >> IO_TLB_SHIFT;
+	phys_addr_t orig_addr = io_tlb_orig_addr[type][index];
 
 	/*
 	 * First, sync the memory before unmapping the entry
@@ -625,14 +676,14 @@ void swiotlb_tbl_unmap_single(struct device *hwdev,
phys_addr_t tlb_addr,
 	spin_lock_irqsave(&io_tlb_lock, flags);
 	{
 		count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
-			 io_tlb_list[SWIOTLB_LO][index + nslots] : 0);
+			 io_tlb_list[type][index + nslots] : 0);
 		/*
 		 * Step 1: return the slots to the free list, merging the
 		 * slots with superceeding slots
 		 */
 		for (i = index + nslots - 1; i >= index; i--) {
-			io_tlb_list[SWIOTLB_LO][i] = ++count;
-			io_tlb_orig_addr[SWIOTLB_LO][i] = INVALID_PHYS_ADDR;
+			io_tlb_list[type][i] = ++count;
+			io_tlb_orig_addr[type][i] = INVALID_PHYS_ADDR;
 		}
 		/*
 		 * Step 2: merge the returned slots with the preceding slots,
@@ -640,11 +691,11 @@ void swiotlb_tbl_unmap_single(struct device *hwdev,
phys_addr_t tlb_addr,
 		 */
 		for (i = index - 1;
 		     (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) &&
-		     io_tlb_list[SWIOTLB_LO][i];
+		     io_tlb_list[type][i];
 		     i--)
-			io_tlb_list[SWIOTLB_LO][i] = ++count;
+			io_tlb_list[type][i] = ++count;
 
-		io_tlb_used[SWIOTLB_LO] -= nslots;
+		io_tlb_used[type] -= nslots;
 	}
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
 }
@@ -653,8 +704,9 @@ void swiotlb_tbl_sync_single(struct device *hwdev,
phys_addr_t tlb_addr,
 			     size_t size, enum dma_data_direction dir,
 			     enum dma_sync_target target)
 {
-	int index = (tlb_addr - io_tlb_start[SWIOTLB_LO]) >> IO_TLB_SHIFT;
-	phys_addr_t orig_addr = io_tlb_orig_addr[SWIOTLB_LO][index];
+	int type = swiotlb_get_type(tlb_addr);
+	int index = (tlb_addr - io_tlb_start[type]) >> IO_TLB_SHIFT;
+	phys_addr_t orig_addr = io_tlb_orig_addr[type][index];
 
 	if (orig_addr == INVALID_PHYS_ADDR)
 		return;
@@ -737,6 +789,15 @@ static int __init swiotlb_create_debugfs(void)
 	root = debugfs_create_dir("swiotlb", NULL);
 	debugfs_create_ulong("io_tlb_nslabs", 0400, root,
&io_tlb_nslabs[SWIOTLB_LO]);
 	debugfs_create_ulong("io_tlb_used", 0400, root,
&io_tlb_used[SWIOTLB_LO]);
+
+	if (swiotlb_nr == 1)
+		return 0;
+
+	debugfs_create_ulong("io_tlb_nslabs-highmem", 0400,
+			     root, &io_tlb_nslabs[SWIOTLB_HI]);
+	debugfs_create_ulong("io_tlb_used-highmem", 0400,
+			     root, &io_tlb_used[SWIOTLB_HI]);
+
 	return 0;
 }
 
-- 
2.17.1
Dongli Zhang
2021-Feb-03  23:37 UTC
[Nouveau] [PATCH RFC v1 5/6] xen-swiotlb: convert variables to arrays
This patch converts several xen-swiotlb related variables to arrays, in
order to maintain stat/status for different swiotlb buffers. Here are
variables involved:
- xen_io_tlb_start and xen_io_tlb_end
- xen_io_tlb_nslabs
- MAX_DMA_BITS
There is no functional change and this is to prepare to enable 64-bit
xen-swiotlb.
Cc: Joe Jin <joe.jin at oracle.com>
Signed-off-by: Dongli Zhang <dongli.zhang at oracle.com>
---
 drivers/xen/swiotlb-xen.c | 75 +++++++++++++++++++++------------------
 1 file changed, 40 insertions(+), 35 deletions(-)
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 662638093542..e18cae693cdc 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -39,15 +39,17 @@
 #include <asm/xen/page-coherent.h>
 
 #include <trace/events/swiotlb.h>
-#define MAX_DMA_BITS 32
 /*
  * Used to do a quick range check in swiotlb_tbl_unmap_single and
  * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by
this
  * API.
  */
 
-static char *xen_io_tlb_start, *xen_io_tlb_end;
-static unsigned long xen_io_tlb_nslabs;
+static char *xen_io_tlb_start[SWIOTLB_MAX], *xen_io_tlb_end[SWIOTLB_MAX];
+static unsigned long xen_io_tlb_nslabs[SWIOTLB_MAX];
+
+static int max_dma_bits[] = {32, 64};
+
 /*
  * Quick lookup value of the bus address of the IOTLB.
  */
@@ -112,8 +114,8 @@ static int is_xen_swiotlb_buffer(struct device *dev,
dma_addr_t dma_addr)
 	 * in our domain. Therefore _only_ check address within our domain.
 	 */
 	if (pfn_valid(PFN_DOWN(paddr))) {
-		return paddr >= virt_to_phys(xen_io_tlb_start) &&
-		       paddr < virt_to_phys(xen_io_tlb_end);
+		return paddr >= virt_to_phys(xen_io_tlb_start[SWIOTLB_LO]) &&
+		       paddr < virt_to_phys(xen_io_tlb_end[SWIOTLB_LO]);
 	}
 	return 0;
 }
@@ -137,7 +139,7 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long
nslabs)
 				p + (i << IO_TLB_SHIFT),
 				get_order(slabs << IO_TLB_SHIFT),
 				dma_bits, &dma_handle);
-		} while (rc && dma_bits++ < MAX_DMA_BITS);
+		} while (rc && dma_bits++ < max_dma_bits[SWIOTLB_LO]);
 		if (rc)
 			return rc;
 
@@ -148,12 +150,13 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long
nslabs)
 static unsigned long xen_set_nslabs(unsigned long nr_tbl)
 {
 	if (!nr_tbl) {
-		xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
-		xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE);
+		xen_io_tlb_nslabs[SWIOTLB_LO] = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
+		xen_io_tlb_nslabs[SWIOTLB_LO] = ALIGN(xen_io_tlb_nslabs[SWIOTLB_LO],
+						      IO_TLB_SEGSIZE);
 	} else
-		xen_io_tlb_nslabs = nr_tbl;
+		xen_io_tlb_nslabs[SWIOTLB_LO] = nr_tbl;
 
-	return xen_io_tlb_nslabs << IO_TLB_SHIFT;
+	return xen_io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT;
 }
 
 enum xen_swiotlb_err {
@@ -184,16 +187,16 @@ int __ref xen_swiotlb_init(int verbose, bool early)
 	enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN;
 	unsigned int repeat = 3;
 
-	xen_io_tlb_nslabs = swiotlb_nr_tbl(SWIOTLB_LO);
+	xen_io_tlb_nslabs[SWIOTLB_LO] = swiotlb_nr_tbl(SWIOTLB_LO);
 retry:
-	bytes = xen_set_nslabs(xen_io_tlb_nslabs);
-	order = get_order(xen_io_tlb_nslabs << IO_TLB_SHIFT);
+	bytes = xen_set_nslabs(xen_io_tlb_nslabs[SWIOTLB_LO]);
+	order = get_order(xen_io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT);
 
 	/*
 	 * IO TLB memory already allocated. Just use it.
 	 */
 	if (io_tlb_start[SWIOTLB_LO] != 0) {
-		xen_io_tlb_start = phys_to_virt(io_tlb_start[SWIOTLB_LO]);
+		xen_io_tlb_start[SWIOTLB_LO] = phys_to_virt(io_tlb_start[SWIOTLB_LO]);
 		goto end;
 	}
 
@@ -201,76 +204,78 @@ int __ref xen_swiotlb_init(int verbose, bool early)
 	 * Get IO TLB memory from any location.
 	 */
 	if (early) {
-		xen_io_tlb_start = memblock_alloc(PAGE_ALIGN(bytes),
+		xen_io_tlb_start[SWIOTLB_LO] = memblock_alloc(PAGE_ALIGN(bytes),
 						  PAGE_SIZE);
-		if (!xen_io_tlb_start)
+		if (!xen_io_tlb_start[SWIOTLB_LO])
 			panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
 			      __func__, PAGE_ALIGN(bytes), PAGE_SIZE);
 	} else {
 #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
 		while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-			xen_io_tlb_start = (void *)xen_get_swiotlb_free_pages(order);
-			if (xen_io_tlb_start)
+			xen_io_tlb_start[SWIOTLB_LO] = (void *)xen_get_swiotlb_free_pages(order);
+			if (xen_io_tlb_start[SWIOTLB_LO])
 				break;
 			order--;
 		}
 		if (order != get_order(bytes)) {
 			pr_warn("Warning: only able to allocate %ld MB for software IO
TLB\n",
 				(PAGE_SIZE << order) >> 20);
-			xen_io_tlb_nslabs = SLABS_PER_PAGE << order;
-			bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
+			xen_io_tlb_nslabs[SWIOTLB_LO] = SLABS_PER_PAGE << order;
+			bytes = xen_io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT;
 		}
 	}
-	if (!xen_io_tlb_start) {
+	if (!xen_io_tlb_start[SWIOTLB_LO]) {
 		m_ret = XEN_SWIOTLB_ENOMEM;
 		goto error;
 	}
 	/*
 	 * And replace that memory with pages under 4GB.
 	 */
-	rc = xen_swiotlb_fixup(xen_io_tlb_start,
+	rc = xen_swiotlb_fixup(xen_io_tlb_start[SWIOTLB_LO],
 			       bytes,
-			       xen_io_tlb_nslabs);
+			       xen_io_tlb_nslabs[SWIOTLB_LO]);
 	if (rc) {
 		if (early)
-			memblock_free(__pa(xen_io_tlb_start),
+			memblock_free(__pa(xen_io_tlb_start[SWIOTLB_LO]),
 				      PAGE_ALIGN(bytes));
 		else {
-			free_pages((unsigned long)xen_io_tlb_start, order);
-			xen_io_tlb_start = NULL;
+			free_pages((unsigned long)xen_io_tlb_start[SWIOTLB_LO], order);
+			xen_io_tlb_start[SWIOTLB_LO] = NULL;
 		}
 		m_ret = XEN_SWIOTLB_EFIXUP;
 		goto error;
 	}
 	if (early) {
-		if (swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs,
+		if (swiotlb_init_with_tbl(xen_io_tlb_start[SWIOTLB_LO],
+					  xen_io_tlb_nslabs[SWIOTLB_LO],
 					  SWIOTLB_LO, verbose))
 			panic("Cannot allocate SWIOTLB buffer");
 		rc = 0;
 	} else
-		rc = swiotlb_late_init_with_tbl(xen_io_tlb_start,
-						xen_io_tlb_nslabs, SWIOTLB_LO);
+		rc = swiotlb_late_init_with_tbl(xen_io_tlb_start[SWIOTLB_LO],
+						xen_io_tlb_nslabs[SWIOTLB_LO],
+						SWIOTLB_LO);
 
 end:
-	xen_io_tlb_end = xen_io_tlb_start + bytes;
+	xen_io_tlb_end[SWIOTLB_LO] = xen_io_tlb_start[SWIOTLB_LO] + bytes;
 	if (!rc)
 		swiotlb_set_max_segment(PAGE_SIZE, SWIOTLB_LO);
 
 	return rc;
 error:
 	if (repeat--) {
-		xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */
-					(xen_io_tlb_nslabs >> 1));
+		xen_io_tlb_nslabs[SWIOTLB_LO] = max(1024UL, /* Min is 2MB */
+					(xen_io_tlb_nslabs[SWIOTLB_LO] >> 1));
 		pr_info("Lowering to %luMB\n",
-			(xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
+			(xen_io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT) >> 20);
 		goto retry;
 	}
 	pr_err("%s (rc:%d)\n", xen_swiotlb_error(m_ret), rc);
 	if (early)
 		panic("%s (rc:%d)", xen_swiotlb_error(m_ret), rc);
 	else
-		free_pages((unsigned long)xen_io_tlb_start, order);
+		free_pages((unsigned long)xen_io_tlb_start[SWIOTLB_LO], order);
 	return rc;
 }
 
@@ -561,7 +566,7 @@ xen_swiotlb_sync_sg_for_device(struct device *dev, struct
scatterlist *sgl,
 static int
 xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
-	return xen_virt_to_bus(hwdev, xen_io_tlb_end - 1) <= mask;
+	return xen_virt_to_bus(hwdev, xen_io_tlb_end[SWIOTLB_LO] - 1) <= mask;
 }
 
 const struct dma_map_ops xen_swiotlb_dma_ops = {
-- 
2.17.1
Dongli Zhang
2021-Feb-03  23:37 UTC
[Nouveau] [PATCH RFC v1 6/6] xen-swiotlb: enable 64-bit xen-swiotlb
This patch is to enable the 64-bit xen-swiotlb buffer.
For Xen PVM DMA address, the 64-bit device will be able to allocate from
64-bit swiotlb buffer.
Cc: Joe Jin <joe.jin at oracle.com>
Signed-off-by: Dongli Zhang <dongli.zhang at oracle.com>
---
 drivers/xen/swiotlb-xen.c | 117 ++++++++++++++++++++++++--------------
 1 file changed, 74 insertions(+), 43 deletions(-)
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index e18cae693cdc..c9ab07809e32 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -108,27 +108,36 @@ static int is_xen_swiotlb_buffer(struct device *dev,
dma_addr_t dma_addr)
 	unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr));
 	unsigned long xen_pfn = bfn_to_local_pfn(bfn);
 	phys_addr_t paddr = (phys_addr_t)xen_pfn << XEN_PAGE_SHIFT;
+	int i;
 
 	/* If the address is outside our domain, it CAN
 	 * have the same virtual address as another address
 	 * in our domain. Therefore _only_ check address within our domain.
 	 */
-	if (pfn_valid(PFN_DOWN(paddr))) {
-		return paddr >= virt_to_phys(xen_io_tlb_start[SWIOTLB_LO]) &&
-		       paddr < virt_to_phys(xen_io_tlb_end[SWIOTLB_LO]);
-	}
+	if (!pfn_valid(PFN_DOWN(paddr)))
+		return 0;
+
+	for (i = 0; i < swiotlb_nr; i++)
+		if (paddr >= virt_to_phys(xen_io_tlb_start[i]) &&
+		    paddr < virt_to_phys(xen_io_tlb_end[i]))
+			return 1;
+
 	return 0;
 }
 
 static int
-xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
+xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs,
+		  enum swiotlb_t type)
 {
 	int i, rc;
 	int dma_bits;
 	dma_addr_t dma_handle;
 	phys_addr_t p = virt_to_phys(buf);
 
-	dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
+	if (type == SWIOTLB_HI)
+		dma_bits = max_dma_bits[SWIOTLB_HI];
+	else
+		dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
 
 	i = 0;
 	do {
@@ -139,7 +148,7 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long
nslabs)
 				p + (i << IO_TLB_SHIFT),
 				get_order(slabs << IO_TLB_SHIFT),
 				dma_bits, &dma_handle);
-		} while (rc && dma_bits++ < max_dma_bits[SWIOTLB_LO]);
+		} while (rc && dma_bits++ < max_dma_bits[type]);
 		if (rc)
 			return rc;
 
@@ -147,16 +156,17 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long
nslabs)
 	} while (i < nslabs);
 	return 0;
 }
-static unsigned long xen_set_nslabs(unsigned long nr_tbl)
+
+static unsigned long xen_set_nslabs(unsigned long nr_tbl, enum swiotlb_t type)
 {
 	if (!nr_tbl) {
-		xen_io_tlb_nslabs[SWIOTLB_LO] = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
-		xen_io_tlb_nslabs[SWIOTLB_LO] = ALIGN(xen_io_tlb_nslabs[SWIOTLB_LO],
+		xen_io_tlb_nslabs[type] = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
+		xen_io_tlb_nslabs[type] = ALIGN(xen_io_tlb_nslabs[type],
 						      IO_TLB_SEGSIZE);
 	} else
-		xen_io_tlb_nslabs[SWIOTLB_LO] = nr_tbl;
+		xen_io_tlb_nslabs[type] = nr_tbl;
 
-	return xen_io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT;
+	return xen_io_tlb_nslabs[type] << IO_TLB_SHIFT;
 }
 
 enum xen_swiotlb_err {
@@ -180,23 +190,24 @@ static const char *xen_swiotlb_error(enum xen_swiotlb_err
err)
 	}
 	return "";
 }
-int __ref xen_swiotlb_init(int verbose, bool early)
+
+static int xen_swiotlb_init_type(int verbose, bool early, enum swiotlb_t type)
 {
 	unsigned long bytes, order;
 	int rc = -ENOMEM;
 	enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN;
 	unsigned int repeat = 3;
 
-	xen_io_tlb_nslabs[SWIOTLB_LO] = swiotlb_nr_tbl(SWIOTLB_LO);
+	xen_io_tlb_nslabs[type] = swiotlb_nr_tbl(type);
 retry:
-	bytes = xen_set_nslabs(xen_io_tlb_nslabs[SWIOTLB_LO]);
-	order = get_order(xen_io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT);
+	bytes = xen_set_nslabs(xen_io_tlb_nslabs[type], type);
+	order = get_order(xen_io_tlb_nslabs[type] << IO_TLB_SHIFT);
 
 	/*
 	 * IO TLB memory already allocated. Just use it.
 	 */
-	if (io_tlb_start[SWIOTLB_LO] != 0) {
-		xen_io_tlb_start[SWIOTLB_LO] = phys_to_virt(io_tlb_start[SWIOTLB_LO]);
+	if (io_tlb_start[type] != 0) {
+		xen_io_tlb_start[type] = phys_to_virt(io_tlb_start[type]);
 		goto end;
 	}
 
@@ -204,81 +215,95 @@ int __ref xen_swiotlb_init(int verbose, bool early)
 	 * Get IO TLB memory from any location.
 	 */
 	if (early) {
-		xen_io_tlb_start[SWIOTLB_LO] = memblock_alloc(PAGE_ALIGN(bytes),
+		xen_io_tlb_start[type] = memblock_alloc(PAGE_ALIGN(bytes),
 						  PAGE_SIZE);
-		if (!xen_io_tlb_start[SWIOTLB_LO])
+		if (!xen_io_tlb_start[type])
 			panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
 			      __func__, PAGE_ALIGN(bytes), PAGE_SIZE);
 	} else {
 #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
 		while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-			xen_io_tlb_start[SWIOTLB_LO] = (void *)xen_get_swiotlb_free_pages(order);
-			if (xen_io_tlb_start[SWIOTLB_LO])
+			xen_io_tlb_start[type] = (void *)xen_get_swiotlb_free_pages(order);
+			if (xen_io_tlb_start[type])
 				break;
 			order--;
 		}
 		if (order != get_order(bytes)) {
 			pr_warn("Warning: only able to allocate %ld MB for software IO
TLB\n",
 				(PAGE_SIZE << order) >> 20);
-			xen_io_tlb_nslabs[SWIOTLB_LO] = SLABS_PER_PAGE << order;
-			bytes = xen_io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT;
+			xen_io_tlb_nslabs[type] = SLABS_PER_PAGE << order;
+			bytes = xen_io_tlb_nslabs[type] << IO_TLB_SHIFT;
 		}
 	}
-	if (!xen_io_tlb_start[SWIOTLB_LO]) {
+	if (!xen_io_tlb_start[type]) {
 		m_ret = XEN_SWIOTLB_ENOMEM;
 		goto error;
 	}
 	/*
 	 * And replace that memory with pages under 4GB.
 	 */
-	rc = xen_swiotlb_fixup(xen_io_tlb_start[SWIOTLB_LO],
+	rc = xen_swiotlb_fixup(xen_io_tlb_start[type],
 			       bytes,
-			       xen_io_tlb_nslabs[SWIOTLB_LO]);
+			       xen_io_tlb_nslabs[type],
+			       type);
 	if (rc) {
 		if (early)
-			memblock_free(__pa(xen_io_tlb_start[SWIOTLB_LO]),
+			memblock_free(__pa(xen_io_tlb_start[type]),
 				      PAGE_ALIGN(bytes));
 		else {
-			free_pages((unsigned long)xen_io_tlb_start[SWIOTLB_LO], order);
-			xen_io_tlb_start[SWIOTLB_LO] = NULL;
+			free_pages((unsigned long)xen_io_tlb_start[type], order);
+			xen_io_tlb_start[type] = NULL;
 		}
 		m_ret = XEN_SWIOTLB_EFIXUP;
 		goto error;
 	}
 	if (early) {
-		if (swiotlb_init_with_tbl(xen_io_tlb_start[SWIOTLB_LO],
-					  xen_io_tlb_nslabs[SWIOTLB_LO],
-					  SWIOTLB_LO, verbose))
+		if (swiotlb_init_with_tbl(xen_io_tlb_start[type],
+					  xen_io_tlb_nslabs[type],
+					  type, verbose))
 			panic("Cannot allocate SWIOTLB buffer");
 		rc = 0;
 	} else
-		rc = swiotlb_late_init_with_tbl(xen_io_tlb_start[SWIOTLB_LO],
-						xen_io_tlb_nslabs[SWIOTLB_LO],
-						SWIOTLB_LO);
+		rc = swiotlb_late_init_with_tbl(xen_io_tlb_start[type],
+						xen_io_tlb_nslabs[type],
+						type);
 
 end:
-	xen_io_tlb_end[SWIOTLB_LO] = xen_io_tlb_start[SWIOTLB_LO] + bytes;
+	xen_io_tlb_end[type] = xen_io_tlb_start[type] + bytes;
 	if (!rc)
-		swiotlb_set_max_segment(PAGE_SIZE, SWIOTLB_LO);
+		swiotlb_set_max_segment(PAGE_SIZE, type);
 
 	return rc;
 error:
 	if (repeat--) {
-		xen_io_tlb_nslabs[SWIOTLB_LO] = max(1024UL, /* Min is 2MB */
-					(xen_io_tlb_nslabs[SWIOTLB_LO] >> 1));
+		xen_io_tlb_nslabs[type] = max(1024UL, /* Min is 2MB */
+					(xen_io_tlb_nslabs[type] >> 1));
 		pr_info("Lowering to %luMB\n",
-			(xen_io_tlb_nslabs[SWIOTLB_LO] << IO_TLB_SHIFT) >> 20);
+			(xen_io_tlb_nslabs[type] << IO_TLB_SHIFT) >> 20);
 		goto retry;
 	}
 	pr_err("%s (rc:%d)\n", xen_swiotlb_error(m_ret), rc);
 	if (early)
 		panic("%s (rc:%d)", xen_swiotlb_error(m_ret), rc);
 	else
-		free_pages((unsigned long)xen_io_tlb_start[SWIOTLB_LO], order);
+		free_pages((unsigned long)xen_io_tlb_start[type], order);
 	return rc;
 }
 
+int __ref xen_swiotlb_init(int verbose, bool early)
+{
+	int i, rc;
+
+	for (i = 0; i < swiotlb_nr; i++) {
+		rc = xen_swiotlb_init_type(verbose, early, i);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
 static void *
 xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			   dma_addr_t *dma_handle, gfp_t flags,
@@ -566,7 +591,13 @@ xen_swiotlb_sync_sg_for_device(struct device *dev, struct
scatterlist *sgl,
 static int
 xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
-	return xen_virt_to_bus(hwdev, xen_io_tlb_end[SWIOTLB_LO] - 1) <= mask;
+	int i;
+
+	for (i = 0; i < swiotlb_nr; i++)
+		if (xen_virt_to_bus(hwdev, xen_io_tlb_end[i] - 1) <= mask)
+			return true;
+
+	return false;
 }
 
 const struct dma_map_ops xen_swiotlb_dma_ops = {
-- 
2.17.1