XDP socket(AF_XDP) is an excellent bypass kernel network framework. The zero
copy feature of xsk (XDP socket) needs to be supported by the driver. The
performance of zero copy is very good.
ENV: Qemu with vhost.
                   vhost cpu | Guest APP CPU |Guest Softirq CPU | PPS
-----------------------------|---------------|------------------|------------
xmit by sockperf:     90%    |   100%        |                  |  318967
xmit by xsk:          100%   |   30%         |   33%            | 1192064
recv by sockperf:     100%   |   68%         |   100%           |  692288
recv by xsk:          100%   |   33%         |   43%            |  771670
Before achieving the function of Virtio-Net, we also have to let virtio core
support these features:
1. virtio core support premapped
2. virtio core support reset per-queue
3. introduce DMA APIs to virtio core
Please review.
Thanks.
v4:
 1. rename map_inter to dma_map_internal
 2. fix: Excess function parameter 'vq' description in
'virtqueue_dma_dev'
v3:
 1. add map_inter to struct desc state to reocrd whether virtio core do dma map
v2:
 1. based on sgs[0]->dma_address to judgment is premapped
 2. based on extra.addr to judgment to do unmap for no-indirect desc
 3. based on indir_desc to judgment to do unmap for indirect desc
 4. rename virtqueue_get_dma_dev to virtqueue_dma_dev
v1:
 1. expose dma device. NO introduce the api for dma and sync
 2. split some commit for review.
Xuan Zhuo (11):
  virtio_ring: split: separate dma codes
  virtio_ring: packed: separate dma codes
  virtio_ring: packed-indirect: separate dma codes
  virtio_ring: split: support premapped
  virtio_ring: packed: support premapped
  virtio_ring: packed-indirect: support premapped
  virtio_ring: update document for virtqueue_add_*
  virtio_ring: introduce virtqueue_dma_dev()
  virtio_ring: correct the expression of the description of
    virtqueue_resize()
  virtio_ring: separate the logic of reset/enable from virtqueue_resize
  virtio_ring: introduce virtqueue_reset()
 drivers/virtio/virtio.c      |   6 +
 drivers/virtio/virtio_ring.c | 342 +++++++++++++++++++++++++----------
 include/linux/virtio.h       |   4 +
 3 files changed, 255 insertions(+), 97 deletions(-)
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-22  02:56 UTC
[PATCH vhost v4 01/11] virtio_ring: split: separate dma codes
DMA-related logic is separated from the virtqueue_add_split() to
one new function. DMA address will be saved as sg->dma_address if
use_dma_api is true, then virtqueue_add_split() will use it directly.
Unmap operation will be simpler.
The purpose of this is to facilitate subsequent support to receive
dma address mapped by drivers.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 121 +++++++++++++++++++++++++++--------
 1 file changed, 93 insertions(+), 28 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 41144b5246a8..fe704ca6c813 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -379,6 +379,14 @@ static dma_addr_t vring_map_one_sg(const struct
vring_virtqueue *vq,
 			    direction);
 }
 
+static dma_addr_t vring_sg_address(struct scatterlist *sg)
+{
+	if (sg->dma_address)
+		return sg->dma_address;
+
+	return (dma_addr_t)sg_phys(sg);
+}
+
 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
 				   void *cpu_addr, size_t size,
 				   enum dma_data_direction direction)
@@ -520,6 +528,80 @@ static inline unsigned int virtqueue_add_desc_split(struct
virtqueue *vq,
 	return next;
 }
 
+static void virtqueue_unmap_sgs(struct vring_virtqueue *vq,
+				struct scatterlist *sgs[],
+				unsigned int total_sg,
+				unsigned int out_sgs,
+				unsigned int in_sgs)
+{
+	struct scatterlist *sg;
+	unsigned int n;
+
+	if (!vq->use_dma_api)
+		return;
+
+	for (n = 0; n < out_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			if (!sg->dma_address)
+				return;
+
+			dma_unmap_page(vring_dma_dev(vq), sg->dma_address,
+				       sg->length, DMA_TO_DEVICE);
+		}
+	}
+
+	for (; n < (out_sgs + in_sgs); n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			if (!sg->dma_address)
+				return;
+
+			dma_unmap_page(vring_dma_dev(vq), sg->dma_address,
+				       sg->length, DMA_FROM_DEVICE);
+		}
+	}
+}
+
+static int virtqueue_map_sgs(struct vring_virtqueue *vq,
+			     struct scatterlist *sgs[],
+			     unsigned int total_sg,
+			     unsigned int out_sgs,
+			     unsigned int in_sgs)
+{
+	struct scatterlist *sg;
+	unsigned int n;
+
+	if (!vq->use_dma_api)
+		return 0;
+
+	for (n = 0; n < out_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
+
+			if (vring_mapping_error(vq, addr))
+				goto err;
+
+			sg->dma_address = addr;
+		}
+	}
+
+	for (; n < (out_sgs + in_sgs); n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
+
+			if (vring_mapping_error(vq, addr))
+				goto err;
+
+			sg->dma_address = addr;
+		}
+	}
+
+	return 0;
+
+err:
+	virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+	return -ENOMEM;
+}
+
 static inline int virtqueue_add_split(struct virtqueue *_vq,
 				      struct scatterlist *sgs[],
 				      unsigned int total_sg,
@@ -532,9 +614,9 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct scatterlist *sg;
 	struct vring_desc *desc;
-	unsigned int i, n, avail, descs_used, prev, err_idx;
-	int head;
+	unsigned int i, n, avail, descs_used, prev;
 	bool indirect;
+	int head;
 
 	START_USE(vq);
 
@@ -586,32 +668,30 @@ static inline int virtqueue_add_split(struct virtqueue
*_vq,
 		return -ENOSPC;
 	}
 
+	if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+		return -ENOMEM;
+
 	for (n = 0; n < out_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			prev = i;
 			/* Note that we trust indirect descriptor
 			 * table since it use stream DMA mapping.
 			 */
-			i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
+			i = virtqueue_add_desc_split(_vq, desc, i,
+						     vring_sg_address(sg),
+						     sg->length,
 						     VRING_DESC_F_NEXT,
 						     indirect);
 		}
 	}
 	for (; n < (out_sgs + in_sgs); n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			prev = i;
 			/* Note that we trust indirect descriptor
 			 * table since it use stream DMA mapping.
 			 */
-			i = virtqueue_add_desc_split(_vq, desc, i, addr,
+			i = virtqueue_add_desc_split(_vq, desc, i,
+						     vring_sg_address(sg),
 						     sg->length,
 						     VRING_DESC_F_NEXT |
 						     VRING_DESC_F_WRITE,
@@ -679,22 +759,7 @@ static inline int virtqueue_add_split(struct virtqueue
*_vq,
 	return 0;
 
 unmap_release:
-	err_idx = i;
-
-	if (indirect)
-		i = 0;
-	else
-		i = head;
-
-	for (n = 0; n < total_sg; n++) {
-		if (i == err_idx)
-			break;
-		if (indirect) {
-			vring_unmap_one_split_indirect(vq, &desc[i]);
-			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
-		} else
-			i = vring_unmap_one_split(vq, i);
-	}
+	virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
 
 	if (indirect)
 		kfree(desc);
-- 
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-22  02:56 UTC
[PATCH vhost v4 02/11] virtio_ring: packed: separate dma codes
DMA-related logic is separated from the virtqueue_add_packed(). DMA
address will be saved as sg->dma_address, then virtqueue_add_packed()
will use it directly. Unmap operation will be simpler.
The purpose of this is to facilitate subsequent support to receive
dma address mapped by drivers.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 37 +++++++-----------------------------
 1 file changed, 7 insertions(+), 30 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index fe704ca6c813..42e8c9d44161 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1430,9 +1430,9 @@ static inline int virtqueue_add_packed(struct virtqueue
*_vq,
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct vring_packed_desc *desc;
 	struct scatterlist *sg;
-	unsigned int i, n, c, descs_used, err_idx;
+	unsigned int i, n, c, descs_used;
 	__le16 head_flags, flags;
-	u16 head, id, prev, curr, avail_used_flags;
+	u16 head, id, prev, curr;
 	int err;
 
 	START_USE(vq);
@@ -1461,7 +1461,6 @@ static inline int virtqueue_add_packed(struct virtqueue
*_vq,
 	}
 
 	head = vq->packed.next_avail_idx;
-	avail_used_flags = vq->packed.avail_used_flags;
 
 	WARN_ON_ONCE(total_sg > vq->packed.vring.num &&
!vq->indirect);
 
@@ -1479,15 +1478,13 @@ static inline int virtqueue_add_packed(struct virtqueue
*_vq,
 	id = vq->free_head;
 	BUG_ON(id == vq->packed.vring.num);
 
+	if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+		return -ENOMEM;
+
 	curr = id;
 	c = 0;
 	for (n = 0; n < out_sgs + in_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
-					DMA_TO_DEVICE : DMA_FROM_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			flags = cpu_to_le16(vq->packed.avail_used_flags |
 				    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
 				    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
@@ -1496,12 +1493,12 @@ static inline int virtqueue_add_packed(struct virtqueue
*_vq,
 			else
 				desc[i].flags = flags;
 
-			desc[i].addr = cpu_to_le64(addr);
+			desc[i].addr = cpu_to_le64(vring_sg_address(sg));
 			desc[i].len = cpu_to_le32(sg->length);
 			desc[i].id = cpu_to_le16(id);
 
 			if (unlikely(vq->use_dma_api)) {
-				vq->packed.desc_extra[curr].addr = addr;
+				vq->packed.desc_extra[curr].addr = vring_sg_address(sg);
 				vq->packed.desc_extra[curr].len = sg->length;
 				vq->packed.desc_extra[curr].flags  					le16_to_cpu(flags);
@@ -1547,26 +1544,6 @@ static inline int virtqueue_add_packed(struct virtqueue
*_vq,
 	END_USE(vq);
 
 	return 0;
-
-unmap_release:
-	err_idx = i;
-	i = head;
-	curr = vq->free_head;
-
-	vq->packed.avail_used_flags = avail_used_flags;
-
-	for (n = 0; n < total_sg; n++) {
-		if (i == err_idx)
-			break;
-		vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
-		curr = vq->packed.desc_extra[curr].next;
-		i++;
-		if (i >= vq->packed.vring.num)
-			i = 0;
-	}
-
-	END_USE(vq);
-	return -EIO;
 }
 
 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
-- 
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-22  02:56 UTC
[PATCH vhost v4 03/11] virtio_ring: packed-indirect: separate dma codes
DMA-related logic is separated from the virtqueue_add_indirect_packed().
DMA address will be saved as sg->dma_address, then
virtqueue_add_indirect_packed() will use it directly. Unmap operation
will be simpler.
The purpose of this is to facilitate subsequent support to receive
dma address mapped by drivers.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 42e8c9d44161..c8ed4aef9462 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1314,7 +1314,7 @@ static int virtqueue_add_indirect_packed(struct
vring_virtqueue *vq,
 {
 	struct vring_packed_desc *desc;
 	struct scatterlist *sg;
-	unsigned int i, n, err_idx;
+	unsigned int i, n;
 	u16 head, id;
 	dma_addr_t addr;
 
@@ -1334,16 +1334,14 @@ static int virtqueue_add_indirect_packed(struct
vring_virtqueue *vq,
 	id = vq->free_head;
 	BUG_ON(id == vq->packed.vring.num);
 
+	if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+		return -ENOMEM;
+
 	for (n = 0; n < out_sgs + in_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			addr = vring_map_one_sg(vq, sg, n < out_sgs ?
-					DMA_TO_DEVICE : DMA_FROM_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			desc[i].flags = cpu_to_le16(n < out_sgs ?
 						0 : VRING_DESC_F_WRITE);
-			desc[i].addr = cpu_to_le64(addr);
+			desc[i].addr = cpu_to_le64(vring_sg_address(sg));
 			desc[i].len = cpu_to_le32(sg->length);
 			i++;
 		}
@@ -1407,10 +1405,7 @@ static int virtqueue_add_indirect_packed(struct
vring_virtqueue *vq,
 	return 0;
 
 unmap_release:
-	err_idx = i;
-
-	for (i = 0; i < err_idx; i++)
-		vring_unmap_desc_packed(vq, &desc[i]);
+	virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
 
 	kfree(desc);
 
-- 
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-22  02:56 UTC
[PATCH vhost v4 04/11] virtio_ring: split: support premapped
virtio core only supports virtual addresses, dma is completed in virtio
core.
In some scenarios (such as the AF_XDP), the memory is allocated
and DMA mapping is completed in advance, so it is necessary for us to
support passing the DMA address to virtio core.
Drives can use sg->dma_address to pass the mapped dma address to virtio
core. If one sg->dma_address is used then all sgs must use
sg->dma_address, otherwise all must be null when passing it to the APIs
of virtio.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index c8ed4aef9462..a2a77a0dafe6 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -70,6 +70,7 @@
 struct vring_desc_state_split {
 	void *data;			/* Data for callback. */
 	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
+	bool dma_map_internal;		/* Do dma map internally. */
 };
 
 struct vring_desc_state_packed {
@@ -448,7 +449,7 @@ static void vring_unmap_one_split_indirect(const struct
vring_virtqueue *vq,
 }
 
 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
-					  unsigned int i)
+					  unsigned int i, bool dma_map_internal)
 {
 	struct vring_desc_extra *extra = vq->split.desc_extra;
 	u16 flags;
@@ -465,6 +466,9 @@ static unsigned int vring_unmap_one_split(const struct
vring_virtqueue *vq,
 				 (flags & VRING_DESC_F_WRITE) ?
 				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 	} else {
+		if (!dma_map_internal)
+			goto out;
+
 		dma_unmap_page(vring_dma_dev(vq),
 			       extra[i].addr,
 			       extra[i].len,
@@ -615,7 +619,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	struct scatterlist *sg;
 	struct vring_desc *desc;
 	unsigned int i, n, avail, descs_used, prev;
-	bool indirect;
+	bool indirect, dma_map_internal;
 	int head;
 
 	START_USE(vq);
@@ -668,7 +672,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 		return -ENOSPC;
 	}
 
-	if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+	dma_map_internal = !sgs[0]->dma_address;
+	if (dma_map_internal && virtqueue_map_sgs(vq, sgs, total_sg, out_sgs,
in_sgs))
 		return -ENOMEM;
 
 	for (n = 0; n < out_sgs; n++) {
@@ -734,6 +739,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 		vq->split.desc_state[head].indir_desc = desc;
 	else
 		vq->split.desc_state[head].indir_desc = ctx;
+	vq->split.desc_state[head].dma_map_internal = dma_map_internal;
 
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
@@ -759,7 +765,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	return 0;
 
 unmap_release:
-	virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+	if (dma_map_internal)
+		virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
 
 	if (indirect)
 		kfree(desc);
@@ -804,20 +811,22 @@ static void detach_buf_split(struct vring_virtqueue *vq,
unsigned int head,
 {
 	unsigned int i, j;
 	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
+	bool dma_map_internal;
 
 	/* Clear data ptr. */
 	vq->split.desc_state[head].data = NULL;
+	dma_map_internal = vq->split.desc_state[head].dma_map_internal;
 
 	/* Put back on free list: unmap first-level descriptors and find end */
 	i = head;
 
 	while (vq->split.vring.desc[i].flags & nextflag) {
-		vring_unmap_one_split(vq, i);
+		vring_unmap_one_split(vq, i, dma_map_internal);
 		i = vq->split.desc_extra[i].next;
 		vq->vq.num_free++;
 	}
 
-	vring_unmap_one_split(vq, i);
+	vring_unmap_one_split(vq, i, dma_map_internal);
 	vq->split.desc_extra[i].next = vq->free_head;
 	vq->free_head = head;
 
@@ -839,8 +848,10 @@ static void detach_buf_split(struct vring_virtqueue *vq,
unsigned int head,
 				VRING_DESC_F_INDIRECT));
 		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
 
-		for (j = 0; j < len / sizeof(struct vring_desc); j++)
-			vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+		if (dma_map_internal) {
+			for (j = 0; j < len / sizeof(struct vring_desc); j++)
+				vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+		}
 
 		kfree(indir_desc);
 		vq->split.desc_state[head].indir_desc = NULL;
-- 
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-22  02:56 UTC
[PATCH vhost v4 05/11] virtio_ring: packed: support premapped
virtio core only supports virtual addresses, dma is completed in virtio
core.
In some scenarios (such as the AF_XDP), the memory is allocated
and DMA mapping is completed in advance, so it is necessary for us to
support passing the DMA address to virtio core.
Drives can use sg->dma_address to pass the mapped dma address to virtio
core. If one sg->dma_address is used then all sgs must use
sg->dma_address, otherwise all must be null when passing it to the APIs
of virtio.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index a2a77a0dafe6..cafb720cbdc1 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -78,6 +78,7 @@ struct vring_desc_state_packed {
 	struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
 	u16 num;			/* Descriptor list length. */
 	u16 last;			/* The last desc state in a list. */
+	bool dma_map_internal;		/* Do dma map internally. */
 };
 
 struct vring_desc_extra {
@@ -1259,7 +1260,8 @@ static inline u16 packed_last_used(u16 last_used_idx)
 }
 
 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
-				     struct vring_desc_extra *extra)
+				     struct vring_desc_extra *extra,
+				     bool dma_map_internal)
 {
 	u16 flags;
 
@@ -1274,6 +1276,9 @@ static void vring_unmap_extra_packed(const struct
vring_virtqueue *vq,
 				 (flags & VRING_DESC_F_WRITE) ?
 				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 	} else {
+		if (!dma_map_internal)
+			return;
+
 		dma_unmap_page(vring_dma_dev(vq),
 			       extra->addr, extra->len,
 			       (flags & VRING_DESC_F_WRITE) ?
@@ -1439,6 +1444,7 @@ static inline int virtqueue_add_packed(struct virtqueue
*_vq,
 	unsigned int i, n, c, descs_used;
 	__le16 head_flags, flags;
 	u16 head, id, prev, curr;
+	bool dma_map_internal;
 	int err;
 
 	START_USE(vq);
@@ -1484,7 +1490,8 @@ static inline int virtqueue_add_packed(struct virtqueue
*_vq,
 	id = vq->free_head;
 	BUG_ON(id == vq->packed.vring.num);
 
-	if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+	dma_map_internal = !sgs[0]->dma_address;
+	if (dma_map_internal && virtqueue_map_sgs(vq, sgs, total_sg, out_sgs,
in_sgs))
 		return -ENOMEM;
 
 	curr = id;
@@ -1536,6 +1543,7 @@ static inline int virtqueue_add_packed(struct virtqueue
*_vq,
 	vq->packed.desc_state[id].data = data;
 	vq->packed.desc_state[id].indir_desc = ctx;
 	vq->packed.desc_state[id].last = prev;
+	vq->packed.desc_state[id].dma_map_internal = dma_map_internal;
 
 	/*
 	 * A driver MUST NOT make the first descriptor in the list
@@ -1621,7 +1629,8 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
 		curr = id;
 		for (i = 0; i < state->num; i++) {
 			vring_unmap_extra_packed(vq,
-						 &vq->packed.desc_extra[curr]);
+						 &vq->packed.desc_extra[curr],
+						 state->dma_map_internal);
 			curr = vq->packed.desc_extra[curr].next;
 		}
 	}
-- 
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-22  02:56 UTC
[PATCH vhost v4 06/11] virtio_ring: packed-indirect: support premapped
virtio core only supports virtual addresses, dma is completed in virtio
core.
In some scenarios (such as the AF_XDP), the memory is allocated
and DMA mapping is completed in advance, so it is necessary for us to
support passing the DMA address to virtio core.
Drives can use sg->dma_address to pass the mapped dma address to virtio
core. If one sg->dma_address is used then all sgs must use
sg->dma_address,
otherwise all dma_address must be null when passing it to the APIs of
virtio.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index cafb720cbdc1..623ee7442336 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1333,6 +1333,7 @@ static int virtqueue_add_indirect_packed(struct
vring_virtqueue *vq,
 	unsigned int i, n;
 	u16 head, id;
 	dma_addr_t addr;
+	bool dma_map_internal;
 
 	head = vq->packed.next_avail_idx;
 	desc = alloc_indirect_packed(total_sg, gfp);
@@ -1350,7 +1351,8 @@ static int virtqueue_add_indirect_packed(struct
vring_virtqueue *vq,
 	id = vq->free_head;
 	BUG_ON(id == vq->packed.vring.num);
 
-	if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+	dma_map_internal = !sgs[0]->dma_address;
+	if (dma_map_internal && virtqueue_map_sgs(vq, sgs, total_sg, out_sgs,
in_sgs))
 		return -ENOMEM;
 
 	for (n = 0; n < out_sgs + in_sgs; n++) {
@@ -1412,6 +1414,8 @@ static int virtqueue_add_indirect_packed(struct
vring_virtqueue *vq,
 	vq->packed.desc_state[id].data = data;
 	vq->packed.desc_state[id].indir_desc = desc;
 	vq->packed.desc_state[id].last = id;
+	vq->packed.desc_state[id].dma_map_internal = dma_map_internal;
+
 
 	vq->num_added += 1;
 
@@ -1421,7 +1425,8 @@ static int virtqueue_add_indirect_packed(struct
vring_virtqueue *vq,
 	return 0;
 
 unmap_release:
-	virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+	if (dma_map_internal)
+		virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
 
 	kfree(desc);
 
@@ -1643,7 +1648,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
 		if (!desc)
 			return;
 
-		if (vq->use_dma_api) {
+		if (vq->use_dma_api && state->dma_map_internal) {
 			len = vq->packed.desc_extra[id].len;
 			for (i = 0; i < len / sizeof(struct vring_packed_desc);
 					i++)
-- 
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-22  02:56 UTC
[PATCH vhost v4 07/11] virtio_ring: update document for virtqueue_add_*
Update the document of virtqueue_add_* series API, allowing the callers to use sg->dma_address to pass the dma address to Virtio Core. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> --- drivers/virtio/virtio_ring.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 623ee7442336..3a558ca350c6 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2192,6 +2192,10 @@ static inline int virtqueue_add(struct virtqueue *_vq, * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * + * If the caller has done dma map then use sg->dma_address to pass dma address. + * If one sg->dma_address is used, then all sgs must use sg->dma_address; + * otherwise all sg->dma_address must be NULL. + * * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). */ int virtqueue_add_sgs(struct virtqueue *_vq, @@ -2226,6 +2230,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_sgs); * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * + * If the caller has done dma map then use sg->dma_address to pass dma address. + * If one sg->dma_address is used, then all sgs must use sg->dma_address; + * otherwise all sg->dma_address must be NULL. + * * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). */ int virtqueue_add_outbuf(struct virtqueue *vq, @@ -2248,6 +2256,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * + * If the caller has done dma map then use sg->dma_address to pass dma address. + * If one sg->dma_address is used, then all sgs must use sg->dma_address; + * otherwise all sg->dma_address must be NULL. + * * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). */ int virtqueue_add_inbuf(struct virtqueue *vq, @@ -2271,6 +2283,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * + * If the caller has done dma map then use sg->dma_address to pass dma address. + * If one sg->dma_address is used, then all sgs must use sg->dma_address; + * otherwise all sg->dma_address must be NULL. + * * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). */ int virtqueue_add_inbuf_ctx(struct virtqueue *vq, -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-22  02:56 UTC
[PATCH vhost v4 08/11] virtio_ring: introduce virtqueue_dma_dev()
Added virtqueue_dma_dev() to get DMA device for virtio. Then the
caller can do dma operation in advance. The purpose is to keep memory
mapped across multiple add/get buf operations.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
Acked-by: Jason Wang <jasowang at redhat.com>
---
 drivers/virtio/virtio.c      |  6 ++++++
 drivers/virtio/virtio_ring.c | 17 +++++++++++++++++
 include/linux/virtio.h       |  2 ++
 3 files changed, 25 insertions(+)
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 3893dc29eb26..11c5035369e2 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
+#include <linux/dma-mapping.h>
 #include <linux/virtio.h>
 #include <linux/spinlock.h>
 #include <linux/virtio_config.h>
@@ -243,6 +244,11 @@ static int virtio_dev_probe(struct device *_d)
 	u64 driver_features;
 	u64 driver_features_legacy;
 
+	_d->dma_mask = &_d->coherent_dma_mask;
+	err = dma_set_mask_and_coherent(_d, DMA_BIT_MASK(64));
+	if (err)
+		return err;
+
 	/* We have a driver! */
 	virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
 
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 3a558ca350c6..6b2334951e5f 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2299,6 +2299,23 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
 
+/**
+ * virtqueue_dma_dev - get the dma dev
+ * @_vq: the struct virtqueue we're talking about.
+ *
+ * Returns the dma dev. That can been used for dma api.
+ */
+struct device *virtqueue_dma_dev(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	if (vq->use_dma_api)
+		return vring_dma_dev(vq);
+	else
+		return &vq->vq.vdev->dev;
+}
+EXPORT_SYMBOL_GPL(virtqueue_dma_dev);
+
 /**
  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
  * @_vq: the struct virtqueue
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 2b472514c49b..1fa50191cf0a 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -61,6 +61,8 @@ int virtqueue_add_sgs(struct virtqueue *vq,
 		      void *data,
 		      gfp_t gfp);
 
+struct device *virtqueue_dma_dev(struct virtqueue *vq);
+
 bool virtqueue_kick(struct virtqueue *vq);
 
 bool virtqueue_kick_prepare(struct virtqueue *vq);
-- 
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-22  02:56 UTC
[PATCH vhost v4 09/11] virtio_ring: correct the expression of the description of virtqueue_resize()
Modify the "useless" to a more accurate "unused". Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> Acked-by: Jason Wang <jasowang at redhat.com> --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 6b2334951e5f..a31cf71c1689 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2704,7 +2704,7 @@ EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma); * virtqueue_resize - resize the vring of vq * @_vq: the struct virtqueue we're talking about. * @num: new ring num - * @recycle: callback for recycle the useless buffer + * @recycle: callback to recycle unused buffers * * When it is really necessary to create a new vring, it will set the current vq * into the reset state. Then call the passed callback to recycle the buffer -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-22  02:57 UTC
[PATCH vhost v4 10/11] virtio_ring: separate the logic of reset/enable from virtqueue_resize
The subsequent reset function will reuse these logic.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
Acked-by: Jason Wang <jasowang at redhat.com>
---
 drivers/virtio/virtio_ring.c | 58 ++++++++++++++++++++++++------------
 1 file changed, 39 insertions(+), 19 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index a31cf71c1689..bd79d681022b 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2158,6 +2158,43 @@ static int virtqueue_resize_packed(struct virtqueue *_vq,
u32 num)
 	return -ENOMEM;
 }
 
+static int virtqueue_disable_and_recycle(struct virtqueue *_vq,
+					 void (*recycle)(struct virtqueue *vq, void *buf))
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct virtio_device *vdev = vq->vq.vdev;
+	void *buf;
+	int err;
+
+	if (!vq->we_own_ring)
+		return -EPERM;
+
+	if (!vdev->config->disable_vq_and_reset)
+		return -ENOENT;
+
+	if (!vdev->config->enable_vq_after_reset)
+		return -ENOENT;
+
+	err = vdev->config->disable_vq_and_reset(_vq);
+	if (err)
+		return err;
+
+	while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
+		recycle(_vq, buf);
+
+	return 0;
+}
+
+static int virtqueue_enable_after_reset(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct virtio_device *vdev = vq->vq.vdev;
+
+	if (vdev->config->enable_vq_after_reset(_vq))
+		return -EBUSY;
+
+	return 0;
+}
 
 /*
  * Generic functions and exported symbols.
@@ -2728,13 +2765,8 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
 		     void (*recycle)(struct virtqueue *vq, void *buf))
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-	struct virtio_device *vdev = vq->vq.vdev;
-	void *buf;
 	int err;
 
-	if (!vq->we_own_ring)
-		return -EPERM;
-
 	if (num > vq->vq.num_max)
 		return -E2BIG;
 
@@ -2744,28 +2776,16 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
 	if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) ==
num)
 		return 0;
 
-	if (!vdev->config->disable_vq_and_reset)
-		return -ENOENT;
-
-	if (!vdev->config->enable_vq_after_reset)
-		return -ENOENT;
-
-	err = vdev->config->disable_vq_and_reset(_vq);
+	err = virtqueue_disable_and_recycle(_vq, recycle);
 	if (err)
 		return err;
 
-	while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
-		recycle(_vq, buf);
-
 	if (vq->packed_ring)
 		err = virtqueue_resize_packed(_vq, num);
 	else
 		err = virtqueue_resize_split(_vq, num);
 
-	if (vdev->config->enable_vq_after_reset(_vq))
-		return -EBUSY;
-
-	return err;
+	return virtqueue_enable_after_reset(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_resize);
 
-- 
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-22  02:57 UTC
[PATCH vhost v4 11/11] virtio_ring: introduce virtqueue_reset()
Introduce virtqueue_reset() to release all buffer inside vq.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
Acked-by: Jason Wang <jasowang at redhat.com>
---
 drivers/virtio/virtio_ring.c | 33 +++++++++++++++++++++++++++++++++
 include/linux/virtio.h       |  2 ++
 2 files changed, 35 insertions(+)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index bd79d681022b..38058de7f1b9 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2789,6 +2789,39 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
 }
 EXPORT_SYMBOL_GPL(virtqueue_resize);
 
+/**
+ * virtqueue_reset - detach and recycle all unused buffers
+ * @_vq: the struct virtqueue we're talking about.
+ * @recycle: callback to recycle unused buffers
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error.
+ * 0: success.
+ * -EBUSY: Failed to sync with device, vq may not work properly
+ * -ENOENT: Transport or device not supported
+ * -EPERM: Operation not permitted
+ */
+int virtqueue_reset(struct virtqueue *_vq,
+		    void (*recycle)(struct virtqueue *vq, void *buf))
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	int err;
+
+	err = virtqueue_disable_and_recycle(_vq, recycle);
+	if (err)
+		return err;
+
+	if (vq->packed_ring)
+		virtqueue_reinit_packed(vq);
+	else
+		virtqueue_reinit_split(vq);
+
+	return virtqueue_enable_after_reset(_vq);
+}
+EXPORT_SYMBOL_GPL(virtqueue_reset);
+
 /* Only available for split ring */
 struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      unsigned int num,
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 1fa50191cf0a..22bbd06ef8c8 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -97,6 +97,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
 
 int virtqueue_resize(struct virtqueue *vq, u32 num,
 		     void (*recycle)(struct virtqueue *vq, void *buf));
+int virtqueue_reset(struct virtqueue *vq,
+		    void (*recycle)(struct virtqueue *vq, void *buf));
 
 /**
  * struct virtio_device - representation of a device using virtio
-- 
2.32.0.3.g01195cf9f
Possibly Parallel Threads
- [PATCH vhost v1 06/12] virtio_ring: packed: separate DMA codes
- [PATCH vhost v3 00/11] virtio core prepares for AF_XDP
- [PATCH vhost v4 00/11] virtio core prepares for AF_XDP
- [PATCH vhost v5 00/11] virtio core prepares for AF_XDP
- [PATCH vhost v6 00/11] virtio core prepares for AF_XDP