XDP socket (AF_XDP) is an excellent kernel-bypass networking framework. The
zero-copy feature of xsk (XDP socket) needs to be supported by the driver, and
the performance of zero copy is very good.
ENV: Qemu with vhost.
                   vhost cpu | Guest APP CPU |Guest Softirq CPU | PPS
-----------------------------|---------------|------------------|------------
xmit by sockperf:     90%    |   100%        |                  |  318967
xmit by xsk:          100%   |   30%         |   33%            | 1192064
recv by sockperf:     100%   |   68%         |   100%           |  692288
recv by xsk:          100%   |   33%         |   43%            |  771670
Before this can be implemented in virtio-net, the virtio core must first
support these features:
1. virtio core supports premapped (already DMA-mapped) buffers
2. virtio core supports per-queue reset
3. DMA APIs are introduced to the virtio core
Please review.
Thanks.
Xuan Zhuo (10):
  virtio_ring: split: refactor virtqueue_add_split() for premapped
  virtio_ring: packed: separate prepare code from
    virtqueue_add_indirect_packed()
  virtio_ring: packed: refactor virtqueue_add_packed() for premapped
  virtio_ring: split: introduce virtqueue_add_split_premapped()
  virtio_ring: packed: introduce virtqueue_add_packed_premapped()
  virtio_ring: introduce virtqueue_add_inbuf_premapped()
  virtio_ring: add api virtio_dma_map() for advance dma
  virtio_ring: introduce dma sync api for virtio
  virtio_ring: correct the expression of the description of
    virtqueue_resize()
  virtio_ring: introduce virtqueue_reset()
 drivers/virtio/virtio_ring.c | 792 ++++++++++++++++++++++++++++-------
 include/linux/virtio.h       |  29 ++
 2 files changed, 659 insertions(+), 162 deletions(-)
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14  07:26 UTC
[PATCH vhost 01/10] virtio_ring: split: refactor virtqueue_add_split() for premapped
The DMA-related logic is separated out of virtqueue_add_split() to prepare
for the subsequent support for premapped buffers.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 219 ++++++++++++++++++++++++-----------
 1 file changed, 152 insertions(+), 67 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 41144b5246a8..560ee30d942c 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -520,29 +520,83 @@ static inline unsigned int virtqueue_add_desc_split(struct
virtqueue *vq,
 	return next;
 }
 
-static inline int virtqueue_add_split(struct virtqueue *_vq,
-				      struct scatterlist *sgs[],
-				      unsigned int total_sg,
-				      unsigned int out_sgs,
-				      unsigned int in_sgs,
-				      void *data,
-				      void *ctx,
-				      gfp_t gfp)
+static int virtqueue_map_sgs(struct vring_virtqueue *vq,
+			     struct scatterlist *sgs[],
+			     unsigned int total_sg,
+			     unsigned int out_sgs,
+			     unsigned int in_sgs)
 {
-	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct scatterlist *sg;
-	struct vring_desc *desc;
-	unsigned int i, n, avail, descs_used, prev, err_idx;
-	int head;
-	bool indirect;
+	unsigned int n;
 
-	START_USE(vq);
+	for (n = 0; n < out_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
+
+			if (vring_mapping_error(vq, addr))
+				return -ENOMEM;
+
+			sg->dma_address = addr;
+		}
+	}
+	for (; n < (out_sgs + in_sgs); n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
+
+			if (vring_mapping_error(vq, addr))
+				return -ENOMEM;
+
+			sg->dma_address = addr;
+		}
+	}
+
+	return 0;
+}
+
+static void virtqueue_unmap_sgs(struct vring_virtqueue *vq,
+				struct scatterlist *sgs[],
+				unsigned int total_sg,
+				unsigned int out_sgs,
+				unsigned int in_sgs)
+{
+	struct scatterlist *sg;
+	unsigned int n;
+
+	for (n = 0; n < out_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			if (!sg->dma_address)
+				return;
+
+			dma_unmap_single(vring_dma_dev(vq), sg->dma_address,
+					 sg->length, DMA_TO_DEVICE);
+		}
+	}
+	for (; n < (out_sgs + in_sgs); n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			if (!sg->dma_address)
+				return;
+
+			dma_unmap_single(vring_dma_dev(vq), sg->dma_address,
+					 sg->length, DMA_FROM_DEVICE);
+		}
+	}
+}
+
+static inline int virtqueue_add_split_prepare(struct vring_virtqueue *vq,
+					      unsigned int total_sg,
+					      unsigned int out_sgs,
+					      void *data,
+					      void *ctx,
+					      gfp_t gfp,
+					      struct vring_desc **pdesc)
+{
+	struct vring_desc *desc;
+	unsigned int descs_used;
 
 	BUG_ON(data == NULL);
 	BUG_ON(ctx && vq->indirect);
 
 	if (unlikely(vq->broken)) {
-		END_USE(vq);
 		return -EIO;
 	}
 
@@ -550,27 +604,17 @@ static inline int virtqueue_add_split(struct virtqueue
*_vq,
 
 	BUG_ON(total_sg == 0);
 
-	head = vq->free_head;
-
 	if (virtqueue_use_indirect(vq, total_sg))
-		desc = alloc_indirect_split(_vq, total_sg, gfp);
+		desc = alloc_indirect_split(&vq->vq, total_sg, gfp);
 	else {
 		desc = NULL;
 		WARN_ON_ONCE(total_sg > vq->split.vring.num &&
!vq->indirect);
 	}
 
-	if (desc) {
-		/* Use a single buffer which doesn't continue */
-		indirect = true;
-		/* Set up rest to use this indirect table. */
-		i = 0;
+	if (desc)
 		descs_used = 1;
-	} else {
-		indirect = false;
-		desc = vq->split.vring.desc;
-		i = head;
+	else
 		descs_used = total_sg;
-	}
 
 	if (unlikely(vq->vq.num_free < descs_used)) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
@@ -580,38 +624,64 @@ static inline int virtqueue_add_split(struct virtqueue
*_vq,
 		 * host should service the ring ASAP. */
 		if (out_sgs)
 			vq->notify(&vq->vq);
-		if (indirect)
-			kfree(desc);
-		END_USE(vq);
+		kfree(desc);
 		return -ENOSPC;
 	}
 
+	*pdesc = desc;
+
+	return 0;
+}
+
+static inline int virtqueue_add_split_vring(struct vring_virtqueue *vq,
+					    struct scatterlist *sgs[],
+					    unsigned int total_sg,
+					    unsigned int out_sgs,
+					    unsigned int in_sgs,
+					    struct vring_desc *desc)
+{
+	unsigned int n, i, avail, descs_used, prev;
+	struct virtqueue *_vq = &vq->vq;
+	struct scatterlist *sg;
+	bool indirect;
+	int head;
+
+	head = vq->free_head;
+
+	if (desc) {
+		/* Use a single buffer which doesn't continue */
+		indirect = true;
+		/* Set up rest to use this indirect table. */
+		i = 0;
+		descs_used = 1;
+	} else {
+		indirect = false;
+		desc = vq->split.vring.desc;
+		i = head;
+		descs_used = total_sg;
+	}
+
 	for (n = 0; n < out_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			prev = i;
 			/* Note that we trust indirect descriptor
 			 * table since it use stream DMA mapping.
 			 */
-			i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
+			i = virtqueue_add_desc_split(_vq, desc, i,
+						     sg->dma_address,
+						     sg->length,
 						     VRING_DESC_F_NEXT,
 						     indirect);
 		}
 	}
 	for (; n < (out_sgs + in_sgs); n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			prev = i;
 			/* Note that we trust indirect descriptor
 			 * table since it use stream DMA mapping.
 			 */
-			i = virtqueue_add_desc_split(_vq, desc, i, addr,
+			i = virtqueue_add_desc_split(_vq, desc, i,
+						     sg->dma_address,
 						     sg->length,
 						     VRING_DESC_F_NEXT |
 						     VRING_DESC_F_WRITE,
@@ -630,7 +700,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 			vq, desc, total_sg * sizeof(struct vring_desc),
 			DMA_TO_DEVICE);
 		if (vring_mapping_error(vq, addr))
-			goto unmap_release;
+			return -ENOMEM;
 
 		virtqueue_add_desc_split(_vq, vq->split.vring.desc,
 					 head, addr,
@@ -648,13 +718,6 @@ static inline int virtqueue_add_split(struct virtqueue
*_vq,
 	else
 		vq->free_head = i;
 
-	/* Store token and indirect buffer state. */
-	vq->split.desc_state[head].data = data;
-	if (indirect)
-		vq->split.desc_state[head].indir_desc = desc;
-	else
-		vq->split.desc_state[head].indir_desc = ctx;
-
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
 	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
@@ -677,30 +740,52 @@ static inline int virtqueue_add_split(struct virtqueue
*_vq,
 		virtqueue_kick(_vq);
 
 	return 0;
+}
 
-unmap_release:
-	err_idx = i;
+static inline int virtqueue_add_split(struct virtqueue *_vq,
+				      struct scatterlist *sgs[],
+				      unsigned int total_sg,
+				      unsigned int out_sgs,
+				      unsigned int in_sgs,
+				      void *data,
+				      void *ctx,
+				      gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_desc *desc;
+	int head;
+	int err;
 
-	if (indirect)
-		i = 0;
-	else
-		i = head;
+	START_USE(vq);
 
-	for (n = 0; n < total_sg; n++) {
-		if (i == err_idx)
-			break;
-		if (indirect) {
-			vring_unmap_one_split_indirect(vq, &desc[i]);
-			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
-		} else
-			i = vring_unmap_one_split(vq, i);
-	}
+	/* check vq state and try to alloc desc for indirect. */
+	err = virtqueue_add_split_prepare(vq, total_sg, out_sgs, data, ctx, gfp,
&desc);
+	if (err)
+		goto end;
 
-	if (indirect)
-		kfree(desc);
+	err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+	if (err)
+		goto err;
 
+	head = vq->free_head;
+	err = virtqueue_add_split_vring(vq, sgs, total_sg, out_sgs, in_sgs, desc);
+	if (err)
+		goto err;
+
+	/* Store token and indirect buffer state. */
+	vq->split.desc_state[head].data = data;
+	vq->split.desc_state[head].indir_desc = desc ? desc : ctx;
+
+	goto end;
+
+err:
+	virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+
+	kfree(desc);
+
+end:
 	END_USE(vq);
-	return -ENOMEM;
+	return err;
 }
 
 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
-- 
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14  07:26 UTC
[PATCH vhost 02/10] virtio_ring: packed: separate prepare code from virtqueue_add_indirect_packed()
Move the logic that allocates the indirect desc and checks the queue
status up into the upper-layer function.
The purpose of this is to make it convenient to refactor
virtqueue_add_packed() for premapped.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 560ee30d942c..42b1ff87518e 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1330,25 +1330,14 @@ static int virtqueue_add_indirect_packed(struct
vring_virtqueue *vq,
 					 unsigned int out_sgs,
 					 unsigned int in_sgs,
 					 void *data,
-					 gfp_t gfp)
+					 struct vring_packed_desc *desc)
 {
-	struct vring_packed_desc *desc;
 	struct scatterlist *sg;
 	unsigned int i, n, err_idx;
 	u16 head, id;
 	dma_addr_t addr;
 
 	head = vq->packed.next_avail_idx;
-	desc = alloc_indirect_packed(total_sg, gfp);
-	if (!desc)
-		return -ENOMEM;
-
-	if (unlikely(vq->vq.num_free < 1)) {
-		pr_debug("Can't add buf len 1 - avail = 0\n");
-		kfree(desc);
-		END_USE(vq);
-		return -ENOSPC;
-	}
 
 	i = 0;
 	id = vq->free_head;
@@ -1470,11 +1459,17 @@ static inline int virtqueue_add_packed(struct virtqueue
*_vq,
 	BUG_ON(total_sg == 0);
 
 	if (virtqueue_use_indirect(vq, total_sg)) {
-		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
-						    in_sgs, data, gfp);
-		if (err != -ENOMEM) {
-			END_USE(vq);
-			return err;
+		desc = alloc_indirect_packed(total_sg, gfp);
+		if (desc) {
+			if (unlikely(vq->vq.num_free < 1)) {
+				pr_debug("Can't add buf len 1 - avail = 0\n");
+				kfree(desc);
+				END_USE(vq);
+				return -ENOSPC;
+			}
+
+			return virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
+							     in_sgs, data, desc);
 		}
 
 		/* fall back on direct */
-- 
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14  07:26 UTC
[PATCH vhost 03/10] virtio_ring: packed: refactor virtqueue_add_packed() for premapped
DMA-related logic is separated from virtqueue_add_packed to prepare for
the subsequent support for premapped.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 150 ++++++++++++++++++-----------------
 1 file changed, 78 insertions(+), 72 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 42b1ff87518e..47b6f9152f9f 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1329,7 +1329,6 @@ static int virtqueue_add_indirect_packed(struct
vring_virtqueue *vq,
 					 unsigned int total_sg,
 					 unsigned int out_sgs,
 					 unsigned int in_sgs,
-					 void *data,
 					 struct vring_packed_desc *desc)
 {
 	struct scatterlist *sg;
@@ -1345,14 +1344,9 @@ static int virtqueue_add_indirect_packed(struct
vring_virtqueue *vq,
 
 	for (n = 0; n < out_sgs + in_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			addr = vring_map_one_sg(vq, sg, n < out_sgs ?
-					DMA_TO_DEVICE : DMA_FROM_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			desc[i].flags = cpu_to_le16(n < out_sgs ?
 						0 : VRING_DESC_F_WRITE);
-			desc[i].addr = cpu_to_le64(addr);
+			desc[i].addr = cpu_to_le64(sg->dma_address);
 			desc[i].len = cpu_to_le32(sg->length);
 			i++;
 		}
@@ -1363,7 +1357,7 @@ static int virtqueue_add_indirect_packed(struct
vring_virtqueue *vq,
 			total_sg * sizeof(struct vring_packed_desc),
 			DMA_TO_DEVICE);
 	if (vring_mapping_error(vq, addr))
-		goto unmap_release;
+		return -ENOMEM;
 
 	vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
 	vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
@@ -1404,53 +1398,30 @@ static int virtqueue_add_indirect_packed(struct
vring_virtqueue *vq,
 
 	/* Store token and indirect buffer state. */
 	vq->packed.desc_state[id].num = 1;
-	vq->packed.desc_state[id].data = data;
 	vq->packed.desc_state[id].indir_desc = desc;
 	vq->packed.desc_state[id].last = id;
 
 	vq->num_added += 1;
 
 	pr_debug("Added buffer head %i to %p\n", head, vq);
-	END_USE(vq);
 
 	return 0;
-
-unmap_release:
-	err_idx = i;
-
-	for (i = 0; i < err_idx; i++)
-		vring_unmap_desc_packed(vq, &desc[i]);
-
-	kfree(desc);
-
-	END_USE(vq);
-	return -ENOMEM;
 }
 
-static inline int virtqueue_add_packed(struct virtqueue *_vq,
-				       struct scatterlist *sgs[],
-				       unsigned int total_sg,
-				       unsigned int out_sgs,
-				       unsigned int in_sgs,
-				       void *data,
-				       void *ctx,
-				       gfp_t gfp)
+static inline int virtqueue_add_packed_prepare(struct vring_virtqueue *vq,
+					       unsigned int total_sg,
+					       void *data,
+					       void *ctx,
+					       struct vring_packed_desc **pdesc,
+					       gfp_t gfp)
 {
-	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct vring_packed_desc *desc;
-	struct scatterlist *sg;
-	unsigned int i, n, c, descs_used, err_idx;
-	__le16 head_flags, flags;
-	u16 head, id, prev, curr, avail_used_flags;
-	int err;
-
-	START_USE(vq);
+	unsigned int descs_used;
 
 	BUG_ON(data == NULL);
 	BUG_ON(ctx && vq->indirect);
 
 	if (unlikely(vq->broken)) {
-		END_USE(vq);
 		return -EIO;
 	}
 
@@ -1458,39 +1429,55 @@ static inline int virtqueue_add_packed(struct virtqueue
*_vq,
 
 	BUG_ON(total_sg == 0);
 
+	desc = NULL;
+
 	if (virtqueue_use_indirect(vq, total_sg)) {
 		desc = alloc_indirect_packed(total_sg, gfp);
 		if (desc) {
 			if (unlikely(vq->vq.num_free < 1)) {
 				pr_debug("Can't add buf len 1 - avail = 0\n");
 				kfree(desc);
-				END_USE(vq);
 				return -ENOSPC;
 			}
 
-			return virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
-							     in_sgs, data, desc);
+			return 0;
 		}
 
 		/* fall back on direct */
 	}
 
-	head = vq->packed.next_avail_idx;
-	avail_used_flags = vq->packed.avail_used_flags;
-
 	WARN_ON_ONCE(total_sg > vq->packed.vring.num &&
!vq->indirect);
 
-	desc = vq->packed.vring.desc;
-	i = head;
 	descs_used = total_sg;
 
 	if (unlikely(vq->vq.num_free < descs_used)) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
 			 descs_used, vq->vq.num_free);
-		END_USE(vq);
 		return -ENOSPC;
 	}
 
+	*pdesc = desc;
+
+	return 0;
+}
+
+static void virtqueue_add_packed_vring(struct vring_virtqueue *vq,
+				       struct scatterlist *sgs[],
+				       unsigned int total_sg,
+				       unsigned int out_sgs,
+				       unsigned int in_sgs)
+{
+	struct vring_packed_desc *desc;
+	struct scatterlist *sg;
+	unsigned int i, n, c, descs_used;
+	__le16 head_flags, flags;
+	u16 head, id, prev, curr;
+
+	desc = vq->packed.vring.desc;
+	head = vq->packed.next_avail_idx;
+	i = head;
+	descs_used = total_sg;
+
 	id = vq->free_head;
 	BUG_ON(id == vq->packed.vring.num);
 
@@ -1498,11 +1485,6 @@ static inline int virtqueue_add_packed(struct virtqueue
*_vq,
 	c = 0;
 	for (n = 0; n < out_sgs + in_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
-					DMA_TO_DEVICE : DMA_FROM_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			flags = cpu_to_le16(vq->packed.avail_used_flags |
 				    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
 				    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
@@ -1511,12 +1493,12 @@ static inline int virtqueue_add_packed(struct virtqueue
*_vq,
 			else
 				desc[i].flags = flags;
 
-			desc[i].addr = cpu_to_le64(addr);
+			desc[i].addr = cpu_to_le64(sg->dma_address);
 			desc[i].len = cpu_to_le32(sg->length);
 			desc[i].id = cpu_to_le16(id);
 
 			if (unlikely(vq->use_dma_api)) {
-				vq->packed.desc_extra[curr].addr = addr;
+				vq->packed.desc_extra[curr].addr = sg->dma_address;
 				vq->packed.desc_extra[curr].len = sg->length;
 				vq->packed.desc_extra[curr].flags =
 					le16_to_cpu(flags);
@@ -1545,8 +1527,6 @@ static inline int virtqueue_add_packed(struct virtqueue
*_vq,
 
 	/* Store token. */
 	vq->packed.desc_state[id].num = descs_used;
-	vq->packed.desc_state[id].data = data;
-	vq->packed.desc_state[id].indir_desc = ctx;
 	vq->packed.desc_state[id].last = prev;
 
 	/*
@@ -1559,29 +1539,55 @@ static inline int virtqueue_add_packed(struct virtqueue
*_vq,
 	vq->num_added += descs_used;
 
 	pr_debug("Added buffer head %i to %p\n", head, vq);
-	END_USE(vq);
+}
 
-	return 0;
+static inline int virtqueue_add_packed(struct virtqueue *_vq,
+				       struct scatterlist *sgs[],
+				       unsigned int total_sg,
+				       unsigned int out_sgs,
+				       unsigned int in_sgs,
+				       void *data,
+				       void *ctx,
+				       gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_packed_desc *desc;
+	u16 id;
+	int err;
 
-unmap_release:
-	err_idx = i;
-	i = head;
-	curr = vq->free_head;
+	START_USE(vq);
 
-	vq->packed.avail_used_flags = avail_used_flags;
+	/* check vq state and try to alloc desc for indirect. */
+	err = virtqueue_add_packed_prepare(vq, total_sg, data, ctx, &desc, gfp);
+	if (err)
+		goto end;
 
-	for (n = 0; n < total_sg; n++) {
-		if (i == err_idx)
-			break;
-		vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
-		curr = vq->packed.desc_extra[curr].next;
-		i++;
-		if (i >= vq->packed.vring.num)
-			i = 0;
+	err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+	if (err)
+		goto err;
+
+	id = vq->free_head;
+
+	if (desc) {
+		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, in_sgs,
desc);
+		if (err)
+			goto err;
+	} else {
+		virtqueue_add_packed_vring(vq, sgs, total_sg, out_sgs, in_sgs);
+		vq->packed.desc_state[id].indir_desc = ctx;
 	}
 
+	vq->packed.desc_state[id].data = data;
+
+	goto end;
+
+err:
+	virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+	kfree(desc);
+
+end:
 	END_USE(vq);
-	return -EIO;
+	return err;
 }
 
 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
-- 
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14  07:26 UTC
[PATCH vhost 04/10] virtio_ring: split: introduce virtqueue_add_split_premapped()
virtqueue_add_split() only supports virtual addresses; the DMA mapping is
done inside virtqueue_add_split().
In some scenarios (such as the AF_XDP scenario), the memory is allocated
and DMA-mapped in advance, so it is necessary for us to support
passing the DMA address to the virtio core.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 100 +++++++++++++++++++++++++++++++++--
 include/linux/virtio.h       |   5 ++
 2 files changed, 100 insertions(+), 5 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 47b6f9152f9f..a31155abe101 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -70,6 +70,7 @@
 struct vring_desc_state_split {
 	void *data;			/* Data for callback. */
 	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
+	bool premapped;
 };
 
 struct vring_desc_state_packed {
@@ -440,7 +441,7 @@ static void vring_unmap_one_split_indirect(const struct
vring_virtqueue *vq,
 }
 
 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
-					  unsigned int i)
+					  unsigned int i, bool premapped)
 {
 	struct vring_desc_extra *extra = vq->split.desc_extra;
 	u16 flags;
@@ -457,6 +458,9 @@ static unsigned int vring_unmap_one_split(const struct
vring_virtqueue *vq,
 				 (flags & VRING_DESC_F_WRITE) ?
 				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 	} else {
+		if (premapped)
+			goto out;
+
 		dma_unmap_page(vring_dma_dev(vq),
 			       extra[i].addr,
 			       extra[i].len,
@@ -788,6 +792,47 @@ static inline int virtqueue_add_split(struct virtqueue
*_vq,
 	return err;
 }
 
+static inline int virtqueue_add_split_premapped(struct virtqueue *_vq,
+						struct scatterlist *sgs[],
+						unsigned int total_sg,
+						unsigned int out_sgs,
+						unsigned int in_sgs,
+						void *data,
+						void *ctx,
+						gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_desc *desc;
+	int head;
+	int err;
+
+	START_USE(vq);
+
+	/* check vq state and try to alloc desc for indirect. */
+	err = virtqueue_add_split_prepare(vq, total_sg, out_sgs, data, ctx, gfp,
&desc);
+	if (err)
+		goto end;
+
+	head = vq->free_head;
+	err = virtqueue_add_split_vring(vq, sgs, total_sg, out_sgs, in_sgs, desc);
+	if (err)
+		goto err;
+
+	/* Store token and indirect buffer state. */
+	vq->split.desc_state[head].data = data;
+	vq->split.desc_state[head].indir_desc = desc ? desc : ctx;
+	vq->split.desc_state[head].premapped = true;
+
+	goto end;
+
+err:
+	kfree(desc);
+
+end:
+	END_USE(vq);
+	return err;
+}
+
 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -824,20 +869,23 @@ static void detach_buf_split(struct vring_virtqueue *vq,
unsigned int head,
 {
 	unsigned int i, j;
 	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
+	bool premapped;
 
 	/* Clear data ptr. */
 	vq->split.desc_state[head].data = NULL;
 
+	premapped = vq->split.desc_state[head].premapped;
+
 	/* Put back on free list: unmap first-level descriptors and find end */
 	i = head;
 
 	while (vq->split.vring.desc[i].flags & nextflag) {
-		vring_unmap_one_split(vq, i);
+		vring_unmap_one_split(vq, i, premapped);
 		i = vq->split.desc_extra[i].next;
 		vq->vq.num_free++;
 	}
 
-	vring_unmap_one_split(vq, i);
+	vring_unmap_one_split(vq, i, premapped);
 	vq->split.desc_extra[i].next = vq->free_head;
 	vq->free_head = head;
 
@@ -859,8 +907,10 @@ static void detach_buf_split(struct vring_virtqueue *vq,
unsigned int head,
 				VRING_DESC_F_INDIRECT));
 		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
 
-		for (j = 0; j < len / sizeof(struct vring_desc); j++)
-			vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+		if (!premapped) {
+			for (j = 0; j < len / sizeof(struct vring_desc); j++)
+				vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+		}
 
 		kfree(indir_desc);
 		vq->split.desc_state[head].indir_desc = NULL;
@@ -2204,6 +2254,21 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 					out_sgs, in_sgs, data, ctx, gfp);
 }
 
+static inline int virtqueue_add_premapped(struct virtqueue *_vq,
+					  struct scatterlist *sgs[],
+					  unsigned int total_sg,
+					  unsigned int out_sgs,
+					  unsigned int in_sgs,
+					  void *data,
+					  void *ctx,
+					  gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return virtqueue_add_split_premapped(_vq, sgs, total_sg, out_sgs,
+					     in_sgs, data, ctx, gfp);
+}
+
 /**
  * virtqueue_add_sgs - expose buffers to other end
  * @_vq: the struct virtqueue we're talking about.
@@ -2261,6 +2326,31 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
 
+/**
+ * virtqueue_add_outbuf_premapped - expose output buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg readable by other side
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * It is required that all addrs have completed DMA operations. And use
+ * sg->dma_address, sg->length to pass addr and length.
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
+				   struct scatterlist *sg, unsigned int num,
+				   void *data,
+				   gfp_t gfp)
+{
+	return virtqueue_add_premapped(vq, &sg, num, 1, 0, data, NULL, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_premapped);
+
 /**
  * virtqueue_add_inbuf - expose input buffers to other end
  * @vq: the struct virtqueue we're talking about.
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index dcab9c7e8784..d8b472a7dcae 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -43,6 +43,11 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
 			 void *data,
 			 gfp_t gfp);
 
+int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
+				   struct scatterlist *sg, unsigned int num,
+				   void *data,
+				   gfp_t gfp);
+
 int virtqueue_add_inbuf(struct virtqueue *vq,
 			struct scatterlist sg[], unsigned int num,
 			void *data,
-- 
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14  07:26 UTC
[PATCH vhost 05/10] virtio_ring: packed: introduce virtqueue_add_packed_premapped()
virtqueue_add_packed() only supports virtual addresses; the DMA mapping is
done inside virtqueue_add_packed().
In some scenarios (such as the AF_XDP scenario), the memory is allocated
and DMA-mapped in advance, so it is necessary for us to support
passing the DMA address to the virtio core.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 67 ++++++++++++++++++++++++++++++++----
 1 file changed, 61 insertions(+), 6 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index a31155abe101..79244ccbae9e 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -78,6 +78,7 @@ struct vring_desc_state_packed {
 	struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
 	u16 num;			/* Descriptor list length. */
 	u16 last;			/* The last desc state in a list. */
+	bool premapped;
 };
 
 struct vring_desc_extra {
@@ -1318,7 +1319,8 @@ static inline u16 packed_last_used(u16 last_used_idx)
 }
 
 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
-				     struct vring_desc_extra *extra)
+				     struct vring_desc_extra *extra,
+				     bool premapped)
 {
 	u16 flags;
 
@@ -1333,6 +1335,9 @@ static void vring_unmap_extra_packed(const struct
vring_virtqueue *vq,
 				 (flags & VRING_DESC_F_WRITE) ?
 				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 	} else {
+		if (premapped)
+			return;
+
 		dma_unmap_page(vring_dma_dev(vq),
 			       extra->addr, extra->len,
 			       (flags & VRING_DESC_F_WRITE) ?
@@ -1382,7 +1387,7 @@ static int virtqueue_add_indirect_packed(struct
vring_virtqueue *vq,
 					 struct vring_packed_desc *desc)
 {
 	struct scatterlist *sg;
-	unsigned int i, n, err_idx;
+	unsigned int i, n;
 	u16 head, id;
 	dma_addr_t addr;
 
@@ -1640,6 +1645,51 @@ static inline int virtqueue_add_packed(struct virtqueue
*_vq,
 	return err;
 }
 
+static inline int virtqueue_add_packed_premapped(struct virtqueue *_vq,
+						 struct scatterlist *sgs[],
+						 unsigned int total_sg,
+						 unsigned int out_sgs,
+						 unsigned int in_sgs,
+						 void *data,
+						 void *ctx,
+						 gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_packed_desc *desc;
+	u16 id;
+	int err;
+
+	START_USE(vq);
+
+	/* check vq state and try to alloc desc for indirect. */
+	err = virtqueue_add_packed_prepare(vq, total_sg, data, ctx, &desc, gfp);
+	if (err)
+		goto end;
+
+	id = vq->free_head;
+
+	if (desc) {
+		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, in_sgs,
desc);
+		if (err)
+			goto err;
+	} else {
+		virtqueue_add_packed_vring(vq, sgs, total_sg, out_sgs, in_sgs);
+		vq->packed.desc_state[id].indir_desc = ctx;
+	}
+
+	vq->packed.desc_state[id].data = data;
+	vq->packed.desc_state[id].premapped = true;
+
+	goto end;
+
+err:
+	kfree(desc);
+
+end:
+	END_USE(vq);
+	return err;
+}
+
 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -1695,8 +1745,10 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
 	struct vring_desc_state_packed *state = NULL;
 	struct vring_packed_desc *desc;
 	unsigned int i, curr;
+	bool premapped;
 
 	state = &vq->packed.desc_state[id];
+	premapped = state->premapped;
 
 	/* Clear data ptr. */
 	state->data = NULL;
@@ -1709,7 +1761,8 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
 		curr = id;
 		for (i = 0; i < state->num; i++) {
 			vring_unmap_extra_packed(vq,
-						 &vq->packed.desc_extra[curr]);
+						 &vq->packed.desc_extra[curr],
+						 premapped);
 			curr = vq->packed.desc_extra[curr].next;
 		}
 	}
@@ -1722,7 +1775,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
 		if (!desc)
 			return;
 
-		if (vq->use_dma_api) {
+		if (vq->use_dma_api && !premapped) {
 			len = vq->packed.desc_extra[id].len;
 			for (i = 0; i < len / sizeof(struct vring_packed_desc);
 					i++)
@@ -2265,8 +2318,10 @@ static inline int virtqueue_add_premapped(struct
virtqueue *_vq,
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
-	return virtqueue_add_split_premapped(_vq, sgs, total_sg, out_sgs,
-					     in_sgs, data, ctx, gfp);
+	return vq->packed_ring ? virtqueue_add_packed_premapped(_vq, sgs, total_sg,
out_sgs,
+								in_sgs, data, ctx, gfp) :
+				virtqueue_add_split_premapped(_vq, sgs, total_sg, out_sgs,
+							      in_sgs, data, ctx, gfp);
 }
 
 /**
-- 
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14  07:27 UTC
[PATCH vhost 06/10] virtio_ring: introduce virtqueue_add_inbuf_premapped()
Introduce virtqueue_add_inbuf_premapped() to submit premapped sgs.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 25 +++++++++++++++++++++++++
 include/linux/virtio.h       |  5 +++++
 2 files changed, 30 insertions(+)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 79244ccbae9e..cd9364eb2345 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2452,6 +2452,31 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
 
+/**
+ * virtqueue_add_inbuf_premapped - expose premapped input buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg writable by other side
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Every entry of @sg must already be DMA mapped by the caller; the address
+ * and length are passed in sg->dma_address and sg->length.
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_inbuf_premapped(struct virtqueue *vq,
+				  struct scatterlist *sg, unsigned int num,
+				  void *data,
+				  gfp_t gfp)
+{
+	return virtqueue_add_premapped(vq, &sg, num, 0, 1, data, NULL, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_premapped);
+
 /**
  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
  * @_vq: the struct virtqueue
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index d8b472a7dcae..3ebb346ebb7c 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -59,6 +59,11 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
 			    void *ctx,
 			    gfp_t gfp);
 
+int virtqueue_add_inbuf_premapped(struct virtqueue *vq,
+				  struct scatterlist *sg, unsigned int num,
+				  void *data,
+				  gfp_t gfp);
+
 int virtqueue_add_sgs(struct virtqueue *vq,
 		      struct scatterlist *sgs[],
 		      unsigned int out_sgs,
-- 
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14  07:27 UTC
[PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
Added virtio_dma_map() to map DMA addresses for virtual memory in
advance. The purpose is to keep memory mapped across multiple add/get
buf operations.
Added virtio_dma_unmap() to unmap DMA addresses.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++
 include/linux/virtio.h       |  9 ++++
 2 files changed, 101 insertions(+)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index cd9364eb2345..855338609c7f 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue
*vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
 
+/**
+ * virtio_dma_map_page - get the DMA addr of the memory for virtio device
+ * @dev: virtio device (converted with dev_to_virtio())
+ * @page: the page of the memory to DMA
+ * @offset: the offset of the memory inside page
+ * @length: memory length
+ * @dir: DMA direction
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ *
+ * When the device does not use the DMA API, the physical address of the
+ * memory is returned and nothing is mapped.
+ *
+ * Returns the DMA addr. DMA_MAPPING_ERROR means error.
+ */
+dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page,
+			       size_t offset, unsigned int length,
+			       enum dma_data_direction dir)
+{
+	struct virtio_device *vdev = dev_to_virtio(dev);
+
+	if (!vring_use_dma_api(vdev))
+		return page_to_phys(page) + offset;
+
+	return dma_map_page(vdev->dev.parent, page, offset, length, dir);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_map_page);
+
+/**
+ * virtio_dma_map - get the DMA addr of the memory for virtio device
+ * @dev: virtio device
+ * @addr: the kernel virtual addr to DMA (translated with virt_to_page())
+ * @length: memory length
+ * @dir: DMA direction
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ *
+ * Returns the DMA addr; check it with virtio_dma_mapping_error().
+ */
+dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
+			  enum dma_data_direction dir)
+{
+	struct page *page;
+	size_t offset;
+
+	page = virt_to_page(addr);
+	offset = offset_in_page(addr);
+
+	return virtio_dma_map_page(dev, page, offset, length, dir);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_map);
+
+/**
+ * virtio_dma_mapping_error - check whether a DMA address is valid
+ * @dev: virtio device
+ * @addr: DMA address to check
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ *
+ * Returns 0 if the DMA address is valid, non-zero if the mapping failed.
+ */
+int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr)
+{
+	struct virtio_device *vdev = dev_to_virtio(dev);
+
+	if (!vring_use_dma_api(vdev))
+		return 0;
+
+	return dma_mapping_error(vdev->dev.parent, addr);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_mapping_error);
+
+/**
+ * virtio_dma_unmap - unmap DMA addr (no-op when the DMA API is not used)
+ * @dev: virtio device
+ * @dma: DMA address to unmap
+ * @length: memory length
+ * @dir: DMA direction
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ */
+void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
+		      enum dma_data_direction dir)
+{
+	struct virtio_device *vdev = dev_to_virtio(dev);
+
+	if (!vring_use_dma_api(vdev))
+		return;
+
+	dma_unmap_page(vdev->dev.parent, dma, length, dir);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_unmap);
+
 MODULE_LICENSE("GPL");
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 3ebb346ebb7c..b5fa71476737 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -9,6 +9,7 @@
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
 #include <linux/gfp.h>
+#include <linux/dma-mapping.h>
 
 /**
  * struct virtqueue - a queue to register buffers for sending or receiving.
@@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
 #define module_virtio_driver(__virtio_driver) \
 	module_driver(__virtio_driver, register_virtio_driver, \
 			unregister_virtio_driver)
+
+dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t
offset,
+			       unsigned int length, enum dma_data_direction dir);
+dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
+			  enum dma_data_direction dir);
+int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
+void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
+		      enum dma_data_direction dir);
 #endif /* _LINUX_VIRTIO_H */
-- 
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14  07:27 UTC
[PATCH vhost 08/10] virtio_ring: introduce dma sync api for virtio
These APIs have been introduced:
* virtio_dma_need_sync
* virtio_dma_sync_single_range_for_cpu
* virtio_dma_sync_single_range_for_device
These APIs can be used together with the premapped mechanism to sync the
DMA address.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 70 ++++++++++++++++++++++++++++++++++++
 include/linux/virtio.h       |  8 +++++
 2 files changed, 78 insertions(+)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 855338609c7f..84129b8c3e2a 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -3264,4 +3264,74 @@ void virtio_dma_unmap(struct device *dev, dma_addr_t dma,
unsigned int length,
 }
 EXPORT_SYMBOL_GPL(virtio_dma_unmap);
 
+/**
+ * virtio_dma_need_sync - check whether a dma address needs sync
+ * @dev: virtio device
+ * @addr: DMA address
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ *
+ * Returns false when the device does not use the DMA API, otherwise the
+ * result of dma_need_sync() for the parent device.
+ */
+bool virtio_dma_need_sync(struct device *dev, dma_addr_t addr)
+{
+	struct virtio_device *vdev = dev_to_virtio(dev);
+
+	if (!vring_use_dma_api(vdev))
+		return false;
+
+	return dma_need_sync(vdev->dev.parent, addr);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_need_sync);
+
+/**
+ * virtio_dma_sync_single_range_for_cpu - dma sync for cpu
+ * @dev: virtio device
+ * @addr: DMA address
+ * @offset: DMA address offset
+ * @size: mem size for sync
+ * @dir: DMA direction; must match the direction used when mapping @addr
+ *
+ * Before calling this function, use virtio_dma_need_sync() to confirm that the
+ * DMA address really needs to be synchronized
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ */
+void virtio_dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr,
+					  unsigned long offset, size_t size,
+					  enum dma_data_direction dir)
+{
+	struct virtio_device *vdev = dev_to_virtio(dev);
+
+	/* Without the DMA API nothing was mapped, so nothing to sync. */
+	if (!vring_use_dma_api(vdev))
+		return;
+
+	/* Honour the caller's direction instead of forcing bidirectional. */
+	dma_sync_single_range_for_cpu(vdev->dev.parent, addr, offset,
+				      size, dir);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_sync_single_range_for_cpu);
+
+/**
+ * virtio_dma_sync_single_range_for_device - dma sync for device
+ * @dev: virtio device
+ * @addr: DMA address
+ * @offset: DMA address offset
+ * @size: mem size for sync
+ * @dir: DMA direction; must match the direction used when mapping @addr
+ *
+ * Before calling this function, use virtio_dma_need_sync() to confirm that the
+ * DMA address really needs to be synchronized
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ */
+void virtio_dma_sync_single_range_for_device(struct device *dev,
+					     dma_addr_t addr,
+					     unsigned long offset, size_t size,
+					     enum dma_data_direction dir)
+{
+	struct virtio_device *vdev = dev_to_virtio(dev);
+
+	/* Without the DMA API nothing was mapped, so nothing to sync. */
+	if (!vring_use_dma_api(vdev))
+		return;
+
+	/* Honour the caller's direction instead of forcing bidirectional. */
+	dma_sync_single_range_for_device(vdev->dev.parent, addr, offset,
+					 size, dir);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_sync_single_range_for_device);
+
 MODULE_LICENSE("GPL");
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index b5fa71476737..d0e707d744a0 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -225,4 +225,12 @@ dma_addr_t virtio_dma_map(struct device *dev, void *addr,
unsigned int length,
 int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
 void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
 		      enum dma_data_direction dir);
+bool virtio_dma_need_sync(struct device *dev, dma_addr_t addr);
+void virtio_dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr,
+					  unsigned long offset, size_t size,
+					  enum dma_data_direction dir);
+void virtio_dma_sync_single_range_for_device(struct device *dev,
+					     dma_addr_t addr,
+					     unsigned long offset, size_t size,
+					     enum dma_data_direction dir);
 #endif /* _LINUX_VIRTIO_H */
-- 
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14  07:27 UTC
[PATCH vhost 09/10] virtio_ring: correct the expression of the description of virtqueue_resize()
Modify the "useless" to a more accurate "unused".
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 84129b8c3e2a..2ba60a14f557 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2865,7 +2865,7 @@ EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma);
  * virtqueue_resize - resize the vring of vq
  * @_vq: the struct virtqueue we're talking about.
  * @num: new ring num
- * @recycle: callback for recycle the useless buffer
+ * @recycle: callback to recycle unused buffers
  *
  * When it is really necessary to create a new vring, it will set the current vq
  * into the reset state. Then call the passed callback to recycle the buffer
-- 
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14  07:27 UTC
[PATCH vhost 10/10] virtio_ring: introduce virtqueue_reset()
Introduce virtqueue_reset() to release all buffers inside the vq.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 50 ++++++++++++++++++++++++++++++++++++
 include/linux/virtio.h       |  2 ++
 2 files changed, 52 insertions(+)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 2ba60a14f557..2750a365439a 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2930,6 +2930,56 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
 }
 EXPORT_SYMBOL_GPL(virtqueue_resize);
 
+/**
+ * virtqueue_reset - reset the vq and recycle all of its unused buffers
+ * @_vq: the struct virtqueue we're talking about.
+ * @recycle: callback invoked for every unused buffer detached from the vq
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error.
+ * 0: success.
+ * -EBUSY: failed to re-enable the vq after reset, vq may not work properly
+ * -ENOENT: transport lacks disable_vq_and_reset or enable_vq_after_reset
+ * -EPERM: vq->we_own_ring is not set
+ */
+int virtqueue_reset(struct virtqueue *_vq,
+		    void (*recycle)(struct virtqueue *vq, void *buf))
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct virtio_device *vdev = vq->vq.vdev;
+	void *buf;
+	int err;
+
+	if (!vq->we_own_ring)
+		return -EPERM;
+
+	if (!vdev->config->disable_vq_and_reset)
+		return -ENOENT;
+
+	if (!vdev->config->enable_vq_after_reset)
+		return -ENOENT;
+
+	err = vdev->config->disable_vq_and_reset(_vq);
+	if (err)
+		return err;
+
+	while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
+		recycle(_vq, buf);
+
+	if (vq->packed_ring)
+		virtqueue_reinit_packed(vq);
+	else
+		virtqueue_reinit_split(vq);
+
+	if (vdev->config->enable_vq_after_reset(_vq))
+		return -EBUSY;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(virtqueue_reset);
+
 /* Only available for split ring */
 struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      unsigned int num,
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index d0e707d744a0..cf4c157e4e75 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -106,6 +106,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
 
 int virtqueue_resize(struct virtqueue *vq, u32 num,
 		     void (*recycle)(struct virtqueue *vq, void *buf));
+int virtqueue_reset(struct virtqueue *vq,
+		    void (*recycle)(struct virtqueue *vq, void *buf));
 
 /**
  * struct virtio_device - representation of a device using virtio
-- 
2.32.0.3.g01195cf9f
On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo at linux.alibaba.com> wrote:
>
> XDP socket(AF_XDP) is an excellent bypass kernel network framework. The zero
> copy feature of xsk (XDP socket) needs to be supported by the driver. The
> performance of zero copy is very good.
>
> ENV: Qemu with vhost.
>
>                    vhost cpu | Guest APP CPU |Guest Softirq CPU | PPS
> -----------------------------|---------------|------------------|------------
> xmit by sockperf:     90%    |   100%        |                  |  318967
> xmit by xsk:          100%   |   30%         |   33%            | 1192064

What's the setup of this test? CPU model/frequency, packet size, zerocopy
enabled or not.

(I remember I can get better performance with my old laptop through
pktgen (about 2Mpps))

Thanks

> recv by sockperf:     100%   |   68%         |   100%           |  692288
> recv by xsk:          100%   |   33%         |   43%            |  771670
>
> Before achieving the function of Virtio-Net, we also have to let virtio core
> support these features:
>
> 1. virtio core support premapped
> 2. virtio core support reset per-queue
> 3. introduce DMA APIs to virtio core
>
> Please review.
>
> Thanks.
>
> Xuan Zhuo (10):
>   virtio_ring: split: refactor virtqueue_add_split() for premapped
>   virtio_ring: packed: separate prepare code from
>     virtuque_add_indirect_packed()
>   virtio_ring: packed: refactor virtqueue_add_packed() for premapped
>   virtio_ring: split: introduce virtqueue_add_split_premapped()
>   virtio_ring: packed: introduce virtqueue_add_packed_premapped()
>   virtio_ring: introduce virtqueue_add_inbuf_premapped()
>   virtio_ring: add api virtio_dma_map() for advance dma
>   virtio_ring: introduce dma sync api for virtio
>   virtio_ring: correct the expression of the description of
>     virtqueue_resize()
>   virtio_ring: introduce virtqueue_reset()
>
>  drivers/virtio/virtio_ring.c | 792 ++++++++++++++++++++++++++++-------
>  include/linux/virtio.h       |  29 ++
>  2 files changed, 659 insertions(+), 162 deletions(-)
>
> --
> 2.32.0.3.g01195cf9f
>
Maybe Matching Threads
- [PATCH vhost v1 00/12] virtio core prepares for AF_XDP
- [PATCH vhost v7 00/11] virtio core prepares for AF_XDP
- [PATCH vhost v3 00/11] virtio core prepares for AF_XDP
- [PATCH vhost v4 00/11] virtio core prepares for AF_XDP
- [PATCH vhost v5 00/11] virtio core prepares for AF_XDP