AF_XDP (the XDP socket) is an excellent kernel-bypass networking framework. Its
zero-copy mode needs support from the driver, and the zero-copy performance is
very good.
ENV: QEMU with vhost.
                    vhost cpu | Guest APP CPU | Guest Softirq CPU | PPS
------------------------------|---------------|-------------------|---------
xmit by sockperf:         90% |          100% |                   |  318967
xmit by xsk:             100% |           30% |               33% | 1192064
recv by sockperf:        100% |           68% |              100% |  692288
recv by xsk:             100% |           33% |               43% |  771670
Before implementing this functionality in virtio-net, we first need the virtio
core to support these features:
1. virtio core support for premapped buffers
2. virtio core support for per-queue reset
3. DMA APIs exposed by the virtio core
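As a rough sketch (not part of this series' code), a driver such as virtio-net
could combine these pieces on the transmit side roughly as follows; the
xsk_frame structure and everything outside the new virtio APIs are invented for
illustration:

#include <linux/virtio.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>

/* Hypothetical frame handed to the driver by an AF_XDP-style pool. */
struct xsk_frame {
	void *vaddr;
	u32 len;
	dma_addr_t dma;
};

static int xsk_xmit_frame(struct virtio_device *vdev, struct virtqueue *vq,
			  struct xsk_frame *frame)
{
	struct scatterlist sg;

	/* Map once up front; the mapping can be reused across many xmits. */
	frame->dma = virtio_dma_map(&vdev->dev, frame->vaddr, frame->len,
				    DMA_TO_DEVICE);
	if (virtio_dma_mapping_error(&vdev->dev, frame->dma))
		return -ENOMEM;

	/* Premapped buffers pass addr/len via sg->dma_address and sg->length. */
	sg_init_table(&sg, 1);
	sg.dma_address = frame->dma;
	sg.length = frame->len;

	return virtqueue_add_outbuf_premapped(vq, &sg, 1, frame, GFP_ATOMIC);
}

On completion the driver gets the frame token back from virtqueue_get_buf() and
decides itself when to call virtio_dma_unmap() (or keeps the mapping for
reuse); the virtio core never unmaps premapped buffers.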
Please review.
Thanks.
Xuan Zhuo (10):
virtio_ring: split: refactor virtqueue_add_split() for premapped
virtio_ring: packed: separate prepare code from
virtqueue_add_indirect_packed()
virtio_ring: packed: refactor virtqueue_add_packed() for premapped
virtio_ring: split: introduce virtqueue_add_split_premapped()
virtio_ring: packed: introduce virtqueue_add_packed_premapped()
virtio_ring: introduce virtqueue_add_inbuf_premapped()
virtio_ring: add api virtio_dma_map() for advance dma
virtio_ring: introduce dma sync api for virtio
virtio_ring: correct the expression of the description of
virtqueue_resize()
virtio_ring: introduce virtqueue_reset()
drivers/virtio/virtio_ring.c | 792 ++++++++++++++++++++++++++++-------
include/linux/virtio.h | 29 ++
2 files changed, 659 insertions(+), 162 deletions(-)
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14 07:26 UTC
[PATCH vhost 01/10] virtio_ring: split: refactor virtqueue_add_split() for premapped
The DMA-related logic is separated out of virtqueue_add_split() to prepare
for the subsequent premapped support.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 219 ++++++++++++++++++++++++-----------
1 file changed, 152 insertions(+), 67 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 41144b5246a8..560ee30d942c 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -520,29 +520,83 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
return next;
}
-static inline int virtqueue_add_split(struct virtqueue *_vq,
- struct scatterlist *sgs[],
- unsigned int total_sg,
- unsigned int out_sgs,
- unsigned int in_sgs,
- void *data,
- void *ctx,
- gfp_t gfp)
+static int virtqueue_map_sgs(struct vring_virtqueue *vq,
+ struct scatterlist *sgs[],
+ unsigned int total_sg,
+ unsigned int out_sgs,
+ unsigned int in_sgs)
{
- struct vring_virtqueue *vq = to_vvq(_vq);
struct scatterlist *sg;
- struct vring_desc *desc;
- unsigned int i, n, avail, descs_used, prev, err_idx;
- int head;
- bool indirect;
+ unsigned int n;
- START_USE(vq);
+ for (n = 0; n < out_sgs; n++) {
+ for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+ dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
+
+ if (vring_mapping_error(vq, addr))
+ return -ENOMEM;
+
+ sg->dma_address = addr;
+ }
+ }
+ for (; n < (out_sgs + in_sgs); n++) {
+ for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+ dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
+
+ if (vring_mapping_error(vq, addr))
+ return -ENOMEM;
+
+ sg->dma_address = addr;
+ }
+ }
+
+ return 0;
+}
+
+static void virtqueue_unmap_sgs(struct vring_virtqueue *vq,
+ struct scatterlist *sgs[],
+ unsigned int total_sg,
+ unsigned int out_sgs,
+ unsigned int in_sgs)
+{
+ struct scatterlist *sg;
+ unsigned int n;
+
+ for (n = 0; n < out_sgs; n++) {
+ for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+ if (!sg->dma_address)
+ return;
+
+ dma_unmap_single(vring_dma_dev(vq), sg->dma_address,
+ sg->length, DMA_TO_DEVICE);
+ }
+ }
+ for (; n < (out_sgs + in_sgs); n++) {
+ for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+ if (!sg->dma_address)
+ return;
+
+ dma_unmap_single(vring_dma_dev(vq), sg->dma_address,
+ sg->length, DMA_FROM_DEVICE);
+ }
+ }
+}
+
+static inline int virtqueue_add_split_prepare(struct vring_virtqueue *vq,
+ unsigned int total_sg,
+ unsigned int out_sgs,
+ void *data,
+ void *ctx,
+ gfp_t gfp,
+ struct vring_desc **pdesc)
+{
+ struct vring_desc *desc;
+ unsigned int descs_used;
BUG_ON(data == NULL);
BUG_ON(ctx && vq->indirect);
if (unlikely(vq->broken)) {
- END_USE(vq);
return -EIO;
}
@@ -550,27 +604,17 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
BUG_ON(total_sg == 0);
- head = vq->free_head;
-
if (virtqueue_use_indirect(vq, total_sg))
- desc = alloc_indirect_split(_vq, total_sg, gfp);
+ desc = alloc_indirect_split(&vq->vq, total_sg, gfp);
else {
desc = NULL;
WARN_ON_ONCE(total_sg > vq->split.vring.num &&
!vq->indirect);
}
- if (desc) {
- /* Use a single buffer which doesn't continue */
- indirect = true;
- /* Set up rest to use this indirect table. */
- i = 0;
+ if (desc)
descs_used = 1;
- } else {
- indirect = false;
- desc = vq->split.vring.desc;
- i = head;
+ else
descs_used = total_sg;
- }
if (unlikely(vq->vq.num_free < descs_used)) {
pr_debug("Can't add buf len %i - avail = %i\n",
@@ -580,38 +624,64 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
* host should service the ring ASAP. */
if (out_sgs)
vq->notify(&vq->vq);
- if (indirect)
- kfree(desc);
- END_USE(vq);
+ kfree(desc);
return -ENOSPC;
}
+ *pdesc = desc;
+
+ return 0;
+}
+
+static inline int virtqueue_add_split_vring(struct vring_virtqueue *vq,
+ struct scatterlist *sgs[],
+ unsigned int total_sg,
+ unsigned int out_sgs,
+ unsigned int in_sgs,
+ struct vring_desc *desc)
+{
+ unsigned int n, i, avail, descs_used, prev;
+ struct virtqueue *_vq = &vq->vq;
+ struct scatterlist *sg;
+ bool indirect;
+ int head;
+
+ head = vq->free_head;
+
+ if (desc) {
+ /* Use a single buffer which doesn't continue */
+ indirect = true;
+ /* Set up rest to use this indirect table. */
+ i = 0;
+ descs_used = 1;
+ } else {
+ indirect = false;
+ desc = vq->split.vring.desc;
+ i = head;
+ descs_used = total_sg;
+ }
+
for (n = 0; n < out_sgs; n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
- dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
- if (vring_mapping_error(vq, addr))
- goto unmap_release;
-
prev = i;
/* Note that we trust indirect descriptor
* table since it use stream DMA mapping.
*/
- i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
+ i = virtqueue_add_desc_split(_vq, desc, i,
+ sg->dma_address,
+ sg->length,
VRING_DESC_F_NEXT,
indirect);
}
}
for (; n < (out_sgs + in_sgs); n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
- dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
- if (vring_mapping_error(vq, addr))
- goto unmap_release;
-
prev = i;
/* Note that we trust indirect descriptor
* table since it use stream DMA mapping.
*/
- i = virtqueue_add_desc_split(_vq, desc, i, addr,
+ i = virtqueue_add_desc_split(_vq, desc, i,
+ sg->dma_address,
sg->length,
VRING_DESC_F_NEXT |
VRING_DESC_F_WRITE,
@@ -630,7 +700,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
vq, desc, total_sg * sizeof(struct vring_desc),
DMA_TO_DEVICE);
if (vring_mapping_error(vq, addr))
- goto unmap_release;
+ return -ENOMEM;
virtqueue_add_desc_split(_vq, vq->split.vring.desc,
head, addr,
@@ -648,13 +718,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
else
vq->free_head = i;
- /* Store token and indirect buffer state. */
- vq->split.desc_state[head].data = data;
- if (indirect)
- vq->split.desc_state[head].indir_desc = desc;
- else
- vq->split.desc_state[head].indir_desc = ctx;
-
/* Put entry in available array (but don't update avail->idx until they
* do sync). */
avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
@@ -677,30 +740,52 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
virtqueue_kick(_vq);
return 0;
+}
-unmap_release:
- err_idx = i;
+static inline int virtqueue_add_split(struct virtqueue *_vq,
+ struct scatterlist *sgs[],
+ unsigned int total_sg,
+ unsigned int out_sgs,
+ unsigned int in_sgs,
+ void *data,
+ void *ctx,
+ gfp_t gfp)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ struct vring_desc *desc;
+ int head;
+ int err;
- if (indirect)
- i = 0;
- else
- i = head;
+ START_USE(vq);
- for (n = 0; n < total_sg; n++) {
- if (i == err_idx)
- break;
- if (indirect) {
- vring_unmap_one_split_indirect(vq, &desc[i]);
- i = virtio16_to_cpu(_vq->vdev, desc[i].next);
- } else
- i = vring_unmap_one_split(vq, i);
- }
+ /* check vq state and try to alloc desc for indirect. */
+ err = virtqueue_add_split_prepare(vq, total_sg, out_sgs, data, ctx, gfp, &desc);
+ if (err)
+ goto end;
- if (indirect)
- kfree(desc);
+ err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+ if (err)
+ goto err;
+ head = vq->free_head;
+ err = virtqueue_add_split_vring(vq, sgs, total_sg, out_sgs, in_sgs, desc);
+ if (err)
+ goto err;
+
+ /* Store token and indirect buffer state. */
+ vq->split.desc_state[head].data = data;
+ vq->split.desc_state[head].indir_desc = desc ? desc : ctx;
+
+ goto end;
+
+err:
+ virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+
+ kfree(desc);
+
+end:
END_USE(vq);
- return -ENOMEM;
+ return err;
}
static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14 07:26 UTC
[PATCH vhost 02/10] virtio_ring: packed: separate prepare code from virtqueue_add_indirect_packed()
Move the logic that allocates the indirect descriptors and checks the queue
status up into the calling function.
This makes it convenient to refactor virtqueue_add_packed() for premapped
support.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 29 ++++++++++++-----------------
1 file changed, 12 insertions(+), 17 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 560ee30d942c..42b1ff87518e 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1330,25 +1330,14 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
unsigned int out_sgs,
unsigned int in_sgs,
void *data,
- gfp_t gfp)
+ struct vring_packed_desc *desc)
{
- struct vring_packed_desc *desc;
struct scatterlist *sg;
unsigned int i, n, err_idx;
u16 head, id;
dma_addr_t addr;
head = vq->packed.next_avail_idx;
- desc = alloc_indirect_packed(total_sg, gfp);
- if (!desc)
- return -ENOMEM;
-
- if (unlikely(vq->vq.num_free < 1)) {
- pr_debug("Can't add buf len 1 - avail = 0\n");
- kfree(desc);
- END_USE(vq);
- return -ENOSPC;
- }
i = 0;
id = vq->free_head;
@@ -1470,11 +1459,17 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
BUG_ON(total_sg == 0);
if (virtqueue_use_indirect(vq, total_sg)) {
- err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
- in_sgs, data, gfp);
- if (err != -ENOMEM) {
- END_USE(vq);
- return err;
+ desc = alloc_indirect_packed(total_sg, gfp);
+ if (desc) {
+ if (unlikely(vq->vq.num_free < 1)) {
+ pr_debug("Can't add buf len 1 - avail = 0\n");
+ kfree(desc);
+ END_USE(vq);
+ return -ENOSPC;
+ }
+
+ return virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
+ in_sgs, data, desc);
}
/* fall back on direct */
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14 07:26 UTC
[PATCH vhost 03/10] virtio_ring: packed: refactor virtqueue_add_packed() for premapped
The DMA-related logic is separated out of virtqueue_add_packed() to prepare
for the subsequent premapped support.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 150 ++++++++++++++++++-----------------
1 file changed, 78 insertions(+), 72 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 42b1ff87518e..47b6f9152f9f 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1329,7 +1329,6 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
unsigned int total_sg,
unsigned int out_sgs,
unsigned int in_sgs,
- void *data,
struct vring_packed_desc *desc)
{
struct scatterlist *sg;
@@ -1345,14 +1344,9 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
for (n = 0; n < out_sgs + in_sgs; n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
- addr = vring_map_one_sg(vq, sg, n < out_sgs ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
- if (vring_mapping_error(vq, addr))
- goto unmap_release;
-
desc[i].flags = cpu_to_le16(n < out_sgs ?
0 : VRING_DESC_F_WRITE);
- desc[i].addr = cpu_to_le64(addr);
+ desc[i].addr = cpu_to_le64(sg->dma_address);
desc[i].len = cpu_to_le32(sg->length);
i++;
}
@@ -1363,7 +1357,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
total_sg * sizeof(struct vring_packed_desc),
DMA_TO_DEVICE);
if (vring_mapping_error(vq, addr))
- goto unmap_release;
+ return -ENOMEM;
vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
@@ -1404,53 +1398,30 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
/* Store token and indirect buffer state. */
vq->packed.desc_state[id].num = 1;
- vq->packed.desc_state[id].data = data;
vq->packed.desc_state[id].indir_desc = desc;
vq->packed.desc_state[id].last = id;
vq->num_added += 1;
pr_debug("Added buffer head %i to %p\n", head, vq);
- END_USE(vq);
return 0;
-
-unmap_release:
- err_idx = i;
-
- for (i = 0; i < err_idx; i++)
- vring_unmap_desc_packed(vq, &desc[i]);
-
- kfree(desc);
-
- END_USE(vq);
- return -ENOMEM;
}
-static inline int virtqueue_add_packed(struct virtqueue *_vq,
- struct scatterlist *sgs[],
- unsigned int total_sg,
- unsigned int out_sgs,
- unsigned int in_sgs,
- void *data,
- void *ctx,
- gfp_t gfp)
+static inline int virtqueue_add_packed_prepare(struct vring_virtqueue *vq,
+ unsigned int total_sg,
+ void *data,
+ void *ctx,
+ struct vring_packed_desc **pdesc,
+ gfp_t gfp)
{
- struct vring_virtqueue *vq = to_vvq(_vq);
struct vring_packed_desc *desc;
- struct scatterlist *sg;
- unsigned int i, n, c, descs_used, err_idx;
- __le16 head_flags, flags;
- u16 head, id, prev, curr, avail_used_flags;
- int err;
-
- START_USE(vq);
+ unsigned int descs_used;
BUG_ON(data == NULL);
BUG_ON(ctx && vq->indirect);
if (unlikely(vq->broken)) {
- END_USE(vq);
return -EIO;
}
@@ -1458,39 +1429,55 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
BUG_ON(total_sg == 0);
+ desc = NULL;
+
if (virtqueue_use_indirect(vq, total_sg)) {
desc = alloc_indirect_packed(total_sg, gfp);
if (desc) {
if (unlikely(vq->vq.num_free < 1)) {
pr_debug("Can't add buf len 1 - avail = 0\n");
kfree(desc);
- END_USE(vq);
return -ENOSPC;
}
- return virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
- in_sgs, data, desc);
+ return 0;
}
/* fall back on direct */
}
- head = vq->packed.next_avail_idx;
- avail_used_flags = vq->packed.avail_used_flags;
-
WARN_ON_ONCE(total_sg > vq->packed.vring.num &&
!vq->indirect);
- desc = vq->packed.vring.desc;
- i = head;
descs_used = total_sg;
if (unlikely(vq->vq.num_free < descs_used)) {
pr_debug("Can't add buf len %i - avail = %i\n",
descs_used, vq->vq.num_free);
- END_USE(vq);
return -ENOSPC;
}
+ *pdesc = desc;
+
+ return 0;
+}
+
+static void virtqueue_add_packed_vring(struct vring_virtqueue *vq,
+ struct scatterlist *sgs[],
+ unsigned int total_sg,
+ unsigned int out_sgs,
+ unsigned int in_sgs)
+{
+ struct vring_packed_desc *desc;
+ struct scatterlist *sg;
+ unsigned int i, n, c, descs_used;
+ __le16 head_flags, flags;
+ u16 head, id, prev, curr;
+
+ desc = vq->packed.vring.desc;
+ head = vq->packed.next_avail_idx;
+ i = head;
+ descs_used = total_sg;
+
id = vq->free_head;
BUG_ON(id == vq->packed.vring.num);
@@ -1498,11 +1485,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
c = 0;
for (n = 0; n < out_sgs + in_sgs; n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
- dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
- if (vring_mapping_error(vq, addr))
- goto unmap_release;
-
flags = cpu_to_le16(vq->packed.avail_used_flags |
(++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
(n < out_sgs ? 0 : VRING_DESC_F_WRITE));
@@ -1511,12 +1493,12 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
else
desc[i].flags = flags;
- desc[i].addr = cpu_to_le64(addr);
+ desc[i].addr = cpu_to_le64(sg->dma_address);
desc[i].len = cpu_to_le32(sg->length);
desc[i].id = cpu_to_le16(id);
if (unlikely(vq->use_dma_api)) {
- vq->packed.desc_extra[curr].addr = addr;
+ vq->packed.desc_extra[curr].addr = sg->dma_address;
vq->packed.desc_extra[curr].len = sg->length;
 vq->packed.desc_extra[curr].flags = le16_to_cpu(flags);
@@ -1545,8 +1527,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
/* Store token. */
vq->packed.desc_state[id].num = descs_used;
- vq->packed.desc_state[id].data = data;
- vq->packed.desc_state[id].indir_desc = ctx;
vq->packed.desc_state[id].last = prev;
/*
@@ -1559,29 +1539,55 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
vq->num_added += descs_used;
pr_debug("Added buffer head %i to %p\n", head, vq);
- END_USE(vq);
+}
- return 0;
+static inline int virtqueue_add_packed(struct virtqueue *_vq,
+ struct scatterlist *sgs[],
+ unsigned int total_sg,
+ unsigned int out_sgs,
+ unsigned int in_sgs,
+ void *data,
+ void *ctx,
+ gfp_t gfp)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ struct vring_packed_desc *desc;
+ u16 id;
+ int err;
-unmap_release:
- err_idx = i;
- i = head;
- curr = vq->free_head;
+ START_USE(vq);
- vq->packed.avail_used_flags = avail_used_flags;
+ /* check vq state and try to alloc desc for indirect. */
+ err = virtqueue_add_packed_prepare(vq, total_sg, data, ctx, &desc, gfp);
+ if (err)
+ goto end;
- for (n = 0; n < total_sg; n++) {
- if (i == err_idx)
- break;
- vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
- curr = vq->packed.desc_extra[curr].next;
- i++;
- if (i >= vq->packed.vring.num)
- i = 0;
+ err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+ if (err)
+ goto err;
+
+ id = vq->free_head;
+
+ if (desc) {
+ err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, in_sgs, desc);
+ if (err)
+ goto err;
+ } else {
+ virtqueue_add_packed_vring(vq, sgs, total_sg, out_sgs, in_sgs);
+ vq->packed.desc_state[id].indir_desc = ctx;
}
+ vq->packed.desc_state[id].data = data;
+
+ goto end;
+
+err:
+ virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+ kfree(desc);
+
+end:
END_USE(vq);
- return -EIO;
+ return err;
}
static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14 07:26 UTC
[PATCH vhost 04/10] virtio_ring: split: introduce virtqueue_add_split_premapped()
virtqueue_add_split() currently accepts only virtual addresses and performs
the DMA mapping itself.
In some scenarios (such as AF_XDP), the memory is allocated and DMA-mapped in
advance, so the virtio core needs to support being passed an already-mapped
DMA address.
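For illustration only (not part of this patch), the completion side of such a
premapped TX path might look like the sketch below; xsk_frame is a hypothetical
driver structure holding the pre-mapped address, and virtio_dma_unmap() is
added later in this series:

#include <linux/virtio.h>
#include <linux/dma-mapping.h>

struct xsk_frame {		/* hypothetical driver token */
	dma_addr_t dma;		/* pre-mapped by the driver */
	u32 len;
};

static void xsk_clean_tx(struct virtio_device *vdev, struct virtqueue *vq)
{
	struct xsk_frame *frame;
	unsigned int len;

	while ((frame = virtqueue_get_buf(vq, &len)) != NULL) {
		/*
		 * The core skipped the unmap because this buffer was
		 * premapped; the driver owns the mapping and decides
		 * whether to unmap it or reuse it.
		 */
		virtio_dma_unmap(&vdev->dev, frame->dma, frame->len,
				 DMA_TO_DEVICE);
		/* ... return the frame to its (hypothetical) pool ... */
	}
}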
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 100 +++++++++++++++++++++++++++++++++--
include/linux/virtio.h | 5 ++
2 files changed, 100 insertions(+), 5 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 47b6f9152f9f..a31155abe101 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -70,6 +70,7 @@
struct vring_desc_state_split {
void *data; /* Data for callback. */
struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
+ bool premapped;
};
struct vring_desc_state_packed {
@@ -440,7 +441,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
}
static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
- unsigned int i)
+ unsigned int i, bool premapped)
{
struct vring_desc_extra *extra = vq->split.desc_extra;
u16 flags;
@@ -457,6 +458,9 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
(flags & VRING_DESC_F_WRITE) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
} else {
+ if (premapped)
+ goto out;
+
dma_unmap_page(vring_dma_dev(vq),
extra[i].addr,
extra[i].len,
@@ -788,6 +792,47 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
return err;
}
+static inline int virtqueue_add_split_premapped(struct virtqueue *_vq,
+ struct scatterlist *sgs[],
+ unsigned int total_sg,
+ unsigned int out_sgs,
+ unsigned int in_sgs,
+ void *data,
+ void *ctx,
+ gfp_t gfp)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ struct vring_desc *desc;
+ int head;
+ int err;
+
+ START_USE(vq);
+
+ /* check vq state and try to alloc desc for indirect. */
+ err = virtqueue_add_split_prepare(vq, total_sg, out_sgs, data, ctx, gfp, &desc);
+ if (err)
+ goto end;
+
+ head = vq->free_head;
+ err = virtqueue_add_split_vring(vq, sgs, total_sg, out_sgs, in_sgs, desc);
+ if (err)
+ goto err;
+
+ /* Store token and indirect buffer state. */
+ vq->split.desc_state[head].data = data;
+ vq->split.desc_state[head].indir_desc = desc ? desc : ctx;
+ vq->split.desc_state[head].premapped = true;
+
+ goto end;
+
+err:
+ kfree(desc);
+
+end:
+ END_USE(vq);
+ return err;
+}
+
static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
@@ -824,20 +869,23 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
{
unsigned int i, j;
__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
+ bool premapped;
/* Clear data ptr. */
vq->split.desc_state[head].data = NULL;
+ premapped = vq->split.desc_state[head].premapped;
+
/* Put back on free list: unmap first-level descriptors and find end */
i = head;
while (vq->split.vring.desc[i].flags & nextflag) {
- vring_unmap_one_split(vq, i);
+ vring_unmap_one_split(vq, i, premapped);
i = vq->split.desc_extra[i].next;
vq->vq.num_free++;
}
- vring_unmap_one_split(vq, i);
+ vring_unmap_one_split(vq, i, premapped);
vq->split.desc_extra[i].next = vq->free_head;
vq->free_head = head;
@@ -859,8 +907,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
VRING_DESC_F_INDIRECT));
BUG_ON(len == 0 || len % sizeof(struct vring_desc));
- for (j = 0; j < len / sizeof(struct vring_desc); j++)
- vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+ if (!premapped) {
+ for (j = 0; j < len / sizeof(struct vring_desc); j++)
+ vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+ }
kfree(indir_desc);
vq->split.desc_state[head].indir_desc = NULL;
@@ -2204,6 +2254,21 @@ static inline int virtqueue_add(struct virtqueue *_vq,
out_sgs, in_sgs, data, ctx, gfp);
}
+static inline int virtqueue_add_premapped(struct virtqueue *_vq,
+ struct scatterlist *sgs[],
+ unsigned int total_sg,
+ unsigned int out_sgs,
+ unsigned int in_sgs,
+ void *data,
+ void *ctx,
+ gfp_t gfp)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+
+ return virtqueue_add_split_premapped(_vq, sgs, total_sg, out_sgs,
+ in_sgs, data, ctx, gfp);
+}
+
/**
* virtqueue_add_sgs - expose buffers to other end
* @_vq: the struct virtqueue we're talking about.
@@ -2261,6 +2326,31 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
}
EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
+/**
+ * virtqueue_add_outbuf_premapped - expose output buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg readable by other side
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * It is required that all addrs have completed DMA operations. And use
+ * sg->dma_address, sg->length to pass addr and length.
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
+ struct scatterlist *sg, unsigned int num,
+ void *data,
+ gfp_t gfp)
+{
+ return virtqueue_add_premapped(vq, &sg, num, 1, 0, data, NULL, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_premapped);
+
/**
* virtqueue_add_inbuf - expose input buffers to other end
* @vq: the struct virtqueue we're talking about.
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index dcab9c7e8784..d8b472a7dcae 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -43,6 +43,11 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
void *data,
gfp_t gfp);
+int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
+ struct scatterlist *sg, unsigned int num,
+ void *data,
+ gfp_t gfp);
+
int virtqueue_add_inbuf(struct virtqueue *vq,
struct scatterlist sg[], unsigned int num,
void *data,
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14 07:26 UTC
[PATCH vhost 05/10] virtio_ring: packed: introduce virtqueue_add_packed_premapped()
virtqueue_add_packed() currently accepts only virtual addresses and performs
the DMA mapping itself.
In some scenarios (such as AF_XDP), the memory is allocated and DMA-mapped in
advance, so the virtio core needs to support being passed an already-mapped
DMA address.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 67 ++++++++++++++++++++++++++++++++----
1 file changed, 61 insertions(+), 6 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index a31155abe101..79244ccbae9e 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -78,6 +78,7 @@ struct vring_desc_state_packed {
struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
u16 num; /* Descriptor list length. */
u16 last; /* The last desc state in a list. */
+ bool premapped;
};
struct vring_desc_extra {
@@ -1318,7 +1319,8 @@ static inline u16 packed_last_used(u16 last_used_idx)
}
static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
- struct vring_desc_extra *extra)
+ struct vring_desc_extra *extra,
+ bool premapped)
{
u16 flags;
@@ -1333,6 +1335,9 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
(flags & VRING_DESC_F_WRITE) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
} else {
+ if (premapped)
+ return;
+
dma_unmap_page(vring_dma_dev(vq),
extra->addr, extra->len,
(flags & VRING_DESC_F_WRITE) ?
@@ -1382,7 +1387,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
struct vring_packed_desc *desc)
{
struct scatterlist *sg;
- unsigned int i, n, err_idx;
+ unsigned int i, n;
u16 head, id;
dma_addr_t addr;
@@ -1640,6 +1645,51 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
return err;
}
+static inline int virtqueue_add_packed_premapped(struct virtqueue *_vq,
+ struct scatterlist *sgs[],
+ unsigned int total_sg,
+ unsigned int out_sgs,
+ unsigned int in_sgs,
+ void *data,
+ void *ctx,
+ gfp_t gfp)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ struct vring_packed_desc *desc;
+ u16 id;
+ int err;
+
+ START_USE(vq);
+
+ /* check vq state and try to alloc desc for indirect. */
+ err = virtqueue_add_packed_prepare(vq, total_sg, data, ctx, &desc, gfp);
+ if (err)
+ goto end;
+
+ id = vq->free_head;
+
+ if (desc) {
+ err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, in_sgs, desc);
+ if (err)
+ goto err;
+ } else {
+ virtqueue_add_packed_vring(vq, sgs, total_sg, out_sgs, in_sgs);
+ vq->packed.desc_state[id].indir_desc = ctx;
+ }
+
+ vq->packed.desc_state[id].data = data;
+ vq->packed.desc_state[id].premapped = true;
+
+ goto end;
+
+err:
+ kfree(desc);
+
+end:
+ END_USE(vq);
+ return err;
+}
+
static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
@@ -1695,8 +1745,10 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
struct vring_desc_state_packed *state = NULL;
struct vring_packed_desc *desc;
unsigned int i, curr;
+ bool premapped;
state = &vq->packed.desc_state[id];
+ premapped = state->premapped;
/* Clear data ptr. */
state->data = NULL;
@@ -1709,7 +1761,8 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
curr = id;
for (i = 0; i < state->num; i++) {
vring_unmap_extra_packed(vq,
- &vq->packed.desc_extra[curr]);
+ &vq->packed.desc_extra[curr],
+ premapped);
curr = vq->packed.desc_extra[curr].next;
}
}
@@ -1722,7 +1775,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
if (!desc)
return;
- if (vq->use_dma_api) {
+ if (vq->use_dma_api && !premapped) {
len = vq->packed.desc_extra[id].len;
for (i = 0; i < len / sizeof(struct vring_packed_desc);
i++)
@@ -2265,8 +2318,10 @@ static inline int virtqueue_add_premapped(struct virtqueue *_vq,
{
struct vring_virtqueue *vq = to_vvq(_vq);
- return virtqueue_add_split_premapped(_vq, sgs, total_sg, out_sgs,
- in_sgs, data, ctx, gfp);
+ return vq->packed_ring ? virtqueue_add_packed_premapped(_vq, sgs, total_sg, out_sgs,
+ in_sgs, data, ctx, gfp) :
+ virtqueue_add_split_premapped(_vq, sgs, total_sg, out_sgs,
+ in_sgs, data, ctx, gfp);
}
/**
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14 07:27 UTC
[PATCH vhost 06/10] virtio_ring: introduce virtqueue_add_inbuf_premapped()
Introduce virtqueue_add_inbuf_premapped() to submit premapped sgs.
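A minimal, hypothetical sketch of how a receive queue could be refilled with
pre-mapped buffers (the rx_buf structure is invented, and buf->dma is assumed
to have been mapped by the driver in advance):

#include <linux/virtio.h>
#include <linux/scatterlist.h>

struct rx_buf {
	dma_addr_t dma;	/* mapped in advance by the driver */
	u32 len;	/* writable length for the device */
};

static int xsk_refill_one(struct virtqueue *vq, struct rx_buf *buf)
{
	struct scatterlist sg;

	sg_init_table(&sg, 1);
	sg.dma_address = buf->dma;	/* premapped address */
	sg.length = buf->len;

	/* The token 'buf' comes back from virtqueue_get_buf() later. */
	return virtqueue_add_inbuf_premapped(vq, &sg, 1, buf, GFP_ATOMIC);
}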
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 25 +++++++++++++++++++++++++
include/linux/virtio.h | 5 +++++
2 files changed, 30 insertions(+)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 79244ccbae9e..cd9364eb2345 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2452,6 +2452,31 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
+/**
+ * virtqueue_add_inbuf_premapped - expose input buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg writable by other side
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * It is required that all addrs have completed DMA operations. And use
+ * sg->dma_address, sg->length to pass addr and length.
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_inbuf_premapped(struct virtqueue *vq,
+ struct scatterlist *sg, unsigned int num,
+ void *data,
+ gfp_t gfp)
+{
+ return virtqueue_add_premapped(vq, &sg, num, 0, 1, data, NULL, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_premapped);
+
/**
* virtqueue_kick_prepare - first half of split virtqueue_kick call.
* @_vq: the struct virtqueue
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index d8b472a7dcae..3ebb346ebb7c 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -59,6 +59,11 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
void *ctx,
gfp_t gfp);
+int virtqueue_add_inbuf_premapped(struct virtqueue *vq,
+ struct scatterlist *sg, unsigned int num,
+ void *data,
+ gfp_t gfp);
+
int virtqueue_add_sgs(struct virtqueue *vq,
struct scatterlist *sgs[],
unsigned int out_sgs,
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14 07:27 UTC
[PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
Add virtio_dma_map() to DMA-map memory for the virtio device in advance, so
that a mapping can be kept across multiple add/get buf operations.
Add virtio_dma_unmap() to unmap such a DMA address.
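For example (a hypothetical usage sketch, not part of this patch), a driver
could map a memory chunk once at setup time, reuse the returned address across
many add/get buf operations, and unmap it only at teardown:

#include <linux/virtio.h>
#include <linux/dma-mapping.h>

static dma_addr_t xsk_pool_map(struct virtio_device *vdev, void *addr,
			       unsigned int size)
{
	dma_addr_t dma;

	dma = virtio_dma_map(&vdev->dev, addr, size, DMA_BIDIRECTIONAL);
	if (virtio_dma_mapping_error(&vdev->dev, dma))
		return DMA_MAPPING_ERROR;

	/* Keep 'dma' around; it stays valid across add/get buf cycles. */
	return dma;
}

static void xsk_pool_unmap(struct virtio_device *vdev, dma_addr_t dma,
			   unsigned int size)
{
	virtio_dma_unmap(&vdev->dev, dma, size, DMA_BIDIRECTIONAL);
}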
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++
include/linux/virtio.h | 9 ++++
2 files changed, 101 insertions(+)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index cd9364eb2345..855338609c7f 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring);
+/**
+ * virtio_dma_map_page - get the DMA addr of the memory for virtio device
+ * @dev: virtio device
+ * @page: the page of the memory to DMA
+ * @offset: the offset of the memory inside page
+ * @length: memory length
+ * @dir: DMA direction
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ *
+ * Returns the DMA addr. DMA_MAPPING_ERROR means error.
+ */
+dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
+ unsigned int length, enum dma_data_direction dir)
+{
+ struct virtio_device *vdev = dev_to_virtio(dev);
+
+ if (!vring_use_dma_api(vdev))
+ return page_to_phys(page) + offset;
+
+ return dma_map_page(vdev->dev.parent, page, offset, length, dir);
+}
+
+/**
+ * virtio_dma_map - get the DMA addr of the memory for virtio device
+ * @dev: virtio device
+ * @addr: the addr to DMA
+ * @length: memory length
+ * @dir: DMA direction
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ *
+ * Returns the DMA addr.
+ */
+dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
+ enum dma_data_direction dir)
+{
+ struct page *page;
+ size_t offset;
+
+ page = virt_to_page(addr);
+ offset = offset_in_page(addr);
+
+ return virtio_dma_map_page(dev, page, offset, length, dir);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_map);
+
+/**
+ * virtio_dma_mapping_error - check dma address
+ * @dev: virtio device
+ * @addr: DMA address
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ *
+ * Returns 0 means dma valid. Other means invalid dma address.
+ */
+int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr)
+{
+ struct virtio_device *vdev = dev_to_virtio(dev);
+
+ if (!vring_use_dma_api(vdev))
+ return 0;
+
+ return dma_mapping_error(vdev->dev.parent, addr);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_mapping_error);
+
+/**
+ * virtio_dma_unmap - unmap DMA addr
+ * @dev: virtio device
+ * @dma: DMA address
+ * @length: memory length
+ * @dir: DMA direction
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ */
+void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
+ enum dma_data_direction dir)
+{
+ struct virtio_device *vdev = dev_to_virtio(dev);
+
+ if (!vring_use_dma_api(vdev))
+ return;
+
+ dma_unmap_page(vdev->dev.parent, dma, length, dir);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_unmap);
+
MODULE_LICENSE("GPL");
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 3ebb346ebb7c..b5fa71476737 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -9,6 +9,7 @@
#include <linux/device.h>
#include <linux/mod_devicetable.h>
#include <linux/gfp.h>
+#include <linux/dma-mapping.h>
/**
* struct virtqueue - a queue to register buffers for sending or receiving.
@@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
#define module_virtio_driver(__virtio_driver) \
module_driver(__virtio_driver, register_virtio_driver, \
unregister_virtio_driver)
+
+dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset,
+ unsigned int length, enum dma_data_direction dir);
+dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
+ enum dma_data_direction dir);
+int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
+void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
+ enum dma_data_direction dir);
#endif /* _LINUX_VIRTIO_H */
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14 07:27 UTC
[PATCH vhost 08/10] virtio_ring: introduce dma sync api for virtio
The following APIs are introduced:
* virtio_dma_need_sync
* virtio_dma_sync_single_range_for_cpu
* virtio_dma_sync_single_range_for_device
These APIs can be used together with the premapped mechanism to synchronize
DMA buffers.
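A hypothetical example of pairing these helpers with a pre-mapped RX buffer;
the dma/offset/len values are placeholders supplied by the driver:

#include <linux/virtio.h>
#include <linux/dma-mapping.h>

static void xsk_rx_sync_for_cpu(struct virtio_device *vdev, dma_addr_t dma,
				unsigned long offset, size_t len)
{
	/* Skip the sync entirely when the platform does not need it. */
	if (!virtio_dma_need_sync(&vdev->dev, dma))
		return;

	virtio_dma_sync_single_range_for_cpu(&vdev->dev, dma, offset, len,
					     DMA_FROM_DEVICE);
}

static void xsk_rx_sync_for_device(struct virtio_device *vdev, dma_addr_t dma,
				   unsigned long offset, size_t len)
{
	if (!virtio_dma_need_sync(&vdev->dev, dma))
		return;

	virtio_dma_sync_single_range_for_device(&vdev->dev, dma, offset, len,
						DMA_FROM_DEVICE);
}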
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 70 ++++++++++++++++++++++++++++++++++++
include/linux/virtio.h | 8 +++++
2 files changed, 78 insertions(+)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 855338609c7f..84129b8c3e2a 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -3264,4 +3264,74 @@ void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
}
EXPORT_SYMBOL_GPL(virtio_dma_unmap);
+/**
+ * virtio_dma_need_sync - check a dma address needs sync
+ * @dev: virtio device
+ * @addr: DMA address
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ */
+bool virtio_dma_need_sync(struct device *dev, dma_addr_t addr)
+{
+ struct virtio_device *vdev = dev_to_virtio(dev);
+
+ if (!vring_use_dma_api(vdev))
+ return 0;
+
+ return dma_need_sync(vdev->dev.parent, addr);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_need_sync);
+
+/**
+ * virtio_dma_sync_single_range_for_cpu - dma sync for cpu
+ * @dev: virtio device
+ * @addr: DMA address
+ * @offset: DMA address offset
+ * @size: mem size for sync
+ * @dir: DMA direction
+ *
+ * Before calling this function, use virtio_dma_need_sync() to confirm that the
+ * DMA address really needs to be synchronized
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ */
+void virtio_dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir)
+{
+ struct virtio_device *vdev = dev_to_virtio(dev);
+
+ dma_sync_single_range_for_cpu(vdev->dev.parent, addr, offset,
+ size, DMA_BIDIRECTIONAL);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_sync_single_range_for_cpu);
+
+/**
+ * virtio_dma_sync_single_range_for_device - dma sync for device
+ * @dev: virtio device
+ * @addr: DMA address
+ * @offset: DMA address offset
+ * @size: mem size for sync
+ * @dir: DMA direction
+ *
+ * Before calling this function, use virtio_dma_need_sync() to confirm that the
+ * DMA address really needs to be synchronized
+ *
+ * This API is only for pre-mapped buffers, for non premapped buffers virtio
+ * core handles DMA API internally.
+ */
+void virtio_dma_sync_single_range_for_device(struct device *dev,
+ dma_addr_t addr,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir)
+{
+ struct virtio_device *vdev = dev_to_virtio(dev);
+
+ dma_sync_single_range_for_device(vdev->dev.parent, addr, offset,
+ size, DMA_BIDIRECTIONAL);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_sync_single_range_for_device);
+
MODULE_LICENSE("GPL");
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index b5fa71476737..d0e707d744a0 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -225,4 +225,12 @@ dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length,
int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr);
void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length,
enum dma_data_direction dir);
+bool virtio_dma_need_sync(struct device *dev, dma_addr_t addr);
+void virtio_dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir);
+void virtio_dma_sync_single_range_for_device(struct device *dev,
+ dma_addr_t addr,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir);
#endif /* _LINUX_VIRTIO_H */
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14 07:27 UTC
[PATCH vhost 09/10] virtio_ring: correct the expression of the description of virtqueue_resize()
Modify the "useless" to a more accurate "unused". Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 84129b8c3e2a..2ba60a14f557 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2865,7 +2865,7 @@ EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma); * virtqueue_resize - resize the vring of vq * @_vq: the struct virtqueue we're talking about. * @num: new ring num - * @recycle: callback for recycle the useless buffer + * @recycle: callback to recycle unused buffers * * When it is really necessary to create a new vring, it will set the current vq * into the reset state. Then call the passed callback to recycle the buffer -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14 07:27 UTC
[PATCH vhost 10/10] virtio_ring: introduce virtqueue_reset()
Introduce virtqueue_reset() to detach and recycle all the buffers left inside
a vq and reinitialize it.
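As a hypothetical illustration, a driver enabling or disabling an AF_XDP socket
on a queue could use it like this to drop everything queued before repurposing
the ring (the recycle callback and its pool are invented):

#include <linux/virtio.h>

/* Hypothetical recycle callback: hand unused buffer tokens back to the owner. */
static void xsk_recycle_buf(struct virtqueue *vq, void *buf)
{
	/* ... return 'buf' to its pool, unmap it, or free it ... */
}

static int xsk_drain_queue(struct virtqueue *vq)
{
	/*
	 * virtqueue_reset() disables the queue via the transport, recycles
	 * every unused buffer through the callback, reinitializes the ring
	 * and then re-enables the queue.
	 */
	return virtqueue_reset(vq, xsk_recycle_buf);
}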
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 50 ++++++++++++++++++++++++++++++++++++
include/linux/virtio.h | 2 ++
2 files changed, 52 insertions(+)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 2ba60a14f557..2750a365439a 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2930,6 +2930,56 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
}
EXPORT_SYMBOL_GPL(virtqueue_resize);
+/**
+ * virtqueue_reset - detach and recycle all unused buffers
+ * @_vq: the struct virtqueue we're talking about.
+ * @recycle: callback to recycle unused buffers
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error.
+ * 0: success.
+ * -EBUSY: Failed to sync with device, vq may not work properly
+ * -ENOENT: Transport or device not supported
+ * -EPERM: Operation not permitted
+ */
+int virtqueue_reset(struct virtqueue *_vq,
+ void (*recycle)(struct virtqueue *vq, void *buf))
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ struct virtio_device *vdev = vq->vq.vdev;
+ void *buf;
+ int err;
+
+ if (!vq->we_own_ring)
+ return -EPERM;
+
+ if (!vdev->config->disable_vq_and_reset)
+ return -ENOENT;
+
+ if (!vdev->config->enable_vq_after_reset)
+ return -ENOENT;
+
+ err = vdev->config->disable_vq_and_reset(_vq);
+ if (err)
+ return err;
+
+ while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
+ recycle(_vq, buf);
+
+ if (vq->packed_ring)
+ virtqueue_reinit_packed(vq);
+ else
+ virtqueue_reinit_split(vq);
+
+ if (vdev->config->enable_vq_after_reset(_vq))
+ return -EBUSY;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(virtqueue_reset);
+
/* Only available for split ring */
struct virtqueue *vring_new_virtqueue(unsigned int index,
unsigned int num,
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index d0e707d744a0..cf4c157e4e75 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -106,6 +106,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
int virtqueue_resize(struct virtqueue *vq, u32 num,
void (*recycle)(struct virtqueue *vq, void *buf));
+int virtqueue_reset(struct virtqueue *vq,
+ void (*recycle)(struct virtqueue *vq, void *buf));
/**
* struct virtio_device - representation of a device using virtio
--
2.32.0.3.g01195cf9f
On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo at linux.alibaba.com> wrote:
>
> XDP socket(AF_XDP) is an excellent bypass kernel network framework. The zero
> copy feature of xsk (XDP socket) needs to be supported by the driver. The
> performance of zero copy is very good.
>
> ENV: Qemu with vhost.
>
> vhost cpu | Guest APP CPU |Guest Softirq CPU | PPS
> -----------------------------|---------------|------------------|------------
> xmit by sockperf: 90% | 100% | | 318967
> xmit by xsk: 100% | 30% | 33% | 1192064

What's the setup of this test? CPU model/frequency, packet size,
zerocopy enabled or not.

(I remember I can get better performance with my old laptop through
pktgen (about 2Mpps))

Thanks

> recv by sockperf: 100% | 68% | 100% | 692288
> recv by xsk: 100% | 33% | 43% | 771670
>
> Before achieving the function of Virtio-Net, we also have to let virtio core
> support these features:
>
> 1. virtio core support premapped
> 2. virtio core support reset per-queue
> 3. introduce DMA APIs to virtio core
>
> Please review.
>
> Thanks.
>
> Xuan Zhuo (10):
>   virtio_ring: split: refactor virtqueue_add_split() for premapped
>   virtio_ring: packed: separate prepare code from
>     virtuque_add_indirect_packed()
>   virtio_ring: packed: refactor virtqueue_add_packed() for premapped
>   virtio_ring: split: introduce virtqueue_add_split_premapped()
>   virtio_ring: packed: introduce virtqueue_add_packed_premapped()
>   virtio_ring: introduce virtqueue_add_inbuf_premapped()
>   virtio_ring: add api virtio_dma_map() for advance dma
>   virtio_ring: introduce dma sync api for virtio
>   virtio_ring: correct the expression of the description of
>     virtqueue_resize()
>   virtio_ring: introduce virtqueue_reset()
>
> drivers/virtio/virtio_ring.c | 792 ++++++++++++++++++++++++++++-------
> include/linux/virtio.h | 29 ++
> 2 files changed, 659 insertions(+), 162 deletions(-)
>
> --
> 2.32.0.3.g01195cf9f
>