AF_XDP (XDP socket) is an excellent kernel-bypass networking framework.
The zero-copy feature of xsk (XDP socket) needs to be supported by the
driver, and its performance is very good.
ENV: QEMU with vhost.
                   vhost cpu | Guest APP CPU | Guest Softirq CPU |     PPS
-----------------------------|---------------|-------------------|---------
xmit by sockperf:        90% |          100% |                   |   318967
xmit by xsk:            100% |           30% |               33% |  1192064
recv by sockperf:       100% |           68% |              100% |   692288
recv by xsk:            100% |           33% |               43% |   771670
Before implementing this in virtio-net, we first have to make the virtio
core support the following features (a rough usage sketch follows the list):
1. virtio core support premapped
2. virtio core support reset per-queue
3. introduce DMA APIs to virtio core
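As a rough sketch of how a driver is expected to combine these pieces
(illustrative only: vq, buf and len are placeholders, and error handling
is omitted):

	/* Map the buffer once, up front, using the vq's real DMA device. */
	struct device *dma_dev = virtqueue_dma_dev(vq);
	dma_addr_t addr = dma_map_single(dma_dev, buf, len, DMA_TO_DEVICE);

	/* Pass the premapped address to the virtio core via sg->dma_address. */
	struct scatterlist sg;

	sg_init_one(&sg, buf, len);
	sg.dma_address = addr;	/* non-NULL: virtio core skips its own mapping */
	virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);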
Please review.
Thanks.
v2:
1. use sgs[0]->dma_address to judge whether the buffers are premapped
2. use extra.addr to judge whether to do unmap for a non-indirect desc
3. use indir_desc to judge whether to do unmap for an indirect desc
4. rename virtqueue_get_dma_dev to virtqueue_dma_dev
v1:
1. expose the dma device; do NOT introduce APIs for dma map and sync
2. split some commits for easier review.
Xuan Zhuo (12):
virtio_ring: split: separate dma codes
virtio_ring: packed: separate dma codes
virtio_ring: packed-indirect: separate dma codes
virtio_ring: split: support premapped
virtio_ring: packed: support premapped
virtio_ring: split-indirect: support premapped
virtio_ring: packed-indirect: support premapped
virtio_ring: update document for virtqueue_add_*
virtio_ring: introduce virtqueue_dma_dev()
virtio_ring: correct the expression of the description of
virtqueue_resize()
virtio_ring: separate the logic of reset/enable from virtqueue_resize
virtio_ring: introduce virtqueue_reset()
drivers/virtio/virtio.c | 6 +
drivers/virtio/virtio_ring.c | 354 +++++++++++++++++++++++++----------
include/linux/virtio.h | 4 +
3 files changed, 260 insertions(+), 104 deletions(-)
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 01/12] virtio_ring: split: separate dma codes
The DMA-related logic is separated out of virtqueue_add_split() into a
new function. The DMA address is saved as sg->dma_address, and
virtqueue_add_split() then uses it directly. This makes the unmap
operation simpler.
The purpose of this is to facilitate the subsequent support for
receiving DMA addresses mapped by the drivers.
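The resulting shape of virtqueue_add_split() is a two-phase pattern (a
summary sketch of the diff below, not additional code):

	/* Phase 1: map every sg, recording each result in sg->dma_address. */
	if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
		return -ENOMEM;

	/* Phase 2: fill the ring descriptors from sg->dma_address.  If
	 * anything fails later, virtqueue_unmap_sgs() walks the same sg
	 * lists instead of the ring descriptors, which is what makes the
	 * unmap path simpler.
	 */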
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 110 ++++++++++++++++++++++++++---------
1 file changed, 82 insertions(+), 28 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 41144b5246a8..8ace2f503953 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -520,6 +520,77 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
return next;
}
+static void virtqueue_unmap_sgs(struct vring_virtqueue *vq,
+ struct scatterlist *sgs[],
+ unsigned int total_sg,
+ unsigned int out_sgs,
+ unsigned int in_sgs)
+{
+ struct scatterlist *sg;
+ unsigned int n;
+
+ if (!vq->use_dma_api)
+ return;
+
+ for (n = 0; n < out_sgs; n++) {
+ for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+ if (!sg->dma_address)
+ return;
+
+ dma_unmap_page(vring_dma_dev(vq), sg->dma_address,
+ sg->length, DMA_TO_DEVICE);
+ }
+ }
+
+ for (; n < (out_sgs + in_sgs); n++) {
+ for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+ if (!sg->dma_address)
+ return;
+
+ dma_unmap_page(vring_dma_dev(vq), sg->dma_address,
+ sg->length, DMA_FROM_DEVICE);
+ }
+ }
+}
+
+static int virtqueue_map_sgs(struct vring_virtqueue *vq,
+ struct scatterlist *sgs[],
+ unsigned int total_sg,
+ unsigned int out_sgs,
+ unsigned int in_sgs)
+{
+ struct scatterlist *sg;
+ unsigned int n;
+
+ for (n = 0; n < out_sgs; n++) {
+ for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+ dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
+
+ if (vring_mapping_error(vq, addr))
+ goto err;
+
+ sg->dma_address = addr;
+ }
+ }
+
+ for (; n < (out_sgs + in_sgs); n++) {
+ for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+ dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
+
+ if (vring_mapping_error(vq, addr))
+ goto err;
+
+ sg->dma_address = addr;
+ }
+ }
+
+ return 0;
+
+err:
+ virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+ return -ENOMEM;
+}
+
static inline int virtqueue_add_split(struct virtqueue *_vq,
struct scatterlist *sgs[],
unsigned int total_sg,
@@ -532,9 +603,9 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
struct vring_virtqueue *vq = to_vvq(_vq);
struct scatterlist *sg;
struct vring_desc *desc;
- unsigned int i, n, avail, descs_used, prev, err_idx;
- int head;
+ unsigned int i, n, avail, descs_used, prev;
bool indirect;
+ int head;
START_USE(vq);
@@ -586,32 +657,30 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
return -ENOSPC;
}
+ if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+ return -ENOMEM;
+
for (n = 0; n < out_sgs; n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
- dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
- if (vring_mapping_error(vq, addr))
- goto unmap_release;
-
prev = i;
/* Note that we trust indirect descriptor
* table since it use stream DMA mapping.
*/
- i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
+ i = virtqueue_add_desc_split(_vq, desc, i,
+ sg->dma_address,
+ sg->length,
VRING_DESC_F_NEXT,
indirect);
}
}
for (; n < (out_sgs + in_sgs); n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
- dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
- if (vring_mapping_error(vq, addr))
- goto unmap_release;
-
prev = i;
/* Note that we trust indirect descriptor
* table since it use stream DMA mapping.
*/
- i = virtqueue_add_desc_split(_vq, desc, i, addr,
+ i = virtqueue_add_desc_split(_vq, desc, i,
+ sg->dma_address,
sg->length,
VRING_DESC_F_NEXT |
VRING_DESC_F_WRITE,
@@ -679,22 +748,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
return 0;
unmap_release:
- err_idx = i;
-
- if (indirect)
- i = 0;
- else
- i = head;
-
- for (n = 0; n < total_sg; n++) {
- if (i == err_idx)
- break;
- if (indirect) {
- vring_unmap_one_split_indirect(vq, &desc[i]);
- i = virtio16_to_cpu(_vq->vdev, desc[i].next);
- } else
- i = vring_unmap_one_split(vq, i);
- }
+ virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
if (indirect)
kfree(desc);
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 02/12] virtio_ring: packed: separate dma codes
The DMA-related logic is separated out of virtqueue_add_packed(). The
DMA address is saved as sg->dma_address, and virtqueue_add_packed()
then uses it directly. This makes the unmap operation simpler.
The purpose of this is to facilitate the subsequent support for
receiving DMA addresses mapped by the drivers.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 37 +++++++-----------------------------
1 file changed, 7 insertions(+), 30 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 8ace2f503953..b4beb51072f7 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1419,9 +1419,9 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
struct vring_virtqueue *vq = to_vvq(_vq);
struct vring_packed_desc *desc;
struct scatterlist *sg;
- unsigned int i, n, c, descs_used, err_idx;
+ unsigned int i, n, c, descs_used;
__le16 head_flags, flags;
- u16 head, id, prev, curr, avail_used_flags;
+ u16 head, id, prev, curr;
int err;
START_USE(vq);
@@ -1450,7 +1450,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
}
head = vq->packed.next_avail_idx;
- avail_used_flags = vq->packed.avail_used_flags;
WARN_ON_ONCE(total_sg > vq->packed.vring.num &&
!vq->indirect);
@@ -1468,15 +1467,13 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
id = vq->free_head;
BUG_ON(id == vq->packed.vring.num);
+ if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+ return -ENOMEM;
+
curr = id;
c = 0;
for (n = 0; n < out_sgs + in_sgs; n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
- dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
- if (vring_mapping_error(vq, addr))
- goto unmap_release;
-
flags = cpu_to_le16(vq->packed.avail_used_flags |
(++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
(n < out_sgs ? 0 : VRING_DESC_F_WRITE));
@@ -1485,12 +1482,12 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
else
desc[i].flags = flags;
- desc[i].addr = cpu_to_le64(addr);
+ desc[i].addr = cpu_to_le64(sg->dma_address);
desc[i].len = cpu_to_le32(sg->length);
desc[i].id = cpu_to_le16(id);
if (unlikely(vq->use_dma_api)) {
- vq->packed.desc_extra[curr].addr = addr;
+ vq->packed.desc_extra[curr].addr = sg->dma_address;
vq->packed.desc_extra[curr].len = sg->length;
vq->packed.desc_extra[curr].flags = le16_to_cpu(flags);
@@ -1536,26 +1533,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
END_USE(vq);
return 0;
-
-unmap_release:
- err_idx = i;
- i = head;
- curr = vq->free_head;
-
- vq->packed.avail_used_flags = avail_used_flags;
-
- for (n = 0; n < total_sg; n++) {
- if (i == err_idx)
- break;
- vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
- curr = vq->packed.desc_extra[curr].next;
- i++;
- if (i >= vq->packed.vring.num)
- i = 0;
- }
-
- END_USE(vq);
- return -EIO;
}
static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 03/12] virtio_ring: packed-indirect: separate dma codes
The DMA-related logic is separated out of virtqueue_add_indirect_packed().
The DMA address is saved as sg->dma_address, and
virtqueue_add_indirect_packed() then uses it directly. This makes the
unmap operation simpler.
The purpose of this is to facilitate the subsequent support for
receiving DMA addresses mapped by the drivers.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index b4beb51072f7..221ff54fe58b 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1303,7 +1303,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
{
struct vring_packed_desc *desc;
struct scatterlist *sg;
- unsigned int i, n, err_idx;
+ unsigned int i, n;
u16 head, id;
dma_addr_t addr;
@@ -1323,16 +1323,14 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
id = vq->free_head;
BUG_ON(id == vq->packed.vring.num);
+ if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+ return -ENOMEM;
+
for (n = 0; n < out_sgs + in_sgs; n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
- addr = vring_map_one_sg(vq, sg, n < out_sgs ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
- if (vring_mapping_error(vq, addr))
- goto unmap_release;
-
desc[i].flags = cpu_to_le16(n < out_sgs ?
0 : VRING_DESC_F_WRITE);
- desc[i].addr = cpu_to_le64(addr);
+ desc[i].addr = cpu_to_le64(sg->dma_address);
desc[i].len = cpu_to_le32(sg->length);
i++;
}
@@ -1396,10 +1394,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
return 0;
unmap_release:
- err_idx = i;
-
- for (i = 0; i < err_idx; i++)
- vring_unmap_desc_packed(vq, &desc[i]);
+ virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
kfree(desc);
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 04/12] virtio_ring: split: support premapped
The virtio core currently only accepts virtual addresses and performs
the DMA mapping itself.
In some scenarios (such as AF_XDP), the memory is allocated and
DMA-mapped in advance, so the virtio core needs to support receiving a
DMA address that was mapped by the driver.
Drivers can use sg->dma_address to pass an already-mapped DMA address
to the virtio core. If one sg->dma_address is used, then all sgs must
use sg->dma_address; otherwise all dma_address must be null.
On the non-indirect path, if a premapped dma_address is used, extra.addr
is set to DMA_MAPPING_ERROR, so the unmap code can recognize such
descriptors and skip unmapping them.
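For example (an illustrative sketch, not part of the patch;
premapped_addr stands for an address the driver mapped itself):

	/* Premapped: the driver fills in sg->dma_address, and the virtio
	 * core does no mapping or unmapping for this buffer.
	 */
	sg_init_one(&sg, buf, len);
	sg.dma_address = premapped_addr;
	virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);

	/* Not premapped: sg_init_one() leaves dma_address as 0 (null), so
	 * the virtio core maps and unmaps the buffer itself, as before.
	 */
	sg_init_one(&sg, buf, len);
	virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);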
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 221ff54fe58b..61deaf0a4faf 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -457,6 +457,9 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
(flags & VRING_DESC_F_WRITE) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
} else {
+ if (extra[i].addr == DMA_MAPPING_ERROR)
+ goto out;
+
dma_unmap_page(vring_dma_dev(vq),
extra[i].addr,
extra[i].len,
@@ -497,7 +500,8 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
dma_addr_t addr,
unsigned int len,
u16 flags,
- bool indirect)
+ bool indirect,
+ bool do_map)
{
struct vring_virtqueue *vring = to_vvq(vq);
struct vring_desc_extra *extra = vring->split.desc_extra;
@@ -511,7 +515,7 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
next = extra[i].next;
desc[i].next = cpu_to_virtio16(vq->vdev, next);
- extra[i].addr = addr;
+ extra[i].addr = do_map ? addr : DMA_MAPPING_ERROR;
extra[i].len = len;
extra[i].flags = flags;
} else
@@ -604,7 +608,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
struct scatterlist *sg;
struct vring_desc *desc;
unsigned int i, n, avail, descs_used, prev;
- bool indirect;
+ bool indirect, do_map;
int head;
START_USE(vq);
@@ -657,7 +661,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
return -ENOSPC;
}
- if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+ do_map = !sgs[0]->dma_address;
+ if (do_map && virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
return -ENOMEM;
for (n = 0; n < out_sgs; n++) {
@@ -670,7 +675,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
sg->dma_address,
sg->length,
VRING_DESC_F_NEXT,
- indirect);
+ indirect, do_map);
}
}
for (; n < (out_sgs + in_sgs); n++) {
@@ -684,7 +689,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
sg->length,
VRING_DESC_F_NEXT |
VRING_DESC_F_WRITE,
- indirect);
+ indirect, do_map);
}
}
/* Last one doesn't continue. */
@@ -705,7 +710,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
head, addr,
total_sg * sizeof(struct vring_desc),
VRING_DESC_F_INDIRECT,
- false);
+ false, true);
}
/* We're using some buffers from the free list. */
@@ -748,7 +753,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
return 0;
unmap_release:
- virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+ if (do_map)
+ virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
if (indirect)
kfree(desc);
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 05/12] virtio_ring: packed: support premapped
The virtio core currently only accepts virtual addresses and performs
the DMA mapping itself.
In some scenarios (such as AF_XDP), the memory is allocated and
DMA-mapped in advance, so the virtio core needs to support receiving a
DMA address that was mapped by the driver.
Drivers can use sg->dma_address to pass an already-mapped DMA address
to the virtio core. If one sg->dma_address is used, then all sgs must
use sg->dma_address; otherwise all dma_address must be null.
On the non-indirect path, if a premapped dma_address is used, extra.addr
is set to DMA_MAPPING_ERROR, so the unmap code can recognize such
descriptors and skip unmapping them.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 61deaf0a4faf..66a071e3bdef 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1258,6 +1258,9 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
(flags & VRING_DESC_F_WRITE) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
} else {
+ if (extra->addr == DMA_MAPPING_ERROR)
+ return;
+
dma_unmap_page(vring_dma_dev(vq),
extra->addr, extra->len,
(flags & VRING_DESC_F_WRITE) ?
@@ -1423,6 +1426,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
unsigned int i, n, c, descs_used;
__le16 head_flags, flags;
u16 head, id, prev, curr;
+ bool do_map;
int err;
START_USE(vq);
@@ -1468,7 +1472,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
id = vq->free_head;
BUG_ON(id == vq->packed.vring.num);
- if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+ do_map = !sgs[0]->dma_address;
+ if (do_map && virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
return -ENOMEM;
curr = id;
@@ -1488,7 +1493,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
desc[i].id = cpu_to_le16(id);
if (unlikely(vq->use_dma_api)) {
- vq->packed.desc_extra[curr].addr = sg->dma_address;
+ vq->packed.desc_extra[curr].addr = do_map ? sg->dma_address :
+ DMA_MAPPING_ERROR;
vq->packed.desc_extra[curr].len = sg->length;
vq->packed.desc_extra[curr].flags = le16_to_cpu(flags);
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 06/12] virtio_ring: split-indirect: support premapped
The virtio core currently only accepts virtual addresses and performs
the DMA mapping itself.
In some scenarios (such as AF_XDP), the memory is allocated and
DMA-mapped in advance, so the virtio core needs to support receiving a
DMA address that was mapped by the driver.
Drivers can use sg->dma_address to pass an already-mapped DMA address
to the virtio core. If one sg->dma_address is used, then all sgs must
use sg->dma_address; otherwise all dma_address must be null.
On the indirect path, if a premapped dma_address is used, the
VRING_INDIRECT_PREMAPPED flag is mixed into desc_state.indir_desc, so
the unmap code can recognize such indirect tables and skip unmapping
their entries.
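The mixing relies on pointer tagging: the indirect table comes from
kmalloc() and is at least pointer-aligned, so bit 0 of indir_desc is
always zero and is free to carry a flag. A standalone illustration of
the technique used by the desc_mix_dma_map()/desc_rm_dma_map() helpers
below (mixed, premapped and need_unmap are placeholder names):

	/* Store: fold the "premapped" flag into bit 0 of the pointer. */
	mixed = premapped ? (void *)((unsigned long)desc | VRING_INDIRECT_PREMAPPED)
			  : (void *)desc;

	/* Load: strip the flag before dereferencing, and test it separately
	 * to decide whether the virtio core must unmap the entries itself.
	 */
	desc = (void *)((unsigned long)mixed & ~VRING_INDIRECT_PREMAPPED);
	need_unmap = !((unsigned long)mixed & VRING_INDIRECT_PREMAPPED);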
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 28 ++++++++++++++++++++++------
1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 66a071e3bdef..11827d2e56a8 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -231,6 +231,18 @@ static void vring_free(struct virtqueue *_vq);
* Helpers.
*/
+#define VRING_INDIRECT_PREMAPPED BIT(0)
+
+#define desc_mix_dma_map(do_map, desc) \
+ (do_map ? desc : (typeof(desc))((unsigned long)(desc) | VRING_INDIRECT_PREMAPPED))
+
+#define desc_rm_dma_map(desc) \
+ ((typeof(desc))((unsigned long)(desc) & ~VRING_INDIRECT_PREMAPPED))
+
+#define desc_map_inter(desc) \
+ !((unsigned long)(desc) & VRING_INDIRECT_PREMAPPED)
+
+
#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq,
@@ -725,7 +737,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
/* Store token and indirect buffer state. */
vq->split.desc_state[head].data = data;
if (indirect)
- vq->split.desc_state[head].indir_desc = desc;
+ vq->split.desc_state[head].indir_desc = desc_mix_dma_map(do_map, desc);
else
vq->split.desc_state[head].indir_desc = ctx;
@@ -820,22 +832,26 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
vq->vq.num_free++;
if (vq->indirect) {
- struct vring_desc *indir_desc = vq->split.desc_state[head].indir_desc;
+ struct vring_desc *mix = vq->split.desc_state[head].indir_desc;
+ struct vring_desc *indir_desc;
u32 len;
/* Free the indirect table, if any, now that it's unmapped. */
- if (!indir_desc)
+ if (!mix)
return;
+ indir_desc = desc_rm_dma_map(mix);
+
len = vq->split.desc_extra[head].len;
BUG_ON(!(vq->split.desc_extra[head].flags &
VRING_DESC_F_INDIRECT));
BUG_ON(len == 0 || len % sizeof(struct vring_desc));
- for (j = 0; j < len / sizeof(struct vring_desc); j++)
- vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+ if (desc_map_inter(mix)) {
+ for (j = 0; j < len / sizeof(struct vring_desc); j++)
+ vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+ }
kfree(indir_desc);
vq->split.desc_state[head].indir_desc = NULL;
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 07/12] virtio_ring: packed-indirect: support premapped
The virtio core currently only accepts virtual addresses and performs
the DMA mapping itself.
In some scenarios (such as AF_XDP), the memory is allocated and
DMA-mapped in advance, so the virtio core needs to support receiving a
DMA address that was mapped by the driver.
Drivers can use sg->dma_address to pass an already-mapped DMA address
to the virtio core. If one sg->dma_address is used, then all sgs must
use sg->dma_address; otherwise all dma_address must be null.
On the indirect path, if a premapped dma_address is used, the
VRING_INDIRECT_PREMAPPED flag is mixed into desc_state.indir_desc, so
the unmap code can recognize such indirect tables and skip unmapping
their entries.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 11827d2e56a8..b23d301effb5 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1331,6 +1331,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
unsigned int i, n;
u16 head, id;
dma_addr_t addr;
+ bool do_map;
head = vq->packed.next_avail_idx;
desc = alloc_indirect_packed(total_sg, gfp);
@@ -1348,7 +1349,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
id = vq->free_head;
BUG_ON(id == vq->packed.vring.num);
- if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+ do_map = !sgs[0]->dma_address;
+ if (do_map && virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
return -ENOMEM;
for (n = 0; n < out_sgs + in_sgs; n++) {
@@ -1408,7 +1410,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
/* Store token and indirect buffer state. */
vq->packed.desc_state[id].num = 1;
vq->packed.desc_state[id].data = data;
- vq->packed.desc_state[id].indir_desc = desc;
+ vq->packed.desc_state[id].indir_desc = desc_mix_dma_map(do_map, desc);
vq->packed.desc_state[id].last = id;
vq->num_added += 1;
@@ -1419,7 +1421,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
return 0;
unmap_release:
- virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+ if (do_map)
+ virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
kfree(desc);
@@ -1633,14 +1636,17 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
}
if (vq->indirect) {
+ struct vring_packed_desc *mix;
u32 len;
/* Free the indirect table, if any, now that it's unmapped. */
- desc = state->indir_desc;
- if (!desc)
+ mix = state->indir_desc;
+ if (!mix)
return;
- if (vq->use_dma_api) {
+ desc = desc_rm_dma_map(mix);
+
+ if (vq->use_dma_api && desc_map_inter(mix)) {
len = vq->packed.desc_extra[id].len;
for (i = 0; i < len / sizeof(struct vring_packed_desc);
i++)
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 08/12] virtio_ring: update document for virtqueue_add_*
Update the documentation of the virtqueue_add_* series of APIs, allowing
the callers to use sg->dma_address to pass the dma address to the virtio
core.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index b23d301effb5..216ac8654982 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2190,6 +2190,10 @@ static inline int virtqueue_add(struct virtqueue *_vq,
* Caller must ensure we don't call this with other virtqueue operations
* at the same time (except where noted).
*
+ * If the caller has done dma map then use sg->dma_address to pass dma address.
+ * If one sg->dma_address is used, then all sgs must use sg->dma_address;
+ * otherwise all sg->dma_address must be NULL.
+ *
* Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
*/
int virtqueue_add_sgs(struct virtqueue *_vq,
@@ -2224,6 +2228,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
* Caller must ensure we don't call this with other virtqueue operations
* at the same time (except where noted).
*
+ * If the caller has done dma map then use sg->dma_address to pass dma address.
+ * If one sg->dma_address is used, then all sgs must use sg->dma_address;
+ * otherwise all sg->dma_address must be NULL.
+ *
* Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
*/
int virtqueue_add_outbuf(struct virtqueue *vq,
@@ -2246,6 +2254,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
* Caller must ensure we don't call this with other virtqueue operations
* at the same time (except where noted).
*
+ * If the caller has done dma map then use sg->dma_address to pass dma address.
+ * If one sg->dma_address is used, then all sgs must use sg->dma_address;
+ * otherwise all sg->dma_address must be NULL.
+ *
* Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
*/
int virtqueue_add_inbuf(struct virtqueue *vq,
@@ -2269,6 +2281,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
* Caller must ensure we don't call this with other virtqueue operations
* at the same time (except where noted).
*
+ * If the caller has done dma map then use sg->dma_address to pass dma address.
+ * If one sg->dma_address is used, then all sgs must use sg->dma_address;
+ * otherwise all sg->dma_address must be NULL.
+ *
* Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
*/
int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 09/12] virtio_ring: introduce virtqueue_dma_dev()
Add virtqueue_dma_dev() to get the DMA device of a virtqueue, so the
caller can perform DMA operations in advance. The purpose is to keep
memory mapped across multiple add/get buf operations.
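For example, a driver could keep a buffer mapped across many add/get
cycles like this (a minimal sketch; buf, len and the point at which to
finally unmap are up to the driver):

	struct device *dma_dev = virtqueue_dma_dev(vq);

	/* Map once when the buffer enters the driver's pool ... */
	dma_addr_t addr = dma_map_single(dma_dev, buf, len, DMA_FROM_DEVICE);
	if (dma_mapping_error(dma_dev, addr))
		return -ENOMEM;

	/* ... reuse addr across many virtqueue add/get operations ... */

	/* ... and unmap only when the buffer finally leaves the pool. */
	dma_unmap_single(dma_dev, addr, len, DMA_FROM_DEVICE);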
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
Acked-by: Jason Wang <jasowang at redhat.com>
---
drivers/virtio/virtio.c | 6 ++++++
drivers/virtio/virtio_ring.c | 17 +++++++++++++++++
include/linux/virtio.h | 2 ++
3 files changed, 25 insertions(+)
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 3893dc29eb26..11c5035369e2 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/dma-mapping.h>
#include <linux/virtio.h>
#include <linux/spinlock.h>
#include <linux/virtio_config.h>
@@ -243,6 +244,11 @@ static int virtio_dev_probe(struct device *_d)
u64 driver_features;
u64 driver_features_legacy;
+ _d->dma_mask = &_d->coherent_dma_mask;
+ err = dma_set_mask_and_coherent(_d, DMA_BIT_MASK(64));
+ if (err)
+ return err;
+
/* We have a driver! */
virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 216ac8654982..f63637c288a0 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2297,6 +2297,23 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
+/**
+ * virtqueue_dma_dev - get the dma dev
+ * @_vq: the struct virtqueue we're talking about.
+ *
+ * Returns the dma dev. It can be used for the dma api.
+ */
+struct device *virtqueue_dma_dev(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+
+ if (vq->use_dma_api)
+ return vring_dma_dev(vq);
+ else
+ return &vq->vq.vdev->dev;
+}
+EXPORT_SYMBOL_GPL(virtqueue_dma_dev);
+
/**
* virtqueue_kick_prepare - first half of split virtqueue_kick call.
* @_vq: the struct virtqueue
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 2b472514c49b..1fa50191cf0a 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -61,6 +61,8 @@ int virtqueue_add_sgs(struct virtqueue *vq,
void *data,
gfp_t gfp);
+struct device *virtqueue_dma_dev(struct virtqueue *vq);
+
bool virtqueue_kick(struct virtqueue *vq);
bool virtqueue_kick_prepare(struct virtqueue *vq);
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 10/12] virtio_ring: correct the expression of the description of virtqueue_resize()
Change the word "useless" to the more accurate "unused".
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
Acked-by: Jason Wang <jasowang at redhat.com>
---
drivers/virtio/virtio_ring.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index f63637c288a0..a705485fea47 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2702,7 +2702,7 @@ EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma);
* virtqueue_resize - resize the vring of vq
* @_vq: the struct virtqueue we're talking about.
* @num: new ring num
- * @recycle: callback for recycle the useless buffer
+ * @recycle: callback to recycle unused buffers
*
* When it is really necessary to create a new vring, it will set the current vq
* into the reset state. Then call the passed callback to recycle the buffer
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 11/12] virtio_ring: separate the logic of reset/enable from virtqueue_resize
The subsequent reset function will reuse this logic.
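Both virtqueue_resize() (reworked below) and the upcoming
virtqueue_reset() then follow the same shape (a summary sketch, not
additional code):

	err = virtqueue_disable_and_recycle(_vq, recycle); /* reset vq, return unused buffers */
	if (err)
		return err;

	/* ... modify the ring here: resize it, or just reinitialize it ... */

	return virtqueue_enable_after_reset(_vq); /* re-enable the vq */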
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 58 ++++++++++++++++++++++++------------
1 file changed, 39 insertions(+), 19 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index a705485fea47..f26bd7bbff5e 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2156,6 +2156,43 @@ static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
return -ENOMEM;
}
+static int virtqueue_disable_and_recycle(struct virtqueue *_vq,
+ void (*recycle)(struct virtqueue *vq, void *buf))
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ struct virtio_device *vdev = vq->vq.vdev;
+ void *buf;
+ int err;
+
+ if (!vq->we_own_ring)
+ return -EPERM;
+
+ if (!vdev->config->disable_vq_and_reset)
+ return -ENOENT;
+
+ if (!vdev->config->enable_vq_after_reset)
+ return -ENOENT;
+
+ err = vdev->config->disable_vq_and_reset(_vq);
+ if (err)
+ return err;
+
+ while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
+ recycle(_vq, buf);
+
+ return 0;
+}
+
+static int virtqueue_enable_after_reset(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ struct virtio_device *vdev = vq->vq.vdev;
+
+ if (vdev->config->enable_vq_after_reset(_vq))
+ return -EBUSY;
+
+ return 0;
+}
/*
* Generic functions and exported symbols.
@@ -2726,13 +2763,8 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
void (*recycle)(struct virtqueue *vq, void *buf))
{
struct vring_virtqueue *vq = to_vvq(_vq);
- struct virtio_device *vdev = vq->vq.vdev;
- void *buf;
int err;
- if (!vq->we_own_ring)
- return -EPERM;
-
if (num > vq->vq.num_max)
return -E2BIG;
@@ -2742,28 +2774,16 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) ==
num)
return 0;
- if (!vdev->config->disable_vq_and_reset)
- return -ENOENT;
-
- if (!vdev->config->enable_vq_after_reset)
- return -ENOENT;
-
- err = vdev->config->disable_vq_and_reset(_vq);
+ err = virtqueue_disable_and_recycle(_vq, recycle);
if (err)
return err;
- while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
- recycle(_vq, buf);
-
if (vq->packed_ring)
err = virtqueue_resize_packed(_vq, num);
else
err = virtqueue_resize_split(_vq, num);
- if (vdev->config->enable_vq_after_reset(_vq))
- return -EBUSY;
-
- return err;
+ return virtqueue_enable_after_reset(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_resize);
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 12/12] virtio_ring: introduce virtqueue_reset()
Introduce virtqueue_reset() to release all buffers inside the vq.
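A driver would call it roughly like this (a hypothetical usage sketch,
e.g. for unbinding an AF_XDP pool; free_old_buf and rq are placeholder
names):

	static void free_old_buf(struct virtqueue *vq, void *buf)
	{
		/* return the detached buffer to the driver's pool, or free it */
	}

	...
	err = virtqueue_reset(rq->vq, free_old_buf);
	if (err)
		return err;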
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 33 +++++++++++++++++++++++++++++++++
include/linux/virtio.h | 2 ++
2 files changed, 35 insertions(+)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index f26bd7bbff5e..1a8de916bb20 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2787,6 +2787,39 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
}
EXPORT_SYMBOL_GPL(virtqueue_resize);
+/**
+ * virtqueue_reset - detach and recycle all unused buffers
+ * @_vq: the struct virtqueue we're talking about.
+ * @recycle: callback to recycle unused buffers
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error.
+ * 0: success.
+ * -EBUSY: Failed to sync with device, vq may not work properly
+ * -ENOENT: Transport or device not supported
+ * -EPERM: Operation not permitted
+ */
+int virtqueue_reset(struct virtqueue *_vq,
+ void (*recycle)(struct virtqueue *vq, void *buf))
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ int err;
+
+ err = virtqueue_disable_and_recycle(_vq, recycle);
+ if (err)
+ return err;
+
+ if (vq->packed_ring)
+ virtqueue_reinit_packed(vq);
+ else
+ virtqueue_reinit_split(vq);
+
+ return virtqueue_enable_after_reset(_vq);
+}
+EXPORT_SYMBOL_GPL(virtqueue_reset);
+
/* Only available for split ring */
struct virtqueue *vring_new_virtqueue(unsigned int index,
unsigned int num,
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 1fa50191cf0a..22bbd06ef8c8 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -97,6 +97,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
int virtqueue_resize(struct virtqueue *vq, u32 num,
void (*recycle)(struct virtqueue *vq, void *buf));
+int virtqueue_reset(struct virtqueue *vq,
+ void (*recycle)(struct virtqueue *vq, void *buf));
/**
* struct virtio_device - representation of a device using virtio
--
2.32.0.3.g01195cf9f
Michael S. Tsirkin
2023-Mar-10 09:05 UTC
[PATCH vhost v2 00/12] virtio core prepares for AF_XDP
On Wed, Mar 08, 2023 at 02:44:31PM +0800, Xuan Zhuo wrote:
> AF_XDP (XDP socket) is an excellent kernel-bypass networking framework.
> The zero-copy feature of xsk (XDP socket) needs to be supported by the
> driver, and its performance is very good.
>
> [...]
>
> Please review.

Jason, can I get some acks on this?

> Thanks.
>
> [...]