XDP socket (AF_XDP) is an excellent kernel-bypass networking framework. The
zero-copy feature of xsk (XDP socket) requires support from the driver, and
its performance is very good.
ENV: QEMU with vhost.
                 | vhost CPU | Guest APP CPU | Guest softirq CPU |     PPS
-----------------+-----------+---------------+-------------------+---------
xmit by sockperf |    90%    |     100%      |                   |  318967
xmit by xsk      |   100%    |      30%      |        33%        | 1192064
recv by sockperf |   100%    |      68%      |       100%        |  692288
recv by xsk      |   100%    |      33%      |        43%        |  771670
Before implementing this function in virtio-net, we also have to make the
virtio core support these features:
1. virtio core supports premapped DMA addresses
2. virtio core supports per-queue reset
3. introduce DMA APIs to the virtio core
Please review.
Thanks.
v6:
1. change the size of the flags to u32
v5:
1. fix error handling
2. add flags to record whether the dma mapping was done internally
v4:
1. rename map_inter to dma_map_internal
2. fix: excess function parameter 'vq' description in 'virtqueue_dma_dev'
v3:
1. add map_inter to the desc state structs to record whether the virtio core did the dma map
v2:
1. use sgs[0]->dma_address to judge whether the buffers are premapped
2. use extra.addr to decide whether to unmap non-indirect descs
3. use indir_desc to decide whether to unmap indirect descs
4. rename virtqueue_get_dma_dev to virtqueue_dma_dev
v1:
1. expose the dma device; do NOT introduce APIs for dma map and sync
2. split some commits for easier review
Xuan Zhuo (11):
virtio_ring: split: separate dma codes
virtio_ring: packed: separate dma codes
virtio_ring: packed-indirect: separate dma codes
virtio_ring: split: support premapped
virtio_ring: packed: support premapped
virtio_ring: packed-indirect: support premapped
virtio_ring: update document for virtqueue_add_*
virtio_ring: introduce virtqueue_dma_dev()
virtio_ring: correct the expression of the description of
virtqueue_resize()
virtio_ring: separate the logic of reset/enable from virtqueue_resize
virtio_ring: introduce virtqueue_reset()
drivers/virtio/virtio.c | 6 +
drivers/virtio/virtio_ring.c | 352 +++++++++++++++++++++++++----------
include/linux/virtio.h | 4 +
3 files changed, 265 insertions(+), 97 deletions(-)
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-27 04:05 UTC
[PATCH vhost v6 01/11] virtio_ring: split: separate dma codes
DMA-related logic is separated out of virtqueue_add_split() into a new
function. The DMA address is saved as sg->dma_address if use_dma_api is
true, and virtqueue_add_split() then uses it directly. The unmap
operation also becomes simpler.
The purpose of this is to facilitate the subsequent support for
receiving dma addresses already mapped by drivers.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 122 +++++++++++++++++++++++++++--------
1 file changed, 94 insertions(+), 28 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 41144b5246a8..2aafb7da793d 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -379,6 +379,14 @@ static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
direction);
}
+static dma_addr_t vring_sg_address(struct scatterlist *sg)
+{
+ if (sg->dma_address)
+ return sg->dma_address;
+
+ return (dma_addr_t)sg_phys(sg);
+}
+
static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
void *cpu_addr, size_t size,
enum dma_data_direction direction)
@@ -520,6 +528,80 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
return next;
}
+static void virtqueue_unmap_sgs(struct vring_virtqueue *vq,
+ struct scatterlist *sgs[],
+ unsigned int total_sg,
+ unsigned int out_sgs,
+ unsigned int in_sgs)
+{
+ struct scatterlist *sg;
+ unsigned int n;
+
+ if (!vq->use_dma_api)
+ return;
+
+ for (n = 0; n < out_sgs; n++) {
+ for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+ if (!sg->dma_address)
+ return;
+
+ dma_unmap_page(vring_dma_dev(vq), sg->dma_address,
+ sg->length, DMA_TO_DEVICE);
+ }
+ }
+
+ for (; n < (out_sgs + in_sgs); n++) {
+ for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+ if (!sg->dma_address)
+ return;
+
+ dma_unmap_page(vring_dma_dev(vq), sg->dma_address,
+ sg->length, DMA_FROM_DEVICE);
+ }
+ }
+}
+
+static int virtqueue_map_sgs(struct vring_virtqueue *vq,
+ struct scatterlist *sgs[],
+ unsigned int total_sg,
+ unsigned int out_sgs,
+ unsigned int in_sgs)
+{
+ struct scatterlist *sg;
+ unsigned int n;
+
+ if (!vq->use_dma_api)
+ return 0;
+
+ for (n = 0; n < out_sgs; n++) {
+ for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+ dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
+
+ if (vring_mapping_error(vq, addr))
+ goto err;
+
+ sg->dma_address = addr;
+ }
+ }
+
+ for (; n < (out_sgs + in_sgs); n++) {
+ for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+ dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
+
+ if (vring_mapping_error(vq, addr))
+ goto err;
+
+ sg->dma_address = addr;
+ }
+ }
+
+ return 0;
+
+err:
+ virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+ return -ENOMEM;
+}
+
static inline int virtqueue_add_split(struct virtqueue *_vq,
struct scatterlist *sgs[],
unsigned int total_sg,
@@ -532,9 +614,9 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
struct vring_virtqueue *vq = to_vvq(_vq);
struct scatterlist *sg;
struct vring_desc *desc;
- unsigned int i, n, avail, descs_used, prev, err_idx;
- int head;
+ unsigned int i, n, avail, descs_used, prev;
bool indirect;
+ int head;
START_USE(vq);
@@ -586,32 +668,30 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
return -ENOSPC;
}
+ if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+ goto err_map;
+
for (n = 0; n < out_sgs; n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
- dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
- if (vring_mapping_error(vq, addr))
- goto unmap_release;
-
prev = i;
/* Note that we trust indirect descriptor
* table since it use stream DMA mapping.
*/
- i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
+ i = virtqueue_add_desc_split(_vq, desc, i,
+ vring_sg_address(sg),
+ sg->length,
VRING_DESC_F_NEXT,
indirect);
}
}
for (; n < (out_sgs + in_sgs); n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
- dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
- if (vring_mapping_error(vq, addr))
- goto unmap_release;
-
prev = i;
/* Note that we trust indirect descriptor
* table since it use stream DMA mapping.
*/
- i = virtqueue_add_desc_split(_vq, desc, i, addr,
+ i = virtqueue_add_desc_split(_vq, desc, i,
+ vring_sg_address(sg),
sg->length,
VRING_DESC_F_NEXT |
VRING_DESC_F_WRITE,
@@ -679,23 +759,9 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
return 0;
unmap_release:
- err_idx = i;
-
- if (indirect)
- i = 0;
- else
- i = head;
-
- for (n = 0; n < total_sg; n++) {
- if (i == err_idx)
- break;
- if (indirect) {
- vring_unmap_one_split_indirect(vq, &desc[i]);
- i = virtio16_to_cpu(_vq->vdev, desc[i].next);
- } else
- i = vring_unmap_one_split(vq, i);
- }
+ virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+err_map:
if (indirect)
kfree(desc);
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-27 04:05 UTC
[PATCH vhost v6 02/11] virtio_ring: packed: separate dma codes
DMA-related logic is separated out of virtqueue_add_packed() into a new
function. The DMA address is saved as sg->dma_address, and
virtqueue_add_packed() then uses it directly. The unmap operation also
becomes simpler.
The purpose of this is to facilitate the subsequent support for
receiving dma addresses already mapped by drivers.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
Acked-by: Jason Wang <jasowang at redhat.com>
---
drivers/virtio/virtio_ring.c | 39 +++++++++---------------------------
1 file changed, 9 insertions(+), 30 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 2aafb7da793d..b1bf7266daa0 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1431,9 +1431,9 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
struct vring_virtqueue *vq = to_vvq(_vq);
struct vring_packed_desc *desc;
struct scatterlist *sg;
- unsigned int i, n, c, descs_used, err_idx;
+ unsigned int i, n, c, descs_used;
__le16 head_flags, flags;
- u16 head, id, prev, curr, avail_used_flags;
+ u16 head, id, prev, curr;
int err;
START_USE(vq);
@@ -1462,7 +1462,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
}
head = vq->packed.next_avail_idx;
- avail_used_flags = vq->packed.avail_used_flags;
WARN_ON_ONCE(total_sg > vq->packed.vring.num &&
!vq->indirect);
@@ -1480,15 +1479,15 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
id = vq->free_head;
BUG_ON(id == vq->packed.vring.num);
+ if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs)) {
+ END_USE(vq);
+ return -EIO;
+ }
+
curr = id;
c = 0;
for (n = 0; n < out_sgs + in_sgs; n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
- dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
- if (vring_mapping_error(vq, addr))
- goto unmap_release;
-
flags = cpu_to_le16(vq->packed.avail_used_flags |
(++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
(n < out_sgs ? 0 : VRING_DESC_F_WRITE));
@@ -1497,12 +1496,12 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
else
desc[i].flags = flags;
- desc[i].addr = cpu_to_le64(addr);
+ desc[i].addr = cpu_to_le64(vring_sg_address(sg));
desc[i].len = cpu_to_le32(sg->length);
desc[i].id = cpu_to_le16(id);
if (unlikely(vq->use_dma_api)) {
- vq->packed.desc_extra[curr].addr = addr;
+ vq->packed.desc_extra[curr].addr = vring_sg_address(sg);
vq->packed.desc_extra[curr].len = sg->length;
vq->packed.desc_extra[curr].flags = le16_to_cpu(flags);
@@ -1548,26 +1547,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
END_USE(vq);
return 0;
-
-unmap_release:
- err_idx = i;
- i = head;
- curr = vq->free_head;
-
- vq->packed.avail_used_flags = avail_used_flags;
-
- for (n = 0; n < total_sg; n++) {
- if (i == err_idx)
- break;
- vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
- curr = vq->packed.desc_extra[curr].next;
- i++;
- if (i >= vq->packed.vring.num)
- i = 0;
- }
-
- END_USE(vq);
- return -EIO;
}
static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-27 04:05 UTC
[PATCH vhost v6 03/11] virtio_ring: packed-indirect: separate dma codes
DMA-related logic is separated out of virtqueue_add_indirect_packed()
into a new function. The DMA address is saved as sg->dma_address, and
virtqueue_add_indirect_packed() then uses it directly. The unmap
operation also becomes simpler.
The purpose of this is to facilitate the subsequent support for
receiving dma addresses already mapped by drivers.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
Acked-by: Jason Wang <jasowang at redhat.com>
---
drivers/virtio/virtio_ring.c | 18 +++++++-----------
1 file changed, 7 insertions(+), 11 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index b1bf7266daa0..3ada30b475d2 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1315,7 +1315,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
{
struct vring_packed_desc *desc;
struct scatterlist *sg;
- unsigned int i, n, err_idx;
+ unsigned int i, n;
u16 head, id;
dma_addr_t addr;
@@ -1335,16 +1335,14 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
id = vq->free_head;
BUG_ON(id == vq->packed.vring.num);
+ if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+ goto err_map;
+
for (n = 0; n < out_sgs + in_sgs; n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
- addr = vring_map_one_sg(vq, sg, n < out_sgs ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
- if (vring_mapping_error(vq, addr))
- goto unmap_release;
-
desc[i].flags = cpu_to_le16(n < out_sgs ?
0 : VRING_DESC_F_WRITE);
- desc[i].addr = cpu_to_le64(addr);
+ desc[i].addr = cpu_to_le64(vring_sg_address(sg));
desc[i].len = cpu_to_le32(sg->length);
i++;
}
@@ -1408,11 +1406,9 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
return 0;
unmap_release:
- err_idx = i;
-
- for (i = 0; i < err_idx; i++)
- vring_unmap_desc_packed(vq, &desc[i]);
+ virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+err_map:
kfree(desc);
END_USE(vq);
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-27 04:05 UTC
[PATCH vhost v6 04/11] virtio_ring: split: support premapped
The virtio core currently only accepts virtual addresses; DMA mapping is
completed inside the virtio core.
In some scenarios (such as AF_XDP), the memory is allocated and
DMA-mapped in advance, so it is necessary for us to support passing an
already mapped DMA address to the virtio core.
Drivers can use sg->dma_address to pass the mapped dma address to the
virtio core, as sketched below. If one sg->dma_address is used, then all
sgs must use sg->dma_address; otherwise all must be NULL when passed to
the virtio APIs.
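
As a concrete illustration, a hypothetical driver-side sketch (not part
of this patch; it uses virtqueue_dma_dev() from patch 08 of this series,
and vq/buf/len are placeholder names):

	struct scatterlist sg;
	dma_addr_t addr;

	/* The driver maps the buffer itself... */
	addr = dma_map_single(virtqueue_dma_dev(vq), buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(virtqueue_dma_dev(vq), addr))
		return -ENOMEM;

	/* ...and passes the premapped dma address via sg->dma_address.
	 * A non-NULL sg->dma_address marks the submission as premapped,
	 * so the virtio core skips its own dma map/unmap for these sgs.
	 */
	sg_init_one(&sg, buf, len);
	sg.dma_address = addr;

	if (virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC) < 0)
		/* On failure the driver still owns the mapping. */
		dma_unmap_single(virtqueue_dma_dev(vq), addr, len,
				 DMA_TO_DEVICE);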
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 31 +++++++++++++++++++++++--------
1 file changed, 23 insertions(+), 8 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 3ada30b475d2..9d6acd59e3e0 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -67,9 +67,13 @@
#define LAST_ADD_TIME_INVALID(vq)
#endif
+#define VRING_STATE_F_MAP_INTERNAL BIT(0)
+
struct vring_desc_state_split {
void *data; /* Data for callback. */
struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
+ u32 flags; /* State flags. */
+ u32 padding;
};
struct vring_desc_state_packed {
@@ -448,7 +452,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
}
static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
- unsigned int i)
+ unsigned int i, bool dma_map_internal)
{
struct vring_desc_extra *extra = vq->split.desc_extra;
u16 flags;
@@ -465,6 +469,9 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
(flags & VRING_DESC_F_WRITE) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
} else {
+ if (!dma_map_internal)
+ goto out;
+
dma_unmap_page(vring_dma_dev(vq),
extra[i].addr,
extra[i].len,
@@ -615,7 +622,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
struct scatterlist *sg;
struct vring_desc *desc;
unsigned int i, n, avail, descs_used, prev;
- bool indirect;
+ bool indirect, dma_map_internal;
int head;
START_USE(vq);
@@ -668,7 +675,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
return -ENOSPC;
}
- if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+ dma_map_internal = !sgs[0]->dma_address;
+ if (dma_map_internal && virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
goto err_map;
for (n = 0; n < out_sgs; n++) {
@@ -735,6 +743,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
else
vq->split.desc_state[head].indir_desc = ctx;
+ vq->split.desc_state[head].flags = dma_map_internal ? VRING_STATE_F_MAP_INTERNAL : 0;
+
/* Put entry in available array (but don't update avail->idx until they
* do sync). */
avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
@@ -759,7 +769,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
return 0;
unmap_release:
- virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+ if (dma_map_internal)
+ virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
err_map:
if (indirect)
@@ -805,20 +816,22 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
{
unsigned int i, j;
__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
+ bool dma_map_internal;
/* Clear data ptr. */
vq->split.desc_state[head].data = NULL;
+ dma_map_internal = !!(vq->split.desc_state[head].flags & VRING_STATE_F_MAP_INTERNAL);
/* Put back on free list: unmap first-level descriptors and find end */
i = head;
while (vq->split.vring.desc[i].flags & nextflag) {
- vring_unmap_one_split(vq, i);
+ vring_unmap_one_split(vq, i, dma_map_internal);
i = vq->split.desc_extra[i].next;
vq->vq.num_free++;
}
- vring_unmap_one_split(vq, i);
+ vring_unmap_one_split(vq, i, dma_map_internal);
vq->split.desc_extra[i].next = vq->free_head;
vq->free_head = head;
@@ -840,8 +853,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
VRING_DESC_F_INDIRECT));
BUG_ON(len == 0 || len % sizeof(struct vring_desc));
- for (j = 0; j < len / sizeof(struct vring_desc); j++)
- vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+ if (dma_map_internal) {
+ for (j = 0; j < len / sizeof(struct vring_desc); j++)
+ vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+ }
kfree(indir_desc);
vq->split.desc_state[head].indir_desc = NULL;
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-27 04:05 UTC
[PATCH vhost v6 05/11] virtio_ring: packed: support premapped
The virtio core currently only accepts virtual addresses; DMA mapping is
completed inside the virtio core.
In some scenarios (such as AF_XDP), the memory is allocated and
DMA-mapped in advance, so it is necessary for us to support passing an
already mapped DMA address to the virtio core.
Drivers can use sg->dma_address to pass the mapped dma address to the
virtio core. If one sg->dma_address is used, then all sgs must use
sg->dma_address; otherwise all must be NULL when passed to the virtio
APIs.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 9d6acd59e3e0..2afff1dc6c74 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -81,6 +81,7 @@ struct vring_desc_state_packed {
struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
u16 num; /* Descriptor list length. */
u16 last; /* The last desc state in a list. */
+ u32 flags; /* State flags. */
};
struct vring_desc_extra {
@@ -1264,7 +1265,8 @@ static inline u16 packed_last_used(u16 last_used_idx)
}
static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
- struct vring_desc_extra *extra)
+ struct vring_desc_extra *extra,
+ bool dma_map_internal)
{
u16 flags;
@@ -1279,6 +1281,9 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
(flags & VRING_DESC_F_WRITE) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
} else {
+ if (!dma_map_internal)
+ return;
+
dma_unmap_page(vring_dma_dev(vq),
extra->addr, extra->len,
(flags & VRING_DESC_F_WRITE) ?
@@ -1445,6 +1450,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
unsigned int i, n, c, descs_used;
__le16 head_flags, flags;
u16 head, id, prev, curr;
+ bool dma_map_internal;
int err;
START_USE(vq);
@@ -1490,7 +1496,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
id = vq->free_head;
BUG_ON(id == vq->packed.vring.num);
- if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs)) {
+ dma_map_internal = !sgs[0]->dma_address;
+ if (dma_map_internal && virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs)) {
END_USE(vq);
return -EIO;
}
@@ -1544,6 +1551,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
vq->packed.desc_state[id].data = data;
vq->packed.desc_state[id].indir_desc = ctx;
vq->packed.desc_state[id].last = prev;
+ vq->packed.desc_state[id].flags = dma_map_internal ? VRING_STATE_F_MAP_INTERNAL : 0;
/*
* A driver MUST NOT make the first descriptor in the list
@@ -1615,8 +1623,10 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
struct vring_desc_state_packed *state = NULL;
struct vring_packed_desc *desc;
unsigned int i, curr;
+ bool dma_map_internal;
state = &vq->packed.desc_state[id];
+ dma_map_internal = !!(state->flags & VRING_STATE_F_MAP_INTERNAL);
/* Clear data ptr. */
state->data = NULL;
@@ -1629,7 +1639,8 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
curr = id;
for (i = 0; i < state->num; i++) {
vring_unmap_extra_packed(vq,
- &vq->packed.desc_extra[curr]);
+ &vq->packed.desc_extra[curr],
+ dma_map_internal);
curr = vq->packed.desc_extra[curr].next;
}
}
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-27 04:05 UTC
[PATCH vhost v6 06/11] virtio_ring: packed-indirect: support premapped
The virtio core currently only accepts virtual addresses; DMA mapping is
completed inside the virtio core.
In some scenarios (such as AF_XDP), the memory is allocated and
DMA-mapped in advance, so it is necessary for us to support passing an
already mapped DMA address to the virtio core.
Drivers can use sg->dma_address to pass the mapped dma address to the
virtio core. If one sg->dma_address is used, then all sgs must use
sg->dma_address; otherwise all dma_address must be NULL when passed to
the virtio APIs.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 2afff1dc6c74..d5dffbe50070 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1338,6 +1338,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
unsigned int i, n;
u16 head, id;
dma_addr_t addr;
+ bool dma_map_internal;
head = vq->packed.next_avail_idx;
desc = alloc_indirect_packed(total_sg, gfp);
@@ -1355,7 +1356,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
id = vq->free_head;
BUG_ON(id == vq->packed.vring.num);
- if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+ dma_map_internal = !sgs[0]->dma_address;
+ if (dma_map_internal && virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
goto err_map;
for (n = 0; n < out_sgs + in_sgs; n++) {
@@ -1417,6 +1419,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
vq->packed.desc_state[id].data = data;
vq->packed.desc_state[id].indir_desc = desc;
vq->packed.desc_state[id].last = id;
+ vq->packed.desc_state[id].flags = dma_map_internal ? VRING_STATE_F_MAP_INTERNAL : 0;
+
vq->num_added += 1;
@@ -1426,7 +1430,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
return 0;
unmap_release:
- virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+ if (dma_map_internal)
+ virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
err_map:
kfree(desc);
@@ -1653,7 +1658,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
if (!desc)
return;
- if (vq->use_dma_api) {
+ if (vq->use_dma_api && dma_map_internal) {
len = vq->packed.desc_extra[id].len;
for (i = 0; i < len / sizeof(struct vring_packed_desc);
i++)
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-27 04:05 UTC
[PATCH vhost v6 07/11] virtio_ring: update document for virtqueue_add_*
Update the documentation of the virtqueue_add_* series of APIs, allowing
the callers to use sg->dma_address to pass the dma address to the virtio
core.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
drivers/virtio/virtio_ring.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index d5dffbe50070..0b198ec3ff92 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2202,6 +2202,10 @@ static inline int virtqueue_add(struct virtqueue *_vq,
* Caller must ensure we don't call this with other virtqueue operations
* at the same time (except where noted).
*
+ * If the caller has done dma map then use sg->dma_address to pass dma address.
+ * If one sg->dma_address is used, then all sgs must use sg->dma_address;
+ * otherwise all sg->dma_address must be NULL.
+ *
* Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
*/
int virtqueue_add_sgs(struct virtqueue *_vq,
@@ -2236,6 +2240,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
* Caller must ensure we don't call this with other virtqueue operations
* at the same time (except where noted).
*
+ * If the caller has done dma map then use sg->dma_address to pass dma address.
+ * If one sg->dma_address is used, then all sgs must use sg->dma_address;
+ * otherwise all sg->dma_address must be NULL.
+ *
* Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
*/
int virtqueue_add_outbuf(struct virtqueue *vq,
@@ -2258,6 +2266,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
* Caller must ensure we don't call this with other virtqueue operations
* at the same time (except where noted).
*
+ * If the caller has done dma map then use sg->dma_address to pass dma address.
+ * If one sg->dma_address is used, then all sgs must use sg->dma_address;
+ * otherwise all sg->dma_address must be NULL.
+ *
* Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
*/
int virtqueue_add_inbuf(struct virtqueue *vq,
@@ -2281,6 +2293,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
* Caller must ensure we don't call this with other virtqueue operations
* at the same time (except where noted).
*
+ * If the caller has done dma map then use sg->dma_address to pass dma address.
+ * If one sg->dma_address is used, then all sgs must use sg->dma_address;
+ * otherwise all sg->dma_address must be NULL.
+ *
* Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
*/
int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-27 04:05 UTC
[PATCH vhost v6 08/11] virtio_ring: introduce virtqueue_dma_dev()
Add virtqueue_dma_dev() to get the DMA device for virtio, so that the
caller can perform DMA operations in advance. The purpose is to keep
memory mapped across multiple add/get buf operations.
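
For example (a minimal sketch under the assumption of placeholder
vq/buf/size names, not code from this patch), a driver can map once and
reuse the mapping across many submissions:

	struct device *dma_dev = virtqueue_dma_dev(vq);
	dma_addr_t addr;

	/* Map once with the DMA device the vring really uses. */
	addr = dma_map_single(dma_dev, buf, size, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dma_dev, addr))
		return -ENOMEM;

	/* Submit the buffer many times, passing "addr" through
	 * sg->dma_address each time; unmap only when the buffer is
	 * finally freed. */
	dma_unmap_single(dma_dev, addr, size, DMA_BIDIRECTIONAL);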
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
Acked-by: Jason Wang <jasowang at redhat.com>
---
drivers/virtio/virtio.c | 6 ++++++
drivers/virtio/virtio_ring.c | 17 +++++++++++++++++
include/linux/virtio.h | 2 ++
3 files changed, 25 insertions(+)
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 3893dc29eb26..11c5035369e2 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/dma-mapping.h>
#include <linux/virtio.h>
#include <linux/spinlock.h>
#include <linux/virtio_config.h>
@@ -243,6 +244,11 @@ static int virtio_dev_probe(struct device *_d)
u64 driver_features;
u64 driver_features_legacy;
+ _d->dma_mask = &_d->coherent_dma_mask;
+ err = dma_set_mask_and_coherent(_d, DMA_BIT_MASK(64));
+ if (err)
+ return err;
+
/* We have a driver! */
virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 0b198ec3ff92..7b538c16c9b5 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2309,6 +2309,23 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
+/**
+ * virtqueue_dma_dev - get the dma dev
+ * @_vq: the struct virtqueue we're talking about.
+ *
+ * Returns the dma dev. That can been used for dma api.
+ */
+struct device *virtqueue_dma_dev(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+
+ if (vq->use_dma_api)
+ return vring_dma_dev(vq);
+ else
+ return &vq->vq.vdev->dev;
+}
+EXPORT_SYMBOL_GPL(virtqueue_dma_dev);
+
/**
* virtqueue_kick_prepare - first half of split virtqueue_kick call.
* @_vq: the struct virtqueue
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 2b472514c49b..1fa50191cf0a 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -61,6 +61,8 @@ int virtqueue_add_sgs(struct virtqueue *vq,
void *data,
gfp_t gfp);
+struct device *virtqueue_dma_dev(struct virtqueue *vq);
+
bool virtqueue_kick(struct virtqueue *vq);
bool virtqueue_kick_prepare(struct virtqueue *vq);
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-27 04:05 UTC
[PATCH vhost v6 09/11] virtio_ring: correct the expression of the description of virtqueue_resize()
Modify the "useless" to a more accurate "unused".
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
Acked-by: Jason Wang <jasowang at redhat.com>
---
drivers/virtio/virtio_ring.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 7b538c16c9b5..46516d86aa0a 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2714,7 +2714,7 @@ EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma);
* virtqueue_resize - resize the vring of vq
* @_vq: the struct virtqueue we're talking about.
* @num: new ring num
- * @recycle: callback for recycle the useless buffer
+ * @recycle: callback to recycle unused buffers
*
* When it is really necessary to create a new vring, it will set the current vq
* into the reset state. Then call the passed callback to recycle the buffer
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-27 04:05 UTC
[PATCH vhost v6 10/11] virtio_ring: separate the logic of reset/enable from virtqueue_resize
The subsequent reset function will reuse this logic.
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
Acked-by: Jason Wang <jasowang at redhat.com>
---
drivers/virtio/virtio_ring.c | 58 ++++++++++++++++++++++++------------
1 file changed, 39 insertions(+), 19 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 46516d86aa0a..6b5d29fbb83d 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2168,6 +2168,43 @@ static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
return -ENOMEM;
}
+static int virtqueue_disable_and_recycle(struct virtqueue *_vq,
+ void (*recycle)(struct virtqueue *vq, void *buf))
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ struct virtio_device *vdev = vq->vq.vdev;
+ void *buf;
+ int err;
+
+ if (!vq->we_own_ring)
+ return -EPERM;
+
+ if (!vdev->config->disable_vq_and_reset)
+ return -ENOENT;
+
+ if (!vdev->config->enable_vq_after_reset)
+ return -ENOENT;
+
+ err = vdev->config->disable_vq_and_reset(_vq);
+ if (err)
+ return err;
+
+ while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
+ recycle(_vq, buf);
+
+ return 0;
+}
+
+static int virtqueue_enable_after_reset(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ struct virtio_device *vdev = vq->vq.vdev;
+
+ if (vdev->config->enable_vq_after_reset(_vq))
+ return -EBUSY;
+
+ return 0;
+}
/*
* Generic functions and exported symbols.
@@ -2738,13 +2775,8 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
void (*recycle)(struct virtqueue *vq, void *buf))
{
struct vring_virtqueue *vq = to_vvq(_vq);
- struct virtio_device *vdev = vq->vq.vdev;
- void *buf;
int err;
- if (!vq->we_own_ring)
- return -EPERM;
-
if (num > vq->vq.num_max)
return -E2BIG;
@@ -2754,28 +2786,16 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) ==
num)
return 0;
- if (!vdev->config->disable_vq_and_reset)
- return -ENOENT;
-
- if (!vdev->config->enable_vq_after_reset)
- return -ENOENT;
-
- err = vdev->config->disable_vq_and_reset(_vq);
+ err = virtqueue_disable_and_recycle(_vq, recycle);
if (err)
return err;
- while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
- recycle(_vq, buf);
-
if (vq->packed_ring)
err = virtqueue_resize_packed(_vq, num);
else
err = virtqueue_resize_split(_vq, num);
- if (vdev->config->enable_vq_after_reset(_vq))
- return -EBUSY;
-
- return err;
+ return virtqueue_enable_after_reset(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_resize);
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-27 04:05 UTC
[PATCH vhost v6 11/11] virtio_ring: introduce virtqueue_reset()
Introduce virtqueue_reset() to release all buffers inside the vq.
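
A hypothetical caller would look like the sketch below (drv_recycle, vq
and vdev are illustrative names, not taken from this series):

	static void drv_recycle(struct virtqueue *vq, void *buf)
	{
		/* Placeholder: a real driver frees or re-pools its own
		 * buffer type here. */
		kfree(buf);
	}

	/* Stop the queue, drop every unused buffer, then reinit the
	 * ring in place and re-enable it. */
	err = virtqueue_reset(vq, drv_recycle);
	if (err)
		dev_warn(&vdev->dev, "vq reset failed: %d\n", err);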
Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
Acked-by: Jason Wang <jasowang at redhat.com>
---
drivers/virtio/virtio_ring.c | 33 +++++++++++++++++++++++++++++++++
include/linux/virtio.h | 2 ++
2 files changed, 35 insertions(+)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 6b5d29fbb83d..e4cffd63fd06 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2799,6 +2799,39 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
}
EXPORT_SYMBOL_GPL(virtqueue_resize);
+/**
+ * virtqueue_reset - detach and recycle all unused buffers
+ * @_vq: the struct virtqueue we're talking about.
+ * @recycle: callback to recycle unused buffers
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error.
+ * 0: success.
+ * -EBUSY: Failed to sync with device, vq may not work properly
+ * -ENOENT: Transport or device not supported
+ * -EPERM: Operation not permitted
+ */
+int virtqueue_reset(struct virtqueue *_vq,
+ void (*recycle)(struct virtqueue *vq, void *buf))
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+ int err;
+
+ err = virtqueue_disable_and_recycle(_vq, recycle);
+ if (err)
+ return err;
+
+ if (vq->packed_ring)
+ virtqueue_reinit_packed(vq);
+ else
+ virtqueue_reinit_split(vq);
+
+ return virtqueue_enable_after_reset(_vq);
+}
+EXPORT_SYMBOL_GPL(virtqueue_reset);
+
/* Only available for split ring */
struct virtqueue *vring_new_virtqueue(unsigned int index,
unsigned int num,
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 1fa50191cf0a..22bbd06ef8c8 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -97,6 +97,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
int virtqueue_resize(struct virtqueue *vq, u32 num,
void (*recycle)(struct virtqueue *vq, void *buf));
+int virtqueue_reset(struct virtqueue *vq,
+ void (*recycle)(struct virtqueue *vq, void *buf));
/**
* struct virtio_device - representation of a device using virtio
--
2.32.0.3.g01195cf9f