XDP socket (AF_XDP) is an excellent kernel-bypass network framework. The
zero-copy feature of xsk (XDP socket) needs to be supported by the driver,
and the zero-copy performance is very good.

ENV: Qemu with vhost.

                   vhost cpu | Guest APP CPU | Guest Softirq CPU | PPS
-----------------------------|---------------|-------------------|------------
xmit by sockperf:        90% |          100% |                   |      318967
xmit by xsk:            100% |           30% |               33% |     1192064
recv by sockperf:       100% |           68% |              100% |      692288
recv by xsk:            100% |           33% |               43% |      771670

Before adding this functionality to virtio-net, we first have to let the
virtio core support these features:

1. virtio core support premapped
2. virtio core support reset per-queue
3. introduce DMA APIs to virtio core

Please review.

Thanks.

v2:
 1. judge whether buffers are premapped based on sgs[0]->dma_address
 2. do unmap for non-indirect desc based on extra.addr
 3. do unmap for indirect desc based on indir_desc
 4. rename virtqueue_get_dma_dev to virtqueue_dma_dev

v1:
 1. expose the dma device; do NOT introduce APIs for dma and sync
 2. split some commits for review

Xuan Zhuo (12):
  virtio_ring: split: separate dma codes
  virtio_ring: packed: separate dma codes
  virtio_ring: packed-indirect: separate dma codes
  virtio_ring: split: support premapped
  virtio_ring: packed: support premapped
  virtio_ring: split-indirect: support premapped
  virtio_ring: packed-indirect: support premapped
  virtio_ring: update document for virtqueue_add_*
  virtio_ring: introduce virtqueue_dma_dev()
  virtio_ring: correct the expression of the description of
    virtqueue_resize()
  virtio_ring: separate the logic of reset/enable from virtqueue_resize
  virtio_ring: introduce virtqueue_reset()

 drivers/virtio/virtio.c      |   6 +
 drivers/virtio/virtio_ring.c | 354 +++++++++++++++++++++++++----------
 include/linux/virtio.h       |   4 +
 3 files changed, 260 insertions(+), 104 deletions(-)

--
2.32.0.3.g01195cf9f
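[Editor's note: a minimal sketch, not part of the series, of the driver-side
flow these three features enable: map a buffer once against the queue's DMA
device (patch 09) and submit it as a premapped sg (patches 04-07). The names
my_xmit_premapped and my_buf are illustrative only.]

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <linux/virtio.h>

static int my_xmit_premapped(struct virtqueue *vq, void *my_buf, size_t len)
{
	struct device *dma_dev = virtqueue_dma_dev(vq);
	struct scatterlist sg;
	dma_addr_t addr;

	/* Map once (e.g. at setup time); the mapping can then be reused
	 * across many add/get cycles instead of a per-buffer map/unmap.
	 */
	addr = dma_map_single(dma_dev, my_buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dma_dev, addr))
		return -ENOMEM;

	sg_init_one(&sg, my_buf, len);
	/* A non-NULL sg->dma_address tells the virtio core that this
	 * request is premapped, so the core skips its own mapping.
	 */
	sg.dma_address = addr;

	return virtqueue_add_outbuf(vq, &sg, 1, my_buf, GFP_ATOMIC);
}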
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 01/12] virtio_ring: split: separate dma codes
DMA-related logic is separated from virtqueue_add_split() into a new
function. The DMA address is saved as sg->dma_address, and
virtqueue_add_split() then uses it directly. The unmap operation becomes
simpler.

The purpose of this is to facilitate subsequent support for receiving DMA
addresses premapped by the drivers.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 110 ++++++++++++++++++++++++++---------
 1 file changed, 82 insertions(+), 28 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 41144b5246a8..8ace2f503953 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -520,6 +520,77 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
 	return next;
 }
 
+static void virtqueue_unmap_sgs(struct vring_virtqueue *vq,
+				struct scatterlist *sgs[],
+				unsigned int total_sg,
+				unsigned int out_sgs,
+				unsigned int in_sgs)
+{
+	struct scatterlist *sg;
+	unsigned int n;
+
+	if (!vq->use_dma_api)
+		return;
+
+	for (n = 0; n < out_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			if (!sg->dma_address)
+				return;
+
+			dma_unmap_page(vring_dma_dev(vq), sg->dma_address,
+				       sg->length, DMA_TO_DEVICE);
+		}
+	}
+
+	for (; n < (out_sgs + in_sgs); n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			if (!sg->dma_address)
+				return;
+
+			dma_unmap_page(vring_dma_dev(vq), sg->dma_address,
+				       sg->length, DMA_FROM_DEVICE);
+		}
+	}
+}
+
+static int virtqueue_map_sgs(struct vring_virtqueue *vq,
+			     struct scatterlist *sgs[],
+			     unsigned int total_sg,
+			     unsigned int out_sgs,
+			     unsigned int in_sgs)
+{
+	struct scatterlist *sg;
+	unsigned int n;
+
+	for (n = 0; n < out_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
+
+			if (vring_mapping_error(vq, addr))
+				goto err;
+
+			sg->dma_address = addr;
+		}
+	}
+
+	for (; n < (out_sgs + in_sgs); n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
+
+			if (vring_mapping_error(vq, addr))
+				goto err;
+
+			sg->dma_address = addr;
+		}
+	}
+
+	return 0;
+
+err:
+	virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+	return -ENOMEM;
+}
+
 static inline int virtqueue_add_split(struct virtqueue *_vq,
 				      struct scatterlist *sgs[],
 				      unsigned int total_sg,
@@ -532,9 +603,9 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct scatterlist *sg;
 	struct vring_desc *desc;
-	unsigned int i, n, avail, descs_used, prev, err_idx;
-	int head;
+	unsigned int i, n, avail, descs_used, prev;
 	bool indirect;
+	int head;
 
 	START_USE(vq);
 
@@ -586,32 +657,30 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 		return -ENOSPC;
 	}
 
+	if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+		return -ENOMEM;
+
 	for (n = 0; n < out_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			prev = i;
 			/* Note that we trust indirect descriptor
 			 * table since it use stream DMA mapping.
 			 */
-			i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
+			i = virtqueue_add_desc_split(_vq, desc, i,
+						     sg->dma_address,
+						     sg->length,
 						     VRING_DESC_F_NEXT,
 						     indirect);
 		}
 	}
 	for (; n < (out_sgs + in_sgs); n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			prev = i;
 			/* Note that we trust indirect descriptor
 			 * table since it use stream DMA mapping.
 			 */
-			i = virtqueue_add_desc_split(_vq, desc, i, addr,
+			i = virtqueue_add_desc_split(_vq, desc, i,
+						     sg->dma_address,
 						     sg->length,
 						     VRING_DESC_F_NEXT |
 						     VRING_DESC_F_WRITE,
@@ -679,22 +748,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	return 0;
 
 unmap_release:
-	err_idx = i;
-
-	if (indirect)
-		i = 0;
-	else
-		i = head;
-
-	for (n = 0; n < total_sg; n++) {
-		if (i == err_idx)
-			break;
-		if (indirect) {
-			vring_unmap_one_split_indirect(vq, &desc[i]);
-			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
-		} else
-			i = vring_unmap_one_split(vq, i);
-	}
+	virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
 
 	if (indirect)
 		kfree(desc);
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 02/12] virtio_ring: packed: separate dma codes
DMA-related logic is separated from virtqueue_add_packed(). The DMA address
is saved as sg->dma_address, and virtqueue_add_packed() then uses it
directly. The unmap operation becomes simpler.

The purpose of this is to facilitate subsequent support for receiving DMA
addresses premapped by the drivers.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 37 +++++++-----------------------------
 1 file changed, 7 insertions(+), 30 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 8ace2f503953..b4beb51072f7 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1419,9 +1419,9 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct vring_packed_desc *desc;
 	struct scatterlist *sg;
-	unsigned int i, n, c, descs_used, err_idx;
+	unsigned int i, n, c, descs_used;
 	__le16 head_flags, flags;
-	u16 head, id, prev, curr, avail_used_flags;
+	u16 head, id, prev, curr;
 	int err;
 
 	START_USE(vq);
@@ -1450,7 +1450,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	}
 
 	head = vq->packed.next_avail_idx;
-	avail_used_flags = vq->packed.avail_used_flags;
 
 	WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
 
@@ -1468,15 +1467,13 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	id = vq->free_head;
 	BUG_ON(id == vq->packed.vring.num);
 
+	if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+		return -ENOMEM;
+
 	curr = id;
 	c = 0;
 	for (n = 0; n < out_sgs + in_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
-					DMA_TO_DEVICE : DMA_FROM_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			flags = cpu_to_le16(vq->packed.avail_used_flags |
 				(++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
 				(n < out_sgs ? 0 : VRING_DESC_F_WRITE));
@@ -1485,12 +1482,12 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 			else
 				desc[i].flags = flags;
 
-			desc[i].addr = cpu_to_le64(addr);
+			desc[i].addr = cpu_to_le64(sg->dma_address);
 			desc[i].len = cpu_to_le32(sg->length);
 			desc[i].id = cpu_to_le16(id);
 
 			if (unlikely(vq->use_dma_api)) {
-				vq->packed.desc_extra[curr].addr = addr;
+				vq->packed.desc_extra[curr].addr = sg->dma_address;
 				vq->packed.desc_extra[curr].len = sg->length;
 				vq->packed.desc_extra[curr].flags =
 					le16_to_cpu(flags);
@@ -1536,26 +1533,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	END_USE(vq);
 
 	return 0;
-
-unmap_release:
-	err_idx = i;
-	i = head;
-	curr = vq->free_head;
-
-	vq->packed.avail_used_flags = avail_used_flags;
-
-	for (n = 0; n < total_sg; n++) {
-		if (i == err_idx)
-			break;
-		vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
-		curr = vq->packed.desc_extra[curr].next;
-		i++;
-		if (i >= vq->packed.vring.num)
-			i = 0;
-	}
-
-	END_USE(vq);
-	return -EIO;
 }
 
 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 03/12] virtio_ring: packed-indirect: separate dma codes
DMA-related logic is separated from virtqueue_add_indirect_packed(). The
DMA address is saved as sg->dma_address, and virtqueue_add_indirect_packed()
then uses it directly. The unmap operation becomes simpler.

The purpose of this is to facilitate subsequent support for receiving DMA
addresses premapped by the drivers.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index b4beb51072f7..221ff54fe58b 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1303,7 +1303,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 {
 	struct vring_packed_desc *desc;
 	struct scatterlist *sg;
-	unsigned int i, n, err_idx;
+	unsigned int i, n;
 	u16 head, id;
 	dma_addr_t addr;
 
@@ -1323,16 +1323,14 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 	id = vq->free_head;
 	BUG_ON(id == vq->packed.vring.num);
 
+	if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+		return -ENOMEM;
+
 	for (n = 0; n < out_sgs + in_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			addr = vring_map_one_sg(vq, sg, n < out_sgs ?
-					DMA_TO_DEVICE : DMA_FROM_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			desc[i].flags = cpu_to_le16(n < out_sgs ?
 						0 : VRING_DESC_F_WRITE);
-			desc[i].addr = cpu_to_le64(addr);
+			desc[i].addr = cpu_to_le64(sg->dma_address);
 			desc[i].len = cpu_to_le32(sg->length);
 			i++;
 		}
@@ -1396,10 +1394,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 	return 0;
 
 unmap_release:
-	err_idx = i;
-
-	for (i = 0; i < err_idx; i++)
-		vring_unmap_desc_packed(vq, &desc[i]);
+	virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
 
 	kfree(desc);
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 04/12] virtio_ring: split: support premapped
The virtio core only supports virtual addresses; DMA is completed inside
the virtio core. In some scenarios (such as AF_XDP), the memory is
allocated and the DMA mapping is done in advance, so it is necessary for us
to support passing a DMA address to the virtio core.

Drivers can use sg->dma_address to pass a premapped DMA address to the
virtio core. If one sg->dma_address is used, then all sgs must use
sg->dma_address; otherwise all dma_address must be NULL.

On the non-indirect path, if dma_address is used, extra.addr will be set
to DMA_MAPPING_ERROR, so we can skip it when unmapping.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 221ff54fe58b..61deaf0a4faf 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -457,6 +457,9 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
 				 (flags & VRING_DESC_F_WRITE) ?
 				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 	} else {
+		if (extra[i].addr == DMA_MAPPING_ERROR)
+			goto out;
+
 		dma_unmap_page(vring_dma_dev(vq),
 			       extra[i].addr,
 			       extra[i].len,
@@ -497,7 +500,8 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
 						    dma_addr_t addr,
 						    unsigned int len,
 						    u16 flags,
-						    bool indirect)
+						    bool indirect,
+						    bool do_map)
 {
 	struct vring_virtqueue *vring = to_vvq(vq);
 	struct vring_desc_extra *extra = vring->split.desc_extra;
@@ -511,7 +515,7 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
 		next = extra[i].next;
 		desc[i].next = cpu_to_virtio16(vq->vdev, next);
 
-		extra[i].addr = addr;
+		extra[i].addr = do_map ? addr : DMA_MAPPING_ERROR;
 		extra[i].len = len;
 		extra[i].flags = flags;
 	} else
@@ -604,7 +608,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	struct scatterlist *sg;
 	struct vring_desc *desc;
 	unsigned int i, n, avail, descs_used, prev;
-	bool indirect;
+	bool indirect, do_map;
 	int head;
 
 	START_USE(vq);
@@ -657,7 +661,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 		return -ENOSPC;
 	}
 
-	if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+	do_map = !sgs[0]->dma_address;
+	if (do_map && virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
 		return -ENOMEM;
 
 	for (n = 0; n < out_sgs; n++) {
@@ -670,7 +675,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 						     sg->dma_address,
 						     sg->length,
 						     VRING_DESC_F_NEXT,
-						     indirect);
+						     indirect, do_map);
 		}
 	}
 	for (; n < (out_sgs + in_sgs); n++) {
@@ -684,7 +689,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 						     sg->length,
 						     VRING_DESC_F_NEXT |
 						     VRING_DESC_F_WRITE,
-						     indirect);
+						     indirect, do_map);
 		}
 	}
 	/* Last one doesn't continue. */
@@ -705,7 +710,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 					 head, addr,
 					 total_sg * sizeof(struct vring_desc),
 					 VRING_DESC_F_INDIRECT,
-					 false);
+					 false, true);
 	}
 
 	/* We're using some buffers from the free list. */
@@ -748,7 +753,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	return 0;
 
 unmap_release:
-	virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+	if (do_map)
+		virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
 
 	if (indirect)
 		kfree(desc);
--
2.32.0.3.g01195cf9f
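[Editor's note: a hedged illustration of the all-or-nothing rule above; not
from the patch. Within one virtqueue_add_*() call, either every sg carries a
premapped dma_address or none does; the core only inspects
sgs[0]->dma_address (do_map = !sgs[0]->dma_address) to choose the path.
The function name, buffer sizes, and the premapped bufs/addrs are assumed.]

#include <linux/scatterlist.h>
#include <linux/virtio.h>

static int my_add_premapped_pair(struct virtqueue *vq, void *bufs[2],
				 dma_addr_t addrs[2], void *token)
{
	struct scatterlist sgs[2];
	struct scatterlist *sg_list[1] = { sgs };

	sg_init_table(sgs, 2);
	sg_set_buf(&sgs[0], bufs[0], 2048);
	sg_set_buf(&sgs[1], bufs[1], 2048);

	/* Premapped path: set dma_address on ALL entries. Leaving some
	 * NULL while others are set is invalid and would confuse unmap.
	 */
	sgs[0].dma_address = addrs[0];
	sgs[1].dma_address = addrs[1];

	/* One out chain of two entries, no in chains. */
	return virtqueue_add_sgs(vq, sg_list, 1, 0, token, GFP_ATOMIC);
}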
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 05/12] virtio_ring: packed: support premapped
The virtio core only supports virtual addresses; DMA is completed inside
the virtio core. In some scenarios (such as AF_XDP), the memory is
allocated and the DMA mapping is done in advance, so it is necessary for us
to support passing a DMA address to the virtio core.

Drivers can use sg->dma_address to pass a premapped DMA address to the
virtio core. If one sg->dma_address is used, then all sgs must use
sg->dma_address; otherwise all dma_address must be NULL.

On the non-indirect path, if dma_address is used, extra.addr will be set
to DMA_MAPPING_ERROR, so we can skip it when unmapping.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 61deaf0a4faf..66a071e3bdef 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1258,6 +1258,9 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
 				 (flags & VRING_DESC_F_WRITE) ?
 				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 	} else {
+		if (extra->addr == DMA_MAPPING_ERROR)
+			return;
+
 		dma_unmap_page(vring_dma_dev(vq),
 			       extra->addr, extra->len,
 			       (flags & VRING_DESC_F_WRITE) ?
@@ -1423,6 +1426,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	unsigned int i, n, c, descs_used;
 	__le16 head_flags, flags;
 	u16 head, id, prev, curr;
+	bool do_map;
 	int err;
 
 	START_USE(vq);
@@ -1468,7 +1472,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	id = vq->free_head;
 	BUG_ON(id == vq->packed.vring.num);
 
-	if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+	do_map = !sgs[0]->dma_address;
+	if (do_map && virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
 		return -ENOMEM;
 
 	curr = id;
@@ -1488,7 +1493,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 			desc[i].id = cpu_to_le16(id);
 
 			if (unlikely(vq->use_dma_api)) {
-				vq->packed.desc_extra[curr].addr = sg->dma_address;
+				vq->packed.desc_extra[curr].addr = do_map ?
+					sg->dma_address : DMA_MAPPING_ERROR;
 				vq->packed.desc_extra[curr].len = sg->length;
 				vq->packed.desc_extra[curr].flags =
 					le16_to_cpu(flags);
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 06/12] virtio_ring: split-indirect: support premapped
The virtio core only supports virtual addresses; DMA is completed inside
the virtio core. In some scenarios (such as AF_XDP), the memory is
allocated and the DMA mapping is done in advance, so it is necessary for us
to support passing a DMA address to the virtio core.

Drivers can use sg->dma_address to pass a premapped DMA address to the
virtio core. If one sg->dma_address is used, then all sgs must use
sg->dma_address; otherwise all dma_address must be NULL.

On the indirect path, if dma_address is used, desc_state.indir_desc will be
tagged with VRING_INDIRECT_PREMAPPED, so we can skip the unmap.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 66a071e3bdef..11827d2e56a8 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -231,6 +231,18 @@ static void vring_free(struct virtqueue *_vq);
  * Helpers.
  */
 
+#define VRING_INDIRECT_PREMAPPED BIT(0)
+
+#define desc_mix_dma_map(do_map, desc)	\
+	(do_map ? desc : (typeof(desc))((unsigned long)(desc) | VRING_INDIRECT_PREMAPPED))
+
+#define desc_rm_dma_map(desc)		\
+	((typeof(desc))((unsigned long)(desc) & ~VRING_INDIRECT_PREMAPPED))
+
+#define desc_map_inter(desc)		\
+	!((unsigned long)(desc) & VRING_INDIRECT_PREMAPPED)
+
+
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
 static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq,
@@ -725,7 +737,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	/* Store token and indirect buffer state. */
 	vq->split.desc_state[head].data = data;
 	if (indirect)
-		vq->split.desc_state[head].indir_desc = desc;
+		vq->split.desc_state[head].indir_desc = desc_mix_dma_map(do_map, desc);
 	else
 		vq->split.desc_state[head].indir_desc = ctx;
 
@@ -820,22 +832,26 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 	vq->vq.num_free++;
 
 	if (vq->indirect) {
-		struct vring_desc *indir_desc =
-				vq->split.desc_state[head].indir_desc;
+		struct vring_desc *mix = vq->split.desc_state[head].indir_desc;
+		struct vring_desc *indir_desc;
 		u32 len;
 
 		/* Free the indirect table, if any, now that it's unmapped. */
-		if (!indir_desc)
+		if (!mix)
 			return;
 
+		indir_desc = desc_rm_dma_map(mix);
+
 		len = vq->split.desc_extra[head].len;
 
 		BUG_ON(!(vq->split.desc_extra[head].flags &
 				VRING_DESC_F_INDIRECT));
 		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
 
-		for (j = 0; j < len / sizeof(struct vring_desc); j++)
-			vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+		if (desc_map_inter(mix)) {
+			for (j = 0; j < len / sizeof(struct vring_desc); j++)
+				vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+		}
 
 		kfree(indir_desc);
 		vq->split.desc_state[head].indir_desc = NULL;
--
2.32.0.3.g01195cf9f
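[Editor's note: an illustrative aside, not from the patch, on why the low
bit of indir_desc is free to carry the premapped flag: the indirect
descriptor table comes from kmalloc(), which returns pointers aligned to at
least ARCH_KMALLOC_MINALIGN, so bit 0 of a valid pointer is always zero.
The demo function and its WARN_ON checks are hypothetical; the macros are
those added by the patch inside virtio_ring.c.]

static void my_tagging_demo(struct vring_desc *desc)
{
	struct vring_desc *tagged, *orig;

	/* Tag the pointer for a premapped (do_map == false) request. */
	tagged = desc_mix_dma_map(false, desc);

	/* desc_map_inter() is now false: the core did not map these
	 * buffers, so detach_buf_split() skips its per-descriptor
	 * unmap loop.
	 */
	WARN_ON(desc_map_inter(tagged));

	/* Recover the original pointer before kfree(). */
	orig = desc_rm_dma_map(tagged);
	WARN_ON(orig != desc);
}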
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 07/12] virtio_ring: packed-indirect: support premapped
The virtio core only supports virtual addresses; DMA is completed inside
the virtio core. In some scenarios (such as AF_XDP), the memory is
allocated and the DMA mapping is done in advance, so it is necessary for us
to support passing a DMA address to the virtio core.

Drivers can use sg->dma_address to pass a premapped DMA address to the
virtio core. If one sg->dma_address is used, then all sgs must use
sg->dma_address; otherwise all dma_address must be NULL.

On the indirect path, if dma_address is used, desc_state.indir_desc will be
tagged with VRING_INDIRECT_PREMAPPED, so we can skip the unmap.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 11827d2e56a8..b23d301effb5 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1331,6 +1331,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 	unsigned int i, n;
 	u16 head, id;
 	dma_addr_t addr;
+	bool do_map;
 
 	head = vq->packed.next_avail_idx;
 	desc = alloc_indirect_packed(total_sg, gfp);
@@ -1348,7 +1349,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 	id = vq->free_head;
 	BUG_ON(id == vq->packed.vring.num);
 
-	if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
+	do_map = !sgs[0]->dma_address;
+	if (do_map && virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs))
 		return -ENOMEM;
 
 	for (n = 0; n < out_sgs + in_sgs; n++) {
@@ -1408,7 +1410,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 	/* Store token and indirect buffer state. */
 	vq->packed.desc_state[id].num = 1;
 	vq->packed.desc_state[id].data = data;
-	vq->packed.desc_state[id].indir_desc = desc;
+	vq->packed.desc_state[id].indir_desc = desc_mix_dma_map(do_map, desc);
 	vq->packed.desc_state[id].last = id;
 
 	vq->num_added += 1;
@@ -1419,7 +1421,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 	return 0;
 
 unmap_release:
-	virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+	if (do_map)
+		virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
 
 	kfree(desc);
 
@@ -1633,14 +1636,17 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
 	}
 
 	if (vq->indirect) {
+		struct vring_packed_desc *mix;
 		u32 len;
 
 		/* Free the indirect table, if any, now that it's unmapped. */
-		desc = state->indir_desc;
-		if (!desc)
+		mix = state->indir_desc;
+		if (!mix)
 			return;
 
-		if (vq->use_dma_api) {
+		desc = desc_rm_dma_map(mix);
+
+		if (vq->use_dma_api && desc_map_inter(mix)) {
 			len = vq->packed.desc_extra[id].len;
 			for (i = 0; i < len / sizeof(struct vring_packed_desc);
 					i++)
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 08/12] virtio_ring: update document for virtqueue_add_*
Update the documentation of the virtqueue_add_* API series, allowing
callers to use sg->dma_address to pass a premapped DMA address to the
virtio core.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index b23d301effb5..216ac8654982 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2190,6 +2190,10 @@ static inline int virtqueue_add(struct virtqueue *_vq,
  * Caller must ensure we don't call this with other virtqueue operations
  * at the same time (except where noted).
  *
+ * If the caller has done dma map then use sg->dma_address to pass dma address.
+ * If one sg->dma_address is used, then all sgs must use sg->dma_address;
+ * otherwise all sg->dma_address must be NULL.
+ *
  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
  */
 int virtqueue_add_sgs(struct virtqueue *_vq,
@@ -2224,6 +2228,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
  * Caller must ensure we don't call this with other virtqueue operations
  * at the same time (except where noted).
  *
+ * If the caller has done dma map then use sg->dma_address to pass dma address.
+ * If one sg->dma_address is used, then all sgs must use sg->dma_address;
+ * otherwise all sg->dma_address must be NULL.
+ *
  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
  */
 int virtqueue_add_outbuf(struct virtqueue *vq,
@@ -2246,6 +2254,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
  * Caller must ensure we don't call this with other virtqueue operations
  * at the same time (except where noted).
  *
+ * If the caller has done dma map then use sg->dma_address to pass dma address.
+ * If one sg->dma_address is used, then all sgs must use sg->dma_address;
+ * otherwise all sg->dma_address must be NULL.
+ *
  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
  */
 int virtqueue_add_inbuf(struct virtqueue *vq,
@@ -2269,6 +2281,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
  * Caller must ensure we don't call this with other virtqueue operations
  * at the same time (except where noted).
  *
+ * If the caller has done dma map then use sg->dma_address to pass dma address.
+ * If one sg->dma_address is used, then all sgs must use sg->dma_address;
+ * otherwise all sg->dma_address must be NULL.
+ *
  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
  */
 int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 09/12] virtio_ring: introduce virtqueue_dma_dev()
Added virtqueue_dma_dev() to get the DMA device for virtio, so that the
caller can do DMA operations in advance. The purpose is to keep memory
mapped across multiple add/get buf operations.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
Acked-by: Jason Wang <jasowang at redhat.com>
---
 drivers/virtio/virtio.c      |  6 ++++++
 drivers/virtio/virtio_ring.c | 17 +++++++++++++++++
 include/linux/virtio.h       |  2 ++
 3 files changed, 25 insertions(+)

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 3893dc29eb26..11c5035369e2 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
+#include <linux/dma-mapping.h>
 #include <linux/virtio.h>
 #include <linux/spinlock.h>
 #include <linux/virtio_config.h>
@@ -243,6 +244,11 @@ static int virtio_dev_probe(struct device *_d)
 	u64 driver_features;
 	u64 driver_features_legacy;
 
+	_d->dma_mask = &_d->coherent_dma_mask;
+	err = dma_set_mask_and_coherent(_d, DMA_BIT_MASK(64));
+	if (err)
+		return err;
+
 	/* We have a driver! */
 	virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
 
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 216ac8654982..f63637c288a0 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2297,6 +2297,23 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
 
+/**
+ * virtqueue_dma_dev - get the dma dev
+ * @_vq: the struct virtqueue we're talking about.
+ *
+ * Returns the dma dev. That can be used for the DMA API.
+ */
+struct device *virtqueue_dma_dev(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	if (vq->use_dma_api)
+		return vring_dma_dev(vq);
+	else
+		return &vq->vq.vdev->dev;
+}
+EXPORT_SYMBOL_GPL(virtqueue_dma_dev);
+
 /**
  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
  * @_vq: the struct virtqueue
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 2b472514c49b..1fa50191cf0a 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -61,6 +61,8 @@ int virtqueue_add_sgs(struct virtqueue *vq,
 		      void *data,
 		      gfp_t gfp);
 
+struct device *virtqueue_dma_dev(struct virtqueue *vq);
+
 bool virtqueue_kick(struct virtqueue *vq);
 
 bool virtqueue_kick_prepare(struct virtqueue *vq);
--
2.32.0.3.g01195cf9f
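[Editor's note: a sketch of the intended use, not from the patch: map a
long-lived buffer once against virtqueue_dma_dev() at setup, then only sync
ownership per use instead of mapping and unmapping on every descriptor.
frame/frame_dma and the function name are assumptions; frame_dma is assumed
to have been mapped earlier with dma_map_single() against the same device.]

static int my_tx_one(struct virtqueue *vq, void *frame,
		     dma_addr_t frame_dma, unsigned int len)
{
	struct device *dma_dev = virtqueue_dma_dev(vq);
	struct scatterlist sg;
	int err;

	/* Hand the buffer to the device: sync, don't re-map. */
	dma_sync_single_for_device(dma_dev, frame_dma, len, DMA_TO_DEVICE);

	sg_init_one(&sg, frame, len);
	sg.dma_address = frame_dma;	/* premapped */

	err = virtqueue_add_outbuf(vq, &sg, 1, frame, GFP_ATOMIC);
	if (err)
		return err;

	virtqueue_kick(vq);
	return 0;
}

/* After virtqueue_get_buf() returns the token, hand it back to the CPU:
 *	dma_sync_single_for_cpu(dma_dev, frame_dma, len, DMA_TO_DEVICE);
 */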
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 10/12] virtio_ring: correct the expression of the description of virtqueue_resize()
Replace "useless" with the more accurate "unused".

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
Acked-by: Jason Wang <jasowang at redhat.com>
---
 drivers/virtio/virtio_ring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index f63637c288a0..a705485fea47 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2702,7 +2702,7 @@ EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma);
  * virtqueue_resize - resize the vring of vq
  * @_vq: the struct virtqueue we're talking about.
  * @num: new ring num
- * @recycle: callback for recycle the useless buffer
+ * @recycle: callback to recycle unused buffers
  *
  * When it is really necessary to create a new vring, it will set the current vq
  * into the reset state. Then call the passed callback to recycle the buffer
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 11/12] virtio_ring: separate the logic of reset/enable from virtqueue_resize
The subsequent reset function will reuse this logic.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 58 ++++++++++++++++++++++++------------
 1 file changed, 39 insertions(+), 19 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index a705485fea47..f26bd7bbff5e 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2156,6 +2156,43 @@ static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
 	return -ENOMEM;
 }
 
+static int virtqueue_disable_and_recycle(struct virtqueue *_vq,
+					 void (*recycle)(struct virtqueue *vq, void *buf))
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct virtio_device *vdev = vq->vq.vdev;
+	void *buf;
+	int err;
+
+	if (!vq->we_own_ring)
+		return -EPERM;
+
+	if (!vdev->config->disable_vq_and_reset)
+		return -ENOENT;
+
+	if (!vdev->config->enable_vq_after_reset)
+		return -ENOENT;
+
+	err = vdev->config->disable_vq_and_reset(_vq);
+	if (err)
+		return err;
+
+	while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
+		recycle(_vq, buf);
+
+	return 0;
+}
+
+static int virtqueue_enable_after_reset(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct virtio_device *vdev = vq->vq.vdev;
+
+	if (vdev->config->enable_vq_after_reset(_vq))
+		return -EBUSY;
+
+	return 0;
+}
 
 /*
  * Generic functions and exported symbols.
  */
@@ -2726,13 +2763,8 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
 		     void (*recycle)(struct virtqueue *vq, void *buf))
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-	struct virtio_device *vdev = vq->vq.vdev;
-	void *buf;
 	int err;
 
-	if (!vq->we_own_ring)
-		return -EPERM;
-
 	if (num > vq->vq.num_max)
 		return -E2BIG;
 
@@ -2742,28 +2774,16 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
 	if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num)
 		return 0;
 
-	if (!vdev->config->disable_vq_and_reset)
-		return -ENOENT;
-
-	if (!vdev->config->enable_vq_after_reset)
-		return -ENOENT;
-
-	err = vdev->config->disable_vq_and_reset(_vq);
+	err = virtqueue_disable_and_recycle(_vq, recycle);
 	if (err)
 		return err;
 
-	while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
-		recycle(_vq, buf);
-
 	if (vq->packed_ring)
 		err = virtqueue_resize_packed(_vq, num);
 	else
 		err = virtqueue_resize_split(_vq, num);
 
-	if (vdev->config->enable_vq_after_reset(_vq))
-		return -EBUSY;
-
-	return err;
+	return virtqueue_enable_after_reset(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_resize);
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-08 06:44 UTC
[PATCH vhost v2 12/12] virtio_ring: introduce virtqueue_reset()
Introduce virtqueue_reset() to release all buffers inside the vq.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 33 +++++++++++++++++++++++++++++++++
 include/linux/virtio.h       |  2 ++
 2 files changed, 35 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index f26bd7bbff5e..1a8de916bb20 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2787,6 +2787,39 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
 }
 EXPORT_SYMBOL_GPL(virtqueue_resize);
 
+/**
+ * virtqueue_reset - detach and recycle all unused buffers
+ * @_vq: the struct virtqueue we're talking about.
+ * @recycle: callback to recycle unused buffers
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error.
+ * 0: success.
+ * -EBUSY: Failed to sync with device, vq may not work properly
+ * -ENOENT: Transport or device not supported
+ * -EPERM: Operation not permitted
+ */
+int virtqueue_reset(struct virtqueue *_vq,
+		    void (*recycle)(struct virtqueue *vq, void *buf))
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	int err;
+
+	err = virtqueue_disable_and_recycle(_vq, recycle);
+	if (err)
+		return err;
+
+	if (vq->packed_ring)
+		virtqueue_reinit_packed(vq);
+	else
+		virtqueue_reinit_split(vq);
+
+	return virtqueue_enable_after_reset(_vq);
+}
+EXPORT_SYMBOL_GPL(virtqueue_reset);
+
 /* Only available for split ring */
 struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      unsigned int num,
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 1fa50191cf0a..22bbd06ef8c8 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -97,6 +97,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
 
 int virtqueue_resize(struct virtqueue *vq, u32 num,
 		     void (*recycle)(struct virtqueue *vq, void *buf));
+int virtqueue_reset(struct virtqueue *vq,
+		    void (*recycle)(struct virtqueue *vq, void *buf));
 
 /**
  * struct virtio_device - representation of a device using virtio
--
2.32.0.3.g01195cf9f
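[Editor's note: a sketch of a likely caller, not from the patch. A driver
that rebinds its rx buffer pool (e.g. attaching an AF_XDP umem) can stop
the queue, let virtqueue_reset() hand every outstanding buffer back through
the recycle callback, then refill from the new pool. my_recycle and
my_rebind_queue are hypothetical names.]

static void my_recycle(struct virtqueue *vq, void *buf)
{
	/* buf is the token passed to virtqueue_add_*(); free or re-pool it. */
	kfree(buf);
}

static int my_rebind_queue(struct virtqueue *vq)
{
	int err;

	err = virtqueue_reset(vq, my_recycle);
	if (err)
		return err;	/* -EPERM/-ENOENT/-EBUSY, see kernel-doc above */

	/* ... repost fresh buffers with virtqueue_add_inbuf() ... */
	return 0;
}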
Michael S. Tsirkin
2023-Mar-10 09:05 UTC
[PATCH vhost v2 00/12] virtio core prepares for AF_XDP
On Wed, Mar 08, 2023 at 02:44:31PM +0800, Xuan Zhuo wrote:
> XDP socket (AF_XDP) is an excellent kernel-bypass network framework. The
> zero-copy feature of xsk (XDP socket) needs to be supported by the driver,
> and the zero-copy performance is very good.
>
> ENV: Qemu with vhost.
>
>                    vhost cpu | Guest APP CPU | Guest Softirq CPU | PPS
> -----------------------------|---------------|-------------------|------------
> xmit by sockperf:        90% |          100% |                   |      318967
> xmit by xsk:            100% |           30% |               33% |     1192064
> recv by sockperf:       100% |           68% |              100% |      692288
> recv by xsk:            100% |           33% |               43% |      771670
>
> Before adding this functionality to virtio-net, we first have to let the
> virtio core support these features:
>
> 1. virtio core support premapped
> 2. virtio core support reset per-queue
> 3. introduce DMA APIs to virtio core
>
> Please review.

Jason, can I get some acks on this?

> Thanks.
>
> v2:
>  1. judge whether buffers are premapped based on sgs[0]->dma_address
>  2. do unmap for non-indirect desc based on extra.addr
>  3. do unmap for indirect desc based on indir_desc
>  4. rename virtqueue_get_dma_dev to virtqueue_dma_dev
>
> v1:
>  1. expose the dma device; do NOT introduce APIs for dma and sync
>  2. split some commits for review
>
> Xuan Zhuo (12):
>   virtio_ring: split: separate dma codes
>   virtio_ring: packed: separate dma codes
>   virtio_ring: packed-indirect: separate dma codes
>   virtio_ring: split: support premapped
>   virtio_ring: packed: support premapped
>   virtio_ring: split-indirect: support premapped
>   virtio_ring: packed-indirect: support premapped
>   virtio_ring: update document for virtqueue_add_*
>   virtio_ring: introduce virtqueue_dma_dev()
>   virtio_ring: correct the expression of the description of
>     virtqueue_resize()
>   virtio_ring: separate the logic of reset/enable from virtqueue_resize
>   virtio_ring: introduce virtqueue_reset()
>
>  drivers/virtio/virtio.c      |   6 +
>  drivers/virtio/virtio_ring.c | 354 +++++++++++++++++++++++++----------
>  include/linux/virtio.h       |   4 +
>  3 files changed, 260 insertions(+), 104 deletions(-)
>
> --
> 2.32.0.3.g01195cf9f