XDP socket (AF_XDP) is an excellent kernel-bypass network framework. Its
zero-copy feature needs to be supported by the driver, and the zero-copy
performance is very good.

ENV: Qemu with vhost.

                    vhost cpu | Guest APP CPU |Guest Softirq CPU | PPS
-----------------------------|---------------|------------------|------------
xmit by sockperf:        90% |      100%     |                  |   318967
xmit by xsk:            100% |       30%     |        33%       |  1192064
recv by sockperf:       100% |       68%     |       100%       |   692288
recv by xsk:            100% |       33%     |        43%       |   771670

Before implementing this in virtio-net, we also have to let the virtio core
support these features:

1. virtio core support for premapped buffers
2. virtio core support for per-queue reset
3. DMA APIs introduced to the virtio core

Please review.

Thanks.

Xuan Zhuo (10):
  virtio_ring: split: refactor virtqueue_add_split() for premapped
  virtio_ring: packed: separate prepare code from
    virtqueue_add_indirect_packed()
  virtio_ring: packed: refactor virtqueue_add_packed() for premapped
  virtio_ring: split: introduce virtqueue_add_split_premapped()
  virtio_ring: packed: introduce virtqueue_add_packed_premapped()
  virtio_ring: introduce virtqueue_add_inbuf_premapped()
  virtio_ring: add api virtio_dma_map() for advance dma
  virtio_ring: introduce dma sync api for virtio
  virtio_ring: correct the expression of the description of
    virtqueue_resize()
  virtio_ring: introduce virtqueue_reset()

 drivers/virtio/virtio_ring.c | 792 ++++++++++++++++++++++++++++-------
 include/linux/virtio.h       |  29 ++
 2 files changed, 659 insertions(+), 162 deletions(-)

--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14 07:26 UTC
[PATCH vhost 01/10] virtio_ring: split: refactor virtqueue_add_split() for premapped
DMA-related logic is separated from the virtqueue_add_split to prepare for subsequent support for premapped. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> --- drivers/virtio/virtio_ring.c | 219 ++++++++++++++++++++++++----------- 1 file changed, 152 insertions(+), 67 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 41144b5246a8..560ee30d942c 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -520,29 +520,83 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq, return next; } -static inline int virtqueue_add_split(struct virtqueue *_vq, - struct scatterlist *sgs[], - unsigned int total_sg, - unsigned int out_sgs, - unsigned int in_sgs, - void *data, - void *ctx, - gfp_t gfp) +static int virtqueue_map_sgs(struct vring_virtqueue *vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs) { - struct vring_virtqueue *vq = to_vvq(_vq); struct scatterlist *sg; - struct vring_desc *desc; - unsigned int i, n, avail, descs_used, prev, err_idx; - int head; - bool indirect; + unsigned int n; - START_USE(vq); + for (n = 0; n < out_sgs; n++) { + for (sg = sgs[n]; sg; sg = sg_next(sg)) { + dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); + + if (vring_mapping_error(vq, addr)) + return -ENOMEM; + + sg->dma_address = addr; + } + } + for (; n < (out_sgs + in_sgs); n++) { + for (sg = sgs[n]; sg; sg = sg_next(sg)) { + dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); + + if (vring_mapping_error(vq, addr)) + return -ENOMEM; + + sg->dma_address = addr; + } + } + + return 0; +} + +static void virtqueue_unmap_sgs(struct vring_virtqueue *vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs) +{ + struct scatterlist *sg; + unsigned int n; + + for (n = 0; n < out_sgs; n++) { + for (sg = sgs[n]; sg; sg = sg_next(sg)) { + if (!sg->dma_address) + return; + + dma_unmap_single(vring_dma_dev(vq), sg->dma_address, + sg->length, DMA_TO_DEVICE); + } + } + for (; n < (out_sgs + in_sgs); n++) { + for (sg = sgs[n]; sg; sg = sg_next(sg)) { + if (!sg->dma_address) + return; + + dma_unmap_single(vring_dma_dev(vq), sg->dma_address, + sg->length, DMA_FROM_DEVICE); + } + } +} + +static inline int virtqueue_add_split_prepare(struct vring_virtqueue *vq, + unsigned int total_sg, + unsigned int out_sgs, + void *data, + void *ctx, + gfp_t gfp, + struct vring_desc **pdesc) +{ + struct vring_desc *desc; + unsigned int descs_used; BUG_ON(data == NULL); BUG_ON(ctx && vq->indirect); if (unlikely(vq->broken)) { - END_USE(vq); return -EIO; } @@ -550,27 +604,17 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, BUG_ON(total_sg == 0); - head = vq->free_head; - if (virtqueue_use_indirect(vq, total_sg)) - desc = alloc_indirect_split(_vq, total_sg, gfp); + desc = alloc_indirect_split(&vq->vq, total_sg, gfp); else { desc = NULL; WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); } - if (desc) { - /* Use a single buffer which doesn't continue */ - indirect = true; - /* Set up rest to use this indirect table. */ - i = 0; + if (desc) descs_used = 1; - } else { - indirect = false; - desc = vq->split.vring.desc; - i = head; + else descs_used = total_sg; - } if (unlikely(vq->vq.num_free < descs_used)) { pr_debug("Can't add buf len %i - avail = %i\n", @@ -580,38 +624,64 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, * host should service the ring ASAP. 
*/ if (out_sgs) vq->notify(&vq->vq); - if (indirect) - kfree(desc); - END_USE(vq); + kfree(desc); return -ENOSPC; } + *pdesc = desc; + + return 0; +} + +static inline int virtqueue_add_split_vring(struct vring_virtqueue *vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs, + struct vring_desc *desc) +{ + unsigned int n, i, avail, descs_used, prev; + struct virtqueue *_vq = &vq->vq; + struct scatterlist *sg; + bool indirect; + int head; + + head = vq->free_head; + + if (desc) { + /* Use a single buffer which doesn't continue */ + indirect = true; + /* Set up rest to use this indirect table. */ + i = 0; + descs_used = 1; + } else { + indirect = false; + desc = vq->split.vring.desc; + i = head; + descs_used = total_sg; + } + for (n = 0; n < out_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); - if (vring_mapping_error(vq, addr)) - goto unmap_release; - prev = i; /* Note that we trust indirect descriptor * table since it use stream DMA mapping. */ - i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length, + i = virtqueue_add_desc_split(_vq, desc, i, + sg->dma_address, + sg->length, VRING_DESC_F_NEXT, indirect); } } for (; n < (out_sgs + in_sgs); n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); - if (vring_mapping_error(vq, addr)) - goto unmap_release; - prev = i; /* Note that we trust indirect descriptor * table since it use stream DMA mapping. */ - i = virtqueue_add_desc_split(_vq, desc, i, addr, + i = virtqueue_add_desc_split(_vq, desc, i, + sg->dma_address, sg->length, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE, @@ -630,7 +700,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, vq, desc, total_sg * sizeof(struct vring_desc), DMA_TO_DEVICE); if (vring_mapping_error(vq, addr)) - goto unmap_release; + return -ENOMEM; virtqueue_add_desc_split(_vq, vq->split.vring.desc, head, addr, @@ -648,13 +718,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, else vq->free_head = i; - /* Store token and indirect buffer state. */ - vq->split.desc_state[head].data = data; - if (indirect) - vq->split.desc_state[head].indir_desc = desc; - else - vq->split.desc_state[head].indir_desc = ctx; - /* Put entry in available array (but don't update avail->idx until they * do sync). */ avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); @@ -677,30 +740,52 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, virtqueue_kick(_vq); return 0; +} -unmap_release: - err_idx = i; +static inline int virtqueue_add_split(struct virtqueue *_vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + void *ctx, + gfp_t gfp) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct vring_desc *desc; + int head; + int err; - if (indirect) - i = 0; - else - i = head; + START_USE(vq); - for (n = 0; n < total_sg; n++) { - if (i == err_idx) - break; - if (indirect) { - vring_unmap_one_split_indirect(vq, &desc[i]); - i = virtio16_to_cpu(_vq->vdev, desc[i].next); - } else - i = vring_unmap_one_split(vq, i); - } + /* check vq state and try to alloc desc for indirect. 
*/ + err = virtqueue_add_split_prepare(vq, total_sg, out_sgs, data, ctx, gfp, &desc); + if (err) + goto end; - if (indirect) - kfree(desc); + err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs); + if (err) + goto err; + head = vq->free_head; + err = virtqueue_add_split_vring(vq, sgs, total_sg, out_sgs, in_sgs, desc); + if (err) + goto err; + + /* Store token and indirect buffer state. */ + vq->split.desc_state[head].data = data; + vq->split.desc_state[head].indir_desc = desc ? desc : ctx; + + goto end; + +err: + virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs); + + kfree(desc); + +end: END_USE(vq); - return -ENOMEM; + return err; } static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14 07:26 UTC
[PATCH vhost 02/10] virtio_ring: packed: separate prepare code from virtqueue_add_indirect_packed()
Separating the logic of allocating indirect desc and checking queue status to the upper layer function. The proposal of this is convenient to refactor virtqueue_add_packed() for premapped. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> --- drivers/virtio/virtio_ring.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 560ee30d942c..42b1ff87518e 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1330,25 +1330,14 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, unsigned int out_sgs, unsigned int in_sgs, void *data, - gfp_t gfp) + struct vring_packed_desc *desc) { - struct vring_packed_desc *desc; struct scatterlist *sg; unsigned int i, n, err_idx; u16 head, id; dma_addr_t addr; head = vq->packed.next_avail_idx; - desc = alloc_indirect_packed(total_sg, gfp); - if (!desc) - return -ENOMEM; - - if (unlikely(vq->vq.num_free < 1)) { - pr_debug("Can't add buf len 1 - avail = 0\n"); - kfree(desc); - END_USE(vq); - return -ENOSPC; - } i = 0; id = vq->free_head; @@ -1470,11 +1459,17 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, BUG_ON(total_sg == 0); if (virtqueue_use_indirect(vq, total_sg)) { - err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, - in_sgs, data, gfp); - if (err != -ENOMEM) { - END_USE(vq); - return err; + desc = alloc_indirect_packed(total_sg, gfp); + if (desc) { + if (unlikely(vq->vq.num_free < 1)) { + pr_debug("Can't add buf len 1 - avail = 0\n"); + kfree(desc); + END_USE(vq); + return -ENOSPC; + } + + return virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, + in_sgs, data, desc); } /* fall back on direct */ -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14 07:26 UTC
[PATCH vhost 03/10] virtio_ring: packed: refactor virtqueue_add_packed() for premapped
DMA-related logic is separated from virtqueue_add_packed to prepare for the subsequent support for premapped. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> --- drivers/virtio/virtio_ring.c | 150 ++++++++++++++++++----------------- 1 file changed, 78 insertions(+), 72 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 42b1ff87518e..47b6f9152f9f 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1329,7 +1329,6 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, unsigned int total_sg, unsigned int out_sgs, unsigned int in_sgs, - void *data, struct vring_packed_desc *desc) { struct scatterlist *sg; @@ -1345,14 +1344,9 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, for (n = 0; n < out_sgs + in_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - addr = vring_map_one_sg(vq, sg, n < out_sgs ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (vring_mapping_error(vq, addr)) - goto unmap_release; - desc[i].flags = cpu_to_le16(n < out_sgs ? 0 : VRING_DESC_F_WRITE); - desc[i].addr = cpu_to_le64(addr); + desc[i].addr = cpu_to_le64(sg->dma_address); desc[i].len = cpu_to_le32(sg->length); i++; } @@ -1363,7 +1357,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, total_sg * sizeof(struct vring_packed_desc), DMA_TO_DEVICE); if (vring_mapping_error(vq, addr)) - goto unmap_release; + return -ENOMEM; vq->packed.vring.desc[head].addr = cpu_to_le64(addr); vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * @@ -1404,53 +1398,30 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, /* Store token and indirect buffer state. */ vq->packed.desc_state[id].num = 1; - vq->packed.desc_state[id].data = data; vq->packed.desc_state[id].indir_desc = desc; vq->packed.desc_state[id].last = id; vq->num_added += 1; pr_debug("Added buffer head %i to %p\n", head, vq); - END_USE(vq); return 0; - -unmap_release: - err_idx = i; - - for (i = 0; i < err_idx; i++) - vring_unmap_desc_packed(vq, &desc[i]); - - kfree(desc); - - END_USE(vq); - return -ENOMEM; } -static inline int virtqueue_add_packed(struct virtqueue *_vq, - struct scatterlist *sgs[], - unsigned int total_sg, - unsigned int out_sgs, - unsigned int in_sgs, - void *data, - void *ctx, - gfp_t gfp) +static inline int virtqueue_add_packed_prepare(struct vring_virtqueue *vq, + unsigned int total_sg, + void *data, + void *ctx, + struct vring_packed_desc **pdesc, + gfp_t gfp) { - struct vring_virtqueue *vq = to_vvq(_vq); struct vring_packed_desc *desc; - struct scatterlist *sg; - unsigned int i, n, c, descs_used, err_idx; - __le16 head_flags, flags; - u16 head, id, prev, curr, avail_used_flags; - int err; - - START_USE(vq); + unsigned int descs_used; BUG_ON(data == NULL); BUG_ON(ctx && vq->indirect); if (unlikely(vq->broken)) { - END_USE(vq); return -EIO; } @@ -1458,39 +1429,55 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, BUG_ON(total_sg == 0); + desc = NULL; + if (virtqueue_use_indirect(vq, total_sg)) { desc = alloc_indirect_packed(total_sg, gfp); if (desc) { if (unlikely(vq->vq.num_free < 1)) { pr_debug("Can't add buf len 1 - avail = 0\n"); kfree(desc); - END_USE(vq); return -ENOSPC; } - return virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, - in_sgs, data, desc); + return 0; } /* fall back on direct */ } - head = vq->packed.next_avail_idx; - avail_used_flags = vq->packed.avail_used_flags; - WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); - desc = 
vq->packed.vring.desc; - i = head; descs_used = total_sg; if (unlikely(vq->vq.num_free < descs_used)) { pr_debug("Can't add buf len %i - avail = %i\n", descs_used, vq->vq.num_free); - END_USE(vq); return -ENOSPC; } + *pdesc = desc; + + return 0; +} + +static void virtqueue_add_packed_vring(struct vring_virtqueue *vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs) +{ + struct vring_packed_desc *desc; + struct scatterlist *sg; + unsigned int i, n, c, descs_used; + __le16 head_flags, flags; + u16 head, id, prev, curr; + + desc = vq->packed.vring.desc; + head = vq->packed.next_avail_idx; + i = head; + descs_used = total_sg; + id = vq->free_head; BUG_ON(id == vq->packed.vring.num); @@ -1498,11 +1485,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, c = 0; for (n = 0; n < out_sgs + in_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (vring_mapping_error(vq, addr)) - goto unmap_release; - flags = cpu_to_le16(vq->packed.avail_used_flags | (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); @@ -1511,12 +1493,12 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, else desc[i].flags = flags; - desc[i].addr = cpu_to_le64(addr); + desc[i].addr = cpu_to_le64(sg->dma_address); desc[i].len = cpu_to_le32(sg->length); desc[i].id = cpu_to_le16(id); if (unlikely(vq->use_dma_api)) { - vq->packed.desc_extra[curr].addr = addr; + vq->packed.desc_extra[curr].addr = sg->dma_address; vq->packed.desc_extra[curr].len = sg->length; vq->packed.desc_extra[curr].flags le16_to_cpu(flags); @@ -1545,8 +1527,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, /* Store token. */ vq->packed.desc_state[id].num = descs_used; - vq->packed.desc_state[id].data = data; - vq->packed.desc_state[id].indir_desc = ctx; vq->packed.desc_state[id].last = prev; /* @@ -1559,29 +1539,55 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, vq->num_added += descs_used; pr_debug("Added buffer head %i to %p\n", head, vq); - END_USE(vq); +} - return 0; +static inline int virtqueue_add_packed(struct virtqueue *_vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + void *ctx, + gfp_t gfp) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct vring_packed_desc *desc; + u16 id; + int err; -unmap_release: - err_idx = i; - i = head; - curr = vq->free_head; + START_USE(vq); - vq->packed.avail_used_flags = avail_used_flags; + /* check vq state and try to alloc desc for indirect. 
*/ + err = virtqueue_add_packed_prepare(vq, total_sg, data, ctx, &desc, gfp); + if (err) + goto end; - for (n = 0; n < total_sg; n++) { - if (i == err_idx) - break; - vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]); - curr = vq->packed.desc_extra[curr].next; - i++; - if (i >= vq->packed.vring.num) - i = 0; + err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs); + if (err) + goto err; + + id = vq->free_head; + + if (desc) { + err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, in_sgs, desc); + if (err) + goto err; + } else { + virtqueue_add_packed_vring(vq, sgs, total_sg, out_sgs, in_sgs); + vq->packed.desc_state[id].indir_desc = ctx; } + vq->packed.desc_state[id].data = data; + + goto end; + +err: + virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs); + kfree(desc); + +end: END_USE(vq); - return -EIO; + return err; } static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14 07:26 UTC
[PATCH vhost 04/10] virtio_ring: split: introduce virtqueue_add_split_premapped()
virtqueue_add_split() only supports virtual addresses, dma is completed in virtqueue_add_split(). In some scenarios (such as the AF_XDP scenario), the memory is allocated and DMA is completed in advance, so it is necessary for us to support passing the DMA address to virtio core. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> --- drivers/virtio/virtio_ring.c | 100 +++++++++++++++++++++++++++++++++-- include/linux/virtio.h | 5 ++ 2 files changed, 100 insertions(+), 5 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 47b6f9152f9f..a31155abe101 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -70,6 +70,7 @@ struct vring_desc_state_split { void *data; /* Data for callback. */ struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ + bool premapped; }; struct vring_desc_state_packed { @@ -440,7 +441,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, } static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, - unsigned int i) + unsigned int i, bool premapped) { struct vring_desc_extra *extra = vq->split.desc_extra; u16 flags; @@ -457,6 +458,9 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } else { + if (premapped) + goto out; + dma_unmap_page(vring_dma_dev(vq), extra[i].addr, extra[i].len, @@ -788,6 +792,47 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, return err; } +static inline int virtqueue_add_split_premapped(struct virtqueue *_vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + void *ctx, + gfp_t gfp) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct vring_desc *desc; + int head; + int err; + + START_USE(vq); + + /* check vq state and try to alloc desc for indirect. */ + err = virtqueue_add_split_prepare(vq, total_sg, out_sgs, data, ctx, gfp, &desc); + if (err) + goto end; + + head = vq->free_head; + err = virtqueue_add_split_vring(vq, sgs, total_sg, out_sgs, in_sgs, desc); + if (err) + goto err; + + /* Store token and indirect buffer state. */ + vq->split.desc_state[head].data = data; + vq->split.desc_state[head].indir_desc = desc ? desc : ctx; + vq->split.desc_state[head].premapped = true; + + goto end; + +err: + kfree(desc); + +end: + END_USE(vq); + return err; +} + static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -824,20 +869,23 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, { unsigned int i, j; __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); + bool premapped; /* Clear data ptr. 
*/ vq->split.desc_state[head].data = NULL; + premapped = vq->split.desc_state[head].premapped; + /* Put back on free list: unmap first-level descriptors and find end */ i = head; while (vq->split.vring.desc[i].flags & nextflag) { - vring_unmap_one_split(vq, i); + vring_unmap_one_split(vq, i, premapped); i = vq->split.desc_extra[i].next; vq->vq.num_free++; } - vring_unmap_one_split(vq, i); + vring_unmap_one_split(vq, i, premapped); vq->split.desc_extra[i].next = vq->free_head; vq->free_head = head; @@ -859,8 +907,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, VRING_DESC_F_INDIRECT)); BUG_ON(len == 0 || len % sizeof(struct vring_desc)); - for (j = 0; j < len / sizeof(struct vring_desc); j++) - vring_unmap_one_split_indirect(vq, &indir_desc[j]); + if (!premapped) { + for (j = 0; j < len / sizeof(struct vring_desc); j++) + vring_unmap_one_split_indirect(vq, &indir_desc[j]); + } kfree(indir_desc); vq->split.desc_state[head].indir_desc = NULL; @@ -2204,6 +2254,21 @@ static inline int virtqueue_add(struct virtqueue *_vq, out_sgs, in_sgs, data, ctx, gfp); } +static inline int virtqueue_add_premapped(struct virtqueue *_vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + void *ctx, + gfp_t gfp) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + return virtqueue_add_split_premapped(_vq, sgs, total_sg, out_sgs, + in_sgs, data, ctx, gfp); +} + /** * virtqueue_add_sgs - expose buffers to other end * @_vq: the struct virtqueue we're talking about. @@ -2261,6 +2326,31 @@ int virtqueue_add_outbuf(struct virtqueue *vq, } EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); +/** + * virtqueue_add_outbuf_premapped - expose output buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sg: scatterlist (must be well-formed and terminated!) + * @num: the number of entries in @sg readable by other side + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * It is required that all addrs have completed DMA operations. And use + * sg->dma_address, sg->length to pass addr and length. + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add_outbuf_premapped(struct virtqueue *vq, + struct scatterlist *sg, unsigned int num, + void *data, + gfp_t gfp) +{ + return virtqueue_add_premapped(vq, &sg, num, 1, 0, data, NULL, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_premapped); + /** * virtqueue_add_inbuf - expose input buffers to other end * @vq: the struct virtqueue we're talking about. diff --git a/include/linux/virtio.h b/include/linux/virtio.h index dcab9c7e8784..d8b472a7dcae 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -43,6 +43,11 @@ int virtqueue_add_outbuf(struct virtqueue *vq, void *data, gfp_t gfp); +int virtqueue_add_outbuf_premapped(struct virtqueue *vq, + struct scatterlist *sg, unsigned int num, + void *data, + gfp_t gfp); + int virtqueue_add_inbuf(struct virtqueue *vq, struct scatterlist sg[], unsigned int num, void *data, -- 2.32.0.3.g01195cf9f
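For illustration only (not part of the patch): a minimal sketch of how a driver
could submit a transmit buffer through virtqueue_add_outbuf_premapped(), based
on the signature and comment above. The function name, the token and the way
the DMA address was obtained are assumptions; only the sg->dma_address /
sg->length convention and the call itself come from this patch.

#include <linux/scatterlist.h>
#include <linux/virtio.h>

/* Queue one frame whose DMA mapping was done beforehand (for example with
 * the virtio_dma_map() helper added later in this series). The premapped
 * path reads sg->dma_address and sg->length and never touches the virtual
 * address of the buffer.
 */
static int xsk_tx_one(struct virtqueue *vq, dma_addr_t dma,
                      unsigned int len, void *token)
{
        struct scatterlist sg;
        int err;

        sg_init_table(&sg, 1);
        sg.dma_address = dma;
        sg.length = len;

        err = virtqueue_add_outbuf_premapped(vq, &sg, 1, token, GFP_ATOMIC);
        if (err)
                return err;

        virtqueue_kick(vq);
        return 0;
}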
Xuan Zhuo
2023-Feb-14 07:26 UTC
[PATCH vhost 05/10] virtio_ring: packed: introduce virtqueue_add_packed_premapped()
virtqueue_add_packed() only supports virtual addresses, dma is completed in virtqueue_add_packed(). In some scenarios (such as the AF_XDP scenario), the memory is allocated and DMA is completed in advance, so it is necessary for us to support passing the DMA address to virtio core. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> --- drivers/virtio/virtio_ring.c | 67 ++++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 6 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index a31155abe101..79244ccbae9e 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -78,6 +78,7 @@ struct vring_desc_state_packed { struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ u16 num; /* Descriptor list length. */ u16 last; /* The last desc state in a list. */ + bool premapped; }; struct vring_desc_extra { @@ -1318,7 +1319,8 @@ static inline u16 packed_last_used(u16 last_used_idx) } static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, - struct vring_desc_extra *extra) + struct vring_desc_extra *extra, + bool premapped) { u16 flags; @@ -1333,6 +1335,9 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } else { + if (premapped) + return; + dma_unmap_page(vring_dma_dev(vq), extra->addr, extra->len, (flags & VRING_DESC_F_WRITE) ? @@ -1382,7 +1387,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, struct vring_packed_desc *desc) { struct scatterlist *sg; - unsigned int i, n, err_idx; + unsigned int i, n; u16 head, id; dma_addr_t addr; @@ -1640,6 +1645,51 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, return err; } +static inline int virtqueue_add_packed_premapped(struct virtqueue *_vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + void *ctx, + gfp_t gfp) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct vring_packed_desc *desc; + u16 id; + int err; + + START_USE(vq); + + /* check vq state and try to alloc desc for indirect. */ + err = virtqueue_add_packed_prepare(vq, total_sg, data, ctx, &desc, gfp); + if (err) + goto end; + + id = vq->free_head; + + if (desc) { + err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, in_sgs, desc); + if (err) + goto err; + } else { + virtqueue_add_packed_vring(vq, sgs, total_sg, out_sgs, in_sgs); + vq->packed.desc_state[id].indir_desc = ctx; + } + + vq->packed.desc_state[id].data = data; + vq->packed.desc_state[id].premapped = true; + + goto end; + +err: + kfree(desc); + +end: + END_USE(vq); + return err; +} + static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -1695,8 +1745,10 @@ static void detach_buf_packed(struct vring_virtqueue *vq, struct vring_desc_state_packed *state = NULL; struct vring_packed_desc *desc; unsigned int i, curr; + bool premapped; state = &vq->packed.desc_state[id]; + premapped = state->premapped; /* Clear data ptr. 
*/ state->data = NULL; @@ -1709,7 +1761,8 @@ static void detach_buf_packed(struct vring_virtqueue *vq, curr = id; for (i = 0; i < state->num; i++) { vring_unmap_extra_packed(vq, - &vq->packed.desc_extra[curr]); + &vq->packed.desc_extra[curr], + premapped); curr = vq->packed.desc_extra[curr].next; } } @@ -1722,7 +1775,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq, if (!desc) return; - if (vq->use_dma_api) { + if (vq->use_dma_api && !premapped) { len = vq->packed.desc_extra[id].len; for (i = 0; i < len / sizeof(struct vring_packed_desc); i++) @@ -2265,8 +2318,10 @@ static inline int virtqueue_add_premapped(struct virtqueue *_vq, { struct vring_virtqueue *vq = to_vvq(_vq); - return virtqueue_add_split_premapped(_vq, sgs, total_sg, out_sgs, - in_sgs, data, ctx, gfp); + return vq->packed_ring ? virtqueue_add_packed_premapped(_vq, sgs, total_sg, out_sgs, + in_sgs, data, ctx, gfp) : + virtqueue_add_split_premapped(_vq, sgs, total_sg, out_sgs, + in_sgs, data, ctx, gfp); } /** -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14 07:27 UTC
[PATCH vhost 06/10] virtio_ring: introduce virtqueue_add_inbuf_premapped()
Introduce virtqueue_add_inbuf_premapped() to submit premapped sgs. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> --- drivers/virtio/virtio_ring.c | 25 +++++++++++++++++++++++++ include/linux/virtio.h | 5 +++++ 2 files changed, 30 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 79244ccbae9e..cd9364eb2345 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2452,6 +2452,31 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq, } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); +/** + * virtqueue_add_inbuf_premapped - expose input buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sg: scatterlist (must be well-formed and terminated!) + * @num: the number of entries in @sg writable by other side + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * It is required that all addrs have completed DMA operations. And use + * sg->dma_address, sg->length to pass addr and length. + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add_inbuf_premapped(struct virtqueue *vq, + struct scatterlist *sg, unsigned int num, + void *data, + gfp_t gfp) +{ + return virtqueue_add_premapped(vq, &sg, num, 0, 1, data, NULL, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_premapped); + /** * virtqueue_kick_prepare - first half of split virtqueue_kick call. * @_vq: the struct virtqueue diff --git a/include/linux/virtio.h b/include/linux/virtio.h index d8b472a7dcae..3ebb346ebb7c 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -59,6 +59,11 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq, void *ctx, gfp_t gfp); +int virtqueue_add_inbuf_premapped(struct virtqueue *vq, + struct scatterlist *sg, unsigned int num, + void *data, + gfp_t gfp); + int virtqueue_add_sgs(struct virtqueue *vq, struct scatterlist *sgs[], unsigned int out_sgs, -- 2.32.0.3.g01195cf9f
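As a rough sketch (not taken from this series), the receive side could refill
the ring with already-mapped buffers as below. The tokens/dma arrays and the
refill policy are hypothetical; only the virtqueue_add_inbuf_premapped() call
reflects the API added above.

#include <linux/scatterlist.h>
#include <linux/virtio.h>

/* Post up to n pre-mapped receive buffers of buf_len bytes each. */
static int rx_refill_premapped(struct virtqueue *vq, void **tokens,
                               dma_addr_t *dma, unsigned int n,
                               unsigned int buf_len)
{
        struct scatterlist sg;
        unsigned int i;
        int err = 0;

        for (i = 0; i < n; i++) {
                sg_init_table(&sg, 1);
                sg.dma_address = dma[i];
                sg.length = buf_len;

                err = virtqueue_add_inbuf_premapped(vq, &sg, 1, tokens[i],
                                                    GFP_ATOMIC);
                if (err)
                        break;
        }

        /* Notify the device only if something was actually queued. */
        if (i)
                virtqueue_kick(vq);

        return err;
}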
Xuan Zhuo
2023-Feb-14 07:27 UTC
[PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
Added virtio_dma_map() to map DMA addresses for virtual memory in advance. The purpose is to keep memory mapped across multiple add/get buf operations. Added virtio_dma_unmap() for unmap DMA address. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> --- drivers/virtio/virtio_ring.c | 92 ++++++++++++++++++++++++++++++++++++ include/linux/virtio.h | 9 ++++ 2 files changed, 101 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index cd9364eb2345..855338609c7f 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq) } EXPORT_SYMBOL_GPL(virtqueue_get_vring); +/** + * virtio_dma_map_page - get the DMA addr of the memory for virtio device + * @dev: virtio device + * @page: the page of the memory to DMA + * @offset: the offset of the memory inside page + * @length: memory length + * @dir: DMA direction + * + * This API is only for pre-mapped buffers, for non premapped buffers virtio + * core handles DMA API internally. + * + * Returns the DMA addr. DMA_MAPPING_ERROR means error. + */ +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset, + unsigned int length, enum dma_data_direction dir) +{ + struct virtio_device *vdev = dev_to_virtio(dev); + + if (!vring_use_dma_api(vdev)) + return page_to_phys(page) + offset; + + return dma_map_page(vdev->dev.parent, page, offset, length, dir); +} + +/** + * virtio_dma_map - get the DMA addr of the memory for virtio device + * @dev: virtio device + * @addr: the addr to DMA + * @length: memory length + * @dir: DMA direction + * + * This API is only for pre-mapped buffers, for non premapped buffers virtio + * core handles DMA API internally. + * + * Returns the DMA addr. + */ +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length, + enum dma_data_direction dir) +{ + struct page *page; + size_t offset; + + page = virt_to_page(addr); + offset = offset_in_page(addr); + + return virtio_dma_map_page(dev, page, offset, length, dir); +} +EXPORT_SYMBOL_GPL(virtio_dma_map); + +/** + * virtio_dma_mapping_error - check dma address + * @dev: virtio device + * @addr: DMA address + * + * This API is only for pre-mapped buffers, for non premapped buffers virtio + * core handles DMA API internally. + * + * Returns 0 means dma valid. Other means invalid dma address. + */ +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr) +{ + struct virtio_device *vdev = dev_to_virtio(dev); + + if (!vring_use_dma_api(vdev)) + return 0; + + return dma_mapping_error(vdev->dev.parent, addr); +} +EXPORT_SYMBOL_GPL(virtio_dma_mapping_error); + +/** + * virtio_dma_unmap - unmap DMA addr + * @dev: virtio device + * @dma: DMA address + * @length: memory length + * @dir: DMA direction + * + * This API is only for pre-mapped buffers, for non premapped buffers virtio + * core handles DMA API internally. 
+ */ +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length, + enum dma_data_direction dir) +{ + struct virtio_device *vdev = dev_to_virtio(dev); + + if (!vring_use_dma_api(vdev)) + return; + + dma_unmap_page(vdev->dev.parent, dma, length, dir); +} +EXPORT_SYMBOL_GPL(virtio_dma_unmap); + MODULE_LICENSE("GPL"); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 3ebb346ebb7c..b5fa71476737 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -9,6 +9,7 @@ #include <linux/device.h> #include <linux/mod_devicetable.h> #include <linux/gfp.h> +#include <linux/dma-mapping.h> /** * struct virtqueue - a queue to register buffers for sending or receiving. @@ -216,4 +217,12 @@ void unregister_virtio_driver(struct virtio_driver *drv); #define module_virtio_driver(__virtio_driver) \ module_driver(__virtio_driver, register_virtio_driver, \ unregister_virtio_driver) + +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, size_t offset, + unsigned int length, enum dma_data_direction dir); +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length, + enum dma_data_direction dir); +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr); +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length, + enum dma_data_direction dir); #endif /* _LINUX_VIRTIO_H */ -- 2.32.0.3.g01195cf9f
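A possible usage sketch of the new mapping helpers (not from the patch):
mapping a driver-owned buffer once so it can be reused across many add/get
cycles. Passing &vdev->dev is an assumption based on the implementation
above, which resolves the virtio device from the struct device it is given;
the frame_map/frame_unmap names are made up.

#include <linux/dma-mapping.h>
#include <linux/virtio.h>

/* Map a transmit buffer once, up front. */
static dma_addr_t frame_map(struct virtio_device *vdev, void *buf,
                            unsigned int len)
{
        dma_addr_t dma;

        dma = virtio_dma_map(&vdev->dev, buf, len, DMA_TO_DEVICE);
        if (virtio_dma_mapping_error(&vdev->dev, dma))
                return DMA_MAPPING_ERROR;

        return dma;
}

/* Undo the mapping when the buffer is finally freed, not after each use. */
static void frame_unmap(struct virtio_device *vdev, dma_addr_t dma,
                        unsigned int len)
{
        virtio_dma_unmap(&vdev->dev, dma, len, DMA_TO_DEVICE);
}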
Xuan Zhuo
2023-Feb-14 07:27 UTC
[PATCH vhost 08/10] virtio_ring: introduce dma sync api for virtio
These API has been introduced: * virtio_dma_need_sync * virtio_dma_sync_single_range_for_cpu * virtio_dma_sync_single_range_for_device These APIs can be used together with the premapped mechanism to sync the DMA address. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> --- drivers/virtio/virtio_ring.c | 70 ++++++++++++++++++++++++++++++++++++ include/linux/virtio.h | 8 +++++ 2 files changed, 78 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 855338609c7f..84129b8c3e2a 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -3264,4 +3264,74 @@ void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length, } EXPORT_SYMBOL_GPL(virtio_dma_unmap); +/** + * virtio_dma_need_sync - check a dma address needs sync + * @dev: virtio device + * @addr: DMA address + * + * This API is only for pre-mapped buffers, for non premapped buffers virtio + * core handles DMA API internally. + */ +bool virtio_dma_need_sync(struct device *dev, dma_addr_t addr) +{ + struct virtio_device *vdev = dev_to_virtio(dev); + + if (!vring_use_dma_api(vdev)) + return 0; + + return dma_need_sync(vdev->dev.parent, addr); +} +EXPORT_SYMBOL_GPL(virtio_dma_need_sync); + +/** + * virtio_dma_sync_single_range_for_cpu - dma sync for cpu + * @dev: virtio device + * @addr: DMA address + * @offset: DMA address offset + * @size: mem size for sync + * @dir: DMA direction + * + * Before calling this function, use virtio_dma_need_sync() to confirm that the + * DMA address really needs to be synchronized + * + * This API is only for pre-mapped buffers, for non premapped buffers virtio + * core handles DMA API internally. + */ +void virtio_dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir) +{ + struct virtio_device *vdev = dev_to_virtio(dev); + + dma_sync_single_range_for_cpu(vdev->dev.parent, addr, offset, + size, DMA_BIDIRECTIONAL); +} +EXPORT_SYMBOL_GPL(virtio_dma_sync_single_range_for_cpu); + +/** + * virtio_dma_sync_single_range_for_device - dma sync for device + * @dev: virtio device + * @addr: DMA address + * @offset: DMA address offset + * @size: mem size for sync + * @dir: DMA direction + * + * Before calling this function, use virtio_dma_need_sync() to confirm that the + * DMA address really needs to be synchronized + * + * This API is only for pre-mapped buffers, for non premapped buffers virtio + * core handles DMA API internally. 
+ */ +void virtio_dma_sync_single_range_for_device(struct device *dev, + dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir) +{ + struct virtio_device *vdev = dev_to_virtio(dev); + + dma_sync_single_range_for_device(vdev->dev.parent, addr, offset, + size, DMA_BIDIRECTIONAL); +} +EXPORT_SYMBOL_GPL(virtio_dma_sync_single_range_for_device); + MODULE_LICENSE("GPL"); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index b5fa71476737..d0e707d744a0 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -225,4 +225,12 @@ dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int length, int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr); void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int length, enum dma_data_direction dir); +bool virtio_dma_need_sync(struct device *dev, dma_addr_t addr); +void virtio_dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir); +void virtio_dma_sync_single_range_for_device(struct device *dev, + dma_addr_t addr, + unsigned long offset, size_t size, + enum dma_data_direction dir); #endif /* _LINUX_VIRTIO_H */ -- 2.32.0.3.g01195cf9f
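A sketch of how these sync helpers might be used on a recycled, pre-mapped
receive buffer (illustrative only; the helper names are made up, and dev is
assumed to be the virtio device's struct device, as in the previous patch).

#include <linux/dma-mapping.h>
#include <linux/virtio.h>

/* Hand a recycled RX buffer back to the device... */
static void rx_buf_give_to_device(struct device *dev, dma_addr_t dma,
                                  unsigned int len)
{
        if (virtio_dma_need_sync(dev, dma))
                virtio_dma_sync_single_range_for_device(dev, dma, 0, len,
                                                        DMA_FROM_DEVICE);
}

/* ...and make its contents visible to the CPU after the device filled it. */
static void rx_buf_take_from_device(struct device *dev, dma_addr_t dma,
                                    unsigned int len)
{
        if (virtio_dma_need_sync(dev, dma))
                virtio_dma_sync_single_range_for_cpu(dev, dma, 0, len,
                                                     DMA_FROM_DEVICE);
}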
Xuan Zhuo
2023-Feb-14 07:27 UTC
[PATCH vhost 09/10] virtio_ring: correct the expression of the description of virtqueue_resize()
Modify the "useless" to a more accurate "unused". Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 84129b8c3e2a..2ba60a14f557 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2865,7 +2865,7 @@ EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma); * virtqueue_resize - resize the vring of vq * @_vq: the struct virtqueue we're talking about. * @num: new ring num - * @recycle: callback for recycle the useless buffer + * @recycle: callback to recycle unused buffers * * When it is really necessary to create a new vring, it will set the current vq * into the reset state. Then call the passed callback to recycle the buffer -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Feb-14 07:27 UTC
[PATCH vhost 10/10] virtio_ring: introduce virtqueue_reset()
Introduce virtqueue_reset() to release all buffers inside a vq.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 50 ++++++++++++++++++++++++++++++++++++
 include/linux/virtio.h       |  2 ++
 2 files changed, 52 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 2ba60a14f557..2750a365439a 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2930,6 +2930,56 @@
 }
 EXPORT_SYMBOL_GPL(virtqueue_resize);
 
+/**
+ * virtqueue_reset - detach and recycle all unused buffers
+ * @_vq: the struct virtqueue we're talking about.
+ * @recycle: callback to recycle unused buffers
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error.
+ * 0: success.
+ * -EBUSY: Failed to sync with device, vq may not work properly
+ * -ENOENT: Transport or device not supported
+ * -EPERM: Operation not permitted
+ */
+int virtqueue_reset(struct virtqueue *_vq,
+		    void (*recycle)(struct virtqueue *vq, void *buf))
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct virtio_device *vdev = vq->vq.vdev;
+	void *buf;
+	int err;
+
+	if (!vq->we_own_ring)
+		return -EPERM;
+
+	if (!vdev->config->disable_vq_and_reset)
+		return -ENOENT;
+
+	if (!vdev->config->enable_vq_after_reset)
+		return -ENOENT;
+
+	err = vdev->config->disable_vq_and_reset(_vq);
+	if (err)
+		return err;
+
+	while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
+		recycle(_vq, buf);
+
+	if (vq->packed_ring)
+		virtqueue_reinit_packed(vq);
+	else
+		virtqueue_reinit_split(vq);
+
+	if (vdev->config->enable_vq_after_reset(_vq))
+		return -EBUSY;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(virtqueue_reset);
+
 /* Only available for split ring */
 struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      unsigned int num,
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index d0e707d744a0..cf4c157e4e75 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -106,6 +106,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
 
 int virtqueue_resize(struct virtqueue *vq, u32 num,
 		     void (*recycle)(struct virtqueue *vq, void *buf));
+int virtqueue_reset(struct virtqueue *vq,
+		    void (*recycle)(struct virtqueue *vq, void *buf));
 
 /**
  * struct virtio_device - representation of a device using virtio
--
2.32.0.3.g01195cf9f
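For illustration (not part of the patch), a driver-side sketch of draining and
reinitializing a queue, for example when tearing down an AF_XDP socket.
drv_recycle_buf() stands in for whatever the driver does with the tokens it
passed at add time; only the virtqueue_reset() call comes from this patch.

#include <linux/slab.h>
#include <linux/virtio.h>

static void drv_recycle_buf(struct virtqueue *vq, void *buf)
{
        /* Hypothetical: the tokens were kmalloc'ed driver structures. */
        kfree(buf);
}

static int drv_reset_queue(struct virtqueue *vq)
{
        int err;

        err = virtqueue_reset(vq, drv_recycle_buf);
        if (err)
                return err;

        /* The ring is now empty and re-initialized; fresh buffers can be
         * posted again, e.g. with the premapped helpers from this series.
         */
        return 0;
}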
On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo <xuanzhuo at linux.alibaba.com> wrote:
>
> XDP socket (AF_XDP) is an excellent kernel-bypass network framework. Its
> zero-copy feature needs to be supported by the driver, and the zero-copy
> performance is very good.
>
> ENV: Qemu with vhost.
>
>                     vhost cpu | Guest APP CPU |Guest Softirq CPU | PPS
> -----------------------------|---------------|------------------|------------
> xmit by sockperf:        90% |      100%     |                  |   318967
> xmit by xsk:            100% |       30%     |        33%       |  1192064

What's the setup of this test? CPU model/frequency, packet size, zerocopy
enabled or not? (I remember I could get better performance with my old
laptop through pktgen, about 2 Mpps.)

Thanks

> recv by sockperf:       100% |       68%     |       100%       |   692288
> recv by xsk:            100% |       33%     |        43%       |   771670
>
> Before implementing this in virtio-net, we also have to let the virtio core
> support these features:
>
> 1. virtio core support for premapped buffers
> 2. virtio core support for per-queue reset
> 3. DMA APIs introduced to the virtio core
>
> Please review.
>
> Thanks.
>
> Xuan Zhuo (10):
>   virtio_ring: split: refactor virtqueue_add_split() for premapped
>   virtio_ring: packed: separate prepare code from
>     virtqueue_add_indirect_packed()
>   virtio_ring: packed: refactor virtqueue_add_packed() for premapped
>   virtio_ring: split: introduce virtqueue_add_split_premapped()
>   virtio_ring: packed: introduce virtqueue_add_packed_premapped()
>   virtio_ring: introduce virtqueue_add_inbuf_premapped()
>   virtio_ring: add api virtio_dma_map() for advance dma
>   virtio_ring: introduce dma sync api for virtio
>   virtio_ring: correct the expression of the description of
>     virtqueue_resize()
>   virtio_ring: introduce virtqueue_reset()
>
>  drivers/virtio/virtio_ring.c | 792 ++++++++++++++++++++++++++++-------
>  include/linux/virtio.h       |  29 ++
>  2 files changed, 659 insertions(+), 162 deletions(-)
>
> --
> 2.32.0.3.g01195cf9f
>