XDP socket (AF_XDP) is an excellent kernel-bypass networking framework. The
zero-copy feature of xsk (XDP socket) needs to be supported by the driver, and
its zero-copy performance is very good.

ENV: Qemu with vhost.

                   vhost cpu | Guest APP CPU | Guest Softirq CPU | PPS
-----------------------------|---------------|-------------------|------------
xmit by sockperf:        90% |          100% |                   |  318967
xmit by xsk:            100% |           30% |               33% | 1192064
recv by sockperf:       100% |           68% |              100% |  692288
recv by xsk:            100% |           33% |               43% |  771670

Before implementing this in virtio-net, we first have to make the virtio core
support these features:

1. virtio core support for premapped buffers
2. virtio core support for per-queue reset
3. introduce DMA APIs to the virtio core

Please review. Thanks.

v1:
 1. Expose the DMA device; do NOT introduce APIs for DMA map and sync.
 2. Split some commits for easier review.

Xuan Zhuo (12):
  virtio_ring: split: refactor virtqueue_add_split() for premapped
  virtio_ring: split: separate DMA codes
  virtio_ring: split: introduce virtqueue_add_split_premapped()
  virtio_ring: packed: separate prepare code from
    virtqueue_add_indirect_packed()
  virtio_ring: packed: refactor virtqueue_add_packed() for premapped
  virtio_ring: packed: separate DMA codes
  virtio_ring: packed: introduce virtqueue_add_packed_premapped()
  virtio_ring: introduce virtqueue_add_inbuf_premapped()
  virtio_ring: introduce virtqueue_get_dma_dev()
  virtio_ring: correct the expression of the description of
    virtqueue_resize()
  virtio_ring: separate the logic of reset/enable from virtqueue_resize
  virtio_ring: introduce virtqueue_reset()

 drivers/virtio/virtio.c      |   6 +
 drivers/virtio/virtio_ring.c | 636 ++++++++++++++++++++++++-----------
 include/linux/virtio.h       |  14 +
 3 files changed, 466 insertions(+), 190 deletions(-)

--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-02 11:58 UTC
[PATCH vhost v1 01/12] virtio_ring: split: refactor virtqueue_add_split() for premapped
This commit splits virtqueue_add_split() into two functions. The purpose of
the split is to separate the DMA operations: the first function contains all
the code that may fail before the DMA operations, and the remaining code
becomes the second function. This way, the DMA operations can be performed
between the two functions. If the first function fails, we do not need to
perform any DMA operations; and in the premapped case, the DMA operations can
be skipped.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 131 +++++++++++++++++++++++------------
 1 file changed, 88 insertions(+), 43 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 41144b5246a8..3005893ecc61 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -520,57 +520,38 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
 	return next;
 }
 
-static inline int virtqueue_add_split(struct virtqueue *_vq,
-				      struct scatterlist *sgs[],
-				      unsigned int total_sg,
-				      unsigned int out_sgs,
-				      unsigned int in_sgs,
-				      void *data,
-				      void *ctx,
-				      gfp_t gfp)
+/* note: return NULL means no indirect that is valid. */
+static struct vring_desc *virtqueue_get_desc_split(struct vring_virtqueue *vq,
+						   unsigned int total_sg,
+						   unsigned int out_sgs,
+						   void *data,
+						   void *ctx,
+						   gfp_t gfp)
 {
-	struct vring_virtqueue *vq = to_vvq(_vq);
-	struct scatterlist *sg;
 	struct vring_desc *desc;
-	unsigned int i, n, avail, descs_used, prev, err_idx;
-	int head;
-	bool indirect;
-
-	START_USE(vq);
+	unsigned int descs_used;
 
 	BUG_ON(data == NULL);
 	BUG_ON(ctx && vq->indirect);
 
-	if (unlikely(vq->broken)) {
-		END_USE(vq);
-		return -EIO;
-	}
+	if (unlikely(vq->broken))
+		return ERR_PTR(-EIO);
 
 	LAST_ADD_TIME_UPDATE(vq);
 
 	BUG_ON(total_sg == 0);
 
-	head = vq->free_head;
-
 	if (virtqueue_use_indirect(vq, total_sg))
-		desc = alloc_indirect_split(_vq, total_sg, gfp);
+		desc = alloc_indirect_split(&vq->vq, total_sg, gfp);
 	else {
 		desc = NULL;
 		WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
 	}
 
-	if (desc) {
-		/* Use a single buffer which doesn't continue */
-		indirect = true;
-		/* Set up rest to use this indirect table. */
-		i = 0;
+	if (desc)
 		descs_used = 1;
-	} else {
-		indirect = false;
-		desc = vq->split.vring.desc;
-		i = head;
+	else
 		descs_used = total_sg;
-	}
 
 	if (unlikely(vq->vq.num_free < descs_used)) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
@@ -580,10 +561,39 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 		 * host should service the ring ASAP. */
 		if (out_sgs)
 			vq->notify(&vq->vq);
-		if (indirect)
-			kfree(desc);
-		END_USE(vq);
-		return -ENOSPC;
+		kfree(desc);
+		return ERR_PTR(-ENOSPC);
+	}
+
+	return desc;
+}
+
+static inline int virtqueue_add_vring_split(struct vring_virtqueue *vq,
+					    struct scatterlist *sgs[],
+					    unsigned int total_sg,
+					    unsigned int out_sgs,
+					    unsigned int in_sgs,
+					    struct vring_desc *desc)
+{
+	struct virtqueue *_vq = &vq->vq;
+	struct scatterlist *sg;
+	unsigned int i, n, avail, descs_used, prev, err_idx;
+	int head;
+	bool indirect;
+
+	head = vq->free_head;
+
+	if (desc) {
+		/* Use a single buffer which doesn't continue */
+		indirect = true;
+		/* Set up rest to use this indirect table. */
+		i = 0;
+		descs_used = 1;
+	} else {
+		indirect = false;
+		desc = vq->split.vring.desc;
+		i = head;
+		descs_used = total_sg;
 	}
 
 	for (n = 0; n < out_sgs; n++) {
@@ -648,13 +658,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	else
 		vq->free_head = i;
 
-	/* Store token and indirect buffer state. */
-	vq->split.desc_state[head].data = data;
-	if (indirect)
-		vq->split.desc_state[head].indir_desc = desc;
-	else
-		vq->split.desc_state[head].indir_desc = ctx;
-
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
 	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
@@ -703,6 +706,48 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	return -ENOMEM;
 }
 
+static inline int virtqueue_add_split(struct virtqueue *_vq,
+				      struct scatterlist *sgs[],
+				      unsigned int total_sg,
+				      unsigned int out_sgs,
+				      unsigned int in_sgs,
+				      void *data,
+				      void *ctx,
+				      gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_desc *desc;
+	int head;
+	int err;
+
+	START_USE(vq);
+
+	/* check vq state and try to alloc desc for indirect. */
+	desc = virtqueue_get_desc_split(vq, total_sg, out_sgs, data, ctx, gfp);
+	if (IS_ERR(desc)) {
+		err = PTR_ERR(desc);
+		goto end;
+	}
+
+	head = vq->free_head;
+	err = virtqueue_add_vring_split(vq, sgs, total_sg, out_sgs, in_sgs, desc);
+	if (err)
+		goto err;
+
+	/* Store token and indirect buffer state. */
+	vq->split.desc_state[head].data = data;
+	vq->split.desc_state[head].indir_desc = desc ? desc : ctx;
+
+	goto end;
+
+err:
+	kfree(desc);
+
+end:
+	END_USE(vq);
+	return err;
+}
+
 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-02 11:58 UTC
[PATCH vhost v1 02/12] virtio_ring: split: separate DMA codes
The DMA-related logic is separated out of virtqueue_add_vring_split() to
prepare for the subsequent premapped support. The DMA address is now saved in
sg->dma_address, and virtqueue_add_vring_split() uses it directly. In the
premapped case, the submitted sgs will already carry the DMA address in
dma_address, so inside the virtio core we can skip virtqueue_map_sgs().

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 116 ++++++++++++++++++++++++-----------
 1 file changed, 80 insertions(+), 36 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 3005893ecc61..17520f0d7649 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -520,6 +520,73 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
 	return next;
 }
 
+static int virtqueue_map_sgs(struct vring_virtqueue *vq,
+			     struct scatterlist *sgs[],
+			     unsigned int total_sg,
+			     unsigned int out_sgs,
+			     unsigned int in_sgs)
+{
+	struct scatterlist *sg;
+	unsigned int n;
+
+	for (n = 0; n < out_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
+
+			if (vring_mapping_error(vq, addr))
+				return -ENOMEM;
+
+			sg->dma_address = addr;
+		}
+	}
+
+	for (; n < (out_sgs + in_sgs); n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
+
+			if (vring_mapping_error(vq, addr))
+				return -ENOMEM;
+
+			sg->dma_address = addr;
+		}
+	}
+
+	return 0;
+}
+
+static void virtqueue_unmap_sgs(struct vring_virtqueue *vq,
+				struct scatterlist *sgs[],
+				unsigned int total_sg,
+				unsigned int out_sgs,
+				unsigned int in_sgs)
+{
+	struct scatterlist *sg;
+	unsigned int n;
+
+	if (!vq->use_dma_api)
+		return;
+
+	for (n = 0; n < out_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			if (!sg->dma_address)
+				return;
+
+			dma_unmap_page(vring_dma_dev(vq), sg->dma_address,
+				       sg->length, DMA_TO_DEVICE);
+		}
+	}
+
+	for (; n < (out_sgs + in_sgs); n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			if (!sg->dma_address)
+				return;
+
+			dma_unmap_page(vring_dma_dev(vq), sg->dma_address,
+				       sg->length, DMA_FROM_DEVICE);
+		}
+	}
+}
+
 /* note: return NULL means no indirect that is valid. */
 static struct vring_desc *virtqueue_get_desc_split(struct vring_virtqueue *vq,
 						   unsigned int total_sg,
@@ -577,7 +644,7 @@ static inline int virtqueue_add_vring_split(struct vring_virtqueue *vq,
 {
 	struct virtqueue *_vq = &vq->vq;
 	struct scatterlist *sg;
-	unsigned int i, n, avail, descs_used, prev, err_idx;
+	unsigned int i, n, avail, descs_used, prev;
 	int head;
 	bool indirect;
 
@@ -598,30 +665,25 @@ static inline int virtqueue_add_vring_split(struct vring_virtqueue *vq,
 
 	for (n = 0; n < out_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			prev = i;
 			/* Note that we trust indirect descriptor
 			 * table since it use stream DMA mapping. */
-			i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
+			i = virtqueue_add_desc_split(_vq, desc, i,
+						     sg->dma_address,
+						     sg->length,
 						     VRING_DESC_F_NEXT,
 						     indirect);
 		}
 	}
 	for (; n < (out_sgs + in_sgs); n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			prev = i;
 			/* Note that we trust indirect descriptor
 			 * table since it use stream DMA mapping. */
-			i = virtqueue_add_desc_split(_vq, desc, i, addr,
+			i = virtqueue_add_desc_split(_vq, desc, i,
+						     sg->dma_address,
 						     sg->length,
 						     VRING_DESC_F_NEXT |
 						     VRING_DESC_F_WRITE,
@@ -640,7 +702,7 @@ static inline int virtqueue_add_vring_split(struct vring_virtqueue *vq,
 			vq, desc, total_sg * sizeof(struct vring_desc),
 			DMA_TO_DEVICE);
 		if (vring_mapping_error(vq, addr))
-			goto unmap_release;
+			return -ENOMEM;
 
 		virtqueue_add_desc_split(_vq, vq->split.vring.desc,
 					 head, addr,
@@ -680,30 +742,6 @@ static inline int virtqueue_add_vring_split(struct vring_virtqueue *vq,
 		virtqueue_kick(_vq);
 
 	return 0;
-
-unmap_release:
-	err_idx = i;
-
-	if (indirect)
-		i = 0;
-	else
-		i = head;
-
-	for (n = 0; n < total_sg; n++) {
-		if (i == err_idx)
-			break;
-		if (indirect) {
-			vring_unmap_one_split_indirect(vq, &desc[i]);
-			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
-		} else
-			i = vring_unmap_one_split(vq, i);
-	}
-
-	if (indirect)
-		kfree(desc);
-
-	END_USE(vq);
-	return -ENOMEM;
 }
 
 static inline int virtqueue_add_split(struct virtqueue *_vq,
@@ -729,6 +767,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 		goto end;
 	}
 
+	err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+	if (err)
+		goto err;
+
 	head = vq->free_head;
 	err = virtqueue_add_vring_split(vq, sgs, total_sg, out_sgs, in_sgs, desc);
 	if (err)
@@ -741,6 +783,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	goto end;
 
 err:
+	virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+
 	kfree(desc);
 
 end:
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-02 11:58 UTC
[PATCH vhost v1 03/12] virtio_ring: split: introduce virtqueue_add_split_premapped()
virtqueue_add_split() only supports virtual addresses; the DMA mapping is done
inside virtqueue_add_split(). In some scenarios (such as the AF_XDP scenario),
the memory is allocated and the DMA mapping is completed in advance, so it is
necessary for us to support passing the DMA address into the virtio core.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 74 +++++++++++++++++++++++++++++++-----
 include/linux/virtio.h       |  5 +++
 2 files changed, 69 insertions(+), 10 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 17520f0d7649..5b186ce73d35 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -70,6 +70,7 @@
 struct vring_desc_state_split {
 	void *data;			/* Data for callback. */
 	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
+	bool dma_map;			/* Addr is mapped by virtio core or not. */
 };
 
 struct vring_desc_state_packed {
@@ -440,7 +441,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
 }
 
 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
-					  unsigned int i)
+					  unsigned int i, bool dma_map)
 {
 	struct vring_desc_extra *extra = vq->split.desc_extra;
 	u16 flags;
@@ -457,6 +458,9 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
 			       (flags & VRING_DESC_F_WRITE) ?
 			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
 	} else {
+		if (!dma_map)
+			goto out;
+
 		dma_unmap_page(vring_dma_dev(vq),
 			       extra[i].addr,
 			       extra[i].len,
@@ -751,6 +755,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 				      unsigned int in_sgs,
 				      void *data,
 				      void *ctx,
+				      bool dma_map,
 				      gfp_t gfp)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -767,9 +772,11 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 		goto end;
 	}
 
-	err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
-	if (err)
-		goto err;
+	if (dma_map) {
+		err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+		if (err)
+			goto err;
+	}
 
 	head = vq->free_head;
 	err = virtqueue_add_vring_split(vq, sgs, total_sg, out_sgs, in_sgs, desc);
@@ -779,11 +786,13 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	/* Store token and indirect buffer state. */
 	vq->split.desc_state[head].data = data;
 	vq->split.desc_state[head].indir_desc = desc ? desc : ctx;
+	vq->split.desc_state[head].dma_map = dma_map;
 
 	goto end;
 
 err:
-	virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+	if (dma_map)
+		virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
 
 	kfree(desc);
 
@@ -828,20 +837,23 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 {
 	unsigned int i, j;
 	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
+	bool dma_map;
 
 	/* Clear data ptr. */
 	vq->split.desc_state[head].data = NULL;
 
+	dma_map = vq->split.desc_state[head].dma_map;
+
 	/* Put back on free list: unmap first-level descriptors and find end */
 	i = head;
 
 	while (vq->split.vring.desc[i].flags & nextflag) {
-		vring_unmap_one_split(vq, i);
+		vring_unmap_one_split(vq, i, dma_map);
 		i = vq->split.desc_extra[i].next;
 		vq->vq.num_free++;
 	}
 
-	vring_unmap_one_split(vq, i);
+	vring_unmap_one_split(vq, i, dma_map);
 	vq->split.desc_extra[i].next = vq->free_head;
 	vq->free_head = head;
 
@@ -863,8 +875,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 				VRING_DESC_F_INDIRECT));
 		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
 
-		for (j = 0; j < len / sizeof(struct vring_desc); j++)
-			vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+		if (dma_map) {
+			for (j = 0; j < len / sizeof(struct vring_desc); j++)
+				vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+		}
 
 		kfree(indir_desc);
 		vq->split.desc_state[head].indir_desc = NULL;
@@ -2204,7 +2218,22 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
 					out_sgs, in_sgs, data, ctx, gfp) :
 				 virtqueue_add_split(_vq, sgs, total_sg,
-					out_sgs, in_sgs, data, ctx, gfp);
+					out_sgs, in_sgs, data, ctx, true, gfp);
+}
+
+static inline int virtqueue_add_premapped(struct virtqueue *_vq,
+					  struct scatterlist *sgs[],
+					  unsigned int total_sg,
+					  unsigned int out_sgs,
+					  unsigned int in_sgs,
+					  void *data,
+					  void *ctx,
+					  gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return virtqueue_add_split(_vq, sgs, total_sg, out_sgs, in_sgs, data,
+				   ctx, false, gfp);
 }
 
 /**
@@ -2264,6 +2293,31 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
 
+/**
+ * virtqueue_add_outbuf_premapped - expose output buffers with dma address to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg readable by other side
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * It is required that all addrs have completed DMA operations. And use
+ * sg->dma_address, sg->length to pass addr and length.
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
+				   struct scatterlist *sg, unsigned int num,
+				   void *data,
+				   gfp_t gfp)
+{
+	return virtqueue_add_premapped(vq, &sg, num, 1, 0, data, NULL, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_premapped);
+
 /**
  * virtqueue_add_inbuf - expose input buffers to other end
  * @vq: the struct virtqueue we're talking about.
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index dcab9c7e8784..d8b472a7dcae 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -43,6 +43,11 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
 			 void *data,
 			 gfp_t gfp);
 
+int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
+				   struct scatterlist *sg, unsigned int num,
+				   void *data,
+				   gfp_t gfp);
+
 int virtqueue_add_inbuf(struct virtqueue *vq,
 			struct scatterlist sg[], unsigned int num,
 			void *data,
 			gfp_t gfp);
--
2.32.0.3.g01195cf9f
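
To make the intended usage concrete, here is a minimal sketch of a premapped
transmit path built on this new API. The xmit_premapped() helper, its buf/len
arguments, and the error handling are assumptions for illustration only, and
virtqueue_get_dma_dev() is not introduced until patch 9 of this series:

#include <linux/virtio.h>
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

/* Hedged sketch of a premapped TX submission; not taken from the patches. */
static int xmit_premapped(struct virtqueue *vq, void *buf, unsigned int len)
{
	struct device *dma_dev = virtqueue_get_dma_dev(vq);	/* patch 9 */
	struct scatterlist sg;
	dma_addr_t addr;

	/* The driver owns the mapping; it may stay mapped across many adds. */
	addr = dma_map_single(dma_dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dma_dev, addr))
		return -ENOMEM;

	/* Pass addr/len via sg->dma_address and sg->length; on this path
	 * the virtio core skips virtqueue_map_sgs(). */
	sg_init_table(&sg, 1);
	sg.dma_address = addr;
	sg.length = len;

	return virtqueue_add_outbuf_premapped(vq, &sg, 1, buf, GFP_ATOMIC);
}

Note that because desc_state[head].dma_map is false on this path, the core
will not unmap the buffer on detach; unmapping remains the driver's job.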
Xuan Zhuo
2023-Mar-02 11:59 UTC
[PATCH vhost v1 04/12] virtio_ring: packed: separate prepare code from virtqueue_add_indirect_packed()
Separate the logic of allocating the indirect desc and checking the queue
status into the calling function. The purpose of this is to make it convenient
to refactor virtqueue_add_packed() for premapped support.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5b186ce73d35..178edf1171e2 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1348,25 +1348,14 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 					 unsigned int out_sgs,
 					 unsigned int in_sgs,
 					 void *data,
-					 gfp_t gfp)
+					 struct vring_packed_desc *desc)
 {
-	struct vring_packed_desc *desc;
 	struct scatterlist *sg;
 	unsigned int i, n, err_idx;
 	u16 head, id;
 	dma_addr_t addr;
 
 	head = vq->packed.next_avail_idx;
-	desc = alloc_indirect_packed(total_sg, gfp);
-	if (!desc)
-		return -ENOMEM;
-
-	if (unlikely(vq->vq.num_free < 1)) {
-		pr_debug("Can't add buf len 1 - avail = 0\n");
-		kfree(desc);
-		END_USE(vq);
-		return -ENOSPC;
-	}
 
 	i = 0;
 	id = vq->free_head;
@@ -1488,11 +1477,17 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	BUG_ON(total_sg == 0);
 
 	if (virtqueue_use_indirect(vq, total_sg)) {
-		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
-						    in_sgs, data, gfp);
-		if (err != -ENOMEM) {
-			END_USE(vq);
-			return err;
+		desc = alloc_indirect_packed(total_sg, gfp);
+		if (desc) {
+			if (unlikely(vq->vq.num_free < 1)) {
+				pr_debug("Can't add buf len 1 - avail = 0\n");
+				kfree(desc);
+				END_USE(vq);
+				return -ENOSPC;
+			}
+
+			return virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
+							     in_sgs, data, desc);
 		}
 
 		/* fall back on direct */
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-02 11:59 UTC
[PATCH vhost v1 05/12] virtio_ring: packed: refactor virtqueue_add_packed() for premapped
This commit splits virtqueue_add_packed() into two functions. The purpose of
the split is to separate the DMA operations: the first function contains all
the code that may fail before the DMA operations, and the remaining code
becomes the second function. This way, the DMA operations can be performed
between the two functions. If the first function fails, we do not need to
perform any DMA operations; and in the premapped case, the DMA operations can
be skipped.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 120 +++++++++++++++++++++++------------
 1 file changed, 81 insertions(+), 39 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 178edf1171e2..6796cbee0207 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1347,7 +1347,6 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 					 unsigned int total_sg,
 					 unsigned int out_sgs,
 					 unsigned int in_sgs,
-					 void *data,
 					 struct vring_packed_desc *desc)
 {
 	struct scatterlist *sg;
@@ -1422,14 +1421,12 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 
 	/* Store token and indirect buffer state. */
 	vq->packed.desc_state[id].num = 1;
-	vq->packed.desc_state[id].data = data;
 	vq->packed.desc_state[id].indir_desc = desc;
 	vq->packed.desc_state[id].last = id;
 
 	vq->num_added += 1;
 
 	pr_debug("Added buffer head %i to %p\n", head, vq);
-	END_USE(vq);
 
 	return 0;
 
@@ -1441,74 +1438,76 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 
 	kfree(desc);
 
-	END_USE(vq);
 	return -ENOMEM;
 }
 
-static inline int virtqueue_add_packed(struct virtqueue *_vq,
-				       struct scatterlist *sgs[],
-				       unsigned int total_sg,
-				       unsigned int out_sgs,
-				       unsigned int in_sgs,
-				       void *data,
-				       void *ctx,
-				       gfp_t gfp)
+static inline struct vring_packed_desc *virtqueue_get_desc_packed(struct vring_virtqueue *vq,
+								  unsigned int total_sg,
+								  void *data,
+								  void *ctx,
+								  gfp_t gfp)
 {
-	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct vring_packed_desc *desc;
-	struct scatterlist *sg;
-	unsigned int i, n, c, descs_used, err_idx;
-	__le16 head_flags, flags;
-	u16 head, id, prev, curr, avail_used_flags;
-	int err;
-
-	START_USE(vq);
+	unsigned int descs_used;
 
 	BUG_ON(data == NULL);
 	BUG_ON(ctx && vq->indirect);
 
-	if (unlikely(vq->broken)) {
-		END_USE(vq);
-		return -EIO;
-	}
+	if (unlikely(vq->broken))
+		return ERR_PTR(-EIO);
 
 	LAST_ADD_TIME_UPDATE(vq);
 
 	BUG_ON(total_sg == 0);
 
+	desc = NULL;
+
 	if (virtqueue_use_indirect(vq, total_sg)) {
 		desc = alloc_indirect_packed(total_sg, gfp);
 		if (desc) {
 			if (unlikely(vq->vq.num_free < 1)) {
 				pr_debug("Can't add buf len 1 - avail = 0\n");
 				kfree(desc);
-				END_USE(vq);
-				return -ENOSPC;
+				return ERR_PTR(-ENOSPC);
 			}
 
-			return virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
-							     in_sgs, data, desc);
+			return desc;
 		}
 
 		/* fall back on direct */
 	}
 
-	head = vq->packed.next_avail_idx;
-	avail_used_flags = vq->packed.avail_used_flags;
-
 	WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
 
-	desc = vq->packed.vring.desc;
-	i = head;
-
 	descs_used = total_sg;
 
 	if (unlikely(vq->vq.num_free < descs_used)) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
 			 descs_used, vq->vq.num_free);
-		END_USE(vq);
-		return -ENOSPC;
+		return ERR_PTR(-ENOSPC);
 	}
 
+	return desc;
+}
+
+static inline int virtqueue_add_vring_packed(struct vring_virtqueue *vq,
+					     struct scatterlist *sgs[],
+					     unsigned int total_sg,
+					     unsigned int out_sgs,
+					     unsigned int in_sgs)
+{
+	struct vring_packed_desc *desc;
+	struct scatterlist *sg;
+	unsigned int i, n, c, descs_used, err_idx;
+	__le16 head_flags, flags;
+	u16 head, id, prev, curr, avail_used_flags;
+
+	desc = vq->packed.vring.desc;
+	head = vq->packed.next_avail_idx;
+	i = head;
+	descs_used = total_sg;
+	avail_used_flags = vq->packed.avail_used_flags;
+
 	id = vq->free_head;
 	BUG_ON(id == vq->packed.vring.num);
 
@@ -1563,8 +1562,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 
 	/* Store token. */
 	vq->packed.desc_state[id].num = descs_used;
-	vq->packed.desc_state[id].data = data;
-	vq->packed.desc_state[id].indir_desc = ctx;
 	vq->packed.desc_state[id].last = prev;
 
 	/*
@@ -1577,7 +1574,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	vq->num_added += descs_used;
 
 	pr_debug("Added buffer head %i to %p\n", head, vq);
-	END_USE(vq);
 
 	return 0;
 
@@ -1598,10 +1594,56 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 		i = 0;
 	}
 
-	END_USE(vq);
 	return -EIO;
 }
 
+static inline int virtqueue_add_packed(struct virtqueue *_vq,
+				       struct scatterlist *sgs[],
+				       unsigned int total_sg,
+				       unsigned int out_sgs,
+				       unsigned int in_sgs,
+				       void *data,
+				       void *ctx,
+				       gfp_t gfp)
+{
+
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_packed_desc *desc;
+	u16 id;
+	int err;
+
+	START_USE(vq);
+
+	/* check vq state and try to alloc desc for indirect. */
+	desc = virtqueue_get_desc_packed(vq, total_sg, data, ctx, gfp);
+	if (IS_ERR(desc)) {
+		err = PTR_ERR(desc);
+		goto end;
+	}
+
+	id = vq->free_head;
+
+	if (desc) {
+		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, in_sgs, desc);
+		if (err)
+			goto err;
+	} else {
+		virtqueue_add_vring_packed(vq, sgs, total_sg, out_sgs, in_sgs);
+		vq->packed.desc_state[id].indir_desc = ctx;
+	}
+
+	vq->packed.desc_state[id].data = data;
+
+	goto end;
+
+err:
+	kfree(desc);
+
+end:
+	END_USE(vq);
+	return err;
+}
+
 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-02 11:59 UTC
[PATCH vhost v1 06/12] virtio_ring: packed: separate DMA codes
The DMA-related logic is separated out of virtqueue_add_vring_packed() to
prepare for the subsequent premapped support. The DMA address is now saved in
sg->dma_address, and virtqueue_add_vring_packed() uses it directly. In the
premapped case, the submitted sgs will already carry the DMA address in
dma_address, so inside the virtio core we can skip virtqueue_map_sgs().

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 59 ++++++++----------------------------
 1 file changed, 12 insertions(+), 47 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 6796cbee0207..81a9c0692fec 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1350,7 +1350,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 					 struct vring_packed_desc *desc)
 {
 	struct scatterlist *sg;
-	unsigned int i, n, err_idx;
+	unsigned int i, n;
 	u16 head, id;
 	dma_addr_t addr;
 
@@ -1362,14 +1362,9 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 
 	for (n = 0; n < out_sgs + in_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			addr = vring_map_one_sg(vq, sg, n < out_sgs ?
-					DMA_TO_DEVICE : DMA_FROM_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			desc[i].flags = cpu_to_le16(n < out_sgs ?
 						0 : VRING_DESC_F_WRITE);
-			desc[i].addr = cpu_to_le64(addr);
+			desc[i].addr = cpu_to_le64(sg->dma_address);
 			desc[i].len = cpu_to_le32(sg->length);
 			i++;
 		}
@@ -1380,7 +1375,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 			total_sg * sizeof(struct vring_packed_desc),
 			DMA_TO_DEVICE);
 	if (vring_mapping_error(vq, addr))
-		goto unmap_release;
+		return -ENOMEM;
 
 	vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
 	vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
@@ -1429,16 +1424,6 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 	pr_debug("Added buffer head %i to %p\n", head, vq);
 
 	return 0;
-
-unmap_release:
-	err_idx = i;
-
-	for (i = 0; i < err_idx; i++)
-		vring_unmap_desc_packed(vq, &desc[i]);
-
-	kfree(desc);
-
-	return -ENOMEM;
 }
 
 static inline struct vring_packed_desc *virtqueue_get_desc_packed(struct vring_virtqueue *vq,
@@ -1498,15 +1483,14 @@ static inline int virtqueue_add_vring_packed(struct vring_virtqueue *vq,
 {
 	struct vring_packed_desc *desc;
 	struct scatterlist *sg;
-	unsigned int i, n, c, descs_used, err_idx;
+	unsigned int i, n, c, descs_used;
 	__le16 head_flags, flags;
-	u16 head, id, prev, curr, avail_used_flags;
+	u16 head, id, prev, curr;
 
 	desc = vq->packed.vring.desc;
 	head = vq->packed.next_avail_idx;
 	i = head;
 	descs_used = total_sg;
-	avail_used_flags = vq->packed.avail_used_flags;
 
 	id = vq->free_head;
 	BUG_ON(id == vq->packed.vring.num);
@@ -1515,11 +1499,6 @@ static inline int virtqueue_add_vring_packed(struct vring_virtqueue *vq,
 	c = 0;
 	for (n = 0; n < out_sgs + in_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
-					DMA_TO_DEVICE : DMA_FROM_DEVICE);
-			if (vring_mapping_error(vq, addr))
-				goto unmap_release;
-
 			flags = cpu_to_le16(vq->packed.avail_used_flags |
 				    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
 				    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
@@ -1528,12 +1507,12 @@ static inline int virtqueue_add_vring_packed(struct vring_virtqueue *vq,
 			else
 				desc[i].flags = flags;
 
-			desc[i].addr = cpu_to_le64(addr);
+			desc[i].addr = cpu_to_le64(sg->dma_address);
 			desc[i].len = cpu_to_le32(sg->length);
 			desc[i].id = cpu_to_le16(id);
 
 			if (unlikely(vq->use_dma_api)) {
-				vq->packed.desc_extra[curr].addr = addr;
+				vq->packed.desc_extra[curr].addr = sg->dma_address;
 				vq->packed.desc_extra[curr].len = sg->length;
 				vq->packed.desc_extra[curr].flags =
 					le16_to_cpu(flags);
@@ -1576,25 +1555,6 @@ static inline int virtqueue_add_vring_packed(struct vring_virtqueue *vq,
 	pr_debug("Added buffer head %i to %p\n", head, vq);
 
 	return 0;
-
-unmap_release:
-	err_idx = i;
-	i = head;
-	curr = vq->free_head;
-
-	vq->packed.avail_used_flags = avail_used_flags;
-
-	for (n = 0; n < total_sg; n++) {
-		if (i == err_idx)
-			break;
-		vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
-		curr = vq->packed.desc_extra[curr].next;
-		i++;
-		if (i >= vq->packed.vring.num)
-			i = 0;
-	}
-
-	return -EIO;
 }
 
 static inline int virtqueue_add_packed(struct virtqueue *_vq,
@@ -1621,6 +1581,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 		goto end;
 	}
 
+	err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+	if (err)
+		goto err;
+
 	id = vq->free_head;
 
 	if (desc) {
@@ -1637,6 +1601,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	goto end;
 
 err:
+	virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
 	kfree(desc);
 
 end:
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-02 11:59 UTC
[PATCH vhost v1 07/12] virtio_ring: packed: introduce virtqueue_add_packed_premapped()
virtqueue_add_packed() only supports virtual addresses; the DMA mapping is
done inside virtqueue_add_packed(). In some scenarios (such as the AF_XDP
scenario), the memory is allocated and the DMA mapping is completed in
advance, so it is necessary for us to support passing the DMA address into the
virtio core.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 36 ++++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 81a9c0692fec..5e98f828236d 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -78,6 +78,7 @@ struct vring_desc_state_packed {
 	struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
 	u16 num;			/* Descriptor list length. */
 	u16 last;			/* The last desc state in a list. */
+	bool dma_map;			/* Addr is mapped by virtio core or not. */
 };
 
 struct vring_desc_extra {
@@ -1286,7 +1287,8 @@ static inline u16 packed_last_used(u16 last_used_idx)
 }
 
 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
-				     struct vring_desc_extra *extra)
+				     struct vring_desc_extra *extra,
+				     bool dma_map)
 {
 	u16 flags;
 
@@ -1301,6 +1303,9 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
 			 (flags & VRING_DESC_F_WRITE) ?
 			 DMA_FROM_DEVICE : DMA_TO_DEVICE);
 	} else {
+		if (!dma_map)
+			return;
+
 		dma_unmap_page(vring_dma_dev(vq),
 			       extra->addr, extra->len,
 			       (flags & VRING_DESC_F_WRITE) ?
@@ -1564,6 +1569,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 				       unsigned int in_sgs,
 				       void *data,
 				       void *ctx,
+				       bool dma_map,
 				       gfp_t gfp)
 {
 
@@ -1581,9 +1587,11 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 		goto end;
 	}
 
-	err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
-	if (err)
-		goto err;
+	if (dma_map) {
+		err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+		if (err)
+			goto err;
+	}
 
 	id = vq->free_head;
 
@@ -1597,11 +1605,14 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	}
 
 	vq->packed.desc_state[id].data = data;
+	vq->packed.desc_state[id].dma_map = dma_map;
 
 	goto end;
 
 err:
-	virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+	if (dma_map)
+		virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs);
+
 	kfree(desc);
 
 end:
@@ -1664,8 +1675,10 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
 	struct vring_desc_state_packed *state = NULL;
 	struct vring_packed_desc *desc;
 	unsigned int i, curr;
+	bool dma_map;
 
 	state = &vq->packed.desc_state[id];
+	dma_map = state->dma_map;
 
 	/* Clear data ptr. */
 	state->data = NULL;
@@ -1678,7 +1691,8 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
 		curr = id;
 		for (i = 0; i < state->num; i++) {
 			vring_unmap_extra_packed(vq,
-						 &vq->packed.desc_extra[curr]);
+						 &vq->packed.desc_extra[curr],
+						 dma_map);
 			curr = vq->packed.desc_extra[curr].next;
 		}
 	}
@@ -1691,7 +1705,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
 		if (!desc)
 			return;
 
-		if (vq->use_dma_api) {
+		if (vq->use_dma_api && dma_map) {
 			len = vq->packed.desc_extra[id].len;
 			for (i = 0; i < len / sizeof(struct vring_packed_desc);
 					i++)
@@ -2218,7 +2232,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
 	return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
-					out_sgs, in_sgs, data, ctx, gfp) :
+					out_sgs, in_sgs, data, ctx, true, gfp) :
 				 virtqueue_add_split(_vq, sgs, total_sg,
 					out_sgs, in_sgs, data, ctx, true, gfp);
 }
@@ -2234,8 +2248,10 @@ static inline int virtqueue_add_premapped(struct virtqueue *_vq,
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
-	return virtqueue_add_split(_vq, sgs, total_sg, out_sgs, in_sgs, data,
-				   ctx, false, gfp);
+	return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
+					out_sgs, in_sgs, data, ctx, false, gfp) :
+				 virtqueue_add_split(_vq, sgs, total_sg, out_sgs,
+					in_sgs, data, ctx, false, gfp);
 }
 
 /**
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-02 11:59 UTC
[PATCH vhost v1 08/12] virtio_ring: introduce virtqueue_add_inbuf_premapped()
Introduce virtqueue_add_inbuf_premapped() to submit premapped sgs.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 25 +++++++++++++++++++++++++
 include/linux/virtio.h       |  5 +++++
 2 files changed, 30 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5e98f828236d..53df12a8ab97 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2382,6 +2382,31 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
 
+/**
+ * virtqueue_add_inbuf_premapped - expose input buffers with dma address to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg writable by other side
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * It is required that all addrs have completed DMA operations. And use
+ * sg->dma_address, sg->length to pass addr and length.
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_inbuf_premapped(struct virtqueue *vq,
+				  struct scatterlist *sg, unsigned int num,
+				  void *data,
+				  gfp_t gfp)
+{
+	return virtqueue_add_premapped(vq, &sg, num, 0, 1, data, NULL, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_premapped);
+
 /**
  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
  * @_vq: the struct virtqueue
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index d8b472a7dcae..3ebb346ebb7c 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -59,6 +59,11 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
 			    void *ctx,
 			    gfp_t gfp);
 
+int virtqueue_add_inbuf_premapped(struct virtqueue *vq,
+				  struct scatterlist *sg, unsigned int num,
+				  void *data,
+				  gfp_t gfp);
+
 int virtqueue_add_sgs(struct virtqueue *vq,
 		      struct scatterlist *sgs[],
 		      unsigned int out_sgs,
--
2.32.0.3.g01195cf9f
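
The receive direction is symmetrical to the transmit sketch after patch 3. A
minimal sketch of a premapped RX refill, where the per-buffer context and its
fields are hypothetical and the mapping is created once by the driver:

#include <linux/virtio.h>
#include <linux/scatterlist.h>

/* Hypothetical per-buffer context; illustrative only. */
struct rx_buf {
	void *addr;
	dma_addr_t dma;		/* mapped once, reused across refills */
	unsigned int len;
};

static int refill_one(struct virtqueue *vq, struct rx_buf *buf)
{
	struct scatterlist sg;

	sg_init_table(&sg, 1);
	sg.dma_address = buf->dma;	/* premapped by the driver */
	sg.length = buf->len;

	/* One entry writable by the device; buf is the returned token. */
	return virtqueue_add_inbuf_premapped(vq, &sg, 1, buf, GFP_ATOMIC);
}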
Xuan Zhuo
2023-Mar-02 11:59 UTC
[PATCH vhost v1 09/12] virtio_ring: introduce virtqueue_get_dma_dev()
Add virtqueue_get_dma_dev() to get the DMA device of a virtqueue, so that the
caller can perform DMA operations in advance. The purpose is to keep memory
mapped across multiple add/get buf operations.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio.c      |  6 ++++++
 drivers/virtio/virtio_ring.c | 17 +++++++++++++++++
 include/linux/virtio.h       |  2 ++
 3 files changed, 25 insertions(+)

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index b9a80aedee1b..062b6381f5c9 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
+#include <linux/dma-mapping.h>
 #include <linux/virtio.h>
 #include <linux/spinlock.h>
 #include <linux/virtio_config.h>
@@ -243,6 +244,11 @@ static int virtio_dev_probe(struct device *_d)
 	u64 driver_features;
 	u64 driver_features_legacy;
 
+	_d->dma_mask = &_d->coherent_dma_mask;
+	err = dma_set_mask_and_coherent(_d, DMA_BIT_MASK(64));
+	if (err)
+		return err;
+
 	/* We have a driver! */
 	virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
 
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 53df12a8ab97..9b6a0febae58 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2407,6 +2407,23 @@ int virtqueue_add_inbuf_premapped(struct virtqueue *vq,
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_premapped);
 
+/**
+ * virtqueue_get_dma_dev - expose the dma dev
+ * @_vq: the struct virtqueue we're talking about.
+ *
+ * Returns the dma dev. That can be used for the dma api.
+ */
+struct device *virtqueue_get_dma_dev(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	if (vq->use_dma_api)
+		return vring_dma_dev(vq);
+	else
+		return &vq->vq.vdev->dev;
+}
+EXPORT_SYMBOL_GPL(virtqueue_get_dma_dev);
+
 /**
  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
  * @_vq: the struct virtqueue
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 3ebb346ebb7c..ac028b07f684 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -71,6 +71,8 @@ int virtqueue_add_sgs(struct virtqueue *vq,
 		      void *data,
 		      gfp_t gfp);
 
+struct device *virtqueue_get_dma_dev(struct virtqueue *vq);
+
 bool virtqueue_kick(struct virtqueue *vq);
 
 bool virtqueue_kick_prepare(struct virtqueue *vq);
--
2.32.0.3.g01195cf9f
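
A sketch of what this enables: mapping a driver-owned, physically contiguous
region once against the vq's DMA device and keeping it mapped across many
add/get cycles. The pool structure and its fields are assumptions for
illustration:

#include <linux/virtio.h>
#include <linux/dma-mapping.h>

/* Hypothetical long-lived mapping backing premapped descriptors. */
struct premapped_pool {
	void *region;		/* driver-owned, physically contiguous */
	dma_addr_t base;
	size_t size;
};

static int pool_init(struct premapped_pool *p, struct virtqueue *vq,
		     void *region, size_t size)
{
	struct device *dma_dev = virtqueue_get_dma_dev(vq);

	p->region = region;
	p->size = size;
	/* Map once; slices of [base, base + size) can then be passed via
	 * sg->dma_address to the premapped add APIs. */
	p->base = dma_map_single(dma_dev, region, size, DMA_BIDIRECTIONAL);

	return dma_mapping_error(dma_dev, p->base) ? -ENOMEM : 0;
}

When use_dma_api is false, the returned device is the virtio device itself;
the dma_set_mask_and_coherent() call added to virtio_dev_probe() above is what
makes the DMA API usable on that device in this case.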
Xuan Zhuo
2023-Mar-02 11:59 UTC
[PATCH vhost v1 10/12] virtio_ring: correct the expression of the description of virtqueue_resize()
Modify the "useless" to a more accurate "unused". Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> Acked-by: Jason Wang <jasowang at redhat.com> --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 9b6a0febae58..500cb8563f2b 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2812,7 +2812,7 @@ EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma); * virtqueue_resize - resize the vring of vq * @_vq: the struct virtqueue we're talking about. * @num: new ring num - * @recycle: callback for recycle the useless buffer + * @recycle: callback to recycle unused buffers * * When it is really necessary to create a new vring, it will set the current vq * into the reset state. Then call the passed callback to recycle the buffer -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-02 11:59 UTC
[PATCH vhost v1 11/12] virtio_ring: separate the logic of reset/enable from virtqueue_resize
The subsequent reset function will reuse this logic.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 58 ++++++++++++++++++++++++------------
 1 file changed, 39 insertions(+), 19 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 500cb8563f2b..5ab9136a363e 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2215,6 +2215,43 @@ static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
 	return -ENOMEM;
 }
 
+static int virtqueue_disable_and_recycle(struct virtqueue *_vq,
+					 void (*recycle)(struct virtqueue *vq, void *buf))
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct virtio_device *vdev = vq->vq.vdev;
+	void *buf;
+	int err;
+
+	if (!vq->we_own_ring)
+		return -EPERM;
+
+	if (!vdev->config->disable_vq_and_reset)
+		return -ENOENT;
+
+	if (!vdev->config->enable_vq_after_reset)
+		return -ENOENT;
+
+	err = vdev->config->disable_vq_and_reset(_vq);
+	if (err)
+		return err;
+
+	while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
+		recycle(_vq, buf);
+
+	return 0;
+}
+
+static int virtqueue_enable_after_reset(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct virtio_device *vdev = vq->vq.vdev;
+
+	if (vdev->config->enable_vq_after_reset(_vq))
+		return -EBUSY;
+
+	return 0;
+}
 
 /*
  * Generic functions and exported symbols.
@@ -2836,13 +2873,8 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
 		     void (*recycle)(struct virtqueue *vq, void *buf))
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-	struct virtio_device *vdev = vq->vq.vdev;
-	void *buf;
 	int err;
 
-	if (!vq->we_own_ring)
-		return -EPERM;
-
 	if (num > vq->vq.num_max)
 		return -E2BIG;
 
@@ -2852,28 +2884,16 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
 	if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num)
 		return 0;
 
-	if (!vdev->config->disable_vq_and_reset)
-		return -ENOENT;
-
-	if (!vdev->config->enable_vq_after_reset)
-		return -ENOENT;
-
-	err = vdev->config->disable_vq_and_reset(_vq);
+	err = virtqueue_disable_and_recycle(_vq, recycle);
 	if (err)
 		return err;
 
-	while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
-		recycle(_vq, buf);
-
 	if (vq->packed_ring)
 		err = virtqueue_resize_packed(_vq, num);
 	else
 		err = virtqueue_resize_split(_vq, num);
 
-	if (vdev->config->enable_vq_after_reset(_vq))
-		return -EBUSY;
-
-	return err;
+	return virtqueue_enable_after_reset(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_resize);
--
2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-02 11:59 UTC
[PATCH vhost v1 12/12] virtio_ring: introduce virtqueue_reset()
Introduce virtqueue_reset() to release all buffers inside the vq.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 33 +++++++++++++++++++++++++++++++++
 include/linux/virtio.h       |  2 ++
 2 files changed, 35 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5ab9136a363e..bbec7c6709dc 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2897,6 +2897,39 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
 }
 EXPORT_SYMBOL_GPL(virtqueue_resize);
 
+/**
+ * virtqueue_reset - detach and recycle all unused buffers
+ * @_vq: the struct virtqueue we're talking about.
+ * @recycle: callback to recycle unused buffers
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error.
+ * 0: success.
+ * -EBUSY: Failed to sync with device, vq may not work properly
+ * -ENOENT: Transport or device not supported
+ * -EPERM: Operation not permitted
+ */
+int virtqueue_reset(struct virtqueue *_vq,
+		    void (*recycle)(struct virtqueue *vq, void *buf))
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	int err;
+
+	err = virtqueue_disable_and_recycle(_vq, recycle);
+	if (err)
+		return err;
+
+	if (vq->packed_ring)
+		virtqueue_reinit_packed(vq);
+	else
+		virtqueue_reinit_split(vq);
+
+	return virtqueue_enable_after_reset(_vq);
+}
+EXPORT_SYMBOL_GPL(virtqueue_reset);
+
 /* Only available for split ring */
 struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      unsigned int num,
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index ac028b07f684..7e93d0dc973e 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -107,6 +107,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
 
 int virtqueue_resize(struct virtqueue *vq, u32 num,
 		     void (*recycle)(struct virtqueue *vq, void *buf));
+int virtqueue_reset(struct virtqueue *vq,
+		    void (*recycle)(struct virtqueue *vq, void *buf));
 
 /**
  * struct virtio_device - representation of a device using virtio
--
2.32.0.3.g01195cf9f
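
As a usage sketch, a driver could drain and reinitialize a queue like this.
The recycle callback here simply frees the token, assuming it was a
kmalloc'ed buffer; a real driver would typically return it to its own pool:

#include <linux/virtio.h>
#include <linux/slab.h>

/* Sketch only: recycle callback invoked for every detached buffer. */
static void recycle_buf(struct virtqueue *vq, void *buf)
{
	kfree(buf);	/* assumption: token was kmalloc'ed by the driver */
}

static int drain_vq(struct virtqueue *vq)
{
	/* Detaches all unused buffers via recycle_buf(), reinitializes
	 * the ring, then re-enables the queue. */
	return virtqueue_reset(vq, recycle_buf);
}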