XDP socket (AF_XDP) is an excellent kernel-bypass network framework. The zero copy feature of xsk (XDP socket) needs to be supported by the driver. The performance of zero copy is very good. ENV: Qemu with vhost. vhost cpu | Guest APP CPU |Guest Softirq CPU | PPS -----------------------------|---------------|------------------|------------ xmit by sockperf: 90% | 100% | | 318967 xmit by xsk: 100% | 30% | 33% | 1192064 recv by sockperf: 100% | 68% | 100% | 692288 recv by xsk: 100% | 33% | 43% | 771670 Before AF_XDP can be implemented in virtio-net, the virtio core has to support these features: 1. virtio core support premapped 2. virtio core support reset per-queue 3. introduce DMA APIs to virtio core Please review. Thanks. v5: 1. fix for error handler 2. add flags to record internal dma mapping v4: 1. rename map_inter to dma_map_internal 2. fix: Excess function parameter 'vq' description in 'virtqueue_dma_dev' v3: 1. add map_inter to struct desc state to record whether virtio core did the dma map v2: 1. use sgs[0]->dma_address to judge whether the buffer is premapped 2. use extra.addr to judge whether to unmap for non-indirect desc 3. use indir_desc to judge whether to unmap for indirect desc 4. rename virtqueue_get_dma_dev to virtqueue_dma_dev v1: 1. expose dma device; do not introduce the API for dma and sync 2. split some commits for review. 
Xuan Zhuo (11): virtio_ring: split: separate dma codes virtio_ring: packed: separate dma codes virtio_ring: packed-indirect: separate dma codes virtio_ring: split: support premapped virtio_ring: packed: support premapped virtio_ring: packed-indirect: support premapped virtio_ring: update document for virtqueue_add_* virtio_ring: introduce virtqueue_dma_dev() virtio_ring: correct the expression of the description of virtqueue_resize() virtio_ring: separate the logic of reset/enable from virtqueue_resize virtio_ring: introduce virtqueue_reset() drivers/virtio/virtio.c | 6 + drivers/virtio/virtio_ring.c | 351 +++++++++++++++++++++++++---------- include/linux/virtio.h | 4 + 3 files changed, 264 insertions(+), 97 deletions(-) -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-24 06:30 UTC
[PATCH vhost v5 01/11] virtio_ring: split: separate dma codes
DMA-related logic is separated from the virtqueue_add_split() to one new function. DMA address will be saved as sg->dma_address if use_dma_api is true, then virtqueue_add_split() will use it directly. Unmap operation will be simpler. The purpose of this is to facilitate subsequent support to receive dma address mapped by drivers. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> --- drivers/virtio/virtio_ring.c | 122 +++++++++++++++++++++++++++-------- 1 file changed, 94 insertions(+), 28 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 41144b5246a8..2aafb7da793d 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -379,6 +379,14 @@ static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, direction); } +static dma_addr_t vring_sg_address(struct scatterlist *sg) +{ + if (sg->dma_address) + return sg->dma_address; + + return (dma_addr_t)sg_phys(sg); +} + static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, void *cpu_addr, size_t size, enum dma_data_direction direction) @@ -520,6 +528,80 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq, return next; } +static void virtqueue_unmap_sgs(struct vring_virtqueue *vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs) +{ + struct scatterlist *sg; + unsigned int n; + + if (!vq->use_dma_api) + return; + + for (n = 0; n < out_sgs; n++) { + for (sg = sgs[n]; sg; sg = sg_next(sg)) { + if (!sg->dma_address) + return; + + dma_unmap_page(vring_dma_dev(vq), sg->dma_address, + sg->length, DMA_TO_DEVICE); + } + } + + for (; n < (out_sgs + in_sgs); n++) { + for (sg = sgs[n]; sg; sg = sg_next(sg)) { + if (!sg->dma_address) + return; + + dma_unmap_page(vring_dma_dev(vq), sg->dma_address, + sg->length, DMA_FROM_DEVICE); + } + } +} + +static int virtqueue_map_sgs(struct vring_virtqueue *vq, + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int 
out_sgs, + unsigned int in_sgs) +{ + struct scatterlist *sg; + unsigned int n; + + if (!vq->use_dma_api) + return 0; + + for (n = 0; n < out_sgs; n++) { + for (sg = sgs[n]; sg; sg = sg_next(sg)) { + dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); + + if (vring_mapping_error(vq, addr)) + goto err; + + sg->dma_address = addr; + } + } + + for (; n < (out_sgs + in_sgs); n++) { + for (sg = sgs[n]; sg; sg = sg_next(sg)) { + dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); + + if (vring_mapping_error(vq, addr)) + goto err; + + sg->dma_address = addr; + } + } + + return 0; + +err: + virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs); + return -ENOMEM; +} + static inline int virtqueue_add_split(struct virtqueue *_vq, struct scatterlist *sgs[], unsigned int total_sg, @@ -532,9 +614,9 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, struct vring_virtqueue *vq = to_vvq(_vq); struct scatterlist *sg; struct vring_desc *desc; - unsigned int i, n, avail, descs_used, prev, err_idx; - int head; + unsigned int i, n, avail, descs_used, prev; bool indirect; + int head; START_USE(vq); @@ -586,32 +668,30 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, return -ENOSPC; } + if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs)) + goto err_map; + for (n = 0; n < out_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); - if (vring_mapping_error(vq, addr)) - goto unmap_release; - prev = i; /* Note that we trust indirect descriptor * table since it use stream DMA mapping. 
*/ - i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length, + i = virtqueue_add_desc_split(_vq, desc, i, + vring_sg_address(sg), + sg->length, VRING_DESC_F_NEXT, indirect); } } for (; n < (out_sgs + in_sgs); n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); - if (vring_mapping_error(vq, addr)) - goto unmap_release; - prev = i; /* Note that we trust indirect descriptor * table since it use stream DMA mapping. */ - i = virtqueue_add_desc_split(_vq, desc, i, addr, + i = virtqueue_add_desc_split(_vq, desc, i, + vring_sg_address(sg), sg->length, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE, @@ -679,23 +759,9 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, return 0; unmap_release: - err_idx = i; - - if (indirect) - i = 0; - else - i = head; - - for (n = 0; n < total_sg; n++) { - if (i == err_idx) - break; - if (indirect) { - vring_unmap_one_split_indirect(vq, &desc[i]); - i = virtio16_to_cpu(_vq->vdev, desc[i].next); - } else - i = vring_unmap_one_split(vq, i); - } + virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs); +err_map: if (indirect) kfree(desc); -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-24 06:30 UTC
[PATCH vhost v5 02/11] virtio_ring: packed: separate dma codes
DMA-related logic is separated from the virtqueue_add_packed(). DMA address will be saved as sg->dma_address, then virtqueue_add_packed() will use it directly. Unmap operation will be simpler. The purpose of this is to facilitate subsequent support to receive dma address mapped by drivers. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> Acked-by: Jason Wang <jasowang at redhat.com> --- drivers/virtio/virtio_ring.c | 39 +++++++++--------------------------- 1 file changed, 9 insertions(+), 30 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 2aafb7da793d..b1bf7266daa0 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1431,9 +1431,9 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, struct vring_virtqueue *vq = to_vvq(_vq); struct vring_packed_desc *desc; struct scatterlist *sg; - unsigned int i, n, c, descs_used, err_idx; + unsigned int i, n, c, descs_used; __le16 head_flags, flags; - u16 head, id, prev, curr, avail_used_flags; + u16 head, id, prev, curr; int err; START_USE(vq); @@ -1462,7 +1462,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, } head = vq->packed.next_avail_idx; - avail_used_flags = vq->packed.avail_used_flags; WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); @@ -1480,15 +1479,15 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, id = vq->free_head; BUG_ON(id == vq->packed.vring.num); + if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs)) { + END_USE(vq); + return -EIO; + } + curr = id; c = 0; for (n = 0; n < out_sgs + in_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (vring_mapping_error(vq, addr)) - goto unmap_release; - flags = cpu_to_le16(vq->packed.avail_used_flags | (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | (n < out_sgs ? 
0 : VRING_DESC_F_WRITE)); @@ -1497,12 +1496,12 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, else desc[i].flags = flags; - desc[i].addr = cpu_to_le64(addr); + desc[i].addr = cpu_to_le64(vring_sg_address(sg)); desc[i].len = cpu_to_le32(sg->length); desc[i].id = cpu_to_le16(id); if (unlikely(vq->use_dma_api)) { - vq->packed.desc_extra[curr].addr = addr; + vq->packed.desc_extra[curr].addr = vring_sg_address(sg); vq->packed.desc_extra[curr].len = sg->length; vq->packed.desc_extra[curr].flags le16_to_cpu(flags); @@ -1548,26 +1547,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, END_USE(vq); return 0; - -unmap_release: - err_idx = i; - i = head; - curr = vq->free_head; - - vq->packed.avail_used_flags = avail_used_flags; - - for (n = 0; n < total_sg; n++) { - if (i == err_idx) - break; - vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]); - curr = vq->packed.desc_extra[curr].next; - i++; - if (i >= vq->packed.vring.num) - i = 0; - } - - END_USE(vq); - return -EIO; } static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-24 06:30 UTC
[PATCH vhost v5 03/11] virtio_ring: packed-indirect: separate dma codes
DMA-related logic is separated from the virtqueue_add_indirect_packed(). DMA address will be saved as sg->dma_address, then virtqueue_add_indirect_packed() will use it directly. Unmap operation will be simpler. The purpose of this is to facilitate subsequent support to receive dma address mapped by drivers. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> Acked-by: Jason Wang <jasowang at redhat.com> --- drivers/virtio/virtio_ring.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index b1bf7266daa0..3ada30b475d2 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1315,7 +1315,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, { struct vring_packed_desc *desc; struct scatterlist *sg; - unsigned int i, n, err_idx; + unsigned int i, n; u16 head, id; dma_addr_t addr; @@ -1335,16 +1335,14 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, id = vq->free_head; BUG_ON(id == vq->packed.vring.num); + if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs)) + goto err_map; + for (n = 0; n < out_sgs + in_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { - addr = vring_map_one_sg(vq, sg, n < out_sgs ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (vring_mapping_error(vq, addr)) - goto unmap_release; - desc[i].flags = cpu_to_le16(n < out_sgs ? 0 : VRING_DESC_F_WRITE); - desc[i].addr = cpu_to_le64(addr); + desc[i].addr = cpu_to_le64(vring_sg_address(sg)); desc[i].len = cpu_to_le32(sg->length); i++; } @@ -1408,11 +1406,9 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, return 0; unmap_release: - err_idx = i; - - for (i = 0; i < err_idx; i++) - vring_unmap_desc_packed(vq, &desc[i]); + virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs); +err_map: kfree(desc); END_USE(vq); -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-24 06:30 UTC
[PATCH vhost v5 04/11] virtio_ring: split: support premapped
The virtio core only supports virtual addresses; DMA mapping is done inside the virtio core. In some scenarios (such as AF_XDP), the memory is allocated and DMA-mapped in advance, so we need to support passing the DMA address to the virtio core. Drivers can use sg->dma_address to pass the mapped DMA address to the virtio core. If one sg->dma_address is used, then all sgs must use sg->dma_address; otherwise all sg->dma_address must be null when passed to the virtio APIs. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> --- drivers/virtio/virtio_ring.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 3ada30b475d2..1c3084a8f4e3 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -67,9 +67,12 @@ #define LAST_ADD_TIME_INVALID(vq) #endif +#define VRING_STATE_F_MAP_INTERNAL BIT(0) + struct vring_desc_state_split { void *data; /* Data for callback. */ struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ + u64 flags; /* State flags. */ }; struct vring_desc_state_packed { @@ -448,7 +451,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, } static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, - unsigned int i) + unsigned int i, bool dma_map_internal) { struct vring_desc_extra *extra = vq->split.desc_extra; u16 flags; @@ -465,6 +468,9 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, (flags & VRING_DESC_F_WRITE) ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE); } else { + if (!dma_map_internal) + goto out; + dma_unmap_page(vring_dma_dev(vq), extra[i].addr, extra[i].len, @@ -615,7 +621,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, struct scatterlist *sg; struct vring_desc *desc; unsigned int i, n, avail, descs_used, prev; - bool indirect; + bool indirect, dma_map_internal; int head; START_USE(vq); @@ -668,7 +674,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, return -ENOSPC; } - if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs)) + dma_map_internal = !sgs[0]->dma_address; + if (dma_map_internal && virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs)) goto err_map; for (n = 0; n < out_sgs; n++) { @@ -735,6 +742,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, else vq->split.desc_state[head].indir_desc = ctx; + vq->split.desc_state[head].flags = dma_map_internal ? VRING_STATE_F_MAP_INTERNAL : 0; + /* Put entry in available array (but don't update avail->idx until they * do sync). */ avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); @@ -759,7 +768,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, return 0; unmap_release: - virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs); + if (dma_map_internal) + virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs); err_map: if (indirect) @@ -805,20 +815,22 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, { unsigned int i, j; __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); + bool dma_map_internal; /* Clear data ptr. 
*/ vq->split.desc_state[head].data = NULL; + dma_map_internal = !!(vq->split.desc_state[head].flags & VRING_STATE_F_MAP_INTERNAL); /* Put back on free list: unmap first-level descriptors and find end */ i = head; while (vq->split.vring.desc[i].flags & nextflag) { - vring_unmap_one_split(vq, i); + vring_unmap_one_split(vq, i, dma_map_internal); i = vq->split.desc_extra[i].next; vq->vq.num_free++; } - vring_unmap_one_split(vq, i); + vring_unmap_one_split(vq, i, dma_map_internal); vq->split.desc_extra[i].next = vq->free_head; vq->free_head = head; @@ -840,8 +852,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, VRING_DESC_F_INDIRECT)); BUG_ON(len == 0 || len % sizeof(struct vring_desc)); - for (j = 0; j < len / sizeof(struct vring_desc); j++) - vring_unmap_one_split_indirect(vq, &indir_desc[j]); + if (dma_map_internal) { + for (j = 0; j < len / sizeof(struct vring_desc); j++) + vring_unmap_one_split_indirect(vq, &indir_desc[j]); + } kfree(indir_desc); vq->split.desc_state[head].indir_desc = NULL; -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-24 06:30 UTC
[PATCH vhost v5 05/11] virtio_ring: packed: support premapped
The virtio core only supports virtual addresses; DMA mapping is done inside the virtio core. In some scenarios (such as AF_XDP), the memory is allocated and DMA-mapped in advance, so we need to support passing the DMA address to the virtio core. Drivers can use sg->dma_address to pass the mapped DMA address to the virtio core. If one sg->dma_address is used, then all sgs must use sg->dma_address; otherwise all sg->dma_address must be null when passed to the virtio APIs. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> --- drivers/virtio/virtio_ring.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 1c3084a8f4e3..df6d514a681a 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -78,6 +78,7 @@ struct vring_desc_state_split { struct vring_desc_state_packed { void *data; /* Data for callback. */ struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + u64 flags; /* State flags. */ u16 num; /* Descriptor list length. */ u16 last; /* The last desc state in a list. */ }; @@ -1263,7 +1264,8 @@ static inline u16 packed_last_used(u16 last_used_idx) } static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, - struct vring_desc_extra *extra) + struct vring_desc_extra *extra, + bool dma_map_internal) { u16 flags; @@ -1278,6 +1280,9 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } else { + if (!dma_map_internal) + return; + dma_unmap_page(vring_dma_dev(vq), extra->addr, extra->len, (flags & VRING_DESC_F_WRITE) ? 
@@ -1444,6 +1449,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, unsigned int i, n, c, descs_used; __le16 head_flags, flags; u16 head, id, prev, curr; + bool dma_map_internal; int err; START_USE(vq); @@ -1489,7 +1495,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, id = vq->free_head; BUG_ON(id == vq->packed.vring.num); - if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs)) { + dma_map_internal = !sgs[0]->dma_address; + if (dma_map_internal && virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs)) { END_USE(vq); return -EIO; } @@ -1543,6 +1550,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, vq->packed.desc_state[id].data = data; vq->packed.desc_state[id].indir_desc = ctx; vq->packed.desc_state[id].last = prev; + vq->packed.desc_state[id].flags = dma_map_internal ? VRING_STATE_F_MAP_INTERNAL : 0; /* * A driver MUST NOT make the first descriptor in the list @@ -1614,8 +1622,10 @@ static void detach_buf_packed(struct vring_virtqueue *vq, struct vring_desc_state_packed *state = NULL; struct vring_packed_desc *desc; unsigned int i, curr; + bool dma_map_internal; state = &vq->packed.desc_state[id]; + dma_map_internal = !!(state->flags & VRING_STATE_F_MAP_INTERNAL); /* Clear data ptr. */ state->data = NULL; @@ -1628,7 +1638,8 @@ static void detach_buf_packed(struct vring_virtqueue *vq, curr = id; for (i = 0; i < state->num; i++) { vring_unmap_extra_packed(vq, - &vq->packed.desc_extra[curr]); + &vq->packed.desc_extra[curr], + dma_map_internal); curr = vq->packed.desc_extra[curr].next; } } -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-24 06:30 UTC
[PATCH vhost v5 06/11] virtio_ring: packed-indirect: support premapped
The virtio core only supports virtual addresses; DMA mapping is done inside the virtio core. In some scenarios (such as AF_XDP), the memory is allocated and DMA-mapped in advance, so we need to support passing the DMA address to the virtio core. Drivers can use sg->dma_address to pass the mapped DMA address to the virtio core. If one sg->dma_address is used, then all sgs must use sg->dma_address; otherwise all sg->dma_address must be null when passed to the virtio APIs. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> --- drivers/virtio/virtio_ring.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index df6d514a681a..511d791ba9cc 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1337,6 +1337,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, unsigned int i, n; u16 head, id; dma_addr_t addr; + bool dma_map_internal; head = vq->packed.next_avail_idx; desc = alloc_indirect_packed(total_sg, gfp); @@ -1354,7 +1355,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, id = vq->free_head; BUG_ON(id == vq->packed.vring.num); - if (virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs)) + dma_map_internal = !sgs[0]->dma_address; + if (dma_map_internal && virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs)) goto err_map; for (n = 0; n < out_sgs + in_sgs; n++) { @@ -1416,6 +1418,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, vq->packed.desc_state[id].data = data; vq->packed.desc_state[id].indir_desc = desc; vq->packed.desc_state[id].last = id; + vq->packed.desc_state[id].flags = dma_map_internal ? 
VRING_STATE_F_MAP_INTERNAL : 0; + vq->num_added += 1; @@ -1425,7 +1429,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, return 0; unmap_release: - virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs); + if (dma_map_internal) + virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs); err_map: kfree(desc); @@ -1652,7 +1657,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq, if (!desc) return; - if (vq->use_dma_api) { + if (vq->use_dma_api && dma_map_internal) { len = vq->packed.desc_extra[id].len; for (i = 0; i < len / sizeof(struct vring_packed_desc); i++) -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-24 06:30 UTC
[PATCH vhost v5 07/11] virtio_ring: update document for virtqueue_add_*
Update the document of virtqueue_add_* series API, allowing the callers to use sg->dma_address to pass the dma address to Virtio Core. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> --- drivers/virtio/virtio_ring.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 511d791ba9cc..fab956784564 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2201,6 +2201,10 @@ static inline int virtqueue_add(struct virtqueue *_vq, * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * + * If the caller has done dma map then use sg->dma_address to pass dma address. + * If one sg->dma_address is used, then all sgs must use sg->dma_address; + * otherwise all sg->dma_address must be NULL. + * * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). */ int virtqueue_add_sgs(struct virtqueue *_vq, @@ -2235,6 +2239,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_sgs); * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * + * If the caller has done dma map then use sg->dma_address to pass dma address. + * If one sg->dma_address is used, then all sgs must use sg->dma_address; + * otherwise all sg->dma_address must be NULL. + * * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). */ int virtqueue_add_outbuf(struct virtqueue *vq, @@ -2257,6 +2265,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * + * If the caller has done dma map then use sg->dma_address to pass dma address. + * If one sg->dma_address is used, then all sgs must use sg->dma_address; + * otherwise all sg->dma_address must be NULL. + * * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 
*/ int virtqueue_add_inbuf(struct virtqueue *vq, @@ -2280,6 +2292,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * + * If the caller has done dma map then use sg->dma_address to pass dma address. + * If one sg->dma_address is used, then all sgs must use sg->dma_address; + * otherwise all sg->dma_address must be NULL. + * * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). */ int virtqueue_add_inbuf_ctx(struct virtqueue *vq, -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-24 06:30 UTC
[PATCH vhost v5 08/11] virtio_ring: introduce virtqueue_dma_dev()
Added virtqueue_dma_dev() to get DMA device for virtio. Then the caller can do dma operation in advance. The purpose is to keep memory mapped across multiple add/get buf operations. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> Acked-by: Jason Wang <jasowang at redhat.com> --- drivers/virtio/virtio.c | 6 ++++++ drivers/virtio/virtio_ring.c | 17 +++++++++++++++++ include/linux/virtio.h | 2 ++ 3 files changed, 25 insertions(+) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 3893dc29eb26..11c5035369e2 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <linux/dma-mapping.h> #include <linux/virtio.h> #include <linux/spinlock.h> #include <linux/virtio_config.h> @@ -243,6 +244,11 @@ static int virtio_dev_probe(struct device *_d) u64 driver_features; u64 driver_features_legacy; + _d->dma_mask = &_d->coherent_dma_mask; + err = dma_set_mask_and_coherent(_d, DMA_BIT_MASK(64)); + if (err) + return err; + /* We have a driver! */ virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index fab956784564..e47f55cd125b 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2308,6 +2308,23 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq, } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); +/** + * virtqueue_dma_dev - get the dma dev + * @_vq: the struct virtqueue we're talking about. + * + * Returns the dma dev. That can been used for dma api. + */ +struct device *virtqueue_dma_dev(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (vq->use_dma_api) + return vring_dma_dev(vq); + else + return &vq->vq.vdev->dev; +} +EXPORT_SYMBOL_GPL(virtqueue_dma_dev); + /** * virtqueue_kick_prepare - first half of split virtqueue_kick call. 
* @_vq: the struct virtqueue diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 2b472514c49b..1fa50191cf0a 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -61,6 +61,8 @@ int virtqueue_add_sgs(struct virtqueue *vq, void *data, gfp_t gfp); +struct device *virtqueue_dma_dev(struct virtqueue *vq); + bool virtqueue_kick(struct virtqueue *vq); bool virtqueue_kick_prepare(struct virtqueue *vq); -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-24 06:30 UTC
[PATCH vhost v5 09/11] virtio_ring: correct the expression of the description of virtqueue_resize()
Replace "useless" with the more accurate "unused". Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> Acked-by: Jason Wang <jasowang at redhat.com> --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index e47f55cd125b..14035bd8487e 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2713,7 +2713,7 @@ EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma); * virtqueue_resize - resize the vring of vq * @_vq: the struct virtqueue we're talking about. * @num: new ring num - * @recycle: callback for recycle the useless buffer + * @recycle: callback to recycle unused buffers * * When it is really necessary to create a new vring, it will set the current vq * into the reset state. Then call the passed callback to recycle the buffer -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-24 06:30 UTC
[PATCH vhost v5 10/11] virtio_ring: separate the logic of reset/enable from virtqueue_resize
The subsequent reset function will reuse these logic. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> Acked-by: Jason Wang <jasowang at redhat.com> --- drivers/virtio/virtio_ring.c | 58 ++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 14035bd8487e..dd6a7abeba4d 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2167,6 +2167,43 @@ static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num) return -ENOMEM; } +static int virtqueue_disable_and_recycle(struct virtqueue *_vq, + void (*recycle)(struct virtqueue *vq, void *buf)) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = vq->vq.vdev; + void *buf; + int err; + + if (!vq->we_own_ring) + return -EPERM; + + if (!vdev->config->disable_vq_and_reset) + return -ENOENT; + + if (!vdev->config->enable_vq_after_reset) + return -ENOENT; + + err = vdev->config->disable_vq_and_reset(_vq); + if (err) + return err; + + while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL) + recycle(_vq, buf); + + return 0; +} + +static int virtqueue_enable_after_reset(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = vq->vq.vdev; + + if (vdev->config->enable_vq_after_reset(_vq)) + return -EBUSY; + + return 0; +} /* * Generic functions and exported symbols. @@ -2737,13 +2774,8 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num, void (*recycle)(struct virtqueue *vq, void *buf)) { struct vring_virtqueue *vq = to_vvq(_vq); - struct virtio_device *vdev = vq->vq.vdev; - void *buf; int err; - if (!vq->we_own_ring) - return -EPERM; - if (num > vq->vq.num_max) return -E2BIG; @@ -2753,28 +2785,16 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num, if ((vq->packed_ring ? 
vq->packed.vring.num : vq->split.vring.num) == num) return 0; - if (!vdev->config->disable_vq_and_reset) - return -ENOENT; - - if (!vdev->config->enable_vq_after_reset) - return -ENOENT; - - err = vdev->config->disable_vq_and_reset(_vq); + err = virtqueue_disable_and_recycle(_vq, recycle); if (err) return err; - while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL) - recycle(_vq, buf); - if (vq->packed_ring) err = virtqueue_resize_packed(_vq, num); else err = virtqueue_resize_split(_vq, num); - if (vdev->config->enable_vq_after_reset(_vq)) - return -EBUSY; - - return err; + return virtqueue_enable_after_reset(_vq); } EXPORT_SYMBOL_GPL(virtqueue_resize); -- 2.32.0.3.g01195cf9f
Xuan Zhuo
2023-Mar-24 06:30 UTC
[PATCH vhost v5 11/11] virtio_ring: introduce virtqueue_reset()
Introduce virtqueue_reset() to release all buffer inside vq. Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> Acked-by: Jason Wang <jasowang at redhat.com> --- drivers/virtio/virtio_ring.c | 33 +++++++++++++++++++++++++++++++++ include/linux/virtio.h | 2 ++ 2 files changed, 35 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index dd6a7abeba4d..3a210b58cac3 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2798,6 +2798,39 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num, } EXPORT_SYMBOL_GPL(virtqueue_resize); +/** + * virtqueue_reset - detach and recycle all unused buffers + * @_vq: the struct virtqueue we're talking about. + * @recycle: callback to recycle unused buffers + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error. + * 0: success. + * -EBUSY: Failed to sync with device, vq may not work properly + * -ENOENT: Transport or device not supported + * -EPERM: Operation not permitted + */ +int virtqueue_reset(struct virtqueue *_vq, + void (*recycle)(struct virtqueue *vq, void *buf)) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + int err; + + err = virtqueue_disable_and_recycle(_vq, recycle); + if (err) + return err; + + if (vq->packed_ring) + virtqueue_reinit_packed(vq); + else + virtqueue_reinit_split(vq); + + return virtqueue_enable_after_reset(_vq); +} +EXPORT_SYMBOL_GPL(virtqueue_reset); + /* Only available for split ring */ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int num, diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 1fa50191cf0a..22bbd06ef8c8 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -97,6 +97,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq); int virtqueue_resize(struct virtqueue *vq, u32 num, void (*recycle)(struct virtqueue *vq, void *buf)); +int virtqueue_reset(struct virtqueue 
*vq, + void (*recycle)(struct virtqueue *vq, void *buf)); /** * struct virtio_device - representation of a device using virtio -- 2.32.0.3.g01195cf9f
Possibly Parallel Threads
- [PATCH vhost v6 00/11] virtio core prepares for AF_XDP
- [PATCH vhost v8 00/12] virtio core prepares for AF_XDP
- [PATCH vhost v7 00/11] virtio core prepares for AF_XDP
- [PATCH vhost v4 00/11] virtio core prepares for AF_XDP
- [PATCH vhost v4 04/11] virtio_ring: split: support premapped