virtqueue_add() only supports virtual addresses; the DMA mapping is done
inside virtqueue_add() itself.

In some scenarios (such as the AF_XDP scenario), the DMA mapping is done in
advance, so we need to support passing an already-mapped DMA address to
virtqueue_add().

This patch set stipulates that if sg->dma_address is not NULL, that address
is used as the DMA address. This is recorded in extra->flags, so the entry
can be skipped when the DMA unmap is executed:

        extra->flags |= VRING_DESC_F_PREDMA;

But an indirect desc has no corresponding extra, so the second and third
patches of this set allocate the corresponding extra while allocating the
indirect descs. Every desc must have its own extra, because within one sgs
call some entries may be pre-mapped while others are virtual addresses, so
we must allocate an extra for each indirect desc.

Xuan Zhuo (6):
  virtio: rename vring_unmap_state_packed() to
    vring_unmap_extra_packed()
  virtio: split: alloc indirect desc with extra
  virtio: packed: alloc indirect desc with extra
  virtio: split: virtqueue_add_split() support dma address
  virtio: packed: virtqueue_add_packed() support dma address
  virtio: add api virtio_dma_map() for advance dma

 drivers/virtio/virtio_ring.c | 387 ++++++++++++++++++++---------------
 include/linux/virtio.h       |   9 +
 2 files changed, 232 insertions(+), 164 deletions(-)

--
2.31.0
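For illustration, a minimal driver-side sketch of the intended usage
(xsk_page, xsk_dma, hdr and cookie are invented names; the scatterlist
helpers and virtqueue_add_outbuf() are existing kernel APIs). One sgs call
may mix a pre-mapped entry with an ordinary one; only sg->dma_address
decides which path is taken:

        struct scatterlist sg[2];
        int err;

        sg_init_table(sg, 2);

        /* Entry 0: DMA already done elsewhere (e.g. an AF_XDP umem page). */
        sg_set_page(&sg[0], xsk_page, PAGE_SIZE, 0);
        sg[0].dma_address = xsk_dma;    /* non-zero => used as the DMA address */

        /* Entry 1: plain kernel virtual memory, mapped by virtqueue_add(). */
        sg_set_buf(&sg[1], hdr, sizeof(*hdr));  /* dma_address stays 0 */

        err = virtqueue_add_outbuf(vq, sg, 2, cookie, GFP_ATOMIC);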
Xuan Zhuo
2022-Jan-07 06:33 UTC
[PATCH 1/6] virtio: rename vring_unmap_state_packed() to vring_unmap_extra_packed()
The parameter actually handled by vring_unmap_state_packed() is a
vring_desc_extra, so the function should use "extra" instead of "state"
in its name.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 028b05d44546..81531cbb08a7 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -984,24 +984,24 @@ static struct virtqueue *vring_create_virtqueue_split(
  * Packed ring specific functions - *_packed().
  */

-static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
-                                     struct vring_desc_extra *state)
+static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
+                                     struct vring_desc_extra *extra)
 {
        u16 flags;

        if (!vq->use_dma_api)
                return;

-       flags = state->flags;
+       flags = extra->flags;

        if (flags & VRING_DESC_F_INDIRECT) {
                dma_unmap_single(vring_dma_dev(vq),
-                                state->addr, state->len,
+                                extra->addr, extra->len,
                                 (flags & VRING_DESC_F_WRITE) ?
                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
        } else {
                dma_unmap_page(vring_dma_dev(vq),
-                              state->addr, state->len,
+                              extra->addr, extra->len,
                               (flags & VRING_DESC_F_WRITE) ?
                               DMA_FROM_DEVICE : DMA_TO_DEVICE);
        }
 }

@@ -1301,8 +1301,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
        for (n = 0; n < total_sg; n++) {
                if (i == err_idx)
                        break;
-               vring_unmap_state_packed(vq,
-                                        &vq->packed.desc_extra[curr]);
+               vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
                curr = vq->packed.desc_extra[curr].next;
                i++;
                if (i >= vq->packed.vring.num)
@@ -1381,8 +1380,8 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
        if (unlikely(vq->use_dma_api)) {
                curr = id;
                for (i = 0; i < state->num; i++) {
-                       vring_unmap_state_packed(vq,
-                                                &vq->packed.desc_extra[curr]);
+                       vring_unmap_extra_packed(vq,
+                                                &vq->packed.desc_extra[curr]);
                        curr = vq->packed.desc_extra[curr].next;
                }
        }

--
2.31.0
Xuan Zhuo
2022-Jan-07 06:33 UTC
[PATCH 2/6] virtio: split: alloc indirect desc with extra
When indirect is not used, each desc has a corresponding extra, which
records information such as the dma address, flags and next. When indirect
is used, the allocated descs have no corresponding extra to record the dma
information, so that information must be read back from the desc itself on
unmap.

This patch allocates the corresponding extra array when the indirect descs
are allocated. This has the following advantages:

1. The dma information of each desc is recorded, so there is no need to
   read the desc on unmap.
2. The processing becomes more convenient and unified.
3. Additional information can be recorded in the extra, which will be used
   by subsequent patches.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 197 ++++++++++++++++-------------------
 1 file changed, 91 insertions(+), 106 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 81531cbb08a7..64b4d2b03016 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -66,9 +66,21 @@
 #define LAST_ADD_TIME_INVALID(vq)
 #endif

+struct vring_desc_extra {
+       dma_addr_t addr;                /* Descriptor DMA addr. */
+       u32 len;                        /* Descriptor length. */
+       u16 flags;                      /* Descriptor flags. */
+       u16 next;                       /* The next desc state in a list. */
+};
+
+struct vring_indirect_split {
+       struct vring_desc_extra *extra;
+       struct vring_desc desc[];
+};
+
 struct vring_desc_state_split {
        void *data;                     /* Data for callback. */
-       struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
+       struct vring_indirect_split *in;/* Indirect descriptor, if any. */
 };

 struct vring_desc_state_packed {
@@ -78,13 +90,6 @@ struct vring_desc_state_packed {
        u16 last;                       /* The last desc state in a list. */
 };

-struct vring_desc_extra {
-       dma_addr_t addr;                /* Descriptor DMA addr. */
-       u32 len;                        /* Descriptor length. */
-       u16 flags;                      /* Descriptor flags. */
-       u16 next;                       /* The next desc state in a list. */
-};
-
 struct vring_virtqueue {
        struct virtqueue vq;

@@ -369,66 +374,40 @@ static int vring_mapping_error(const struct vring_virtqueue *vq,
  * Split ring specific functions - *_split().
  */

-static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
-                                          struct vring_desc *desc)
-{
-       u16 flags;
-
-       if (!vq->use_dma_api)
-               return;
-
-       flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
-
-       if (flags & VRING_DESC_F_INDIRECT) {
-               dma_unmap_single(vring_dma_dev(vq),
-                                virtio64_to_cpu(vq->vq.vdev, desc->addr),
-                                virtio32_to_cpu(vq->vq.vdev, desc->len),
-                                (flags & VRING_DESC_F_WRITE) ?
-                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
-       } else {
-               dma_unmap_page(vring_dma_dev(vq),
-                              virtio64_to_cpu(vq->vq.vdev, desc->addr),
-                              virtio32_to_cpu(vq->vq.vdev, desc->len),
-                              (flags & VRING_DESC_F_WRITE) ?
-                              DMA_FROM_DEVICE : DMA_TO_DEVICE);
-       }
-}
-
 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
-                                         unsigned int i)
+                                         struct vring_desc_extra *extra)
 {
-       struct vring_desc_extra *extra = vq->split.desc_extra;
        u16 flags;

        if (!vq->use_dma_api)
                goto out;

-       flags = extra[i].flags;
+       flags = extra->flags;

        if (flags & VRING_DESC_F_INDIRECT) {
                dma_unmap_single(vring_dma_dev(vq),
-                                extra[i].addr,
-                                extra[i].len,
+                                extra->addr,
+                                extra->len,
                                 (flags & VRING_DESC_F_WRITE) ?
                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
        } else {
                dma_unmap_page(vring_dma_dev(vq),
-                              extra[i].addr,
-                              extra[i].len,
+                              extra->addr,
+                              extra->len,
                               (flags & VRING_DESC_F_WRITE) ?
                               DMA_FROM_DEVICE : DMA_TO_DEVICE);
        }

 out:
-       return extra[i].next;
+       return extra->next;
 }

-static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
-                                              unsigned int total_sg,
-                                              gfp_t gfp)
+static struct vring_indirect_split *alloc_indirect_split(struct virtqueue *_vq,
+                                                         unsigned int total_sg,
+                                                         gfp_t gfp)
 {
-       struct vring_desc *desc;
-       unsigned int i;
+       struct vring_indirect_split *in;
+       unsigned int i, size;

        /*
         * We require lowmem mappings for the descriptors because
@@ -437,40 +416,52 @@ static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
         */
        gfp &= ~__GFP_HIGHMEM;

-       desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
-       if (!desc)
+       size = sizeof(struct vring_desc) + sizeof(struct vring_desc_extra);
+       size = size * total_sg + sizeof(*in);
+
+       in = kmalloc(size, gfp);
+       if (!in)
                return NULL;

+       in->extra = (struct vring_desc_extra *)(in->desc + total_sg);
+
        for (i = 0; i < total_sg; i++)
-               desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
-       return desc;
+               in->extra[i].next = i + 1;
+
+       return in;
 }

 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
-                                                   struct vring_desc *desc,
+                                                   struct vring_indirect_split *in,
                                                    unsigned int i,
                                                    dma_addr_t addr,
                                                    unsigned int len,
-                                                   u16 flags,
-                                                   bool indirect)
+                                                   u16 flags)
 {
        struct vring_virtqueue *vring = to_vvq(vq);
-       struct vring_desc_extra *extra = vring->split.desc_extra;
+       struct vring_desc_extra *extra;
+       struct vring_desc *desc;
        u16 next;

-       desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
-       desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
-       desc[i].len = cpu_to_virtio32(vq->vdev, len);
+       if (!in) {
+               desc = vring->split.vring.desc + i;
+               extra = vring->split.desc_extra + i;
+
+       } else {
+               desc = in->desc + i;
+               extra = in->extra + i;
+       }
+
+       next = extra->next;

-       if (!indirect) {
-               next = extra[i].next;
-               desc[i].next = cpu_to_virtio16(vq->vdev, next);
+       desc->flags = cpu_to_virtio16(vq->vdev, flags);
+       desc->addr = cpu_to_virtio64(vq->vdev, addr);
+       desc->len = cpu_to_virtio32(vq->vdev, len);
+       desc->next = cpu_to_virtio16(vq->vdev, next);

-               extra[i].addr = addr;
-               extra[i].len = len;
-               extra[i].flags = flags;
-       } else
-               next = virtio16_to_cpu(vq->vdev, desc[i].next);
+       extra->addr = addr;
+       extra->len = len;
+       extra->flags = flags;

        return next;
 }
@@ -485,11 +476,11 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
                                      gfp_t gfp)
 {
        struct vring_virtqueue *vq = to_vvq(_vq);
+       struct vring_indirect_split *in = NULL;
        struct scatterlist *sg;
        struct vring_desc *desc;
        unsigned int i, n, avail, descs_used, prev, err_idx;
        int head;
-       bool indirect;

        START_USE(vq);

@@ -507,21 +498,21 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
        head = vq->free_head;

-       if (virtqueue_use_indirect(_vq, total_sg))
-               desc = alloc_indirect_split(_vq, total_sg, gfp);
-       else {
+       if (virtqueue_use_indirect(_vq, total_sg)) {
+               in = alloc_indirect_split(_vq, total_sg, gfp);
+               if (in)
+                       desc = in->desc;
+       } else {
                desc = NULL;
                WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
        }

        if (desc) {
                /* Use a single buffer which doesn't continue */
-               indirect = true;
                /* Set up rest to use this indirect table. */
                i = 0;
                descs_used = 1;
        } else {
-               indirect = false;
                desc = vq->split.vring.desc;
                i = head;
                descs_used = total_sg;
@@ -535,8 +526,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
                 * host should service the ring ASAP. */
                if (out_sgs)
                        vq->notify(&vq->vq);
-               if (indirect)
-                       kfree(desc);
+               kfree(in);
                END_USE(vq);
                return -ENOSPC;
        }
@@ -551,9 +541,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
                        /* Note that we trust indirect descriptor
                         * table since it use stream DMA mapping.
                         */
-                       i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
-                                                    VRING_DESC_F_NEXT,
-                                                    indirect);
+                       i = virtqueue_add_desc_split(_vq, in, i, addr, sg->length,
+                                                    VRING_DESC_F_NEXT);
                }
        }
        for (; n < (out_sgs + in_sgs); n++) {
@@ -566,20 +555,19 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
                        /* Note that we trust indirect descriptor
                         * table since it use stream DMA mapping.
                         */
-                       i = virtqueue_add_desc_split(_vq, desc, i, addr,
+                       i = virtqueue_add_desc_split(_vq, in, i, addr,
                                                     sg->length,
                                                     VRING_DESC_F_NEXT |
-                                                    VRING_DESC_F_WRITE,
-                                                    indirect);
+                                                    VRING_DESC_F_WRITE);
                }
        }
        /* Last one doesn't continue. */
        desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
-       if (!indirect && vq->use_dma_api)
+       if (!in && vq->use_dma_api)
                vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
                        ~VRING_DESC_F_NEXT;

-       if (indirect) {
+       if (in) {
                /* Now that the indirect table is filled in, map it. */
                dma_addr_t addr = vring_map_single(
                        vq, desc, total_sg * sizeof(struct vring_desc),
@@ -587,28 +575,26 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
                if (vring_mapping_error(vq, addr))
                        goto unmap_release;

-               virtqueue_add_desc_split(_vq, vq->split.vring.desc,
-                                        head, addr,
+               virtqueue_add_desc_split(_vq, NULL, head, addr,
                                         total_sg * sizeof(struct vring_desc),
-                                        VRING_DESC_F_INDIRECT,
-                                        false);
+                                        VRING_DESC_F_INDIRECT);
        }

        /* We're using some buffers from the free list. */
        vq->vq.num_free -= descs_used;

        /* Update free pointer */
-       if (indirect)
+       if (in)
                vq->free_head = vq->split.desc_extra[head].next;
        else
                vq->free_head = i;

        /* Store token and indirect buffer state. */
        vq->split.desc_state[head].data = data;
-       if (indirect)
-               vq->split.desc_state[head].indir_desc = desc;
+       if (in)
+               vq->split.desc_state[head].in = in;
        else
-               vq->split.desc_state[head].indir_desc = ctx;
+               vq->split.desc_state[head].in = ctx;

        /* Put entry in available array (but don't update avail->idx until they
         * do sync). */
@@ -636,7 +622,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 unmap_release:
        err_idx = i;

-       if (indirect)
+       if (in)
                i = 0;
        else
                i = head;
@@ -644,15 +630,13 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
        for (n = 0; n < total_sg; n++) {
                if (i == err_idx)
                        break;
-               if (indirect) {
-                       vring_unmap_one_split_indirect(vq, &desc[i]);
-                       i = virtio16_to_cpu(_vq->vdev, desc[i].next);
-               } else
-                       i = vring_unmap_one_split(vq, i);
+               if (in)
+                       i = vring_unmap_one_split(vq, in->extra + i);
+               else
+                       i = vring_unmap_one_split(vq, vq->split.desc_extra + i);
        }

-       if (indirect)
-               kfree(desc);
+       kfree(in);

        END_USE(vq);
        return -ENOMEM;
@@ -702,12 +686,12 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
        i = head;

        while (vq->split.vring.desc[i].flags & nextflag) {
-               vring_unmap_one_split(vq, i);
+               vring_unmap_one_split(vq, vq->split.desc_extra + i);
                i = vq->split.desc_extra[i].next;
                vq->vq.num_free++;
        }

-       vring_unmap_one_split(vq, i);
+       vring_unmap_one_split(vq, vq->split.desc_extra + i);
        vq->split.desc_extra[i].next = vq->free_head;
        vq->free_head = head;

@@ -715,12 +699,13 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
        vq->vq.num_free++;

        if (vq->indirect) {
-               struct vring_desc *indir_desc =
-                               vq->split.desc_state[head].indir_desc;
+               struct vring_indirect_split *in;
                u32 len;

+               in = vq->split.desc_state[head].in;
+
                /* Free the indirect table, if any, now that it's unmapped. */
-               if (!indir_desc)
+               if (!in)
                        return;

                len = vq->split.desc_extra[head].len;
@@ -730,12 +715,12 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
                BUG_ON(len == 0 || len % sizeof(struct vring_desc));

                for (j = 0; j < len / sizeof(struct vring_desc); j++)
-                       vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+                       vring_unmap_one_split(vq, in->extra + j);

-               kfree(indir_desc);
-               vq->split.desc_state[head].indir_desc = NULL;
+               kfree(in);
+               vq->split.desc_state[head].in = NULL;
        } else if (ctx) {
-               *ctx = vq->split.desc_state[head].indir_desc;
+               *ctx = vq->split.desc_state[head].in;
        }
 }

--
2.31.0
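As a side note, the single-allocation layout can be modeled in isolation
like this (a standalone userspace sketch of the same technique; struct
names are simplified stand-ins, the real code is alloc_indirect_split()
above, and patch 3 mirrors it for the packed ring):

        #include <stdlib.h>

        struct desc  { unsigned short next; };  /* stands in for vring_desc */
        struct extra { unsigned short next; };  /* stands in for vring_desc_extra */

        struct indirect {
                struct extra *extra;    /* points into the same allocation */
                struct desc desc[];     /* flexible array: total_sg entries */
        };

        static struct indirect *alloc_indirect(unsigned int total_sg)
        {
                struct indirect *in;
                size_t size = sizeof(*in) +
                              total_sg * (sizeof(struct desc) + sizeof(struct extra));

                in = malloc(size);      /* one allocation: header + descs + extras */
                if (!in)
                        return NULL;

                /* The extras live immediately after the desc[] array. */
                in->extra = (struct extra *)(in->desc + total_sg);

                for (unsigned int i = 0; i < total_sg; i++)
                        in->extra[i].next = i + 1;      /* free-list style chaining */
                return in;
        }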
Xuan Zhuo
2022-Jan-07 06:33 UTC
[PATCH 3/6] virtio: packed: alloc indirect desc with extra
When indirect is not used, each desc has a corresponding extra, which
records information such as the dma address, flags and next. When indirect
is used, the allocated descs have no corresponding extra to record the dma
information, so that information must be read back from the desc itself on
unmap.

This patch allocates the corresponding extra array when the indirect descs
are allocated. This has the following advantages:

1. The dma information of each desc is recorded, so there is no need to
   read the desc on unmap.
2. The processing becomes more convenient and unified.
3. Additional information can be recorded in the extra, which will be used
   by subsequent patches.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 100 +++++++++++++++++------------------
 1 file changed, 49 insertions(+), 51 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 64b4d2b03016..7420741cb750 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -78,6 +78,11 @@ struct vring_indirect_split {
        struct vring_desc desc[];
 };

+struct vring_indirect_packed {
+       struct vring_desc_extra *extra;
+       struct vring_packed_desc desc[];
+};
+
 struct vring_desc_state_split {
        void *data;                     /* Data for callback. */
        struct vring_indirect_split *in;/* Indirect descriptor, if any. */
@@ -85,7 +90,7 @@ struct vring_desc_state_split {
 struct vring_desc_state_packed {
        void *data;                     /* Data for callback. */
-       struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
+       struct vring_indirect_packed *in; /* Indirect descriptor, if any. */
        u16 num;                        /* Descriptor list length. */
        u16 last;                       /* The last desc state in a list. */
 };
@@ -992,35 +997,11 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
        }
 }

-static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
-                                   struct vring_packed_desc *desc)
-{
-       u16 flags;
-
-       if (!vq->use_dma_api)
-               return;
-
-       flags = le16_to_cpu(desc->flags);
-
-       if (flags & VRING_DESC_F_INDIRECT) {
-               dma_unmap_single(vring_dma_dev(vq),
-                                le64_to_cpu(desc->addr),
-                                le32_to_cpu(desc->len),
-                                (flags & VRING_DESC_F_WRITE) ?
-                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
-       } else {
-               dma_unmap_page(vring_dma_dev(vq),
-                              le64_to_cpu(desc->addr),
-                              le32_to_cpu(desc->len),
-                              (flags & VRING_DESC_F_WRITE) ?
-                              DMA_FROM_DEVICE : DMA_TO_DEVICE);
-       }
-}
-
-static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
-                                                       gfp_t gfp)
+static struct vring_indirect_packed *alloc_indirect_packed(unsigned int total_sg,
+                                                           gfp_t gfp)
 {
-       struct vring_packed_desc *desc;
+       struct vring_indirect_packed *in;
+       int size;

        /*
         * We require lowmem mappings for the descriptors because
@@ -1029,9 +1010,16 @@ static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
         */
        gfp &= ~__GFP_HIGHMEM;

-       desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
+       size = sizeof(struct vring_packed_desc) + sizeof(struct vring_desc_extra);
+       size = size * total_sg + sizeof(*in);
+
+       in = kmalloc(size, gfp);
+       if (!in)
+               return NULL;

-       return desc;
+       in->extra = (struct vring_desc_extra *)(in->desc + total_sg);
+
+       return in;
 }

 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
@@ -1042,20 +1030,22 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
                                         void *data,
                                         gfp_t gfp)
 {
+       struct vring_indirect_packed *in;
        struct vring_packed_desc *desc;
+       struct vring_desc_extra *extra;
        struct scatterlist *sg;
        unsigned int i, n, err_idx;
-       u16 head, id;
+       u16 head, id, flags;
        dma_addr_t addr;

        head = vq->packed.next_avail_idx;
-       desc = alloc_indirect_packed(total_sg, gfp);
-       if (!desc)
+       in = alloc_indirect_packed(total_sg, gfp);
+       if (!in)
                return -ENOMEM;

        if (unlikely(vq->vq.num_free < 1)) {
                pr_debug("Can't add buf len 1 - avail = 0\n");
-               kfree(desc);
+               kfree(in);
                END_USE(vq);
                return -ENOSPC;
        }
@@ -1071,16 +1061,24 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
                        if (vring_mapping_error(vq, addr))
                                goto unmap_release;

-                       desc[i].flags = cpu_to_le16(n < out_sgs ?
-                                               0 : VRING_DESC_F_WRITE);
-                       desc[i].addr = cpu_to_le64(addr);
-                       desc[i].len = cpu_to_le32(sg->length);
+                       flags = n < out_sgs ? 0 : VRING_DESC_F_WRITE;
+
+                       desc = in->desc + i;
+                       desc->flags = cpu_to_le16(flags);
+                       desc->addr = cpu_to_le64(addr);
+                       desc->len = cpu_to_le32(sg->length);
+
+                       extra = in->extra + i;
+                       extra->addr = addr;
+                       extra->len = sg->length;
+                       extra->flags = flags;
+
                        i++;
                }
        }

        /* Now that the indirect table is filled in, map it. */
-       addr = vring_map_single(vq, desc,
+       addr = vring_map_single(vq, in->desc,
                        total_sg * sizeof(struct vring_packed_desc),
                        DMA_TO_DEVICE);
        if (vring_mapping_error(vq, addr))
@@ -1126,7 +1124,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
        /* Store token and indirect buffer state. */
        vq->packed.desc_state[id].num = 1;
        vq->packed.desc_state[id].data = data;
-       vq->packed.desc_state[id].indir_desc = desc;
+       vq->packed.desc_state[id].in = in;
        vq->packed.desc_state[id].last = id;

        vq->num_added += 1;
@@ -1140,9 +1138,9 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
        err_idx = i;

        for (i = 0; i < err_idx; i++)
-               vring_unmap_desc_packed(vq, &desc[i]);
+               vring_unmap_extra_packed(vq, in->extra + i);

-       kfree(desc);
+       kfree(in);

        END_USE(vq);
        return -ENOMEM;
@@ -1259,7 +1257,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
        /* Store token. */
        vq->packed.desc_state[id].num = descs_used;
        vq->packed.desc_state[id].data = data;
-       vq->packed.desc_state[id].indir_desc = ctx;
+       vq->packed.desc_state[id].in = ctx;
        vq->packed.desc_state[id].last = prev;

        /*
@@ -1350,7 +1348,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
                              unsigned int id, void **ctx)
 {
        struct vring_desc_state_packed *state = NULL;
-       struct vring_packed_desc *desc;
+       struct vring_indirect_packed *in;
        unsigned int i, curr;

        state = &vq->packed.desc_state[id];
@@ -1375,20 +1373,20 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
                u32 len;

                /* Free the indirect table, if any, now that it's unmapped. */
-               desc = state->indir_desc;
-               if (!desc)
+               in = state->in;
+               if (!in)
                        return;

                if (vq->use_dma_api) {
                        len = vq->packed.desc_extra[id].len;
                        for (i = 0; i < len / sizeof(struct vring_packed_desc);
                                        i++)
-                               vring_unmap_desc_packed(vq, &desc[i]);
+                               vring_unmap_extra_packed(vq, in->extra + i);
                }
-               kfree(desc);
-               state->indir_desc = NULL;
+               kfree(in);
+               state->in = NULL;
        } else if (ctx) {
-               *ctx = state->indir_desc;
+               *ctx = state->in;
        }
 }

--
2.31.0
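A worked example of the unmap accounting in detach_buf_packed() under this
scheme (values assumed for illustration): struct vring_packed_desc is 16
bytes (__le64 addr, __le32 len, __le16 id, __le16 flags), so an indirect
table built from 3 sg entries was mapped with len = 3 * 16 = 48, and the
loop walks the co-allocated extras 0..2:

        len = vq->packed.desc_extra[id].len;    /* 48 in this example */
        for (i = 0; i < len / sizeof(struct vring_packed_desc); i++)   /* 48 / 16 = 3 */
                vring_unmap_extra_packed(vq, in->extra + i);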
Xuan Zhuo
2022-Jan-07 06:33 UTC
[PATCH 4/6] virtio: split: virtqueue_add_split() support dma address
virtqueue_add_split() only supports virtual addresses; the DMA mapping is
done inside virtqueue_add_split() itself.

In some scenarios (such as the AF_XDP scenario), the memory is allocated
and DMA-mapped in advance, so we need to support passing an already-mapped
DMA address to virtqueue_add_split().

This patch stipulates that if sg->dma_address is not NULL, that address is
used as the DMA address. This is recorded in extra->flags, so the entry
can be skipped when the DMA unmap is executed:

        extra->flags |= VRING_DESC_F_PREDMA;

This relies on the previous patches: in the indirect scenario, an extra is
now allocated alongside every desc.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 7420741cb750..add8430d9678 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -66,6 +66,9 @@
 #define LAST_ADD_TIME_INVALID(vq)
 #endif

+/* This means the buffer dma is pre-alloc. Just used by vring_desc_extra */
+#define VRING_DESC_F_PREDMA (1 << 15)
+
 struct vring_desc_extra {
        dma_addr_t addr;                /* Descriptor DMA addr. */
        u32 len;                        /* Descriptor length. */
@@ -336,11 +339,19 @@ static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
        return vq->vq.vdev->dev.parent;
 }

+static inline bool sg_is_predma(struct scatterlist *sg)
+{
+       return !!sg->dma_address;
+}
+
 /* Map one sg entry. */
 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
                                   struct scatterlist *sg,
                                   enum dma_data_direction direction)
 {
+       if (sg_is_predma(sg))
+               return sg_dma_address(sg);
+
        if (!vq->use_dma_api)
                return (dma_addr_t)sg_phys(sg);

@@ -396,6 +407,9 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
                                 (flags & VRING_DESC_F_WRITE) ?
                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
        } else {
+               if (flags & VRING_DESC_F_PREDMA)
+                       goto out;
+
                dma_unmap_page(vring_dma_dev(vq),
                               extra->addr,
                               extra->len,
@@ -441,7 +455,8 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
                                                    unsigned int i,
                                                    dma_addr_t addr,
                                                    unsigned int len,
-                                                   u16 flags)
+                                                   u16 flags,
+                                                   bool predma)
 {
        struct vring_virtqueue *vring = to_vvq(vq);
        struct vring_desc_extra *extra;
@@ -468,6 +483,9 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
        extra->len = len;
        extra->flags = flags;

+       if (predma)
+               extra->flags |= VRING_DESC_F_PREDMA;
+
        return next;
 }

@@ -547,7 +565,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
                         * table since it use stream DMA mapping.
                         */
                        i = virtqueue_add_desc_split(_vq, in, i, addr, sg->length,
-                                                    VRING_DESC_F_NEXT);
+                                                    VRING_DESC_F_NEXT,
+                                                    sg_is_predma(sg));
                }
        }
        for (; n < (out_sgs + in_sgs); n++) {
@@ -563,7 +582,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
                        i = virtqueue_add_desc_split(_vq, in, i, addr,
                                                     sg->length,
                                                     VRING_DESC_F_NEXT |
-                                                    VRING_DESC_F_WRITE);
+                                                    VRING_DESC_F_WRITE,
+                                                    sg_is_predma(sg));
                }
        }
        /* Last one doesn't continue. */
@@ -582,7 +602,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
                virtqueue_add_desc_split(_vq, NULL, head, addr,
                                         total_sg * sizeof(struct vring_desc),
-                                        VRING_DESC_F_INDIRECT);
+                                        VRING_DESC_F_INDIRECT, false);
        }

        /* We're using some buffers from the free list. */

--
2.31.0
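The rule the unmap path now follows can be summarized by a small predicate
(illustrative only; the series open-codes this check inside
vring_unmap_one_split() rather than adding such a helper):

        /* Illustrative helper, not part of the series. */
        static bool vring_core_should_unmap(u16 extra_flags)
        {
                /* Indirect tables are always mapped by the core itself. */
                if (extra_flags & VRING_DESC_F_INDIRECT)
                        return true;
                /* Pre-mapped buffers stay owned by the driver. */
                return !(extra_flags & VRING_DESC_F_PREDMA);
        }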
Xuan Zhuo
2022-Jan-07 06:33 UTC
[PATCH 5/6] virtio: packed: virtqueue_add_packed() support dma address
virtqueue_add_packed() only supports virtual addresses; the DMA mapping is
done inside virtqueue_add_packed() itself.

In some scenarios (such as the AF_XDP scenario), the memory is allocated
and DMA-mapped in advance, so we need to support passing an already-mapped
DMA address to virtqueue_add_packed().

This patch stipulates that if sg->dma_address is not NULL, that address is
used as the DMA address. This is recorded in extra->flags, so the entry
can be skipped when the DMA unmap is executed:

        extra->flags |= VRING_DESC_F_PREDMA;

This relies on the previous patches: in the indirect scenario, an extra is
now allocated alongside every desc.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index add8430d9678..e165bc2e1344 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1010,6 +1010,9 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
                                 (flags & VRING_DESC_F_WRITE) ?
                                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
        } else {
+               if (flags & VRING_DESC_F_PREDMA)
+                       return;
+
                dma_unmap_page(vring_dma_dev(vq),
                               extra->addr, extra->len,
                               (flags & VRING_DESC_F_WRITE) ?
@@ -1092,6 +1095,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
                        extra->addr = addr;
                        extra->len = sg->length;
                        extra->flags = flags;
+                       if (sg_is_predma(sg))
+                               extra->flags |= VRING_DESC_F_PREDMA;

                        i++;
                }
@@ -1249,9 +1254,14 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
                if (unlikely(vq->use_dma_api)) {
                        vq->packed.desc_extra[curr].addr = addr;
                        vq->packed.desc_extra[curr].len = sg->length;
-                       vq->packed.desc_extra[curr].flags =
-                               le16_to_cpu(flags);
                }
+
+               vq->packed.desc_extra[curr].flags = le16_to_cpu(flags);
+
+               if (sg_is_predma(sg))
+                       vq->packed.desc_extra[curr].flags |=
+                               VRING_DESC_F_PREDMA;
+
                prev = curr;
                curr = vq->packed.desc_extra[curr].next;

--
2.31.0
Xuan Zhuo
2022-Jan-07 06:33 UTC
[PATCH 6/6] virtio: add api virtio_dma_map() for advance dma
Add virtio_dma_map() to DMA-map memory for a virtio device in advance. The
purpose of this function is that the mapping goes through the
vring_use_dma_api() check and uses vdev->dev.parent as the device argument
of dma_map_page().

Also add virtio_dma_unmap() to unmap such a DMA address.

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 47 ++++++++++++++++++++++++++++++++++++
 include/linux/virtio.h       |  9 +++++++
 2 files changed, 56 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index e165bc2e1344..f4a0fb85df27 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2472,4 +2472,51 @@ const struct vring *virtqueue_get_vring(struct virtqueue *vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_vring);

+/**
+ * virtio_dma_map - get the DMA addr of the memory for virtio device
+ * @vdev: virtio device
+ * @page: the page of the memory to DMA
+ * @offset: the offset of the memory inside page
+ * @length: memory length
+ * @dir: DMA direction
+ *
+ * Returns the DMA addr. Zero means error.
+ */
+dma_addr_t virtio_dma_map(struct virtio_device *vdev,
+                         struct page *page, size_t offset,
+                         unsigned int length,
+                         enum dma_data_direction dir)
+{
+       dma_addr_t addr;
+
+       if (!vring_use_dma_api(vdev))
+               return page_to_phys(page) + offset;
+
+       addr = dma_map_page(vdev->dev.parent, page, offset, length, dir);
+
+       if (dma_mapping_error(vdev->dev.parent, addr))
+               return 0;
+
+       return addr;
+}
+EXPORT_SYMBOL_GPL(virtio_dma_map);
+
+/**
+ * virtio_dma_unmap - unmap DMA addr
+ * @vdev: virtio device
+ * @dma: DMA address
+ * @length: memory length
+ * @dir: DMA direction
+ */
+void virtio_dma_unmap(struct virtio_device *vdev,
+                     dma_addr_t dma, unsigned int length,
+                     enum dma_data_direction dir)
+{
+       if (!vring_use_dma_api(vdev))
+               return;
+
+       dma_unmap_page(vdev->dev.parent, dma, length, dir);
+}
+EXPORT_SYMBOL_GPL(virtio_dma_unmap);
+
 MODULE_LICENSE("GPL");

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 41edbc01ffa4..6e6c6e18ecf8 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -9,6 +9,7 @@
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
 #include <linux/gfp.h>
+#include <linux/dma-mapping.h>

 /**
  * virtqueue - a queue to register buffers for sending or receiving.
@@ -195,4 +196,12 @@ void unregister_virtio_driver(struct virtio_driver *drv);
 #define module_virtio_driver(__virtio_driver) \
        module_driver(__virtio_driver, register_virtio_driver, \
                        unregister_virtio_driver)
+
+dma_addr_t virtio_dma_map(struct virtio_device *vdev,
+                         struct page *page, size_t offset,
+                         unsigned int length,
+                         enum dma_data_direction dir);
+void virtio_dma_unmap(struct virtio_device *vdev,
+                     dma_addr_t dma, unsigned int length,
+                     enum dma_data_direction dir);
 #endif /* _LINUX_VIRTIO_H */

--
2.31.0
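Putting patches 4-6 together, a hypothetical driver using the new API
would look roughly like this (umem_page and cookie are invented names;
since the core skips the unmap for PREDMA entries, the driver unmaps the
buffer itself):

        dma_addr_t dma;
        struct scatterlist sg;
        int err;

        dma = virtio_dma_map(vdev, umem_page, 0, PAGE_SIZE, DMA_TO_DEVICE);
        if (!dma)
                return -ENOMEM;

        sg_init_table(&sg, 1);
        sg_set_page(&sg, umem_page, PAGE_SIZE, 0);
        sg.dma_address = dma;           /* marks this entry as pre-DMA */

        err = virtqueue_add_outbuf(vq, &sg, 1, cookie, GFP_ATOMIC);

        /* ... later, after virtqueue_get_buf() hands the buffer back: */
        virtio_dma_unmap(vdev, dma, PAGE_SIZE, DMA_TO_DEVICE);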
On Fri, Jan 07, 2022 at 02:33:00PM +0800, Xuan Zhuo wrote:
> virtqueue_add() only supports virtual addresses; the DMA mapping is done
> inside virtqueue_add() itself.
>
> In some scenarios (such as the AF_XDP scenario), the DMA mapping is done in
> advance, so we need to support passing an already-mapped DMA address to
> virtqueue_add().
>
> This patch set stipulates that if sg->dma_address is not NULL, that address
> is used as the DMA address. This is recorded in extra->flags, so the entry
> can be skipped when the DMA unmap is executed:
>
>         extra->flags |= VRING_DESC_F_PREDMA;
>
> But an indirect desc has no corresponding extra, so the second and third
> patches of this set allocate the corresponding extra while allocating the
> indirect descs. Every desc must have its own extra, because within one sgs
> call some entries may be pre-mapped while others are virtual addresses, so
> we must allocate an extra for each indirect desc.

I didn't realize AF_XDP didn't have space to stuff the header into.
Jason, is that expected? It would be best to fix that, performance is
best if the header is linear with the data ...

Or maybe we can reduce the use of indirect somewhat, at least while the
ring is mostly empty?

> Xuan Zhuo (6):
>   virtio: rename vring_unmap_state_packed() to
>     vring_unmap_extra_packed()
>   virtio: split: alloc indirect desc with extra
>   virtio: packed: alloc indirect desc with extra
>   virtio: split: virtqueue_add_split() support dma address
>   virtio: packed: virtqueue_add_packed() support dma address
>   virtio: add api virtio_dma_map() for advance dma
>
>  drivers/virtio/virtio_ring.c | 387 ++++++++++++++++++++---------------
>  include/linux/virtio.h       |   9 +
>  2 files changed, 232 insertions(+), 164 deletions(-)
>
> --
> 2.31.0