Jason Wang
2023-May-18 07:54 UTC
[PATCH vhost v9 05/12] virtio_ring: split: virtqueue_add_split() support premapped
On Thu, May 18, 2023 at 3:41?PM Xuan Zhuo <xuanzhuo at linux.alibaba.com> wrote:> > On Thu, 18 May 2023 03:11:25 -0400, "Michael S. Tsirkin" <mst at redhat.com> wrote: > > On Thu, May 18, 2023 at 02:51:57PM +0800, Jason Wang wrote: > > > On Wed, May 17, 2023 at 10:23?AM Xuan Zhuo <xuanzhuo at linux.alibaba.com> wrote: > > > > > > > > virtqueue_add_split() only supports virtual addresses, dma is completed > > > > in virtqueue_add_split(). > > > > > > > > In some scenarios (such as the AF_XDP scenario), the memory is allocated > > > > and DMA is completed in advance, so it is necessary for us to support > > > > passing the DMA address to virtqueue_add_split(). > > > > > > > > Record this information in desc_state, we can skip unmap based on this > > > > when executing dma unmap. > > > > > > I would also suggest documenting why a per descriptor metadata is > > > needed instead of a per virtqueue one. > > > > I think we could make it per virtqueue. That would mean all code in > > virtio net would have to change to do dma mapping itself instead of > > relying on virtio core though. Which is maybe a good idea? Definitely a > > very intrusive change though, will need a lot of performance testing > > to make sure we don't break anything. > > In fact, we have tried this idea. > > The problem is the detach and unmap. > > We need to get all DMA Addresses from virtio-ring to unmap. Currently, it does > not support to return the DMA Address,I'm not sure I got here, but we've already stored the DMA address in desc_extra?> and for SKB, we need to get multiple DMA > Addresses at one time.Could you elaborate on this? Thanks> > This need to modify the logic of Virtio-Ring detach. Besides this, I also agree > with this idea. > > Thanks. > > > > > > > > > > > > > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> > > > > --- > > > > drivers/virtio/virtio_ring.c | 38 +++++++++++++++++++++++++++--------- > > > > 1 file changed, 29 insertions(+), 9 deletions(-) > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > > > > index e2fc50c05bec..bd5e84afab37 100644 > > > > --- a/drivers/virtio/virtio_ring.c > > > > +++ b/drivers/virtio/virtio_ring.c > > > > @@ -70,6 +70,7 @@ > > > > struct vring_desc_state_split { > > > > void *data; /* Data for callback. */ > > > > struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ > > > > + bool premapped; /* DMA mapping is done by driver. */ > > > > > > Going back to the original discussion around where this should be > > > placed. I wonder if we can find a common place to store this since it > > > has nothing related to virtqueue layout. Maybe desc_extra? And it > > > would be even better if we can avoid stressing the cache like above. > > > > > > > }; > > > > > > > > struct vring_desc_state_packed { > > > > @@ -356,8 +357,14 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq) > > > > > > > > /* Map one sg entry. */ > > > > static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg, > > > > - enum dma_data_direction direction, static dma_addr_t *addr) > > > > + enum dma_data_direction direction, > > > > + bool premapped, dma_addr_t *addr) > > > > > > having things like: > > > > > > int func(bool do) > > > { > > > if (!do) > > > return; > > > } > > > > > > is a hint that the check needs to be done by the caller? > > > > > > And this change should work for both packed and split. I think we need > > > to squash the packed changes here. > > > > > > Looking at how packed virtqueue uses this in this patch, I don't think > > > this patch can even be built. I will wait for a new version and > > > continue the review from there. > > > > > > Thanks > > > > > > > > > > > > > { > > > > + if (premapped) { > > > > + *addr = sg_dma_address(sg); > > > > + return 0; > > > > + } > > > > + > > > > if (!vq->use_dma_api) { > > > > /* > > > > * If DMA is not used, KMSAN doesn't know that the scatterlist > > > > @@ -445,7 +452,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, > > > > } > > > > > > > > static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, > > > > - unsigned int i) > > > > + unsigned int i, bool premapped) > > > > { > > > > struct vring_desc_extra *extra = vq->split.desc_extra; > > > > u16 flags; > > > > @@ -462,6 +469,9 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, > > > > (flags & VRING_DESC_F_WRITE) ? > > > > DMA_FROM_DEVICE : DMA_TO_DEVICE); > > > > } else { > > > > + if (premapped) > > > > + goto out; > > > > + > > > > dma_unmap_page(vring_dma_dev(vq), > > > > extra[i].addr, > > > > extra[i].len, > > > > @@ -532,6 +542,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, > > > > unsigned int in_sgs, > > > > void *data, > > > > void *ctx, > > > > + bool premapped, > > > > gfp_t gfp) > > > > { > > > > struct vring_virtqueue *vq = to_vvq(_vq); > > > > @@ -595,7 +606,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, > > > > for (sg = sgs[n]; sg; sg = sg_next(sg)) { > > > > dma_addr_t addr; > > > > > > > > - if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr)) > > > > + if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, premapped, &addr)) > > > > goto unmap_release; > > > > > > > > prev = i; > > > > @@ -611,7 +622,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, > > > > for (sg = sgs[n]; sg; sg = sg_next(sg)) { > > > > dma_addr_t addr; > > > > > > > > - if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr)) > > > > + if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, premapped, &addr)) > > > > goto unmap_release; > > > > > > > > prev = i; > > > > @@ -657,6 +668,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, > > > > > > > > /* Store token and indirect buffer state. */ > > > > vq->split.desc_state[head].data = data; > > > > + vq->split.desc_state[head].premapped = premapped; > > > > if (indirect) > > > > vq->split.desc_state[head].indir_desc = desc; > > > > else > > > > @@ -686,6 +698,14 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, > > > > return 0; > > > > > > > > unmap_release: > > > > + if (premapped) { > > > > + if (indirect) > > > > + kfree(desc); > > > > + > > > > + END_USE(vq); > > > > + return -ENOMEM; > > > > + } > > > > + > > > > err_idx = i; > > > > > > > > if (indirect) > > > > @@ -700,7 +720,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, > > > > vring_unmap_one_split_indirect(vq, &desc[i]); > > > > i = virtio16_to_cpu(_vq->vdev, desc[i].next); > > > > } else > > > > - i = vring_unmap_one_split(vq, i); > > > > + i = vring_unmap_one_split(vq, i, false); > > > > } > > > > > > > > if (indirect) > > > > @@ -757,12 +777,12 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, > > > > i = head; > > > > > > > > while (vq->split.vring.desc[i].flags & nextflag) { > > > > - vring_unmap_one_split(vq, i); > > > > + vring_unmap_one_split(vq, i, state->premapped); > > > > i = vq->split.desc_extra[i].next; > > > > vq->vq.num_free++; > > > > } > > > > > > > > - vring_unmap_one_split(vq, i); > > > > + vring_unmap_one_split(vq, i, state->premapped); > > > > vq->split.desc_extra[i].next = vq->free_head; > > > > vq->free_head = head; > > > > > > > > @@ -783,7 +803,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, > > > > VRING_DESC_F_INDIRECT)); > > > > BUG_ON(len == 0 || len % sizeof(struct vring_desc)); > > > > > > > > - if (vq->use_dma_api) { > > > > + if (vq->use_dma_api && !state->premapped) { > > > > for (j = 0; j < len / sizeof(struct vring_desc); j++) > > > > vring_unmap_one_split_indirect(vq, &indir_desc[j]); > > > > } > > > > @@ -2143,7 +2163,7 @@ static inline int virtqueue_add(struct virtqueue *_vq, > > > > return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, > > > > out_sgs, in_sgs, data, ctx, gfp) : > > > > virtqueue_add_split(_vq, sgs, total_sg, > > > > - out_sgs, in_sgs, data, ctx, gfp); > > > > + out_sgs, in_sgs, data, ctx, premapped, gfp); > > > > } > > > > > > > > /** > > > > -- > > > > 2.32.0.3.g01195cf9f > > > > > > >
Xuan Zhuo
2023-May-18 07:56 UTC
[PATCH vhost v9 05/12] virtio_ring: split: virtqueue_add_split() support premapped
On Thu, 18 May 2023 15:54:09 +0800, Jason Wang <jasowang at redhat.com> wrote:> On Thu, May 18, 2023 at 3:41?PM Xuan Zhuo <xuanzhuo at linux.alibaba.com> wrote: > > > > On Thu, 18 May 2023 03:11:25 -0400, "Michael S. Tsirkin" <mst at redhat.com> wrote: > > > On Thu, May 18, 2023 at 02:51:57PM +0800, Jason Wang wrote: > > > > On Wed, May 17, 2023 at 10:23?AM Xuan Zhuo <xuanzhuo at linux.alibaba.com> wrote: > > > > > > > > > > virtqueue_add_split() only supports virtual addresses, dma is completed > > > > > in virtqueue_add_split(). > > > > > > > > > > In some scenarios (such as the AF_XDP scenario), the memory is allocated > > > > > and DMA is completed in advance, so it is necessary for us to support > > > > > passing the DMA address to virtqueue_add_split(). > > > > > > > > > > Record this information in desc_state, we can skip unmap based on this > > > > > when executing dma unmap. > > > > > > > > I would also suggest documenting why a per descriptor metadata is > > > > needed instead of a per virtqueue one. > > > > > > I think we could make it per virtqueue. That would mean all code in > > > virtio net would have to change to do dma mapping itself instead of > > > relying on virtio core though. Which is maybe a good idea? Definitely a > > > very intrusive change though, will need a lot of performance testing > > > to make sure we don't break anything. > > > > In fact, we have tried this idea. > > > > The problem is the detach and unmap. > > > > We need to get all DMA Addresses from virtio-ring to unmap. Currently, it does > > not support to return the DMA Address, > > I'm not sure I got here, but we've already stored the DMA address in desc_extra?I mean we need to get the dma address from the virtio-core to virtio-net. Thanks.> > > and for SKB, we need to get multiple DMA > > Addresses at one time. > > Could you elaborate on this? > > Thanks > > > > > This need to modify the logic of Virtio-Ring detach. Besides this, I also agree > > with this idea. > > > > Thanks. > > > > > > > > > > > > > > > > > > > > > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com> > > > > > --- > > > > > drivers/virtio/virtio_ring.c | 38 +++++++++++++++++++++++++++--------- > > > > > 1 file changed, 29 insertions(+), 9 deletions(-) > > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > > > > > index e2fc50c05bec..bd5e84afab37 100644 > > > > > --- a/drivers/virtio/virtio_ring.c > > > > > +++ b/drivers/virtio/virtio_ring.c > > > > > @@ -70,6 +70,7 @@ > > > > > struct vring_desc_state_split { > > > > > void *data; /* Data for callback. */ > > > > > struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ > > > > > + bool premapped; /* DMA mapping is done by driver. */ > > > > > > > > Going back to the original discussion around where this should be > > > > placed. I wonder if we can find a common place to store this since it > > > > has nothing related to virtqueue layout. Maybe desc_extra? And it > > > > would be even better if we can avoid stressing the cache like above. > > > > > > > > > }; > > > > > > > > > > struct vring_desc_state_packed { > > > > > @@ -356,8 +357,14 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq) > > > > > > > > > > /* Map one sg entry. */ > > > > > static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg, > > > > > - enum dma_data_direction direction, static dma_addr_t *addr) > > > > > + enum dma_data_direction direction, > > > > > + bool premapped, dma_addr_t *addr) > > > > > > > > having things like: > > > > > > > > int func(bool do) > > > > { > > > > if (!do) > > > > return; > > > > } > > > > > > > > is a hint that the check needs to be done by the caller? > > > > > > > > And this change should work for both packed and split. I think we need > > > > to squash the packed changes here. > > > > > > > > Looking at how packed virtqueue uses this in this patch, I don't think > > > > this patch can even be built. I will wait for a new version and > > > > continue the review from there. > > > > > > > > Thanks > > > > > > > > > > > > > > > > > { > > > > > + if (premapped) { > > > > > + *addr = sg_dma_address(sg); > > > > > + return 0; > > > > > + } > > > > > + > > > > > if (!vq->use_dma_api) { > > > > > /* > > > > > * If DMA is not used, KMSAN doesn't know that the scatterlist > > > > > @@ -445,7 +452,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, > > > > > } > > > > > > > > > > static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, > > > > > - unsigned int i) > > > > > + unsigned int i, bool premapped) > > > > > { > > > > > struct vring_desc_extra *extra = vq->split.desc_extra; > > > > > u16 flags; > > > > > @@ -462,6 +469,9 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, > > > > > (flags & VRING_DESC_F_WRITE) ? > > > > > DMA_FROM_DEVICE : DMA_TO_DEVICE); > > > > > } else { > > > > > + if (premapped) > > > > > + goto out; > > > > > + > > > > > dma_unmap_page(vring_dma_dev(vq), > > > > > extra[i].addr, > > > > > extra[i].len, > > > > > @@ -532,6 +542,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, > > > > > unsigned int in_sgs, > > > > > void *data, > > > > > void *ctx, > > > > > + bool premapped, > > > > > gfp_t gfp) > > > > > { > > > > > struct vring_virtqueue *vq = to_vvq(_vq); > > > > > @@ -595,7 +606,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, > > > > > for (sg = sgs[n]; sg; sg = sg_next(sg)) { > > > > > dma_addr_t addr; > > > > > > > > > > - if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr)) > > > > > + if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, premapped, &addr)) > > > > > goto unmap_release; > > > > > > > > > > prev = i; > > > > > @@ -611,7 +622,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, > > > > > for (sg = sgs[n]; sg; sg = sg_next(sg)) { > > > > > dma_addr_t addr; > > > > > > > > > > - if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr)) > > > > > + if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, premapped, &addr)) > > > > > goto unmap_release; > > > > > > > > > > prev = i; > > > > > @@ -657,6 +668,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, > > > > > > > > > > /* Store token and indirect buffer state. */ > > > > > vq->split.desc_state[head].data = data; > > > > > + vq->split.desc_state[head].premapped = premapped; > > > > > if (indirect) > > > > > vq->split.desc_state[head].indir_desc = desc; > > > > > else > > > > > @@ -686,6 +698,14 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, > > > > > return 0; > > > > > > > > > > unmap_release: > > > > > + if (premapped) { > > > > > + if (indirect) > > > > > + kfree(desc); > > > > > + > > > > > + END_USE(vq); > > > > > + return -ENOMEM; > > > > > + } > > > > > + > > > > > err_idx = i; > > > > > > > > > > if (indirect) > > > > > @@ -700,7 +720,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, > > > > > vring_unmap_one_split_indirect(vq, &desc[i]); > > > > > i = virtio16_to_cpu(_vq->vdev, desc[i].next); > > > > > } else > > > > > - i = vring_unmap_one_split(vq, i); > > > > > + i = vring_unmap_one_split(vq, i, false); > > > > > } > > > > > > > > > > if (indirect) > > > > > @@ -757,12 +777,12 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, > > > > > i = head; > > > > > > > > > > while (vq->split.vring.desc[i].flags & nextflag) { > > > > > - vring_unmap_one_split(vq, i); > > > > > + vring_unmap_one_split(vq, i, state->premapped); > > > > > i = vq->split.desc_extra[i].next; > > > > > vq->vq.num_free++; > > > > > } > > > > > > > > > > - vring_unmap_one_split(vq, i); > > > > > + vring_unmap_one_split(vq, i, state->premapped); > > > > > vq->split.desc_extra[i].next = vq->free_head; > > > > > vq->free_head = head; > > > > > > > > > > @@ -783,7 +803,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, > > > > > VRING_DESC_F_INDIRECT)); > > > > > BUG_ON(len == 0 || len % sizeof(struct vring_desc)); > > > > > > > > > > - if (vq->use_dma_api) { > > > > > + if (vq->use_dma_api && !state->premapped) { > > > > > for (j = 0; j < len / sizeof(struct vring_desc); j++) > > > > > vring_unmap_one_split_indirect(vq, &indir_desc[j]); > > > > > } > > > > > @@ -2143,7 +2163,7 @@ static inline int virtqueue_add(struct virtqueue *_vq, > > > > > return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, > > > > > out_sgs, in_sgs, data, ctx, gfp) : > > > > > virtqueue_add_split(_vq, sgs, total_sg, > > > > > - out_sgs, in_sgs, data, ctx, gfp); > > > > > + out_sgs, in_sgs, data, ctx, premapped, gfp); > > > > > } > > > > > > > > > > /** > > > > > -- > > > > > 2.32.0.3.g01195cf9f > > > > > > > > > > >