Jason Wang
2021-Oct-21 02:38 UTC
[RFC PATCH v4 20/20] vdpa: Add custom IOTLB translations to SVQ
On Wed, Oct 20, 2021 at 7:57 PM Eugenio Perez Martin <eperezma at redhat.com> wrote:
>
> On Wed, Oct 20, 2021 at 11:03 AM Jason Wang <jasowang at redhat.com> wrote:
> >
> > On Wed, Oct 20, 2021 at 2:52 PM Eugenio Perez Martin
> > <eperezma at redhat.com> wrote:
> > >
> > > On Wed, Oct 20, 2021 at 4:07 AM Jason Wang <jasowang at redhat.com> wrote:
> > > >
> > > > On Wed, Oct 20, 2021 at 10:02 AM Jason Wang <jasowang at redhat.com> wrote:
> > > > >
> > > > > On Tue, Oct 19, 2021 at 6:29 PM Eugenio Perez Martin
> > > > > <eperezma at redhat.com> wrote:
> > > > > >
> > > > > > On Tue, Oct 19, 2021 at 11:25 AM Jason Wang <jasowang at redhat.com> wrote:
> > > > > > >
> > > > > > >
> > > > > > > On 2021/10/1 3:06 PM, Eugenio Pérez wrote:
> > > > > > > > Use translations added in VhostIOVATree in SVQ.
> > > > > > > >
> > > > > > > > Now every element needs to store the previous address also, so VirtQueue
> > > > > > > > can consume the elements properly. This adds a little overhead per VQ
> > > > > > > > element, having to allocate more memory to stash them. As a possible
> > > > > > > > optimization, this allocation could be avoided if the descriptor is not
> > > > > > > > a chain but a single one, but this is left undone.
> > > > > > > >
> > > > > > > > TODO: iova range should be queried before, and add logic to fail when
> > > > > > > > GPA is outside of its range and memory listener or svq add it.
> > > > > > > >
> > > > > > > > Signed-off-by: Eugenio Pérez <eperezma at redhat.com>
> > > > > > > > ---
> > > > > > > >  hw/virtio/vhost-shadow-virtqueue.h |   4 +-
> > > > > > > >  hw/virtio/vhost-shadow-virtqueue.c | 130 ++++++++++++++++++++++++-----
> > > > > > > >  hw/virtio/vhost-vdpa.c             |  40 ++++++++-
> > > > > > > >  hw/virtio/trace-events             |   1 +
> > > > > > > >  4 files changed, 152 insertions(+), 23 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
> > > > > > > > index b7baa424a7..a0e6b5267a 100644
> > > > > > > > --- a/hw/virtio/vhost-shadow-virtqueue.h
> > > > > > > > +++ b/hw/virtio/vhost-shadow-virtqueue.h
> > > > > > > > @@ -11,6 +11,7 @@
> > > > > > > >  #define VHOST_SHADOW_VIRTQUEUE_H
> > > > > > > >
> > > > > > > >  #include "hw/virtio/vhost.h"
> > > > > > > > +#include "hw/virtio/vhost-iova-tree.h"
> > > > > > > >
> > > > > > > >  typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
> > > > > > > >
> > > > > > > > @@ -28,7 +29,8 @@ bool vhost_svq_start(struct vhost_dev *dev, unsigned idx,
> > > > > > > >  void vhost_svq_stop(struct vhost_dev *dev, unsigned idx,
> > > > > > > >                      VhostShadowVirtqueue *svq);
> > > > > > > >
> > > > > > > > -VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx);
> > > > > > > > +VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx,
> > > > > > > > +                                    VhostIOVATree *iova_map);
> > > > > > > >
> > > > > > > >  void vhost_svq_free(VhostShadowVirtqueue *vq);
> > > > > > > >
> > > > > > > > diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> > > > > > > > index 2fd0bab75d..9db538547e 100644
> > > > > > > > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > > > > > > > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > > > > > > > @@ -11,12 +11,19 @@
> > > > > > > >  #include "hw/virtio/vhost-shadow-virtqueue.h"
> > > > > > > >  #include "hw/virtio/vhost.h"
> > > > > > > >  #include "hw/virtio/virtio-access.h"
> > > > > > > > +#include "hw/virtio/vhost-iova-tree.h"
> > > > > > > >
> > > > > > > >  #include "standard-headers/linux/vhost_types.h"
> > > > > > > >  #include "qemu/error-report.h"
> > > > > > > >  #include "qemu/main-loop.h"
> > > > > > > >
> > > > > > > > +typedef struct SVQElement {
> > > > > > > > +    VirtQueueElement elem;
> > > > > > > > +    void **in_sg_stash;
> > > > > > > > +    void **out_sg_stash;
> > > > > > > > +} SVQElement;
> > > > > > > > +
> > > > > > > >  /* Shadow virtqueue to relay notifications */
> > > > > > > >  typedef struct VhostShadowVirtqueue {
> > > > > > > >      /* Shadow vring */
> > > > > > > > @@ -46,8 +53,11 @@ typedef struct VhostShadowVirtqueue {
> > > > > > > >      /* Virtio device */
> > > > > > > >      VirtIODevice *vdev;
> > > > > > > >
> > > > > > > > +    /* IOVA mapping if used */
> > > > > > > > +    VhostIOVATree *iova_map;
> > > > > > > > +
> > > > > > > >      /* Map for returning guest's descriptors */
> > > > > > > > -    VirtQueueElement **ring_id_maps;
> > > > > > > > +    SVQElement **ring_id_maps;
> > > > > > > >
> > > > > > > >      /* Next head to expose to device */
> > > > > > > >      uint16_t avail_idx_shadow;
> > > > > > > > @@ -79,13 +89,6 @@ bool vhost_svq_valid_device_features(uint64_t *dev_features)
> > > > > > > >              continue;
> > > > > > > >
> > > > > > > >          case VIRTIO_F_ACCESS_PLATFORM:
> > > > > > > > -            /* SVQ needs this feature disabled. Can't continue */
> > > > > > > > -            if (*dev_features & BIT_ULL(b)) {
> > > > > > > > -                clear_bit(b, dev_features);
> > > > > > > > -                r = false;
> > > > > > > > -            }
> > > > > > > > -            break;
> > > > > > > > -
> > > > > > > >          case VIRTIO_F_VERSION_1:
> > > > > > > >              /* SVQ needs this feature, so can't continue */
> > > > > > > >              if (!(*dev_features & BIT_ULL(b))) {
> > > > > > > > @@ -126,6 +129,64 @@ static void vhost_svq_set_notification(VhostShadowVirtqueue *svq, bool enable)
> > > > > > > >      }
> > > > > > > >  }
> > > > > > > >
> > > > > > > > +static void vhost_svq_stash_addr(void ***stash, const struct iovec *iov,
> > > > > > > > +                                 size_t num)
> > > > > > > > +{
> > > > > > > > +    size_t i;
> > > > > > > > +
> > > > > > > > +    if (num == 0) {
> > > > > > > > +        return;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    *stash = g_new(void *, num);
> > > > > > > > +    for (i = 0; i < num; ++i) {
> > > > > > > > +        (*stash)[i] = iov[i].iov_base;
> > > > > > > > +    }
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static void vhost_svq_unstash_addr(void **stash, struct iovec *iov, size_t num)
> > > > > > > > +{
> > > > > > > > +    size_t i;
> > > > > > > > +
> > > > > > > > +    if (num == 0) {
> > > > > > > > +        return;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    for (i = 0; i < num; ++i) {
> > > > > > > > +        iov[i].iov_base = stash[i];
> > > > > > > > +    }
> > > > > > > > +    g_free(stash);
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static void vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
> > > > > > > > +                                     struct iovec *iovec, size_t num)
> > > > > > > > +{
> > > > > > > > +    size_t i;
> > > > > > > > +
> > > > > > > > +    for (i = 0; i < num; ++i) {
> > > > > > > > +        VhostDMAMap needle = {
> > > > > > > > +            .translated_addr = iovec[i].iov_base,
> > > > > > > > +            .size = iovec[i].iov_len,
> > > > > > > > +        };
> > > > > > > > +        size_t off;
> > > > > > > > +
> > > > > > > > +        const VhostDMAMap *map = vhost_iova_tree_find_iova(svq->iova_map,
> > > > > > > > +                                                           &needle);
> > > > > > >
> > > > > > >
> > > > > > > Is it possible that we end up with more than one maps here?
> > > > > >
> > > > > > Actually it is possible, since there is no guarantee that one
> > > > > > descriptor (or indirect descriptor) maps exactly to one iov. It could
> > > > > > map to many if the qemu vaddr is not contiguous but GPA + size is. This is
> > > > > > something that must be fixed for the next revision, so thanks for
> > > > > > pointing it out!
> > > > > >
> > > > > > Taking that into account, the condition that svq vring avail_idx -
> > > > > > used_idx was always less than or equal to the guest's vring avail_idx -
> > > > > > used_idx is not true anymore. Checking for that before adding buffers
> > > > > > to SVQ is the easy part, but how could we recover in that case?
> > > > > >
> > > > > > I think that the easy solution is to check for more available buffers
> > > > > > unconditionally at the end of vhost_svq_handle_call, which handles the
> > > > > > SVQ used ring and is supposed to make more room for available buffers.
> > > > > > So vhost_handle_guest_kick would not check if the eventfd is set or
> > > > > > not anymore.
> > > > > >
> > > > > > Would that make sense?
> > > > >
> > > > > Yes, I think it should work.
> > > >
> > > > Btw, I wonder how to handle indirect descriptors. SVQ doesn't use
> > > > indirect descriptors for now, but it looks like a must, otherwise we
> > > > may end up with the SVQ full before the guest's VQ.
> > >
> > > We can get to that situation without indirect too, if a single
> > > descriptor maps to more than one sg buffer. The next revision is going
> > > to control that too.
> > >
> > > > It looks to me an easy way is to always use indirect descriptors if #sg >= 2?
> > >
> > > I will use that, but that does not solve the case where a descriptor
> > > maps to > 1 different buffers in qemu vaddr.
> >
> > Right, so we need to deal with the case when SVQ is out of space.
> >
> > > So I think that some
> > > check after marking descriptors as used is a must somehow.
> >
> > I thought it should be before processing the available buffer?
>
> Yes, I meant after that. Somehow, because I include checking the
> number of sg buffers as "processing". :).
>
> > It's
> > the guest driver that makes sure there's sufficient space for the used
> > ring?
>
> (I think we are talking about the same thing with different words, but
> just in case I will develop the idea here with an example).
>
> The guest is able to check if there is enough space in the SVQ's
> vring, but not in the device's vring. As an example of this, imagine
> that a guest makes available a GPA-contiguous buffer of 64K, one
> descriptor. However, this memory is divided into 16 chunks of 4K in
> qemu's VA space. Imagine that at this moment there are only eight
> slots free in each vring, and that neither communication is using
> indirect descriptors.
>
> The guest only needs 1 descriptor available to make that buffer
> available, so it will add it to the avail ring. But SVQ needs 16 chained
> descriptors, so the buffer is not going to reach the device until the
> device marks at least 8 more descriptors as used. SVQ checked for the
> amount of available room, as you said, but it cannot forward the
> available one.
>
> Since the guest already sent a kick when it made the descriptor
> available, we need another mechanism to know when we have all the
> needed free slots in the SVQ vring. And that's what I meant with the
> check after marking some buffers as used.
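
For concreteness, the deferred-forwarding scheme described in that example
could look roughly like the following self-contained sketch: stall the
element when its translated descriptor count does not fit, and retry from
the used-buffer path once slots are freed. Every name in it (Svq,
svq_try_forward, svq_handle_used) is illustrative only, not QEMU's API; it
models just the descriptor accounting:

#include <stdbool.h>
#include <stdio.h>

typedef struct {
    unsigned free_slots;    /* free descriptors in the SVQ vring */
    unsigned pending_descs; /* descriptors needed by the stalled element */
    bool pending;           /* a guest element is waiting for room */
} Svq;

/* Guest kick path: forward one element that needs n chained descriptors
 * after splitting it along host-VA-contiguous chunks. */
static bool svq_try_forward(Svq *svq, unsigned n)
{
    if (n > svq->free_slots) {
        svq->pending = true;        /* retry on the next used batch */
        svq->pending_descs = n;
        return false;
    }
    svq->free_slots -= n;           /* descriptors are now in flight */
    return true;
}

/* Used path: marking buffers as used frees slots, so check for pending
 * work here instead of relying only on the guest's kick eventfd. */
static void svq_handle_used(Svq *svq, unsigned used_descs)
{
    svq->free_slots += used_descs;
    if (svq->pending && svq_try_forward(svq, svq->pending_descs)) {
        svq->pending = false;       /* stalled element finally forwarded */
    }
}

int main(void)
{
    Svq svq = { .free_slots = 8 };

    /* The 64K buffer from the example: 1 guest descriptor, 16 SVQ ones. */
    printf("forwarded: %d\n", svq_try_forward(&svq, 16)); /* 0: stalls  */
    svq_handle_used(&svq, 8);   /* 8 more used -> 16 free -> retried    */
    printf("pending: %d\n", svq.pending);                 /* 0 again    */
    return 0;
}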
>
> I still think it is not worth it to protect the forwarding methods from
> hogging the BQL, since there must be a limit sooner or later, but it is
> something that is worth putting on the table again. But this requires
> changes for the next version for sure.

Ok.

> I can think of more scenarios, like the guest making available an indirect
> descriptor of vq size that needs to be split into even more sgs. Qemu
> already does not support more than 1024 sg buffers in a VirtQueue, but
> a driver (such as SVQ) must *not* create an indirect descriptor chain
> longer than the Queue Size. Should we always increase the vq size to 1024?
> I think these cases are highly unlikely, but again these concerns must
> at least be commented on here.
>
> Does it make sense?

Right. So I think the SVQ code should be ready to handle all those cases.

Thanks

> Thanks!
>
> > Thanks
> >
> > > > > Thanks
> > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > Thanks!
> > > > > >
> > > > > > > > +        /*
> > > > > > > > +         * Map cannot be NULL since iova map contains all guest space and
> > > > > > > > +         * qemu already has a physical address mapped
> > > > > > > > +         */
> > > > > > > > +        assert(map);
> > > > > > > > +
> > > > > > > > +        /*
> > > > > > > > +         * Map->iova chunk size is ignored. What to do if descriptor
> > > > > > > > +         * (addr, size) does not fit is delegated to the device.
> > > > > > > > +         */
> > > > > > > > +        off = needle.translated_addr - map->translated_addr;
> > > > > > > > +        iovec[i].iov_base = (void *)(map->iova + off);
> > > > > > > > +    }
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >  static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
> > > > > > > >                                      const struct iovec *iovec,
> > > > > > > >                                      size_t num, bool more_descs, bool write)
> > > > > > > > @@ -156,8 +217,9 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
> > > > > > > >  }
> > > > > > > >
> > > > > > > >  static unsigned vhost_svq_add_split(VhostShadowVirtqueue *svq,
> > > > > > > > -                                    VirtQueueElement *elem)
> > > > > > > > +                                    SVQElement *svq_elem)
> > > > > > > >  {
> > > > > > > > +    VirtQueueElement *elem = &svq_elem->elem;
> > > > > > > >      int head;
> > > > > > > >      unsigned avail_idx;
> > > > > > > >      vring_avail_t *avail = svq->vring.avail;
> > > > > > > > @@ -167,6 +229,12 @@ static unsigned vhost_svq_add_split(VhostShadowVirtqueue *svq,
> > > > > > > >      /* We need some descriptors here */
> > > > > > > >      assert(elem->out_num || elem->in_num);
> > > > > > > >
> > > > > > > > +    vhost_svq_stash_addr(&svq_elem->in_sg_stash, elem->in_sg, elem->in_num);
> > > > > > > > +    vhost_svq_stash_addr(&svq_elem->out_sg_stash, elem->out_sg, elem->out_num);
> > > > > > >
> > > > > > >
> > > > > > > I wonder if we can solve the trick like stash and unstash with a
> > > > > > > dedicated sgs in svq_elem, instead of reusing the elem.
> > > > > >
> > > > > > Actually yes, it would be way simpler to use a new sgs array in
> > > > > > svq_elem. I will change that.
> > > > > >
> > > > > > Thanks!
> > > > > > > Thanks
> > > > > > >
> > > > > > > > +
> > > > > > > > +    vhost_svq_translate_addr(svq, elem->in_sg, elem->in_num);
> > > > > > > > +    vhost_svq_translate_addr(svq, elem->out_sg, elem->out_num);
> > > > > > > > +
> > > > > > > >      vhost_vring_write_descs(svq, elem->out_sg, elem->out_num,
> > > > > > > >                              elem->in_num > 0, false);
> > > > > > > >      vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
> > > > > > > > @@ -187,7 +255,7 @@ static unsigned vhost_svq_add_split(VhostShadowVirtqueue *svq,
> > > > > > > >
> > > > > > > >  }
> > > > > > > >
> > > > > > > > -static void vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
> > > > > > > > +static void vhost_svq_add(VhostShadowVirtqueue *svq, SVQElement *elem)
> > > > > > > >  {
> > > > > > > >      unsigned qemu_head = vhost_svq_add_split(svq, elem);
> > > > > > > >
> > > > > > > > @@ -221,7 +289,7 @@ static void vhost_handle_guest_kick(EventNotifier *n)
> > > > > > > >      }
> > > > > > > >
> > > > > > > >      while (true) {
> > > > > > > > -        VirtQueueElement *elem = virtqueue_pop(svq->vq, sizeof(*elem));
> > > > > > > > +        SVQElement *elem = virtqueue_pop(svq->vq, sizeof(*elem));
> > > > > > > >          if (!elem) {
> > > > > > > >              break;
> > > > > > > >          }
> > > > > > > > @@ -247,7 +315,7 @@ static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
> > > > > > > >      return svq->used_idx != svq->shadow_used_idx;
> > > > > > > >  }
> > > > > > > >
> > > > > > > > -static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq)
> > > > > > > > +static SVQElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq)
> > > > > > > >  {
> > > > > > > >      vring_desc_t *descs = svq->vring.desc;
> > > > > > > >      const vring_used_t *used = svq->vring.used;
> > > > > > > > @@ -279,7 +347,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq)
> > > > > > > >      descs[used_elem.id].next = svq->free_head;
> > > > > > > >      svq->free_head = used_elem.id;
> > > > > > > >
> > > > > > > > -    svq->ring_id_maps[used_elem.id]->len = used_elem.len;
> > > > > > > > +    svq->ring_id_maps[used_elem.id]->elem.len = used_elem.len;
> > > > > > > >      return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
> > > > > > > >  }
> > > > > > > >
> > > > > > > > @@ -296,12 +364,19 @@ static void vhost_svq_handle_call_no_test(EventNotifier *n)
> > > > > > > >
> > > > > > > >      vhost_svq_set_notification(svq, false);
> > > > > > > >      while (true) {
> > > > > > > > -        g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq);
> > > > > > > > -        if (!elem) {
> > > > > > > > +        g_autofree SVQElement *svq_elem = vhost_svq_get_buf(svq);
> > > > > > > > +        VirtQueueElement *elem;
> > > > > > > > +        if (!svq_elem) {
> > > > > > > >              break;
> > > > > > > >          }
> > > > > > > >
> > > > > > > >          assert(i < svq->vring.num);
> > > > > > > > +        elem = &svq_elem->elem;
> > > > > > > > +
> > > > > > > > +        vhost_svq_unstash_addr(svq_elem->in_sg_stash, elem->in_sg,
> > > > > > > > +                               elem->in_num);
> > > > > > > > +        vhost_svq_unstash_addr(svq_elem->out_sg_stash, elem->out_sg,
> > > > > > > > +                               elem->out_num);
> > > > > > > >          virtqueue_fill(vq, elem, elem->len, i++);
> > > > > > > >      }
> > > > > > > >
> > > > > > > > @@ -451,14 +526,24 @@ void vhost_svq_stop(struct vhost_dev *dev, unsigned idx,
> > > > > > > >      event_notifier_set_handler(&svq->host_notifier, NULL);
> > > > > > > >
> > > > > > > >      for (i = 0; i < svq->vring.num; ++i) {
> > > > > > > > -        g_autofree VirtQueueElement *elem = svq->ring_id_maps[i];
> > > > > > > > +        g_autofree SVQElement *svq_elem = svq->ring_id_maps[i];
> > > > > > > > +        VirtQueueElement *elem;
> > > > > > > > +
> > > > > > > > +        if (!svq_elem) {
> > > > > > > > +            continue;
> > > > > > > > +        }
> > > > > > > > +
> > > > > > > > +        elem = &svq_elem->elem;
> > > > > > > > +        vhost_svq_unstash_addr(svq_elem->in_sg_stash, elem->in_sg,
> > > > > > > > +                               elem->in_num);
> > > > > > > > +        vhost_svq_unstash_addr(svq_elem->out_sg_stash, elem->out_sg,
> > > > > > > > +                               elem->out_num);
> > > > > > > > +
> > > > > > > >          /*
> > > > > > > >           * Although the doc says we must unpop in order, it's ok to unpop
> > > > > > > >           * everything.
> > > > > > > >           */
> > > > > > > > -        if (elem) {
> > > > > > > > -            virtqueue_unpop(svq->vq, elem, elem->len);
> > > > > > > > -        }
> > > > > > > > +        virtqueue_unpop(svq->vq, elem, elem->len);
> > > > > > > >      }
> > > > > > > >  }
> > > > > > > >
> > > > > > > > @@ -466,7 +551,8 @@ void vhost_svq_stop(struct vhost_dev *dev, unsigned idx,
> > > > > > > >   * Creates vhost shadow virtqueue, and instruct vhost device to use the shadow
> > > > > > > >   * methods and file descriptors.
> > > > > > > >   */
> > > > > > > > -VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx)
> > > > > > > > +VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx,
> > > > > > > > +                                    VhostIOVATree *iova_map)
> > > > > > > >  {
> > > > > > > >      int vq_idx = dev->vq_index + idx;
> > > > > > > >      unsigned num = virtio_queue_get_num(dev->vdev, vq_idx);
> > > > > > > > @@ -500,11 +586,13 @@ VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx)
> > > > > > > >      memset(svq->vring.desc, 0, driver_size);
> > > > > > > >      svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size);
> > > > > > > >      memset(svq->vring.used, 0, device_size);
> > > > > > > > +    svq->iova_map = iova_map;
> > > > > > > > +
> > > > > > > >      for (i = 0; i < num - 1; i++) {
> > > > > > > >          svq->vring.desc[i].next = cpu_to_le16(i + 1);
> > > > > > > >      }
> > > > > > > >
> > > > > > > > -    svq->ring_id_maps = g_new0(VirtQueueElement *, num);
> > > > > > > > +    svq->ring_id_maps = g_new0(SVQElement *, num);
> > > > > > > >      event_notifier_set_handler(&svq->call_notifier,
> > > > > > > >                                 vhost_svq_handle_call);
> > > > > > > >      return g_steal_pointer(&svq);
> > > > > > > > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > > > > > > > index a9c680b487..f5a12fee9d 100644
> > > > > > > > --- a/hw/virtio/vhost-vdpa.c
> > > > > > > > +++ b/hw/virtio/vhost-vdpa.c
> > > > > > > > @@ -176,6 +176,18 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
> > > > > > > >                                           vaddr, section->readonly);
> > > > > > > >
> > > > > > > >      llsize = int128_sub(llend, int128_make64(iova));
> > > > > > > > +    if (v->shadow_vqs_enabled) {
> > > > > > > > +        VhostDMAMap mem_region = {
> > > > > > > > +            .translated_addr = vaddr,
> > > > > > > > +            .size = int128_get64(llsize) - 1,
> > > > > > > > +            .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
> > > > > > > > +        };
> > > > > > > > +
> > > > > > > > +        int r = vhost_iova_tree_alloc(v->iova_map, &mem_region);
> > > > > > > > +        assert(r == VHOST_DMA_MAP_OK);
> > > > > > > > +
> > > > > > > > +        iova = mem_region.iova;
> > > > > > > > +    }
> > > > > > > >
> > > > > > > >      ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
> > > > > > > >                               vaddr, section->readonly);
> > > > > > > > @@ -754,6 +766,23 @@ static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
> > > > > > > >      return true;
> > > > > > > >  }
> > > > > > > >
> > > > > > > > +static int vhost_vdpa_get_iova_range(struct vhost_dev *dev,
> > > > > > > > +                                     hwaddr *first, hwaddr *last)
> > > > > > > > +{
> > > > > > > > +    int ret;
> > > > > > > > +    struct vhost_vdpa_iova_range range;
> > > > > > > > +
> > > > > > > > +    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_IOVA_RANGE, &range);
> > > > > > > > +    if (ret != 0) {
> > > > > > > > +        return ret;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    *first = range.first;
> > > > > > > > +    *last = range.last;
> > > > > > > > +    trace_vhost_vdpa_get_iova_range(dev, *first, *last);
> > > > > > > > +    return ret;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >  /**
> > > > > > > >   * Maps QEMU vaddr memory to device in a suitable way for shadow virtqueue:
> > > > > > > >   *    - It always reference qemu memory address, not guest's memory.
> > > > > > > > @@ -881,6 +910,7 @@ static bool vhost_vdpa_svq_start_vq(struct vhost_dev *dev, unsigned idx)
> > > > > > > >  static unsigned vhost_vdpa_enable_svq(struct vhost_vdpa *v, bool enable)
> > > > > > > >  {
> > > > > > > >      struct vhost_dev *hdev = v->dev;
> > > > > > > > +    hwaddr iova_first, iova_last;
> > > > > > > >      unsigned n;
> > > > > > > >      int r;
> > > > > > > >
> > > > > > > > @@ -894,7 +924,7 @@ static unsigned vhost_vdpa_enable_svq(struct vhost_vdpa *v, bool enable)
> > > > > > > >      /* Allocate resources */
> > > > > > > >      assert(v->shadow_vqs->len == 0);
> > > > > > > >      for (n = 0; n < hdev->nvqs; ++n) {
> > > > > > > > -        VhostShadowVirtqueue *svq = vhost_svq_new(hdev, n);
> > > > > > > > +        VhostShadowVirtqueue *svq = vhost_svq_new(hdev, n, v->iova_map);
> > > > > > > >          if (unlikely(!svq)) {
> > > > > > > >              g_ptr_array_set_size(v->shadow_vqs, 0);
> > > > > > > >              return 0;
> > > > > > > > @@ -903,6 +933,8 @@ static unsigned vhost_vdpa_enable_svq(struct vhost_vdpa *v, bool enable)
> > > > > > > >          }
> > > > > > > >      }
> > > > > > > >
> > > > > > > > +    r = vhost_vdpa_get_iova_range(hdev, &iova_first, &iova_last);
> > > > > > > > +    assert(r == 0);
> > > > > > > >      r = vhost_vdpa_vring_pause(hdev);
> > > > > > > >      assert(r == 0);
> > > > > > > >
> > > > > > > > @@ -913,6 +945,12 @@ static unsigned vhost_vdpa_enable_svq(struct vhost_vdpa *v, bool enable)
> > > > > > > >          }
> > > > > > > >      }
> > > > > > > >
> > > > > > > > +    memory_listener_unregister(&v->listener);
> > > > > > > > +    if (vhost_vdpa_dma_unmap(v, iova_first,
> > > > > > > > +                             (iova_last - iova_first) & TARGET_PAGE_MASK)) {
> > > > > > > > +        error_report("Fail to invalidate device iotlb");
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > >      /* Reset device so it can be configured */
> > > > > > > >      r = vhost_vdpa_dev_start(hdev, false);
> > > > > > > >      assert(r == 0);
> > > > > > > > diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> > > > > > > > index 8ed19e9d0c..650e521e35 100644
> > > > > > > > --- a/hw/virtio/trace-events
> > > > > > > > +++ b/hw/virtio/trace-events
> > > > > > > > @@ -52,6 +52,7 @@ vhost_vdpa_set_vring_call(void *dev, unsigned int index, int fd) "dev: %p index:
> > > > > > > >  vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRIx64
> > > > > > > >  vhost_vdpa_set_owner(void *dev) "dev: %p"
> > > > > > > >  vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64
> > > > > > > > +vhost_vdpa_get_iova_range(void *dev, uint64_t first, uint64_t last) "dev: %p first: 0x%"PRIx64" last: 0x%"PRIx64
> > > > > > > >
> > > > > > > >  # virtio.c
> > > > > > > >  virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u"
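
As a closing illustration of the "more than one map" case raised in this
thread: a buffer that is contiguous in GPA may span several host-VA maps,
so translation must be able to split one iovec entry into several. The
sketch below is self-contained and assumes a flat map array; DmaMap and
dma_find() are illustrative stand-ins for VhostDMAMap and
vhost_iova_tree_find_iova(), not the actual API:

#include <stddef.h>
#include <stdint.h>
#include <sys/uio.h>

typedef struct {
    uintptr_t translated_addr; /* host VA where the map starts */
    size_t size;               /* bytes covered by the map */
    uintptr_t iova;            /* device IOVA of the map start */
} DmaMap;

/* Stand-in for a tree lookup: return the map containing addr, or NULL. */
static const DmaMap *dma_find(const DmaMap *maps, size_t n, uintptr_t addr)
{
    for (size_t i = 0; i < n; i++) {
        if (addr >= maps[i].translated_addr &&
            addr < maps[i].translated_addr + maps[i].size) {
            return &maps[i];
        }
    }
    return NULL;
}

/*
 * Translate one host-VA iovec entry into up to max_out IOVA entries,
 * splitting at map boundaries.  Returns the number of entries written,
 * or -1 on a translation hole or overflow: the caller must be able to
 * fail or defer, since the SVQ may need more descriptors than the
 * guest's single descriptor suggests.
 */
static int translate_iov(const DmaMap *maps, size_t n_maps,
                         struct iovec in, struct iovec *out, int max_out)
{
    uintptr_t addr = (uintptr_t)in.iov_base;
    size_t left = in.iov_len;
    int used = 0;

    while (left > 0) {
        const DmaMap *map = dma_find(maps, n_maps, addr);
        if (!map || used == max_out) {
            return -1;
        }

        size_t off = addr - map->translated_addr;
        size_t chunk = map->size - off;      /* bytes left in this map */
        if (chunk > left) {
            chunk = left;
        }

        out[used].iov_base = (void *)(map->iova + off);
        out[used].iov_len = chunk;
        used++;
        addr += chunk;
        left -= chunk;
    }
    return used;
}

With the sixteen 4K maps from the 64K example, translate_iov() would
return 16 entries for a single guest descriptor, which is exactly why the
SVQ can run out of free slots before the guest's vring does.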