Jason Wang
2022-Feb-28 07:36 UTC
[PATCH v2 10/14] vdpa: Add custom IOTLB translations to SVQ
On 2022/2/27 9:41 PM, Eugenio Pérez wrote:
> Use translations added in VhostIOVATree in SVQ.
>
> Only introduce usage here, not allocation and deallocation. As with
> previous patches, we use the dead code paths of shadow_vqs_enabled to
> avoid committing too many changes at once. These are impossible to take
> at the moment.
>
> Signed-off-by: Eugenio Pérez <eperezma at redhat.com>
> ---
>  hw/virtio/vhost-shadow-virtqueue.h |   6 +-
>  include/hw/virtio/vhost-vdpa.h     |   3 +
>  hw/virtio/vhost-shadow-virtqueue.c |  76 ++++++++++++++++-
>  hw/virtio/vhost-vdpa.c             | 128 ++++++++++++++++++++++++-----
>  4 files changed, 187 insertions(+), 26 deletions(-)
>
> diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
> index 04c67685fd..b2f722d101 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.h
> +++ b/hw/virtio/vhost-shadow-virtqueue.h
> @@ -13,6 +13,7 @@
>  #include "qemu/event_notifier.h"
>  #include "hw/virtio/virtio.h"
>  #include "standard-headers/linux/vhost_types.h"
> +#include "hw/virtio/vhost-iova-tree.h"
>
>  /* Shadow virtqueue to relay notifications */
>  typedef struct VhostShadowVirtqueue {
> @@ -43,6 +44,9 @@ typedef struct VhostShadowVirtqueue {
>      /* Virtio device */
>      VirtIODevice *vdev;
>
> +    /* IOVA mapping */
> +    VhostIOVATree *iova_tree;
> +
>      /* Map for use the guest's descriptors */
>      VirtQueueElement **ring_id_maps;
>
> @@ -78,7 +82,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
>                       VirtQueue *vq);
>  void vhost_svq_stop(VhostShadowVirtqueue *svq);
>
> -VhostShadowVirtqueue *vhost_svq_new(void);
> +VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);
>
>  void vhost_svq_free(gpointer vq);
>  G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
> diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> index 009a9f3b6b..ee8e939ad0 100644
> --- a/include/hw/virtio/vhost-vdpa.h
> +++ b/include/hw/virtio/vhost-vdpa.h
> @@ -14,6 +14,7 @@
>
>  #include <gmodule.h>
>
> +#include "hw/virtio/vhost-iova-tree.h"
>  #include "hw/virtio/virtio.h"
>  #include "standard-headers/linux/vhost_types.h"
>
> @@ -30,6 +31,8 @@ typedef struct vhost_vdpa {
>      MemoryListener listener;
>      struct vhost_vdpa_iova_range iova_range;
>      bool shadow_vqs_enabled;
> +    /* IOVA mapping used by the Shadow Virtqueue */
> +    VhostIOVATree *iova_tree;
>      GPtrArray *shadow_vqs;
>      struct vhost_dev *dev;
>      VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> index a38d313755..7e073773d1 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -11,6 +11,7 @@
>  #include "hw/virtio/vhost-shadow-virtqueue.h"
>
>  #include "qemu/error-report.h"
> +#include "qemu/log.h"
>  #include "qemu/main-loop.h"
>  #include "qemu/log.h"
>  #include "linux-headers/linux/vhost.h"
> @@ -84,7 +85,58 @@ static void vhost_svq_set_notification(VhostShadowVirtqueue *svq, bool enable)
>      }
>  }
>
> +/**
> + * Translate addresses between the qemu's virtual address and the SVQ IOVA
> + *
> + * @svq    Shadow VirtQueue
> + * @vaddr  Translated IOVA addresses
> + * @iovec  Source qemu's VA addresses
> + * @num    Length of iovec and minimum length of vaddr
> + */
> +static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
> +                                     void **addrs, const struct iovec *iovec,
> +                                     size_t num)
> +{
> +    if (num == 0) {
> +        return true;
> +    }
> +
> +    for (size_t i = 0; i < num; ++i) {
> +        DMAMap needle = {
> +            .translated_addr = (hwaddr)iovec[i].iov_base,
> +            .size = iovec[i].iov_len,
> +        };
> +        size_t off;
> +
> +        const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
> +        /*
> +         * Map cannot be NULL since iova map contains all guest space and
> +         * qemu already has a physical address mapped
> +         */
> +        if (unlikely(!map)) {
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +                          "Invalid address 0x%"HWADDR_PRIx" given by guest",
> +                          needle.translated_addr);
> +            return false;
> +        }
> +
> +        off = needle.translated_addr - map->translated_addr;
> +        addrs[i] = (void *)(map->iova + off);
> +
> +        if (unlikely(int128_gt(int128_add(needle.translated_addr,
> +                                          iovec[i].iov_len),
> +                               map->translated_addr + map->size))) {
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +                          "Guest buffer expands over iova range");
> +            return false;
> +        }
> +    }
> +
> +    return true;
> +}
> +
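To make the arithmetic above easier to follow: the translation is just an
offset computation inside whatever entry the tree lookup returns. A
standalone sketch of that logic (simplified stand-in types, not the QEMU
API; it assumes the size field is stored inclusively, as the "- 1"
initializations later in this patch suggest):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for DMAMap, for illustration only */
typedef struct {
    uint64_t iova;            /* base of the range in the device IOVA space */
    uint64_t translated_addr; /* base of the range in qemu's VA space */
    uint64_t size;            /* inclusive: offset of the last valid byte */
} Map;

/* Translate one VA range into an IOVA using a found map entry (len > 0) */
static int translate(const Map *map, uint64_t vaddr, uint64_t len,
                     uint64_t *iova)
{
    if (vaddr < map->translated_addr ||
        vaddr + len - 1 > map->translated_addr + map->size) {
        return -1; /* buffer expands over the mapped range */
    }
    *iova = map->iova + (vaddr - map->translated_addr);
    return 0;
}

int main(void)
{
    /* Hypothetical 4 KiB mapping: VA 0x7f0000000000 <-> IOVA 0x1000 */
    Map map = { 0x1000, 0x7f0000000000ULL, 0xfff };
    uint64_t iova;

    if (translate(&map, 0x7f0000000010ULL, 0x20, &iova) == 0) {
        printf("iova = 0x%" PRIx64 "\n", iova); /* should print 0x1010 */
    }
    return 0;
}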
>  static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
> +                                    void * const *vaddr_sg,

Nit: it looks to me we are not passing vaddr but iova here, so it might be
better to use "sg"?

>                                      const struct iovec *iovec,
>                                      size_t num, bool more_descs, bool write)
>  {
> @@ -103,7 +155,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
>          } else {
>              descs[i].flags = flags;
>          }
> -        descs[i].addr = cpu_to_le64((hwaddr)iovec[n].iov_base);
> +        descs[i].addr = cpu_to_le64((hwaddr)vaddr_sg[n]);
>          descs[i].len = cpu_to_le32(iovec[n].iov_len);
>
>          last = i;
> @@ -119,6 +171,8 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
>  {
>      unsigned avail_idx;
>      vring_avail_t *avail = svq->vring.avail;
> +    bool ok;
> +    g_autofree void **sgs = g_new(void *, MAX(elem->out_num, elem->in_num));
>
>      *head = svq->free_head;
>
> @@ -129,9 +183,20 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
>          return false;
>      }
>
> -    vhost_vring_write_descs(svq, elem->out_sg, elem->out_num,
> +    ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
> +    if (unlikely(!ok)) {
> +        return false;
> +    }
> +    vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
>                              elem->in_num > 0, false);
> -    vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
> +
> +
> +    ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
> +    if (unlikely(!ok)) {
> +        return false;
> +    }
> +
> +    vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
>
>      /*
>       * Put the entry in the available array (but don't update avail->idx until
> @@ -514,11 +579,13 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
>   * Creates vhost shadow virtqueue, and instructs the vhost device to use the
>   * shadow methods and file descriptors.
>   *
> + * @iova_tree  Tree to perform descriptors translations
> + *
>   * Returns the new virtqueue or NULL.
>   *
>   * In case of error, reason is reported through error_report.
>   */
> -VhostShadowVirtqueue *vhost_svq_new(void)
> +VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
>  {
>      g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
>      int r;
> @@ -539,6 +606,7 @@ VhostShadowVirtqueue *vhost_svq_new(void)
>
>      event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
>      event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
> +    svq->iova_tree = iova_tree;
>      return g_steal_pointer(&svq);
>
>  err_init_hdev_call:
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 435b9c2e9e..56f9f125cd 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -209,6 +209,21 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
>                                           vaddr, section->readonly);
>
>      llsize = int128_sub(llend, int128_make64(iova));
> +    if (v->shadow_vqs_enabled) {
> +        DMAMap mem_region = {
> +            .translated_addr = (hwaddr)vaddr,
> +            .size = int128_get64(llsize) - 1,
> +            .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
> +        };
> +
> +        int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
> +        if (unlikely(r != IOVA_OK)) {
> +            error_report("Can't allocate a mapping (%d)", r);
> +            goto fail;
> +        }
> +
> +        iova = mem_region.iova;
> +    }
>
>      vhost_vdpa_iotlb_batch_begin_once(v);
>      ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
> @@ -261,6 +276,20 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
>
>      llsize = int128_sub(llend, int128_make64(iova));
>
> +    if (v->shadow_vqs_enabled) {
> +        const DMAMap *result;
> +        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
> +            section->offset_within_region +
> +            (iova - section->offset_within_address_space);
> +        DMAMap mem_region = {
> +            .translated_addr = (hwaddr)vaddr,
> +            .size = int128_get64(llsize) - 1,
> +        };
> +
> +        result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
> +        iova = result->iova;
> +        vhost_iova_tree_remove(v->iova_tree, &mem_region);
> +    }
>      vhost_vdpa_iotlb_batch_begin_once(v);
>      ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
>      if (ret) {
> @@ -383,7 +412,7 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
>
>      shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
>      for (unsigned n = 0; n < hdev->nvqs; ++n) {
> -        g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
> +        g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);
>
>          if (unlikely(!svq)) {
>              error_setg(errp, "Cannot create svq %u", n);
> @@ -834,37 +863,78 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
>  /**
>   * Unmap a SVQ area in the device
>   */
> -static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
> -                                      hwaddr size)
> +static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
> +                                      const DMAMap *needle)
>  {
> +    const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
> +    hwaddr size;
>      int r;
>
> -    size = ROUND_UP(size, qemu_real_host_page_size);
> -    r = vhost_vdpa_dma_unmap(v, iova, size);
> +    if (unlikely(!result)) {
> +        error_report("Unable to find SVQ address to unmap");
> +        return false;
> +    }
> +
> +    size = ROUND_UP(result->size, qemu_real_host_page_size);
> +    r = vhost_vdpa_dma_unmap(v, result->iova, size);
>      return r == 0;
>  }
>
>  static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
>                                         const VhostShadowVirtqueue *svq)
>  {
> +    DMAMap needle;
>      struct vhost_vdpa *v = dev->opaque;
>      struct vhost_vring_addr svq_addr;
> -    size_t device_size = vhost_svq_device_area_size(svq);
> -    size_t driver_size = vhost_svq_driver_area_size(svq);
>      bool ok;
>
>      vhost_svq_get_vring_addr(svq, &svq_addr);
>
> -    ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
> +    needle = (DMAMap) {
> +        .translated_addr = svq_addr.desc_user_addr,
> +    };

Let's simply initialize the member to zero during the start of this function,
then we can use needle->translated_addr = XXX here.
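I.e. something like this (an untested sketch, reusing only identifiers that
already appear in this patch):

static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
                                       const VhostShadowVirtqueue *svq)
{
    DMAMap needle = {};  /* zero-initialize every member once */
    struct vhost_vdpa *v = dev->opaque;
    struct vhost_vring_addr svq_addr;
    bool ok;

    vhost_svq_get_vring_addr(svq, &svq_addr);

    needle.translated_addr = svq_addr.desc_user_addr;
    ok = vhost_vdpa_svq_unmap_ring(v, &needle);
    if (unlikely(!ok)) {
        return false;
    }

    needle.translated_addr = svq_addr.used_user_addr;
    return vhost_vdpa_svq_unmap_ring(v, &needle);
}

That avoids re-assigning the whole struct for each ring.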
> +    ok = vhost_vdpa_svq_unmap_ring(v, &needle);
>      if (unlikely(!ok)) {
>          return false;
>      }
>
> -    return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
> +    needle = (DMAMap) {
> +        .translated_addr = svq_addr.used_user_addr,
> +    };
> +    return vhost_vdpa_svq_unmap_ring(v, &needle);
> +}
> +
> +/**
> + * Map the SVQ area in the device
> + *
> + * @v       Vhost-vdpa device
> + * @needle  The area to search iova
> + * @errorp  Error pointer
> + */
> +static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
> +                                    Error **errp)
> +{
> +    int r;
> +
> +    r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
> +    if (unlikely(r != IOVA_OK)) {
> +        error_setg(errp, "Cannot allocate iova (%d)", r);
> +        return false;
> +    }
> +
> +    r = vhost_vdpa_dma_map(v, needle->iova, needle->size,
> +                           (void *)needle->translated_addr,
> +                           !(needle->perm & IOMMU_ACCESS_FLAG(0, 1)));

Let's simply use needle->perm == IOMMU_RO here?
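I.e. (sketch of the same call with the simpler check):

    r = vhost_vdpa_dma_map(v, needle->iova, needle->size,
                           (void *)needle->translated_addr,
                           needle->perm == IOMMU_RO);

which reads directly as "map read-only iff the region is read-only".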
> +    if (unlikely(r != 0)) {
> +        error_setg_errno(errp, -r, "Cannot map region to device");
> +        vhost_iova_tree_remove(v->iova_tree, needle);
> +    }
> +
> +    return r == 0;
>  }
>
>  /**
> - * Map shadow virtqueue rings in device
> + * Map the shadow virtqueue rings in the device
>   *
>   * @dev  The vhost device
>   * @svq  The shadow virtqueue
> @@ -876,28 +946,44 @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
>                                       struct vhost_vring_addr *addr,
>                                       Error **errp)
>  {
> +    DMAMap device_region, driver_region;
> +    struct vhost_vring_addr svq_addr;
>      struct vhost_vdpa *v = dev->opaque;
>      size_t device_size = vhost_svq_device_area_size(svq);
>      size_t driver_size = vhost_svq_driver_area_size(svq);
> -    int r;
> +    size_t avail_offset;
> +    bool ok;
>
>      ERRP_GUARD();
> -    vhost_svq_get_vring_addr(svq, addr);
> +    vhost_svq_get_vring_addr(svq, &svq_addr);
>
> -    r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size,
> -                           (void *)addr->desc_user_addr, true);
> -    if (unlikely(r != 0)) {
> -        error_setg_errno(errp, -r, "Cannot create vq driver region: ");
> +    driver_region = (DMAMap) {
> +        .translated_addr = svq_addr.desc_user_addr,
> +        .size = driver_size - 1,

Any reason for the "-1" here? I see several places do things like that; it's
probably a hint of a wrong API somewhere.
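(My reading, not something this series documents: the tree seems to store
sizes inclusively, i.e. an N-byte region is kept with .size = N - 1, the
offset of its last valid byte, which keeps a range ending at the top of the
address space representable. Under that assumption, a hypothetical 4 KiB
driver area at ring_vaddr would be stored as:

    DMAMap ring = {
        .translated_addr = (hwaddr)ring_vaddr,  /* hypothetical VA */
        .size = 0x1000 - 1,                     /* inclusive last-byte offset */
        .perm = IOMMU_RO,
    };

If that is indeed the convention, it deserves a comment on the DMAMap
definition.)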
Thanks

> +        .perm = IOMMU_RO,
> +    };
> +    ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
> +    if (unlikely(!ok)) {
> +        error_prepend(errp, "Cannot create vq driver region: ");
>          return false;
>      }
> +    addr->desc_user_addr = driver_region.iova;
> +    avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
> +    addr->avail_user_addr = driver_region.iova + avail_offset;
>
> -    r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size,
> -                           (void *)addr->used_user_addr, false);
> -    if (unlikely(r != 0)) {
> -        error_setg_errno(errp, -r, "Cannot create vq device region: ");
> +    device_region = (DMAMap) {
> +        .translated_addr = svq_addr.used_user_addr,
> +        .size = device_size - 1,
> +        .perm = IOMMU_RW,
> +    };
> +    ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
> +    if (unlikely(!ok)) {
> +        error_prepend(errp, "Cannot create vq device region: ");
> +        vhost_vdpa_svq_unmap_ring(v, &driver_region);
>      }
> +    addr->used_user_addr = device_region.iova;
>
> -    return r == 0;
> +    return ok;
>  }
>
>  static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,