? 2021/6/1 ??4:15, Eugenio Perez Martin ??:> On Mon, May 31, 2021 at 11:40 AM Jason Wang <jasowang at redhat.com>
wrote:
>>
>> ? 2021/5/20 ??12:28, Eugenio P?rez ??:
>>> This tree is able to look for a translated address from a IOVA
address.
>>>
>>> At first glance is similar to util/iova-tree. However, SVQ working
on
>>> devices with limited IOVA space need more capabilities, like
allocating
>>> IOVA chunks or perform reverse translations (qemu addresses to
iova).
>>>
>>> Starting a sepparated implementation. Knowing than
insertions/deletions
>>> will not be as frequent as searches,
>>
>> This might not be true if vIOMMU is enabled.
>>
> Right.
>
>>> it uses an ordered array at
>>> implementation.
>>
>> I wonder how much overhead could g_array be if it needs to grow.
>>
> I didn't do any tests, actually. But I see this VhostIOVATree as a
> replaceable tool, just to get the buffer translations to work. So I'm
> both ok with change it now and ok to delay it, since they should not
> be hard to do.
>
>>> A different name could be used, but ordered
>>> searchable array is a little bit long though.
>>
>> Note that we had a very good example for this. That is the kernel iova
>> allocator which is implemented via rbtree.
>>
>> Instead of figuring out g_array vs g_tree stuffs, I would simple go
with
>> g_tree first (based on util/iova-tree) and borrow the well design
kernel
>> iova allocator API to have a generic IOVA one instead of coupling it
>> with vhost. It could be used by other userspace driver in the future:
>>
>> init_iova_domain()/put_iova_domain();
>>
>> alloc_iova()/free_iova();
>>
>> find_iova();
>>
> We could go that way, but then the iova-tree should be extended to
> support both translations (iova->translated_addr is now implemented in
> iova-tree, the reverse is not). If I understood you correctly,
> borrowing the kernel iova allocator would give us both, right?
No the reverse lookup is done via a specific IOMMU driver if I
understand it correctly.
And if the mapping is 1:1 we can just use two iova-tree I guess.
>
> Note that it is not coupled to vhost at all except in the name: all
> the implementations only work with hwaddr and void pointers memory.
> Just to illustrate the point, I think it could be a drop-in
> replacement for iova-tree at this moment (with all the
> drawbacks/advantages of an array vs tree).
Ok.
Thanks
>
>> Another reference is the iova allocator that is implemented in VFIO.
> I will check this too.
>
> Thanks!
>
>
>> Thanks
>>
>>
>>> Signed-off-by: Eugenio P?rez <eperezma at redhat.com>
>>> ---
>>> hw/virtio/vhost-iova-tree.h | 50 ++++++++++
>>> hw/virtio/vhost-iova-tree.c | 188
++++++++++++++++++++++++++++++++++++
>>> hw/virtio/meson.build | 2 +-
>>> 3 files changed, 239 insertions(+), 1 deletion(-)
>>> create mode 100644 hw/virtio/vhost-iova-tree.h
>>> create mode 100644 hw/virtio/vhost-iova-tree.c
>>>
>>> diff --git a/hw/virtio/vhost-iova-tree.h
b/hw/virtio/vhost-iova-tree.h
>>> new file mode 100644
>>> index 0000000000..2a44af8b3a
>>> --- /dev/null
>>> +++ b/hw/virtio/vhost-iova-tree.h
>>> @@ -0,0 +1,50 @@
>>> +/*
>>> + * vhost software live migration ring
>>> + *
>>> + * SPDX-FileCopyrightText: Red Hat, Inc. 2021
>>> + * SPDX-FileContributor: Author: Eugenio P?rez <eperezma at
redhat.com>
>>> + *
>>> + * SPDX-License-Identifier: GPL-2.0-or-later
>>> + */
>>> +
>>> +#ifndef HW_VIRTIO_VHOST_IOVA_TREE_H
>>> +#define HW_VIRTIO_VHOST_IOVA_TREE_H
>>> +
>>> +#include <gmodule.h>
>>> +
>>> +#include "exec/memory.h"
>>> +
>>> +typedef struct VhostDMAMap {
>>> + void *translated_addr;
>>> + hwaddr iova;
>>> + hwaddr size; /* Inclusive */
>>> + IOMMUAccessFlags perm;
>>> +} VhostDMAMap;
>>> +
>>> +typedef enum VhostDMAMapNewRC {
>>> + VHOST_DMA_MAP_OVERLAP = -2,
>>> + VHOST_DMA_MAP_INVALID = -1,
>>> + VHOST_DMA_MAP_OK = 0,
>>> +} VhostDMAMapNewRC;
>>> +
>>> +/**
>>> + * VhostIOVATree
>>> + *
>>> + * Store and search IOVA -> Translated mappings.
>>> + *
>>> + * Note that it cannot remove nodes.
>>> + */
>>> +typedef struct VhostIOVATree {
>>> + /* Ordered array of reverse translations, IOVA address to qemu
memory. */
>>> + GArray *iova_taddr_map;
>>> +} VhostIOVATree;
>>> +
>>> +void vhost_iova_tree_new(VhostIOVATree *iova_rm);
>>> +void vhost_iova_tree_destroy(VhostIOVATree *iova_rm);
>>> +
>>> +const VhostDMAMap *vhost_iova_tree_find_taddr(const VhostIOVATree
*iova_rm,
>>> + const VhostDMAMap
*map);
>>> +VhostDMAMapNewRC vhost_iova_tree_insert(VhostIOVATree *iova_rm,
>>> + VhostDMAMap *map);
>>> +
>>> +#endif
>>> diff --git a/hw/virtio/vhost-iova-tree.c
b/hw/virtio/vhost-iova-tree.c
>>> new file mode 100644
>>> index 0000000000..dfd7e448b5
>>> --- /dev/null
>>> +++ b/hw/virtio/vhost-iova-tree.c
>>> @@ -0,0 +1,188 @@
>>> +/*
>>> + * vhost software live migration ring
>>> + *
>>> + * SPDX-FileCopyrightText: Red Hat, Inc. 2021
>>> + * SPDX-FileContributor: Author: Eugenio P?rez <eperezma at
redhat.com>
>>> + *
>>> + * SPDX-License-Identifier: GPL-2.0-or-later
>>> + */
>>> +
>>> +#include "qemu/osdep.h"
>>> +#include "vhost-iova-tree.h"
>>> +
>>> +#define G_ARRAY_NOT_ZERO_TERMINATED false
>>> +#define G_ARRAY_NOT_CLEAR_ON_ALLOC false
>>> +
>>> +/**
>>> + * Inserts an element after an existing one in garray.
>>> + *
>>> + * @array The array
>>> + * @prev_elem The previous element of array of NULL if prepending
>>> + * @map The DMA map
>>> + *
>>> + * It provides the aditional advantage of being type safe over
>>> + * g_array_insert_val, which accepts a reference pointer instead
of a value
>>> + * with no complains.
>>> + */
>>> +static void vhost_iova_tree_insert_after(GArray *array,
>>> + const VhostDMAMap
*prev_elem,
>>> + const VhostDMAMap *map)
>>> +{
>>> + size_t pos;
>>> +
>>> + if (!prev_elem) {
>>> + pos = 0;
>>> + } else {
>>> + pos = prev_elem - &g_array_index(array,
typeof(*prev_elem), 0) + 1;
>>> + }
>>> +
>>> + g_array_insert_val(array, pos, *map);
>>> +}
>>> +
>>> +static gint vhost_iova_tree_cmp_iova(gconstpointer a,
gconstpointer b)
>>> +{
>>> + const VhostDMAMap *m1 = a, *m2 = b;
>>> +
>>> + if (m1->iova > m2->iova + m2->size) {
>>> + return 1;
>>> + }
>>> +
>>> + if (m1->iova + m1->size < m2->iova) {
>>> + return -1;
>>> + }
>>> +
>>> + /* Overlapped */
>>> + return 0;
>>> +}
>>> +
>>> +/**
>>> + * Find the previous node to a given iova
>>> + *
>>> + * @array The ascending ordered-by-translated-addr array of
VhostDMAMap
>>> + * @map The map to insert
>>> + * @prev Returned location of the previous map
>>> + *
>>> + * Return VHOST_DMA_MAP_OK if everything went well, or
VHOST_DMA_MAP_OVERLAP if
>>> + * it already exists. It is ok to use this function to check if a
given range
>>> + * exists, but it will use a linear search.
>>> + *
>>> + * TODO: We can use bsearch to locate the entry if we save the
state in the
>>> + * needle, knowing that the needle is always the first argument to
>>> + * compare_func.
>>> + */
>>> +static VhostDMAMapNewRC vhost_iova_tree_find_prev(const GArray
*array,
>>> + GCompareFunc
compare_func,
>>> + const
VhostDMAMap *map,
>>> + const
VhostDMAMap **prev)
>>> +{
>>> + size_t i;
>>> + int r;
>>> +
>>> + *prev = NULL;
>>> + for (i = 0; i < array->len; ++i) {
>>> + r = compare_func(map, &g_array_index(array,
typeof(*map), i));
>>> + if (r == 0) {
>>> + return VHOST_DMA_MAP_OVERLAP;
>>> + }
>>> + if (r < 0) {
>>> + return VHOST_DMA_MAP_OK;
>>> + }
>>> +
>>> + *prev = &g_array_index(array, typeof(**prev), i);
>>> + }
>>> +
>>> + return VHOST_DMA_MAP_OK;
>>> +}
>>> +
>>> +/**
>>> + * Create a new IOVA tree
>>> + *
>>> + * @tree The IOVA tree
>>> + */
>>> +void vhost_iova_tree_new(VhostIOVATree *tree)
>>> +{
>>> + assert(tree);
>>> +
>>> + tree->iova_taddr_map =
g_array_new(G_ARRAY_NOT_ZERO_TERMINATED,
>>> + G_ARRAY_NOT_CLEAR_ON_ALLOC,
>>> + sizeof(VhostDMAMap));
>>> +}
>>> +
>>> +/**
>>> + * Destroy an IOVA tree
>>> + *
>>> + * @tree The iova tree
>>> + */
>>> +void vhost_iova_tree_destroy(VhostIOVATree *tree)
>>> +{
>>> + g_array_unref(g_steal_pointer(&tree->iova_taddr_map));
>>> +}
>>> +
>>> +/**
>>> + * Perform a search on a GArray.
>>> + *
>>> + * @array Glib array
>>> + * @map Map to look up
>>> + * @compare_func Compare function to use
>>> + *
>>> + * Return The found element or NULL if not found.
>>> + *
>>> + * This can be replaced with g_array_binary_search (Since glib
2.62) when that
>>> + * is common enough.
>>> + */
>>> +static const VhostDMAMap *vhost_iova_tree_bsearch(const GArray
*array,
>>> + const
VhostDMAMap *map,
>>> + GCompareFunc
compare_func)
>>> +{
>>> + return bsearch(map, array->data, array->len,
sizeof(*map), compare_func);
>>> +}
>>> +
>>> +/**
>>> + * Find the translated address stored from a IOVA address
>>> + *
>>> + * @tree The iova tree
>>> + * @map The map with the memory address
>>> + *
>>> + * Return the stored mapping, or NULL if not found.
>>> + */
>>> +const VhostDMAMap *vhost_iova_tree_find_taddr(const VhostIOVATree
*tree,
>>> + const VhostDMAMap
*map)
>>> +{
>>> + return vhost_iova_tree_bsearch(tree->iova_taddr_map, map,
>>> + vhost_iova_tree_cmp_iova);
>>> +}
>>> +
>>> +/**
>>> + * Insert a new map
>>> + *
>>> + * @tree The iova tree
>>> + * @map The iova map
>>> + *
>>> + * Returns:
>>> + * - VHOST_DMA_MAP_OK if the map fits in the container
>>> + * - VHOST_DMA_MAP_INVALID if the map does not make sense (like
size overflow)
>>> + * - VHOST_DMA_MAP_OVERLAP if the tree already contains that map
>>> + * Can query the assignated iova in map.
>>> + */
>>> +VhostDMAMapNewRC vhost_iova_tree_insert(VhostIOVATree *tree,
>>> + VhostDMAMap *map)
>>> +{
>>> + const VhostDMAMap *prev;
>>> + int find_prev_rc;
>>> +
>>> + if (map->translated_addr + map->size <
map->translated_addr ||
>>> + map->iova + map->size < map->iova ||
map->perm == IOMMU_NONE) {
>>> + return VHOST_DMA_MAP_INVALID;
>>> + }
>>> +
>>> + /* Check for duplicates, and save position for insertion */
>>> + find_prev_rc =
vhost_iova_tree_find_prev(tree->iova_taddr_map,
>>> +
vhost_iova_tree_cmp_iova, map,
>>> + &prev);
>>> + if (find_prev_rc == VHOST_DMA_MAP_OVERLAP) {
>>> + return VHOST_DMA_MAP_OVERLAP;
>>> + }
>>> +
>>> + vhost_iova_tree_insert_after(tree->iova_taddr_map, prev,
map);
>>> + return VHOST_DMA_MAP_OK;
>>> +}
>>> diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
>>> index 8b5a0225fe..cb306b83c6 100644
>>> --- a/hw/virtio/meson.build
>>> +++ b/hw/virtio/meson.build
>>> @@ -11,7 +11,7 @@ softmmu_ss.add(when: 'CONFIG_ALL',
if_true: files('vhost-stub.c'))
>>>
>>> virtio_ss = ss.source_set()
>>> virtio_ss.add(files('virtio.c'))
>>> -virtio_ss.add(when: 'CONFIG_VHOST', if_true:
files('vhost.c', 'vhost-backend.c',
'vhost-shadow-virtqueue.c'))
>>> +virtio_ss.add(when: 'CONFIG_VHOST', if_true:
files('vhost.c', 'vhost-backend.c',
'vhost-shadow-virtqueue.c', 'vhost-iova-tree.c'))
>>> virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true:
files('vhost-user.c'))
>>> virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true:
files('vhost-vdpa.c'))
>>> virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true:
files('virtio-balloon.c'))