Si-Wei Liu
2023-Aug-15 01:43 UTC
[PATCH RFC 0/4] vdpa: decouple reset of iotlb mapping from device reset
In order to reduce the needlessly high setup and teardown cost of iotlb mapping during live migration, it's crucial to decouple the vhost-vdpa iotlb abstraction from the virtio device life cycle, i.e. iotlb mappings should be left intact across virtio device reset [1]. For this to work, the on-chip IOMMU parent device should implement a separate .reset_map() operation callback to restore the 1:1 DMA mapping without having to resort to the .reset() callback, which is mainly used to reset virtio-specific device state. This new .reset_map() callback will be invoked only when the vhost-vdpa driver is to be removed and detached from the vdpa bus, such that other vdpa bus drivers, e.g. virtio-vdpa, can get back the 1:1 DMA mapping when they are attached. For context, those on-chip IOMMU parent devices create the 1:1 DMA mapping when the vdpa device is added, and they would implicitly destroy the 1:1 mapping when the first .set_map or .dma_map callback is invoked. [1] Reducing vdpa migration downtime because of memory pin / maps https://www.mail-archive.com/qemu-devel@nongnu.org/msg953755.html --- Si-Wei Liu (4): vdpa: introduce .reset_map operation callback vdpa/mlx5: implement .reset_map driver op vhost-vdpa: should restore 1:1 dma mapping before detaching driver vhost-vdpa: introduce IOTLB_PERSIST backend feature bit drivers/vdpa/mlx5/core/mlx5_vdpa.h | 1 + drivers/vdpa/mlx5/core/mr.c | 72 +++++++++++++++++++++----------------- drivers/vdpa/mlx5/net/mlx5_vnet.c | 18 +++++++--- drivers/vhost/vdpa.c | 33 ++++++++++++++++- include/linux/vdpa.h | 7 ++++ include/uapi/linux/vhost_types.h | 2 ++ 6 files changed, 95 insertions(+), 38 deletions(-) -- 1.8.3.1
Si-Wei Liu
2023-Aug-15 01:43 UTC
[PATCH RFC 1/4] vdpa: introduce .reset_map operation callback
Signed-off-by: Si-Wei Liu <si-wei.liu at oracle.com> --- include/linux/vdpa.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index db1b0ea..3a3878d 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -314,6 +314,12 @@ struct vdpa_map_file { * @iova: iova to be unmapped * @size: size of the area * Returns integer: success (0) or error (< 0) + * @reset_map: Reset device memory mapping (optional) + * Needed for device that using device + * specific DMA translation (on-chip IOMMU) + * @vdev: vdpa device + * @asid: address space identifier + * Returns integer: success (0) or error (< 0) * @get_vq_dma_dev: Get the dma device for a specific * virtqueue (optional) * @vdev: vdpa device @@ -390,6 +396,7 @@ struct vdpa_config_ops { u64 iova, u64 size, u64 pa, u32 perm, void *opaque); int (*dma_unmap)(struct vdpa_device *vdev, unsigned int asid, u64 iova, u64 size); + int (*reset_map)(struct vdpa_device *vdev, unsigned int asid); int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group, unsigned int asid); struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx); -- 1.8.3.1
Si-Wei Liu
2023-Aug-15 01:43 UTC
[PATCH RFC 2/4] vdpa/mlx5: implement .reset_map driver op
This patch is based on top of the "vdpa/mlx5: Fixes for ASID handling" series [1]. [1] vdpa/mlx5: Fixes for ASID handling https://lore.kernel.org/virtualization/20230802171231.11001-1-dtatulea@nvidia.com/ Signed-off-by: Si-Wei Liu <si-wei.liu at oracle.com> --- drivers/vdpa/mlx5/core/mlx5_vdpa.h | 1 + drivers/vdpa/mlx5/core/mr.c | 72 +++++++++++++++++++++----------------- drivers/vdpa/mlx5/net/mlx5_vnet.c | 18 +++++++--- 3 files changed, 54 insertions(+), 37 deletions(-) diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index b53420e..5c9a25a 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -123,6 +123,7 @@ int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, unsigned int asid); void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev); void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid); +int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid); #define mlx5_vdpa_warn(__dev, format, ...) 
\ dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__, \ diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 5a1971fc..c8d64fc 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -489,21 +489,15 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr } } -static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) +static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev) { - if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid) - return; - prune_iotlb(mvdev); } -static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) +static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev) { struct mlx5_vdpa_mr *mr = &mvdev->mr; - if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid) - return; - if (!mr->initialized) return; @@ -521,8 +515,10 @@ void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid) mutex_lock(&mr->mkey_mtx); - _mlx5_vdpa_destroy_dvq_mr(mvdev, asid); - _mlx5_vdpa_destroy_cvq_mr(mvdev, asid); + if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) + _mlx5_vdpa_destroy_dvq_mr(mvdev); + if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) + _mlx5_vdpa_destroy_cvq_mr(mvdev); mutex_unlock(&mr->mkey_mtx); } @@ -534,25 +530,17 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) } static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev, - struct vhost_iotlb *iotlb, - unsigned int asid) + struct vhost_iotlb *iotlb) { - if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid) - return 0; - return dup_iotlb(mvdev, iotlb); } static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev, - struct vhost_iotlb *iotlb, - unsigned int asid) + struct vhost_iotlb *iotlb) { struct mlx5_vdpa_mr *mr = &mvdev->mr; int err; - if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid) - return 0; - if (mr->initialized) return 0; @@ -574,20 +562,18 @@ static int 
_mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, { int err; - err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid); - if (err) - return err; - - err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid); - if (err) - goto out_err; + if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) { + err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid); + if (err) + return err; + } + if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) { + err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb); + if (err) + return err; + } return 0; - -out_err: - _mlx5_vdpa_destroy_dvq_mr(mvdev, asid); - - return err; } int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, @@ -601,6 +587,28 @@ int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, return err; } +int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + int err = 0; + + if (asid != 0) + return 0; + + mutex_lock(&mr->mkey_mtx); + if (!mr->user_mr) + goto out; + _mlx5_vdpa_destroy_dvq_mr(mvdev); + if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { + err = _mlx5_vdpa_create_dvq_mr(mvdev, NULL, 0); + if (err) + mlx5_vdpa_warn(mvdev, "create DMA MR failed\n"); + } +out: + mutex_unlock(&mr->mkey_mtx); + return err; +} + int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, bool *change_map, unsigned int asid) { diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 61c10ba..399a690 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2816,7 +2816,6 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) unregister_link_notifier(ndev); teardown_driver(ndev); clear_vqs_ready(ndev); - mlx5_vdpa_destroy_mr(&ndev->mvdev); ndev->mvdev.status = 0; ndev->mvdev.suspended = false; ndev->cur_num_vqs = 0; @@ -2827,10 +2826,6 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) init_group_to_asid_map(mvdev); ++mvdev->generation; - if (MLX5_CAP_GEN(mvdev->mdev, 
umem_uid_0)) { - if (mlx5_vdpa_create_mr(mvdev, NULL, 0)) - mlx5_vdpa_warn(mvdev, "create MR failed\n"); - } up_write(&ndev->reslock); return 0; @@ -2895,6 +2890,18 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid, return err; } +static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + int err; + + down_write(&ndev->reslock); + err = mlx5_vdpa_reset_mr(mvdev, asid); + up_write(&ndev->reslock); + return err; +} + static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); @@ -3154,6 +3161,7 @@ static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group, .set_config = mlx5_vdpa_set_config, .get_generation = mlx5_vdpa_get_generation, .set_map = mlx5_vdpa_set_map, + .reset_map = mlx5_vdpa_reset_map, .set_group_asid = mlx5_set_group_asid, .get_vq_dma_dev = mlx5_get_vq_dma_dev, .free = mlx5_vdpa_free, -- 1.8.3.1
Si-Wei Liu
2023-Aug-15 01:43 UTC
[PATCH RFC 3/4] vhost-vdpa: should restore 1:1 dma mapping before detaching driver
Signed-off-by: Si-Wei Liu <si-wei.liu at oracle.com> --- drivers/vhost/vdpa.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index b43e868..62b0a01 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -131,6 +131,15 @@ static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v, return vhost_vdpa_alloc_as(v, asid); } +static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (ops->reset_map) + ops->reset_map(vdpa, asid); +} + static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) { struct vhost_vdpa_as *as = asid_to_as(v, asid); @@ -140,6 +149,14 @@ static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) hlist_del(&as->hash_link); vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid); + /* + * Devices with on-chip IOMMU need to restore iotlb + * to 1:1 identity mapping before vhost-vdpa is going + * to be removed and detached from the device. Give + * them a chance to do so, as this cannot be done + * efficiently via the whole-range unmap call above. + */ + vhost_vdpa_reset_map(v, asid); kfree(as); return 0; -- 1.8.3.1