Jason Wang
2021-Aug-19 07:12 UTC
[PATCH v2 5/6] vdpa/mlx5: Add support for control VQ and MAC setting
On 2021/8/19 2:06 PM, Eli Cohen wrote:
> On Thu, Aug 19, 2021 at 12:04:10PM +0800, Jason Wang wrote:
>> On 2021/8/17 2:02 PM, Eli Cohen wrote:
>>> Add support to handle control virtqueue configurations per virtio
>>> specification. The control virtqueue is implemented in software and no
>>> hardware offloading is involved.
>>>
>>> Control VQ configuration needs task context, therefore all configurations
>>> are handled in a workqueue created for the purpose.
>>
>> I think all the current callers are already in the task context (the
>> caller of virtnet_send_command()).
>>
>> Any reason for using a workqueue here?
>>
> I am running code that might sleep and the call has, IIRC, irqs disabled. The
> kernel complained about this.

I see.

>
>> I'm not sure if it can work well on UP where the workqueue might not have a
>> chance to be scheduled (we are doing busy waiting here):
>>
>> 	/* Spin for a response, the kick causes an ioport write, trapping
>> 	 * into the hypervisor, so the request should be handled
>> 	 * immediately.
>> 	 */
>> 	while (!virtqueue_get_buf(vi->cvq, &tmp) &&
>> 	       !virtqueue_is_broken(vi->cvq))
>> 		cpu_relax();
>>
> I think we have two different cases here:
> 1. When the net device is running in a VM. In this case we do not have
> any issue since the loop is running at the guest kernel and the work is
> done at the host kernel and would end at a finite time.
>
> 2. When we're using virtio_vdpa and the device is on the host we have an
> issue if we're using a UP processor. Maybe we should avoid supporting CVQ
> in this case?

Maybe we can switch to using interrupt and sleep here; I will have a look.

>
>>> Modifications are made to the memory registration code to allow for
>>> saving a copy of the iotlb to be used by the control VQ to access the vring.
>>>
>>> The max number of data virtqueues supported by the driver has been
>>> updated to 2 since multiqueue is not supported at this stage and we need
>>> to ensure consistency of VQ indices mapping to either data or control
>>> VQ.
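
Regarding the UP busy-wait concern above, a minimal sketch of what a sleeping
wait in virtnet_send_command() might look like (untested; it assumes a new
completion in struct virtnet_info and a callback registered for vi->cvq,
neither of which exists in virtio-net today):

static void virtnet_cvq_done(struct virtqueue *cvq)
{
	/* Hypothetical callback for the control VQ; virtio-net currently
	 * registers no callback for cvq, so this is illustration only.
	 */
	struct virtnet_info *vi = cvq->vdev->priv;

	complete(&vi->cvq_done);
}

	/* In virtnet_send_command(): sleep for a response instead of
	 * spinning with cpu_relax(), so that on a UP host running
	 * virtio_vdpa the CVQ workqueue still gets a chance to run.
	 * vi->cvq_done is the hypothetical completion initialized at
	 * probe time and signaled from virtnet_cvq_done() above;
	 * reinit_completion() before each command is omitted for brevity.
	 */
	while (!virtqueue_get_buf(vi->cvq, &tmp) &&
	       !virtqueue_is_broken(vi->cvq))
		wait_for_completion(&vi->cvq_done);

The spin variant relies on the kick trapping to the hypervisor and being
handled immediately; the sleeping variant instead requires the device (or the
mlx5 CVQ worker) to raise an interrupt when the used buffer is posted.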
>>>
>>> Signed-off-by: Eli Cohen <elic at nvidia.com>
>>> ---
>>>  drivers/vdpa/mlx5/core/mlx5_vdpa.h |  23 +++
>>>  drivers/vdpa/mlx5/core/mr.c        |  81 +++++++---
>>>  drivers/vdpa/mlx5/core/resources.c |  31 ++++
>>>  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 231 +++++++++++++++++++++++++++--
>>>  4 files changed, 334 insertions(+), 32 deletions(-)
>>>
>>> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>> index 41b20855ed31..6c43476a69cb 100644
>>> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>> @@ -5,6 +5,7 @@
>>>  #define __MLX5_VDPA_H__
>>>  #include <linux/etherdevice.h>
>>> +#include <linux/vringh.h>
>>>  #include <linux/vdpa.h>
>>>  #include <linux/mlx5/driver.h>
>>> @@ -47,6 +48,26 @@ struct mlx5_vdpa_resources {
>>>  	bool valid;
>>>  };
>>> +struct mlx5_control_vq {
>>> +	struct vhost_iotlb *iotlb;
>>> +	/* spinlock to synchronize iommu table */
>>> +	spinlock_t iommu_lock;
>>> +	struct vringh vring;
>>> +	bool ready;
>>> +	u64 desc_addr;
>>> +	u64 device_addr;
>>> +	u64 driver_addr;
>>> +	struct vdpa_callback event_cb;
>>> +	struct vringh_kiov riov;
>>> +	struct vringh_kiov wiov;
>>> +	unsigned short head;
>>> +};
>>> +
>>> +struct mlx5_ctrl_wq_ent {
>>> +	struct work_struct work;
>>> +	struct mlx5_vdpa_dev *mvdev;
>>> +};
>>> +
>>>  struct mlx5_vdpa_dev {
>>>  	struct vdpa_device vdev;
>>>  	struct mlx5_core_dev *mdev;
>>> @@ -60,6 +81,8 @@ struct mlx5_vdpa_dev {
>>>  	u32 generation;
>>>  	struct mlx5_vdpa_mr mr;
>>> +	struct mlx5_control_vq cvq;
>>> +	struct workqueue_struct *wq;
>>>  };
>>>  int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
>>> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
>>> index e59135fa867e..da013b8082bc 100644
>>> --- a/drivers/vdpa/mlx5/core/mr.c
>>> +++ b/drivers/vdpa/mlx5/core/mr.c
>>> @@ -1,6 +1,7 @@
>>>  // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
>>>  /* Copyright (c) 2020 Mellanox Technologies Ltd.
*/ >>> +#include <linux/vhost_types.h> >>> #include <linux/vdpa.h> >>> #include <linux/gcd.h> >>> #include <linux/string.h> >>> @@ -451,33 +452,30 @@ static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) >>> mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey); >>> } >>> -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) >>> +static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src) >>> { >>> - struct mlx5_vdpa_mr *mr = &mvdev->mr; >>> + struct vhost_iotlb_map *map; >>> + u64 start = 0ULL, last = 0ULL - 1; >>> int err; >>> - if (mr->initialized) >>> - return 0; >>> - >>> - if (iotlb) >>> - err = create_user_mr(mvdev, iotlb); >>> - else >>> - err = create_dma_mr(mvdev, mr); >>> - >>> - if (!err) >>> - mr->initialized = true; >>> + if (!src) { >>> + err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW); >>> + return err; >>> + } >>> - return err; >>> + for (map = vhost_iotlb_itree_first(src, start, last); map; >>> + map = vhost_iotlb_itree_next(map, start, last)) { >>> + err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last, >>> + map->addr, map->perm); >>> + if (err) >>> + return err; >>> + } >>> + return 0; >>> } >>> -int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) >>> +static void prune_iotlb(struct mlx5_vdpa_dev *mvdev) >>> { >>> - int err; >>> - >>> - mutex_lock(&mvdev->mr.mkey_mtx); >>> - err = _mlx5_vdpa_create_mr(mvdev, iotlb); >>> - mutex_unlock(&mvdev->mr.mkey_mtx); >>> - return err; >>> + vhost_iotlb_del_range(mvdev->cvq.iotlb, 0ULL, 0ULL - 1); >> >> It's better to use ULLONG_MAX. > Will change. > >> >>> } >>> static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) >>> @@ -501,6 +499,7 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) >>> if (!mr->initialized) >>> goto out; >>> + prune_iotlb(mvdev); >>> if (mr->user_mr) >>> destroy_user_mr(mvdev, mr); >>> else >>> @@ -512,6 +511,48 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) >>> mutex_unlock(&mr->mkey_mtx); >>> } >>> +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) >>> +{ >>> + struct mlx5_vdpa_mr *mr = &mvdev->mr; >>> + int err; >>> + >>> + if (mr->initialized) >>> + return 0; >>> + >>> + if (iotlb) >>> + err = create_user_mr(mvdev, iotlb); >>> + else >>> + err = create_dma_mr(mvdev, mr); >>> + >>> + if (err) >>> + return err; >>> + >>> + err = dup_iotlb(mvdev, iotlb); >>> + if (err) >>> + goto out_err; >>> + >>> + mr->initialized = true; >>> + return 0; >>> + >>> +out_err: >>> + if (iotlb) >>> + destroy_user_mr(mvdev, mr); >>> + else >>> + destroy_dma_mr(mvdev, mr); >>> + >>> + return err; >>> +} >>> + >>> +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) >>> +{ >>> + int err; >>> + >>> + mutex_lock(&mvdev->mr.mkey_mtx); >>> + err = _mlx5_vdpa_create_mr(mvdev, iotlb); >>> + mutex_unlock(&mvdev->mr.mkey_mtx); >>> + return err; >>> +} >>> + >>> int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, >>> bool *change_map) >>> { >>> diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c >>> index d4606213f88a..d24ae1a85159 100644 >>> --- a/drivers/vdpa/mlx5/core/resources.c >>> +++ b/drivers/vdpa/mlx5/core/resources.c >>> @@ -1,6 +1,7 @@ >>> // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB >>> /* Copyright (c) 2020 Mellanox Technologies Ltd. 
*/ >>> +#include <linux/iova.h> >>> #include <linux/mlx5/driver.h> >>> #include "mlx5_vdpa.h" >>> @@ -221,6 +222,28 @@ int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *m >>> return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in); >>> } >>> +static int init_ctrl_vq(struct mlx5_vdpa_dev *mvdev) >>> +{ >>> + int err; >>> + >>> + mvdev->cvq.iotlb = vhost_iotlb_alloc(0, 0); >>> + if (!mvdev->cvq.iotlb) >>> + return -ENOMEM; >>> + >>> + vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb, &mvdev->cvq.iommu_lock); >>> + err = iova_cache_get(); >> >> Any reason for using iova cache here? > Isn't it required? Aren't we allocating buffers for the CVQ from > iommu_iova kmem cache?I may miss something here but which buffer did you refer here?> >> >>> + if (err) >>> + vhost_iotlb_free(mvdev->cvq.iotlb); >>> + >>> + return err; >>> +} >>> + >>> +static void cleanup_ctrl_vq(struct mlx5_vdpa_dev *mvdev) >>> +{ >>> + iova_cache_put(); >>> + vhost_iotlb_free(mvdev->cvq.iotlb); >>> +} >>> + >>> int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev) >>> { >>> u64 offset = MLX5_CAP64_DEV_VDPA_EMULATION(mvdev->mdev, doorbell_bar_offset); >>> @@ -260,10 +283,17 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev) >>> err = -ENOMEM; >>> goto err_key; >>> } >>> + >>> + err = init_ctrl_vq(mvdev); >>> + if (err) >>> + goto err_ctrl; >>> + >>> res->valid = true; >>> return 0; >>> +err_ctrl: >>> + iounmap(res->kick_addr); >>> err_key: >>> dealloc_pd(mvdev, res->pdn, res->uid); >>> err_pd: >>> @@ -282,6 +312,7 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev) >>> if (!res->valid) >>> return; >>> + cleanup_ctrl_vq(mvdev); >>> iounmap(res->kick_addr); >>> res->kick_addr = NULL; >>> dealloc_pd(mvdev, res->pdn, res->uid); >>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c >>> index 0fe7cd370e4b..e18665781135 100644 >>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c >>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c >>> @@ -133,7 +133,7 @@ struct mlx5_vdpa_virtqueue { >>> /* We will remove this limitation once mlx5_vdpa_alloc_resources() >>> * provides for driver space allocation >>> */ >>> -#define MLX5_MAX_SUPPORTED_VQS 16 >>> +#define MLX5_MAX_SUPPORTED_VQS 2 >>> static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx) >>> { >>> @@ -160,6 +160,7 @@ struct mlx5_vdpa_net { >>> struct mlx5_flow_handle *rx_rule; >>> bool setup; >>> u16 mtu; >>> + u32 cur_num_vqs; >>> }; >>> static void free_resources(struct mlx5_vdpa_net *ndev); >>> @@ -169,6 +170,8 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev); >>> static bool mlx5_vdpa_debug; >>> +#define MLX5_CVQ_MAX_ENT 16 >>> + >>> #define MLX5_LOG_VIO_FLAG(_feature) \ >>> do { \ >>> if (features & BIT_ULL(_feature)) \ >>> @@ -186,6 +189,16 @@ static inline u32 mlx5_vdpa_max_qps(int max_vqs) >>> return max_vqs / 2; >>> } >>> +static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev) >>> +{ >>> + return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs); >>> +} >>> + >>> +static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx) >>> +{ >>> + return idx == ctrl_vq_idx(mvdev); >>> +} >>> + >>> static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set) >>> { >>> if (status & ~VALID_STATUS_MASK) >>> @@ -1359,15 +1372,132 @@ static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev) >>> ndev->rx_rule = NULL; >>> } >>> +virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) >>> +{ >>> + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); >>> + struct mlx5_control_vq *cvq = 
&mvdev->cvq; >>> + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; >>> + struct mlx5_core_dev *pfmdev; >>> + size_t read; >>> + u8 mac[ETH_ALEN]; >>> + >>> + pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); >>> + switch (cmd) { >>> + case VIRTIO_NET_CTRL_MAC_ADDR_SET: >>> + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN); >>> + if (read != ETH_ALEN) >>> + break; >>> + >>> + if (!memcmp(ndev->config.mac, mac, 6)) { >>> + status = VIRTIO_NET_OK; >>> + break; >>> + } >>> + >>> + if (!is_zero_ether_addr(ndev->config.mac)) { >>> + if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) { >>> + mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n", >>> + ndev->config.mac); >>> + break; >>> + } >>> + } >>> + >>> + if (mlx5_mpfs_add_mac(pfmdev, mac)) { >>> + mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n", >>> + mac); >>> + break; >>> + } >>> + >>> + memcpy(ndev->config.mac, mac, ETH_ALEN); >>> + status = VIRTIO_NET_OK; >>> + break; >>> + >>> + default: >>> + break; >>> + } >>> + >>> + return status; >>> +} >>> + >>> +static void mlx5_cvq_kick_handler(struct work_struct *work) >>> +{ >>> + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; >>> + struct virtio_net_ctrl_hdr ctrl; >>> + struct mlx5_ctrl_wq_ent *wqent; >>> + struct mlx5_vdpa_dev *mvdev; >>> + struct mlx5_control_vq *cvq; >>> + struct mlx5_vdpa_net *ndev; >>> + size_t read, write; >>> + int err; >>> + >>> + wqent = container_of(work, struct mlx5_ctrl_wq_ent, work); >>> + mvdev = wqent->mvdev; >>> + ndev = to_mlx5_vdpa_ndev(mvdev); >>> + cvq = &mvdev->cvq; >>> + if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) >>> + goto out; >>> + >>> + if (!cvq->ready) >>> + goto out; >>> + >>> + while (true) { >>> + err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head, >>> + GFP_ATOMIC); >>> + if (err <= 0) >>> + break; >>> + >>> + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl)); >>> + if (read != sizeof(ctrl)) >>> + break; >>> + >>> + switch (ctrl.class) { >>> + case VIRTIO_NET_CTRL_MAC: >>> + status = handle_ctrl_mac(mvdev, ctrl.cmd); >>> + break; >>> + >>> + default: >>> + break; >>> + } >>> + >>> + /* Make sure data is written before advancing index */ >>> + smp_wmb(); >>> + >>> + write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status)); >>> + vringh_complete_iotlb(&cvq->vring, cvq->head, write); >>> + vringh_kiov_cleanup(&cvq->riov); >>> + vringh_kiov_cleanup(&cvq->wiov); >>> + >>> + if (vringh_need_notify_iotlb(&cvq->vring)) >>> + vringh_notify(&cvq->vring); >>> + } >>> +out: >>> + kfree(wqent); >>> +} >>> + >>> static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) >>> { >>> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); >>> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); >>> - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; >>> + struct mlx5_vdpa_virtqueue *mvq; >>> + struct mlx5_ctrl_wq_ent *wqent; >>> if (!is_index_valid(mvdev, idx)) >>> return; >>> + if (unlikely(is_ctrl_vq_idx(mvdev, idx))) { >>> + if (!mvdev->cvq.ready) >>> + return; >>> + >>> + wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); >>> + if (!wqent) >>> + return; >>> + >>> + wqent->mvdev = mvdev; >>> + INIT_WORK(&wqent->work, mlx5_cvq_kick_handler); >>> + queue_work(mvdev->wq, &wqent->work); >>> + return; >>> + } >>> + >>> + mvq = &ndev->vqs[idx]; >>> if (unlikely(!mvq->ready)) >>> return; >>> @@ -1379,11 +1509,19 @@ static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_ >>> { >>> 
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); >>> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); >>> - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; >>> + struct mlx5_vdpa_virtqueue *mvq; >>> if (!is_index_valid(mvdev, idx)) >>> return -EINVAL; >>> + if (is_ctrl_vq_idx(mvdev, idx)) { >>> + mvdev->cvq.desc_addr = desc_area; >>> + mvdev->cvq.device_addr = device_area; >>> + mvdev->cvq.driver_addr = driver_area; >>> + return 0; >>> + } >>> + >>> + mvq = &ndev->vqs[idx]; >>> mvq->desc_addr = desc_area; >>> mvq->device_addr = device_area; >>> mvq->driver_addr = driver_area; >>> @@ -1396,7 +1534,7 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) >>> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); >>> struct mlx5_vdpa_virtqueue *mvq; >>> - if (!is_index_valid(mvdev, idx)) >>> + if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) >>> return; >>> mvq = &ndev->vqs[idx]; >>> @@ -1411,15 +1549,42 @@ static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_c >>> ndev->event_cbs[idx] = *cb; >>> } >>> +static void mlx5_cvq_notify(struct vringh *vring) >>> +{ >>> + struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring); >>> + >>> + if (!cvq->event_cb.callback) >>> + return; >>> + >>> + cvq->event_cb.callback(cvq->event_cb.private); >>> +} >>> + >>> +static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready) >>> +{ >>> + struct mlx5_control_vq *cvq = &mvdev->cvq; >>> + >>> + cvq->ready = ready; >>> + if (!ready) >>> + return; >>> + >>> + cvq->vring.notify = mlx5_cvq_notify; >>> +} >>> + >>> static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready) >>> { >>> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); >>> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); >>> - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; >>> + struct mlx5_vdpa_virtqueue *mvq; >>> if (!is_index_valid(mvdev, idx)) >>> return; >>> + if (is_ctrl_vq_idx(mvdev, idx)) { >>> + set_cvq_ready(mvdev, ready); >>> + return; >>> + } >>> + >>> + mvq = &ndev->vqs[idx]; >>> if (!ready) >>> suspend_vq(ndev, mvq); >>> @@ -1430,12 +1595,14 @@ static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx) >>> { >>> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); >>> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); >>> - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; >>> if (!is_index_valid(mvdev, idx)) >>> return false; >>> - return mvq->ready; >>> + if (is_ctrl_vq_idx(mvdev, idx)) >>> + return mvdev->cvq.ready; >>> + >>> + return ndev->vqs[idx].ready; >>> } >>> static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, >>> @@ -1443,11 +1610,17 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, >>> { >>> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); >>> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); >>> - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; >>> + struct mlx5_vdpa_virtqueue *mvq; >>> if (!is_index_valid(mvdev, idx)) >>> return -EINVAL; >>> + if (is_ctrl_vq_idx(mvdev, idx)) { >>> + mvdev->cvq.vring.last_avail_idx = state->split.avail_index; >> >> Question, is packed virtqueue supported by current mlx5e? >> >> If no, this is fine. > We don't. The hardware might support but the device driver does not > advertise packed virtqueue support.Good to know this. So we're fine. Thanks> >> If yes, we should disable packed and re-enable it after vringh supports >> packed virtqueue. >> >> Other looks good. 
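
(A side note on the packed ring question above: if the firmware ever starts
advertising VIRTIO_F_RING_PACKED before the vringh-based CVQ can parse packed
descriptors, a simple guard, sketched here and not part of this patch, would
be to mask the bit where mlx5_vdpa_get_features() builds the feature set:

	/* Sketch only: keep packed rings off until the software CVQ
	 * (vringh) can handle the packed format.
	 */
	ndev->mvdev.mlx_features &= ~BIT_ULL(VIRTIO_F_RING_PACKED);

Today this is a no-op since the driver never sets that bit.)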
>> >> Thanks >> >> >>> + return 0; >>> + } >>> + >>> + mvq = &ndev->vqs[idx]; >>> if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) { >>> mlx5_vdpa_warn(mvdev, "can't modify available index\n"); >>> return -EINVAL; >>> @@ -1462,13 +1635,19 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa >>> { >>> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); >>> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); >>> - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; >>> + struct mlx5_vdpa_virtqueue *mvq; >>> struct mlx5_virtq_attr attr; >>> int err; >>> if (!is_index_valid(mvdev, idx)) >>> return -EINVAL; >>> + if (is_ctrl_vq_idx(mvdev, idx)) { >>> + state->split.avail_index = mvdev->cvq.vring.last_avail_idx; >>> + return 0; >>> + } >>> + >>> + mvq = &ndev->vqs[idx]; >>> /* If the virtq object was destroyed, use the value saved at >>> * the last minute of suspend_vq. This caters for userspace >>> * that cares about emulating the index after vq is stopped. >>> @@ -1525,10 +1704,13 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev) >>> u16 dev_features; >>> dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask); >>> - ndev->mvdev.mlx_features = mlx_to_vritio_features(dev_features); >>> + ndev->mvdev.mlx_features |= mlx_to_vritio_features(dev_features); >>> if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0)) >>> ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1); >>> ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM); >>> + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ); >>> + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR); >>> + >>> print_features(mvdev, ndev->mvdev.mlx_features, false); >>> return ndev->mvdev.mlx_features; >>> } >>> @@ -1544,6 +1726,7 @@ static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features) >>> static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) >>> { >>> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); >>> + struct mlx5_control_vq *cvq = &mvdev->cvq; >>> int err; >>> int i; >>> @@ -1553,6 +1736,16 @@ static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) >>> goto err_vq; >>> } >>> + if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { >>> + err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, >>> + MLX5_CVQ_MAX_ENT, false, >>> + (struct vring_desc *)(uintptr_t)cvq->desc_addr, >>> + (struct vring_avail *)(uintptr_t)cvq->driver_addr, >>> + (struct vring_used *)(uintptr_t)cvq->device_addr); >>> + if (err) >>> + goto err_vq; >>> + } >>> + >>> return 0; >>> err_vq: >>> @@ -1937,7 +2130,7 @@ static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device >>> struct mlx5_vdpa_net *ndev; >>> phys_addr_t addr; >>> - if (!is_index_valid(mvdev, idx)) >>> + if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) >>> return ret; >>> /* If SF BAR size is smaller than PAGE_SIZE, do not use direct >>> @@ -2114,8 +2307,11 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) >>> err = mlx5_mpfs_add_mac(pfmdev, config->mac); >>> if (err) >>> goto err_mtu; >>> + >>> + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC); >>> } >>> + config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs)); >>> mvdev->vdev.dma_dev = &mdev->pdev->dev; >>> err = mlx5_vdpa_alloc_resources(&ndev->mvdev); >>> if (err) >>> @@ -2131,8 +2327,15 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) >>> if (err) >>> goto err_mr; >>> + 
mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq"); >>> + if (!mvdev->wq) { >>> + err = -ENOMEM; >>> + goto err_res2; >>> + } >>> + >>> + ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs); >>> mvdev->vdev.mdev = &mgtdev->mgtdev; >>> - err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs)); >>> + err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1); >>> if (err) >>> goto err_reg; >>> @@ -2140,6 +2343,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) >>> return 0; >>> err_reg: >>> + destroy_workqueue(mvdev->wq); >>> +err_res2: >>> free_resources(ndev); >>> err_mr: >>> mlx5_vdpa_destroy_mr(mvdev); >>> @@ -2157,7 +2362,9 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) >>> static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev) >>> { >>> struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); >>> + struct mlx5_vdpa_dev *mvdev = to_mvdev(dev); >>> + destroy_workqueue(mvdev->wq); >>> _vdpa_unregister_device(dev); >>> mgtdev->ndev = NULL; >>> }