thr3ads.net - Virtualization - [Patch v1 2/3] vdpa/mlx5: Add support for control VQ and MAC setting [Aug 2021]

If this information is useful, please help other people find it:
Share via:
Jason Wang
2021-Aug-10 08:36 UTC
[Patch v1 2/3] vdpa/mlx5: Add support for control VQ and MAC setting

在 2021/8/10 下午3:17, Eli Cohen 写道:
> On Tue, Aug 10, 2021 at 12:32:23PM +0800, Jason Wang wrote:
>> 在 2021/8/9 下午10:07, Eli Cohen 写道:
>>> Add support to handle control virtqueue configurations per virtio
>>> specification. The control virtqueue is implemented in software and no
>>> hardware offloading is involved.
>>>
>>> Control VQ configuration needs task context, therefore all configurations
>>> are handled in a workqueue created for the purpose.
>>>
>>> Modifications are made to the memory registration code to allow for
>>> saving a copy of iotlb to be used by the control VQ to access the vring.
>>>
>>> The max number of data virtqueues supported by the driver has been
>>> updated to 2 since multiqueue is not supported at this stage and we need
>>> to ensure consistency of VQ indices mapping to either data or control
>>> VQ.
>>>
>>> v0 --> v1:
>>> cleanup some leftover code
>>>
>>> Signed-off-by: Eli Cohen <elic at nvidia.com>
>>> ---
>>>    drivers/vdpa/mlx5/core/mlx5_vdpa.h |  23 +++
>>>    drivers/vdpa/mlx5/core/mr.c        |  87 ++++++--
>>>    drivers/vdpa/mlx5/core/resources.c |  31 +++
>>>    drivers/vdpa/mlx5/net/mlx5_vnet.c  | 307
+++++++++++++++++++++++++----
>>>    4 files changed, 391 insertions(+), 57 deletions(-)
>>>
>>> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>> index 8d0a6f2cb3f0..71bb29fcf4ca 100644
>>> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
>>> @@ -5,6 +5,7 @@
>>>    #define __MLX5_VDPA_H__
>>>    #include <linux/etherdevice.h>
>>> +#include <linux/vringh.h>
>>>    #include <linux/vdpa.h>
>>>    #include <linux/mlx5/driver.h>
>>> @@ -47,6 +48,26 @@ struct mlx5_vdpa_resources {
>>>    	bool valid;
>>>    };
>>> +struct mlx5_control_vq {
>>> +	struct vhost_iotlb *iotlb;
>>> +	/* spinlock to synchronize iommu table */
>>> +	spinlock_t iommu_lock;
>>> +	struct vringh vring;
>>> +	bool ready;
>>> +	u64 desc_addr;
>>> +	u64 device_addr;
>>> +	u64 driver_addr;
>>> +	struct vdpa_callback event_cb;
>>> +	struct vringh_kiov riov;
>>> +	struct vringh_kiov wiov;
>>> +	unsigned short head;
>>> +};
>>> +
>>> +struct mlx5_ctrl_wq_ent {
>>> +	struct work_struct work;
>>> +	struct mlx5_vdpa_dev *mvdev;
>>> +};
>>> +
>>>    struct mlx5_vdpa_dev {
>>>    	struct vdpa_device vdev;
>>>    	struct mlx5_core_dev *mdev;
>>> @@ -59,6 +80,8 @@ struct mlx5_vdpa_dev {
>>>    	u32 generation;
>>>    	struct mlx5_vdpa_mr mr;
>>> +	struct mlx5_control_vq cvq;
>>> +	struct workqueue_struct *wq;
>>>    };
>>>    int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16
uid);
>>> diff --git a/drivers/vdpa/mlx5/core/mr.c
b/drivers/vdpa/mlx5/core/mr.c
>>> index dcee6039e966..46a657ebb1df 100644
>>> --- a/drivers/vdpa/mlx5/core/mr.c
>>> +++ b/drivers/vdpa/mlx5/core/mr.c
>>> @@ -1,6 +1,7 @@
>>>    // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
>>>    /* Copyright (c) 2020 Mellanox Technologies Ltd. */
>>> +#include <linux/vhost_types.h>
>>>    #include <linux/vdpa.h>
>>>    #include <linux/gcd.h>
>>>    #include <linux/string.h>
>>> @@ -451,33 +452,38 @@ static void destroy_dma_mr(struct
mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
>>>    	mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey);
>>>    }
>>> -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
struct vhost_iotlb *iotlb)
>>> +static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct
vhost_iotlb *src)
>>>    {
>>> -	struct mlx5_vdpa_mr *mr = &mvdev->mr;
>>> +	struct vhost_iotlb_map *map;
>>> +	u64 start = 0ULL, last = 0ULL - 1;
>>>    	int err;
>>> -	if (mr->initialized)
>>> -		return 0;
>>> -
>>> -	if (iotlb)
>>> -		err = create_user_mr(mvdev, iotlb);
>>> -	else
>>> -		err = create_dma_mr(mvdev, mr);
>>> -
>>> -	if (!err)
>>> -		mr->initialized = true;
>>> +	if (!src) {
>>> +		err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last,
start, VHOST_ACCESS_RW);
>>> +		return err;
>>> +	}
>>> -	return err;
>>> +	for (map = vhost_iotlb_itree_first(src, start, last); map;
>>> +		map = vhost_iotlb_itree_next(map, start, last)) {
>>> +		err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start,
map->last,
>>> +					    map->addr, map->perm);
>>> +		if (err)
>>> +			return err;
>>> +	}
>>> +	return 0;
>>>    }
>>> -int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct
vhost_iotlb *iotlb)
>>> +static void prune_iotlb(struct mlx5_vdpa_dev *mvdev)
>>>    {
>>> -	int err;
>>> +	struct mlx5_vdpa_mr *mr = &mvdev->mr;
>>> +	u64 start = 0ULL, last = 0ULL - 1;
>>> -	mutex_lock(&mvdev->mr.mkey_mtx);
>>> -	err = _mlx5_vdpa_create_mr(mvdev, iotlb);
>>> -	mutex_unlock(&mvdev->mr.mkey_mtx);
>>> -	return err;
>>> +	if (!mr->user_mr) {
>>> +		vhost_iotlb_del_range(mvdev->cvq.iotlb, start, last);
>>> +		return;
>>> +	}
>>> +
>>> +	vhost_iotlb_del_range(mvdev->cvq.iotlb, start, last);
>>
>> It looks to me like we don't need the if (!mr->user_mr) check here.
> Right, will fix.
>
>>
>>>    }
>>>    static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct
mlx5_vdpa_mr *mr)
>>> @@ -501,6 +507,7 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev
*mvdev)
>>>    	if (!mr->initialized)
>>>    		goto out;
>>> +	prune_iotlb(mvdev);
>>>    	if (mr->user_mr)
>>>    		destroy_user_mr(mvdev, mr);
>>>    	else
>>> @@ -512,6 +519,48 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev
*mvdev)
>>>    	mutex_unlock(&mr->mkey_mtx);
>>>    }
>>> +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
struct vhost_iotlb *iotlb)
>>> +{
>>> +	struct mlx5_vdpa_mr *mr = &mvdev->mr;
>>> +	int err;
>>> +
>>> +	if (mr->initialized)
>>> +		return 0;
>>> +
>>> +	if (iotlb)
>>> +		err = create_user_mr(mvdev, iotlb);
>>> +	else
>>> +		err = create_dma_mr(mvdev, mr);
>>> +
>>> +	if (err)
>>> +		return err;
>>> +
>>> +	err = dup_iotlb(mvdev, iotlb);
>>> +	if (err)
>>> +		goto out_err;
>>> +
>>> +	mr->initialized = true;
>>> +	return 0;
>>> +
>>> +out_err:
>>> +	if (iotlb)
>>> +		destroy_user_mr(mvdev, mr);
>>> +	else
>>> +		destroy_dma_mr(mvdev, mr);
>>> +
>>> +	return err;
>>> +}
>>> +
>>> +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct
vhost_iotlb *iotlb)
>>> +{
>>> +	int err;
>>> +
>>> +	mutex_lock(&mvdev->mr.mkey_mtx);
>>> +	err = _mlx5_vdpa_create_mr(mvdev, iotlb);
>>> +	mutex_unlock(&mvdev->mr.mkey_mtx);
>>> +	return err;
>>> +}
>>> +
>>>    static bool map_empty(struct vhost_iotlb *iotlb)
>>>    {
>>>    	return !vhost_iotlb_itree_first(iotlb, 0, U64_MAX);
>>> diff --git a/drivers/vdpa/mlx5/core/resources.c
b/drivers/vdpa/mlx5/core/resources.c
>>> index d4606213f88a..d24ae1a85159 100644
>>> --- a/drivers/vdpa/mlx5/core/resources.c
>>> +++ b/drivers/vdpa/mlx5/core/resources.c
>>> @@ -1,6 +1,7 @@
>>>    // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
>>>    /* Copyright (c) 2020 Mellanox Technologies Ltd. */
>>> +#include <linux/iova.h>
>>>    #include <linux/mlx5/driver.h>
>>>    #include "mlx5_vdpa.h"
>>> @@ -221,6 +222,28 @@ int mlx5_vdpa_destroy_mkey(struct
mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *m
>>>    	return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in);
>>>    }
>>> +static int init_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
>>> +{
>>> +	int err;
>>> +
>>> +	mvdev->cvq.iotlb = vhost_iotlb_alloc(0, 0);
>>> +	if (!mvdev->cvq.iotlb)
>>> +		return -ENOMEM;
>>> +
>>> +	vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb,
&mvdev->cvq.iommu_lock);
>>> +	err = iova_cache_get();
>>> +	if (err)
>>> +		vhost_iotlb_free(mvdev->cvq.iotlb);
>>> +
>>> +	return err;
>>> +}
>>> +
>>> +static void cleanup_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
>>> +{
>>> +	iova_cache_put();
>>> +	vhost_iotlb_free(mvdev->cvq.iotlb);
>>> +}
>>> +
>>>    int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
>>>    {
>>>    	u64 offset = MLX5_CAP64_DEV_VDPA_EMULATION(mvdev->mdev,
doorbell_bar_offset);
>>> @@ -260,10 +283,17 @@ int mlx5_vdpa_alloc_resources(struct
mlx5_vdpa_dev *mvdev)
>>>    		err = -ENOMEM;
>>>    		goto err_key;
>>>    	}
>>> +
>>> +	err = init_ctrl_vq(mvdev);
>>> +	if (err)
>>> +		goto err_ctrl;
>>> +
>>>    	res->valid = true;
>>>    	return 0;
>>> +err_ctrl:
>>> +	iounmap(res->kick_addr);
>>>    err_key:
>>>    	dealloc_pd(mvdev, res->pdn, res->uid);
>>>    err_pd:
>>> @@ -282,6 +312,7 @@ void mlx5_vdpa_free_resources(struct
mlx5_vdpa_dev *mvdev)
>>>    	if (!res->valid)
>>>    		return;
>>> +	cleanup_ctrl_vq(mvdev);
>>>    	iounmap(res->kick_addr);
>>>    	res->kick_addr = NULL;
>>>    	dealloc_pd(mvdev, res->pdn, res->uid);
>>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c
b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>> index 2a31467f7ac5..46448b079aca 100644
>>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>> @@ -133,7 +133,7 @@ struct mlx5_vdpa_virtqueue {
>>>    /* We will remove this limitation once
mlx5_vdpa_alloc_resources()
>>>     * provides for driver space allocation
>>>     */
>>> -#define MLX5_MAX_SUPPORTED_VQS 16
>>> +#define MLX5_MAX_SUPPORTED_VQS 2
>>>    struct mlx5_vdpa_net {
>>>    	struct mlx5_vdpa_dev mvdev;
>>> @@ -151,15 +151,18 @@ struct mlx5_vdpa_net {
>>>    	struct mlx5_flow_handle *rx_rule;
>>>    	bool setup;
>>>    	u16 mtu;
>>> +	u32 cur_num_vqs;
>>>    };
>>>    static void free_resources(struct mlx5_vdpa_net *ndev);
>>>    static void init_mvqs(struct mlx5_vdpa_net *ndev);
>>> -static int setup_driver(struct mlx5_vdpa_net *ndev);
>>> +static int setup_driver(struct mlx5_vdpa_dev *mvdev);
>>>    static void teardown_driver(struct mlx5_vdpa_net *ndev);
>>>    static bool mlx5_vdpa_debug;
>>> +#define MLX5_CVQ_MAX_ENT 16
>>> +
>>>    #define MLX5_LOG_VIO_FLAG(_feature)                             
\
>>>    	do {                                                           
\
>>>    		if (features & BIT_ULL(_feature))                         
\
>>> @@ -172,11 +175,33 @@ static bool mlx5_vdpa_debug;
>>>    			mlx5_vdpa_info(mvdev, "%s\n", #_status);           
\
>>>    	} while (0)
>>> +/* TODO: cross-endian support */
>>> +static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev
*mvdev)
>>> +{
>>> +	return virtio_legacy_is_little_endian() ||
>>> +		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
>>> +}
>>> +
>>> +static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev,
u16 val)
>>> +{
>>> +	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
>>> +}
>>> +
>>>    static inline u32 mlx5_vdpa_max_qps(int max_vqs)
>>>    {
>>>    	return max_vqs / 2;
>>>    }
>>> +static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
>>> +{
>>> +	return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
>>> +}
>>> +
>>> +static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
>>> +{
>>> +	return idx == ctrl_vq_idx(mvdev);
>>> +}
>>> +
>>>    static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status,
bool set)
>>>    {
>>>    	if (status & ~VALID_STATUS_MASK)
>>> @@ -1346,12 +1371,139 @@ static void remove_fwd_to_tir(struct
mlx5_vdpa_net *ndev)
>>>    	ndev->rx_rule = NULL;
>>>    }
>>> +static int update_fwd_to_tir(struct mlx5_vdpa_net *ndev)
>>> +{
>>> +	remove_fwd_to_tir(ndev);
>>> +	return add_fwd_to_tir(ndev);
>>> +}
>>> +
>>> +virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev,
u8 cmd)
>>> +{
>>> +	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> +	struct mlx5_control_vq *cvq = &mvdev->cvq;
>>> +	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
>>> +	struct mlx5_core_dev *pfmdev;
>>> +	size_t read;
>>> +	u8 mac[6];
>>> +
>>> +	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
>>> +	switch (cmd) {
>>> +	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
>>> +		read = vringh_iov_pull_iotlb(&cvq->vring,
&cvq->riov, (void *)mac, ETH_ALEN);
>>> +		if (read != ETH_ALEN)
>>> +			break;
>>> +
>>> +		if (!memcmp(ndev->config.mac, mac, 6)) {
>>> +			status = VIRTIO_NET_OK;
>>> +			break;
>>> +		}
>>> +
>>> +		if (!is_zero_ether_addr(ndev->config.mac)) {
>>> +			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
>>> +				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from
MPFS table\n",
>>> +					       ndev->config.mac);
>>> +				break;
>>> +			}
>>> +		}
>>> +
>>> +		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
>>> +			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into
MPFS table\n",
>>> +				       mac);
>>> +			break;
>>> +		}
>>> +
>>> +		memcpy(ndev->config.mac, mac, 6);
>>
>> Let's use ETH_ALEN.
> Will do, here and in the local variable at the beginning of the function.
>
>>
>>> +		if (!update_fwd_to_tir(ndev))
>>> +			status = VIRTIO_NET_OK;
>>
>> I think it's better to update the config after update_fwd_to_tir()
>> succeeds.
> add_fwd_to_tir() relies on whatever value the config has. I will modify
> the code to pass the mac as an argument to update_fwd_to_tir().

That should work.

>
>>
>>> +		break;
>>> +
>>> +	default:
>>> +		break;
>>> +	}
>>> +
>>> +	return status;
>>> +}
>>> +
>>> +static void mlx5_cvq_kick_handler(struct work_struct *work)
>>> +{
>>> +	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
>>> +	struct virtio_net_ctrl_hdr ctrl;
>>> +	struct mlx5_ctrl_wq_ent *wqent;
>>> +	struct mlx5_vdpa_dev *mvdev;
>>> +	struct mlx5_control_vq *cvq;
>>> +	struct mlx5_vdpa_net *ndev;
>>> +	size_t read, write;
>>> +	int err;
>>> +
>>> +	wqent = container_of(work, struct mlx5_ctrl_wq_ent, work);
>>> +	mvdev = wqent->mvdev;
>>> +	ndev = to_mlx5_vdpa_ndev(mvdev);
>>> +	cvq = &mvdev->cvq;
>>> +	if (!(ndev->mvdev.actual_features &
BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
>>> +		goto out;
>>> +
>>> +	if (!cvq->ready)
>>> +		goto out;
>>> +
>>> +	while (true) {
>>> +		err = vringh_getdesc_iotlb(&cvq->vring,
&cvq->riov, &cvq->wiov, &cvq->head,
>>> +					   GFP_ATOMIC);
>>> +		if (err <= 0)
>>> +			break;
>>> +
>>> +		read = vringh_iov_pull_iotlb(&cvq->vring,
&cvq->riov, &ctrl, sizeof(ctrl));
>>> +		if (read != sizeof(ctrl))
>>> +			break;
>>> +
>>> +		switch (ctrl.class) {
>>> +		case VIRTIO_NET_CTRL_MAC:
>>> +			status = handle_ctrl_mac(mvdev, ctrl.cmd);
>>> +			break;
>>> +
>>> +		default:
>>> +			break;
>>> +		}
>>> +
>>> +		/* Make sure data is written before advancing index */
>>> +		smp_wmb();
>>> +
>>> +		write = vringh_iov_push_iotlb(&cvq->vring,
&cvq->wiov, &status, sizeof(status));
>>> +		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
>>> +		vringh_kiov_cleanup(&cvq->riov);
>>> +		vringh_kiov_cleanup(&cvq->wiov);
>>> +
>>> +		/* Make sure used is visible before rasing the interrupt. */
>>> +		smp_wmb();
>>> +
>>> +		local_bh_disable();
>>> +		if (cvq->event_cb.callback)
>>> +			cvq->event_cb.callback(cvq->event_cb.private);
>>
>> Let's use the vringh helper instead (it can deal with e.g. event index
>> stuff):
>>
>> if (vringh_need_notify_iotlb(cvq))
>>     vringh_notify(cvq);
>>
> Will look at this.
>
>>> +
>>> +		local_bh_enable();
>>> +	}
>>> +out:
>>> +	kfree(wqent);
>>> +}
>>> +
>>>    static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
>>>    {
>>>    	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>>>    	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> -	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
>>> +	struct mlx5_vdpa_virtqueue *mvq;
>>> +	struct mlx5_ctrl_wq_ent *wqent;
>>> +
>>> +	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
>>> +		wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
>>> +		if (!wqent)
>>> +			return;
>>> +
>>> +		wqent->mvdev = mvdev;
>>> +		INIT_WORK(&wqent->work, mlx5_cvq_kick_handler);
>>> +		queue_work(mvdev->wq, &wqent->work);
>>> +		return;
>>> +	}
>>> +	mvq = &ndev->vqs[idx];
>>>    	if (unlikely(!mvq->ready))
>>>    		return;
>>> @@ -1363,8 +1515,16 @@ static int mlx5_vdpa_set_vq_address(struct
vdpa_device *vdev, u16 idx, u64 desc_
>>>    {
>>>    	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>>>    	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> -	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
>>> +	struct mlx5_vdpa_virtqueue *mvq;
>>> +	if (is_ctrl_vq_idx(mvdev, idx)) {
>>> +		mvdev->cvq.desc_addr = desc_area;
>>> +		mvdev->cvq.device_addr = device_area;
>>> +		mvdev->cvq.driver_addr = driver_area;
>>> +		return 0;
>>> +	}
>>> +
>>> +	mvq = &ndev->vqs[idx];
>>>    	mvq->desc_addr = desc_area;
>>>    	mvq->device_addr = device_area;
>>>    	mvq->driver_addr = driver_area;
>>> @@ -1377,6 +1537,9 @@ static void mlx5_vdpa_set_vq_num(struct
vdpa_device *vdev, u16 idx, u32 num)
>>>    	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>>    	struct mlx5_vdpa_virtqueue *mvq;
>>> +	if (is_ctrl_vq_idx(mvdev, idx))
>>> +		return;
>>> +
>>>    	mvq = &ndev->vqs[idx];
>>>    	mvq->num_ent = num;
>>>    }
>>> @@ -1385,17 +1548,50 @@ static void mlx5_vdpa_set_vq_cb(struct
vdpa_device *vdev, u16 idx, struct vdpa_c
>>>    {
>>>    	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>>>    	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> -	struct mlx5_vdpa_virtqueue *vq = &ndev->vqs[idx];
>>> -	vq->event_cb = *cb;
>>> +	if (is_ctrl_vq_idx(mvdev, idx)) {
>>> +		mvdev->cvq.event_cb = *cb;
>>> +		return;
>>> +	}
>>> +
>>> +	ndev->vqs[idx].event_cb = *cb;
>>
>> I wonder whether we can simply treat cvq as a normal vq here and just use
>> the ndev->vqs[idx].event_cb.
>>
> Not really. CVQ is a software vq while the data VQs are hardware VQs so
> we have different data structs to hold their contexts.

Right. Thinking about this again, it's actually even better, since the index of
the cvq is not fixed.

>
>>> +}
>>> +
>>> +static void mlx5_cvq_notify(struct vringh *vring)
>>> +{
>>> +	struct mlx5_control_vq *cvq = container_of(vring, struct
mlx5_control_vq, vring);
>>> +
>>> +	if (!cvq->event_cb.callback)
>>> +		return;
>>> +
>>> +	cvq->event_cb.callback(cvq->event_cb.private);
>>> +}
>>> +
>>> +static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
>>> +{
>>> +	struct mlx5_control_vq *cvq = &mvdev->cvq;
>>> +
>>> +	WARN_ON(cvq->ready && ready);
>>> +	WARN_ON(!cvq->ready && !ready);
>>
>> It looks to me like this is triggerable from userspace, e.g. a buggy userspace
>> driver can cause this. So it's better not to warn here.
>>
> Agree, will remove.
>>> +	cvq->ready = ready;
>>> +	if (!ready)
>>> +		return;
>>> +
>>> +	cvq->vring.notify = mlx5_cvq_notify;
>>>    }
>>>    static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16
idx, bool ready)
>>>    {
>>>    	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>>>    	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> -	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
>>> +	struct mlx5_vdpa_virtqueue *mvq;
>>> +	if (is_ctrl_vq_idx(mvdev, idx)) {
>>> +		set_cvq_ready(mvdev, ready);
>>> +		return;
>>> +	}
>>> +
>>> +	mvq = &ndev->vqs[idx];
>>>    	if (!ready)
>>>    		suspend_vq(ndev, mvq);
>>> @@ -1406,9 +1602,11 @@ static bool mlx5_vdpa_get_vq_ready(struct
vdpa_device *vdev, u16 idx)
>>>    {
>>>    	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>>>    	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> -	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
>>> -	return mvq->ready;
>>> +	if (is_ctrl_vq_idx(mvdev, idx))
>>> +		return mvdev->cvq.ready;
>>> +
>>> +	return ndev->vqs[idx].ready;
>>>    }
>>>    static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16
idx,
>>> @@ -1416,8 +1614,14 @@ static int mlx5_vdpa_set_vq_state(struct
vdpa_device *vdev, u16 idx,
>>>    {
>>>    	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>>>    	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> -	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
>>> +	struct mlx5_vdpa_virtqueue *mvq;
>>> +
>>> +	if (is_ctrl_vq_idx(mvdev, idx)) {
>>> +		mvdev->cvq.vring.last_avail_idx =
state->split.avail_index;
>>
>> Interesting; consider that vringh can only support the split ring. I wonder
>> whether we need to fail the feature negotiation when both cvq and packed ring
>> are negotiated.
>>
> Why do that? In the case of mlx5 we can live with it. Although I
> currently do not advertise split virtqueue support, the hardware could
> support in the future packed virtqueue and I could live with packed
> data VQs and split for control. So it should be up to the vdpa driver to
> take care of that.

I think you are using vringh to decode commands in the software control
virtqueue now. Since vringh doesn't support packed ring, when the guest
is using packed ring for the control virtqueue, we break.

(I think mlx5 supports packed ring, but if it doesn't, the code is fine).

>
>>> +		return 0;
>>> +	}
>>> +	mvq = &ndev->vqs[idx];
>>>    	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
>>>    		mlx5_vdpa_warn(mvdev, "can't modify available
index\n");
>>>    		return -EINVAL;
>>> @@ -1432,10 +1636,16 @@ static int mlx5_vdpa_get_vq_state(struct
vdpa_device *vdev, u16 idx, struct vdpa
>>>    {
>>>    	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>>>    	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> -	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
>>> +	struct mlx5_vdpa_virtqueue *mvq;
>>>    	struct mlx5_virtq_attr attr;
>>>    	int err;
>>> +	if (is_ctrl_vq_idx(mvdev, idx)) {
>>> +		state->split.avail_index =
mvdev->cvq.vring.last_avail_idx;
>>> +		return 0;
>>> +	}
>>> +
>>> +	mvq = &ndev->vqs[idx];
>>>    	/* If the virtq object was destroyed, use the value saved at
>>>    	 * the last minute of suspend_vq. This caters for userspace
>>>    	 * that cares about emulating the index after vq is stopped.
>>> @@ -1492,10 +1702,13 @@ static u64 mlx5_vdpa_get_features(struct
vdpa_device *vdev)
>>>    	u16 dev_features;
>>>    	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev,
device_features_bits_mask);
>>> -	ndev->mvdev.mlx_features =
mlx_to_vritio_features(dev_features);
>>> +	ndev->mvdev.mlx_features |=
mlx_to_vritio_features(dev_features);
>>>    	if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev,
virtio_version_1_0))
>>>    		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
>>>    	ndev->mvdev.mlx_features |=
BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
>>> +	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
>>> +	ndev->mvdev.mlx_features |=
BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
>>> +
>>>    	print_features(mvdev, ndev->mvdev.mlx_features, false);
>>>    	return ndev->mvdev.mlx_features;
>>>    }
>>> @@ -1508,8 +1721,10 @@ static int verify_min_features(struct
mlx5_vdpa_dev *mvdev, u64 features)
>>>    	return 0;
>>>    }
>>> -static int setup_virtqueues(struct mlx5_vdpa_net *ndev)
>>> +static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
>>>    {
>>> +	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>> +	struct mlx5_control_vq *cvq = &mvdev->cvq;
>>>    	int err;
>>>    	int i;
>>> @@ -1519,6 +1734,16 @@ static int setup_virtqueues(struct
mlx5_vdpa_net *ndev)
>>>    			goto err_vq;
>>>    	}
>>> +	if (mvdev->actual_features &
BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
>>> +		err = vringh_init_iotlb(&cvq->vring,
mvdev->actual_features,
>>> +					MLX5_CVQ_MAX_ENT, false,
>>> +					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
>>> +					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
>>> +					(struct vring_used *)(uintptr_t)cvq->device_addr);
>>> +		if (err)
>>> +			goto err_vq;
>>> +	}
>>> +
>>>    	return 0;
>>>    err_vq:
>>> @@ -1542,18 +1767,6 @@ static void teardown_virtqueues(struct
mlx5_vdpa_net *ndev)
>>>    	}
>>>    }
>>> -/* TODO: cross-endian support */
>>> -static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev
*mvdev)
>>> -{
>>> -	return virtio_legacy_is_little_endian() ||
>>> -		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
>>> -}
>>> -
>>> -static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev,
u16 val)
>>> -{
>>> -	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
>>> -}
>>> -
>>>    static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64
features)
>>>    {
>>>    	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>>> @@ -1672,8 +1885,9 @@ static void restore_channels_info(struct
mlx5_vdpa_net *ndev)
>>>    	}
>>>    }
>>> -static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct
vhost_iotlb *iotlb)
>>> +static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
struct vhost_iotlb *iotlb)
>>>    {
>>> +	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>>    	int err;
>>>    	suspend_vqs(ndev);
>>> @@ -1691,7 +1905,7 @@ static int mlx5_vdpa_change_map(struct
mlx5_vdpa_net *ndev, struct vhost_iotlb *
>>>    		return 0;
>>>    	restore_channels_info(ndev);
>>> -	err = setup_driver(ndev);
>>> +	err = setup_driver(mvdev);
>>>    	if (err)
>>>    		goto err_setup;
>>> @@ -1703,37 +1917,38 @@ static int mlx5_vdpa_change_map(struct
mlx5_vdpa_net *ndev, struct vhost_iotlb *
>>>    	return err;
>>>    }
>>> -static int setup_driver(struct mlx5_vdpa_net *ndev)
>>> +static int setup_driver(struct mlx5_vdpa_dev *mvdev)
>>>    {
>>> +	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>>    	int err;
>>>    	mutex_lock(&ndev->reslock);
>>>    	if (ndev->setup) {
>>> -		mlx5_vdpa_warn(&ndev->mvdev, "setup driver called
for already setup driver\n");
>>> +		mlx5_vdpa_warn(mvdev, "setup driver called for already
setup driver\n");
>>
>> It would be better to split those tweaks into another patch. (Otherwise it's
>> not easy to infer how they are related to the control vq support.)
> OK, the point is that the CVQ belongs to mlx5_vdpa_dev so I had to pass
> it and make these changes, but will do it.
>
>> Btw, I don't see how reset is being handled for cvq?
> I guess you mean the opposite operation to vringh_init_iotlb() issued in
> setup_virtqueues(). Is there such a cleanup call?

I think it should be something similar to what vdpa_sim did (calling
vringh_init_iotlb() again) to reset the vringh internal state:

        vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
                          VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);

Thanks

>> Thanks
>>
>>
>>>    		err = 0;
>>>    		goto out;
>>>    	}
>>> -	err = setup_virtqueues(ndev);
>>> +	err = setup_virtqueues(mvdev);
>>>    	if (err) {
>>> -		mlx5_vdpa_warn(&ndev->mvdev,
"setup_virtqueues\n");
>>> +		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
>>>    		goto out;
>>>    	}
>>>    	err = create_rqt(ndev);
>>>    	if (err) {
>>> -		mlx5_vdpa_warn(&ndev->mvdev, "create_rqt\n");
>>> +		mlx5_vdpa_warn(mvdev, "create_rqt\n");
>>>    		goto err_rqt;
>>>    	}
>>>    	err = create_tir(ndev);
>>>    	if (err) {
>>> -		mlx5_vdpa_warn(&ndev->mvdev, "create_tir\n");
>>> +		mlx5_vdpa_warn(mvdev, "create_tir\n");
>>>    		goto err_tir;
>>>    	}
>>>    	err = add_fwd_to_tir(ndev);
>>>    	if (err) {
>>> -		mlx5_vdpa_warn(&ndev->mvdev,
"add_fwd_to_tir\n");
>>> +		mlx5_vdpa_warn(mvdev, "add_fwd_to_tir\n");
>>>    		goto err_fwd;
>>>    	}
>>>    	ndev->setup = true;
>>> @@ -1799,7 +2014,7 @@ static void mlx5_vdpa_set_status(struct
vdpa_device *vdev, u8 status)
>>>    	if ((status ^ ndev->mvdev.status) &
VIRTIO_CONFIG_S_DRIVER_OK) {
>>>    		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
>>> -			err = setup_driver(ndev);
>>> +			err = setup_driver(mvdev);
>>>    			if (err) {
>>>    				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
>>>    				goto err_setup;
>>> @@ -1849,7 +2064,6 @@ static u32 mlx5_vdpa_get_generation(struct
vdpa_device *vdev)
>>>    static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct
vhost_iotlb *iotlb)
>>>    {
>>>    	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
>>> -	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
>>>    	bool change_map;
>>>    	int err;
>>> @@ -1860,7 +2074,7 @@ static int mlx5_vdpa_set_map(struct
vdpa_device *vdev, struct vhost_iotlb *iotlb
>>>    	}
>>>    	if (change_map)
>>> -		return mlx5_vdpa_change_map(ndev, iotlb);
>>> +		return mlx5_vdpa_change_map(mvdev, iotlb);
>>>    	return 0;
>>>    }
>>> @@ -1890,6 +2104,9 @@ static struct vdpa_notification_area
mlx5_get_vq_notification(struct vdpa_device
>>>    	struct mlx5_vdpa_net *ndev;
>>>    	phys_addr_t addr;
>>> +	if (is_ctrl_vq_idx(mvdev, idx))
>>> +		return ret;
>>> +
>>>    	/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
>>>    	 * notification to avoid the risk of mapping pages that contain
BAR of more
>>>    	 * than one SF
>>> @@ -2058,8 +2275,11 @@ static int mlx5_vdpa_dev_add(struct
vdpa_mgmt_dev *v_mdev, const char *name)
>>>    		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
>>>    		if (err)
>>>    			goto err_mtu;
>>> +
>>> +		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
>>>    	}
>>> +	config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev,
mlx5_vdpa_max_qps(max_vqs));
>>>    	mvdev->vdev.dma_dev = &mdev->pdev->dev;
>>>    	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
>>>    	if (err)
>>> @@ -2075,8 +2295,15 @@ static int mlx5_vdpa_dev_add(struct
vdpa_mgmt_dev *v_mdev, const char *name)
>>>    	if (err)
>>>    		goto err_mr;
>>> +	mvdev->wq =
create_singlethread_workqueue("mlx5_vdpa_ctrl_wq");
>>> +	if (!mvdev->wq) {
>>> +		err = -ENOMEM;
>>> +		goto err_res2;
>>> +	}
>>> +
>>> +	ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs);
>>>    	mvdev->vdev.mdev = &mgtdev->mgtdev;
>>> -	err = _vdpa_register_device(&mvdev->vdev, 2 *
mlx5_vdpa_max_qps(max_vqs));
>>> +	err = _vdpa_register_device(&mvdev->vdev,
ndev->cur_num_vqs + 1);
>>>    	if (err)
>>>    		goto err_reg;
>>> @@ -2084,6 +2311,8 @@ static int mlx5_vdpa_dev_add(struct
vdpa_mgmt_dev *v_mdev, const char *name)
>>>    	return 0;
>>>    err_reg:
>>> +	destroy_workqueue(mvdev->wq);
>>> +err_res2:
>>>    	free_resources(ndev);
>>>    err_mr:
>>>    	mlx5_vdpa_destroy_mr(mvdev);
>>> @@ -2101,7 +2330,9 @@ static int mlx5_vdpa_dev_add(struct
vdpa_mgmt_dev *v_mdev, const char *name)
>>>    static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev,
struct vdpa_device *dev)
>>>    {
>>>    	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct
mlx5_vdpa_mgmtdev, mgtdev);
>>> +	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
>>> +	destroy_workqueue(mvdev->wq);
>>>    	_vdpa_unregister_device(dev);
>>>    	mgtdev->ndev = NULL;
>>>    }
Virtualization - Aug 2021 - [Patch v1 2/3] vdpa/mlx5: Add support for control VQ and MAC setting

[Patch v1 2/3] vdpa/mlx5: Add support for control VQ and MAC setting