On 2021/8/17 2:02 PM, Eli Cohen wrote:
> Multiqueue support requires additional virtio_net_q objects to be added
> or removed per the configured number of queue pairs. In addition, the RQ
> table needs to be modified to match the number of configured receive
> queues so the packets are dispatched to the right virtqueue according to
> the hash result.
>
> Note: qemu v6.0.0 is broken when the device requests more than two data
> queues; no net device will be created for the vdpa device. To avoid
> this, one should specify mq=off to qemu. In this case, the guest will
> end up with a single queue.
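
For readers following the MQ flow: the control message handled by this
patch is the standard one from include/uapi/linux/virtio_net.h (quoted
here for context, not part of this patch):

	struct virtio_net_ctrl_mq {
		__virtio16 virtqueue_pairs;
	};

	#define VIRTIO_NET_CTRL_MQ			4
	 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
	 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
	 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000

The guest configures the number of active queue pairs by writing this
struct with VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET over the control VQ, which
is what handle_ctrl_mq() below services.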
>
> Signed-off-by: Eli Cohen <elic at nvidia.com>
> ---
> drivers/vdpa/mlx5/core/mlx5_vdpa.h | 1 +
> drivers/vdpa/mlx5/core/resources.c | 10 ++
> drivers/vdpa/mlx5/net/mlx5_vnet.c | 189 ++++++++++++++++++++++++-----
> 3 files changed, 169 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> index 6c43476a69cb..01a848adf590 100644
> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> @@ -91,6 +91,7 @@ int mlx5_vdpa_get_null_mkey(struct mlx5_vdpa_dev *dev, u32 *null_mkey);
> int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn);
> void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn);
> int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn);
> +int mlx5_vdpa_modify_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 rqtn);
> void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn);
> int mlx5_vdpa_create_tir(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tirn);
> void mlx5_vdpa_destroy_tir(struct mlx5_vdpa_dev *mvdev, u32 tirn);
> diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c
> index d24ae1a85159..bbdcf9a01a6d 100644
> --- a/drivers/vdpa/mlx5/core/resources.c
> +++ b/drivers/vdpa/mlx5/core/resources.c
> @@ -129,6 +129,16 @@ int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *
> return err;
> }
>
> +int mlx5_vdpa_modify_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 rqtn)
> +{
> + u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {};
> +
> + MLX5_SET(modify_rqt_in, in, uid, mvdev->res.uid);
> + MLX5_SET(modify_rqt_in, in, rqtn, rqtn);
> + MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT);
> + return mlx5_cmd_exec(mvdev->mdev, in, inlen, out, sizeof(out));
> +}
> +
> void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn)
> {
> u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index e18665781135..9cff3a49552f 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -133,7 +133,7 @@ struct mlx5_vdpa_virtqueue {
> /* We will remove this limitation once mlx5_vdpa_alloc_resources()
> * provides for driver space allocation
> */
> -#define MLX5_MAX_SUPPORTED_VQS 2
> +#define MLX5_MAX_SUPPORTED_VQS 16
I wonder if we can keep this unchanged, since the previous patch changed
it from 16 to 2.

Other than that:

Acked-by: Jason Wang <jasowang at redhat.com>
>
> static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
> {
> @@ -184,6 +184,23 @@ static bool mlx5_vdpa_debug;
> mlx5_vdpa_info(mvdev, "%s\n", #_status); \
> } while (0)
>
> +/* TODO: cross-endian support */
> +static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
> +{
> + return virtio_legacy_is_little_endian() ||
> + (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
> +}
> +
> +static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
> +{
> + return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
> +}
> +
> +static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
> +{
> + return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
> +}
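
For background (virtio spec behavior, not specific to this patch):
legacy virtio uses guest-native endianness, while VIRTIO_F_VERSION_1
mandates little-endian, hence the feature-bit check in the helper. A
usage sketch, mirroring what handle_ctrl_mq() further down does:

	/* mq.virtqueue_pairs is read raw from the control VQ in virtio
	 * endianness; convert to host CPU order before using it. */
	u16 newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);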
> +
> static inline u32 mlx5_vdpa_max_qps(int max_vqs)
> {
> return max_vqs / 2;
> @@ -191,6 +208,9 @@ static inline u32 mlx5_vdpa_max_qps(int max_vqs)
>
> static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
> {
> + if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
> + return 2;
> +
> return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
> }
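
This matches the virtio layout: without VIRTIO_NET_F_MQ there is exactly
one RX/TX pair at indices 0 and 1, so the control VQ sits at index 2;
with MQ it comes after all data queues. A worked example (values assumed
for illustration, not from the patch):

	/* Assume max_vqs = 16, i.e. mlx5_vdpa_max_qps() = 8 pairs.
	 *   VIRTIO_NET_F_MQ negotiated:     ctrl VQ index = 2 * 8 = 16
	 *   VIRTIO_NET_F_MQ not negotiated: ctrl VQ index = 2
	 */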
>
> @@ -1127,10 +1147,8 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
> if (!mvq->num_ent)
> return 0;
>
> - if (mvq->initialized) {
> - mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n");
> - return -EINVAL;
> - }
> + if (mvq->initialized)
> + return 0;
>
> err = cq_create(ndev, idx, mvq->num_ent);
> if (err)
> @@ -1217,19 +1235,20 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *
>
> static int create_rqt(struct mlx5_vdpa_net *ndev)
> {
> - int log_max_rqt;
> __be32 *list;
> + int max_rqt;
> void *rqtc;
> int inlen;
> void *in;
> int i, j;
> int err;
>
> - log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
> - if (log_max_rqt < 1)
> + max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
> + 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
> + if (max_rqt < 1)
> return -EOPNOTSUPP;
>
> - inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num);
> + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
> in = kzalloc(inlen, GFP_KERNEL);
> if (!in)
> return -ENOMEM;
> @@ -1238,10 +1257,9 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
> rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
>
> MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
> - MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt);
> - MLX5_SET(rqtc, rqtc, rqt_actual_size, 1);
> + MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
> list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
> - for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) {
> + for (i = 0, j = 0; j < max_rqt; j++) {
> if (!ndev->vqs[j].initialized)
> continue;
>
> @@ -1250,6 +1268,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
> i++;
> }
> }
> + MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
>
> err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
> kfree(in);
> @@ -1259,6 +1278,52 @@ static int create_rqt(struct mlx5_vdpa_net *ndev)
> return 0;
> }
>
> +#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
> +
> +int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
> +{
> + __be32 *list;
> + int max_rqt;
> + void *rqtc;
> + int inlen;
> + void *in;
> + int i, j;
> + int err;
> +
> + max_rqt = min_t(int, ndev->cur_num_vqs / 2,
> + 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
> + if (max_rqt < 1)
> + return -EOPNOTSUPP;
> +
> + inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
> + in = kzalloc(inlen, GFP_KERNEL);
> + if (!in)
> + return -ENOMEM;
> +
> + MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
> + MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
> + rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
> + MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
> +
> + list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
> + for (i = 0, j = 0; j < num; j++) {
> + if (!ndev->vqs[j].initialized)
> + continue;
> +
> + if (!vq_is_tx(ndev->vqs[j].index)) {
> + list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
> + i++;
> + }
> + }
> + MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
> + err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
> + kfree(in);
> + if (err)
> + return err;
> +
> + return 0;
> +}
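
FWIW, bit 0 of the modify_rqt bitmask selects the rqn_list field; mlx5e
expresses the same thing through the ifc bitfield, e.g.:

	MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1);

so the open-coded ((u64)1) above should be equivalent; just an
observation, not a request to change it.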
> +
> static void destroy_rqt(struct mlx5_vdpa_net *ndev)
> {
> mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
> @@ -1418,6 +1483,77 @@ virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
> return status;
> }
>
> +static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
> +{
> + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> + int cur_qps = ndev->cur_num_vqs / 2;
> + int err;
> + int i;
> +
> + if (cur_qps > newqps) {
> + err = modify_rqt(ndev, 2 * newqps);
> + if (err)
> + return err;
> +
> + for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
> + teardown_vq(ndev, &ndev->vqs[i]);
> +
> + ndev->cur_num_vqs = 2 * newqps;
> + } else {
> + ndev->cur_num_vqs = 2 * newqps;
> + for (i = cur_qps * 2; i < 2 * newqps; i++) {
> + err = setup_vq(ndev, &ndev->vqs[i]);
> + if (err)
> + goto clean_added;
> + }
> + err = modify_rqt(ndev, 2 * newqps);
> + if (err)
> + goto clean_added;
> + }
> + return 0;
> +
> +clean_added:
> + for (--i; i >= cur_qps; --i)
> + teardown_vq(ndev, &ndev->vqs[i]);
> +
> + return err;
> +}
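
The ordering above is the important part: when shrinking, the RQT is
modified before the excess VQs are torn down; when growing, the new VQs
are fully set up before the RQT starts steering to them, so the device
never dispatches to a queue that does not exist. Roughly (assumed
example values):

	/* Shrink (8 VQs -> 2 VQs, newqps = 1):
	 *   modify_rqt(ndev, 2);          stop steering to VQs 2..7 first
	 *   teardown_vq(vqs[7]..vqs[2]);  then destroy them
	 *
	 * Grow (2 VQs -> 8 VQs, newqps = 4):
	 *   setup_vq(vqs[2]..vqs[7]);     create the queues first
	 *   modify_rqt(ndev, 8);          only then steer packets to them
	 */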
> +
> +virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
> +{
> + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> + virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> + struct mlx5_control_vq *cvq = &mvdev->cvq;
> + struct virtio_net_ctrl_mq mq;
> + size_t read;
> + u16 newqps;
> +
> + switch (cmd) {
> + case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
> + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
> + if (read != sizeof(mq))
> + break;
> +
> + newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
> + if (ndev->cur_num_vqs == 2 * newqps) {
> + status = VIRTIO_NET_OK;
> + break;
> + }
> +
> + if (newqps & (newqps - 1))
> + break;
> +
> + if (!change_num_qps(mvdev, newqps))
> + status = VIRTIO_NET_OK;
> +
> + break;
> + default:
> + break;
> + }
> +
> + return status;
> +}
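
One non-obvious check above: newqps & (newqps - 1) rejects any count
that is not a power of two, presumably because RQT sizing is
power-of-two based (log_max_rqt_size above). A minimal sketch of the
idiom, with a helper name of my own choosing:

	/* For any v > 0, (v & (v - 1)) == 0 iff v is a power of two:
	 * subtracting 1 flips the lowest set bit and all bits below it,
	 * e.g. 4 & 3 == 0 (accepted), 6 & 5 == 4 (rejected). */
	static inline bool vqps_is_pow2(u16 v)
	{
		return v && !(v & (v - 1));
	}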
> +
> static void mlx5_cvq_kick_handler(struct work_struct *work)
> {
> virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> @@ -1453,6 +1589,9 @@ static void mlx5_cvq_kick_handler(struct work_struct *work)
> case VIRTIO_NET_CTRL_MAC:
> status = handle_ctrl_mac(mvdev, ctrl.cmd);
> break;
> + case VIRTIO_NET_CTRL_MQ:
> + status = handle_ctrl_mq(mvdev, ctrl.cmd);
> + break;
>
> default:
> break;
> @@ -1710,6 +1849,7 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
> ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
> ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
> ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
> + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ);
>
> print_features(mvdev, ndev->mvdev.mlx_features, false);
> return ndev->mvdev.mlx_features;
> @@ -1769,18 +1909,6 @@ static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
> }
> }
>
> -/* TODO: cross-endian support */
> -static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
> -{
> - return virtio_legacy_is_little_endian() ||
> - (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
> -}
> -
> -static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
> -{
> - return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
> -}
> -
> static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
> {
> if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
> @@ -1846,15 +1974,14 @@ static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
> static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
> {
> struct mlx5_vq_restore_info *ri = &mvq->ri;
> - struct mlx5_virtq_attr attr;
> + struct mlx5_virtq_attr attr = {};
> int err;
>
> - if (!mvq->initialized)
> - return 0;
> -
> - err = query_virtqueue(ndev, mvq, &attr);
> - if (err)
> - return err;
> + if (mvq->initialized) {
> + err = query_virtqueue(ndev, mvq, &attr);
> + if (err)
> + return err;
> + }
>
> ri->avail_index = attr.available_index;
> ri->used_index = attr.used_index;