Si-Wei Liu
2022-Jan-11 01:00 UTC
[PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue
On 1/9/2022 6:10 AM, Eli Cohen wrote:> On Thu, Jan 06, 2022 at 05:50:24PM -0800, Si-Wei Liu wrote: >> >> On 1/6/2022 5:27 PM, Si-Wei Liu wrote: >>> >>> On 1/5/2022 3:46 AM, Eli Cohen wrote: >>>> Check whether the max number of data virtqueue pairs was provided when a >>>> adding a new device and verify the new value does not exceed device >>>> capabilities. >>>> >>>> In addition, change the arrays holding virtqueue and callback contexts >>>> to be dynamically allocated. >>>> >>>> Signed-off-by: Eli Cohen <elic at nvidia.com> >>>> --- >>>> v6 -> v7: >>>> 1. Evaluate RQT table size based on config.max_virtqueue_pairs. >>>> >>>> ? drivers/vdpa/mlx5/net/mlx5_vnet.c | 51 ++++++++++++++++++++++--------- >>>> ? 1 file changed, 37 insertions(+), 14 deletions(-) >>>> >>>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c >>>> b/drivers/vdpa/mlx5/net/mlx5_vnet.c >>>> index 4a2149f70f1e..d4720444bf78 100644 >>>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c >>>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c >>>> @@ -131,11 +131,6 @@ struct mlx5_vdpa_virtqueue { >>>> ????? struct mlx5_vq_restore_info ri; >>>> ? }; >>>> ? -/* We will remove this limitation once mlx5_vdpa_alloc_resources() >>>> - * provides for driver space allocation >>>> - */ >>>> -#define MLX5_MAX_SUPPORTED_VQS 16 >>>> - >>>> ? static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx) >>>> ? { >>>> ????? if (unlikely(idx > mvdev->max_idx)) >>>> @@ -148,8 +143,8 @@ struct mlx5_vdpa_net { >>>> ????? struct mlx5_vdpa_dev mvdev; >>>> ????? struct mlx5_vdpa_net_resources res; >>>> ????? struct virtio_net_config config; >>>> -??? struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS]; >>>> -??? struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1]; >>>> +??? struct mlx5_vdpa_virtqueue *vqs; >>>> +??? struct vdpa_callback *event_cbs; >>>> ? ????? /* Serialize vq resources creation and destruction. This is >>>> required >>>> ?????? * since memory map might change and we need to destroy and create >>>> @@ -1218,7 +1213,7 @@ static void suspend_vqs(struct mlx5_vdpa_net >>>> *ndev) >>>> ? { >>>> ????? int i; >>>> ? -??? for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++) >>>> +??? for (i = 0; i < ndev->mvdev.max_vqs; i++) >>>> ????????? suspend_vq(ndev, &ndev->vqs[i]); >>>> ? } >>>> ? @@ -1244,8 +1239,14 @@ static int create_rqt(struct mlx5_vdpa_net >>>> *ndev) >>>> ????? void *in; >>>> ????? int i, j; >>>> ????? int err; >>>> +??? int num; >>>> ? -??? max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2, >>>> +??? if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) >>>> +??????? num = 1; >>>> +??? else >>>> +??????? num = le16_to_cpu(ndev->config.max_virtqueue_pairs); >>>> + >>>> +??? max_rqt = min_t(int, roundup_pow_of_two(num), >>>> ????????????? 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size)); >>>> ????? if (max_rqt < 1) >>>> ????????? return -EOPNOTSUPP; >>>> @@ -1262,7 +1263,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev) >>>> ????? MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt); >>>> ????? list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); >>>> ????? for (i = 0, j = 0; i < max_rqt; i++, j += 2) >>>> -??????? list[i] = cpu_to_be32(ndev->vqs[j % >>>> ndev->mvdev.max_vqs].virtq_id); >>>> +??????? list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id); >>> Good catch. LGTM. >>> >>> Reviewed-by: Si-Wei Liu<si-wei.liu at oracle.com> >>> >> Apologies to reply to myself. It looks to me we need to set cur_num_vqs to >> the negotiated num of queues. Otherwise any site referencing cur_num_vqs >> won't work properly. > You can't really use cur_num_vqs. Consider this scenario: > create vdpa device with max VQs = 16 (RQT size created with 8 entries) > boot VM > ethtool modify num QPs to 2. cur_num_vqs now equals 2. > reboot VM.Once VM is rebooted, the cur_num_vqs has to reset to 0 in the reset() op.> create RQT with 1 entry only.Here cur_num_vqs will be loaded with the newly negotiated value (max_rqt) again.> ethtool modify num QPs to 4. modify RQT will fail since it was created > with max QPs equals 1.It won't fail as the cur_num_vqs will be consistent with the number of queues newly negotiated (i.e the max_rqt in create_rqt) for modify.> > I think it is ok to create the RQT always with the value configured when > the device was created.The problem of not setting cur_num_vqs in create_rqt (and resetting it in mlx5_vdpa_reset) is that, once the VM is rebooted or the device is reset, the value doesn't go with the actual number of rqt entries hence would break various assumptions in change_num_qps() or modify_rqt(). For instances, create vdpa device with max data VQs = 16 boot VM features_ok set with MQ negotiated RQT created with 8 entries in create_rqt ethtool modify num QPs to 2. cur_num_vqs now equals 2. reboot VM features_ok set with MQ negotiated RQT created with 8 entries in create_rqt ethtool modify num QPs to 6. cur_num_vqs was 2 while the actual RQT size is 8 (= 16 / 2) change_num_qps would fail as the wrong increment branch (rather than the decrement) was taken and this case too, which calls out the need to validate the presence of VIRTIO_NET_F_MQ in handle_ctrl_mq() create vdpa device with max data VQs = 16 (RQT size created with 8 entries) boot VM features_ok set with MQ negotiated RQT created with 8 entries in create_rqt ethtool modify num QPs to 2. cur_num_vqs now equals 2. reboot VM features_ok set with no MQ negotiated RQT created with 8 entries in create_rqt ethtool modify num QPs to 6. suppose guest runs a custom kernel without checking the #channels to configure change_num_qps can succeed and no host side check prohibiting a single queue guest to set multi-queue Thanks, -Siwei> >> Further, we need to validate VIRTIO_NET_F_MQ is present >> in handle_ctrl_mq() before changing the number of queue pairs. >> >> So just disregard my previous R-b for this patch. >> >> Thanks, >> -Siwei >> >> >>>> ? ????? MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt); >>>> ????? err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, >>>> &ndev->res.rqtn); >>>> @@ -2220,7 +2221,7 @@ static int mlx5_vdpa_reset(struct vdpa_device >>>> *vdev) >>>> ????? clear_vqs_ready(ndev); >>>> ????? mlx5_vdpa_destroy_mr(&ndev->mvdev); >>>> ????? ndev->mvdev.status = 0; >>>> -??? memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs)); >>>> +??? memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * >>>> (mvdev->max_vqs + 1)); >>>> ????? ndev->mvdev.actual_features = 0; >>>> ????? ++mvdev->generation; >>>> ????? if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { >>>> @@ -2293,6 +2294,8 @@ static void mlx5_vdpa_free(struct vdpa_device >>>> *vdev) >>>> ????? } >>>> ????? mlx5_vdpa_free_resources(&ndev->mvdev); >>>> ????? mutex_destroy(&ndev->reslock); >>>> +??? kfree(ndev->event_cbs); >>>> +??? kfree(ndev->vqs); >>>> ? } >>>> ? ? static struct vdpa_notification_area >>>> mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx) >>>> @@ -2538,15 +2541,33 @@ static int mlx5_vdpa_dev_add(struct >>>> vdpa_mgmt_dev *v_mdev, const char *name, >>>> ????????? return -EOPNOTSUPP; >>>> ????? } >>>> ? -??? /* we save one virtqueue for control virtqueue should we >>>> require it */ >>>> ????? max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, >>>> max_num_virtio_queues); >>>> -??? max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS); >>>> +??? if (max_vqs < 2) { >>>> +??????? dev_warn(mdev->device, >>>> +???????????? "%d virtqueues are supported. At least 2 are required\n", >>>> +???????????? max_vqs); >>>> +??????? return -EAGAIN; >>>> +??? } >>>> + >>>> +??? if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) { >>>> +??????? if (add_config->net.max_vq_pairs > max_vqs / 2) >>>> +??????????? return -EINVAL; >>>> +??????? max_vqs = min_t(u32, max_vqs, 2 * >>>> add_config->net.max_vq_pairs); >>>> +??? } else { >>>> +??????? max_vqs = 2; >>>> +??? } >>>> ? ????? ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, >>>> mdev->device, &mlx5_vdpa_ops, >>>> ?????????????????? name, false); >>>> ????? if (IS_ERR(ndev)) >>>> ????????? return PTR_ERR(ndev); >>>> ? +??? ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL); >>>> +??? ndev->event_cbs = kcalloc(max_vqs + 1, >>>> sizeof(*ndev->event_cbs), GFP_KERNEL); >>>> +??? if (!ndev->vqs || !ndev->event_cbs) { >>>> +??????? err = -ENOMEM; >>>> +??????? goto err_alloc; >>>> +??? } >>>> ????? ndev->mvdev.max_vqs = max_vqs; >>>> ????? mvdev = &ndev->mvdev; >>>> ????? mvdev->mdev = mdev; >>>> @@ -2627,6 +2648,7 @@ static int mlx5_vdpa_dev_add(struct >>>> vdpa_mgmt_dev *v_mdev, const char *name, >>>> ????????? mlx5_mpfs_del_mac(pfmdev, config->mac); >>>> ? err_mtu: >>>> ????? mutex_destroy(&ndev->reslock); >>>> +err_alloc: >>>> ????? put_device(&mvdev->vdev.dev); >>>> ????? return err; >>>> ? } >>>> @@ -2669,7 +2691,8 @@ static int mlx5v_probe(struct auxiliary_device >>>> *adev, >>>> ????? mgtdev->mgtdev.ops = &mdev_ops; >>>> ????? mgtdev->mgtdev.device = mdev->device; >>>> ????? mgtdev->mgtdev.id_table = id_table; >>>> -??? mgtdev->mgtdev.config_attr_mask >>>> BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR); >>>> +??? mgtdev->mgtdev.config_attr_mask >>>> BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) | >>>> +????????????????????? BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP); >>>> ????? mgtdev->madev = madev; >>>> ? ????? err = vdpa_mgmtdev_register(&mgtdev->mgtdev);