Si-Wei Liu
2022-Jan-11 22:21 UTC
[PATCH v7 07/14] vdpa/mlx5: Support configuring max data virtqueue
On 1/11/2022 7:21 AM, Eli Cohen wrote:> On Tue, Jan 11, 2022 at 12:52:29AM -0800, Si-Wei Liu wrote: >> >> On 1/10/2022 11:34 PM, Eli Cohen wrote: >>> On Mon, Jan 10, 2022 at 05:00:34PM -0800, Si-Wei Liu wrote: >>>> On 1/9/2022 6:10 AM, Eli Cohen wrote: >>>>> On Thu, Jan 06, 2022 at 05:50:24PM -0800, Si-Wei Liu wrote: >>>>>> On 1/6/2022 5:27 PM, Si-Wei Liu wrote: >>>>>>> On 1/5/2022 3:46 AM, Eli Cohen wrote: >>>>>>>> Check whether the max number of data virtqueue pairs was provided when a >>>>>>>> adding a new device and verify the new value does not exceed device >>>>>>>> capabilities. >>>>>>>> >>>>>>>> In addition, change the arrays holding virtqueue and callback contexts >>>>>>>> to be dynamically allocated. >>>>>>>> >>>>>>>> Signed-off-by: Eli Cohen <elic at nvidia.com> >>>>>>>> --- >>>>>>>> v6 -> v7: >>>>>>>> 1. Evaluate RQT table size based on config.max_virtqueue_pairs. >>>>>>>> >>>>>>>> ? drivers/vdpa/mlx5/net/mlx5_vnet.c | 51 ++++++++++++++++++++++--------- >>>>>>>> ? 1 file changed, 37 insertions(+), 14 deletions(-) >>>>>>>> >>>>>>>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c >>>>>>>> b/drivers/vdpa/mlx5/net/mlx5_vnet.c >>>>>>>> index 4a2149f70f1e..d4720444bf78 100644 >>>>>>>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c >>>>>>>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c >>>>>>>> @@ -131,11 +131,6 @@ struct mlx5_vdpa_virtqueue { >>>>>>>> ????? struct mlx5_vq_restore_info ri; >>>>>>>> ? }; >>>>>>>> ? -/* We will remove this limitation once mlx5_vdpa_alloc_resources() >>>>>>>> - * provides for driver space allocation >>>>>>>> - */ >>>>>>>> -#define MLX5_MAX_SUPPORTED_VQS 16 >>>>>>>> - >>>>>>>> ? static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx) >>>>>>>> ? { >>>>>>>> ????? if (unlikely(idx > mvdev->max_idx)) >>>>>>>> @@ -148,8 +143,8 @@ struct mlx5_vdpa_net { >>>>>>>> ????? struct mlx5_vdpa_dev mvdev; >>>>>>>> ????? struct mlx5_vdpa_net_resources res; >>>>>>>> ????? struct virtio_net_config config; >>>>>>>> -??? struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS]; >>>>>>>> -??? struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1]; >>>>>>>> +??? struct mlx5_vdpa_virtqueue *vqs; >>>>>>>> +??? struct vdpa_callback *event_cbs; >>>>>>>> ? ????? /* Serialize vq resources creation and destruction. This is >>>>>>>> required >>>>>>>> ?????? * since memory map might change and we need to destroy and create >>>>>>>> @@ -1218,7 +1213,7 @@ static void suspend_vqs(struct mlx5_vdpa_net >>>>>>>> *ndev) >>>>>>>> ? { >>>>>>>> ????? int i; >>>>>>>> ? -??? for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++) >>>>>>>> +??? for (i = 0; i < ndev->mvdev.max_vqs; i++) >>>>>>>> ????????? suspend_vq(ndev, &ndev->vqs[i]); >>>>>>>> ? } >>>>>>>> ? @@ -1244,8 +1239,14 @@ static int create_rqt(struct mlx5_vdpa_net >>>>>>>> *ndev) >>>>>>>> ????? void *in; >>>>>>>> ????? int i, j; >>>>>>>> ????? int err; >>>>>>>> +??? int num; >>>>>>>> ? -??? max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2, >>>>>>>> +??? if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) >>>>>>>> +??????? num = 1; >>>>>>>> +??? else >>>>>>>> +??????? num = le16_to_cpu(ndev->config.max_virtqueue_pairs); >>>>>>>> + >>>>>>>> +??? max_rqt = min_t(int, roundup_pow_of_two(num), >>>>>>>> ????????????? 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size)); >>>>>>>> ????? if (max_rqt < 1) >>>>>>>> ????????? return -EOPNOTSUPP; >>>>>>>> @@ -1262,7 +1263,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev) >>>>>>>> ????? MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt); >>>>>>>> ????? list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); >>>>>>>> ????? for (i = 0, j = 0; i < max_rqt; i++, j += 2) >>>>>>>> -??????? list[i] = cpu_to_be32(ndev->vqs[j % >>>>>>>> ndev->mvdev.max_vqs].virtq_id); >>>>>>>> +??????? list[i] = cpu_to_be32(ndev->vqs[j % (2 * num)].virtq_id); >>>>>>> Good catch. LGTM. >>>>>>> >>>>>>> Reviewed-by: Si-Wei Liu<si-wei.liu at oracle.com> >>>>>>> >>>>>> Apologies to reply to myself. It looks to me we need to set cur_num_vqs to >>>>>> the negotiated num of queues. Otherwise any site referencing cur_num_vqs >>>>>> won't work properly. >>>>> You can't really use cur_num_vqs. Consider this scenario: >>>>> create vdpa device with max VQs = 16 (RQT size created with 8 entries) >>>>> boot VM >>>>> ethtool modify num QPs to 2. cur_num_vqs now equals 2. >>>>> reboot VM. >>>> Once VM is rebooted, the cur_num_vqs has to reset to 0 in the reset() op. >>> Why reset to 0? The correct value to set here is >>> config->max_virtqueue_pairs. >> I am not sure I get you, perhaps post a patch to show what you want to do? >> If you intend to set to 2 (non-MQ) or 2 * max_virtqueue_pairs (MQ) later in >> create_rqt(), maybe it's not quite relevant whether or not to reset. But in >> the habit of keeping things consistent I'd prefer reset to a value to >> reflect the fact that this field won't have a valid value until features are >> negotiated. Whenever there's a need in future, this field can be easily >> exposed to userspace. >> >>> This is what happens when you add the >>> device. >> This is not relevant, and doesn't matter. Essentially that line of code to >> set cur_num_vqs in vdpa_add can be removed. The value of cur_num_vqs won't >> be effective before the MQ feature is negotiated, i.e. you don't get a >> sensible cur_num_vqs value before the size of RQT is derived from the MQ >> feature and the table gets created. >> >>> Setting cur_num_vqs to config->max_virtqueue_pairs will address >>> your concerns with respect to modify_rqt. Once you reset cur_num_vqs to >>> config->max_virtqueue_pairs your concerns with respect to changing >>> mumber of QPs should be addressed. >> No, it won't. If you look at the caller of change_num_qps(), in >> handle_ctrl_mq() there's following code: >> >> ??????????????? if (ndev->cur_num_vqs == 2 * newqps) { >> ??????????????????????? status = VIRTIO_NET_OK; >> ??????????????????????? break; >> ??????????????? } >> >> So if you set cur_num_vqs to max_virtqueue_pairs while MQ is not negotiated, >> it will result in a very weird success for a non-MQ supporting driver to be >> able to change the number of queues without changing anything effectively. >> You may argue that we can fail the non-MQ case early before coming to this >> code. But this is another patch, and would make code more obscure, not less. >> Intuitively people won't realize your cur_num_vqs doesn't apply to non-MQ >> just by follow the name. >> >> >> Regards, >> -Siwei >> > Si-Wei: > So your point is that you want to see cur_num_vqs 0 feature negotiated > and other 2 for non MQ, and whatever configured through ethtool > otherwise. > > I can send a patch to address this concern of yours.Thanks, Eli!> > Quesiton is how we want to proceed here. I do want this series to get > into 5.16. Should I send a new series or fixes on top of this?Yep, fixes on top should be fine. I already gave R-b on your fixes. This should pretty much close the review loop. Thanks for your hard work on this!> Also, it > would be helpful if you could list all the changes you think should be > made.There are a couple cosmetic changes, but doesn't matter anyway. I think your latest fixes address the major comments, and I can post patches myself to address the rest. Thanks! -Siwei> > Michael: what do you think? > >>> We could even leave create_rqt >>> untouched or modify the code to use cur_num_vqs. It should work the >>> same. >>> >>>>> create RQT with 1 entry only. >>>> Here cur_num_vqs will be loaded with the newly negotiated value (max_rqt) >>>> again. >>>> >>>>> ethtool modify num QPs to 4. modify RQT will fail since it was created >>>>> with max QPs equals 1. >>>> It won't fail as the cur_num_vqs will be consistent with the number of >>>> queues newly negotiated (i.e the max_rqt in create_rqt) for modify. >>>> >>>>> I think it is ok to create the RQT always with the value configured when >>>>> the device was created. >>>> The problem of not setting cur_num_vqs in create_rqt (and resetting it in >>>> mlx5_vdpa_reset) is that, once the VM is rebooted or the device is reset, >>>> the value doesn't go with the actual number of rqt entries hence would break >>>> various assumptions in change_num_qps() or modify_rqt(). For instances, >>>> >>>> create vdpa device with max data VQs = 16 >>>> boot VM >>>> features_ok set with MQ negotiated >>>> RQT created with 8 entries in create_rqt >>>> ethtool modify num QPs to 2. cur_num_vqs now equals 2. >>>> reboot VM >>>> features_ok set with MQ negotiated >>>> RQT created with 8 entries in create_rqt >>>> ethtool modify num QPs to 6. >>>> cur_num_vqs was 2 while the actual RQT size is 8 (= 16 / 2) >>>> change_num_qps would fail as the wrong increment branch (rather than the decrement) was taken >>>> >>>> and this case too, which calls out the need to validate the presence of >>>> VIRTIO_NET_F_MQ in handle_ctrl_mq() >>>> >>>> create vdpa device with max data VQs = 16 (RQT size created with 8 entries) >>>> boot VM >>>> features_ok set with MQ negotiated >>>> RQT created with 8 entries in create_rqt >>>> ethtool modify num QPs to 2. cur_num_vqs now equals 2. >>>> reboot VM >>>> features_ok set with no MQ negotiated >>>> RQT created with 8 entries in create_rqt >>>> ethtool modify num QPs to 6. suppose guest runs a custom kernel without checking the #channels to configure >>>> change_num_qps can succeed and no host side check prohibiting a single queue guest to set multi-queue >>>> >>>> Thanks, >>>> -Siwei >>>>>> Further, we need to validate VIRTIO_NET_F_MQ is present >>>>>> in handle_ctrl_mq() before changing the number of queue pairs. >>>>>> >>>>>> So just disregard my previous R-b for this patch. >>>>>> >>>>>> Thanks, >>>>>> -Siwei >>>>>> >>>>>> >>>>>>>> ? ????? MLX5_SET(rqtc, rqtc, rqt_actual_size, max_rqt); >>>>>>>> ????? err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, >>>>>>>> &ndev->res.rqtn); >>>>>>>> @@ -2220,7 +2221,7 @@ static int mlx5_vdpa_reset(struct vdpa_device >>>>>>>> *vdev) >>>>>>>> ????? clear_vqs_ready(ndev); >>>>>>>> ????? mlx5_vdpa_destroy_mr(&ndev->mvdev); >>>>>>>> ????? ndev->mvdev.status = 0; >>>>>>>> -??? memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs)); >>>>>>>> +??? memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * >>>>>>>> (mvdev->max_vqs + 1)); >>>>>>>> ????? ndev->mvdev.actual_features = 0; >>>>>>>> ????? ++mvdev->generation; >>>>>>>> ????? if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { >>>>>>>> @@ -2293,6 +2294,8 @@ static void mlx5_vdpa_free(struct vdpa_device >>>>>>>> *vdev) >>>>>>>> ????? } >>>>>>>> ????? mlx5_vdpa_free_resources(&ndev->mvdev); >>>>>>>> ????? mutex_destroy(&ndev->reslock); >>>>>>>> +??? kfree(ndev->event_cbs); >>>>>>>> +??? kfree(ndev->vqs); >>>>>>>> ? } >>>>>>>> ? ? static struct vdpa_notification_area >>>>>>>> mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx) >>>>>>>> @@ -2538,15 +2541,33 @@ static int mlx5_vdpa_dev_add(struct >>>>>>>> vdpa_mgmt_dev *v_mdev, const char *name, >>>>>>>> ????????? return -EOPNOTSUPP; >>>>>>>> ????? } >>>>>>>> ? -??? /* we save one virtqueue for control virtqueue should we >>>>>>>> require it */ >>>>>>>> ????? max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, >>>>>>>> max_num_virtio_queues); >>>>>>>> -??? max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS); >>>>>>>> +??? if (max_vqs < 2) { >>>>>>>> +??????? dev_warn(mdev->device, >>>>>>>> +???????????? "%d virtqueues are supported. At least 2 are required\n", >>>>>>>> +???????????? max_vqs); >>>>>>>> +??????? return -EAGAIN; >>>>>>>> +??? } >>>>>>>> + >>>>>>>> +??? if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) { >>>>>>>> +??????? if (add_config->net.max_vq_pairs > max_vqs / 2) >>>>>>>> +??????????? return -EINVAL; >>>>>>>> +??????? max_vqs = min_t(u32, max_vqs, 2 * >>>>>>>> add_config->net.max_vq_pairs); >>>>>>>> +??? } else { >>>>>>>> +??????? max_vqs = 2; >>>>>>>> +??? } >>>>>>>> ? ????? ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, >>>>>>>> mdev->device, &mlx5_vdpa_ops, >>>>>>>> ?????????????????? name, false); >>>>>>>> ????? if (IS_ERR(ndev)) >>>>>>>> ????????? return PTR_ERR(ndev); >>>>>>>> ? +??? ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL); >>>>>>>> +??? ndev->event_cbs = kcalloc(max_vqs + 1, >>>>>>>> sizeof(*ndev->event_cbs), GFP_KERNEL); >>>>>>>> +??? if (!ndev->vqs || !ndev->event_cbs) { >>>>>>>> +??????? err = -ENOMEM; >>>>>>>> +??????? goto err_alloc; >>>>>>>> +??? } >>>>>>>> ????? ndev->mvdev.max_vqs = max_vqs; >>>>>>>> ????? mvdev = &ndev->mvdev; >>>>>>>> ????? mvdev->mdev = mdev; >>>>>>>> @@ -2627,6 +2648,7 @@ static int mlx5_vdpa_dev_add(struct >>>>>>>> vdpa_mgmt_dev *v_mdev, const char *name, >>>>>>>> ????????? mlx5_mpfs_del_mac(pfmdev, config->mac); >>>>>>>> ? err_mtu: >>>>>>>> ????? mutex_destroy(&ndev->reslock); >>>>>>>> +err_alloc: >>>>>>>> ????? put_device(&mvdev->vdev.dev); >>>>>>>> ????? return err; >>>>>>>> ? } >>>>>>>> @@ -2669,7 +2691,8 @@ static int mlx5v_probe(struct auxiliary_device >>>>>>>> *adev, >>>>>>>> ????? mgtdev->mgtdev.ops = &mdev_ops; >>>>>>>> ????? mgtdev->mgtdev.device = mdev->device; >>>>>>>> ????? mgtdev->mgtdev.id_table = id_table; >>>>>>>> -??? mgtdev->mgtdev.config_attr_mask >>>>>>>> BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR); >>>>>>>> +??? mgtdev->mgtdev.config_attr_mask >>>>>>>> BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) | >>>>>>>> +????????????????????? BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP); >>>>>>>> ????? mgtdev->madev = madev; >>>>>>>> ? ????? err = vdpa_mgmtdev_register(&mgtdev->mgtdev);