Si-Wei Liu
2021-Feb-10 02:30 UTC
[PATCH v1] vdpa/mlx5: Restore the hardware used index after change map
On 2/8/2021 10:37 PM, Jason Wang wrote:
>
> On 2021/2/9 2:12 PM, Eli Cohen wrote:
>> On Tue, Feb 09, 2021 at 11:20:14AM +0800, Jason Wang wrote:
>>> On 2021/2/8 6:04 PM, Eli Cohen wrote:
>>>> On Mon, Feb 08, 2021 at 05:04:27PM +0800, Jason Wang wrote:
>>>>> On 2021/2/8 2:37 PM, Eli Cohen wrote:
>>>>>> On Mon, Feb 08, 2021 at 12:27:18PM +0800, Jason Wang wrote:
>>>>>>> On 2021/2/6 7:07 AM, Si-Wei Liu wrote:
>>>>>>>> On 2/3/2021 11:36 PM, Eli Cohen wrote:
>>>>>>>>> When a change of memory map occurs, the hardware resources are
>>>>>>>>> destroyed and then re-created again with the new memory map. In
>>>>>>>>> such case, we need to restore the hardware available and used
>>>>>>>>> indices. The driver failed to restore the used index, which is
>>>>>>>>> added here.
>>>>>>>>>
>>>>>>>>> Also, since the driver also fails to reset the available and
>>>>>>>>> used indices upon device reset, fix this here to avoid a
>>>>>>>>> regression caused by the fact that the used index may not be
>>>>>>>>> zero upon device reset.
>>>>>>>>>
>>>>>>>>> Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported
>>>>>>>>> mlx5 devices")
>>>>>>>>> Signed-off-by: Eli Cohen <elic at nvidia.com>
>>>>>>>>> ---
>>>>>>>>> v0 -> v1:
>>>>>>>>> Clear indices upon device reset
>>>>>>>>>
>>>>>>>>>  drivers/vdpa/mlx5/net/mlx5_vnet.c | 18 ++++++++++++++++++
>>>>>>>>>  1 file changed, 18 insertions(+)
>>>>>>>>>
>>>>>>>>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>>>>>>>> index 88dde3455bfd..b5fe6d2ad22f 100644
>>>>>>>>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>>>>>>>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>>>>>>>> @@ -87,6 +87,7 @@ struct mlx5_vq_restore_info {
>>>>>>>>>  	u64 device_addr;
>>>>>>>>>  	u64 driver_addr;
>>>>>>>>>  	u16 avail_index;
>>>>>>>>> +	u16 used_index;
>>>>>>>>>  	bool ready;
>>>>>>>>>  	struct vdpa_callback cb;
>>>>>>>>>  	bool restore;
>>>>>>>>> @@ -121,6 +122,7 @@ struct mlx5_vdpa_virtqueue {
>>>>>>>>>  	u32 virtq_id;
>>>>>>>>>  	struct mlx5_vdpa_net *ndev;
>>>>>>>>>  	u16 avail_idx;
>>>>>>>>> +	u16 used_idx;
>>>>>>>>>  	int fw_state;
>>>>>>>>>
>>>>>>>>>  	/* keep last in the struct */
>>>>>>>>> @@ -804,6 +806,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
>>>>>>>>>
>>>>>>>>>  	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
>>>>>>>>>  	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
>>>>>>>>> +	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
>>>>>>>>>  	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
>>>>>>>>>  		 get_features_12_3(ndev->mvdev.actual_features));
>>>>>>>>>  	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
>>>>>>>>> @@ -1022,6 +1025,7 @@ static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *m
>>>>>>>>>  struct mlx5_virtq_attr {
>>>>>>>>>  	u8 state;
>>>>>>>>>  	u16 available_index;
>>>>>>>>> +	u16 used_index;
>>>>>>>>>  };
>>>>>>>>>
>>>>>>>>>  static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
>>>>>>>>> @@ -1052,6 +1056,7 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
>>>>>>>>>  	memset(attr, 0, sizeof(*attr));
>>>>>>>>>  	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
>>>>>>>>>  	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
>>>>>>>>> +	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
>>>>>>>>>  	kfree(out);
>>>>>>>>>  	return 0;
>>>>>>>>>
>>>>>>>>> @@ -1535,6 +1540,16 @@ static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
>>>>>>>>>  	}
>>>>>>>>>  }
>>>>>>>>>
>>>>>>>>> +static void clear_virtqueues(struct mlx5_vdpa_net *ndev)
>>>>>>>>> +{
>>>>>>>>> +	int i;
>>>>>>>>> +
>>>>>>>>> +	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
>>>>>>>>> +		ndev->vqs[i].avail_idx = 0;
>>>>>>>>> +		ndev->vqs[i].used_idx = 0;
>>>>>>>>> +	}
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>  /* TODO: cross-endian support */
>>>>>>>>>  static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
>>>>>>>>>  {
>>>>>>>>> @@ -1610,6 +1625,7 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu
>>>>>>>>>  		return err;
>>>>>>>>>
>>>>>>>>>  	ri->avail_index = attr.available_index;
>>>>>>>>> +	ri->used_index = attr.used_index;
>>>>>>>>>  	ri->ready = mvq->ready;
>>>>>>>>>  	ri->num_ent = mvq->num_ent;
>>>>>>>>>  	ri->desc_addr = mvq->desc_addr;
>>>>>>>>> @@ -1654,6 +1670,7 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev)
>>>>>>>>>  			continue;
>>>>>>>>>
>>>>>>>>>  		mvq->avail_idx = ri->avail_index;
>>>>>>>>> +		mvq->used_idx = ri->used_index;
>>>>>>>>>  		mvq->ready = ri->ready;
>>>>>>>>>  		mvq->num_ent = ri->num_ent;
>>>>>>>>>  		mvq->desc_addr = ri->desc_addr;
>>>>>>>>> @@ -1768,6 +1785,7 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
>>>>>>>>>  	if (!status) {
>>>>>>>>>  		mlx5_vdpa_info(mvdev, "performing device reset\n");
>>>>>>>>>  		teardown_driver(ndev);
>>>>>>>>> +		clear_virtqueues(ndev);
>>>>>>>> The clearing looks fine at first glance, as it aligns with the
>>>>>>>> other state cleanups floating around the same place. However,
>>>>>>>> get_vq_state() is supposed to be called right after, to get
>>>>>>>> synced with the latest internal avail_index from the device
>>>>>>>> while the vq is stopped. The index was saved in the driver
>>>>>>>> software at vq suspension, but before the virtq object is
>>>>>>>> destroyed. We shouldn't clear the avail_index too early.
>>>>>>> Good point.
>>>>>>>
>>>>>>> There's a limitation in the virtio spec and the vDPA framework:
>>>>>>> we cannot simply differentiate device suspension from device
>>>>>>> reset.
>>>>>>>
>>>>>> Are you talking about live migration, where you reset the device
>>>>>> but still want to know how far it progressed in order to continue
>>>>>> from the same place in the new VM?
>>>>> Yes. So if we want to support live migration, we need:
>>>>>
>>>>> on the src node:
>>>>> 1) suspend the device
>>>>> 2) get last_avail_idx via get_vq_state()
>>>>>
>>>>> on the dst node:
>>>>> 3) set last_avail_idx via set_vq_state()
>>>>> 4) resume the device
>>>>>
>>>>> So you can see, step 2 requires the device/driver not to forget
>>>>> the last_avail_idx.
>>>>>
>>>> Just to be sure, what really matters here is the used index.
>>>> Because the virtqueue itself is copied from the src VM to the dest
>>>> VM. The available index is already there and we know the hardware
>>>> reads it from there.
>>>
>>> By "last_avail_idx" I meant the hardware's internal avail index.
>>> It's not stored in the virtqueue, so we must migrate it from src to
>>> dest and set it through set_vq_state(). Then, in the destination,
>>> the virtqueue can be restarted from that index.
>>>
>> Consider this case: the driver posted buffers until the avail index
>> became 50. The hardware was executing but had only made it to 20 when
>> the virtqueue was suspended due to live migration - this is indicated
>> by a hardware used index equal to 20.
>
>
> So in this case the used index in the virtqueue should be 20?
> Otherwise we would need to sync not the used index itself but all the
> used entries that were not committed to the used ring.

In other words, for mlx5 vdpa there's no such internal last_avail_idx
maintained by the hardware, right? And the used_idx in the virtqueue is
always in sync with the hardware used_index, and the hardware is
supposed to commit pending used buffers to the ring while bumping up
the hardware used_index (also committed to memory) altogether, prior to
suspension - is my understanding correct here? Double checking if this
is the expected semantics of what
modify_virtqueue(MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND) should
achieve.

If the above is true, then it looks to me that for mlx5 vdpa we should
really return the h/w used_idx rather than the last_avail_idx through
get_vq_state(), in order to reconstruct the virtqueue state post live
migration. For the set_map case, the internal last_avail_idx really
doesn't matter, although both indices are saved and restored
transparently as-is.

-Siwei

>
>> Now the virtqueue is copied to the new VM and the hardware now has to
>> continue execution from index 20. We need to tell the hardware via
>> configuring the last used_index.
>
>
> If the hardware cannot sync the index from the virtqueue, the driver
> can do the synchronization by making the last_used_idx equal to the
> used index in the virtqueue.
>
> Thanks
>
>> So why don't we restore the used index?
>>
>>>> So it puzzles me why in set_vq_state() we do not communicate the
>>>> saved used index.
>>>
>>> We don't do that since:
>>>
>>> 1) if the hardware can sync its internal used index from the
>>> virtqueue during device resume, then we don't need it
>>> 2) if the hardware cannot sync its internal used index, the driver
>>> (e.g. as you did here) can do that.
>>>
>>> But there's no way for the hardware to deduce the internal avail
>>> index from the virtqueue; that's why the avail index is synced.
>>>
>>> Thanks
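
To make Si-Wei's suggestion concrete, here is a minimal sketch of a
get_vq_state() that reports the hardware used index. It assumes
query_virtqueue() fills the new used_index field per Eli's patch; the
_sketch suffix marks it as hypothetical rather than the driver's
in-tree code:

/* Hypothetical sketch, not the in-tree implementation: report the
 * hardware used index through get_vq_state(), as suggested above.
 * Assumes query_virtqueue() fills attr.used_index per the patch.
 */
static int mlx5_vdpa_get_vq_state_sketch(struct vdpa_device *vdev, u16 idx,
					 struct vdpa_vq_state *state)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_virtq_attr attr;
	int err;

	err = query_virtqueue(ndev, mvq, &attr);
	if (err)
		return err;

	/* For a suspended net queue all pending completions have been
	 * committed, so the hw used_idx equals the internal avail index.
	 */
	state->avail_index = attr.used_index;
	return 0;
}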
Jason Wang
2021-Feb-10 03:53 UTC
[PATCH v1] vdpa/mlx5: Restore the hardware used index after change map
On 2021/2/10 10:30 AM, Si-Wei Liu wrote:
>
> On 2/8/2021 10:37 PM, Jason Wang wrote:
>>
>> On 2021/2/9 2:12 PM, Eli Cohen wrote:
>>> On Tue, Feb 09, 2021 at 11:20:14AM +0800, Jason Wang wrote:
>>>
>>> [... the patch and the earlier part of the discussion, quoted in
>>> full in the previous message, snipped here ...]
>>>
>>> Consider this case: the driver posted buffers until the avail index
>>> became 50. The hardware was executing but had only made it to 20
>>> when the virtqueue was suspended due to live migration - this is
>>> indicated by a hardware used index equal to 20.
>>
>>
>> So in this case the used index in the virtqueue should be 20?
>> Otherwise we would need to sync not the used index itself but all the
>> used entries that were not committed to the used ring.
>
> In other words, for mlx5 vdpa there's no such internal last_avail_idx
> maintained by the hardware, right?

For each device it should have one, otherwise it won't work correctly
during stop/resume. See mlx5_vdpa_get_vq_state(), which calls
query_virtqueue(); that builds the commands to query the
"last_avail_idx" from the hardware:

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
	if (err)
		goto err_cmd;

	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
	memset(attr, 0, sizeof(*attr));
	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);

> And the used_idx in the virtqueue is always in sync with the hardware
> used_index, and the hardware is supposed to commit pending used
> buffers to the ring while bumping up the hardware used_index (also
> committed to memory) altogether, prior to suspension - is my
> understanding correct here? Double checking if this is the expected
> semantics of what
> modify_virtqueue(MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND) should
> achieve.
>
> If the above is true, then it looks to me that for mlx5 vdpa we should
> really return the h/w used_idx rather than the last_avail_idx through
> get_vq_state(), in order to reconstruct the virtqueue state post live
> migration. For the set_map case, the internal last_avail_idx really
> doesn't matter, although both indices are saved and restored
> transparently as-is.

Right. A subtle thing here is that, for a device that may not be able
to complete all virtqueue requests during vq suspension, the
"last_avail_idx" might not be equal to the hardware used_idx. That
might be the case for storage devices that need to connect to a remote
backend.

But this is not the case for a networking device, so the last_avail_idx
should be equal to the hardware used_idx here. Still, using the
"last_avail_idx" (or hardware avail_idx) is always better in this case,
since it's guaranteed to be correct and causes less confusion. We use
this convention in the other types of vhost backends (vhost-kernel,
vhost-user).

So, looking at mlx5_vdpa_set_vq_state(), it probably won't work, since
it sets neither the hardware avail_idx nor the hardware used_idx:

static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
				  const struct vdpa_vq_state *state)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];

	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
		return -EINVAL;
	}

	mvq->avail_idx = state->avail_index;
	return 0;
}

Depending on the hardware, we should set either the hardware used_idx
or the hardware avail_idx here. I think we need to clarify in the
virtio spec how the device is supposed to work here.

Thanks

>
> -Siwei
>
>>
>>> Now the virtqueue is copied to the new VM and the hardware now has
>>> to continue execution from index 20. We need to tell the hardware
>>> via configuring the last used_index.
>>
>>
>> If the hardware cannot sync the index from the virtqueue, the driver
>> can do the synchronization by making the last_used_idx equal to the
>> used index in the virtqueue.
>>
>> Thanks
>>
>>> So why don't we restore the used index?
>>>
>>>>> So it puzzles me why in set_vq_state() we do not communicate the
>>>>> saved used index.
>>>>
>>>> We don't do that since:
>>>>
>>>> 1) if the hardware can sync its internal used index from the
>>>> virtqueue during device resume, then we don't need it
>>>> 2) if the hardware cannot sync its internal used index, the driver
>>>> (e.g. as you did here) can do that.
>>>>
>>>> But there's no way for the hardware to deduce the internal avail
>>>> index from the virtqueue; that's why the avail index is synced.
>>>>
>>>> Thanks
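
Along the same lines, a set_vq_state() that takes effect could stash
the migrated index into both driver-side counters, so that the next
create_virtqueue() programs hw_available_index and hw_used_index from
them. A minimal sketch, assuming (per the discussion above) that for a
net device the two indices are equal at suspend time; hypothetical,
not the in-tree code:

/* Hypothetical sketch, not the in-tree code: record the migrated
 * index into both counters so that the next create_virtqueue() call
 * programs hw_available_index and hw_used_index from them.
 */
static int mlx5_vdpa_set_vq_state_sketch(struct vdpa_device *vdev, u16 idx,
					 const struct vdpa_vq_state *state)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];

	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
		return -EINVAL;
	}

	/* For a net device hw used_idx == hw avail_idx once suspended,
	 * so one migrated value restores both.
	 */
	mvq->avail_idx = state->avail_index;
	mvq->used_idx = state->avail_index;
	return 0;
}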