Jean-Philippe Brucker
2018-Nov-29 19:28 UTC
[REBASE PATCH net-next v9 1/4] net: vhost: lock the vqs one by one
Hi,

On 25/09/2018 13:36, xiangxia.m.yue at gmail.com wrote:
> From: Tonghao Zhang <xiangxia.m.yue at gmail.com>
>
> This patch changes the way that lock all vqs
> at the same, to lock them one by one. It will
> be used for next patch to avoid the deadlock.
>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue at gmail.com>
> Acked-by: Jason Wang <jasowang at redhat.com>
> Signed-off-by: Jason Wang <jasowang at redhat.com>
> ---
>  drivers/vhost/vhost.c | 24 +++++++-----------------
>  1 file changed, 7 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index b13c6b4..f52008b 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -294,8 +294,11 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
>  {
>  	int i;
>
> -	for (i = 0; i < d->nvqs; ++i)
> +	for (i = 0; i < d->nvqs; ++i) {
> +		mutex_lock(&d->vqs[i]->mutex);
>  		__vhost_vq_meta_reset(d->vqs[i]);
> +		mutex_unlock(&d->vqs[i]->mutex);
> +	}
>  }
>
>  static void vhost_vq_reset(struct vhost_dev *dev,
> @@ -891,20 +894,6 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
>  #define vhost_get_used(vq, x, ptr) \
>  	vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
>
> -static void vhost_dev_lock_vqs(struct vhost_dev *d)
> -{
> -	int i = 0;
> -	for (i = 0; i < d->nvqs; ++i)
> -		mutex_lock_nested(&d->vqs[i]->mutex, i);
> -}
> -
> -static void vhost_dev_unlock_vqs(struct vhost_dev *d)
> -{
> -	int i = 0;
> -	for (i = 0; i < d->nvqs; ++i)
> -		mutex_unlock(&d->vqs[i]->mutex);
> -}
> -
>  static int vhost_new_umem_range(struct vhost_umem *umem,
>  				u64 start, u64 size, u64 end,
>  				u64 userspace_addr, int perm)
> @@ -954,7 +943,10 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d,
>  		if (msg->iova <= vq_msg->iova &&
>  		    msg->iova + msg->size - 1 >= vq_msg->iova &&
>  		    vq_msg->type == VHOST_IOTLB_MISS) {
> +			mutex_lock(&node->vq->mutex);

This seems to introduce a deadlock (and sleep-in-atomic): the vq->mutex
is taken while the IOTLB spinlock is held (taken earlier in
vhost_iotlb_notify_vq()). On the vhost_iotlb_miss() path, the IOTLB
spinlock is taken while the vq->mutex is held.

I'm not sure how to fix it. Given that we're holding dev->mutex, that
vq->poll only seems to be modified under dev->mutex, and assuming that
vhost_poll_queue(vq->poll) can be called concurrently, is it safe to
simply not take vq->mutex here?

Thanks,
Jean

>  			vhost_poll_queue(&node->vq->poll);
> +			mutex_unlock(&node->vq->mutex);
> +
>  			list_del(&node->node);
>  			kfree(node);
>  		}
> @@ -986,7 +978,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
>  	int ret = 0;
>
>  	mutex_lock(&dev->mutex);
> -	vhost_dev_lock_vqs(dev);
>  	switch (msg->type) {
>  	case VHOST_IOTLB_UPDATE:
>  		if (!dev->iotlb) {
> @@ -1020,7 +1011,6 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
>  		break;
>  	}
>
> -	vhost_dev_unlock_vqs(dev);
>  	mutex_unlock(&dev->mutex);
>
>  	return ret;
>
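[In outline, the two paths described above take the two locks in opposite order. The sketch below is a simplified illustration, not the exact vhost code: the iotlb_lock and pending_list names are recalled from vhost.c of this period and should be checked against the tree; only mutex_lock()/mutex_unlock(), spin_lock()/spin_unlock() and vhost_poll_queue() are taken from the quoted diff and discussion.

    /* Path A: vhost_process_iotlb_msg() -> vhost_iotlb_notify_vq(),
     * with this patch applied */
    spin_lock(&d->iotlb_lock);                /* IOTLB spinlock taken first      */
    list_for_each_entry_safe(node, n, &d->pending_list, node) {
            mutex_lock(&node->vq->mutex);     /* sleeps in atomic context;       */
            vhost_poll_queue(&node->vq->poll);/* lock order: spinlock -> mutex   */
            mutex_unlock(&node->vq->mutex);
    }
    spin_unlock(&d->iotlb_lock);

    /* Path B: vq handler -> vhost_iotlb_miss() */
    mutex_lock(&vq->mutex);                   /* held by the vq handler          */
    spin_lock(&d->iotlb_lock);                /* lock order: mutex -> spinlock   */
    /* ... enqueue an IOTLB miss message ... */
    spin_unlock(&d->iotlb_lock);
    mutex_unlock(&vq->mutex);

Path A waits for the mutex while holding the spinlock, path B waits for the spinlock while holding the mutex: a classic AB-BA inversion, and path A additionally sleeps (mutex_lock) inside a spinlock-protected region.]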
Jason Wang
2018-Nov-30 02:34 UTC
[REBASE PATCH net-next v9 1/4] net: vhost: lock the vqs one by one
On 2018/11/30 上午3:28, Jean-Philippe Brucker wrote:
> Hi,
>
> On 25/09/2018 13:36, xiangxia.m.yue at gmail.com wrote:
>> From: Tonghao Zhang <xiangxia.m.yue at gmail.com>
>>
>> This patch changes the way that lock all vqs
>> at the same, to lock them one by one. It will
>> be used for next patch to avoid the deadlock.
>>
>> Signed-off-by: Tonghao Zhang <xiangxia.m.yue at gmail.com>
>> Acked-by: Jason Wang <jasowang at redhat.com>
>> Signed-off-by: Jason Wang <jasowang at redhat.com>
>> ---
>>  drivers/vhost/vhost.c | 24 +++++++-----------------
>>  1 file changed, 7 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> index b13c6b4..f52008b 100644
>> --- a/drivers/vhost/vhost.c
>> +++ b/drivers/vhost/vhost.c
>> @@ -294,8 +294,11 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
>>  {
>>  	int i;
>>
>> -	for (i = 0; i < d->nvqs; ++i)
>> +	for (i = 0; i < d->nvqs; ++i) {
>> +		mutex_lock(&d->vqs[i]->mutex);
>>  		__vhost_vq_meta_reset(d->vqs[i]);
>> +		mutex_unlock(&d->vqs[i]->mutex);
>> +	}
>>  }
>>
>>  static void vhost_vq_reset(struct vhost_dev *dev,
>> @@ -891,20 +894,6 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
>>  #define vhost_get_used(vq, x, ptr) \
>>  	vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
>>
>> -static void vhost_dev_lock_vqs(struct vhost_dev *d)
>> -{
>> -	int i = 0;
>> -	for (i = 0; i < d->nvqs; ++i)
>> -		mutex_lock_nested(&d->vqs[i]->mutex, i);
>> -}
>> -
>> -static void vhost_dev_unlock_vqs(struct vhost_dev *d)
>> -{
>> -	int i = 0;
>> -	for (i = 0; i < d->nvqs; ++i)
>> -		mutex_unlock(&d->vqs[i]->mutex);
>> -}
>> -
>>  static int vhost_new_umem_range(struct vhost_umem *umem,
>>  				u64 start, u64 size, u64 end,
>>  				u64 userspace_addr, int perm)
>> @@ -954,7 +943,10 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d,
>>  		if (msg->iova <= vq_msg->iova &&
>>  		    msg->iova + msg->size - 1 >= vq_msg->iova &&
>>  		    vq_msg->type == VHOST_IOTLB_MISS) {
>> +			mutex_lock(&node->vq->mutex);
> This seems to introduce a deadlock (and sleep-in-atomic): the vq->mutex
> is taken while the IOTLB spinlock is held (taken earlier in
> vhost_iotlb_notify_vq()). On the vhost_iotlb_miss() path, the IOTLB
> spinlock is taken while the vq->mutex is held.

Good catch.

> I'm not sure how to fix it. Given that we're holding dev->mutex, that
> vq->poll only seems to be modified under dev->mutex, and assuming that
> vhost_poll_queue(vq->poll) can be called concurrently, is it safe to
> simply not take vq->mutex here?

Yes, I think it can be removed here.

Want to post a patch for this?

Thanks

> Thanks,
> Jean
>
>
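[In outline, the change agreed on here amounts to dropping the vq->mutex acquisition around vhost_poll_queue() again. A sketch of such a hunk, derived only from the diff quoted in this thread (not from the patch Jean-Philippe later posted):

    --- a/drivers/vhost/vhost.c
    +++ b/drivers/vhost/vhost.c
    @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d,
     		if (msg->iova <= vq_msg->iova &&
     		    msg->iova + msg->size - 1 >= vq_msg->iova &&
     		    vq_msg->type == VHOST_IOTLB_MISS) {
    -			mutex_lock(&node->vq->mutex);
     			vhost_poll_queue(&node->vq->poll);
    -			mutex_unlock(&node->vq->mutex);
    -
     			list_del(&node->node);
     			kfree(node);
     		}

Since vhost_poll_queue() only queues work and does not sleep, calling it directly under the IOTLB spinlock avoids both the sleep-in-atomic and the lock-order inversion, while dev->mutex (already held on this path) keeps vq->poll stable, as Jean-Philippe's reasoning above suggests.]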
Jean-Philippe Brucker
2018-Nov-30 10:28 UTC
[REBASE PATCH net-next v9 1/4] net: vhost: lock the vqs one by one
On 30/11/2018 02:34, Jason Wang wrote:
>
> On 2018/11/30 上午3:28, Jean-Philippe Brucker wrote:
>> Hi,
>>
>> On 25/09/2018 13:36, xiangxia.m.yue at gmail.com wrote:
>>> From: Tonghao Zhang <xiangxia.m.yue at gmail.com>
>>>
>>> This patch changes the way that lock all vqs
>>> at the same, to lock them one by one. It will
>>> be used for next patch to avoid the deadlock.
>>>
>>> Signed-off-by: Tonghao Zhang <xiangxia.m.yue at gmail.com>
>>> Acked-by: Jason Wang <jasowang at redhat.com>
>>> Signed-off-by: Jason Wang <jasowang at redhat.com>
>>> ---
>>>  drivers/vhost/vhost.c | 24 +++++++-----------------
>>>  1 file changed, 7 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>>> index b13c6b4..f52008b 100644
>>> --- a/drivers/vhost/vhost.c
>>> +++ b/drivers/vhost/vhost.c
>>> @@ -294,8 +294,11 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
>>>  {
>>>  	int i;
>>>
>>> -	for (i = 0; i < d->nvqs; ++i)
>>> +	for (i = 0; i < d->nvqs; ++i) {
>>> +		mutex_lock(&d->vqs[i]->mutex);
>>>  		__vhost_vq_meta_reset(d->vqs[i]);
>>> +		mutex_unlock(&d->vqs[i]->mutex);
>>> +	}
>>>  }
>>>
>>>  static void vhost_vq_reset(struct vhost_dev *dev,
>>> @@ -891,20 +894,6 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
>>>  #define vhost_get_used(vq, x, ptr) \
>>>  	vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
>>>
>>> -static void vhost_dev_lock_vqs(struct vhost_dev *d)
>>> -{
>>> -	int i = 0;
>>> -	for (i = 0; i < d->nvqs; ++i)
>>> -		mutex_lock_nested(&d->vqs[i]->mutex, i);
>>> -}
>>> -
>>> -static void vhost_dev_unlock_vqs(struct vhost_dev *d)
>>> -{
>>> -	int i = 0;
>>> -	for (i = 0; i < d->nvqs; ++i)
>>> -		mutex_unlock(&d->vqs[i]->mutex);
>>> -}
>>> -
>>>  static int vhost_new_umem_range(struct vhost_umem *umem,
>>>  				u64 start, u64 size, u64 end,
>>>  				u64 userspace_addr, int perm)
>>> @@ -954,7 +943,10 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d,
>>>  		if (msg->iova <= vq_msg->iova &&
>>>  		    msg->iova + msg->size - 1 >= vq_msg->iova &&
>>>  		    vq_msg->type == VHOST_IOTLB_MISS) {
>>> +			mutex_lock(&node->vq->mutex);
>> This seems to introduce a deadlock (and sleep-in-atomic): the vq->mutex
>> is taken while the IOTLB spinlock is held (taken earlier in
>> vhost_iotlb_notify_vq()). On the vhost_iotlb_miss() path, the IOTLB
>> spinlock is taken while the vq->mutex is held.
>
>
> Good catch.
>
>
>> I'm not sure how to fix it. Given that we're holding dev->mutex, that
>> vq->poll only seems to be modified under dev->mutex, and assuming that
>> vhost_poll_queue(vq->poll) can be called concurrently, is it safe to
>> simply not take vq->mutex here?
>
>
> Yes, I think it can be removed here.
>
> Want to post a patch for this?

Yes, I'll post it shortly

Thanks,
Jean