Stefano Stabellini
2010-Jul-09 14:32 UTC
[Xen-devel] [PATCH] gntdev: switch back to rwlocks
Hi Jeremy,
this patch switches back from spinlocks to rwlocks in gntdev, solving
the locking issue that was preventing stubdoms from working.
In particular the problem is that gntdev_mmap calls apply_to_page_range
after acquiring the spinlock. apply_to_page_range causes the mmu
notifiers to be called, so mn_invl_range_start will be called, which will
try to acquire the same spinlock again.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>

---

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index a33e443..0a9a68c 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -51,7 +51,7 @@ struct gntdev_priv {
 	struct list_head maps;
 	uint32_t used;
 	uint32_t limit;
-	spinlock_t lock;
+	rwlock_t rwlock;
 	struct mm_struct *mm;
 	struct mmu_notifier mn;
 };
@@ -289,7 +289,7 @@ static void mn_invl_range_start(struct mmu_notifier *mn,
 	unsigned long mstart, mend;
 	int err;
 
-	spin_lock(&priv->lock);
+	read_lock(&priv->rwlock);
 	list_for_each_entry(map, &priv->maps, next) {
 		if (!map->vma)
 			continue;
@@ -311,7 +311,7 @@ static void mn_invl_range_start(struct mmu_notifier *mn,
 				(mend - mstart) >> PAGE_SHIFT);
 		WARN_ON(err);
 	}
-	spin_unlock(&priv->lock);
+	read_unlock(&priv->rwlock);
 }
 
 static void mn_invl_page(struct mmu_notifier *mn,
@@ -328,7 +328,7 @@ static void mn_release(struct mmu_notifier *mn,
 	struct grant_map *map;
 	int err;
 
-	spin_lock(&priv->lock);
+	read_lock(&priv->rwlock);
 	list_for_each_entry(map, &priv->maps, next) {
 		if (!map->vma)
 			continue;
@@ -339,7 +339,7 @@ static void mn_release(struct mmu_notifier *mn,
 		err = unmap_grant_pages(map, 0, map->count);
 		WARN_ON(err);
 	}
-	spin_unlock(&priv->lock);
+	read_unlock(&priv->rwlock);
 }
 
 struct mmu_notifier_ops gntdev_mmu_ops = {
@@ -359,7 +359,7 @@ static int gntdev_open(struct inode *inode, struct file *flip)
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&priv->maps);
-	spin_lock_init(&priv->lock);
+	priv->rwlock = RW_LOCK_UNLOCKED;
 	priv->limit = limit;
 
 	priv->mm = get_task_mm(current);
@@ -387,7 +387,7 @@ static int gntdev_release(struct inode *inode, struct file *flip)
 	if (debug)
 		printk("%s: priv %p\n", __FUNCTION__, priv);
 
-	spin_lock(&priv->lock);
+	write_lock(&priv->rwlock);
 	while (!list_empty(&priv->maps)) {
 		map = list_entry(priv->maps.next, struct grant_map, next);
 		err = gntdev_del_map(map);
@@ -395,7 +395,7 @@ static int gntdev_release(struct inode *inode, struct file *flip)
 			gntdev_free_map(map);
 
 	}
-	spin_unlock(&priv->lock);
+	write_unlock(&priv->rwlock);
 
 	mmu_notifier_unregister(&priv->mn, priv->mm);
 	kfree(priv);
@@ -429,15 +429,15 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
 		return err;
 	}
 
-	spin_lock(&priv->lock);
+	write_lock(&priv->rwlock);
 	gntdev_add_map(priv, map);
 	op.index = map->index << PAGE_SHIFT;
-	spin_unlock(&priv->lock);
+	write_unlock(&priv->rwlock);
 
 	if (copy_to_user(u, &op, sizeof(op)) != 0) {
-		spin_lock(&priv->lock);
+		write_lock(&priv->rwlock);
 		gntdev_del_map(map);
-		spin_unlock(&priv->lock);
+		write_unlock(&priv->rwlock);
 		gntdev_free_map(map);
 		return err;
 	}
@@ -457,11 +457,11 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
 		printk("%s: priv %p, del %d+%d\n", __FUNCTION__, priv,
 		       (int)op.index, (int)op.count);
 
-	spin_lock(&priv->lock);
+	write_lock(&priv->rwlock);
 	map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
 	if (map)
 		err = gntdev_del_map(map);
-	spin_unlock(&priv->lock);
+	write_unlock(&priv->rwlock);
 	if (!err)
 		gntdev_free_map(map);
 	return err;
@@ -479,16 +479,16 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
 		printk("%s: priv %p, offset for vaddr %lx\n", __FUNCTION__, priv,
 		       (unsigned long)op.vaddr);
 
-	spin_lock(&priv->lock);
+	read_lock(&priv->rwlock);
 	map = gntdev_find_map_vaddr(priv, op.vaddr);
 	if (map == NULL ||
 	    map->vma->vm_start != op.vaddr) {
-		spin_unlock(&priv->lock);
+		read_unlock(&priv->rwlock);
 		return -EINVAL;
 	}
 	op.offset = map->index << PAGE_SHIFT;
 	op.count = map->count;
-	spin_unlock(&priv->lock);
+	read_unlock(&priv->rwlock);
 
 	if (copy_to_user(u, &op, sizeof(op)) != 0)
 		return -EFAULT;
@@ -507,9 +507,9 @@ static long gntdev_ioctl_set_max_grants(struct gntdev_priv *priv,
 	if (op.count > limit)
 		return -EINVAL;
 
-	spin_lock(&priv->lock);
+	write_lock(&priv->rwlock);
 	priv->limit = op.count;
-	spin_unlock(&priv->lock);
+	write_unlock(&priv->rwlock);
 	return 0;
 }
 
@@ -557,7 +557,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 		printk("%s: map %d+%d at %lx (pgoff %lx)\n", __FUNCTION__,
 		       index, count, vma->vm_start, vma->vm_pgoff);
 
-	spin_lock(&priv->lock);
+	read_lock(&priv->rwlock);
 	map = gntdev_find_map_index(priv, index, count);
 	if (!map)
 		goto unlock_out;
@@ -599,7 +599,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 	map->is_mapped = 1;
 
 unlock_out:
-	spin_unlock(&priv->lock);
+	read_unlock(&priv->rwlock);
 	return err;
 }
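To make the cycle concrete, the deadlock being worked around looks roughly
like this. This is a heavily condensed sketch of the pre-patch call chain,
not the actual gntdev source; the priv plumbing, arguments, and error
handling are illustrative assumptions:

/* Sketch only: simplified from the description above, not real driver code. */

static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
{
	spin_lock(&priv->lock);			/* lock acquired here...    */

	/* apply_to_page_range() internally invokes
	 * mmu_notifier_invalidate_range_start(), which calls back into
	 * every registered mmu notifier, including this driver's own: */
	apply_to_page_range(vma->vm_mm, vma->vm_start,
			    vma->vm_end - vma->vm_start,
			    find_grant_ptes, map);

	spin_unlock(&priv->lock);
	return 0;
}

static void mn_invl_range_start(struct mmu_notifier *mn,
				struct mm_struct *mm,
				unsigned long start, unsigned long end)
{
	spin_lock(&priv->lock);	/* ...and re-acquired here on the same CPU
				 * while still held: a non-recursive
				 * spinlock spins forever at this point. */
	/* ... unmap the affected grant pages ... */
	spin_unlock(&priv->lock);
}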
Jan Beulich
2010-Jul-09
Re: [Xen-devel] [PATCH] gntdev: switch back to rwlocks
>>> On 09.07.10 at 16:32, Stefano Stabellini <stefano.stabellini@eu.citrix.com> wrote:
> Hi Jeremy,
> this patch switches back from spinlocks to rwlocks in gntdev, solving
> the locking issue that was preventing stubdoms from working.
> In particular the problem is that gntdev_mmap calls apply_to_page_range
> after acquiring the spinlock. apply_to_page_range causes the mmu
> notifiers to be called, so mn_invl_range_start will be called, which will
> try to acquire the same spinlock again.

Shouldn't this be solved in a way not depending on an implementation
detail (rw-locks being unfair in that readers can lock out writers
indefinitely)? Is it even certain that all arch-es implement rw-locks
in a manner compatible with this?

Jan
Stefano Stabellini
2010-Jul-09 14:55 UTC
Re: [Xen-devel] [PATCH] gntdev: switch back to rwlocks
On Fri, 9 Jul 2010, Jan Beulich wrote:
> >>> On 09.07.10 at 16:32, Stefano Stabellini <stefano.stabellini@eu.citrix.com> wrote:
> > Hi Jeremy,
> > this patch switches back from spinlocks to rwlocks in gntdev, solving
> > the locking issue that was preventing stubdoms from working.
> > In particular the problem is that gntdev_mmap calls apply_to_page_range
> > after acquiring the spinlock. apply_to_page_range causes the mmu
> > notifiers to be called, so mn_invl_range_start will be called, which will
> > try to acquire the same spinlock again.
>
> Shouldn't this be solved in a way not depending on an implementation
> detail (rw-locks being unfair in that readers can lock out writers
> indefinitely)? Is it even certain that all arch-es implement rw-locks
> in a manner compatible with this?

Any rwlock implementation that allows multiple readers will do: both
mn_invl_range_start and gntdev_mmap only require a read lock.
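The argument here relies on reader-sharing semantics: as long as readers
never exclude other readers, the nested acquisition that deadlocked the
spinlock version is granted immediately. A schematic of the intended
behaviour, assuming such a reader-preference rwlock:

/* gntdev_mmap: */
read_lock(&priv->rwlock);		/* first reader: granted          */
apply_to_page_range(...);
	/* -> mmu notifier -> mn_invl_range_start: */
	read_lock(&priv->rwlock);	/* second reader on the same CPU:
					 * also granted, because readers
					 * share the lock                */
	/* ... unmap pages ... */
	read_unlock(&priv->rwlock);
read_unlock(&priv->rwlock);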
Jeremy Fitzhardinge
2010-Jul-09 15:04 UTC
[Xen-devel] Re: [PATCH] gntdev: switch back to rwlocks
On 07/09/2010 07:32 AM, Stefano Stabellini wrote:
> Hi Jeremy,
> this patch switches back from spinlocks to rwlocks in gntdev, solving
> the locking issue that was preventing stubdoms from working.
> In particular the problem is that gntdev_mmap calls apply_to_page_range
> after acquiring the spinlock. apply_to_page_range causes the mmu
> notifiers to be called, so mn_invl_range_start will be called, which will
> try to acquire the same spinlock again.
>

I think the problem is that apply_to_page_range is calling the mmu
notifiers at all. It seems to be plain bogus; it should be up to the
callers of apply_to_page_range to call the mmu notifiers if necessary.

Does removing those calls fix the problem?

Thanks,
    J

> [...]
Stefano Stabellini
2010-Jul-09 15:12 UTC
[Xen-devel] Re: [PATCH] gntdev: switch back to rwlocks
On Fri, 9 Jul 2010, Jeremy Fitzhardinge wrote:
> On 07/09/2010 07:32 AM, Stefano Stabellini wrote:
> > Hi Jeremy,
> > this patch switches back from spinlocks to rwlocks in gntdev, solving
> > the locking issue that was preventing stubdoms from working.
> > In particular the problem is that gntdev_mmap calls apply_to_page_range
> > after acquiring the spinlock. apply_to_page_range causes the mmu
> > notifiers to be called, so mn_invl_range_start will be called, which will
> > try to acquire the same spinlock again.
> >
>
> I think the problem is that apply_to_page_range is calling the mmu
> notifiers at all. It seems to be plain bogus; it should be up to the
> callers of apply_to_page_range to call the mmu notifiers if necessary.
>
> Does removing those calls fix the problem?
>

yep
Jan Beulich
2010-Jul-09 15:56 UTC
Re: [Xen-devel] [PATCH] gntdev: switch back to rwlocks
>>> On 09.07.10 at 16:55, Stefano Stabellini <stefano.stabellini@eu.citrix.com> wrote:
> On Fri, 9 Jul 2010, Jan Beulich wrote:
>> >>> On 09.07.10 at 16:32, Stefano Stabellini <stefano.stabellini@eu.citrix.com> wrote:
>> > Hi Jeremy,
>> > this patch switches back from spinlocks to rwlocks in gntdev, solving
>> > the locking issue that was preventing stubdoms from working.
>> > In particular the problem is that gntdev_mmap calls apply_to_page_range
>> > after acquiring the spinlock. apply_to_page_range causes the mmu
>> > notifiers to be called, so mn_invl_range_start will be called, which will
>> > try to acquire the same spinlock again.
>>
>> Shouldn't this be solved in a way not depending on an implementation
>> detail (rw-locks being unfair in that readers can lock out writers
>> indefinitely)? Is it even certain that all arch-es implement rw-locks
>> in a manner compatible with this?
>
> Any rwlock implementation that allows multiple readers will do: both
> mn_invl_range_start and gntdev_mmap only require a read lock.

No - if an implementation forces further readers to spin once a
writer started its attempt to acquire a lock, the code after your
change still has the potential to deadlock afaict.

Jan
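Spelled out, the scenario Jan describes needs only one writer on another
CPU. With a fair rwlock implementation that stops admitting new readers
once a writer is queued, a hypothetical interleaving such as this one
deadlocks:

/* CPU0 (gntdev_mmap)                  CPU1 (any writer path, e.g. an ioctl)
 * ------------------                  -------------------------------------
 * read_lock(&priv->rwlock);  ok
 *                                     write_lock(&priv->rwlock);
 *                                     spins waiting for CPU0's reader; a
 *                                     fair lock now blocks any new readers
 *                                     to avoid writer starvation
 * apply_to_page_range(...)
 *   -> mn_invl_range_start()
 *      -> read_lock(&priv->rwlock);
 *         queued behind the waiting
 *         writer, which itself waits
 *         for CPU0 to drop its first
 *         read lock: deadlock
 */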
Jeremy Fitzhardinge
2010-Jul-09 17:57 UTC
Re: [Xen-devel] [PATCH] gntdev: switch back to rwlocks
On 07/09/2010 08:56 AM, Jan Beulich wrote:
>>> Shouldn't this be solved in a way not depending on an implementation
>>> detail (rw-locks being unfair in that readers can lock out writers
>>> indefinitely)? Is it even certain that all arch-es implement rw-locks
>>> in a manner compatible with this?
>>>
>> Any rwlock implementation that allows multiple readers will do: both
>> mn_invl_range_start and gntdev_mmap only require a read lock.
>>
> No - if an implementation forces further readers to spin once a
> writer started its attempt to acquire a lock, the code after your
> change still has the potential to deadlock afaict.
>

Yes, relying on this kind of behaviour from rwlocks doesn't pass the
smell test. rwlocks are just a performance optimisation for particular
locking patterns; it should always be safe to implement them as plain
spinlocks (or convert them into spinlocks).

I think removing the notifier calls from apply_to_page_range fixes the
root of the problem.

    J
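For context, mainline's apply_to_page_range() at the time bracketed the
page-table walk with an invalidate_range_start/end pair; the fix Jeremy
is proposing amounts to deleting that pair and leaving invalidation to
callers that actually rewrite existing mappings. A sketch of such a
change, reconstructed from the era's mm/memory.c with hunk line numbers
omitted; this is not the committed patch:

--- a/mm/memory.c
+++ b/mm/memory.c
 int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
 			unsigned long size, pte_fn_t fn, void *data)
 {
 	pgd_t *pgd;
 	unsigned long next;
-	unsigned long start = addr, end = addr + size;
+	unsigned long end = addr + size;
 	int err;
 
 	BUG_ON(addr >= end);
-	mmu_notifier_invalidate_range_start(mm, start, end);
 	pgd = pgd_offset(mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
 		err = apply_to_pud_range(mm, pgd, addr, next, fn, data);
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
-	mmu_notifier_invalidate_range_end(mm, start, end);
+
 	return err;
 }

With the calls gone, gntdev_mmap's use of apply_to_page_range on a
freshly created VMA would trigger no notifier at all, so the original
spinlock could stay.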
Stefano Stabellini
2010-Jul-12 12:55 UTC
Re: [Xen-devel] [PATCH] gntdev: switch back to rwlocks
On Fri, 9 Jul 2010, Jeremy Fitzhardinge wrote:
> On 07/09/2010 08:56 AM, Jan Beulich wrote:
> >>> Shouldn't this be solved in a way not depending on an implementation
> >>> detail (rw-locks being unfair in that readers can lock out writers
> >>> indefinitely)? Is it even certain that all arch-es implement rw-locks
> >>> in a manner compatible with this?
> >>>
> >> Any rwlock implementation that allows multiple readers will do: both
> >> mn_invl_range_start and gntdev_mmap only require a read lock.
> >>
> > No - if an implementation forces further readers to spin once a
> > writer started its attempt to acquire a lock, the code after your
> > change still has the potential to deadlock afaict.
> >
>
> Yes, relying on this kind of behaviour from rwlocks doesn't pass the
> smell test. rwlocks are just a performance optimisation for particular
> locking patterns; it should always be safe to implement them as plain
> spinlocks (or convert them into spinlocks).
>
> I think removing the notifier calls from apply_to_page_range fixes the
> root of the problem.
>

I agree that your mmu notifier patch should solve the issue as well.
Just remember to apply it to the stable branches, because the bug
affects them too.