c/s 1090 wasn't really helping much - I was doing that under the wrong
impression that the zap_pte() hook would be called with the mmap_sem
held. Since that is not the case, none of the uses of mmap_sem here
actually need to be write ones (they only look up vma information), so
convert them all to read acquires/releases.
A new spinlock needs to be introduced, protecting idx_map. This has to
nest inside mmap_sem, which requires some code movement.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
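
To make the intended nesting explicit, here is a minimal sketch
(illustrative only, using the names from blktap.c - the real functions
below interleave grant table operations between the lock acquisitions):

	struct mm_struct *mm = info->mm;

	if (mm != NULL)
		down_read(&mm->mmap_sem);	/* outer: vma info is only read */

	spin_lock(&info->map_lock);		/* inner: guards idx_map */
	/* ... look up / update info->idx_map[usr_idx] ... */
	spin_unlock(&info->map_lock);

	if (mm != NULL)
		up_read(&mm->mmap_sem);
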
--- a/drivers/xen/blktap/blktap.c
+++ b/drivers/xen/blktap/blktap.c
@@ -113,6 +113,7 @@ typedef struct tap_blkif {
pid_t pid; /*tapdisk process id */
enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace
shutdown */
+ spinlock_t map_lock; /*protects idx_map */
struct idx_map {
u16 mem, req;
} *idx_map; /*Record the user ring id to kern
@@ -295,7 +296,7 @@ static pte_t blktap_clear_pte(struct vm_
pte_t copy;
tap_blkif_t *info = NULL;
unsigned int seg, usr_idx, pending_idx, mmap_idx, count = 0;
- unsigned long offset, uvstart = 0;
+ unsigned long offset;
struct page *pg;
struct grant_handle_pair *khandle;
struct gnttab_unmap_grant_ref unmap[2];
@@ -304,25 +305,28 @@ static pte_t blktap_clear_pte(struct vm_
* If the address is before the start of the grant mapped region or
* if vm_file is NULL (meaning mmap failed and we have nothing to do)
*/
- if (vma->vm_file != NULL) {
+ if (vma->vm_file != NULL)
info = vma->vm_file->private_data;
- uvstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
- }
- if (vma->vm_file == NULL || uvaddr < uvstart)
+ if (info == NULL || uvaddr < info->user_vstart)
return ptep_get_and_clear_full(vma->vm_mm, uvaddr,
ptep, is_fullmm);

- /* TODO Should these be changed to if statements? */
- BUG_ON(!info);
- BUG_ON(!info->idx_map);
-
- offset = (uvaddr - uvstart) >> PAGE_SHIFT;
+ offset = (uvaddr - info->user_vstart) >> PAGE_SHIFT;
usr_idx = OFFSET_TO_USR_IDX(offset);
seg = OFFSET_TO_SEG(offset);

+ spin_lock(&info->map_lock);
+
pending_idx = info->idx_map[usr_idx].req;
mmap_idx = info->idx_map[usr_idx].mem;

+ /* fast_flush_area() may already have cleared this entry */
+ if (mmap_idx == INVALID_MIDX) {
+ spin_unlock(&info->map_lock);
+ return ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep,
+ is_fullmm);
+ }
+
pg = idx_to_page(mmap_idx, pending_idx, seg);
ClearPageReserved(pg);
info->foreign_map.map[offset + RING_PAGES] = NULL;

@@ -365,6 +369,8 @@ static pte_t blktap_clear_pte(struct vm_
BUG();
}

+ spin_unlock(&info->map_lock);
+
return copy;
}

@@ -489,6 +495,7 @@ found:
}

info->minor = minor;
+ spin_lock_init(&info->map_lock);
/*
* Make sure that we have a minor before others can
* see us.
@@ -1029,25 +1036,30 @@ static void fast_flush_area(pending_req_
unsigned int u_idx, tap_blkif_t *info)
{
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
- unsigned int i, mmap_idx, invcount = 0, locked = 0;
+ unsigned int i, mmap_idx, invcount = 0;
struct grant_handle_pair *khandle;
uint64_t ptep;
int ret;
unsigned long uvaddr;
struct mm_struct *mm = info->mm;

+ if (mm != NULL)
+ down_read(&mm->mmap_sem);
+
if (mm != NULL && xen_feature(XENFEAT_auto_translated_physmap)) {
- down_write(&mm->mmap_sem);
+ slow:
blktap_zap_page_range(mm,
MMAP_VADDR(info->user_vstart, u_idx, 0),
req->nr_pages);
info->idx_map[u_idx].mem = INVALID_MIDX;
- up_write(&mm->mmap_sem);
+ up_read(&mm->mmap_sem);
return;
}

mmap_idx = req->mem_idx;

+ spin_lock(&info->map_lock);
+
for (i = 0; i < req->nr_pages; i++) {
uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i);

@@ -1066,15 +1078,13 @@ static void fast_flush_area(pending_req_

if (mm != NULL && khandle->user != INVALID_GRANT_HANDLE) {
BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
- if (!locked++)
- down_write(&mm->mmap_sem);
if (create_lookup_pte_addr(
mm,
MMAP_VADDR(info->user_vstart, u_idx, i),
&ptep) !=0) {
- up_write(&mm->mmap_sem);
+ spin_unlock(&info->map_lock);
WPRINTK("Couldn''t get a pte addr!\n");
- return;
+ goto slow;
}
gnttab_set_unmap_op(&unmap[invcount], ptep,
@@ -1091,19 +1101,11 @@ static void fast_flush_area(pending_req_
GNTTABOP_unmap_grant_ref, unmap, invcount);
BUG_ON(ret);

- if (mm != NULL && !xen_feature(XENFEAT_auto_translated_physmap)) {
- if (!locked++)
- down_write(&mm->mmap_sem);
- blktap_zap_page_range(mm,
- MMAP_VADDR(info->user_vstart, u_idx, 0),
- req->nr_pages);
- }
-
- if (!locked && mm != NULL)
- down_write(&mm->mmap_sem);
info->idx_map[u_idx].mem = INVALID_MIDX;
+
+ spin_unlock(&info->map_lock);
if (mm != NULL)
- up_write(&mm->mmap_sem);
+ up_read(&mm->mmap_sem);
}

/******************************************************************
@@ -1406,7 +1408,9 @@ static void dispatch_rw_block_io(blkif_t
goto fail_response;

/* Check we have space on user ring - should never fail. */
+ spin_lock(&info->map_lock);
usr_idx = GET_NEXT_REQ(info->idx_map);
+ spin_unlock(&info->map_lock);
if (usr_idx >= MAX_PENDING_REQS) {
WARN_ON(1);
goto fail_response;
}

@@ -1445,7 +1449,7 @@ static void dispatch_rw_block_io(blkif_t
op = 0;
mm = info->mm;
if (!xen_feature(XENFEAT_auto_translated_physmap))
- down_write(&mm->mmap_sem);
+ down_read(&mm->mmap_sem);
for (i = 0; i < nseg; i++) {
unsigned long uvaddr;
unsigned long kvaddr;
@@ -1462,9 +1466,9 @@ static void dispatch_rw_block_io(blkif_t
/* Now map it to user. */
ret = create_lookup_pte_addr(mm, uvaddr, &ptep);
if (ret) {
- up_write(&mm->mmap_sem);
+ up_read(&mm->mmap_sem);
WPRINTK("Couldn''t get a pte addr!\n");
- goto fail_flush;
+ goto fail_response;
}
gnttab_set_map_op(&map[op], ptep,
@@ -1478,12 +1482,15 @@ static void dispatch_rw_block_io(blkif_t
req->seg[i].first_sect + 1);
}

+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ down_read(&mm->mmap_sem);
+
+ spin_lock(&info->map_lock);
+
ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op);
BUG_ON(ret);

if (!xen_feature(XENFEAT_auto_translated_physmap)) {
- up_write(&mm->mmap_sem);
-
for (i = 0; i < (nseg*2); i+=2) {
unsigned long uvaddr;
unsigned long offset;

@@ -1548,18 +1555,18 @@ static void dispatch_rw_block_io(blkif_t
}
}

+ /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
+ info->idx_map[usr_idx].mem = mmap_idx;
+ info->idx_map[usr_idx].req = pending_idx;
+
+ spin_unlock(&info->map_lock);
+
if (ret)
goto fail_flush;

- if (xen_feature(XENFEAT_auto_translated_physmap))
- down_write(&mm->mmap_sem);
- /* Mark mapped pages as reserved: */
- for (i = 0; i < req->nr_segments; i++) {
- struct page *pg;
-
- pg = idx_to_page(mmap_idx, pending_idx, i);
- SetPageReserved(pg);
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ for (i = 0; i < nseg; i++) {
+ struct page *pg = idx_to_page(mmap_idx, pending_idx, i);
unsigned long uvaddr = MMAP_VADDR(info->user_vstart,
usr_idx, i);
if (vma && uvaddr >= vma->vm_end) {
@@ -1577,18 +1584,12 @@ static void dispatch_rw_block_io(blkif_t
continue;
}
ret = vm_insert_page(vma, uvaddr, pg);
- if (ret) {
- up_write(&mm->mmap_sem);
+ if (ret)
goto fail_flush;
- }
}
}
- if (xen_feature(XENFEAT_auto_translated_physmap))
- up_write(&mm->mmap_sem);

- /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
- info->idx_map[usr_idx].mem = mmap_idx;
- info->idx_map[usr_idx].req = pending_idx;
+ up_read(&mm->mmap_sem);

blkif_get(blkif);
/* Finally, write the request message to the user ring. */
@@ -1611,6 +1612,7 @@ static void dispatch_rw_block_io(blkif_t
return;

fail_flush:
+ up_read(&mm->mmap_sem);
WPRINTK("Reached Fail_flush\n");
fast_flush_area(pending_req, pending_idx, usr_idx, info);
fail_response:
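
One point worth calling out in fast_flush_area(): a failure of
create_lookup_pte_addr() no longer aborts the teardown - it now falls
back to the slow path that zaps the whole user range. Roughly (a sketch
of the resulting control flow, not the literal code):

	if (mm != NULL)
		down_read(&mm->mmap_sem);

	if (mm != NULL && xen_feature(XENFEAT_auto_translated_physmap)) {
	slow:
		blktap_zap_page_range(mm,
				      MMAP_VADDR(info->user_vstart, u_idx, 0),
				      req->nr_pages);
		info->idx_map[u_idx].mem = INVALID_MIDX;
		up_read(&mm->mmap_sem);
		return;
	}

	spin_lock(&info->map_lock);
	/* per page: set up the grant unmap; if the PTE lookup fails ... */
	if (create_lookup_pte_addr(mm, uvaddr, &ptep) != 0) {
		spin_unlock(&info->map_lock);
		goto slow;	/* zap the whole range instead of bailing out */
	}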