>>> On 23.12.11 at 12:33, "Jan Beulich" <JBeulich@suse.com> wrote:
> c/s 1090 wasn't really helping much - I was doing that under the wrong
> impression that the zap_pte() hook would be called with the mmap_sem
> held. That being wrong, none of the uses of mmap_sem here actually are
> write ones (they only look up vma information), so convert them all to
> read acquires/releases.
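The conversion is then just the shared-acquisition form of the same
pattern around the pte lookups - a minimal sketch of the pattern (not
the literal call sites; mm checks and error handling elided):

	down_read(&mm->mmap_sem);	/* was down_write(); the lookup doesn't modify the vma tree */
	ret = create_lookup_pte_addr(mm, uvaddr, &ptep);
	up_read(&mm->mmap_sem);		/* was up_write() */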
>
> A new spinlock needs to be introduced, protecting idx_map. This has to
> nest inside mmap_sem, which requires some code movement.
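For reference, the resulting nesting where both locks are taken
(fast_flush_area() and dispatch_rw_block_io()) is, roughly sketched:

	down_read(&mm->mmap_sem);	/* outer: vma information stays stable */
	spin_lock(&info->map_lock);	/* inner: serializes idx_map accesses */
	/* ... look up / update info->idx_map[usr_idx] ... */
	spin_unlock(&info->map_lock);
	up_read(&mm->mmap_sem);

blktap_clear_pte() itself only takes map_lock.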
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> --- a/drivers/xen/blktap/blktap.c
> +++ b/drivers/xen/blktap/blktap.c
> @@ -113,6 +113,7 @@ typedef struct tap_blkif {
> pid_t pid; /*tapdisk process id */
> enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace
> shutdown */
> + spinlock_t map_lock; /*protects idx_map */
> struct idx_map {
> u16 mem, req;
> } *idx_map; /*Record the user ring id to kern
> @@ -295,7 +296,7 @@ static pte_t blktap_clear_pte(struct vm_
> pte_t copy;
> tap_blkif_t *info = NULL;
> unsigned int seg, usr_idx, pending_idx, mmap_idx, count = 0;
> - unsigned long offset, uvstart = 0;
> + unsigned long offset;
> struct page *pg;
> struct grant_handle_pair *khandle;
> struct gnttab_unmap_grant_ref unmap[2];
> @@ -304,25 +305,28 @@ static pte_t blktap_clear_pte(struct vm_
> * If the address is before the start of the grant mapped region or
> * if vm_file is NULL (meaning mmap failed and we have nothing to do)
> */
> - if (vma->vm_file != NULL) {
> + if (vma->vm_file != NULL)
> info = vma->vm_file->private_data;
> - uvstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
> - }
> - if (vma->vm_file == NULL || uvaddr < uvstart)
> + if (info == NULL || uvaddr < info->user_vstart)
> return ptep_get_and_clear_full(vma->vm_mm, uvaddr,
> ptep, is_fullmm);
>
> - /* TODO Should these be changed to if statements? */
> - BUG_ON(!info);
> - BUG_ON(!info->idx_map);
> -
> - offset = (uvaddr - uvstart) >> PAGE_SHIFT;
> + offset = (uvaddr - info->user_vstart) >> PAGE_SHIFT;
> usr_idx = OFFSET_TO_USR_IDX(offset);
> seg = OFFSET_TO_SEG(offset);
>
> + spin_lock(&info->map_lock);
> +
> pending_idx = info->idx_map[usr_idx].req;
> mmap_idx = info->idx_map[usr_idx].mem;
>
> + /* fast_flush_area() may already have cleared this entry */
> + if (mmap_idx == INVALID_MIDX) {
> + spin_unlock(&info->map_lock);
> + return ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep,
> + is_fullmm);
> + }
> +
> pg = idx_to_page(mmap_idx, pending_idx, seg);
> ClearPageReserved(pg);
> info->foreign_map.map[offset + RING_PAGES] = NULL;
> @@ -365,6 +369,8 @@ static pte_t blktap_clear_pte(struct vm_
> BUG();
> }
>
> + spin_unlock(&info->map_lock);
> +
> return copy;
> }
>
> @@ -489,6 +495,7 @@ found:
> }
>
> info->minor = minor;
> + spin_lock_init(&info->map_lock);
> /*
> * Make sure that we have a minor before others can
> * see us.
> @@ -1029,25 +1036,30 @@ static void fast_flush_area(pending_req_
> unsigned int u_idx, tap_blkif_t *info)
> {
> struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
> - unsigned int i, mmap_idx, invcount = 0, locked = 0;
> + unsigned int i, mmap_idx, invcount = 0;
> struct grant_handle_pair *khandle;
> uint64_t ptep;
> int ret;
> unsigned long uvaddr;
> struct mm_struct *mm = info->mm;
>
> + if (mm != NULL)
> + down_read(&mm->mmap_sem);
> +
> if (mm != NULL && xen_feature(XENFEAT_auto_translated_physmap)) {
> - down_write(&mm->mmap_sem);
> + slow:
> blktap_zap_page_range(mm,
> MMAP_VADDR(info->user_vstart, u_idx, 0),
> req->nr_pages);
> info->idx_map[u_idx].mem = INVALID_MIDX;
> - up_write(&mm->mmap_sem);
> + up_read(&mm->mmap_sem);
> return;
> }
>
> mmap_idx = req->mem_idx;
>
> + spin_lock(&info->map_lock);
> +
> for (i = 0; i < req->nr_pages; i++) {
> uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i);
>
> @@ -1066,15 +1078,13 @@ static void fast_flush_area(pending_req_
>
> if (mm != NULL && khandle->user != INVALID_GRANT_HANDLE) {
> BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
> - if (!locked++)
> - down_write(&mm->mmap_sem);
> if (create_lookup_pte_addr(
> mm,
> MMAP_VADDR(info->user_vstart, u_idx, i),
> &ptep) !=0) {
> - up_write(&mm->mmap_sem);
> + spin_unlock(&info->map_lock);
> WPRINTK("Couldn''t get a pte addr!\n");
> - return;
> + goto slow;
> }
>
> gnttab_set_unmap_op(&unmap[invcount], ptep,
> @@ -1091,19 +1101,11 @@ static void fast_flush_area(pending_req_
> GNTTABOP_unmap_grant_ref, unmap, invcount);
> BUG_ON(ret);
>
> - if (mm != NULL && !xen_feature(XENFEAT_auto_translated_physmap)) {
> - if (!locked++)
> - down_write(&mm->mmap_sem);
> - blktap_zap_page_range(mm,
> - MMAP_VADDR(info->user_vstart, u_idx, 0),
> - req->nr_pages);
> - }
> -
> - if (!locked && mm != NULL)
> - down_write(&mm->mmap_sem);
> info->idx_map[u_idx].mem = INVALID_MIDX;
> +
> + spin_unlock(&info->map_lock);
> if (mm != NULL)
> - up_write(&mm->mmap_sem);
> + up_read(&mm->mmap_sem);
> }
>
> /******************************************************************
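Just to spell out the new error handling in fast_flush_area(): with
mmap_sem only read-held for the whole function, a failed pte lookup
now drops map_lock and falls back to zapping the range, i.e. roughly:

	spin_lock(&info->map_lock);
	...
	if (create_lookup_pte_addr(mm, uvaddr, &ptep) != 0) {
		spin_unlock(&info->map_lock);
		goto slow;	/* blktap_zap_page_range() + idx_map[u_idx].mem = INVALID_MIDX */
	}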
> @@ -1406,7 +1408,9 @@ static void dispatch_rw_block_io(blkif_t
> goto fail_response;
>
> /* Check we have space on user ring - should never fail. */
> + spin_lock(&info->map_lock);
> usr_idx = GET_NEXT_REQ(info->idx_map);
> + spin_unlock(&info->map_lock);
> if (usr_idx >= MAX_PENDING_REQS) {
> WARN_ON(1);
> goto fail_response;
> @@ -1445,7 +1449,7 @@ static void dispatch_rw_block_io(blkif_t
> op = 0;
> mm = info->mm;
> if (!xen_feature(XENFEAT_auto_translated_physmap))
> - down_write(&mm->mmap_sem);
> + down_read(&mm->mmap_sem);
> for (i = 0; i < nseg; i++) {
> unsigned long uvaddr;
> unsigned long kvaddr;
> @@ -1462,9 +1466,9 @@ static void dispatch_rw_block_io(blkif_t
> /* Now map it to user. */
> ret = create_lookup_pte_addr(mm, uvaddr, &ptep);
> if (ret) {
> - up_write(&mm->mmap_sem);
> + up_read(&mm->mmap_sem);
> WPRINTK("Couldn''t get a pte addr!\n");
> - goto fail_flush;
> + goto fail_response;
> }
>
> gnttab_set_map_op(&map[op], ptep,
> @@ -1478,12 +1482,15 @@ static void dispatch_rw_block_io(blkif_t
> req->seg[i].first_sect + 1);
> }
>
> + if (xen_feature(XENFEAT_auto_translated_physmap))
> + down_read(&mm->mmap_sem);
> +
> + spin_lock(&info->map_lock);
> +
> ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op);
> BUG_ON(ret);
>
> if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> - up_write(&mm->mmap_sem);
> -
> for (i = 0; i < (nseg*2); i+=2) {
> unsigned long uvaddr;
> unsigned long offset;
> @@ -1548,18 +1555,18 @@ static void dispatch_rw_block_io(blkif_t
> }
> }
>
> + /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
> + info->idx_map[usr_idx].mem = mmap_idx;
> + info->idx_map[usr_idx].req = pending_idx;
> +
> + spin_unlock(&info->map_lock);
> +
> if (ret)
> goto fail_flush;
>
> - if (xen_feature(XENFEAT_auto_translated_physmap))
> - down_write(&mm->mmap_sem);
> - /* Mark mapped pages as reserved: */
> - for (i = 0; i < req->nr_segments; i++) {
> - struct page *pg;
> -
> - pg = idx_to_page(mmap_idx, pending_idx, i);
> - SetPageReserved(pg);
> - if (xen_feature(XENFEAT_auto_translated_physmap)) {
> + if (xen_feature(XENFEAT_auto_translated_physmap)) {
> + for (i = 0; i < nseg; i++) {
> + struct page *pg = idx_to_page(mmap_idx, pending_idx, i);
> unsigned long uvaddr = MMAP_VADDR(info->user_vstart,
> usr_idx, i);
> if (vma && uvaddr >= vma->vm_end) {
> @@ -1577,18 +1584,12 @@ static void dispatch_rw_block_io(blkif_t
> continue;
> }
> ret = vm_insert_page(vma, uvaddr, pg);
> - if (ret) {
> - up_write(&mm->mmap_sem);
> + if (ret)
> goto fail_flush;
> - }
> }
> }
> - if (xen_feature(XENFEAT_auto_translated_physmap))
> - up_write(&mm->mmap_sem);
>
> - /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
> - info->idx_map[usr_idx].mem = mmap_idx;
> - info->idx_map[usr_idx].req = pending_idx;
> + up_read(&mm->mmap_sem);
>
> blkif_get(blkif);
> /* Finally, write the request message to the user ring. */
> @@ -1611,6 +1612,7 @@ static void dispatch_rw_block_io(blkif_t
> return;
>
> fail_flush:
> + up_read(&mm->mmap_sem);
> WPRINTK("Reached Fail_flush\n");
> fast_flush_area(pending_req, pending_idx, usr_idx, info);
> fail_response: