Andres Lagar-Cavilla
2012-Feb-01 19:51 UTC
[PATCH 0 of 9] x86/mm: Fixes to sharing, paging and p2m
This patch series aggregates a number of fixes to different areas of mm: Sharing: - Make sharing play nice with balloon - Make physmap manipulations deal correctly with shared pages - Make sharing debug calls use locked accessors and return useful information Paging: - Eliminate a needless state in the paging state machine - Fix stats/accounting - Fix page type check when nominating or evicting a page P2M: This changes clear hurdles in advance of a fully-synchronized p2m - Eliminate possibility of deadlock in nested lookups - Reorder some locks taken by the sharing subsystem Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> Signed-off-by: Adin Scannell <adin@scannell.ca> xen/arch/x86/mm.c | 8 +- xen/arch/x86/mm/p2m-ept.c | 3 +- xen/arch/x86/mm/p2m.c | 7 +- xen/include/asm-x86/p2m.h | 7 +- xen/arch/x86/mm/p2m.c | 4 +- xen/arch/x86/hvm/emulate.c | 35 ++--- xen/arch/x86/mm/mem_sharing.c | 28 ++-- xen/include/asm-x86/p2m.h | 91 ++++++++++++++++ xen/arch/x86/mm/shadow/common.c | 3 + xen/arch/x86/mm/shadow/multi.c | 18 +- xen/arch/x86/mm/p2m.c | 21 +++- xen/common/memory.c | 12 +- xen/arch/x86/mm/mem_sharing.c | 6 +- xen/arch/x86/mm/p2m.c | 12 +- xen/common/memory.c | 2 +- xen/include/asm-x86/p2m.h | 6 +- xen/arch/x86/mm/mem_sharing.c | 7 +- xen/arch/x86/mm/mem_sharing.c | 224 ++++++++++++++++++++------------------- 18 files changed, 315 insertions(+), 179 deletions(-)
Andres Lagar-Cavilla
2012-Feb-01 19:51 UTC
[PATCH 1 of 9] x86/mm: Remove p2m_ram_paging_in
xen/arch/x86/mm.c | 8 ++++---- xen/arch/x86/mm/p2m-ept.c | 3 +-- xen/arch/x86/mm/p2m.c | 7 +++---- xen/include/asm-x86/p2m.h | 7 ++----- 4 files changed, 10 insertions(+), 15 deletions(-) This state in the paging state machine became unnecessary after the last few updates. Once eliminated, rename p2m_ram_paging_in_start to p2m_ram_paging_in. Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> diff -r aa58893caa60 -r decd21170c2a xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -3573,7 +3573,7 @@ int do_mmu_update( rc = -ENOENT; break; } - else if ( p2m_ram_paging_in_start == l1e_p2mt && + else if ( p2m_ram_paging_in == l1e_p2mt && !mfn_valid(l1emfn) ) { put_gfn(pg_owner, l1egfn); @@ -3622,7 +3622,7 @@ int do_mmu_update( rc = -ENOENT; break; } - else if ( p2m_ram_paging_in_start == l2e_p2mt && + else if ( p2m_ram_paging_in == l2e_p2mt && !mfn_valid(l2emfn) ) { put_gfn(pg_owner, l2egfn); @@ -3657,7 +3657,7 @@ int do_mmu_update( rc = -ENOENT; break; } - else if ( p2m_ram_paging_in_start == l3e_p2mt && + else if ( p2m_ram_paging_in == l3e_p2mt && !mfn_valid(l3emfn) ) { put_gfn(pg_owner, l3egfn); @@ -3692,7 +3692,7 @@ int do_mmu_update( rc = -ENOENT; break; } - else if ( p2m_ram_paging_in_start == l4e_p2mt && + else if ( p2m_ram_paging_in == l4e_p2mt && !mfn_valid(l4emfn) ) { put_gfn(pg_owner, l4egfn); diff -r aa58893caa60 -r decd21170c2a xen/arch/x86/mm/p2m-ept.c --- a/xen/arch/x86/mm/p2m-ept.c +++ b/xen/arch/x86/mm/p2m-ept.c @@ -82,7 +82,6 @@ static void ept_p2m_type_to_flags(ept_en case p2m_ram_paging_out: case p2m_ram_paged: case p2m_ram_paging_in: - case p2m_ram_paging_in_start: default: entry->r = entry->w = entry->x = 0; break; @@ -381,7 +380,7 @@ ept_set_entry(struct p2m_domain *p2m, un old_entry = *ept_entry; if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) || - (p2mt == p2m_ram_paging_in_start) ) + (p2mt == p2m_ram_paging_in) ) { /* Construct the new entry, and then write it once */ new_entry.emt = 
epte_get_entry_emt(p2m->domain, gfn, mfn, &ipat, diff -r aa58893caa60 -r decd21170c2a xen/arch/x86/mm/p2m.c --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -935,7 +935,7 @@ void p2m_mem_paging_populate(struct doma if ( p2mt == p2m_ram_paging_out ) req.flags |= MEM_EVENT_FLAG_EVICT_FAIL; - set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_in_start, a); + set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_in, a); } p2m_unlock(p2m); @@ -994,7 +994,7 @@ int p2m_mem_paging_prep(struct domain *d ret = -ENOENT; /* Allow missing pages */ - if ( (p2mt != p2m_ram_paging_in_start) && (p2mt != p2m_ram_paged) ) + if ( (p2mt != p2m_ram_paging_in) && (p2mt != p2m_ram_paged) ) goto out; /* Allocate a page if the gfn does not have one yet */ @@ -1086,8 +1086,7 @@ void p2m_mem_paging_resume(struct domain mfn = p2m->get_entry(p2m, rsp.gfn, &p2mt, &a, p2m_query, NULL); /* Allow only pages which were prepared properly, or pages which * were nominated but not evicted */ - if ( mfn_valid(mfn) && - (p2mt == p2m_ram_paging_in || p2mt == p2m_ram_paging_in_start) ) + if ( mfn_valid(mfn) && (p2mt == p2m_ram_paging_in) ) { set_p2m_entry(p2m, rsp.gfn, mfn, PAGE_ORDER_4K, paging_mode_log_dirty(d) ? 
p2m_ram_logdirty : diff -r aa58893caa60 -r decd21170c2a xen/include/asm-x86/p2m.h --- a/xen/include/asm-x86/p2m.h +++ b/xen/include/asm-x86/p2m.h @@ -82,9 +82,8 @@ typedef enum { p2m_ram_paging_out = 9, /* Memory that is being paged out */ p2m_ram_paged = 10, /* Memory that has been paged out */ p2m_ram_paging_in = 11, /* Memory that is being paged in */ - p2m_ram_paging_in_start = 12, /* Memory that is being paged in */ - p2m_ram_shared = 13, /* Shared or sharable memory */ - p2m_ram_broken = 14, /* Broken page, access cause domain crash */ + p2m_ram_shared = 12, /* Shared or sharable memory */ + p2m_ram_broken = 13, /* Broken page, access cause domain crash */ } p2m_type_t; /* @@ -131,7 +130,6 @@ typedef enum { | p2m_to_mask(p2m_ram_ro) \ | p2m_to_mask(p2m_ram_paging_out) \ | p2m_to_mask(p2m_ram_paged) \ - | p2m_to_mask(p2m_ram_paging_in_start) \ | p2m_to_mask(p2m_ram_paging_in) \ | p2m_to_mask(p2m_ram_shared)) @@ -158,7 +156,6 @@ typedef enum { #define P2M_PAGING_TYPES (p2m_to_mask(p2m_ram_paging_out) \ | p2m_to_mask(p2m_ram_paged) \ - | p2m_to_mask(p2m_ram_paging_in_start) \ | p2m_to_mask(p2m_ram_paging_in)) #define P2M_PAGED_TYPES (p2m_to_mask(p2m_ram_paged))
Andres Lagar-Cavilla
2012-Feb-01 19:51 UTC
[PATCH 2 of 9] x86/mm: Don't fail to nominate for paging on type flag, rather look at type count
xen/arch/x86/mm/p2m.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) Xen doesn't clean the type flag when dropping the type count for a page to zero. So, looking at the type flag when nominating a page for paging is incorrect. Look at the type count instead. Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> Signed-off-by: Adin Scannell <adin@scannell.ca> diff -r decd21170c2a -r 27031a8a4eff xen/arch/x86/mm/p2m.c --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -761,7 +761,7 @@ int p2m_mem_paging_nominate(struct domai (1 | PGC_allocated) ) goto out; - if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_none ) + if ( (page->u.inuse.type_info & PGT_count_mask) != 0 ) goto out; /* Fix p2m entry */ @@ -823,7 +823,7 @@ int p2m_mem_paging_evict(struct domain * (2 | PGC_allocated) ) goto out_put; - if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_none ) + if ( (page->u.inuse.type_info & PGT_count_mask) != 0 ) goto out_put; /* Decrement guest domain's ref count of the page */
Andres Lagar-Cavilla
2012-Feb-01 19:51 UTC
[PATCH 3 of 9] x86/mm: Refactor possibly deadlocking get_gfn calls
xen/arch/x86/hvm/emulate.c | 35 +++++++-------- xen/arch/x86/mm/mem_sharing.c | 28 +++++++------ xen/include/asm-x86/p2m.h | 91 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 32 deletions(-) When calling get_gfn multiple times on different gfn''s in the same function, we can easily deadlock if p2m lookups are locked. Thus, refactor these calls to enforce simple deadlock-avoidance rules: - Lowest-numbered domain first - Lowest-numbered gfn first Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavila.org> diff -r 27031a8a4eff -r 3de7e43b130a xen/arch/x86/hvm/emulate.c --- a/xen/arch/x86/hvm/emulate.c +++ b/xen/arch/x86/hvm/emulate.c @@ -660,12 +660,13 @@ static int hvmemul_rep_movs( { struct hvm_emulate_ctxt *hvmemul_ctxt container_of(ctxt, struct hvm_emulate_ctxt, ctxt); - unsigned long saddr, daddr, bytes, sgfn, dgfn; + unsigned long saddr, daddr, bytes; paddr_t sgpa, dgpa; uint32_t pfec = PFEC_page_present; - p2m_type_t p2mt; + p2m_type_t sp2mt, dp2mt; int rc, df = !!(ctxt->regs->eflags & X86_EFLAGS_DF); char *buf; + struct two_gfns *tg; rc = hvmemul_virtual_to_linear( src_seg, src_offset, bytes_per_rep, reps, hvm_access_read, @@ -693,26 +694,25 @@ static int hvmemul_rep_movs( if ( rc != X86EMUL_OKAY ) return rc; - /* XXX In a fine-grained p2m locking scenario, we need to sort this - * get_gfn''s, or else we might deadlock */ - sgfn = sgpa >> PAGE_SHIFT; - (void)get_gfn(current->domain, sgfn, &p2mt); - if ( !p2m_is_ram(p2mt) && !p2m_is_grant(p2mt) ) + tg = get_two_gfns(current->domain, sgpa >> PAGE_SHIFT, &sp2mt, NULL, NULL, + current->domain, dgpa >> PAGE_SHIFT, &dp2mt, NULL, NULL, + p2m_guest); + if ( !tg ) + return X86EMUL_UNHANDLEABLE; + + if ( !p2m_is_ram(sp2mt) && !p2m_is_grant(sp2mt) ) { rc = hvmemul_do_mmio( sgpa, reps, bytes_per_rep, dgpa, IOREQ_READ, df, NULL); - put_gfn(current->domain, sgfn); + put_two_gfns(&tg); return rc; } - dgfn = dgpa >> PAGE_SHIFT; - (void)get_gfn(current->domain, dgfn, &p2mt); - if ( 
!p2m_is_ram(p2mt) && !p2m_is_grant(p2mt) ) + if ( !p2m_is_ram(dp2mt) && !p2m_is_grant(dp2mt) ) { rc = hvmemul_do_mmio( dgpa, reps, bytes_per_rep, sgpa, IOREQ_WRITE, df, NULL); - put_gfn(current->domain, sgfn); - put_gfn(current->domain, dgfn); + put_two_gfns(&tg); return rc; } @@ -730,8 +730,7 @@ static int hvmemul_rep_movs( */ if ( ((dgpa + bytes_per_rep) > sgpa) && (dgpa < (sgpa + bytes)) ) { - put_gfn(current->domain, sgfn); - put_gfn(current->domain, dgfn); + put_two_gfns(&tg); return X86EMUL_UNHANDLEABLE; } @@ -743,8 +742,7 @@ static int hvmemul_rep_movs( buf = xmalloc_bytes(bytes); if ( buf == NULL ) { - put_gfn(current->domain, sgfn); - put_gfn(current->domain, dgfn); + put_two_gfns(&tg); return X86EMUL_UNHANDLEABLE; } @@ -757,8 +755,7 @@ static int hvmemul_rep_movs( rc = hvm_copy_to_guest_phys(dgpa, buf, bytes); xfree(buf); - put_gfn(current->domain, sgfn); - put_gfn(current->domain, dgfn); + put_two_gfns(&tg); if ( rc == HVMCOPY_gfn_paged_out ) return X86EMUL_RETRY; diff -r 27031a8a4eff -r 3de7e43b130a xen/arch/x86/mm/mem_sharing.c --- a/xen/arch/x86/mm/mem_sharing.c +++ b/xen/arch/x86/mm/mem_sharing.c @@ -718,11 +718,13 @@ int mem_sharing_share_pages(struct domai int ret = -EINVAL; mfn_t smfn, cmfn; p2m_type_t smfn_type, cmfn_type; + struct two_gfns *tg; - /* XXX if sd == cd handle potential deadlock by ordering - * the get_ and put_gfn''s */ - smfn = get_gfn(sd, sgfn, &smfn_type); - cmfn = get_gfn(cd, cgfn, &cmfn_type); + tg = get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn, + cd, cgfn, &cmfn_type, NULL, &cmfn, + p2m_query); + if ( !tg ) + return -ENOMEM; /* This tricky business is to avoid two callers deadlocking if * grabbing pages in opposite client/source order */ @@ -819,8 +821,7 @@ int mem_sharing_share_pages(struct domai ret = 0; err_out: - put_gfn(cd, cgfn); - put_gfn(sd, sgfn); + put_two_gfns(&tg); return ret; } @@ -834,11 +835,13 @@ int mem_sharing_add_to_physmap(struct do struct gfn_info *gfn_info; struct p2m_domain *p2m = p2m_get_hostp2m(cd); 
p2m_access_t a; - - /* XXX if sd == cd handle potential deadlock by ordering - * the get_ and put_gfn''s */ - smfn = get_gfn_query(sd, sgfn, &smfn_type); - cmfn = get_gfn_type_access(p2m, cgfn, &cmfn_type, &a, p2m_query, NULL); + struct two_gfns *tg; + + tg = get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn, + cd, cgfn, &cmfn_type, &a, &cmfn, + p2m_query); + if ( !tg ) + return -ENOMEM; /* Get the source shared page, check and lock */ ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID; @@ -886,8 +889,7 @@ int mem_sharing_add_to_physmap(struct do err_unlock: mem_sharing_page_unlock(spage); err_out: - put_gfn(cd, cgfn); - put_gfn(sd, sgfn); + put_two_gfns(&tg); return ret; } diff -r 27031a8a4eff -r 3de7e43b130a xen/include/asm-x86/p2m.h --- a/xen/include/asm-x86/p2m.h +++ b/xen/include/asm-x86/p2m.h @@ -378,6 +378,97 @@ static inline unsigned long mfn_to_gfn(s return mfn_x(mfn); } +/* Deadlock-avoidance scheme when calling get_gfn on different gfn''s */ +struct two_gfns { + struct domain *first_domain; + unsigned long first_gfn; + struct domain *second_domain; + unsigned long second_gfn; +}; + +#define assign_pointers(dest, source) \ +do { \ + dest ## _mfn = (source ## mfn) ? (source ## mfn) : &__ ## dest ## _mfn; \ + dest ## _a = (source ## a) ? (source ## a) : &__ ## dest ## _a; \ + dest ## _t = (source ## t) ? 
(source ## t) : &__ ## dest ## _t; \ +} while(0) + +/* Returns mfn, type and access for potential caller consumption, but any + * of those can be NULL */ +static inline struct two_gfns *get_two_gfns(struct domain *rd, unsigned long rgfn, + p2m_type_t *rt, p2m_access_t *ra, mfn_t *rmfn, struct domain *ld, + unsigned long lgfn, p2m_type_t *lt, p2m_access_t *la, mfn_t *lmfn, + p2m_query_t q) +{ + mfn_t *first_mfn, *second_mfn, __first_mfn, __second_mfn; + p2m_access_t *first_a, *second_a, __first_a, __second_a; + p2m_type_t *first_t, *second_t, __first_t, __second_t; + + struct two_gfns *rval = xmalloc(struct two_gfns); + if ( !rval ) + return NULL; + + if ( rd == ld ) + { + rval->first_domain = rval->second_domain = rd; + + /* Sort by gfn */ + if (rgfn <= lgfn) + { + rval->first_gfn = rgfn; + rval->second_gfn = lgfn; + assign_pointers(first, r); + assign_pointers(second, l); + } else { + rval->first_gfn = lgfn; + rval->second_gfn = rgfn; + assign_pointers(first, l); + assign_pointers(second, r); + } + } else { + /* Sort by domain */ + if ( rd->domain_id <= ld->domain_id ) + { + rval->first_domain = rd; + rval->first_gfn = rgfn; + rval->second_domain = ld; + rval->second_gfn = lgfn; + assign_pointers(first, r); + assign_pointers(second, l); + } else { + rval->first_domain = ld; + rval->first_gfn = lgfn; + rval->second_domain = rd; + rval->second_gfn = rgfn; + assign_pointers(first, l); + assign_pointers(second, r); + } + } + + /* Now do the gets */ + *first_mfn = get_gfn_type_access(p2m_get_hostp2m(rval->first_domain), + rval->first_gfn, first_t, first_a, q, NULL); + *second_mfn = get_gfn_type_access(p2m_get_hostp2m(rval->second_domain), + rval->second_gfn, second_t, second_a, q, NULL); + + return rval; +} + +static inline void put_two_gfns(struct two_gfns **arg_ptr) +{ + struct two_gfns *arg; + + if ( !arg_ptr || !(*arg_ptr) ) + return; + + arg = *arg_ptr; + put_gfn(arg->second_domain, arg->second_gfn); + put_gfn(arg->first_domain, arg->first_gfn); + + xfree(arg); + 
*arg_ptr = NULL; +} + /* Init the datastructures for later use by the p2m code */ int p2m_init(struct domain *d);
Andres Lagar-Cavilla
2012-Feb-01 19:51 UTC
[PATCH 4 of 9] Reorder locks used by shadow code in anticipation of synchronized p2m lookups
xen/arch/x86/mm/shadow/common.c | 3 +++ xen/arch/x86/mm/shadow/multi.c | 18 +++++++++--------- 2 files changed, 12 insertions(+), 9 deletions(-) Currently, mm-locks.h enforces a strict ordering between locks in the mm layer lest there be an inversion in the order locks are taken and thus the risk of deadlock. Once p2m lookups becoming synchronized, get_gfn* calls take the p2m lock, and a new set of inversion arises. Reorder some of the locks in the shadow code so that even in this case no deadlocks happen. After this, synchronized p2m lookups are in principle ready to be enabled in shadow mode. Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> diff -r 3de7e43b130a -r 8a920bcddd0f xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c +++ b/xen/arch/x86/mm/shadow/common.c @@ -3609,6 +3609,8 @@ int shadow_track_dirty_vram(struct domai || end_pfn >= p2m->max_mapped_pfn) return -EINVAL; + /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */ + p2m_lock(p2m_get_hostp2m(d)); paging_lock(d); if ( dirty_vram && (!nr || @@ -3782,6 +3784,7 @@ out_dirty_vram: out: paging_unlock(d); + p2m_unlock(p2m_get_hostp2m(d)); return rc; } diff -r 3de7e43b130a -r 8a920bcddd0f xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c +++ b/xen/arch/x86/mm/shadow/multi.c @@ -2444,7 +2444,7 @@ static int validate_gl1e(struct vcpu *v, perfc_incr(shadow_validate_gl1e_calls); gfn = guest_l1e_get_gfn(new_gl1e); - gmfn = get_gfn_query(v->domain, gfn, &p2mt); + gmfn = get_gfn_query_unlocked(v->domain, gfn_x(gfn), &p2mt); l1e_propagate_from_guest(v, new_gl1e, gmfn, &new_sl1e, ft_prefetch, p2mt); result |= shadow_set_l1e(v, sl1p, new_sl1e, p2mt, sl1mfn); @@ -2466,7 +2466,6 @@ static int validate_gl1e(struct vcpu *v, } #endif /* OOS */ - put_gfn(v->domain, gfn_x(gfn)); return result; } @@ -4715,8 +4714,6 @@ static void sh_pagetable_dying(struct vc unsigned long l3gfn; mfn_t l3mfn; - paging_lock(v->domain); - gcr3 = 
(v->arch.hvm_vcpu.guest_cr[3]); /* fast path: the pagetable belongs to the current context */ if ( gcr3 == gpa ) @@ -4728,8 +4725,11 @@ static void sh_pagetable_dying(struct vc { printk(XENLOG_DEBUG "sh_pagetable_dying: gpa not valid %"PRIpaddr"\n", gpa); - goto out; + goto out_put_gfn; } + + paging_lock(v->domain); + if ( !fast_path ) { gl3pa = sh_map_domain_page(l3mfn); @@ -4770,11 +4770,11 @@ static void sh_pagetable_dying(struct vc v->arch.paging.shadow.pagetable_dying = 1; -out: if ( !fast_path ) unmap_domain_page(gl3pa); + paging_unlock(v->domain); +out_put_gfn: put_gfn(v->domain, l3gfn); - paging_unlock(v->domain); } #else static void sh_pagetable_dying(struct vcpu *v, paddr_t gpa) @@ -4782,15 +4782,14 @@ static void sh_pagetable_dying(struct vc mfn_t smfn, gmfn; p2m_type_t p2mt; + gmfn = get_gfn_query(v->domain, _gfn(gpa >> PAGE_SHIFT), &p2mt); paging_lock(v->domain); - gmfn = get_gfn_query(v->domain, _gfn(gpa >> PAGE_SHIFT), &p2mt); #if GUEST_PAGING_LEVELS == 2 smfn = shadow_hash_lookup(v, mfn_x(gmfn), SH_type_l2_32_shadow); #else smfn = shadow_hash_lookup(v, mfn_x(gmfn), SH_type_l4_64_shadow); #endif - put_gfn(v->domain, gpa >> PAGE_SHIFT); if ( mfn_valid(smfn) ) { @@ -4808,6 +4807,7 @@ static void sh_pagetable_dying(struct vc v->arch.paging.shadow.pagetable_dying = 1; paging_unlock(v->domain); + put_gfn(v->domain, gpa >> PAGE_SHIFT); } #endif
Andres Lagar-Cavilla
2012-Feb-01 19:51 UTC
[PATCH 5 of 9] x86/mm: When removing/adding a page from/to the physmap, keep in mind it could be shared
xen/arch/x86/mm/p2m.c | 21 ++++++++++++++++++++- 1 files changed, 20 insertions(+), 1 deletions(-) When removing the m2p mapping it is unconditionally set to invalid, which breaks sharing. When adding to the physmap, if the previous holder of that entry is a shared page, we unshare to default to normal case handling. And, we cannot add a shared page directly to the physmap. Proper interfaces must be employed, otherwise book-keeping goes awry. Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> diff -r 8a920bcddd0f -r 1c61573d1765 xen/arch/x86/mm/p2m.c --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -419,7 +419,7 @@ p2m_remove_page(struct p2m_domain *p2m, for ( i = 0; i < (1UL << page_order); i++ ) { mfn_return = p2m->get_entry(p2m, gfn + i, &t, &a, p2m_query, NULL); - if ( !p2m_is_grant(t) ) + if ( !p2m_is_grant(t) && !p2m_is_shared(t) ) set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY); ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) ); } @@ -481,6 +481,17 @@ guest_physmap_add_entry(struct domain *d for ( i = 0; i < (1UL << page_order); i++ ) { omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, p2m_query, NULL); + if ( p2m_is_shared(ot) ) + { + /* Do an unshare to cleanly take care of all corner + * cases. */ + int rc; + rc = mem_sharing_unshare_page(p2m->domain, gfn + i, 0); + if ( rc ) + return rc; + omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, p2m_query, NULL); + ASSERT(!p2m_is_shared(ot)); + } if ( p2m_is_grant(ot) ) { /* Really shouldn''t be unmapping grant maps this way */ @@ -504,6 +515,14 @@ guest_physmap_add_entry(struct domain *d /* Then, look for m->p mappings for this range and deal with them */ for ( i = 0; i < (1UL << page_order); i++ ) { + if ( page_get_owner(mfn_to_page(_mfn(mfn + i))) == dom_cow ) + { + /* This is no way to add a shared page to your physmap! 
*/ + gdprintk(XENLOG_ERR, "Adding shared mfn %lx directly to dom %hu " + "physmap not allowed.\n", mfn+i, d->domain_id); + p2m_unlock(p2m); + return -EINVAL; + } if ( page_get_owner(mfn_to_page(_mfn(mfn + i))) != d ) continue; ogfn = mfn_to_gfn(d, _mfn(mfn+i));
xen/common/memory.c | 12 +++++++----- 1 files changed, 7 insertions(+), 5 deletions(-) Never mind that ballooning a shared page makes no sense. We still fix it because it may be exercised. Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> diff -r 1c61573d1765 -r 75dec1a0ba2d xen/common/memory.c --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -185,12 +185,14 @@ int guest_remove_page(struct domain *d, #ifdef CONFIG_X86 /* If gmfn is shared, just drop the guest reference (which may or may not * free the page) */ - if(p2m_is_shared(p2mt)) + if ( p2m_is_shared(p2mt) ) { - put_page_and_type(page); - guest_physmap_remove_page(d, gmfn, mfn, 0); - put_gfn(d, gmfn); - return 1; + /* Unshare the page, bail out on error. We unshare because + * we might be the only one using this shared page, and we + * need to trigger proper cleanup. Once done, this is + * like any other page. */ + if ( mem_sharing_unshare_page(d, gmfn, 0) ) + return 0; } #endif /* CONFIG_X86 */
xen/arch/x86/mm/mem_sharing.c | 6 +++++- xen/arch/x86/mm/p2m.c | 12 +++++++++++- xen/common/memory.c | 2 +- xen/include/asm-x86/p2m.h | 6 ++++-- 4 files changed, 21 insertions(+), 5 deletions(-) There are several corner cases in which a page is paged back in, not by paging, and the stats are not properly updated. Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> diff -r 75dec1a0ba2d -r 244804e8a002 xen/arch/x86/mm/mem_sharing.c --- a/xen/arch/x86/mm/mem_sharing.c +++ b/xen/arch/x86/mm/mem_sharing.c @@ -881,8 +881,12 @@ int mem_sharing_add_to_physmap(struct do ret = -ENOENT; mem_sharing_gfn_destroy(cd, gfn_info); put_page_and_type(spage); - } else + } else { ret = 0; + /* There is a chance we''re plugging a hole where a paged out page was */ + if ( p2m_is_paging(cmfn_type) && (cmfn_type != p2m_ram_paging_out) ) + atomic_dec(&cd->paged_pages); + } atomic_inc(&nr_saved_mfns); diff -r 75dec1a0ba2d -r 244804e8a002 xen/arch/x86/mm/p2m.c --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -510,6 +510,11 @@ guest_physmap_add_entry(struct domain *d /* Count how man PoD entries we''ll be replacing if successful */ pod_count++; } + else if ( p2m_is_paging(ot) && (ot != p2m_ram_paging_out) ) + { + /* We''re plugging a hole in the physmap where a paged out page was */ + atomic_dec(&d->paged_pages); + } } /* Then, look for m->p mappings for this range and deal with them */ @@ -878,7 +883,8 @@ int p2m_mem_paging_evict(struct domain * * released by the guest. The pager is supposed to drop its reference of the * gfn. 
*/ -void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn) +void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn, + p2m_type_t p2mt) { mem_event_request_t req; @@ -897,6 +903,10 @@ void p2m_mem_paging_drop_page(struct dom req.flags = MEM_EVENT_FLAG_DROP_PAGE; mem_event_put_request(d, &d->mem_event->paging, &req); + + /* Update stats unless the page hasn''t yet been evicted */ + if ( p2mt != p2m_ram_paging_out ) + atomic_dec(&d->paged_pages); } /** diff -r 75dec1a0ba2d -r 244804e8a002 xen/common/memory.c --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -166,7 +166,7 @@ int guest_remove_page(struct domain *d, if ( unlikely(p2m_is_paging(p2mt)) ) { guest_physmap_remove_page(d, gmfn, mfn, 0); - p2m_mem_paging_drop_page(d, gmfn); + p2m_mem_paging_drop_page(d, gmfn, p2mt); put_gfn(d, gmfn); return 1; } diff -r 75dec1a0ba2d -r 244804e8a002 xen/include/asm-x86/p2m.h --- a/xen/include/asm-x86/p2m.h +++ b/xen/include/asm-x86/p2m.h @@ -566,7 +566,8 @@ int p2m_mem_paging_nominate(struct domai /* Evict a frame */ int p2m_mem_paging_evict(struct domain *d, unsigned long gfn); /* Tell xenpaging to drop a paged out frame */ -void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn); +void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn, + p2m_type_t p2mt); /* Start populating a paged out frame */ void p2m_mem_paging_populate(struct domain *d, unsigned long gfn); /* Prepare the p2m for paging a frame in */ @@ -574,7 +575,8 @@ int p2m_mem_paging_prep(struct domain *d /* Resume normal operation (in case a domain was paused) */ void p2m_mem_paging_resume(struct domain *d); #else -static inline void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn) +static inline void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn, + p2m_type_t p2mt) { } static inline void p2m_mem_paging_populate(struct domain *d, unsigned long gfn) { }
Andres Lagar-Cavilla
2012-Feb-01 19:52 UTC
[PATCH 8 of 9] x86/mm: Make sharing ASSERT check more accurate
xen/arch/x86/mm/mem_sharing.c | 7 ++++++- 1 files changed, 6 insertions(+), 1 deletions(-) Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> diff -r 244804e8a002 -r 12f7da67cefe xen/arch/x86/mm/mem_sharing.c --- a/xen/arch/x86/mm/mem_sharing.c +++ b/xen/arch/x86/mm/mem_sharing.c @@ -197,7 +197,12 @@ static struct page_info* mem_sharing_loo struct page_info* page = mfn_to_page(_mfn(mfn)); if ( page_get_owner(page) == dom_cow ) { - ASSERT(page->u.inuse.type_info & PGT_type_mask); + /* Count has to be at least two, because we're called + * with the mfn locked (1) and this is supposed to be + * a shared page (1). */ + ASSERT( (page->u.inuse.type_info & + (PGT_shared_page | PGT_count_mask)) >= + (PGT_shared_page | 2) ); ASSERT(get_gpfn_from_mfn(mfn) == SHARED_M2P_ENTRY); return page; }
Andres Lagar-Cavilla
2012-Feb-01 19:52 UTC
[PATCH 9 of 9] x86/mm: Make debug_{gfn, mfn, gref} calls to sharing more useful and correct
xen/arch/x86/mm/mem_sharing.c | 224 +++++++++++++++++++++-------------------- 1 files changed, 115 insertions(+), 109 deletions(-) Have them used locked accesors to the gfn and the underlying shared mfn. Have them return the number of shared refs to the underlying mfn. Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> diff -r 12f7da67cefe -r 9a55109e4d7e xen/arch/x86/mm/mem_sharing.c --- a/xen/arch/x86/mm/mem_sharing.c +++ b/xen/arch/x86/mm/mem_sharing.c @@ -45,13 +45,13 @@ typedef struct pg_lock_data { DEFINE_PER_CPU(pg_lock_data_t, __pld); +#define MEM_SHARING_DEBUG(_f, _a...) \ + debugtrace_printk("mem_sharing_debug: %s(): " _f, __func__, ##_a) + #if MEM_SHARING_AUDIT static void mem_sharing_audit(void); -#define MEM_SHARING_DEBUG(_f, _a...) \ - debugtrace_printk("mem_sharing_debug: %s(): " _f, __func__, ##_a) - static struct list_head shr_audit_list; static spinlock_t shr_audit_lock; DEFINE_RCU_READ_LOCK(shr_audit_read_lock); @@ -400,111 +400,6 @@ int mem_sharing_sharing_resume(struct do return 0; } -int mem_sharing_debug_mfn(unsigned long mfn) -{ - struct page_info *page; - - if ( !mfn_valid(_mfn(mfn)) ) - { - gdprintk(XENLOG_ERR, "Invalid MFN=%lx\n", mfn); - return -1; - } - page = mfn_to_page(_mfn(mfn)); - - gdprintk(XENLOG_DEBUG, - "Debug page: MFN=%lx is ci=%lx, ti=%lx, owner_id=%d\n", - mfn_x(page_to_mfn(page)), - page->count_info, - page->u.inuse.type_info, - page_get_owner(page)->domain_id); - - return 0; -} - -int mem_sharing_debug_gfn(struct domain *d, unsigned long gfn) -{ - p2m_type_t p2mt; - mfn_t mfn; - - mfn = get_gfn_query_unlocked(d, gfn, &p2mt); - - gdprintk(XENLOG_DEBUG, "Debug for domain=%d, gfn=%lx, ", - d->domain_id, - gfn); - return mem_sharing_debug_mfn(mfn_x(mfn)); -} - -#define SHGNT_PER_PAGE_V1 (PAGE_SIZE / sizeof(grant_entry_v1_t)) -#define shared_entry_v1(t, e) \ - ((t)->shared_v1[(e)/SHGNT_PER_PAGE_V1][(e)%SHGNT_PER_PAGE_V1]) -#define SHGNT_PER_PAGE_V2 (PAGE_SIZE / sizeof(grant_entry_v2_t)) -#define shared_entry_v2(t, 
e) \ - ((t)->shared_v2[(e)/SHGNT_PER_PAGE_V2][(e)%SHGNT_PER_PAGE_V2]) -#define STGNT_PER_PAGE (PAGE_SIZE / sizeof(grant_status_t)) -#define status_entry(t, e) \ - ((t)->status[(e)/STGNT_PER_PAGE][(e)%STGNT_PER_PAGE]) - -static grant_entry_header_t * -shared_entry_header(struct grant_table *t, grant_ref_t ref) -{ - ASSERT (t->gt_version != 0); - if ( t->gt_version == 1 ) - return (grant_entry_header_t*)&shared_entry_v1(t, ref); - else - return &shared_entry_v2(t, ref).hdr; -} - -static int mem_sharing_gref_to_gfn(struct domain *d, - grant_ref_t ref, - unsigned long *gfn) -{ - if ( d->grant_table->gt_version < 1 ) - return -1; - - if ( d->grant_table->gt_version == 1 ) - { - grant_entry_v1_t *sha1; - sha1 = &shared_entry_v1(d->grant_table, ref); - *gfn = sha1->frame; - } - else - { - grant_entry_v2_t *sha2; - sha2 = &shared_entry_v2(d->grant_table, ref); - *gfn = sha2->full_page.frame; - } - - return 0; -} - - -int mem_sharing_debug_gref(struct domain *d, grant_ref_t ref) -{ - grant_entry_header_t *shah; - uint16_t status; - unsigned long gfn; - - if ( d->grant_table->gt_version < 1 ) - { - gdprintk(XENLOG_ERR, - "Asked to debug [dom=%d,gref=%d], but not yet inited.\n", - d->domain_id, ref); - return -1; - } - (void)mem_sharing_gref_to_gfn(d, ref, &gfn); - shah = shared_entry_header(d->grant_table, ref); - if ( d->grant_table->gt_version == 1 ) - status = shah->flags; - else - status = status_entry(d->grant_table, ref); - - gdprintk(XENLOG_DEBUG, - "==> Grant [dom=%d,ref=%d], status=%x. 
", - d->domain_id, ref, status); - - return mem_sharing_debug_gfn(d, gfn); -} - /* Functions that change a page''s type and ownership */ static int page_make_sharable(struct domain *d, struct page_info *page, @@ -606,6 +501,117 @@ static inline struct page_info *__grab_s return pg; } +int mem_sharing_debug_mfn(mfn_t mfn) +{ + struct page_info *page; + int num_refs; + + if ( (page = __grab_shared_page(mfn)) == NULL) + { + gdprintk(XENLOG_ERR, "Invalid MFN=%lx\n", mfn_x(mfn)); + return -1; + } + + MEM_SHARING_DEBUG( + "Debug page: MFN=%lx is ci=%lx, ti=%lx, owner_id=%d\n", + mfn_x(page_to_mfn(page)), + page->count_info, + page->u.inuse.type_info, + page_get_owner(page)->domain_id); + + /* -1 because the page is locked and that''s an additional type ref */ + num_refs = ((int) (page->u.inuse.type_info & PGT_count_mask)) - 1; + mem_sharing_page_unlock(page); + return num_refs; +} + +int mem_sharing_debug_gfn(struct domain *d, unsigned long gfn) +{ + p2m_type_t p2mt; + mfn_t mfn; + int num_refs; + + mfn = get_gfn_query(d, gfn, &p2mt); + + MEM_SHARING_DEBUG("Debug for domain=%d, gfn=%lx, ", + d->domain_id, + gfn); + num_refs = mem_sharing_debug_mfn(mfn); + put_gfn(d, gfn); + return num_refs; +} + +#define SHGNT_PER_PAGE_V1 (PAGE_SIZE / sizeof(grant_entry_v1_t)) +#define shared_entry_v1(t, e) \ + ((t)->shared_v1[(e)/SHGNT_PER_PAGE_V1][(e)%SHGNT_PER_PAGE_V1]) +#define SHGNT_PER_PAGE_V2 (PAGE_SIZE / sizeof(grant_entry_v2_t)) +#define shared_entry_v2(t, e) \ + ((t)->shared_v2[(e)/SHGNT_PER_PAGE_V2][(e)%SHGNT_PER_PAGE_V2]) +#define STGNT_PER_PAGE (PAGE_SIZE / sizeof(grant_status_t)) +#define status_entry(t, e) \ + ((t)->status[(e)/STGNT_PER_PAGE][(e)%STGNT_PER_PAGE]) + +static grant_entry_header_t * +shared_entry_header(struct grant_table *t, grant_ref_t ref) +{ + ASSERT (t->gt_version != 0); + if ( t->gt_version == 1 ) + return (grant_entry_header_t*)&shared_entry_v1(t, ref); + else + return &shared_entry_v2(t, ref).hdr; +} + +static int mem_sharing_gref_to_gfn(struct domain 
*d, + grant_ref_t ref, + unsigned long *gfn) +{ + if ( d->grant_table->gt_version < 1 ) + return -1; + + if ( d->grant_table->gt_version == 1 ) + { + grant_entry_v1_t *sha1; + sha1 = &shared_entry_v1(d->grant_table, ref); + *gfn = sha1->frame; + } + else + { + grant_entry_v2_t *sha2; + sha2 = &shared_entry_v2(d->grant_table, ref); + *gfn = sha2->full_page.frame; + } + + return 0; +} + + +int mem_sharing_debug_gref(struct domain *d, grant_ref_t ref) +{ + grant_entry_header_t *shah; + uint16_t status; + unsigned long gfn; + + if ( d->grant_table->gt_version < 1 ) + { + MEM_SHARING_DEBUG( + "Asked to debug [dom=%d,gref=%d], but not yet inited.\n", + d->domain_id, ref); + return -1; + } + (void)mem_sharing_gref_to_gfn(d, ref, &gfn); + shah = shared_entry_header(d->grant_table, ref); + if ( d->grant_table->gt_version == 1 ) + status = shah->flags; + else + status = status_entry(d->grant_table, ref); + + MEM_SHARING_DEBUG( + "==> Grant [dom=%d,ref=%d], status=%x. ", + d->domain_id, ref, status); + + return mem_sharing_debug_gfn(d, gfn); +} + int mem_sharing_nominate_page(struct domain *d, unsigned long gfn, int expected_refcnt, @@ -1169,7 +1175,7 @@ int mem_sharing_domctl(struct domain *d, case XEN_DOMCTL_MEM_EVENT_OP_SHARING_DEBUG_MFN: { unsigned long mfn = mec->u.debug.u.mfn; - rc = mem_sharing_debug_mfn(mfn); + rc = mem_sharing_debug_mfn(_mfn(mfn)); } break;
Tim Deegan
2012-Feb-02 12:32 UTC
Re: [PATCH 0 of 9] x86/mm: Fixes to sharing, paging and p2m
At 14:51 -0500 on 01 Feb (1328107912), Andres Lagar-Cavilla wrote:> This patch series aggregates a number of fixes to different areas of mm: > > Sharing: > - Make sharing play nice with balloon > - Make physmap manipulations deall correctly with shared pages > - Make sharing debug calls use locked accessors and return useful information > > Paging: > - Eliminate a needless state in the paging state machine > - Fix stats/accounting > - Fix page type check when nominating or evicting a page > > P2M: > This changes clear hurdles in advance of a fully-synchronized p2m > - Eliminate possibility of deadlock in nested lookups > - Reorder some locks taken by the sharing subsystem > > Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> > Signed-off-by: Adin Scannell <adin@scannell.ca>- #3 and #5 I'll comment on separately - #6 I applied, but updated to remove the outdated comment just above. - #8 I applied, but removed the type from the check, because it makes no sense with the >=. - the others I applied as-is. Cheers, Tim.
Tim Deegan
2012-Feb-02 12:39 UTC
Re: [PATCH 3 of 9] x86/mm: Refactor possibly deadlocking get_gfn calls
At 14:51 -0500 on 01 Feb (1328107915), Andres Lagar-Cavilla wrote:> xen/arch/x86/hvm/emulate.c | 35 +++++++-------- > xen/arch/x86/mm/mem_sharing.c | 28 +++++++------ > xen/include/asm-x86/p2m.h | 91 +++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 122 insertions(+), 32 deletions(-) > > > When calling get_gfn multiple times on different gfn''s in the same function, we > can easily deadlock if p2m lookups are locked. Thus, refactor these calls to > enforce simple deadlock-avoidance rules: > - Lowest-numbered domain first > - Lowest-numbered gfn firstThis is a good idea, and I like the get_two_gfns() abstraction, but: - I think the two_gfns struct should probably just live on the stack instead of malloc()ing it up every time. It's not very big. - the implementation of get_two_gfns() seems to be very complex; I'm sure it could be simplified. At the very least, you could avoid a bit of duplication by just deciding once which order to do the gets in and then running all the setup and get code once. 
Cheers, Tim.> Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavila.org> > > diff -r 27031a8a4eff -r 3de7e43b130a xen/arch/x86/hvm/emulate.c > --- a/xen/arch/x86/hvm/emulate.c > +++ b/xen/arch/x86/hvm/emulate.c > @@ -660,12 +660,13 @@ static int hvmemul_rep_movs( > { > struct hvm_emulate_ctxt *hvmemul_ctxt > container_of(ctxt, struct hvm_emulate_ctxt, ctxt); > - unsigned long saddr, daddr, bytes, sgfn, dgfn; > + unsigned long saddr, daddr, bytes; > paddr_t sgpa, dgpa; > uint32_t pfec = PFEC_page_present; > - p2m_type_t p2mt; > + p2m_type_t sp2mt, dp2mt; > int rc, df = !!(ctxt->regs->eflags & X86_EFLAGS_DF); > char *buf; > + struct two_gfns *tg; > > rc = hvmemul_virtual_to_linear( > src_seg, src_offset, bytes_per_rep, reps, hvm_access_read, > @@ -693,26 +694,25 @@ static int hvmemul_rep_movs( > if ( rc != X86EMUL_OKAY ) > return rc; > > - /* XXX In a fine-grained p2m locking scenario, we need to sort this > - * get_gfn''s, or else we might deadlock */ > - sgfn = sgpa >> PAGE_SHIFT; > - (void)get_gfn(current->domain, sgfn, &p2mt); > - if ( !p2m_is_ram(p2mt) && !p2m_is_grant(p2mt) ) > + tg = get_two_gfns(current->domain, sgpa >> PAGE_SHIFT, &sp2mt, NULL, NULL, > + current->domain, dgpa >> PAGE_SHIFT, &dp2mt, NULL, NULL, > + p2m_guest); > + if ( !tg ) > + return X86EMUL_UNHANDLEABLE; > + > + if ( !p2m_is_ram(sp2mt) && !p2m_is_grant(sp2mt) ) > { > rc = hvmemul_do_mmio( > sgpa, reps, bytes_per_rep, dgpa, IOREQ_READ, df, NULL); > - put_gfn(current->domain, sgfn); > + put_two_gfns(&tg); > return rc; > } > > - dgfn = dgpa >> PAGE_SHIFT; > - (void)get_gfn(current->domain, dgfn, &p2mt); > - if ( !p2m_is_ram(p2mt) && !p2m_is_grant(p2mt) ) > + if ( !p2m_is_ram(dp2mt) && !p2m_is_grant(dp2mt) ) > { > rc = hvmemul_do_mmio( > dgpa, reps, bytes_per_rep, sgpa, IOREQ_WRITE, df, NULL); > - put_gfn(current->domain, sgfn); > - put_gfn(current->domain, dgfn); > + put_two_gfns(&tg); > return rc; > } > > @@ -730,8 +730,7 @@ static int hvmemul_rep_movs( > */ > if ( ((dgpa + bytes_per_rep) 
> sgpa) && (dgpa < (sgpa + bytes)) ) > { > - put_gfn(current->domain, sgfn); > - put_gfn(current->domain, dgfn); > + put_two_gfns(&tg); > return X86EMUL_UNHANDLEABLE; > } > > @@ -743,8 +742,7 @@ static int hvmemul_rep_movs( > buf = xmalloc_bytes(bytes); > if ( buf == NULL ) > { > - put_gfn(current->domain, sgfn); > - put_gfn(current->domain, dgfn); > + put_two_gfns(&tg); > return X86EMUL_UNHANDLEABLE; > } > > @@ -757,8 +755,7 @@ static int hvmemul_rep_movs( > rc = hvm_copy_to_guest_phys(dgpa, buf, bytes); > > xfree(buf); > - put_gfn(current->domain, sgfn); > - put_gfn(current->domain, dgfn); > + put_two_gfns(&tg); > > if ( rc == HVMCOPY_gfn_paged_out ) > return X86EMUL_RETRY; > diff -r 27031a8a4eff -r 3de7e43b130a xen/arch/x86/mm/mem_sharing.c > --- a/xen/arch/x86/mm/mem_sharing.c > +++ b/xen/arch/x86/mm/mem_sharing.c > @@ -718,11 +718,13 @@ int mem_sharing_share_pages(struct domai > int ret = -EINVAL; > mfn_t smfn, cmfn; > p2m_type_t smfn_type, cmfn_type; > + struct two_gfns *tg; > > - /* XXX if sd == cd handle potential deadlock by ordering > - * the get_ and put_gfn''s */ > - smfn = get_gfn(sd, sgfn, &smfn_type); > - cmfn = get_gfn(cd, cgfn, &cmfn_type); > + tg = get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn, > + cd, cgfn, &cmfn_type, NULL, &cmfn, > + p2m_query); > + if ( !tg ) > + return -ENOMEM; > > /* This tricky business is to avoid two callers deadlocking if > * grabbing pages in opposite client/source order */ > @@ -819,8 +821,7 @@ int mem_sharing_share_pages(struct domai > ret = 0; > > err_out: > - put_gfn(cd, cgfn); > - put_gfn(sd, sgfn); > + put_two_gfns(&tg); > return ret; > } > > @@ -834,11 +835,13 @@ int mem_sharing_add_to_physmap(struct do > struct gfn_info *gfn_info; > struct p2m_domain *p2m = p2m_get_hostp2m(cd); > p2m_access_t a; > - > - /* XXX if sd == cd handle potential deadlock by ordering > - * the get_ and put_gfn''s */ > - smfn = get_gfn_query(sd, sgfn, &smfn_type); > - cmfn = get_gfn_type_access(p2m, cgfn, &cmfn_type, &a, p2m_query, 
NULL); > + struct two_gfns *tg; > + > + tg = get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn, > + cd, cgfn, &cmfn_type, &a, &cmfn, > + p2m_query); > + if ( !tg ) > + return -ENOMEM; > > /* Get the source shared page, check and lock */ > ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID; > @@ -886,8 +889,7 @@ int mem_sharing_add_to_physmap(struct do > err_unlock: > mem_sharing_page_unlock(spage); > err_out: > - put_gfn(cd, cgfn); > - put_gfn(sd, sgfn); > + put_two_gfns(&tg); > return ret; > } > > diff -r 27031a8a4eff -r 3de7e43b130a xen/include/asm-x86/p2m.h > --- a/xen/include/asm-x86/p2m.h > +++ b/xen/include/asm-x86/p2m.h > @@ -378,6 +378,97 @@ static inline unsigned long mfn_to_gfn(s > return mfn_x(mfn); > } > > +/* Deadlock-avoidance scheme when calling get_gfn on different gfn''s */ > +struct two_gfns { > + struct domain *first_domain; > + unsigned long first_gfn; > + struct domain *second_domain; > + unsigned long second_gfn; > +}; > + > +#define assign_pointers(dest, source) \ > +do { \ > + dest ## _mfn = (source ## mfn) ? (source ## mfn) : &__ ## dest ## _mfn; \ > + dest ## _a = (source ## a) ? (source ## a) : &__ ## dest ## _a; \ > + dest ## _t = (source ## t) ? 
(source ## t) : &__ ## dest ## _t; \ > +} while(0) > + > +/* Returns mfn, type and access for potential caller consumption, but any > + * of those can be NULL */ > +static inline struct two_gfns *get_two_gfns(struct domain *rd, unsigned long rgfn, > + p2m_type_t *rt, p2m_access_t *ra, mfn_t *rmfn, struct domain *ld, > + unsigned long lgfn, p2m_type_t *lt, p2m_access_t *la, mfn_t *lmfn, > + p2m_query_t q) > +{ > + mfn_t *first_mfn, *second_mfn, __first_mfn, __second_mfn; > + p2m_access_t *first_a, *second_a, __first_a, __second_a; > + p2m_type_t *first_t, *second_t, __first_t, __second_t; > + > + struct two_gfns *rval = xmalloc(struct two_gfns); > + if ( !rval ) > + return NULL; > + > + if ( rd == ld ) > + { > + rval->first_domain = rval->second_domain = rd; > + > + /* Sort by gfn */ > + if (rgfn <= lgfn) > + { > + rval->first_gfn = rgfn; > + rval->second_gfn = lgfn; > + assign_pointers(first, r); > + assign_pointers(second, l); > + } else { > + rval->first_gfn = lgfn; > + rval->second_gfn = rgfn; > + assign_pointers(first, l); > + assign_pointers(second, r); > + } > + } else { > + /* Sort by domain */ > + if ( rd->domain_id <= ld->domain_id ) > + { > + rval->first_domain = rd; > + rval->first_gfn = rgfn; > + rval->second_domain = ld; > + rval->second_gfn = lgfn; > + assign_pointers(first, r); > + assign_pointers(second, l); > + } else { > + rval->first_domain = ld; > + rval->first_gfn = lgfn; > + rval->second_domain = rd; > + rval->second_gfn = rgfn; > + assign_pointers(first, l); > + assign_pointers(second, r); > + } > + } > + > + /* Now do the gets */ > + *first_mfn = get_gfn_type_access(p2m_get_hostp2m(rval->first_domain), > + rval->first_gfn, first_t, first_a, q, NULL); > + *second_mfn = get_gfn_type_access(p2m_get_hostp2m(rval->second_domain), > + rval->second_gfn, second_t, second_a, q, NULL); > + > + return rval; > +} > + > +static inline void put_two_gfns(struct two_gfns **arg_ptr) > +{ > + struct two_gfns *arg; > + > + if ( !arg_ptr || !(*arg_ptr) ) > + 
return; > + > + arg = *arg_ptr; > + put_gfn(arg->second_domain, arg->second_gfn); > + put_gfn(arg->first_domain, arg->first_gfn); > + > + xfree(arg); > + *arg_ptr = NULL; > +} > + > /* Init the datastructures for later use by the p2m code */ > int p2m_init(struct domain *d); > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel
Tim Deegan
2012-Feb-02 12:41 UTC
Re: [PATCH 5 of 9] x86/mm: When removing/adding a page from/to the physmap, keep in mind it could be shared
At 14:51 -0500 on 01 Feb (1328107917), Andres Lagar-Cavilla wrote:> xen/arch/x86/mm/p2m.c | 21 ++++++++++++++++++++- > 1 files changed, 20 insertions(+), 1 deletions(-) > > > When removing the m2p mapping it is unconditionally set to invalid, which > breaks sharing. > > When adding to the physmap, if the previous holder of that entry is a shared > page, we unshare to default to normal case handling. > > And, we cannot add a shared page directly to the physmap. Proper interfaces > must be employed, otherwise book-keeping goes awry. > > Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> > > diff -r 8a920bcddd0f -r 1c61573d1765 xen/arch/x86/mm/p2m.c > --- a/xen/arch/x86/mm/p2m.c > +++ b/xen/arch/x86/mm/p2m.c > @@ -419,7 +419,7 @@ p2m_remove_page(struct p2m_domain *p2m, > for ( i = 0; i < (1UL << page_order); i++ ) > { > mfn_return = p2m->get_entry(p2m, gfn + i, &t, &a, p2m_query, NULL); > - if ( !p2m_is_grant(t) ) > + if ( !p2m_is_grant(t) && !p2m_is_shared(t) ) > set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY); > ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) ); > } > @@ -481,6 +481,17 @@ guest_physmap_add_entry(struct domain *d > for ( i = 0; i < (1UL << page_order); i++ ) > { > omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, p2m_query, NULL); > + if ( p2m_is_shared(ot) ) > + { > + /* Do an unshare to cleanly take care of all corner > + * cases. */ > + int rc; > + rc = mem_sharing_unshare_page(p2m->domain, gfn + i, 0); > + if ( rc ) > + return rc;You''re holding the p2m lock here! Also, I don''t think you can call mem_sharing_unshare_page() with that held - wasn''t that the reason for cset f6c33cfe7333 ? Tim.
Andres Lagar-Cavilla
2012-Feb-02 13:44 UTC
Re: [PATCH 3 of 9] x86/mm: Refactor possibly deadlocking get_gfn calls
> At 14:51 -0500 on 01 Feb (1328107915), Andres Lagar-Cavilla wrote: >> xen/arch/x86/hvm/emulate.c | 35 +++++++-------- >> xen/arch/x86/mm/mem_sharing.c | 28 +++++++------ >> xen/include/asm-x86/p2m.h | 91 >> +++++++++++++++++++++++++++++++++++++++++++ >> 3 files changed, 122 insertions(+), 32 deletions(-) >> >> >> When calling get_gfn multiple times on different gfn''s in the same >> function, we >> can easily deadlock if p2m lookups are locked. Thus, refactor these >> calls to >> enforce simple deadlock-avoidance rules: >> - Lowest-numbered domain first >> - Lowest-numbered gfn first > > This is a good idea, and I like the get_two_gfns() abstraction, but: > - I think the two_gfns struct should proabbly just live on the stack > instead of malloc()ing it up every time. It''s not very big. > - the implementation of get_two_gfns() seems to be very complex; I''m > sure it could be simplified. At the very least, you could avoid a > bit of duplication by just deciding once which order to do the gets > in and then running all the setu and get code once.Reasonable. Will do both, repost later. Thanks! Andres> > Cheers, > > Tim. 
> >> Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavila.org> >> >> diff -r 27031a8a4eff -r 3de7e43b130a xen/arch/x86/hvm/emulate.c >> --- a/xen/arch/x86/hvm/emulate.c >> +++ b/xen/arch/x86/hvm/emulate.c >> @@ -660,12 +660,13 @@ static int hvmemul_rep_movs( >> { >> struct hvm_emulate_ctxt *hvmemul_ctxt >> container_of(ctxt, struct hvm_emulate_ctxt, ctxt); >> - unsigned long saddr, daddr, bytes, sgfn, dgfn; >> + unsigned long saddr, daddr, bytes; >> paddr_t sgpa, dgpa; >> uint32_t pfec = PFEC_page_present; >> - p2m_type_t p2mt; >> + p2m_type_t sp2mt, dp2mt; >> int rc, df = !!(ctxt->regs->eflags & X86_EFLAGS_DF); >> char *buf; >> + struct two_gfns *tg; >> >> rc = hvmemul_virtual_to_linear( >> src_seg, src_offset, bytes_per_rep, reps, hvm_access_read, >> @@ -693,26 +694,25 @@ static int hvmemul_rep_movs( >> if ( rc != X86EMUL_OKAY ) >> return rc; >> >> - /* XXX In a fine-grained p2m locking scenario, we need to sort this >> - * get_gfn''s, or else we might deadlock */ >> - sgfn = sgpa >> PAGE_SHIFT; >> - (void)get_gfn(current->domain, sgfn, &p2mt); >> - if ( !p2m_is_ram(p2mt) && !p2m_is_grant(p2mt) ) >> + tg = get_two_gfns(current->domain, sgpa >> PAGE_SHIFT, &sp2mt, >> NULL, NULL, >> + current->domain, dgpa >> PAGE_SHIFT, &dp2mt, >> NULL, NULL, >> + p2m_guest); >> + if ( !tg ) >> + return X86EMUL_UNHANDLEABLE; >> + >> + if ( !p2m_is_ram(sp2mt) && !p2m_is_grant(sp2mt) ) >> { >> rc = hvmemul_do_mmio( >> sgpa, reps, bytes_per_rep, dgpa, IOREQ_READ, df, NULL); >> - put_gfn(current->domain, sgfn); >> + put_two_gfns(&tg); >> return rc; >> } >> >> - dgfn = dgpa >> PAGE_SHIFT; >> - (void)get_gfn(current->domain, dgfn, &p2mt); >> - if ( !p2m_is_ram(p2mt) && !p2m_is_grant(p2mt) ) >> + if ( !p2m_is_ram(dp2mt) && !p2m_is_grant(dp2mt) ) >> { >> rc = hvmemul_do_mmio( >> dgpa, reps, bytes_per_rep, sgpa, IOREQ_WRITE, df, NULL); >> - put_gfn(current->domain, sgfn); >> - put_gfn(current->domain, dgfn); >> + put_two_gfns(&tg); >> return rc; >> } >> >> @@ -730,8 +730,7 @@ static int 
hvmemul_rep_movs( >> */ >> if ( ((dgpa + bytes_per_rep) > sgpa) && (dgpa < (sgpa + bytes)) ) >> { >> - put_gfn(current->domain, sgfn); >> - put_gfn(current->domain, dgfn); >> + put_two_gfns(&tg); >> return X86EMUL_UNHANDLEABLE; >> } >> >> @@ -743,8 +742,7 @@ static int hvmemul_rep_movs( >> buf = xmalloc_bytes(bytes); >> if ( buf == NULL ) >> { >> - put_gfn(current->domain, sgfn); >> - put_gfn(current->domain, dgfn); >> + put_two_gfns(&tg); >> return X86EMUL_UNHANDLEABLE; >> } >> >> @@ -757,8 +755,7 @@ static int hvmemul_rep_movs( >> rc = hvm_copy_to_guest_phys(dgpa, buf, bytes); >> >> xfree(buf); >> - put_gfn(current->domain, sgfn); >> - put_gfn(current->domain, dgfn); >> + put_two_gfns(&tg); >> >> if ( rc == HVMCOPY_gfn_paged_out ) >> return X86EMUL_RETRY; >> diff -r 27031a8a4eff -r 3de7e43b130a xen/arch/x86/mm/mem_sharing.c >> --- a/xen/arch/x86/mm/mem_sharing.c >> +++ b/xen/arch/x86/mm/mem_sharing.c >> @@ -718,11 +718,13 @@ int mem_sharing_share_pages(struct domai >> int ret = -EINVAL; >> mfn_t smfn, cmfn; >> p2m_type_t smfn_type, cmfn_type; >> + struct two_gfns *tg; >> >> - /* XXX if sd == cd handle potential deadlock by ordering >> - * the get_ and put_gfn''s */ >> - smfn = get_gfn(sd, sgfn, &smfn_type); >> - cmfn = get_gfn(cd, cgfn, &cmfn_type); >> + tg = get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn, >> + cd, cgfn, &cmfn_type, NULL, &cmfn, >> + p2m_query); >> + if ( !tg ) >> + return -ENOMEM; >> >> /* This tricky business is to avoid two callers deadlocking if >> * grabbing pages in opposite client/source order */ >> @@ -819,8 +821,7 @@ int mem_sharing_share_pages(struct domai >> ret = 0; >> >> err_out: >> - put_gfn(cd, cgfn); >> - put_gfn(sd, sgfn); >> + put_two_gfns(&tg); >> return ret; >> } >> >> @@ -834,11 +835,13 @@ int mem_sharing_add_to_physmap(struct do >> struct gfn_info *gfn_info; >> struct p2m_domain *p2m = p2m_get_hostp2m(cd); >> p2m_access_t a; >> - >> - /* XXX if sd == cd handle potential deadlock by ordering >> - * the get_ and put_gfn''s */ 
>> - smfn = get_gfn_query(sd, sgfn, &smfn_type); >> - cmfn = get_gfn_type_access(p2m, cgfn, &cmfn_type, &a, p2m_query, >> NULL); >> + struct two_gfns *tg; >> + >> + tg = get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn, >> + cd, cgfn, &cmfn_type, &a, &cmfn, >> + p2m_query); >> + if ( !tg ) >> + return -ENOMEM; >> >> /* Get the source shared page, check and lock */ >> ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID; >> @@ -886,8 +889,7 @@ int mem_sharing_add_to_physmap(struct do >> err_unlock: >> mem_sharing_page_unlock(spage); >> err_out: >> - put_gfn(cd, cgfn); >> - put_gfn(sd, sgfn); >> + put_two_gfns(&tg); >> return ret; >> } >> >> diff -r 27031a8a4eff -r 3de7e43b130a xen/include/asm-x86/p2m.h >> --- a/xen/include/asm-x86/p2m.h >> +++ b/xen/include/asm-x86/p2m.h >> @@ -378,6 +378,97 @@ static inline unsigned long mfn_to_gfn(s >> return mfn_x(mfn); >> } >> >> +/* Deadlock-avoidance scheme when calling get_gfn on different gfn''s */ >> +struct two_gfns { >> + struct domain *first_domain; >> + unsigned long first_gfn; >> + struct domain *second_domain; >> + unsigned long second_gfn; >> +}; >> + >> +#define assign_pointers(dest, source) >> \ >> +do { >> \ >> + dest ## _mfn = (source ## mfn) ? (source ## mfn) : &__ ## dest ## >> _mfn; \ >> + dest ## _a = (source ## a) ? (source ## a) : &__ ## dest ## >> _a; \ >> + dest ## _t = (source ## t) ? 
(source ## t) : &__ ## dest ## >> _t; \ >> +} while(0) >> + >> +/* Returns mfn, type and access for potential caller consumption, but >> any >> + * of those can be NULL */ >> +static inline struct two_gfns *get_two_gfns(struct domain *rd, unsigned >> long rgfn, >> + p2m_type_t *rt, p2m_access_t *ra, mfn_t *rmfn, struct domain >> *ld, >> + unsigned long lgfn, p2m_type_t *lt, p2m_access_t *la, mfn_t >> *lmfn, >> + p2m_query_t q) >> +{ >> + mfn_t *first_mfn, *second_mfn, __first_mfn, __second_mfn; >> + p2m_access_t *first_a, *second_a, __first_a, __second_a; >> + p2m_type_t *first_t, *second_t, __first_t, __second_t; >> + >> + struct two_gfns *rval = xmalloc(struct two_gfns); >> + if ( !rval ) >> + return NULL; >> + >> + if ( rd == ld ) >> + { >> + rval->first_domain = rval->second_domain = rd; >> + >> + /* Sort by gfn */ >> + if (rgfn <= lgfn) >> + { >> + rval->first_gfn = rgfn; >> + rval->second_gfn = lgfn; >> + assign_pointers(first, r); >> + assign_pointers(second, l); >> + } else { >> + rval->first_gfn = lgfn; >> + rval->second_gfn = rgfn; >> + assign_pointers(first, l); >> + assign_pointers(second, r); >> + } >> + } else { >> + /* Sort by domain */ >> + if ( rd->domain_id <= ld->domain_id ) >> + { >> + rval->first_domain = rd; >> + rval->first_gfn = rgfn; >> + rval->second_domain = ld; >> + rval->second_gfn = lgfn; >> + assign_pointers(first, r); >> + assign_pointers(second, l); >> + } else { >> + rval->first_domain = ld; >> + rval->first_gfn = lgfn; >> + rval->second_domain = rd; >> + rval->second_gfn = rgfn; >> + assign_pointers(first, l); >> + assign_pointers(second, r); >> + } >> + } >> + >> + /* Now do the gets */ >> + *first_mfn >> get_gfn_type_access(p2m_get_hostp2m(rval->first_domain), >> + rval->first_gfn, first_t, >> first_a, q, NULL); >> + *second_mfn >> get_gfn_type_access(p2m_get_hostp2m(rval->second_domain), >> + rval->second_gfn, second_t, >> second_a, q, NULL); >> + >> + return rval; >> +} >> + >> +static inline void put_two_gfns(struct two_gfns 
**arg_ptr) >> +{ >> + struct two_gfns *arg; >> + >> + if ( !arg_ptr || !(*arg_ptr) ) >> + return; >> + >> + arg = *arg_ptr; >> + put_gfn(arg->second_domain, arg->second_gfn); >> + put_gfn(arg->first_domain, arg->first_gfn); >> + >> + xfree(arg); >> + *arg_ptr = NULL; >> +} >> + >> /* Init the datastructures for later use by the p2m code */ >> int p2m_init(struct domain *d); >> >> >> _______________________________________________ >> Xen-devel mailing list >> Xen-devel@lists.xensource.com >> http://lists.xensource.com/xen-devel >
Andres Lagar-Cavilla
2012-Feb-02 13:46 UTC
Re: [PATCH 5 of 9] x86/mm: When removing/adding a page from/to the physmap, keep in mind it could be shared
> At 14:51 -0500 on 01 Feb (1328107917), Andres Lagar-Cavilla wrote: >> xen/arch/x86/mm/p2m.c | 21 ++++++++++++++++++++- >> 1 files changed, 20 insertions(+), 1 deletions(-) >> >> >> When removing the m2p mapping it is unconditionally set to invalid, >> which >> breaks sharing. >> >> When adding to the physmap, if the previous holder of that entry is a >> shared >> page, we unshare to default to normal case handling. >> >> And, we cannot add a shared page directly to the physmap. Proper >> interfaces >> must be employed, otherwise book-keeping goes awry. >> >> Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> >> >> diff -r 8a920bcddd0f -r 1c61573d1765 xen/arch/x86/mm/p2m.c >> --- a/xen/arch/x86/mm/p2m.c >> +++ b/xen/arch/x86/mm/p2m.c >> @@ -419,7 +419,7 @@ p2m_remove_page(struct p2m_domain *p2m, >> for ( i = 0; i < (1UL << page_order); i++ ) >> { >> mfn_return = p2m->get_entry(p2m, gfn + i, &t, &a, >> p2m_query, NULL); >> - if ( !p2m_is_grant(t) ) >> + if ( !p2m_is_grant(t) && !p2m_is_shared(t) ) >> set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY); >> ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) ); >> } >> @@ -481,6 +481,17 @@ guest_physmap_add_entry(struct domain *d >> for ( i = 0; i < (1UL << page_order); i++ ) >> { >> omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, p2m_query, NULL); >> + if ( p2m_is_shared(ot) ) >> + { >> + /* Do an unshare to cleanly take care of all corner >> + * cases. */ >> + int rc; >> + rc = mem_sharing_unshare_page(p2m->domain, gfn + i, 0); >> + if ( rc ) >> + return rc; > > You''re holding the p2m lock here! Also, I don''t think you can call > mem_sharing_unshare_page() with that held - wasn''t that the reason for > cset f6c33cfe7333 ?D''oh! This patch has to follow the locking p2m series. Before reposting, I''ll have to make sure nothing breaks when applying after locking p2m. Good catch! Andres> > Tim. > >