Jan Beulich
2008-Jun-12 15:11 UTC
[Xen-devel] [PATCH, resend] x86: remove use of per-domain lock from page table entry handling
This is a re-send of a patch sent earlier that was deemed to be too
complex, yet a simpler mechanism hasn't found its way into the code base
so far. This version has two bugs fixed and removes the lock from
do_mmuext_op() as well, but is still not a complete implementation: at
least the use of the domain lock in do_set_gdt() still needs looking at,
as do the assertions of the lock being held in
create_grant_{pte,va}_mapping().

This change results in a 5% performance improvement for kernel builds
on dual-socket quad-core systems (which is what I used for reference
for both 32- and 64-bit). Along with that, the amount of time reported
as spent in the kernel gets reduced by almost 25% (the fraction of time
spent in the kernel is generally reported significantly higher under
Xen than with a native kernel).

Signed-off-by: Jan Beulich <jbeulich@novell.com>

Index: 2008-06-12/xen/arch/x86/domain.c
===================================================================
--- 2008-06-12.orig/xen/arch/x86/domain.c	2008-06-12 09:07:02.000000000 +0200
+++ 2008-06-12/xen/arch/x86/domain.c	2008-06-12 09:09:12.000000000 +0200
@@ -262,7 +262,7 @@ static int setup_compat_l4(struct vcpu *
         return -ENOMEM;
 
     /* This page needs to look like a pagetable so that it can be shadowed */
-    pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated;
+    pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated|1;
 
     l4tab = copy_page(page_to_virt(pg), idle_pg_table);
     l4tab[0] = l4e_empty();
Index: 2008-06-12/xen/arch/x86/domain_build.c
===================================================================
--- 2008-06-12.orig/xen/arch/x86/domain_build.c	2008-06-12 09:08:54.000000000 +0200
+++ 2008-06-12/xen/arch/x86/domain_build.c	2008-06-12 09:09:12.000000000 +0200
@@ -575,6 +575,7 @@ int __init construct_dom0(
         page = alloc_domheap_page(NULL, 0);
         if ( !page )
             panic("Not enough RAM for domain 0 PML4.\n");
+        page->u.inuse.type_info = PGT_l4_page_table|PGT_validated|1;
         l4start = l4tab = page_to_virt(page);
     }
     copy_page(l4tab, idle_pg_table);
Index: 2008-06-12/xen/arch/x86/mm.c
===================================================================
--- 2008-06-12.orig/xen/arch/x86/mm.c	2008-06-12 09:08:42.000000000 +0200
+++ 2008-06-12/xen/arch/x86/mm.c	2008-06-12 09:09:12.000000000 +0200
@@ -1360,6 +1360,37 @@ static void free_l4_table(struct page_in
 
 #endif
 
+static void lock_page(struct page_info *page)
+{
+    unsigned long x, y;
+
+    y = page->u.inuse.type_info;
+    do {
+        for ( ; ; )
+        {
+            ASSERT(y & PGT_count_mask);
+            if (likely(y & PGT_validated) && likely(!(y & PGT_locked)))
+                break;
+            cpu_relax();
+            y = page->u.inuse.type_info;
+        }
+        x = y;
+    }
+    while ( (y = cmpxchg(&page->u.inuse.type_info, x, x | PGT_locked)) != x );
+}
+
+static void unlock_page(struct page_info *page)
+{
+    unsigned long x, y;
+
+    y = page->u.inuse.type_info;
+    do {
+        ASSERT(y & PGT_locked);
+        ASSERT(y & PGT_count_mask);
+        x = y;
+    }
+    while ( (y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_locked)) != x );
+}
 
 /* How to write an entry to the guest pagetables.
  * Returns 0 for failure (pointer not valid), 1 for success. */
@@ -1421,24 +1452,33 @@ static int mod_l1_entry(l1_pgentry_t *pl
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
     unsigned long mfn;
+    struct page_info *l1pg = mfn_to_page(gl1mfn);
+    int rc = 1;
+
+    lock_page(l1pg);
 
     if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
-        return 0;
+        return unlock_page(l1pg), 0;
 
     if ( unlikely(paging_mode_refcounts(d)) )
-        return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, preserve_ad);
+    {
+        rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, preserve_ad);
+        unlock_page(l1pg);
+        return rc;
+    }
 
     if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
     {
         /* Translate foreign guest addresses. */
         mfn = gmfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e));
         if ( unlikely(mfn == INVALID_MFN) )
-            return 0;
+            return unlock_page(l1pg), 0;
         ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
         nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e));
 
         if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(d)) )
         {
+            unlock_page(l1pg);
             MEM_LOG("Bad L1 flags %x",
                     l1e_get_flags(nl1e) & l1_disallow_mask(d));
             return 0;
@@ -1448,30 +1488,33 @@ static int mod_l1_entry(l1_pgentry_t *pl
         if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
         {
             adjust_guest_l1e(nl1e, d);
-            return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
-                                preserve_ad);
+            rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
+                              preserve_ad);
+            unlock_page(l1pg);
+            return rc;
         }
 
         if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
-            return 0;
+            return unlock_page(l1pg), 0;
 
         adjust_guest_l1e(nl1e, d);
         if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
                                     preserve_ad)) )
         {
-            put_page_from_l1e(nl1e, d);
-            return 0;
+            ol1e = nl1e;
+            rc = 0;
         }
     }
-    else
+    else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
+                                     preserve_ad)) )
     {
-        if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
-                                    preserve_ad)) )
-            return 0;
+        unlock_page(l1pg);
+        return 0;
     }
 
+    unlock_page(l1pg);
     put_page_from_l1e(ol1e, d);
-    return 1;
+    return rc;
 }
 
 
@@ -1485,6 +1528,8 @@ static int mod_l2_entry(l2_pgentry_t *pl
     l2_pgentry_t ol2e;
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
+    struct page_info *l2pg = mfn_to_page(pfn);
+    int rc = 1;
 
     if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
     {
@@ -1492,13 +1537,16 @@ static int mod_l2_entry(l2_pgentry_t *pl
         return 0;
     }
 
+    lock_page(l2pg);
+
     if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
-        return 0;
+        return unlock_page(l2pg), 0;
 
     if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
     {
         if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
         {
+            unlock_page(l2pg);
             MEM_LOG("Bad L2 flags %x",
                     l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
             return 0;
@@ -1508,28 +1556,32 @@ static int mod_l2_entry(l2_pgentry_t *pl
         if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT) )
         {
             adjust_guest_l2e(nl2e, d);
-            return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr, preserve_ad);
+            rc = UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr, preserve_ad);
+            unlock_page(l2pg);
+            return rc;
         }
 
         if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) )
-            return 0;
+            return unlock_page(l2pg), 0;
 
         adjust_guest_l2e(nl2e, d);
         if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
                                     preserve_ad)) )
         {
-            put_page_from_l2e(nl2e, pfn);
-            return 0;
+            ol2e = nl2e;
+            rc = 0;
         }
     }
     else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
                                      preserve_ad)) )
     {
+        unlock_page(l2pg);
        return 0;
     }
 
+    unlock_page(l2pg);
     put_page_from_l2e(ol2e, pfn);
-    return 1;
+    return rc;
 }
 
 #if CONFIG_PAGING_LEVELS >= 3
@@ -1543,7 +1595,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
     l3_pgentry_t ol3e;
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
-    int okay;
+    struct page_info *l3pg = mfn_to_page(pfn);
+    int okay, rc = 1;
 
     if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
     {
@@ -1558,13 +1611,16 @@ static int mod_l3_entry(l3_pgentry_t *pl
     if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) )
         return 0;
 
+    lock_page(l3pg);
+
     if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
-        return 0;
+        return unlock_page(l3pg), 0;
 
     if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
     {
         if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
         {
+            unlock_page(l3pg);
             MEM_LOG("Bad L3 flags %x",
                     l3e_get_flags(nl3e) & l3_disallow_mask(d));
             return 0;
@@ -1574,23 +1630,26 @@ static int mod_l3_entry(l3_pgentry_t *pl
         if ( !l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT) )
         {
             adjust_guest_l3e(nl3e, d);
-            return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
+            rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
+            unlock_page(l3pg);
+            return rc;
         }
 
         if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) )
-            return 0;
+            return unlock_page(l3pg), 0;
 
         adjust_guest_l3e(nl3e, d);
         if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
                                     preserve_ad)) )
         {
-            put_page_from_l3e(nl3e, pfn);
-            return 0;
+            ol3e = nl3e;
+            rc = 0;
         }
     }
     else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
                                      preserve_ad)) )
     {
+        unlock_page(l3pg);
        return 0;
     }
 
@@ -1599,8 +1658,9 @@ static int mod_l3_entry(l3_pgentry_t *pl
 
     pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
 
+    unlock_page(l3pg);
     put_page_from_l3e(ol3e, pfn);
-    return 1;
+    return rc;
 }
 
 #endif
@@ -1616,6 +1676,8 @@ static int mod_l4_entry(l4_pgentry_t *pl
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
     l4_pgentry_t ol4e;
+    struct page_info *l4pg = mfn_to_page(pfn);
+    int rc = 1;
 
     if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
     {
@@ -1623,13 +1685,16 @@ static int mod_l4_entry(l4_pgentry_t *pl
         return 0;
     }
 
+    lock_page(l4pg);
+
     if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
-        return 0;
+        return unlock_page(l4pg), 0;
 
     if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
     {
         if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
         {
+            unlock_page(l4pg);
             MEM_LOG("Bad L4 flags %x",
                     l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
             return 0;
@@ -1639,28 +1704,32 @@ static int mod_l4_entry(l4_pgentry_t *pl
         if ( !l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT) )
         {
             adjust_guest_l4e(nl4e, d);
-            return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
+            rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
+            unlock_page(l4pg);
+            return rc;
         }
 
         if ( unlikely(!get_page_from_l4e(nl4e, pfn, d)) )
-            return 0;
+            return unlock_page(l4pg), 0;
 
         adjust_guest_l4e(nl4e, d);
         if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
                                     preserve_ad)) )
         {
-            put_page_from_l4e(nl4e, pfn);
-            return 0;
+            ol4e = nl4e;
+            rc = 0;
         }
     }
     else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
                                      preserve_ad)) )
     {
+        unlock_page(l4pg);
        return 0;
     }
 
+    unlock_page(l4pg);
     put_page_from_l4e(ol4e, pfn);
-    return 1;
+    return rc;
 }
 
 #endif
@@ -2189,8 +2258,6 @@ int do_mmuext_op(
         goto out;
     }
 
-    domain_lock(d);
-
     for ( i = 0; i < count; i++ )
     {
         if ( hypercall_preempt_check() )
@@ -2438,8 +2505,6 @@ int do_mmuext_op(
 
     process_deferred_ops();
 
-    domain_unlock(d);
-
     perfc_add(num_mmuext_ops, i);
 
  out:
@@ -2493,8 +2558,6 @@ int do_mmu_update(
 
     domain_mmap_cache_init(&mapcache);
 
-    domain_lock(d);
-
     for ( i = 0; i < count; i++ )
     {
         if ( hypercall_preempt_check() )
@@ -2667,8 +2730,6 @@ int do_mmu_update(
 
     process_deferred_ops();
 
-    domain_unlock(d);
-
     domain_mmap_cache_destroy(&mapcache);
 
     perfc_add(num_page_updates, i);
@@ -2721,14 +2782,19 @@ static int create_grant_pte_mapping(
         goto failed;
     }
 
+    lock_page(page);
+
     ol1e = *(l1_pgentry_t *)va;
     if ( !UPDATE_ENTRY(l1, (l1_pgentry_t *)va, ol1e, nl1e, mfn, v, 0) )
     {
+        unlock_page(page);
         put_page_type(page);
         rc = GNTST_general_error;
         goto failed;
     }
 
+    unlock_page(page);
+
     if ( !paging_mode_refcounts(d) )
         put_page_from_l1e(ol1e, d);
 
@@ -2772,16 +2838,14 @@ static int destroy_grant_pte_mapping(
         goto failed;
     }
 
-    if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) )
-    {
-        put_page_type(page);
-        rc = GNTST_general_error;
-        goto failed;
-    }
+    lock_page(page);
+
+    ol1e = *(l1_pgentry_t *)va;
 
     /* Check that the virtual address supplied is actually mapped to frame. */
     if ( unlikely((l1e_get_intpte(ol1e) >> PAGE_SHIFT) != frame) )
     {
+        unlock_page(page);
        MEM_LOG("PTE entry %lx for address %"PRIx64" doesn't match frame %lx",
                (unsigned long)l1e_get_intpte(ol1e), addr, frame);
        put_page_type(page);
@@ -2796,12 +2860,14 @@ static int destroy_grant_pte_mapping(
              d->vcpu[0] /* Change if we go to per-vcpu shadows. */,
              0)) )
     {
+        unlock_page(page);
        MEM_LOG("Cannot delete PTE entry at %p", va);
        put_page_type(page);
        rc = GNTST_general_error;
        goto failed;
     }
 
+    unlock_page(page);
     put_page_type(page);
 
  failed:
@@ -2817,6 +2883,7 @@ static int create_grant_va_mapping(
     l1_pgentry_t *pl1e, ol1e;
     struct domain *d = v->domain;
     unsigned long gl1mfn;
+    struct page_info *l1pg;
     int okay;
 
     ASSERT(domain_is_locked(d));
@@ -2829,8 +2896,11 @@ static int create_grant_va_mapping(
         MEM_LOG("Could not find L1 PTE for address %lx", va);
         return GNTST_general_error;
     }
+    l1pg = mfn_to_page(gl1mfn);
+    lock_page(l1pg);
     ol1e = *pl1e;
     okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0);
+    unlock_page(l1pg);
     guest_unmap_l1e(v, pl1e);
     pl1e = NULL;
 
@@ -2848,6 +2918,7 @@ static int replace_grant_va_mapping(
 {
     l1_pgentry_t *pl1e, ol1e;
     unsigned long gl1mfn;
+    struct page_info *l1pg;
     int rc = 0;
 
     pl1e = guest_map_l1e(v, addr, &gl1mfn);
@@ -2856,11 +2927,15 @@ static int replace_grant_va_mapping(
         MEM_LOG("Could not find L1 PTE for address %lx", addr);
         return GNTST_general_error;
     }
+
+    l1pg = mfn_to_page(gl1mfn);
+    lock_page(l1pg);
     ol1e = *pl1e;
 
     /* Check that the virtual address supplied is actually mapped to frame. */
     if ( unlikely(l1e_get_pfn(ol1e) != frame) )
     {
+        unlock_page(l1pg);
        MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
                l1e_get_pfn(ol1e), addr, frame);
        rc = GNTST_general_error;
@@ -2870,11 +2945,14 @@ static int replace_grant_va_mapping(
     /* Delete pagetable entry. */
     if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) )
     {
+        unlock_page(l1pg);
        MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
        rc = GNTST_general_error;
        goto out;
     }
 
+    unlock_page(l1pg);
+
  out:
     guest_unmap_l1e(v, pl1e);
     return rc;
@@ -2909,6 +2987,7 @@ int replace_grant_host_mapping(
     struct vcpu *curr = current;
     l1_pgentry_t *pl1e, ol1e;
     unsigned long gl1mfn;
+    struct page_info *l1pg;
     int rc;
 
     if ( flags & GNTMAP_contains_pte )
@@ -2930,16 +3009,21 @@ int replace_grant_host_mapping(
                 (unsigned long)new_addr);
         return GNTST_general_error;
     }
+
+    l1pg = mfn_to_page(gl1mfn);
+    lock_page(l1pg);
     ol1e = *pl1e;
 
     if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(),
                                 gl1mfn, curr, 0)) )
     {
+        unlock_page(l1pg);
        MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
        guest_unmap_l1e(curr, pl1e);
        return GNTST_general_error;
     }
 
+    unlock_page(l1pg);
     guest_unmap_l1e(curr, pl1e);
 
     rc = replace_grant_va_mapping(addr, frame, ol1e, curr);
@@ -3017,8 +3101,6 @@ int do_update_va_mapping(unsigned long v
     if ( rc )
         return rc;
 
-    domain_lock(d);
-
     pl1e = guest_map_l1e(v, va, &gl1mfn);
 
     if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn, 0)) )
@@ -3030,8 +3112,6 @@ int do_update_va_mapping(unsigned long v
 
     process_deferred_ops();
 
-    domain_unlock(d);
-
     switch ( flags & UVMF_FLUSHTYPE_MASK )
     {
     case UVMF_TLB_FLUSH:
@@ -3651,8 +3731,6 @@ int ptwr_do_page_fault(struct vcpu *v, u
     struct ptwr_emulate_ctxt ptwr_ctxt;
     int rc;
 
-    domain_lock(d);
-
     /* Attempt to read the PTE that maps the VA being accessed. */
     guest_get_eff_l1e(v, addr, &pte);
     page = l1e_get_page(pte);
@@ -3672,16 +3750,16 @@ int ptwr_do_page_fault(struct vcpu *v, u
     ptwr_ctxt.cr2 = addr;
     ptwr_ctxt.pte = pte;
 
+    lock_page(page);
     rc = x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops);
+    unlock_page(page);
     if ( rc == X86EMUL_UNHANDLEABLE )
         goto bail;
 
-    domain_unlock(d);
     perfc_incr(ptwr_emulations);
     return EXCRET_fault_fixed;
 
  bail:
-    domain_unlock(d);
     return 0;
 }

Index: 2008-06-12/xen/include/asm-x86/mm.h
===================================================================
--- 2008-06-12.orig/xen/include/asm-x86/mm.h	2008-06-12 09:06:20.000000000 +0200
+++ 2008-06-12/xen/include/asm-x86/mm.h	2008-06-12 09:09:12.000000000 +0200
@@ -82,9 +82,12 @@ struct page_info
  /* PAE only: is this an L2 page directory containing Xen-private mappings? */
 #define _PGT_pae_xen_l2     26
 #define PGT_pae_xen_l2      (1U<<_PGT_pae_xen_l2)
+ /* The page is currently locked for modification. */
+#define _PGT_locked         25
+#define PGT_locked          (1U<<_PGT_locked)
 
- /* 26-bit count of uses of this frame as its current type. */
-#define PGT_count_mask      ((1U<<26)-1)
+ /* 25-bit count of uses of this frame as its current type. */
+#define PGT_count_mask      ((1U<<25)-1)
 
  /* Cleared when the owning guest 'frees' this page. */
 #define _PGC_allocated      31
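The heart of the patch is the cmpxchg()-based bit lock above: PGT_locked
is claimed and released with atomic compare-and-swap loops on each
page's type_info word, replacing the single coarse domain_lock(). The
following stand-alone sketch models that pattern so it can be compiled
and stepped through with GCC outside the hypervisor; the struct, the
bit position chosen for PGT_validated, and the __sync builtins
(standing in for Xen's cpu_relax()/cmpxchg()) are illustrative
assumptions, not Xen's actual definitions.

#include <assert.h>
#include <stdio.h>

#define PGT_locked     (1UL << 25)          /* mirrors the new _PGT_locked */
#define PGT_validated  (1UL << 28)          /* placeholder bit position    */
#define PGT_count_mask ((1UL << 25) - 1)    /* low bits: type-use count    */

struct page { unsigned long type_info; };

/* Spin until the page is validated and unlocked, then atomically set
 * PGT_locked -- retrying if type_info changed under our feet. */
static void lock_page(struct page *pg)
{
    unsigned long x, y = pg->type_info;

    do {
        for ( ; ; )
        {
            assert(y & PGT_count_mask);    /* must hold a type reference */
            if ( (y & PGT_validated) && !(y & PGT_locked) )
                break;
            y = *(volatile unsigned long *)&pg->type_info;
        }
        x = y;
    } while ( (y = __sync_val_compare_and_swap(&pg->type_info,
                                               x, x | PGT_locked)) != x );
}

/* Atomically clear PGT_locked, leaving all other bits intact. */
static void unlock_page(struct page *pg)
{
    assert(pg->type_info & PGT_locked);
    __sync_fetch_and_and(&pg->type_info, ~PGT_locked);
}

int main(void)
{
    /* Seed with a type-use count of 1, as the patch does for the
     * hand-built L4 tables, so PGT_count_mask is non-zero. */
    struct page pg = { .type_info = PGT_validated | 1 };

    lock_page(&pg);
    assert(pg.type_info & PGT_locked);
    unlock_page(&pg);
    assert(!(pg.type_info & PGT_locked));
    printf("type_info after unlock: %#lx\n", pg.type_info);
    return 0;
}

Note the model releases the lock with a single atomic and-clear rather
than the patch's cmpxchg() loop; the effect is the same since only the
lock bit changes, which is also what makes the byte-store optimisation
discussed later in the thread possible.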
Keir Fraser
2008-Jun-12 15:35 UTC
Re: [Xen-devel] [PATCH, resend] x86: remove use of per-domain lock from page table entry handling
On 12/6/08 16:11, "Jan Beulich" <jbeulich@novell.com> wrote:

>      /* This page needs to look like a pagetable so that it can be shadowed */
> -    pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated;
> +    pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated|1;

Doesn't this sort of thing belong in a logically separate bug-fixing
patch?

 -- Keir
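One plausible connection between that hunk and the rest of the patch,
inferred from the code rather than stated in the thread: the new
lock_page() asserts a non-zero type-use count (ASSERT(y &
PGT_count_mask)), and the compat-mode and dom0 L4 tables are built by
hand without going through normal type-count tracking, so without the
|1 the first locked update of such a table would trip that assertion.
In terms of the stand-alone model above (hypothetical names, reusing
its definitions):

/* A page with a zero type-use count would fail lock_page()'s
 * assert(y & PGT_count_mask) in the model: */
struct page bad  = { .type_info = PGT_validated };      /* count == 0 */
/* lock_page(&bad) would abort here. */

/* Seeded the way the patch now seeds hand-built L4 tables: */
struct page pml4 = { .type_info = PGT_validated | 1 };  /* count == 1 */
lock_page(&pml4);    /* succeeds */
unlock_page(&pml4);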
Keir Fraser
2008-Jun-12 16:39 UTC
Re: [Xen-devel] [PATCH, resend] x86: remove use of per-domain lock from page table entry handling
I like this better now I realise I can turn the unlock_page() into a
byte-sized write on x86/64 at least, by turning page->shadow_flags into
a u32. That means only one LOCK prefix per critical section.

I will look into applying this patch plus this optimisation for x86/64.

 -- Keir

On 12/6/08 16:11, "Jan Beulich" <jbeulich@novell.com> wrote:

> This is a re-send of a patch sent earlier that was deemed to be too
> complex, yet a simpler mechanism hasn't found its way into the code
> base so far. This version has two bugs fixed and removes the lock from
> do_mmuext_op() as well, but is still not a complete implementation: at
> least the use of the domain lock in do_set_gdt() still needs looking
> at, as do the assertions of the lock being held in
> create_grant_{pte,va}_mapping().
>
> This change results in a 5% performance improvement for kernel builds
> on dual-socket quad-core systems (which is what I used for reference
> for both 32- and 64-bit). Along with that, the amount of time reported
> as spent in the kernel gets reduced by almost 25% (the fraction of time
> spent in the kernel is generally reported significantly higher under
> Xen than with a native kernel).
>
> Signed-off-by: Jan Beulich <jbeulich@novell.com>
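A sketch of the optimisation Keir describes, under the assumption that
shrinking shadow_flags to a u32 frees a byte in which the lock flag can
live alone (the field names here are hypothetical): acquisition still
needs one LOCK-prefixed read-modify-write, but because x86 never
reorders a store ahead of earlier loads and stores, the release can be
a plain byte store behind a compiler barrier, so each critical section
pays for only one LOCK prefix.

#include <stdint.h>

struct page_model {
    uint32_t shadow_flags;   /* shrunk to u32, freeing the byte below */
    uint8_t  lock_byte;      /* holds only the lock flag              */
    uint8_t  pad[3];
};

/* Acquire: one LOCK-prefixed xchg; spin read-only while contended. */
static void lock_page_model(struct page_model *pg)
{
    while ( __sync_lock_test_and_set(&pg->lock_byte, 1) )
        while ( *(volatile uint8_t *)&pg->lock_byte )
            ;   /* cheap spin without bus locking */
}

/* Release: a plain byte write after a compiler barrier suffices on
 * x86's store ordering -- no LOCK prefix, no fence. */
static void unlock_page_model(struct page_model *pg)
{
    __asm__ __volatile__ ( "" ::: "memory" );
    *(volatile uint8_t *)&pg->lock_byte = 0;
}

Whether the committed changesets do exactly this is not shown in the
thread; the sketch only demonstrates the one-LOCK-prefix-per-section
property Keir mentions.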
Keir Fraser
2008-Jun-12 17:15 UTC
Re: [Xen-devel] [PATCH, resend] x86: remove use of per-domain lock from page table entry handling
Changesets 17845 and 17846.

 -- Keir

On 12/6/08 17:39, "Keir Fraser" <keir.fraser@eu.citrix.com> wrote:

> I like this better now I realise I can turn the unlock_page() into a
> byte-sized write on x86/64 at least, by turning page->shadow_flags into
> a u32. That means only one LOCK prefix per critical section.
>
> I will look into applying this patch plus this optimisation for x86/64.
>
>  -- Keir