Jan Beulich
2008-Apr-22 13:01 UTC
[Xen-devel] [PATCH] x86: remove use of per-domain lock from page table entry handling
This is only a first step, the use of the domain lock in do_mmuext_op()
and do_set_gdt() still needs looking at, as do the assertions of the
lock being held in create_grant_{pte,va}_mapping().

This change results in a 5% performance improvement for kernel builds
on dual-socket quad-core systems (which is what I used for reference
for both 32- and 64-bit). Along with that, the amount of time reported
as spent in the kernel gets reduced by almost 25% (the fraction of time
spent in the kernel is generally reported significantly higher under
Xen than with a native kernel).

Signed-off-by: Jan Beulich <jbeulich@novell.com>

Index: 2008-04-15/xen/arch/x86/mm.c
===================================================================
--- 2008-04-15.orig/xen/arch/x86/mm.c 2008-04-15 08:48:15.000000000 +0200
+++ 2008-04-15/xen/arch/x86/mm.c 2008-04-15 16:21:12.000000000 +0200
@@ -1371,6 +1371,37 @@ static void free_l4_table(struct page_in
 
 #endif
 
+static void lock_page(struct page_info *page)
+{
+    unsigned long x, y;
+
+    y = page->u.inuse.type_info;
+    do {
+        for ( ; ; )
+        {
+            ASSERT(y & PGT_count_mask);
+            if (likely(y & PGT_validated) && likely(!(y & PGT_locked)))
+                break;
+            cpu_relax();
+            y = page->u.inuse.type_info;
+        }
+        x = y;
+    }
+    while ( (y = cmpxchg(&page->u.inuse.type_info, x, x | PGT_locked)) != x );
+}
+
+static void unlock_page(struct page_info *page)
+{
+    unsigned long x, y;
+
+    y = page->u.inuse.type_info;
+    do {
+        ASSERT(y & PGT_locked);
+        ASSERT(y & PGT_count_mask);
+        x = y;
+    }
+    while ( (y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_locked)) != x );
+}
 
 /* How to write an entry to the guest pagetables.
  * Returns 0 for failure (pointer not valid), 1 for success. */
@@ -1432,24 +1463,33 @@ static int mod_l1_entry(l1_pgentry_t *pl
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
     unsigned long mfn;
+    struct page_info *l1pg = mfn_to_page(gl1mfn);
+    int rc = 1;
+
+    lock_page(l1pg);
 
     if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
-        return 0;
+        return unlock_page(l1pg), 0;
 
     if ( unlikely(paging_mode_refcounts(d)) )
-        return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, preserve_ad);
+    {
+        rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, preserve_ad);
+        unlock_page(l1pg);
+        return rc;
+    }
 
     if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
     {
         /* Translate foreign guest addresses. */
         mfn = gmfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e));
         if ( unlikely(mfn == INVALID_MFN) )
-            return 0;
+            return unlock_page(l1pg), 0;
         ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
         nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e));
 
         if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(d)) )
         {
+            unlock_page(l1pg);
             MEM_LOG("Bad L1 flags %x",
                     l1e_get_flags(nl1e) & l1_disallow_mask(d));
             return 0;
@@ -1459,30 +1499,33 @@ static int mod_l1_entry(l1_pgentry_t *pl
         if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
         {
             adjust_guest_l1e(nl1e, d);
-            return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
-                                preserve_ad);
+            rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
+                              preserve_ad);
+            unlock_page(l1pg);
+            return rc;
         }
 
         if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
-            return 0;
+            return unlock_page(l1pg), 0;
 
         adjust_guest_l1e(nl1e, d);
         if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
                                     preserve_ad)) )
         {
-            put_page_from_l1e(nl1e, d);
-            return 0;
+            ol1e = nl1e;
+            rc = 0;
         }
     }
-    else
+    else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
+                                     preserve_ad)) )
     {
-        if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
-                                    preserve_ad)) )
-            return 0;
+        unlock_page(l1pg);
+        return 0;
     }
 
+    unlock_page(l1pg);
     put_page_from_l1e(ol1e, d);
-    return 1;
+    return rc;
 }
@@ -1496,6 +1539,8 @@ static int mod_l2_entry(l2_pgentry_t *pl
     l2_pgentry_t ol2e;
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
+    struct page_info *l2pg = mfn_to_page(pfn);
+    int rc = 1;
 
     if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
     {
@@ -1503,13 +1548,16 @@ static int mod_l2_entry(l2_pgentry_t *pl
         return 0;
     }
 
+    lock_page(l2pg);
+
     if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
-        return 0;
+        return unlock_page(l2pg), 0;
 
     if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
     {
         if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
         {
+            unlock_page(l2pg);
             MEM_LOG("Bad L2 flags %x",
                     l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
             return 0;
@@ -1519,28 +1567,32 @@ static int mod_l2_entry(l2_pgentry_t *pl
         if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT) )
         {
             adjust_guest_l2e(nl2e, d);
-            return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr, preserve_ad);
+            rc = UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr, preserve_ad);
+            unlock_page(l2pg);
+            return rc;
        }
 
         if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) )
-            return 0;
+            return unlock_page(l2pg), 0;
 
         adjust_guest_l2e(nl2e, d);
         if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
                                     preserve_ad)) )
         {
-            put_page_from_l2e(nl2e, pfn);
-            return 0;
+            ol2e = nl2e;
+            rc = 0;
        }
     }
     else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
                                      preserve_ad)) )
     {
+        unlock_page(l2pg);
         return 0;
     }
 
+    unlock_page(l2pg);
     put_page_from_l2e(ol2e, pfn);
-    return 1;
+    return rc;
 }
 
 #if CONFIG_PAGING_LEVELS >= 3
@@ -1554,7 +1606,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
     l3_pgentry_t ol3e;
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
-    int okay;
+    struct page_info *l3pg = mfn_to_page(pfn);
+    int okay, rc = 1;
 
     if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
     {
@@ -1571,13 +1624,16 @@ static int mod_l3_entry(l3_pgentry_t *pl
         return 0;
 #endif
 
+    lock_page(l3pg);
+
     if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
-        return 0;
+        return unlock_page(l3pg), 0;
 
     if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
     {
         if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
         {
+            unlock_page(l3pg);
             MEM_LOG("Bad L3 flags %x",
                     l3e_get_flags(nl3e) & l3_disallow_mask(d));
             return 0;
@@ -1587,23 +1643,26 @@ static int mod_l3_entry(l3_pgentry_t *pl
         if ( !l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT) )
         {
             adjust_guest_l3e(nl3e, d);
-            return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
+            rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
+            unlock_page(l3pg);
+            return rc;
        }
 
         if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) )
-            return 0;
+            return unlock_page(l3pg), 0;
 
         adjust_guest_l3e(nl3e, d);
         if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
                                     preserve_ad)) )
        {
-            put_page_from_l3e(nl3e, pfn);
-            return 0;
+            ol3e = nl3e;
+            rc = 0;
        }
     }
     else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
                                      preserve_ad)) )
     {
+        unlock_page(l3pg);
         return 0;
     }
 
@@ -1612,8 +1671,9 @@ static int mod_l3_entry(l3_pgentry_t *pl
 
     pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
 
+    unlock_page(l3pg);
     put_page_from_l3e(ol3e, pfn);
-    return 1;
+    return rc;
 }
 
 #endif
@@ -1629,6 +1689,8 @@ static int mod_l4_entry(l4_pgentry_t *pl
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
     l4_pgentry_t ol4e;
+    struct page_info *l4pg = mfn_to_page(pfn);
+    int rc = 1;
 
     if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
     {
@@ -1636,13 +1698,16 @@ static int mod_l4_entry(l4_pgentry_t *pl
         return 0;
     }
 
+    lock_page(l4pg);
+
     if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
-        return 0;
+        return unlock_page(l4pg), 0;
 
     if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
     {
         if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
         {
+            unlock_page(l4pg);
             MEM_LOG("Bad L4 flags %x",
                     l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
             return 0;
@@ -1652,28 +1717,32 @@ static int mod_l4_entry(l4_pgentry_t *pl
         if ( !l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT) )
         {
             adjust_guest_l4e(nl4e, d);
-            return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
+            rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
+            unlock_page(l4pg);
+            return rc;
        }
 
         if ( unlikely(!get_page_from_l4e(nl4e, pfn, d)) )
-            return 0;
+            return unlock_page(l4pg), 0;
 
         adjust_guest_l4e(nl4e, d);
         if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
                                     preserve_ad)) )
        {
-            put_page_from_l4e(nl4e, pfn);
-            return 0;
+            ol4e = nl4e;
+            rc = 0;
        }
     }
     else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
                                      preserve_ad)) )
     {
+        unlock_page(l4pg);
         return 0;
     }
 
+    unlock_page(l4pg);
     put_page_from_l4e(ol4e, pfn);
-    return 1;
+    return rc;
 }
 
 #endif
@@ -2492,8 +2561,6 @@ int do_mmu_update(
 
     domain_mmap_cache_init(&mapcache);
 
-    domain_lock(d);
-
     for ( i = 0; i < count; i++ )
     {
         if ( hypercall_preempt_check() )
@@ -2664,8 +2731,6 @@ int do_mmu_update(
 
     process_deferred_ops();
 
-    domain_unlock(d);
-
     domain_mmap_cache_destroy(&mapcache);
 
     perfc_add(num_page_updates, i);
@@ -2718,14 +2783,19 @@ static int create_grant_pte_mapping(
         goto failed;
     }
 
+    lock_page(page);
+
     ol1e = *(l1_pgentry_t *)va;
     if ( !UPDATE_ENTRY(l1, (l1_pgentry_t *)va, ol1e, nl1e, mfn, v, 0) )
     {
+        unlock_page(page);
         put_page_type(page);
         rc = GNTST_general_error;
         goto failed;
     }
 
+    unlock_page(page);
+
     if ( !paging_mode_refcounts(d) )
         put_page_from_l1e(ol1e, d);
@@ -2769,16 +2839,14 @@ static int destroy_grant_pte_mapping(
         goto failed;
     }
 
-    if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) )
-    {
-        put_page_type(page);
-        rc = GNTST_general_error;
-        goto failed;
-    }
+    lock_page(page);
+
+    ol1e = *(l1_pgentry_t *)va;
 
     /* Check that the virtual address supplied is actually mapped to frame. */
     if ( unlikely((l1e_get_intpte(ol1e) >> PAGE_SHIFT) != frame) )
     {
+        unlock_page(page);
         MEM_LOG("PTE entry %lx for address %"PRIx64" doesn't match frame %lx",
                 (unsigned long)l1e_get_intpte(ol1e), addr, frame);
         put_page_type(page);
@@ -2793,12 +2861,14 @@ static int destroy_grant_pte_mapping(
                                 d->vcpu[0] /* Change if we go to per-vcpu shadows. */,
                                 0)) )
     {
+        unlock_page(page);
         MEM_LOG("Cannot delete PTE entry at %p", va);
         put_page_type(page);
         rc = GNTST_general_error;
         goto failed;
     }
 
+    unlock_page(page);
     put_page_type(page);
 
  failed:
@@ -2814,6 +2884,7 @@ static int create_grant_va_mapping(
     l1_pgentry_t *pl1e, ol1e;
     struct domain *d = v->domain;
     unsigned long gl1mfn;
+    struct page_info *l1pg;
     int okay;
 
     ASSERT(domain_is_locked(d));
@@ -2826,8 +2897,11 @@ static int create_grant_va_mapping(
         MEM_LOG("Could not find L1 PTE for address %lx", va);
         return GNTST_general_error;
     }
 
+    l1pg = mfn_to_page(gl1mfn);
+    lock_page(l1pg);
     ol1e = *pl1e;
     okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0);
+    unlock_page(l1pg);
     guest_unmap_l1e(v, pl1e);
     pl1e = NULL;
@@ -2845,6 +2919,7 @@ static int replace_grant_va_mapping(
 {
     l1_pgentry_t *pl1e, ol1e;
     unsigned long gl1mfn;
+    struct page_info *l1pg;
     int rc = 0;
 
     pl1e = guest_map_l1e(v, addr, &gl1mfn);
@@ -2853,11 +2928,15 @@ static int replace_grant_va_mapping(
         MEM_LOG("Could not find L1 PTE for address %lx", addr);
         return GNTST_general_error;
     }
+
+    l1pg = mfn_to_page(gl1mfn);
+    lock_page(l1pg);
     ol1e = *pl1e;
 
     /* Check that the virtual address supplied is actually mapped to frame. */
     if ( unlikely(l1e_get_pfn(ol1e) != frame) )
     {
+        unlock_page(l1pg);
         MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
                 l1e_get_pfn(ol1e), addr, frame);
         rc = GNTST_general_error;
@@ -2867,11 +2946,14 @@ static int replace_grant_va_mapping(
     /* Delete pagetable entry. */
     if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) )
     {
+        unlock_page(l1pg);
         MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
         rc = GNTST_general_error;
         goto out;
     }
 
+    unlock_page(l1pg);
+
  out:
     guest_unmap_l1e(v, pl1e);
     return rc;
@@ -2906,6 +2988,7 @@ int replace_grant_host_mapping(
     struct vcpu *curr = current;
     l1_pgentry_t *pl1e, ol1e;
     unsigned long gl1mfn;
+    struct page_info *l1pg;
     int rc;
 
     if ( flags & GNTMAP_contains_pte )
@@ -2927,16 +3010,21 @@ int replace_grant_host_mapping(
                 (unsigned long)new_addr);
         return GNTST_general_error;
     }
+
+    l1pg = mfn_to_page(gl1mfn);
+    lock_page(l1pg);
     ol1e = *pl1e;
 
     if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(), gl1mfn, curr,
                                 0)) )
     {
+        unlock_page(l1pg);
         MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
         guest_unmap_l1e(curr, pl1e);
         return GNTST_general_error;
     }
 
+    unlock_page(l1pg);
     guest_unmap_l1e(curr, pl1e);
 
     rc = replace_grant_va_mapping(addr, frame, ol1e, curr);
@@ -3014,8 +3102,6 @@ int do_update_va_mapping(unsigned long v
     if ( rc )
         return rc;
 
-    domain_lock(d);
-
     pl1e = guest_map_l1e(v, va, &gl1mfn);
 
     if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn, 0)) )
@@ -3027,8 +3113,6 @@ int do_update_va_mapping(unsigned long v
 
     process_deferred_ops();
 
-    domain_unlock(d);
-
     switch ( flags & UVMF_FLUSHTYPE_MASK )
     {
     case UVMF_TLB_FLUSH:
@@ -3669,8 +3753,6 @@ int ptwr_do_page_fault(struct vcpu *v, u
     struct ptwr_emulate_ctxt ptwr_ctxt;
     int rc;
 
-    domain_lock(d);
-
     /* Attempt to read the PTE that maps the VA being accessed. */
     guest_get_eff_l1e(v, addr, &pte);
     page = l1e_get_page(pte);
@@ -3690,16 +3772,16 @@ int ptwr_do_page_fault(struct vcpu *v, u
     ptwr_ctxt.cr2 = addr;
     ptwr_ctxt.pte = pte;
 
+    lock_page(page);
     rc = x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops);
+    unlock_page(page);
     if ( rc == X86EMUL_UNHANDLEABLE )
         goto bail;
 
-    domain_unlock(d);
     perfc_incr(ptwr_emulations);
     return EXCRET_fault_fixed;
 
  bail:
-    domain_unlock(d);
     return 0;
 }
Index: 2008-04-15/xen/include/asm-x86/mm.h
===================================================================
--- 2008-04-15.orig/xen/include/asm-x86/mm.h 2008-04-15 08:48:06.000000000 +0200
+++ 2008-04-15/xen/include/asm-x86/mm.h 2008-04-15 15:16:03.000000000 +0200
@@ -82,9 +82,12 @@ struct page_info
  /* PAE only: is this an L2 page directory containing Xen-private mappings? */
 #define _PGT_pae_xen_l2     26
 #define PGT_pae_xen_l2      (1U<<_PGT_pae_xen_l2)
+ /* The page is currently locked for modification. */
+#define _PGT_locked         25
+#define PGT_locked          (1U<<_PGT_locked)
 
- /* 26-bit count of uses of this frame as its current type. */
-#define PGT_count_mask      ((1U<<26)-1)
+ /* 25-bit count of uses of this frame as its current type. */
+#define PGT_count_mask      ((1U<<25)-1)
 
 /* Cleared when the owning guest 'frees' this page. */
 #define _PGC_allocated      31
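The lock_page()/unlock_page() pair above only builds inside the hypervisor, so the following is a minimal, self-contained model of the same bit-lock protocol that can be compiled and run on its own. It is a sketch, not Xen code: GCC's __sync_val_compare_and_swap() stands in for Xen's cmpxchg(), the PGT_validated bit position is made up for the model, and the per-page type machinery is reduced to a single word; only the PGT_locked/PGT_count_mask layout mirrors the mm.h hunk.

#include <assert.h>
#include <stdio.h>

#define PGT_locked      (1UL << 25)          /* as in the mm.h hunk above   */
#define PGT_count_mask  ((1UL << 25) - 1)    /* as in the mm.h hunk above   */
#define PGT_validated   (1UL << 28)          /* position invented for model */

struct page_info { unsigned long type_info; };

static void lock_page(struct page_info *page)
{
    unsigned long x, y = page->type_info;

    do {
        /* Spin until the page is validated and not already locked. */
        while ( !(y & PGT_validated) || (y & PGT_locked) )
            y = page->type_info;
        x = y;
        /* Try to set the lock bit; retry if type_info changed under us. */
    } while ( (y = __sync_val_compare_and_swap(&page->type_info,
                                               x, x | PGT_locked)) != x );
}

static void unlock_page(struct page_info *page)
{
    unsigned long x, y = page->type_info;

    do {
        assert(y & PGT_locked);
        x = y;
        /* Clear the lock bit, again retrying on concurrent changes. */
    } while ( (y = __sync_val_compare_and_swap(&page->type_info,
                                               x, x & ~PGT_locked)) != x );
}

int main(void)
{
    struct page_info pg = { .type_info = PGT_validated | 1 /* one type ref */ };

    lock_page(&pg);
    /* ...read the old PTE, adjust refcounts, write the new PTE here... */
    unlock_page(&pg);
    printf("type_info = %#lx\n", pg.type_info);
    return 0;
}

Because the lock bit lives in the existing type_info word, no extra field is needed in struct page_info, and acquire and release each cost one locked cmpxchg.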
Keir Fraser
2008-Apr-22 13:30 UTC
Re: [Xen-devel] [PATCH] x86: remove use of per-domain lock from page table entry handling
On 22/4/08 14:01, "Jan Beulich" <jbeulich@novell.com> wrote:

> This is only a first step, the use of the domain lock in do_mmuext_op()
> and do_set_gdt() still needs looking at, as do the assertions of the
> lock being held in create_grant_{pte,va}_mapping().
>
> This change results in a 5% performance improvement for kernel builds
> on dual-socket quad-core systems (which is what I used for reference
> for both 32- and 64-bit). Along with that, the amount of time reported
> as spent in the kernel gets reduced by almost 25% (the fraction of time
> spent in the kernel is generally reported significantly higher under
> Xen than with a native kernel).

I'm pretty sure we don't need per-page locks.

 -- Keir
Jan Beulich
2008-Apr-22 13:39 UTC
Re: [Xen-devel] [PATCH] x86: remove use of per-domain lock from page table entry handling
>>> Keir Fraser <keir.fraser@eu.citrix.com> 22.04.08 15:30 >>>
>On 22/4/08 14:01, "Jan Beulich" <jbeulich@novell.com> wrote:
>
>> This is only a first step, the use of the domain lock in do_mmuext_op()
>> and do_set_gdt() still needs looking at, as do the assertions of the
>> lock being held in create_grant_{pte,va}_mapping().
>>
>> This change results in a 5% performance improvement for kernel builds
>> on dual-socket quad-core systems (which is what I used for reference
>> for both 32- and 64-bit). Along with that, the amount of time reported
>> as spent in the kernel gets reduced by almost 25% (the fraction of time
>> spent in the kernel is generally reported significantly higher under
>> Xen than with a native kernel).
>
>I'm pretty sure we don't need per-page locks.

If you want to make serialization by the caller part of the ABI - yes.
If not (and I think everything else would be wrong, as a malicious guest
could take advantage of that), then the window between reading of
the old entry and writing the new one must be protected against racing
updates, as otherwise the page type and reference counters could get
screwed up.

Jan
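To make the race described here concrete: suppose UPDATE_ENTRY() is a plain write, no lock is held anywhere, and two vCPUs update the same L1 slot, which currently holds entry O, to new entries N1 and N2. The interleaving below is an illustrative sketch only (O, N1 and N2 are hypothetical entries; the helper names are the ones used in mm.c):

/* vCPU A */ ol1e = *pl1e;                        /* reads O                   */
/* vCPU B */ ol1e = *pl1e;                        /* also reads O              */
/* vCPU A */ get_page_from_l1e(N1, FOREIGNDOM);   /* takes a reference for N1  */
/* vCPU A */ UPDATE_ENTRY(l1, pl1e, O, N1, ...);  /* slot now holds N1         */
/* vCPU A */ put_page_from_l1e(O, d);             /* drops O's reference       */
/* vCPU B */ get_page_from_l1e(N2, FOREIGNDOM);   /* takes a reference for N2  */
/* vCPU B */ UPDATE_ENTRY(l1, pl1e, O, N2, ...);  /* blindly overwrites N1     */
/* vCPU B */ put_page_from_l1e(O, d);             /* drops O's reference again */

O's type/reference counts end up decremented twice while the reference taken for N1 is never dropped - exactly the corruption being referred to. Either the per-page lock or a PTE write done as a cmpxchg that fails for vCPU B (because the slot no longer holds O) closes that window.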
Keir Fraser
2008-Apr-22 13:51 UTC
Re: [Xen-devel] [PATCH] x86: remove use of per-domain lock from page table entry handling
On 22/4/08 14:39, "Jan Beulich" <jbeulich@novell.com> wrote:

>> I'm pretty sure we don't need per-page locks.
>
> If you want to make serialization by the caller part of the ABI - yes.
> If not (and I think everything else would be wrong, as a malicious guest
> could take advantage of that), then the window between reading of
> the old entry and writing the new one must be protected against racing
> updates, as otherwise the page type and reference counters could get
> screwed up.

Ah, yes, my statement assumes we return to doing cmpxchg for all PTE
updates (and retry on failure). As to whether that is better than your
approach, I suppose it depends on whether you can amortise the locking
overheads across multiple PTE updates. Right now it appears you
lock/unlock on every PTE update, which is double the number of locked
cmpxchg operations compared with just doing the cmpxchg on the PTE
itself.

Bear in mind that all the page reference counting was designed in the
first place to be safe, assuming the PTE update itself is done as an
atomic read-modify-write operation. It's just that we changed the latter
to a simple write some time ago, because of course we observed that
that's a safe thing to do under the big lock.

 -- Keir
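A rough sketch of the alternative being described - doing the PTE write itself as a cmpxchg and retrying on failure, rather than taking a per-page lock around a plain write - might look like the following. It is illustrative only: uint64_t stands in for l1_pgentry_t, a GCC builtin stands in for Xen's cmpxchg(), and the reference-counting work is reduced to placeholder comments.

#include <stdint.h>
#include <stdbool.h>

typedef uint64_t pte_t;                     /* stand-in for l1_pgentry_t */

/* One update attempt: succeeds only if *pte still holds 'old'. */
static bool pte_cmpxchg(volatile pte_t *pte, pte_t old, pte_t new)
{
    return __sync_bool_compare_and_swap(pte, old, new);
}

/*
 * Retry loop replacing the big-lock/plain-write scheme: whenever the
 * cmpxchg loses a race, re-read the entry and redo the per-entry
 * bookkeeping against the fresh old value before trying again.
 */
static void set_pte(volatile pte_t *pte, pte_t new)
{
    pte_t old;

    for ( ; ; )
    {
        old = *pte;
        /* validate 'new' and take references for it (placeholder) */
        if ( pte_cmpxchg(pte, old, new) )
            break;
        /* lost the race: drop the references just taken, then retry (placeholder) */
    }
    /* success: drop the references that were held for 'old' (placeholder) */
}

On this scheme the locked operation happens once per successful update (plus once per retry), versus two per update for lock_page()/unlock_page() - the cost comparison made above - at the price of possibly having to redo the reference-counting work whenever a retry is needed.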