Stefano Stabellini
2010-Jun-21 16:15 UTC
[Xen-devel] [PATCH] implement HVMOP_pagetable_dying
Hi all,
this patch implements HVMOP_pagetable_dying: a hypercall for
guests to notify Xen that a pagetable is about to be destroyed so that
Xen can use it as a hint to unshadow the pagetable soon and unhook the
top-level user-mode shadow entries right away.
Gianluca Guida is the original author of this patch.
Signed-off-by: Gianluca Guida <glguida@gmail.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
diff -r 91bb4e243355 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Fri Jun 11 14:43:59 2010 +0100
+++ b/xen/arch/x86/hvm/hvm.c Mon Jun 21 16:06:25 2010 +0100
@@ -3152,6 +3152,30 @@
break;
}
+ case HVMOP_pagetable_dying:
+ {
+ struct xen_hvm_pagetable_dying a;
+ struct domain *d;
+
+ if ( copy_from_guest(&a, arg, 1) )
+ return -EFAULT;
+
+ rc = rcu_lock_target_domain_by_id(a.domid, &d);
+ if ( rc != 0 )
+ return rc;
+
+ rc = -EINVAL;
+ if ( !is_hvm_domain(d) || !paging_mode_shadow(d) )
+ goto param_fail5;
+
+ rc = 0;
+ pagetable_dying(d, a.gpa);
+
+ param_fail5:
+ rcu_unlock_domain(d);
+ break;
+ }
+
default:
{
gdprintk(XENLOG_WARNING, "Bad HVM op %ld.\n", op);
diff -r 91bb4e243355 xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c Fri Jun 11 14:43:59 2010 +0100
+++ b/xen/arch/x86/mm/paging.c Mon Jun 21 16:06:25 2010 +0100
@@ -768,6 +768,18 @@
return shadow_enable(d, mode | PG_SH_enable);
}
+/* Called from the guest to indicate that a process is being torn down
+ * and therefore its pagetables will soon be discarded */
+void pagetable_dying(struct domain *d, paddr_t gpa)
+{
+ struct vcpu *v;
+
+ ASSERT(paging_mode_shadow(d));
+
+ v = d->vcpu[0];
+ v->arch.paging.mode->shadow.pagetable_dying(v, gpa);
+}
+
/* Print paging-assistance info to the console */
void paging_dump_domain_info(struct domain *d)
{
diff -r 91bb4e243355 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c Fri Jun 11 14:43:59 2010 +0100
+++ b/xen/arch/x86/mm/shadow/common.c Mon Jun 21 16:06:25 2010 +0100
@@ -60,6 +60,7 @@
d->arch.paging.shadow.oos_active = 0;
d->arch.paging.shadow.oos_off = (domcr_flags & DOMCRF_oos_off) ? 1
: 0;
#endif
+ d->arch.paging.shadow.pagetable_dying_op = 0;
}
/* Setup the shadow-specfic parts of a vcpu struct. Note: The most important
@@ -1314,22 +1315,23 @@
}
/* Dispatcher function: call the per-mode function that will unhook the
- * non-Xen mappings in this top-level shadow mfn */
-static void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn)
+ * non-Xen mappings in this top-level shadow mfn. With user_only == 1,
+ * unhooks only the user-mode mappings. */
+void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn, int user_only)
{
struct page_info *sp = mfn_to_page(smfn);
switch ( sp->u.sh.type )
{
case SH_type_l2_32_shadow:
- SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, 2)(v,smfn);
+ SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, 2)(v, smfn, user_only);
break;
case SH_type_l2_pae_shadow:
case SH_type_l2h_pae_shadow:
- SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings, 3)(v,smfn);
+ SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings, 3)(v, smfn, user_only);
break;
#if CONFIG_PAGING_LEVELS >= 4
case SH_type_l4_64_shadow:
- SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings, 4)(v,smfn);
+ SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings, 4)(v, smfn, user_only);
break;
#endif
default:
@@ -1399,8 +1401,7 @@
{
TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_PREALLOC_UNHOOK);
shadow_unhook_mappings(v,
- pagetable_get_mfn(v2->arch.shadow_table[i]));
-
+ pagetable_get_mfn(v2->arch.shadow_table[i]),
0);
/* See if that freed up enough space */
if ( space_is_available(d, order, count) )
{
@@ -1454,7 +1455,7 @@
for ( i = 0 ; i < 4 ; i++ )
if ( !pagetable_is_null(v->arch.shadow_table[i]) )
shadow_unhook_mappings(v,
- pagetable_get_mfn(v->arch.shadow_table[i]));
+ pagetable_get_mfn(v->arch.shadow_table[i]),
0);
/* Make sure everyone sees the unshadowings */
flush_tlb_mask(&d->domain_dirty_cpumask);
diff -r 91bb4e243355 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Fri Jun 11 14:43:59 2010 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c Mon Jun 21 16:06:25 2010 +0100
@@ -2179,37 +2179,42 @@
* These are called from common code when we are running out of shadow
* memory, and unpinning all the top-level shadows hasn't worked.
*
+ * With user_only == 1, we leave guest kernel-mode mappings in place too,
+ * unhooking only the user-mode mappings
+ *
* This implementation is pretty crude and slow, but we hope that it won't
* be called very often. */
#if GUEST_PAGING_LEVELS == 2
-void sh_unhook_32b_mappings(struct vcpu *v, mfn_t sl2mfn)
+void sh_unhook_32b_mappings(struct vcpu *v, mfn_t sl2mfn, int user_only)
{
shadow_l2e_t *sl2e;
SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, v->domain, {
- (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
+ if ( !user_only || (sl2e->l2 & _PAGE_USER) )
+ (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
});
}
#elif GUEST_PAGING_LEVELS == 3
-void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl2mfn)
-/* Walk a PAE l2 shadow, unhooking entries from all the subshadows */
+void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl2mfn, int user_only)
{
shadow_l2e_t *sl2e;
SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, v->domain, {
- (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
+ if ( !user_only || (sl2e->l2 & _PAGE_USER) )
+ (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
});
}
#elif GUEST_PAGING_LEVELS == 4
-void sh_unhook_64b_mappings(struct vcpu *v, mfn_t sl4mfn)
+void sh_unhook_64b_mappings(struct vcpu *v, mfn_t sl4mfn, int user_only)
{
shadow_l4e_t *sl4e;
SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, v->domain, {
- (void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
+ if ( !user_only || (sl4e->l4 & _PAGE_USER) )
+ (void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
});
}
@@ -2693,8 +2698,18 @@
static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn)
{
#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
- if ( v->arch.paging.shadow.last_emulated_mfn_for_unshadow == mfn_x(gmfn)
- && sh_mfn_is_a_page_table(gmfn) )
+ /* If the domain has never made a "dying" op, use the two-writes
+ * heuristic; otherwise, unshadow as soon as we write a zero for a dying
+ * process.
+ *
+ * Don't bother trying to unshadow if it's not a PT, or if it's > l1.
+ */
+ if ( ( v->arch.paging.shadow.pagetable_dying
+ || ( !v->domain->arch.paging.shadow.pagetable_dying_op
+ &&
v->arch.paging.shadow.last_emulated_mfn_for_unshadow == mfn_x(gmfn) ) )
+ && sh_mfn_is_a_page_table(gmfn)
+ && !(mfn_to_page(gmfn)->shadow_flags
+ & (SHF_L2_32|SHF_L2_PAE|SHF_L2H_PAE|SHF_L4_64)) )
{
perfc_incr(shadow_early_unshadow);
sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
@@ -3384,6 +3399,40 @@
* caught by user-mode page-table check above.
*/
emulate_readonly:
+
+ /* Unshadow if we are writing to a toplevel pagetable that is
+ * flagged as a dying process, and that is not currently used. */
+ if ( sh_mfn_is_a_page_table(gmfn)
+ && (mfn_to_page(gmfn)->shadow_flags &
SHF_pagetable_dying) )
+ {
+ int used = 0;
+ struct vcpu *tmp;
+ for_each_vcpu(d, tmp)
+ {
+#if GUEST_PAGING_LEVELS == 3
+ int i;
+ for ( i = 0; i < 4; i++ )
+ {
+ mfn_t smfn =
_mfn(pagetable_get_pfn(v->arch.shadow_table[i]));
+ if ( mfn_valid(smfn) && (mfn_x(smfn) != 0) )
+ {
+ used |= (mfn_to_page(smfn)->v.sh.back == mfn_x(gmfn));
+
+ if ( used )
+ break;
+ }
+ }
+#else /* 32 or 64 */
+ used = (mfn_x(pagetable_get_mfn(tmp->arch.guest_table)) ==
mfn_x(gmfn));
+#endif
+ if ( used )
+ break;
+ }
+
+ if ( !used )
+ sh_remove_shadows(v, gmfn, 1 /* fast */, 0 /* can fail */);
+ }
+
/*
* We don''t need to hold the lock for the whole emulation; we will
* take it again when we write to the pagetables.
@@ -4033,7 +4082,11 @@
smfn = sh_make_shadow(v, gmfn, root_type);
}
ASSERT(mfn_valid(smfn));
-
+
+ /* Remember if we've been told that this process is being torn down */
+ v->arch.paging.shadow.pagetable_dying
+ = !!(mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying);
+
/* Pin the shadow and put it (back) on the list of pinned shadows */
if ( sh_pin(v, smfn) == 0 )
{
@@ -4603,6 +4656,110 @@
#endif /* 64bit guest */
/**************************************************************************/
+/* Function for the guest to inform us that a process is being torn
+ * down. We remember that as a hint to unshadow its pagetables soon,
+ * and in the meantime we unhook its top-level user-mode entries. */
+
+#if GUEST_PAGING_LEVELS == 3
+static void sh_pagetable_dying(struct vcpu *v, paddr_t gpa)
+{
+ int i = 0;
+ int flush = 0;
+ int fast_path = 0;
+ paddr_t gcr3 = 0;
+ mfn_t smfn, gmfn;
+ p2m_type_t p2mt;
+ unsigned long gl3pa;
+ guest_l3e_t *gl3e = NULL;
+ paddr_t gl2a = 0;
+
+ shadow_lock(v->domain);
+
+ gcr3 = (v->arch.hvm_vcpu.guest_cr[3]);
+ /* fast path: the pagetable belongs to the current context */
+ if ( gcr3 == gpa )
+ fast_path = 1;
+
+ gmfn = gfn_to_mfn_query(v->domain, _gfn(gpa >> PAGE_SHIFT),
&p2mt);
+ if ( !mfn_valid(gmfn) || !p2m_is_ram(p2mt) )
+ {
+ printk(XENLOG_DEBUG "sh_pagetable_dying: gpa not valid %lx\n", gpa);
+ goto out;
+ }
+ if ( !fast_path )
+ {
+ gl3pa = (unsigned long) sh_map_domain_page(gmfn);
+ gl3e = (guest_l3e_t *) (gl3pa + (gpa & ~PAGE_MASK));
+ }
+ for ( i = 0; i < 4; i++ )
+ {
+ if ( fast_path )
+ smfn = _mfn(pagetable_get_pfn(v->arch.shadow_table[i]));
+ else
+ {
+ /* retrieving the l2s */
+ gl2a = guest_l3e_get_paddr(gl3e[i]);
+ gmfn = gfn_to_mfn_query(v->domain, _gfn(gl2a >>
PAGE_SHIFT), &p2mt);
+ smfn = shadow_hash_lookup(v, mfn_x(gmfn), SH_type_l2_pae_shadow);
+ }
+
+ if ( mfn_valid(smfn) )
+ {
+ gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
+ mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying;
+ shadow_unhook_mappings(v, smfn, 1/* user pages only */);
+ flush = 1;
+ }
+ }
+ if ( flush )
+ flush_tlb_mask(&v->domain->domain_dirty_cpumask);
+
+ /* Remember that we've seen the guest use this interface, so we
+ * can rely on it using it in future, instead of guessing at
+ * when processes are being torn down. */
+ v->domain->arch.paging.shadow.pagetable_dying_op = 1;
+
+ v->arch.paging.shadow.pagetable_dying = 1;
+
+out:
+ if ( !fast_path )
+ unmap_domain_page(gl3pa);
+ shadow_unlock(v->domain);
+}
+#else
+static void sh_pagetable_dying(struct vcpu *v, paddr_t gpa)
+{
+ mfn_t smfn, gmfn;
+ p2m_type_t p2mt;
+
+ shadow_lock(v->domain);
+
+ gmfn = gfn_to_mfn_query(v->domain, _gfn(gpa >> PAGE_SHIFT),
&p2mt);
+#if GUEST_PAGING_LEVELS == 2
+ smfn = shadow_hash_lookup(v, mfn_x(gmfn), SH_type_l2_32_shadow);
+#else
+ smfn = shadow_hash_lookup(v, mfn_x(gmfn), SH_type_l4_64_shadow);
+#endif
+ if ( mfn_valid(smfn) )
+ {
+ mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying;
+ shadow_unhook_mappings(v, smfn, 1/* user pages only */);
+ /* Now flush the TLB: we removed toplevel mappings. */
+ flush_tlb_mask(&v->domain->domain_dirty_cpumask);
+ }
+
+ /* Remember that we've seen the guest use this interface, so we
+ * can rely on it using it in future, instead of guessing at
+ * when processes are being torn down. */
+ v->domain->arch.paging.shadow.pagetable_dying_op = 1;
+
+ v->arch.paging.shadow.pagetable_dying = 1;
+
+ shadow_unlock(v->domain);
+}
+#endif
+
+/**************************************************************************/
/* Handling HVM guest writes to pagetables */
/* Translate a VA to an MFN, injecting a page-fault if we fail */
@@ -5247,6 +5404,7 @@
#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
.shadow.guess_wrmap = sh_guess_wrmap,
#endif
+ .shadow.pagetable_dying = sh_pagetable_dying,
.shadow.shadow_levels = SHADOW_PAGING_LEVELS,
};
diff -r 91bb4e243355 xen/arch/x86/mm/shadow/multi.h
--- a/xen/arch/x86/mm/shadow/multi.h Fri Jun 11 14:43:59 2010 +0100
+++ b/xen/arch/x86/mm/shadow/multi.h Mon Jun 21 16:06:25 2010 +0100
@@ -52,13 +52,13 @@
extern void
SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, GUEST_LEVELS)
- (struct vcpu *v, mfn_t sl2mfn);
+ (struct vcpu *v, mfn_t sl2mfn, int user_only);
extern void
SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings, GUEST_LEVELS)
- (struct vcpu *v, mfn_t sl3mfn);
+ (struct vcpu *v, mfn_t sl3mfn, int user_only);
extern void
SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings, GUEST_LEVELS)
- (struct vcpu *v, mfn_t sl4mfn);
+ (struct vcpu *v, mfn_t sl4mfn, int user_only);
extern int
SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, GUEST_LEVELS)
diff -r 91bb4e243355 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h Fri Jun 11 14:43:59 2010 +0100
+++ b/xen/arch/x86/mm/shadow/private.h Mon Jun 21 16:06:25 2010 +0100
@@ -321,6 +321,8 @@
#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
+#define SHF_pagetable_dying (1u<<31)
+
static inline int sh_page_has_multiple_shadows(struct page_info *pg)
{
u32 shadows;
@@ -406,6 +408,10 @@
int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p,
intpte_t *old, intpte_t new, mfn_t gmfn);
+/* Unhook the non-Xen mappings in this top-level shadow mfn.
+ * With user_only == 1, unhooks only the user-mode mappings. */
+void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn, int user_only);
+
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
/* Allow a shadowed page to go out of sync */
int sh_unsync(struct vcpu *v, mfn_t gmfn);
diff -r 91bb4e243355 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Fri Jun 11 14:43:59 2010 +0100
+++ b/xen/include/asm-x86/domain.h Mon Jun 21 16:06:25 2010 +0100
@@ -121,6 +121,8 @@
/* OOS */
int oos_active;
int oos_off;
+
+ int pagetable_dying_op;
};
struct shadow_vcpu {
@@ -149,6 +151,8 @@
mfn_t smfn[SHADOW_OOS_FIXUPS];
unsigned long off[SHADOW_OOS_FIXUPS];
} oos_fixup[SHADOW_OOS_PAGES];
+
+ int pagetable_dying;
};
/************************************************/
diff -r 91bb4e243355 xen/include/asm-x86/paging.h
--- a/xen/include/asm-x86/paging.h Fri Jun 11 14:43:59 2010 +0100
+++ b/xen/include/asm-x86/paging.h Mon Jun 21 16:06:25 2010 +0100
@@ -95,6 +95,7 @@
void (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn);
int (*guess_wrmap )(struct vcpu *v,
unsigned long vaddr, mfn_t gmfn);
+ void (*pagetable_dying )(struct vcpu *v, paddr_t gpa);
/* For outsiders to tell what mode we're in */
unsigned int shadow_levels;
};
@@ -342,6 +343,10 @@
safe_write_pte(p, new);
}
+/* Called from the guest to indicate that a process is being
+ * torn down and its pagetables will soon be discarded */
+void pagetable_dying(struct domain *d, paddr_t gpa);
+
/* Print paging-assistance info to the console */
void paging_dump_domain_info(struct domain *d);
void paging_dump_vcpu_info(struct vcpu *v);
diff -r 91bb4e243355 xen/include/public/hvm/hvm_op.h
--- a/xen/include/public/hvm/hvm_op.h Fri Jun 11 14:43:59 2010 +0100
+++ b/xen/include/public/hvm/hvm_op.h Mon Jun 21 16:06:25 2010 +0100
@@ -127,6 +127,16 @@
typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t;
DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t);
+/* Hint from PV drivers for pagetable destruction. */
+#define HVMOP_pagetable_dying 9
+struct xen_hvm_pagetable_dying {
+ /* Domain with a pagetable about to be destroyed. */
+ domid_t domid;
+ /* guest physical address of the toplevel pagetable dying */
+ uint64_aligned_t gpa;
+};
+typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_pagetable_dying_t);
#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
On 21/06/2010 17:15, "Stefano Stabellini" <Stefano.Stabellini@eu.citrix.com> wrote:> Hi all, > this patch implements HVMOP_pagetable_dying: an hypercall for > guests to notify Xen that a pagetable is about to be destroyed so that > Xen can use it as a hint to unshadow the pagetable soon and unhook the > top-level user-mode shadow entries right away.This patch doesn't apply to xen-unstable tip. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Stefano Stabellini
2010-Jun-21 17:50 UTC
Re: [Xen-devel] [PATCH] implement HVMOP_pagetable_dying
On Mon, 21 Jun 2010, Keir Fraser wrote:> On 21/06/2010 17:15, "Stefano Stabellini" <Stefano.Stabellini@eu.citrix.com> > wrote: > > > Hi all, > > this patch implements HVMOP_pagetable_dying: an hypercall for > > guests to notify Xen that a pagetable is about to be destroyed so that > > Xen can use it as a hint to unshadow the pagetable soon and unhook the > > top-level user-mode shadow entries right away. > > This patch doesn''t apply to xen-unstable tip. >here we go: diff -r 31708477f0a9 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Mon Jun 21 09:59:10 2010 +0100 +++ b/xen/arch/x86/hvm/hvm.c Mon Jun 21 18:49:28 2010 +0100 @@ -3153,6 +3153,30 @@ break; } + case HVMOP_pagetable_dying: + { + struct xen_hvm_pagetable_dying a; + struct domain *d; + + if ( copy_from_guest(&a, arg, 1) ) + return -EFAULT; + + rc = rcu_lock_target_domain_by_id(a.domid, &d); + if ( rc != 0 ) + return rc; + + rc = -EINVAL; + if ( !is_hvm_domain(d) || !paging_mode_shadow(d) ) + goto param_fail5; + + rc = 0; + pagetable_dying(d, a.gpa); + + param_fail5: + rcu_unlock_domain(d); + break; + } + default: { gdprintk(XENLOG_WARNING, "Bad HVM op %ld.\n", op); diff -r 31708477f0a9 xen/arch/x86/mm/paging.c --- a/xen/arch/x86/mm/paging.c Mon Jun 21 09:59:10 2010 +0100 +++ b/xen/arch/x86/mm/paging.c Mon Jun 21 18:49:28 2010 +0100 @@ -766,6 +766,18 @@ return shadow_enable(d, mode | PG_SH_enable); } +/* Called from the guest to indicate that a process is being torn down + * and therefore its pagetables will soon be discarded */ +void pagetable_dying(struct domain *d, paddr_t gpa) +{ + struct vcpu *v; + + ASSERT(paging_mode_shadow(d)); + + v = d->vcpu[0]; + v->arch.paging.mode->shadow.pagetable_dying(v, gpa); +} + /* Print paging-assistance info to the console */ void paging_dump_domain_info(struct domain *d) { diff -r 31708477f0a9 xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Mon Jun 21 09:59:10 2010 +0100 +++ b/xen/arch/x86/mm/shadow/common.c Mon Jun 21 18:49:28 2010 +0100 @@ 
-60,6 +60,7 @@ d->arch.paging.shadow.oos_active = 0; d->arch.paging.shadow.oos_off = (domcr_flags & DOMCRF_oos_off) ? 1 : 0; #endif + d->arch.paging.shadow.pagetable_dying_op = 0; } /* Setup the shadow-specfic parts of a vcpu struct. Note: The most important @@ -1314,22 +1315,23 @@ } /* Dispatcher function: call the per-mode function that will unhook the - * non-Xen mappings in this top-level shadow mfn */ -static void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn) + * non-Xen mappings in this top-level shadow mfn. With user_only == 1, + * unhooks only the user-mode mappings. */ +void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn, int user_only) { struct page_info *sp = mfn_to_page(smfn); switch ( sp->u.sh.type ) { case SH_type_l2_32_shadow: - SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, 2)(v,smfn); + SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, 2)(v, smfn, user_only); break; case SH_type_l2_pae_shadow: case SH_type_l2h_pae_shadow: - SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings, 3)(v,smfn); + SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings, 3)(v, smfn, user_only); break; #if CONFIG_PAGING_LEVELS >= 4 case SH_type_l4_64_shadow: - SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings, 4)(v,smfn); + SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings, 4)(v, smfn, user_only); break; #endif default: @@ -1399,7 +1401,7 @@ { TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_PREALLOC_UNHOOK); shadow_unhook_mappings(v, - pagetable_get_mfn(v2->arch.shadow_table[i])); + pagetable_get_mfn(v2->arch.shadow_table[i]), 0); /* See if that freed up enough space */ if ( space_is_available(d, order, count) ) @@ -1454,7 +1456,7 @@ for ( i = 0 ; i < 4 ; i++ ) if ( !pagetable_is_null(v->arch.shadow_table[i]) ) shadow_unhook_mappings(v, - pagetable_get_mfn(v->arch.shadow_table[i])); + pagetable_get_mfn(v->arch.shadow_table[i]), 0); /* Make sure everyone sees the unshadowings */ flush_tlb_mask(&d->domain_dirty_cpumask); diff -r 31708477f0a9 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Mon Jun 21 
09:59:10 2010 +0100 +++ b/xen/arch/x86/mm/shadow/multi.c Mon Jun 21 18:49:28 2010 +0100 @@ -2179,37 +2179,43 @@ * These are called from common code when we are running out of shadow * memory, and unpinning all the top-level shadows hasn''t worked. * + * With user_only == 1, we leave guest kernel-mode mappings in place too, + * unhooking only the user-mode mappings + * * This implementation is pretty crude and slow, but we hope that it won''t * be called very often. */ #if GUEST_PAGING_LEVELS == 2 -void sh_unhook_32b_mappings(struct vcpu *v, mfn_t sl2mfn) +void sh_unhook_32b_mappings(struct vcpu *v, mfn_t sl2mfn, int user_only) { shadow_l2e_t *sl2e; SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, v->domain, { - (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn); + if ( !user_only || (sl2e->l2 & _PAGE_USER) ) + (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn); }); } #elif GUEST_PAGING_LEVELS == 3 -void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl2mfn) +void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl2mfn, int user_only) /* Walk a PAE l2 shadow, unhooking entries from all the subshadows */ { shadow_l2e_t *sl2e; SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, v->domain, { - (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn); + if ( !user_only || (sl2e->l2 & _PAGE_USER) ) + (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn); }); } #elif GUEST_PAGING_LEVELS == 4 -void sh_unhook_64b_mappings(struct vcpu *v, mfn_t sl4mfn) +void sh_unhook_64b_mappings(struct vcpu *v, mfn_t sl4mfn, int user_only) { shadow_l4e_t *sl4e; SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, v->domain, { - (void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn); + if ( !user_only || (sl4e->l4 & _PAGE_USER) ) + (void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn); }); } @@ -2693,8 +2699,18 @@ static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn) { #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW - if ( v->arch.paging.shadow.last_emulated_mfn_for_unshadow == 
mfn_x(gmfn) - && sh_mfn_is_a_page_table(gmfn) ) + /* If the domain has never made a "dying" op, use the two-writes + * heuristic; otherwise, unshadow as soon as we write a zero for a dying + * process. + * + * Don''t bother trying to unshadow if it''s not a PT, or if it''s > l1. + */ + if ( ( v->arch.paging.shadow.pagetable_dying + || ( !v->domain->arch.paging.shadow.pagetable_dying_op + && v->arch.paging.shadow.last_emulated_mfn_for_unshadow == mfn_x(gmfn) ) ) + && sh_mfn_is_a_page_table(gmfn) + && !(mfn_to_page(gmfn)->shadow_flags + & (SHF_L2_32|SHF_L2_PAE|SHF_L2H_PAE|SHF_L4_64)) ) { perfc_incr(shadow_early_unshadow); sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ ); @@ -3384,6 +3400,40 @@ * caught by user-mode page-table check above. */ emulate_readonly: + + /* Unshadow if we are writing to a toplevel pagetable that is + * flagged as a dying process, and that is not currently used. */ + if ( sh_mfn_is_a_page_table(gmfn) + && (mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying) ) + { + int used = 0; + struct vcpu *tmp; + for_each_vcpu(d, tmp) + { +#if GUEST_PAGING_LEVELS == 3 + int i; + for ( i = 0; i < 4; i++ ) + { + mfn_t smfn = _mfn(pagetable_get_pfn(v->arch.shadow_table[i])); + if ( mfn_valid(smfn) && (mfn_x(smfn) != 0) ) + { + used |= (mfn_to_page(smfn)->v.sh.back == mfn_x(gmfn)); + + if ( used ) + break; + } + } +#else /* 32 or 64 */ + used = (mfn_x(pagetable_get_mfn(tmp->arch.guest_table)) == mfn_x(gmfn)); +#endif + if ( used ) + break; + } + + if ( !used ) + sh_remove_shadows(v, gmfn, 1 /* fast */, 0 /* can fail */); + } + /* * We don''t need to hold the lock for the whole emulation; we will * take it again when we write to the pagetables. 
@@ -4033,6 +4083,11 @@ smfn = sh_make_shadow(v, gmfn, root_type); } ASSERT(mfn_valid(smfn)); + + /* Remember if we''ve been told that this process is being torn down */ + v->arch.paging.shadow.pagetable_dying + = !!(mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying); + /* Pin the shadow and put it (back) on the list of pinned shadows */ if ( sh_pin(v, smfn) == 0 ) @@ -4603,6 +4658,110 @@ #endif /* 64bit guest */ /**************************************************************************/ +/* Function for the guest to inform us that a process is being torn + * down. We remember that as a hint to unshadow its pagetables soon, + * and in the meantime we unhook its top-level user-mode entries. */ + +#if GUEST_PAGING_LEVELS == 3 +static void sh_pagetable_dying(struct vcpu *v, paddr_t gpa) +{ + int i = 0; + int flush = 0; + int fast_path = 0; + paddr_t gcr3 = 0; + mfn_t smfn, gmfn; + p2m_type_t p2mt; + unsigned long gl3pa; + guest_l3e_t *gl3e = NULL; + paddr_t gl2a = 0; + + shadow_lock(v->domain); + + gcr3 = (v->arch.hvm_vcpu.guest_cr[3]); + /* fast path: the pagetable belongs to the current context */ + if ( gcr3 == gpa ) + fast_path = 1; + + gmfn = gfn_to_mfn_query(v->domain, _gfn(gpa >> PAGE_SHIFT), &p2mt); + if ( !mfn_valid(gmfn) || !p2m_is_ram(p2mt) ) + { + printk(XENLOG_DEBUG "sh_pagetable_dying: gpa not valid %lx\n", gpa); + goto out; + } + if ( !fast_path ) + { + gl3pa = (unsigned long) sh_map_domain_page(gmfn); + gl3e = (guest_l3e_t *) (gl3pa + (gpa & ~PAGE_MASK)); + } + for ( i = 0; i < 4; i++ ) + { + if ( fast_path ) + smfn = _mfn(pagetable_get_pfn(v->arch.shadow_table[i])); + else + { + /* retrieving the l2s */ + gl2a = guest_l3e_get_paddr(gl3e[i]); + gmfn = gfn_to_mfn_query(v->domain, _gfn(gl2a >> PAGE_SHIFT), &p2mt); + smfn = shadow_hash_lookup(v, mfn_x(gmfn), SH_type_l2_pae_shadow); + } + + if ( mfn_valid(smfn) ) + { + gmfn = _mfn(mfn_to_page(smfn)->v.sh.back); + mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying; + shadow_unhook_mappings(v, smfn, 
1/* user pages only */); + flush = 1; + } + } + if ( flush ) + flush_tlb_mask(&v->domain->domain_dirty_cpumask); + + /* Remember that we''ve seen the guest use this interface, so we + * can rely on it using it in future, instead of guessing at + * when processes are being torn down. */ + v->domain->arch.paging.shadow.pagetable_dying_op = 1; + + v->arch.paging.shadow.pagetable_dying = 1; + +out: + if ( !fast_path ) + unmap_domain_page(gl3pa); + shadow_unlock(v->domain); +} +#else +static void sh_pagetable_dying(struct vcpu *v, paddr_t gpa) +{ + mfn_t smfn, gmfn; + p2m_type_t p2mt; + + shadow_lock(v->domain); + + gmfn = gfn_to_mfn_query(v->domain, _gfn(gpa >> PAGE_SHIFT), &p2mt); +#if GUEST_PAGING_LEVELS == 2 + smfn = shadow_hash_lookup(v, mfn_x(gmfn), SH_type_l2_32_shadow); +#else + smfn = shadow_hash_lookup(v, mfn_x(gmfn), SH_type_l4_64_shadow); +#endif + if ( mfn_valid(smfn) ) + { + mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying; + shadow_unhook_mappings(v, smfn, 1/* user pages only */); + /* Now flush the TLB: we removed toplevel mappings. */ + flush_tlb_mask(&v->domain->domain_dirty_cpumask); + } + + /* Remember that we''ve seen the guest use this interface, so we + * can rely on it using it in future, instead of guessing at + * when processes are being torn down. 
*/ + v->domain->arch.paging.shadow.pagetable_dying_op = 1; + + v->arch.paging.shadow.pagetable_dying = 1; + + shadow_unlock(v->domain); +} +#endif + +/**************************************************************************/ /* Handling HVM guest writes to pagetables */ /* Translate a VA to an MFN, injecting a page-fault if we fail */ @@ -5247,6 +5406,7 @@ #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC .shadow.guess_wrmap = sh_guess_wrmap, #endif + .shadow.pagetable_dying = sh_pagetable_dying, .shadow.shadow_levels = SHADOW_PAGING_LEVELS, }; diff -r 31708477f0a9 xen/arch/x86/mm/shadow/multi.h --- a/xen/arch/x86/mm/shadow/multi.h Mon Jun 21 09:59:10 2010 +0100 +++ b/xen/arch/x86/mm/shadow/multi.h Mon Jun 21 18:49:28 2010 +0100 @@ -52,13 +52,13 @@ extern void SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, GUEST_LEVELS) - (struct vcpu *v, mfn_t sl2mfn); + (struct vcpu *v, mfn_t sl2mfn, int user_only); extern void SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings, GUEST_LEVELS) - (struct vcpu *v, mfn_t sl3mfn); + (struct vcpu *v, mfn_t sl3mfn, int user_only); extern void SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings, GUEST_LEVELS) - (struct vcpu *v, mfn_t sl4mfn); + (struct vcpu *v, mfn_t sl4mfn, int user_only); extern int SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, GUEST_LEVELS) diff -r 31708477f0a9 xen/arch/x86/mm/shadow/private.h --- a/xen/arch/x86/mm/shadow/private.h Mon Jun 21 09:59:10 2010 +0100 +++ b/xen/arch/x86/mm/shadow/private.h Mon Jun 21 18:49:28 2010 +0100 @@ -321,6 +321,8 @@ #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */ +#define SHF_pagetable_dying (1u<<31) + static inline int sh_page_has_multiple_shadows(struct page_info *pg) { u32 shadows; @@ -406,6 +408,10 @@ int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, intpte_t *old, intpte_t new, mfn_t gmfn); +/* Unhook the non-Xen mappings in this top-level shadow mfn. + * With user_only == 1, unhooks only the user-mode mappings. 
*/ +void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn, int user_only); + #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) /* Allow a shadowed page to go out of sync */ int sh_unsync(struct vcpu *v, mfn_t gmfn); diff -r 31708477f0a9 xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Mon Jun 21 09:59:10 2010 +0100 +++ b/xen/include/asm-x86/domain.h Mon Jun 21 18:49:28 2010 +0100 @@ -121,6 +121,8 @@ /* OOS */ int oos_active; int oos_off; + + int pagetable_dying_op; }; struct shadow_vcpu { @@ -149,6 +151,8 @@ mfn_t smfn[SHADOW_OOS_FIXUPS]; unsigned long off[SHADOW_OOS_FIXUPS]; } oos_fixup[SHADOW_OOS_PAGES]; + + int pagetable_dying; }; /************************************************/ diff -r 31708477f0a9 xen/include/asm-x86/paging.h --- a/xen/include/asm-x86/paging.h Mon Jun 21 09:59:10 2010 +0100 +++ b/xen/include/asm-x86/paging.h Mon Jun 21 18:49:28 2010 +0100 @@ -95,6 +95,7 @@ void (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn); int (*guess_wrmap )(struct vcpu *v, unsigned long vaddr, mfn_t gmfn); + void (*pagetable_dying )(struct vcpu *v, paddr_t gpa); /* For outsiders to tell what mode we''re in */ unsigned int shadow_levels; }; @@ -342,6 +343,10 @@ safe_write_pte(p, new); } +/* Called from the guest to indicate that the a process is being + * torn down and its pagetables will soon be discarded */ +void pagetable_dying(struct domain *d, paddr_t gpa); + /* Print paging-assistance info to the console */ void paging_dump_domain_info(struct domain *d); void paging_dump_vcpu_info(struct vcpu *v); diff -r 31708477f0a9 xen/include/public/hvm/hvm_op.h --- a/xen/include/public/hvm/hvm_op.h Mon Jun 21 09:59:10 2010 +0100 +++ b/xen/include/public/hvm/hvm_op.h Mon Jun 21 18:49:28 2010 +0100 @@ -127,6 +127,16 @@ typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t); +/* Hint from PV drivers for pagetable destruction. 
*/ +#define HVMOP_pagetable_dying 9 +struct xen_hvm_pagetable_dying { + /* Domain with a pagetable about to be destroyed. */ + domid_t domid; + /* guest physical address of the toplevel pagetable dying */ + uint64_aligned_t gpa; +}; +typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_pagetable_dying_t); #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel