Document the sharing libxc interface, including failure conditions, meaning of stats, and internal semantics/behavior. Also remove a stale debug call. Patch #2 depends on patch #1. Patch #1 touches hypervisor x86/mm bits. Both patches touch the tools. Patch #2 is entirely documentation comments. Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> tools/libxc/xc_memshr.c | 14 -- tools/libxc/xenctrl.h | 3 - xen/arch/x86/mm/mem_sharing.c | 11 +- tools/libxc/xenctrl.h | 201 +++++++++++++++++++++++++++++++++++++---- 4 files changed, 181 insertions(+), 48 deletions(-)
Andres Lagar-Cavilla
2012-Apr-02 19:11 UTC
[PATCH 1 of 2] x86/mem_sharing: Clean up debugging calls
tools/libxc/xc_memshr.c | 14 --------------
tools/libxc/xenctrl.h | 3 ---
xen/arch/x86/mm/mem_sharing.c | 11 ++---------
3 files changed, 2 insertions(+), 26 deletions(-)
- Remove debug_mfn from the user-space interface
- Clean up errno codes
Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
diff -r 9f585ddcbe0c -r f110cf1372a8 tools/libxc/xc_memshr.c
--- a/tools/libxc/xc_memshr.c
+++ b/tools/libxc/xc_memshr.c
@@ -208,20 +208,6 @@ int xc_memshr_debug_gfn(xc_interface *xc
return xc_memshr_memop(xch, domid, &mso);
}
-int xc_memshr_debug_mfn(xc_interface *xch,
- domid_t domid,
- unsigned long mfn)
-{
- xen_mem_sharing_op_t mso;
-
- memset(&mso, 0, sizeof(mso));
-
- mso.op = XENMEM_sharing_op_debug_mfn;
- mso.u.debug.u.mfn = mfn;
-
- return xc_memshr_memop(xch, domid, &mso);
-}
-
int xc_memshr_debug_gref(xc_interface *xch,
domid_t domid,
grant_ref_t gref)
diff -r 9f585ddcbe0c -r f110cf1372a8 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -1960,9 +1960,6 @@ int xc_memshr_domain_resume(xc_interface
int xc_memshr_debug_gfn(xc_interface *xch,
domid_t domid,
unsigned long gfn);
-int xc_memshr_debug_mfn(xc_interface *xch,
- domid_t domid,
- unsigned long mfn);
int xc_memshr_debug_gref(xc_interface *xch,
domid_t domid,
grant_ref_t gref);
diff -r 9f585ddcbe0c -r f110cf1372a8 xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -512,7 +512,7 @@ int mem_sharing_debug_mfn(mfn_t mfn)
if ( (page = __grab_shared_page(mfn)) == NULL)
{
gdprintk(XENLOG_ERR, "Invalid MFN=%lx\n", mfn_x(mfn));
- return -1;
+ return -EINVAL;
}
MEM_SHARING_DEBUG(
@@ -599,7 +599,7 @@ int mem_sharing_debug_gref(struct domain
MEM_SHARING_DEBUG(
"Asked to debug [dom=%d,gref=%d], but not yet inited.\n",
d->domain_id, ref);
- return -1;
+ return -EINVAL;
}
(void)mem_sharing_gref_to_gfn(d, ref, &gfn);
shah = shared_entry_header(d->grant_table, ref);
@@ -1216,13 +1216,6 @@ int mem_sharing_memop(struct domain *d,
}
break;
- case XENMEM_sharing_op_debug_mfn:
- {
- unsigned long mfn = mec->u.debug.u.mfn;
- rc = mem_sharing_debug_mfn(_mfn(mfn));
- }
- break;
-
case XENMEM_sharing_op_debug_gref:
{
grant_ref_t gref = mec->u.debug.u.gref;
tools/libxc/xenctrl.h | 201 ++++++++++++++++++++++++++++++++++++++++++++-----
1 files changed, 179 insertions(+), 22 deletions(-)
(also make note about AMD support's experimental status)
Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla>
diff -r f110cf1372a8 -r 3f6b49097dce tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -1251,23 +1251,6 @@ int xc_mmuext_op(xc_interface *xch, stru
/* System wide memory properties */
long xc_maximum_ram_page(xc_interface *xch);
-/**
- * This function returns the total number of pages freed by using sharing
- * on the system. For example, if two domains contain a single entry in
- * their p2m map that points to the same shared page (and no other pages
- * in the system are shared), then this function should return 1.
- */
-long xc_sharing_freed_pages(xc_interface *xch);
-
-/**
- * This function returns the total number of frames occupied by shared
- * pages on the system. This is independent of the number of domains
- * pointing at these frames. For example, in the above scenario this
- * should return 1. The following should hold:
- * memory usage without sharing = freed_pages + used_frames
- */
-long xc_sharing_used_frames(xc_interface *xch);
-
/* Get current total pages allocated to a domain. */
long xc_get_tot_pages(xc_interface *xch, uint32_t domid);
@@ -1894,7 +1877,7 @@ int xc_tmem_restore(xc_interface *xch, i
int xc_tmem_restore_extra(xc_interface *xch, int dom, int fd);
/**
- * mem_event operations
+ * mem_event operations. Internal use only.
*/
int xc_mem_event_control(xc_interface *xch, domid_t domain_id, unsigned int op,
unsigned int mode, uint32_t *port);
@@ -1902,6 +1885,12 @@ int xc_mem_event_memop(xc_interface *xch
unsigned int op, unsigned int mode,
uint64_t gfn, void *buffer);
+/**
+ * Mem paging operations.
+ * Paging is supported only on the x86 architecture in 64 bit mode, with
+ * Hardware-Assisted Paging (i.e. Intel EPT, AMD NPT). Moreover, AMD NPT
+ * support is considered experimental.
+ */
int xc_mem_paging_enable(xc_interface *xch, domid_t domain_id, uint32_t *port);
int xc_mem_paging_disable(xc_interface *xch, domid_t domain_id);
int xc_mem_paging_nominate(xc_interface *xch, domid_t domain_id,
@@ -1911,30 +1900,133 @@ int xc_mem_paging_prep(xc_interface *xch
int xc_mem_paging_load(xc_interface *xch, domid_t domain_id,
unsigned long gfn, void *buffer);
+/**
+ * Access tracking operations.
+ * Supported only on Intel EPT 64 bit processors.
+ */
int xc_mem_access_enable(xc_interface *xch, domid_t domain_id, uint32_t *port);
int xc_mem_access_disable(xc_interface *xch, domid_t domain_id);
int xc_mem_access_resume(xc_interface *xch, domid_t domain_id,
unsigned long gfn);
-/**
- * memshr operations
+/***
+ * Memory sharing operations.
+ *
+ * Unless otherwise noted, these calls return 0 on success, -1 and errno on
+ * failure.
+ *
+ * Sharing is supported only on the x86 architecture in 64 bit mode, with
+ * Hardware-Assisted Paging (i.e. Intel EPT, AMD NPT). Moreover, AMD NPT
+ * support is considered experimental.
+ *
+ * Calls below return ENOSYS if not in the x86_64 architecture.
+ * Calls below return ENODEV if the domain does not support HAP.
+ * Calls below return ESRCH if the specified domain does not exist.
+ * Calls below return EPERM if the caller is unprivileged for this domain.
*/
+
+/* Turn on/off sharing for the domid, depending on the enable flag.
+ *
+ * Returns EXDEV if trying to enable and the domain has had a PCI device
+ * assigned for passthrough (these two features are mutually exclusive).
+ *
+ * When sharing for a domain is turned off, the domain may still reference
+ * shared pages. Unsharing happens lazily. */
int xc_memshr_control(xc_interface *xch,
domid_t domid,
int enable);
+
+/* Create a communication ring in which the hypervisor will place ENOMEM
+ * notifications.
+ *
+ * ENOMEM happens when unsharing pages: a Copy-on-Write duplicate needs to be
+ * allocated, and thus the out-of-memory error can occur.
+ *
+ * For complete examples on how to plumb a notification ring, look into
+ * xenpaging or xen-access.
+ *
+ * On receipt of a notification, the helper should ensure there is memory
+ * available to the domain before retrying.
+ *
+ * If a domain encounters an ENOMEM condition when sharing and this ring
+ * has not been set up, the hypervisor will crash the domain.
+ *
+ * Fails with:
+ * EINVAL if port is NULL
+ * EINVAL if the sharing ring has already been enabled
+ * ENOSYS if no guest gfn has been specified to host the ring via an hvm param
+ * EINVAL if the gfn for the ring has not been populated
+ * ENOENT if the gfn for the ring is paged out, or cannot be unshared
+ * EINVAL if the gfn for the ring cannot be written to
+ * EINVAL if the domain is dying
+ * ENOSPC if an event channel cannot be allocated for the ring
+ * ENOMEM if memory cannot be allocated for internal data structures
+ * EINVAL or EACCES if the request is denied by the security policy
+ */
+
int xc_memshr_ring_enable(xc_interface *xch,
domid_t domid,
uint32_t *port);
+/* Disable the ring for ENOMEM communication.
+ * May fail with EINVAL if the ring was not enabled in the first place.
+ */
int xc_memshr_ring_disable(xc_interface *xch,
domid_t domid);
+
+/*
+ * Calls below return EINVAL if sharing has not been enabled for the domain
+ * Calls below return EINVAL if the domain is dying
+ */
+/* Once a response to an ENOMEM notification is prepared, the tool can
+ * notify the hypervisor to re-schedule the faulting vcpu of the domain with an
+ * event channel kick and/or this call. */
+int xc_memshr_domain_resume(xc_interface *xch,
+ domid_t domid);
+
+/* Select a page for sharing.
+ *
+ * A 64 bit opaque handle will be stored in handle. The hypervisor ensures
+ * that if the page is modified, the handle will be invalidated, and future
+ * users of it will fail. If the page has already been selected and is still
+ * associated to a valid handle, the existing handle will be returned.
+ *
+ * May fail with:
+ * EINVAL if the gfn is not populated or not sharable (mmio, etc)
+ * ENOMEM if internal data structures cannot be allocated
+ * E2BIG if the page is being referenced by other subsystems (e.g. qemu)
+ * ENOENT or EEXIST if there are internal hypervisor errors.
+ */
int xc_memshr_nominate_gfn(xc_interface *xch,
domid_t domid,
unsigned long gfn,
uint64_t *handle);
+/* Same as above, but instead of a guest frame number, the input is a grant
+ * reference provided by the guest.
+ *
+ * May fail with EINVAL if the grant reference is invalid.
+ */
int xc_memshr_nominate_gref(xc_interface *xch,
domid_t domid,
grant_ref_t gref,
uint64_t *handle);
+
+/* The three calls below may fail with
+ * 10 (or -XENMEM_SHARING_OP_S_HANDLE_INVALID) if the handle passed as source
+ * is invalid.
+ * 9 (or -XENMEM_SHARING_OP_C_HANDLE_INVALID) if the handle passed as client is
+ * invalid.
+ */
+/* Share two nominated guest pages.
+ *
+ * If the call succeeds, both pages will point to the same backing frame (or
+ * mfn). The hypervisor will verify the handles are still valid, but it will
+ * not perform any sanity checking on the contents of the pages (the selection
+ * mechanism for sharing candidates is entirely up to the user-space tool).
+ *
+ * After successful sharing, the client handle becomes invalid. Both <domain,
+ * gfn> tuples point to the same mfn with the same handle, the one specified as
+ * source. Either 3-tuple can be specified later for further re-sharing.
+ */
int xc_memshr_share_gfns(xc_interface *xch,
domid_t source_domain,
unsigned long source_gfn,
@@ -1942,6 +2034,11 @@ int xc_memshr_share_gfns(xc_interface *x
domid_t client_domain,
unsigned long client_gfn,
uint64_t client_handle);
+
+/* Same as above, but share two grant references instead.
+ *
+ * May fail with EINVAL if either grant reference is invalid.
+ */
int xc_memshr_share_grefs(xc_interface *xch,
domid_t source_domain,
grant_ref_t source_gref,
@@ -1949,22 +2046,82 @@ int xc_memshr_share_grefs(xc_interface *
domid_t client_domain,
grant_ref_t client_gref,
uint64_t client_handle);
+
+/* Add a shared frame directly to the guest physmap of the client
+ * domain.
+ *
+ * May additionally fail with
+ * 9 (-XENMEM_SHARING_OP_C_HANDLE_INVALID) if the physmap entry for the gfn is
+ * not suitable.
+ * ENOMEM if internal data structures cannot be allocated.
+ * ENOENT if there is an internal hypervisor error.
+ */
int xc_memshr_add_to_physmap(xc_interface *xch,
domid_t source_domain,
unsigned long source_gfn,
uint64_t source_handle,
domid_t client_domain,
unsigned long client_gfn);
-int xc_memshr_domain_resume(xc_interface *xch,
- domid_t domid);
+
+/* Debug calls: return the number of pages referencing the shared frame backing
+ * the input argument. Should be one or greater.
+ *
+ * May fail with EINVAL if there is no backing shared frame for the input
+ * argument.
+ */
int xc_memshr_debug_gfn(xc_interface *xch,
domid_t domid,
unsigned long gfn);
+/* May additionally fail with EINVAL if the grant reference is invalid. */
int xc_memshr_debug_gref(xc_interface *xch,
domid_t domid,
grant_ref_t gref);
+
+/* Audits the share subsystem.
+ *
+ * Returns ENOSYS if not supported (may not be compiled into the hypervisor).
+ *
+ * Returns the number of errors found during auditing otherwise. May be (should
+ * be!) zero.
+ *
+ * If debugtrace support has been compiled into the hypervisor and is enabled,
+ * verbose descriptions for the errors are available in the hypervisor console.
+ */
int xc_memshr_audit(xc_interface *xch);
+/* Stats reporting.
+ *
+ * At any point in time, the following equality should hold for a host:
+ *
+ * Let dominfo(d) be the xc_dominfo_t struct filled by a call to
+ * xc_domain_getinfo(d)
+ *
+ * The summation of dominfo(d)->shr_pages for all domains in the system
+ * should be equal to
+ * xc_sharing_freed_pages + xc_sharing_used_frames
+ */
+/*
+ * This function returns the total number of pages freed by using sharing
+ * on the system. For example, if two domains contain a single entry in
+ * their p2m table that points to the same shared page (and no other pages
+ * in the system are shared), then this function should return 1.
+ */
+long xc_sharing_freed_pages(xc_interface *xch);
+
+/*
+ * This function returns the total number of frames occupied by shared
+ * pages on the system. This is independent of the number of domains
+ * pointing at these frames. For example, in the above scenario this
+ * should return 1. (And dominfo(d)->shr_pages for each of the two domains
+ * should be 1 as well).
+ *
+ * Note that some of these sharing_used_frames may be referenced by
+ * a single domain page, and thus not realize any savings. The same
+ * applies to some of the pages counted in dominfo(d)->shr_pages.
+ */
+long xc_sharing_used_frames(xc_interface *xch);
+/*** End sharing interface ***/
+
int xc_flask_load(xc_interface *xc_handle, char *buf, uint32_t size);
int xc_flask_context_to_sid(xc_interface *xc_handle, char *buf, uint32_t size,
uint32_t *sid);
int xc_flask_sid_to_context(xc_interface *xc_handle, int sid, char *buf,
uint32_t size);
At 15:11 -0400 on 02 Apr (1333379462), Andres Lagar-Cavilla wrote:
> Document the sharing libxc interface, including failure conditions, meaning of
> stats, and internal semantics/behavior.
>
> Also remove a stale debug call.
>
> Patch #2 depends on patch #1. Patch #1 touches hypervisor x86/mm bits. Both
> patches touch the tools. Patch #2 is entirely documentation comments.
>
> Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

Acked-by: Tim Deegan <tim@xen.org>

Cheers,

Tim.
> At 15:11 -0400 on 02 Apr (1333379462), Andres Lagar-Cavilla wrote: >> Document the sharing libxc interface, including failure conditions, >> meaning of >> stats, and internal semantics/behavior. >> >> Also remove a stale debug call. >> >> Patch #2 depends on patch #1. Patch #1 touches hypervisor x86/mm bits. >> Both >> patches touch the tools. Patch #2 is entirely documentation comments. >> >> Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> > > Acked-by: Tim Deegan <tim@xen.org>Ping? Ian & Ian? Thanks, Andres> > Cheers, > > Tim. >
On Apr 12, 2012, at 10:18 AM, Andres Lagar-Cavilla wrote:>> At 15:11 -0400 on 02 Apr (1333379462), Andres Lagar-Cavilla wrote: >>> Document the sharing libxc interface, including failure conditions, >>> meaning of >>> stats, and internal semantics/behavior. >>> >>> Also remove a stale debug call. >>> >>> Patch #2 depends on patch #1. Patch #1 touches hypervisor x86/mm bits. >>> Both >>> patches touch the tools. Patch #2 is entirely documentation comments. >>> >>> Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> >> >> Acked-by: Tim Deegan <tim@xen.org> > > Ping? Ian & Ian? > Thanks, > AndresNew weekly ping :) Thanks, Andres> >> >> Cheers, >> >> Tim. >> > >
Tim Deegan writes ("Re: [Xen-devel] [PATCH 0 of 2] Sharing Documentation"):
> At 15:11 -0400 on 02 Apr (1333379462), Andres Lagar-Cavilla wrote:
> > Document the sharing libxc interface, including failure conditions, meaning of
> > stats, and internal semantics/behavior.
> >
> > Also remove a stale debug call.
> >
> > Patch #2 depends on patch #1. Patch #1 touches hypervisor x86/mm bits. Both
> > patches touch the tools. Patch #2 is entirely documentation comments.
> >
> > Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
>
> Acked-by: Tim Deegan <tim@xen.org>
Thanks, I have committed both.
Ian.