Andres Lagar-Cavilla
2012-Jan-11 18:42 UTC
[PATCH] RFC: Use memops for mem paging, sharing, and access, instead of domctls
tools/libxc/xc_mem_access.c | 12 +- tools/libxc/xc_mem_event.c | 23 +++- tools/libxc/xc_mem_paging.c | 44 ++++---- tools/libxc/xc_memshr.c | 192 ++++++++++++++++++------------------- tools/libxc/xc_private.c | 10 - tools/libxc/xenctrl.h | 6 +- tools/memshr/interface.c | 4 +- xen/arch/x86/domctl.c | 1 - xen/arch/x86/mm/mem_access.c | 7 +- xen/arch/x86/mm/mem_event.c | 68 +++++++++++-- xen/arch/x86/mm/mem_paging.c | 13 +- xen/arch/x86/mm/mem_sharing.c | 101 +++++++++++-------- xen/arch/x86/x86_64/compat/mm.c | 23 ++++ xen/arch/x86/x86_64/mm.c | 23 ++++ xen/include/asm-x86/mem_access.h | 3 +- xen/include/asm-x86/mem_event.h | 2 + xen/include/asm-x86/mem_paging.h | 3 +- xen/include/asm-x86/mem_sharing.h | 3 + xen/include/public/domctl.h | 91 +++++------------ xen/include/public/memory.h | 74 ++++++++++++++ 20 files changed, 426 insertions(+), 277 deletions(-) Per page operations in the paging, sharing, and access tracking subsystems are all implemented with domctls (e.g. a domctl to evict one page, or to share one page). Under heavy load, the domctl path reveals a lack of scalability. The domctl lock serializes dom0's vcpus in the hypervisor. When performing thousands of per-page operations on dozens of domains, these vcpus will spin in the hypervisor. Beyond the aggressive locking, an added inefficiency of blocking vcpus in the domctl lock is that dom0 is prevented from re-scheduling. In this proposal we retain the domctl interface for setting up and tearing down paging/sharing/mem access for a domain. But we migrate all the per page operations to use the memory_op hypercalls (e.g. XENMEM_*). While we naturally welcome comments on the correctness of the approach, we are also concerned about the viability of this API change. With 4.2 coming, this is the right time to get an interface right, for the long run. 
Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla> diff -r 3b1c596bc1b4 -r ed4d429d7026 tools/libxc/xc_mem_access.c --- a/tools/libxc/xc_mem_access.c +++ b/tools/libxc/xc_mem_access.c @@ -30,7 +30,7 @@ int xc_mem_access_enable(xc_interface *x return xc_mem_event_control(xch, domain_id, XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE, XEN_DOMCTL_MEM_EVENT_OP_ACCESS, - shared_page, ring_page, INVALID_MFN); + shared_page, ring_page); } int xc_mem_access_disable(xc_interface *xch, domid_t domain_id) @@ -38,15 +38,15 @@ int xc_mem_access_disable(xc_interface * return xc_mem_event_control(xch, domain_id, XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE, XEN_DOMCTL_MEM_EVENT_OP_ACCESS, - NULL, NULL, INVALID_MFN); + NULL, NULL); } int xc_mem_access_resume(xc_interface *xch, domid_t domain_id, unsigned long gfn) { - return xc_mem_event_control(xch, domain_id, - XEN_DOMCTL_MEM_EVENT_OP_ACCESS_RESUME, - XEN_DOMCTL_MEM_EVENT_OP_ACCESS, - NULL, NULL, gfn); + return xc_mem_event_memop(xch, domain_id, + XENMEM_access_op_resume, + XENMEM_access_op, + gfn, NULL); } /* diff -r 3b1c596bc1b4 -r ed4d429d7026 tools/libxc/xc_mem_event.c --- a/tools/libxc/xc_mem_event.c +++ b/tools/libxc/xc_mem_event.c @@ -24,8 +24,7 @@ #include "xc_private.h" int xc_mem_event_control(xc_interface *xch, domid_t domain_id, unsigned int op, - unsigned int mode, void *page, - void *ring_page, unsigned long gfn) + unsigned int mode, void *page, void *ring_page) { DECLARE_DOMCTL; @@ -34,11 +33,25 @@ int xc_mem_event_control(xc_interface *x domctl.u.mem_event_op.op = op; domctl.u.mem_event_op.mode = mode; - domctl.u.mem_event_op.u.shared_addr = (unsigned long)page; + domctl.u.mem_event_op.shared_addr = (unsigned long)page; domctl.u.mem_event_op.ring_addr = (unsigned long)ring_page; - - domctl.u.mem_event_op.gfn = gfn; return do_domctl(xch, &domctl); } +int xc_mem_event_memop(xc_interface *xch, domid_t domain_id, + unsigned int op, unsigned int mode, + uint64_t gfn, void *buffer) +{ + xen_mem_event_op_t meo; + + memset(&meo, 
0, sizeof(meo)); + + meo.op = op; + meo.domain = domain_id; + meo.gfn = gfn; + meo.buffer = (unsigned long) buffer; + + return do_memory_op(xch, mode, &meo, sizeof(meo)); +} + diff -r 3b1c596bc1b4 -r ed4d429d7026 tools/libxc/xc_mem_paging.c --- a/tools/libxc/xc_mem_paging.c +++ b/tools/libxc/xc_mem_paging.c @@ -30,7 +30,7 @@ int xc_mem_paging_enable(xc_interface *x return xc_mem_event_control(xch, domain_id, XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE, XEN_DOMCTL_MEM_EVENT_OP_PAGING, - shared_page, ring_page, INVALID_MFN); + shared_page, ring_page); } int xc_mem_paging_disable(xc_interface *xch, domid_t domain_id) @@ -38,31 +38,31 @@ int xc_mem_paging_disable(xc_interface * return xc_mem_event_control(xch, domain_id, XEN_DOMCTL_MEM_EVENT_OP_PAGING_DISABLE, XEN_DOMCTL_MEM_EVENT_OP_PAGING, - NULL, NULL, INVALID_MFN); + NULL, NULL); } int xc_mem_paging_nominate(xc_interface *xch, domid_t domain_id, unsigned long gfn) { - return xc_mem_event_control(xch, domain_id, - XEN_DOMCTL_MEM_EVENT_OP_PAGING_NOMINATE, - XEN_DOMCTL_MEM_EVENT_OP_PAGING, - NULL, NULL, gfn); + return xc_mem_event_memop(xch, domain_id, + XENMEM_paging_op_nominate, + XENMEM_paging_op, + gfn, NULL); } int xc_mem_paging_evict(xc_interface *xch, domid_t domain_id, unsigned long gfn) { - return xc_mem_event_control(xch, domain_id, - XEN_DOMCTL_MEM_EVENT_OP_PAGING_EVICT, - XEN_DOMCTL_MEM_EVENT_OP_PAGING, - NULL, NULL, gfn); + return xc_mem_event_memop(xch, domain_id, + XENMEM_paging_op_evict, + XENMEM_paging_op, + gfn, NULL); } int xc_mem_paging_prep(xc_interface *xch, domid_t domain_id, unsigned long gfn) { - return xc_mem_event_control(xch, domain_id, - XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP, - XEN_DOMCTL_MEM_EVENT_OP_PAGING, - NULL, NULL, gfn); + return xc_mem_event_memop(xch, domain_id, + XENMEM_paging_op_prep, + XENMEM_paging_op, + gfn, NULL); } int xc_mem_paging_load(xc_interface *xch, domid_t domain_id, @@ -79,10 +79,10 @@ int xc_mem_paging_load(xc_interface *xch if ( mlock(buffer, XC_PAGE_SIZE) ) return 
-errno; - rc = xc_mem_event_control(xch, domain_id, - XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP, - XEN_DOMCTL_MEM_EVENT_OP_PAGING, - buffer, NULL, gfn); + rc = xc_mem_event_memop(xch, domain_id, + XENMEM_paging_op_prep, + XENMEM_paging_op, + gfn, buffer); (void)munlock(buffer, XC_PAGE_SIZE); return rc; @@ -90,10 +90,10 @@ int xc_mem_paging_load(xc_interface *xch int xc_mem_paging_resume(xc_interface *xch, domid_t domain_id, unsigned long gfn) { - return xc_mem_event_control(xch, domain_id, - XEN_DOMCTL_MEM_EVENT_OP_PAGING_RESUME, - XEN_DOMCTL_MEM_EVENT_OP_PAGING, - NULL, NULL, gfn); + return xc_mem_event_memop(xch, domain_id, + XENMEM_paging_op_resume, + XENMEM_paging_op, + gfn, NULL); } diff -r 3b1c596bc1b4 -r ed4d429d7026 tools/libxc/xc_memshr.c --- a/tools/libxc/xc_memshr.c +++ b/tools/libxc/xc_memshr.c @@ -36,32 +36,38 @@ int xc_memshr_control(xc_interface *xch, domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION; domctl.domain = domid; op = &(domctl.u.mem_sharing_op); - op->op = XEN_DOMCTL_MEM_EVENT_OP_SHARING_CONTROL; + op->op = XEN_DOMCTL_MEM_SHARING_CONTROL; op->u.enable = enable; return do_domctl(xch, &domctl); } +static int xc_memshr_memop(xc_interface *xch, domid_t domid, + xen_mem_sharing_op_t *mso) +{ + mso->domain = domid; + + return do_memory_op(xch, XENMEM_sharing_op, mso, sizeof(*mso)); +} + int xc_memshr_nominate_gfn(xc_interface *xch, domid_t domid, unsigned long gfn, uint64_t *handle) { - DECLARE_DOMCTL; - struct xen_domctl_mem_sharing_op *op; - int ret; + int rc; + xen_mem_sharing_op_t mso; - domctl.cmd = XEN_DOMCTL_mem_sharing_op; - domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION; - domctl.domain = domid; - op = &(domctl.u.mem_sharing_op); - op->op = XEN_DOMCTL_MEM_EVENT_OP_SHARING_NOMINATE_GFN; - op->u.nominate.u.gfn = gfn; + memset(&mso, 0, sizeof(mso)); - ret = do_domctl(xch, &domctl); - if(!ret) *handle = op->u.nominate.handle; + mso.op = XENMEM_sharing_op_nominate_gfn; + mso.u.nominate.u.gfn = gfn; - return ret; + rc = 
xc_memshr_memop(xch, domid, &mso); + + if (!rc) *handle = mso.u.nominate.handle; + + return rc; } int xc_memshr_nominate_gref(xc_interface *xch, @@ -69,21 +75,19 @@ int xc_memshr_nominate_gref(xc_interface grant_ref_t gref, uint64_t *handle) { - DECLARE_DOMCTL; - struct xen_domctl_mem_sharing_op *op; - int ret; + int rc; + xen_mem_sharing_op_t mso; - domctl.cmd = XEN_DOMCTL_mem_sharing_op; - domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION; - domctl.domain = domid; - op = &(domctl.u.mem_sharing_op); - op->op = XEN_DOMCTL_MEM_EVENT_OP_SHARING_NOMINATE_GREF; - op->u.nominate.u.grant_ref = gref; + memset(&mso, 0, sizeof(mso)); - ret = do_domctl(xch, &domctl); - if(!ret) *handle = op->u.nominate.handle; + mso.op = XENMEM_sharing_op_nominate_gref; + mso.u.nominate.u.grant_ref = gref; - return ret; + rc = xc_memshr_memop(xch, domid, &mso); + + if (!rc) *handle = mso.u.nominate.handle; + + return rc; } int xc_memshr_share_gfns(xc_interface *xch, @@ -94,21 +98,19 @@ int xc_memshr_share_gfns(xc_interface *x unsigned long client_gfn, uint64_t client_handle) { - DECLARE_DOMCTL; - struct xen_domctl_mem_sharing_op *op; + xen_mem_sharing_op_t mso; - domctl.cmd = XEN_DOMCTL_mem_sharing_op; - domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION; - domctl.domain = source_domain; - op = &(domctl.u.mem_sharing_op); - op->op = XEN_DOMCTL_MEM_EVENT_OP_SHARING_SHARE; - op->u.share.source_handle = source_handle; - op->u.share.source_gfn = source_gfn; - op->u.share.client_domain = client_domain; - op->u.share.client_gfn = client_gfn; - op->u.share.client_handle = client_handle; + memset(&mso, 0, sizeof(mso)); - return do_domctl(xch, &domctl); + mso.op = XENMEM_sharing_op_share; + + mso.u.share.source_handle = source_handle; + mso.u.share.source_gfn = source_gfn; + mso.u.share.client_domain = client_domain; + mso.u.share.client_gfn = client_gfn; + mso.u.share.client_handle = client_handle; + + return xc_memshr_memop(xch, source_domain, &mso); } int 
xc_memshr_share_grefs(xc_interface *xch, @@ -119,21 +121,19 @@ int xc_memshr_share_grefs(xc_interface * grant_ref_t client_gref, uint64_t client_handle) { - DECLARE_DOMCTL; - struct xen_domctl_mem_sharing_op *op; + xen_mem_sharing_op_t mso; - domctl.cmd = XEN_DOMCTL_mem_sharing_op; - domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION; - domctl.domain = source_domain; - op = &(domctl.u.mem_sharing_op); - op->op = XEN_DOMCTL_MEM_EVENT_OP_SHARING_SHARE; - op->u.share.source_handle = source_handle; - XEN_DOMCTL_MEM_SHARING_FIELD_MAKE_GREF(op->u.share.source_gfn, source_gref); - op->u.share.client_domain = client_domain; - XEN_DOMCTL_MEM_SHARING_FIELD_MAKE_GREF(op->u.share.client_gfn, client_gref); - op->u.share.client_handle = client_handle; + memset(&mso, 0, sizeof(mso)); - return do_domctl(xch, &domctl); + mso.op = XENMEM_sharing_op_share; + + mso.u.share.source_handle = source_handle; + XENMEM_SHARING_OP_FIELD_MAKE_GREF(mso.u.share.source_gfn, source_gref); + mso.u.share.client_domain = client_domain; + XENMEM_SHARING_OP_FIELD_MAKE_GREF(mso.u.share.client_gfn, client_gref); + mso.u.share.client_handle = client_handle; + + return xc_memshr_memop(xch, source_domain, &mso); } int xc_memshr_add_to_physmap(xc_interface *xch, @@ -143,85 +143,81 @@ int xc_memshr_add_to_physmap(xc_interfac domid_t client_domain, unsigned long client_gfn) { - DECLARE_DOMCTL; - struct xen_domctl_mem_sharing_op *op; + xen_mem_sharing_op_t mso; - domctl.cmd = XEN_DOMCTL_mem_sharing_op; - domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION; - domctl.domain = source_domain; - op = &(domctl.u.mem_sharing_op); - op->op = XEN_DOMCTL_MEM_EVENT_OP_SHARING_ADD_PHYSMAP; - op->u.share.source_gfn = source_gfn; - op->u.share.source_handle = source_handle; - op->u.share.client_gfn = client_gfn; - op->u.share.client_domain = client_domain; + memset(&mso, 0, sizeof(mso)); - return do_domctl(xch, &domctl); + mso.op = XENMEM_sharing_op_add_physmap; + + mso.u.share.source_handle = source_handle; + 
mso.u.share.source_gfn = source_gfn; + mso.u.share.client_domain = client_domain; + mso.u.share.client_gfn = client_gfn; + + return xc_memshr_memop(xch, source_domain, &mso); } int xc_memshr_domain_resume(xc_interface *xch, domid_t domid) { - DECLARE_DOMCTL; - struct xen_domctl_mem_sharing_op *op; + xen_mem_sharing_op_t mso; - domctl.cmd = XEN_DOMCTL_mem_sharing_op; - domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION; - domctl.domain = domid; - op = &(domctl.u.mem_sharing_op); - op->op = XEN_DOMCTL_MEM_EVENT_OP_SHARING_RESUME; + memset(&mso, 0, sizeof(mso)); - return do_domctl(xch, &domctl); + mso.op = XENMEM_sharing_op_resume; + + return xc_memshr_memop(xch, domid, &mso); } int xc_memshr_debug_gfn(xc_interface *xch, domid_t domid, unsigned long gfn) { - DECLARE_DOMCTL; - struct xen_domctl_mem_sharing_op *op; + xen_mem_sharing_op_t mso; - domctl.cmd = XEN_DOMCTL_mem_sharing_op; - domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION; - domctl.domain = domid; - op = &(domctl.u.mem_sharing_op); - op->op = XEN_DOMCTL_MEM_EVENT_OP_SHARING_DEBUG_GFN; - op->u.debug.u.gfn = gfn; + memset(&mso, 0, sizeof(mso)); - return do_domctl(xch, &domctl); + mso.op = XENMEM_sharing_op_debug_gfn; + mso.u.debug.u.gfn = gfn; + + return xc_memshr_memop(xch, domid, &mso); } int xc_memshr_debug_mfn(xc_interface *xch, domid_t domid, unsigned long mfn) { - DECLARE_DOMCTL; - struct xen_domctl_mem_sharing_op *op; + xen_mem_sharing_op_t mso; - domctl.cmd = XEN_DOMCTL_mem_sharing_op; - domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION; - domctl.domain = domid; - op = &(domctl.u.mem_sharing_op); - op->op = XEN_DOMCTL_MEM_EVENT_OP_SHARING_DEBUG_MFN; - op->u.debug.u.mfn = mfn; + memset(&mso, 0, sizeof(mso)); - return do_domctl(xch, &domctl); + mso.op = XENMEM_sharing_op_debug_mfn; + mso.u.debug.u.mfn = mfn; + + return xc_memshr_memop(xch, domid, &mso); } int xc_memshr_debug_gref(xc_interface *xch, domid_t domid, grant_ref_t gref) { - DECLARE_DOMCTL; - struct xen_domctl_mem_sharing_op 
*op; + xen_mem_sharing_op_t mso; - domctl.cmd = XEN_DOMCTL_mem_sharing_op; - domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION; - domctl.domain = domid; - op = &(domctl.u.mem_sharing_op); - op->op = XEN_DOMCTL_MEM_EVENT_OP_SHARING_DEBUG_GREF; - op->u.debug.u.gref = gref; + memset(&mso, 0, sizeof(mso)); - return do_domctl(xch, &domctl); + mso.op = XENMEM_sharing_op_debug_gref; + mso.u.debug.u.gref = gref; + + return xc_memshr_memop(xch, domid, &mso); } +long xc_sharing_freed_pages(xc_interface *xch) +{ + return do_memory_op(xch, XENMEM_get_sharing_freed_pages, NULL, 0); +} + +long xc_sharing_used_frames(xc_interface *xch) +{ + return do_memory_op(xch, XENMEM_get_sharing_shared_pages, NULL, 0); +} + diff -r 3b1c596bc1b4 -r ed4d429d7026 tools/libxc/xc_private.c --- a/tools/libxc/xc_private.c +++ b/tools/libxc/xc_private.c @@ -533,16 +533,6 @@ long xc_maximum_ram_page(xc_interface *x return do_memory_op(xch, XENMEM_maximum_ram_page, NULL, 0); } -long xc_sharing_freed_pages(xc_interface *xch) -{ - return do_memory_op(xch, XENMEM_get_sharing_freed_pages, NULL, 0); -} - -long xc_sharing_used_frames(xc_interface *xch) -{ - return do_memory_op(xch, XENMEM_get_sharing_shared_pages, NULL, 0); -} - long long xc_domain_get_cpu_usage( xc_interface *xch, domid_t domid, int vcpu ) { DECLARE_DOMCTL; diff -r 3b1c596bc1b4 -r ed4d429d7026 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h +++ b/tools/libxc/xenctrl.h @@ -1872,8 +1872,10 @@ int xc_tmem_restore_extra(xc_interface * * mem_event operations */ int xc_mem_event_control(xc_interface *xch, domid_t domain_id, unsigned int op, - unsigned int mode, void *shared_page, - void *ring_page, unsigned long gfn); + unsigned int mode, void *shared_page, void *ring_page); +int xc_mem_event_memop(xc_interface *xch, domid_t domain_id, + unsigned int op, unsigned int mode, + uint64_t gfn, void *buffer); int xc_mem_paging_enable(xc_interface *xch, domid_t domain_id, void *shared_page, void *ring_page); diff -r 3b1c596bc1b4 -r ed4d429d7026 
tools/memshr/interface.c --- a/tools/memshr/interface.c +++ b/tools/memshr/interface.c @@ -186,12 +186,12 @@ int memshr_vbd_issue_ro_request(char *bu remove the relevant ones from the map */ switch(ret) { - case XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID: + case XENMEM_SHARING_OP_S_HANDLE_INVALID: ret = blockshr_shrhnd_remove(memshr.blks, source_st, NULL); if(ret) DPRINTF("Could not rm invl s_hnd: %u %"PRId64" %"PRId64"\n", source_st.domain, source_st.frame, source_st.handle); break; - case XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID: + case XENMEM_SHARING_OP_C_HANDLE_INVALID: ret = blockshr_shrhnd_remove(memshr.blks, client_st, NULL); if(ret) DPRINTF("Could not rm invl c_hnd: %u %"PRId64" %"PRId64"\n", client_st.domain, client_st.frame, client_st.handle); diff -r 3b1c596bc1b4 -r ed4d429d7026 xen/arch/x86/domctl.c --- a/xen/arch/x86/domctl.c +++ b/xen/arch/x86/domctl.c @@ -1463,7 +1463,6 @@ long arch_do_domctl( if ( !ret ) ret = mem_sharing_domctl(d, &domctl->u.mem_sharing_op); rcu_unlock_domain(d); - copy_to_guest(u_domctl, domctl, 1); } } break; diff -r 3b1c596bc1b4 -r ed4d429d7026 xen/arch/x86/mm/mem_access.c --- a/xen/arch/x86/mm/mem_access.c +++ b/xen/arch/x86/mm/mem_access.c @@ -25,14 +25,13 @@ #include <asm/mem_event.h> -int mem_access_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec, - XEN_GUEST_HANDLE(void) u_domctl) +int mem_access_memop(struct domain *d, xen_mem_event_op_t *meo) { int rc; - switch( mec->op ) + switch( meo->op ) { - case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_RESUME: + case XENMEM_access_op_resume: { p2m_mem_access_resume(d); rc = 0; diff -r 3b1c596bc1b4 -r ed4d429d7026 xen/arch/x86/mm/mem_event.c --- a/xen/arch/x86/mm/mem_event.c +++ b/xen/arch/x86/mm/mem_event.c @@ -28,6 +28,7 @@ #include <asm/mem_event.h> #include <asm/mem_paging.h> #include <asm/mem_access.h> +#include <asm/mem_sharing.h> /* for public/io/ring.h macros */ #define xen_mb() mb() @@ -49,7 +50,7 @@ static int mem_event_enable( struct domain *dom_mem_event = current->domain; 
struct vcpu *v = current; unsigned long ring_addr = mec->ring_addr; - unsigned long shared_addr = mec->u.shared_addr; + unsigned long shared_addr = mec->shared_addr; l1_pgentry_t l1e; unsigned long shared_gfn = 0, ring_gfn = 0; /* gcc ... */ p2m_type_t p2mt; @@ -457,6 +458,54 @@ static void mem_access_notification(stru p2m_mem_access_resume(v->domain); } +struct domain *get_mem_event_op_target(uint32_t domain, int *rc) +{ + struct domain *d; + + /* Get the target domain */ + *rc = rcu_lock_remote_target_domain_by_id(domain, &d); + if ( *rc != 0 ) + return NULL; + + /* Not dying? */ + if ( d->is_dying ) + { + rcu_unlock_domain(d); + *rc = -EINVAL; + return NULL; + } + + return d; +} + +int do_mem_event_op(int op, uint32_t domain, void *arg) +{ + int ret; + struct domain *d; + + d = get_mem_event_op_target(domain, &ret); + if ( !d ) + return ret; + + switch (op) + { + case XENMEM_paging_op: + ret = mem_paging_memop(d, (xen_mem_event_op_t *) arg); + break; + case XENMEM_access_op: + ret = mem_access_memop(d, (xen_mem_event_op_t *) arg); + break; + case XENMEM_sharing_op: + ret = mem_sharing_memop(d, (xen_mem_sharing_op_t *) arg); + break; + default: + ret = -ENOSYS; + } + + rcu_unlock_domain(d); + return ret; +} + int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec, XEN_GUEST_HANDLE(void) u_domctl) { @@ -530,11 +579,8 @@ int mem_event_domctl(struct domain *d, x break; default: - { - if ( med->ring_page ) - rc = mem_paging_domctl(d, mec, u_domctl); - } - break; + rc = -ENOSYS; + break; } } break; @@ -569,14 +615,14 @@ int mem_event_domctl(struct domain *d, x break; default: - { - if ( med->ring_page ) - rc = mem_access_domctl(d, mec, u_domctl); - } - break; + rc = -ENOSYS; + break; } } break; + + default: + rc = -ENOSYS; } return rc; diff -r 3b1c596bc1b4 -r ed4d429d7026 xen/arch/x86/mm/mem_paging.c --- a/xen/arch/x86/mm/mem_paging.c +++ b/xen/arch/x86/mm/mem_paging.c @@ -25,33 +25,32 @@ #include <asm/mem_event.h> -int mem_paging_domctl(struct domain 
*d, xen_domctl_mem_event_op_t *mec, - XEN_GUEST_HANDLE(void) u_domctl) +int mem_paging_memop(struct domain *d, xen_mem_event_op_t *mec) { switch( mec->op ) { - case XEN_DOMCTL_MEM_EVENT_OP_PAGING_NOMINATE: + case XENMEM_paging_op_nominate: { unsigned long gfn = mec->gfn; return p2m_mem_paging_nominate(d, gfn); } break; - case XEN_DOMCTL_MEM_EVENT_OP_PAGING_EVICT: + case XENMEM_paging_op_evict: { unsigned long gfn = mec->gfn; return p2m_mem_paging_evict(d, gfn); } break; - case XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP: + case XENMEM_paging_op_prep: { unsigned long gfn = mec->gfn; - return p2m_mem_paging_prep(d, gfn, mec->u.buffer); + return p2m_mem_paging_prep(d, gfn, mec->buffer); } break; - case XEN_DOMCTL_MEM_EVENT_OP_PAGING_RESUME: + case XENMEM_paging_op_resume: { p2m_mem_paging_resume(d); return 0; diff -r 3b1c596bc1b4 -r ed4d429d7026 xen/arch/x86/mm/mem_sharing.c --- a/xen/arch/x86/mm/mem_sharing.c +++ b/xen/arch/x86/mm/mem_sharing.c @@ -745,12 +745,12 @@ int mem_sharing_share_pages(struct domai } else if ( mfn_x(smfn) < mfn_x(cmfn) ) { - ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID; + ret = XENMEM_SHARING_OP_S_HANDLE_INVALID; spage = firstpg = __grab_shared_page(smfn, &pld); if ( spage == NULL ) goto err_out; - ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID; + ret = XENMEM_SHARING_OP_C_HANDLE_INVALID; cpage = secondpg = __grab_shared_page(cmfn, &pld); if ( cpage == NULL ) { @@ -758,12 +758,12 @@ int mem_sharing_share_pages(struct domai goto err_out; } } else { - ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID; + ret = XENMEM_SHARING_OP_C_HANDLE_INVALID; cpage = firstpg = __grab_shared_page(cmfn, &pld); if ( cpage == NULL ) goto err_out; - ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID; + ret = XENMEM_SHARING_OP_S_HANDLE_INVALID; spage = secondpg = __grab_shared_page(smfn, &pld); if ( spage == NULL ) { @@ -778,14 +778,14 @@ int mem_sharing_share_pages(struct domai /* Check that the handles match */ if ( spage->shared_info->handle != sh ) { - ret = 
XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID; + ret = XENMEM_SHARING_OP_S_HANDLE_INVALID; mem_sharing_page_unlock(secondpg, &pld); mem_sharing_page_unlock(firstpg, &pld); goto err_out; } if ( cpage->shared_info->handle != ch ) { - ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID; + ret = XENMEM_SHARING_OP_C_HANDLE_INVALID; mem_sharing_page_unlock(secondpg, &pld); mem_sharing_page_unlock(firstpg, &pld); goto err_out; @@ -849,7 +849,7 @@ int mem_sharing_add_to_physmap(struct do cmfn = get_gfn_type_access(p2m, cgfn, &cmfn_type, &a, p2m_query, NULL); /* Get the source shared page, check and lock */ - ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID; + ret = XENMEM_SHARING_OP_S_HANDLE_INVALID; spage = __grab_shared_page(smfn, &pld); if ( spage == NULL ) goto err_out; @@ -863,7 +863,7 @@ int mem_sharing_add_to_physmap(struct do if ( mfn_valid(cmfn) || (!(p2m_is_ram(cmfn_type))) ) { - ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID; + ret = XENMEM_SHARING_OP_C_HANDLE_INVALID; goto err_unlock; } @@ -1014,9 +1014,9 @@ private_page_found: return 0; } -int mem_sharing_domctl(struct domain *d, xen_domctl_mem_sharing_op_t *mec) +int mem_sharing_memop(struct domain *d, xen_mem_sharing_op_t *mec) { - int rc; + int rc = 0; /* Only HAP is supported */ if ( !hap_enabled(d) ) @@ -1024,14 +1024,7 @@ int mem_sharing_domctl(struct domain *d, switch(mec->op) { - case XEN_DOMCTL_MEM_EVENT_OP_SHARING_CONTROL: - { - d->arch.hvm_domain.mem_sharing_enabled = mec->u.enable; - rc = 0; - } - break; - - case XEN_DOMCTL_MEM_EVENT_OP_SHARING_NOMINATE_GFN: + case XENMEM_sharing_op_nominate_gfn: { unsigned long gfn = mec->u.nominate.u.gfn; shr_handle_t handle; @@ -1042,7 +1035,7 @@ int mem_sharing_domctl(struct domain *d, } break; - case XEN_DOMCTL_MEM_EVENT_OP_SHARING_NOMINATE_GREF: + case XENMEM_sharing_op_nominate_gref: { grant_ref_t gref = mec->u.nominate.u.grant_ref; unsigned long gfn; @@ -1057,47 +1050,48 @@ int mem_sharing_domctl(struct domain *d, } break; - case XEN_DOMCTL_MEM_EVENT_OP_SHARING_SHARE: + 
case XENMEM_sharing_op_share: { unsigned long sgfn, cgfn; struct domain *cd; shr_handle_t sh, ch; + int rc; if ( !mem_sharing_enabled(d) ) return -EINVAL; - cd = get_domain_by_id(mec->u.share.client_domain); + cd = get_mem_event_op_target(mec->u.share.client_domain, &rc); if ( !cd ) - return -ESRCH; + return rc; if ( !mem_sharing_enabled(cd) ) { - put_domain(cd); + rcu_unlock_domain(cd); return -EINVAL; } - if ( XEN_DOMCTL_MEM_SHARING_FIELD_IS_GREF(mec->u.share.source_gfn) ) + if ( XENMEM_SHARING_OP_FIELD_IS_GREF(mec->u.share.source_gfn) ) { grant_ref_t gref = (grant_ref_t) - (XEN_DOMCTL_MEM_SHARING_FIELD_GET_GREF( + (XENMEM_SHARING_OP_FIELD_GET_GREF( mec->u.share.source_gfn)); if ( mem_sharing_gref_to_gfn(d, gref, &sgfn) < 0 ) { - put_domain(cd); + rcu_unlock_domain(cd); return -EINVAL; } } else { sgfn = mec->u.share.source_gfn; } - if ( XEN_DOMCTL_MEM_SHARING_FIELD_IS_GREF(mec->u.share.client_gfn) ) + if ( XENMEM_SHARING_OP_FIELD_IS_GREF(mec->u.share.client_gfn) ) { grant_ref_t gref = (grant_ref_t) - (XEN_DOMCTL_MEM_SHARING_FIELD_GET_GREF( + (XENMEM_SHARING_OP_FIELD_GET_GREF( mec->u.share.client_gfn)); if ( mem_sharing_gref_to_gfn(cd, gref, &cgfn) < 0 ) { - put_domain(cd); + rcu_unlock_domain(cd); return -EINVAL; } } else { @@ -1109,33 +1103,34 @@ int mem_sharing_domctl(struct domain *d, rc = mem_sharing_share_pages(d, sgfn, sh, cd, cgfn, ch); - put_domain(cd); + rcu_unlock_domain(cd); } break; - case XEN_DOMCTL_MEM_EVENT_OP_SHARING_ADD_PHYSMAP: + case XENMEM_sharing_op_add_physmap: { unsigned long sgfn, cgfn; struct domain *cd; shr_handle_t sh; + int rc; if ( !mem_sharing_enabled(d) ) return -EINVAL; - cd = get_domain_by_id(mec->u.share.client_domain); + cd = get_mem_event_op_target(mec->u.share.client_domain, &rc); if ( !cd ) - return -ESRCH; + return rc; if ( !mem_sharing_enabled(cd) ) { - put_domain(cd); + rcu_unlock_domain(cd); return -EINVAL; } - if ( XEN_DOMCTL_MEM_SHARING_FIELD_IS_GREF(mec->u.share.source_gfn) ) + if ( 
XENMEM_SHARING_OP_FIELD_IS_GREF(mec->u.share.source_gfn) ) { /* Cannot add a gref to the physmap */ - put_domain(cd); + rcu_unlock_domain(cd); return -EINVAL; } @@ -1145,11 +1140,11 @@ int mem_sharing_domctl(struct domain *d, rc = mem_sharing_add_to_physmap(d, sgfn, sh, cd, cgfn); - put_domain(cd); + rcu_unlock_domain(cd); } break; - case XEN_DOMCTL_MEM_EVENT_OP_SHARING_RESUME: + case XENMEM_sharing_op_resume: { if ( !mem_sharing_enabled(d) ) return -EINVAL; @@ -1157,21 +1152,21 @@ int mem_sharing_domctl(struct domain *d, } break; - case XEN_DOMCTL_MEM_EVENT_OP_SHARING_DEBUG_GFN: + case XENMEM_sharing_op_debug_gfn: { unsigned long gfn = mec->u.debug.u.gfn; rc = mem_sharing_debug_gfn(d, gfn); } break; - case XEN_DOMCTL_MEM_EVENT_OP_SHARING_DEBUG_MFN: + case XENMEM_sharing_op_debug_mfn: { unsigned long mfn = mec->u.debug.u.mfn; rc = mem_sharing_debug_mfn(mfn); } break; - case XEN_DOMCTL_MEM_EVENT_OP_SHARING_DEBUG_GREF: + case XENMEM_sharing_op_debug_gref: { grant_ref_t gref = mec->u.debug.u.gref; rc = mem_sharing_debug_gref(d, gref); @@ -1188,6 +1183,30 @@ int mem_sharing_domctl(struct domain *d, return rc; } +int mem_sharing_domctl(struct domain *d, xen_domctl_mem_sharing_op_t *mec) +{ + int rc; + + /* Only HAP is supported */ + if ( !hap_enabled(d) ) + return -ENODEV; + + switch(mec->op) + { + case XEN_DOMCTL_MEM_SHARING_CONTROL: + { + d->arch.hvm_domain.mem_sharing_enabled = mec->u.enable; + rc = 0; + } + break; + + default: + rc = -ENOSYS; + } + + return rc; +} + void __init mem_sharing_init(void) { printk("Initing memory sharing.\n"); diff -r 3b1c596bc1b4 -r ed4d429d7026 xen/arch/x86/x86_64/compat/mm.c --- a/xen/arch/x86/x86_64/compat/mm.c +++ b/xen/arch/x86/x86_64/compat/mm.c @@ -2,6 +2,7 @@ #include <xen/multicall.h> #include <compat/memory.h> #include <compat/xen.h> +#include <asm/mem_event.h> int compat_set_gdt(XEN_GUEST_HANDLE(uint) frame_list, unsigned int entries) { @@ -211,6 +212,28 @@ int compat_arch_memory_op(int op, XEN_GU case 
XENMEM_get_sharing_shared_pages: return mem_sharing_get_nr_shared_mfns(); + case XENMEM_paging_op: + case XENMEM_access_op: + { + xen_mem_event_op_t meo; + if ( copy_from_guest(&meo, arg, 1) ) + return -EFAULT; + rc = do_mem_event_op(op, meo.domain, (void *) &meo); + if ( !rc && copy_to_guest(arg, &meo, 1) ) + return -EFAULT; + break; + } + case XENMEM_sharing_op: + { + xen_mem_sharing_op_t mso; + if ( copy_from_guest(&mso, arg, 1) ) + return -EFAULT; + rc = do_mem_event_op(op, mso.domain, (void *) &mso); + if ( !rc && copy_to_guest(arg, &mso, 1) ) + return -EFAULT; + break; + } + default: rc = -ENOSYS; break; diff -r 3b1c596bc1b4 -r ed4d429d7026 xen/arch/x86/x86_64/mm.c --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -34,6 +34,7 @@ #include <asm/msr.h> #include <asm/setup.h> #include <asm/numa.h> +#include <asm/mem_event.h> #include <asm/mem_sharing.h> #include <public/memory.h> @@ -1100,6 +1101,28 @@ long subarch_memory_op(int op, XEN_GUEST case XENMEM_get_sharing_shared_pages: return mem_sharing_get_nr_shared_mfns(); + case XENMEM_paging_op: + case XENMEM_access_op: + { + xen_mem_event_op_t meo; + if ( copy_from_guest(&meo, arg, 1) ) + return -EFAULT; + rc = do_mem_event_op(op, meo.domain, (void *) &meo); + if ( !rc && copy_to_guest(arg, &meo, 1) ) + return -EFAULT; + break; + } + case XENMEM_sharing_op: + { + xen_mem_sharing_op_t mso; + if ( copy_from_guest(&mso, arg, 1) ) + return -EFAULT; + rc = do_mem_event_op(op, mso.domain, (void *) &mso); + if ( !rc && copy_to_guest(arg, &mso, 1) ) + return -EFAULT; + break; + } + default: rc = -ENOSYS; break; diff -r 3b1c596bc1b4 -r ed4d429d7026 xen/include/asm-x86/mem_access.h --- a/xen/include/asm-x86/mem_access.h +++ b/xen/include/asm-x86/mem_access.h @@ -21,8 +21,7 @@ */ -int mem_access_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec, - XEN_GUEST_HANDLE(void) u_domctl); +int mem_access_memop(struct domain *d, xen_mem_event_op_t *meo); /* diff -r 3b1c596bc1b4 -r ed4d429d7026 
xen/include/asm-x86/mem_event.h --- a/xen/include/asm-x86/mem_event.h +++ b/xen/include/asm-x86/mem_event.h @@ -39,6 +39,8 @@ void mem_event_put_request(struct domain int mem_event_get_response(struct domain *d, struct mem_event_domain *med, mem_event_response_t *rsp); +struct domain *get_mem_event_op_target(uint32_t domain, int *rc); +int do_mem_event_op(int op, uint32_t domain, void *arg); int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec, XEN_GUEST_HANDLE(void) u_domctl); diff -r 3b1c596bc1b4 -r ed4d429d7026 xen/include/asm-x86/mem_paging.h --- a/xen/include/asm-x86/mem_paging.h +++ b/xen/include/asm-x86/mem_paging.h @@ -21,8 +21,7 @@ */ -int mem_paging_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec, - XEN_GUEST_HANDLE(void) u_domctl); +int mem_paging_memop(struct domain *d, xen_mem_event_op_t *meo); /* diff -r 3b1c596bc1b4 -r ed4d429d7026 xen/include/asm-x86/mem_sharing.h --- a/xen/include/asm-x86/mem_sharing.h +++ b/xen/include/asm-x86/mem_sharing.h @@ -23,6 +23,7 @@ #define __MEM_SHARING_H__ #include <public/domctl.h> +#include <public/memory.h> /* Auditing of memory sharing code? */ #define MEM_SHARING_AUDIT 1 @@ -56,6 +57,8 @@ int mem_sharing_unshare_page(struct doma unsigned long gfn, uint16_t flags); int mem_sharing_sharing_resume(struct domain *d); +int mem_sharing_memop(struct domain *d, + xen_mem_sharing_op_t *mec); int mem_sharing_domctl(struct domain *d, xen_domctl_mem_sharing_op_t *mec); void mem_sharing_init(void); diff -r 3b1c596bc1b4 -r ed4d429d7026 xen/include/public/domctl.h --- a/xen/include/public/domctl.h +++ b/xen/include/public/domctl.h @@ -711,47 +711,46 @@ struct xen_domctl_gdbsx_domstatus { /* * Domain memory paging - * Page memory in and out. + * Page memory in and out. + * Domctl interface to set up and tear down the + * pager<->hypervisor interface. Use XENMEM_paging_op* + * to perform per-page operations. 
*/ #define XEN_DOMCTL_MEM_EVENT_OP_PAGING 1 #define XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE 0 #define XEN_DOMCTL_MEM_EVENT_OP_PAGING_DISABLE 1 -#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_NOMINATE 2 -#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_EVICT 3 -#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP 4 -#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_RESUME 5 /* * Access permissions. * + * As with paging, use the domctl for teardown/setup of the + * helper<->hypervisor interface. + * * There are HVM hypercalls to set the per-page access permissions of every * page in a domain. When one of these permissions--independent, read, * write, and execute--is violated, the VCPU is paused and a memory event - * is sent with what happened. (See public/mem_event.h) The memory event - * handler can then resume the VCPU and redo the access with an - * ACCESS_RESUME mode for the following domctl. + * is sent with what happened. (See public/mem_event.h) . + * + * The memory event handler can then resume the VCPU and redo the access + * with a XENMEM_access_op_resume hypercall. 
*/ #define XEN_DOMCTL_MEM_EVENT_OP_ACCESS 2 #define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE 0 #define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE 1 -#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_RESUME 2 +/* Use for teardown/setup of helper<->hypervisor interface for paging, + * access and sharing.*/ struct xen_domctl_mem_event_op { uint32_t op; /* XEN_DOMCTL_MEM_EVENT_OP_*_* */ uint32_t mode; /* XEN_DOMCTL_MEM_EVENT_OP_* */ - union { - /* OP_ENABLE IN: Virtual address of shared page */ - uint64_aligned_t shared_addr; - /* PAGING_PREP IN: buffer to immediately fill page in */ - uint64_aligned_t buffer; - } u; + uint64_aligned_t shared_addr; /* IN: Virtual address of shared page */ uint64_aligned_t ring_addr; /* IN: Virtual address of ring page */ - /* Other OPs */ - uint64_aligned_t gfn; /* IN: gfn of page being operated on */ + /* For binary backwards compatibility */ + uint64_aligned_t pad; }; typedef struct xen_domctl_mem_event_op xen_domctl_mem_event_op_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_event_op_t); @@ -759,59 +758,23 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_e /* * Memory sharing operations */ -/* XEN_DOMCTL_mem_sharing_op */ +/* XEN_DOMCTL_mem_sharing_op. + * The CONTROL sub-domctl is used for bringup/teardown. 
*/ -#define XEN_DOMCTL_MEM_EVENT_OP_SHARING 3 - -#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_CONTROL 0 -#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_NOMINATE_GFN 1 -#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_NOMINATE_GREF 2 -#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_SHARE 3 -#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_RESUME 4 -#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_DEBUG_GFN 5 -#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_DEBUG_MFN 6 -#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_DEBUG_GREF 7 -#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_ADD_PHYSMAP 8 - -#define XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID (-10) -#define XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID (-9) - -#define XEN_DOMCTL_MEM_SHARING_FIELD_IS_GREF_FLAG (1ULL << 62) - -#define XEN_DOMCTL_MEM_SHARING_FIELD_MAKE_GREF(field, val) \ - (field) = (XEN_DOMCTL_MEM_SHARING_FIELD_IS_GREF_FLAG | val) -#define XEN_DOMCTL_MEM_SHARING_FIELD_IS_GREF(field) \ - ((field) & XEN_DOMCTL_MEM_SHARING_FIELD_IS_GREF_FLAG) -#define XEN_DOMCTL_MEM_SHARING_FIELD_GET_GREF(field) \ - ((field) & (~XEN_DOMCTL_MEM_SHARING_FIELD_IS_GREF_FLAG)) +#define XEN_DOMCTL_MEM_SHARING_CONTROL 0 struct xen_domctl_mem_sharing_op { - uint8_t op; /* XEN_DOMCTL_MEM_EVENT_OP_* */ + uint8_t op; /* XEN_DOMCTL_MEM_SHARING_* */ union { - uint8_t enable; /* OP_CONTROL */ + uint8_t enable; /* CONTROL */ - struct mem_sharing_op_nominate { /* OP_NOMINATE_xxx */ - union { - uint64_aligned_t gfn; /* IN: gfn to nominate */ - uint32_t grant_ref; /* IN: grant ref to nominate */ - } u; - uint64_aligned_t handle; /* OUT: the handle */ - } nominate; - struct mem_sharing_op_share { /* OP_SHARE/ADD_PHYSMAP */ - uint64_aligned_t source_gfn; /* IN: the gfn of the source page */ - uint64_aligned_t source_handle; /* IN: handle to the source page */ - domid_t client_domain; /* IN: the client domain id */ - uint64_aligned_t client_gfn; /* IN: the client gfn */ - uint64_aligned_t client_handle; /* IN: handle to the client page */ - } share; - struct mem_sharing_op_debug { /* OP_DEBUG_xxx */ - union { - 
uint64_aligned_t gfn; /* IN: gfn to debug */ - uint64_aligned_t mfn; /* IN: mfn to debug */ - grant_ref_t gref; /* IN: gref to debug */ - } u; - } debug; + /* For binary backwards compatibility */ + struct __pad { + uint64_aligned_t pad1[2]; + domid_t pad2; + uint64_aligned_t pad3[2]; + } pad; } u; }; typedef struct xen_domctl_mem_sharing_op xen_domctl_mem_sharing_op_t; diff -r 3b1c596bc1b4 -r ed4d429d7026 xen/include/public/memory.h --- a/xen/include/public/memory.h +++ b/xen/include/public/memory.h @@ -296,6 +296,80 @@ typedef struct xen_pod_target xen_pod_ta #define XENMEM_get_sharing_freed_pages 18 #define XENMEM_get_sharing_shared_pages 19 +#define XENMEM_paging_op 20 +#define XENMEM_paging_op_nominate 0 +#define XENMEM_paging_op_evict 1 +#define XENMEM_paging_op_prep 2 +#define XENMEM_paging_op_resume 3 + +#define XENMEM_access_op 21 +#define XENMEM_access_op_resume 0 + +struct xen_mem_event_op { + uint8_t op; /* XENMEM_*_op_* */ + domid_t domain; + + /* PAGING_PREP IN: buffer to immediately fill page in */ + uint64_t buffer; + /* Other OPs */ + uint64_t gfn; /* IN: gfn of page being operated on */ +}; +typedef struct xen_mem_event_op xen_mem_event_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_mem_event_op_t); + +#define XENMEM_sharing_op 22 +#define XENMEM_sharing_op_nominate_gfn 0 +#define XENMEM_sharing_op_nominate_gref 1 +#define XENMEM_sharing_op_share 2 +#define XENMEM_sharing_op_resume 3 +#define XENMEM_sharing_op_debug_gfn 4 +#define XENMEM_sharing_op_debug_mfn 5 +#define XENMEM_sharing_op_debug_gref 6 +#define XENMEM_sharing_op_add_physmap 7 + +#define XENMEM_SHARING_OP_S_HANDLE_INVALID (-10) +#define XENMEM_SHARING_OP_C_HANDLE_INVALID (-9) + +#define XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG (1ULL << 62) + +#define XENMEM_SHARING_OP_FIELD_MAKE_GREF(field, val) \ + (field) = (XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG | val) +#define XENMEM_SHARING_OP_FIELD_IS_GREF(field) \ + ((field) & XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG) +#define XENMEM_SHARING_OP_FIELD_GET_GREF(field) 
\ + ((field) & (~XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG)) + +struct xen_mem_sharing_op { + uint8_t op; /* XENMEM_sharing_op_* */ + domid_t domain; + + union { + struct mem_sharing_op_nominate { /* OP_NOMINATE_xxx */ + union { + uint64_t gfn; /* IN: gfn to nominate */ + uint32_t grant_ref; /* IN: grant ref to nominate */ + } u; + uint64_t handle; /* OUT: the handle */ + } nominate; + struct mem_sharing_op_share { /* OP_SHARE/ADD_PHYSMAP */ + uint64_t source_gfn; /* IN: the gfn of the source page */ + uint64_t source_handle; /* IN: handle to the source page */ + domid_t client_domain; /* IN: the client domain id */ + uint64_t client_gfn; /* IN: the client gfn */ + uint64_t client_handle; /* IN: handle to the client page */ + } share; + struct mem_sharing_op_debug { /* OP_DEBUG_xxx */ + union { + uint64_t gfn; /* IN: gfn to debug */ + uint64_t mfn; /* IN: mfn to debug */ + uint32_t gref; /* IN: gref to debug */ + } u; + } debug; + } u; +}; +typedef struct xen_mem_sharing_op xen_mem_sharing_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t); + #endif /* __XEN_PUBLIC_MEMORY_H__ */ /*
Tim Deegan
2012-Jan-12 13:01 UTC
Re: [PATCH] RFC: Use memops for mem paging, sharing, and access, instead of domctls
At 13:42 -0500 on 11 Jan (1326289372), Andres Lagar-Cavilla wrote:> Per page operations in the paging, sharing, and access tracking subsystems are > all implemented with domctls (e.g. a domctl to evict one page, or to share one > page). > > Under heavy load, the domctl path reveals a lack of scalability. The domctl > lock serializes dom0's vcpus in the hypervisor. When performing thousands of > per-page operations on dozens of domains, these vcpus will spin in the > hypervisor. Beyond the aggressive locking, an added inefficiency of blocking vcpus > in the domctl lock is that dom0 is prevented from re-scheduling. > > In this proposal we retain the domctl interface for setting up and tearing down > paging/sharing/mem access for a domain. But we migrate all the per page operations > to use the memory_op hypercalls (e.g XENMEM_*). > > While we naturally welcome comments on the correctness of the approach, we are also > concerned about the viability of this API change. With 4.2 coming, this is the right > time to get an interface right, for the long run.

I'm happy with the API change but I'd like the other users of it to comment. Tim.
Olaf Hering
2012-Jan-12 14:43 UTC
Re: [PATCH] RFC: Use memops for mem paging, sharing, and access, instead of domctls
On Wed, Jan 11, Andres Lagar-Cavilla wrote:> +++ b/xen/include/public/domctl.h> +/* Use for teardown/setup of helper<->hypervisor interface for paging, > + * access and sharing.*/ > struct xen_domctl_mem_event_op { > uint32_t op; /* XEN_DOMCTL_MEM_EVENT_OP_*_* */ > uint32_t mode; /* XEN_DOMCTL_MEM_EVENT_OP_* */ > > - union { > - /* OP_ENABLE IN: Virtual address of shared page */ > - uint64_aligned_t shared_addr; > - /* PAGING_PREP IN: buffer to immediately fill page in */ > - uint64_aligned_t buffer; > - } u; > + uint64_aligned_t shared_addr; /* IN: Virtual address of shared page */ > uint64_aligned_t ring_addr; /* IN: Virtual address of ring page */ > > - /* Other OPs */ > - uint64_aligned_t gfn; /* IN: gfn of page being operated on */ > + /* For binary backwards compatibility */ > + uint64_aligned_t pad; > };

Assuming this struct is routed through libxc, and libxc gets a new SONAME for every release, doesn't this mean that every old binary has to be recompiled anyway for the new release? If so, the padding is not needed. Olaf
Andres Lagar-Cavilla
2012-Jan-12 16:04 UTC
Re: [PATCH] RFC: Use memops for mem paging, sharing, and access, instead of domctls
> On Wed, Jan 11, Andres Lagar-Cavilla wrote: > >> +++ b/xen/include/public/domctl.h > >> +/* Use for teardown/setup of helper<->hypervisor interface for paging, >> + * access and sharing.*/ >> struct xen_domctl_mem_event_op { >> uint32_t op; /* XEN_DOMCTL_MEM_EVENT_OP_*_* */ >> uint32_t mode; /* XEN_DOMCTL_MEM_EVENT_OP_* */ >> >> - union { >> - /* OP_ENABLE IN: Virtual address of shared page */ >> - uint64_aligned_t shared_addr; >> - /* PAGING_PREP IN: buffer to immediately fill page in */ >> - uint64_aligned_t buffer; >> - } u; >> + uint64_aligned_t shared_addr; /* IN: Virtual address of shared >> page */ >> uint64_aligned_t ring_addr; /* IN: Virtual address of ring page >> */ >> >> - /* Other OPs */ >> - uint64_aligned_t gfn; /* IN: gfn of page being >> operated >> on */ >> + /* For binary backwards compatibility */ >> + uint64_aligned_t pad; >> }; > > Assuming this struct is routed through libxc, and libxc gets a new > SONAME for every release, doesnt this mean that every old binary has to > be recompiled anyway for the new release? > If so, the padding is not needed.

Agreed, basically. Waiting to hear from tools maintainers about best approach to libxc. It seems that there aren't that many users relying on a fixed ABI, so we can (still, until 4.2) change things. But obviously I want to be careful. Andres> > Olaf > >
Tim Deegan
2012-Jan-26 12:57 UTC
Re: [PATCH] RFC: Use memops for mem paging, sharing, and access, instead of domctls
At 08:04 -0800 on 12 Jan (1326355474), Andres Lagar-Cavilla wrote:> > On Wed, Jan 11, Andres Lagar-Cavilla wrote: > > > >> +++ b/xen/include/public/domctl.h > > > >> +/* Use for teardown/setup of helper<->hypervisor interface for paging, > >> + * access and sharing.*/ > >> struct xen_domctl_mem_event_op { > >> uint32_t op; /* XEN_DOMCTL_MEM_EVENT_OP_*_* */ > >> uint32_t mode; /* XEN_DOMCTL_MEM_EVENT_OP_* */ > >> > >> - union { > >> - /* OP_ENABLE IN: Virtual address of shared page */ > >> - uint64_aligned_t shared_addr; > >> - /* PAGING_PREP IN: buffer to immediately fill page in */ > >> - uint64_aligned_t buffer; > >> - } u; > >> + uint64_aligned_t shared_addr; /* IN: Virtual address of shared > >> page */ > >> uint64_aligned_t ring_addr; /* IN: Virtual address of ring page > >> */ > >> > >> - /* Other OPs */ > >> - uint64_aligned_t gfn; /* IN: gfn of page being operated > >> on */ > >> + /* For binary backwards compatibility */ > >> + uint64_aligned_t pad; > >> }; > > > > Assuming this struct is routed through libxc, and libxc gets a new > > SONAME for every release, doesnt this mean that every old binary has to > > be recompiled anyway for the new release? > > If so, the padding is not needed. > Agreed, basically. Waiting to hear from tools maintainers about best > approach to libxc. > > It seems that there aren''t that many users relying on a fixed ABI, so we > can (still, until 4.2) change things. But obviously I want to be careful.Ping? Tim.
Andres Lagar-Cavilla
2012-Jan-26 13:00 UTC
Re: [PATCH] RFC: Use memops for mem paging, sharing, and access, instead of domctls
> At 08:04 -0800 on 12 Jan (1326355474), Andres Lagar-Cavilla wrote: >> > On Wed, Jan 11, Andres Lagar-Cavilla wrote: >> > >> >> +++ b/xen/include/public/domctl.h >> > >> >> +/* Use for teardown/setup of helper<->hypervisor interface for >> paging, >> >> + * access and sharing.*/ >> >> struct xen_domctl_mem_event_op { >> >> uint32_t op; /* XEN_DOMCTL_MEM_EVENT_OP_*_* */ >> >> uint32_t mode; /* XEN_DOMCTL_MEM_EVENT_OP_* */ >> >> >> >> - union { >> >> - /* OP_ENABLE IN: Virtual address of shared page */ >> >> - uint64_aligned_t shared_addr; >> >> - /* PAGING_PREP IN: buffer to immediately fill page in */ >> >> - uint64_aligned_t buffer; >> >> - } u; >> >> + uint64_aligned_t shared_addr; /* IN: Virtual address of shared >> >> page */ >> >> uint64_aligned_t ring_addr; /* IN: Virtual address of ring >> page >> >> */ >> >> >> >> - /* Other OPs */ >> >> - uint64_aligned_t gfn; /* IN: gfn of page being >> operated >> >> on */ >> >> + /* For binary backwards compatibility */ >> >> + uint64_aligned_t pad; >> >> }; >> > >> > Assuming this struct is routed through libxc, and libxc gets a new >> > SONAME for every release, doesnt this mean that every old binary has >> to >> > be recompiled anyway for the new release? >> > If so, the padding is not needed. >> Agreed, basically. Waiting to hear from tools maintainers about best >> approach to libxc. >> >> It seems that there aren''t that many users relying on a fixed ABI, so we >> can (still, until 4.2) change things. But obviously I want to be >> careful. > > Ping?I have working code. Except that it blue-screened W7 once, so it''s been put in the back-burner for next week. I can repost for review. Ok, I will. I have not heard any objections thus far to the ABI change. Andres> > Tim. >