Aravindh Puthiyaparambil
2012-Apr-26 18:33 UTC
[PATCH 0 of 2] Add libxc API that sets mem_access type for an array of gfns
When the mem_access type needs to be changed for multiple discontiguous gfns, calling xc_hvm_set_mem_access() multiple times does not perform very well. The main pain points are the multiple libxc calls themselves, plus the multiple map_domain_page() / unmap_domain_page() and ept_sync_domain() calls for each ept_set_entry(gfn). The following patches add a new mem_access API that sets the mem_access type for an array of guest physical addresses in one libxc call, with minimal map_domain_page() / unmap_domain_page() and ept_sync_domain() calls.

Signed-off-by: Aravindh Puthiyaparambil <aravindh@virtuata.com>
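[For illustration, a minimal caller-side sketch of the proposed API. The signature is taken from patch 2 below; the includes, domid and gfn values are made up for the example, and HVMMEM_access_r stands in for whichever access type the caller wants. This is not code from the series.]

    #include <stdio.h>
    #include <xenctrl.h>

    /* Revoke write/exec access on a few scattered gfns in one hypercall
     * and report any per-gfn errors. Domid and gfns are hypothetical. */
    static int restrict_gfns(xc_interface *xch, domid_t domid)
    {
        xen_pfn_t gfns[3] = { 0x1000, 0x2345, 0x7890 };
        int err[3] = { 0 };
        uint64_t i;

        if ( xc_hvm_set_mem_access_bulk(xch, domid, HVMMEM_access_r,
                                        gfns, err, 3) < 0 )
            return -1;          /* the call itself failed */

        for ( i = 0; i < 3; i++ )
            if ( err[i] )       /* per-gfn failure, errno value */
                fprintf(stderr, "gfn 0x%lx not set: %d\n",
                        (unsigned long)gfns[i], err[i]);
        return 0;
    }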
Aravindh Puthiyaparambil
2012-Apr-26 18:33 UTC
[PATCH 1 of 2] x86/mm: Split ept_set_entry()
xen/arch/x86/mm/p2m-ept.c |  60 ++++++++++++++++++++++++++++------------------
1 files changed, 37 insertions(+), 23 deletions(-)

Split ept_set_entry() such that mapping and unmapping of the page tables and ept_sync occurs outside of it.

Signed-off-by: Aravindh Puthiyaparambil <aravindh@virtuata.com>

diff -r 63eb1343cbdb -r f12cf785738f xen/arch/x86/mm/p2m-ept.c
--- a/xen/arch/x86/mm/p2m-ept.c Wed Apr 25 19:29:53 2012 -0700
+++ b/xen/arch/x86/mm/p2m-ept.c Wed Apr 25 19:29:54 2012 -0700
@@ -272,14 +272,15 @@ static int ept_next_level(struct p2m_dom
 }
 
 /*
- * ept_set_entry() computes 'need_modify_vtd_table' for itself,
+ * __ept_set_entry() computes 'need_modify_vtd_table' for itself,
  * by observing whether any gfn->mfn translations are modified.
  */
 static int
-ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
-              unsigned int order, p2m_type_t p2mt, p2m_access_t p2ma)
+__ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
+                unsigned int order, p2m_type_t p2mt, p2m_access_t p2ma,
+                ept_entry_t *table, ept_entry_t *old_entry, bool_t *needs_sync)
 {
-    ept_entry_t *table, *ept_entry = NULL;
+    ept_entry_t *ept_entry = NULL;
     unsigned long gfn_remainder = gfn;
     unsigned long offset = 0;
     u32 index;
@@ -288,11 +289,9 @@ ept_set_entry(struct p2m_domain *p2m, un
     int ret = 0;
     bool_t direct_mmio = (p2mt == p2m_mmio_direct);
     uint8_t ipat = 0;
-    int need_modify_vtd_table = 1;
-    int vtd_pte_present = 0;
-    int needs_sync = 1;
+    bool_t need_modify_vtd_table = 1;
+    bool_t vtd_pte_present = 0;
     struct domain *d = p2m->domain;
-    ept_entry_t old_entry = { .epte = 0 };
 
     /*
      * the caller must make sure:
@@ -305,12 +304,6 @@ ept_set_entry(struct p2m_domain *p2m, un
          (order % EPT_TABLE_ORDER) )
         return 0;
 
-    ASSERT((target == 2 && hvm_hap_has_1gb(d)) ||
-           (target == 1 && hvm_hap_has_2mb(d)) ||
-           (target == 0));
-
-    table = map_domain_page(ept_get_asr(d));
-
     for ( i = ept_get_wl(d); i > target; i-- )
     {
         ret = ept_next_level(p2m, 0, &table, &gfn_remainder, i);
@@ -327,7 +320,7 @@ ept_set_entry(struct p2m_domain *p2m, un
 
     ept_entry = table + index;
 
-    /* In case VT-d uses same page table, this flag is needed by VT-d */
+    /* In case VT-d uses same page table, this flag is needed by VT-d */
     vtd_pte_present = is_epte_present(ept_entry) ? 1 : 0;
 
     /*
@@ -352,7 +345,7 @@ ept_set_entry(struct p2m_domain *p2m, un
      * the intermediate tables will be freed below after the ept flush
      *
      * Read-then-write is OK because we hold the p2m lock. */
-    old_entry = *ept_entry;
+    old_entry->epte = ept_entry->epte;
 
     if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) ||
          (p2mt == p2m_ram_paging_in) )
@@ -369,7 +362,7 @@ ept_set_entry(struct p2m_domain *p2m, un
 
         new_entry.mfn = mfn_x(mfn);
 
-        if ( old_entry.mfn == new_entry.mfn )
+        if ( old_entry->mfn == new_entry.mfn )
             need_modify_vtd_table = 0;
 
         ept_p2m_type_to_flags(&new_entry, p2mt, p2ma);
@@ -435,12 +428,7 @@ ept_set_entry(struct p2m_domain *p2m, un
     /* Success */
     rv = 1;
 
-out:
-    unmap_domain_page(table);
-
-    if ( needs_sync )
-        ept_sync_domain(p2m->domain);
-
+ out:
     if ( rv && iommu_enabled && need_iommu(p2m->domain) &&
          need_modify_vtd_table )
     {
         if ( iommu_hap_pt_share )
@@ -473,6 +461,32 @@ out:
         }
     }
 
+    return rv;
+}
+
+static int
+ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
+              unsigned int order, p2m_type_t p2mt, p2m_access_t p2ma)
+{
+    ept_entry_t *table;
+    int target = order / EPT_TABLE_ORDER;
+    int rv = 0;
+    bool_t needs_sync = 1;
+    struct domain *d = p2m->domain;
+    ept_entry_t old_entry = { .epte = 0 };
+
+    ASSERT((target == 2 && hvm_hap_has_1gb(d)) ||
+           (target == 1 && hvm_hap_has_2mb(d)) ||
+           (target == 0));
+
+    table = map_domain_page(ept_get_asr(d));
+    rv = __ept_set_entry(p2m, gfn, mfn, order, p2mt, p2ma, table, &old_entry,
+                         &needs_sync);
+    unmap_domain_page(table);
+
+    if ( needs_sync )
+        ept_sync_domain(p2m->domain);
+
     /* Release the old intermediate tables, if any.  This has to be the
        last thing we do, after the ept_sync_domain() and removal
        from the iommu tables, so as to avoid a potential use-after-free.
Aravindh Puthiyaparambil
2012-Apr-26 18:33 UTC
[PATCH 2 of 2] mem_access: Add xc_hvm_mem_access_bulk() API
tools/libxc/xc_misc.c           |  51 ++++++++++++++++++++++++++++
tools/libxc/xenctrl.h           |  11 +++++++
xen/arch/x86/hvm/hvm.c          |  45 ++++++++++++++++++++++++
xen/arch/x86/mm/p2m-ept.c       |  61 +++++++++++++++++++++++++++++++++
xen/arch/x86/mm/p2m-pt.c        |   1 +
xen/arch/x86/mm/p2m.c           |  36 ++++++++++++++++++
xen/include/asm-x86/p2m.h       |  18 +++++++++++-
xen/include/public/hvm/hvm_op.h |  18 ++++++++++++
8 files changed, 240 insertions(+), 1 deletions(-)

int xc_hvm_set_mem_access_bulk(
    xc_interface *xch, domid_t dom, hvmmem_access_t memaccess,
    xen_pfn_t *arr, int *err, uint64_t num);

Set the array of guest physical addresses to a specific mem_access type. Allowed types are HVMMEM_access_default, HVMMEM_access_n, any combination of HVM_access_ + (rwx), and HVM_access_rx2rw. When a gfn cannot be set to the specified access, its respective field in @err is set to the corresponding errno value.

Signed-off-by: Aravindh Puthiyaparambil <aravindh@virtuata.com>

diff -r f12cf785738f -r 2f695c43ce22 tools/libxc/xc_misc.c
--- a/tools/libxc/xc_misc.c Wed Apr 25 19:29:54 2012 -0700
+++ b/tools/libxc/xc_misc.c Thu Apr 26 11:25:50 2012 -0700
@@ -570,6 +570,57 @@ int xc_hvm_set_mem_access(
     return rc;
 }
 
+int xc_hvm_set_mem_access_bulk(
+    xc_interface *xch, domid_t dom, hvmmem_access_t mem_access,
+    xen_pfn_t *arr, int *err, uint64_t nr)
+{
+    DECLARE_HYPERCALL;
+    DECLARE_HYPERCALL_BUFFER(struct xen_hvm_set_mem_access_bulk, arg);
+    DECLARE_HYPERCALL_BOUNCE(arr, sizeof(xen_pfn_t) * nr,
+                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
+    DECLARE_HYPERCALL_BOUNCE(err, sizeof(int) * nr,
+                             XC_HYPERCALL_BUFFER_BOUNCE_OUT);
+    int rc;
+
+    arg = xc_hypercall_buffer_alloc(xch, arg, sizeof(*arg));
+    if ( arg == NULL )
+    {
+        PERROR("Could not allocate memory for xc_hvm_set_mem_access_bulk hypercall");
+        return -1;
+    }
+
+    if ( xc_hypercall_bounce_pre(xch, arr) ) {
+        PERROR("Could not bounce arr for xc_hvm_set_mem_access_bulk hypercall");
+        rc = -1;
+        goto out;
+    }
+
+    if ( xc_hypercall_bounce_pre(xch, err) ) {
+        PERROR("Could not bounce err for xc_hvm_set_mem_access_bulk hypercall");
+        rc = -1;
+        goto out;
+    }
+
+    arg->domid         = dom;
+    arg->hvmmem_access = mem_access;
+    arg->nr            = nr;
+    set_xen_guest_handle(arg->arr, arr);
+    set_xen_guest_handle(arg->err, err);
+
+    hypercall.op     = __HYPERVISOR_hvm_op;
+    hypercall.arg[0] = HVMOP_set_mem_access_bulk;
+    hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
+
+    rc = do_xen_hypercall(xch, &hypercall);
+
+out:
+    xc_hypercall_buffer_free(xch, arg);
+    xc_hypercall_bounce_post(xch, arr);
+    xc_hypercall_bounce_post(xch, err);
+
+    return rc;
+}
+
 int xc_hvm_get_mem_access(
     xc_interface *xch, domid_t dom, uint64_t pfn, hvmmem_access_t* mem_access)
 {

diff -r f12cf785738f -r 2f695c43ce22 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Wed Apr 25 19:29:54 2012 -0700
+++ b/tools/libxc/xenctrl.h Thu Apr 26 11:25:50 2012 -0700
@@ -1568,6 +1568,17 @@ int xc_hvm_set_mem_access(
     xc_interface *xch, domid_t dom, hvmmem_access_t memaccess, uint64_t first_pfn, uint64_t nr);
 
 /*
+ * Set the array of pfns to a specific access.
+ * When a pfn cannot be set to the specified access, its respective field in
+ * @err is set to the corresponding errno value.
+ * Allowed types are HVMMEM_access_default, HVMMEM_access_n, any combination of
+ * HVM_access_ + (rwx), and HVM_access_rx2rw
+ */
+int xc_hvm_set_mem_access_bulk(
+    xc_interface *xch, domid_t dom, hvmmem_access_t memaccess,
+    xen_pfn_t *arr, int *err, uint64_t num);
+
+/*
  * Gets the mem access for the given page (returned in memaccess on success)
  */
 int xc_hvm_get_mem_access(

diff -r f12cf785738f -r 2f695c43ce22 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Wed Apr 25 19:29:54 2012 -0700
+++ b/xen/arch/x86/hvm/hvm.c Thu Apr 26 11:25:50 2012 -0700
@@ -4197,6 +4197,51 @@ long do_hvm_op(unsigned long op, XEN_GUE
         break;
     }
 
+    case HVMOP_set_mem_access_bulk:
+    {
+        struct xen_hvm_set_mem_access_bulk a;
+        struct domain *d;
+        xen_pfn_t *arr = 0;
+        int *err = 0;
+
+        if ( copy_from_guest(&a, arg, 1) )
+            return -EFAULT;
+
+        rc = rcu_lock_remote_target_domain_by_id(a.domid, &d);
+        if ( rc != 0 )
+            return rc;
+
+        rc = -EINVAL;
+
+        if ( !is_hvm_domain(d) )
+            goto param_fail9;
+
+        rc = -ENOMEM;
+        arr = xmalloc_array(xen_pfn_t, a.nr);
+        if (!arr)
+            goto param_fail9;
+
+        err = xmalloc_array(int, a.nr);
+        if (!err)
+            goto param_fail9;
+
+        if ( copy_from_guest(arr, a.arr, a.nr) )
+            goto param_fail9;
+
+        rc = p2m_set_access_bulk(d, arr, err, a.nr, a.hvmmem_access);
+
+        if ( copy_to_guest(a.err, err, a.nr) )
+            goto param_fail9;
+
+    param_fail9:
+        rcu_unlock_domain(d);
+        if (arr)
+            xfree(arr);
+        if (err)
+            xfree(err);
+        break;
+    }
+
     case HVMOP_get_mem_access:
     {
         struct xen_hvm_get_mem_access a;

diff -r f12cf785738f -r 2f695c43ce22 xen/arch/x86/mm/p2m-ept.c
--- a/xen/arch/x86/mm/p2m-ept.c Wed Apr 25 19:29:54 2012 -0700
+++ b/xen/arch/x86/mm/p2m-ept.c Thu Apr 26 11:25:50 2012 -0700
@@ -597,6 +597,66 @@ out:
     return mfn;
 }
 
+static int
+ept_set_entry_bulk(struct p2m_domain *p2m, xen_pfn_t *arr, int *err,
+                   uint64_t nr, unsigned int order, p2m_access_t p2ma)
+{
+    unsigned long pfn;
+    p2m_access_t a;
+    p2m_type_t t;
+    mfn_t mfn;
+    ept_entry_t *table;
+    int target = order / EPT_TABLE_ORDER;
+    int rc;
+    bool_t needs_sync, bulk_needs_sync = 0;
+    struct domain *d = p2m->domain;
+    ept_entry_t *old_entries = 0;
+
+    ASSERT((target == 2 && hvm_hap_has_1gb(d)) ||
+           (target == 1 && hvm_hap_has_2mb(d)) ||
+           (target == 0));
+
+    old_entries = xzalloc_array(ept_entry_t, nr);
+    if ( !old_entries )
+        return 0;
+
+    memset(err, 0, nr * sizeof(int));
+    table = map_domain_page(ept_get_asr(d));
+    for ( pfn = 0; pfn < nr; pfn++ )
+    {
+        if ( arr[pfn] > domain_get_maximum_gpfn(d) )
+        {
+            err[pfn] = -EINVAL;
+            continue;
+        }
+
+        needs_sync = 1;
+        mfn = ept_get_entry(p2m, arr[pfn], &t, &a, 0, NULL);
+        rc = __ept_set_entry(p2m, arr[pfn], mfn, order, t, p2ma, table,
+                             &old_entries[pfn], &needs_sync);
+        if (rc == 0)
+            err[pfn] = -ENOMEM;
+
+        if ( needs_sync )
+            bulk_needs_sync = 1;
+    }
+    unmap_domain_page(table);
+
+    if ( bulk_needs_sync )
+        ept_sync_domain(p2m->domain);
+
+    /* Release the old intermediate tables, if any.  This has to be the
+       last thing we do, after the ept_sync_domain() and removal
+       from the iommu tables, so as to avoid a potential
+       use-after-free. */
+    for ( pfn = 0; pfn < nr; pfn++ )
+        if ( is_epte_present(&old_entries[pfn]) )
+            ept_free_entry(p2m, &old_entries[pfn], target);
+
+    /* Success */
+    return 1;
+}
+
 /* WARNING: Only caller doesn't care about PoD pages.  So this function will
  * always return 0 for PoD pages, not populate them.  If that becomes necessary,
  * pass a p2m_query_t type along to distinguish. */
@@ -808,6 +868,7 @@ static void ept_change_entry_type_global
 void ept_p2m_init(struct p2m_domain *p2m)
 {
     p2m->set_entry = ept_set_entry;
+    p2m->set_entry_bulk = ept_set_entry_bulk;
     p2m->get_entry = ept_get_entry;
     p2m->change_entry_type_global = ept_change_entry_type_global;
     p2m->audit_p2m = NULL;

diff -r f12cf785738f -r 2f695c43ce22 xen/arch/x86/mm/p2m-pt.c
--- a/xen/arch/x86/mm/p2m-pt.c Wed Apr 25 19:29:54 2012 -0700
+++ b/xen/arch/x86/mm/p2m-pt.c Thu Apr 26 11:25:50 2012 -0700
@@ -1139,6 +1139,7 @@ long p2m_pt_audit_p2m(struct p2m_domain
 void p2m_pt_init(struct p2m_domain *p2m)
 {
     p2m->set_entry = p2m_set_entry;
+    p2m->set_entry_bulk = NULL;
     p2m->get_entry = p2m_gfn_to_mfn;
     p2m->change_entry_type_global = p2m_change_type_global;
     p2m->write_p2m_entry = paging_write_p2m_entry;

diff -r f12cf785738f -r 2f695c43ce22 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c Wed Apr 25 19:29:54 2012 -0700
+++ b/xen/arch/x86/mm/p2m.c Thu Apr 26 11:25:50 2012 -0700
@@ -1334,6 +1334,42 @@ int p2m_set_mem_access(struct domain *d,
     return rc;
 }
 
+int p2m_set_access_bulk(struct domain *d, xen_pfn_t *arr, int *err,
+                        uint64_t nr, hvmmem_access_t access)
+{
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    p2m_access_t a;
+    p2m_access_t memaccess[] = {
+        p2m_access_n,
+        p2m_access_r,
+        p2m_access_w,
+        p2m_access_rw,
+        p2m_access_x,
+        p2m_access_rx,
+        p2m_access_wx,
+        p2m_access_rwx,
+        p2m_access_rx2rw,
+        p2m_access_n2rwx,
+        p2m->default_access,
+    };
+    int rc = 0;
+
+    ASSERT(p2m->set_entry_bulk);
+
+    if ( (unsigned) access >= HVMMEM_access_default )
+        return -EINVAL;
+
+    a = memaccess[access];
+
+    p2m_lock(p2m);
+    if ( p2m->set_entry_bulk(p2m, arr, err, nr, PAGE_ORDER_4K, a) == 0 )
+        rc = -ENOMEM;
+    p2m_unlock(p2m);
+
+    return rc;
+}
+
+
 /* Get access type for a pfn
  * If pfn == -1ul, gets the default access type */
 int p2m_get_mem_access(struct domain *d, unsigned long pfn,

diff -r f12cf785738f -r 2f695c43ce22 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Wed Apr 25 19:29:54 2012 -0700
+++ b/xen/include/asm-x86/p2m.h Thu Apr 26 11:25:50 2012 -0700
@@ -232,6 +232,13 @@ struct p2m_domain {
                                        mfn_t mfn, unsigned int page_order,
                                        p2m_type_t p2mt,
                                        p2m_access_t p2ma);
+    int                (*set_entry_bulk)(struct p2m_domain *p2m,
+                                         xen_pfn_t *arr,
+                                         int *err,
+                                         uint64_t nr,
+                                         unsigned int order,
+                                         p2m_access_t p2ma);
+
     mfn_t              (*get_entry   )(struct p2m_domain *p2m,
                                        unsigned long gfn,
                                        p2m_type_t *p2mt,
@@ -568,6 +575,11 @@ void p2m_mem_access_resume(struct domain
 int p2m_set_mem_access(struct domain *d, unsigned long start_pfn,
                        uint32_t nr, hvmmem_access_t access);
 
+/* Set access type for an array of pfns. set_mem_access success or failure is
+ * returned in the err array. */
+int p2m_set_access_bulk(struct domain *d, xen_pfn_t *arr, int *err,
+                        uint64_t nr, hvmmem_access_t access);
+
 /* Get access type for a pfn
  * If pfn == -1ul, gets the default access type */
 int p2m_get_mem_access(struct domain *d, unsigned long pfn,
@@ -583,7 +595,11 @@ static inline int p2m_set_mem_access(str
                                      unsigned long start_pfn, uint32_t nr,
                                      hvmmem_access_t access)
 { return -EINVAL; }
-static inline int p2m_get_mem_access(struct domain *d, unsigned long pfn,
+static inline int p2m_set_access_bulk(struct domain *d, xen_pfn_t *arr,
+                                      int *err, uint64_t nr,
+                                      hvmmem_access_t access)
+{ return -EINVAL; }
+static inline int p2m_get_mem_access(struct domain *d, unsigned long pfn,
                                      hvmmem_access_t *access)
 { return -EINVAL; }
 #endif

diff -r f12cf785738f -r 2f695c43ce22 xen/include/public/hvm/hvm_op.h
--- a/xen/include/public/hvm/hvm_op.h Wed Apr 25 19:29:54 2012 -0700
+++ b/xen/include/public/hvm/hvm_op.h Thu Apr 26 11:25:50 2012 -0700
@@ -261,4 +261,22 @@ DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_m
 
 #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
 
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+#define HVMOP_set_mem_access_bulk 17
+/* Notify that an array of pfns is to have specific access types */
+struct xen_hvm_set_mem_access_bulk {
+    /* Domain to be updated. */
+    domid_t domid;
+    /* Memory type */
+    uint16_t hvmmem_access; /* hvm_access_t */
+    /* Array of pfns */
+    XEN_GUEST_HANDLE_64(xen_pfn_t) arr;
+    XEN_GUEST_HANDLE_64(int) err;
+    /* Number of entries */
+    uint64_t nr;
+};
+typedef struct xen_hvm_set_mem_access_bulk xen_hvm_set_mem_access_bulk_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_access_bulk_t);
+#endif
+
 #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
Christian Limpach
2012-Apr-26 20:20 UTC
Re: [PATCH 0 of 2] Add libxc API that sets mem_access type for an array of gfns
On Thu, Apr 26, 2012 at 11:33 AM, Aravindh Puthiyaparambil
<aravindh@virtuata.com> wrote:
> When the mem_access type needs to be changed for multiple discontiguous
> gfns, calling xc_hvm_set_mem_access() multiple times does not perform very
> well. The main pain points are the multiple libxc calls themselves plus
> the multiple map_domain_page() / unmap_domain_page() and ept_sync_domain()
> calls for each ept_set_entry(gfn). The following patches add a new
> mem_access API that sets the mem_access type for an array of guest
> physical addresses in one libxc call with minimal map_domain_page() /
> unmap_domain_page() and ept_sync_domain() calls.

Are you sure that your bulk code actually works? It seems to me that
your __ept_set_entry function assumes that table still points to the
top of the p2m. The "for ( i = ept_get_wl(d); i > target; i-- )" loop
will walk the table, and so in the subsequent calls from your bulk
loop, this won't work?

I think you need something like an iterator, the context of which can
be passed around...

Also, the call to ept_get_entry in your bulk loop will do a walk in
every iteration, it seems a bit arbitrary to only (try to) avoid one
and not the other. But I guess the "win" is from reducing the number
of ept_sync_domain calls.

christian
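[To make the failure mode concrete, here is the bulk loop from patch 2 with the problem annotated. The code is from the patch; the comments restate the review point, on the assumption that ept_next_level() unmaps *table and repoints it at the next-level page as it descends.]

    table = map_domain_page(ept_get_asr(d));
    for ( pfn = 0; pfn < nr; pfn++ )
    {
        needs_sync = 1;
        mfn = ept_get_entry(p2m, arr[pfn], &t, &a, 0, NULL);
        /* First iteration: 'table' is the p2m root, so the walk is fine.
         * But the walk inside __ept_set_entry() moves 'table' down the
         * tree, so on the second iteration 'table' no longer points at
         * the root and the walk starts from the wrong page. */
        rc = __ept_set_entry(p2m, arr[pfn], mfn, order, t, p2ma, table,
                             &old_entries[pfn], &needs_sync);
    }
    unmap_domain_page(table);   /* may now unmap a page other than the root */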
Aravindh Puthiyaparambil
2012-Apr-26 22:41 UTC
Re: [PATCH 0 of 2] Add libxc API that sets mem_access type for an array of gfns
On Thu, Apr 26, 2012 at 1:20 PM, Christian Limpach
<christian.limpach@gmail.com> wrote:
> On Thu, Apr 26, 2012 at 11:33 AM, Aravindh Puthiyaparambil
> <aravindh@virtuata.com> wrote:
>> When the mem_access type needs to be changed for multiple discontiguous
>> gfns, calling xc_hvm_set_mem_access() multiple times does not perform
>> very well. The main pain points are the multiple libxc calls themselves
>> plus the multiple map_domain_page() / unmap_domain_page() and
>> ept_sync_domain() calls for each ept_set_entry(gfn). The following
>> patches add a new mem_access API that sets the mem_access type for an
>> array of guest physical addresses in one libxc call with minimal
>> map_domain_page() / unmap_domain_page() and ept_sync_domain() calls.
>
> Are you sure that your bulk code actually works? It seems to me that
> your __ept_set_entry function assumes that table still points to the
> top of the p2m. The "for ( i = ept_get_wl(d); i > target; i-- )" loop
> will walk the table, and so in the subsequent calls from your bulk
> loop, this won't work?
>
> I think you need something like an iterator, the context of which can
> be passed around...

Duh! You are right. My test code has been giving me a false positive.
I completely misread how ept_next_level() works.

> Also, the call to ept_get_entry in your bulk loop will do a walk in
> every iteration, it seems a bit arbitrary to only (try to) avoid one
> and not the other. But I guess the "win" is from reducing the number
> of ept_sync_domain calls.

You are right. I was mainly focusing on removing the multiple libxc
calls and reducing the ept_sync_domain calls. I thought removing the
map and unmap of the p2m top page was an extra optimization which I
obviously messed up. I will rework the patch to only stick with the
original optimization I had in mind.

Thanks,
Aravindh
Aravindh Puthiyaparambil
2012-Apr-27 00:15 UTC
Re: [PATCH 0 of 2] Add libxc API that sets mem_access type for an array of gfns
On Thu, Apr 26, 2012 at 3:41 PM, Aravindh Puthiyaparambil
<aravindh@virtuata.com> wrote:
> On Thu, Apr 26, 2012 at 1:20 PM, Christian Limpach
> <christian.limpach@gmail.com> wrote:
>> Are you sure that your bulk code actually works? It seems to me that
>> your __ept_set_entry function assumes that table still points to the
>> top of the p2m. The "for ( i = ept_get_wl(d); i > target; i-- )" loop
>> will walk the table, and so in the subsequent calls from your bulk
>> loop, this won't work?
>>
>> I think you need something like an iterator, the context of which can
>> be passed around...
>
> Duh! You are right. My test code has been giving me a false positive.
> I completely misread how ept_next_level() works.
>
>> Also, the call to ept_get_entry in your bulk loop will do a walk in
>> every iteration, it seems a bit arbitrary to only (try to) avoid one
>> and not the other. But I guess the "win" is from reducing the number
>> of ept_sync_domain calls.
>
> You are right. I was mainly focusing on removing the multiple libxc
> calls and reducing the ept_sync_domain calls. I thought removing the
> map and unmap of the p2m top page was an extra optimization which I
> obviously messed up. I will rework the patch to only stick with the
> original optimization I had in mind.

Does this look correct now?

Thanks,
Aravindh

changeset:   25252:b1bde8691257
summary:     x86/mm: Split ept_set_entry()

diff -r 63eb1343cbdb -r b1bde8691257 xen/arch/x86/mm/p2m-ept.c
--- a/xen/arch/x86/mm/p2m-ept.c Wed Apr 25 19:29:53 2012 -0700
+++ b/xen/arch/x86/mm/p2m-ept.c Thu Apr 26 17:10:29 2012 -0700
@@ -272,12 +272,13 @@ static int ept_next_level(struct p2m_dom
 }
 
 /*
- * ept_set_entry() computes 'need_modify_vtd_table' for itself,
+ * __ept_set_entry() computes 'need_modify_vtd_table' for itself,
  * by observing whether any gfn->mfn translations are modified.
  */
 static int
-ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
-              unsigned int order, p2m_type_t p2mt, p2m_access_t p2ma)
+__ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
+                unsigned int order, p2m_type_t p2mt, p2m_access_t p2ma,
+                ept_entry_t *old_entry, bool_t *needs_sync)
 {
     ept_entry_t *table, *ept_entry = NULL;
     unsigned long gfn_remainder = gfn;
@@ -288,11 +289,9 @@ ept_set_entry(struct p2m_domain *p2m, un
     int ret = 0;
     bool_t direct_mmio = (p2mt == p2m_mmio_direct);
     uint8_t ipat = 0;
-    int need_modify_vtd_table = 1;
-    int vtd_pte_present = 0;
-    int needs_sync = 1;
+    bool_t need_modify_vtd_table = 1;
+    bool_t vtd_pte_present = 0;
     struct domain *d = p2m->domain;
-    ept_entry_t old_entry = { .epte = 0 };
 
     /*
      * the caller must make sure:
@@ -305,10 +304,6 @@ ept_set_entry(struct p2m_domain *p2m, un
          (order % EPT_TABLE_ORDER) )
         return 0;
 
-    ASSERT((target == 2 && hvm_hap_has_1gb(d)) ||
-           (target == 1 && hvm_hap_has_2mb(d)) ||
-           (target == 0));
-
     table = map_domain_page(ept_get_asr(d));
 
     for ( i = ept_get_wl(d); i > target; i-- )
@@ -352,7 +347,7 @@ ept_set_entry(struct p2m_domain *p2m, un
      * the intermediate tables will be freed below after the ept flush
      *
      * Read-then-write is OK because we hold the p2m lock. */
-    old_entry = *ept_entry;
+    old_entry->epte = ept_entry->epte;
 
     if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) ||
          (p2mt == p2m_ram_paging_in) )
@@ -369,7 +364,7 @@ ept_set_entry(struct p2m_domain *p2m, un
 
         new_entry.mfn = mfn_x(mfn);
 
-        if ( old_entry.mfn == new_entry.mfn )
+        if ( old_entry->mfn == new_entry.mfn )
             need_modify_vtd_table = 0;
 
         ept_p2m_type_to_flags(&new_entry, p2mt, p2ma);
@@ -438,9 +433,6 @@ ept_set_entry(struct p2m_domain *p2m, un
 out:
     unmap_domain_page(table);
 
-    if ( needs_sync )
-        ept_sync_domain(p2m->domain);
-
     if ( rv && iommu_enabled && need_iommu(p2m->domain) &&
          need_modify_vtd_table )
     {
         if ( iommu_hap_pt_share )
@@ -473,6 +465,28 @@ out:
         }
     }
 
+    return rv;
+}
+
+static int
+ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
+              unsigned int order, p2m_type_t p2mt, p2m_access_t p2ma)
+{
+    int target = order / EPT_TABLE_ORDER;
+    int rv = 0;
+    bool_t needs_sync = 1;
+    ept_entry_t old_entry = { .epte = 0 };
+
+    ASSERT((target == 2 && hvm_hap_has_1gb(d)) ||
+           (target == 1 && hvm_hap_has_2mb(d)) ||
+           (target == 0));
+
+    rv = __ept_set_entry(p2m, gfn, mfn, order, p2mt, p2ma, &old_entry,
+                         &needs_sync);
+
+    if ( needs_sync )
+        ept_sync_domain(p2m->domain);
+
     /* Release the old intermediate tables, if any.  This has to be the
        last thing we do, after the ept_sync_domain() and removal
        from the iommu tables, so as to avoid a potential use-after-free.

changeset:   25253:07225bc67197
summary:     mem_access: Add xc_hvm_mem_access_bulk() API

diff -r b1bde8691257 -r 07225bc67197 tools/libxc/xc_misc.c
--- a/tools/libxc/xc_misc.c Thu Apr 26 17:10:29 2012 -0700
+++ b/tools/libxc/xc_misc.c Thu Apr 26 17:10:35 2012 -0700
@@ -570,6 +570,57 @@ int xc_hvm_set_mem_access(
     return rc;
 }
 
+int xc_hvm_set_mem_access_bulk(
+    xc_interface *xch, domid_t dom, hvmmem_access_t mem_access,
+    xen_pfn_t *arr, int *err, uint64_t nr)
+{
+    DECLARE_HYPERCALL;
+    DECLARE_HYPERCALL_BUFFER(struct xen_hvm_set_mem_access_bulk, arg);
+    DECLARE_HYPERCALL_BOUNCE(arr, sizeof(xen_pfn_t) * nr,
+                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
+    DECLARE_HYPERCALL_BOUNCE(err, sizeof(int) * nr,
+                             XC_HYPERCALL_BUFFER_BOUNCE_OUT);
+    int rc;
+
+    arg = xc_hypercall_buffer_alloc(xch, arg, sizeof(*arg));
+    if ( arg == NULL )
+    {
+        PERROR("Could not allocate memory for xc_hvm_set_mem_access_bulk hypercall");
+        return -1;
+    }
+
+    if ( xc_hypercall_bounce_pre(xch, arr) ) {
+        PERROR("Could not bounce arr for xc_hvm_set_mem_access_bulk hypercall");
+        rc = -1;
+        goto out;
+    }
+
+    if ( xc_hypercall_bounce_pre(xch, err) ) {
+        PERROR("Could not bounce err for xc_hvm_set_mem_access_bulk hypercall");
+        rc = -1;
+        goto out;
+    }
+
+    arg->domid         = dom;
+    arg->hvmmem_access = mem_access;
+    arg->nr            = nr;
+    set_xen_guest_handle(arg->arr, arr);
+    set_xen_guest_handle(arg->err, err);
+
+    hypercall.op     = __HYPERVISOR_hvm_op;
+    hypercall.arg[0] = HVMOP_set_mem_access_bulk;
+    hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
+
+    rc = do_xen_hypercall(xch, &hypercall);
+
+out:
+    xc_hypercall_buffer_free(xch, arg);
+    xc_hypercall_bounce_post(xch, arr);
+    xc_hypercall_bounce_post(xch, err);
+
+    return rc;
+}
+
 int xc_hvm_get_mem_access(
     xc_interface *xch, domid_t dom, uint64_t pfn, hvmmem_access_t* mem_access)
 {

diff -r b1bde8691257 -r 07225bc67197 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Thu Apr 26 17:10:29 2012 -0700
+++ b/tools/libxc/xenctrl.h Thu Apr 26 17:10:35 2012 -0700
@@ -1568,6 +1568,17 @@ int xc_hvm_set_mem_access(
     xc_interface *xch, domid_t dom, hvmmem_access_t memaccess, uint64_t first_pfn, uint64_t nr);
 
 /*
+ * Set the array of pfns to a specific access.
+ * When a pfn cannot be set to the specified access, its respective field in
+ * @err is set to the corresponding errno value.
+ * Allowed types are HVMMEM_access_default, HVMMEM_access_n, any combination of
+ * HVM_access_ + (rwx), and HVM_access_rx2rw
+ */
+int xc_hvm_set_mem_access_bulk(
+    xc_interface *xch, domid_t dom, hvmmem_access_t memaccess,
+    xen_pfn_t *arr, int *err, uint64_t num);
+
+/*
  * Gets the mem access for the given page (returned in memaccess on success)
 */
 int xc_hvm_get_mem_access(

diff -r b1bde8691257 -r 07225bc67197 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Thu Apr 26 17:10:29 2012 -0700
+++ b/xen/arch/x86/hvm/hvm.c Thu Apr 26 17:10:35 2012 -0700
@@ -4197,6 +4197,51 @@ long do_hvm_op(unsigned long op, XEN_GUE
         break;
     }
 
+    case HVMOP_set_mem_access_bulk:
+    {
+        struct xen_hvm_set_mem_access_bulk a;
+        struct domain *d;
+        xen_pfn_t *arr = 0;
+        int *err = 0;
+
+        if ( copy_from_guest(&a, arg, 1) )
+            return -EFAULT;
+
+        rc = rcu_lock_remote_target_domain_by_id(a.domid, &d);
+        if ( rc != 0 )
+            return rc;
+
+        rc = -EINVAL;
+
+        if ( !is_hvm_domain(d) )
+            goto param_fail9;
+
+        rc = -ENOMEM;
+        arr = xmalloc_array(xen_pfn_t, a.nr);
+        if (!arr)
+            goto param_fail9;
+
+        err = xmalloc_array(int, a.nr);
+        if (!err)
+            goto param_fail9;
+
+        if ( copy_from_guest(arr, a.arr, a.nr) )
+            goto param_fail9;
+
+        rc = p2m_set_access_bulk(d, arr, err, a.nr, a.hvmmem_access);
+
+        if ( copy_to_guest(a.err, err, a.nr) )
+            goto param_fail9;
+
+    param_fail9:
+        rcu_unlock_domain(d);
+        if (arr)
+            xfree(arr);
+        if (err)
+            xfree(err);
+        break;
+    }
+
     case HVMOP_get_mem_access:
     {
         struct xen_hvm_get_mem_access a;

diff -r b1bde8691257 -r 07225bc67197 xen/arch/x86/mm/p2m-ept.c
--- a/xen/arch/x86/mm/p2m-ept.c Thu Apr 26 17:10:29 2012 -0700
+++ b/xen/arch/x86/mm/p2m-ept.c Thu Apr 26 17:10:35 2012 -0700
@@ -597,6 +597,63 @@ out:
     return mfn;
 }
 
+static int
+ept_set_entry_bulk(struct p2m_domain *p2m, xen_pfn_t *arr, int *err,
+                   uint64_t nr, unsigned int order, p2m_access_t p2ma)
+{
+    unsigned long pfn;
+    p2m_access_t a;
+    p2m_type_t t;
+    mfn_t mfn;
+    int target = order / EPT_TABLE_ORDER;
+    int rc;
+    bool_t needs_sync, bulk_needs_sync = 0;
+    struct domain *d = p2m->domain;
+    ept_entry_t *old_entries = 0;
+
+    ASSERT((target == 2 && hvm_hap_has_1gb(d)) ||
+           (target == 1 && hvm_hap_has_2mb(d)) ||
+           (target == 0));
+
+    old_entries = xzalloc_array(ept_entry_t, nr);
+    if ( !old_entries )
+        return 0;
+
+    memset(err, 0, nr * sizeof(int));
+    for ( pfn = 0; pfn < nr; pfn++ )
+    {
+        if ( arr[pfn] > domain_get_maximum_gpfn(d) )
+        {
+            err[pfn] = -EINVAL;
+            continue;
+        }
+
+        needs_sync = 1;
+        mfn = ept_get_entry(p2m, arr[pfn], &t, &a, 0, NULL);
+        rc = __ept_set_entry(p2m, arr[pfn], mfn, order, t, p2ma,
+                             &old_entries[pfn], &needs_sync);
+        if (rc == 0)
+            err[pfn] = -ENOMEM;
+
+        if ( needs_sync )
+            bulk_needs_sync = 1;
+    }
+
+    if ( bulk_needs_sync )
+        ept_sync_domain(p2m->domain);
+
+    /* Release the old intermediate tables, if any.  This has to be the
+       last thing we do, after the ept_sync_domain() and removal
+       from the iommu tables, so as to avoid a potential
+       use-after-free. */
+    for ( pfn = 0; pfn < nr; pfn++ )
+        if ( is_epte_present(&old_entries[pfn]) )
+            ept_free_entry(p2m, &old_entries[pfn], target);
+
+    /* Success */
+    return 1;
+}
+
 /* WARNING: Only caller doesn't care about PoD pages.  So this function will
  * always return 0 for PoD pages, not populate them.  If that becomes necessary,
  * pass a p2m_query_t type along to distinguish. */
@@ -808,6 +865,7 @@ static void ept_change_entry_type_global
 void ept_p2m_init(struct p2m_domain *p2m)
 {
     p2m->set_entry = ept_set_entry;
+    p2m->set_entry_bulk = ept_set_entry_bulk;
     p2m->get_entry = ept_get_entry;
     p2m->change_entry_type_global = ept_change_entry_type_global;
     p2m->audit_p2m = NULL;

diff -r b1bde8691257 -r 07225bc67197 xen/arch/x86/mm/p2m-pt.c
--- a/xen/arch/x86/mm/p2m-pt.c Thu Apr 26 17:10:29 2012 -0700
+++ b/xen/arch/x86/mm/p2m-pt.c Thu Apr 26 17:10:35 2012 -0700
@@ -1139,6 +1139,7 @@ long p2m_pt_audit_p2m(struct p2m_domain
 void p2m_pt_init(struct p2m_domain *p2m)
 {
     p2m->set_entry = p2m_set_entry;
+    p2m->set_entry_bulk = NULL;
     p2m->get_entry = p2m_gfn_to_mfn;
     p2m->change_entry_type_global = p2m_change_type_global;
     p2m->write_p2m_entry = paging_write_p2m_entry;

diff -r b1bde8691257 -r 07225bc67197 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c Thu Apr 26 17:10:29 2012 -0700
+++ b/xen/arch/x86/mm/p2m.c Thu Apr 26 17:10:35 2012 -0700
@@ -1334,6 +1334,42 @@ int p2m_set_mem_access(struct domain *d,
     return rc;
 }
 
+int p2m_set_access_bulk(struct domain *d, xen_pfn_t *arr, int *err,
+                        uint64_t nr, hvmmem_access_t access)
+{
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    p2m_access_t a;
+    p2m_access_t memaccess[] = {
+        p2m_access_n,
+        p2m_access_r,
+        p2m_access_w,
+        p2m_access_rw,
+        p2m_access_x,
+        p2m_access_rx,
+        p2m_access_wx,
+        p2m_access_rwx,
+        p2m_access_rx2rw,
+        p2m_access_n2rwx,
+        p2m->default_access,
+    };
+    int rc = 0;
+
+    ASSERT(p2m->set_entry_bulk);
+
+    if ( (unsigned) access >= HVMMEM_access_default )
+        return -EINVAL;
+
+    a = memaccess[access];
+
+    p2m_lock(p2m);
+    if ( p2m->set_entry_bulk(p2m, arr, err, nr, PAGE_ORDER_4K, a) == 0 )
+        rc = -ENOMEM;
+    p2m_unlock(p2m);
+
+    return rc;
+}
+
+
 /* Get access type for a pfn
  * If pfn == -1ul, gets the default access type */
 int p2m_get_mem_access(struct domain *d, unsigned long pfn,

diff -r b1bde8691257 -r 07225bc67197 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Thu Apr 26 17:10:29 2012 -0700
+++ b/xen/include/asm-x86/p2m.h Thu Apr 26 17:10:35 2012 -0700
@@ -232,6 +232,13 @@ struct p2m_domain {
                                        mfn_t mfn, unsigned int page_order,
                                        p2m_type_t p2mt,
                                        p2m_access_t p2ma);
+    int                (*set_entry_bulk)(struct p2m_domain *p2m,
+                                         xen_pfn_t *arr,
+                                         int *err,
+                                         uint64_t nr,
+                                         unsigned int order,
+                                         p2m_access_t p2ma);
+
     mfn_t              (*get_entry   )(struct p2m_domain *p2m,
                                        unsigned long gfn,
                                        p2m_type_t *p2mt,
@@ -568,6 +575,11 @@ void p2m_mem_access_resume(struct domain
 int p2m_set_mem_access(struct domain *d, unsigned long start_pfn,
                        uint32_t nr, hvmmem_access_t access);
 
+/* Set access type for an array of pfns. set_mem_access success or failure is
+ * returned in the err array. */
+int p2m_set_access_bulk(struct domain *d, xen_pfn_t *arr, int *err,
+                        uint64_t nr, hvmmem_access_t access);
+
 /* Get access type for a pfn
  * If pfn == -1ul, gets the default access type */
 int p2m_get_mem_access(struct domain *d, unsigned long pfn,
@@ -583,7 +595,11 @@ static inline int p2m_set_mem_access(str
                                      unsigned long start_pfn, uint32_t nr,
                                      hvmmem_access_t access)
 { return -EINVAL; }
-static inline int p2m_get_mem_access(struct domain *d, unsigned long pfn,
+static inline int p2m_set_access_bulk(struct domain *d, xen_pfn_t *arr,
+                                      int *err, uint64_t nr,
+                                      hvmmem_access_t access)
+{ return -EINVAL; }
+static inline int p2m_get_mem_access(struct domain *d, unsigned long pfn,
                                      hvmmem_access_t *access)
 { return -EINVAL; }
 #endif

diff -r b1bde8691257 -r 07225bc67197 xen/include/public/hvm/hvm_op.h
--- a/xen/include/public/hvm/hvm_op.h Thu Apr 26 17:10:29 2012 -0700
+++ b/xen/include/public/hvm/hvm_op.h Thu Apr 26 17:10:35 2012 -0700
@@ -261,4 +261,22 @@ DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_m
 
 #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
 
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+#define HVMOP_set_mem_access_bulk 17
+/* Notify that an array of pfns is to have specific access types */
+struct xen_hvm_set_mem_access_bulk {
+    /* Domain to be updated. */
+    domid_t domid;
+    /* Memory type */
+    uint16_t hvmmem_access; /* hvm_access_t */
+    /* Array of pfns */
+    XEN_GUEST_HANDLE_64(xen_pfn_t) arr;
+    XEN_GUEST_HANDLE_64(int) err;
+    /* Number of entries */
+    uint64_t nr;
+};
+typedef struct xen_hvm_set_mem_access_bulk xen_hvm_set_mem_access_bulk_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_access_bulk_t);
+#endif
+
 #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
Christian Limpach
2012-Apr-27 01:06 UTC
Re: [PATCH 0 of 2] Add libxc API that sets mem_access type for an array of gfns
On Thu, Apr 26, 2012 at 5:15 PM, Aravindh Puthiyaparambil
<aravindh@virtuata.com> wrote:
> Does this look correct now?

It addresses the issues I've pointed out, but:

- you should leave the ASSERT where it is, or is there a reason to move it?

- this is wrong:
> -    old_entry = *ept_entry;
> +    old_entry->epte = ept_entry->epte;
You should follow the code and see what uses old_entry and you'll see
that within the function old_entry->mfn is used (your diff changes the
line that uses it) and ept_free_entry also accesses mfn.

- are you sure you can move the ept_sync_domain call past the iommu code?

I made a similar change a while ago, though it is for a more specific
case, updating the ept table to "clean" the vram tracking. My change is:
- clear needs_sync when setting the type to logdirty for a leaf entry
    if ( !is_epte_present(ept_entry) ||
         (!target && p2mt == p2m_ram_logdirty) )
        needs_sync = 0;
- only call ept_free_entry in the non-leaf case
    if ( target && is_epte_present(&old_entry) )
        ept_free_entry(p2m, &old_entry, target);
- call ept_sync_domain from hap_clean_vram_tracking

Maybe you can do something similar, for example passing in a hint
whether ept_sync_domain needs to be done or not. In my case, the
reasoning is that all the entries changed from hap_clean_vram_tracking
are leaf entries, so ept_free_entry will never be called and thus
ept_sync_domain can be deferred. I didn't think through/consider the
iommu case since that code is commented out in my tree.

christian
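[For context, the caller side of that vram change would look roughly like the sketch below. It is reconstructed from the description above, not quoted from an actual patch; begin_pfn/end_pfn and the exact p2m_change_type() arguments are illustrative.]

    /* Every entry touched here is a 4k leaf being switched to logdirty,
     * so ept_set_entry() never has an intermediate table to free and the
     * per-entry flush can be skipped... */
    for ( gfn = begin_pfn; gfn < end_pfn; gfn++ )
        p2m_change_type(d, gfn, p2m_ram_rw, p2m_ram_logdirty);

    /* ...and hap_clean_vram_tracking() issues one flush for the range. */
    ept_sync_domain(d);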
Aravindh Puthiyaparambil
2012-Apr-27 01:36 UTC
Re: [PATCH 0 of 2] Add libxc API that sets mem_access type for an array of gfns
On Apr 26, 2012 6:06 PM, "Christian Limpach" <christian.limpach@gmail.com>
wrote:
>
> On Thu, Apr 26, 2012 at 5:15 PM, Aravindh Puthiyaparambil
> <aravindh@virtuata.com> wrote:
> > Does this look correct now?
>
> It addresses the issues I've pointed out, but:
> - you should leave the ASSERT where it is, or is there a reason to move it?

Ok, I will move the ASSERT back to where it was.

> - this is wrong:
> > -    old_entry = *ept_entry;
> > +    old_entry->epte = ept_entry->epte;
> You should follow the code and see what uses old_entry and you'll see
> that within the function old_entry->mfn is used (your diff changes the
> line that uses it) and ept_free_entry also accesses mfn.

I will fix that.

> - are you sure you can move the ept_sync_domain call past the iommu code?

I was hoping Tim would give me feedback about that.

> I made a similar change a while ago, though it is for a more specific
> case, updating the ept table to "clean" the vram tracking. My change is:
> - clear needs_sync when setting the type to logdirty for a leaf entry
>     if ( !is_epte_present(ept_entry) ||
>          (!target && p2mt == p2m_ram_logdirty) )
>         needs_sync = 0;
> - only call ept_free_entry in the non-leaf case
>     if ( target && is_epte_present(&old_entry) )
>         ept_free_entry(p2m, &old_entry, target);
> - call ept_sync_domain from hap_clean_vram_tracking
>
> Maybe you can do something similar, for example passing in a hint
> whether ept_sync_domain needs to be done or not. In my case, the
> reasoning is that all the entries changed from hap_clean_vram_tracking
> are leaf entries, so ept_free_entry will never be called and thus
> ept_sync_domain can be deferred. I didn't think through/consider the
> iommu case since that code is commented out in my tree.

I thought about doing that initially. But then in the bulk case I would
always have to call ept_sync_domain() to be on the safe side. But if the
iommu case forces me down that path, then I guess I have no choice.

Aravindh
Tim Deegan
2012-Apr-27 08:48 UTC
Re: [PATCH 0 of 2] Add libxc API that sets mem_access type for an array of gfns
At 18:36 -0700 on 26 Apr (1335465381), Aravindh Puthiyaparambil wrote:
> > - this is wrong:
> > > -    old_entry = *ept_entry;
> > > +    old_entry->epte = ept_entry->epte;
> > You should follow the code and see what uses old_entry and you'll see
> > that within the function old_entry->mfn is used (your diff changes the
> > line that uses it) and ept_free_entry also accesses mfn.
>
> I will fix that.
>
> > - are you sure you can move the ept_sync_domain call past the iommu code?
>
> I was hoping Tim would give me feedback about that.

I'm afraid I won't be able to review this code until next week. This
change is already too late for the 4.2 freeze, though, so there's plenty
of time to get it right after we branch.

Cheers,

Tim.
Christian Limpach
2012-Apr-27 17:37 UTC
Re: [PATCH 0 of 2] Add libxc API that sets mem_access type for an array of gfns
On Thu, Apr 26, 2012 at 6:36 PM, Aravindh Puthiyaparambil
<aravindh@virtuata.com> wrote:
>
> On Apr 26, 2012 6:06 PM, "Christian Limpach" <christian.limpach@gmail.com>
> wrote:
>>
>> Maybe you can do something similar, for example passing in a hint
>> whether ept_sync_domain needs to be done or not. In my case, the
>> reasoning is that all the entries changed from hap_clean_vram_tracking
>> are leaf entries, so ept_free_entry will never be called and thus
>> ept_sync_domain can be deferred. I didn't think through/consider the
>> iommu case since that code is commented out in my tree.
>
> I thought about doing that initially. But then in the bulk case I would
> always have to call ept_sync_domain() to be on the safe side. But if the
> iommu case forces me down that path, then I guess I have no choice.

I think you should re-consider. It would avoid adding the extra
wrapper, which makes this code a lot less readable. Plus it avoids
the need for the old_entries array.

Let me re-iterate:
- if it's a leaf entry, then there's no need to call ept_free_entry
- if you don't need to call ept_free_entry, then you can defer
ept_sync_domain (subject to the iommu case)
- you could pass in a pointer to a bool to indicate to the caller that
ept_sync_domain was deferred and that the caller needs to do it, with
a NULL pointer indicating that the caller doesn't support defer

christian
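[Spelled out as code, the calling convention proposed above would look something like this on the bulk caller's side. A sketch only: the names follow the thread, and the accumulation across iterations is illustrative rather than taken from a posted patch.]

    bool_t deferred = 0, sync_deferred;

    for ( pfn = 0; pfn < nr; pfn++ )
    {
        sync_deferred = 0;
        /* A non-NULL pointer tells set_entry() it may skip the flush for
         * leaf entries; it sets *sync_deferred when it has done so. */
        p2m->set_entry(p2m, arr[pfn], mfn, PAGE_ORDER_4K, t, a,
                       &sync_deferred);
        deferred |= sync_deferred;
    }

    if ( deferred )
        ept_sync_domain(p2m->domain);   /* one flush for the whole batch */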
Aravindh Puthiyaparambil
2012-Apr-27 18:25 UTC
Re: [PATCH 0 of 2] Add libxc API that sets mem_access type for an array of gfns
On Fri, Apr 27, 2012 at 10:37 AM, Christian Limpach
<christian.limpach@gmail.com> wrote:
> On Thu, Apr 26, 2012 at 6:36 PM, Aravindh Puthiyaparambil
> <aravindh@virtuata.com> wrote:
>>
>> On Apr 26, 2012 6:06 PM, "Christian Limpach" <christian.limpach@gmail.com>
>> wrote:
>>>
>>> Maybe you can do something similar, for example passing in a hint
>>> whether ept_sync_domain needs to be done or not. In my case, the
>>> reasoning is that all the entries changed from hap_clean_vram_tracking
>>> are leaf entries, so ept_free_entry will never be called and thus
>>> ept_sync_domain can be deferred. I didn't think through/consider the
>>> iommu case since that code is commented out in my tree.
>>
>> I thought about doing that initially. But then in the bulk case I would
>> always have to call ept_sync_domain() to be on the safe side. But if the
>> iommu case forces me down that path, then I guess I have no choice.
>
> I think you should re-consider. It would avoid adding the extra
> wrapper, which makes this code a lot less readable. Plus it avoids
> the need for the old_entries array.
>
> Let me re-iterate:
> - if it's a leaf entry, then there's no need to call ept_free_entry
> - if you don't need to call ept_free_entry, then you can defer
> ept_sync_domain (subject to the iommu case)
> - you could pass in a pointer to a bool to indicate to the caller that
> ept_sync_domain was deferred and that the caller needs to do it, with
> a NULL pointer indicating that the caller doesn't support defer

I understand now and like this approach. I will rework the patch and
send it out.

Thanks,
Aravindh
Aravindh Puthiyaparambil
2012-Apr-27 18:26 UTC
Re: [PATCH 0 of 2] Add libxc API that sets mem_access type for an array of gfns
On Fri, Apr 27, 2012 at 1:48 AM, Tim Deegan <tim@xen.org> wrote:
> At 18:36 -0700 on 26 Apr (1335465381), Aravindh Puthiyaparambil wrote:
>> > - this is wrong:
>> > > -    old_entry = *ept_entry;
>> > > +    old_entry->epte = ept_entry->epte;
>> > You should follow the code and see what uses old_entry and you'll see
>> > that within the function old_entry->mfn is used (your diff changes the
>> > line that uses it) and ept_free_entry also accesses mfn.
>>
>> I will fix that.
>>
>> > - are you sure you can move the ept_sync_domain call past the iommu code?
>>
>> I was hoping Tim would give me feedback about that.
>
> I'm afraid I won't be able to review this code until next week. This
> change is already too late for the 4.2 freeze, though, so there's plenty
> of time to get it right after we branch.

Not a problem. This can wait for post 4.2. By then I would have
reworked it the way Christian was mentioning.

Thanks,
Aravindh
Aravindh Puthiyaparambil
2012-Apr-28 04:22 UTC
Re: [PATCH 0 of 2] Add libxc API that sets mem_access type for an array of gfns
On Fri, Apr 27, 2012 at 11:25 AM, Aravindh Puthiyaparambil <aravindh@virtuata.com> wrote:> On Fri, Apr 27, 2012 at 10:37 AM, Christian Limpach > <christian.limpach@gmail.com> wrote: >> On Thu, Apr 26, 2012 at 6:36 PM, Aravindh Puthiyaparambil >> <aravindh@virtuata.com> wrote: >>> >>> On Apr 26, 2012 6:06 PM, "Christian Limpach" <christian.limpach@gmail.com> >>> wrote: >>>> >>>> Maybe you can do something similar, for example passing in a hint >>>> whether ept_sync_domain needs to be done or not. In my case, the >>>> reasoning is that all the entries changed from hap_clean_vram_tracking >>>> are leaf entries, so ept_free_entry will never be called and thus >>>> ept_sync_domain can be deferred. I didn''t think through/consider the >>>> iommu case since that code is commented out in my tree. >>> >>> I thought about doing that initially. But then in the bulk case I would >>> always have to call ept_sync_domain() to be on the safe side. But if the >>> iommu case forces me down that path, then I guess I have no choice. >> >> I think you should re-consider. It would avoid adding the extra >> wrapper, which makes this code a lot less readable. Plus it avoids >> the need for the old_entries array. >> >> Let me re-iterate: >> - if it''s a leaf entry, then there''s no need to call ept_free_entry >> - if you don''t need to call ept_free_entry, then you can defer >> ept_sync_domain (subject to the iommu case) >> - you could pass in a pointer to a bool to indicate to the caller that >> ept_sync_domain was deferred and that the caller needs to do it, with >> a NULL pointer indicating that the caller doesn''t support deferHow does this look? changeset: 25257:2c05bdb052ea user: Aravindh Puthiyaparambil <aravindh@virtuata.com> date: Fri Apr 27 20:28:37 2012 -0700 summary: x86/mm: Add sync deferred option to p2m->set_entry() diff -r 9dda0efd8ce1 -r 2c05bdb052ea xen/arch/x86/mm/mem_sharing.c --- a/xen/arch/x86/mm/mem_sharing.c Fri Apr 27 17:57:55 2012 +0200 +++ b/xen/arch/x86/mm/mem_sharing.c Fri Apr 27 20:28:37 2012 -0700 @@ -1272,7 +1272,7 @@ int relinquish_shared_pages(struct domai /* Clear out the p2m entry so no one else may try to * unshare */ p2m->set_entry(p2m, gfn, _mfn(0), PAGE_ORDER_4K, - p2m_invalid, p2m_access_rwx); + p2m_invalid, p2m_access_rwx, NULL); count++; } diff -r 9dda0efd8ce1 -r 2c05bdb052ea xen/arch/x86/mm/p2m-ept.c --- a/xen/arch/x86/mm/p2m-ept.c Fri Apr 27 17:57:55 2012 +0200 +++ b/xen/arch/x86/mm/p2m-ept.c Fri Apr 27 20:28:37 2012 -0700 @@ -274,10 +274,13 @@ static int ept_next_level(struct p2m_dom /* * ept_set_entry() computes ''need_modify_vtd_table'' for itself, * by observing whether any gfn->mfn translations are modified. + * If sync_deferred is not NULL, then the caller will take care of + * calling ept_sync_domain() in the cases where it can be deferred. 
*/ static int ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, - unsigned int order, p2m_type_t p2mt, p2m_access_t p2ma) + unsigned int order, p2m_type_t p2mt, p2m_access_t p2ma, + bool_t *sync_deferred) { ept_entry_t *table, *ept_entry = NULL; unsigned long gfn_remainder = gfn; @@ -293,6 +296,7 @@ ept_set_entry(struct p2m_domain *p2m, un int needs_sync = 1; struct domain *d = p2m->domain; ept_entry_t old_entry = { .epte = 0 }; + bool_t _sync_deferred = 0; /* * the caller must make sure: @@ -309,6 +313,9 @@ ept_set_entry(struct p2m_domain *p2m, un (target == 1 && hvm_hap_has_2mb(d)) || (target == 0)); + if (sync_deferred) + *sync_deferred = 1; + table = map_domain_page(ept_get_asr(d)); for ( i = ept_get_wl(d); i > target; i-- ) @@ -346,7 +353,11 @@ ept_set_entry(struct p2m_domain *p2m, un /* No need to flush if the old entry wasn''t valid */ if ( !is_epte_present(ept_entry) ) + { needs_sync = 0; + if ( sync_deferred ) + *sync_deferred = 0; + } /* If we''re replacing a non-leaf entry with a leaf entry (1GiB or 2MiB), * the intermediate tables will be freed below after the ept flush @@ -385,6 +396,9 @@ ept_set_entry(struct p2m_domain *p2m, un ASSERT(is_epte_superpage(ept_entry)); + if ( sync_deferred ) + _sync_deferred = 1; + split_ept_entry = atomic_read_ept_entry(ept_entry); if ( !ept_split_super_page(p2m, &split_ept_entry, i, target) ) @@ -438,7 +452,7 @@ ept_set_entry(struct p2m_domain *p2m, un out: unmap_domain_page(table); - if ( needs_sync ) + if ( needs_sync && !_sync_deferred ) ept_sync_domain(p2m->domain); if ( rv && iommu_enabled && need_iommu(p2m->domain) && need_modify_vtd_table ) @@ -727,7 +741,8 @@ void ept_change_entry_emt_with_range(str order = level * EPT_TABLE_ORDER; if ( need_modify_ept_entry(p2m, gfn, mfn, e.ipat, e.emt, e.sa_p2mt) ) - ept_set_entry(p2m, gfn, mfn, order, e.sa_p2mt, e.access); + ept_set_entry(p2m, gfn, mfn, order, e.sa_p2mt, e.access, + NULL); gfn += trunk; break; } @@ -737,7 +752,7 @@ void ept_change_entry_emt_with_range(str else /* gfn assigned with 4k */ { if ( need_modify_ept_entry(p2m, gfn, mfn, e.ipat, e.emt, e.sa_p2mt) ) - ept_set_entry(p2m, gfn, mfn, order, e.sa_p2mt, e.access); + ept_set_entry(p2m, gfn, mfn, order, e.sa_p2mt, e.access, NULL); } } p2m_unlock(p2m); diff -r 9dda0efd8ce1 -r 2c05bdb052ea xen/arch/x86/mm/p2m-pt.c --- a/xen/arch/x86/mm/p2m-pt.c Fri Apr 27 17:57:55 2012 +0200 +++ b/xen/arch/x86/mm/p2m-pt.c Fri Apr 27 20:28:37 2012 -0700 @@ -291,7 +291,8 @@ p2m_next_level(struct p2m_domain *p2m, m // Returns 0 on error (out of memory) static int p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, - unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma) + unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma, + bool_t *unused) { // XXX -- this might be able to be faster iff current->domain == d mfn_t table_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m)); diff -r 9dda0efd8ce1 -r 2c05bdb052ea xen/arch/x86/mm/p2m.c --- a/xen/arch/x86/mm/p2m.c Fri Apr 27 17:57:55 2012 +0200 +++ b/xen/arch/x86/mm/p2m.c Fri Apr 27 20:28:37 2012 -0700 @@ -227,7 +227,7 @@ int set_p2m_entry(struct p2m_domain *p2m else order = 0; - if ( !p2m->set_entry(p2m, gfn, mfn, order, p2mt, p2ma) ) + if ( !p2m->set_entry(p2m, gfn, mfn, order, p2mt, p2ma, NULL) ) rc = 0; gfn += 1ul << order; if ( mfn_x(mfn) != INVALID_MFN ) @@ -1199,14 +1199,14 @@ bool_t p2m_mem_access_check(unsigned lon if ( access_w && p2ma == p2m_access_rx2rw ) { - p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt, p2m_access_rw); + p2m->set_entry(p2m, gfn, mfn, 
+                               PAGE_ORDER_4K, p2mt, p2m_access_rw, NULL);
             gfn_unlock(p2m, gfn, 0);
             return 1;
         }
         else if ( p2ma == p2m_access_n2rwx )
         {
             ASSERT(access_w || access_r || access_x);
-            p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt, p2m_access_rwx);
+            p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt, p2m_access_rwx, NULL);
         }
         gfn_unlock(p2m, gfn, 0);
@@ -1228,7 +1228,8 @@ bool_t p2m_mem_access_check(unsigned lon
         {
             /* A listener is not required, so clear the access restrictions */
             gfn_lock(p2m, gfn, 0);
-            p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt, p2m_access_rwx);
+            p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt,
+                           p2m_access_rwx, NULL);
             gfn_unlock(p2m, gfn, 0);
         }
         return 1;
@@ -1292,6 +1293,7 @@ int p2m_set_mem_access(struct domain *d,
     p2m_type_t t;
     mfn_t mfn;
     int rc = 0;
+    bool_t sync_deferred = 1;
 
     /* N.B. _not_ static: initializer depends on p2m->default_access */
     p2m_access_t memaccess[] = {
@@ -1324,12 +1326,17 @@ int p2m_set_mem_access(struct domain *d,
     for ( pfn = start_pfn; pfn < start_pfn + nr; pfn++ )
     {
         mfn = p2m->get_entry(p2m, pfn, &t, &_a, 0, NULL);
-        if ( p2m->set_entry(p2m, pfn, mfn, PAGE_ORDER_4K, t, a) == 0 )
+        if ( p2m->set_entry(p2m, pfn, mfn, PAGE_ORDER_4K, t, a, &sync_deferred)
+             == 0 )
         {
             rc = -ENOMEM;
             break;
         }
     }
+
+    if ( sync_deferred )
+        ept_sync_domain(p2m->domain);
+
     p2m_unlock(p2m);
     return rc;
 }

diff -r 9dda0efd8ce1 -r 2c05bdb052ea xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Fri Apr 27 17:57:55 2012 +0200
+++ b/xen/include/asm-x86/p2m.h Fri Apr 27 20:28:37 2012 -0700
@@ -231,7 +231,8 @@ struct p2m_domain {
                                        unsigned long gfn,
                                        mfn_t mfn, unsigned int page_order,
                                        p2m_type_t p2mt,
-                                       p2m_access_t p2ma);
+                                       p2m_access_t p2ma,
+                                       bool_t *sync_deferred);
     mfn_t              (*get_entry   )(struct p2m_domain *p2m,
                                        unsigned long gfn,
                                        p2m_type_t *p2mt,

changeset:   25258:5a0d60bb536b
user:        Aravindh Puthiyaparambil <aravindh@virtuata.com>
date:        Fri Apr 27 21:10:59 2012 -0700
summary:     mem_access: Add xc_hvm_mem_access_bulk() API

diff -r 2c05bdb052ea -r 5a0d60bb536b tools/libxc/xc_misc.c
--- a/tools/libxc/xc_misc.c     Fri Apr 27 20:28:37 2012 -0700
+++ b/tools/libxc/xc_misc.c     Fri Apr 27 21:10:59 2012 -0700
@@ -570,6 +570,57 @@ int xc_hvm_set_mem_access(
     return rc;
 }
 
+int xc_hvm_set_mem_access_bulk(
+    xc_interface *xch, domid_t dom, hvmmem_access_t mem_access,
+    xen_pfn_t *arr, int *err, uint64_t nr)
+{
+    DECLARE_HYPERCALL;
+    DECLARE_HYPERCALL_BUFFER(struct xen_hvm_set_mem_access_bulk, arg);
+    DECLARE_HYPERCALL_BOUNCE(arr, sizeof(xen_pfn_t) * nr,
+                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
+    DECLARE_HYPERCALL_BOUNCE(err, sizeof(int) * nr,
+                             XC_HYPERCALL_BUFFER_BOUNCE_OUT);
+    int rc;
+
+    arg = xc_hypercall_buffer_alloc(xch, arg, sizeof(*arg));
+    if ( arg == NULL )
+    {
+        PERROR("Could not allocate memory for xc_hvm_set_mem_access_bulk hypercall");
+        return -1;
+    }
+
+    if ( xc_hypercall_bounce_pre(xch, arr) )
+    {
+        PERROR("Could not bounce arr for xc_hvm_set_mem_access_bulk hypercall");
+        rc = -1;
+        goto out;
+    }
+
+    if ( xc_hypercall_bounce_pre(xch, err) )
+    {
+        PERROR("Could not bounce err for xc_hvm_set_mem_access_bulk hypercall");
+        rc = -1;
+        goto out;
+    }
+
+    arg->domid         = dom;
+    arg->hvmmem_access = mem_access;
+    arg->nr            = nr;
+    set_xen_guest_handle(arg->arr, arr);
+    set_xen_guest_handle(arg->err, err);
+
+    hypercall.op     = __HYPERVISOR_hvm_op;
+    hypercall.arg[0] = HVMOP_set_mem_access_bulk;
+    hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
+
+    rc = do_xen_hypercall(xch, &hypercall);
+
+out:
+    xc_hypercall_buffer_free(xch, arg);
+    xc_hypercall_bounce_post(xch, arr);
+    xc_hypercall_bounce_post(xch, err);
+
+    return rc;
+}
+
 int xc_hvm_get_mem_access(
     xc_interface *xch, domid_t dom, uint64_t pfn, hvmmem_access_t* mem_access)
 {

diff -r 2c05bdb052ea -r 5a0d60bb536b tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Fri Apr 27 20:28:37 2012 -0700
+++ b/tools/libxc/xenctrl.h     Fri Apr 27 21:10:59 2012 -0700
@@ -1568,6 +1568,17 @@ int xc_hvm_set_mem_access(
     xc_interface *xch, domid_t dom, hvmmem_access_t memaccess, uint64_t first_pfn, uint64_t nr);
 
 /*
+ * Set the array of pfns to a specific access.
+ * When a pfn cannot be set to the specified access, its respective field in
+ * @err is set to the corresponding errno value.
+ * Allowed types are HVMMEM_access_default, HVMMEM_access_n, any combination of
+ * HVM_access_ + (rwx), and HVM_access_rx2rw
+ */
+int xc_hvm_set_mem_access_bulk(
+    xc_interface *xch, domid_t dom, hvmmem_access_t memaccess,
+    xen_pfn_t *arr, int *err, uint64_t num);
+
+/*
  * Gets the mem access for the given page (returned in memacess on success)
  */
 int xc_hvm_get_mem_access(

diff -r 2c05bdb052ea -r 5a0d60bb536b xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Fri Apr 27 20:28:37 2012 -0700
+++ b/xen/arch/x86/hvm/hvm.c    Fri Apr 27 21:10:59 2012 -0700
@@ -4197,6 +4197,51 @@ long do_hvm_op(unsigned long op, XEN_GUE
         break;
     }
 
+    case HVMOP_set_mem_access_bulk:
+    {
+        struct xen_hvm_set_mem_access_bulk a;
+        struct domain *d;
+        xen_pfn_t *arr = 0;
+        int *err = 0;
+
+        if ( copy_from_guest(&a, arg, 1) )
+            return -EFAULT;
+
+        rc = rcu_lock_remote_target_domain_by_id(a.domid, &d);
+        if ( rc != 0 )
+            return rc;
+
+        rc = -EINVAL;
+        if ( !is_hvm_domain(d) )
+            goto param_fail9;
+
+        rc = -ENOMEM;
+        arr = xmalloc_array(xen_pfn_t, a.nr);
+        if ( !arr )
+            goto param_fail9;
+
+        err = xmalloc_array(int, a.nr);
+        if ( !err )
+            goto param_fail9;
+
+        if ( copy_from_guest(arr, a.arr, a.nr) )
+            goto param_fail9;
+
+        rc = p2m_set_access_bulk(d, arr, err, a.nr, a.hvmmem_access);
+
+        if ( copy_to_guest(a.err, err, a.nr) )
+            goto param_fail9;
+
+    param_fail9:
+        rcu_unlock_domain(d);
+        if ( arr )
+            xfree(arr);
+        if ( err )
+            xfree(err);
+        break;
+    }
+
     case HVMOP_get_mem_access:
     {
         struct xen_hvm_get_mem_access a;

diff -r 2c05bdb052ea -r 5a0d60bb536b xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Fri Apr 27 20:28:37 2012 -0700
+++ b/xen/arch/x86/mm/p2m.c     Fri Apr 27 21:10:59 2012 -0700
@@ -1341,6 +1341,61 @@ int p2m_set_mem_access(struct domain *d,
     return rc;
 }
 
+int p2m_set_access_bulk(struct domain *d, xen_pfn_t *arr, int *err,
+                        uint64_t nr, hvmmem_access_t access)
+{
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    unsigned long pfn;
+    p2m_access_t a, _a;
+    p2m_type_t t;
+    p2m_access_t memaccess[] = {
+        p2m_access_n,
+        p2m_access_r,
+        p2m_access_w,
+        p2m_access_rw,
+        p2m_access_x,
+        p2m_access_rx,
+        p2m_access_wx,
+        p2m_access_rwx,
+        p2m_access_rx2rw,
+        p2m_access_n2rwx,
+        p2m->default_access,
+    };
+    mfn_t mfn;
+    int rc;
+    bool_t sync_deferred = 1;
+
+    if ( (unsigned) access >= HVMMEM_access_default )
+        return -EINVAL;
+
+    a = memaccess[access];
+
+    p2m_lock(p2m);
+
+    for ( pfn = 0; pfn < nr; pfn++ )
+    {
+        if ( arr[pfn] > domain_get_maximum_gpfn(d) )
+        {
+            err[pfn] = -EINVAL;
+            continue;
+        }
+
+        mfn = p2m->get_entry(p2m, arr[pfn], &t, &_a, 0, NULL);
+        rc = p2m->set_entry(p2m, arr[pfn], mfn, PAGE_ORDER_4K, t, a,
+                            &sync_deferred);
+        if ( rc == 0 )
+            err[pfn] = -ENOMEM;
+    }
+
+    if ( sync_deferred )
+        ept_sync_domain(p2m->domain);
+
+    p2m_unlock(p2m);
+
+    return 0;
+}
+
 /* Get access type for a pfn
  * If pfn == -1ul, gets the default access type */
 int p2m_get_mem_access(struct domain *d, unsigned long pfn,

diff -r 2c05bdb052ea -r 5a0d60bb536b xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Fri Apr 27 20:28:37 2012 -0700
+++ b/xen/include/asm-x86/p2m.h Fri Apr 27 21:10:59 2012 -0700
@@ -574,6 +574,11 @@ void p2m_mem_access_resume(struct domain
 int p2m_set_mem_access(struct domain *d, unsigned long start_pfn,
                        uint32_t nr, hvmmem_access_t access);
 
+/* Set access type for an array of pfns. set_mem_access success or failure is
+ * returned in the err array. */
+int p2m_set_access_bulk(struct domain *d, xen_pfn_t *arr, int *err,
+                        uint64_t nr, hvmmem_access_t access);
+
 /* Get access type for a pfn
  * If pfn == -1ul, gets the default access type */
 int p2m_get_mem_access(struct domain *d, unsigned long pfn,
@@ -589,7 +594,11 @@ static inline int p2m_set_mem_access(str
                                      unsigned long start_pfn,
                                      uint32_t nr, hvmmem_access_t access)
 { return -EINVAL; }
-static inline int p2m_get_mem_access(struct domain *d, unsigned long pfn,
+static inline int p2m_set_access_bulk(struct domain *d, xen_pfn_t *arr,
+                                      int *err, uint64_t nr,
+                                      hvmmem_access_t access)
+{ return -EINVAL; }
+static inline int p2m_get_mem_access(struct domain *d, unsigned long pfn,
                                      hvmmem_access_t *access)
 { return -EINVAL; }
 
 #endif

diff -r 2c05bdb052ea -r 5a0d60bb536b xen/include/public/hvm/hvm_op.h
--- a/xen/include/public/hvm/hvm_op.h   Fri Apr 27 20:28:37 2012 -0700
+++ b/xen/include/public/hvm/hvm_op.h   Fri Apr 27 21:10:59 2012 -0700
@@ -261,4 +261,22 @@ DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_m
 
 #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
 
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+#define HVMOP_set_mem_access_bulk 17
+/* Notify that an array of pfns is to have specific access types */
+struct xen_hvm_set_mem_access_bulk {
+    /* Domain to be updated. */
+    domid_t domid;
+    /* Memory type */
+    uint16_t hvmmem_access;    /* hvm_access_t */
+    /* Array of pfns */
+    XEN_GUEST_HANDLE_64(xen_pfn_t) arr;
+    XEN_GUEST_HANDLE_64(int) err;
+    /* Number of entries */
+    uint64_t nr;
+};
+typedef struct xen_hvm_set_mem_access_bulk xen_hvm_set_mem_access_bulk_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_access_bulk_t);
+#endif
+
 #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
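For illustration, a toolstack client could use the new bulk call as in the minimal sketch below. The domain ID and gfn list are made-up values and error handling is abbreviated; this is not code from the patch series.

#include <stdio.h>
#include <stdlib.h>
#include <xenctrl.h>

int main(void)
{
    xc_interface *xch = xc_interface_open(NULL, NULL, 0);
    domid_t dom = 1;                                /* assumed target domain */
    xen_pfn_t gfns[] = { 0x1000, 0x2400, 0x9800 };  /* discontiguous gfns */
    uint64_t nr = sizeof(gfns) / sizeof(gfns[0]);
    int *err = calloc(nr, sizeof(int));
    uint64_t i;

    if ( !xch || !err )
        return 1;

    /* One libxc call (and one hypercall) marks every gfn in the array
     * not-readable/writable/executable, instead of nr separate calls
     * to xc_hvm_set_mem_access(). */
    if ( xc_hvm_set_mem_access_bulk(xch, dom, HVMMEM_access_n,
                                    gfns, err, nr) < 0 )
        perror("xc_hvm_set_mem_access_bulk");

    /* Per-gfn failures are reported through the err array. */
    for ( i = 0; i < nr; i++ )
        if ( err[i] )
            fprintf(stderr, "gfn %#lx: error %d\n",
                    (unsigned long)gfns[i], err[i]);

    free(err);
    xc_interface_close(xch);
    return 0;
}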
Christian Limpach
2012-May-03 03:28 UTC
Re: [PATCH 0 of 2] Add libxc API that sets mem_access type for an array of gfns
On Fri, Apr 27, 2012 at 9:22 PM, Aravindh Puthiyaparambil
<aravindh@virtuata.com> wrote:
>>> Let me re-iterate:
>>> - if it's a leaf entry, then there's no need to call ept_free_entry
>>> - if you don't need to call ept_free_entry, then you can defer
>>>   ept_sync_domain (subject to the iommu case)
>>> - you could pass in a pointer to a bool to indicate to the caller that
>>>   ept_sync_domain was deferred and that the caller needs to do it, with
>>>   a NULL pointer indicating that the caller doesn't support defer
>
> How does this look?

It's missing the "leaf entry" part of my description of how I think
things should work. Without that, you're effectively ignoring the
following comment at the end of ept_set_entry:

    /* Release the old intermediate tables, if any.  This has to be the
       last thing we do, after the ept_sync_domain() and removal
       from the iommu tables, so as to avoid a potential
       use-after-free. */

See inline comments in your patch below for how I'd change this,
untested...

    christian

> @@ -293,6 +296,7 @@ ept_set_entry(struct p2m_domain *p2m, un
>      int needs_sync = 1;
>      struct domain *d = p2m->domain;
>      ept_entry_t old_entry = { .epte = 0 };
> +    bool_t _sync_deferred = 0;

don't need that

>      /*
>       * the caller must make sure:
> @@ -309,6 +313,9 @@ ept_set_entry(struct p2m_domain *p2m, un
>                 (target == 1 && hvm_hap_has_2mb(d)) ||
>                 (target == 0));
>
> +    if (sync_deferred)
> +        *sync_deferred = 1;
> +
>      table = map_domain_page(ept_get_asr(d));
>
>      for ( i = ept_get_wl(d); i > target; i-- )
> @@ -346,7 +353,11 @@ ept_set_entry(struct p2m_domain *p2m, un
>
>      /* No need to flush if the old entry wasn't valid */
>      if ( !is_epte_present(ept_entry) )
>          needs_sync = 0;

This should be:

    if ( !is_epte_present(ept_entry) ||
         (!target && sync_deferred) )
    {
        needs_sync = 0;
        if ( sync_deferred )
            *sync_deferred = 0;
    }

This expresses the notion that we're going to skip the call to
ept_free_entry since it's a leaf entry (target == 0) and that the
caller will make the ept_sync_domain call (sync_deferred != NULL).

>      /* If we're replacing a non-leaf entry with a leaf entry (1GiB or 2MiB),
>       * the intermediate tables will be freed below after the ept flush
> @@ -385,6 +396,9 @@ ept_set_entry(struct p2m_domain *p2m, un
>
>      ASSERT(is_epte_superpage(ept_entry));
>
> +    if ( sync_deferred )
> +        _sync_deferred = 1;
> +

don't need that

>      split_ept_entry = atomic_read_ept_entry(ept_entry);
>
>      if ( !ept_split_super_page(p2m, &split_ept_entry, i, target) )
> @@ -438,7 +452,7 @@ ept_set_entry(struct p2m_domain *p2m, un
>  out:
>      unmap_domain_page(table);
>
> -    if ( needs_sync )
> +    if ( needs_sync && !_sync_deferred )
>          ept_sync_domain(p2m->domain);

don't need that change, test on needs_sync is sufficient

>
>      if ( rv && iommu_enabled && need_iommu(p2m->domain) &&
>           need_modify_vtd_table )

Then at the end of ept_set_entry add the test for non-leaf, which is
more like an optimization/clarification since ept_free_entry has the
same test already.

    if ( target && is_epte_present(&old_entry) )
        ept_free_entry(p2m, &old_entry, target);
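Taken together, the two inline suggestions above would leave the exit path of ept_set_entry() looking roughly like the untested sketch below, assembled from the review comments rather than from any posted patch:

/* Flush-skip condition inside ept_set_entry(): */
if ( !is_epte_present(ept_entry) ||
     (!target && sync_deferred) )    /* leaf entry, caller batches the sync */
{
    needs_sync = 0;
    if ( sync_deferred )
        *sync_deferred = 0;
}

/* ...and the function tail: */
out:
    unmap_domain_page(table);

    if ( needs_sync )
        ept_sync_domain(p2m->domain);

    /* Only non-leaf (intermediate) tables are ever freed here; a leaf
     * old_entry never reaches ept_free_entry(), which is what makes it
     * safe to let the caller defer the flush. */
    if ( target && is_epte_present(&old_entry) )
        ept_free_entry(p2m, &old_entry, target);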
Aravindh Puthiyaparambil
2012-May-04 22:02 UTC
Re: [PATCH 0 of 2] Add libxc API that sets mem_access type for an array of gfns
On Wed, May 2, 2012 at 8:28 PM, Christian Limpach
<christian.limpach@gmail.com> wrote:
> It's missing the "leaf entry" part of my description of how I think
> things should work.
> [...]
> This should be:
>
>     if ( !is_epte_present(ept_entry) ||
>          (!target && sync_deferred) )
>     {
>         needs_sync = 0;
>         if ( sync_deferred )
>             *sync_deferred = 0;
>     }
>
> This expresses the notion that we're going to skip the call to
> ept_free_entry since it's a leaf entry (target == 0) and that the
> caller will make the ept_sync_domain call (sync_deferred != NULL).
> [...]
> Then at the end of ept_set_entry add the test for non-leaf, which is
> more like an optimization/clarification since ept_free_entry has the
> same test already.
>
>     if ( target && is_epte_present(&old_entry) )
>         ept_free_entry(p2m, &old_entry, target);

Sorry for not following and thanks for your patience. Hopefully this
looks correct. I have just included the pertinent bit of the patch you
were commenting on.
diff -r 9dda0efd8ce1 -r c180942992bf xen/arch/x86/mm/p2m-ept.c
--- a/xen/arch/x86/mm/p2m-ept.c Fri Apr 27 17:57:55 2012 +0200
+++ b/xen/arch/x86/mm/p2m-ept.c Fri May 04 14:56:12 2012 -0700
@@ -274,10 +274,13 @@ static int ept_next_level(struct p2m_dom
 /*
  * ept_set_entry() computes 'need_modify_vtd_table' for itself,
  * by observing whether any gfn->mfn translations are modified.
+ * If sync_deferred is not NULL, then the caller will take care of
+ * calling ept_sync_domain() in the cases where it can be deferred.
  */
 static int
 ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
-              unsigned int order, p2m_type_t p2mt, p2m_access_t p2ma)
+              unsigned int order, p2m_type_t p2mt, p2m_access_t p2ma,
+              bool_t *sync_deferred)
 {
     ept_entry_t *table, *ept_entry = NULL;
     unsigned long gfn_remainder = gfn;
@@ -309,6 +312,9 @@ ept_set_entry(struct p2m_domain *p2m, un
                (target == 1 && hvm_hap_has_2mb(d)) ||
                (target == 0));
 
+    if (sync_deferred)
+        *sync_deferred = 1;
+
     table = map_domain_page(ept_get_asr(d));
 
     for ( i = ept_get_wl(d); i > target; i-- )
@@ -345,8 +351,12 @@ ept_set_entry(struct p2m_domain *p2m, un
         ept_entry_t new_entry = { .epte = 0 };
 
         /* No need to flush if the old entry wasn't valid */
-        if ( !is_epte_present(ept_entry) )
+        if ( !is_epte_present(ept_entry) || (!target && sync_deferred) )
+        {
             needs_sync = 0;
+            if ( sync_deferred )
+                *sync_deferred = 0;
+        }
 
         /* If we're replacing a non-leaf entry with a leaf entry (1GiB or 2MiB),
          * the intermediate tables will be freed below after the ept flush
@@ -477,7 +487,7 @@ out:
        last thing we do, after the ept_sync_domain() and removal
        from the iommu tables, so as to avoid a potential
        use-after-free. */
-    if ( is_epte_present(&old_entry) )
+    if ( target && is_epte_present(&old_entry) )
         ept_free_entry(p2m, &old_entry, target);
 
     return rv;
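To recap the calling convention this revision establishes, here is a short sketch using the names from the patch; it mirrors the code above rather than adding anything new:

/* Existing single-gfn callers opt out of deferral by passing NULL,
 * keeping the old flush-per-update behaviour: */
p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt, p2m_access_rwx, NULL);

/* A batching caller such as p2m_set_access_bulk() opts in with a
 * pointer and issues at most one flush for the whole array: */
bool_t sync_deferred = 1;

for ( pfn = 0; pfn < nr; pfn++ )
    p2m->set_entry(p2m, arr[pfn], mfn, PAGE_ORDER_4K, t, a,
                   &sync_deferred);

if ( sync_deferred )
    ept_sync_domain(p2m->domain);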
Tim Deegan
2012-May-17 10:05 UTC
Re: [PATCH 0 of 2] Add libxc API that sets mem_access type for an array of gfns
Hi Aravindh,

At 15:02 -0700 on 04 May (1336143748), Aravindh Puthiyaparambil wrote:
> diff -r 9dda0efd8ce1 -r c180942992bf xen/arch/x86/mm/p2m-ept.c
> --- a/xen/arch/x86/mm/p2m-ept.c Fri Apr 27 17:57:55 2012 +0200
> +++ b/xen/arch/x86/mm/p2m-ept.c Fri May 04 14:56:12 2012 -0700
> @@ -274,10 +274,13 @@ static int ept_next_level(struct p2m_dom
>  /*
>   * ept_set_entry() computes 'need_modify_vtd_table' for itself,
>   * by observing whether any gfn->mfn translations are modified.
> + * If sync_deferred is not NULL, then the caller will take care of
> + * calling ept_sync_domain() in the cases where it can be deferred.
>   */
>  static int
>  ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
> -              unsigned int order, p2m_type_t p2mt, p2m_access_t p2ma)
> +              unsigned int order, p2m_type_t p2mt, p2m_access_t p2ma,
> +              bool_t *sync_deferred)
>  {
>      ept_entry_t *table, *ept_entry = NULL;
>      unsigned long gfn_remainder = gfn;
> @@ -309,6 +312,9 @@ ept_set_entry(struct p2m_domain *p2m, un
>                 (target == 1 && hvm_hap_has_2mb(d)) ||
>                 (target == 0));
>
> +    if (sync_deferred)

Xen coding style has spaces inside those parentheses.

> +        *sync_deferred = 1;
> +
>      table = map_domain_page(ept_get_asr(d));
>
>      for ( i = ept_get_wl(d); i > target; i-- )
> @@ -345,8 +351,12 @@ ept_set_entry(struct p2m_domain *p2m, un
>          ept_entry_t new_entry = { .epte = 0 };
>
>          /* No need to flush if the old entry wasn't valid */
> -        if ( !is_epte_present(ept_entry) )
> +        if ( !is_epte_present(ept_entry) || (!target && sync_deferred) )
> +        {
>              needs_sync = 0;
> +            if ( sync_deferred )
> +                *sync_deferred = 0;

I don't think you should do this -- you call this function in a loop and
you may need to sync because of an earlier iteration.  Better to only
write a 1 to *sync_deferred once you know there's a sync to be done, and
never to write a zero.

> +        }

One comment on the rest of the patch: your calling function calls
ept_sync_domain() directly if sync_deferred == 1.  That's correct for
now but it would be cleaner to use a sync() function pointer in the
struct p2m_domain, the same as the other implementation-specific calls.

Other than that, I think this looks pretty good to go in after 4.2 has
branched.

Cheers,

Tim.
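Concretely, the write-once protocol Tim describes could take the following shape. This is an illustrative, untested sketch using the names from the thread, not code from any posted patch:

/* Caller side, e.g. in p2m_set_access_bulk(): */
bool_t sync_deferred = 0;              /* nothing deferred yet */

for ( pfn = 0; pfn < nr; pfn++ )
    p2m->set_entry(p2m, arr[pfn], mfn, PAGE_ORDER_4K, t, a,
                   &sync_deferred);

if ( sync_deferred )                   /* set by any iteration that */
    ept_sync_domain(p2m->domain);      /* skipped its own flush */

/* Callee side, in ept_set_entry(): only ever set the flag, so an
 * earlier iteration's deferred sync can never be lost. */
if ( !is_epte_present(ept_entry) )
    needs_sync = 0;                    /* no flush needed at all */
else if ( !target && sync_deferred )
{
    needs_sync = 0;                    /* skip the per-entry flush... */
    *sync_deferred = 1;                /* ...and tell the caller to sync */
}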
Aravindh Puthiyaparambil
2012-May-17 18:43 UTC
Re: [PATCH 0 of 2] Add libxc API that sets mem_access type for an array of gfns
On Thu, May 17, 2012 at 3:05 AM, Tim Deegan <tim@xen.org> wrote:
> Hi Aravindh,
>
> At 15:02 -0700 on 04 May (1336143748), Aravindh Puthiyaparambil wrote:
> [...]
> > +    if (sync_deferred)
>
> Xen coding style has spaces inside those parentheses.
> [...]
> >          /* No need to flush if the old entry wasn't valid */
> > -        if ( !is_epte_present(ept_entry) )
> > +        if ( !is_epte_present(ept_entry) || (!target && sync_deferred) )
> > +        {
> >              needs_sync = 0;
> > +            if ( sync_deferred )
> > +                *sync_deferred = 0;
>
> I don't think you should do this -- you call this function in a loop and
> you may need to sync because of an earlier iteration.  Better to only
> write a 1 to *sync_deferred once you know there's a sync to be done, and
> never to write a zero.

You are right. I will fix that.

> > +        }
>
> One comment on the rest of the patch: your calling function calls
> ept_sync_domain() directly if sync_deferred == 1.  That's correct for
> now but it would be cleaner to use a sync() function pointer in the
> struct p2m_domain, the same as the other implementation-specific calls.

I can add that too.

> Other than that, I think this looks pretty good to go in after 4.2 has
> branched.

OK, I will resubmit once 4.2 has branched.

Thanks,
Aravindh
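For completeness, the implementation-specific sync hook discussed above might look like the hypothetical sketch below. The field name "sync" and the init hook are illustrative only; this code appears in none of the posted patches:

struct p2m_domain {
    /* ... existing implementation-specific ops ... */
    int  (*set_entry)(struct p2m_domain *p2m, unsigned long gfn,
                      mfn_t mfn, unsigned int page_order,
                      p2m_type_t p2mt, p2m_access_t p2ma,
                      bool_t *sync_deferred);
    void (*sync)(struct p2m_domain *p2m);   /* new: deferred-flush hook */
};

/* The EPT implementation would install its flush routine at init time: */
static void ept_p2m_sync(struct p2m_domain *p2m)
{
    ept_sync_domain(p2m->domain);
}

void ept_p2m_init(struct p2m_domain *p2m)
{
    /* ... existing setup ... */
    p2m->sync = ept_p2m_sync;
}

/* Callers such as p2m_set_access_bulk() then stay EPT-agnostic: */
if ( sync_deferred )
    p2m->sync(p2m);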