Second half of the patch series submitted yesterday. These three patches improve users of the get_gfn* interface. We ensure liveness of hypervisor mappings of guest pages in the face of paging, and remove a few cases of recursive locking.

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

 xen/arch/x86/hvm/hvm.c             |  69 ++++++++++++++++++++++++-------------
 xen/arch/x86/hvm/nestedhvm.c       |   4 +-
 xen/arch/x86/hvm/svm/nestedsvm.c   |  23 ++++++------
 xen/arch/x86/hvm/vmx/vvmx.c        |  36 +++++++++++--------
 xen/include/asm-x86/hvm/hvm.h      |   9 +++-
 xen/include/asm-x86/hvm/vcpu.h     |   1 +
 xen/include/asm-x86/hvm/vmx/vvmx.h |   1 +
 xen/arch/x86/mm/guest_walk.c       |  16 ++++++--
 xen/common/grant_table.c           |  69 ++++++++++++++++++-----------------
 9 files changed, 136 insertions(+), 92 deletions(-)
Andres Lagar-Cavilla
2011-Nov-24 13:41 UTC
[PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out
xen/arch/x86/hvm/hvm.c | 69 ++++++++++++++++++++++++------------- xen/arch/x86/hvm/nestedhvm.c | 4 +- xen/arch/x86/hvm/svm/nestedsvm.c | 23 ++++++------ xen/arch/x86/hvm/vmx/vvmx.c | 36 +++++++++++-------- xen/include/asm-x86/hvm/hvm.h | 9 +++- xen/include/asm-x86/hvm/vcpu.h | 1 + xen/include/asm-x86/hvm/vmx/vvmx.h | 1 + 7 files changed, 88 insertions(+), 55 deletions(-) The nested hvm code maps pages of the guest hvm. These maps live beyond a hypervisor entry/exit pair, and thus their liveness cannot be ensured with get_gfn/put_gfn critical sections. Ensure their liveness by increasing the page ref count, instead. Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1801,11 +1801,15 @@ int hvm_virtual_to_linear_addr( return 0; } -/* We leave this function holding a lock on the p2m entry */ -static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable) +/* On non-NULL return, we leave this function holding an additional + * ref on the underlying mfn, if any */ +static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable, + struct page_info **page) { + void *map; unsigned long mfn; p2m_type_t p2mt; + struct page_info *pg; struct domain *d = current->domain; mfn = mfn_x(writable @@ -1828,27 +1832,43 @@ static void *__hvm_map_guest_frame(unsig if ( writable ) paging_mark_dirty(d, mfn); - return map_domain_page(mfn); + pg = mfn_to_page(mfn); + if (page) + { + page_get_owner_and_reference(pg); + *page = pg; + } + + map = map_domain_page(mfn); + put_gfn(d, gfn); + return map; } -void *hvm_map_guest_frame_rw(unsigned long gfn) +void *hvm_map_guest_frame_rw(unsigned long gfn, + struct page_info **page) { - return __hvm_map_guest_frame(gfn, 1); + return __hvm_map_guest_frame(gfn, 1, page); } -void *hvm_map_guest_frame_ro(unsigned long gfn) +void *hvm_map_guest_frame_ro(unsigned long gfn, + struct page_info **page) { - return __hvm_map_guest_frame(gfn, 0); + return __hvm_map_guest_frame(gfn, 0, page); } -void hvm_unmap_guest_frame(void *p) +void hvm_unmap_guest_frame(void *p, struct page_info *page) { if ( p ) + { unmap_domain_page(p); + if ( page ) + put_page(page); + } } -static void *hvm_map_entry(unsigned long va, unsigned long *gfn) +static void *hvm_map_entry(unsigned long va, struct page_info **page) { + unsigned long gfn; uint32_t pfec; char *v; @@ -1865,11 +1885,11 @@ static void *hvm_map_entry(unsigned long * treat it as a kernel-mode read (i.e. no access checks). 
*/ pfec = PFEC_page_present; - *gfn = paging_gva_to_gfn(current, va, &pfec); + gfn = paging_gva_to_gfn(current, va, &pfec); if ( (pfec == PFEC_page_paged) || (pfec == PFEC_page_shared) ) goto fail; - v = hvm_map_guest_frame_rw(*gfn); + v = hvm_map_guest_frame_rw(gfn, page); if ( v == NULL ) goto fail; @@ -1880,11 +1900,9 @@ static void *hvm_map_entry(unsigned long return NULL; } -static void hvm_unmap_entry(void *p, unsigned long gfn) +static void hvm_unmap_entry(void *p, struct page_info *page) { - hvm_unmap_guest_frame(p); - if ( p && (gfn != INVALID_GFN) ) - put_gfn(current->domain, gfn); + hvm_unmap_guest_frame(p, page); } static int hvm_load_segment_selector( @@ -1896,7 +1914,7 @@ static int hvm_load_segment_selector( int fault_type = TRAP_invalid_tss; struct cpu_user_regs *regs = guest_cpu_user_regs(); struct vcpu *v = current; - unsigned long pdesc_gfn = INVALID_GFN; + struct page_info *pdesc_pg = NULL; if ( regs->eflags & X86_EFLAGS_VM ) { @@ -1930,7 +1948,7 @@ static int hvm_load_segment_selector( if ( ((sel & 0xfff8) + 7) > desctab.limit ) goto fail; - pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_gfn); + pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_pg); if ( pdesc == NULL ) goto hvm_map_fail; @@ -1990,7 +2008,7 @@ static int hvm_load_segment_selector( desc.b |= 0x100; skip_accessed_flag: - hvm_unmap_entry(pdesc, pdesc_gfn); + hvm_unmap_entry(pdesc, pdesc_pg); segr.base = (((desc.b << 0) & 0xff000000u) | ((desc.b << 16) & 0x00ff0000u) | @@ -2006,7 +2024,7 @@ static int hvm_load_segment_selector( return 0; unmap_and_fail: - hvm_unmap_entry(pdesc, pdesc_gfn); + hvm_unmap_entry(pdesc, pdesc_pg); fail: hvm_inject_exception(fault_type, sel & 0xfffc, 0); hvm_map_fail: @@ -2021,7 +2039,8 @@ void hvm_task_switch( struct cpu_user_regs *regs = guest_cpu_user_regs(); struct segment_register gdt, tr, prev_tr, segr; struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc; - unsigned long eflags, optss_gfn = INVALID_GFN, nptss_gfn = INVALID_GFN; + unsigned long eflags; + struct page_info *optss_pg = NULL, *nptss_pg = NULL; int exn_raised, rc; struct { u16 back_link,__blh; @@ -2047,11 +2066,13 @@ void hvm_task_switch( goto out; } - optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8), &optss_gfn); + optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8), + &optss_pg); if ( optss_desc == NULL ) goto out; - nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8), &nptss_gfn); + nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8), + &nptss_pg); if ( nptss_desc == NULL ) goto out; @@ -2216,8 +2237,8 @@ void hvm_task_switch( } out: - hvm_unmap_entry(optss_desc, optss_gfn); - hvm_unmap_entry(nptss_desc, nptss_gfn); + hvm_unmap_entry(optss_desc, optss_pg); + hvm_unmap_entry(nptss_desc, nptss_pg); } #define HVMCOPY_from_guest (0u<<0) diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/nestedhvm.c --- a/xen/arch/x86/hvm/nestedhvm.c +++ b/xen/arch/x86/hvm/nestedhvm.c @@ -56,9 +56,11 @@ nestedhvm_vcpu_reset(struct vcpu *v) nv->nv_ioportED = 0; if (nv->nv_vvmcx) - hvm_unmap_guest_frame(nv->nv_vvmcx); + hvm_unmap_guest_frame(nv->nv_vvmcx, + nv->nv_vvmcx_pg); nv->nv_vvmcx = NULL; nv->nv_vvmcxaddr = VMCX_EADDR; + nv->nv_vvmcx_pg = NULL; nv->nv_flushp2m = 0; nv->nv_p2m = NULL; diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/svm/nestedsvm.c --- a/xen/arch/x86/hvm/svm/nestedsvm.c +++ b/xen/arch/x86/hvm/svm/nestedsvm.c @@ -71,20 +71,18 @@ int nestedsvm_vmcb_map(struct vcpu *v, u if (nv->nv_vvmcx != NULL && nv->nv_vvmcxaddr != vmcbaddr) { 
ASSERT(nv->nv_vvmcx != NULL); ASSERT(nv->nv_vvmcxaddr != VMCX_EADDR); - hvm_unmap_guest_frame(nv->nv_vvmcx); + hvm_unmap_guest_frame(nv->nv_vvmcx, nv->nv_vvmcx_pg); nv->nv_vvmcx = NULL; nv->nv_vvmcxaddr = VMCX_EADDR; + nv->nv_vvmcx_pg = NULL; } if (nv->nv_vvmcx == NULL) { - nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT); + nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcbaddr >> PAGE_SHIFT, + &nv->nv_vvmcx_pg); if (nv->nv_vvmcx == NULL) return 0; nv->nv_vvmcxaddr = vmcbaddr; - /* put_gfn here even though the map survives beyond this caller. - * The map can likely survive beyond a hypervisor exit, thus we - * need to put the gfn */ - put_gfn(current->domain, vmcbaddr >> PAGE_SHIFT); } return 1; @@ -336,6 +334,7 @@ static int nsvm_vmrun_permissionmap(stru unsigned int i; enum hvm_copy_result ret; unsigned long *ns_viomap; + struct page_info *ns_viomap_pg = NULL; bool_t ioport_80, ioport_ed; ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm; @@ -353,12 +352,12 @@ static int nsvm_vmrun_permissionmap(stru svm->ns_oiomap_pa = svm->ns_iomap_pa; svm->ns_iomap_pa = ns_vmcb->_iopm_base_pa; - ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT); + ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT, + &ns_viomap_pg); ASSERT(ns_viomap != NULL); ioport_80 = test_bit(0x80, ns_viomap); ioport_ed = test_bit(0xed, ns_viomap); - hvm_unmap_guest_frame(ns_viomap); - put_gfn(current->domain, svm->ns_iomap_pa >> PAGE_SHIFT); + hvm_unmap_guest_frame(ns_viomap, ns_viomap_pg); svm->ns_iomap = nestedhvm_vcpu_iomap_get(ioport_80, ioport_ed); @@ -863,6 +862,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t uint16_t port; bool_t enabled; unsigned long gfn = 0; /* gcc ... */ + struct page_info *io_bitmap_pg = NULL; ioinfo.bytes = exitinfo1; port = ioinfo.fields.port; @@ -880,7 +880,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t break; } - io_bitmap = hvm_map_guest_frame_ro(gfn); + io_bitmap = hvm_map_guest_frame_ro(gfn, &io_bitmap_pg); if (io_bitmap == NULL) { gdprintk(XENLOG_ERR, "IOIO intercept: mapping of permission map failed\n"); @@ -888,8 +888,7 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t } enabled = test_bit(port, io_bitmap); - hvm_unmap_guest_frame(io_bitmap); - put_gfn(current->domain, gfn); + hvm_unmap_guest_frame(io_bitmap, io_bitmap_pg); if (!enabled) return NESTEDHVM_VMEXIT_HOST; diff -r a64f63ecfc57 -r f079cbeae77e xen/arch/x86/hvm/vmx/vvmx.c --- a/xen/arch/x86/hvm/vmx/vvmx.c +++ b/xen/arch/x86/hvm/vmx/vvmx.c @@ -44,10 +44,13 @@ int nvmx_vcpu_initialise(struct vcpu *v) nvmx->vmxon_region_pa = 0; nvcpu->nv_vvmcx = NULL; nvcpu->nv_vvmcxaddr = VMCX_EADDR; + nvcpu->nv_vvmcx_pg = NULL; nvmx->intr.intr_info = 0; nvmx->intr.error_code = 0; nvmx->iobitmap[0] = NULL; nvmx->iobitmap[1] = NULL; + nvmx->iobitmap_pg[0] = NULL; + nvmx->iobitmap_pg[1] = NULL; return 0; out: return -ENOMEM; @@ -558,12 +561,14 @@ static void __map_io_bitmap(struct vcpu index = vmcs_reg == IO_BITMAP_A ? 
0 : 1; if (nvmx->iobitmap[index]) - hvm_unmap_guest_frame (nvmx->iobitmap[index]); + { + hvm_unmap_guest_frame (nvmx->iobitmap[index], + nvmx->iobitmap_pg[index]); + nvmx->iobitmap_pg[index] = NULL; + } gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, vmcs_reg); - nvmx->iobitmap[index] = hvm_map_guest_frame_ro (gpa >> PAGE_SHIFT); - /* See comment in nestedsvm_vmcb_map re putting this gfn and - * liveness of the map it backs */ - put_gfn(current->domain, gpa >> PAGE_SHIFT); + nvmx->iobitmap[index] = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT, + &nvmx->iobitmap_pg[index]); } static inline void map_io_bitmap_all(struct vcpu *v) @@ -580,13 +585,16 @@ static void nvmx_purge_vvmcs(struct vcpu __clear_current_vvmcs(v); if ( nvcpu->nv_vvmcxaddr != VMCX_EADDR ) - hvm_unmap_guest_frame(nvcpu->nv_vvmcx); - nvcpu->nv_vvmcx == NULL; + hvm_unmap_guest_frame(nvcpu->nv_vvmcx, nvcpu->nv_vvmcx_pg); + nvcpu->nv_vvmcx = NULL; nvcpu->nv_vvmcxaddr = VMCX_EADDR; + nvcpu->nv_vvmcx_pg = NULL; for (i=0; i<2; i++) { if ( nvmx->iobitmap[i] ) { - hvm_unmap_guest_frame(nvmx->iobitmap[i]); + hvm_unmap_guest_frame(nvmx->iobitmap[i], + nvmx->iobitmap_pg[i]); nvmx->iobitmap[i] = NULL; + nvmx->iobitmap_pg[i] = NULL; } } } @@ -1138,12 +1146,10 @@ int nvmx_handle_vmptrld(struct cpu_user_ if ( nvcpu->nv_vvmcxaddr == VMCX_EADDR ) { - nvcpu->nv_vvmcx = hvm_map_guest_frame_rw (gpa >> PAGE_SHIFT); + nvcpu->nv_vvmcx = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT, + &nvcpu->nv_vvmcx_pg); nvcpu->nv_vvmcxaddr = gpa; map_io_bitmap_all (v); - /* See comment in nestedsvm_vmcb_map regarding putting this - * gfn and liveness of the map that uses it */ - put_gfn(current->domain, gpa >> PAGE_SHIFT); } vmreturn(regs, VMSUCCEED); @@ -1201,11 +1207,11 @@ int nvmx_handle_vmclear(struct cpu_user_ else { /* Even if this VMCS isn''t the current one, we must clear it. */ - vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT); + struct page_info *vvmcs_pg = NULL; + vvmcs = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT, &vvmcs_pg); if ( vvmcs ) __set_vvmcs(vvmcs, NVMX_LAUNCH_STATE, 0); - hvm_unmap_guest_frame(vvmcs); - put_gfn(current->domain, gpa >> PAGE_SHIFT); + hvm_unmap_guest_frame(vvmcs, vvmcs_pg); } vmreturn(regs, VMSUCCEED); diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h +++ b/xen/include/asm-x86/hvm/hvm.h @@ -26,6 +26,7 @@ #include <asm/hvm/asid.h> #include <public/domctl.h> #include <public/hvm/save.h> +#include <asm/mm.h> /* Interrupt acknowledgement sources. 
*/ enum hvm_intsrc { @@ -392,9 +393,11 @@ int hvm_virtual_to_linear_addr( unsigned int addr_size, unsigned long *linear_addr); -void *hvm_map_guest_frame_rw(unsigned long gfn); -void *hvm_map_guest_frame_ro(unsigned long gfn); -void hvm_unmap_guest_frame(void *p); +void *hvm_map_guest_frame_rw(unsigned long gfn, + struct page_info **page); +void *hvm_map_guest_frame_ro(unsigned long gfn, + struct page_info **page); +void hvm_unmap_guest_frame(void *p, struct page_info *page); static inline void hvm_set_info_guest(struct vcpu *v) { diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/vcpu.h --- a/xen/include/asm-x86/hvm/vcpu.h +++ b/xen/include/asm-x86/hvm/vcpu.h @@ -77,6 +77,7 @@ struct nestedvcpu { void *nv_n2vmcx; /* shadow VMCB/VMCS used to run l2 guest */ uint64_t nv_vvmcxaddr; /* l1 guest physical address of nv_vvmcx */ + struct page_info *nv_vvmcx_pg; /* page where nv_vvmcx resides */ uint64_t nv_n1vmcx_pa; /* host physical address of nv_n1vmcx */ uint64_t nv_n2vmcx_pa; /* host physical address of nv_n2vmcx */ diff -r a64f63ecfc57 -r f079cbeae77e xen/include/asm-x86/hvm/vmx/vvmx.h --- a/xen/include/asm-x86/hvm/vmx/vvmx.h +++ b/xen/include/asm-x86/hvm/vmx/vvmx.h @@ -26,6 +26,7 @@ struct nestedvmx { paddr_t vmxon_region_pa; void *iobitmap[2]; /* map (va) of L1 guest I/O bitmap */ + struct page_info *iobitmap_pg[2]; /* pages backing L1 guest I/O bitmap */ /* deferred nested interrupt */ struct { unsigned long intr_info;
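Since the diff above is dense, the calling convention it introduces can be summarised in isolation. The sketch below is illustrative only and not part of the patch: the example_* wrapper names are invented, while hvm_map_guest_frame_rw(), hvm_unmap_guest_frame() and the nv_vvmcx/nv_vvmcx_pg fields are the ones the patch adds.

/* Illustrative sketch only -- not part of the patch.  It shows the
 * map/unmap pairing the patch establishes: the map helper now hands back
 * an extra reference on the backing page through its second argument, so
 * the mapping stays usable across VMENTRY/VMEXIT even if the gfn is
 * subsequently paged out. */
static int example_map_vvmcx(struct nestedvcpu *nv, uint64_t vmcxaddr)
{
    nv->nv_vvmcx = hvm_map_guest_frame_rw(vmcxaddr >> PAGE_SHIFT,
                                          &nv->nv_vvmcx_pg);
    if ( nv->nv_vvmcx == NULL )
        return 0;                    /* map failed; no reference was taken */
    nv->nv_vvmcxaddr = vmcxaddr;
    return 1;
}

static void example_unmap_vvmcx(struct nestedvcpu *nv)
{
    /* Unmapping also drops the page reference taken at map time. */
    hvm_unmap_guest_frame(nv->nv_vvmcx, nv->nv_vvmcx_pg);
    nv->nv_vvmcx = NULL;
    nv->nv_vvmcx_pg = NULL;
    nv->nv_vvmcxaddr = VMCX_EADDR;
}

The point of the design is that the page reference, not the held p2m (gfn) entry, is what keeps the mapping live, so the gfn can be put before the map helper even returns.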
Andres Lagar-Cavilla
2011-Nov-24 13:41 UTC
[PATCH 2 of 3] Ensure liveness of pages involved in a guest page table walk
 xen/arch/x86/mm/guest_walk.c |  16 +++++++++++-----
 1 files changed, 11 insertions(+), 5 deletions(-)

Instead of risking deadlock by holding onto the gfns acquired during a guest page table walk, acquire an extra reference within the get_gfn/put_gfn critical section, and drop the extra reference when done with the map. This ensures liveness of the map, i.e. the underlying page won't be paged out.

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

diff -r f079cbeae77e -r b0410cb27d17 xen/arch/x86/mm/guest_walk.c
--- a/xen/arch/x86/mm/guest_walk.c
+++ b/xen/arch/x86/mm/guest_walk.c
@@ -87,7 +87,7 @@ static uint32_t set_ad_bits(void *guest_
 }
 
 /* If the map is non-NULL, we leave this function having
- * called get_gfn, you need to call put_gfn. */
+ * acquired an extra ref on mfn_to_page(*mfn) */
 static inline void *map_domain_gfn(struct p2m_domain *p2m,
                                    gfn_t gfn,
                                    mfn_t *mfn,
@@ -95,6 +95,7 @@ static inline void *map_domain_gfn(struc
                                    uint32_t *rc)
 {
     p2m_access_t p2ma;
+    void *map;
 
     /* Translate the gfn, unsharing if shared */
     *mfn = get_gfn_type_access(p2m, gfn_x(gfn), p2mt, &p2ma, p2m_unshare, NULL);
@@ -120,7 +121,12 @@ static inline void *map_domain_gfn(struc
     }
 
     ASSERT(mfn_valid(mfn_x(*mfn)));
-    return map_domain_page(mfn_x(*mfn));
+    /* Get an extra ref to the page to ensure liveness of the map.
+     * Then we can safely put gfn */
+    page_get_owner_and_reference(mfn_to_page(mfn_x(*mfn)));
+    map = map_domain_page(mfn_x(*mfn));
+    __put_gfn(p2m, gfn_x(gfn));
+    return map;
 }
 
 
@@ -357,20 +363,20 @@ set_ad:
     if ( l3p )
     {
         unmap_domain_page(l3p);
-        __put_gfn(p2m, gfn_x(guest_l4e_get_gfn(gw->l4e)));
+        put_page(mfn_to_page(mfn_x(gw->l3mfn)));
     }
 #endif
 #if GUEST_PAGING_LEVELS >= 3
     if ( l2p )
     {
         unmap_domain_page(l2p);
-        __put_gfn(p2m, gfn_x(guest_l3e_get_gfn(gw->l3e)));
+        put_page(mfn_to_page(mfn_x(gw->l2mfn)));
     }
 #endif
     if ( l1p )
     {
         unmap_domain_page(l1p);
-        __put_gfn(p2m, gfn_x(guest_l2e_get_gfn(gw->l2e)));
+        put_page(mfn_to_page(mfn_x(gw->l1mfn)));
     }
 
     return rc;
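The same map-liveness idea, reduced to its core and separated from the walker's error handling, looks roughly as follows. This is a sketch, not part of the patch: the sketch_* names are hypothetical, and the checks for invalid, paged-out or shared entries that the real map_domain_gfn() performs are omitted.

/* Minimal sketch (hypothetical helpers, no error handling): take a page
 * reference inside the get_gfn/put_gfn critical section so the mapping
 * stays live after the gfn is put, then drop that reference on unmap. */
static void *sketch_map_gfn(struct p2m_domain *p2m, unsigned long gfn,
                            mfn_t *mfn, p2m_type_t *p2mt)
{
    void *map;
    p2m_access_t p2ma;

    *mfn = get_gfn_type_access(p2m, gfn, p2mt, &p2ma, p2m_unshare, NULL);
    page_get_owner_and_reference(mfn_to_page(mfn_x(*mfn)));  /* pin the frame */
    map = map_domain_page(mfn_x(*mfn));
    __put_gfn(p2m, gfn);            /* safe: the page ref keeps the map live */
    return map;
}

static void sketch_unmap_gfn(void *map, mfn_t mfn)
{
    unmap_domain_page(map);
    put_page(mfn_to_page(mfn_x(mfn)));  /* drop the ref taken at map time */
}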
Andres Lagar-Cavilla
2011-Nov-24 13:41 UTC
[PATCH 3 of 3] Fix liveness of pages in grant copy operations
 xen/common/grant_table.c |  69 +++++++++++++++++++++++++----------------------
 1 files changed, 37 insertions(+), 32 deletions(-)

We were immediately putting the p2m entry translation for grant copy operations. This allowed for an unnecessary race by which the page could have been swapped out between the p2m lookup and the actual use. Hold on to the p2m entries until the grant operation finishes.

Also fixes a small bug: for the source page of the copy, get_page was assuming the page was owned by the source domain. It may be a shared page, since we don't perform an unsharing p2m lookup.

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

diff -r b0410cb27d17 -r 9e44fd6e4955 xen/common/grant_table.c
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -1723,11 +1723,12 @@ static void __fixup_status_for_pin(struc
 /* Grab a frame number from a grant entry and update the flags and pin
    count as appropriate. Note that this does *not* update the page
    type or reference counts, and does not check that the mfn is
-   actually valid. */
+   actually valid. If *gfn != INVALID_GFN, and rc == GNTST_okay, then
+   we leave this function holding the p2m entry for *gfn in *owning_domain */
 static int
 __acquire_grant_for_copy(
     struct domain *rd, unsigned long gref, struct domain *ld, int readonly,
-    unsigned long *frame, unsigned *page_off, unsigned *length,
+    unsigned long *frame, unsigned long *gfn, unsigned *page_off, unsigned *length,
     unsigned allow_transitive, struct domain **owning_domain)
 {
     grant_entry_v1_t *sha1;
@@ -1739,7 +1740,6 @@ __acquire_grant_for_copy(
     domid_t trans_domid;
     grant_ref_t trans_gref;
     struct domain *td;
-    unsigned long gfn;
     unsigned long grant_frame;
     unsigned trans_page_off;
     unsigned trans_length;
@@ -1748,6 +1748,7 @@ __acquire_grant_for_copy(
     s16 rc = GNTST_okay;
 
     *owning_domain = NULL;
+    *gfn = INVALID_GFN;
 
     spin_lock(&rd->grant_table->lock);
 
@@ -1824,7 +1825,7 @@ __acquire_grant_for_copy(
         spin_unlock(&rd->grant_table->lock);
 
         rc = __acquire_grant_for_copy(td, trans_gref, rd,
-                                      readonly, &grant_frame,
+                                      readonly, &grant_frame, gfn,
                                       &trans_page_off, &trans_length,
                                       0, &ignore);
 
@@ -1846,7 +1847,7 @@ __acquire_grant_for_copy(
             rcu_unlock_domain(td);
             spin_unlock(&rd->grant_table->lock);
             return __acquire_grant_for_copy(rd, gref, ld, readonly,
-                                            frame, page_off, length,
+                                            frame, gfn, page_off, length,
                                             allow_transitive, owning_domain);
         }
 
@@ -1860,13 +1861,11 @@ __acquire_grant_for_copy(
     }
     else if ( sha1 )
     {
-        gfn = sha1->frame;
-        rc = __get_paged_frame(gfn, &grant_frame, readonly, rd);
-        /* We drop this immediately per the comments at the top */
-        put_gfn(rd, gfn);
+        *gfn = sha1->frame;
+        rc = __get_paged_frame(*gfn, &grant_frame, readonly, rd);
         if ( rc != GNTST_okay )
             goto unlock_out;
-        act->gfn = gfn;
+        act->gfn = *gfn;
         is_sub_page = 0;
         trans_page_off = 0;
         trans_length = PAGE_SIZE;
@@ -1874,12 +1873,11 @@ __acquire_grant_for_copy(
     }
     else if ( !(sha2->hdr.flags & GTF_sub_page) )
     {
-        gfn = sha2->full_page.frame;
-        rc = __get_paged_frame(gfn, &grant_frame, readonly, rd);
-        put_gfn(rd, gfn);
+        *gfn = sha2->full_page.frame;
+        rc = __get_paged_frame(*gfn, &grant_frame, readonly, rd);
         if ( rc != GNTST_okay )
             goto unlock_out;
-        act->gfn = gfn;
+        act->gfn = *gfn;
         is_sub_page = 0;
         trans_page_off = 0;
         trans_length = PAGE_SIZE;
@@ -1887,12 +1885,11 @@ __acquire_grant_for_copy(
     }
     else
     {
-        gfn = sha2->sub_page.frame;
-        rc = __get_paged_frame(gfn, &grant_frame, readonly, rd);
-        put_gfn(rd, gfn);
+        *gfn = sha2->sub_page.frame;
+        rc = __get_paged_frame(*gfn, &grant_frame, readonly, rd);
         if ( rc != GNTST_okay )
             goto unlock_out;
-        act->gfn = gfn;
+        act->gfn = *gfn;
         is_sub_page = 1;
         trans_page_off = sha2->sub_page.page_off;
         trans_length = sha2->sub_page.length;
@@ -1932,7 +1929,7 @@ __gnttab_copy(
 {
     struct domain *sd = NULL, *dd = NULL;
     struct domain *source_domain = NULL, *dest_domain = NULL;
-    unsigned long s_frame, d_frame;
+    unsigned long s_frame, d_frame, s_gfn = INVALID_GFN, d_gfn = INVALID_GFN;
     char *sp, *dp;
     s16 rc = GNTST_okay;
     int have_d_grant = 0, have_s_grant = 0, have_s_ref = 0;
@@ -1973,14 +1970,14 @@ __gnttab_copy(
     {
         unsigned source_off, source_len;
         rc = __acquire_grant_for_copy(sd, op->source.u.ref, current->domain, 1,
-                                      &s_frame, &source_off, &source_len, 1,
+                                      &s_frame, &s_gfn, &source_off, &source_len, 1,
                                       &source_domain);
         if ( rc != GNTST_okay )
             goto error_out;
        have_s_grant = 1;
         if ( op->source.offset < source_off ||
              op->len > source_len )
-            PIN_FAIL(error_out, GNTST_general_error,
+            PIN_FAIL(error_put_s_gfn, GNTST_general_error,
                      "copy source out of bounds: %d < %d || %d > %d\n",
                      op->source.offset, source_off,
                      op->len, source_len);
@@ -1988,8 +1985,8 @@ __gnttab_copy(
     else
     {
 #ifdef CONFIG_X86
+        s_gfn = op->source.u.gmfn;
         rc = __get_paged_frame(op->source.u.gmfn, &s_frame, 1, sd);
-        put_gfn(sd, op->source.u.gmfn);
         if ( rc != GNTST_okay )
             goto error_out;
 #else
@@ -1998,14 +1995,16 @@ __gnttab_copy(
         source_domain = sd;
     }
     if ( unlikely(!mfn_valid(s_frame)) )
-        PIN_FAIL(error_out, GNTST_general_error,
+        PIN_FAIL(error_put_s_gfn, GNTST_general_error,
                  "source frame %lx invalid.\n", s_frame);
-    if ( !get_page(mfn_to_page(s_frame), source_domain) )
+    /* For the source frame, the page could still be shared, so
+     * don't assume ownership by source_domain */
+    if ( !page_get_owner_and_reference(mfn_to_page(s_frame)) )
     {
         if ( !sd->is_dying )
             gdprintk(XENLOG_WARNING, "Could not get src frame %lx\n", s_frame);
         rc = GNTST_general_error;
-        goto error_out;
+        goto error_put_s_gfn;
     }
     have_s_ref = 1;
 
@@ -2013,14 +2012,14 @@ __gnttab_copy(
     {
         unsigned dest_off, dest_len;
         rc = __acquire_grant_for_copy(dd, op->dest.u.ref, current->domain, 0,
-                                      &d_frame, &dest_off, &dest_len, 1,
+                                      &d_frame, &d_gfn, &dest_off, &dest_len, 1,
                                       &dest_domain);
         if ( rc != GNTST_okay )
-            goto error_out;
+            goto error_put_s_gfn;
         have_d_grant = 1;
         if ( op->dest.offset < dest_off ||
              op->len > dest_len )
-            PIN_FAIL(error_out, GNTST_general_error,
+            PIN_FAIL(error_put_d_gfn, GNTST_general_error,
                      "copy dest out of bounds: %d < %d || %d > %d\n",
                      op->dest.offset, dest_off,
                      op->len, dest_len);
@@ -2028,17 +2027,17 @@ __gnttab_copy(
     else
     {
 #ifdef CONFIG_X86
+        d_gfn = op->dest.u.gmfn;
         rc = __get_paged_frame(op->dest.u.gmfn, &d_frame, 0, dd);
-        put_gfn(dd, op->dest.u.gmfn);
         if ( rc != GNTST_okay )
-            goto error_out;
+            goto error_put_s_gfn;
 #else
         d_frame = gmfn_to_mfn(dd, op->dest.u.gmfn);
 #endif
         dest_domain = dd;
     }
     if ( unlikely(!mfn_valid(d_frame)) )
-        PIN_FAIL(error_out, GNTST_general_error,
+        PIN_FAIL(error_put_d_gfn, GNTST_general_error,
                  "destination frame %lx invalid.\n", d_frame);
     if ( !get_page_and_type(mfn_to_page(d_frame), dest_domain,
                             PGT_writable_page) )
@@ -2046,7 +2045,7 @@ __gnttab_copy(
         if ( !dd->is_dying )
             gdprintk(XENLOG_WARNING, "Could not get dst frame %lx\n", d_frame);
         rc = GNTST_general_error;
-        goto error_out;
+        goto error_put_d_gfn;
     }
 
     sp = map_domain_page(s_frame);
@@ -2060,6 +2059,12 @@ __gnttab_copy(
     gnttab_mark_dirty(dd, d_frame);
     put_page_and_type(mfn_to_page(d_frame));
 
+ error_put_d_gfn:
+    if ( (d_gfn != INVALID_GFN) && (dest_domain) )
+        put_gfn(dest_domain, d_gfn);
+ error_put_s_gfn:
+    if ( (s_gfn != INVALID_GFN) && (source_domain) )
+        put_gfn(source_domain, s_gfn);
 error_out:
     if ( have_s_ref )
         put_page(mfn_to_page(s_frame));
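For readers who want the rule of this patch without the grant-table details: a translated gfn is now put only after the frame has actually been used. The fragment below is a hypothetical, simplified illustration, not code from the series; the function name is invented, while get_gfn/put_gfn and map_domain_page are the interfaces the patch relies on.

/* Sketch only: the p2m entry for gfn is held across the whole copy, so the
 * page cannot be paged out (or change type) between translation and use.
 * Hypothetical helper; error handling is heavily simplified. */
static int sketch_copy_from_gfn(struct domain *d, unsigned long gfn,
                                void *buf, unsigned int off, unsigned int len)
{
    p2m_type_t t;
    unsigned long mfn;
    char *src;

    mfn = mfn_x(get_gfn(d, gfn, &t));     /* take the p2m entry ... */
    if ( !p2m_is_ram(t) || !mfn_valid(mfn) )
    {
        put_gfn(d, gfn);
        return GNTST_general_error;
    }

    src = map_domain_page(mfn);           /* ... and keep it held while ... */
    memcpy(buf, src + off, len);          /* ... the frame is actually used */
    unmap_domain_page(src);

    put_gfn(d, gfn);                      /* only now drop the p2m entry */
    return GNTST_okay;
}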
Tim Deegan
2011-Nov-24 16:48 UTC
Re: [PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out
This is a bit messy still.  On 64-bit Xen we could just use virt_to_page() on the map pointers rather than storing their page_info pointers separately.

On 32-bit, maybe we could use the existing linear mappings to look up the MFN instead.  Or would it be easier to add a function to the map_domain_page() family that does va->mfn for mapped frames?  Keir, any opinion?

Tim.

At 08:41 -0500 on 24 Nov (1322124091), Andres Lagar-Cavilla wrote:
>  xen/arch/x86/hvm/hvm.c             |  69 ++++++++++++++++++++++++-------------
>  xen/arch/x86/hvm/nestedhvm.c       |   4 +-
>  xen/arch/x86/hvm/svm/nestedsvm.c   |  23 ++++++------
>  xen/arch/x86/hvm/vmx/vvmx.c        |  36 +++++++++++--------
>  xen/include/asm-x86/hvm/hvm.h      |   9 +++-
>  xen/include/asm-x86/hvm/vcpu.h     |   1 +
>  xen/include/asm-x86/hvm/vmx/vvmx.h |   1 +
>  7 files changed, 88 insertions(+), 55 deletions(-)
>
> The nested hvm code maps pages of the guest hvm. These maps live beyond
> a hypervisor entry/exit pair, and thus their liveness cannot be ensured
> with get_gfn/put_gfn critical sections. Ensure their liveness by
> increasing the page ref count, instead.
>
> Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
>
> [...]
Keir Fraser
2011-Nov-24 17:04 UTC
Re: [PATCH 1 of 3] Ensure maps used by nested hvm code cannot be paged out
On 24/11/2011 16:48, "Tim Deegan" <tim@xen.org> wrote:
> This is a bit messy still.  On 64-bit Xen we could just use
> virt_to_page() on the map pointers rather than storing their
> page_info pointers separately.
>
> On 32-bit, maybe we could use the existing linear mappings to look up
> the MFN instead.  Or would it be easier to add a function to the
> map_domain_page() family that does va->mfn for mapped frames?
> Keir, any opinion?

Whatever's easiest. 32-bit isn't a priority.

 -- Keir
Tim Deegan
2011-Nov-24 17:07 UTC
Re: [PATCH 2 of 3] Ensure liveness of pages involved in a guest page table walk
At 08:41 -0500 on 24 Nov (1322124092), Andres Lagar-Cavilla wrote:
> Instead of risking deadlock by holding onto the gfns acquired during
> a guest page table walk, acquire an extra reference within the get_gfn/
> put_gfn critical section, and drop the extra reference when done with
> the map. This ensures liveness of the map, i.e. the underlying page
> won't be paged out.
>
> Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

Applied, thanks.

Tim.
Tim Deegan
2011-Nov-24 17:07 UTC
Re: [PATCH 3 of 3] Fix liveness of pages in grant copy operations
At 08:41 -0500 on 24 Nov (1322124093), Andres Lagar-Cavilla wrote:
> We were immediately putting the p2m entry translation for grant
> copy operations. This allowed for an unnecessary race by which the
> page could have been swapped out between the p2m lookup and the actual
> use. Hold on to the p2m entries until the grant operation finishes.
>
> Also fixes a small bug: for the source page of the copy, get_page
> was assuming the page was owned by the source domain. It may be a
> shared page, since we don't perform an unsharing p2m lookup.
>
> Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

Applied, thanks.

Tim.