Resending three bug fix patches after feedback from Tim Deegan and Jan.
Also rebased against 3c4c29899d8a.

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

 xen/include/asm-x86/page.h        |  18 ++++++++++++-
 xen/include/asm-x86/x86_64/page.h |   5 +++
 xen/arch/x86/hvm/hvm.c            |  54 ++++++++++++++++++++++++++------------
 xen/arch/x86/hvm/svm/nestedsvm.c  |   6 ----
 xen/arch/x86/hvm/vmx/vvmx.c       |  11 +------
 xen/arch/x86/mm.c                 |  12 +++++--
 6 files changed, 69 insertions(+), 37 deletions(-)
Andres Lagar-Cavilla
2011-Dec-01 16:21 UTC
[PATCH 1 of 3] x86: Add conversion from a xen map to an mfn
 xen/include/asm-x86/page.h        |  18 +++++++++++++++++-
 xen/include/asm-x86/x86_64/page.h |   5 +++++
 2 files changed, 22 insertions(+), 1 deletions(-)

This conversion is a trivial invocation of virt_to_mfn in 64 bits. In 32
bits it uses the linear map.

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

diff -r 36aac3e56d01 -r 7b6db593bda0 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -296,8 +296,22 @@ void copy_page_sse2(void *, const void *
 #define __linear_l4_table \
  ((l4_pgentry_t *)(__linear_l3_table + l3_linear_offset(LINEAR_PT_VIRT_START)))
 
+#ifndef __ASSEMBLY__
+#ifndef __x86_64__
+/* We need to stash this here for 32 bits as it depends on constructs
+ * defined after including x86_32/page.h */
+static inline unsigned long __xen_map_to_mfn(void *va)
+{
+    l1_pgentry_t *l1e;
 
-#ifndef __ASSEMBLY__
+    ASSERT( (((unsigned long) va) >= MAPCACHE_VIRT_START) &&
+            (((unsigned long) va) <= MAPCACHE_VIRT_END) );
+    l1e = &__linear_l1_table[
+                l1_linear_offset((unsigned long) va)];
+    return l1e_get_pfn(*l1e);
+}
+#endif /* __x86_64__ */
+
 extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
 #if CONFIG_PAGING_LEVELS == 3
 extern l2_pgentry_t idle_pg_table_l2[
@@ -317,6 +331,8 @@ void paging_init(void);
 void setup_idle_pagetable(void);
 #endif /* !defined(__ASSEMBLY__) */
 
+#define xen_map_to_mfn(va) __xen_map_to_mfn(va)
+
 #define _PAGE_PRESENT  0x001U
 #define _PAGE_RW       0x002U
 #define _PAGE_USER     0x004U
diff -r 36aac3e56d01 -r 7b6db593bda0 xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h
+++ b/xen/include/asm-x86/x86_64/page.h
@@ -104,6 +104,11 @@ static inline void *__maddr_to_virt(unsi
                  ((ma & ma_top_mask) >> pfn_pdx_hole_shift)));
 }
 
+static inline unsigned long __xen_map_to_mfn(void *va)
+{
+    return (__virt_to_maddr((unsigned long) va) >> PAGE_SHIFT);
+}
+
 /* read access (should only be used for debug printk's) */
 typedef u64 intpte_t;
 #define PRIpte "016lx"
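For illustration only (not part of the patch): since xen_map_to_mfn()
simply inverts a mapping set up by map_domain_page(), a round trip like
the sketch below is expected to hold on both 32-bit and 64-bit builds.
The helper name is hypothetical.

    /* Hypothetical caller, sketching the intended use of the new helper. */
    static void example_xen_map_to_mfn(unsigned long mfn)
    {
        void *va = map_domain_page(mfn);

        /* Recover the backing mfn from the mapping alone. */
        ASSERT(xen_map_to_mfn(va) == mfn);
        unmap_domain_page(va);
    }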
Andres Lagar-Cavilla
2011-Dec-01 16:21 UTC
[PATCH 2 of 3] x86/mm: Ensure maps used by nested hvm code cannot be paged out
 xen/arch/x86/hvm/hvm.c           |  54 +++++++++++++++++++++++++++------------
 xen/arch/x86/hvm/svm/nestedsvm.c |   6 ----
 xen/arch/x86/hvm/vmx/vvmx.c      |  11 +------
 3 files changed, 39 insertions(+), 32 deletions(-)

The nested hvm code maps pages of the guest hvm. These maps live beyond
a hypervisor entry/exit pair, and thus their liveness cannot be ensured
with get_gfn/put_gfn critical sections. Ensure their liveness by
increasing the page ref count, instead.

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

diff -r 7b6db593bda0 -r 4e0c533a3e1d xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1801,12 +1801,16 @@ int hvm_virtual_to_linear_addr(
     return 0;
 }
 
-/* We leave this function holding a lock on the p2m entry */
+/* On non-NULL return, we leave this function holding an additional
+ * ref on the underlying mfn, if any */
 static void *__hvm_map_guest_frame(unsigned long gfn, bool_t writable)
 {
+    void *map;
     unsigned long mfn;
     p2m_type_t p2mt;
+    struct page_info *pg;
     struct domain *d = current->domain;
+    int rc;
 
     mfn = mfn_x(writable
                 ? get_gfn_unshare(d, gfn, &p2mt)
@@ -1828,7 +1832,21 @@ static void *__hvm_map_guest_frame(unsig
     if ( writable )
         paging_mark_dirty(d, mfn);
 
-    return map_domain_page(mfn);
+    /* Get a ref on the page, considering that it could be shared */
+    pg = mfn_to_page(mfn);
+    rc = get_page(pg, d);
+    if ( !rc && !writable )
+        /* Page could be shared */
+        rc = get_page(pg, dom_cow);
+    if ( !rc )
+    {
+        put_gfn(d, gfn);
+        return NULL;
+    }
+
+    map = map_domain_page(mfn);
+    put_gfn(d, gfn);
+    return map;
 }
 
 void *hvm_map_guest_frame_rw(unsigned long gfn)
@@ -1844,11 +1862,16 @@ void *hvm_map_guest_frame_ro(unsigned lo
 void hvm_unmap_guest_frame(void *p)
 {
     if ( p )
+    {
+        unsigned long mfn = xen_map_to_mfn(p);
         unmap_domain_page(p);
+        put_page(mfn_to_page(mfn));
+    }
 }
 
-static void *hvm_map_entry(unsigned long va, unsigned long *gfn)
+static void *hvm_map_entry(unsigned long va)
 {
+    unsigned long gfn;
     uint32_t pfec;
     char *v;
 
@@ -1865,11 +1888,11 @@ static void *hvm_map_entry(unsigned long
      * treat it as a kernel-mode read (i.e. no access checks).
      */
     pfec = PFEC_page_present;
-    *gfn = paging_gva_to_gfn(current, va, &pfec);
+    gfn = paging_gva_to_gfn(current, va, &pfec);
     if ( (pfec == PFEC_page_paged) || (pfec == PFEC_page_shared) )
         goto fail;
 
-    v = hvm_map_guest_frame_rw(*gfn);
+    v = hvm_map_guest_frame_rw(gfn);
     if ( v == NULL )
         goto fail;
 
@@ -1880,11 +1903,9 @@ static void *hvm_map_entry(unsigned long
     return NULL;
 }
 
-static void hvm_unmap_entry(void *p, unsigned long gfn)
+static void hvm_unmap_entry(void *p)
 {
     hvm_unmap_guest_frame(p);
-    if ( p && (gfn != INVALID_GFN) )
-        put_gfn(current->domain, gfn);
 }
 
 static int hvm_load_segment_selector(
@@ -1896,7 +1917,6 @@ static int hvm_load_segment_selector(
     int fault_type = TRAP_invalid_tss;
     struct cpu_user_regs *regs = guest_cpu_user_regs();
     struct vcpu *v = current;
-    unsigned long pdesc_gfn = INVALID_GFN;
 
     if ( regs->eflags & X86_EFLAGS_VM )
     {
@@ -1930,7 +1950,7 @@ static int hvm_load_segment_selector(
     if ( ((sel & 0xfff8) + 7) > desctab.limit )
         goto fail;
 
-    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8), &pdesc_gfn);
+    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8));
     if ( pdesc == NULL )
         goto hvm_map_fail;
 
@@ -1990,7 +2010,7 @@ static int hvm_load_segment_selector(
         desc.b |= 0x100;
 
  skip_accessed_flag:
-    hvm_unmap_entry(pdesc, pdesc_gfn);
+    hvm_unmap_entry(pdesc);
 
     segr.base = (((desc.b <<  0) & 0xff000000u) |
                  ((desc.b << 16) & 0x00ff0000u) |
@@ -2006,7 +2026,7 @@ static int hvm_load_segment_selector(
     return 0;
 
  unmap_and_fail:
-    hvm_unmap_entry(pdesc, pdesc_gfn);
+    hvm_unmap_entry(pdesc);
  fail:
     hvm_inject_exception(fault_type, sel & 0xfffc, 0);
  hvm_map_fail:
@@ -2021,7 +2041,7 @@ void hvm_task_switch(
     struct cpu_user_regs *regs = guest_cpu_user_regs();
     struct segment_register gdt, tr, prev_tr, segr;
     struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc;
-    unsigned long eflags, optss_gfn = INVALID_GFN, nptss_gfn = INVALID_GFN;
+    unsigned long eflags;
     int exn_raised, rc;
     struct {
         u16 back_link,__blh;
@@ -2047,11 +2067,11 @@ void hvm_task_switch(
         goto out;
     }
 
-    optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8), &optss_gfn);
+    optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8));
     if ( optss_desc == NULL )
         goto out;
 
-    nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8), &nptss_gfn);
+    nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8));
     if ( nptss_desc == NULL )
         goto out;
 
@@ -2216,8 +2236,8 @@ void hvm_task_switch(
     }
 
  out:
-    hvm_unmap_entry(optss_desc, optss_gfn);
-    hvm_unmap_entry(nptss_desc, nptss_gfn);
+    hvm_unmap_entry(optss_desc);
+    hvm_unmap_entry(nptss_desc);
 }
 
 #define HVMCOPY_from_guest (0u<<0)
diff -r 7b6db593bda0 -r 4e0c533a3e1d xen/arch/x86/hvm/svm/nestedsvm.c
--- a/xen/arch/x86/hvm/svm/nestedsvm.c
+++ b/xen/arch/x86/hvm/svm/nestedsvm.c
@@ -81,10 +81,6 @@ int nestedsvm_vmcb_map(struct vcpu *v, u
         if (nv->nv_vvmcx == NULL)
             return 0;
         nv->nv_vvmcxaddr = vmcbaddr;
-        /* put_gfn here even though the map survives beyond this caller.
-         * The map can likely survive beyond a hypervisor exit, thus we
-         * need to put the gfn */
-        put_gfn(current->domain, vmcbaddr >> PAGE_SHIFT);
     }
 
     return 1;
@@ -358,7 +354,6 @@ static int nsvm_vmrun_permissionmap(stru
     ioport_80 = test_bit(0x80, ns_viomap);
     ioport_ed = test_bit(0xed, ns_viomap);
     hvm_unmap_guest_frame(ns_viomap);
-    put_gfn(current->domain, svm->ns_iomap_pa >> PAGE_SHIFT);
 
     svm->ns_iomap = nestedhvm_vcpu_iomap_get(ioport_80, ioport_ed);
 
@@ -889,7 +884,6 @@ nsvm_vmcb_guest_intercepts_ioio(paddr_t
 
     enabled = test_bit(port, io_bitmap);
     hvm_unmap_guest_frame(io_bitmap);
-    put_gfn(current->domain, gfn);
 
     if (!enabled)
         return NESTEDHVM_VMEXIT_HOST;
diff -r 7b6db593bda0 -r 4e0c533a3e1d xen/arch/x86/hvm/vmx/vvmx.c
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -560,10 +560,7 @@ static void __map_io_bitmap(struct vcpu
     if (nvmx->iobitmap[index])
         hvm_unmap_guest_frame (nvmx->iobitmap[index]);
     gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, vmcs_reg);
-    nvmx->iobitmap[index] = hvm_map_guest_frame_ro (gpa >> PAGE_SHIFT);
-    /* See comment in nestedsvm_vmcb_map re putting this gfn and
-     * liveness of the map it backs */
-    put_gfn(current->domain, gpa >> PAGE_SHIFT);
+    nvmx->iobitmap[index] = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT);
 }
 
 static inline void map_io_bitmap_all(struct vcpu *v)
@@ -1138,12 +1135,9 @@ int nvmx_handle_vmptrld(struct cpu_user_
 
     if ( nvcpu->nv_vvmcxaddr == VMCX_EADDR )
     {
-        nvcpu->nv_vvmcx = hvm_map_guest_frame_rw (gpa >> PAGE_SHIFT);
+        nvcpu->nv_vvmcx = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT);
         nvcpu->nv_vvmcxaddr = gpa;
         map_io_bitmap_all (v);
-        /* See comment in nestedsvm_vmcb_map regarding putting this
-         * gfn and liveness of the map that uses it */
-        put_gfn(current->domain, gpa >> PAGE_SHIFT);
     }
 
     vmreturn(regs, VMSUCCEED);
@@ -1205,7 +1199,6 @@ int nvmx_handle_vmclear(struct cpu_user_
         if ( vvmcs )
             __set_vvmcs(vvmcs, NVMX_LAUNCH_STATE, 0);
         hvm_unmap_guest_frame(vvmcs);
-        put_gfn(current->domain, gpa >> PAGE_SHIFT);
     }
 
     vmreturn(regs, VMSUCCEED);
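For illustration only (not part of the patch): the lifetime rule this
patch establishes is that the map path takes a page reference while the
gfn is held and drops the gfn before returning, while the unmap path
recovers the mfn from the virtual address (patch 1) and releases the
reference. A minimal sketch with hypothetical helper names, leaving out
the shared/paged-page handling done in __hvm_map_guest_frame():

    static void *example_map_long_lived(struct domain *d, unsigned long gfn)
    {
        p2m_type_t t;
        unsigned long mfn = mfn_x(get_gfn(d, gfn, &t));
        void *map = NULL;

        /* Pin the page so the map stays valid across hypervisor exits. */
        if ( mfn_valid(mfn) && get_page(mfn_to_page(mfn), d) )
            map = map_domain_page(mfn);

        /* The gfn is never held past this function. */
        put_gfn(d, gfn);
        return map;
    }

    static void example_unmap_long_lived(void *map)
    {
        if ( map )
        {
            unsigned long mfn = xen_map_to_mfn(map);

            unmap_domain_page(map);
            put_page(mfn_to_page(mfn));   /* drop the ref taken at map time */
        }
    }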
Andres Lagar-Cavilla
2011-Dec-01 16:21 UTC
[PATCH 3 of 3] x86/mm: Fix checks during foreign mapping of paged pages
 xen/arch/x86/mm.c |  12 ++++++++----
 1 files changed, 8 insertions(+), 4 deletions(-)

Check that the valid mfn is the one we are mapping, not the mfn of the
page table of the foreign domain.

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

diff -r 4e0c533a3e1d -r 2372d2bf76b5 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3572,7 +3572,8 @@ int do_mmu_update(
                     rc = -ENOENT;
                     break;
                 }
-                else if ( p2m_ram_paging_in_start == l1e_p2mt && !mfn_valid(mfn) )
+                else if ( p2m_ram_paging_in_start == l1e_p2mt &&
+                          !mfn_valid(l1emfn) )
                 {
                     put_gfn(pg_owner, l1egfn);
                     rc = -ENOENT;
@@ -3620,7 +3621,8 @@ int do_mmu_update(
                     rc = -ENOENT;
                     break;
                 }
-                else if ( p2m_ram_paging_in_start == l2e_p2mt && !mfn_valid(mfn) )
+                else if ( p2m_ram_paging_in_start == l2e_p2mt &&
+                          !mfn_valid(l2emfn) )
                 {
                     put_gfn(pg_owner, l2egfn);
                     rc = -ENOENT;
@@ -3654,7 +3656,8 @@ int do_mmu_update(
                     rc = -ENOENT;
                     break;
                 }
-                else if ( p2m_ram_paging_in_start == l3e_p2mt && !mfn_valid(mfn) )
+                else if ( p2m_ram_paging_in_start == l3e_p2mt &&
+                          !mfn_valid(l3emfn) )
                 {
                     put_gfn(pg_owner, l3egfn);
                     rc = -ENOENT;
@@ -3688,7 +3691,8 @@ int do_mmu_update(
                     rc = -ENOENT;
                     break;
                 }
-                else if ( p2m_ram_paging_in_start == l4e_p2mt && !mfn_valid(mfn) )
+                else if ( p2m_ram_paging_in_start == l4e_p2mt &&
+                          !mfn_valid(l4emfn) )
                 {
                     put_gfn(pg_owner, l4egfn);
                     rc = -ENOENT;
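For illustration only (hypothetical helper, not part of the patch): the
check being fixed concerns the frame a new PTE points at, which is in
general unrelated to the frame holding the foreign page table being
written. Expressed as a predicate on the target entry alone:

    /* Hypothetical helper: should a foreign l1e update be deferred because
     * the frame the *entry points at* is still being paged in? */
    static int example_l1e_target_paging_in(p2m_type_t l1e_p2mt,
                                            unsigned long l1emfn)
    {
        return ( p2m_ram_paging_in_start == l1e_p2mt &&
                 !mfn_valid(l1emfn) );
    }

Testing mfn (the page-table frame) here, as the old code did, would let
an update through while the target page is still absent.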