Hi, V4: - Dropped V3 patch 2, and moved just the small needed code to domain_build.c - xsm related changes. These patches implement PVH dom0. Patches 1 and 2 implement changes in and around construct_dom0. Patch 9 adds option to boot a dom0 in PVH mode. The rest support tool stack on a pvh dom0. These patches are based on c/s: 0475025 These can also be found in public git tree at: git://oss.oracle.com/git/mrathor/xen.git branch: dom0pvh-v4 Thanks for all the help, Mukesh
Mukesh Rathor
2013-Dec-03 02:30 UTC
[V4 PATCH 1/7] PVH dom0: move some pv specific code to static functions
In this preparatory patch also, some pv specific code is carved out into static functions. No functionality change. Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> --- xen/arch/x86/domain_build.c | 347 +++++++++++++++++++++++-------------------- 1 files changed, 188 insertions(+), 159 deletions(-) diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c index 232adf8..67a569a 100644 --- a/xen/arch/x86/domain_build.c +++ b/xen/arch/x86/domain_build.c @@ -307,6 +307,187 @@ static void __init process_dom0_ioports_disable(void) } } +static __init void mark_pv_pt_pages_rdonly(struct domain *d, + l4_pgentry_t *l4start, + unsigned long vpt_start, + unsigned long nr_pt_pages) +{ + unsigned long count; + struct page_info *page; + l4_pgentry_t *pl4e; + l3_pgentry_t *pl3e; + l2_pgentry_t *pl2e; + l1_pgentry_t *pl1e; + + pl4e = l4start + l4_table_offset(vpt_start); + pl3e = l4e_to_l3e(*pl4e); + pl3e += l3_table_offset(vpt_start); + pl2e = l3e_to_l2e(*pl3e); + pl2e += l2_table_offset(vpt_start); + pl1e = l2e_to_l1e(*pl2e); + pl1e += l1_table_offset(vpt_start); + for ( count = 0; count < nr_pt_pages; count++ ) + { + l1e_remove_flags(*pl1e, _PAGE_RW); + page = mfn_to_page(l1e_get_pfn(*pl1e)); + + /* Read-only mapping + PGC_allocated + page-table page. */ + page->count_info = PGC_allocated | 3; + page->u.inuse.type_info |= PGT_validated | 1; + + /* Top-level p.t. is pinned. */ + if ( (page->u.inuse.type_info & PGT_type_mask) =+ (!is_pv_32on64_domain(d) ? + PGT_l4_page_table : PGT_l3_page_table) ) + { + page->count_info += 1; + page->u.inuse.type_info += 1 | PGT_pinned; + } + + /* Iterate. 
*/ + if ( !((unsigned long)++pl1e & (PAGE_SIZE - 1)) ) + { + if ( !((unsigned long)++pl2e & (PAGE_SIZE - 1)) ) + { + if ( !((unsigned long)++pl3e & (PAGE_SIZE - 1)) ) + pl3e = l4e_to_l3e(*++pl4e); + pl2e = l3e_to_l2e(*pl3e); + } + pl1e = l2e_to_l1e(*pl2e); + } + } +} + +static __init void setup_pv_physmap(struct domain *d, unsigned long pgtbl_pfn, + unsigned long v_start, unsigned long v_end, + unsigned long vphysmap_start, + unsigned long vphysmap_end, + unsigned long nr_pages) +{ + struct page_info *page = NULL; + l4_pgentry_t *pl4e, *l4start = map_domain_page(pgtbl_pfn); + l3_pgentry_t *pl3e = NULL; + l2_pgentry_t *pl2e = NULL; + l1_pgentry_t *pl1e = NULL; + + if ( v_start <= vphysmap_end && vphysmap_start <= v_end ) + panic("DOM0 P->M table overlaps initial mapping"); + + while ( vphysmap_start < vphysmap_end ) + { + if ( d->tot_pages + ((round_pgup(vphysmap_end) - vphysmap_start) + >> PAGE_SHIFT) + 3 > nr_pages ) + panic("Dom0 allocation too small for initial P->M table.\n"); + + if ( pl1e ) + { + unmap_domain_page(pl1e); + pl1e = NULL; + } + if ( pl2e ) + { + unmap_domain_page(pl2e); + pl2e = NULL; + } + if ( pl3e ) + { + unmap_domain_page(pl3e); + pl3e = NULL; + } + pl4e = l4start + l4_table_offset(vphysmap_start); + if ( !l4e_get_intpte(*pl4e) ) + { + page = alloc_domheap_page(d, 0); + if ( !page ) + break; + + /* No mapping, PGC_allocated + page-table page. 
*/ + page->count_info = PGC_allocated | 2; + page->u.inuse.type_info = PGT_l3_page_table | PGT_validated | 1; + pl3e = __map_domain_page(page); + clear_page(pl3e); + *pl4e = l4e_from_page(page, L4_PROT); + } else + pl3e = map_domain_page(l4e_get_pfn(*pl4e)); + + pl3e += l3_table_offset(vphysmap_start); + if ( !l3e_get_intpte(*pl3e) ) + { + if ( cpu_has_page1gb && + !(vphysmap_start & ((1UL << L3_PAGETABLE_SHIFT) - 1)) && + vphysmap_end >= vphysmap_start + (1UL << L3_PAGETABLE_SHIFT) && + (page = alloc_domheap_pages(d, + L3_PAGETABLE_SHIFT - PAGE_SHIFT, + 0)) != NULL ) + { + *pl3e = l3e_from_page(page, L1_PROT|_PAGE_DIRTY|_PAGE_PSE); + vphysmap_start += 1UL << L3_PAGETABLE_SHIFT; + continue; + } + if ( (page = alloc_domheap_page(d, 0)) == NULL ) + break; + + /* No mapping, PGC_allocated + page-table page. */ + page->count_info = PGC_allocated | 2; + page->u.inuse.type_info = PGT_l2_page_table | PGT_validated | 1; + pl2e = __map_domain_page(page); + clear_page(pl2e); + *pl3e = l3e_from_page(page, L3_PROT); + } + else + pl2e = map_domain_page(l3e_get_pfn(*pl3e)); + + pl2e += l2_table_offset(vphysmap_start); + if ( !l2e_get_intpte(*pl2e) ) + { + if ( !(vphysmap_start & ((1UL << L2_PAGETABLE_SHIFT) - 1)) && + vphysmap_end >= vphysmap_start + (1UL << L2_PAGETABLE_SHIFT) && + (page = alloc_domheap_pages(d, + L2_PAGETABLE_SHIFT - PAGE_SHIFT, + 0)) != NULL ) + { + *pl2e = l2e_from_page(page, L1_PROT|_PAGE_DIRTY|_PAGE_PSE); + if ( opt_allow_superpage ) + get_superpage(page_to_mfn(page), d); + vphysmap_start += 1UL << L2_PAGETABLE_SHIFT; + continue; + } + if ( (page = alloc_domheap_page(d, 0)) == NULL ) + break; + + /* No mapping, PGC_allocated + page-table page. 
*/ + page->count_info = PGC_allocated | 2; + page->u.inuse.type_info = PGT_l1_page_table | PGT_validated | 1; + pl1e = __map_domain_page(page); + clear_page(pl1e); + *pl2e = l2e_from_page(page, L2_PROT); + } + else + pl1e = map_domain_page(l2e_get_pfn(*pl2e)); + + pl1e += l1_table_offset(vphysmap_start); + BUG_ON(l1e_get_intpte(*pl1e)); + page = alloc_domheap_page(d, 0); + if ( !page ) + break; + + *pl1e = l1e_from_page(page, L1_PROT|_PAGE_DIRTY); + vphysmap_start += PAGE_SIZE; + vphysmap_start &= PAGE_MASK; + } + if ( !page ) + panic("Not enough RAM for DOM0 P->M table.\n"); + + if ( pl1e ) + unmap_domain_page(pl1e); + if ( pl2e ) + unmap_domain_page(pl2e); + if ( pl3e ) + unmap_domain_page(pl3e); + + unmap_domain_page(l4start); +} + int __init construct_dom0( struct domain *d, const module_t *image, unsigned long image_headroom, @@ -706,43 +887,8 @@ int __init construct_dom0( } /* Pages that are part of page tables must be read only. */ - l4tab = l4start + l4_table_offset(vpt_start); - l3start = l3tab = l4e_to_l3e(*l4tab); - l3tab += l3_table_offset(vpt_start); - l2start = l2tab = l3e_to_l2e(*l3tab); - l2tab += l2_table_offset(vpt_start); - l1start = l1tab = l2e_to_l1e(*l2tab); - l1tab += l1_table_offset(vpt_start); - for ( count = 0; count < nr_pt_pages; count++ ) - { - l1e_remove_flags(*l1tab, _PAGE_RW); - page = mfn_to_page(l1e_get_pfn(*l1tab)); - - /* Read-only mapping + PGC_allocated + page-table page. */ - page->count_info = PGC_allocated | 3; - page->u.inuse.type_info |= PGT_validated | 1; - - /* Top-level p.t. is pinned. */ - if ( (page->u.inuse.type_info & PGT_type_mask) =- (!is_pv_32on64_domain(d) ? - PGT_l4_page_table : PGT_l3_page_table) ) - { - page->count_info += 1; - page->u.inuse.type_info += 1 | PGT_pinned; - } - - /* Iterate. 
*/ - if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) ) - { - if ( !((unsigned long)++l2tab & (PAGE_SIZE - 1)) ) - { - if ( !((unsigned long)++l3tab & (PAGE_SIZE - 1)) ) - l3start = l3tab = l4e_to_l3e(*++l4tab); - l2start = l2tab = l3e_to_l2e(*l3tab); - } - l1start = l1tab = l2e_to_l1e(*l2tab); - } - } + if ( is_pv_domain(d) ) + mark_pv_pt_pages_rdonly(d, l4start, vpt_start, nr_pt_pages); /* Mask all upcalls... */ for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ ) @@ -814,132 +960,15 @@ int __init construct_dom0( elf_64bit(&elf) ? 64 : 32, parms.pae ? "p" : ""); count = d->tot_pages; - l4start = map_domain_page(pagetable_get_pfn(v->arch.guest_table)); - l3tab = NULL; - l2tab = NULL; - l1tab = NULL; + /* Set up the phys->machine table if not part of the initial mapping. */ - if ( parms.p2m_base != UNSET_ADDR ) + if ( is_pv_domain(d) && parms.p2m_base != UNSET_ADDR ) { - unsigned long va = vphysmap_start; - - if ( v_start <= vphysmap_end && vphysmap_start <= v_end ) - panic("DOM0 P->M table overlaps initial mapping"); - - while ( va < vphysmap_end ) - { - if ( d->tot_pages + ((round_pgup(vphysmap_end) - va) - >> PAGE_SHIFT) + 3 > nr_pages ) - panic("Dom0 allocation too small for initial P->M table.\n"); - - if ( l1tab ) - { - unmap_domain_page(l1tab); - l1tab = NULL; - } - if ( l2tab ) - { - unmap_domain_page(l2tab); - l2tab = NULL; - } - if ( l3tab ) - { - unmap_domain_page(l3tab); - l3tab = NULL; - } - l4tab = l4start + l4_table_offset(va); - if ( !l4e_get_intpte(*l4tab) ) - { - page = alloc_domheap_page(d, 0); - if ( !page ) - break; - /* No mapping, PGC_allocated + page-table page. 
*/ - page->count_info = PGC_allocated | 2; - page->u.inuse.type_info - PGT_l3_page_table | PGT_validated | 1; - l3tab = __map_domain_page(page); - clear_page(l3tab); - *l4tab = l4e_from_page(page, L4_PROT); - } else - l3tab = map_domain_page(l4e_get_pfn(*l4tab)); - l3tab += l3_table_offset(va); - if ( !l3e_get_intpte(*l3tab) ) - { - if ( cpu_has_page1gb && - !(va & ((1UL << L3_PAGETABLE_SHIFT) - 1)) && - vphysmap_end >= va + (1UL << L3_PAGETABLE_SHIFT) && - (page = alloc_domheap_pages(d, - L3_PAGETABLE_SHIFT - - PAGE_SHIFT, - 0)) != NULL ) - { - *l3tab = l3e_from_page(page, - L1_PROT|_PAGE_DIRTY|_PAGE_PSE); - va += 1UL << L3_PAGETABLE_SHIFT; - continue; - } - if ( (page = alloc_domheap_page(d, 0)) == NULL ) - break; - /* No mapping, PGC_allocated + page-table page. */ - page->count_info = PGC_allocated | 2; - page->u.inuse.type_info - PGT_l2_page_table | PGT_validated | 1; - l2tab = __map_domain_page(page); - clear_page(l2tab); - *l3tab = l3e_from_page(page, L3_PROT); - } - else - l2tab = map_domain_page(l3e_get_pfn(*l3tab)); - l2tab += l2_table_offset(va); - if ( !l2e_get_intpte(*l2tab) ) - { - if ( !(va & ((1UL << L2_PAGETABLE_SHIFT) - 1)) && - vphysmap_end >= va + (1UL << L2_PAGETABLE_SHIFT) && - (page = alloc_domheap_pages(d, - L2_PAGETABLE_SHIFT - - PAGE_SHIFT, - 0)) != NULL ) - { - *l2tab = l2e_from_page(page, - L1_PROT|_PAGE_DIRTY|_PAGE_PSE); - if ( opt_allow_superpage ) - get_superpage(page_to_mfn(page), d); - va += 1UL << L2_PAGETABLE_SHIFT; - continue; - } - if ( (page = alloc_domheap_page(d, 0)) == NULL ) - break; - /* No mapping, PGC_allocated + page-table page. 
*/ - page->count_info = PGC_allocated | 2; - page->u.inuse.type_info - PGT_l1_page_table | PGT_validated | 1; - l1tab = __map_domain_page(page); - clear_page(l1tab); - *l2tab = l2e_from_page(page, L2_PROT); - } - else - l1tab = map_domain_page(l2e_get_pfn(*l2tab)); - l1tab += l1_table_offset(va); - BUG_ON(l1e_get_intpte(*l1tab)); - page = alloc_domheap_page(d, 0); - if ( !page ) - break; - *l1tab = l1e_from_page(page, L1_PROT|_PAGE_DIRTY); - va += PAGE_SIZE; - va &= PAGE_MASK; - } - if ( !page ) - panic("Not enough RAM for DOM0 P->M table.\n"); + pfn = pagetable_get_pfn(v->arch.guest_table); + setup_pv_physmap(d, pfn, v_start, v_end, vphysmap_start, vphysmap_end, + nr_pages); } - if ( l1tab ) - unmap_domain_page(l1tab); - if ( l2tab ) - unmap_domain_page(l2tab); - if ( l3tab ) - unmap_domain_page(l3tab); - unmap_domain_page(l4start); - /* Write the phys->machine and machine->phys table entries. */ for ( pfn = 0; pfn < count; pfn++ ) { -- 1.7.2.3
This patch changes construct_dom0 to boot in PVH mode. Changes need to support it are also included here. Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com> --- xen/arch/x86/domain_build.c | 235 +++++++++++++++++++++++++++++++++++++++--- xen/arch/x86/mm/hap/hap.c | 15 +++ xen/include/asm-x86/hap.h | 1 + 3 files changed, 234 insertions(+), 17 deletions(-) diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c index 67a569a..eb00c0d 100644 --- a/xen/arch/x86/domain_build.c +++ b/xen/arch/x86/domain_build.c @@ -35,6 +35,7 @@ #include <asm/setup.h> #include <asm/bzimage.h> /* for bzimage_parse */ #include <asm/io_apic.h> +#include <asm/hap.h> #include <public/version.h> @@ -307,6 +308,151 @@ static void __init process_dom0_ioports_disable(void) } } +static __init void pvh_add_mem_mapping(struct domain *d, unsigned long gfn, + unsigned long mfn, unsigned long nr_mfns) +{ + unsigned long i; + for ( i = 0; i < nr_mfns; i++ ) + if ( !set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i)) ) + panic("Failed setting p2m. gfn:%lx mfn:%lx i:%ld\n", gfn, mfn, i); +} + +/* + * Set the 1:1 map for all non-RAM regions for dom 0. Thus, dom0 will have + * the entire io region mapped in the EPT/NPT. + * + * pvh fixme: The following doesn''t map MMIO ranges when they sit above the + * highest E820 covered address. 
+ */ +static __init void pvh_map_all_iomem(struct domain *d) +{ + unsigned long start_pfn, end_pfn, end = 0, start = 0; + const struct e820entry *entry; + unsigned int i, nump; + + for ( i = 0, entry = e820.map; i < e820.nr_map; i++, entry++ ) + { + end = entry->addr + entry->size; + + if ( entry->type == E820_RAM || entry->type == E820_UNUSABLE || + i == e820.nr_map - 1 ) + { + start_pfn = PFN_DOWN(start); + + /* Unused RAM areas are marked UNUSABLE, so skip it too */ + if ( entry->type == E820_RAM || entry->type == E820_UNUSABLE ) + end_pfn = PFN_UP(entry->addr); + else + end_pfn = PFN_UP(end); + + if ( start_pfn < end_pfn ) + { + nump = end_pfn - start_pfn; + /* Add pages to the mapping */ + pvh_add_mem_mapping(d, start_pfn, start_pfn, nump); + } + start = end; + } + } + + /* If the e820 ended under 4GB, we must map the remaining space upto 4GB */ + if ( end < GB(4) ) + { + start_pfn = PFN_UP(end); + end_pfn = (GB(4)) >> PAGE_SHIFT; + nump = end_pfn - start_pfn; + pvh_add_mem_mapping(d, start_pfn, start_pfn, nump); + } +} + +static __init void dom0_update_physmap(struct domain *d, unsigned long pfn, + unsigned long mfn, unsigned long vphysmap_s) +{ + if ( is_pvh_domain(d) ) + { + int rc = guest_physmap_add_page(d, pfn, mfn, 0); + BUG_ON(rc); + return; + } + if ( !is_pv_32on64_domain(d) ) + ((unsigned long *)vphysmap_s)[pfn] = mfn; + else + ((unsigned int *)vphysmap_s)[pfn] = mfn; + + set_gpfn_from_mfn(mfn, pfn); +} + +static __init void pvh_fixup_page_tables_for_hap(struct vcpu *v, + unsigned long v_start, + unsigned long v_end) +{ + int i, j, k; + l4_pgentry_t *pl4e, *l4start; + l3_pgentry_t *pl3e; + l2_pgentry_t *pl2e; + l1_pgentry_t *pl1e; + unsigned long cr3_pfn; + + ASSERT(paging_mode_enabled(v->domain)); + + l4start = map_domain_page(pagetable_get_pfn(v->arch.guest_table)); + + /* Clear entries prior to guest L4 start */ + pl4e = l4start + l4_table_offset(v_start); + memset(l4start, 0, (unsigned long)pl4e - (unsigned long)l4start); + + for ( ; pl4e <= 
l4start + l4_table_offset(v_end - 1); pl4e++ ) + { + pl3e = map_l3t_from_l4e(*pl4e); + for ( i = 0; i < PAGE_SIZE / sizeof(*pl3e); i++, pl3e++ ) + { + if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) + continue; + + pl2e = map_l2t_from_l3e(*pl3e); + for ( j = 0; j < PAGE_SIZE / sizeof(*pl2e); j++, pl2e++ ) + { + if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) + continue; + + pl1e = map_l1t_from_l2e(*pl2e); + for ( k = 0; k < PAGE_SIZE / sizeof(*pl1e); k++, pl1e++ ) + { + if ( !(l1e_get_flags(*pl1e) & _PAGE_PRESENT) ) + continue; + + *pl1e = l1e_from_pfn(get_gpfn_from_mfn(l1e_get_pfn(*pl1e)), + l1e_get_flags(*pl1e)); + } + unmap_domain_page(pl1e); + *pl2e = l2e_from_pfn(get_gpfn_from_mfn(l2e_get_pfn(*pl2e)), + l2e_get_flags(*pl2e)); + } + unmap_domain_page(pl2e); + *pl3e = l3e_from_pfn(get_gpfn_from_mfn(l3e_get_pfn(*pl3e)), + l3e_get_flags(*pl3e)); + } + unmap_domain_page(pl3e); + *pl4e = l4e_from_pfn(get_gpfn_from_mfn(l4e_get_pfn(*pl4e)), + l4e_get_flags(*pl4e)); + } + + /* Clear entries post guest L4. */ + if ( (unsigned long)pl4e & (PAGE_SIZE - 1) ) + memset(pl4e, 0, PAGE_SIZE - ((unsigned long)pl4e & (PAGE_SIZE - 1))); + + unmap_domain_page(l4start); + + cr3_pfn = get_gpfn_from_mfn(paddr_to_pfn(v->arch.cr3)); + v->arch.hvm_vcpu.guest_cr[3] = pfn_to_paddr(cr3_pfn); + + /* + * Finally, we update the paging modes (hap_update_paging_modes). This will + * create monitor_table for us, update v->arch.cr3, and update vmcs.cr3. + */ + paging_update_paging_modes(v); +} + static __init void mark_pv_pt_pages_rdonly(struct domain *d, l4_pgentry_t *l4start, unsigned long vpt_start, @@ -516,6 +662,8 @@ int __init construct_dom0( l3_pgentry_t *l3tab = NULL, *l3start = NULL; l2_pgentry_t *l2tab = NULL, *l2start = NULL; l1_pgentry_t *l1tab = NULL, *l1start = NULL; + paddr_t shared_info_paddr = 0; + u32 save_pvh_pg_mode = 0; /* * This fully describes the memory layout of the initial domain. 
All @@ -593,12 +741,21 @@ int __init construct_dom0( goto out; } - if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE && - !test_bit(XENFEAT_dom0, parms.f_supported) ) + if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE ) { - printk("Kernel does not support Dom0 operation\n"); - rc = -EINVAL; - goto out; + if ( !test_bit(XENFEAT_dom0, parms.f_supported) ) + { + printk("Kernel does not support Dom0 operation\n"); + rc = -EINVAL; + goto out; + } + if ( is_pvh_domain(d) && + !test_bit(XENFEAT_hvm_callback_vector, parms.f_supported) ) + { + printk("Kernel does not support PVH mode\n"); + rc = -EINVAL; + goto out; + } } if ( compat32 ) @@ -663,6 +820,13 @@ int __init construct_dom0( vstartinfo_end = (vstartinfo_start + sizeof(struct start_info) + sizeof(struct dom0_vga_console_info)); + + if ( is_pvh_domain(d) ) + { + shared_info_paddr = round_pgup(vstartinfo_end) - v_start; + vstartinfo_end += PAGE_SIZE; + } + vpt_start = round_pgup(vstartinfo_end); for ( nr_pt_pages = 2; ; nr_pt_pages++ ) { @@ -903,6 +1067,13 @@ int __init construct_dom0( (void)alloc_vcpu(d, i, cpu); } + /* + * pvh: we temporarily disable paging mode so that we can build cr3 needed + * to run on dom0''s page tables. + */ + save_pvh_pg_mode = d->arch.paging.mode; + d->arch.paging.mode = 0; + /* Set up CR3 value for write_ptbase */ if ( paging_mode_enabled(d) ) paging_update_paging_modes(v); @@ -969,6 +1140,15 @@ int __init construct_dom0( nr_pages); } + if ( is_pvh_domain(d) ) + hap_set_pvh_alloc_for_dom0(d, nr_pages); + + /* + * We enable paging mode again so guest_physmap_add_page will do the + * right thing for us. + */ + d->arch.paging.mode = save_pvh_pg_mode; + /* Write the phys->machine and machine->phys table entries. 
*/ for ( pfn = 0; pfn < count; pfn++ ) { @@ -985,11 +1165,7 @@ int __init construct_dom0( if ( pfn > REVERSE_START && (vinitrd_start || pfn < initrd_pfn) ) mfn = alloc_epfn - (pfn - REVERSE_START); #endif - if ( !is_pv_32on64_domain(d) ) - ((unsigned long *)vphysmap_start)[pfn] = mfn; - else - ((unsigned int *)vphysmap_start)[pfn] = mfn; - set_gpfn_from_mfn(mfn, pfn); + dom0_update_physmap(d, pfn, mfn, vphysmap_start); if (!(pfn & 0xfffff)) process_pending_softirqs(); } @@ -1005,8 +1181,8 @@ int __init construct_dom0( if ( !page->u.inuse.type_info && !get_page_and_type(page, d, PGT_writable_page) ) BUG(); - ((unsigned long *)vphysmap_start)[pfn] = mfn; - set_gpfn_from_mfn(mfn, pfn); + + dom0_update_physmap(d, pfn, mfn, vphysmap_start); ++pfn; if (!(pfn & 0xfffff)) process_pending_softirqs(); @@ -1026,11 +1202,7 @@ int __init construct_dom0( #ifndef NDEBUG #define pfn (nr_pages - 1 - (pfn - (alloc_epfn - alloc_spfn))) #endif - if ( !is_pv_32on64_domain(d) ) - ((unsigned long *)vphysmap_start)[pfn] = mfn; - else - ((unsigned int *)vphysmap_start)[pfn] = mfn; - set_gpfn_from_mfn(mfn, pfn); + dom0_update_physmap(d, pfn, mfn, vphysmap_start); #undef pfn page++; pfn++; if (!(pfn & 0xfffff)) @@ -1054,6 +1226,15 @@ int __init construct_dom0( si->console.dom0.info_size = sizeof(struct dom0_vga_console_info); } + /* + * PVH: We need to update si->shared_info while we are on dom0 page tables, + * but need to defer the p2m update until after we have fixed up the + * page tables for PVH so that the m2p for the si pte entry returns + * correct pfn. 
+ */ + if ( is_pvh_domain(d) ) + si->shared_info = shared_info_paddr; + if ( is_pv_32on64_domain(d) ) xlat_start_info(si, XLAT_start_info_console_dom0); @@ -1087,8 +1268,15 @@ int __init construct_dom0( regs->eflags = X86_EFLAGS_IF; if ( opt_dom0_shadow ) + { + if ( is_pvh_domain(d) ) + { + printk("Unsupported option dom0_shadow for PVH\n"); + return -EINVAL; + } if ( paging_enable(d, PG_SH_enable) == 0 ) paging_update_paging_modes(v); + } if ( supervisor_mode_kernel ) { @@ -1178,6 +1366,19 @@ int __init construct_dom0( printk(" Xen warning: dom0 kernel broken ELF: %s\n", elf_check_broken(&elf)); + if ( is_pvh_domain(d) ) + { + /* finally, fixup the page table, replacing mfns with pfns */ + pvh_fixup_page_tables_for_hap(v, v_start, v_end); + + /* the pt has correct pfn for si, now update the mfn in the p2m */ + mfn = virt_to_mfn(d->shared_info); + pfn = shared_info_paddr >> PAGE_SHIFT; + dom0_update_physmap(d, pfn, mfn, 0); + + pvh_map_all_iomem(d); + } + iommu_dom0_init(dom0); return 0; diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c index d3f64bd..cc3ba66 100644 --- a/xen/arch/x86/mm/hap/hap.c +++ b/xen/arch/x86/mm/hap/hap.c @@ -579,6 +579,21 @@ int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc, } } +void __init hap_set_pvh_alloc_for_dom0(struct domain *d, + unsigned long num_pages) +{ + int rc; + unsigned long memkb = num_pages * (PAGE_SIZE / 1024); + + /* Copied from: libxl_get_required_shadow_memory() */ + memkb = 4 * (256 * d->max_vcpus + 2 * (memkb / 1024)); + num_pages = ( (memkb + 1023) / 1024) << (20 - PAGE_SHIFT); + paging_lock(d); + rc = hap_set_allocation(d, num_pages, NULL); + paging_unlock(d); + BUG_ON(rc); +} + static const struct paging_mode hap_paging_real_mode; static const struct paging_mode hap_paging_protected_mode; static const struct paging_mode hap_paging_pae_mode; diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h index e03f983..aab8558 100644 --- a/xen/include/asm-x86/hap.h +++ 
b/xen/include/asm-x86/hap.h @@ -63,6 +63,7 @@ int hap_track_dirty_vram(struct domain *d, XEN_GUEST_HANDLE_64(uint8) dirty_bitmap); extern const struct paging_mode *hap_paging_get_mode(struct vcpu *); +void hap_set_pvh_alloc_for_dom0(struct domain *d, unsigned long num_pages); #endif /* XEN_HAP_H */ -- 1.7.2.3
Mukesh Rathor
2013-Dec-03 02:30 UTC
[V4 PATCH 3/7] PVH dom0: implement XENMEM_add_to_physmap_range for x86
This preparatory patch adds support for XENMEM_add_to_physmap_range on x86 so it can be used to create a guest on PVH dom0. To this end, we add a new function xenmem_add_to_physmap_range(), and change xenmem_add_to_physmap_once parameters so it can be called from xenmem_add_to_physmap_range. Please note, compat will continue to return -ENOSYS. Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com> --- xen/arch/x86/mm.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 65 insertions(+), 0 deletions(-) diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 6c26026..4ae4523 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -4675,6 +4675,39 @@ static int xenmem_add_to_physmap(struct domain *d, return xenmem_add_to_physmap_once(d, xatp); } +static int xenmem_add_to_physmap_range(struct domain *d, + struct xen_add_to_physmap_range *xatpr) +{ + /* Process entries in reverse order to allow continuations */ + while ( xatpr->size > 0 ) + { + int rc; + xen_ulong_t idx; + xen_pfn_t gpfn; + struct xen_add_to_physmap xatp; + + if ( copy_from_guest_offset(&idx, xatpr->idxs, xatpr->size-1, 1) || + copy_from_guest_offset(&gpfn, xatpr->gpfns, xatpr->size-1, 1) ) + return -EFAULT; + + xatp.space = xatpr->space; + xatp.idx = idx; + xatp.gpfn = gpfn; + rc = xenmem_add_to_physmap_once(d, &xatp); + + if ( copy_to_guest_offset(xatpr->errs, xatpr->size-1, &rc, 1) ) + return -EFAULT; + + xatpr->size--; + + /* Check for continuation if it's not the last iteration */ + if ( xatpr->size > 0 && hypercall_preempt_check() ) + return -EAGAIN; + } + + return 0; +} + long arch_memory_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg) { int rc; @@ -4689,6 +4722,10 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg) if ( copy_from_guest(&xatp, arg, 1) ) return -EFAULT; + /* This one is only supported for add_to_physmap_range for now */ + if ( xatp.space == XENMAPSPACE_gmfn_foreign ) + return -EINVAL; + d = rcu_lock_domain_by_any_id(xatp.domid); if ( d == 
NULL ) return -ESRCH; @@ -4716,6 +4753,34 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg) return rc; } + case XENMEM_add_to_physmap_range: + { + struct xen_add_to_physmap_range xatpr; + struct domain *d; + + if ( copy_from_guest(&xatpr, arg, 1) ) + return -EFAULT; + + /* This mapspace is redundant for this hypercall */ + if ( xatpr.space == XENMAPSPACE_gmfn_range ) + return -EINVAL; + + d = rcu_lock_domain_by_any_id(xatpr.domid); + if ( d == NULL ) + return -ESRCH; + + if ( (rc = xsm_add_to_physmap(XSM_TARGET, current->domain, d)) == 0 ) + rc = xenmem_add_to_physmap_range(d, &xatpr); + + rcu_unlock_domain(d); + + if ( rc == -EAGAIN ) + rc = hypercall_create_continuation( + __HYPERVISOR_memory_op, "ih", op, arg); + + return rc; + } + case XENMEM_set_memory_map: { struct xen_foreign_memory_map fmap; -- 1.7.2.3
In this patch, a new type p2m_map_foreign is introduced for pages that toolstack on PVH dom0 maps from foreign domains that it is creating or supporting during its run time. Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com> --- xen/arch/x86/mm/p2m-ept.c | 1 + xen/arch/x86/mm/p2m-pt.c | 1 + xen/arch/x86/mm/p2m.c | 28 ++++++++++++++++++++-------- xen/include/asm-x86/p2m.h | 4 ++++ 4 files changed, 26 insertions(+), 8 deletions(-) diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c index 92d9e2d..08d1d72 100644 --- a/xen/arch/x86/mm/p2m-ept.c +++ b/xen/arch/x86/mm/p2m-ept.c @@ -75,6 +75,7 @@ static void ept_p2m_type_to_flags(ept_entry_t *entry, p2m_type_t type, p2m_acces entry->w = 0; break; case p2m_grant_map_rw: + case p2m_map_foreign: entry->r = entry->w = 1; entry->x = 0; break; diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c index a1d5650..09b60ce 100644 --- a/xen/arch/x86/mm/p2m-pt.c +++ b/xen/arch/x86/mm/p2m-pt.c @@ -89,6 +89,7 @@ static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn) case p2m_ram_rw: return flags | P2M_BASE_FLAGS | _PAGE_RW; case p2m_grant_map_rw: + case p2m_map_foreign: return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT; case p2m_mmio_direct: if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) ) diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c index 8f380ed..0659ef1 100644 --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -525,7 +525,7 @@ p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn, unsigned long mfn, for ( i = 0; i < (1UL << page_order); i++ ) { mfn_return = p2m->get_entry(p2m, gfn + i, &t, &a, 0, NULL); - if ( !p2m_is_grant(t) && !p2m_is_shared(t) ) + if ( !p2m_is_grant(t) && !p2m_is_shared(t) && !p2m_is_foreign(t) ) set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY); ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) ); } @@ -756,10 +756,9 @@ void p2m_change_type_range(struct domain *d, p2m_unlock(p2m); } - - -int -set_mmio_p2m_entry(struct domain 
*d, unsigned long gfn, mfn_t mfn) +static int +set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, + p2m_type_t gfn_p2mt) { int rc = 0; p2m_access_t a; @@ -784,16 +783,29 @@ set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); } - P2M_DEBUG("set mmio %lx %lx\n", gfn, mfn_x(mfn)); - rc = set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_mmio_direct, p2m->default_access); + P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn, mfn_x(mfn)); + rc = set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, gfn_p2mt, + p2m->default_access); gfn_unlock(p2m, gfn, 0); if ( 0 == rc ) gdprintk(XENLOG_ERR, - "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n", + "%s: set_p2m_entry failed! mfn=%08lx\n", __func__, mfn_x(get_gfn_query_unlocked(p2m->domain, gfn, &ot))); return rc; } +/* Returns: True for success. */ +int set_foreign_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) +{ + return set_typed_p2m_entry(d, gfn, mfn, p2m_map_foreign); +} + +int +set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) +{ + return set_typed_p2m_entry(d, gfn, mfn, p2m_mmio_direct); +} + int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn) { diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h index 43583b2..6fc71a1 100644 --- a/xen/include/asm-x86/p2m.h +++ b/xen/include/asm-x86/p2m.h @@ -70,6 +70,7 @@ typedef enum { p2m_ram_paging_in = 11, /* Memory that is being paged in */ p2m_ram_shared = 12, /* Shared or sharable memory */ p2m_ram_broken = 13, /* Broken page, access cause domain crash */ + p2m_map_foreign = 14, /* ram pages from foreign domain */ } p2m_type_t; /* @@ -180,6 +181,7 @@ typedef unsigned int p2m_query_t; #define p2m_is_sharable(_t) (p2m_to_mask(_t) & P2M_SHARABLE_TYPES) #define p2m_is_shared(_t) (p2m_to_mask(_t) & P2M_SHARED_TYPES) #define p2m_is_broken(_t) (p2m_to_mask(_t) & P2M_BROKEN_TYPES) +#define p2m_is_foreign(_t) (p2m_to_mask(_t) & p2m_to_mask(p2m_map_foreign)) /* 
Per-p2m-table state */ struct p2m_domain { @@ -510,6 +512,8 @@ p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn, int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn); int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn); +/* Set foreign mfn in the current guest's p2m table. */ +int set_foreign_p2m_entry(struct domain *domp, unsigned long gfn, mfn_t mfn); /* * Populate-on-demand -- 1.7.2.3
In preparation for the next patch, we update xsm_add_to_physmap to allow for checking of foreign domain. Thus, the current domain must have the right to update the mappings of target domain with pages from foreign domain. Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com> --- xen/arch/x86/mm.c | 18 +++++++++++++++--- xen/include/xsm/dummy.h | 10 ++++++++-- xen/include/xsm/xsm.h | 6 +++--- xen/xsm/flask/hooks.c | 9 +++++++-- 4 files changed, 33 insertions(+), 10 deletions(-) diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 4ae4523..e3da479 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -4730,7 +4730,7 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg) if ( d == NULL ) return -ESRCH; - if ( xsm_add_to_physmap(XSM_TARGET, current->domain, d) ) + if ( xsm_add_to_physmap(XSM_TARGET, current->domain, d, NULL) ) { rcu_unlock_domain(d); return -EPERM; @@ -4756,7 +4756,7 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg) case XENMEM_add_to_physmap_range: { struct xen_add_to_physmap_range xatpr; - struct domain *d; + struct domain *d, *fd = NULL; if ( copy_from_guest(&xatpr, arg, 1) ) return -EFAULT; @@ -4769,10 +4769,22 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg) if ( d == NULL ) return -ESRCH; - if ( (rc = xsm_add_to_physmap(XSM_TARGET, current->domain, d)) == 0 ) + if ( xatpr.space == XENMAPSPACE_gmfn_foreign ) + { + fd = get_pg_owner(xatpr.foreign_domid); + if ( fd == NULL ) + { + rcu_unlock_domain(d); + return -ESRCH; + } + } + rc = xsm_add_to_physmap(XSM_TARGET, current->domain, d, fd); + if ( rc == 0 ) rc = xenmem_add_to_physmap_range(d, &xatpr); rcu_unlock_domain(d); + if ( fd ) + put_pg_owner(fd); if ( rc == -EAGAIN ) rc = hypercall_create_continuation( diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h index eb9e1a1..1228e52 100644 --- a/xen/include/xsm/dummy.h +++ b/xen/include/xsm/dummy.h @@ -467,10 +467,16 @@ static XSM_INLINE int xsm_pci_config_permission(XSM_DEFAULT_ARG struct 
domain *d return xsm_default_action(action, current->domain, d); } -static XSM_INLINE int xsm_add_to_physmap(XSM_DEFAULT_ARG struct domain *d1, struct domain *d2) +static XSM_INLINE int xsm_add_to_physmap(XSM_DEFAULT_ARG struct domain *d, struct domain *t, struct domain *f) { + int rc; + XSM_ASSERT_ACTION(XSM_TARGET); - return xsm_default_action(action, d1, d2); + rc = xsm_default_action(action, d, t); + if ( f && !rc ) + rc = xsm_default_action(action, d, f); + + return rc; } static XSM_INLINE int xsm_remove_from_physmap(XSM_DEFAULT_ARG struct domain *d1, struct domain *d2) diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h index 1939453..9ee9543 100644 --- a/xen/include/xsm/xsm.h +++ b/xen/include/xsm/xsm.h @@ -90,7 +90,7 @@ struct xsm_operations { int (*memory_adjust_reservation) (struct domain *d1, struct domain *d2); int (*memory_stat_reservation) (struct domain *d1, struct domain *d2); int (*memory_pin_page) (struct domain *d1, struct domain *d2, struct page_info *page); - int (*add_to_physmap) (struct domain *d1, struct domain *d2); + int (*add_to_physmap) (struct domain *d, struct domain *t, struct domain *f); int (*remove_from_physmap) (struct domain *d1, struct domain *d2); int (*claim_pages) (struct domain *d); @@ -344,9 +344,9 @@ static inline int xsm_memory_pin_page(xsm_default_t def, struct domain *d1, stru return xsm_ops->memory_pin_page(d1, d2, page); } -static inline int xsm_add_to_physmap(xsm_default_t def, struct domain *d1, struct domain *d2) +static inline int xsm_add_to_physmap(xsm_default_t def, struct domain *d, struct domain *t, struct domain *f) { - return xsm_ops->add_to_physmap(d1, d2); + return xsm_ops->add_to_physmap(d, t, f); } static inline int xsm_remove_from_physmap(xsm_default_t def, struct domain *d1, struct domain *d2) diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c index b1e2593..30807e9 100644 --- a/xen/xsm/flask/hooks.c +++ b/xen/xsm/flask/hooks.c @@ -1068,9 +1068,14 @@ static inline int 
flask_tmem_control(void) return domain_has_xen(current->domain, XEN__TMEM_CONTROL); } -static int flask_add_to_physmap(struct domain *d1, struct domain *d2) +static int flask_add_to_physmap(struct domain *d, struct domain *t, struct domain *f) { - return domain_has_perm(d1, d2, SECCLASS_MMU, MMU__PHYSMAP); + int rc; + + rc = domain_has_perm(d, t, SECCLASS_MMU, MMU__PHYSMAP); + if ( f && !rc ) + rc = domain_has_perm(d, f, SECCLASS_MMU, MMU__MAP_READ|MMU__MAP_WRITE); + return rc; } static int flask_remove_from_physmap(struct domain *d1, struct domain *d2) -- 1.7.2.3
In this patch, a new function, xenmem_add_foreign_to_pmap(), is added to map pages from foreign guest into current dom0 for domU creation. Such pages are typed p2m_map_foreign. Also, support is added here to XENMEM_remove_from_physmap to remove such pages. Note, in the remove path, we must release the refcount that was taken during the map phase. Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com> --- xen/arch/x86/mm.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++---- xen/common/memory.c | 38 +++++++++++++++++++--- 2 files changed, 114 insertions(+), 12 deletions(-) diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index e3da479..1a4d564 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -2810,7 +2810,7 @@ static struct domain *get_pg_owner(domid_t domid) goto out; } - if ( unlikely(paging_mode_translate(curr)) ) + if ( !is_pvh_domain(curr) && unlikely(paging_mode_translate(curr)) ) { MEM_LOG("Cannot mix foreign mappings with translated domains"); goto out; @@ -4520,9 +4520,75 @@ static int handle_iomem_range(unsigned long s, unsigned long e, void *p) return 0; } +/* + * Add frames from foreign domain to target domain''s physmap. Similar to + * XENMAPSPACE_gmfn but the frame is foreign being mapped into current, + * and is not removed from foreign domain. + * Usage: libxl on pvh dom0 creating a guest and doing privcmd_ioctl_mmap. + * Side Effect: the mfn for fgfn will be refcounted so it is not lost + * while mapped here. The refcnt is released in do_memory_op() + * via XENMEM_remove_from_physmap. 
+ * Returns: 0 ==> success + */ +static int xenmem_add_foreign_to_p2m(struct domain *tdom, unsigned long fgfn, + unsigned long gpfn, struct domain *fdom) +{ + p2m_type_t p2mt, p2mt_prev; + int rc = 0; + unsigned long prev_mfn, mfn = 0; + struct page_info *page = NULL; + + if ( tdom == fdom || !tdom || !fdom || !is_pvh_domain(tdom) ) + return -EINVAL; + + /* following will take a refcnt on the mfn */ + page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC); + if ( !page || !p2m_is_valid(p2mt) ) + { + if ( page ) + put_page(page); + return -EINVAL; + } + mfn = page_to_mfn(page); + + /* Remove previously mapped page if it is present. */ + prev_mfn = mfn_x(get_gfn(tdom, gpfn, &p2mt_prev)); + if ( mfn_valid(prev_mfn) ) + { + if ( is_xen_heap_mfn(prev_mfn) ) + /* Xen heap frames are simply unhooked from this phys slot */ + guest_physmap_remove_page(tdom, gpfn, prev_mfn, 0); + else + /* Normal domain memory is freed, to avoid leaking memory. */ + guest_remove_page(tdom, gpfn); + } + /* + * Create the new mapping. Can''t use guest_physmap_add_page() because it + * will update the m2p table which will result in mfn -> gpfn of dom0 + * and not fgfn of domU. + */ + if ( set_foreign_p2m_entry(tdom, gpfn, _mfn(mfn)) == 0 ) + { + gdprintk(XENLOG_WARNING, "set_foreign_p2m_entry failed. " + "gpfn:%lx mfn:%lx fgfn:%lx td:%d fd:%d\n", + gpfn, mfn, fgfn, tdom->domain_id, fdom->domain_id); + put_page(page); + rc = -EINVAL; + } + + /* + * We must do this put_gfn after set_foreign_p2m_entry so another cpu + * doesn''t populate the gpfn before us. + */ + put_gfn(tdom, gpfn); + + return rc; +} + static int xenmem_add_to_physmap_once( struct domain *d, - const struct xen_add_to_physmap *xatp) + const struct xen_add_to_physmap *xatp, + struct domain *fdom) { struct page_info *page = NULL; unsigned long gfn = 0; /* gcc ... 
*/ @@ -4581,6 +4647,13 @@ static int xenmem_add_to_physmap_once( page = mfn_to_page(mfn); break; } + + case XENMAPSPACE_gmfn_foreign: + { + rc = xenmem_add_foreign_to_p2m(d, xatp->idx, xatp->gpfn, fdom); + return rc; + } + default: break; } @@ -4646,7 +4719,7 @@ static int xenmem_add_to_physmap(struct domain *d, start_xatp = *xatp; while ( xatp->size > 0 ) { - rc = xenmem_add_to_physmap_once(d, xatp); + rc = xenmem_add_to_physmap_once(d, xatp, NULL); if ( rc < 0 ) return rc; @@ -4672,11 +4745,12 @@ static int xenmem_add_to_physmap(struct domain *d, return rc; } - return xenmem_add_to_physmap_once(d, xatp); + return xenmem_add_to_physmap_once(d, xatp, NULL); } static int xenmem_add_to_physmap_range(struct domain *d, - struct xen_add_to_physmap_range *xatpr) + struct xen_add_to_physmap_range *xatpr, + struct domain *fdom) { /* Process entries in reverse order to allow continuations */ while ( xatpr->size > 0 ) @@ -4693,7 +4767,7 @@ static int xenmem_add_to_physmap_range(struct domain *d, xatp.space = xatpr->space; xatp.idx = idx; xatp.gpfn = gpfn; - rc = xenmem_add_to_physmap_once(d, &xatp); + rc = xenmem_add_to_physmap_once(d, &xatp, fdom); if ( copy_to_guest_offset(xatpr->errs, xatpr->size-1, &rc, 1) ) return -EFAULT; @@ -4780,7 +4854,7 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg) } rc = xsm_add_to_physmap(XSM_TARGET, current->domain, d, fd); if ( rc == 0 ) - rc = xenmem_add_to_physmap_range(d, &xatpr); + rc = xenmem_add_to_physmap_range(d, &xatpr, fd); rcu_unlock_domain(d); if ( fd ) diff --git a/xen/common/memory.c b/xen/common/memory.c index 50b740f..d81df18 100644 --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -675,9 +675,11 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) case XENMEM_remove_from_physmap: { + unsigned long mfn; struct xen_remove_from_physmap xrfp; struct page_info *page; - struct domain *d; + struct domain *d, *foreign_dom = NULL; + p2m_type_t p2mt, tp; if ( copy_from_guest(&xrfp, arg, 1) ) 
return -EFAULT; @@ -693,11 +695,37 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) return rc; } - page = get_page_from_gfn(d, xrfp.gpfn, NULL, P2M_ALLOC); - if ( page ) + /* + * if PVH, the gfn could be mapped to a mfn from foreign domain by the + * user space tool during domain creation. We need to check for that, + * free it up from the p2m, and release refcnt on it. In such a case, + * page would be NULL and the following call would not have refcnt''d + * the page. See also xenmem_add_foreign_to_pmap(). + */ + page = get_page_from_gfn(d, xrfp.gpfn, &p2mt, P2M_ALLOC); + + if ( page || p2m_is_foreign(p2mt) ) { - guest_physmap_remove_page(d, xrfp.gpfn, page_to_mfn(page), 0); - put_page(page); + if ( page ) + mfn = page_to_mfn(page); + else + { + mfn = mfn_x(get_gfn_query(d, xrfp.gpfn, &tp)); + foreign_dom = page_get_owner(mfn_to_page(mfn)); + ASSERT(is_pvh_domain(d)); + ASSERT(d != foreign_dom); + ASSERT(p2m_is_foreign(tp)); + } + + guest_physmap_remove_page(d, xrfp.gpfn, mfn, 0); + if (page) + put_page(page); + + if ( p2m_is_foreign(p2mt) ) + { + put_page(mfn_to_page(mfn)); + put_gfn(d, xrfp.gpfn); + } } else rc = -ENOENT; -- 1.7.2.3
Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com> --- xen/arch/x86/setup.c | 12 +++++++++--- 1 files changed, 9 insertions(+), 3 deletions(-) diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index e33c34b..4d15300 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -61,6 +61,10 @@ integer_param("maxcpus", max_cpus); static bool_t __initdata disable_smep; invbool_param("smep", disable_smep); +/* Boot dom0 in pvh mode */ +bool_t __initdata opt_dom0pvh; +boolean_param("dom0pvh", opt_dom0pvh); + /* **** Linux config option: propagated to domain0. */ /* "acpi=off": Sisables both ACPI table parsing and interpreter. */ /* "acpi=force": Override the disable blacklist. */ @@ -545,7 +549,7 @@ void __init __start_xen(unsigned long mbi_p) { char *memmap_type = NULL; char *cmdline, *kextra, *loader; - unsigned int initrdidx; + unsigned int initrdidx, domcr_flags = 0; multiboot_info_t *mbi = __va(mbi_p); module_t *mod = (module_t *)__va(mbi->mods_addr); unsigned long nr_pages, raw_max_page, modules_headroom, *module_map; @@ -1332,8 +1336,10 @@ void __init __start_xen(unsigned long mbi_p) if ( !tboot_protect_mem_regions() ) panic("Could not protect TXT memory regions\n"); - /* Create initial domain 0. */ - dom0 = domain_create(0, DOMCRF_s3_integrity, 0); + /* Create initial domain 0. */ + domcr_flags = (opt_dom0pvh ? DOMCRF_pvh | DOMCRF_hap : 0); + domcr_flags |= DOMCRF_s3_integrity; + dom0 = domain_create(0, domcr_flags, 0); if ( IS_ERR(dom0) || (alloc_dom0_vcpu0() == NULL) ) panic("Error creating domain 0\n"); -- 1.7.2.3
Julien Grall
2013-Dec-04 00:00 UTC
Re: [V4 PATCH 6/7] pvh dom0: Add and remove foreign pages
On 12/03/2013 02:30 AM, Mukesh Rathor wrote:> In this patch, a new function, xenmem_add_foreign_to_pmap(), is addedxenmem_add_foreign_to_p2m?> to map pages from foreign guest into current dom0 for domU creation. > Such pages are typed p2m_map_foreign. Also, support is added here to > XENMEM_remove_from_physmap to remove such pages. Note, in the remove > path, we must release the refcount that was taken during the map phase.Your remove path is very interesting for the ARM port. For now we are unable to unmap the foreign page because get_page() will always return NULL (as dom0 is not the owner). I will give a try on ARM to see if it could resolve our problem.> Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com> > --- > xen/arch/x86/mm.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++---- > xen/common/memory.c | 38 +++++++++++++++++++--- > 2 files changed, 114 insertions(+), 12 deletions(-)[..]> diff --git a/xen/common/memory.c b/xen/common/memory.c > index 50b740f..d81df18 100644 > --- a/xen/common/memory.c > +++ b/xen/common/memory.c > @@ -675,9 +675,11 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) > > case XENMEM_remove_from_physmap: > { > + unsigned long mfn; > struct xen_remove_from_physmap xrfp; > struct page_info *page; > - struct domain *d; > + struct domain *d, *foreign_dom = NULL; > + p2m_type_t p2mt, tp; > > if ( copy_from_guest(&xrfp, arg, 1) ) > return -EFAULT; > @@ -693,11 +695,37 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) > return rc; > } > > - page = get_page_from_gfn(d, xrfp.gpfn, NULL, P2M_ALLOC); > - if ( page ) > + /* > + * if PVH, the gfn could be mapped to a mfn from foreign domain by the > + * user space tool during domain creation. We need to check for that, > + * free it up from the p2m, and release refcnt on it. In such a case, > + * page would be NULL and the following call would not have refcnt''d > + * the page. 
See also xenmem_add_foreign_to_pmap().s/xenmem_add_foreign_pmap/xenmem_add_foreign_p2m/> + */ > + page = get_page_from_gfn(d, xrfp.gpfn, &p2mt, P2M_ALLOC); > + > + if ( page || p2m_is_foreign(p2mt) ) > {p2m_is_foreign doesn''t exist on ARM. I plan to introduce p2m type in the future, so I think you can define p2m_is_foreign as 0 for now on ARM.> - guest_physmap_remove_page(d, xrfp.gpfn, page_to_mfn(page), 0); > - put_page(page); > + if ( page ) > + mfn = page_to_mfn(page); > + else > + { > + mfn = mfn_x(get_gfn_query(d, xrfp.gpfn, &tp));get_gfn_query doesn''t exist on ARM.> + foreign_dom = page_get_owner(mfn_to_page(mfn)); > + ASSERT(is_pvh_domain(d));On ARM, the assert will always be wrong.> + ASSERT(d != foreign_dom); > + ASSERT(p2m_is_foreign(tp)); > + } > + > + guest_physmap_remove_page(d, xrfp.gpfn, mfn, 0); > + if (page) > + put_page(page); > + > + if ( p2m_is_foreign(p2mt) ) > + { > + put_page(mfn_to_page(mfn)); > + put_gfn(d, xrfp.gpfn); > + } > } > else > rc = -ENOENT; >-- Julien Grall
Mukesh Rathor
2013-Dec-04 01:05 UTC
Re: [V4 PATCH 6/7] pvh dom0: Add and remove foreign pages
On Wed, 04 Dec 2013 00:00:54 +0000 Julien Grall <julien.grall@linaro.org> wrote:> > > On 12/03/2013 02:30 AM, Mukesh Rathor wrote: > > In this patch, a new function, xenmem_add_foreign_to_pmap(), is > > added > > xenmem_add_foreign_to_p2m? > > > to map pages from foreign guest into current dom0 for domU creation. > > Such pages are typed p2m_map_foreign. Also, support is added here to > > XENMEM_remove_from_physmap to remove such pages. Note, in the remove > > path, we must release the refcount that was taken during the map > > phase. > > Your remove path is very interesting for the ARM port. For now we are > unable to unmap the foreign page because get_page() will always > return NULL (as dom0 is not the owner). > > I will give a try on ARM to see if it could resolve our problem. >Don't know much about ARM, is the remove path failing to compile on it? You can submit ARM modification patch after it's checked in? thanks mukesh
Ian Campbell
2013-Dec-04 09:43 UTC
Re: [V4 PATCH 6/7] pvh dom0: Add and remove foreign pages
On Wed, 2013-12-04 at 00:00 +0000, Julien Grall wrote:> > On 12/03/2013 02:30 AM, Mukesh Rathor wrote: > > In this patch, a new function, xenmem_add_foreign_to_pmap(), is added > > xenmem_add_foreign_to_p2m? > > > to map pages from foreign guest into current dom0 for domU creation. > > Such pages are typed p2m_map_foreign. Also, support is added here to > > XENMEM_remove_from_physmap to remove such pages. Note, in the remove > > path, we must release the refcount that was taken during the map phase. > > Your remove path is very interesting for the ARM port. For now we are > unable to unmap the foreign page because get_page() will always return > NULL (as dom0 is not the owner). > > I will give a try on ARM to see if it could resolve our problem. > > > Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com> > > --- > > xen/arch/x86/mm.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++---- > > xen/common/memory.c | 38 +++++++++++++++++++--- > > 2 files changed, 114 insertions(+), 12 deletions(-) > > [..] > > > diff --git a/xen/common/memory.c b/xen/common/memory.c > > index 50b740f..d81df18 100644 > > --- a/xen/common/memory.c > > +++ b/xen/common/memory.c > > @@ -675,9 +675,11 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) > > > > case XENMEM_remove_from_physmap: > > { > > + unsigned long mfn; > > struct xen_remove_from_physmap xrfp; > > struct page_info *page; > > - struct domain *d; > > + struct domain *d, *foreign_dom = NULL; > > + p2m_type_t p2mt, tp; > > > > if ( copy_from_guest(&xrfp, arg, 1) ) > > return -EFAULT; > > @@ -693,11 +695,37 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) > > return rc; > > } > > > > - page = get_page_from_gfn(d, xrfp.gpfn, NULL, P2M_ALLOC); > > - if ( page ) > > + /* > > + * if PVH, the gfn could be mapped to a mfn from foreign domain by the > > + * user space tool during domain creation. We need to check for that, > > + * free it up from the p2m, and release refcnt on it. 
In such a case, > > + * page would be NULL and the following call would not have refcnt''d > > + * the page. See also xenmem_add_foreign_to_pmap(). > > s/xenmem_add_foreign_pmap/xenmem_add_foreign_p2m/ > > > + */ > > + page = get_page_from_gfn(d, xrfp.gpfn, &p2mt, P2M_ALLOC); > > + > > + if ( page || p2m_is_foreign(p2mt) ) > > { > > p2m_is_foreign doesn''t exist on ARM. I plan to introduce p2m type in the > future, so I think you can define p2m_is_foreign as 0 for now on ARM. > > > - guest_physmap_remove_page(d, xrfp.gpfn, page_to_mfn(page), 0); > > - put_page(page); > > + if ( page ) > > + mfn = page_to_mfn(page); > > + else > > + { > > + mfn = mfn_x(get_gfn_query(d, xrfp.gpfn, &tp)); > > get_gfn_query doesn''t exist on ARM. > > > + foreign_dom = page_get_owner(mfn_to_page(mfn)); > > + ASSERT(is_pvh_domain(d)); > > On ARM, the assert will always be wrong.Shouldn''t almost all of this logic be part of the per arch get_page_from_gfn?> > + ASSERT(d != foreign_dom); > > + ASSERT(p2m_is_foreign(tp)); > > + } > > + > > + guest_physmap_remove_page(d, xrfp.gpfn, mfn, 0); > > + if (page) > > + put_page(page); > > + > > + if ( p2m_is_foreign(p2mt) ) > > + { > > + put_page(mfn_to_page(mfn)); > > + put_gfn(d, xrfp.gpfn); > > + } > > } > > else > > rc = -ENOENT; > > >
Ian Campbell
2013-Dec-04 09:44 UTC
Re: [V4 PATCH 6/7] pvh dom0: Add and remove foreign pages
On Tue, 2013-12-03 at 17:05 -0800, Mukesh Rathor wrote:> On Wed, 04 Dec 2013 00:00:54 +0000 > Julien Grall <julien.grall@linaro.org> wrote: > > > > > > > On 12/03/2013 02:30 AM, Mukesh Rathor wrote: > > > In this patch, a new function, xenmem_add_foreign_to_pmap(), is > > > added > > > > xenmem_add_foreign_to_p2m? > > > > > to map pages from foreign guest into current dom0 for domU creation. > > > Such pages are typed p2m_map_foreign. Also, support is added here to > > > XENMEM_remove_from_physmap to remove such pages. Note, in the remove > > > path, we must release the refcount that was taken during the map > > > phase. > > > > Your remove path is very interesting for the ARM port. For now we are > > unable to unmap the foreign page because get_page() will always > > return NULL (as dom0 is not the owner). > > > > I will give a try on ARM to see if it could resolve our problem. > > > > Don''t know much about ARM, is the remove path failing to compile on it? > You can submit ARM modification patch after it''s checked in?No, we don''t break things like this even transiently. You can find an ARM cross compiler at https://launchpad.net/linaro-toolchain-binaries/+download and cross building just the hypervisor part is pretty trivial. Please make sure the ARM side at least compiles, probably by defining the macros which you introduce to have fixed values which equate to the current behaviour. Ian.
Stefano Stabellini
2013-Dec-04 10:56 UTC
Re: [V4 PATCH 6/7] pvh dom0: Add and remove foreign pages
On Tue, 3 Dec 2013, Mukesh Rathor wrote:> On Wed, 04 Dec 2013 00:00:54 +0000 > Julien Grall <julien.grall@linaro.org> wrote: > > > > > > > On 12/03/2013 02:30 AM, Mukesh Rathor wrote: > > > In this patch, a new function, xenmem_add_foreign_to_pmap(), is > > > added > > > > xenmem_add_foreign_to_p2m? > > > > > to map pages from foreign guest into current dom0 for domU creation. > > > Such pages are typed p2m_map_foreign. Also, support is added here to > > > XENMEM_remove_from_physmap to remove such pages. Note, in the remove > > > path, we must release the refcount that was taken during the map > > > phase. > > > > Your remove path is very interesting for the ARM port. For now we are > > unable to unmap the foreign page because get_page() will always > > return NULL (as dom0 is not the owner). > > > > I will give a try on ARM to see if it could resolve our problem. > > > > Don''t know much about ARM, is the remove path failing to compile on it? > You can submit ARM modification patch after it''s checked in?This patch is against Xen common code, therefore if you introduce any x86-only functions, you are going to break the ARM build.
>>> On 03.12.13 at 03:30, Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > This patch changes construct_dom0 to boot in PVH mode. Changes > need to support it are also included here. > > Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>Reviewed-by: Jan Beulich <jbeulich@suse.com>> --- > xen/arch/x86/domain_build.c | 235 +++++++++++++++++++++++++++++++++++++++--- > xen/arch/x86/mm/hap/hap.c | 15 +++ > xen/include/asm-x86/hap.h | 1 + > 3 files changed, 234 insertions(+), 17 deletions(-) > > diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c > index 67a569a..eb00c0d 100644 > --- a/xen/arch/x86/domain_build.c > +++ b/xen/arch/x86/domain_build.c > @@ -35,6 +35,7 @@ > #include <asm/setup.h> > #include <asm/bzimage.h> /* for bzimage_parse */ > #include <asm/io_apic.h> > +#include <asm/hap.h> > > #include <public/version.h> > > @@ -307,6 +308,151 @@ static void __init process_dom0_ioports_disable(void) > } > } > > +static __init void pvh_add_mem_mapping(struct domain *d, unsigned long gfn, > + unsigned long mfn, unsigned long > nr_mfns) > +{ > + unsigned long i; > + for ( i = 0; i < nr_mfns; i++ ) > + if ( !set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i)) ) > + panic("Failed setting p2m. gfn:%lx mfn:%lx i:%ld\n", gfn, mfn, > i); > +} > + > +/* > + * Set the 1:1 map for all non-RAM regions for dom 0. Thus, dom0 will have > + * the entire io region mapped in the EPT/NPT. > + * > + * pvh fixme: The following doesn''t map MMIO ranges when they sit above the > + * highest E820 covered address. 
> + */ > +static __init void pvh_map_all_iomem(struct domain *d) > +{ > + unsigned long start_pfn, end_pfn, end = 0, start = 0; > + const struct e820entry *entry; > + unsigned int i, nump; > + > + for ( i = 0, entry = e820.map; i < e820.nr_map; i++, entry++ ) > + { > + end = entry->addr + entry->size; > + > + if ( entry->type == E820_RAM || entry->type == E820_UNUSABLE || > + i == e820.nr_map - 1 ) > + { > + start_pfn = PFN_DOWN(start); > + > + /* Unused RAM areas are marked UNUSABLE, so skip it too */ > + if ( entry->type == E820_RAM || entry->type == E820_UNUSABLE ) > + end_pfn = PFN_UP(entry->addr); > + else > + end_pfn = PFN_UP(end); > + > + if ( start_pfn < end_pfn ) > + { > + nump = end_pfn - start_pfn; > + /* Add pages to the mapping */ > + pvh_add_mem_mapping(d, start_pfn, start_pfn, nump); > + } > + start = end; > + } > + } > + > + /* If the e820 ended under 4GB, we must map the remaining space upto > 4GB */ > + if ( end < GB(4) ) > + { > + start_pfn = PFN_UP(end); > + end_pfn = (GB(4)) >> PAGE_SHIFT; > + nump = end_pfn - start_pfn; > + pvh_add_mem_mapping(d, start_pfn, start_pfn, nump); > + } > +} > + > +static __init void dom0_update_physmap(struct domain *d, unsigned long pfn, > + unsigned long mfn, unsigned long > vphysmap_s) > +{ > + if ( is_pvh_domain(d) ) > + { > + int rc = guest_physmap_add_page(d, pfn, mfn, 0); > + BUG_ON(rc); > + return; > + } > + if ( !is_pv_32on64_domain(d) ) > + ((unsigned long *)vphysmap_s)[pfn] = mfn; > + else > + ((unsigned int *)vphysmap_s)[pfn] = mfn; > + > + set_gpfn_from_mfn(mfn, pfn); > +} > + > +static __init void pvh_fixup_page_tables_for_hap(struct vcpu *v, > + unsigned long v_start, > + unsigned long v_end) > +{ > + int i, j, k; > + l4_pgentry_t *pl4e, *l4start; > + l3_pgentry_t *pl3e; > + l2_pgentry_t *pl2e; > + l1_pgentry_t *pl1e; > + unsigned long cr3_pfn; > + > + ASSERT(paging_mode_enabled(v->domain)); > + > + l4start = map_domain_page(pagetable_get_pfn(v->arch.guest_table)); > + > + /* Clear entries prior to 
guest L4 start */ > + pl4e = l4start + l4_table_offset(v_start); > + memset(l4start, 0, (unsigned long)pl4e - (unsigned long)l4start); > + > + for ( ; pl4e <= l4start + l4_table_offset(v_end - 1); pl4e++ ) > + { > + pl3e = map_l3t_from_l4e(*pl4e); > + for ( i = 0; i < PAGE_SIZE / sizeof(*pl3e); i++, pl3e++ ) > + { > + if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) > + continue; > + > + pl2e = map_l2t_from_l3e(*pl3e); > + for ( j = 0; j < PAGE_SIZE / sizeof(*pl2e); j++, pl2e++ ) > + { > + if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) > + continue; > + > + pl1e = map_l1t_from_l2e(*pl2e); > + for ( k = 0; k < PAGE_SIZE / sizeof(*pl1e); k++, pl1e++ ) > + { > + if ( !(l1e_get_flags(*pl1e) & _PAGE_PRESENT) ) > + continue; > + > + *pl1e = > l1e_from_pfn(get_gpfn_from_mfn(l1e_get_pfn(*pl1e)), > + l1e_get_flags(*pl1e)); > + } > + unmap_domain_page(pl1e); > + *pl2e = l2e_from_pfn(get_gpfn_from_mfn(l2e_get_pfn(*pl2e)), > + l2e_get_flags(*pl2e)); > + } > + unmap_domain_page(pl2e); > + *pl3e = l3e_from_pfn(get_gpfn_from_mfn(l3e_get_pfn(*pl3e)), > + l3e_get_flags(*pl3e)); > + } > + unmap_domain_page(pl3e); > + *pl4e = l4e_from_pfn(get_gpfn_from_mfn(l4e_get_pfn(*pl4e)), > + l4e_get_flags(*pl4e)); > + } > + > + /* Clear entries post guest L4. */ > + if ( (unsigned long)pl4e & (PAGE_SIZE - 1) ) > + memset(pl4e, 0, PAGE_SIZE - ((unsigned long)pl4e & (PAGE_SIZE - 1))); > + > + unmap_domain_page(l4start); > + > + cr3_pfn = get_gpfn_from_mfn(paddr_to_pfn(v->arch.cr3)); > + v->arch.hvm_vcpu.guest_cr[3] = pfn_to_paddr(cr3_pfn); > + > + /* > + * Finally, we update the paging modes (hap_update_paging_modes). This > will > + * create monitor_table for us, update v->arch.cr3, and update vmcs.cr3. 
> + */ > + paging_update_paging_modes(v); > +} > + > static __init void mark_pv_pt_pages_rdonly(struct domain *d, > l4_pgentry_t *l4start, > unsigned long vpt_start, > @@ -516,6 +662,8 @@ int __init construct_dom0( > l3_pgentry_t *l3tab = NULL, *l3start = NULL; > l2_pgentry_t *l2tab = NULL, *l2start = NULL; > l1_pgentry_t *l1tab = NULL, *l1start = NULL; > + paddr_t shared_info_paddr = 0; > + u32 save_pvh_pg_mode = 0; > > /* > * This fully describes the memory layout of the initial domain. All > @@ -593,12 +741,21 @@ int __init construct_dom0( > goto out; > } > > - if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != > XEN_ENT_NONE && > - !test_bit(XENFEAT_dom0, parms.f_supported) ) > + if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != > XEN_ENT_NONE ) > { > - printk("Kernel does not support Dom0 operation\n"); > - rc = -EINVAL; > - goto out; > + if ( !test_bit(XENFEAT_dom0, parms.f_supported) ) > + { > + printk("Kernel does not support Dom0 operation\n"); > + rc = -EINVAL; > + goto out; > + } > + if ( is_pvh_domain(d) && > + !test_bit(XENFEAT_hvm_callback_vector, parms.f_supported) ) > + { > + printk("Kernel does not support PVH mode\n"); > + rc = -EINVAL; > + goto out; > + } > } > > if ( compat32 ) > @@ -663,6 +820,13 @@ int __init construct_dom0( > vstartinfo_end = (vstartinfo_start + > sizeof(struct start_info) + > sizeof(struct dom0_vga_console_info)); > + > + if ( is_pvh_domain(d) ) > + { > + shared_info_paddr = round_pgup(vstartinfo_end) - v_start; > + vstartinfo_end += PAGE_SIZE; > + } > + > vpt_start = round_pgup(vstartinfo_end); > for ( nr_pt_pages = 2; ; nr_pt_pages++ ) > { > @@ -903,6 +1067,13 @@ int __init construct_dom0( > (void)alloc_vcpu(d, i, cpu); > } > > + /* > + * pvh: we temporarily disable paging mode so that we can build cr3 > needed > + * to run on dom0''s page tables. 
> + */ > + save_pvh_pg_mode = d->arch.paging.mode; > + d->arch.paging.mode = 0; > + > /* Set up CR3 value for write_ptbase */ > if ( paging_mode_enabled(d) ) > paging_update_paging_modes(v); > @@ -969,6 +1140,15 @@ int __init construct_dom0( > nr_pages); > } > > + if ( is_pvh_domain(d) ) > + hap_set_pvh_alloc_for_dom0(d, nr_pages); > + > + /* > + * We enable paging mode again so guest_physmap_add_page will do the > + * right thing for us. > + */ > + d->arch.paging.mode = save_pvh_pg_mode; > + > /* Write the phys->machine and machine->phys table entries. */ > for ( pfn = 0; pfn < count; pfn++ ) > { > @@ -985,11 +1165,7 @@ int __init construct_dom0( > if ( pfn > REVERSE_START && (vinitrd_start || pfn < initrd_pfn) ) > mfn = alloc_epfn - (pfn - REVERSE_START); > #endif > - if ( !is_pv_32on64_domain(d) ) > - ((unsigned long *)vphysmap_start)[pfn] = mfn; > - else > - ((unsigned int *)vphysmap_start)[pfn] = mfn; > - set_gpfn_from_mfn(mfn, pfn); > + dom0_update_physmap(d, pfn, mfn, vphysmap_start); > if (!(pfn & 0xfffff)) > process_pending_softirqs(); > } > @@ -1005,8 +1181,8 @@ int __init construct_dom0( > if ( !page->u.inuse.type_info && > !get_page_and_type(page, d, PGT_writable_page) ) > BUG(); > - ((unsigned long *)vphysmap_start)[pfn] = mfn; > - set_gpfn_from_mfn(mfn, pfn); > + > + dom0_update_physmap(d, pfn, mfn, vphysmap_start); > ++pfn; > if (!(pfn & 0xfffff)) > process_pending_softirqs(); > @@ -1026,11 +1202,7 @@ int __init construct_dom0( > #ifndef NDEBUG > #define pfn (nr_pages - 1 - (pfn - (alloc_epfn - alloc_spfn))) > #endif > - if ( !is_pv_32on64_domain(d) ) > - ((unsigned long *)vphysmap_start)[pfn] = mfn; > - else > - ((unsigned int *)vphysmap_start)[pfn] = mfn; > - set_gpfn_from_mfn(mfn, pfn); > + dom0_update_physmap(d, pfn, mfn, vphysmap_start); > #undef pfn > page++; pfn++; > if (!(pfn & 0xfffff)) > @@ -1054,6 +1226,15 @@ int __init construct_dom0( > si->console.dom0.info_size = sizeof(struct dom0_vga_console_info); > } > > + /* > + * PVH: We need to 
update si->shared_info while we are on dom0 page > tables, > + * but need to defer the p2m update until after we have fixed up the > + * page tables for PVH so that the m2p for the si pte entry returns > + * correct pfn. > + */ > + if ( is_pvh_domain(d) ) > + si->shared_info = shared_info_paddr; > + > if ( is_pv_32on64_domain(d) ) > xlat_start_info(si, XLAT_start_info_console_dom0); > > @@ -1087,8 +1268,15 @@ int __init construct_dom0( > regs->eflags = X86_EFLAGS_IF; > > if ( opt_dom0_shadow ) > + { > + if ( is_pvh_domain(d) ) > + { > + printk("Unsupported option dom0_shadow for PVH\n"); > + return -EINVAL; > + } > if ( paging_enable(d, PG_SH_enable) == 0 ) > paging_update_paging_modes(v); > + } > > if ( supervisor_mode_kernel ) > { > @@ -1178,6 +1366,19 @@ int __init construct_dom0( > printk(" Xen warning: dom0 kernel broken ELF: %s\n", > elf_check_broken(&elf)); > > + if ( is_pvh_domain(d) ) > + { > + /* finally, fixup the page table, replacing mfns with pfns */ > + pvh_fixup_page_tables_for_hap(v, v_start, v_end); > + > + /* the pt has correct pfn for si, now update the mfn in the p2m */ > + mfn = virt_to_mfn(d->shared_info); > + pfn = shared_info_paddr >> PAGE_SHIFT; > + dom0_update_physmap(d, pfn, mfn, 0); > + > + pvh_map_all_iomem(d); > + } > + > iommu_dom0_init(dom0); > return 0; > > diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c > index d3f64bd..cc3ba66 100644 > --- a/xen/arch/x86/mm/hap/hap.c > +++ b/xen/arch/x86/mm/hap/hap.c > @@ -579,6 +579,21 @@ int hap_domctl(struct domain *d, xen_domctl_shadow_op_t > *sc, > } > } > > +void __init hap_set_pvh_alloc_for_dom0(struct domain *d, > + unsigned long num_pages) > +{ > + int rc; > + unsigned long memkb = num_pages * (PAGE_SIZE / 1024); > + > + /* Copied from: libxl_get_required_shadow_memory() */ > + memkb = 4 * (256 * d->max_vcpus + 2 * (memkb / 1024)); > + num_pages = ( (memkb + 1023) / 1024) << (20 - PAGE_SHIFT); > + paging_lock(d); > + rc = hap_set_allocation(d, num_pages, NULL); > + 
paging_unlock(d); > + BUG_ON(rc); > +} > + > static const struct paging_mode hap_paging_real_mode; > static const struct paging_mode hap_paging_protected_mode; > static const struct paging_mode hap_paging_pae_mode; > diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h > index e03f983..aab8558 100644 > --- a/xen/include/asm-x86/hap.h > +++ b/xen/include/asm-x86/hap.h > @@ -63,6 +63,7 @@ int hap_track_dirty_vram(struct domain *d, > XEN_GUEST_HANDLE_64(uint8) dirty_bitmap); > > extern const struct paging_mode *hap_paging_get_mode(struct vcpu *); > +void hap_set_pvh_alloc_for_dom0(struct domain *d, unsigned long num_pages); > > #endif /* XEN_HAP_H */ > > -- > 1.7.2.3
>>> On 03.12.13 at 03:30, Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > Hi, > > V4: > - Dropped V3 patch 2, and moved just the small needed code to > domain_build.c > - xsm related changes. > > These patches implement PVH dom0. > > Patches 1 and 2 implement changes in and around construct_dom0. > Patch 9 adds option to boot a dom0 in PVH mode. The rest support tool > stack on a pvh dom0. > > These patches are based on c/s: 0475025 > > These can also be found in public git tree at: > > git://oss.oracle.com/git/mrathor/xen.git branch: dom0pvh-v4 > > Thanks for all the help, > MukeshSo 1-3 and 8 are ready to be applied as far as I'm concerned. 4 needs Tim's approval, 5 Daniel's, and iirc 6 was reported broken for ARM. Applying 1-3 alone doesn't make much sense though, so I'm not going to take any further action for the time being. Jan
Julien Grall
2013-Dec-04 17:00 UTC
Re: [V4 PATCH 6/7] pvh dom0: Add and remove foreign pages
On 12/04/2013 01:05 AM, Mukesh Rathor wrote:> On Wed, 04 Dec 2013 00:00:54 +0000 > Julien Grall <julien.grall@linaro.org> wrote: > >> >> >> On 12/03/2013 02:30 AM, Mukesh Rathor wrote: >>> In this patch, a new function, xenmem_add_foreign_to_pmap(), is >>> added >> >> xenmem_add_foreign_to_p2m? >> >>> to map pages from foreign guest into current dom0 for domU creation. >>> Such pages are typed p2m_map_foreign. Also, support is added here to >>> XENMEM_remove_from_physmap to remove such pages. Note, in the remove >>> path, we must release the refcount that was taken during the map >>> phase. >> >> Your remove path is very interesting for the ARM port. For now we are >> unable to unmap the foreign page because get_page() will always >> return NULL (as dom0 is not the owner). >> >> I will give a try on ARM to see if it could resolve our problem. >> > > Don''t know much about ARM, is the remove path failing to compile on it? > You can submit ARM modification patch after it''s checked in?So I have reworked common/memory.c part to be able to boot on ARM. What about this fix: diff --git a/xen/common/memory.c b/xen/common/memory.c index df36d43..8ceb78b 100644 --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -713,9 +713,11 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) case XENMEM_remove_from_physmap: { + unsigned long mfn; struct xen_remove_from_physmap xrfp; struct page_info *page; struct domain *d; + p2m_type_t p2mt; if ( copy_from_guest(&xrfp, arg, 1) ) return -EFAULT; @@ -731,11 +733,30 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) return rc; } - page = get_page_from_gfn(d, xrfp.gpfn, NULL, P2M_ALLOC); - if ( page ) + /* + * if PVH, the gfn could be mapped to a mfn from foreign domain by the + * user space tool during domain creation. We need to check for that, + * free it up from the p2m, and release refcnt on it. 
In such a case, + * page would be NULL and the following call would not have refcnt''d + * the page. See also xenmem_add_foreign_to_pmap(). + */ + page = get_page_from_gfn(d, xrfp.gpfn, &p2mt, P2M_ALLOC); + + if ( page || p2m_is_foreign(p2mt) ) { - guest_physmap_remove_page(d, xrfp.gpfn, page_to_mfn(page), 0); - put_page(page); + if ( page ) + mfn = page_to_mfn(page); + else + { + mfn = gmfn_to_mfn(d, xrfp.gpfn); + page = mfn_to_page(mfn); + + ASSERT(d != page_get_owner(page)); + } + + guest_physmap_remove_page(d, xrfp.gpfn, mfn, 0); + if (page) + put_page(page); } else rc = -ENOENT; This change is assuming that: - p2m_is_foreign is defined on ARM (I have a patch for that) - gmfn_to_mfn is defined on x86 (I don''t have a patch, but the code should be trivial?) - ARM change in xenmem_add_to_physmap to take refcount on the foreign map I also have a bunch of intrusive ARM patch to handle correctly foreign mapping. Mukesh: This small fix should go to this patch: diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c index 177f2e5..fc5ab8f 100644 --- a/xen/arch/arm/mm.c +++ b/xen/arch/arm/mm.c @@ -1020,8 +1020,8 @@ static int xenmem_add_to_physmap_one( break; case XENMAPSPACE_gmfn_foreign: { - paddr_t maddr; struct domain *od; + struct page_info *page; od = rcu_lock_domain_by_any_id(foreign_domid); if ( od == NULL ) return -ESRCH; @@ -1033,15 +1033,17 @@ static int xenmem_add_to_physmap_one( return rc; } - maddr = p2m_lookup(od, pfn_to_paddr(idx)); - if ( maddr == INVALID_PADDR ) + /* Take refcount to the foreign domain page. + * Refcount will be release in XENMEM_remove_from_physmap */ + page = get_page_from_gfn(od, idx, NULL, P2M_ALLOC); + if ( !page ) { dump_p2m_lookup(od, pfn_to_paddr(idx)); rcu_unlock_domain(od); return -EINVAL; } - mfn = maddr >> PAGE_SHIFT; + mfn = page_to_mfn(page); t = p2m_map_foreign; rcu_unlock_domain(od); -- Julien Grall
Mukesh Rathor
2013-Dec-05 01:40 UTC
Re: [V4 PATCH 6/7] pvh dom0: Add and remove foreign pages
On Wed, 04 Dec 2013 17:00:57 +0000 Julien Grall <julien.grall@linaro.org> wrote:> On 12/04/2013 01:05 AM, Mukesh Rathor wrote: > > On Wed, 04 Dec 2013 00:00:54 +0000 > > Julien Grall <julien.grall@linaro.org> wrote: > >......> refcnt''d > + * the page. See also xenmem_add_foreign_to_pmap(). > + */ > + page = get_page_from_gfn(d, xrfp.gpfn, &p2mt, P2M_ALLOC); > + > + if ( page || p2m_is_foreign(p2mt) ) > { > - guest_physmap_remove_page(d, xrfp.gpfn, > page_to_mfn(page), 0); > - put_page(page); > + if ( page ) > + mfn = page_to_mfn(page); > + else > + { > + mfn = gmfn_to_mfn(d, xrfp.gpfn); > + page = mfn_to_page(mfn); > + > + ASSERT(d != page_get_owner(page)); > + } > + > + guest_physmap_remove_page(d, xrfp.gpfn, mfn, 0); > + if (page) > + put_page(page);Why did you drop the following release of refcnt from here? + if ( p2m_is_foreign(p2mt) ) + { + put_page(mfn_to_page(mfn)); + put_gfn(d, xrfp.gpfn); + }> } > else > rc = -ENOENT; > > This change is assuming that: > - p2m_is_foreign is defined on ARM (I have a patch for that)I added #define p2m_is_foreign to 0 in my patch for now.> - gmfn_to_mfn is defined on x86 (I don''t have a patch, but the code > should be trivial?)I''d rather just ifdef CONFIG_X86 like guest_remove_page() does in the same file for gmfn_to_mfn. Also, lets not mix any partial arm changes with these x86 changes here. So, in this patch, I''ve added p2m_is_foreign set to 0, and changed the remove path to remain exactly same for arm. So, arm will continue to work the same way, and no build break. This way, you can add arm change incrementally and we can track things nicely. Please see the changes in the next version. thanks, Mukesh
Mukesh Rathor
2013-Dec-05 02:09 UTC
Re: [V4 PATCH 6/7] pvh dom0: Add and remove foreign pages
On Wed, 4 Dec 2013 09:43:26 +0000 Ian Campbell <Ian.Campbell@citrix.com> wrote:> On Wed, 2013-12-04 at 00:00 +0000, Julien Grall wrote: > > > > On 12/03/2013 02:30 AM, Mukesh Rathor wrote: > > > In this patch, a new function, xenmem_add_foreign_to_pmap(), is > > > added > > > > xenmem_add_foreign_to_p2m? > > > > > to map pages from foreign guest into current dom0 for domU > > > creation. Such pages are typed p2m_map_foreign. Also, support is > > > added here to XENMEM_remove_from_physmap to remove such pages. > > > Note, in the remove path, we must release the refcount that was > > > taken during the map phase. > > > > Your remove path is very interesting for the ARM port. For now we > > are unable to unmap the foreign page because get_page() will always > > return NULL (as dom0 is not the owner). > > > > I will give a try on ARM to see if it could resolve our problem. > > > > > Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com> > > > --- > > > xen/arch/x86/mm.c | 88 > > > ++++++++++++++++++++++++++++++++++++++++++++++---- > > > xen/common/memory.c | 38 +++++++++++++++++++--- 2 files > > > changed, 114 insertions(+), 12 deletions(-) > > > > [..] 
> > > > > diff --git a/xen/common/memory.c b/xen/common/memory.c > > > index 50b740f..d81df18 100644 > > > --- a/xen/common/memory.c > > > +++ b/xen/common/memory.c > > > @@ -675,9 +675,11 @@ long do_memory_op(unsigned long cmd, > > > XEN_GUEST_HANDLE_PARAM(void) arg) > > > > > > case XENMEM_remove_from_physmap: > > > { > > > + unsigned long mfn; > > > struct xen_remove_from_physmap xrfp; > > > struct page_info *page; > > > - struct domain *d; > > > + struct domain *d, *foreign_dom = NULL; > > > + p2m_type_t p2mt, tp; > > > > > > if ( copy_from_guest(&xrfp, arg, 1) ) > > > return -EFAULT; > > > @@ -693,11 +695,37 @@ long do_memory_op(unsigned long cmd, > > > XEN_GUEST_HANDLE_PARAM(void) arg) return rc; > > > } > > > > > > - page = get_page_from_gfn(d, xrfp.gpfn, NULL, P2M_ALLOC); > > > - if ( page ) > > > + /* > > > + * if PVH, the gfn could be mapped to a mfn from foreign > > > domain by the > > > + * user space tool during domain creation. We need to > > > check for that, > > > + * free it up from the p2m, and release refcnt on it. In > > > such a case, > > > + * page would be NULL and the following call would not > > > have refcnt''d > > > + * the page. See also xenmem_add_foreign_to_pmap(). > > > > s/xenmem_add_foreign_pmap/xenmem_add_foreign_p2m/ > > > > > + */ > > > + page = get_page_from_gfn(d, xrfp.gpfn, &p2mt, P2M_ALLOC); > > > + > > > + if ( page || p2m_is_foreign(p2mt) ) > > > { > > > > p2m_is_foreign doesn''t exist on ARM. I plan to introduce p2m type > > in the future, so I think you can define p2m_is_foreign as 0 for > > now on ARM. > > > > > - guest_physmap_remove_page(d, xrfp.gpfn, > > > page_to_mfn(page), 0); > > > - put_page(page); > > > + if ( page ) > > > + mfn = page_to_mfn(page); > > > + else > > > + { > > > + mfn = mfn_x(get_gfn_query(d, xrfp.gpfn, &tp)); > > > > get_gfn_query doesn''t exist on ARM. > > > > > + foreign_dom = page_get_owner(mfn_to_page(mfn)); > > > + ASSERT(is_pvh_domain(d)); > > > > On ARM, the assert will always be wrong. 
> > Shouldn't almost all of this logic be part of the per arch > get_page_from_gfn?

Not sure. We are just checking a few asserts in the remove path here. get_page_from_gfn gets called from many non-relevant places.
Julien Grall
2013-Dec-05 12:31 UTC
Re: [V4 PATCH 6/7] pvh dom0: Add and remove foreign pages
On 12/05/2013 01:40 AM, Mukesh Rathor wrote:> On Wed, 04 Dec 2013 17:00:57 +0000 > Julien Grall <julien.grall@linaro.org> wrote: > >> On 12/04/2013 01:05 AM, Mukesh Rathor wrote: >>> On Wed, 04 Dec 2013 00:00:54 +0000 >>> Julien Grall <julien.grall@linaro.org> wrote: >>> > ...... >> refcnt''d >> + * the page. See also xenmem_add_foreign_to_pmap(). >> + */ >> + page = get_page_from_gfn(d, xrfp.gpfn, &p2mt, P2M_ALLOC); >> + >> + if ( page || p2m_is_foreign(p2mt) ) >> { >> - guest_physmap_remove_page(d, xrfp.gpfn, >> page_to_mfn(page), 0); >> - put_page(page); >> + if ( page ) >> + mfn = page_to_mfn(page); >> + else >> + { >> + mfn = gmfn_to_mfn(d, xrfp.gpfn); >> + page = mfn_to_page(mfn); >> + >> + ASSERT(d != page_get_owner(page)); >> + } >> + >> + guest_physmap_remove_page(d, xrfp.gpfn, mfn, 0); >> + if (page) >> + put_page(page); > > > Why did you drop the following release of refcnt from here?When it''s the foreign page, I have assigned mfn_to_page() to the variable "page". It''s avoid duplication code.> > + if ( p2m_is_foreign(p2mt) ) > + { > + put_page(mfn_to_page(mfn)); > + put_gfn(d, xrfp.gpfn); > + } > > >> } >> else >> rc = -ENOENT; >> >> This change is assuming that: >> - p2m_is_foreign is defined on ARM (I have a patch for that) > > I added #define p2m_is_foreign to 0 in my patch for now. > >> - gmfn_to_mfn is defined on x86 (I don''t have a patch, but the code >> should be trivial?) > > I''d rather just ifdef CONFIG_X86 like guest_remove_page() does in the > same file for gmfn_to_mfn. > > Also, lets not mix any partial arm changes with these x86 changes here. > > So, in this patch, I''ve added p2m_is_foreign set to 0, and changed the > remove path to remain exactly same for arm. So, arm will continue to work > the same way, and no build break. This way, you can add arm change > incrementally and we can track things nicely.I''m fine with this solution as long as this patch series (or at least this patch) is applied for Xen 4.4. 
On the ARM side it's a bug fix that must be resolved for the next release. -- Julien Grall