The main goal being the re-work of the hypercall argument translation area management, this series first breaks out per-domain mapping management into its own set of functions, in order to then use this for setting up the translation areas in per-domain space. While doing this I also realized that it's pointless for the map_domain_page() code to track L1 page table pointers in a separate Xen heap page - we can equally well use the linear page table for the manipulations needed here. 1: introduce create_perdomain_mapping() 2: rework hypercall argument translation area setup 3: use linear L1 page table for map_domain_page() page table manipulation Signed-off-by: Jan Beulich <jbeulich@suse.com>
Jan Beulich
2013-Feb-26 16:06 UTC
[PATCH v2 1/3] x86: introduce create_perdomain_mapping()
... as well as free_perdomain_mappings(), and use them to carry out the existing per-domain mapping setup/teardown. This at once makes the setup of the first sub-range PV domain specific (with idle domains also excluded), as the GDT/LDT mapping area is needed only for those. Also fix an improperly scaled BUILD_BUG_ON() expression in mapcache_domain_init(). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -372,37 +372,16 @@ int switch_compat(struct domain *d) int vcpu_initialise(struct vcpu *v) { struct domain *d = v->domain; - unsigned int idx; int rc; v->arch.flags = TF_kernel_mode; - idx = perdomain_pt_idx(v); - if ( !d->arch.perdomain_pts[idx] ) - { - void *pt; - l2_pgentry_t *l2tab; - - pt = alloc_xenheap_pages(0, MEMF_node(vcpu_to_node(v))); - if ( !pt ) - return -ENOMEM; - clear_page(pt); - d->arch.perdomain_pts[idx] = pt; - - l2tab = __map_domain_page(d->arch.perdomain_l2_pg[0]); - l2tab[l2_table_offset(PERDOMAIN_VIRT_START) + idx] - = l2e_from_paddr(__pa(pt), __PAGE_HYPERVISOR); - unmap_domain_page(l2tab); - } - rc = mapcache_vcpu_init(v); if ( rc ) return rc; paging_vcpu_init(v); - v->arch.perdomain_ptes = perdomain_ptes(d, v); - if ( (rc = vcpu_init_fpu(v)) != 0 ) return rc; @@ -420,6 +399,12 @@ int vcpu_initialise(struct vcpu *v) if ( !is_idle_domain(d) ) { + rc = create_perdomain_mapping(d, GDT_VIRT_START(v), + 1 << GDT_LDT_VCPU_SHIFT, + d->arch.pv_domain.gdt_ldt_l1tab, NULL); + if ( rc ) + goto done; + BUILD_BUG_ON(NR_VECTORS * sizeof(*v->arch.pv_vcpu.trap_ctxt) > PAGE_SIZE); v->arch.pv_vcpu.trap_ctxt = xzalloc_array(struct trap_info, @@ -478,8 +463,6 @@ void vcpu_destroy(struct vcpu *v) int arch_domain_create(struct domain *d, unsigned int domcr_flags) { - struct page_info *pg; - l3_pgentry_t *l3tab; int i, paging_initialised = 0; int rc = -ENOMEM; @@ -510,29 +493,24 @@ int arch_domain_create(struct domain *d, d->domain_id); } - BUILD_BUG_ON(PDPT_L2_ENTRIES * sizeof(*d->arch.perdomain_pts) - != 
PAGE_SIZE); - d->arch.perdomain_pts - alloc_xenheap_pages(0, MEMF_node(domain_to_node(d))); - if ( !d->arch.perdomain_pts ) - goto fail; - clear_page(d->arch.perdomain_pts); - - pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d))); - if ( pg == NULL ) - goto fail; - d->arch.perdomain_l2_pg[0] = pg; - clear_domain_page(page_to_mfn(pg)); + if ( is_hvm_domain(d) ) + rc = create_perdomain_mapping(d, PERDOMAIN_VIRT_START, 0, NULL, NULL); + else if ( is_idle_domain(d) ) + rc = 0; + else + { + d->arch.pv_domain.gdt_ldt_l1tab + alloc_xenheap_pages(0, MEMF_node(domain_to_node(d))); + if ( !d->arch.pv_domain.gdt_ldt_l1tab ) + goto fail; + clear_page(d->arch.pv_domain.gdt_ldt_l1tab); - pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d))); - if ( pg == NULL ) + rc = create_perdomain_mapping(d, GDT_LDT_VIRT_START, + GDT_LDT_MBYTES << (20 - PAGE_SHIFT), + NULL, NULL); + } + if ( rc ) goto fail; - d->arch.perdomain_l3_pg = pg; - l3tab = __map_domain_page(pg); - clear_page(l3tab); - l3tab[l3_table_offset(PERDOMAIN_VIRT_START)] - l3e_from_page(d->arch.perdomain_l2_pg[0], __PAGE_HYPERVISOR); - unmap_domain_page(l3tab); mapcache_domain_init(d); @@ -608,19 +586,14 @@ int arch_domain_create(struct domain *d, if ( paging_initialised ) paging_final_teardown(d); mapcache_domain_exit(d); - for ( i = 0; i < PERDOMAIN_SLOTS; ++i) - if ( d->arch.perdomain_l2_pg[i] ) - free_domheap_page(d->arch.perdomain_l2_pg[i]); - if ( d->arch.perdomain_l3_pg ) - free_domheap_page(d->arch.perdomain_l3_pg); - free_xenheap_page(d->arch.perdomain_pts); + free_perdomain_mappings(d); + if ( !is_hvm_domain(d) ) + free_xenheap_page(d->arch.pv_domain.gdt_ldt_l1tab); return rc; } void arch_domain_destroy(struct domain *d) { - unsigned int i; - if ( is_hvm_domain(d) ) hvm_domain_destroy(d); else @@ -634,13 +607,9 @@ void arch_domain_destroy(struct domain * mapcache_domain_exit(d); - for ( i = 0; i < PDPT_L2_ENTRIES; ++i ) - free_xenheap_page(d->arch.perdomain_pts[i]); - 
free_xenheap_page(d->arch.perdomain_pts); - for ( i = 0; i < PERDOMAIN_SLOTS; ++i) - if ( d->arch.perdomain_l2_pg[i] ) - free_domheap_page(d->arch.perdomain_l2_pg[i]); - free_domheap_page(d->arch.perdomain_l3_pg); + free_perdomain_mappings(d); + if ( !is_hvm_domain(d) ) + free_xenheap_page(d->arch.pv_domain.gdt_ldt_l1tab); free_xenheap_page(d->shared_info); cleanup_domain_irq_mapping(d); @@ -1515,10 +1484,11 @@ static void __context_switch(void) if ( need_full_gdt(n) ) { unsigned long mfn = virt_to_mfn(gdt); + l1_pgentry_t *pl1e = gdt_ldt_ptes(n->domain, n); unsigned int i; + for ( i = 0; i < NR_RESERVED_GDT_PAGES; i++ ) - l1e_write(n->arch.perdomain_ptes + - FIRST_RESERVED_GDT_PAGE + i, + l1e_write(pl1e + FIRST_RESERVED_GDT_PAGE + i, l1e_from_pfn(mfn + i, __PAGE_HYPERVISOR)); } --- a/xen/arch/x86/domain_page.c +++ b/xen/arch/x86/domain_page.c @@ -243,10 +243,7 @@ void copy_domain_page(unsigned long dmfn int mapcache_domain_init(struct domain *d) { struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache; - l3_pgentry_t *l3tab; - l2_pgentry_t *l2tab; - unsigned int i, bitmap_pages, memf = MEMF_node(domain_to_node(d)); - unsigned long *end; + unsigned int bitmap_pages; if ( is_hvm_domain(d) || is_idle_domain(d) ) return 0; @@ -256,48 +253,23 @@ int mapcache_domain_init(struct domain * return 0; #endif - dcache->l1tab = xzalloc_array(l1_pgentry_t *, MAPCACHE_L2_ENTRIES + 1); - d->arch.perdomain_l2_pg[MAPCACHE_SLOT] = alloc_domheap_page(NULL, memf); - if ( !dcache->l1tab || !d->arch.perdomain_l2_pg[MAPCACHE_SLOT] ) + dcache->l1tab = xzalloc_array(l1_pgentry_t *, MAPCACHE_L2_ENTRIES); + if ( !dcache->l1tab ) return -ENOMEM; - clear_domain_page(page_to_mfn(d->arch.perdomain_l2_pg[MAPCACHE_SLOT])); - l3tab = __map_domain_page(d->arch.perdomain_l3_pg); - l3tab[l3_table_offset(MAPCACHE_VIRT_START)] - l3e_from_page(d->arch.perdomain_l2_pg[MAPCACHE_SLOT], - __PAGE_HYPERVISOR); - unmap_domain_page(l3tab); - - l2tab = 
__map_domain_page(d->arch.perdomain_l2_pg[MAPCACHE_SLOT]); - - BUILD_BUG_ON(MAPCACHE_VIRT_END + 3 + - 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long)) > + BUILD_BUG_ON(MAPCACHE_VIRT_END + PAGE_SIZE * (3 + + 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) > MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20)); bitmap_pages = PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long)); dcache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE; dcache->garbage = dcache->inuse + (bitmap_pages + 1) * PAGE_SIZE / sizeof(long); - end = dcache->garbage + bitmap_pages * PAGE_SIZE / sizeof(long); - - for ( i = l2_table_offset((unsigned long)dcache->inuse); - i <= l2_table_offset((unsigned long)(end - 1)); ++i ) - { - ASSERT(i <= MAPCACHE_L2_ENTRIES); - dcache->l1tab[i] = alloc_xenheap_pages(0, memf); - if ( !dcache->l1tab[i] ) - { - unmap_domain_page(l2tab); - return -ENOMEM; - } - clear_page(dcache->l1tab[i]); - l2tab[i] = l2e_from_paddr(__pa(dcache->l1tab[i]), __PAGE_HYPERVISOR); - } - - unmap_domain_page(l2tab); spin_lock_init(&dcache->lock); - return 0; + return create_perdomain_mapping(d, (unsigned long)dcache->inuse, + 2 * bitmap_pages + 1, + NIL(l1_pgentry_t *), NULL); } void mapcache_domain_exit(struct domain *d) @@ -307,94 +279,41 @@ void mapcache_domain_exit(struct domain if ( is_hvm_domain(d) ) return; - if ( dcache->l1tab ) - { - unsigned long i; - - for ( i = (unsigned long)dcache->inuse; ; i += PAGE_SIZE ) - { - l1_pgentry_t *pl1e; - - if ( l2_table_offset(i) > MAPCACHE_L2_ENTRIES || - !dcache->l1tab[l2_table_offset(i)] ) - break; - - pl1e = &dcache->l1tab[l2_table_offset(i)][l1_table_offset(i)]; - if ( l1e_get_flags(*pl1e) ) - free_domheap_page(l1e_get_page(*pl1e)); - } - - for ( i = 0; i < MAPCACHE_L2_ENTRIES + 1; ++i ) - free_xenheap_page(dcache->l1tab[i]); - - xfree(dcache->l1tab); - } + xfree(dcache->l1tab); } int mapcache_vcpu_init(struct vcpu *v) { struct domain *d = v->domain; struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache; - 
l2_pgentry_t *l2tab; unsigned long i; - unsigned int memf = MEMF_node(vcpu_to_node(v)); + unsigned int ents = d->max_vcpus * MAPCACHE_VCPU_ENTRIES; + unsigned int nr = PFN_UP(BITS_TO_LONGS(ents) * sizeof(long)); if ( is_hvm_vcpu(v) || !dcache->l1tab ) return 0; - l2tab = __map_domain_page(d->arch.perdomain_l2_pg[MAPCACHE_SLOT]); - - while ( dcache->entries < d->max_vcpus * MAPCACHE_VCPU_ENTRIES ) + if ( ents > dcache->entries ) { - unsigned int ents = dcache->entries + MAPCACHE_VCPU_ENTRIES; - l1_pgentry_t *pl1e; - /* Populate page tables. */ - if ( !dcache->l1tab[i = mapcache_l2_entry(ents - 1)] ) - { - dcache->l1tab[i] = alloc_xenheap_pages(0, memf); - if ( !dcache->l1tab[i] ) - { - unmap_domain_page(l2tab); - return -ENOMEM; - } - clear_page(dcache->l1tab[i]); - l2tab[i] = l2e_from_paddr(__pa(dcache->l1tab[i]), - __PAGE_HYPERVISOR); - } + int rc = create_perdomain_mapping(d, MAPCACHE_VIRT_START, + d->max_vcpus * MAPCACHE_VCPU_ENTRIES, + dcache->l1tab, NULL); /* Populate bit maps. */ - i = (unsigned long)(dcache->inuse + BITS_TO_LONGS(ents)); - pl1e = &dcache->l1tab[l2_table_offset(i)][l1_table_offset(i)]; - if ( !l1e_get_flags(*pl1e) ) - { - struct page_info *pg = alloc_domheap_page(NULL, memf); + if ( !rc ) + rc = create_perdomain_mapping(d, (unsigned long)dcache->inuse, + nr, NULL, NIL(struct page_info *)); + if ( !rc ) + rc = create_perdomain_mapping(d, (unsigned long)dcache->garbage, + nr, NULL, NIL(struct page_info *)); - if ( pg ) - { - clear_domain_page(page_to_mfn(pg)); - *pl1e = l1e_from_page(pg, __PAGE_HYPERVISOR); - pg = alloc_domheap_page(NULL, memf); - } - if ( !pg ) - { - unmap_domain_page(l2tab); - return -ENOMEM; - } - - i = (unsigned long)(dcache->garbage + BITS_TO_LONGS(ents)); - pl1e = &dcache->l1tab[l2_table_offset(i)][l1_table_offset(i)]; - ASSERT(!l1e_get_flags(*pl1e)); - - clear_domain_page(page_to_mfn(pg)); - *pl1e = l1e_from_page(pg, __PAGE_HYPERVISOR); - } + if ( rc ) + return rc; dcache->entries = ents; } - unmap_domain_page(l2tab); - 
/* Mark all maphash entries as not in use. */ BUILD_BUG_ON(MAPHASHENT_NOTINUSE < MAPCACHE_ENTRIES); for ( i = 0; i < MAPHASH_ENTRIES; i++ ) --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -511,6 +511,7 @@ void update_cr3(struct vcpu *v) static void invalidate_shadow_ldt(struct vcpu *v, int flush) { + l1_pgentry_t *pl1e; int i; unsigned long pfn; struct page_info *page; @@ -523,12 +524,13 @@ static void invalidate_shadow_ldt(struct goto out; v->arch.pv_vcpu.shadow_ldt_mapcnt = 0; + pl1e = gdt_ldt_ptes(v->domain, v); for ( i = 16; i < 32; i++ ) { - pfn = l1e_get_pfn(v->arch.perdomain_ptes[i]); + pfn = l1e_get_pfn(pl1e[i]); if ( pfn == 0 ) continue; - l1e_write(&v->arch.perdomain_ptes[i], l1e_empty()); + l1e_write(&pl1e[i], l1e_empty()); page = mfn_to_page(pfn); ASSERT_PAGE_IS_TYPE(page, PGT_seg_desc_page); ASSERT_PAGE_IS_DOMAIN(page, v->domain); @@ -596,7 +598,7 @@ int map_ldt_shadow_page(unsigned int off nl1e = l1e_from_pfn(page_to_mfn(page), l1e_get_flags(l1e) | _PAGE_RW); spin_lock(&v->arch.pv_vcpu.shadow_ldt_lock); - l1e_write(&v->arch.perdomain_ptes[off + 16], nl1e); + l1e_write(&gdt_ldt_ptes(d, v)[off + 16], nl1e); v->arch.pv_vcpu.shadow_ldt_mapcnt++; spin_unlock(&v->arch.pv_vcpu.shadow_ldt_lock); @@ -4073,15 +4075,17 @@ long do_update_va_mapping_otherdomain(un void destroy_gdt(struct vcpu *v) { + l1_pgentry_t *pl1e; int i; unsigned long pfn; v->arch.pv_vcpu.gdt_ents = 0; + pl1e = gdt_ldt_ptes(v->domain, v); for ( i = 0; i < FIRST_RESERVED_GDT_PAGE; i++ ) { - if ( (pfn = l1e_get_pfn(v->arch.perdomain_ptes[i])) != 0 ) + if ( (pfn = l1e_get_pfn(pl1e[i])) != 0 ) put_page_and_type(mfn_to_page(pfn)); - l1e_write(&v->arch.perdomain_ptes[i], l1e_empty()); + l1e_write(&pl1e[i], l1e_empty()); v->arch.pv_vcpu.gdt_frames[i] = 0; } } @@ -4092,6 +4096,7 @@ long set_gdt(struct vcpu *v, unsigned int entries) { struct domain *d = v->domain; + l1_pgentry_t *pl1e; /* NB. There are 512 8-byte entries per GDT page. 
*/ int i, nr_pages = (entries + 511) / 512; unsigned long mfn, *pfns; @@ -4124,11 +4129,11 @@ long set_gdt(struct vcpu *v, /* Install the new GDT. */ v->arch.pv_vcpu.gdt_ents = entries; + pl1e = gdt_ldt_ptes(d, v); for ( i = 0; i < nr_pages; i++ ) { v->arch.pv_vcpu.gdt_frames[i] = frames[i]; - l1e_write(&v->arch.perdomain_ptes[i], - l1e_from_pfn(frames[i], __PAGE_HYPERVISOR)); + l1e_write(&pl1e[i], l1e_from_pfn(frames[i], __PAGE_HYPERVISOR)); } xfree(pfns); @@ -5528,6 +5533,175 @@ void __iomem *ioremap(paddr_t pa, size_t return (void __force __iomem *)va; } +int create_perdomain_mapping(struct domain *d, unsigned long va, + unsigned int nr, l1_pgentry_t **pl1tab, + struct page_info **ppg) +{ + struct page_info *pg; + l3_pgentry_t *l3tab; + l2_pgentry_t *l2tab; + l1_pgentry_t *l1tab; + unsigned int memf = MEMF_node(domain_to_node(d)); + int rc = 0; + + ASSERT(va >= PERDOMAIN_VIRT_START && + va < PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS)); + + if ( !d->arch.perdomain_l3_pg ) + { + pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d))); + if ( !pg ) + return -ENOMEM; + l3tab = __map_domain_page(pg); + clear_page(l3tab); + d->arch.perdomain_l3_pg = pg; + if ( !nr ) + { + unmap_domain_page(l3tab); + return 0; + } + } + else if ( !nr ) + return 0; + else + l3tab = __map_domain_page(d->arch.perdomain_l3_pg); + + ASSERT(!l3_table_offset(va ^ (va + nr * PAGE_SIZE - 1))); + + if ( !(l3e_get_flags(l3tab[l3_table_offset(va)]) & _PAGE_PRESENT) ) + { + pg = alloc_domheap_page(NULL, memf); + if ( !pg ) + { + unmap_domain_page(l3tab); + return -ENOMEM; + } + l2tab = __map_domain_page(pg); + clear_page(l2tab); + l3tab[l3_table_offset(va)] = l3e_from_page(pg, __PAGE_HYPERVISOR); + } + else + l2tab = map_domain_page(l3e_get_pfn(l3tab[l3_table_offset(va)])); + + unmap_domain_page(l3tab); + + if ( !pl1tab && !ppg ) + { + unmap_domain_page(l2tab); + return 0; + } + + for ( l1tab = NULL; !rc && nr--; ) + { + l2_pgentry_t *pl2e = l2tab + l2_table_offset(va); + + if ( 
!(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) + { + if ( pl1tab && !IS_NIL(pl1tab) ) + { + l1tab = alloc_xenheap_pages(0, memf); + if ( !l1tab ) + { + rc = -ENOMEM; + break; + } + ASSERT(!pl1tab[l2_table_offset(va)]); + pl1tab[l2_table_offset(va)] = l1tab; + pg = virt_to_page(l1tab); + } + else + { + pg = alloc_domheap_page(NULL, memf); + if ( !pg ) + { + rc = -ENOMEM; + break; + } + l1tab = __map_domain_page(pg); + } + clear_page(l1tab); + *pl2e = l2e_from_page(pg, __PAGE_HYPERVISOR); + } + else if ( !l1tab ) + l1tab = map_domain_page(l2e_get_pfn(*pl2e)); + + if ( ppg && + !(l1e_get_flags(l1tab[l1_table_offset(va)]) & _PAGE_PRESENT) ) + { + pg = alloc_domheap_page(NULL, memf); + if ( pg ) + { + clear_domain_page(page_to_mfn(pg)); + if ( !IS_NIL(ppg) ) + *ppg++ = pg; + l1tab[l1_table_offset(va)] + l1e_from_page(pg, __PAGE_HYPERVISOR | _PAGE_AVAIL0); + l2e_add_flags(*pl2e, _PAGE_AVAIL0); + } + else + rc = -ENOMEM; + } + + va += PAGE_SIZE; + if ( rc || !nr || !l1_table_offset(va) ) + { + /* Note that this is a no-op for the alloc_xenheap_page() case. 
*/ + unmap_domain_page(l1tab); + l1tab = NULL; + } + } + + ASSERT(!l1tab); + unmap_domain_page(l2tab); + + return rc; +} + +void free_perdomain_mappings(struct domain *d) +{ + l3_pgentry_t *l3tab = __map_domain_page(d->arch.perdomain_l3_pg); + unsigned int i; + + for ( i = 0; i < PERDOMAIN_SLOTS; ++i) + if ( l3e_get_flags(l3tab[i]) & _PAGE_PRESENT ) + { + struct page_info *l2pg = l3e_get_page(l3tab[i]); + l2_pgentry_t *l2tab = __map_domain_page(l2pg); + unsigned int j; + + for ( j = 0; j < L2_PAGETABLE_ENTRIES; ++j ) + if ( l2e_get_flags(l2tab[j]) & _PAGE_PRESENT ) + { + struct page_info *l1pg = l2e_get_page(l2tab[j]); + + if ( l2e_get_flags(l2tab[j]) & _PAGE_AVAIL0 ) + { + l1_pgentry_t *l1tab = __map_domain_page(l1pg); + unsigned int k; + + for ( k = 0; k < L1_PAGETABLE_ENTRIES; ++k ) + if ( (l1e_get_flags(l1tab[k]) & + (_PAGE_PRESENT | _PAGE_AVAIL0)) =+ (_PAGE_PRESENT | _PAGE_AVAIL0) ) + free_domheap_page(l1e_get_page(l1tab[k])); + + unmap_domain_page(l1tab); + } + + if ( is_xen_heap_page(l1pg) ) + free_xenheap_page(page_to_virt(l1pg)); + else + free_domheap_page(l1pg); + } + + unmap_domain_page(l2tab); + free_domheap_page(l2pg); + } + + unmap_domain_page(l3tab); + free_domheap_page(d->arch.perdomain_l3_pg); +} + #ifdef MEMORY_GUARD void memguard_init(void) --- a/xen/include/asm-x86/config.h +++ b/xen/include/asm-x86/config.h @@ -304,19 +304,13 @@ extern unsigned long xen_phys_start; #define LDT_VIRT_START(v) \ (GDT_VIRT_START(v) + (64*1024)) -/* map_domain_page() map cache. The last per-domain-mapping sub-area. */ +/* map_domain_page() map cache. The second per-domain-mapping sub-area. 
*/ #define MAPCACHE_VCPU_ENTRIES (CONFIG_PAGING_LEVELS * CONFIG_PAGING_LEVELS) #define MAPCACHE_ENTRIES (MAX_VIRT_CPUS * MAPCACHE_VCPU_ENTRIES) -#define MAPCACHE_SLOT (PERDOMAIN_SLOTS - 1) -#define MAPCACHE_VIRT_START PERDOMAIN_VIRT_SLOT(MAPCACHE_SLOT) +#define MAPCACHE_VIRT_START PERDOMAIN_VIRT_SLOT(1) #define MAPCACHE_VIRT_END (MAPCACHE_VIRT_START + \ MAPCACHE_ENTRIES * PAGE_SIZE) -#define PDPT_L1_ENTRIES \ - ((PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS - 1) - PERDOMAIN_VIRT_START) >> PAGE_SHIFT) -#define PDPT_L2_ENTRIES \ - ((PDPT_L1_ENTRIES + (1 << PAGETABLE_ORDER) - 1) >> PAGETABLE_ORDER) - #define ELFSIZE 64 #define ARCH_CRASH_SAVE_VMCOREINFO --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -223,6 +223,8 @@ struct time_scale { struct pv_domain { + l1_pgentry_t **gdt_ldt_l1tab; + /* Shared page for notifying that explicit PIRQ EOI is required. */ unsigned long *pirq_eoi_map; unsigned long pirq_eoi_map_mfn; @@ -241,8 +243,6 @@ struct pv_domain struct arch_domain { - void **perdomain_pts; - struct page_info *perdomain_l2_pg[PERDOMAIN_SLOTS]; struct page_info *perdomain_l3_pg; unsigned int hv_compat_vstart; @@ -318,10 +318,10 @@ struct arch_domain #define has_arch_pdevs(d) (!list_empty(&(d)->arch.pdev_list)) #define has_arch_mmios(d) (!rangeset_is_empty((d)->iomem_caps)) -#define perdomain_pt_idx(v) \ +#define gdt_ldt_pt_idx(v) \ ((v)->vcpu_id >> (PAGETABLE_ORDER - GDT_LDT_VCPU_SHIFT)) -#define perdomain_ptes(d, v) \ - ((l1_pgentry_t *)(d)->arch.perdomain_pts[perdomain_pt_idx(v)] + \ +#define gdt_ldt_ptes(d, v) \ + ((d)->arch.pv_domain.gdt_ldt_l1tab[gdt_ldt_pt_idx(v)] + \ (((v)->vcpu_id << GDT_LDT_VCPU_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))) struct pv_vcpu @@ -406,12 +406,6 @@ struct arch_vcpu struct hvm_vcpu hvm_vcpu; }; - /* - * Every domain has a L1 pagetable of its own. Per-domain mappings - * are put in this table (eg. the current GDT is mapped here). 
- */ - l1_pgentry_t *perdomain_ptes; - pagetable_t guest_table_user; /* (MFN) x86/64 user-space pagetable */ pagetable_t guest_table; /* (MFN) guest notion of cr3 */ /* guest_table holds a ref to the page, and also a type-count unless --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -573,6 +573,14 @@ int donate_page( int map_ldt_shadow_page(unsigned int); +#define NIL(type) ((type *)NULL - 1) +#define IS_NIL(ptr) (!((ptr) + 1)) + +int create_perdomain_mapping(struct domain *, unsigned long va, + unsigned int nr, l1_pgentry_t **, + struct page_info **); +void free_perdomain_mappings(struct domain *); + extern int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm); void domain_set_alloc_bitsize(struct domain *d); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2013-Feb-26 16:06 UTC
[PATCH v2 2/3] x86: rework hypercall argument translation area setup
... using the new per-domain mapping management functions, adding destroy_perdomain_mapping() to the previously introduced pair. Rather than using an order-1 Xen heap allocation, use (currently 2) individual domain heap pages to populate space in the per-domain mapping area. Also fix a benign off-by-one mistake in is_compat_arg_xlat_range(). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -5657,6 +5657,59 @@ int create_perdomain_mapping(struct doma return rc; } +void destroy_perdomain_mapping(struct domain *d, unsigned long va, + unsigned int nr) +{ + const l3_pgentry_t *l3tab, *pl3e; + + ASSERT(va >= PERDOMAIN_VIRT_START && + va < PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS)); + ASSERT(!l3_table_offset(va ^ (va + nr * PAGE_SIZE - 1))); + + if ( !d->arch.perdomain_l3_pg ) + return; + + l3tab = __map_domain_page(d->arch.perdomain_l3_pg); + pl3e = l3tab + l3_table_offset(va); + + if ( l3e_get_flags(*pl3e) & _PAGE_PRESENT ) + { + const l2_pgentry_t *l2tab = map_domain_page(l3e_get_pfn(*pl3e)); + const l2_pgentry_t *pl2e = l2tab + l2_table_offset(va); + unsigned int i = l1_table_offset(va); + + while ( nr ) + { + if ( l2e_get_flags(*pl2e) & _PAGE_PRESENT ) + { + l1_pgentry_t *l1tab = map_domain_page(l2e_get_pfn(*pl2e)); + + for ( ; nr && i < L1_PAGETABLE_ENTRIES; --nr, ++i ) + { + if ( (l1e_get_flags(l1tab[i]) & + (_PAGE_PRESENT | _PAGE_AVAIL0)) =+ (_PAGE_PRESENT | _PAGE_AVAIL0) ) + free_domheap_page(l1e_get_page(l1tab[i])); + l1tab[i] = l1e_empty(); + } + + unmap_domain_page(l1tab); + } + else if ( nr + i < L1_PAGETABLE_ENTRIES ) + break; + else + nr -= L1_PAGETABLE_ENTRIES - i; + + ++pl2e; + i = 0; + } + + unmap_domain_page(l2tab); + } + + unmap_domain_page(l3tab); +} + void free_perdomain_mappings(struct domain *d) { l3_pgentry_t *l3tab = __map_domain_page(d->arch.perdomain_l3_pg); --- a/xen/arch/x86/usercopy.c +++ b/xen/arch/x86/usercopy.c @@ -6,8 +6,8 @@ * Copyright 2002 Andi Kleen <ak@suse.de> */ -#include <xen/config.h> 
#include <xen/lib.h> +#include <xen/sched.h> #include <asm/uaccess.h> unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned n) --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -832,27 +832,17 @@ void __init zap_low_mappings(void) __PAGE_HYPERVISOR); } -void *compat_arg_xlat_virt_base(void) -{ - return current->arch.compat_arg_xlat; -} - int setup_compat_arg_xlat(struct vcpu *v) { - unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE); - - v->arch.compat_arg_xlat = alloc_xenheap_pages(order, - MEMF_node(vcpu_to_node(v))); - - return v->arch.compat_arg_xlat ? 0 : -ENOMEM; + return create_perdomain_mapping(v->domain, ARG_XLAT_START(v), + PFN_UP(COMPAT_ARG_XLAT_SIZE), + NULL, NIL(struct page_info *)); } void free_compat_arg_xlat(struct vcpu *v) { - unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE); - - free_xenheap_pages(v->arch.compat_arg_xlat, order); - v->arch.compat_arg_xlat = NULL; + destroy_perdomain_mapping(v->domain, ARG_XLAT_START(v), + PFN_UP(COMPAT_ARG_XLAT_SIZE)); } void cleanup_frame_table(struct mem_hotadd_info *info) --- a/xen/include/asm-x86/config.h +++ b/xen/include/asm-x86/config.h @@ -212,7 +212,7 @@ extern unsigned char boot_edid_info[128] /* Slot 260: per-domain mappings (including map cache). */ #define PERDOMAIN_VIRT_START (PML4_ADDR(260)) #define PERDOMAIN_SLOT_MBYTES (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER)) -#define PERDOMAIN_SLOTS 2 +#define PERDOMAIN_SLOTS 3 #define PERDOMAIN_VIRT_SLOT(s) (PERDOMAIN_VIRT_START + (s) * \ (PERDOMAIN_SLOT_MBYTES << 20)) /* Slot 261: machine-to-phys conversion table (256GB). */ @@ -311,6 +311,13 @@ extern unsigned long xen_phys_start; #define MAPCACHE_VIRT_END (MAPCACHE_VIRT_START + \ MAPCACHE_ENTRIES * PAGE_SIZE) +/* Argument translation area. The third per-domain-mapping sub-area. 
*/ +#define ARG_XLAT_VIRT_START PERDOMAIN_VIRT_SLOT(2) +/* Allow for at least one guard page (COMPAT_ARG_XLAT_SIZE being 2 pages): */ +#define ARG_XLAT_VA_SHIFT (2 + PAGE_SHIFT) +#define ARG_XLAT_START(v) \ + (ARG_XLAT_VIRT_START + ((v)->vcpu_id << ARG_XLAT_VA_SHIFT)) + #define ELFSIZE 64 #define ARCH_CRASH_SAVE_VMCOREINFO --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -442,9 +442,6 @@ struct arch_vcpu /* A secondary copy of the vcpu time info. */ XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest; - - void *compat_arg_xlat; - } __cacheline_aligned; /* Shorthands to improve code legibility. */ --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -579,6 +579,8 @@ int map_ldt_shadow_page(unsigned int); int create_perdomain_mapping(struct domain *, unsigned long va, unsigned int nr, l1_pgentry_t **, struct page_info **); +void destroy_perdomain_mapping(struct domain *, unsigned long va, + unsigned int nr); void free_perdomain_mappings(struct domain *); extern int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm); --- a/xen/include/asm-x86/x86_64/uaccess.h +++ b/xen/include/asm-x86/x86_64/uaccess.h @@ -1,16 +1,15 @@ #ifndef __X86_64_UACCESS_H #define __X86_64_UACCESS_H -#define COMPAT_ARG_XLAT_VIRT_BASE compat_arg_xlat_virt_base() +#define COMPAT_ARG_XLAT_VIRT_BASE ((void *)ARG_XLAT_START(current)) #define COMPAT_ARG_XLAT_SIZE (2*PAGE_SIZE) struct vcpu; -void *compat_arg_xlat_virt_base(void); int setup_compat_arg_xlat(struct vcpu *v); void free_compat_arg_xlat(struct vcpu *v); #define is_compat_arg_xlat_range(addr, size) ({ \ unsigned long __off; \ __off = (unsigned long)(addr) - (unsigned long)COMPAT_ARG_XLAT_VIRT_BASE; \ - (__off <= COMPAT_ARG_XLAT_SIZE) && \ + (__off < COMPAT_ARG_XLAT_SIZE) && \ ((__off + (unsigned long)(size)) <= COMPAT_ARG_XLAT_SIZE); \ }) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2013-Feb-26 16:07 UTC
[PATCH v2 3/3] x86: use linear L1 page table for map_domain_page() page table manipulation
This saves allocation of a Xen heap page for tracking the L1 page table pointers. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -585,7 +585,6 @@ int arch_domain_create(struct domain *d, free_xenheap_page(d->shared_info); if ( paging_initialised ) paging_final_teardown(d); - mapcache_domain_exit(d); free_perdomain_mappings(d); if ( !is_hvm_domain(d) ) free_xenheap_page(d->arch.pv_domain.gdt_ldt_l1tab); @@ -605,8 +604,6 @@ void arch_domain_destroy(struct domain * paging_final_teardown(d); - mapcache_domain_exit(d); - free_perdomain_mappings(d); if ( !is_hvm_domain(d) ) free_xenheap_page(d->arch.pv_domain.gdt_ldt_l1tab); --- a/xen/arch/x86/domain_page.c +++ b/xen/arch/x86/domain_page.c @@ -53,9 +53,8 @@ void __init mapcache_override_current(st #define mapcache_l2_entry(e) ((e) >> PAGETABLE_ORDER) #define MAPCACHE_L2_ENTRIES (mapcache_l2_entry(MAPCACHE_ENTRIES - 1) + 1) -#define DCACHE_L1ENT(dc, idx) \ - ((dc)->l1tab[(idx) >> PAGETABLE_ORDER] \ - [(idx) & ((1 << PAGETABLE_ORDER) - 1)]) +#define MAPCACHE_L1ENT(idx) \ + __linear_l1_table[l1_linear_offset(MAPCACHE_VIRT_START + pfn_to_paddr(idx))] void *map_domain_page(unsigned long mfn) { @@ -77,7 +76,7 @@ void *map_domain_page(unsigned long mfn) dcache = &v->domain->arch.pv_domain.mapcache; vcache = &v->arch.pv_vcpu.mapcache; - if ( !dcache->l1tab ) + if ( !dcache->inuse ) return mfn_to_virt(mfn); perfc_incr(map_domain_page_count); @@ -91,7 +90,7 @@ void *map_domain_page(unsigned long mfn) ASSERT(idx < dcache->entries); hashent->refcnt++; ASSERT(hashent->refcnt); - ASSERT(l1e_get_pfn(DCACHE_L1ENT(dcache, idx)) == mfn); + ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(idx)) == mfn); goto out; } @@ -131,9 +130,8 @@ void *map_domain_page(unsigned long mfn) if ( hashent->idx != MAPHASHENT_NOTINUSE && !hashent->refcnt ) { idx = hashent->idx; - ASSERT(l1e_get_pfn(DCACHE_L1ENT(dcache, idx)) =- hashent->mfn); - l1e_write(&DCACHE_L1ENT(dcache, idx), l1e_empty()); + 
ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(idx)) == hashent->mfn); + l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty()); hashent->idx = MAPHASHENT_NOTINUSE; hashent->mfn = ~0UL; break; @@ -156,8 +154,7 @@ void *map_domain_page(unsigned long mfn) spin_unlock(&dcache->lock); - l1e_write(&DCACHE_L1ENT(dcache, idx), - l1e_from_pfn(mfn, __PAGE_HYPERVISOR)); + l1e_write(&MAPCACHE_L1ENT(idx), l1e_from_pfn(mfn, __PAGE_HYPERVISOR)); out: local_irq_restore(flags); @@ -181,10 +178,10 @@ void unmap_domain_page(const void *ptr) ASSERT(v && !is_hvm_vcpu(v)); dcache = &v->domain->arch.pv_domain.mapcache; - ASSERT(dcache->l1tab); + ASSERT(dcache->inuse); idx = PFN_DOWN(va - MAPCACHE_VIRT_START); - mfn = l1e_get_pfn(DCACHE_L1ENT(dcache, idx)); + mfn = l1e_get_pfn(MAPCACHE_L1ENT(idx)); hashent = &v->arch.pv_vcpu.mapcache.hash[MAPHASH_HASHFN(mfn)]; local_irq_save(flags); @@ -200,9 +197,9 @@ void unmap_domain_page(const void *ptr) if ( hashent->idx != MAPHASHENT_NOTINUSE ) { /* /First/, zap the PTE. */ - ASSERT(l1e_get_pfn(DCACHE_L1ENT(dcache, hashent->idx)) =+ ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(hashent->idx)) = hashent->mfn); - l1e_write(&DCACHE_L1ENT(dcache, hashent->idx), l1e_empty()); + l1e_write(&MAPCACHE_L1ENT(hashent->idx), l1e_empty()); /* /Second/, mark as garbage. */ set_bit(hashent->idx, dcache->garbage); } @@ -214,7 +211,7 @@ void unmap_domain_page(const void *ptr) else { /* /First/, zap the PTE. */ - l1e_write(&DCACHE_L1ENT(dcache, idx), l1e_empty()); + l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty()); /* /Second/, mark as garbage. 
*/ set_bit(idx, dcache->garbage); } @@ -253,10 +250,6 @@ int mapcache_domain_init(struct domain * return 0; #endif - dcache->l1tab = xzalloc_array(l1_pgentry_t *, MAPCACHE_L2_ENTRIES); - if ( !dcache->l1tab ) - return -ENOMEM; - BUILD_BUG_ON(MAPCACHE_VIRT_END + PAGE_SIZE * (3 + 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) > MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20)); @@ -272,16 +265,6 @@ int mapcache_domain_init(struct domain * NIL(l1_pgentry_t *), NULL); } -void mapcache_domain_exit(struct domain *d) -{ - struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache; - - if ( is_hvm_domain(d) ) - return; - - xfree(dcache->l1tab); -} - int mapcache_vcpu_init(struct vcpu *v) { struct domain *d = v->domain; @@ -290,7 +273,7 @@ int mapcache_vcpu_init(struct vcpu *v) unsigned int ents = d->max_vcpus * MAPCACHE_VCPU_ENTRIES; unsigned int nr = PFN_UP(BITS_TO_LONGS(ents) * sizeof(long)); - if ( is_hvm_vcpu(v) || !dcache->l1tab ) + if ( is_hvm_vcpu(v) || !dcache->inuse ) return 0; if ( ents > dcache->entries ) @@ -298,7 +281,7 @@ int mapcache_vcpu_init(struct vcpu *v) /* Populate page tables. */ int rc = create_perdomain_mapping(d, MAPCACHE_VIRT_START, d->max_vcpus * MAPCACHE_VCPU_ENTRIES, - dcache->l1tab, NULL); + NIL(l1_pgentry_t *), NULL); /* Populate bit maps. */ if ( !rc ) --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -53,8 +53,7 @@ struct mapcache_vcpu { }; struct mapcache_domain { - /* The PTEs that provide the mappings, and a cursor into the array. */ - l1_pgentry_t **l1tab; + /* The number of array entries, and a cursor into the array. 
*/ unsigned int entries; unsigned int cursor; @@ -71,7 +70,6 @@ struct mapcache_domain { }; int mapcache_domain_init(struct domain *); -void mapcache_domain_exit(struct domain *); int mapcache_vcpu_init(struct vcpu *); void mapcache_override_current(struct vcpu *); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
On 26/02/2013 15:56, "Jan Beulich" <JBeulich@suse.com> wrote:> The main goal being the re-work of the hypercall argument translation > area management, this series first breaks out per-domain mapping > management into its own set of functions, in order to then use this > for setting up the translation areas in per-domain space. > > While doing this I also realized that it's pointless for the > map_domain_page() code to track L1 page table pointers in a > separate Xen heap page - we can equally well use the linear > page table for the manipulations needed here. > > 1: introduce create_perdomain_mapping() > 2: rework hypercall argument translation area setup > 3: use linear L1 page table for map_domain_page() page table manipulation > > Signed-off-by: Jan Beulich <jbeulich@suse.com>Acked-by: Keir Fraser <keir@xen.org>> > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel