The main goal being the re-work of the hypercall argument translation area management, this series first breaks out per-domain mapping management into its own set of functions, in order to then use this for setting up the translation areas in per-domain space.

While doing this I also realized that it's pointless for the map_domain_page() code to track L1 page table pointers in a separate Xen heap page - we can equally well use the linear page table for the manipulations needed here.

1: introduce create_perdomain_mapping()
2: rework hypercall argument translation area setup
3: use linear L1 page table for map_domain_page() page table manipulation

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Jan Beulich
2013-Feb-26 16:06 UTC
[PATCH v2 1/3] x86: introduce create_perdomain_mapping()
... as well as free_perdomain_mappings(), and use them to carry out the
existing per-domain mapping setup/teardown. This at once makes the
setup of the first sub-range PV domain specific (with idle domains also
excluded), as the GDT/LDT mapping area is needed only for those.
Also fix an improperly scaled BUILD_BUG_ON() expression in
mapcache_domain_init().
Signed-off-by: Jan Beulich <jbeulich@suse.com>
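For reference, a sketch of the calling convention the new pair establishes (illustrative only; d, va and nr below are placeholders, everything else is taken from the patch as introduced further down):

    /* nr == 0: only make sure the domain's per-domain L3 page exists. */
    rc = create_perdomain_mapping(d, PERDOMAIN_VIRT_START, 0, NULL, NULL);

    /* pl1tab neither NULL nor NIL: L1 tables come from the Xen heap and
     * their pointers are handed back through the array (GDT/LDT case). */
    rc = create_perdomain_mapping(d, va, nr,
                                  d->arch.pv_domain.gdt_ldt_l1tab, NULL);

    /* ppg == NIL: additionally allocate and map domain heap pages for the
     * range, without handing the struct page_info pointers back. */
    rc = create_perdomain_mapping(d, va, nr, NULL, NIL(struct page_info *));

    /* All mappings and intermediate page tables are torn down in one go. */
    free_perdomain_mappings(d);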
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -372,37 +372,16 @@ int switch_compat(struct domain *d)
int vcpu_initialise(struct vcpu *v)
{
struct domain *d = v->domain;
- unsigned int idx;
int rc;
v->arch.flags = TF_kernel_mode;
- idx = perdomain_pt_idx(v);
- if ( !d->arch.perdomain_pts[idx] )
- {
- void *pt;
- l2_pgentry_t *l2tab;
-
- pt = alloc_xenheap_pages(0, MEMF_node(vcpu_to_node(v)));
- if ( !pt )
- return -ENOMEM;
- clear_page(pt);
- d->arch.perdomain_pts[idx] = pt;
-
- l2tab = __map_domain_page(d->arch.perdomain_l2_pg[0]);
- l2tab[l2_table_offset(PERDOMAIN_VIRT_START) + idx]
- = l2e_from_paddr(__pa(pt), __PAGE_HYPERVISOR);
- unmap_domain_page(l2tab);
- }
-
rc = mapcache_vcpu_init(v);
if ( rc )
return rc;
paging_vcpu_init(v);
- v->arch.perdomain_ptes = perdomain_ptes(d, v);
-
if ( (rc = vcpu_init_fpu(v)) != 0 )
return rc;
@@ -420,6 +399,12 @@ int vcpu_initialise(struct vcpu *v)
if ( !is_idle_domain(d) )
{
+ rc = create_perdomain_mapping(d, GDT_VIRT_START(v),
+ 1 << GDT_LDT_VCPU_SHIFT,
+ d->arch.pv_domain.gdt_ldt_l1tab, NULL);
+ if ( rc )
+ goto done;
+
BUILD_BUG_ON(NR_VECTORS * sizeof(*v->arch.pv_vcpu.trap_ctxt) >
PAGE_SIZE);
v->arch.pv_vcpu.trap_ctxt = xzalloc_array(struct trap_info,
@@ -478,8 +463,6 @@ void vcpu_destroy(struct vcpu *v)
int arch_domain_create(struct domain *d, unsigned int domcr_flags)
{
- struct page_info *pg;
- l3_pgentry_t *l3tab;
int i, paging_initialised = 0;
int rc = -ENOMEM;
@@ -510,29 +493,24 @@ int arch_domain_create(struct domain *d,
d->domain_id);
}
- BUILD_BUG_ON(PDPT_L2_ENTRIES * sizeof(*d->arch.perdomain_pts)
- != PAGE_SIZE);
- d->arch.perdomain_pts = alloc_xenheap_pages(0, MEMF_node(domain_to_node(d)));
- if ( !d->arch.perdomain_pts )
- goto fail;
- clear_page(d->arch.perdomain_pts);
-
- pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
- if ( pg == NULL )
- goto fail;
- d->arch.perdomain_l2_pg[0] = pg;
- clear_domain_page(page_to_mfn(pg));
+ if ( is_hvm_domain(d) )
+ rc = create_perdomain_mapping(d, PERDOMAIN_VIRT_START, 0, NULL, NULL);
+ else if ( is_idle_domain(d) )
+ rc = 0;
+ else
+ {
+ d->arch.pv_domain.gdt_ldt_l1tab = alloc_xenheap_pages(0, MEMF_node(domain_to_node(d)));
+ if ( !d->arch.pv_domain.gdt_ldt_l1tab )
+ goto fail;
+ clear_page(d->arch.pv_domain.gdt_ldt_l1tab);
- pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
- if ( pg == NULL )
+ rc = create_perdomain_mapping(d, GDT_LDT_VIRT_START,
+ GDT_LDT_MBYTES << (20 - PAGE_SHIFT),
+ NULL, NULL);
+ }
+ if ( rc )
goto fail;
- d->arch.perdomain_l3_pg = pg;
- l3tab = __map_domain_page(pg);
- clear_page(l3tab);
- l3tab[l3_table_offset(PERDOMAIN_VIRT_START)] =
- l3e_from_page(d->arch.perdomain_l2_pg[0], __PAGE_HYPERVISOR);
- unmap_domain_page(l3tab);
mapcache_domain_init(d);
@@ -608,19 +586,14 @@ int arch_domain_create(struct domain *d,
if ( paging_initialised )
paging_final_teardown(d);
mapcache_domain_exit(d);
- for ( i = 0; i < PERDOMAIN_SLOTS; ++i)
- if ( d->arch.perdomain_l2_pg[i] )
- free_domheap_page(d->arch.perdomain_l2_pg[i]);
- if ( d->arch.perdomain_l3_pg )
- free_domheap_page(d->arch.perdomain_l3_pg);
- free_xenheap_page(d->arch.perdomain_pts);
+ free_perdomain_mappings(d);
+ if ( !is_hvm_domain(d) )
+ free_xenheap_page(d->arch.pv_domain.gdt_ldt_l1tab);
return rc;
}
void arch_domain_destroy(struct domain *d)
{
- unsigned int i;
-
if ( is_hvm_domain(d) )
hvm_domain_destroy(d);
else
@@ -634,13 +607,9 @@ void arch_domain_destroy(struct domain *
mapcache_domain_exit(d);
- for ( i = 0; i < PDPT_L2_ENTRIES; ++i )
- free_xenheap_page(d->arch.perdomain_pts[i]);
- free_xenheap_page(d->arch.perdomain_pts);
- for ( i = 0; i < PERDOMAIN_SLOTS; ++i)
- if ( d->arch.perdomain_l2_pg[i] )
- free_domheap_page(d->arch.perdomain_l2_pg[i]);
- free_domheap_page(d->arch.perdomain_l3_pg);
+ free_perdomain_mappings(d);
+ if ( !is_hvm_domain(d) )
+ free_xenheap_page(d->arch.pv_domain.gdt_ldt_l1tab);
free_xenheap_page(d->shared_info);
cleanup_domain_irq_mapping(d);
@@ -1515,10 +1484,11 @@ static void __context_switch(void)
if ( need_full_gdt(n) )
{
unsigned long mfn = virt_to_mfn(gdt);
+ l1_pgentry_t *pl1e = gdt_ldt_ptes(n->domain, n);
unsigned int i;
+
for ( i = 0; i < NR_RESERVED_GDT_PAGES; i++ )
- l1e_write(n->arch.perdomain_ptes +
- FIRST_RESERVED_GDT_PAGE + i,
+ l1e_write(pl1e + FIRST_RESERVED_GDT_PAGE + i,
l1e_from_pfn(mfn + i, __PAGE_HYPERVISOR));
}
--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -243,10 +243,7 @@ void copy_domain_page(unsigned long dmfn
int mapcache_domain_init(struct domain *d)
{
struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
- l3_pgentry_t *l3tab;
- l2_pgentry_t *l2tab;
- unsigned int i, bitmap_pages, memf = MEMF_node(domain_to_node(d));
- unsigned long *end;
+ unsigned int bitmap_pages;
if ( is_hvm_domain(d) || is_idle_domain(d) )
return 0;
@@ -256,48 +253,23 @@ int mapcache_domain_init(struct domain *
return 0;
#endif
- dcache->l1tab = xzalloc_array(l1_pgentry_t *, MAPCACHE_L2_ENTRIES + 1);
- d->arch.perdomain_l2_pg[MAPCACHE_SLOT] = alloc_domheap_page(NULL, memf);
- if ( !dcache->l1tab || !d->arch.perdomain_l2_pg[MAPCACHE_SLOT] )
+ dcache->l1tab = xzalloc_array(l1_pgentry_t *, MAPCACHE_L2_ENTRIES);
+ if ( !dcache->l1tab )
return -ENOMEM;
- clear_domain_page(page_to_mfn(d->arch.perdomain_l2_pg[MAPCACHE_SLOT]));
- l3tab = __map_domain_page(d->arch.perdomain_l3_pg);
- l3tab[l3_table_offset(MAPCACHE_VIRT_START)] =
- l3e_from_page(d->arch.perdomain_l2_pg[MAPCACHE_SLOT],
- __PAGE_HYPERVISOR);
- unmap_domain_page(l3tab);
-
- l2tab = __map_domain_page(d->arch.perdomain_l2_pg[MAPCACHE_SLOT]);
-
- BUILD_BUG_ON(MAPCACHE_VIRT_END + 3 +
- 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long)) >
+ BUILD_BUG_ON(MAPCACHE_VIRT_END + PAGE_SIZE * (3 +
+ 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) >
MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20));
bitmap_pages = PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
dcache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
dcache->garbage = dcache->inuse +
(bitmap_pages + 1) * PAGE_SIZE / sizeof(long);
- end = dcache->garbage + bitmap_pages * PAGE_SIZE / sizeof(long);
-
- for ( i = l2_table_offset((unsigned long)dcache->inuse);
- i <= l2_table_offset((unsigned long)(end - 1)); ++i )
- {
- ASSERT(i <= MAPCACHE_L2_ENTRIES);
- dcache->l1tab[i] = alloc_xenheap_pages(0, memf);
- if ( !dcache->l1tab[i] )
- {
- unmap_domain_page(l2tab);
- return -ENOMEM;
- }
- clear_page(dcache->l1tab[i]);
- l2tab[i] = l2e_from_paddr(__pa(dcache->l1tab[i]),
- __PAGE_HYPERVISOR);
- }
-
- unmap_domain_page(l2tab);
spin_lock_init(&dcache->lock);
- return 0;
+ return create_perdomain_mapping(d, (unsigned long)dcache->inuse,
+ 2 * bitmap_pages + 1,
+ NIL(l1_pgentry_t *), NULL);
}
void mapcache_domain_exit(struct domain *d)
@@ -307,94 +279,41 @@ void mapcache_domain_exit(struct domain
if ( is_hvm_domain(d) )
return;
- if ( dcache->l1tab )
- {
- unsigned long i;
-
- for ( i = (unsigned long)dcache->inuse; ; i += PAGE_SIZE )
- {
- l1_pgentry_t *pl1e;
-
- if ( l2_table_offset(i) > MAPCACHE_L2_ENTRIES ||
- !dcache->l1tab[l2_table_offset(i)] )
- break;
-
- pl1e = &dcache->l1tab[l2_table_offset(i)][l1_table_offset(i)];
- if ( l1e_get_flags(*pl1e) )
- free_domheap_page(l1e_get_page(*pl1e));
- }
-
- for ( i = 0; i < MAPCACHE_L2_ENTRIES + 1; ++i )
- free_xenheap_page(dcache->l1tab[i]);
-
- xfree(dcache->l1tab);
- }
+ xfree(dcache->l1tab);
}
int mapcache_vcpu_init(struct vcpu *v)
{
struct domain *d = v->domain;
struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
- l2_pgentry_t *l2tab;
unsigned long i;
- unsigned int memf = MEMF_node(vcpu_to_node(v));
+ unsigned int ents = d->max_vcpus * MAPCACHE_VCPU_ENTRIES;
+ unsigned int nr = PFN_UP(BITS_TO_LONGS(ents) * sizeof(long));
if ( is_hvm_vcpu(v) || !dcache->l1tab )
return 0;
- l2tab = __map_domain_page(d->arch.perdomain_l2_pg[MAPCACHE_SLOT]);
-
- while ( dcache->entries < d->max_vcpus * MAPCACHE_VCPU_ENTRIES )
+ if ( ents > dcache->entries )
{
- unsigned int ents = dcache->entries + MAPCACHE_VCPU_ENTRIES;
- l1_pgentry_t *pl1e;
-
/* Populate page tables. */
- if ( !dcache->l1tab[i = mapcache_l2_entry(ents - 1)] )
- {
- dcache->l1tab[i] = alloc_xenheap_pages(0, memf);
- if ( !dcache->l1tab[i] )
- {
- unmap_domain_page(l2tab);
- return -ENOMEM;
- }
- clear_page(dcache->l1tab[i]);
- l2tab[i] = l2e_from_paddr(__pa(dcache->l1tab[i]),
- __PAGE_HYPERVISOR);
- }
+ int rc = create_perdomain_mapping(d, MAPCACHE_VIRT_START,
+ d->max_vcpus * MAPCACHE_VCPU_ENTRIES,
+ dcache->l1tab, NULL);
/* Populate bit maps. */
- i = (unsigned long)(dcache->inuse + BITS_TO_LONGS(ents));
- pl1e = &dcache->l1tab[l2_table_offset(i)][l1_table_offset(i)];
- if ( !l1e_get_flags(*pl1e) )
- {
- struct page_info *pg = alloc_domheap_page(NULL, memf);
+ if ( !rc )
+ rc = create_perdomain_mapping(d, (unsigned long)dcache->inuse,
+ nr, NULL, NIL(struct page_info *));
+ if ( !rc )
+ rc = create_perdomain_mapping(d, (unsigned long)dcache->garbage,
+ nr, NULL, NIL(struct page_info *));
- if ( pg )
- {
- clear_domain_page(page_to_mfn(pg));
- *pl1e = l1e_from_page(pg, __PAGE_HYPERVISOR);
- pg = alloc_domheap_page(NULL, memf);
- }
- if ( !pg )
- {
- unmap_domain_page(l2tab);
- return -ENOMEM;
- }
-
- i = (unsigned long)(dcache->garbage + BITS_TO_LONGS(ents));
- pl1e = &dcache->l1tab[l2_table_offset(i)][l1_table_offset(i)];
- ASSERT(!l1e_get_flags(*pl1e));
-
- clear_domain_page(page_to_mfn(pg));
- *pl1e = l1e_from_page(pg, __PAGE_HYPERVISOR);
- }
+ if ( rc )
+ return rc;
dcache->entries = ents;
}
- unmap_domain_page(l2tab);
-
/* Mark all maphash entries as not in use. */
BUILD_BUG_ON(MAPHASHENT_NOTINUSE < MAPCACHE_ENTRIES);
for ( i = 0; i < MAPHASH_ENTRIES; i++ )
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -511,6 +511,7 @@ void update_cr3(struct vcpu *v)
static void invalidate_shadow_ldt(struct vcpu *v, int flush)
{
+ l1_pgentry_t *pl1e;
int i;
unsigned long pfn;
struct page_info *page;
@@ -523,12 +524,13 @@ static void invalidate_shadow_ldt(struct
goto out;
v->arch.pv_vcpu.shadow_ldt_mapcnt = 0;
+ pl1e = gdt_ldt_ptes(v->domain, v);
for ( i = 16; i < 32; i++ )
{
- pfn = l1e_get_pfn(v->arch.perdomain_ptes[i]);
+ pfn = l1e_get_pfn(pl1e[i]);
if ( pfn == 0 ) continue;
- l1e_write(&v->arch.perdomain_ptes[i], l1e_empty());
+ l1e_write(&pl1e[i], l1e_empty());
page = mfn_to_page(pfn);
ASSERT_PAGE_IS_TYPE(page, PGT_seg_desc_page);
ASSERT_PAGE_IS_DOMAIN(page, v->domain);
@@ -596,7 +598,7 @@ int map_ldt_shadow_page(unsigned int off
nl1e = l1e_from_pfn(page_to_mfn(page), l1e_get_flags(l1e) | _PAGE_RW);
spin_lock(&v->arch.pv_vcpu.shadow_ldt_lock);
- l1e_write(&v->arch.perdomain_ptes[off + 16], nl1e);
+ l1e_write(&gdt_ldt_ptes(d, v)[off + 16], nl1e);
v->arch.pv_vcpu.shadow_ldt_mapcnt++;
spin_unlock(&v->arch.pv_vcpu.shadow_ldt_lock);
@@ -4073,15 +4075,17 @@ long do_update_va_mapping_otherdomain(un
void destroy_gdt(struct vcpu *v)
{
+ l1_pgentry_t *pl1e;
int i;
unsigned long pfn;
v->arch.pv_vcpu.gdt_ents = 0;
+ pl1e = gdt_ldt_ptes(v->domain, v);
for ( i = 0; i < FIRST_RESERVED_GDT_PAGE; i++ )
{
- if ( (pfn = l1e_get_pfn(v->arch.perdomain_ptes[i])) != 0 )
+ if ( (pfn = l1e_get_pfn(pl1e[i])) != 0 )
put_page_and_type(mfn_to_page(pfn));
- l1e_write(&v->arch.perdomain_ptes[i], l1e_empty());
+ l1e_write(&pl1e[i], l1e_empty());
v->arch.pv_vcpu.gdt_frames[i] = 0;
}
}
@@ -4092,6 +4096,7 @@ long set_gdt(struct vcpu *v,
unsigned int entries)
{
struct domain *d = v->domain;
+ l1_pgentry_t *pl1e;
/* NB. There are 512 8-byte entries per GDT page. */
int i, nr_pages = (entries + 511) / 512;
unsigned long mfn, *pfns;
@@ -4124,11 +4129,11 @@ long set_gdt(struct vcpu *v,
/* Install the new GDT. */
v->arch.pv_vcpu.gdt_ents = entries;
+ pl1e = gdt_ldt_ptes(d, v);
for ( i = 0; i < nr_pages; i++ )
{
v->arch.pv_vcpu.gdt_frames[i] = frames[i];
- l1e_write(&v->arch.perdomain_ptes[i],
- l1e_from_pfn(frames[i], __PAGE_HYPERVISOR));
+ l1e_write(&pl1e[i], l1e_from_pfn(frames[i], __PAGE_HYPERVISOR));
}
xfree(pfns);
@@ -5528,6 +5533,175 @@ void __iomem *ioremap(paddr_t pa, size_t
return (void __force __iomem *)va;
}
+int create_perdomain_mapping(struct domain *d, unsigned long va,
+ unsigned int nr, l1_pgentry_t **pl1tab,
+ struct page_info **ppg)
+{
+ struct page_info *pg;
+ l3_pgentry_t *l3tab;
+ l2_pgentry_t *l2tab;
+ l1_pgentry_t *l1tab;
+ unsigned int memf = MEMF_node(domain_to_node(d));
+ int rc = 0;
+
+ ASSERT(va >= PERDOMAIN_VIRT_START &&
+ va < PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS));
+
+ if ( !d->arch.perdomain_l3_pg )
+ {
+ pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
+ if ( !pg )
+ return -ENOMEM;
+ l3tab = __map_domain_page(pg);
+ clear_page(l3tab);
+ d->arch.perdomain_l3_pg = pg;
+ if ( !nr )
+ {
+ unmap_domain_page(l3tab);
+ return 0;
+ }
+ }
+ else if ( !nr )
+ return 0;
+ else
+ l3tab = __map_domain_page(d->arch.perdomain_l3_pg);
+
+ ASSERT(!l3_table_offset(va ^ (va + nr * PAGE_SIZE - 1)));
+
+ if ( !(l3e_get_flags(l3tab[l3_table_offset(va)]) & _PAGE_PRESENT) )
+ {
+ pg = alloc_domheap_page(NULL, memf);
+ if ( !pg )
+ {
+ unmap_domain_page(l3tab);
+ return -ENOMEM;
+ }
+ l2tab = __map_domain_page(pg);
+ clear_page(l2tab);
+ l3tab[l3_table_offset(va)] = l3e_from_page(pg, __PAGE_HYPERVISOR);
+ }
+ else
+ l2tab = map_domain_page(l3e_get_pfn(l3tab[l3_table_offset(va)]));
+
+ unmap_domain_page(l3tab);
+
+ if ( !pl1tab && !ppg )
+ {
+ unmap_domain_page(l2tab);
+ return 0;
+ }
+
+ for ( l1tab = NULL; !rc && nr--; )
+ {
+ l2_pgentry_t *pl2e = l2tab + l2_table_offset(va);
+
+ if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
+ {
+ if ( pl1tab && !IS_NIL(pl1tab) )
+ {
+ l1tab = alloc_xenheap_pages(0, memf);
+ if ( !l1tab )
+ {
+ rc = -ENOMEM;
+ break;
+ }
+ ASSERT(!pl1tab[l2_table_offset(va)]);
+ pl1tab[l2_table_offset(va)] = l1tab;
+ pg = virt_to_page(l1tab);
+ }
+ else
+ {
+ pg = alloc_domheap_page(NULL, memf);
+ if ( !pg )
+ {
+ rc = -ENOMEM;
+ break;
+ }
+ l1tab = __map_domain_page(pg);
+ }
+ clear_page(l1tab);
+ *pl2e = l2e_from_page(pg, __PAGE_HYPERVISOR);
+ }
+ else if ( !l1tab )
+ l1tab = map_domain_page(l2e_get_pfn(*pl2e));
+
+ if ( ppg &&
+ !(l1e_get_flags(l1tab[l1_table_offset(va)]) & _PAGE_PRESENT) )
+ {
+ pg = alloc_domheap_page(NULL, memf);
+ if ( pg )
+ {
+ clear_domain_page(page_to_mfn(pg));
+ if ( !IS_NIL(ppg) )
+ *ppg++ = pg;
+ l1tab[l1_table_offset(va)] =
+ l1e_from_page(pg, __PAGE_HYPERVISOR | _PAGE_AVAIL0);
+ l2e_add_flags(*pl2e, _PAGE_AVAIL0);
+ }
+ else
+ rc = -ENOMEM;
+ }
+
+ va += PAGE_SIZE;
+ if ( rc || !nr || !l1_table_offset(va) )
+ {
+ /* Note that this is a no-op for the alloc_xenheap_page() case. */
+ unmap_domain_page(l1tab);
+ l1tab = NULL;
+ }
+ }
+
+ ASSERT(!l1tab);
+ unmap_domain_page(l2tab);
+
+ return rc;
+}
+
+void free_perdomain_mappings(struct domain *d)
+{
+ l3_pgentry_t *l3tab = __map_domain_page(d->arch.perdomain_l3_pg);
+ unsigned int i;
+
+ for ( i = 0; i < PERDOMAIN_SLOTS; ++i)
+ if ( l3e_get_flags(l3tab[i]) & _PAGE_PRESENT )
+ {
+ struct page_info *l2pg = l3e_get_page(l3tab[i]);
+ l2_pgentry_t *l2tab = __map_domain_page(l2pg);
+ unsigned int j;
+
+ for ( j = 0; j < L2_PAGETABLE_ENTRIES; ++j )
+ if ( l2e_get_flags(l2tab[j]) & _PAGE_PRESENT )
+ {
+ struct page_info *l1pg = l2e_get_page(l2tab[j]);
+
+ if ( l2e_get_flags(l2tab[j]) & _PAGE_AVAIL0 )
+ {
+ l1_pgentry_t *l1tab = __map_domain_page(l1pg);
+ unsigned int k;
+
+ for ( k = 0; k < L1_PAGETABLE_ENTRIES; ++k )
+ if ( (l1e_get_flags(l1tab[k]) &
+ (_PAGE_PRESENT | _PAGE_AVAIL0)) ==
+ (_PAGE_PRESENT | _PAGE_AVAIL0) )
+ free_domheap_page(l1e_get_page(l1tab[k]));
+
+ unmap_domain_page(l1tab);
+ }
+
+ if ( is_xen_heap_page(l1pg) )
+ free_xenheap_page(page_to_virt(l1pg));
+ else
+ free_domheap_page(l1pg);
+ }
+
+ unmap_domain_page(l2tab);
+ free_domheap_page(l2pg);
+ }
+
+ unmap_domain_page(l3tab);
+ free_domheap_page(d->arch.perdomain_l3_pg);
+}
+
#ifdef MEMORY_GUARD
void memguard_init(void)
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -304,19 +304,13 @@ extern unsigned long xen_phys_start;
#define LDT_VIRT_START(v) \
(GDT_VIRT_START(v) + (64*1024))
-/* map_domain_page() map cache. The last per-domain-mapping sub-area. */
+/* map_domain_page() map cache. The second per-domain-mapping sub-area. */
#define MAPCACHE_VCPU_ENTRIES (CONFIG_PAGING_LEVELS * CONFIG_PAGING_LEVELS)
#define MAPCACHE_ENTRIES (MAX_VIRT_CPUS * MAPCACHE_VCPU_ENTRIES)
-#define MAPCACHE_SLOT (PERDOMAIN_SLOTS - 1)
-#define MAPCACHE_VIRT_START PERDOMAIN_VIRT_SLOT(MAPCACHE_SLOT)
+#define MAPCACHE_VIRT_START PERDOMAIN_VIRT_SLOT(1)
#define MAPCACHE_VIRT_END (MAPCACHE_VIRT_START + \
MAPCACHE_ENTRIES * PAGE_SIZE)
-#define PDPT_L1_ENTRIES \
- ((PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS - 1) - PERDOMAIN_VIRT_START) >> PAGE_SHIFT)
-#define PDPT_L2_ENTRIES \
- ((PDPT_L1_ENTRIES + (1 << PAGETABLE_ORDER) - 1) >> PAGETABLE_ORDER)
-
#define ELFSIZE 64
#define ARCH_CRASH_SAVE_VMCOREINFO
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -223,6 +223,8 @@ struct time_scale {
struct pv_domain
{
+ l1_pgentry_t **gdt_ldt_l1tab;
+
/* Shared page for notifying that explicit PIRQ EOI is required. */
unsigned long *pirq_eoi_map;
unsigned long pirq_eoi_map_mfn;
@@ -241,8 +243,6 @@ struct pv_domain
struct arch_domain
{
- void **perdomain_pts;
- struct page_info *perdomain_l2_pg[PERDOMAIN_SLOTS];
struct page_info *perdomain_l3_pg;
unsigned int hv_compat_vstart;
@@ -318,10 +318,10 @@ struct arch_domain
#define has_arch_pdevs(d) (!list_empty(&(d)->arch.pdev_list))
#define has_arch_mmios(d) (!rangeset_is_empty((d)->iomem_caps))
-#define perdomain_pt_idx(v) \
+#define gdt_ldt_pt_idx(v) \
((v)->vcpu_id >> (PAGETABLE_ORDER - GDT_LDT_VCPU_SHIFT))
-#define perdomain_ptes(d, v) \
- ((l1_pgentry_t *)(d)->arch.perdomain_pts[perdomain_pt_idx(v)] + \
+#define gdt_ldt_ptes(d, v) \
+ ((d)->arch.pv_domain.gdt_ldt_l1tab[gdt_ldt_pt_idx(v)] + \
(((v)->vcpu_id << GDT_LDT_VCPU_SHIFT) & (L1_PAGETABLE_ENTRIES - 1)))
struct pv_vcpu
@@ -406,12 +406,6 @@ struct arch_vcpu
struct hvm_vcpu hvm_vcpu;
};
- /*
- * Every domain has a L1 pagetable of its own. Per-domain mappings
- * are put in this table (eg. the current GDT is mapped here).
- */
- l1_pgentry_t *perdomain_ptes;
-
pagetable_t guest_table_user; /* (MFN) x86/64 user-space pagetable */
pagetable_t guest_table; /* (MFN) guest notion of cr3 */
/* guest_table holds a ref to the page, and also a type-count unless
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -573,6 +573,14 @@ int donate_page(
int map_ldt_shadow_page(unsigned int);
+#define NIL(type) ((type *)NULL - 1)
+#define IS_NIL(ptr) (!((ptr) + 1))
+
+int create_perdomain_mapping(struct domain *, unsigned long va,
+ unsigned int nr, l1_pgentry_t **,
+ struct page_info **);
+void free_perdomain_mappings(struct domain *);
+
extern int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm);
void domain_set_alloc_bitsize(struct domain *d);
Jan Beulich
2013-Feb-26 16:06 UTC
[PATCH v2 2/3] x86: rework hypercall argument translation area setup
... using the new per-domain mapping management functions, adding
destroy_perdomain_mapping() to the previously introduced pair.
Rather than using an order-1 Xen heap allocation, use (currently 2)
individual domain heap pages to populate space in the per-domain
mapping area.
Also fix a benign off-by-one mistake in is_compat_arg_xlat_range().
Signed-off-by: Jan Beulich <jbeulich@suse.com>
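A couple of illustrative notes on the new layout and the check being fixed (the values follow from the config.h and uaccess.h changes below; nothing here is additional code in the patch):

    /* Layout: each vCPU gets a 1 << ARG_XLAT_VA_SHIFT == 4 page stride in
     * the third per-domain slot, of which COMPAT_ARG_XLAT_SIZE == 2 pages
     * get populated, leaving at least one unmapped guard page between
     * adjacent vCPUs' areas.
     *
     * Off-by-one: for addr == COMPAT_ARG_XLAT_VIRT_BASE + COMPAT_ARG_XLAT_SIZE
     * we get __off == COMPAT_ARG_XLAT_SIZE, which the old start check
     * (__off <= COMPAT_ARG_XLAT_SIZE) accepted; with size == 0 the end check
     * passed as well, so an address just past the area slipped through.
     * Using "<" closes this (benign) hole. */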
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -5657,6 +5657,59 @@ int create_perdomain_mapping(struct doma
return rc;
}
+void destroy_perdomain_mapping(struct domain *d, unsigned long va,
+ unsigned int nr)
+{
+ const l3_pgentry_t *l3tab, *pl3e;
+
+ ASSERT(va >= PERDOMAIN_VIRT_START &&
+ va < PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS));
+ ASSERT(!l3_table_offset(va ^ (va + nr * PAGE_SIZE - 1)));
+
+ if ( !d->arch.perdomain_l3_pg )
+ return;
+
+ l3tab = __map_domain_page(d->arch.perdomain_l3_pg);
+ pl3e = l3tab + l3_table_offset(va);
+
+ if ( l3e_get_flags(*pl3e) & _PAGE_PRESENT )
+ {
+ const l2_pgentry_t *l2tab = map_domain_page(l3e_get_pfn(*pl3e));
+ const l2_pgentry_t *pl2e = l2tab + l2_table_offset(va);
+ unsigned int i = l1_table_offset(va);
+
+ while ( nr )
+ {
+ if ( l2e_get_flags(*pl2e) & _PAGE_PRESENT )
+ {
+ l1_pgentry_t *l1tab = map_domain_page(l2e_get_pfn(*pl2e));
+
+ for ( ; nr && i < L1_PAGETABLE_ENTRIES; --nr, ++i )
+ {
+ if ( (l1e_get_flags(l1tab[i]) &
+ (_PAGE_PRESENT | _PAGE_AVAIL0)) ==
+ (_PAGE_PRESENT | _PAGE_AVAIL0) )
+ free_domheap_page(l1e_get_page(l1tab[i]));
+ l1tab[i] = l1e_empty();
+ }
+
+ unmap_domain_page(l1tab);
+ }
+ else if ( nr + i < L1_PAGETABLE_ENTRIES )
+ break;
+ else
+ nr -= L1_PAGETABLE_ENTRIES - i;
+
+ ++pl2e;
+ i = 0;
+ }
+
+ unmap_domain_page(l2tab);
+ }
+
+ unmap_domain_page(l3tab);
+}
+
void free_perdomain_mappings(struct domain *d)
{
l3_pgentry_t *l3tab = __map_domain_page(d->arch.perdomain_l3_pg);
--- a/xen/arch/x86/usercopy.c
+++ b/xen/arch/x86/usercopy.c
@@ -6,8 +6,8 @@
* Copyright 2002 Andi Kleen <ak@suse.de>
*/
-#include <xen/config.h>
#include <xen/lib.h>
+#include <xen/sched.h>
#include <asm/uaccess.h>
unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned n)
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -832,27 +832,17 @@ void __init zap_low_mappings(void)
__PAGE_HYPERVISOR);
}
-void *compat_arg_xlat_virt_base(void)
-{
- return current->arch.compat_arg_xlat;
-}
-
int setup_compat_arg_xlat(struct vcpu *v)
{
- unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE);
-
- v->arch.compat_arg_xlat = alloc_xenheap_pages(order,
- MEMF_node(vcpu_to_node(v)));
-
- return v->arch.compat_arg_xlat ? 0 : -ENOMEM;
+ return create_perdomain_mapping(v->domain, ARG_XLAT_START(v),
+ PFN_UP(COMPAT_ARG_XLAT_SIZE),
+ NULL, NIL(struct page_info *));
}
void free_compat_arg_xlat(struct vcpu *v)
{
- unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE);
-
- free_xenheap_pages(v->arch.compat_arg_xlat, order);
- v->arch.compat_arg_xlat = NULL;
+ destroy_perdomain_mapping(v->domain, ARG_XLAT_START(v),
+ PFN_UP(COMPAT_ARG_XLAT_SIZE));
}
void cleanup_frame_table(struct mem_hotadd_info *info)
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -212,7 +212,7 @@ extern unsigned char boot_edid_info[128]
/* Slot 260: per-domain mappings (including map cache). */
#define PERDOMAIN_VIRT_START (PML4_ADDR(260))
#define PERDOMAIN_SLOT_MBYTES (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER))
-#define PERDOMAIN_SLOTS 2
+#define PERDOMAIN_SLOTS 3
#define PERDOMAIN_VIRT_SLOT(s) (PERDOMAIN_VIRT_START + (s) * \
(PERDOMAIN_SLOT_MBYTES << 20))
/* Slot 261: machine-to-phys conversion table (256GB). */
@@ -311,6 +311,13 @@ extern unsigned long xen_phys_start;
#define MAPCACHE_VIRT_END (MAPCACHE_VIRT_START + \
MAPCACHE_ENTRIES * PAGE_SIZE)
+/* Argument translation area. The third per-domain-mapping sub-area. */
+#define ARG_XLAT_VIRT_START PERDOMAIN_VIRT_SLOT(2)
+/* Allow for at least one guard page (COMPAT_ARG_XLAT_SIZE being 2 pages): */
+#define ARG_XLAT_VA_SHIFT (2 + PAGE_SHIFT)
+#define ARG_XLAT_START(v) \
+ (ARG_XLAT_VIRT_START + ((v)->vcpu_id << ARG_XLAT_VA_SHIFT))
+
#define ELFSIZE 64
#define ARCH_CRASH_SAVE_VMCOREINFO
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -442,9 +442,6 @@ struct arch_vcpu
/* A secondary copy of the vcpu time info. */
XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest;
-
- void *compat_arg_xlat;
-
} __cacheline_aligned;
/* Shorthands to improve code legibility. */
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -579,6 +579,8 @@ int map_ldt_shadow_page(unsigned int);
int create_perdomain_mapping(struct domain *, unsigned long va,
unsigned int nr, l1_pgentry_t **,
struct page_info **);
+void destroy_perdomain_mapping(struct domain *, unsigned long va,
+ unsigned int nr);
void free_perdomain_mappings(struct domain *);
extern int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm);
--- a/xen/include/asm-x86/x86_64/uaccess.h
+++ b/xen/include/asm-x86/x86_64/uaccess.h
@@ -1,16 +1,15 @@
#ifndef __X86_64_UACCESS_H
#define __X86_64_UACCESS_H
-#define COMPAT_ARG_XLAT_VIRT_BASE compat_arg_xlat_virt_base()
+#define COMPAT_ARG_XLAT_VIRT_BASE ((void *)ARG_XLAT_START(current))
#define COMPAT_ARG_XLAT_SIZE (2*PAGE_SIZE)
struct vcpu;
-void *compat_arg_xlat_virt_base(void);
int setup_compat_arg_xlat(struct vcpu *v);
void free_compat_arg_xlat(struct vcpu *v);
#define is_compat_arg_xlat_range(addr, size) ({ \
unsigned long __off; \
__off = (unsigned long)(addr) - (unsigned long)COMPAT_ARG_XLAT_VIRT_BASE; \
- (__off <= COMPAT_ARG_XLAT_SIZE) && \
+ (__off < COMPAT_ARG_XLAT_SIZE) && \
((__off + (unsigned long)(size)) <= COMPAT_ARG_XLAT_SIZE); \
})
Jan Beulich
2013-Feb-26 16:07 UTC
[PATCH v2 3/3] x86: use linear L1 page table for map_domain_page() page table manipulation
This saves allocation of a Xen heap page for tracking the L1 page table
pointers.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
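For reference, the central idea as a sketch (MAPCACHE_L1ENT is the macro added below; idx and mfn are placeholders, and the calls shown are illustrative rather than additional code):

    /* The L1 entries backing MAPCACHE_VIRT_START..MAPCACHE_VIRT_END are
     * reachable directly through Xen's linear page table, so no per-domain
     * array of L1 table pointers needs to be maintained any more: */
    l1e_write(&MAPCACHE_L1ENT(idx), l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
    /* ... and tearing a mapping down is just as direct: */
    l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty());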
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -585,7 +585,6 @@ int arch_domain_create(struct domain *d,
free_xenheap_page(d->shared_info);
if ( paging_initialised )
paging_final_teardown(d);
- mapcache_domain_exit(d);
free_perdomain_mappings(d);
if ( !is_hvm_domain(d) )
free_xenheap_page(d->arch.pv_domain.gdt_ldt_l1tab);
@@ -605,8 +604,6 @@ void arch_domain_destroy(struct domain *
paging_final_teardown(d);
- mapcache_domain_exit(d);
-
free_perdomain_mappings(d);
if ( !is_hvm_domain(d) )
free_xenheap_page(d->arch.pv_domain.gdt_ldt_l1tab);
--- a/xen/arch/x86/domain_page.c
+++ b/xen/arch/x86/domain_page.c
@@ -53,9 +53,8 @@ void __init mapcache_override_current(st
#define mapcache_l2_entry(e) ((e) >> PAGETABLE_ORDER)
#define MAPCACHE_L2_ENTRIES (mapcache_l2_entry(MAPCACHE_ENTRIES - 1) + 1)
-#define DCACHE_L1ENT(dc, idx) \
- ((dc)->l1tab[(idx) >> PAGETABLE_ORDER] \
- [(idx) & ((1 << PAGETABLE_ORDER) - 1)])
+#define MAPCACHE_L1ENT(idx) \
+ __linear_l1_table[l1_linear_offset(MAPCACHE_VIRT_START + pfn_to_paddr(idx))]
void *map_domain_page(unsigned long mfn)
{
@@ -77,7 +76,7 @@ void *map_domain_page(unsigned long mfn)
dcache = &v->domain->arch.pv_domain.mapcache;
vcache = &v->arch.pv_vcpu.mapcache;
- if ( !dcache->l1tab )
+ if ( !dcache->inuse )
return mfn_to_virt(mfn);
perfc_incr(map_domain_page_count);
@@ -91,7 +90,7 @@ void *map_domain_page(unsigned long mfn)
ASSERT(idx < dcache->entries);
hashent->refcnt++;
ASSERT(hashent->refcnt);
- ASSERT(l1e_get_pfn(DCACHE_L1ENT(dcache, idx)) == mfn);
+ ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(idx)) == mfn);
goto out;
}
@@ -131,9 +130,8 @@ void *map_domain_page(unsigned long mfn)
if ( hashent->idx != MAPHASHENT_NOTINUSE &&
!hashent->refcnt )
{
idx = hashent->idx;
- ASSERT(l1e_get_pfn(DCACHE_L1ENT(dcache, idx)) ==
- hashent->mfn);
- l1e_write(&DCACHE_L1ENT(dcache, idx), l1e_empty());
+ ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(idx)) == hashent->mfn);
+ l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty());
hashent->idx = MAPHASHENT_NOTINUSE;
hashent->mfn = ~0UL;
break;
@@ -156,8 +154,7 @@ void *map_domain_page(unsigned long mfn)
spin_unlock(&dcache->lock);
- l1e_write(&DCACHE_L1ENT(dcache, idx),
- l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
+ l1e_write(&MAPCACHE_L1ENT(idx), l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
out:
local_irq_restore(flags);
@@ -181,10 +178,10 @@ void unmap_domain_page(const void *ptr)
ASSERT(v && !is_hvm_vcpu(v));
dcache = &v->domain->arch.pv_domain.mapcache;
- ASSERT(dcache->l1tab);
+ ASSERT(dcache->inuse);
idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
- mfn = l1e_get_pfn(DCACHE_L1ENT(dcache, idx));
+ mfn = l1e_get_pfn(MAPCACHE_L1ENT(idx));
hashent = &v->arch.pv_vcpu.mapcache.hash[MAPHASH_HASHFN(mfn)];
local_irq_save(flags);
@@ -200,9 +197,9 @@ void unmap_domain_page(const void *ptr)
if ( hashent->idx != MAPHASHENT_NOTINUSE )
{
/* /First/, zap the PTE. */
- ASSERT(l1e_get_pfn(DCACHE_L1ENT(dcache, hashent->idx)) ==
+ ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(hashent->idx)) ==
hashent->mfn);
- l1e_write(&DCACHE_L1ENT(dcache, hashent->idx), l1e_empty());
+ l1e_write(&MAPCACHE_L1ENT(hashent->idx), l1e_empty());
/* /Second/, mark as garbage. */
set_bit(hashent->idx, dcache->garbage);
}
@@ -214,7 +211,7 @@ void unmap_domain_page(const void *ptr)
else
{
/* /First/, zap the PTE. */
- l1e_write(&DCACHE_L1ENT(dcache, idx), l1e_empty());
+ l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty());
/* /Second/, mark as garbage. */
set_bit(idx, dcache->garbage);
}
@@ -253,10 +250,6 @@ int mapcache_domain_init(struct domain *
return 0;
#endif
- dcache->l1tab = xzalloc_array(l1_pgentry_t *, MAPCACHE_L2_ENTRIES);
- if ( !dcache->l1tab )
- return -ENOMEM;
-
BUILD_BUG_ON(MAPCACHE_VIRT_END + PAGE_SIZE * (3 +
2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) >
MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20));
@@ -272,16 +265,6 @@ int mapcache_domain_init(struct domain *
NIL(l1_pgentry_t *), NULL);
}
-void mapcache_domain_exit(struct domain *d)
-{
- struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
-
- if ( is_hvm_domain(d) )
- return;
-
- xfree(dcache->l1tab);
-}
-
int mapcache_vcpu_init(struct vcpu *v)
{
struct domain *d = v->domain;
@@ -290,7 +273,7 @@ int mapcache_vcpu_init(struct vcpu *v)
unsigned int ents = d->max_vcpus * MAPCACHE_VCPU_ENTRIES;
unsigned int nr = PFN_UP(BITS_TO_LONGS(ents) * sizeof(long));
- if ( is_hvm_vcpu(v) || !dcache->l1tab )
+ if ( is_hvm_vcpu(v) || !dcache->inuse )
return 0;
if ( ents > dcache->entries )
@@ -298,7 +281,7 @@ int mapcache_vcpu_init(struct vcpu *v)
/* Populate page tables. */
int rc = create_perdomain_mapping(d, MAPCACHE_VIRT_START,
d->max_vcpus * MAPCACHE_VCPU_ENTRIES,
- dcache->l1tab, NULL);
+ NIL(l1_pgentry_t *), NULL);
/* Populate bit maps. */
if ( !rc )
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -53,8 +53,7 @@ struct mapcache_vcpu {
};
struct mapcache_domain {
- /* The PTEs that provide the mappings, and a cursor into the array. */
- l1_pgentry_t **l1tab;
+ /* The number of array entries, and a cursor into the array. */
unsigned int entries;
unsigned int cursor;
@@ -71,7 +70,6 @@ struct mapcache_domain {
};
int mapcache_domain_init(struct domain *);
-void mapcache_domain_exit(struct domain *);
int mapcache_vcpu_init(struct vcpu *);
void mapcache_override_current(struct vcpu *);
On 26/02/2013 15:56, "Jan Beulich" <JBeulich@suse.com> wrote:

> The main goal being the re-work of the hypercall argument translation
> area management, this series first breaks out per-domain mapping
> management into its own set of functions, in order to then use this
> for setting up the translation areas in per-domain space.
>
> While doing this I also realized that it's pointless for the
> map_domain_page() code to track L1 page table pointers in a
> separate Xen heap page - we can equally well use the linear
> page table for the manipulations needed here.
>
> 1: introduce create_perdomain_mapping()
> 2: rework hypercall argument translation area setup
> 3: use linear L1 page table for map_domain_page() page table manipulation
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Keir Fraser <keir@xen.org>