George Dunlap
2011-Jan-21 15:37 UTC
[Xen-devel] [PATCH] p2m: Allow non-leaf entries to be replaced by leaf entries
# HG changeset patch # User George Dunlap <george.dunlap@eu.citrix.com> # Date 1295624256 0 # Node ID 9ca9331c97808b6aa02697619743cf6e8030587a # Parent 003acf02d416d657f750b7a7748fa8c5a932222c p2m: Allow non-leaf entries to be replaced by leaf entries Allow l2 and l3 p2m tables to be replaced with 2MB and 1GB pages respectively, freeing the p2m table page properly. This allows, for example, a sequence of 512 singleton zero pages to be replaced with a superpage populate-on-demand entry. Changes: * Add a p2m_free_ptp() corresponding to p2m_alloc_ptp(), which will handle everything related to the freeing properly. * Add p2m_free_entry(), based on ept_free_entry(), which will free intermediate tables recursively. * For both ept and p2m, when replacing non-leaf entries with leaf entries, keep old entry and call *_free_entry() after new entry has been written and proper flushes have been done. Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> diff -r 003acf02d416 -r 9ca9331c9780 xen/arch/x86/mm/hap/hap.c --- a/xen/arch/x86/mm/hap/hap.c Thu Jan 20 17:04:06 2011 +0000 +++ b/xen/arch/x86/mm/hap/hap.c Fri Jan 21 15:37:36 2011 +0000 @@ -333,9 +333,11 @@ ASSERT(page_get_owner(pg) == d); /* Should have just the one ref we gave it in alloc_p2m_page() */ - if ( (pg->count_info & PGC_count_mask) != 1 ) - HAP_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n", - pg->count_info, pg->u.inuse.type_info); + if ( (pg->count_info & PGC_count_mask) != 1 ) { + HAP_ERROR("Odd p2m page %p count c=%#lx t=%"PRtype_info"\n", + pg, pg->count_info, pg->u.inuse.type_info); + WARN(); + } pg->count_info &= ~PGC_count_mask; /* Free should not decrement domain''s total allocation, since * these pages were allocated without an owner. */ diff -r 003acf02d416 -r 9ca9331c9780 xen/arch/x86/mm/hap/p2m-ept.c --- a/xen/arch/x86/mm/hap/p2m-ept.c Thu Jan 20 17:04:06 2011 +0000 +++ b/xen/arch/x86/mm/hap/p2m-ept.c Fri Jan 21 15:37:36 2011 +0000 @@ -166,8 +166,6 @@ /* free ept sub tree behind an entry */ void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level) { - struct domain *d = p2m->domain; - /* End if the entry is a leaf entry. */ if ( level == 0 || !is_epte_present(ept_entry) || is_epte_superpage(ept_entry) ) @@ -180,8 +178,8 @@ ept_free_entry(p2m, epte + i, level - 1); unmap_domain_page(epte); } - - d->arch.paging.free_page(d, mfn_to_page(ept_entry->mfn)); + + p2m_free_ptp(p2m, mfn_to_page(ept_entry->mfn)); } static int ept_split_super_page(struct p2m_domain *p2m, ept_entry_t *ept_entry, @@ -317,6 +315,7 @@ int vtd_pte_present = 0; int needs_sync = 1; struct domain *d = p2m->domain; + ept_entry_t old_entry = { .epte = 0 }; /* * the caller must make sure: @@ -357,8 +356,12 @@ vtd_pte_present = is_epte_present(ept_entry) ? 1 : 0; /* - * When we are here, we must be on a leaf ept entry - * with i == target or i > target. + * If we''re here with i > target, we must be at a leaf node, and + * we need to break up the superpage. + * + * If we''re here with i == target and i > 0, we need to check to see + * if we''re replacing a non-leaf entry (i.e., pointing to an N-1 table) + * with a leaf entry (a 1GiB or 2MiB page), and handle things appropriately. */ if ( i == target ) @@ -370,6 +373,10 @@ if ( !is_epte_present(ept_entry) ) needs_sync = 0; + /* If we''re replacing a non-leaf entry with a leaf entry (1GiB or 2MiB), + * the intermediate tables will be freed below after the ept flush */ + old_entry = *ept_entry; + if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) || (p2mt == p2m_ram_paging_in_start) ) { @@ -487,6 +494,13 @@ } } + /* Release the old intermediate tables, if any. This has to be the + last thing we do, after the ept_sync_domain() and removal + from the iommu tables, so as to avoid a potential + use-after-free. */ + if ( is_epte_present(&old_entry) ) + ept_free_entry(p2m, &old_entry, target); + return rv; } diff -r 003acf02d416 -r 9ca9331c9780 xen/arch/x86/mm/p2m.c --- a/xen/arch/x86/mm/p2m.c Thu Jan 20 17:04:06 2011 +0000 +++ b/xen/arch/x86/mm/p2m.c Fri Jan 21 15:37:36 2011 +0000 @@ -153,11 +153,45 @@ page_list_add_tail(pg, &p2m->pages); pg->u.inuse.type_info = type | 1 | PGT_validated; - pg->count_info |= 1; return pg; } +void +p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg) +{ + ASSERT(pg); + ASSERT(p2m); + ASSERT(p2m->domain); + ASSERT(p2m->domain->arch.paging.free_page); + + page_list_del(pg, &p2m->pages); + p2m->domain->arch.paging.free_page(p2m->domain, pg); + + return; +} + +/* Free intermediate tables from a p2m sub-tree */ +void +p2m_free_entry(struct p2m_domain *p2m, l1_pgentry_t *p2m_entry, int page_order) +{ + /* End if the entry is a leaf entry. */ + if ( page_order == 0 + || !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) + || (l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) + return; + + if ( page_order > 9 ) + { + l1_pgentry_t *l3_table = map_domain_page(l1e_get_pfn(*p2m_entry)); + for ( int i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) + p2m_free_entry(p2m, l3_table + i, page_order - 9); + unmap_domain_page(l3_table); + } + + p2m_free_ptp(p2m, mfn_to_page(l1e_get_pfn(*p2m_entry))); +} + // Walk one level of the P2M table, allocating a new table if required. // Returns 0 on error. // @@ -1316,6 +1350,7 @@ */ if ( page_order == 18 ) { + l1_pgentry_t old_entry = { .l1=0 }; p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, L3_PAGETABLE_SHIFT - PAGE_SHIFT, L3_PAGETABLE_ENTRIES); @@ -1323,10 +1358,11 @@ if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) && !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) { - P2M_ERROR("configure P2M table L3 entry with large page\n"); - domain_crash(p2m->domain); - goto out; + /* We''re replacing a non-SP page with a superpage. Make sure to + * handle freeing the table properly. */ + old_entry = *p2m_entry; } + ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct); l3e_content = mfn_valid(mfn) ? l3e_from_pfn(mfn_x(mfn), @@ -1335,7 +1371,11 @@ entry_content.l1 = l3e_content.l3; paging_write_p2m_entry(p2m->domain, gfn, p2m_entry, table_mfn, entry_content, 3); - + /* NB: paging_write_p2m_entry() handles tlb flushes properly */ + + /* Free old intermediate tables if necessary */ + if ( l1e_get_flags(old_entry) & _PAGE_PRESENT ) + p2m_free_entry(p2m, &old_entry, page_order); } /* * When using PAE Xen, we only allow 33 bits of pseudo-physical @@ -1372,9 +1412,11 @@ /* level 1 entry */ paging_write_p2m_entry(p2m->domain, gfn, p2m_entry, table_mfn, entry_content, 1); + /* NB: paging_write_p2m_entry() handles tlb flushes properly */ } else if ( page_order == 9 ) { + l1_pgentry_t old_entry = { .l1=0 }; p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, L2_PAGETABLE_SHIFT - PAGE_SHIFT, L2_PAGETABLE_ENTRIES); @@ -1384,9 +1426,9 @@ if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) && !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) { - P2M_ERROR("configure P2M table 4KB L2 entry with large page\n"); - domain_crash(p2m->domain); - goto out; + /* We''re replacing a non-SP page with a superpage. Make sure to + * handle freeing the table properly. */ + old_entry = *p2m_entry; } ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct); @@ -1400,6 +1442,11 @@ entry_content.l1 = l2e_content.l2; paging_write_p2m_entry(p2m->domain, gfn, p2m_entry, table_mfn, entry_content, 2); + /* NB: paging_write_p2m_entry() handles tlb flushes properly */ + + /* Free old intermediate tables if necessary */ + if ( l1e_get_flags(old_entry) & _PAGE_PRESENT ) + p2m_free_entry(p2m, &old_entry, page_order); } /* Track the highest gfn for which we have ever had a valid mapping */ diff -r 003acf02d416 -r 9ca9331c9780 xen/include/asm-x86/p2m.h --- a/xen/include/asm-x86/p2m.h Thu Jan 20 17:04:06 2011 +0000 +++ b/xen/include/asm-x86/p2m.h Fri Jan 21 15:37:36 2011 +0000 @@ -541,6 +541,7 @@ #endif struct page_info *p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type); +void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg); #endif /* _XEN_P2M_H */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Tim Deegan
2011-Jan-21 16:06 UTC
Re: [Xen-devel] [PATCH] p2m: Allow non-leaf entries to be replaced by leaf entries
At 15:37 +0000 on 21 Jan (1295624275), George Dunlap wrote:> p2m: Allow non-leaf entries to be replaced by leaf entries > > Allow l2 and l3 p2m tables to be replaced with 2MB and 1GB pages > respectively, freeing the p2m table page properly. This allows, for example, > a sequence of 512 singleton zero pages to be replaced with a superpage > populate-on-demand entry. > > Changes: > * Add a p2m_free_ptp() corresponding to p2m_alloc_ptp(), which will > handle everything related to the freeing properly. > * Add p2m_free_entry(), based on ept_free_entry(), which will free > intermediate tables recursively. > * For both ept and p2m, when replacing non-leaf entries with leaf > entries, keep old entry and call *_free_entry() after new entry > has been written and proper flushes have been done. > > Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>Applied, thanks. (I made a few small changes to help the debug=y build.) Tim. -- Tim Deegan <Tim.Deegan@citrix.com> Principal Software Engineer, Xen Platform Team Citrix Systems UK Ltd. (Company #02937203, SL9 0BG) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel