Li, Xin
2010-Jul-03 05:37 UTC
[Xen-devel] [PATCH] VMX: fix ept pages free up when ept superpage split fails.
VMX: fix ept pages free up when ept superpage split fails: 1) implement ept super page split in a recursive way to form an ept sub tree before real installation; 2) free an ept sub tree also in a recursive way. 3) change ept_next_level last input parameter from shift bits # to next walk level; signed-off-by: Xin Li <xin.li@intel.com> diff -r f483b5ce7be2 xen/arch/x86/mm/hap/p2m-ept.c --- a/xen/arch/x86/mm/hap/p2m-ept.c Fri Jul 02 19:04:57 2010 +0100 +++ b/xen/arch/x86/mm/hap/p2m-ept.c Sat Jul 03 21:29:56 2010 +0800 @@ -118,6 +118,74 @@ return 1; } +/* free ept sub tree behind an entry */ +void ept_free_entry(struct domain *d, ept_entry_t *ept_entry, int level) +{ + /* End if the entry is a leaf entry. */ + if ( level == 0 || !is_epte_present(ept_entry) || + is_epte_superpage(ept_entry) ) + return; + + if ( level > 1 ) + { + ept_entry_t *epte = map_domain_page(ept_entry->mfn); + for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ ) + ept_free_entry(d, epte + i, level - 1); + unmap_domain_page(epte); + } + + d->arch.p2m->free_page(d, mfn_to_page(ept_entry->mfn)); +} + +static int ept_split_super_page(struct domain *d, ept_entry_t *ept_entry, + int level, int target) +{ + ept_entry_t new_ept, *table; + uint64_t trunk; + int rv = 1; + + /* End if the entry is a leaf entry or reaches the target level. */ + if ( level == 0 || level == target ) + return rv; + + ASSERT(is_epte_superpage(ept_entry)); + + if ( !ept_set_middle_entry(d, &new_ept) ) + return 0; + + table = map_domain_page(new_ept.mfn); + trunk = 1UL << ((level - 1) * EPT_TABLE_ORDER); + + for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ ) + { + ept_entry_t *epte = table + i; + + epte->emt = ept_entry->emt; + epte->ipat = ept_entry->ipat; + epte->sp = (level > 1) ? 
1 : 0; + epte->avail1 = ept_entry->avail1; + epte->avail2 = 0; + epte->mfn = ept_entry->mfn + i * trunk; + + ept_p2m_type_to_flags(epte, epte->avail1); + + if ( (level - 1) == target ) + continue; + + ASSERT(is_epte_superpage(epte)); + + if ( !(rv = ept_split_super_page(d, epte, level - 1, target)) ) + break; + } + + unmap_domain_page(table); + + /* Even on failure we must install the newly allocated ept page. */ + *ept_entry = new_ept; + + return rv; +} + /* Take the currently mapped table, find the corresponding gfn entry, * and map the next table, if available. If the entry is empty * and read_only is set, @@ -134,14 +202,18 @@ */ static int ept_next_level(struct domain *d, bool_t read_only, ept_entry_t **table, unsigned long *gfn_remainder, - u32 shift) + int next_level) { ept_entry_t *ept_entry; - ept_entry_t *next; - u32 index; + u32 shift, index; + + shift = next_level * EPT_TABLE_ORDER; index = *gfn_remainder >> shift; + /* index must fall within the page */ + ASSERT(index < EPT_PAGETABLE_ENTRIES); + ept_entry = (*table) + index; if ( !is_epte_present(ept_entry) ) @@ -161,69 +233,15 @@ return GUEST_TABLE_SUPER_PAGE; else { + unsigned long mfn = ept_entry->mfn; + + unmap_domain_page(*table); + *table = map_domain_page(mfn); *gfn_remainder &= (1UL << shift) - 1; - next = map_domain_page(ept_entry->mfn); - unmap_domain_page(*table); - *table = next; return GUEST_TABLE_NORMAL_PAGE; } } -/* It's super page before and we should break down it now. 
*/ -static int ept_split_large_page(struct domain *d, - ept_entry_t **table, u32 *index, - unsigned long gfn, int level) -{ - ept_entry_t *prev_table = *table; - ept_entry_t *split_table = NULL; - ept_entry_t *split_entry = NULL; - ept_entry_t *ept_entry = (*table) + (*index); - ept_entry_t temp_ept_entry; - unsigned long s_gfn, s_mfn; - unsigned long offset, trunk; - int i; - - /* alloc new page for new ept middle level entry which is - * before a leaf super entry - */ - - if ( !ept_set_middle_entry(d, &temp_ept_entry) ) - return 0; - - /* split the super page to small next level pages */ - split_table = map_domain_page(temp_ept_entry.mfn); - offset = gfn & ((1UL << (level * EPT_TABLE_ORDER)) - 1); - trunk = (1UL << ((level-1) * EPT_TABLE_ORDER)); - - for ( i = 0; i < (1UL << EPT_TABLE_ORDER); i++ ) - { - s_gfn = gfn - offset + i * trunk; - s_mfn = ept_entry->mfn + i * trunk; - - split_entry = split_table + i; - split_entry->emt = ept_entry->emt; - split_entry->ipat = ept_entry->ipat; - - split_entry->sp = (level > 1) ? 1 : 0; - - split_entry->mfn = s_mfn; - - split_entry->avail1 = ept_entry->avail1; - split_entry->avail2 = 0; - /* last step */ - split_entry->r = split_entry->w = split_entry->x = 1; - ept_p2m_type_to_flags(split_entry, ept_entry->avail1); - } - - *ept_entry = temp_ept_entry; - - *index = offset / trunk; - *table = split_table; - unmap_domain_page(prev_table); - - return 1; -} - /* * ept_set_entry() computes 'need_modify_vtd_table' for itself, * by observing whether any gfn->mfn translations are modified. 
@@ -265,7 +283,7 @@ for ( i = ept_get_wl(d); i > target; i-- ) { - ret = ept_next_level(d, 0, &table, &gfn_remainder, i * EPT_TABLE_ORDER); + ret = ept_next_level(d, 0, &table, &gfn_remainder, i); if ( !ret ) goto out; else if ( ret != GUEST_TABLE_NORMAL_PAGE ) @@ -275,12 +293,10 @@ ASSERT(ret != GUEST_TABLE_POD_PAGE || i != target); index = gfn_remainder >> (i * EPT_TABLE_ORDER); - gfn_remainder &= (1UL << (i * EPT_TABLE_ORDER)) - 1; + offset = gfn_remainder & ((1UL << (i * EPT_TABLE_ORDER)) - 1); ept_entry = table + index; - offset = gfn_remainder; - /* * When we are here, we must be on a leaf ept entry * with i == target or i > target. @@ -301,15 +317,14 @@ direct_mmio); ept_entry->ipat = ipat; ept_entry->sp = order ? 1 : 0; + ept_entry->avail1 = p2mt; + ept_entry->avail2 = 0; if ( ept_entry->mfn == mfn_x(mfn) ) need_modify_vtd_table = 0; else ept_entry->mfn = mfn_x(mfn); - ept_entry->avail1 = p2mt; - ept_entry->avail2 = 0; - ept_p2m_type_to_flags(ept_entry, p2mt); } else @@ -318,33 +333,50 @@ else { /* We need to split the original page. */ - ept_entry_t *split_ept_entry; + ept_entry_t split_ept_entry; ASSERT(is_epte_superpage(ept_entry)); - for ( ; i > target; i-- ) + split_ept_entry = *ept_entry; + + if ( !ept_split_super_page(d, &split_ept_entry, i, target) ) { - rv = ept_split_large_page(d, &table, &index, gfn, i); - if ( !rv ) - goto out; + ept_free_entry(d, &split_ept_entry, i); + goto out; } - split_ept_entry = table + index; - split_ept_entry->avail1 = p2mt; - ept_p2m_type_to_flags(split_ept_entry, p2mt); - split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat, - direct_mmio); - split_ept_entry->ipat = ipat; + /* now install the newly split ept sub-tree */ + /* NB: please make sure the domain is paused and there is no in-flight VT-d DMA. 
*/ + *ept_entry = split_ept_entry; - if ( split_ept_entry->mfn == mfn_x(mfn) ) - need_modify_vtd_table = 0; - else - split_ept_entry->mfn = mfn_x(mfn); + /* then move to the level we want to make real changes */ + for ( ; i > target; i-- ) + ept_next_level(d, 0, &table, &gfn_remainder, i); + + ASSERT(i == target); + + index = gfn_remainder >> (i * EPT_TABLE_ORDER); + offset = gfn_remainder & ((1UL << (i * EPT_TABLE_ORDER)) - 1); + + ept_entry = table + index; + + ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat, direct_mmio); + ept_entry->ipat = ipat; + ept_entry->sp = i ? 1 : 0; + ept_entry->avail1 = p2mt; + ept_entry->avail2 = 0; + + if ( ept_entry->mfn == mfn_x(mfn) ) + need_modify_vtd_table = 0; + else /* the caller should take care of the previous page */ + ept_entry->mfn = mfn_x(mfn); + + ept_p2m_type_to_flags(ept_entry, p2mt); } /* Track the highest gfn for which we have ever had a valid mapping */ - if ( mfn_valid(mfn_x(mfn)) - && (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) ) + if ( mfn_valid(mfn_x(mfn)) && + (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) ) d->arch.p2m->max_mapped_pfn = gfn + (1UL << order) - 1; /* Success */ @@ -366,11 +398,11 @@ for ( i = 0; i < (1 << order); i++ ) iommu_map_page( d, gfn - offset + i, mfn_x(mfn) - offset + i, - IOMMUF_readable|IOMMUF_writable); + IOMMUF_readable | IOMMUF_writable); } else if ( !order ) iommu_map_page( - d, gfn, mfn_x(mfn), IOMMUF_readable|IOMMUF_writable); + d, gfn, mfn_x(mfn), IOMMUF_readable | IOMMUF_writable); } else { @@ -410,8 +442,7 @@ for ( i = ept_get_wl(d); i > 0; i-- ) { retry: - ret = ept_next_level(d, 1, &table, &gfn_remainder, - i * EPT_TABLE_ORDER); + ret = ept_next_level(d, 1, &table, &gfn_remainder, i); if ( !ret ) goto out; else if ( ret == GUEST_TABLE_POD_PAGE ) @@ -498,8 +529,7 @@ for ( i = ept_get_wl(d); i > 0; i-- ) { - ret = ept_next_level(d, 1, &table, &gfn_remainder, - i * EPT_TABLE_ORDER); + ret = ept_next_level(d, 1, &table, &gfn_remainder, i); if ( 
!ret || ret == GUEST_TABLE_POD_PAGE ) goto out; else if ( ret == GUEST_TABLE_SUPER_PAGE ) @@ -722,8 +752,7 @@ for ( i = ept_get_wl(d); i > 0; i-- ) { - ret = ept_next_level(d, 1, &table, &gfn_remainder, - i * EPT_TABLE_ORDER); + ret = ept_next_level(d, 1, &table, &gfn_remainder, i); if ( ret != GUEST_TABLE_NORMAL_PAGE ) break; } _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel