Tim Deegan
2010-Nov-18 13:08 UTC
[Xen-devel] [PATCH] x86/mm: Allocate log-dirty bitmaps from shadow/HAP memory
x86/mm: Allocate log-dirty bitmaps from shadow/HAP memory.

Move the p2m alloc and free functions back into the per-domain paging
assistance structure and allow them to be called from the log-dirty
code.  This makes it less likely that log-dirty code will run out of
memory populating the log-dirty bitmap.

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>

diff -r b76df6b4375a -r 38afe60221d7 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c	Wed Nov 17 20:40:30 2010 +0000
+++ b/xen/arch/x86/mm/hap/hap.c	Thu Nov 18 12:35:46 2010 +0000
@@ -276,12 +276,16 @@ static void hap_free(struct domain *d, m
     page_list_add_tail(pg, &d->arch.paging.hap.freelist);
 }

-static struct page_info *hap_alloc_p2m_page(struct p2m_domain *p2m)
+static struct page_info *hap_alloc_p2m_page(struct domain *d)
 {
-    struct domain *d = p2m->domain;
     struct page_info *pg;
+    int do_locking;

-    hap_lock(d);
+    /* This is called both from the p2m code (which never holds the
+     * hap lock) and the log-dirty code (which sometimes does). */
+    do_locking = !hap_locked_by_me(d);
+    if ( do_locking )
+        hap_lock(d);
     pg = hap_alloc(d);

 #if CONFIG_PAGING_LEVELS == 3
@@ -312,14 +316,21 @@ static struct page_info *hap_alloc_p2m_p
         pg->count_info |= 1;
     }

-    hap_unlock(d);
+    if ( do_locking )
+        hap_unlock(d);
     return pg;
 }

-static void hap_free_p2m_page(struct p2m_domain *p2m, struct page_info *pg)
+static void hap_free_p2m_page(struct domain *d, struct page_info *pg)
 {
-    struct domain *d = p2m->domain;
-    hap_lock(d);
+    int do_locking;
+
+    /* This is called both from the p2m code (which never holds the
+     * hap lock) and the log-dirty code (which sometimes does). */
+    do_locking = !hap_locked_by_me(d);
+    if ( do_locking )
+        hap_lock(d);
+
     ASSERT(page_get_owner(pg) == d);
     /* Should have just the one ref we gave it in alloc_p2m_page() */
     if ( (pg->count_info & PGC_count_mask) != 1 )
@@ -333,7 +344,9 @@ static void hap_free_p2m_page(struct p2m
     d->arch.paging.hap.total_pages++;
     hap_free(d, page_to_mfn(pg));
     ASSERT(d->arch.paging.hap.p2m_pages >= 0);
-    hap_unlock(d);
+
+    if ( do_locking )
+        hap_unlock(d);
 }

 /* Return the size of the pool, rounded up to the nearest MB */
@@ -597,11 +610,14 @@ int hap_enable(struct domain *d, u32 mod
         }
     }

+    /* Allow p2m and log-dirty code to borrow our memory */
+    d->arch.paging.alloc_page = hap_alloc_p2m_page;
+    d->arch.paging.free_page = hap_free_p2m_page;
+
     /* allocate P2m table */
     if ( mode & PG_translate )
     {
-        rv = p2m_alloc_table(p2m_get_hostp2m(d),
-                             hap_alloc_p2m_page, hap_free_p2m_page);
+        rv = p2m_alloc_table(p2m_get_hostp2m(d));
         if ( rv != 0 )
             goto out;
     }
diff -r b76df6b4375a -r 38afe60221d7 xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c	Wed Nov 17 20:40:30 2010 +0000
+++ b/xen/arch/x86/mm/hap/p2m-ept.c	Thu Nov 18 12:35:46 2010 +0000
@@ -125,6 +125,8 @@ static int ept_set_middle_entry(struct p
 /* free ept sub tree behind an entry */
 void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level)
 {
+    struct domain *d = p2m->domain;
+
     /* End if the entry is a leaf entry. */
     if ( level == 0 || !is_epte_present(ept_entry) ||
          is_epte_superpage(ept_entry) )
@@ -138,7 +140,7 @@ void ept_free_entry(struct p2m_domain *p
             unmap_domain_page(epte);
     }

-    p2m->free_page(p2m, mfn_to_page(ept_entry->mfn));
+    d->arch.paging.free_page(d, mfn_to_page(ept_entry->mfn));
 }

 static int ept_split_super_page(struct p2m_domain *p2m, ept_entry_t *ept_entry,
diff -r b76df6b4375a -r 38afe60221d7 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Wed Nov 17 20:40:30 2010 +0000
+++ b/xen/arch/x86/mm/p2m.c	Thu Nov 18 12:35:46 2010 +0000
@@ -142,8 +142,9 @@ p2m_alloc_ptp(struct p2m_domain *p2m, un
     struct page_info *pg;

     ASSERT(p2m);
-    ASSERT(p2m->alloc_page);
-    pg = p2m->alloc_page(p2m);
+    ASSERT(p2m->domain);
+    ASSERT(p2m->domain->arch.paging.alloc_page);
+    pg = p2m->domain->arch.paging.alloc_page(p2m->domain);
     if (pg == NULL)
         return NULL;

@@ -167,7 +168,6 @@ p2m_next_level(struct p2m_domain *p2m, m
     l1_pgentry_t new_entry;
     void *next;
     int i;
-    ASSERT(p2m->alloc_page);

     if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
                                       shift, max)) )
@@ -1792,14 +1792,9 @@ int set_p2m_entry(struct p2m_domain *p2m
 // The structure of the p2m table is that of a pagetable for xen (i.e. it is
 // controlled by CONFIG_PAGING_LEVELS).
 //
-// The alloc_page and free_page functions will be used to get memory to
-// build the p2m, and to release it again at the end of day.
-//
 // Returns 0 for success or -errno.
 //
-int p2m_alloc_table(struct p2m_domain *p2m,
-                    struct page_info * (*alloc_page)(struct p2m_domain *p2m),
-                    void (*free_page)(struct p2m_domain *p2m, struct page_info *pg))
+int p2m_alloc_table(struct p2m_domain *p2m)
 {
     mfn_t mfn = _mfn(INVALID_MFN);
     struct page_info *page, *p2m_top;
@@ -1817,9 +1812,6 @@ int p2m_alloc_table(struct p2m_domain *p

     P2M_PRINTK("allocating p2m table\n");

-    p2m->alloc_page = alloc_page;
-    p2m->free_page = free_page;
-
     p2m_top = p2m_alloc_ptp(p2m,
 #if CONFIG_PAGING_LEVELS == 4
         PGT_l4_page_table
@@ -1882,6 +1874,7 @@ void p2m_teardown(struct p2m_domain *p2m
  * We know we don't have any extra mappings to these pages */
 {
     struct page_info *pg;
+    struct domain *d = p2m->domain;
 #ifdef __x86_64__
     unsigned long gfn;
     p2m_type_t t;
@@ -1902,7 +1895,7 @@ void p2m_teardown(struct p2m_domain *p2m
     p2m->phys_table = pagetable_null();

     while ( (pg = page_list_remove_head(&p2m->pages)) )
-        p2m->free_page(p2m, pg);
+        d->arch.paging.free_page(d, pg);

     p2m_unlock(p2m);
 }
diff -r b76df6b4375a -r 38afe60221d7 xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c	Wed Nov 17 20:40:30 2010 +0000
+++ b/xen/arch/x86/mm/paging.c	Thu Nov 18 12:35:46 2010 +0000
@@ -99,10 +99,11 @@ static mfn_t paging_new_log_dirty_page(s
 {
     struct page_info *page;

-    page = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
+    page = d->arch.paging.alloc_page(d);
     if ( unlikely(page == NULL) )
     {
         d->arch.paging.log_dirty.failed_allocs++;
+        *mapping_p = NULL;
         return _mfn(INVALID_MFN);
     }

@@ -131,30 +132,23 @@ static mfn_t paging_new_log_dirty_node(s
     return mfn;
 }

-int paging_alloc_log_dirty_bitmap(struct domain *d)
+mfn_t *paging_map_log_dirty_bitmap(struct domain *d)
 {
     mfn_t *mapping;

-    if ( mfn_valid(d->arch.paging.log_dirty.top) )
-        return 0;
+    if ( likely(mfn_valid(d->arch.paging.log_dirty.top)) )
+        return map_domain_page(mfn_x(d->arch.paging.log_dirty.top));

     d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d, &mapping);
-    if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) )
-    {
-        /* Clear error indicator since we're reporting this one */
-        d->arch.paging.log_dirty.failed_allocs = 0;
-        return -ENOMEM;
-    }
-    unmap_domain_page(mapping);
-    return 0;
+    return mapping;
 }

 static void paging_free_log_dirty_page(struct domain *d, mfn_t mfn)
 {
     d->arch.paging.log_dirty.allocs--;
-    free_domheap_page(mfn_to_page(mfn));
-}
+    d->arch.paging.free_page(d, mfn_to_page(mfn));
+}

 void paging_free_log_dirty_bitmap(struct domain *d)
 {
@@ -204,36 +198,13 @@ int paging_log_dirty_enable(struct domai
 {
     int ret;

+    if ( paging_mode_log_dirty(d) )
+        return -EINVAL;
+
     domain_pause(d);
-    log_dirty_lock(d);
+    ret = d->arch.paging.log_dirty.enable_log_dirty(d);
+    domain_unpause(d);

-    if ( paging_mode_log_dirty(d) )
-    {
-        ret = -EINVAL;
-        goto out;
-    }
-
-    ret = paging_alloc_log_dirty_bitmap(d);
-    if ( ret != 0 )
-    {
-        paging_free_log_dirty_bitmap(d);
-        goto out;
-    }
-
-    log_dirty_unlock(d);
-
-    /* Safe because the domain is paused. */
-    ret = d->arch.paging.log_dirty.enable_log_dirty(d);
-
-    /* Possibility of leaving the bitmap allocated here but it'll be
-     * tidied on domain teardown. */
-
-    domain_unpause(d);
-    return ret;
-
- out:
-    log_dirty_unlock(d);
-    domain_unpause(d);
     return ret;
 }

@@ -271,8 +242,6 @@ void paging_mark_dirty(struct domain *d,

     log_dirty_lock(d);

-    ASSERT(mfn_valid(d->arch.paging.log_dirty.top));
-
     /* We /really/ mean PFN here, even for non-translated guests. */
     pfn = get_gpfn_from_mfn(mfn_x(gmfn));
     /* Shared MFNs should NEVER be marked dirty */
@@ -291,7 +260,9 @@ void paging_mark_dirty(struct domain *d,
     i3 = L3_LOGDIRTY_IDX(pfn);
     i4 = L4_LOGDIRTY_IDX(pfn);

-    l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
+    l4 = paging_map_log_dirty_bitmap(d);
+    if ( !l4 )
+        goto out;
     mfn = l4[i4];
     if ( !mfn_valid(mfn) )
         mfn = l4[i4] = paging_new_log_dirty_node(d, &l3);
@@ -367,12 +338,6 @@ int paging_log_dirty_op(struct domain *d
         /* caller may have wanted just to clean the state or access stats. */
         peek = 0;
-    if ( (peek || clean) && !mfn_valid(d->arch.paging.log_dirty.top) )
-    {
-        rv = -EINVAL; /* perhaps should be ENOMEM? */
-        goto out;
-    }
-
     if ( unlikely(d->arch.paging.log_dirty.failed_allocs) )
     {
         printk("%s: %d failed page allocs while logging dirty pages\n",
                __FUNCTION__, d->arch.paging.log_dirty.failed_allocs);
@@ -381,8 +346,7 @@ int paging_log_dirty_op(struct domain *d
     }

     pages = 0;
-    l4 = (mfn_valid(d->arch.paging.log_dirty.top) ?
-          map_domain_page(mfn_x(d->arch.paging.log_dirty.top)) : NULL);
+    l4 = paging_map_log_dirty_bitmap(d);
     for ( i4 = 0;
           (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES);
           i4++ )
@@ -469,12 +433,6 @@ int paging_log_dirty_range(struct domain
                  d->arch.paging.log_dirty.fault_count,
                  d->arch.paging.log_dirty.dirty_count);
-    if ( !mfn_valid(d->arch.paging.log_dirty.top) )
-    {
-        rv = -EINVAL; /* perhaps should be ENOMEM? */
-        goto out;
-    }
-
     if ( unlikely(d->arch.paging.log_dirty.failed_allocs) )
     {
         printk("%s: %d failed page allocs while logging dirty pages\n",
                __FUNCTION__, d->arch.paging.log_dirty.failed_allocs);
@@ -500,13 +458,13 @@ int paging_log_dirty_range(struct domain
     b2 = L2_LOGDIRTY_IDX(begin_pfn);
     b3 = L3_LOGDIRTY_IDX(begin_pfn);
     b4 = L4_LOGDIRTY_IDX(begin_pfn);

-    l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
+    l4 = paging_map_log_dirty_bitmap(d);
     for ( i4 = b4;
           (pages < nr) && (i4 < LOGDIRTY_NODE_ENTRIES);
           i4++ )
     {
-        l3 = mfn_valid(l4[i4]) ? map_domain_page(mfn_x(l4[i4])) : NULL;
+        l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL;
         for ( i3 = b3;
               (pages < nr) && (i3 < LOGDIRTY_NODE_ENTRIES);
               i3++ )
@@ -590,7 +548,8 @@ int paging_log_dirty_range(struct domain
         if ( l3 )
             unmap_domain_page(l3);
     }
-    unmap_domain_page(l4);
+    if ( l4 )
+        unmap_domain_page(l4);

     log_dirty_unlock(d);

diff -r b76df6b4375a -r 38afe60221d7 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c	Wed Nov 17 20:40:30 2010 +0000
+++ b/xen/arch/x86/mm/shadow/common.c	Thu Nov 18 12:35:46 2010 +0000
@@ -1620,12 +1620,16 @@ void shadow_free(struct domain *d, mfn_t
  * That's OK because the p2m table only exists for translated domains,
  * and those domains can't ever turn off shadow mode. */
 static struct page_info *
-shadow_alloc_p2m_page(struct p2m_domain *p2m)
+shadow_alloc_p2m_page(struct domain *d)
 {
-    struct domain *d = p2m->domain;
     struct page_info *pg;
-
-    shadow_lock(d);
+    int do_locking;
+
+    /* This is called both from the p2m code (which never holds the
+     * shadow lock) and the log-dirty code (which sometimes does). */
+    do_locking = !shadow_locked_by_me(d);
+    if ( do_locking )
+        shadow_lock(d);

     if ( d->arch.paging.shadow.total_pages
          < shadow_min_acceptable_pages(d) + 1 )
@@ -1639,7 +1643,8 @@ shadow_alloc_p2m_page(struct p2m_domain
     d->arch.paging.shadow.p2m_pages++;
     d->arch.paging.shadow.total_pages--;

-    shadow_unlock(d);
+    if ( do_locking )
+        shadow_unlock(d);

     /* Unlike shadow pages, mark p2m pages as owned by the domain.
      * Marking the domain as the owner would normally allow the guest to
@@ -1652,9 +1657,10 @@ shadow_alloc_p2m_page(struct p2m_domain
 }

 static void
-shadow_free_p2m_page(struct p2m_domain *p2m, struct page_info *pg)
+shadow_free_p2m_page(struct domain *d, struct page_info *pg)
 {
-    struct domain *d = p2m->domain;
+    int do_locking;
+
     ASSERT(page_get_owner(pg) == d);
     /* Should have just the one ref we gave it in alloc_p2m_page() */
     if ( (pg->count_info & PGC_count_mask) != 1 )
@@ -1666,11 +1672,18 @@ shadow_free_p2m_page(struct p2m_domain *
     pg->u.sh.type = SH_type_p2m_table; /* p2m code reuses type-info */
     page_set_owner(pg, NULL);

-    shadow_lock(d);
+    /* This is called both from the p2m code (which never holds the
+     * shadow lock) and the log-dirty code (which sometimes does). */
+    do_locking = !shadow_locked_by_me(d);
+    if ( do_locking )
+        shadow_lock(d);
+
     shadow_free(d, page_to_mfn(pg));
     d->arch.paging.shadow.p2m_pages--;
     d->arch.paging.shadow.total_pages++;
-    shadow_unlock(d);
+
+    if ( do_locking )
+        shadow_unlock(d);
 }

 #if CONFIG_PAGING_LEVELS == 3
@@ -3032,6 +3045,7 @@ static void sh_new_mode(struct domain *d
     ASSERT(shadow_locked_by_me(d));
     ASSERT(d != current->domain);

+    d->arch.paging.mode = new_mode;
     for_each_vcpu(d, v)
         sh_update_paging_modes(v);

@@ -3079,12 +3093,15 @@ int shadow_enable(struct domain *d, u32
         shadow_unlock(d);
     }

+    /* Allow p2m and log-dirty code to borrow shadow memory */
+    d->arch.paging.alloc_page = shadow_alloc_p2m_page;
+    d->arch.paging.free_page = shadow_free_p2m_page;
+
     /* Init the P2M table.  Must be done before we take the shadow lock
      * to avoid possible deadlock. */
     if ( mode & PG_translate )
     {
-        rv = p2m_alloc_table(p2m,
-                             shadow_alloc_p2m_page, shadow_free_p2m_page);
+        rv = p2m_alloc_table(p2m);
         if (rv != 0)
             goto out_unlocked;
     }
@@ -3095,7 +3112,7 @@ int shadow_enable(struct domain *d, u32
     {
         /* Get a single page from the shadow pool.  Take it via the
          * P2M interface to make freeing it simpler afterwards. */
-        pg = shadow_alloc_p2m_page(p2m);
+        pg = shadow_alloc_p2m_page(d);
         if ( pg == NULL )
         {
             rv = -ENOMEM;
@@ -3147,7 +3164,7 @@ int shadow_enable(struct domain *d, u32
     if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) )
         p2m_teardown(p2m);
     if ( rv != 0 && pg != NULL )
-        shadow_free_p2m_page(p2m, pg);
+        shadow_free_p2m_page(d, pg);
     domain_unpause(d);
     return rv;
 }
@@ -3265,7 +3282,7 @@ void shadow_teardown(struct domain *d)

     /* Must be called outside the lock */
     if ( unpaged_pagetable )
-        shadow_free_p2m_page(p2m_get_hostp2m(d), unpaged_pagetable);
+        shadow_free_p2m_page(d, unpaged_pagetable);
 }

 void shadow_final_teardown(struct domain *d)
@@ -3320,6 +3337,10 @@ static int shadow_one_bit_enable(struct
             sh_set_allocation(d, 0, NULL);
             return -ENOMEM;
         }
+
+        /* Allow p2m and log-dirty code to borrow shadow memory */
+        d->arch.paging.alloc_page = shadow_alloc_p2m_page;
+        d->arch.paging.free_page = shadow_free_p2m_page;
     }

     /* Update the bits */
diff -r b76df6b4375a -r 38afe60221d7 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h	Wed Nov 17 20:40:30 2010 +0000
+++ b/xen/arch/x86/mm/shadow/private.h	Thu Nov 18 12:35:46 2010 +0000
@@ -584,7 +584,6 @@ sh_mfn_is_dirty(struct domain *d, mfn_t
     int rv;

     ASSERT(shadow_mode_log_dirty(d));
-    ASSERT(mfn_valid(d->arch.paging.log_dirty.top));

     /* We /really/ mean PFN here, even for non-translated guests. */
     pfn = get_gpfn_from_mfn(mfn_x(gmfn));
@@ -602,7 +601,10 @@ sh_mfn_is_dirty(struct domain *d, mfn_t
          */
         return 1;

-    l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
+    l4 = paging_map_log_dirty_bitmap(d);
+    if ( !l4 )
+        return 0;
+
     mfn = l4[L4_LOGDIRTY_IDX(pfn)];
     unmap_domain_page(l4);
     if ( !mfn_valid(mfn) )
diff -r b76df6b4375a -r 38afe60221d7 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h	Wed Nov 17 20:40:30 2010 +0000
+++ b/xen/include/asm-x86/domain.h	Thu Nov 18 12:35:46 2010 +0000
@@ -201,6 +201,10 @@ struct paging_domain {
     struct hap_domain       hap;
     /* log dirty support */
     struct log_dirty_domain log_dirty;
+    /* alloc/free pages from the pool for paging-assistance structures
+     * (used by p2m and log-dirty code for their tries) */
+    struct page_info * (*alloc_page)(struct domain *d);
+    void (*free_page)(struct domain *d, struct page_info *pg);
 };

 struct paging_vcpu {
diff -r b76df6b4375a -r 38afe60221d7 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h	Wed Nov 17 20:40:30 2010 +0000
+++ b/xen/include/asm-x86/p2m.h	Thu Nov 18 12:35:46 2010 +0000
@@ -179,10 +179,6 @@ struct p2m_domain {
     /* Pages used to construct the p2m */
     struct page_list_head pages;

-    /* Functions to call to get or free pages for the p2m */
-    struct page_info * (*alloc_page  )(struct p2m_domain *p2m);
-    void               (*free_page   )(struct p2m_domain *p2m,
-                                       struct page_info *pg);
     int                (*set_entry   )(struct p2m_domain *p2m,
                                        unsigned long gfn,
                                        mfn_t mfn, unsigned int page_order,
@@ -390,13 +386,8 @@ int p2m_init(struct domain *d);

 /* Allocate a new p2m table for a domain.
  *
- * The alloc_page and free_page functions will be used to get memory to
- * build the p2m, and to release it again at the end of day.
- *
  * Returns 0 for success or -errno. */
-int p2m_alloc_table(struct p2m_domain *p2m,
-                    struct page_info * (*alloc_page)(struct p2m_domain *p2m),
-                    void (*free_page)(struct p2m_domain *p2m, struct page_info *pg));
+int p2m_alloc_table(struct p2m_domain *p2m);

 /* Return all the p2m resources to Xen. */
 void p2m_teardown(struct p2m_domain *p2m);
diff -r b76df6b4375a -r 38afe60221d7 xen/include/asm-x86/paging.h
--- a/xen/include/asm-x86/paging.h	Wed Nov 17 20:40:30 2010 +0000
+++ b/xen/include/asm-x86/paging.h	Thu Nov 18 12:35:46 2010 +0000
@@ -134,8 +134,8 @@ struct paging_mode {
 /*****************************************************************************
  * Log dirty code */

-/* allocate log dirty bitmap resource for recording dirty pages */
-int paging_alloc_log_dirty_bitmap(struct domain *d);
+/* get the top of the log-dirty bitmap trie, allocating if necessary */
+mfn_t *paging_map_log_dirty_bitmap(struct domain *d);

 /* free log dirty bitmap resource */
 void paging_free_log_dirty_bitmap(struct domain *d);

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
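
For readers following the interface change rather than the Xen internals, the sketch below is a minimal standalone illustration (not Xen code; struct domain, the "locked" flag and the pool functions are made-up stand-ins) of the pattern the patch introduces: per-domain alloc_page/free_page hooks installed by the active paging mode and shared by the p2m and log-dirty code, with the callee only taking the paging lock when the caller does not already hold it, mirroring the do_locking = !..._locked_by_me(d) idiom above.

/* Minimal standalone sketch (NOT Xen code): per-domain allocator hooks
 * shared by two callers, with conditional locking. */
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

struct domain;

struct paging_domain {
    /* hooks installed by the active paging mode (shadow or HAP) */
    void *(*alloc_page)(struct domain *d);
    void  (*free_page)(struct domain *d, void *pg);
    bool locked;                          /* stand-in for the shadow/HAP lock */
};

struct domain {
    struct paging_domain paging;
};

/* Pool allocator that tolerates being called with or without the
 * paging "lock" held, as the patched shadow/HAP allocators do. */
static void *pool_alloc_page(struct domain *d)
{
    bool do_locking = !d->paging.locked;  /* only lock if not already held */
    if ( do_locking )
        d->paging.locked = true;

    void *pg = malloc(4096);              /* pretend pool allocation */

    if ( do_locking )
        d->paging.locked = false;
    return pg;
}

static void pool_free_page(struct domain *d, void *pg)
{
    bool do_locking = !d->paging.locked;
    if ( do_locking )
        d->paging.locked = true;

    free(pg);

    if ( do_locking )
        d->paging.locked = false;
}

/* Both the p2m code and the log-dirty code now go through the same hooks. */
static void *log_dirty_new_page(struct domain *d)
{
    return d->paging.alloc_page(d);
}

int main(void)
{
    struct domain dom = { .paging = { pool_alloc_page, pool_free_page, false } };
    void *pg = log_dirty_new_page(&dom);
    printf("allocation %s\n", pg ? "succeeded" : "failed");
    dom.paging.free_page(&dom, pg);
    return 0;
}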