Andres Lagar-Cavilla
2012-Apr-24 19:48 UTC
[PATCH 0 of 3] x86/mem_sharing: Improve performance of rmap, fix cascading bugs
This is a repost of patches 2 and 3 of the series initially posted on Apr 12th.
The first patch has been split into two functionally isolated patches as per
Tim Deegan's request. The original posting (suitably edited) follows.

--------------------------------

The sharing subsystem does not scale elegantly with high degrees of page
sharing. The culprit is a reverse map that each shared frame maintains,
resolving to all domain pages pointing to the shared frame. Because the rmap is
implemented with an O(n)-search linked list, CoW unsharing can result in
prolonged search times.

The place where this becomes most obvious is during domain destruction, during
which all shared p2m entries need to be unshared. Destroying a domain with a
lot of sharing could result in minutes of hypervisor freeze-up: 7 minutes for a
2 GiB domain! As a result, errors cascade throughout the system, including
soft lockups, watchdogs firing, IO drivers timing out, etc.

The proposed solution is to mutate the rmap from a linked list to a hash table
when the number of domain pages referencing the shared frame exceeds a
threshold. This maintains minimal space use for the common case of relatively
low sharing, and switches to an O(1) data structure for heavily shared pages,
with a space overhead of one page. The threshold chosen is 256, as a single
page can fit the list heads for 256 hash buckets.

With these patches in place, domain destruction for a 2 GiB domain with a
shared frame carrying over a hundred thousand references drops from 7 minutes
to two seconds.

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

 xen/arch/x86/mm/mem_sharing.c     |    8 +-
 xen/arch/x86/mm/mem_sharing.c     |  138 ++++++++++++++++++++++--------
 xen/arch/x86/mm/mem_sharing.c     |  170 +++++++++++++++++++++++++++++++++++--
 xen/include/asm-x86/mem_sharing.h |   13 ++-
 4 files changed, 274 insertions(+), 55 deletions(-)
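As a rough aside on the sizing argument (not part of the patches themselves): with two-pointer list heads on x86-64, one 4 KiB page holds exactly 256 bucket heads, which is where the 256 threshold comes from. A minimal user-space sketch of that arithmetic, assuming a 4 KiB PAGE_SIZE:

/* Illustrative only: shows why one page of bucket heads yields a 256
 * threshold. Plain user-space C, not Xen code. */
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

int main(void)
{
    unsigned long page_size = 4096;   /* assumed PAGE_SIZE */
    unsigned long buckets = page_size / sizeof(struct list_head);

    /* On LP64 this prints 256, i.e. the RMAP_HEAVY_SHARED_PAGE threshold. */
    printf("bucket heads per page: %lu\n", buckets);
    return 0;
}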
Andres Lagar-Cavilla
2012-Apr-24 19:48 UTC
[PATCH 1 of 3] x86/mem_sharing: Don't destroy a page's shared state before depleting its <gfn, domid> tuple list
 xen/arch/x86/mm/mem_sharing.c |  8 +++++---
 1 files changed, 5 insertions(+), 3 deletions(-)

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

diff -r 08946bbc8036 -r 796b523346ac xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -963,7 +963,9 @@ gfn_found:
     last_gfn = list_has_one_entry(&page->sharing->gfns);
     if ( last_gfn )
     {
-        /* Clean up shared state */
+        /* Clean up shared state. Get rid of the <domid, gfn> tuple
+         * before destroying the rmap. */
+        mem_sharing_gfn_destroy(d, gfn_info);
         audit_del_list(page);
         page->sharing = NULL;
         atomic_dec(&nr_shared_mfns);
@@ -974,7 +976,8 @@ gfn_found:
      * (possibly freeing the page), and exit early */
     if ( flags & MEM_SHARING_DESTROY_GFN )
     {
-        mem_sharing_gfn_destroy(d, gfn_info);
+        if ( !last_gfn )
+            mem_sharing_gfn_destroy(d, gfn_info);
         put_page_and_type(page);
         mem_sharing_page_unlock(page);
         if ( last_gfn &&
@@ -987,7 +990,6 @@ gfn_found:
 
     if ( last_gfn )
     {
-        mem_sharing_gfn_destroy(d, gfn_info);
         /* Making a page private atomically unlocks it */
         BUG_ON(page_make_private(d, page) != 0);
         goto private_page_found;
Andres Lagar-Cavilla
2012-Apr-24 19:48 UTC
[PATCH 2 of 3] x86/mem_sharing: modularize reverse map for shared frames
 xen/arch/x86/mm/mem_sharing.c |  138 ++++++++++++++++++++++++++++++-----------
 1 files changed, 100 insertions(+), 38 deletions(-)

Each shared frame maintains a reverse map of <domain, gfn> tuples, so we know
which tuples this shared frame is backing. This is useful for auditing and
sanity-checking, and necessary to update all relevant p2m entries when sharing.

The reverse map is maintained as a doubly linked list, but the interface is
open-coded throughout the mem_sharing.c subsystem. Bury it inside a level of
abstraction, so it can later support different (more scalable) rmap
implementations.

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

diff -r 796b523346ac -r 5965a38564fc xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -69,7 +69,8 @@ static inline void audit_add_list(struct
     spin_unlock(&shr_audit_lock);
 }
 
-static inline void audit_del_list(struct page_info *page)
+/* Removes from the audit list and cleans up the page sharing metadata. */
+static inline void page_sharing_dispose(struct page_info *page)
 {
     spin_lock(&shr_audit_lock);
     list_del_rcu(&page->sharing->entry);
@@ -86,7 +87,7 @@ int mem_sharing_audit(void)
 }
 
 #define audit_add_list(p)  ((void)0)
-static inline void audit_del_list(struct page_info *page)
+static inline void page_sharing_dispose(struct page_info *page)
 {
     xfree(page->sharing);
 }
@@ -143,6 +144,7 @@ static inline shr_handle_t get_next_hand
 static atomic_t nr_saved_mfns  = ATOMIC_INIT(0);
 static atomic_t nr_shared_mfns = ATOMIC_INIT(0);
 
+/** Reverse map **/
 typedef struct gfn_info
 {
     unsigned long gfn;
@@ -150,15 +152,77 @@ typedef struct gfn_info
     struct list_head list;
 } gfn_info_t;
 
-/* Returns true if list has only one entry. O(1) complexity. */
-static inline int list_has_one_entry(struct list_head *head)
+static inline void
+rmap_init(struct page_info *page)
 {
+    INIT_LIST_HEAD(&page->sharing->gfns);
+}
+
+static inline void
+rmap_del(gfn_info_t *gfn_info, struct page_info *page)
+{
+    list_del(&gfn_info->list);
+}
+
+static inline void
+rmap_add(gfn_info_t *gfn_info, struct page_info *page)
+{
+    INIT_LIST_HEAD(&gfn_info->list);
+    list_add(&gfn_info->list, &page->sharing->gfns);
+}
+
+static inline gfn_info_t *
+rmap_retrieve(uint16_t domain_id, unsigned long gfn,
+              struct page_info *page)
+{
+    gfn_info_t *gfn_info;
+    struct list_head *le;
+    list_for_each(le, &page->sharing->gfns)
+    {
+        gfn_info = list_entry(le, gfn_info_t, list);
+        if ( (gfn_info->gfn == gfn) && (gfn_info->domain == domain_id) )
+            return gfn_info;
+    }
+    return NULL;
+}
+
+/* Returns true if the rmap has only one entry. O(1) complexity. */
+static inline int rmap_has_one_entry(struct page_info *page)
+{
+    struct list_head *head = &page->sharing->gfns;
     return (head->next != head) && (head->next->next == head);
 }
 
-static inline gfn_info_t *gfn_get_info(struct list_head *list)
+/* Returns true if the rmap has any entries. O(1) complexity. */
+static inline int rmap_has_entries(struct page_info *page)
 {
-    return list_entry(list->next, gfn_info_t, list);
+    return (!list_empty(&page->sharing->gfns));
+}
+
+/* The iterator hides the details of how the rmap is implemented. This
+ * involves splitting the list_for_each_safe macro into two steps. */
+struct rmap_iterator {
+    struct list_head *curr;
+    struct list_head *next;
+};
+
+static inline void
+rmap_seed_iterator(struct page_info *page, struct rmap_iterator *ri)
+{
+    ri->curr = &page->sharing->gfns;
+    ri->next = ri->curr->next;
+}
+
+static inline gfn_info_t *
+rmap_iterate(struct page_info *page, struct rmap_iterator *ri)
+{
+    if ( ri->next == &page->sharing->gfns)
+        return NULL;
+
+    ri->curr = ri->next;
+    ri->next = ri->curr->next;
+
+    return list_entry(ri->curr, gfn_info_t, list);
 }
 
 static inline gfn_info_t *mem_sharing_gfn_alloc(struct page_info *page,
@@ -172,8 +236,8 @@ static inline gfn_info_t *mem_sharing_gf
     gfn_info->gfn = gfn;
     gfn_info->domain = d->domain_id;
-    INIT_LIST_HEAD(&gfn_info->list);
-    list_add(&gfn_info->list, &page->sharing->gfns);
+
+    rmap_add(gfn_info, page);
 
     /* Increment our number of shared pges. */
     atomic_inc(&d->shr_pages);
@@ -181,14 +245,15 @@ static inline gfn_info_t *mem_sharing_gf
     return gfn_info;
 }
 
-static inline void mem_sharing_gfn_destroy(struct domain *d,
+static inline void mem_sharing_gfn_destroy(struct page_info *page,
+                                           struct domain *d,
                                            gfn_info_t *gfn_info)
 {
     /* Decrement the number of pages. */
     atomic_dec(&d->shr_pages);
 
     /* Free the gfn_info structure. */
-    list_del(&gfn_info->list);
+    rmap_del(gfn_info, page);
     xfree(gfn_info);
 }
@@ -230,8 +295,9 @@ int mem_sharing_audit(void)
         struct page_sharing_info *pg_shared_info;
         unsigned long nr_gfns = 0;
         struct page_info *pg;
-        struct list_head *le;
         mfn_t mfn;
+        gfn_info_t *g;
+        struct rmap_iterator ri;
 
         pg_shared_info = list_entry(ae, struct page_sharing_info, entry);
         pg = pg_shared_info->pg;
@@ -272,7 +338,7 @@ int mem_sharing_audit(void)
         }
 
         /* Check we have a list */
-        if ( (!pg->sharing) || (list_empty(&pg->sharing->gfns)) )
+        if ( (!pg->sharing) || !rmap_has_entries(pg) )
         {
            MEM_SHARING_DEBUG("mfn %lx shared, but empty gfn list!\n",
                              mfn_x(mfn));
@@ -284,14 +350,13 @@ int mem_sharing_audit(void)
         count_found++;
 
         /* Check if all GFNs map to the MFN, and the p2m types */
-        list_for_each(le, &pg->sharing->gfns)
+        rmap_seed_iterator(pg, &ri);
+        while ( (g = rmap_iterate(pg, &ri)) != NULL )
         {
             struct domain *d;
             p2m_type_t t;
             mfn_t o_mfn;
-            gfn_info_t *g;
 
-            g = list_entry(le, gfn_info_t, list);
             d = get_domain_by_id(g->domain);
             if ( d == NULL )
             {
@@ -677,7 +742,7 @@ int mem_sharing_nominate_page(struct dom
         goto out;
     }
     page->sharing->pg = page;
-    INIT_LIST_HEAD(&page->sharing->gfns);
+    rmap_init(page);
 
     /* Create the handle */
     page->sharing->handle = get_next_handle();
@@ -698,7 +763,7 @@ int mem_sharing_nominate_page(struct dom
          * it a few lines above.
          * The mfn needs to revert back to rw type. This should never fail,
          * since no-one knew that the mfn was temporarily sharable */
-        mem_sharing_gfn_destroy(d, gfn_info);
+        mem_sharing_gfn_destroy(page, d, gfn_info);
         xfree(page->sharing);
         page->sharing = NULL;
         /* NOTE: We haven't yet added this to the audit list. */
@@ -726,13 +791,13 @@ int mem_sharing_share_pages(struct domai
                             struct domain *cd, unsigned long cgfn, shr_handle_t ch)
 {
     struct page_info *spage, *cpage, *firstpg, *secondpg;
-    struct list_head *le, *te;
     gfn_info_t *gfn;
     struct domain *d;
     int ret = -EINVAL;
     mfn_t smfn, cmfn;
     p2m_type_t smfn_type, cmfn_type;
     struct two_gfns tg;
+    struct rmap_iterator ri;
 
     get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn,
                  cd, cgfn, &cmfn_type, NULL, &cmfn,
@@ -799,15 +864,15 @@ int mem_sharing_share_pages(struct domai
     }
 
     /* Merge the lists together */
-    list_for_each_safe(le, te, &cpage->sharing->gfns)
+    rmap_seed_iterator(cpage, &ri);
+    while ( (gfn = rmap_iterate(cpage, &ri)) != NULL)
     {
-        gfn = list_entry(le, gfn_info_t, list);
         /* Get the source page and type, this should never fail:
         * we are under shr lock, and got a successful lookup */
         BUG_ON(!get_page_and_type(spage, dom_cow, PGT_shared_page));
         /* Move the gfn_info from client list to source list */
-        list_del(&gfn->list);
-        list_add(&gfn->list, &spage->sharing->gfns);
+        rmap_del(gfn, cpage);
+        rmap_add(gfn, spage);
         put_page_and_type(cpage);
         d = get_domain_by_id(gfn->domain);
         BUG_ON(!d);
@@ -817,7 +882,7 @@ int mem_sharing_share_pages(struct domai
     ASSERT(list_empty(&cpage->sharing->gfns));
 
     /* Clear the rest of the shared state */
-    audit_del_list(cpage);
+    page_sharing_dispose(cpage);
     cpage->sharing = NULL;
 
     mem_sharing_page_unlock(secondpg);
@@ -887,7 +952,7 @@ int mem_sharing_add_to_physmap(struct do
     if ( !ret )
     {
         ret = -ENOENT;
-        mem_sharing_gfn_destroy(cd, gfn_info);
+        mem_sharing_gfn_destroy(spage, cd, gfn_info);
         put_page_and_type(spage);
     } else {
         ret = 0;
@@ -929,7 +994,6 @@ int __mem_sharing_unshare_page(struct do
     void *s, *t;
     int last_gfn;
     gfn_info_t *gfn_info = NULL;
-    struct list_head *le;
 
     mfn = get_gfn(d, gfn, &p2mt);
@@ -947,37 +1011,35 @@ int __mem_sharing_unshare_page(struct do
         BUG();
     }
 
-    list_for_each(le, &page->sharing->gfns)
+    gfn_info = rmap_retrieve(d->domain_id, gfn, page);
+    if ( unlikely(gfn_info == NULL) )
     {
-        gfn_info = list_entry(le, gfn_info_t, list);
-        if ( (gfn_info->gfn == gfn) && (gfn_info->domain == d->domain_id) )
-            goto gfn_found;
+        gdprintk(XENLOG_ERR, "Could not find gfn_info for shared gfn: "
+                            "%lx\n", gfn);
+        BUG();
     }
-    gdprintk(XENLOG_ERR, "Could not find gfn_info for shared gfn: "
-                "%lx\n", gfn);
-    BUG();
-gfn_found:
     /* Do the accounting first. If anything fails below, we have bigger
      * bigger fish to fry. First, remove the gfn from the list. */
-    last_gfn = list_has_one_entry(&page->sharing->gfns);
+    last_gfn = rmap_has_one_entry(page);
     if ( last_gfn )
     {
         /* Clean up shared state. Get rid of the <domid, gfn> tuple
          * before destroying the rmap. */
-        mem_sharing_gfn_destroy(d, gfn_info);
-        audit_del_list(page);
+        mem_sharing_gfn_destroy(page, d, gfn_info);
+        page_sharing_dispose(page);
         page->sharing = NULL;
         atomic_dec(&nr_shared_mfns);
     }
    else
         atomic_dec(&nr_saved_mfns);
+
     /* If the GFN is getting destroyed drop the references to MFN
      * (possibly freeing the page), and exit early */
     if ( flags & MEM_SHARING_DESTROY_GFN )
     {
         if ( !last_gfn )
-            mem_sharing_gfn_destroy(d, gfn_info);
+            mem_sharing_gfn_destroy(page, d, gfn_info);
         put_page_and_type(page);
         mem_sharing_page_unlock(page);
         if ( last_gfn &&
@@ -1013,7 +1075,7 @@ gfn_found:
     unmap_domain_page(t);
 
     BUG_ON(set_shared_p2m_entry(d, gfn, page_to_mfn(page)) == 0);
-    mem_sharing_gfn_destroy(d, gfn_info);
+    mem_sharing_gfn_destroy(old_page, d, gfn_info);
     mem_sharing_page_unlock(old_page);
     put_page_and_type(old_page);
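To make the new interface concrete, here is a minimal stand-alone sketch of the seed/iterate pattern introduced above. This is not the Xen code: list_head, list_add and the container_of arithmetic are small local stand-ins so the example compiles on its own, but the iterator logic mirrors rmap_seed_iterator()/rmap_iterate().

/* Illustrative user-space sketch of the two-step rmap iterator.
 * Caching 'next' before handing back 'curr' is what makes it safe to
 * unlink (and even free) the returned entry while walking, which is the
 * property the unshare path relies on. */
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>

struct list_head { struct list_head *next, *prev; };

static void list_init(struct list_head *h) { h->next = h->prev = h; }

static void list_add(struct list_head *n, struct list_head *h)
{
    n->next = h->next;
    n->prev = h;
    h->next->prev = n;
    h->next = n;
}

typedef struct gfn_info {
    unsigned long gfn;
    unsigned short domain;
    struct list_head list;
} gfn_info_t;

struct rmap_iterator { struct list_head *head, *curr, *next; };

static void rmap_seed_iterator(struct list_head *head, struct rmap_iterator *ri)
{
    ri->head = head;
    ri->curr = head;
    ri->next = head->next;
}

static gfn_info_t *rmap_iterate(struct rmap_iterator *ri)
{
    if ( ri->next == ri->head )
        return NULL;
    ri->curr = ri->next;
    ri->next = ri->curr->next;
    /* container_of(curr, gfn_info_t, list) */
    return (gfn_info_t *)((char *)ri->curr - offsetof(gfn_info_t, list));
}

int main(void)
{
    struct list_head rmap;
    struct rmap_iterator ri;
    gfn_info_t *g;
    unsigned long i;

    list_init(&rmap);
    for ( i = 0; i < 4; i++ )
    {
        g = malloc(sizeof(*g));
        g->gfn = 0x1000 + i;
        g->domain = 1;
        list_add(&g->list, &rmap);
    }

    rmap_seed_iterator(&rmap, &ri);
    while ( (g = rmap_iterate(&ri)) != NULL )
    {
        printf("dom%u gfn %lx\n", (unsigned)g->domain, g->gfn);
        free(g); /* safe: the iterator already cached the next element */
    }
    return 0;
}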
Andres Lagar-Cavilla
2012-Apr-24 19:48 UTC
[PATCH 3 of 3] x86/mem_sharing: For shared pages with many references, use a hash table instead of a list
 xen/arch/x86/mm/mem_sharing.c     |  170 +++++++++++++++++++++++++++++++++++--
 xen/include/asm-x86/mem_sharing.h |   13 ++-
 2 files changed, 169 insertions(+), 14 deletions(-)

For shared frames that have many references, the doubly-linked list used to
store the rmap results in costly scans during unshare operations. To alleviate
the overhead, replace the linked list with a hash table. However, hash tables
are space-intensive, so only use them for pages that have "many" (arbitrary
threshold) references.

Unsharing is heavily exercised during domain destroy. In experimental testing,
for a domain that points over 100 thousand pages to the same shared frame,
domain destruction dropped from over 7 minutes(!) to less than two seconds.

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

diff -r 5965a38564fc -r 8a6f6d38cb84 xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -49,6 +49,17 @@ DEFINE_PER_CPU(pg_lock_data_t, __pld);
 #define MEM_SHARING_DEBUG(_f, _a...)                                  \
     debugtrace_printk("mem_sharing_debug: %s(): " _f, __func__, ##_a)
 
+/* Reverse map defines */
+#define RMAP_HASHTAB_ORDER   0
+#define RMAP_HASHTAB_SIZE   \
+        ((PAGE_SIZE << RMAP_HASHTAB_ORDER) / sizeof(struct list_head))
+#define RMAP_USES_HASHTAB(page) \
+        ((page)->sharing->hash_table.flag == NULL)
+#define RMAP_HEAVY_SHARED_PAGE   RMAP_HASHTAB_SIZE
+/* A bit of hysteresis. We don't want to be mutating between list and hash
+ * table constantly. */
+#define RMAP_LIGHT_SHARED_PAGE   (RMAP_HEAVY_SHARED_PAGE >> 2)
+
 #if MEM_SHARING_AUDIT
 
 static struct list_head shr_audit_list;
@@ -72,6 +83,11 @@ static inline void audit_add_list(struct
 /* Removes from the audit list and cleans up the page sharing metadata. */
 static inline void page_sharing_dispose(struct page_info *page)
 {
+    /* Unlikely given our thresholds, but we should be careful. */
+    if ( unlikely(RMAP_USES_HASHTAB(page)) )
+        free_xenheap_pages(page->sharing->hash_table.bucket,
+                           RMAP_HASHTAB_ORDER);
+
     spin_lock(&shr_audit_lock);
     list_del_rcu(&page->sharing->entry);
     spin_unlock(&shr_audit_lock);
@@ -89,6 +105,10 @@ int mem_sharing_audit(void)
 #define audit_add_list(p)  ((void)0)
 static inline void page_sharing_dispose(struct page_info *page)
 {
+    /* Unlikely given our thresholds, but we should be careful. */
+    if ( unlikely(RMAP_USES_HASHTAB(page)) )
+        free_xenheap_pages(page->sharing->hash_table.bucket,
+                           RMAP_HASHTAB_ORDER);
     xfree(page->sharing);
 }
@@ -145,6 +165,11 @@ static atomic_t nr_saved_mfns = ATOMIC
 static atomic_t nr_shared_mfns = ATOMIC_INIT(0);
 
 /** Reverse map **/
+/* Every shared frame keeps a reverse map (rmap) of <domain, gfn> tuples that
+ * this shared frame backs. For pages with a low degree of sharing, a O(n)
+ * search linked list is good enough. For pages with higher degree of sharing,
+ * we use a hash table instead. */
+
 typedef struct gfn_info
 {
     unsigned long gfn;
@@ -155,20 +180,109 @@ typedef struct gfn_info
 static inline void
 rmap_init(struct page_info *page)
 {
+    /* We always start off as a doubly linked list. */
     INIT_LIST_HEAD(&page->sharing->gfns);
 }
 
+/* Exceedingly simple "hash function" */
+#define HASH(domain, gfn)       \
+        (((gfn) + (domain)) % RMAP_HASHTAB_SIZE)
+
+/* Conversions. Tuned by the thresholds. Should only happen twice
+ * (once each) during the lifetime of a shared page */
+static inline int
+rmap_list_to_hash_table(struct page_info *page)
+{
+    unsigned int i;
+    struct list_head *pos, *tmp, *b =
+        alloc_xenheap_pages(RMAP_HASHTAB_ORDER, 0);
+
+    if ( b == NULL )
+        return -ENOMEM;
+
+    for (i = 0; i < RMAP_HASHTAB_SIZE; i++)
+        INIT_LIST_HEAD(b + i);
+
+    list_for_each_safe(pos, tmp, &page->sharing->gfns)
+    {
+        gfn_info_t *gfn_info = list_entry(pos, gfn_info_t, list);
+        struct list_head *bucket = b + HASH(gfn_info->domain, gfn_info->gfn);
+        list_del(pos);
+        list_add(pos, bucket);
+    }
+
+    page->sharing->hash_table.bucket = b;
+    page->sharing->hash_table.flag   = NULL;
+
+    return 0;
+}
+
 static inline void
-rmap_del(gfn_info_t *gfn_info, struct page_info *page)
+rmap_hash_table_to_list(struct page_info *page)
 {
+    unsigned int i;
+    struct list_head *bucket = page->sharing->hash_table.bucket;
+
+    INIT_LIST_HEAD(&page->sharing->gfns);
+
+    for (i = 0; i < RMAP_HASHTAB_SIZE; i++)
+    {
+        struct list_head *pos, *tmp, *head = bucket + i;
+        list_for_each_safe(pos, tmp, head)
+        {
+            list_del(pos);
+            list_add(pos, &page->sharing->gfns);
+        }
+    }
+
+    free_xenheap_pages(bucket, RMAP_HASHTAB_ORDER);
+}
+
+/* Generic accessors to the rmap */
+static inline unsigned long
+rmap_count(struct page_info *pg)
+{
+    unsigned long count;
+    unsigned long t = read_atomic(&pg->u.inuse.type_info);
+    count = t & PGT_count_mask;
+    if ( t & PGT_locked )
+        count--;
+    return count;
+}
+
+/* The page type count is always decreased after removing from the rmap.
+ * Use a convert flag to avoid mutating the rmap if in the middle of an
+ * iterator, or if the page will be soon destroyed anyways. */
+static inline void
+rmap_del(gfn_info_t *gfn_info, struct page_info *page, int convert)
+{
+    if ( RMAP_USES_HASHTAB(page) && convert &&
+         (rmap_count(page) <= RMAP_LIGHT_SHARED_PAGE) )
+        rmap_hash_table_to_list(page);
+
+    /* Regardless of rmap type, same removal operation */
     list_del(&gfn_info->list);
 }
 
+/* The page type count is always increased before adding to the rmap. */
 static inline void
 rmap_add(gfn_info_t *gfn_info, struct page_info *page)
 {
+    struct list_head *head;
+
+    if ( !RMAP_USES_HASHTAB(page) &&
+         (rmap_count(page) >= RMAP_HEAVY_SHARED_PAGE) )
+        /* The conversion may fail with ENOMEM. We'll be less efficient,
+         * but no reason to panic. */
+        (void)rmap_list_to_hash_table(page);
+
+    head = (RMAP_USES_HASHTAB(page)) ?
+        page->sharing->hash_table.bucket +
+            HASH(gfn_info->domain, gfn_info->gfn) :
+        &page->sharing->gfns;
+
     INIT_LIST_HEAD(&gfn_info->list);
-    list_add(&gfn_info->list, &page->sharing->gfns);
+    list_add(&gfn_info->list, head);
 }
 
 static inline gfn_info_t *
@@ -176,27 +290,33 @@ rmap_retrieve(uint16_t domain_id, unsign
               struct page_info *page)
 {
     gfn_info_t *gfn_info;
-    struct list_head *le;
-    list_for_each(le, &page->sharing->gfns)
+    struct list_head *le, *head;
+
+    head = (RMAP_USES_HASHTAB(page)) ?
+        page->sharing->hash_table.bucket + HASH(domain_id, gfn) :
+        &page->sharing->gfns;
+
+    list_for_each(le, head)
     {
         gfn_info = list_entry(le, gfn_info_t, list);
         if ( (gfn_info->gfn == gfn) && (gfn_info->domain == domain_id) )
             return gfn_info;
     }
+
+    /* Nothing was found */
     return NULL;
 }
 
 /* Returns true if the rmap has only one entry. O(1) complexity. */
 static inline int rmap_has_one_entry(struct page_info *page)
 {
-    struct list_head *head = &page->sharing->gfns;
-    return (head->next != head) && (head->next->next == head);
+    return (rmap_count(page) == 1);
 }
 
 /* Returns true if the rmap has any entries. O(1) complexity. */
 static inline int rmap_has_entries(struct page_info *page)
 {
-    return (!list_empty(&page->sharing->gfns));
+    return (rmap_count(page) != 0);
 }
 
 /* The iterator hides the details of how the rmap is implemented. This
@@ -204,20 +324,43 @@ static inline int rmap_has_entries(struc
 struct rmap_iterator {
     struct list_head *curr;
     struct list_head *next;
+    unsigned int bucket;
 };
 
 static inline void
 rmap_seed_iterator(struct page_info *page, struct rmap_iterator *ri)
 {
-    ri->curr = &page->sharing->gfns;
+    ri->curr = (RMAP_USES_HASHTAB(page)) ?
+        page->sharing->hash_table.bucket :
+        &page->sharing->gfns;
     ri->next = ri->curr->next;
+    ri->bucket = 0;
 }
 
 static inline gfn_info_t *
 rmap_iterate(struct page_info *page, struct rmap_iterator *ri)
 {
-    if ( ri->next == &page->sharing->gfns)
-        return NULL;
+    struct list_head *head = (RMAP_USES_HASHTAB(page)) ?
+        page->sharing->hash_table.bucket + ri->bucket :
+        &page->sharing->gfns;
+
+retry:
+    if ( ri->next == head)
+    {
+        if ( RMAP_USES_HASHTAB(page) )
+        {
+            ri->bucket++;
+            if ( ri->bucket >= RMAP_HASHTAB_SIZE )
+                /* No more hash table buckets */
+                return NULL;
+            head = page->sharing->hash_table.bucket + ri->bucket;
+            ri->curr = head;
+            ri->next = ri->curr->next;
+            goto retry;
+        } else
+            /* List exhausted */
+            return NULL;
+    }
 
     ri->curr = ri->next;
     ri->next = ri->curr->next;
@@ -253,7 +396,7 @@ static inline void mem_sharing_gfn_destr
     atomic_dec(&d->shr_pages);
 
     /* Free the gfn_info structure. */
-    rmap_del(gfn_info, page);
+    rmap_del(gfn_info, page, 1);
     xfree(gfn_info);
 }
@@ -870,8 +1013,9 @@ int mem_sharing_share_pages(struct domai
         /* Get the source page and type, this should never fail:
         * we are under shr lock, and got a successful lookup */
         BUG_ON(!get_page_and_type(spage, dom_cow, PGT_shared_page));
-        /* Move the gfn_info from client list to source list */
-        rmap_del(gfn, cpage);
+        /* Move the gfn_info from client list to source list.
+         * Don't change the type of rmap for the client page. */
+        rmap_del(gfn, cpage, 0);
         rmap_add(gfn, spage);
         put_page_and_type(cpage);
         d = get_domain_by_id(gfn->domain);
diff -r 5965a38564fc -r 8a6f6d38cb84 xen/include/asm-x86/mem_sharing.h
--- a/xen/include/asm-x86/mem_sharing.h
+++ b/xen/include/asm-x86/mem_sharing.h
@@ -30,6 +30,13 @@
 
 typedef uint64_t shr_handle_t;
 
+typedef struct rmap_hashtab {
+    struct list_head *bucket;
+    /* Overlaps with prev pointer of list_head in union below.
+     * Unlike the prev pointer, this can be NULL. */
+    void *flag;
+} rmap_hashtab_t;
+
 struct page_sharing_info
 {
     struct page_info *pg;   /* Back pointer to the page. */
@@ -38,7 +45,11 @@ struct page_sharing_info
     struct list_head entry; /* List of all shared pages (entry). */
     struct rcu_head rcu_head; /* List of all shared pages (entry). */
 #endif
-    struct list_head gfns; /* List of domains and gfns for this page (head). */
+    /* Reverse map of <domain,gfn> tuples for this shared frame. */
+    union {
+        struct list_head gfns;
+        rmap_hashtab_t hash_table;
+    };
 };
 
 #ifdef __x86_64__
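As a complement, here is a small stand-alone sketch of the threshold/hysteresis behaviour described above (list to hash table at 256 references, back to a list only at 64). The constants and the HASH() expression mirror the patch, but rmap_adjust() and the enum are illustrative user-space stand-ins, not the hypervisor implementation:

/* Simplified model of the rmap mutation policy: grow into a hash table at
 * RMAP_HEAVY_SHARED_PAGE references, fall back to a list only once the
 * count drops to RMAP_LIGHT_SHARED_PAGE, so the rmap does not flap around
 * a single threshold. Illustrative only. */
#include <stdio.h>

#define RMAP_HASHTAB_SIZE       256  /* bucket heads that fit in one page */
#define RMAP_HEAVY_SHARED_PAGE  RMAP_HASHTAB_SIZE
#define RMAP_LIGHT_SHARED_PAGE  (RMAP_HEAVY_SHARED_PAGE >> 2)
#define HASH(domain, gfn)       (((gfn) + (domain)) % RMAP_HASHTAB_SIZE)

enum rmap_kind { RMAP_LIST, RMAP_HASHTAB };

static enum rmap_kind rmap_adjust(enum rmap_kind kind, unsigned long refs)
{
    if ( kind == RMAP_LIST && refs >= RMAP_HEAVY_SHARED_PAGE )
        return RMAP_HASHTAB;   /* the conversion rmap_add() triggers */
    if ( kind == RMAP_HASHTAB && refs <= RMAP_LIGHT_SHARED_PAGE )
        return RMAP_LIST;      /* the conversion rmap_del() triggers */
    return kind;
}

int main(void)
{
    enum rmap_kind kind = RMAP_LIST;
    unsigned long refs[] = { 10, 255, 256, 300, 200, 65, 64, 10 };
    unsigned int i;

    printf("bucket for <dom 1, gfn 0x1234> = %lu\n",
           (unsigned long)HASH(1, 0x1234UL));

    for ( i = 0; i < sizeof(refs) / sizeof(refs[0]); i++ )
    {
        kind = rmap_adjust(kind, refs[i]);
        printf("refs=%3lu -> %s\n", refs[i],
               kind == RMAP_LIST ? "list" : "hashtab");
    }
    return 0;
}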
Tim Deegan
2012-Apr-26 09:12 UTC
Re: [PATCH 0 of 3] x86/mem_sharing: Improve performance of rmap, fix cascading bugs
At 15:48 -0400 on 24 Apr (1335282500), Andres Lagar-Cavilla wrote:
> This is a repost of patches 2 and 3 of the series initially posted on Apr 12th.
> 
> The first patch has been split into two functionally isolated patches as per
> Tim Deegan's request.

Applied, thanks.

Tim.