Hi all,
I have a basic question about memory sharing.
What is the intended purpose of the hashtable / handle abstraction for
page sharing?
I understand that it''s a nice to associate a handle with a
particularly revision of a shared page, but I don''t see why
it''s
necessary given that whoever is calling nominate() must be aware of
the contents of the page (post-nomination) and how many times it has
been nominated.
Also, with a lot of shared pages, it seems like you could easily end
up with buckets in the hashtable that are thousands of pages deep
(i.e. consider 4GB shared on a large machine, each bucket will be 1000
handles on average). When this happens, it seems like it will
generate a lot of unnecessary memory accesses to share and unshare
pages (esp. since there is a single lock held during scanning!).
Anyways, I''ve got a not-yet-prime-time patch that removes the
hashtable and just uses mfns. In true form, I''ve probably missed
something critical with the handle approach, so I''d love some feedback
on this before investing effort in testing and scalability comparison.
The patch essentially embeds the necessary extra bit of information
in the struct page_info for shared pages (without growing the
structure).
Thanks! (patch inline below and attached)
Cheers,
-Adin
(inline patch follows)
diff -r 38135be750ed xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4230,12 +4230,19 @@ int page_make_sharable(struct domain *d,
struct page_info *page,
int expected_refcnt)
{
+ struct list_head *shared_gfns = NULL;
+
+ if((shared_gfns = xmalloc(struct list_head)) == NULL)
+ return -ENOMEM;
+ INIT_LIST_HEAD(shared_gfns);
+
spin_lock(&d->page_alloc_lock);
/* Change page type and count atomically */
if ( !get_page_and_type(page, d, PGT_shared_page) )
{
spin_unlock(&d->page_alloc_lock);
+ xfree(shared_gfns);
return -EINVAL;
}
@@ -4244,6 +4251,7 @@ int page_make_sharable(struct domain *d,
{
put_page_and_type(page);
spin_unlock(&d->page_alloc_lock);
+ xfree(shared_gfns);
return -EEXIST;
}
@@ -4254,12 +4262,14 @@ int page_make_sharable(struct domain *d,
/* Return type count back to zero */
put_page_and_type(page);
spin_unlock(&d->page_alloc_lock);
+ xfree(shared_gfns);
return -E2BIG;
}
page_set_owner(page, dom_cow);
d->tot_pages--;
page_list_del(page, &d->page_list);
+ page->shared_gfns = shared_gfns;
spin_unlock(&d->page_alloc_lock);
return 0;
}
@@ -4280,6 +4290,9 @@ int page_make_private(struct domain *d,
return -EEXIST;
}
+ /* Free the shared page info */
+ xfree(page->shared_gfns);
+
/* Drop the final typecount */
put_page_and_type(page);
diff -r 38135be750ed xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -39,8 +39,8 @@
#if MEM_SHARING_AUDIT
static void mem_sharing_audit(void);
-#define MEM_SHARING_DEBUG(_f, _a...) \
- debugtrace_printk("mem_sharing_debug: %s(): " _f, __func__, ##_a)
+#define MEM_SHARING_DEBUG(_f, _a...) \
+ debugtrace_printk("mem_sharing_debug: %s(): " _f, __func__,
##_a)
#else
# define mem_sharing_audit() do {} while(0)
#endif /* MEM_SHARING_AUDIT */
@@ -55,19 +55,7 @@ static void mem_sharing_audit(void);
#undef page_to_mfn
#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
-static shr_handle_t next_handle = 1;
-static atomic_t nr_saved_mfns = ATOMIC_INIT(0);
-
-typedef struct shr_hash_entry
-{
- shr_handle_t handle;
- mfn_t mfn;
- struct shr_hash_entry *next;
- struct list_head gfns;
-} shr_hash_entry_t;
-
-#define SHR_HASH_LENGTH 1000
-static shr_hash_entry_t *shr_hash[SHR_HASH_LENGTH];
+static atomic_t nr_saved_mfns = ATOMIC_INIT(0);
typedef struct gfn_info
{
@@ -84,28 +72,9 @@ static inline int list_has_one_entry(str
return (head->next != head) && (head->next->next == head);
}
-static inline struct gfn_info* gfn_get_info(struct list_head *list)
+static inline gfn_info_t* gfn_get_info(struct list_head *list)
{
- return list_entry(list->next, struct gfn_info, list);
-}
-
-static void __init mem_sharing_hash_init(void)
-{
- int i;
-
- mm_lock_init(&shr_lock);
- for(i=0; i<SHR_HASH_LENGTH; i++)
- shr_hash[i] = NULL;
-}
-
-static shr_hash_entry_t *mem_sharing_hash_alloc(void)
-{
- return xmalloc(shr_hash_entry_t);
-}
-
-static void mem_sharing_hash_destroy(shr_hash_entry_t *e)
-{
- xfree(e);
+ return list_entry(list->next, gfn_info_t, list);
}
static gfn_info_t *mem_sharing_gfn_alloc(void)
@@ -130,125 +99,99 @@ static void mem_sharing_gfn_destroy(gfn_
xfree(gfn);
}
-static shr_hash_entry_t* mem_sharing_hash_lookup(shr_handle_t handle)
+static struct page_info* mem_sharing_lookup(shr_handle_t handle)
{
- shr_hash_entry_t *e;
-
- e = shr_hash[handle % SHR_HASH_LENGTH];
- while(e != NULL)
+ if( mfn_valid(_mfn(handle)) )
{
- if(e->handle == handle)
- return e;
- e = e->next;
+ struct page_info* page = mfn_to_page(_mfn(handle));
+ if( page_get_owner(page) == dom_cow )
+ return page;
}
return NULL;
}
-static shr_hash_entry_t* mem_sharing_hash_insert(shr_handle_t handle,
mfn_t mfn)
+#if MEM_SHARING_AUDIT
+static int mem_sharing_audit(void)
{
- shr_hash_entry_t *e, **ee;
-
- e = mem_sharing_hash_alloc();
- if(e == NULL) return NULL;
- e->handle = handle;
- e->mfn = mfn;
- ee = &shr_hash[handle % SHR_HASH_LENGTH];
- e->next = *ee;
- *ee = e;
- return e;
-}
-
-static void mem_sharing_hash_delete(shr_handle_t handle)
-{
- shr_hash_entry_t **pprev, *e;
-
- pprev = &shr_hash[handle % SHR_HASH_LENGTH];
- e = *pprev;
- while(e != NULL)
- {
- if(e->handle == handle)
- {
- *pprev = e->next;
- mem_sharing_hash_destroy(e);
- return;
- }
- pprev = &e->next;
- e = e->next;
- }
- printk("Could not find shr entry for handle
%"PRIx64"\n", handle);
- BUG();
-}
-
-#if MEM_SHARING_AUDIT
-static void mem_sharing_audit(void)
-{
- shr_hash_entry_t *e;
- struct list_head *le;
- gfn_info_t *g;
- int bucket;
- struct page_info *pg;
+ int errors = 0;
+ unsigned long count_found = 0;
+ unsigned long mfn;
ASSERT(shr_locked_by_me());
- for(bucket=0; bucket < SHR_HASH_LENGTH; bucket++)
+ for( mfn = 0; mfn < max_page; mfn++ )
{
- e = shr_hash[bucket];
- /* Loop over all shr_hash_entries */
- while(e != NULL)
+ unsigned long nr_gfns = 0;
+ struct page_info *pg;
+ struct list_head *le;
+ gfn_info_t *g;
+
+ if( !mfn_valid(_mfn(mfn)) )
+ continue;
+
+ pg = mfn_to_page(_mfn(mfn));
+
+ /* Check if the MFN has correct type, owner and handle. */
+ if((pg->u.inuse.type_info & PGT_type_mask) != PGT_shared_page)
+ continue;
+
+ /* Count this page. */
+ count_found++;
+
+ /* Check the page owner. */
+ if(page_get_owner(pg) != dom_cow)
{
- int nr_gfns=0;
+ MEM_SHARING_DEBUG("mfn %lx shared, but wrong owner
(%d)!\n",
+ mfn, page_get_owner(pg)->domain_id);
+ errors++;
+ }
- /* Check if the MFN has correct type, owner and handle */
- pg = mfn_to_page(e->mfn);
- if((pg->u.inuse.type_info & PGT_type_mask) !=
PGT_shared_page)
- MEM_SHARING_DEBUG("mfn %lx not shared, but in the
hash!\n",
- mfn_x(e->mfn));
- if(page_get_owner(pg) != dom_cow)
- MEM_SHARING_DEBUG("mfn %lx shared, but wrong owner
(%d)!\n",
- mfn_x(e->mfn),
- page_get_owner(pg)->domain_id);
- if(e->handle != pg->shr_handle)
- MEM_SHARING_DEBUG("mfn %lx shared, but wrong handle "
- "(%ld != %ld)!\n",
- mfn_x(e->mfn), pg->shr_handle,
e->handle);
- /* Check if all GFNs map to the MFN, and the p2m types */
- list_for_each(le, &e->gfns)
+ /* Check if all GFNs map to the MFN, and the p2m types */
+ list_for_each(le, pg->shared_gfns)
+ {
+ struct domain *d;
+ p2m_type_t t;
+ mfn_t o_mfn;
+
+ g = list_entry(le, gfn_info_t, list);
+ d = get_domain_by_id(g->domain);
+ if(d == NULL)
{
- struct domain *d;
- p2m_type_t t;
- mfn_t mfn;
+ MEM_SHARING_DEBUG("Unknown dom: %d, for PFN=%lx,
MFN=%lx\n",
+ g->domain, g->gfn, mfn);
+ errors++;
+ continue;
+ }
+ o_mfn = gfn_to_mfn(d, g->gfn, &t);
+ if(mfn_x(o_mfn) != mfn)
+ {
+ MEM_SHARING_DEBUG("Incorrect P2M for d=%d, PFN=%lx."
+ "Expecting MFN=%ld, got %ld\n",
+ g->domain, g->gfn, mfn, mfn_x(o_mfn));
+ errors++;
+ }
+ if(t != p2m_ram_shared)
+ {
+ MEM_SHARING_DEBUG("Incorrect P2M type for d=%d,
PFN=%lx."
+ "Expecting t=%d, got %d\n",
+ g->domain, g->gfn, mfn, p2m_ram_shared,
t);
+ errors++;
+ }
+ put_domain(d);
+ nr_gfns++;
+ }
- g = list_entry(le, struct gfn_info, list);
- d = get_domain_by_id(g->domain);
- if(d == NULL)
- {
- MEM_SHARING_DEBUG("Unknow dom: %d, for PFN=%lx,
MFN=%lx\n",
- g->domain, g->gfn, mfn_x(e->mfn));
- continue;
- }
- mfn = gfn_to_mfn(d, g->gfn, &t);
- if(mfn_x(mfn) != mfn_x(e->mfn))
- MEM_SHARING_DEBUG("Incorrect P2M for d=%d,
PFN=%lx."
- "Expecting MFN=%ld, got %ld\n",
- g->domain, g->gfn,
mfn_x(e->mfn),
- mfn_x(mfn));
- if(t != p2m_ram_shared)
- MEM_SHARING_DEBUG("Incorrect P2M type for d=%d,
PFN=%lx."
- "Expecting t=%d, got %d\n",
- g->domain, g->gfn,
mfn_x(e->mfn),
- p2m_ram_shared, t);
- put_domain(d);
- nr_gfns++;
- }
- if(nr_gfns != (pg->u.inuse.type_info & PGT_count_mask))
- MEM_SHARING_DEBUG("Mismatched counts for MFN=%lx."
- "nr_gfns in hash %d, in type_info
%d\n",
- mfn_x(e->mfn), nr_gfns,
- (pg->u.inuse.type_info &
PGT_count_mask));
- e = e->next;
+ if(nr_gfns != (pg->u.inuse.type_info & PGT_count_mask))
+ {
+ MEM_SHARING_DEBUG("Mismatched counts for MFN=%lx."
+ "nr_gfns in hash %d, in type_info
%d\n",
+ mfn, nr_gfns, (pg->u.inuse.type_info &
PGT_count_mask));
+ errors++;
}
}
+
+ return errors;
}
#endif
@@ -392,37 +335,6 @@ static int mem_sharing_gref_to_gfn(struc
return -2;
}
-/* Account for a GFN being shared/unshared.
- * When sharing this function needs to be called _before_ gfn lists are merged
- * together, but _after_ gfn is removed from the list when unsharing.
- */
-static int mem_sharing_gfn_account(struct gfn_info *gfn, int sharing)
-{
- struct domain *d;
-
- /* A) When sharing:
- * if the gfn being shared is in > 1 long list, its already been
- * accounted for
- * B) When unsharing:
- * if the list is longer than > 1, we don''t have to account
yet.
- */
- if(list_has_one_entry(&gfn->list))
- {
- d = get_domain_by_id(gfn->domain);
- BUG_ON(!d);
- if(sharing)
- atomic_inc(&d->shr_pages);
- else
- atomic_dec(&d->shr_pages);
- put_domain(d);
-
- return 1;
- }
- mem_sharing_audit();
-
- return 0;
-}
-
int mem_sharing_debug_gref(struct domain *d, grant_ref_t ref)
{
grant_entry_header_t *shah;
@@ -457,8 +369,6 @@ int mem_sharing_nominate_page(struct dom
mfn_t mfn;
struct page_info *page;
int ret;
- shr_handle_t handle;
- shr_hash_entry_t *hash_entry;
struct gfn_info *gfn_info;
*phandle = 0UL;
@@ -474,7 +384,7 @@ int mem_sharing_nominate_page(struct dom
/* Return the handle if the page is already shared */
page = mfn_to_page(mfn);
if (p2m_is_shared(p2mt)) {
- *phandle = page->shr_handle;
+ *phandle = mfn_x(mfn);
ret = 0;
goto out;
}
@@ -490,16 +400,8 @@ int mem_sharing_nominate_page(struct dom
/* Create the handle */
ret = -ENOMEM;
- handle = next_handle++;
- if((hash_entry = mem_sharing_hash_insert(handle, mfn)) == NULL)
- {
+ if((gfn_info = mem_sharing_gfn_alloc()) == NULL)
goto out;
- }
- if((gfn_info = mem_sharing_gfn_alloc()) == NULL)
- {
- mem_sharing_hash_destroy(hash_entry);
- goto out;
- }
/* Change the p2m type */
if(p2m_change_type(d, gfn, p2mt, p2m_ram_shared) != p2mt)
@@ -509,7 +411,6 @@ int mem_sharing_nominate_page(struct dom
* The mfn needs to revert back to rw type. This should never fail,
* since no-one knew that the mfn was temporarily sharable */
BUG_ON(page_make_private(d, page) != 0);
- mem_sharing_hash_destroy(hash_entry);
mem_sharing_gfn_destroy(gfn_info, 0);
goto out;
}
@@ -517,13 +418,12 @@ int mem_sharing_nominate_page(struct dom
/* Update m2p entry to SHARED_M2P_ENTRY */
set_gpfn_from_mfn(mfn_x(mfn), SHARED_M2P_ENTRY);
- INIT_LIST_HEAD(&hash_entry->gfns);
INIT_LIST_HEAD(&gfn_info->list);
- list_add(&gfn_info->list, &hash_entry->gfns);
+ list_add(&gfn_info->list, page->shared_gfns);
gfn_info->gfn = gfn;
gfn_info->domain = d->domain_id;
- page->shr_handle = handle;
- *phandle = handle;
+ *phandle = mfn_x(mfn);
+ atomic_inc(&d->shr_pages);
ret = 0;
@@ -534,29 +434,25 @@ out:
int mem_sharing_share_pages(shr_handle_t sh, shr_handle_t ch)
{
- shr_hash_entry_t *se, *ce;
struct page_info *spage, *cpage;
struct list_head *le, *te;
- struct gfn_info *gfn;
+ gfn_info_t *gfn;
struct domain *d;
int ret;
shr_lock();
ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID;
- se = mem_sharing_hash_lookup(sh);
- if(se == NULL) goto err_out;
+ spage = mem_sharing_lookup(sh);
+ if(spage == NULL) goto err_out;
ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID;
- ce = mem_sharing_hash_lookup(ch);
- if(ce == NULL) goto err_out;
- spage = mfn_to_page(se->mfn);
- cpage = mfn_to_page(ce->mfn);
- /* gfn lists always have at least one entry => save to call list_entry
*/
- mem_sharing_gfn_account(gfn_get_info(&ce->gfns), 1);
- mem_sharing_gfn_account(gfn_get_info(&se->gfns), 1);
- list_for_each_safe(le, te, &ce->gfns)
+ cpage = mem_sharing_lookup(ch);
+ if(cpage == NULL) goto err_out;
+
+ /* Merge the lists together */
+ list_for_each_safe(le, te, cpage->shared_gfns)
{
- gfn = list_entry(le, struct gfn_info, list);
+ gfn = list_entry(le, gfn_info_t, list);
/* Get the source page and type, this should never fail
* because we are under shr lock, and got non-null se */
BUG_ON(!get_page_and_type(spage, dom_cow, PGT_shared_page));
@@ -564,13 +460,12 @@ int mem_sharing_share_pages(shr_handle_t
list_del(&gfn->list);
d = get_domain_by_id(gfn->domain);
BUG_ON(!d);
- BUG_ON(set_shared_p2m_entry(d, gfn->gfn, se->mfn) == 0);
+ BUG_ON(set_shared_p2m_entry(d, gfn->gfn, _mfn(sh)) == 0);
put_domain(d);
- list_add(&gfn->list, &se->gfns);
+ list_add(&gfn->list, spage->shared_gfns);
put_page_and_type(cpage);
- }
- ASSERT(list_empty(&ce->gfns));
- mem_sharing_hash_delete(ch);
+ }
+ ASSERT(list_empty(cpage->shared_gfns));
atomic_inc(&nr_saved_mfns);
/* Free the client page */
if(test_and_clear_bit(_PGC_allocated, &cpage->count_info))
@@ -592,9 +487,7 @@ int mem_sharing_unshare_page(struct doma
struct page_info *page, *old_page;
void *s, *t;
int ret, last_gfn;
- shr_hash_entry_t *hash_entry;
- struct gfn_info *gfn_info = NULL;
- shr_handle_t handle;
+ gfn_info_t *gfn_info = NULL;
struct list_head *le;
shr_lock();
@@ -604,37 +497,40 @@ int mem_sharing_unshare_page(struct doma
mfn = gfn_to_mfn(d, gfn, &p2mt);
/* Has someone already unshared it? */
- if (!p2m_is_shared(p2mt)) {
+ if (!p2m_is_shared(p2mt))
+ {
shr_unlock();
return 0;
}
- page = mfn_to_page(mfn);
- handle = page->shr_handle;
-
- hash_entry = mem_sharing_hash_lookup(handle);
- list_for_each(le, &hash_entry->gfns)
+ page = mem_sharing_lookup((shr_handle_t)mfn_x(mfn));
+ if (page == NULL || page_get_owner(page) != dom_cow)
{
- gfn_info = list_entry(le, struct gfn_info, list);
+ printk("Domain p2m is shared, but page is not: %lx\n", gfn);
+ BUG();
+ }
+
+ list_for_each(le, page->shared_gfns)
+ {
+ gfn_info = list_entry(le, gfn_info_t, list);
if((gfn_info->gfn == gfn) && (gfn_info->domain ==
d->domain_id))
goto gfn_found;
}
printk("Could not find gfn_info for shared gfn: %lx\n", gfn);
BUG();
+
gfn_found:
/* Delete gfn_info from the list, but hold on to it, until we''ve
allocated
* memory to make a copy */
list_del(&gfn_info->list);
- last_gfn = list_empty(&hash_entry->gfns);
+ last_gfn = list_empty(page->shared_gfns);
/* If the GFN is getting destroyed drop the references to MFN
* (possibly freeing the page), and exit early */
if(flags & MEM_SHARING_DESTROY_GFN)
{
mem_sharing_gfn_destroy(gfn_info, !last_gfn);
- if(last_gfn)
- mem_sharing_hash_delete(handle);
- else
+ if(!last_gfn)
/* Even though we don''t allocate a private page, we have
to account
* for the MFN that originally backed this PFN. */
atomic_dec(&nr_saved_mfns);
@@ -656,7 +552,7 @@ gfn_found:
{
/* We''ve failed to obtain memory for private page. Need to
re-add the
* gfn_info to relevant list */
- list_add(&gfn_info->list, &hash_entry->gfns);
+ list_add(&gfn_info->list, page->shared_gfns);
shr_unlock();
return -ENOMEM;
}
@@ -673,9 +569,7 @@ gfn_found:
private_page_found:
/* We''ve got a private page, we can commit the gfn destruction */
mem_sharing_gfn_destroy(gfn_info, !last_gfn);
- if(last_gfn)
- mem_sharing_hash_delete(handle);
- else
+ if(!last_gfn)
atomic_dec(&nr_saved_mfns);
if(p2m_change_type(d, gfn, p2m_ram_shared, p2m_ram_rw) !@@ -786,6 +680,5 @@
int mem_sharing_domctl(struct domain *d,
void __init mem_sharing_init(void)
{
printk("Initing memory sharing.\n");
- mem_sharing_hash_init();
+ mm_lock_init(&shr_lock);
}
-
diff -r 38135be750ed xen/include/asm-x86/mem_sharing.h
--- a/xen/include/asm-x86/mem_sharing.h
+++ b/xen/include/asm-x86/mem_sharing.h
@@ -41,9 +41,10 @@ int mem_sharing_unshare_page(struct doma
int mem_sharing_sharing_resume(struct domain *d);
int mem_sharing_domctl(struct domain *d,
xen_domctl_mem_sharing_op_t *mec);
+
void mem_sharing_init(void);
-#else
+#else
#define mem_sharing_init() do { } while (0)
diff -r 38135be750ed xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -49,8 +49,15 @@ struct page_info
/* For non-pinnable single-page shadows, a higher entry that points
* at us. */
paddr_t up;
- /* For shared/sharable pages the sharing handle */
- uint64_t shr_handle;
+ /* For shared/sharable pages, we use a doubly-linked list
+ * of all the domains that map this page (w/ associated PFNs).
+ * Previously, this was a handle to a separate hashtable structure,
+ * however given that this is the only real information we need
+ * about a shared page, we can associate this directly with the
+ * page_info frame and use the mfn as the handle for usercode.
+ * This list is allocated and freed when a page is shared/unshared.
+ */
+ struct list_head *shared_gfns;
};
/* Reference count and various PGC_xxx flags and fields. */
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel