Dan Magenheimer
2011-Jun-15 21:59 UTC
[Xen-devel] [PATCH] [linux] xen: tmem: frontswap-tmemonly
(Resending, just noticed I left xen-devel off the list.) Hi Konrad (and any others interested in reviewing) -- This patch provides Linux kernel changes to drivers/xen/tmem.c (and drivers/xen build files) for the Xen tmem shim to support frontswap. It builds successfully even if the frontswap patchset is not present, so is suitable for merging in an upstream tree that has the cleancache patchset but does not yet have the frontswap patchset. Note that minor variations of this code have been posted and working with Xen tmem for 2-1/2 years, but the code was left behind when cleancache was split off to "divide and conquer" for upstream submission. I believe this patch supersedes the patch proposed in the xen-devel thread titled: [PATCH] xen-tmem: Build cleancache shim to Xen Transcendent Memory only if Cleancache is enabled For your convenience, the patch is also attached and in the following git branch: git://git.kernel.org/pub/scm/linux/kernel/git/djm/tmem.git#stable/frontswap-tmemonly (since 3.0-rc1) drivers/xen/Kconfig | 7 ++ drivers/xen/Makefile | 2 +- drivers/xen/tmem.c | 168 ++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 164 insertions(+), 13 deletions(-) Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com> Thanks, Dan diff -Napur linux-3.0-rc1/drivers/xen/Kconfig linux-3.0-rc1-frontswap-tmemonly/drivers/xen/Kconfig --- linux-3.0-rc1/drivers/xen/Kconfig 2011-05-29 18:43:36.000000000 -0600 +++ linux-3.0-rc1-frontswap-tmemonly/drivers/xen/Kconfig 2011-06-14 10:44:20.091857809 -0600 @@ -105,4 +105,11 @@ config SWIOTLB_XEN depends on PCI select SWIOTLB +config XEN_TMEM + bool + default y if (CLEANCACHE || FRONTSWAP) + help + Shim to interface in-kernel Transcendent Memory hooks + (e.g. cleancache and frontswap) to Xen tmem hypercalls. 
+ endmenu diff -Napur linux-3.0-rc1/drivers/xen/Makefile linux-3.0-rc1-frontswap-tmemonly/drivers/xen/Makefile --- linux-3.0-rc1/drivers/xen/Makefile 2011-05-29 18:43:36.000000000 -0600 +++ linux-3.0-rc1-frontswap-tmemonly/drivers/xen/Makefile 2011-06-14 10:41:16.508856401 -0600 @@ -1,6 +1,5 @@ obj-y += grant-table.o features.o events.o manage.o balloon.o obj-y += xenbus/ -obj-y += tmem.o nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_features.o := $(nostackp) @@ -15,6 +14,7 @@ obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o +obj-$(CONFIG_XEN_TMEM) += tmem.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o obj-$(CONFIG_XEN_DOM0) += pci.o diff -Napur linux-3.0-rc1/drivers/xen/tmem.c linux-3.0-rc1-frontswap-tmemonly/drivers/xen/tmem.c --- linux-3.0-rc1/drivers/xen/tmem.c 2011-05-29 18:43:36.000000000 -0600 +++ linux-3.0-rc1-frontswap-tmemonly/drivers/xen/tmem.c 2011-06-14 16:34:23.487854177 -0600 @@ -1,7 +1,7 @@ /* * Xen implementation for transcendent memory (tmem) * - * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. + * Copyright (C) 2009-2011 Oracle Corp. All rights reserved. 
* Author: Dan Magenheimer */ @@ -9,8 +9,14 @@ #include <linux/types.h> #include <linux/init.h> #include <linux/pagemap.h> +#include <linux/module.h> #include <linux/cleancache.h> +/* temporary ifdef until include/linux/frontswap.h is upstream */ +#ifdef CONFIG_FRONTSWAP +#include <linux/frontswap.h> +#endif + #include <xen/xen.h> #include <xen/interface/xen.h> #include <asm/xen/hypercall.h> @@ -122,14 +128,8 @@ static int xen_tmem_flush_object(u32 poo return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); } -static int xen_tmem_destroy_pool(u32 pool_id) -{ - struct tmem_oid oid = { { 0 } }; - - return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); -} - -int tmem_enabled; +int tmem_enabled __read_mostly; +EXPORT_SYMBOL(tmem_enabled); static int __init enable_tmem(char *s) { @@ -139,6 +139,14 @@ static int __init enable_tmem(char *s) __setup("tmem", enable_tmem); +#ifdef CONFIG_CLEANCACHE +static int xen_tmem_destroy_pool(u32 pool_id) +{ + struct tmem_oid oid = { { 0 } }; + + return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); +} + /* cleancache ops */ static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key, @@ -240,18 +248,154 @@ static struct cleancache_ops tmem_cleanc .init_shared_fs = tmem_cleancache_init_shared_fs, .init_fs = tmem_cleancache_init_fs }; +#endif -static int __init xen_tmem_init(void) +#ifdef CONFIG_FRONTSWAP +/* frontswap tmem operations */ + +/* a single tmem poolid is used for all frontswap "types" (swapfiles) */ +static int tmem_frontswap_poolid; + +/* + * Swizzling increases objects per swaptype, increasing tmem concurrency + * for heavy swaploads. 
Later, larger nr_cpus -> larger SWIZ_BITS + */ +#define SWIZ_BITS 4 +#define SWIZ_MASK ((1 << SWIZ_BITS) - 1) +#define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK)) +#define iswiz(_ind) (_ind >> SWIZ_BITS) + +static inline struct tmem_oid oswiz(unsigned type, u32 ind) +{ + struct tmem_oid oid = { .oid = { 0 } }; + oid.oid[0] = _oswiz(type, ind); + return oid; +} + +/* returns 0 if the page was successfully put into frontswap, -1 if not */ +static int tmem_frontswap_put_page(unsigned type, pgoff_t offset, + struct page *page) +{ + u64 ind64 = (u64)offset; + u32 ind = (u32)offset; + unsigned long pfn = page_to_pfn(page); + int pool = tmem_frontswap_poolid; + int ret; + + if (pool < 0) + return -1; + if (ind64 != ind) + return -1; + mb(); /* ensure page is quiescent; tmem may address it with an alias */ + ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), pfn); + /* translate Xen tmem return values to linux semantics */ + if (ret == 1) + return 0; + else + return -1; +} + +/* returns 0 if the page was successfully gotten from frontswap, -1 if + * was not present (should never happen!) 
*/ +static int tmem_frontswap_get_page(unsigned type, pgoff_t offset, + struct page *page) +{ + u64 ind64 = (u64)offset; + u32 ind = (u32)offset; + unsigned long pfn = page_to_pfn(page); + int pool = tmem_frontswap_poolid; + int ret; + + if (pool < 0) + return -1; + if (ind64 != ind) + return -1; + ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), pfn); + /* translate Xen tmem return values to linux semantics */ + if (ret == 1) + return 0; + else + return -1; +} + +/* flush a single page from frontswap */ +static void tmem_frontswap_flush_page(unsigned type, pgoff_t offset) +{ + u64 ind64 = (u64)offset; + u32 ind = (u32)offset; + int pool = tmem_frontswap_poolid; + + if (pool < 0) + return; + if (ind64 != ind) + return; + (void) xen_tmem_flush_page(pool, oswiz(type, ind), iswiz(ind)); +} + +/* flush all pages from the passed swaptype */ +static void tmem_frontswap_flush_area(unsigned type) +{ + int pool = tmem_frontswap_poolid; + int ind; + + if (pool < 0) + return; + for (ind = SWIZ_MASK; ind >= 0; ind--) + (void)xen_tmem_flush_object(pool, oswiz(type, ind)); +} + +static void tmem_frontswap_init(unsigned ignored) { - struct cleancache_ops old_ops; + struct tmem_pool_uuid private = TMEM_POOL_PRIVATE_UUID; + + /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ + if (tmem_frontswap_poolid < 0) + tmem_frontswap_poolid + xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE); +} + +static int use_frontswap = 1; + +static int __init no_frontswap(char *s) +{ + use_frontswap = 0; + return 1; +} +__setup("nofrontswap", no_frontswap); + +static struct frontswap_ops tmem_frontswap_ops = { + .put_page = tmem_frontswap_put_page, + .get_page = tmem_frontswap_get_page, + .flush_page = tmem_frontswap_flush_page, + .flush_area = tmem_frontswap_flush_area, + .init = tmem_frontswap_init +}; +#endif + +static int __init xen_tmem_init(void) +{ if (!xen_domain()) return 0; +#ifdef CONFIG_FRONTSWAP + if (tmem_enabled && use_frontswap) { + char *s = ""; 
+ struct frontswap_ops old_ops = + frontswap_register_ops(&tmem_frontswap_ops); + + tmem_frontswap_poolid = -1; + if (old_ops.init != NULL) + s = " (WARNING: frontswap_ops overridden)"; + printk(KERN_INFO "frontswap enabled, RAM provided by " + "Xen Transcendent Memory\n"); + } +#endif #ifdef CONFIG_CLEANCACHE BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); if (tmem_enabled && use_cleancache) { char *s = ""; - old_ops = cleancache_register_ops(&tmem_cleancache_ops); + struct cleancache_ops old_ops = + cleancache_register_ops(&tmem_cleancache_ops); if (old_ops.init_fs != NULL) s = " (WARNING: cleancache_ops overridden)"; printk(KERN_INFO "cleancache enabled, RAM provided by " _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Konrad Rzeszutek Wilk
2011-Jun-16 18:38 UTC
Re: [Xen-devel] [PATCH] [linux] xen: tmem: frontswap-tmemonly
On Wed, Jun 15, 2011 at 02:59:33PM -0700, Dan Magenheimer wrote:
> (Resending, just noticed I left xen-devel off the list.) > > Hi Konrad (and any others interested in reviewing) -- > > This patch provides Linux kernel changes to drivers/xen/tmem.c > (and drivers/xen build files) for the Xen tmem shim to support > frontswap. It builds successfully even if the frontswap patchset > is not present, so is suitable for merging in an upstream > tree that has the cleancache patchset but does not yet have > the frontswap patchset.

This is a quite small file.. This is it?

> > Note that minor variations of this code have been posted and > working with Xen tmem for 2-1/2 years, but the code was left > behind when cleancache was split off to "divide and conquer" > for upstream submission. > > I believe this patch supersedes the patch proposed in the > xen-devel thread titled: > > [PATCH] xen-tmem: Build cleancache shim to Xen Transcendent Memory > only if Cleancache is enabled > > For your convenience, the patch is also attached and in the > following git branch: > > git://git.kernel.org/pub/scm/linux/kernel/git/djm/tmem.git#stable/frontswap-tmemonly > > (since 3.0-rc1) > > drivers/xen/Kconfig | 7 ++ > drivers/xen/Makefile | 2 +- > drivers/xen/tmem.c | 168 ++++++++++++++++++++++++++++++++++++++++++++++---- > 3 files changed, 164 insertions(+), 13 deletions(-) > > Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com> > > Thanks, > Dan > > diff -Napur linux-3.0-rc1/drivers/xen/Kconfig linux-3.0-rc1-frontswap-tmemonly/drivers/xen/Kconfig > --- linux-3.0-rc1/drivers/xen/Kconfig 2011-05-29 18:43:36.000000000 -0600 > +++ linux-3.0-rc1-frontswap-tmemonly/drivers/xen/Kconfig 2011-06-14 10:44:20.091857809 -0600 > @@ -105,4 +105,11 @@ config SWIOTLB_XEN > depends on PCI > select SWIOTLB > > +config XEN_TMEM > + bool > + default y if (CLEANCACHE || FRONTSWAP) > + help > + Shim to interface in-kernel Transcendent Memory hooks > + (e.g. cleancache and frontswap) to Xen tmem hypercalls. 
> + > endmenu > diff -Napur linux-3.0-rc1/drivers/xen/Makefile linux-3.0-rc1-frontswap-tmemonly/drivers/xen/Makefile > --- linux-3.0-rc1/drivers/xen/Makefile 2011-05-29 18:43:36.000000000 -0600 > +++ linux-3.0-rc1-frontswap-tmemonly/drivers/xen/Makefile 2011-06-14 10:41:16.508856401 -0600 > @@ -1,6 +1,5 @@ > obj-y += grant-table.o features.o events.o manage.o balloon.o > obj-y += xenbus/ > -obj-y += tmem.o > > nostackp := $(call cc-option, -fno-stack-protector) > CFLAGS_features.o := $(nostackp) > @@ -15,6 +14,7 @@ obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen > obj-$(CONFIG_XENFS) += xenfs/ > obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o > obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o > +obj-$(CONFIG_XEN_TMEM) += tmem.o > obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o > obj-$(CONFIG_XEN_DOM0) += pci.o > > diff -Napur linux-3.0-rc1/drivers/xen/tmem.c linux-3.0-rc1-frontswap-tmemonly/drivers/xen/tmem.c > --- linux-3.0-rc1/drivers/xen/tmem.c 2011-05-29 18:43:36.000000000 -0600 > +++ linux-3.0-rc1-frontswap-tmemonly/drivers/xen/tmem.c 2011-06-14 16:34:23.487854177 -0600 > @@ -1,7 +1,7 @@ > /* > * Xen implementation for transcendent memory (tmem) > * > - * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. > + * Copyright (C) 2009-2011 Oracle Corp. All rights reserved. 
> * Author: Dan Magenheimer > */ > > @@ -9,8 +9,14 @@ > #include <linux/types.h> > #include <linux/init.h> > #include <linux/pagemap.h> > +#include <linux/module.h> > #include <linux/cleancache.h> > > +/* temporary ifdef until include/linux/frontswap.h is upstream */ > +#ifdef CONFIG_FRONTSWAP > +#include <linux/frontswap.h> > +#endif > + > #include <xen/xen.h> > #include <xen/interface/xen.h> > #include <asm/xen/hypercall.h> > @@ -122,14 +128,8 @@ static int xen_tmem_flush_object(u32 poo > return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); > } > > -static int xen_tmem_destroy_pool(u32 pool_id) > -{ > - struct tmem_oid oid = { { 0 } }; > - > - return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); > -} > - > -int tmem_enabled; > +int tmem_enabled __read_mostly; > +EXPORT_SYMBOL(tmem_enabled); > > static int __init enable_tmem(char *s) > { > @@ -139,6 +139,14 @@ static int __init enable_tmem(char *s) > > __setup("tmem", enable_tmem); > > +#ifdef CONFIG_CLEANCACHE > +static int xen_tmem_destroy_pool(u32 pool_id) > +{ > + struct tmem_oid oid = { { 0 } }; > + > + return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); > +} > + > /* cleancache ops */ > > static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key, > @@ -240,18 +248,154 @@ static struct cleancache_ops tmem_cleanc > .init_shared_fs = tmem_cleancache_init_shared_fs, > .init_fs = tmem_cleancache_init_fs > }; > +#endif > > -static int __init xen_tmem_init(void) > +#ifdef CONFIG_FRONTSWAP > +/* frontswap tmem operations */ > + > +/* a single tmem poolid is used for all frontswap "types" (swapfiles) */ > +static int tmem_frontswap_poolid; > + > +/* > + * Swizzling increases objects per swaptype, increasing tmem concurrency > + * for heavy swaploads. 
Later, larger nr_cpus -> larger SWIZ_BITS > + */ > +#define SWIZ_BITS 4 > +#define SWIZ_MASK ((1 << SWIZ_BITS) - 1) > +#define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK)) > +#define iswiz(_ind) (_ind >> SWIZ_BITS) > + > +static inline struct tmem_oid oswiz(unsigned type, u32 ind) > +{ > + struct tmem_oid oid = { .oid = { 0 } }; > + oid.oid[0] = _oswiz(type, ind); > + return oid; > +} > + > +/* returns 0 if the page was successfully put into frontswap, -1 if not */ > +static int tmem_frontswap_put_page(unsigned type, pgoff_t offset, > + struct page *page)Hm, not aligned?> +{ > + u64 ind64 = (u64)offset; > + u32 ind = (u32)offset; > + unsigned long pfn = page_to_pfn(page); > + int pool = tmem_frontswap_poolid; > + int ret; > + > + if (pool < 0) > + return -1; > + if (ind64 != ind) > + return -1; > + mb(); /* ensure page is quiescent; tmem may address it with an alias */ > + ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), pfn); > + /* translate Xen tmem return values to linux semantics */ > + if (ret == 1) > + return 0; > + else > + return -1;This could be just : return (ret == 1) ? 0 : -1;> +} > + > +/* returns 0 if the page was successfully gotten from frontswap, -1 if > + * was not present (should never happen!) */ > +static int tmem_frontswap_get_page(unsigned type, pgoff_t offset, > + struct page *page)Ditto> +{ > + u64 ind64 = (u64)offset; > + u32 ind = (u32)offset; > + unsigned long pfn = page_to_pfn(page); > + int pool = tmem_frontswap_poolid; > + int ret; > + > + if (pool < 0) > + return -1; > + if (ind64 != ind) > + return -1;This looks to repeat itself in the previous function. You might want to make this a macro. 
Or a short inline function in the header file.

> + ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), pfn); > + /* translate Xen tmem return values to linux semantics */ > + if (ret == 1) > + return 0; > + else > + return -1;

Ditto.

> +} > + > +/* flush a single page from frontswap */ > +static void tmem_frontswap_flush_page(unsigned type, pgoff_t offset) > +{ > + u64 ind64 = (u64)offset; > + u32 ind = (u32)offset; > + int pool = tmem_frontswap_poolid; > + > + if (pool < 0) > + return; > + if (ind64 != ind) > + return; > + (void) xen_tmem_flush_page(pool, oswiz(type, ind), iswiz(ind)); > +} > + > +/* flush all pages from the passed swaptype */ > +static void tmem_frontswap_flush_area(unsigned type) > +{ > + int pool = tmem_frontswap_poolid; > + int ind; > + > + if (pool < 0) > + return; > + for (ind = SWIZ_MASK; ind >= 0; ind--) > + (void)xen_tmem_flush_object(pool, oswiz(type, ind)); > +} > + > +static void tmem_frontswap_init(unsigned ignored) > { > - struct cleancache_ops old_ops; > + struct tmem_pool_uuid private = TMEM_POOL_PRIVATE_UUID; > + > + /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ > + if (tmem_frontswap_poolid < 0) > + tmem_frontswap_poolid = > + xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE); > +} > + > +static int use_frontswap = 1;

You could save some precious bytes by making this __initdata. Also the naming is different. You have tmem_enabled. How about frontswap_enabled instead?

> + > +static int __init no_frontswap(char *s) > +{ > + use_frontswap = 0; > + return 1;

This looks like you are using spaces instead of tabs. 
But it could be the editor of mine mangling things up.

> +} > > +__setup("nofrontswap", no_frontswap); > + > +static struct frontswap_ops tmem_frontswap_ops = { > + .put_page = tmem_frontswap_put_page, > + .get_page = tmem_frontswap_get_page, > + .flush_page = tmem_frontswap_flush_page, > + .flush_area = tmem_frontswap_flush_area, > + .init = tmem_frontswap_init

Ditto.

> +}; > +#endif > + > +static int __init xen_tmem_init(void) > +{ > if (!xen_domain()) > return 0; > +#ifdef CONFIG_FRONTSWAP > + if (tmem_enabled && use_frontswap) { > + char *s = ""; > + struct frontswap_ops old_ops = > + frontswap_register_ops(&tmem_frontswap_ops); > + > + tmem_frontswap_poolid = -1; > + if (old_ops.init != NULL) > + s = " (WARNING: frontswap_ops overridden)"; > + printk(KERN_INFO "frontswap enabled, RAM provided by " > + "Xen Transcendent Memory\n"); > + } > +#endif > #ifdef CONFIG_CLEANCACHE > BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); > if (tmem_enabled && use_cleancache) { > char *s = ""; > - old_ops = cleancache_register_ops(&tmem_cleancache_ops); > + struct cleancache_ops old_ops = > + cleancache_register_ops(&tmem_cleancache_ops); > if (old_ops.init_fs != NULL) > s = " (WARNING: cleancache_ops overridden)"; > printk(KERN_INFO "cleancache enabled, RAM provided by " >

_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Magenheimer
2011-Jun-16 21:17 UTC
RE: [Xen-devel] [PATCH] [linux] xen: tmem: frontswap-tmemonly
> From: Konrad Rzeszutek Wilk

Can I add a "Reviewed-by:"? Or will this get a S-O-B from you-as-maintainer anyway?

> This is a quite small file.. This is it?

Thanks for the review! Yes, this file is just a shim translating kernel-ese to Xen-tmem-ese. I realize I do need to add a tmem.h file (with one line of code in it) to surface the exported tmem_enabled.

> This looks like you are using spaces instead of tabs. But it
> could be the editor of mine mangling things up.
> :
> Hm, not aligned?

These must both be your editor (or maybe my mailer if you are editing the inline version), they are fine in the patch and in git.

> This could be just : return (ret == 1) ? 0 : -1;

I'm inclined to spell this out as it is. IMHO, at least in this case, clarity trumps brevity. It is consistent with the rest of the file too. If you feel strongly though, let me know.

> > + if (pool < 0)
> > + return -1;
> > + if (ind64 != ind)
> > + return -1;
>
> This looks to repeat itself in the previous function. You might want to
> make this a macro. Or a short inline function in the header file

Same here. I think a macro will obfuscate this code, and only will remove a few lines. Again, if you feel strongly...

> > +static int use_frontswap = 1;
>
> You could save some precious bytes by making this __initdata
> Also the naming is different. You have tmem_enabled. How about
> frontswap_enabled instead?

OK, will fix. V2 to be posted soon.

Thanks,
Dan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel