Jan Beulich
2009-Sep-21 12:12 UTC
[Xen-devel] [PATCH 7/7] introduce write-once optimization
This one is likely the most questionable part: Realizing that the compiler can do a much better job in terms of CSE when the variables needed are constant, this introduces and extension to the read-mostly variables, allowing variables to be marked as being written just during boot initialization. There is, however, quite a bit of care required, as it must be ensured that the compiler will never mix uses of the "real" and the "aliased" variable within a function, and that the compiler never cache the contents of the read-only representation across those function calls that lead to initialization through the writable representation. The concept therefore is to have two mechanisms for initialization: Either through marking a whole translation unit as being initialization only (and hence only ever seeing the writable representation), or by making sure that in a particular initialization function it is only the writable representation that gets accessed (even when wanting to read its value). For that latter method, I wasn''t able to think of a mechanism which would enforce this, which is the reason for the need to be really careful. The benefit of the whole construct is that for some of the converted variables the number of accesses to these variables goes down by over 50%, including cases where entire inline function expansions (accessing several of these variables) can be eliminated by CSE. Signed-off-by: Jan Beulich <jbeulich@novell.com> --- 2009-09-10.orig/xen/arch/x86/mm.c 2009-09-17 12:08:27.000000000 +0200 +++ 2009-09-10/xen/arch/x86/mm.c 2009-09-17 14:37:46.000000000 +0200 @@ -138,12 +138,12 @@ l1_pgentry_t __attribute__ ((__section__ struct domain *dom_xen, *dom_io; /* Frame table size in pages. */ -unsigned long max_page; -unsigned long total_pages; +DEFINE_WRITE_ONCE(unsigned long, max_page, 0); +DEFINE_WRITE_ONCE(unsigned long, total_pages, 0); -unsigned long __read_mostly pdx_group_valid[BITS_TO_LONGS( +DEFINE_WRITE_ONCE(unsigned long[BITS_TO_LONGS( (FRAMETABLE_SIZE / sizeof(*frame_table) + PDX_GROUP_COUNT - 1) - / PDX_GROUP_COUNT)] = { [0] = 1 }; + / PDX_GROUP_COUNT)], pdx_group_valid, { [0] = 1 }); #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT) --- 2009-09-10.orig/xen/arch/x86/setup.c 2009-09-17 12:05:47.000000000 +0200 +++ 2009-09-10/xen/arch/x86/setup.c 2009-09-17 13:51:21.000000000 +0200 @@ -1,3 +1,4 @@ +#include <xen/no-write-once.h> #include <xen/config.h> #include <xen/init.h> #include <xen/lib.h> --- 2009-09-10.orig/xen/arch/x86/x86_64/mm.c 2009-09-17 15:54:05.000000000 +0200 +++ 2009-09-10/xen/arch/x86/x86_64/mm.c 2009-09-17 16:17:23.000000000 +0200 @@ -34,13 +34,13 @@ #include <public/memory.h> /* Parameters for PFN/MADDR compression. */ -unsigned long __read_mostly max_pdx; -unsigned long __read_mostly pfn_pdx_bottom_mask = ~0UL; -unsigned long __read_mostly ma_va_bottom_mask = ~0UL; -unsigned long __read_mostly pfn_top_mask = 0; -unsigned long __read_mostly ma_top_mask = 0; -unsigned long __read_mostly pfn_hole_mask = 0; -unsigned int __read_mostly pfn_pdx_hole_shift = 0; +DEFINE_WRITE_ONCE(unsigned long, max_pdx, 0); +DEFINE_WRITE_ONCE(unsigned long, pfn_pdx_bottom_mask, ~0UL); +DEFINE_WRITE_ONCE(unsigned long, ma_va_bottom_mask, ~0UL); +DEFINE_WRITE_ONCE(unsigned long, pfn_top_mask, 0); +DEFINE_WRITE_ONCE(unsigned long, ma_top_mask, 0); +DEFINE_WRITE_ONCE(unsigned long, pfn_hole_mask, 0); +DEFINE_WRITE_ONCE(unsigned int, pfn_pdx_hole_shift, 0); #ifdef CONFIG_COMPAT unsigned int m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START; @@ -183,12 +183,14 @@ void __init pfn_pdx_hole_setup(unsigned printk(KERN_INFO "PFN compression on bits %u...%u\n", bottom_shift, bottom_shift + hole_shift - 1); - pfn_pdx_hole_shift = hole_shift; - pfn_pdx_bottom_mask = (1UL << bottom_shift) - 1; - ma_va_bottom_mask = (PAGE_SIZE << bottom_shift) - 1; - pfn_hole_mask = ((1UL << hole_shift) - 1) << bottom_shift; - pfn_top_mask = ~(pfn_pdx_bottom_mask | pfn_hole_mask); - ma_top_mask = pfn_top_mask << PAGE_SHIFT; + write_once(pfn_pdx_hole_shift) = hole_shift; + write_once(pfn_pdx_bottom_mask) = (1UL << bottom_shift) - 1; + write_once(ma_va_bottom_mask) = (PAGE_SIZE << bottom_shift) - 1; + write_once(pfn_hole_mask) = ((1UL << hole_shift) - 1) + << bottom_shift; + write_once(pfn_top_mask) = ~(write_once(pfn_pdx_bottom_mask) + | write_once(pfn_hole_mask)); + write_once(ma_top_mask) = write_once(pfn_top_mask) << PAGE_SHIFT; } void __init paging_init(void) --- 2009-09-10.orig/xen/arch/x86/xen.lds.S 2009-09-17 16:14:56.000000000 +0200 +++ 2009-09-10/xen/arch/x86/xen.lds.S 2009-09-17 14:38:19.000000000 +0200 @@ -59,6 +59,7 @@ SECTIONS . = ALIGN(128); .data.read_mostly : { + *(.data.write_once) *(.data.read_mostly) } :text --- 2009-09-10.orig/xen/include/asm-x86/mm.h 2009-09-17 15:35:03.000000000 +0200 +++ 2009-09-10/xen/include/asm-x86/mm.h 2009-09-17 15:36:21.000000000 +0200 @@ -4,6 +4,7 @@ #include <xen/config.h> #include <xen/list.h> +#include <xen/write-once.h> #include <asm/io.h> #include <asm/uaccess.h> @@ -259,13 +260,13 @@ extern void share_xen_page_with_privileg struct page_info *page, int readonly); #define frame_table ((struct page_info *)FRAMETABLE_VIRT_START) -extern unsigned long max_page; -extern unsigned long total_pages; +extern DECLARE_WRITE_ONCE(unsigned long, max_page); +extern DECLARE_WRITE_ONCE(unsigned long, total_pages); void init_frametable(void); #define PDX_GROUP_COUNT ((1 << L2_PAGETABLE_SHIFT) / \ (sizeof(*frame_table) & -sizeof(*frame_table))) -extern unsigned long pdx_group_valid[]; +extern DECLARE_WRITE_ONCE(unsigned long[], pdx_group_valid); /* Convert between Xen-heap virtual addresses and page-info structures. */ static inline struct page_info *__virt_to_page(const void *v) --- 2009-09-10.orig/xen/include/asm-x86/x86_64/page.h 2009-09-17 15:31:15.000000000 +0200 +++ 2009-09-10/xen/include/asm-x86/x86_64/page.h 2009-09-17 15:37:28.000000000 +0200 @@ -30,16 +30,19 @@ #ifndef __ASSEMBLY__ #include <xen/config.h> +#include <xen/write-once.h> #include <asm/types.h> /* Physical address where Xen was relocated to. */ extern unsigned long xen_phys_start; -extern unsigned long max_pdx; -extern unsigned long pfn_pdx_bottom_mask, ma_va_bottom_mask; -extern unsigned int pfn_pdx_hole_shift; -extern unsigned long pfn_hole_mask; -extern unsigned long pfn_top_mask, ma_top_mask; +extern DECLARE_WRITE_ONCE(unsigned long, max_pdx); +extern DECLARE_WRITE_ONCE(unsigned long, pfn_pdx_bottom_mask); +extern DECLARE_WRITE_ONCE(unsigned long, ma_va_bottom_mask); +extern DECLARE_WRITE_ONCE(unsigned int, pfn_pdx_hole_shift); +extern DECLARE_WRITE_ONCE(unsigned long, pfn_hole_mask); +extern DECLARE_WRITE_ONCE(unsigned long, pfn_top_mask); +extern DECLARE_WRITE_ONCE(unsigned long, ma_top_mask); extern void pfn_pdx_hole_setup(unsigned long); #define page_to_pdx(pg) ((pg) - frame_table) @@ -53,7 +56,7 @@ extern void pfn_pdx_hole_setup(unsigned #define pdx_to_virt(pdx) ((void *)(DIRECTMAP_VIRT_START + \ ((unsigned long)(pdx) << PAGE_SHIFT))) -extern int __mfn_valid(unsigned long mfn); +extern __attribute_write_once__ int __mfn_valid(unsigned long mfn); static inline unsigned long pfn_to_pdx(unsigned long pfn) { --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ 2009-09-10/xen/include/xen/no-write-once.h 2009-09-17 15:18:29.000000000 +0200 @@ -0,0 +1,11 @@ +#ifdef __XEN_WRITE_ONCE_H__ +#error Must not include xen/no-write-once.h after xen/write-once.h. +#endif +#ifndef __XEN_NO_WRITE_ONCE_H__ +#define __XEN_NO_WRITE_ONCE_H__ + +#define DECLARE_WRITE_ONCE(type, name) __typeof__(type) name + +#define __attribute_write_once__ + +#endif /* __XEN_NO_WRITE_ONCE_H__ */ --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ 2009-09-10/xen/include/xen/write-once.h 2009-09-17 16:12:21.000000000 +0200 @@ -0,0 +1,15 @@ +#if !defined(__XEN_WRITE_ONCE_H__) && !defined(__XEN_NO_WRITE_ONCE_H__) +#define __XEN_WRITE_ONCE_H__ + +#define DECLARE_WRITE_ONCE(type, name) const __typeof__(type) name + +#define DEFINE_WRITE_ONCE(type, name, ...) static __typeof__(type) \ + __attribute_used__ __attribute__((__section__(".data.write_once"))) \ + __write_once_##name __asm__(#name) = __VA_ARGS__; \ + __asm__(".globl " #name) + +#define write_once(name) __write_once_##name + +#define __attribute_write_once__ __attribute_const__ + +#endif /* __XEN_WRITE_ONCE_H__ */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Sep-21 12:41 UTC
Re: [Xen-devel] [PATCH 7/7] introduce write-once optimization
On 21/09/2009 13:12, "Jan Beulich" <JBeulich@novell.com> wrote:> This one is likely the most questionable part: Realizing that the > compiler can do a much better job in terms of CSE when the variables > needed are constant, this introduces and extension to the read-mostly > variables, allowing variables to be marked as being written just > during boot initialization.Cunning, but this one is a bit too nasty for my taste. The performance wins would have to be measurable and significant. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Sep-22 08:27 UTC
Re: [Xen-devel] [PATCH 7/7] introduce write-once optimization
On 21/09/2009 13:12, "Jan Beulich" <JBeulich@novell.com> wrote:> This one is likely the most questionable part: Realizing that the > compiler can do a much better job in terms of CSE when the variables > needed are constant, this introduces and extension to the read-mostly > variables, allowing variables to be marked as being written just > during boot initialization.How much of a win overall was this? The main concern comes from mixing the two aliased variables within a single linkage unit, where it will be hard to know how clever the compiler might be with inlining and inter-function optimisations. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2009-Sep-22 08:48 UTC
Re: [Xen-devel] [PATCH 7/7] introduce write-once optimization
>>> Keir Fraser <keir.fraser@eu.citrix.com> 22.09.09 10:27 >>> >On 21/09/2009 13:12, "Jan Beulich" <JBeulich@novell.com> wrote: > >> This one is likely the most questionable part: Realizing that the >> compiler can do a much better job in terms of CSE when the variables >> needed are constant, this introduces and extension to the read-mostly >> variables, allowing variables to be marked as being written just >> during boot initialization. > >How much of a win overall was this? The main concern comes from mixing the >two aliased variables within a single linkage unit, where it will be hard to >know how clever the compiler might be with inlining and inter-function >optimisations.About 8k (5k compressed) code size reduction, I think. I didn''t measure performance. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel