# HG changeset patch # User Tim Deegan <tim@xen.org> # Date 1353600936 0 # Node ID ffc59d2fc9980c660941eb24cbe827c1142d1fd0 # Parent 7b4449bdb980caee8efc498d5ea48f772331df2f arm: Tidy up flush_xen_dcache(). - Use a compile-time-constant check for whether we can safely flush just one cacheline. This reduces the common case from 28 instructions to three. - Pass an object to the macro, not a pointer, so we can detect attempts to flush arrays. - Decode CCSIDR correctly to get cacheline size. - Remove some redundant DSBs at the call sites. Signed-off-by: Tim Deegan <tim@xen.org> diff -r 7b4449bdb980 -r ffc59d2fc998 xen/arch/arm/mm.c --- a/xen/arch/arm/mm.c Thu Nov 22 10:47:58 2012 +0100 +++ b/xen/arch/arm/mm.c Thu Nov 22 16:15:36 2012 +0000 @@ -247,7 +247,7 @@ void __init setup_pagetables(unsigned lo /* Change pagetables to the copy in the relocated Xen */ boot_httbr = (unsigned long) xen_pgtable + phys_offset; - flush_xen_dcache_va(&boot_httbr); + flush_xen_dcache(boot_httbr); flush_xen_dcache_va_range((void*)dest_va, _end - _start); flush_xen_text_tlb(); diff -r 7b4449bdb980 -r ffc59d2fc998 xen/arch/arm/setup.c --- a/xen/arch/arm/setup.c Thu Nov 22 10:47:58 2012 +0100 +++ b/xen/arch/arm/setup.c Thu Nov 22 16:15:36 2012 +0000 @@ -175,6 +175,21 @@ static void __init setup_mm(unsigned lon end_boot_allocator(); } +size_t __read_mostly cacheline_bytes; + +/* Very early check of the CPU cache properties */ +void __init setup_cache(void) +{ + uint32_t ccsid; + + /* Read the cache size ID register for the level-0 data cache */ + WRITE_CP32(0, CSSELR); + ccsid = READ_CP32(CCSIDR); + + /* Low 3 bits are log2(cacheline size in words) - 2. 
*/ + cacheline_bytes = 1U << (4 + (ccsid & 0x7)); +} + /* C entry point for boot CPU */ void __init start_xen(unsigned long boot_phys_offset, unsigned long arm_type, @@ -185,6 +200,8 @@ void __init start_xen(unsigned long boot size_t fdt_size; int cpus, i; + setup_cache(); + smp_clear_cpu_maps(); fdt = (void *)BOOT_MISC_VIRT_START diff -r 7b4449bdb980 -r ffc59d2fc998 xen/arch/arm/smpboot.c --- a/xen/arch/arm/smpboot.c Thu Nov 22 10:47:58 2012 +0100 +++ b/xen/arch/arm/smpboot.c Thu Nov 22 16:15:36 2012 +0000 @@ -118,8 +118,8 @@ make_cpus_ready(unsigned int max_cpus, u /* Tell the next CPU to get ready */ /* TODO: handle boards where CPUIDs are not contiguous */ *gate = i; - flush_xen_dcache_va(gate); - asm volatile("dsb; isb; sev"); + flush_xen_dcache(*gate); + asm volatile("isb; sev"); /* And wait for it to respond */ while ( ready_cpus < i ) smp_rmb(); @@ -217,8 +217,8 @@ int __cpu_up(unsigned int cpu) smp_up_cpu = cpu; /* we need to make sure that the change to smp_up_cpu is visible to * secondary cpus with D-cache off */ - flush_xen_dcache_va(&smp_up_cpu); - asm volatile("dsb; isb; sev"); + flush_xen_dcache(smp_up_cpu); + asm volatile("isb; sev"); while ( !cpu_online(cpu) ) { diff -r 7b4449bdb980 -r ffc59d2fc998 xen/include/asm-arm/page.h --- a/xen/include/asm-arm/page.h Thu Nov 22 10:47:58 2012 +0100 +++ b/xen/include/asm-arm/page.h Thu Nov 22 16:15:36 2012 +0000 @@ -248,13 +248,16 @@ static inline void write_pte(lpae_t *p, : : "r" (pte.bits), "r" (p) : "memory"); } +/* Architectural minimum cacheline size is 4 32-bit words. */ +#define MIN_CACHELINE_BYTES 16 +/* Actual cacheline size on the boot CPU. */ +extern size_t cacheline_bytes; /* Function for flushing medium-sized areas. * if 'range' is large enough we might want to use model-specific * full-cache flushes. 
*/ static inline void flush_xen_dcache_va_range(void *p, unsigned long size) { - int cacheline_bytes = READ_CP32(CCSIDR); void *end; dsb(); /* So the CPU issues all writes to the range */ for ( end = p + size; p < end; p += cacheline_bytes ) @@ -262,17 +265,13 @@ static inline void flush_xen_dcache_va_r dsb(); /* So we know the flushes happen before continuing */ } - /* Macro for flushing a single small item. The predicate is always * compile-time constant so this will compile down to 3 instructions in - * the common case. Make sure to call it with the correct type of - * pointer! */ -#define flush_xen_dcache_va(p) do { \ - int cacheline_bytes = READ_CP32(CCSIDR); \ - typeof(p) _p = (p); \ - if ( ((unsigned long)_p & ~(cacheline_bytes - 1)) != \ - (((unsigned long)_p + (sizeof *_p)) & ~(cacheline_bytes - 1)) ) \ - flush_xen_dcache_va_range(_p, sizeof *_p); \ + * the common case. */ +#define flush_xen_dcache(x) do { \ + typeof(x) *_p = &(x); \ + if ( sizeof(x) > MIN_CACHELINE_BYTES || sizeof(x) > alignof(x) ) \ + flush_xen_dcache_va_range(_p, sizeof(x)); \ else \ asm volatile ( \ "dsb;" /* Finish all earlier writes */ \ diff -r 7b4449bdb980 -r ffc59d2fc998 xen/include/xen/compiler.h --- a/xen/include/xen/compiler.h Thu Nov 22 10:47:58 2012 +0100 +++ b/xen/include/xen/compiler.h Thu Nov 22 16:15:36 2012 +0000 @@ -54,6 +54,10 @@ #define offsetof(a,b) __builtin_offsetof(a,b) +#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L +#define alignof __alignof__ +#endif + /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) \ BUILD_BUG_ON_ZERO(__builtin_types_compatible_p(typeof(a), typeof(&a[0])))
On Thu, 22 Nov 2012, Tim Deegan wrote:
> # HG changeset patch > # User Tim Deegan <tim@xen.org> > # Date 1353600936 0 > # Node ID ffc59d2fc9980c660941eb24cbe827c1142d1fd0 > # Parent 7b4449bdb980caee8efc498d5ea48f772331df2f > arm: Tidy up flush_xen_dcache(). > > - Use a compile-time-constant check for whether we can safely flush > just one cacheline. This reduces the common case from 28 > instructions to three. > - Pass an object to the macro, not a pointer, so we can detect > attempts to flush arrays. > - Decode CCSIDR correctly to get cacheline size. > - Remove some redundant DSBs at the call sites. > > Signed-off-by: Tim Deegan <tim@xen.org>
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> diff -r 7b4449bdb980 -r ffc59d2fc998 xen/arch/arm/mm.c > --- a/xen/arch/arm/mm.c Thu Nov 22 10:47:58 2012 +0100 > +++ b/xen/arch/arm/mm.c Thu Nov 22 16:15:36 2012 +0000 > @@ -247,7 +247,7 @@ void __init setup_pagetables(unsigned lo > > /* Change pagetables to the copy in the relocated Xen */ > boot_httbr = (unsigned long) xen_pgtable + phys_offset; > - flush_xen_dcache_va(&boot_httbr); > + flush_xen_dcache(boot_httbr); > flush_xen_dcache_va_range((void*)dest_va, _end - _start); > flush_xen_text_tlb(); > > diff -r 7b4449bdb980 -r ffc59d2fc998 xen/arch/arm/setup.c > --- a/xen/arch/arm/setup.c Thu Nov 22 10:47:58 2012 +0100 > +++ b/xen/arch/arm/setup.c Thu Nov 22 16:15:36 2012 +0000 > @@ -175,6 +175,21 @@ static void __init setup_mm(unsigned lon > end_boot_allocator(); > } > > +size_t __read_mostly cacheline_bytes; > + > +/* Very early check of the CPU cache properties */ > +void __init setup_cache(void) > +{ > + uint32_t ccsid; > + > + /* Read the cache size ID register for the level-0 data cache */ > + WRITE_CP32(0, CSSELR); > + ccsid = READ_CP32(CCSIDR); > + > + /* Low 3 bits are log2(cacheline size in words) - 2. 
*/ > + cacheline_bytes = 1U << (4 + (ccsid & 0x7)); > +} > + > /* C entry point for boot CPU */ > void __init start_xen(unsigned long boot_phys_offset, > unsigned long arm_type, > @@ -185,6 +200,8 @@ void __init start_xen(unsigned long boot > size_t fdt_size; > int cpus, i; > > + setup_cache(); > + > smp_clear_cpu_maps(); > > fdt = (void *)BOOT_MISC_VIRT_START > diff -r 7b4449bdb980 -r ffc59d2fc998 xen/arch/arm/smpboot.c > --- a/xen/arch/arm/smpboot.c Thu Nov 22 10:47:58 2012 +0100 > +++ b/xen/arch/arm/smpboot.c Thu Nov 22 16:15:36 2012 +0000 > @@ -118,8 +118,8 @@ make_cpus_ready(unsigned int max_cpus, u > /* Tell the next CPU to get ready */ > /* TODO: handle boards where CPUIDs are not contiguous */ > *gate = i; > - flush_xen_dcache_va(gate); > - asm volatile("dsb; isb; sev"); > + flush_xen_dcache(*gate); > + asm volatile("isb; sev"); > /* And wait for it to respond */ > while ( ready_cpus < i ) > smp_rmb(); > @@ -217,8 +217,8 @@ int __cpu_up(unsigned int cpu) > smp_up_cpu = cpu; > /* we need to make sure that the change to smp_up_cpu is visible to > * secondary cpus with D-cache off */ > - flush_xen_dcache_va(&smp_up_cpu); > - asm volatile("dsb; isb; sev"); > + flush_xen_dcache(smp_up_cpu); > + asm volatile("isb; sev"); > > while ( !cpu_online(cpu) ) > { > diff -r 7b4449bdb980 -r ffc59d2fc998 xen/include/asm-arm/page.h > --- a/xen/include/asm-arm/page.h Thu Nov 22 10:47:58 2012 +0100 > +++ b/xen/include/asm-arm/page.h Thu Nov 22 16:15:36 2012 +0000 > @@ -248,13 +248,16 @@ static inline void write_pte(lpae_t *p, > : : "r" (pte.bits), "r" (p) : "memory"); > } > > +/* Architectural minimum cacheline size is 4 32-bit words. */ > +#define MIN_CACHELINE_BYTES 16 > +/* Actual cacheline size on the boot CPU. */ > +extern size_t cacheline_bytes; > > /* Function for flushing medium-sized areas. > * if 'range' is large enough we might want to use model-specific > * full-cache flushes. 
*/ > static inline void flush_xen_dcache_va_range(void *p, unsigned long size) > { > - int cacheline_bytes = READ_CP32(CCSIDR); > void *end; > dsb(); /* So the CPU issues all writes to the range */ > for ( end = p + size; p < end; p += cacheline_bytes ) > @@ -262,17 +265,13 @@ static inline void flush_xen_dcache_va_r > dsb(); /* So we know the flushes happen before continuing */ > } > > - > /* Macro for flushing a single small item. The predicate is always > * compile-time constant so this will compile down to 3 instructions in > - * the common case. Make sure to call it with the correct type of > - * pointer! */ > -#define flush_xen_dcache_va(p) do { \ > - int cacheline_bytes = READ_CP32(CCSIDR); \ > - typeof(p) _p = (p); \ > - if ( ((unsigned long)_p & ~(cacheline_bytes - 1)) != \ > - (((unsigned long)_p + (sizeof *_p)) & ~(cacheline_bytes - 1)) ) \ > - flush_xen_dcache_va_range(_p, sizeof *_p); \ > + * the common case. */ > +#define flush_xen_dcache(x) do { \ > + typeof(x) *_p = &(x); \ > + if ( sizeof(x) > MIN_CACHELINE_BYTES || sizeof(x) > alignof(x) ) \ > + flush_xen_dcache_va_range(_p, sizeof(x)); \ > else \ > asm volatile ( \ > "dsb;" /* Finish all earlier writes */ \ > diff -r 7b4449bdb980 -r ffc59d2fc998 xen/include/xen/compiler.h > --- a/xen/include/xen/compiler.h Thu Nov 22 10:47:58 2012 +0100 > +++ b/xen/include/xen/compiler.h Thu Nov 22 16:15:36 2012 +0000 > @@ -54,6 +54,10 @@ > > #define offsetof(a,b) __builtin_offsetof(a,b) > > +#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L > +#define alignof __alignof__ > +#endif > + > /* &a[0] degrades to a pointer: a different type from an array */ > #define __must_be_array(a) \ > BUILD_BUG_ON_ZERO(__builtin_types_compatible_p(typeof(a), typeof(&a[0]))) >
On Thu, 2012-11-22 at 17:16 +0000, Stefano Stabellini wrote:
> On Thu, 22 Nov 2012, Tim Deegan wrote: > > # HG changeset patch > > # User Tim Deegan <tim@xen.org> > > # Date 1353600936 0 > > # Node ID ffc59d2fc9980c660941eb24cbe827c1142d1fd0 > > # Parent 7b4449bdb980caee8efc498d5ea48f772331df2f > > arm: Tidy up flush_xen_dcache(). > > > > - Use a compile-time-constant check for whether we can safely flush > > just one cacheline. This reduces the common case from 28 > > instructions to three. > > - Pass an object to the macro, not a pointer, so we can detect > > attempts to flush arrays. > > - Decode CCSIDR correctly to get cacheline size. > > - Remove some redundant DSBs at the call sites. > > > > Signed-off-by: Tim Deegan <tim@xen.org> > > Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Applied. I wonder if flush_xen_dcache_va_range should be out of line?
> > > > diff -r 7b4449bdb980 -r ffc59d2fc998 xen/arch/arm/mm.c > > --- a/xen/arch/arm/mm.c Thu Nov 22 10:47:58 2012 +0100 > > +++ b/xen/arch/arm/mm.c Thu Nov 22 16:15:36 2012 +0000 > > @@ -247,7 +247,7 @@ void __init setup_pagetables(unsigned lo > > > > /* Change pagetables to the copy in the relocated Xen */ > > boot_httbr = (unsigned long) xen_pgtable + phys_offset; > > - flush_xen_dcache_va(&boot_httbr); > > + flush_xen_dcache(boot_httbr); > > flush_xen_dcache_va_range((void*)dest_va, _end - _start); > > flush_xen_text_tlb(); > > > > diff -r 7b4449bdb980 -r ffc59d2fc998 xen/arch/arm/setup.c > > --- a/xen/arch/arm/setup.c Thu Nov 22 10:47:58 2012 +0100 > > +++ b/xen/arch/arm/setup.c Thu Nov 22 16:15:36 2012 +0000 > > @@ -175,6 +175,21 @@ static void __init setup_mm(unsigned lon > > end_boot_allocator(); > > } > > > > +size_t __read_mostly cacheline_bytes; > > + > > +/* Very early check of the CPU cache properties */ > > +void __init setup_cache(void) > > +{ > > + uint32_t ccsid; > > + > > + /* Read the cache size ID register for the level-0 data cache */ > > + WRITE_CP32(0, CSSELR); > > + ccsid = 
READ_CP32(CCSIDR); > > + > > + /* Low 3 bits are log2(cacheline size in words) - 2. */ > > + cacheline_bytes = 1U << (4 + (ccsid & 0x7)); > > +} > > + > > /* C entry point for boot CPU */ > > void __init start_xen(unsigned long boot_phys_offset, > > unsigned long arm_type, > > @@ -185,6 +200,8 @@ void __init start_xen(unsigned long boot > > size_t fdt_size; > > int cpus, i; > > > > + setup_cache(); > > + > > smp_clear_cpu_maps(); > > > > fdt = (void *)BOOT_MISC_VIRT_START > > diff -r 7b4449bdb980 -r ffc59d2fc998 xen/arch/arm/smpboot.c > > --- a/xen/arch/arm/smpboot.c Thu Nov 22 10:47:58 2012 +0100 > > +++ b/xen/arch/arm/smpboot.c Thu Nov 22 16:15:36 2012 +0000 > > @@ -118,8 +118,8 @@ make_cpus_ready(unsigned int max_cpus, u > > /* Tell the next CPU to get ready */ > > /* TODO: handle boards where CPUIDs are not contiguous */ > > *gate = i; > > - flush_xen_dcache_va(gate); > > - asm volatile("dsb; isb; sev"); > > + flush_xen_dcache(*gate); > > + asm volatile("isb; sev"); > > /* And wait for it to respond */ > > while ( ready_cpus < i ) > > smp_rmb(); > > @@ -217,8 +217,8 @@ int __cpu_up(unsigned int cpu) > > smp_up_cpu = cpu; > > /* we need to make sure that the change to smp_up_cpu is visible to > > * secondary cpus with D-cache off */ > > - flush_xen_dcache_va(&smp_up_cpu); > > - asm volatile("dsb; isb; sev"); > > + flush_xen_dcache(smp_up_cpu); > > + asm volatile("isb; sev"); > > > > while ( !cpu_online(cpu) ) > > { > > diff -r 7b4449bdb980 -r ffc59d2fc998 xen/include/asm-arm/page.h > > --- a/xen/include/asm-arm/page.h Thu Nov 22 10:47:58 2012 +0100 > > +++ b/xen/include/asm-arm/page.h Thu Nov 22 16:15:36 2012 +0000 > > @@ -248,13 +248,16 @@ static inline void write_pte(lpae_t *p, > > : : "r" (pte.bits), "r" (p) : "memory"); > > } > > > > +/* Architectural minimum cacheline size is 4 32-bit words. */ > > +#define MIN_CACHELINE_BYTES 16 > > +/* Actual cacheline size on the boot CPU. 
*/ > > +extern size_t cacheline_bytes; > > > > /* Function for flushing medium-sized areas. > > * if 'range' is large enough we might want to use model-specific > > * full-cache flushes. */ > > static inline void flush_xen_dcache_va_range(void *p, unsigned long size) > > { > > - int cacheline_bytes = READ_CP32(CCSIDR); > > void *end; > > dsb(); /* So the CPU issues all writes to the range */ > > for ( end = p + size; p < end; p += cacheline_bytes ) > > @@ -262,17 +265,13 @@ static inline void flush_xen_dcache_va_r > > dsb(); /* So we know the flushes happen before continuing */ > > } > > > > - > > /* Macro for flushing a single small item. The predicate is always > > * compile-time constant so this will compile down to 3 instructions in > > - * the common case. Make sure to call it with the correct type of > > - * pointer! */ > > -#define flush_xen_dcache_va(p) do { \ > > - int cacheline_bytes = READ_CP32(CCSIDR); \ > > - typeof(p) _p = (p); \ > > - if ( ((unsigned long)_p & ~(cacheline_bytes - 1)) != \ > > - (((unsigned long)_p + (sizeof *_p)) & ~(cacheline_bytes - 1)) ) \ > > - flush_xen_dcache_va_range(_p, sizeof *_p); \ > > + * the common case. 
*/ > > +#define flush_xen_dcache(x) do { \ > > + typeof(x) *_p = &(x); \ > > + if ( sizeof(x) > MIN_CACHELINE_BYTES || sizeof(x) > alignof(x) ) \ > > + flush_xen_dcache_va_range(_p, sizeof(x)); \ > > else \ > > asm volatile ( \ > > "dsb;" /* Finish all earlier writes */ \ > > diff -r 7b4449bdb980 -r ffc59d2fc998 xen/include/xen/compiler.h > > --- a/xen/include/xen/compiler.h Thu Nov 22 10:47:58 2012 +0100 > > +++ b/xen/include/xen/compiler.h Thu Nov 22 16:15:36 2012 +0000 > > @@ -54,6 +54,10 @@ > > > > #define offsetof(a,b) __builtin_offsetof(a,b) > > > > +#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L > > +#define alignof __alignof__ > > +#endif > > + > > /* &a[0] degrades to a pointer: a different type from an array */ > > #define __must_be_array(a) \ > > BUILD_BUG_ON_ZERO(__builtin_types_compatible_p(typeof(a), typeof(&a[0]))) > >