# HG changeset patch
# User Tim Deegan <tim@xen.org>
# Date 1353600936 0
# Node ID ffc59d2fc9980c660941eb24cbe827c1142d1fd0
# Parent 7b4449bdb980caee8efc498d5ea48f772331df2f
arm: Tidy up flush_xen_dcache().
- Use a compile-time-constant check for whether we can safely flush
just one cacheline. This reduces the common case from 28
instructions to three.
- Pass an object to the macro, not a pointer, so we can detect
attempts to flush arrays.
- Decode CCSIDR correctly to get cacheline size.
- Remove some redundant DSBs at the call sites.
Signed-off-by: Tim Deegan <tim@xen.org>
diff -r 7b4449bdb980 -r ffc59d2fc998 xen/arch/arm/mm.c
--- a/xen/arch/arm/mm.c Thu Nov 22 10:47:58 2012 +0100
+++ b/xen/arch/arm/mm.c Thu Nov 22 16:15:36 2012 +0000
@@ -247,7 +247,7 @@ void __init setup_pagetables(unsigned lo
/* Change pagetables to the copy in the relocated Xen */
boot_httbr = (unsigned long) xen_pgtable + phys_offset;
- flush_xen_dcache_va(&boot_httbr);
+ flush_xen_dcache(boot_httbr);
flush_xen_dcache_va_range((void*)dest_va, _end - _start);
flush_xen_text_tlb();
diff -r 7b4449bdb980 -r ffc59d2fc998 xen/arch/arm/setup.c
--- a/xen/arch/arm/setup.c Thu Nov 22 10:47:58 2012 +0100
+++ b/xen/arch/arm/setup.c Thu Nov 22 16:15:36 2012 +0000
@@ -175,6 +175,21 @@ static void __init setup_mm(unsigned lon
end_boot_allocator();
}
+size_t __read_mostly cacheline_bytes;
+
+/* Very early check of the CPU cache properties */
+void __init setup_cache(void)
+{
+ uint32_t ccsid;
+
+ /* Read the cache size ID register for the level-0 data cache */
+ WRITE_CP32(0, CSSELR);
+ ccsid = READ_CP32(CCSIDR);
+
+ /* Low 3 bits are log2(cacheline size in words) - 2. */
+ cacheline_bytes = 1U << (4 + (ccsid & 0x7));
+}
+
/* C entry point for boot CPU */
void __init start_xen(unsigned long boot_phys_offset,
unsigned long arm_type,
@@ -185,6 +200,8 @@ void __init start_xen(unsigned long boot
size_t fdt_size;
int cpus, i;
+ setup_cache();
+
smp_clear_cpu_maps();
fdt = (void *)BOOT_MISC_VIRT_START
diff -r 7b4449bdb980 -r ffc59d2fc998 xen/arch/arm/smpboot.c
--- a/xen/arch/arm/smpboot.c Thu Nov 22 10:47:58 2012 +0100
+++ b/xen/arch/arm/smpboot.c Thu Nov 22 16:15:36 2012 +0000
@@ -118,8 +118,8 @@ make_cpus_ready(unsigned int max_cpus, u
/* Tell the next CPU to get ready */
/* TODO: handle boards where CPUIDs are not contiguous */
*gate = i;
- flush_xen_dcache_va(gate);
- asm volatile("dsb; isb; sev");
+ flush_xen_dcache(*gate);
+ asm volatile("isb; sev");
/* And wait for it to respond */
while ( ready_cpus < i )
smp_rmb();
@@ -217,8 +217,8 @@ int __cpu_up(unsigned int cpu)
smp_up_cpu = cpu;
/* we need to make sure that the change to smp_up_cpu is visible to
* secondary cpus with D-cache off */
- flush_xen_dcache_va(&smp_up_cpu);
- asm volatile("dsb; isb; sev");
+ flush_xen_dcache(smp_up_cpu);
+ asm volatile("isb; sev");
while ( !cpu_online(cpu) )
{
diff -r 7b4449bdb980 -r ffc59d2fc998 xen/include/asm-arm/page.h
--- a/xen/include/asm-arm/page.h Thu Nov 22 10:47:58 2012 +0100
+++ b/xen/include/asm-arm/page.h Thu Nov 22 16:15:36 2012 +0000
@@ -248,13 +248,16 @@ static inline void write_pte(lpae_t *p,
: : "r" (pte.bits), "r" (p) : "memory");
}
+/* Architectural minimum cacheline size is 4 32-bit words. */
+#define MIN_CACHELINE_BYTES 16
+/* Actual cacheline size on the boot CPU. */
+extern size_t cacheline_bytes;
/* Function for flushing medium-sized areas.
 * if "range" is large enough we might want to use model-specific
 * full-cache flushes. */
static inline void flush_xen_dcache_va_range(void *p, unsigned long size)
{
- int cacheline_bytes = READ_CP32(CCSIDR);
void *end;
dsb(); /* So the CPU issues all writes to the range */
for ( end = p + size; p < end; p += cacheline_bytes )
@@ -262,17 +265,13 @@ static inline void flush_xen_dcache_va_r
dsb(); /* So we know the flushes happen before continuing */
}
-
/* Macro for flushing a single small item. The predicate is always
* compile-time constant so this will compile down to 3 instructions in
- * the common case. Make sure to call it with the correct type of
- * pointer! */
-#define flush_xen_dcache_va(p) do { \
- int cacheline_bytes = READ_CP32(CCSIDR); \
- typeof(p) _p = (p); \
- if ( ((unsigned long)_p & ~(cacheline_bytes - 1)) != \
- (((unsigned long)_p + (sizeof *_p)) & ~(cacheline_bytes - 1)) ) \
- flush_xen_dcache_va_range(_p, sizeof *_p); \
+ * the common case. */
+#define flush_xen_dcache(x) do { \
+ typeof(x) *_p = &(x); \
+ if ( sizeof(x) > MIN_CACHELINE_BYTES || sizeof(x) > alignof(x) ) \
+ flush_xen_dcache_va_range(_p, sizeof(x)); \
else \
asm volatile ( \
"dsb;" /* Finish all earlier writes */
\
diff -r 7b4449bdb980 -r ffc59d2fc998 xen/include/xen/compiler.h
--- a/xen/include/xen/compiler.h Thu Nov 22 10:47:58 2012 +0100
+++ b/xen/include/xen/compiler.h Thu Nov 22 16:15:36 2012 +0000
@@ -54,6 +54,10 @@
#define offsetof(a,b) __builtin_offsetof(a,b)
+#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L
+#define alignof __alignof__
+#endif
+
/* &a[0] degrades to a pointer: a different type from an array */
#define __must_be_array(a) \
BUILD_BUG_ON_ZERO(__builtin_types_compatible_p(typeof(a), typeof(&a[0])))
On Thu, 22 Nov 2012, Tim Deegan wrote:> # HG changeset patch > # User Tim Deegan <tim@xen.org> > # Date 1353600936 0 > # Node ID ffc59d2fc9980c660941eb24cbe827c1142d1fd0 > # Parent 7b4449bdb980caee8efc498d5ea48f772331df2f > arm: Tidy up flush_xen_dcache(). > > - Use a compile-time-constant check for whether we can safely flush > just one cacheline. This reduces the common case from 28 > instructions to three. > - Pass an object to the macro, not a pointer, so we can detect > attempts to flush arrays. > - Decode CCSIDR correctly to get cacheline size. > - Remove some redundant DSBs at the call sites. > > Signed-off-by: Tim Deegan <tim@xen.org>Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>> diff -r 7b4449bdb980 -r ffc59d2fc998 xen/arch/arm/mm.c > --- a/xen/arch/arm/mm.c Thu Nov 22 10:47:58 2012 +0100 > +++ b/xen/arch/arm/mm.c Thu Nov 22 16:15:36 2012 +0000 > @@ -247,7 +247,7 @@ void __init setup_pagetables(unsigned lo > > /* Change pagetables to the copy in the relocated Xen */ > boot_httbr = (unsigned long) xen_pgtable + phys_offset; > - flush_xen_dcache_va(&boot_httbr); > + flush_xen_dcache(boot_httbr); > flush_xen_dcache_va_range((void*)dest_va, _end - _start); > flush_xen_text_tlb(); > > diff -r 7b4449bdb980 -r ffc59d2fc998 xen/arch/arm/setup.c > --- a/xen/arch/arm/setup.c Thu Nov 22 10:47:58 2012 +0100 > +++ b/xen/arch/arm/setup.c Thu Nov 22 16:15:36 2012 +0000 > @@ -175,6 +175,21 @@ static void __init setup_mm(unsigned lon > end_boot_allocator(); > } > > +size_t __read_mostly cacheline_bytes; > + > +/* Very early check of the CPU cache properties */ > +void __init setup_cache(void) > +{ > + uint32_t ccsid; > + > + /* Read the cache size ID register for the level-0 data cache */ > + WRITE_CP32(0, CSSELR); > + ccsid = READ_CP32(CCSIDR); > + > + /* Low 3 bits are log2(cacheline size in words) - 2. 
*/ > + cacheline_bytes = 1U << (4 + (ccsid & 0x7)); > +} > + > /* C entry point for boot CPU */ > void __init start_xen(unsigned long boot_phys_offset, > unsigned long arm_type, > @@ -185,6 +200,8 @@ void __init start_xen(unsigned long boot > size_t fdt_size; > int cpus, i; > > + setup_cache(); > + > smp_clear_cpu_maps(); > > fdt = (void *)BOOT_MISC_VIRT_START > diff -r 7b4449bdb980 -r ffc59d2fc998 xen/arch/arm/smpboot.c > --- a/xen/arch/arm/smpboot.c Thu Nov 22 10:47:58 2012 +0100 > +++ b/xen/arch/arm/smpboot.c Thu Nov 22 16:15:36 2012 +0000 > @@ -118,8 +118,8 @@ make_cpus_ready(unsigned int max_cpus, u > /* Tell the next CPU to get ready */ > /* TODO: handle boards where CPUIDs are not contiguous */ > *gate = i; > - flush_xen_dcache_va(gate); > - asm volatile("dsb; isb; sev"); > + flush_xen_dcache(*gate); > + asm volatile("isb; sev"); > /* And wait for it to respond */ > while ( ready_cpus < i ) > smp_rmb(); > @@ -217,8 +217,8 @@ int __cpu_up(unsigned int cpu) > smp_up_cpu = cpu; > /* we need to make sure that the change to smp_up_cpu is visible to > * secondary cpus with D-cache off */ > - flush_xen_dcache_va(&smp_up_cpu); > - asm volatile("dsb; isb; sev"); > + flush_xen_dcache(smp_up_cpu); > + asm volatile("isb; sev"); > > while ( !cpu_online(cpu) ) > { > diff -r 7b4449bdb980 -r ffc59d2fc998 xen/include/asm-arm/page.h > --- a/xen/include/asm-arm/page.h Thu Nov 22 10:47:58 2012 +0100 > +++ b/xen/include/asm-arm/page.h Thu Nov 22 16:15:36 2012 +0000 > @@ -248,13 +248,16 @@ static inline void write_pte(lpae_t *p, > : : "r" (pte.bits), "r" (p) : "memory"); > } > > +/* Architectural minimum cacheline size is 4 32-bit words. */ > +#define MIN_CACHELINE_BYTES 16 > +/* Actual cacheline size on the boot CPU. */ > +extern size_t cacheline_bytes; > > /* Function for flushing medium-sized areas. > * if ''range'' is large enough we might want to use model-specific > * full-cache flushes. 
*/ > static inline void flush_xen_dcache_va_range(void *p, unsigned long size) > { > - int cacheline_bytes = READ_CP32(CCSIDR); > void *end; > dsb(); /* So the CPU issues all writes to the range */ > for ( end = p + size; p < end; p += cacheline_bytes ) > @@ -262,17 +265,13 @@ static inline void flush_xen_dcache_va_r > dsb(); /* So we know the flushes happen before continuing */ > } > > - > /* Macro for flushing a single small item. The predicate is always > * compile-time constant so this will compile down to 3 instructions in > - * the common case. Make sure to call it with the correct type of > - * pointer! */ > -#define flush_xen_dcache_va(p) do { \ > - int cacheline_bytes = READ_CP32(CCSIDR); \ > - typeof(p) _p = (p); \ > - if ( ((unsigned long)_p & ~(cacheline_bytes - 1)) != \ > - (((unsigned long)_p + (sizeof *_p)) & ~(cacheline_bytes - 1)) ) \ > - flush_xen_dcache_va_range(_p, sizeof *_p); \ > + * the common case. */ > +#define flush_xen_dcache(x) do { \ > + typeof(x) *_p = &(x); \ > + if ( sizeof(x) > MIN_CACHELINE_BYTES || sizeof(x) > alignof(x) ) \ > + flush_xen_dcache_va_range(_p, sizeof(x)); \ > else \ > asm volatile ( \ > "dsb;" /* Finish all earlier writes */ \ > diff -r 7b4449bdb980 -r ffc59d2fc998 xen/include/xen/compiler.h > --- a/xen/include/xen/compiler.h Thu Nov 22 10:47:58 2012 +0100 > +++ b/xen/include/xen/compiler.h Thu Nov 22 16:15:36 2012 +0000 > @@ -54,6 +54,10 @@ > > #define offsetof(a,b) __builtin_offsetof(a,b) > > +#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L > +#define alignof __alignof__ > +#endif > + > /* &a[0] degrades to a pointer: a different type from an array */ > #define __must_be_array(a) \ > BUILD_BUG_ON_ZERO(__builtin_types_compatible_p(typeof(a), typeof(&a[0]))) >
On Thu, 2012-11-22 at 17:16 +0000, Stefano Stabellini wrote:> On Thu, 22 Nov 2012, Tim Deegan wrote: > > # HG changeset patch > > # User Tim Deegan <tim@xen.org> > > # Date 1353600936 0 > > # Node ID ffc59d2fc9980c660941eb24cbe827c1142d1fd0 > > # Parent 7b4449bdb980caee8efc498d5ea48f772331df2f > > arm: Tidy up flush_xen_dcache(). > > > > - Use a compile-time-constant check for whether we can safely flush > > just one cacheline. This reduces the common case from 28 > > instructions to three. > > - Pass an object to the macro, not a pointer, so we can detect > > attempts to flush arrays. > > - Decode CCSIDR correctly to get cacheline size. > > - Remove some redundant DSBs at the call sites. > > > > Signed-off-by: Tim Deegan <tim@xen.org> > > Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>Applied. I wonder if flush_xen_dcache_va_range should be out of line?> > > > diff -r 7b4449bdb980 -r ffc59d2fc998 xen/arch/arm/mm.c > > --- a/xen/arch/arm/mm.c Thu Nov 22 10:47:58 2012 +0100 > > +++ b/xen/arch/arm/mm.c Thu Nov 22 16:15:36 2012 +0000 > > @@ -247,7 +247,7 @@ void __init setup_pagetables(unsigned lo > > > > /* Change pagetables to the copy in the relocated Xen */ > > boot_httbr = (unsigned long) xen_pgtable + phys_offset; > > - flush_xen_dcache_va(&boot_httbr); > > + flush_xen_dcache(boot_httbr); > > flush_xen_dcache_va_range((void*)dest_va, _end - _start); > > flush_xen_text_tlb(); > > > > diff -r 7b4449bdb980 -r ffc59d2fc998 xen/arch/arm/setup.c > > --- a/xen/arch/arm/setup.c Thu Nov 22 10:47:58 2012 +0100 > > +++ b/xen/arch/arm/setup.c Thu Nov 22 16:15:36 2012 +0000 > > @@ -175,6 +175,21 @@ static void __init setup_mm(unsigned lon > > end_boot_allocator(); > > } > > > > +size_t __read_mostly cacheline_bytes; > > + > > +/* Very early check of the CPU cache properties */ > > +void __init setup_cache(void) > > +{ > > + uint32_t ccsid; > > + > > + /* Read the cache size ID register for the level-0 data cache */ > > + WRITE_CP32(0, CSSELR); > > + ccsid = 
READ_CP32(CCSIDR); > > + > > + /* Low 3 bits are log2(cacheline size in words) - 2. */ > > + cacheline_bytes = 1U << (4 + (ccsid & 0x7)); > > +} > > + > > /* C entry point for boot CPU */ > > void __init start_xen(unsigned long boot_phys_offset, > > unsigned long arm_type, > > @@ -185,6 +200,8 @@ void __init start_xen(unsigned long boot > > size_t fdt_size; > > int cpus, i; > > > > + setup_cache(); > > + > > smp_clear_cpu_maps(); > > > > fdt = (void *)BOOT_MISC_VIRT_START > > diff -r 7b4449bdb980 -r ffc59d2fc998 xen/arch/arm/smpboot.c > > --- a/xen/arch/arm/smpboot.c Thu Nov 22 10:47:58 2012 +0100 > > +++ b/xen/arch/arm/smpboot.c Thu Nov 22 16:15:36 2012 +0000 > > @@ -118,8 +118,8 @@ make_cpus_ready(unsigned int max_cpus, u > > /* Tell the next CPU to get ready */ > > /* TODO: handle boards where CPUIDs are not contiguous */ > > *gate = i; > > - flush_xen_dcache_va(gate); > > - asm volatile("dsb; isb; sev"); > > + flush_xen_dcache(*gate); > > + asm volatile("isb; sev"); > > /* And wait for it to respond */ > > while ( ready_cpus < i ) > > smp_rmb(); > > @@ -217,8 +217,8 @@ int __cpu_up(unsigned int cpu) > > smp_up_cpu = cpu; > > /* we need to make sure that the change to smp_up_cpu is visible to > > * secondary cpus with D-cache off */ > > - flush_xen_dcache_va(&smp_up_cpu); > > - asm volatile("dsb; isb; sev"); > > + flush_xen_dcache(smp_up_cpu); > > + asm volatile("isb; sev"); > > > > while ( !cpu_online(cpu) ) > > { > > diff -r 7b4449bdb980 -r ffc59d2fc998 xen/include/asm-arm/page.h > > --- a/xen/include/asm-arm/page.h Thu Nov 22 10:47:58 2012 +0100 > > +++ b/xen/include/asm-arm/page.h Thu Nov 22 16:15:36 2012 +0000 > > @@ -248,13 +248,16 @@ static inline void write_pte(lpae_t *p, > > : : "r" (pte.bits), "r" (p) : "memory"); > > } > > > > +/* Architectural minimum cacheline size is 4 32-bit words. */ > > +#define MIN_CACHELINE_BYTES 16 > > +/* Actual cacheline size on the boot CPU. 
*/ > > +extern size_t cacheline_bytes; > > > > /* Function for flushing medium-sized areas. > > * if ''range'' is large enough we might want to use model-specific > > * full-cache flushes. */ > > static inline void flush_xen_dcache_va_range(void *p, unsigned long size) > > { > > - int cacheline_bytes = READ_CP32(CCSIDR); > > void *end; > > dsb(); /* So the CPU issues all writes to the range */ > > for ( end = p + size; p < end; p += cacheline_bytes ) > > @@ -262,17 +265,13 @@ static inline void flush_xen_dcache_va_r > > dsb(); /* So we know the flushes happen before continuing */ > > } > > > > - > > /* Macro for flushing a single small item. The predicate is always > > * compile-time constant so this will compile down to 3 instructions in > > - * the common case. Make sure to call it with the correct type of > > - * pointer! */ > > -#define flush_xen_dcache_va(p) do { \ > > - int cacheline_bytes = READ_CP32(CCSIDR); \ > > - typeof(p) _p = (p); \ > > - if ( ((unsigned long)_p & ~(cacheline_bytes - 1)) != \ > > - (((unsigned long)_p + (sizeof *_p)) & ~(cacheline_bytes - 1)) ) \ > > - flush_xen_dcache_va_range(_p, sizeof *_p); \ > > + * the common case. 
*/ > > +#define flush_xen_dcache(x) do { \ > > + typeof(x) *_p = &(x); \ > > + if ( sizeof(x) > MIN_CACHELINE_BYTES || sizeof(x) > alignof(x) ) \ > > + flush_xen_dcache_va_range(_p, sizeof(x)); \ > > else \ > > asm volatile ( \ > > "dsb;" /* Finish all earlier writes */ \ > > diff -r 7b4449bdb980 -r ffc59d2fc998 xen/include/xen/compiler.h > > --- a/xen/include/xen/compiler.h Thu Nov 22 10:47:58 2012 +0100 > > +++ b/xen/include/xen/compiler.h Thu Nov 22 16:15:36 2012 +0000 > > @@ -54,6 +54,10 @@ > > > > #define offsetof(a,b) __builtin_offsetof(a,b) > > > > +#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L > > +#define alignof __alignof__ > > +#endif > > + > > /* &a[0] degrades to a pointer: a different type from an array */ > > #define __must_be_array(a) \ > > BUILD_BUG_ON_ZERO(__builtin_types_compatible_p(typeof(a), typeof(&a[0]))) > >