Jan Beulich
2007-Aug-21 15:25 UTC
[Xen-devel] [PATCH] x86: consolidate/enhance TLB flushing interface
Folding into a single local handler and a single SMP multiplexor as well as adding capability to also flush caches through the same interfaces (a subsequent patch will make use of this). Once at changing cpuinfo_x86, this patch also removes several unused fields apparently inherited from Linux. The changes to xen/include/asm-x86/cpufeature.h apply cleanly only with the SVM/EFER patch (which was sent several days ago) applied. Signed-off-by: Jan Beulich <jbeulich@novell.com> Index: 2007-08-08/xen/arch/x86/cpu/common.c ==================================================================--- 2007-08-08.orig/xen/arch/x86/cpu/common.c 2007-08-21 14:15:47.000000000 +0200 +++ 2007-08-08/xen/arch/x86/cpu/common.c 2007-08-08 12:03:19.000000000 +0200 @@ -229,7 +229,6 @@ static void __init early_cpu_detect(void void __devinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; - int junk; if (have_cpuid_p()) { /* Get vendor name */ @@ -244,8 +243,8 @@ void __devinit generic_identify(struct c /* Intel-defined flags: level 0x00000001 */ if ( c->cpuid_level >= 0x00000001 ) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &junk, &excap, &capability); + u32 capability, excap, ebx; + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); c->x86_capability[0] = capability; c->x86_capability[4] = excap; c->x86 = (tfms >> 8) & 15; @@ -255,6 +254,8 @@ void __devinit generic_identify(struct c c->x86_model += ((tfms >> 16) & 0xF) << 4; } c->x86_mask = tfms & 15; + if ( cpu_has(c, X86_FEATURE_CLFLSH) ) + c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; } else { /* Have CPUID level 0 only - unheard of */ c->x86 = 4; @@ -272,6 +273,11 @@ void __devinit generic_identify(struct c } } +#ifdef CONFIG_X86_64 + if ( cpu_has(c, X86_FEATURE_PAGE1GB) ) + __set_bit(3, &c->invlpg_works_ok); +#endif + early_intel_workaround(c); #ifdef CONFIG_X86_HT @@ -313,6 +319,7 @@ void __devinit identify_cpu(struct cpuin c->x86_vendor_id[0] = ''\0''; /* Unset */ c->x86_model_id[0] = ''\0''; /* Unset */ 
c->x86_max_cores = 1; + c->x86_clflush_size = 0; memset(&c->x86_capability, 0, sizeof c->x86_capability); if (!have_cpuid_p()) { Index: 2007-08-08/xen/arch/x86/cpu/intel.c ==================================================================--- 2007-08-08.orig/xen/arch/x86/cpu/intel.c 2007-08-21 14:15:47.000000000 +0200 +++ 2007-08-08/xen/arch/x86/cpu/intel.c 2007-08-21 12:17:32.000000000 +0200 @@ -16,8 +16,6 @@ #define select_idle_routine(x) ((void)0) -extern int trap_init_f00f_bug(void); - #ifdef CONFIG_X86_INTEL_USERCOPY /* * Alignment at which movsl is preferred for bulk memory copies. @@ -97,25 +95,6 @@ static void __devinit init_intel(struct unsigned int l2 = 0; char *p = NULL; -#ifdef CONFIG_X86_F00F_BUG - /* - * All current models of Pentium and Pentium with MMX technology CPUs - * have the F0 0F bug, which lets nonprivileged users lock up the system. - * Note that the workaround only should be initialized once... - */ - c->f00f_bug = 0; - if ( c->x86 == 5 ) { - static int f00f_workaround_enabled = 0; - - c->f00f_bug = 1; - if ( !f00f_workaround_enabled ) { - trap_init_f00f_bug(); - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); - f00f_workaround_enabled = 1; - } - } -#endif - select_idle_routine(c); l2 = init_intel_cacheinfo(c); @@ -123,6 +102,16 @@ static void __devinit init_intel(struct if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) clear_bit(X86_FEATURE_SEP, c->x86_capability); + if ( /* PentiumPro erratum 30 */ + (c->x86 == 6 && c->x86_model == 1 && c->x86_mask < 9) || + /* Dual-Core Intel Xeon 3000/5100 series erratum 89/90 */ + /* Quad-Core Intel Xeon 3200/5300 series erratum 89/88 */ + /* Intel Core2 erratum 89 */ + (c->x86 == 6 && c->x86_model == 15 ) || + /* Dual-Core Intel Xeon LV/ULV erratum 75 */ + (c->x86 == 6 && c->x86_model == 14 ) ) + __clear_bit(2, &c->invlpg_works_ok); + /* Names for the Pentium II/Celeron processors detectable only by also checking the cache size. Dixon is NOT a Celeron. 
*/ Index: 2007-08-08/xen/arch/x86/flushtlb.c ==================================================================--- 2007-08-08.orig/xen/arch/x86/flushtlb.c 2007-08-21 14:15:47.000000000 +0200 +++ 2007-08-08/xen/arch/x86/flushtlb.c 2007-08-21 14:00:19.000000000 +0200 @@ -95,26 +95,74 @@ void write_cr3(unsigned long cr3) local_irq_restore(flags); } -void local_flush_tlb(void) +void flush_one_local(const void *va, unsigned int flags) { - unsigned long flags; - u32 t; + const struct cpuinfo_x86 *c = ¤t_cpu_data; + unsigned int level = flags & FLUSH_LEVEL_MASK; + unsigned long irqfl; - /* This non-reentrant function is sometimes called in interrupt context. */ - local_irq_save(flags); - - t = pre_flush(); + if ( !c->x86 ) + c = &boot_cpu_data; - hvm_flush_guest_tlbs(); + /* This non-reentrant function is sometimes called in interrupt context. */ + local_irq_save(irqfl); -#ifdef USER_MAPPINGS_ARE_GLOBAL - __pge_off(); - __pge_on(); -#else - __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" ); + if ( flags & (FLUSH_TLB|FLUSH_TLB_GLOBAL) ) + { + if ( level > 0 && + level < CONFIG_PAGING_LEVELS && + test_bit(level, &c->invlpg_works_ok) ) + __asm__ __volatile__( "invlpg %0" + : + : "m" (*(const char *)(va)) + : "memory" ); + else + { + u32 t = pre_flush(); + + hvm_flush_guest_tlbs(); + +#ifndef USER_MAPPINGS_ARE_GLOBAL + if ( !(flags & FLUSH_TLB_GLOBAL) || + !(mmu_cr4_features & X86_CR4_PGE) ) + __asm__ __volatile__ ( "mov %0, %%cr3" + : + : "r" (read_cr3()) + : "memory" ); + else #endif + { + __pge_off(); + __pge_on(); + } + + post_flush(t); + } + } + + if ( flags & FLUSH_CACHE ) + { + unsigned long sz; + + if ( level > 0 && level < CONFIG_PAGING_LEVELS ) + sz = 1UL << ((level - 1) * PAGETABLE_ORDER); + else + sz = ULONG_MAX; + if ( c->x86_clflush_size > 0 && + c->x86_cache_size > 0 && + sz < (c->x86_cache_size >> (PAGE_SHIFT - 10)) ) + { + unsigned long i; + + va = (const void *)((unsigned long)va & ~(sz - 1)); + for ( i = 0; i < sz; i += 
c->x86_clflush_size ) + __asm__ __volatile__( "clflush %0" + : + : "m" (((const char *)va)[i]) ); + } + else + wbinvd(); + } - post_flush(t); - - local_irq_restore(flags); + local_irq_restore(irqfl); } Index: 2007-08-08/xen/arch/x86/mm.c ==================================================================--- 2007-08-08.orig/xen/arch/x86/mm.c 2007-08-21 14:15:47.000000000 +0200 +++ 2007-08-08/xen/arch/x86/mm.c 2007-08-21 14:18:00.000000000 +0200 @@ -3497,7 +3497,7 @@ int map_pages_to_xen( if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) ) { - local_flush_tlb_pge(); + flush_one_local((void *)virt, FLUSH_TLB_GLOBAL|2); if ( !(l2e_get_flags(ol2e) & _PAGE_PSE) ) free_xen_pagetable(mfn_to_virt(l2e_get_pfn(ol2e))); } @@ -3525,7 +3525,7 @@ int map_pages_to_xen( l2e_get_flags(*pl2e) & ~_PAGE_PSE)); l2e_write_atomic(pl2e, l2e_from_pfn(virt_to_mfn(pl1e), __PAGE_HYPERVISOR)); - local_flush_tlb_pge(); + flush_one_local((void *)virt, FLUSH_TLB_GLOBAL|2); } pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(virt); @@ -3608,7 +3608,7 @@ void destroy_xen_mappings(unsigned long } } - flush_tlb_all_pge(); + flush_global(FLUSH_TLB_GLOBAL); } void __set_fixmap( Index: 2007-08-08/xen/arch/x86/setup.c ==================================================================--- 2007-08-08.orig/xen/arch/x86/setup.c 2007-08-21 14:15:47.000000000 +0200 +++ 2007-08-08/xen/arch/x86/setup.c 2007-08-21 11:06:23.000000000 +0200 @@ -114,7 +114,7 @@ struct tss_struct init_tss[NR_CPUS]; char __attribute__ ((__section__(".bss.stack_aligned"))) cpu0_stack[STACK_SIZE]; -struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; +struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, 0x6, 1, -1 }; #if CONFIG_PAGING_LEVELS > 2 unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE; Index: 2007-08-08/xen/arch/x86/smp.c ==================================================================--- 2007-08-08.orig/xen/arch/x86/smp.c 2007-08-21 14:15:47.000000000 +0200 +++ 2007-08-08/xen/arch/x86/smp.c 2007-08-21 
12:20:28.000000000 +0200 @@ -164,7 +164,8 @@ void send_IPI_mask_phys(cpumask_t mask, static DEFINE_SPINLOCK(flush_lock); static cpumask_t flush_cpumask; -static unsigned long flush_va; +static const void *flush_va; +static unsigned int flush_flags; fastcall void smp_invalidate_interrupt(void) { @@ -172,26 +173,18 @@ fastcall void smp_invalidate_interrupt(v perfc_incr(ipis); irq_enter(); if ( !__sync_lazy_execstate() ) - { - if ( flush_va == FLUSHVA_ALL ) - local_flush_tlb(); - else - local_flush_tlb_one(flush_va); - } + flush_one_local(flush_va, flush_flags); cpu_clear(smp_processor_id(), flush_cpumask); irq_exit(); } -void __flush_tlb_mask(cpumask_t mask, unsigned long va) +void flush_one_mask(cpumask_t mask, const void *va, unsigned int flags) { ASSERT(local_irq_is_enabled()); if ( cpu_isset(smp_processor_id(), mask) ) { - if ( va == FLUSHVA_ALL ) - local_flush_tlb(); - else - local_flush_tlb_one(va); + flush_one_local(va, flags); cpu_clear(smp_processor_id(), mask); } @@ -200,6 +193,7 @@ void __flush_tlb_mask(cpumask_t mask, un spin_lock(&flush_lock); flush_cpumask = mask; flush_va = va; + flush_flags = flags; send_IPI_mask(mask, INVALIDATE_TLB_VECTOR); while ( !cpus_empty(flush_cpumask) ) cpu_relax(); @@ -215,24 +209,13 @@ void new_tlbflush_clock_period(void) /* Flush everyone else. We definitely flushed just before entry. */ allbutself = cpu_online_map; cpu_clear(smp_processor_id(), allbutself); - __flush_tlb_mask(allbutself, FLUSHVA_ALL); + flush_mask(allbutself, FLUSH_TLB); /* No need for atomicity: we are the only possible updater. 
*/ ASSERT(tlbflush_clock == 0); tlbflush_clock++; } -static void flush_tlb_all_pge_ipi(void *info) -{ - local_flush_tlb_pge(); -} - -void flush_tlb_all_pge(void) -{ - smp_call_function(flush_tlb_all_pge_ipi, 0, 1, 1); - local_flush_tlb_pge(); -} - void smp_send_event_check_mask(cpumask_t mask) { cpu_clear(smp_processor_id(), mask); Index: 2007-08-08/xen/arch/x86/x86_32/mm.c ==================================================================--- 2007-08-08.orig/xen/arch/x86/x86_32/mm.c 2007-08-21 14:15:47.000000000 +0200 +++ 2007-08-08/xen/arch/x86/x86_32/mm.c 2007-08-21 09:59:15.000000000 +0200 @@ -149,7 +149,7 @@ void __init zap_low_mappings(l2_pgentry_ /* Now zap mappings in the idle pagetables. */ destroy_xen_mappings(0, HYPERVISOR_VIRT_START); - flush_tlb_all_pge(); + flush_global(FLUSH_TLB_GLOBAL); /* Replace with mapping of the boot trampoline only. */ map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT, Index: 2007-08-08/xen/arch/x86/x86_64/mm.c ==================================================================--- 2007-08-08.orig/xen/arch/x86/x86_64/mm.c 2007-08-21 14:15:47.000000000 +0200 +++ 2007-08-08/xen/arch/x86/x86_64/mm.c 2007-08-21 12:33:35.000000000 +0200 @@ -205,7 +205,7 @@ void __init zap_low_mappings(void) /* Remove aliased mapping of first 1:1 PML4 entry. */ l4e_write(&idle_pg_table[0], l4e_empty()); - local_flush_tlb_pge(); + flush_local(FLUSH_TLB_GLOBAL); /* Replace with mapping of the boot trampoline only. 
*/ map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT, Index: 2007-08-08/xen/include/asm-x86/cpufeature.h ==================================================================--- 2007-08-08.orig/xen/include/asm-x86/cpufeature.h 2007-08-21 14:15:47.000000000 +0200 +++ 2007-08-08/xen/include/asm-x86/cpufeature.h 2007-08-21 12:07:46.000000000 +0200 @@ -50,6 +50,7 @@ #define X86_FEATURE_NX (1*32+20) /* Execute Disable */ #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ #define X86_FEATURE_FFXSR (1*32+25) /* FFXSR instruction optimizations */ +#define X86_FEATURE_PAGE1GB (1*32+26) /* 1Gb large page support */ #define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */ #define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ #define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ @@ -137,6 +138,7 @@ #define cpu_has_centaur_mcr boot_cpu_has(X86_FEATURE_CENTAUR_MCR) #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) #define cpu_has_ffxsr 0 +#define cpu_has_page1gb 0 #else /* __x86_64__ */ #define cpu_has_vme 0 #define cpu_has_de 1 @@ -161,6 +163,7 @@ #define cpu_has_centaur_mcr 0 #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) #define cpu_has_ffxsr boot_cpu_has(X86_FEATURE_FFXSR) +#define cpu_has_page1gb boot_cpu_has(X86_FEATURE_PAGE1GB) #endif #endif /* __ASM_I386_CPUFEATURE_H */ Index: 2007-08-08/xen/include/asm-x86/flushtlb.h ==================================================================--- 2007-08-08.orig/xen/include/asm-x86/flushtlb.h 2007-08-21 14:15:47.000000000 +0200 +++ 2007-08-08/xen/include/asm-x86/flushtlb.h 2007-08-21 12:34:36.000000000 +0200 @@ -15,6 +15,29 @@ #include <xen/smp.h> #include <xen/types.h> +#define FLUSH_LEVEL_MASK 0x0f +#define FLUSH_TLB 0x10 +#define FLUSH_TLB_GLOBAL 0x20 +#define FLUSH_CACHE 0x40 + +void flush_one_local(const void *va, unsigned int flags); +static inline void flush_local(unsigned int flags) +{ + flush_one_local(NULL, flags | FLUSH_LEVEL_MASK); +} +#ifdef CONFIG_SMP +void 
flush_one_mask(cpumask_t, const void *va, unsigned int flags); +static inline void flush_mask(cpumask_t mask, unsigned int flags) +{ + flush_one_mask(mask, NULL, flags | FLUSH_LEVEL_MASK); +} +#else +#define flush_mask(mask, flags) flush_local(flags) +#define flush_one_mask(mask, va, flags) flush_one_local(va, flags) +#endif +#define flush_global(flags) flush_mask(cpu_online_map, flags) +#define flush_one_global(va, flags) flush_one_mask(cpu_online_map, va, flags) + /* The current time as shown by the virtual TLB clock. */ extern u32 tlbflush_clock; @@ -72,31 +95,20 @@ static inline unsigned long read_cr3(voi extern void write_cr3(unsigned long cr3); /* Flush guest mappings from the TLB and implicitly tick the tlbflush clock. */ -extern void local_flush_tlb(void); - -#define local_flush_tlb_pge() \ - do { \ - __pge_off(); \ - local_flush_tlb(); \ - __pge_on(); \ - } while ( 0 ) +#define local_flush_tlb(void) flush_local(FLUSH_TLB) -#define local_flush_tlb_one(__addr) \ - __asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr))) +#define local_flush_tlb_one(v) \ + flush_one_local((const void *)(v), FLUSH_TLB|1) #define flush_tlb_all() flush_tlb_mask(cpu_online_map) #ifndef CONFIG_SMP -#define flush_tlb_all_pge() local_flush_tlb_pge() #define flush_tlb_mask(mask) local_flush_tlb() -#define flush_tlb_one_mask(mask,v) local_flush_tlb_one(_v) +#define flush_tlb_one_mask(mask,v) local_flush_tlb_one(v) #else #include <xen/smp.h> -#define FLUSHVA_ALL (~0UL) -extern void flush_tlb_all_pge(void); -extern void __flush_tlb_mask(cpumask_t mask, unsigned long va); -#define flush_tlb_mask(mask) __flush_tlb_mask(mask,FLUSHVA_ALL) -#define flush_tlb_one_mask(mask,v) __flush_tlb_mask(mask,(unsigned long)(v)) +#define flush_tlb_mask(mask) flush_mask(mask, FLUSH_TLB) +#define flush_tlb_one_mask(mask,v) flush_one_mask(mask,(const void *)(v), FLUSH_TLB|1) #endif #endif /* __FLUSHTLB_H__ */ Index: 2007-08-08/xen/include/asm-x86/processor.h 
==================================================================--- 2007-08-08.orig/xen/include/asm-x86/processor.h 2007-08-21 14:15:47.000000000 +0200 +++ 2007-08-08/xen/include/asm-x86/processor.h 2007-08-21 11:34:25.000000000 +0200 @@ -156,24 +156,20 @@ struct cpuinfo_x86 { __u8 x86_vendor; /* CPU vendor */ __u8 x86_model; __u8 x86_mask; - char wp_works_ok; /* It doesn''t on 386''s */ - char hlt_works_ok; /* Problems on some 486Dx4''s and old 386''s */ + __u8 invlpg_works_ok; char hard_math; - char rfu; int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ unsigned int x86_capability[NCAPINTS]; char x86_vendor_id[16]; char x86_model_id[64]; int x86_cache_size; /* in KB - valid for CPUS which support this call */ int x86_cache_alignment; /* In bytes */ - char fdiv_bug; - char f00f_bug; char coma_bug; - char pad0; int x86_power; unsigned char x86_max_cores; /* cpuid returned max cores value */ unsigned char booted_cores; /* number of cores as seen by OS */ unsigned char apicid; + unsigned short x86_clflush_size; } __cacheline_aligned; /* _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2007-Aug-22 16:21 UTC
Re: [Xen-devel] [PATCH] x86: consolidate/enhance TLB flushing interface
Pretty yukky and unlikely to improve performance on any current workload. The scary errata around INVLPG on 2MB/4MB pages makes me think we should just continue to avoid INVLPG for anything other than 4kB regions. Certainly unless there is a demonstrable measurable performance loss for taking this conservative approach. Anyway, you're basically turning off the cunning goodness for all current Intel CPUs. :-) -- Keir

On 21/8/07 16:25, "Jan Beulich" <jbeulich@novell.com> wrote:
> Folding into a single local handler and a single SMP multiplexor as > well as adding capability to also flush caches through the same > interfaces (a subsequent patch will make use of this). > > Once at changing cpuinfo_x86, this patch also removes several unused > fields apparently inherited from Linux. > > The changes to xen/include/asm-x86/cpufeature.h apply cleanly only with > the SVM/EFER patch (which was sent several days ago) applied. > > Signed-off-by: Jan Beulich <jbeulich@novell.com> > > Index: 2007-08-08/xen/arch/x86/cpu/common.c > ==================================================================> --- 2007-08-08.orig/xen/arch/x86/cpu/common.c 2007-08-21 14:15:47.000000000 > +0200 > +++ 2007-08-08/xen/arch/x86/cpu/common.c 2007-08-08 12:03:19.000000000 +0200 > @@ -229,7 +229,6 @@ static void __init early_cpu_detect(void > void __devinit generic_identify(struct cpuinfo_x86 * c) > { > u32 tfms, xlvl; > - int junk; > > if (have_cpuid_p()) { > /* Get vendor name */ > @@ -244,8 +243,8 @@ void __devinit generic_identify(struct c > > /* Intel-defined flags: level 0x00000001 */ > if ( c->cpuid_level >= 0x00000001 ) { > - u32 capability, excap; > - cpuid(0x00000001, &tfms, &junk, &excap, &capability); > + u32 capability, excap, ebx; > + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); > c->x86_capability[0] = capability; > c->x86_capability[4] = excap; > c->x86 = (tfms >> 8) & 15; > @@ -255,6 +254,8 @@ void __devinit generic_identify(struct c > c->x86_model += ((tfms >> 16) & 0xF) << 
4; > } > c->x86_mask = tfms & 15; > + if ( cpu_has(c, X86_FEATURE_CLFLSH) ) > + c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; > } else { > /* Have CPUID level 0 only - unheard of */ > c->x86 = 4; > @@ -272,6 +273,11 @@ void __devinit generic_identify(struct c > } > } > > +#ifdef CONFIG_X86_64 > + if ( cpu_has(c, X86_FEATURE_PAGE1GB) ) > + __set_bit(3, &c->invlpg_works_ok); > +#endif > + > early_intel_workaround(c); > > #ifdef CONFIG_X86_HT > @@ -313,6 +319,7 @@ void __devinit identify_cpu(struct cpuin > c->x86_vendor_id[0] = ''\0''; /* Unset */ > c->x86_model_id[0] = ''\0''; /* Unset */ > c->x86_max_cores = 1; > + c->x86_clflush_size = 0; > memset(&c->x86_capability, 0, sizeof c->x86_capability); > > if (!have_cpuid_p()) { > Index: 2007-08-08/xen/arch/x86/cpu/intel.c > ==================================================================> --- 2007-08-08.orig/xen/arch/x86/cpu/intel.c 2007-08-21 14:15:47.000000000 > +0200 > +++ 2007-08-08/xen/arch/x86/cpu/intel.c 2007-08-21 12:17:32.000000000 +0200 > @@ -16,8 +16,6 @@ > > #define select_idle_routine(x) ((void)0) > > -extern int trap_init_f00f_bug(void); > - > #ifdef CONFIG_X86_INTEL_USERCOPY > /* > * Alignment at which movsl is preferred for bulk memory copies. > @@ -97,25 +95,6 @@ static void __devinit init_intel(struct > unsigned int l2 = 0; > char *p = NULL; > > -#ifdef CONFIG_X86_F00F_BUG > - /* > - * All current models of Pentium and Pentium with MMX technology CPUs > - * have the F0 0F bug, which lets nonprivileged users lock up the system. > - * Note that the workaround only should be initialized once... 
> - */ > - c->f00f_bug = 0; > - if ( c->x86 == 5 ) { > - static int f00f_workaround_enabled = 0; > - > - c->f00f_bug = 1; > - if ( !f00f_workaround_enabled ) { > - trap_init_f00f_bug(); > - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround > enabled.\n"); > - f00f_workaround_enabled = 1; > - } > - } > -#endif > - > select_idle_routine(c); > l2 = init_intel_cacheinfo(c); > > @@ -123,6 +102,16 @@ static void __devinit init_intel(struct > if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) > clear_bit(X86_FEATURE_SEP, c->x86_capability); > > + if ( /* PentiumPro erratum 30 */ > + (c->x86 == 6 && c->x86_model == 1 && c->x86_mask < 9) || > + /* Dual-Core Intel Xeon 3000/5100 series erratum 89/90 */ > + /* Quad-Core Intel Xeon 3200/5300 series erratum 89/88 */ > + /* Intel Core2 erratum 89 */ > + (c->x86 == 6 && c->x86_model == 15 ) || > + /* Dual-Core Intel Xeon LV/ULV erratum 75 */ > + (c->x86 == 6 && c->x86_model == 14 ) ) > + __clear_bit(2, &c->invlpg_works_ok); > + > /* Names for the Pentium II/Celeron processors > detectable only by also checking the cache size. > Dixon is NOT a Celeron. */ > Index: 2007-08-08/xen/arch/x86/flushtlb.c > ==================================================================> --- 2007-08-08.orig/xen/arch/x86/flushtlb.c 2007-08-21 14:15:47.000000000 > +0200 > +++ 2007-08-08/xen/arch/x86/flushtlb.c 2007-08-21 14:00:19.000000000 +0200 > @@ -95,26 +95,74 @@ void write_cr3(unsigned long cr3) > local_irq_restore(flags); > } > > -void local_flush_tlb(void) > +void flush_one_local(const void *va, unsigned int flags) > { > - unsigned long flags; > - u32 t; > + const struct cpuinfo_x86 *c = ¤t_cpu_data; > + unsigned int level = flags & FLUSH_LEVEL_MASK; > + unsigned long irqfl; > > - /* This non-reentrant function is sometimes called in interrupt context. 
> */ > - local_irq_save(flags); > - > - t = pre_flush(); > + if ( !c->x86 ) > + c = &boot_cpu_data; > > - hvm_flush_guest_tlbs(); > + /* This non-reentrant function is sometimes called in interrupt context. > */ > + local_irq_save(irqfl); > > -#ifdef USER_MAPPINGS_ARE_GLOBAL > - __pge_off(); > - __pge_on(); > -#else > - __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" ); > + if ( flags & (FLUSH_TLB|FLUSH_TLB_GLOBAL) ) > + { > + if ( level > 0 && > + level < CONFIG_PAGING_LEVELS && > + test_bit(level, &c->invlpg_works_ok) ) > + __asm__ __volatile__( "invlpg %0" > + : > + : "m" (*(const char *)(va)) > + : "memory" ); > + else > + { > + u32 t = pre_flush(); > + > + hvm_flush_guest_tlbs(); > + > +#ifndef USER_MAPPINGS_ARE_GLOBAL > + if ( !(flags & FLUSH_TLB_GLOBAL) || > + !(mmu_cr4_features & X86_CR4_PGE) ) > + __asm__ __volatile__ ( "mov %0, %%cr3" > + : > + : "r" (read_cr3()) > + : "memory" ); > + else > #endif > + { > + __pge_off(); > + __pge_on(); > + } > + > + post_flush(t); > + } > + } > + > + if ( flags & FLUSH_CACHE ) > + { > + unsigned long sz; > + > + if ( level > 0 && level < CONFIG_PAGING_LEVELS ) > + sz = 1UL << ((level - 1) * PAGETABLE_ORDER); > + else > + sz = ULONG_MAX; > + if ( c->x86_clflush_size > 0 && > + c->x86_cache_size > 0 && > + sz < (c->x86_cache_size >> (PAGE_SHIFT - 10)) ) > + { > + unsigned long i; > + > + va = (const void *)((unsigned long)va & ~(sz - 1)); > + for ( i = 0; i < sz; i += c->x86_clflush_size ) > + __asm__ __volatile__( "clflush %0" > + : > + : "m" (((const char *)va)[i]) ); > + } > + else > + wbinvd(); > + } > > - post_flush(t); > - > - local_irq_restore(flags); > + local_irq_restore(irqfl); > } > Index: 2007-08-08/xen/arch/x86/mm.c > ==================================================================> --- 2007-08-08.orig/xen/arch/x86/mm.c 2007-08-21 14:15:47.000000000 +0200 > +++ 2007-08-08/xen/arch/x86/mm.c 2007-08-21 14:18:00.000000000 +0200 > @@ -3497,7 +3497,7 @@ int map_pages_to_xen( > > if ( 
(l2e_get_flags(ol2e) & _PAGE_PRESENT) ) > { > - local_flush_tlb_pge(); > + flush_one_local((void *)virt, FLUSH_TLB_GLOBAL|2); > if ( !(l2e_get_flags(ol2e) & _PAGE_PSE) ) > free_xen_pagetable(mfn_to_virt(l2e_get_pfn(ol2e))); > } > @@ -3525,7 +3525,7 @@ int map_pages_to_xen( > l2e_get_flags(*pl2e) & > ~_PAGE_PSE)); > l2e_write_atomic(pl2e, l2e_from_pfn(virt_to_mfn(pl1e), > __PAGE_HYPERVISOR)); > - local_flush_tlb_pge(); > + flush_one_local((void *)virt, FLUSH_TLB_GLOBAL|2); > } > > pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(virt); > @@ -3608,7 +3608,7 @@ void destroy_xen_mappings(unsigned long > } > } > > - flush_tlb_all_pge(); > + flush_global(FLUSH_TLB_GLOBAL); > } > > void __set_fixmap( > Index: 2007-08-08/xen/arch/x86/setup.c > ==================================================================> --- 2007-08-08.orig/xen/arch/x86/setup.c 2007-08-21 14:15:47.000000000 +0200 > +++ 2007-08-08/xen/arch/x86/setup.c 2007-08-21 11:06:23.000000000 +0200 > @@ -114,7 +114,7 @@ struct tss_struct init_tss[NR_CPUS]; > > char __attribute__ ((__section__(".bss.stack_aligned"))) > cpu0_stack[STACK_SIZE]; > > -struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; > +struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, 0x6, 1, -1 }; > > #if CONFIG_PAGING_LEVELS > 2 > unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE; > Index: 2007-08-08/xen/arch/x86/smp.c > ==================================================================> --- 2007-08-08.orig/xen/arch/x86/smp.c 2007-08-21 14:15:47.000000000 +0200 > +++ 2007-08-08/xen/arch/x86/smp.c 2007-08-21 12:20:28.000000000 +0200 > @@ -164,7 +164,8 @@ void send_IPI_mask_phys(cpumask_t mask, > > static DEFINE_SPINLOCK(flush_lock); > static cpumask_t flush_cpumask; > -static unsigned long flush_va; > +static const void *flush_va; > +static unsigned int flush_flags; > > fastcall void smp_invalidate_interrupt(void) > { > @@ -172,26 +173,18 @@ fastcall void smp_invalidate_interrupt(v > perfc_incr(ipis); > irq_enter(); 
> if ( !__sync_lazy_execstate() ) > - { > - if ( flush_va == FLUSHVA_ALL ) > - local_flush_tlb(); > - else > - local_flush_tlb_one(flush_va); > - } > + flush_one_local(flush_va, flush_flags); > cpu_clear(smp_processor_id(), flush_cpumask); > irq_exit(); > } > > -void __flush_tlb_mask(cpumask_t mask, unsigned long va) > +void flush_one_mask(cpumask_t mask, const void *va, unsigned int flags) > { > ASSERT(local_irq_is_enabled()); > > if ( cpu_isset(smp_processor_id(), mask) ) > { > - if ( va == FLUSHVA_ALL ) > - local_flush_tlb(); > - else > - local_flush_tlb_one(va); > + flush_one_local(va, flags); > cpu_clear(smp_processor_id(), mask); > } > > @@ -200,6 +193,7 @@ void __flush_tlb_mask(cpumask_t mask, un > spin_lock(&flush_lock); > flush_cpumask = mask; > flush_va = va; > + flush_flags = flags; > send_IPI_mask(mask, INVALIDATE_TLB_VECTOR); > while ( !cpus_empty(flush_cpumask) ) > cpu_relax(); > @@ -215,24 +209,13 @@ void new_tlbflush_clock_period(void) > /* Flush everyone else. We definitely flushed just before entry. */ > allbutself = cpu_online_map; > cpu_clear(smp_processor_id(), allbutself); > - __flush_tlb_mask(allbutself, FLUSHVA_ALL); > + flush_mask(allbutself, FLUSH_TLB); > > /* No need for atomicity: we are the only possible updater. 
*/ > ASSERT(tlbflush_clock == 0); > tlbflush_clock++; > } > > -static void flush_tlb_all_pge_ipi(void *info) > -{ > - local_flush_tlb_pge(); > -} > - > -void flush_tlb_all_pge(void) > -{ > - smp_call_function(flush_tlb_all_pge_ipi, 0, 1, 1); > - local_flush_tlb_pge(); > -} > - > void smp_send_event_check_mask(cpumask_t mask) > { > cpu_clear(smp_processor_id(), mask); > Index: 2007-08-08/xen/arch/x86/x86_32/mm.c > ==================================================================> --- 2007-08-08.orig/xen/arch/x86/x86_32/mm.c 2007-08-21 14:15:47.000000000 > +0200 > +++ 2007-08-08/xen/arch/x86/x86_32/mm.c 2007-08-21 09:59:15.000000000 +0200 > @@ -149,7 +149,7 @@ void __init zap_low_mappings(l2_pgentry_ > /* Now zap mappings in the idle pagetables. */ > destroy_xen_mappings(0, HYPERVISOR_VIRT_START); > > - flush_tlb_all_pge(); > + flush_global(FLUSH_TLB_GLOBAL); > > /* Replace with mapping of the boot trampoline only. */ > map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT, > Index: 2007-08-08/xen/arch/x86/x86_64/mm.c > ==================================================================> --- 2007-08-08.orig/xen/arch/x86/x86_64/mm.c 2007-08-21 14:15:47.000000000 > +0200 > +++ 2007-08-08/xen/arch/x86/x86_64/mm.c 2007-08-21 12:33:35.000000000 +0200 > @@ -205,7 +205,7 @@ void __init zap_low_mappings(void) > > /* Remove aliased mapping of first 1:1 PML4 entry. */ > l4e_write(&idle_pg_table[0], l4e_empty()); > - local_flush_tlb_pge(); > + flush_local(FLUSH_TLB_GLOBAL); > > /* Replace with mapping of the boot trampoline only. 
*/ > map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT, > Index: 2007-08-08/xen/include/asm-x86/cpufeature.h > ==================================================================> --- 2007-08-08.orig/xen/include/asm-x86/cpufeature.h 2007-08-21 > 14:15:47.000000000 +0200 > +++ 2007-08-08/xen/include/asm-x86/cpufeature.h 2007-08-21 12:07:46.000000000 > +0200 > @@ -50,6 +50,7 @@ > #define X86_FEATURE_NX (1*32+20) /* Execute Disable */ > #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ > #define X86_FEATURE_FFXSR (1*32+25) /* FFXSR instruction optimizations */ > +#define X86_FEATURE_PAGE1GB (1*32+26) /* 1Gb large page support */ > #define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */ > #define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ > #define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ > @@ -137,6 +138,7 @@ > #define cpu_has_centaur_mcr boot_cpu_has(X86_FEATURE_CENTAUR_MCR) > #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) > #define cpu_has_ffxsr 0 > +#define cpu_has_page1gb 0 > #else /* __x86_64__ */ > #define cpu_has_vme 0 > #define cpu_has_de 1 > @@ -161,6 +163,7 @@ > #define cpu_has_centaur_mcr 0 > #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) > #define cpu_has_ffxsr boot_cpu_has(X86_FEATURE_FFXSR) > +#define cpu_has_page1gb boot_cpu_has(X86_FEATURE_PAGE1GB) > #endif > > #endif /* __ASM_I386_CPUFEATURE_H */ > Index: 2007-08-08/xen/include/asm-x86/flushtlb.h > ==================================================================> --- 2007-08-08.orig/xen/include/asm-x86/flushtlb.h 2007-08-21 > 14:15:47.000000000 +0200 > +++ 2007-08-08/xen/include/asm-x86/flushtlb.h 2007-08-21 12:34:36.000000000 > +0200 > @@ -15,6 +15,29 @@ > #include <xen/smp.h> > #include <xen/types.h> > > +#define FLUSH_LEVEL_MASK 0x0f > +#define FLUSH_TLB 0x10 > +#define FLUSH_TLB_GLOBAL 0x20 > +#define FLUSH_CACHE 0x40 > + > +void flush_one_local(const void *va, unsigned int flags); > +static inline void flush_local(unsigned int 
flags) > +{ > + flush_one_local(NULL, flags | FLUSH_LEVEL_MASK); > +} > +#ifdef CONFIG_SMP > +void flush_one_mask(cpumask_t, const void *va, unsigned int flags); > +static inline void flush_mask(cpumask_t mask, unsigned int flags) > +{ > + flush_one_mask(mask, NULL, flags | FLUSH_LEVEL_MASK); > +} > +#else > +#define flush_mask(mask, flags) flush_local(flags) > +#define flush_one_mask(mask, va, flags) flush_one_local(va, flags) > +#endif > +#define flush_global(flags) flush_mask(cpu_online_map, flags) > +#define flush_one_global(va, flags) flush_one_mask(cpu_online_map, va, flags) > + > /* The current time as shown by the virtual TLB clock. */ > extern u32 tlbflush_clock; > > @@ -72,31 +95,20 @@ static inline unsigned long read_cr3(voi > extern void write_cr3(unsigned long cr3); > > /* Flush guest mappings from the TLB and implicitly tick the tlbflush clock. > */ > -extern void local_flush_tlb(void); > - > -#define local_flush_tlb_pge() \ > - do { \ > - __pge_off(); \ > - local_flush_tlb(); \ > - __pge_on(); \ > - } while ( 0 ) > +#define local_flush_tlb(void) flush_local(FLUSH_TLB) > > -#define local_flush_tlb_one(__addr) \ > - __asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr))) > +#define local_flush_tlb_one(v) \ > + flush_one_local((const void *)(v), FLUSH_TLB|1) > > #define flush_tlb_all() flush_tlb_mask(cpu_online_map) > > #ifndef CONFIG_SMP > -#define flush_tlb_all_pge() local_flush_tlb_pge() > #define flush_tlb_mask(mask) local_flush_tlb() > -#define flush_tlb_one_mask(mask,v) local_flush_tlb_one(_v) > +#define flush_tlb_one_mask(mask,v) local_flush_tlb_one(v) > #else > #include <xen/smp.h> > -#define FLUSHVA_ALL (~0UL) > -extern void flush_tlb_all_pge(void); > -extern void __flush_tlb_mask(cpumask_t mask, unsigned long va); > -#define flush_tlb_mask(mask) __flush_tlb_mask(mask,FLUSHVA_ALL) > -#define flush_tlb_one_mask(mask,v) __flush_tlb_mask(mask,(unsigned long)(v)) > +#define flush_tlb_mask(mask) flush_mask(mask, FLUSH_TLB) > +#define 
flush_tlb_one_mask(mask,v) flush_one_mask(mask,(const void *)(v), > FLUSH_TLB|1) > #endif > > #endif /* __FLUSHTLB_H__ */ > Index: 2007-08-08/xen/include/asm-x86/processor.h > ==================================================================> --- 2007-08-08.orig/xen/include/asm-x86/processor.h 2007-08-21 > 14:15:47.000000000 +0200 > +++ 2007-08-08/xen/include/asm-x86/processor.h 2007-08-21 11:34:25.000000000 > +0200 > @@ -156,24 +156,20 @@ struct cpuinfo_x86 { > __u8 x86_vendor; /* CPU vendor */ > __u8 x86_model; > __u8 x86_mask; > - char wp_works_ok; /* It doesn''t on 386''s */ > - char hlt_works_ok; /* Problems on some 486Dx4''s and old 386''s */ > + __u8 invlpg_works_ok; > char hard_math; > - char rfu; > int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ > unsigned int x86_capability[NCAPINTS]; > char x86_vendor_id[16]; > char x86_model_id[64]; > int x86_cache_size; /* in KB - valid for CPUS which support this call > */ > int x86_cache_alignment; /* In bytes */ > - char fdiv_bug; > - char f00f_bug; > char coma_bug; > - char pad0; > int x86_power; > unsigned char x86_max_cores; /* cpuid returned max cores value */ > unsigned char booted_cores; /* number of cores as seen by OS */ > unsigned char apicid; > + unsigned short x86_clflush_size; > } __cacheline_aligned; > > /* > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2007-Oct-16 16:38 UTC
Re: [Xen-devel] [PATCH] x86: consolidate/enhance TLB flushing interface
On 21/8/07 16:25, "Jan Beulich" <jbeulich@novell.com> wrote:

> Folding into a single local handler and a single SMP multiplexor as
> well as adding capability to also flush caches through the same
> interfaces (a subsequent patch will make use of this).
>
> Once at changing cpuinfo_x86, this patch also removes several unused
> fields apparently inherited from Linux.

Applied at last. I just changed the names of a few functions and added a few
comments. Also, I don't know whether you empirically evaluated CLFLUSH
versus WBINVD, but your CLFLUSH loop was actually broken because 'sz' was in
pages rather than bytes. Hence you did not CLFLUSH a big enough area (by a
large margin) and hence you would vastly underestimate the cost of the
CLFLUSH approach.

 -- Keir

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Jan Beulich
2007-Oct-17 07:15 UTC
Re: [Xen-devel] [PATCH] x86: consolidate/enhance TLB flushing interface
>>> Keir Fraser <Keir.Fraser@cl.cam.ac.uk> 16.10.07 18:38 >>>
>On 21/8/07 16:25, "Jan Beulich" <jbeulich@novell.com> wrote:
>
>> Folding into a single local handler and a single SMP multiplexor as
>> well as adding capability to also flush caches through the same
>> interfaces (a subsequent patch will make use of this).
>>
>> Once at changing cpuinfo_x86, this patch also removes several unused
>> fields apparently inherited from Linux.
>
>Applied at last. I just changed the names of a few functions and added a few
>comments. Also, I don't know whether you empirically evaluated CLFLUSH
>versus WBINVD, but your CLFLUSH loop was actually broken because 'sz' was in
>pages rather than bytes. Hence you did not CLFLUSH a big enough area (by a
>large margin) and hence you would vastly underestimate the cost of the
>CLFLUSH approach.

Oh, good you caught this. But no, I didn't do any measurements, I just
wanted to cut off at the point where it is sufficiently sure using wbinvd
wouldn't be slower than looping over clflush, which I estimated at the
point where more data needs flushing than the cache can hold (of course,
if what is being flushed hasn't been referenced recently, this may still
be wrong, but otoh potentially flushing hundreds of megabytes in a loop
seemed wasteful - L2 [or L3 if present] cache size is likely already larger
than the real cutoff point, which in turn cannot reasonably be determined
empirically as it likely depends on the amount of hits the clflush-es would
have).

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Keir Fraser
2007-Oct-17 07:21 UTC
Re: [Xen-devel] [PATCH] x86: consolidate/enhance TLB flushing interface
On 17/10/07 08:15, "Jan Beulich" <jbeulich@novell.com> wrote:

>> Applied at last. I just changed the names of a few functions and added a few
>> comments. Also, I don't know whether you empirically evaluated CLFLUSH
>> versus WBINVD, but your CLFLUSH loop was actually broken because 'sz' was in
>> pages rather than bytes. Hence you did not CLFLUSH a big enough area (by a
>> large margin) and hence you would vastly underestimate the cost of the
>> CLFLUSH approach.
>
> Oh, good you caught this. But no, I didn't do any measurements, I just
> wanted to cut off at the point where it is sufficiently sure using wbinvd
> wouldn't be slower than looping over clflush, which I estimated at the
> point where more data needs flushing than the cache can hold (of course,
> if what is being flushed hasn't been referenced recently, this may still
> be wrong, but otoh potentially flushing hundreds of megabytes in a loop
> seemed wasteful - L2 [or L3 if present] cache size is likely already larger
> than the real cutoff point, which in turn cannot reasonably be determined
> empirically as it likely depends on the amount of hits the clflush-es would
> have).

Fair enough. I believe that CLFLUSH of a few megabytes is unlikely to be
slower than WBINVD (which is really really slow!).

 -- Keir

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel