Hi folks, After some time away from it, and a big rebase as a consequence, here is the updated version of paravirt_ops for x86_64, heading to inclusion. Your criticism is of course, very welcome. Have fun -- arch/x86_64/Kconfig | 11 arch/x86_64/ia32/syscall32.c | 2 arch/x86_64/kernel/Makefile | 1 arch/x86_64/kernel/apic.c | 2 arch/x86_64/kernel/asm-offsets.c | 14 arch/x86_64/kernel/entry.S | 125 +++-- arch/x86_64/kernel/head.S | 10 arch/x86_64/kernel/head64.c | 2 arch/x86_64/kernel/i8259.c | 15 arch/x86_64/kernel/ldt.c | 6 arch/x86_64/kernel/paravirt.c | 455 +++++++++++++++++++ arch/x86_64/kernel/process.c | 2 arch/x86_64/kernel/reboot.c | 3 arch/x86_64/kernel/setup.c | 41 + arch/x86_64/kernel/setup64.c | 18 arch/x86_64/kernel/smp.c | 10 arch/x86_64/kernel/smpboot.c | 10 arch/x86_64/kernel/suspend.c | 11 arch/x86_64/kernel/tce.c | 2 arch/x86_64/kernel/time.c | 37 + arch/x86_64/kernel/traps.c | 1 arch/x86_64/kernel/tsc.c | 42 + arch/x86_64/kernel/vmlinux.lds.S | 6 arch/x86_64/kernel/vsyscall.c | 4 arch/x86_64/kernel/x8664_ksyms.c | 6 arch/x86_64/mm/pageattr.c | 2 arch/x86_64/vdso/vgetcpu.c | 4 include/asm-x86_64/alternative.h | 8 include/asm-x86_64/apic.h | 13 include/asm-x86_64/desc.h | 183 +++++-- include/asm-x86_64/e820.h | 6 include/asm-x86_64/irq.h | 2 include/asm-x86_64/irqflags.h | 32 + include/asm-x86_64/mmu_context.h | 23 include/asm-x86_64/msr.h | 284 +++++++----- include/asm-x86_64/page.h | 36 + include/asm-x86_64/paravirt.h | 901 +++++++++++++++++++++++++++++++++++++++ include/asm-x86_64/pgalloc.h | 7 include/asm-x86_64/pgtable.h | 152 +++--- include/asm-x86_64/processor.h | 71 ++- include/asm-x86_64/proto.h | 3 include/asm-x86_64/segment.h | 4 include/asm-x86_64/smp.h | 8 include/asm-x86_64/spinlock.h | 16 include/asm-x86_64/tlbflush.h | 22 include/linux/mm.h | 14 46 files changed, 2271 insertions(+), 356 deletions(-)
Later on, the paravirt_ops patch will dereference the vm_area_struct in asm/pgtable.h. It means this define must come after the struct definition. Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> --- include/linux/mm.h | 14 +++++++++----- 1 files changed, 9 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 655094d..c3f8561 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -35,11 +35,6 @@ extern int sysctl_legacy_va_layout; #define sysctl_legacy_va_layout 0 #endif -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/processor.h> - -#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) /* * Linux kernel virtual memory manager primitives. @@ -113,6 +108,15 @@ struct vm_area_struct { #endif }; +#include <asm/page.h> +/* + * pgtable.h must be included after the definition of vm_area_struct. + * x86_64 pgtable.h is one of the dereferencers of this struct + */ +#include <asm/pgtable.h> +#include <asm/processor.h> + +#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) extern struct kmem_cache *vm_area_cachep; /* -- 1.4.4.2
Glauber de Oliveira Costa
2007-Aug-08 00:06 UTC
[PATCH 2/25] [PATCH] tlb flushing routines
This patch turns the flush_tlb routines into native versions. In case paravirt is not defined, the natives are defined into the actually used ones. flush_tlb_others() goes in smp.c, unless we smp is not in the game Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> --- arch/x86_64/kernel/smp.c | 10 +++++++++- include/asm-x86_64/smp.h | 8 ++++++++ include/asm-x86_64/tlbflush.h | 22 ++++++++++++++++++---- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 673a300..39f5f6b 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c @@ -165,7 +165,7 @@ out: cpu_clear(cpu, f->flush_cpumask); } -static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, +void native_flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, unsigned long va) { int sender; @@ -198,6 +198,14 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, spin_unlock(&f->tlbstate_lock); } +/* Overriden in paravirt.c if CONFIG_PARAVIRT */ +void __attribute__((weak)) flush_tlb_others(cpumask_t cpumask, + struct mm_struct *mm, + unsigned long va) +{ + native_flush_tlb_others(cpumask, mm, va); +} + int __cpuinit init_smp_flush(void) { int i; diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index 3f303d2..6b11114 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -19,6 +19,14 @@ extern int disable_apic; #include <asm/pda.h> +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +void native_flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, + unsigned long va); +#else +#define startup_ipi_hook(apicid, rip, rsp) do { } while (0) +#endif + struct pt_regs; extern cpumask_t cpu_present_mask; diff --git a/include/asm-x86_64/tlbflush.h b/include/asm-x86_64/tlbflush.h index 888eb4a..1c68cc8 100644 --- a/include/asm-x86_64/tlbflush.h +++ b/include/asm-x86_64/tlbflush.h @@ -6,21 +6,30 @@ #include 
<asm/processor.h> #include <asm/system.h> -static inline void __flush_tlb(void) +static inline void native_flush_tlb(void) { write_cr3(read_cr3()); } -static inline void __flush_tlb_all(void) +static inline void native_flush_tlb_all(void) { unsigned long cr4 = read_cr4(); write_cr4(cr4 & ~X86_CR4_PGE); /* clear PGE */ write_cr4(cr4); /* write old PGE again and flush TLBs */ } -#define __flush_tlb_one(addr) \ - __asm__ __volatile__("invlpg (%0)" :: "r" (addr) : "memory") +static inline void native_flush_tlb_one(unsigned long addr) +{ + asm volatile ("invlpg (%0)" :: "r" (addr) : "memory"); +} +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define __flush_tlb() native_flush_tlb() +#define __flush_tlb_all() native_flush_tlb_all() +#define __flush_tlb_one(addr) native_flush_tlb_one(addr) +#endif /* CONFIG_PARAVIRT */ /* * TLB flushing: @@ -64,6 +73,11 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, __flush_tlb(); } +static inline void native_flush_tlb_others(cpumask_t *cpumask, + struct mm_struct *mm, unsigned long va) +{ +} + #else #include <asm/smp.h> -- 1.4.4.2
Glauber de Oliveira Costa
2007-Aug-08 00:06 UTC
[PATCH 3/25] [PATCH] irq_flags / halt routines
This patch turns the irq_flags and halt routines into the native versions. Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> --- include/asm-x86_64/irqflags.h | 32 +++++++++++++++++++++++++++----- 1 files changed, 27 insertions(+), 5 deletions(-) diff --git a/include/asm-x86_64/irqflags.h b/include/asm-x86_64/irqflags.h index 86e70fe..4ba5241 100644 --- a/include/asm-x86_64/irqflags.h +++ b/include/asm-x86_64/irqflags.h @@ -16,6 +16,22 @@ * Interrupt control: */ +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +# ifdef CONFIG_X86_VSMP +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC); +} +# else +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & X86_EFLAGS_IF); +} +# endif + +#else /* PARAVIRT */ + static inline unsigned long __raw_local_save_flags(void) { unsigned long flags; @@ -31,9 +47,6 @@ static inline unsigned long __raw_local_save_flags(void) return flags; } -#define raw_local_save_flags(flags) \ - do { (flags) = __raw_local_save_flags(); } while (0) - static inline void raw_local_irq_restore(unsigned long flags) { __asm__ __volatile__( @@ -88,6 +101,10 @@ static inline int raw_irqs_disabled_flags(unsigned long flags) #endif +#endif /* CONFIG_PARAVIRT */ + +#define raw_local_save_flags(flags) \ + do { (flags) = __raw_local_save_flags(); } while (0) /* * For spinlocks, etc.: */ @@ -115,7 +132,7 @@ static inline int raw_irqs_disabled(void) * Used in the idle loop; sti takes one instruction cycle * to complete: */ -static inline void raw_safe_halt(void) +static inline void native_raw_safe_halt(void) { __asm__ __volatile__("sti; hlt" : : : "memory"); } @@ -124,11 +141,16 @@ static inline void raw_safe_halt(void) * Used when interrupts are already enabled or to * shutdown the processor: */ -static inline void halt(void) +static inline void native_halt(void) { __asm__ __volatile__("hlt": 
: :"memory"); } +#ifndef CONFIG_PARAVIRT +#define raw_safe_halt native_raw_safe_halt +#define halt native_halt +#endif /* ! CONFIG_PARAVIRT */ + #else /* __ASSEMBLY__: */ # ifdef CONFIG_TRACE_IRQFLAGS # define TRACE_IRQS_ON call trace_hardirqs_on_thunk -- 1.4.4.2
Glauber de Oliveira Costa
2007-Aug-08 00:06 UTC
[PATCH 4/25] [PATCH] Add debugreg/load_rsp native hooks
This patch adds native hooks for debugreg handling functions, and for the native load_rsp0 function. The later also have its call sites patched. Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> --- arch/x86_64/kernel/process.c | 2 +- arch/x86_64/kernel/smpboot.c | 2 +- include/asm-x86_64/processor.h | 71 ++++++++++++++++++++++++++++++++++++---- 3 files changed, 66 insertions(+), 9 deletions(-) diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 2842f50..33046f1 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -595,7 +595,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Reload esp0, LDT and the page table pointer: */ - tss->rsp0 = next->rsp0; + load_rsp0(tss, next); /* * Switch DS and ES. diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 32f5078..be9c7eb 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -620,7 +620,7 @@ do_rest: start_rip = setup_trampoline(); init_rsp = c_idle.idle->thread.rsp; - per_cpu(init_tss,cpu).rsp0 = init_rsp; + load_rsp0(&per_cpu(init_tss,cpu), &c_idle.idle->thread); initial_code = start_secondary; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index 1952517..65f689b 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h @@ -249,6 +249,12 @@ struct thread_struct { .rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \ } +static inline void native_load_rsp0(struct tss_struct *tss, + struct thread_struct *thread) +{ + tss->rsp0 = thread->rsp0; +} + #define INIT_MMAP \ { &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL } @@ -264,13 +270,64 @@ struct thread_struct { set_fs(USER_DS); \ } while(0) -#define get_debugreg(var, register) \ - __asm__("movq %%db" #register ", %0" \ - :"=r" (var)) -#define 
set_debugreg(value, register) \ - __asm__("movq %0,%%db" #register \ - : /* no output */ \ - :"r" (value)) +static inline unsigned long native_get_debugreg(int regno) +{ + unsigned long val; + + switch (regno) { + case 0: + asm("movq %%db0, %0" :"=r" (val)); break; + case 1: + asm("movq %%db1, %0" :"=r" (val)); break; + case 2: + asm("movq %%db2, %0" :"=r" (val)); break; + case 3: + asm("movq %%db3, %0" :"=r" (val)); break; + case 6: + asm("movq %%db6, %0" :"=r" (val)); break; + case 7: + asm("movq %%db7, %0" :"=r" (val)); break; + default: + val = 0; /* assign it to keep gcc quiet */ + WARN_ON(1); + } + return val; +} + +static inline void native_set_debugreg(unsigned long value, int regno) +{ + switch (regno) { + case 0: + asm("movq %0,%%db0" : /* no output */ :"r" (value)); + break; + case 1: + asm("movq %0,%%db1" : /* no output */ :"r" (value)); + break; + case 2: + asm("movq %0,%%db2" : /* no output */ :"r" (value)); + break; + case 3: + asm("movq %0,%%db3" : /* no output */ :"r" (value)); + break; + case 6: + asm("movq %0,%%db6" : /* no output */ :"r" (value)); + break; + case 7: + asm("movq %0,%%db7" : /* no output */ :"r" (value)); + break; + default: + BUG(); + } +} + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define paravirt_enabled() 0 +#define load_rsp0 native_load_rsp0 +#define set_debugreg(val, reg) native_set_debugreg(reg, val) +#define get_debugreg(var, reg) (var) = native_get_debugreg(reg) +#endif struct task_struct; struct mm_struct; -- 1.4.4.2
Glauber de Oliveira Costa
2007-Aug-08 00:06 UTC
[PATCH 5/25] [PATCH] native versions for system.h functions
This patch adds the native hook for the functions in system.h. They are the read/write_crX, clts and wbinvd. The latter also gets its call sites patched. Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com> --- arch/x86_64/kernel/tce.c | 2 +- arch/x86_64/mm/pageattr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86_64/kernel/tce.c b/arch/x86_64/kernel/tce.c index e3f2569..587f0c2 100644 --- a/arch/x86_64/kernel/tce.c +++ b/arch/x86_64/kernel/tce.c @@ -42,7 +42,7 @@ static inline void flush_tce(void* tceaddr) if (cpu_has_clflush) asm volatile("clflush (%0)" :: "r" (tceaddr)); else - asm volatile("wbinvd":::"memory"); + wbinvd(); } void tce_build(struct iommu_table *tbl, unsigned long index, diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index 7e161c6..b497afd 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c @@ -76,7 +76,7 @@ static void flush_kernel_map(void *arg) /* When clflush is available always use it because it is much cheaper than WBINVD. */ if (!cpu_has_clflush) - asm volatile("wbinvd" ::: "memory"); + wbinvd(); else list_for_each_entry(pg, l, lru) { void *adr = page_address(pg); cache_flush_page(adr); -- 1.4.4.2
Glauber de Oliveira Costa
2007-Aug-08 00:06 UTC
[PATCH 6/25] [PATCH] add native_apic read and write functions, as well as boot clocks ones
Time for the apic handling functions to get their native counterparts. Also, put the native hook for the boot clocks functions in the apic.h header Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> --- arch/x86_64/kernel/apic.c | 2 +- arch/x86_64/kernel/smpboot.c | 8 +++++++- include/asm-x86_64/apic.h | 13 +++++++++++-- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 900ff38..2d233ef 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -1193,7 +1193,7 @@ int __init APIC_init_uniprocessor (void) setup_IO_APIC(); else nr_ioapics = 0; - setup_boot_APIC_clock(); + setup_boot_clock(); check_nmi_watchdog(); return 0; } diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index be9c7eb..5c7eb60 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -338,7 +338,7 @@ void __cpuinit start_secondary(void) check_tsc_sync_target(); Dprintk("cpu %d: setting up apic clock\n", smp_processor_id()); - setup_secondary_APIC_clock(); + setup_secondary_clock(); Dprintk("cpu %d: enabling apic timer\n", smp_processor_id()); @@ -468,6 +468,12 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta num_starts = 2; /* + * Paravirt wants a startup IPI hook here to set up the + * target processor state. + */ + startup_ipi_hook(phys_apicid, (unsigned long) start_rip, + (unsigned long) init_rsp); + /* * Run STARTUP IPI loop. */ Dprintk("#startup loops: %d.\n", num_starts); diff --git a/include/asm-x86_64/apic.h b/include/asm-x86_64/apic.h index 85125ef..de17908 100644 --- a/include/asm-x86_64/apic.h +++ b/include/asm-x86_64/apic.h @@ -38,16 +38,25 @@ struct pt_regs; * Basic functions accessing APICs. 
*/ -static __inline void apic_write(unsigned long reg, unsigned int v) +static __inline void native_apic_write(unsigned long reg, unsigned int v) { *((volatile unsigned int *)(APIC_BASE+reg)) = v; } -static __inline unsigned int apic_read(unsigned long reg) +static __inline unsigned int native_apic_read(unsigned long reg) { return *((volatile unsigned int *)(APIC_BASE+reg)); } +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define apic_write(reg, v) native_apic_write(reg, v) +#define apic_read(reg) native_apic_read(reg) +#define setup_boot_clock(void) setup_boot_APIC_clock(void) +#define setup_secondary_clock(void) setup_secondary_APIC_clock(void) +#endif + extern void apic_wait_icr_idle(void); extern unsigned int safe_apic_wait_icr_idle(void); -- 1.4.4.2
Glauber de Oliveira Costa
2007-Aug-08 00:07 UTC
[PATCH 10/25] [PATCH] export math_state_restore
Export math_state_restore symbol, so it can be used for hypervisors. They are commonly loaded as modules. Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> --- arch/x86_64/kernel/traps.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 0388842..aacbe12 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -1081,6 +1081,7 @@ asmlinkage void math_state_restore(void) task_thread_info(me)->status |= TS_USEDFPU; me->fpu_counter++; } +EXPORT_SYMBOL_GPL(math_state_restore); void __init trap_init(void) { -- 1.4.4.2
Glauber de Oliveira Costa
2007-Aug-08 00:07 UTC
[PATCH 7/25] [PATCH] interrupt related native paravirt functions.
The interrupt initialization routine becomes native_init_IRQ and will be overriden later in case paravirt is on. The interrupt vector is made global, so paravirt guests can reference it in their initializations. However, "interrupt" is such a common name, and could lead to clashes, so it is renamed. Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> --- arch/x86_64/kernel/i8259.c | 15 +++++++++++---- include/asm-x86_64/irq.h | 2 ++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c index 948cae6..8dda872 100644 --- a/arch/x86_64/kernel/i8259.c +++ b/arch/x86_64/kernel/i8259.c @@ -75,8 +75,12 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) -/* for the irq vectors */ -static void (*interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { +/* + * For the irq vectors. 
It is global rather than static to allow for + * paravirtualized guests to use it in their own interrupt initialization + * routines + */ +void (*interrupt_vector[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { IRQLIST_16(0x2), IRQLIST_16(0x3), IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), @@ -484,7 +488,10 @@ static int __init init_timer_sysfs(void) device_initcall(init_timer_sysfs); -void __init init_IRQ(void) +/* Overridden in paravirt.c */ +void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); + +void __init native_init_IRQ(void) { int i; @@ -497,7 +504,7 @@ void __init init_IRQ(void) for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { int vector = FIRST_EXTERNAL_VECTOR + i; if (vector != IA32_SYSCALL_VECTOR) - set_intr_gate(vector, interrupt[i]); + set_intr_gate(vector, interrupt_vector[i]); } #ifdef CONFIG_SMP diff --git a/include/asm-x86_64/irq.h b/include/asm-x86_64/irq.h index 5006c6e..be55299 100644 --- a/include/asm-x86_64/irq.h +++ b/include/asm-x86_64/irq.h @@ -46,6 +46,8 @@ static __inline__ int irq_canonicalize(int irq) extern void fixup_irqs(cpumask_t map); #endif +void native_init_IRQ(void); + #define __ARCH_HAS_DO_SOFTIRQ 1 #endif /* _ASM_IRQ_H */ -- 1.4.4.2
Glauber de Oliveira Costa
2007-Aug-08 00:42 UTC
[PATCH 21/25] [PATCH] export cpu_gdt_descr
With paravirtualization, hypervisors need to handle the GDT, which up to this point was only used in very early initialization code. Hypervisors are commonly modules, so make it an export. Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> --- arch/x86_64/kernel/x8664_ksyms.c | 6 ++++++ 1 files changed, 6 insertions(+), 0 deletions(-) diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index 77c25b3..8f10698 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c @@ -60,3 +60,9 @@ EXPORT_SYMBOL(init_level4_pgt); EXPORT_SYMBOL(load_gs_index); EXPORT_SYMBOL(_proxy_pda); + +#ifdef CONFIG_PARAVIRT +extern unsigned long *cpu_gdt_descr; +/* Virtualized guests may want to use it */ +EXPORT_SYMBOL(cpu_gdt_descr); +#endif -- 1.4.4.2
Glauber de Oliveira Costa
2007-Aug-08 00:45 UTC
[PATCH 24/25] [PATCH] provide paravirt patching function
This patch introduces apply_paravirt(), a function that shall be called by i386/alternative.c to apply replacements to paravirt_functions. It is defined as a do-nothing function if paravirt is not enabled. Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> --- include/asm-x86_64/alternative.h | 8 +++++--- 1 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/asm-x86_64/alternative.h b/include/asm-x86_64/alternative.h index ab161e8..e69a141 100644 --- a/include/asm-x86_64/alternative.h +++ b/include/asm-x86_64/alternative.h @@ -143,12 +143,14 @@ static inline void alternatives_smp_switch(int smp) {} */ #define ASM_OUTPUT2(a, b) a, b -struct paravirt_patch; +struct paravirt_patch_site; #ifdef CONFIG_PARAVIRT -void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end); +void apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end); #else static inline void -apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end) +apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end) {} #define __parainstructions NULL #define __parainstructions_end NULL -- 1.4.4.2
Glauber de Oliveira Costa
2007-Aug-08 00:48 UTC
[PATCH 20/25] [PATCH] replace syscall_init
This patch replaces syscall_init by x86_64_syscall_init. The former will be later replaced by a paravirt replacement in case paravirt is on Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> --- arch/x86_64/kernel/setup64.c | 8 +++++++- include/asm-x86_64/proto.h | 3 +++ 2 files changed, 10 insertions(+), 1 deletions(-) diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 49f7342..723822c 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -153,7 +153,7 @@ __attribute__((section(".bss.page_aligned"))); extern asmlinkage void ignore_sysret(void); /* May not be marked __init: used by software suspend */ -void syscall_init(void) +void x86_64_syscall_init(void) { /* * LSTAR and STAR live in a bit strange symbiosis. @@ -172,6 +172,12 @@ void syscall_init(void) wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); } +/* Overriden in paravirt.c if CONFIG_PARAVIRT */ +void __attribute__((weak)) syscall_init(void) +{ + x86_64_syscall_init(); +} + void __cpuinit check_efer(void) { unsigned long efer; diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index 31f20ad..77ed2de 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -18,6 +18,9 @@ extern void init_memory_mapping(unsigned long start, unsigned long end); extern void system_call(void); extern int kernel_syscall(void); +#ifdef CONFIG_PARAVIRT +extern void x86_64_syscall_init(void); +#endif extern void syscall_init(void); extern void ia32_syscall(void); -- 1.4.4.2
> > @@ -264,13 +270,64 @@ struct thread_struct { > set_fs(USER_DS); \ > } while(0) > > -#define get_debugreg(var, register) \ > - __asm__("movq %%db" #register ", %0" \ > - :"=r" (var)) > -#define set_debugreg(value, register) \ > - __asm__("movq %0,%%db" #register \ > - : /* no output */ \ > - :"r" (value)) > +static inline unsigned long native_get_debugreg(int regno) > +{ > + unsigned long val;
It would be better to have separate functions for each debug register, I think. -Andi
> +#ifdef CONFIG_PARAVIRT > +#include <asm/paravirt.h> > +# ifdef CONFIG_X86_VSMP > +static inline int raw_irqs_disabled_flags(unsigned long flags) > +{ > + return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC); > +} > +# else > +static inline int raw_irqs_disabled_flags(unsigned long flags) > +{ > + return !(flags & X86_EFLAGS_IF); > +} > +# endif
You should really turn the vsmp special case into a paravirt client first instead of complicating all this even more.
> +#ifndef CONFIG_PARAVIRT > +#define raw_safe_halt native_raw_safe_halt > +#define halt native_halt > +#endif /* ! CONFIG_PARAVIRT */
This seems inconsistent. -Andi
On Wed, 2007-08-08 at 01:19 -0300, Glauber de Oliveira Costa wrote: > With paravirualization, hypervisors needs to handle the gdt, > that was right to this point only used at very early > inialization code. Hypervisors are commonly modules, so make > it an export >
The GDT is so deeply internal that this really ought to be a _GPL export.
Glauber de Oliveira Costa
2007-Aug-08 07:25 UTC
[PATCH 21/25] [PATCH] export cpu_gdt_descr
On 8/8/07, Arjan van de Ven <arjan@infradead.org> wrote: > On Wed, 2007-08-08 at 01:19 -0300, Glauber de Oliveira Costa wrote: > > With paravirualization, hypervisors needs to handle the gdt, > > that was right to this point only used at very early > > inialization code. Hypervisors are commonly modules, so make > > it an export > > > > the GDT is so deeply internal that this really ought to be a _GPL > export..
Yes, Arjan, I agree. Thanks for noticing it. -- Glauber de Oliveira Costa. "Free as in Freedom" http://glommer.net "The less confident you are, the more serious you have to act."
Glauber de Oliveira Costa wrote: > Hi folks, > > After some time away from it, and a big rebase as a consequence, here is > the updated version of paravirt_ops for x86_64, heading to inclusion. > > Your criticism is of course, very welcome. > > Have fun
Do you assume that the kernel ought to use 2MB pages for its mappings (e.g. initial text/data, direct mapping of physical memory) under your paravirt_ops? As far as I can tell from the patches, I don't find one. Jun --- Intel Open Source Technology Center
Jeremy Fitzhardinge
2007-Aug-08 22:49 UTC
[PATCH 4/25] [PATCH] Add debugreg/load_rsp native hooks
Andi Kleen wrote: >> @@ -264,13 +270,64 @@ struct thread_struct { >> set_fs(USER_DS); \ >> } while(0) >> >> -#define get_debugreg(var, register) \ >> - __asm__("movq %%db" #register ", %0" \ >> - :"=r" (var)) >> -#define set_debugreg(value, register) \ >> - __asm__("movq %0,%%db" #register \ >> - : /* no output */ \ >> - :"r" (value)) >> +static inline unsigned long native_get_debugreg(int regno) >> +{ >> + unsigned long val; >> > > It would be better to have own functions for each debug register I think >
? A separate pvop for each? Seems excessive. And surely this should be identical to 32bit either way. J
Glauber de Oliveira Costa
2007-Aug-08 22:49 UTC
[PATCH 4/25] [PATCH] Add debugreg/load_rsp native hooks
On 8/8/07, Andi Kleen <ak@suse.de> wrote: > > > > > @@ -264,13 +270,64 @@ struct thread_struct { > > set_fs(USER_DS); \ > > } while(0) > > > > -#define get_debugreg(var, register) \ > > - __asm__("movq %%db" #register ", %0" \ > > - :"=r" (var)) > > -#define set_debugreg(value, register) \ > > - __asm__("movq %0,%%db" #register \ > > - : /* no output */ \ > > - :"r" (value)) > > +static inline unsigned long native_get_debugreg(int regno) > > +{ > > + unsigned long val; > > It would be better to have own functions for each debug register I think >
Andi, you mean: a) split the debugreg paravirt_ops into various paravirt_ops.set/get_debugreg{X,Y,Z...}, and then join them together in a set/get_debugreg(a,b) to keep the current interface, OR b) keep one paravirt_ops for each set/get_debugreg, then split them into various set/get_debugregX(a, b), changing the current interface, OR c) split the debugreg paravirt_ops into various paravirt_ops.set/get_debugreg{X,Y,Z...}, and give each its own function set/get_debugregX(a, b), again, changing the current interface, OR d) None of the above? -- Glauber de Oliveira Costa. "Free as in Freedom" http://glommer.net "The less confident you are, the more serious you have to act."
Glauber de Oliveira Costa
2007-Aug-09 00:07 UTC
[PATCH 25/25] [PATCH] add paravirtualization support for x86_64
On 8/9/07, Jeremy Fitzhardinge <jeremy@goop.org> wrote: > > > > Does it really matter? > > > > Well, yes, if alignment is an issue.
Of course. But the question arises from the context that they are both together at the beginning, so they are not making anything non-aligned. Hence the question: why would putting them at the end be different from putting them _together_, aligned, at the beginning? -- Glauber de Oliveira Costa. "Free as in Freedom" http://glommer.net "The less confident you are, the more serious you have to act."