Steven Rostedt
2007-Apr-18 13:02 UTC
[RFC/PATCH PV_OPS X86_64 10/17] paravirt_ops - boot changes
plain text document attachment (xx-paravirt-boot.patch)
Boot up code modifications to get paravirt ops running.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Index: clean-start/arch/x86_64/kernel/head.S
===================================================================
--- clean-start.orig/arch/x86_64/kernel/head.S
+++ clean-start/arch/x86_64/kernel/head.S
@@ -16,6 +16,13 @@
#include <asm/page.h>
#include <asm/msr.h>
#include <asm/cache.h>
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/asm-offsets.h>
+#include <asm/paravirt.h>
+#else
+#define GET_CR2_INTO_RAX mov %cr2, %rax
+#endif
/* we are not able to switch in one step to the final KERNEL ADRESS SPACE
* because we need identity-mapped pages on setup so define __START_KERNEL to
@@ -106,6 +113,14 @@ startup_64:
* reload the page tables here.
*/
+#ifdef CONFIG_PARAVIRT
+ /* a CS ended in 0x3 indicates we're in userspace. That's where
+ * our paravirt guests run. */
+ movq %cs, %rax
+ testq $0x3, %rax
+ jnz startup_paravirt
+#endif
+
/* Enable PAE mode and PGE */
xorq %rax, %rax
btsq $5, %rax
@@ -208,10 +223,11 @@ ENTRY(early_idt_handler)
cmpl $2,early_recursion_flag(%rip)
jz 1f
incl early_recursion_flag(%rip)
- xorl %eax,%eax
movq 8(%rsp),%rsi # get rip
movq (%rsp),%rdx
- movq %cr2,%rcx
+ GET_CR2_INTO_RAX
+ movq %rax,%rcx
+ xorq %rax, %rax
leaq early_idt_msg(%rip),%rdi
call early_printk
cmpl $2,early_recursion_flag(%rip)
@@ -232,6 +248,47 @@ early_idt_msg:
early_idt_ripmsg:
.asciz "RIP %s\n"
+#ifdef CONFIG_PARAVIRT
+ENTRY(startup_paravirt)
+ cld
+
+ /* initial stack location */
+ movq $(init_thread_union+THREAD_SIZE),%rsp
+
+ /* We take pains to preserve all the regs. */
+ pushq %r11
+ pushq %r10
+ pushq %r9
+ pushq %r8
+ pushq %rsi
+ pushq %rdi
+ pushq %rdx
+ pushq %rcx
+ pushq %rax
+
+ /* paravirt.o is last in link, and that probe fn never returns */
+ pushq $__start_paravirtprobe
+1:
+ movq 0(%rsp), %rax
+ pushq (%rax)
+ movq 8(%rsp), %rdi
+ call *(%rsp)
+ popq %rax
+
+ movq 0x10(%rsp), %rax
+ movq 0x18(%rsp), %rcx
+ movq 0x20(%rsp), %rdx
+ movq 0x28(%rsp), %rdi
+ movq 0x30(%rsp), %rsi
+ movq 0x38(%rsp), %r8
+ movq 0x40(%rsp), %r9
+ movq 0x48(%rsp), %r10
+ movq 0x50(%rsp), %r11
+
+ addl $8, (%rsp)
+ jmp 1b
+#endif
+
.code32
ENTRY(no_long_mode)
/* This isn't an x86-64 CPU so hang */
@@ -317,7 +374,9 @@ ENTRY(wakeup_level4_pgt)
#endif
#ifndef CONFIG_HOTPLUG_CPU
+ #ifndef CONFIG_PARAVIRT
__INITDATA
+ #endif
#endif
/*
* This default setting generates an ident mapping at address 0x100000
Index: clean-start/arch/x86_64/kernel/head64.c
===================================================================
--- clean-start.orig/arch/x86_64/kernel/head64.c
+++ clean-start/arch/x86_64/kernel/head64.c
@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/percpu.h>
+#include <linux/module.h>
#include <asm/processor.h>
#include <asm/proto.h>
@@ -20,6 +21,9 @@
#include <asm/pgtable.h>
#include <asm/sections.h>
+/* Virtualized guests may want to use it */
+EXPORT_SYMBOL(cpu_gdt_descr);
+
/* Don't add a printk in there. printk relies on the PDA which is not
   initialized yet. */
static void __init clear_bss(void)
@@ -62,7 +66,7 @@ void __init x86_64_start_kernel(char * r
for (i = 0; i < IDT_ENTRIES; i++)
set_intr_gate(i, early_idt_handler);
- asm volatile("lidt %0" :: "m" (idt_descr));
+ load_idt((const struct desc_ptr *)&idt_descr);
early_printk("Kernel alive\n");
@@ -70,7 +74,7 @@ void __init x86_64_start_kernel(char * r
* switch to init_level4_pgt from boot_level4_pgt
*/
memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
- asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
+ write_cr3(__pa_symbol(&init_level4_pgt));
for (i = 0; i < NR_CPUS; i++)
cpu_pda(i) = &boot_cpu_pda[i];
Index: clean-start/arch/x86_64/kernel/process.c
===================================================================
--- clean-start.orig/arch/x86_64/kernel/process.c
+++ clean-start/arch/x86_64/kernel/process.c
@@ -42,6 +42,7 @@
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
+#include <asm/system.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
@@ -338,10 +339,10 @@ void __show_regs(struct pt_regs * regs)
rdmsrl(MSR_GS_BASE, gs);
rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
- asm("movq %%cr0, %0": "=r" (cr0));
- asm("movq %%cr2, %0": "=r" (cr2));
- asm("movq %%cr3, %0": "=r" (cr3));
- asm("movq %%cr4, %0": "=r" (cr4));
+ cr0 = read_cr0();
+ cr2 = read_cr2();
+ cr3 = read_cr3();
+ cr4 = read_cr4();
printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
fs,fsindex,gs,gsindex,shadowgs);
@@ -578,7 +579,7 @@ __switch_to(struct task_struct *prev_p,
/*
* Reload esp0, LDT and the page table pointer:
*/
- tss->rsp0 = next->rsp0;
+ load_rsp0(tss, next);
/*
* Switch DS and ES.
Index: clean-start/arch/x86_64/kernel/reboot.c
===================================================================
--- clean-start.orig/arch/x86_64/kernel/reboot.c
+++ clean-start/arch/x86_64/kernel/reboot.c
@@ -15,6 +15,7 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/apic.h>
+#include <asm/desc.h>
/*
* Power off function, if any
@@ -131,7 +132,7 @@ void machine_emergency_restart(void)
}
case BOOT_TRIPLE:
- __asm__ __volatile__("lidt (%0)": :"r" (&no_idt));
+ load_idt((const struct desc_ptr *)&no_idt);
__asm__ __volatile__("int3");
reboot_type = BOOT_KBD;
Index: clean-start/arch/x86_64/kernel/setup.c
===================================================================
--- clean-start.orig/arch/x86_64/kernel/setup.c
+++ clean-start/arch/x86_64/kernel/setup.c
@@ -327,10 +327,7 @@ static void discover_ebda(void)
* there is a real-mode segmented pointer pointing to the
* 4K EBDA area at 0x40E
*/
- ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER;
- ebda_addr <<= 4;
-
- ebda_size = *(unsigned short *)(unsigned long)ebda_addr;
+ ebda_info(&ebda_addr,&ebda_size);
/* Round EBDA up to pages */
if (ebda_size == 0)
@@ -341,6 +338,12 @@ static void discover_ebda(void)
ebda_size = 64*1024;
}
+/* Overridden in paravirt.c if CONFIG_PARAVIRT */
+void __attribute__((weak)) memory_setup(void)
+{
+ return setup_memory_region();
+}
+
void __init setup_arch(char **cmdline_p)
{
printk(KERN_INFO "Command line: %s\n", saved_command_line);
@@ -356,7 +359,7 @@ void __init setup_arch(char **cmdline_p)
rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif
- setup_memory_region();
+ memory_setup();
copy_edd();
if (!MOUNT_ROOT_RDONLY)
@@ -561,7 +564,6 @@ static int __cpuinit get_model_name(stru
return 1;
}
-
static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, eax, ebx, ecx, edx;
Index: clean-start/arch/x86_64/kernel/setup64.c
===================================================================
--- clean-start.orig/arch/x86_64/kernel/setup64.c
+++ clean-start/arch/x86_64/kernel/setup64.c
@@ -123,7 +123,7 @@ void pda_init(int cpu)
asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
/* Memory clobbers used to order PDA accessed */
mb();
- wrmsrl(MSR_GS_BASE, pda);
+ wrmsrl(MSR_GS_BASE, (u64)pda);
mb();
pda->cpunumber = cpu;
@@ -152,7 +152,7 @@ char boot_exception_stacks[(N_EXCEPTION_
__attribute__((section(".bss.page_aligned")));
/* May not be marked __init: used by software suspend */
-void syscall_init(void)
+void x86_64_syscall_init(void)
{
/*
* LSTAR and STAR live in a bit strange symbiosis.
@@ -160,7 +160,7 @@ void syscall_init(void)
* but only a 32bit target. LSTAR sets the 64bit rip.
*/
wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
- wrmsrl(MSR_LSTAR, system_call);
+ wrmsrl(MSR_LSTAR, (u64)system_call);
#ifdef CONFIG_IA32_EMULATION
syscall32_cpu_init ();
@@ -170,6 +170,12 @@ void syscall_init(void)
wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000);
}
+/* Overriden in paravirt.c if CONFIG_PARAVIRT */
+void __attribute__((weak)) syscall_init(void)
+{
+ x86_64_syscall_init();
+}
+
void __cpuinit check_efer(void)
{
unsigned long efer;
@@ -223,8 +229,8 @@ void __cpuinit cpu_init (void)
memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE);
cpu_gdt_descr[cpu].size = GDT_SIZE;
- asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
- asm volatile("lidt %0" :: "m" (idt_descr));
+ load_gdt((const struct desc_ptr *)&cpu_gdt_descr[cpu]);
+ load_idt((const struct desc_ptr *)&idt_descr);
memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
syscall_init();
@@ -267,6 +273,7 @@ void __cpuinit cpu_init (void)
BUG();
enter_lazy_tlb(&init_mm, me);
+ load_rsp0(t, &current->thread);
set_tss_desc(cpu, t);
load_TR_desc();
load_LDT(&init_mm.context);
Index: clean-start/arch/x86_64/kernel/smpboot.c
===================================================================
--- clean-start.orig/arch/x86_64/kernel/smpboot.c
+++ clean-start/arch/x86_64/kernel/smpboot.c
@@ -848,7 +848,7 @@ do_rest:
start_rip = setup_trampoline();
init_rsp = c_idle.idle->thread.rsp;
- per_cpu(init_tss,cpu).rsp0 = init_rsp;
+ load_rsp0(&per_cpu(init_tss,cpu), &c_idle.idle->thread);
initial_code = start_secondary;
clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
Index: clean-start/arch/x86_64/kernel/vmlinux.lds.S
===================================================================
--- clean-start.orig/arch/x86_64/kernel/vmlinux.lds.S
+++ clean-start/arch/x86_64/kernel/vmlinux.lds.S
@@ -61,6 +61,13 @@ SECTIONS
CONSTRUCTORS
} :data
+ .paravirtprobe : AT(ADDR(.paravirtprobe) - LOAD_OFFSET) {
+ __start_paravirtprobe = .;
+ *(.paravirtprobe);
+ *(.paravirtprobe_failsafe);
+ __stop_paravirtprobe = .;
+ }
+
_edata = .; /* End of data section */
. = ALIGN(PAGE_SIZE);
@@ -180,14 +187,20 @@ SECTIONS
__con_initcall_end = .;
SECURITY_INIT
. = ALIGN(8);
- __alt_instructions = .;
.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
+ __alt_instructions = .;
*(.altinstructions)
+ __alt_instructions_end = .;
}
- __alt_instructions_end = .;
.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
*(.altinstr_replacement)
}
+ . = ALIGN(8);
+ .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
+ __start_parainstructions = .;
+ *(.parainstructions)
+ __stop_parainstructions = .;
+ }
/* .exit.text is discard at runtime, not link time, to deal with references
from .altinstructions and .eh_frame */
.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
--
