Jan Beulich
2007-Jul-05 13:47 UTC
[Xen-devel] [PATCH] linux/x86: use sysenter/syscall for 32-bit apps on 64-bit Xen
For both 32-bit apps on 64-bit kernels and 32-bit kernels. Also removing the needless re-enabling of events on x86-64's 64-bit syscall path as well as its 32-bit int80 path (the latter accompanied by telling Xen not to disable them in the first place). This was tested on 2.6.22-rc6, and only made to apply to 2.6.18 without further testing. Signed-off-by: Jan Beulich <jbeulich@novell.com> Index: head-2007-07-02/arch/i386/kernel/entry-xen.S ==================================================================--- head-2007-07-02.orig/arch/i386/kernel/entry-xen.S 2007-07-04 11:45:03.000000000 +0200 +++ head-2007-07-02/arch/i386/kernel/entry-xen.S 2007-07-03 14:01:17.000000000 +0200 @@ -382,6 +382,26 @@ sysenter_past_esp: CFI_ENDPROC + # pv sysenter call handler stub +ENTRY(sysenter_entry_pv) + RING0_INT_FRAME + movl $__USER_DS,16(%esp) + movl %ebp,12(%esp) +/* + * Load the potential sixth argument from user stack. + * Careful about security. + */ + cmpl $__PAGE_OFFSET-3,%ebp + jae syscall_fault +1: movl (%ebp),%ebp +.section __ex_table,"a" + .align 4 + .long 1b,syscall_fault +.previous + /* fall through */ + CFI_ENDPROC +ENDPROC(sysenter_entry_pv) + # system call handler stub ENTRY(system_call) RING0_INT_FRAME # can''t unwind into user space anyway Index: head-2007-07-02/arch/i386/kernel/process-xen.c ==================================================================--- head-2007-07-02.orig/arch/i386/kernel/process-xen.c 2007-07-04 11:45:01.000000000 +0200 +++ head-2007-07-02/arch/i386/kernel/process-xen.c 2007-07-04 10:02:51.000000000 +0200 @@ -51,6 +51,7 @@ #include <asm/math_emu.h> #endif +#include <xen/interface/callback.h> #include <xen/interface/physdev.h> #include <xen/interface/vcpu.h> #include <xen/cpu_hotplug.h> @@ -581,6 +582,22 @@ struct task_struct fastcall * __switch_t mcl->args[1] = next->esp0; mcl++; + if (boot_cpu_has(X86_FEATURE_SEP) && next_p->mm) { + /* Specify sysexit address. 
*/ + struct callback_register sysexit = { + .type = CALLBACKTYPE_sysexit, + .address = { + __USER_CS, + (unsigned long)task_thread_info(next_p)->sysenter_return + } + }; + + mcl->op = __HYPERVISOR_callback_op; + mcl->args[0] = CALLBACKOP_register; + mcl->args[1] = (unsigned long)&sysexit; + mcl++; + } + /* * Load the per-thread Thread-Local Storage descriptor. * This is load_TLS(next, cpu) with multicalls. @@ -616,7 +633,9 @@ struct task_struct fastcall * __switch_t mcl++; } - (void)HYPERVISOR_multicall(_mcl, mcl - _mcl); + BUG_ON(mcl > _mcl + ARRAY_SIZE(_mcl)); + if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL))) + BUG(); /* * Restore %fs and %gs if needed. Index: head-2007-07-02/arch/i386/kernel/sysenter.c ==================================================================--- head-2007-07-02.orig/arch/i386/kernel/sysenter.c 2007-07-04 11:45:01.000000000 +0200 +++ head-2007-07-02/arch/i386/kernel/sysenter.c 2007-07-04 09:55:04.000000000 +0200 @@ -48,22 +48,33 @@ extern asmlinkage void sysenter_entry(vo void enable_sep_cpu(void) { -#ifndef CONFIG_X86_NO_TSS int cpu = get_cpu(); +#ifndef CONFIG_X86_NO_TSS struct tss_struct *tss = &per_cpu(init_tss, cpu); +#else + (void)cpu; +#endif if (!boot_cpu_has(X86_FEATURE_SEP)) { put_cpu(); return; } +#ifndef CONFIG_X86_NO_TSS tss->ss1 = __KERNEL_CS; tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; - wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0); +#endif + wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); +#ifdef CONFIG_XEN + if (!xen_feature(XENFEAT_supervisor_mode_kernel)) { + extern asmlinkage void sysenter_entry_pv(void); + + wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry_pv, 0); + } else +#endif wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0); put_cpu(); -#endif } /* @@ -78,18 +89,6 @@ int __init sysenter_setup(void) { syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); -#ifdef CONFIG_XEN - if (boot_cpu_has(X86_FEATURE_SEP)) { - 
static struct callback_register __initdata sysenter = { - .type = CALLBACKTYPE_sysenter, - .address = { __KERNEL_CS, (unsigned long)sysenter_entry }, - }; - - if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) < 0) - clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability); - } -#endif - #ifdef CONFIG_COMPAT_VDSO __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY); printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); @@ -174,6 +173,19 @@ int arch_setup_additional_pages(struct l current->mm->context.vdso = (void *)addr; current_thread_info()->sysenter_return (void *)VDSO_SYM(&SYSENTER_RETURN); +#ifdef CONFIG_XEN + if (boot_cpu_has(X86_FEATURE_SEP)) { + /* Specify sysexit address. */ + struct callback_register sysexit = { + .type = CALLBACKTYPE_sysexit, + .address = { __USER_CS, VDSO_SYM(&SYSENTER_RETURN) } + }; + + if (HYPERVISOR_callback_op(CALLBACKOP_register, + &sysexit) < 0) + BUG(); + } +#endif mm->total_vm++; up_fail: up_write(&mm->mmap_sem); Index: head-2007-07-02/arch/x86_64/ia32/Makefile ==================================================================--- head-2007-07-02.orig/arch/x86_64/ia32/Makefile 2007-07-04 11:45:01.000000000 +0200 +++ head-2007-07-02/arch/x86_64/ia32/Makefile 2007-07-03 14:37:01.000000000 +0200 @@ -14,11 +14,15 @@ obj-$(CONFIG_IA32_AOUT) += ia32_aout.o audit-class-$(CONFIG_AUDIT) := audit.o obj-$(CONFIG_IA32_EMULATION) += $(audit-class-y) +syscall32-types-y := sysenter syscall +syscall32-types-$(shell expr $(CONFIG_XEN_COMPAT)0 ''<'' 0x0302000 \ + | sed ''y,01,n$(CONFIG_XEN),'') += int80 + $(obj)/syscall32_syscall.o: \ - $(foreach F,sysenter syscall,$(obj)/vsyscall-$F.so) + $(foreach F,$(syscall32-types-y),$(obj)/vsyscall-$F.so) # Teach kbuild about targets -targets := $(foreach F,sysenter syscall,vsyscall-$F.o vsyscall-$F.so) +targets := $(foreach F,$(syscall32-types-y),vsyscall-$F.o vsyscall-$F.so) # The DSO images are built using a special linker script quiet_cmd_syscall = SYSCALL $@ @@ -27,23 +31,17 @@ 
quiet_cmd_syscall = SYSCALL $@ -Wl,-soname=linux-gate.so.1 -o $@ \ -Wl,-T,$(filter-out FORCE,$^) -$(obj)/vsyscall-int80.so \ -$(obj)/vsyscall-sysenter.so $(obj)/vsyscall-syscall.so: \ +$(foreach F,$(syscall32-types-y),$(obj)/vsyscall-$F.so): \ $(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE $(call if_changed,syscall) AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32 -Iarch/i386/kernel AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32 -Iarch/i386/kernel - -ifdef CONFIG_XEN AFLAGS_vsyscall-int80.o = -m32 -Wa,-32 -Iarch/i386/kernel -CFLAGS_syscall32-xen.o += -DUSE_INT80 -AFLAGS_syscall32_syscall-xen.o += -DUSE_INT80 +ifdef CONFIG_XEN $(obj)/syscall32_syscall-xen.o: \ - $(foreach F,int80 sysenter syscall,$(obj)/vsyscall-$F.so) - -targets := $(foreach F,int80 sysenter syscall,vsyscall-$F.o vsyscall-$F.so) + $(foreach F,$(syscall32-types-y),$(obj)/vsyscall-$F.so) include $(srctree)/scripts/Makefile.xen Index: head-2007-07-02/arch/x86_64/ia32/ia32entry-xen.S ==================================================================--- head-2007-07-02.orig/arch/x86_64/ia32/ia32entry-xen.S 2007-07-04 11:45:01.000000000 +0200 +++ head-2007-07-02/arch/x86_64/ia32/ia32entry-xen.S 2007-07-03 15:12:20.000000000 +0200 @@ -80,7 +80,7 @@ * %ebp user stack * 0(%ebp) Arg6 * - * Interrupts off. + * Interrupts on. * * This is purely a fast path. For anything complicated we use the int 0x80 * path below. 
Set up a complete hardware stack frame to share code @@ -89,38 +89,25 @@ */ ENTRY(ia32_sysenter_target) CFI_STARTPROC32 simple - CFI_DEF_CFA rsp,0 - CFI_REGISTER rsp,rbp - __swapgs - movq %gs:pda_kernelstack, %rsp - addq $(PDA_STACKOFFSET),%rsp - /* - * No need to follow this irqs on/off section: the syscall - * disabled irqs, here we enable it straight after entry: - */ - XEN_UNBLOCK_EVENTS(%r11) - __sti + CFI_DEF_CFA rsp,SS+8-RIP+16 + /*CFI_REL_OFFSET ss,SS-RIP+16*/ + CFI_REL_OFFSET rsp,RSP-RIP+16 + /*CFI_REL_OFFSET rflags,EFLAGS-RIP+16*/ + /*CFI_REL_OFFSET cs,CS-RIP+16*/ + CFI_REL_OFFSET rip,RIP-RIP+16 + CFI_REL_OFFSET r11,8 + CFI_REL_OFFSET rcx,0 + movq 8(%rsp),%r11 + CFI_RESTORE r11 + popq %rcx + CFI_ADJUST_CFA_OFFSET -8 + CFI_RESTORE rcx movl %ebp,%ebp /* zero extension */ - pushq $__USER32_DS - CFI_ADJUST_CFA_OFFSET 8 - /*CFI_REL_OFFSET ss,0*/ - pushq %rbp - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rsp,0 - pushfq - CFI_ADJUST_CFA_OFFSET 8 - /*CFI_REL_OFFSET rflags,0*/ - movl $VSYSCALL32_SYSEXIT, %r10d - CFI_REGISTER rip,r10 - pushq $__USER32_CS - CFI_ADJUST_CFA_OFFSET 8 - /*CFI_REL_OFFSET cs,0*/ - movl %eax, %eax - pushq %r10 - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rip,0 - pushq %rax - CFI_ADJUST_CFA_OFFSET 8 + movl %eax,%eax + movl $__USER32_DS,40(%rsp) + movq %rbp,32(%rsp) + movl $__USER32_CS,16(%rsp) + movq %rax,(%rsp) cld SAVE_ARGS 0,0,0 /* no need to do an access_ok check here because rbp has been @@ -132,7 +119,6 @@ ENTRY(ia32_sysenter_target) GET_THREAD_INFO(%r10) orl $TS_COMPAT,threadinfo_status(%r10) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) - CFI_REMEMBER_STATE jnz sysenter_tracesys sysenter_do_call: cmpl $(IA32_NR_syscalls-1),%eax @@ -140,33 +126,9 @@ sysenter_do_call: IA32_ARG_FIXUP 1 call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) - GET_THREAD_INFO(%r10) - XEN_BLOCK_EVENTS(%r11) - __cli - TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) - jnz int_ret_from_sys_call - 
andl $~TS_COMPAT,threadinfo_status(%r10) - /* clear IF, that popfq doesn''t enable interrupts early */ - andl $~0x200,EFLAGS-R11(%rsp) - RESTORE_ARGS 1,24,1,1,1,1 - popfq - CFI_ADJUST_CFA_OFFSET -8 - /*CFI_RESTORE rflags*/ - popq %rcx /* User %esp */ - CFI_ADJUST_CFA_OFFSET -8 - CFI_REGISTER rsp,rcx - movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */ - CFI_REGISTER rip,rdx - TRACE_IRQS_ON - __swapgs - XEN_UNBLOCK_EVENTS(%r11) - __sti /* sti only takes effect after the next instruction */ - /* sysexit */ - .byte 0xf, 0x35 /* TBD */ + jmp int_ret_from_sys_call sysenter_tracesys: - CFI_RESTORE_STATE SAVE_REST CLEAR_RREGS movq $-ENOSYS,RAX(%rsp) /* really needed? */ @@ -199,7 +161,7 @@ ENDPROC(ia32_sysenter_target) * %esp user stack * 0(%esp) Arg6 * - * Interrupts off. + * Interrupts on. * * This is purely a fast path. For anything complicated we use the int 0x80 * path below. Set up a complete hardware stack frame to share code @@ -208,32 +170,20 @@ ENDPROC(ia32_sysenter_target) */ ENTRY(ia32_cstar_target) CFI_STARTPROC32 simple - CFI_DEF_CFA rsp,PDA_STACKOFFSET - CFI_REGISTER rip,rcx - /*CFI_REGISTER rflags,r11*/ - __swapgs - movl %esp,%r8d - CFI_REGISTER rsp,r8 - movq %gs:pda_kernelstack,%rsp - /* - * No need to follow this irqs on/off section: the syscall - * disabled irqs and here we enable it straight after entry: - */ - XEN_UNBLOCK_EVENTS(%r11) - __sti - SAVE_ARGS 8,1,1 + CFI_DEF_CFA rsp,SS+8-RIP+16 + /*CFI_REL_OFFSET ss,SS-RIP+16*/ + CFI_REL_OFFSET rsp,RSP-RIP+16 + /*CFI_REL_OFFSET rflags,EFLAGS-RIP+16*/ + /*CFI_REL_OFFSET cs,CS-RIP+16*/ + CFI_REL_OFFSET rip,RIP-RIP+16 movl %eax,%eax /* zero extension */ + movl RSP-RIP+16(%rsp),%r8d + SAVE_ARGS -8,1,1 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) - movq %rcx,RIP-ARGOFFSET(%rsp) - CFI_REL_OFFSET rip,RIP-ARGOFFSET movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */ movl %ebp,%ecx - movq $__USER32_CS,CS-ARGOFFSET(%rsp) - movq $__USER32_DS,SS-ARGOFFSET(%rsp) - movq %r11,EFLAGS-ARGOFFSET(%rsp) - /*CFI_REL_OFFSET 
rflags,EFLAGS-ARGOFFSET*/ - movq %r8,RSP-ARGOFFSET(%rsp) - CFI_REL_OFFSET rsp,RSP-ARGOFFSET + movl $__USER32_CS,CS-ARGOFFSET(%rsp) + movl $__USER32_DS,SS-ARGOFFSET(%rsp) /* no need to do an access_ok check here because r8 has been 32bit zero extended */ /* hardware stack frame is complete now */ @@ -244,7 +194,6 @@ ENTRY(ia32_cstar_target) GET_THREAD_INFO(%r10) orl $TS_COMPAT,threadinfo_status(%r10) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) - CFI_REMEMBER_STATE jnz cstar_tracesys cstar_do_call: cmpl $IA32_NR_syscalls-1,%eax @@ -252,26 +201,9 @@ cstar_do_call: IA32_ARG_FIXUP 1 call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) - GET_THREAD_INFO(%r10) - XEN_BLOCK_EVENTS(%r11) - __cli - TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) - jnz int_ret_from_sys_call - andl $~TS_COMPAT,threadinfo_status(%r10) - RESTORE_ARGS 1,-ARG_SKIP,1,1,1 - movl RIP-ARGOFFSET(%rsp),%ecx - CFI_REGISTER rip,rcx - movl EFLAGS-ARGOFFSET(%rsp),%r11d - /*CFI_REGISTER rflags,r11*/ - TRACE_IRQS_ON - movl RSP-ARGOFFSET(%rsp),%esp - CFI_RESTORE rsp - __swapgs - sysretl /* TBD */ + jmp int_ret_from_sys_call cstar_tracesys: - CFI_RESTORE_STATE SAVE_REST CLEAR_RREGS movq $-ENOSYS,RAX(%rsp) /* really needed? */ @@ -312,32 +244,27 @@ ia32_badarg: * Arguments are zero extended. For system calls that want sign extension and * take long arguments a wrapper is needed. Most calls can just be called * directly. - * Assumes it is only called from user space and entered with interrupts off. + * Assumes it is only called from user space and entered with interrupts on. 
*/ ENTRY(ia32_syscall) CFI_STARTPROC simple - CFI_DEF_CFA rsp,SS+8-RIP - /*CFI_REL_OFFSET ss,SS-RIP*/ - CFI_REL_OFFSET rsp,RSP-RIP - /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ - /*CFI_REL_OFFSET cs,CS-RIP*/ - CFI_REL_OFFSET rip,RIP-RIP - __swapgs - /* - * No need to follow this irqs on/off section: the syscall - * disabled irqs and here we enable it straight after entry: - */ - XEN_UNBLOCK_EVENTS(%r11) - __sti - movq (%rsp),%rcx + CFI_DEF_CFA rsp,SS+8-RIP+16 + /*CFI_REL_OFFSET ss,SS-RIP+16*/ + CFI_REL_OFFSET rsp,RSP-RIP+16 + /*CFI_REL_OFFSET rflags,EFLAGS-RIP+16*/ + /*CFI_REL_OFFSET cs,CS-RIP+16*/ + CFI_REL_OFFSET rip,RIP-RIP+16 + CFI_REL_OFFSET r11,8 + CFI_REL_OFFSET rcx,0 movq 8(%rsp),%r11 - addq $0x10,%rsp /* skip rcx and r11 */ + CFI_RESTORE r11 + popq %rcx + CFI_ADJUST_CFA_OFFSET -8 + CFI_RESTORE rcx movl %eax,%eax - pushq %rax - CFI_ADJUST_CFA_OFFSET 8 + movq %rax,(%rsp) cld -/* 1: jmp 1b */ /* note the registers are not zero extended to the sf. this could be a problem. */ SAVE_ARGS 0,0,1 Index: head-2007-07-02/arch/x86_64/ia32/syscall32_syscall-xen.S ==================================================================--- head-2007-07-02.orig/arch/x86_64/ia32/syscall32_syscall-xen.S 2007-07-04 11:45:01.000000000 +0200 +++ head-2007-07-02/arch/x86_64/ia32/syscall32_syscall-xen.S 2007-07-03 14:01:17.000000000 +0200 @@ -2,7 +2,7 @@ .section ".init.data","aw" -#ifdef USE_INT80 +#if CONFIG_XEN_COMPAT < 0x030200 .globl syscall32_int80 .globl syscall32_int80_end Index: head-2007-07-02/arch/x86_64/ia32/syscall32-xen.c ==================================================================--- head-2007-07-02.orig/arch/x86_64/ia32/syscall32-xen.c 2007-07-04 11:45:01.000000000 +0200 +++ head-2007-07-02/arch/x86_64/ia32/syscall32-xen.c 2007-07-03 14:37:38.000000000 +0200 @@ -15,7 +15,8 @@ #include <asm/tlbflush.h> #include <asm/ia32_unistd.h> -#ifdef USE_INT80 +#if CONFIG_XEN_COMPAT < 0x030200 +#include <xen/interface/callback.h> extern unsigned char syscall32_int80[], 
syscall32_int80_end[]; #endif extern unsigned char syscall32_syscall[], syscall32_syscall_end[]; @@ -23,8 +24,9 @@ extern unsigned char syscall32_sysenter[ extern int sysctl_vsyscall32; char *syscall32_page; -#ifndef USE_INT80 static int use_sysenter = -1; +#if CONFIG_XEN_COMPAT < 0x030200 +static int use_int80 = 1; #endif static struct page * @@ -69,13 +71,12 @@ static int __init init_syscall32(void) if (!syscall32_page) panic("Cannot allocate syscall32 page"); -#ifdef USE_INT80 - /* - * At this point we use int 0x80. - */ - memcpy(syscall32_page, syscall32_int80, - syscall32_int80_end - syscall32_int80); -#else +#if CONFIG_XEN_COMPAT < 0x030200 + if (use_int80) { + memcpy(syscall32_page, syscall32_int80, + syscall32_int80_end - syscall32_int80); + } else +#endif if (use_sysenter > 0) { memcpy(syscall32_page, syscall32_sysenter, syscall32_sysenter_end - syscall32_sysenter); @@ -83,7 +84,6 @@ static int __init init_syscall32(void) memcpy(syscall32_page, syscall32_syscall, syscall32_syscall_end - syscall32_syscall); } -#endif return 0; } @@ -96,7 +96,17 @@ core_initcall(init_syscall32); /* May not be __init: called during resume */ void syscall32_cpu_init(void) { -#ifndef USE_INT80 +#if CONFIG_XEN_COMPAT < 0x030200 + static struct callback_register sysexit = { + .type = CALLBACKTYPE_sysexit, + .address = (unsigned long)VSYSCALL32_SYSEXIT + }; + + if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysexit) < 0) + return; + use_int80 = 0; +#endif + if (use_sysenter < 0) use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL); @@ -107,5 +117,4 @@ void syscall32_cpu_init(void) checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); wrmsrl(MSR_CSTAR, ia32_cstar_target); -#endif } Index: head-2007-07-02/arch/x86_64/kernel/entry-xen.S ==================================================================--- head-2007-07-02.orig/arch/x86_64/kernel/entry-xen.S 2007-07-04 11:45:03.000000000 +0200 +++ head-2007-07-02/arch/x86_64/kernel/entry-xen.S 2007-07-03 
14:01:17.000000000 +0200 @@ -244,7 +244,7 @@ END(ret_from_fork) * r11 eflags for syscall/sysret, temporary for C * r12-r15,rbp,rbx saved by C code, not touched. * - * Interrupts are off on entry. + * Interrupts are enabled on entry. * Only called from user space. * * XXX if we had a free scratch register we could save the RSP into the stack frame @@ -259,11 +259,6 @@ ENTRY(system_call) _frame (RIP-0x10) SAVE_ARGS -8,0 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) - /* - * No need to follow this irqs off/on section - it''s straight - * and short: - */ - XEN_UNBLOCK_EVENTS(%r11) GET_THREAD_INFO(%rcx) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) CFI_REMEMBER_STATE Index: head-2007-07-02/arch/x86_64/kernel/process-xen.c ==================================================================--- head-2007-07-02.orig/arch/x86_64/kernel/process-xen.c 2007-07-04 11:45:01.000000000 +0200 +++ head-2007-07-02/arch/x86_64/kernel/process-xen.c 2007-07-04 10:03:04.000000000 +0200 @@ -584,7 +584,10 @@ __switch_to(struct task_struct *prev_p, mcl++; } - (void)HYPERVISOR_multicall(_mcl, mcl - _mcl); + BUG_ON(mcl > _mcl + ARRAY_SIZE(_mcl)); + if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL))) + BUG(); + /* * Switch DS and ES. * This won''t pick up thread selector changes, but I guess that is ok. Index: head-2007-07-02/arch/x86_64/kernel/setup64-xen.c ==================================================================--- head-2007-07-02.orig/arch/x86_64/kernel/setup64-xen.c 2007-07-04 11:45:01.000000000 +0200 +++ head-2007-07-02/arch/x86_64/kernel/setup64-xen.c 2007-07-03 14:01:17.000000000 +0200 @@ -211,7 +211,10 @@ extern asmlinkage void ignore_sysret(voi /* May not be marked __init: used by software suspend */ void syscall_init(void) { -#ifndef CONFIG_XEN +#if CONFIG_XEN_COMPAT < 0x030200 + if (!boot_cpu_has(X86_FEATURE_SEP)) + return; +#endif /* * LSTAR and STAR live in a bit strange symbiosis. * They both write to the same internal register. 
STAR allows to set CS/DS @@ -220,12 +223,12 @@ void syscall_init(void) wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); wrmsrl(MSR_LSTAR, system_call); - /* Flags to clear on syscall */ - wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); -#endif #ifdef CONFIG_IA32_EMULATION syscall32_cpu_init (); #endif + + /* Flags to clear on syscall */ + wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF); } void __cpuinit check_efer(void) Index: head-2007-07-02/arch/x86_64/kernel/traps-xen.c ==================================================================--- head-2007-07-02.orig/arch/x86_64/kernel/traps-xen.c 2007-07-04 11:45:04.000000000 +0200 +++ head-2007-07-02/arch/x86_64/kernel/traps-xen.c 2007-07-03 14:01:17.000000000 +0200 @@ -1110,7 +1110,7 @@ static trap_info_t trap_table[] = { #endif { 19, 0|4, __KERNEL_CS, (unsigned long)simd_coprocessor_error }, #ifdef CONFIG_IA32_EMULATION - { IA32_SYSCALL_VECTOR, 3|4, __KERNEL_CS, (unsigned long)ia32_syscall}, + { IA32_SYSCALL_VECTOR, 3, __KERNEL_CS, (unsigned long)ia32_syscall}, #endif { 0, 0, 0, 0 } }; Index: head-2007-07-02/drivers/xen/Kconfig ==================================================================--- head-2007-07-02.orig/drivers/xen/Kconfig 2007-07-04 11:45:01.000000000 +0200 +++ head-2007-07-02/drivers/xen/Kconfig 2007-07-03 14:01:17.000000000 +0200 @@ -13,7 +13,7 @@ config XEN if XEN config XEN_INTERFACE_VERSION hex - default 0x00030205 + default 0x00030206 menu "XEN" @@ -247,6 +247,9 @@ choice config XEN_COMPAT_030004_AND_LATER bool "3.0.4 and later" + config XEN_COMPAT_030100_AND_LATER + bool "3.1.0 and later" + config XEN_COMPAT_LATEST_ONLY bool "no compatibility code" @@ -255,6 +258,7 @@ endchoice config XEN_COMPAT hex default 0xffffff if XEN_COMPAT_LATEST_ONLY + default 0x030100 if XEN_COMPAT_030100_AND_LATER default 0x030004 if XEN_COMPAT_030004_AND_LATER default 0x030002 if XEN_COMPAT_030002_AND_LATER default 0 Index: head-2007-07-02/drivers/xen/core/smpboot.c 
==================================================================--- head-2007-07-02.orig/drivers/xen/core/smpboot.c 2007-07-04 11:45:07.000000000 +0200 +++ head-2007-07-02/drivers/xen/core/smpboot.c 2007-07-03 14:46:21.000000000 +0200 @@ -174,6 +174,7 @@ static void xen_smp_intr_exit(unsigned i void cpu_bringup(void) { cpu_init(); + identify_cpu(cpu_data + smp_processor_id()); touch_softlockup_watchdog(); preempt_disable(); local_irq_enable(); Index: head-2007-07-02/include/asm-x86_64/mach-xen/setup_arch_post.h ==================================================================--- head-2007-07-02.orig/include/asm-x86_64/mach-xen/setup_arch_post.h 2007-07-04 11:45:01.000000000 +0200 +++ head-2007-07-02/include/asm-x86_64/mach-xen/setup_arch_post.h 2007-07-03 14:01:17.000000000 +0200 @@ -23,10 +23,12 @@ static void __init machine_specific_arch .type = CALLBACKTYPE_failsafe, .address = (unsigned long)failsafe_callback, }; +#if CONFIG_XEN_COMPAT < 0x030200 static struct callback_register __initdata syscall = { .type = CALLBACKTYPE_syscall, .address = (unsigned long)system_call, }; +#endif #ifdef CONFIG_X86_LOCAL_APIC static struct callback_register __initdata nmi_cb = { .type = CALLBACKTYPE_nmi, @@ -37,8 +39,10 @@ static void __init machine_specific_arch ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event); if (ret == 0) ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); +#if CONFIG_XEN_COMPAT < 0x030200 if (ret == 0) ret = HYPERVISOR_callback_op(CALLBACKOP_register, &syscall); +#endif #if CONFIG_XEN_COMPAT <= 0x030002 if (ret == -ENOSYS) ret = HYPERVISOR_set_callbacks( Index: head-2007-07-02/include/xen/interface/callback.h ==================================================================--- head-2007-07-02.orig/include/xen/interface/callback.h 2007-07-04 11:45:01.000000000 +0200 +++ head-2007-07-02/include/xen/interface/callback.h 2007-07-03 10:39:14.000000000 +0200 @@ -38,13 +38,34 @@ #define CALLBACKTYPE_event 0 #define CALLBACKTYPE_failsafe 1 
-#define CALLBACKTYPE_syscall 2 /* x86_64 only */ +#define CALLBACKTYPE_syscall 2 /* x86_64 hv only */ /* - * sysenter is only available on x86_32 with the - * supervisor_mode_kernel option enabled. + * sysenter_deprecated is only available on x86_32 with the + * supervisor_mode_kernel option enabled, and should not be used in new code. */ -#define CALLBACKTYPE_sysenter 3 +#define CALLBACKTYPE_sysenter_deprecated 3 #define CALLBACKTYPE_nmi 4 +#if __XEN_INTERFACE_VERSION__ < 0x00030206 +#define CALLBACKTYPE_sysenter CALLBACKTYPE_sysenter_deprecated +#else +/* + * sysenter is only available + * - on x86_32 with the supervisor_mode_kernel option enabled, + * - on x86_64 hv for x86_32 pv or 32-bit guest support in x86_64 pv. + */ +#define CALLBACKTYPE_sysenter 5 +/* + * sysexit is only available on x86_64 hv, and is only used to fill a + * sysenter frame''s return address (if the guest desires to have a non-NULL + * value there). Additionally, since CALLBACKF_mask_events is meaningless + * here, it is being (mis-)used for 64-bits guests to distinguish sysenter + * callers expected to be in 64-bit mode (flag set) from 32-bit ones (flag + * clear). + */ +#define CALLBACKTYPE_sysexit 6 +#define CALLBACKTYPE_syscall32 7 /* x86_64 only */ +#define CALLBACKTYPE_sfmask 8 /* x86_64 only */ +#endif /* * Disable event deliver during callback? 
This flag is ignored for event and Index: head-2007-07-02/include/xen/interface/xen-compat.h ==================================================================--- head-2007-07-02.orig/include/xen/interface/xen-compat.h 2007-07-04 11:45:01.000000000 +0200 +++ head-2007-07-02/include/xen/interface/xen-compat.h 2007-07-03 10:39:14.000000000 +0200 @@ -27,7 +27,7 @@ #ifndef __XEN_PUBLIC_XEN_COMPAT_H__ #define __XEN_PUBLIC_XEN_COMPAT_H__ -#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030205 +#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030206 #if defined(__XEN__) || defined(__XEN_TOOLS__) /* Xen is built with matching headers and implements the latest interface. */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel