zach@vmware.com
2007-Apr-18 17:49 UTC
[PATCH 1/2] Transparent entry.S IRQ holdoff handling
i386 Transparent paravirtualization patch #1. Add support for interrupt holdoff to the entry.S fault and syscall paths. This is a straightforward macro-ization for the default sub-architecture. Note that CLI, STI and IRET may be called with non-flat segments because guest %ds and %es segments are live at the time. Any alternative implementation (such as a virtual interrupt mask) is required to be compatible with this requirement. This makes maintenance of the code in entry.S much more easy, and eliminates some very tricky code changes that would otherwise be required. CLI/STI/IRET may all assume a 32 bit flat stack for writing to data areas. STI_SYSEXIT is macroized together because it implicitly relies on the interrupt holdoff property of STI. The IRET from a 16-bit stack must be marked explicitly as IRET16, because at this point there are no flat data segments loaded. It is expected that the IRET translation for a hypervisor will need to access a virtual interrupt flag in memory, requiring either a flat stack segment or knowledge of non-flatness so it can load a temporary selector to use. Signed-off-by: Zachary Amsden <zach@vmware.com> Index: linux-2.6.13/arch/i386/kernel/entry.S ==================================================================--- linux-2.6.13.orig/arch/i386/kernel/entry.S 2005-08-16 14:12:11.000000000 -0700 +++ linux-2.6.13/arch/i386/kernel/entry.S 2005-08-17 11:16:54.000000000 -0700 @@ -49,6 +49,7 @@ #include <asm/page.h> #include <asm/desc.h> #include "irq_vectors.h" +#include "mach_asm.h" /* We do not recover from a stack overflow, but at least * we know it happened and should be able to track it down. 
*/ @@ -88,7 +89,7 @@ NT_MASK = 0x00004000 VM_MASK = 0x00020000 #ifdef CONFIG_PREEMPT -#define preempt_stop cli +#define preempt_stop CLI #else #define preempt_stop #define resume_kernel restore_nocheck @@ -161,7 +162,7 @@ ret_from_intr: testl $(VM_MASK | 3), %eax jz resume_kernel ENTRY(resume_userspace) - cli # make sure we don't miss an interrupt + CLI # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx @@ -172,7 +173,7 @@ ENTRY(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) - cli + CLI cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_nocheck need_resched: @@ -193,7 +194,7 @@ ENTRY(sysenter_entry) movl TSS_sysenter_esp0(%esp),%esp .globl sysenter_past_esp sysenter_past_esp: - sti + STI pushl $(__USER_DS) pushl %ebp pushfl @@ -223,7 +224,7 @@ sysenter_past_esp: jae syscall_badsys call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) - cli + CLI movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work @@ -231,9 +232,7 @@ sysenter_past_esp: movl EIP(%esp), %edx movl OLDESP(%esp), %ecx xorl %ebp,%ebp - sti - sysexit - + STI_SYSEXIT # system call handler stub ENTRY(system_call) @@ -250,7 +249,7 @@ syscall_call: call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) # store the return value syscall_exit: - cli # make sure we don't miss an interrupt + CLI # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret #ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS @@ -274,10 +273,10 @@ restore_all: restore_nocheck: RESTORE_REGS addl $4, %esp -1: iret +1: IRET .section .fixup,"ax" iret_exc: - sti + STI pushl $0 # no error code pushl $do_iret_error jmp error_code @@ -299,14 +298,14 @@ ldt_ss: * CPUs, which we can try to work around to make * dosemu and wine happy. 
*/ subl $8, %esp # reserve space for switch16 pointer - cli + CLI movl %esp, %eax /* Set up the 16bit stack frame with switch32 pointer on top, * and a switch16 pointer on top of the current frame. */ call setup_x86_bogus_stack RESTORE_REGS lss 20+4(%esp), %esp # switch to 16bit stack -1: iret +1: IRET16 .section __ex_table,"a" .align 4 .long 1b,iret_exc @@ -319,7 +318,7 @@ work_pending: jz work_notifysig work_resched: call schedule - cli # make sure we don't miss an interrupt + CLI # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx @@ -369,7 +368,7 @@ syscall_trace_entry: syscall_exit_work: testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl jz work_pending - sti # could let do_syscall_trace() call + STI # could let do_syscall_trace() call # schedule() instead movl %esp, %eax movl $1, %edx @@ -613,7 +612,7 @@ nmi_16bit_stack: call do_nmi RESTORE_REGS lss 12+4(%esp), %esp # back to 16bit stack -1: iret +1: IRET16 .section __ex_table,"a" .align 4 .long 1b,iret_exc Index: linux-2.6.13/include/asm-i386/mach-default/mach_asm.h ==================================================================--- linux-2.6.13.orig/include/asm-i386/mach-default/mach_asm.h 2005-08-16 14:12:22.000000000 -0700 +++ linux-2.6.13/include/asm-i386/mach-default/mach_asm.h 2005-08-17 11:17:09.000000000 -0700 @@ -0,0 +1,11 @@ +#ifndef __MACH_ASM_H +#define __MACH_ASM_H + +#define GET_CR0 mov %cr0, %eax +#define IRET iret +#define IRET16 iret +#define CLI cli +#define STI sti +#define STI_SYSEXIT sti; sysexit + +#endif
* zach@vmware.com (zach@vmware.com) wrote:
> i386 Transparent paravirtualization patch #1.
>
> Add support for interrupt holdoff to the entry.S fault and syscall paths. This
> is a straightforward macro-ization for the default sub-architecture. Note that
> CLI, STI and IRET may be called with non-flat segments because guest %ds and
> %es segments are live at the time. Any alternative implementation (such as a
> virtual interrupt mask) is required to be compatible with this requirement.

Could you please elaborate on this one? I'd like to make sure I understand (since Xen uses some type of virtual interrupt mask to disable event channel).

> This makes maintenance of the code in entry.S much more easy, and eliminates
> some very tricky code changes that would otherwise be required. CLI/STI/IRET
> may all assume a 32 bit flat stack for writing to data areas.
>
> STI_SYSEXIT is macroized together because it implicitly relies on the interrupt
> holdoff property of STI.

I'm also unclear on this bit.

thanks,
-chris