zach@vmware.com
2007-Apr-18 17:49 UTC
[PATCH 1/2] Transparent entry.S IRQ holdoff handling
i386 Transparent paravirtualization patch #1.
Add support for interrupt holdoff to the entry.S fault and syscall paths. This
is a straightforward macro-ization for the default sub-architecture. Note that
CLI, STI and IRET may be called with non-flat segments because guest %ds and
%es segments are live at the time. Any alternative implementation (such as a
virtual interrupt mask) is required to be compatible with this requirement.
This makes maintenance of the code in entry.S much easier, and eliminates
some very tricky code changes that would otherwise be required. CLI/STI/IRET
may all assume a 32 bit flat stack for writing to data areas.
STI_SYSEXIT is macroized together because it implicitly relies on the interrupt
holdoff property of STI.
The IRET from a 16-bit stack must be marked explicitly as IRET16, because at
this point there are no flat data segments loaded. It is expected that the
IRET translation for a hypervisor will need to access a virtual interrupt flag
in memory, requiring either a flat stack segment or knowledge of non-flatness
so it can load a temporary selector to use.
Signed-off-by: Zachary Amsden <zach@vmware.com>
Index: linux-2.6.13/arch/i386/kernel/entry.S
===================================================================
--- linux-2.6.13.orig/arch/i386/kernel/entry.S 2005-08-16 14:12:11.000000000 -0700
+++ linux-2.6.13/arch/i386/kernel/entry.S 2005-08-17 11:16:54.000000000 -0700
@@ -49,6 +49,7 @@
#include <asm/page.h>
#include <asm/desc.h>
#include "irq_vectors.h"
+#include "mach_asm.h"
/* We do not recover from a stack overflow, but at least
* we know it happened and should be able to track it down.
*/
@@ -88,7 +89,7 @@ NT_MASK = 0x00004000
VM_MASK = 0x00020000
#ifdef CONFIG_PREEMPT
-#define preempt_stop cli
+#define preempt_stop CLI
#else
#define preempt_stop
#define resume_kernel restore_nocheck
@@ -161,7 +162,7 @@ ret_from_intr:
testl $(VM_MASK | 3), %eax
jz resume_kernel
ENTRY(resume_userspace)
- cli # make sure we don't miss an interrupt
+ CLI # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
movl TI_flags(%ebp), %ecx
@@ -172,7 +173,7 @@ ENTRY(resume_userspace)
#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
- cli
+ CLI
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
jnz restore_nocheck
need_resched:
@@ -193,7 +194,7 @@ ENTRY(sysenter_entry)
movl TSS_sysenter_esp0(%esp),%esp
.globl sysenter_past_esp
sysenter_past_esp:
- sti
+ STI
pushl $(__USER_DS)
pushl %ebp
pushfl
@@ -223,7 +224,7 @@ sysenter_past_esp:
jae syscall_badsys
call *sys_call_table(,%eax,4)
movl %eax,EAX(%esp)
- cli
+ CLI
movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx
jne syscall_exit_work
@@ -231,9 +232,7 @@ sysenter_past_esp:
movl EIP(%esp), %edx
movl OLDESP(%esp), %ecx
xorl %ebp,%ebp
- sti
- sysexit
-
+ STI_SYSEXIT
# system call handler stub
ENTRY(system_call)
@@ -250,7 +249,7 @@ syscall_call:
call *sys_call_table(,%eax,4)
movl %eax,EAX(%esp) # store the return value
syscall_exit:
- cli # make sure we don't miss an interrupt
+ CLI # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS
@@ -274,10 +273,10 @@ restore_all:
restore_nocheck:
RESTORE_REGS
addl $4, %esp
-1: iret
+1: IRET
.section .fixup,"ax"
iret_exc:
- sti
+ STI
pushl $0 # no error code
pushl $do_iret_error
jmp error_code
@@ -299,14 +298,14 @@ ldt_ss:
* CPUs, which we can try to work around to make
* dosemu and wine happy. */
subl $8, %esp # reserve space for switch16 pointer
- cli
+ CLI
movl %esp, %eax
/* Set up the 16bit stack frame with switch32 pointer on top,
* and a switch16 pointer on top of the current frame. */
call setup_x86_bogus_stack
RESTORE_REGS
lss 20+4(%esp), %esp # switch to 16bit stack
-1: iret
+1: IRET16
.section __ex_table,"a"
.align 4
.long 1b,iret_exc
@@ -319,7 +318,7 @@ work_pending:
jz work_notifysig
work_resched:
call schedule
- cli # make sure we don't miss an interrupt
+ CLI # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
movl TI_flags(%ebp), %ecx
@@ -369,7 +368,7 @@ syscall_trace_entry:
syscall_exit_work:
testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
jz work_pending
- sti # could let do_syscall_trace() call
+ STI # could let do_syscall_trace() call
# schedule() instead
movl %esp, %eax
movl $1, %edx
@@ -613,7 +612,7 @@ nmi_16bit_stack:
call do_nmi
RESTORE_REGS
lss 12+4(%esp), %esp # back to 16bit stack
-1: iret
+1: IRET16
.section __ex_table,"a"
.align 4
.long 1b,iret_exc
Index: linux-2.6.13/include/asm-i386/mach-default/mach_asm.h
===================================================================
--- linux-2.6.13.orig/include/asm-i386/mach-default/mach_asm.h 2005-08-16 14:12:22.000000000 -0700
+++ linux-2.6.13/include/asm-i386/mach-default/mach_asm.h 2005-08-17 11:17:09.000000000 -0700
@@ -0,0 +1,11 @@
+#ifndef __MACH_ASM_H
+#define __MACH_ASM_H
+
+#define GET_CR0 mov %cr0, %eax
+#define IRET iret
+#define IRET16 iret
+#define CLI cli
+#define STI sti
+#define STI_SYSEXIT sti; sysexit
+
+#endif
* zach@vmware.com (zach@vmware.com) wrote:
> i386 Transparent paravirtualization patch #1.
>
> Add support for interrupt holdoff to the entry.S fault and syscall paths. This
> is a straightforward macro-ization for the default sub-architecture. Note that
> CLI, STI and IRET may be called with non-flat segments because guest %ds and
> %es segments are live at the time. Any alternative implementation (such as a
> virtual interrupt mask) is required to be compatible with this requirement.

Could you please elaborate on this one? I'd like to make sure I understand
(since Xen uses some type of virtual interrupt mask to disable event channel).

> This makes maintenance of the code in entry.S much more easy, and eliminates
> some very tricky code changes that would otherwise be required. CLI/STI/IRET
> may all assume a 32 bit flat stack for writing to data areas.
>
> STI_SYSEXIT is macroized together because it implicitly relies on the interrupt
> holdoff property of STI.

I'm also unclear on this bit.

thanks,
-chris