i386 Transparent Paravirtualization Patch #2
Changes required to the low-level fault / system call code to support
transparent paravirtualization, where the kernel may be running at non-zero
CPL.
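The testing of VM_MASK and CS together uses a neat trick, which is now extended
to support CPL 0, 1 and 2 kernels: the low byte of CS is merged into EFLAGS,
the result is masked with (VM_MASK | 3), and any value below 3 (VM clear and
RPL 0-2) is treated as a return to the kernel.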
Note that assembly code must compare %ss against __ESPFIX_SS, the selector of
the 16-bit stack, on interrupts and exceptions. Doing so at non-zero CPL
requires first masking off the RPL of the selector (MASK_RPL, which is a nop
for the mach-default implementation).
Also, there is no reason to load __KERNEL_DS into %ds or %es. __KERNEL_DS
is actually quite poorly named, and is in fact the kernel stack selector. The
kernel uses __USER_DS internally as both %es and %ds (since it is a flat CPL-3
accessible data segment that allows the kernel to skip reloading segments on
sysexit). The reason for avoiding the __KERNEL_DS macro is that it is
predefined as an RPL-zero segment.
Diffs against: linux-2.6.13-rc6
Signed-off-by: Zachary Amsden <zach@vmware.com>
Index: linux-2.6.13/arch/i386/kernel/entry.S
==================================================================
--- linux-2.6.13.orig/arch/i386/kernel/entry.S 2005-08-17 11:16:54.000000000 -0700
+++ linux-2.6.13/arch/i386/kernel/entry.S 2005-08-17 11:17:13.000000000 -0700
@@ -159,8 +159,9 @@ ret_from_intr:
GET_THREAD_INFO(%ebp)
movl EFLAGS(%esp), %eax # mix EFLAGS and CS
movb CS(%esp), %al
- testl $(VM_MASK | 3), %eax
- jz resume_kernel
+ andl $(VM_MASK | 3), %eax
+ cmpl $3, %eax
+ jb resume_kernel
ENTRY(resume_userspace)
CLI # make sure we don't miss an interrupt
# setting need_resched or sigpending
@@ -399,10 +400,11 @@ syscall_badsys:
#define UNWIND_ESPFIX_STACK \
pushl %eax; \
movl %ss, %eax; \
+ MASK_RPL(%ax); \
/* see if on 16bit stack */ \
cmpw $__ESPFIX_SS, %ax; \
jne 28f; \
- movl $__KERNEL_DS, %edx; \
+ movl $__USER_DS, %edx; \
movl %edx, %ds; \
movl %edx, %es; \
/* switch to 32bit stack */ \
@@ -500,7 +502,7 @@ ENTRY(simd_coprocessor_error)
ENTRY(device_not_available)
pushl $-1 # mark this as an int
SAVE_ALL
- movl %cr0, %eax
+ GET_CR0
testl $0x4, %eax # EM (math emulation bit)
jne device_not_available_emulate
preempt_stop
@@ -557,6 +559,7 @@ debug_stack_correct:
ENTRY(nmi)
pushl %eax
movl %ss, %eax
+ MASK_RPL(%ax)
cmpw $__ESPFIX_SS, %ax
popl %eax
je nmi_16bit_stack
Index: linux-2.6.13/include/asm-i386/mach-default/mach_asm.h
==================================================================
--- linux-2.6.13.orig/include/asm-i386/mach-default/mach_asm.h 2005-08-17 11:17:09.000000000 -0700
+++ linux-2.6.13/include/asm-i386/mach-default/mach_asm.h 2005-08-17 11:17:13.000000000 -0700
@@ -7,5 +7,6 @@
#define CLI cli
#define STI sti
#define STI_SYSEXIT sti; sysexit
+#define MASK_RPL(seg) /* nop */
#endif