The first patch fixes Xen PV regression introduced by 32-bit rewrite. Unlike the earlier version it uses ALTERNATIVE instruction and avoids using xen_sysexit (and sysret32 in compat mode) pv ops, as suggested by Andy. (I ended up patching TEST with XOR to avoid extra NOPs, even though I said yesterday it would be wrong. It's not wrong) As result of this patch irq_enable_sysexit and usergs_sysret32 pv ops are not used anymore by anyone and so can be removed. Boris Ostrovsky (3): x86/xen: Avoid fast syscall path for Xen PV guests x86: irq_enable_sysexit pv op is no longer needed x86: usergs_sysret32 pv op is no longer needed arch/x86/entry/entry_32.S | 11 ++++------- arch/x86/entry/entry_64_compat.S | 16 ++++++---------- arch/x86/include/asm/cpufeature.h | 1 + arch/x86/include/asm/paravirt.h | 12 ------------ arch/x86/include/asm/paravirt_types.h | 17 ----------------- arch/x86/kernel/asm-offsets.c | 3 --- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/paravirt.c | 12 ------------ arch/x86/kernel/paravirt_patch_32.c | 2 -- arch/x86/kernel/paravirt_patch_64.c | 3 --- arch/x86/xen/enlighten.c | 7 +++---- arch/x86/xen/xen-asm_32.S | 14 -------------- arch/x86/xen/xen-asm_64.S | 19 ------------------- arch/x86/xen/xen-ops.h | 3 --- 14 files changed, 14 insertions(+), 107 deletions(-) -- 1.8.1.4
Boris Ostrovsky
2015-Nov-18 20:06 UTC
[PATCH 1/3] x86/xen: Avoid fast syscall path for Xen PV guests
After 32-bit syscall rewrite, and specifically after commit 5f310f739b4c
("x86/entry/32: Re-implement SYSENTER using the new C path"), the
stack
frame that is passed to xen_sysexit is no longer a "standard" one
(i.e.
it's not pt_regs).
Since we end up calling xen_iret from xen_sysexit we don't need to fix
up the stack and instead follow entry_SYSENTER_32's IRET path directly
to xen_iret.
We can do the same thing for compat mode even though stack does not need
to be fixed. This will allow us to drop usergs_sysret32 paravirt op (in
the subsequent patch)
Signed-off-by: Boris Ostrovsky <boris.ostrovsky at oracle.com>
Suggested-by: Andy Lutomirski <luto at amacapital.net>
---
arch/x86/entry/entry_32.S | 3 ++-
arch/x86/entry/entry_64_compat.S | 6 ++++--
arch/x86/include/asm/cpufeature.h | 1 +
arch/x86/xen/enlighten.c | 4 +++-
4 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 3eb572e..901f186 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -308,7 +308,8 @@ sysenter_past_esp:
movl %esp, %eax
call do_fast_syscall_32
- testl %eax, %eax
+ /* XEN PV guests always use IRET path */
+ ALTERNATIVE "testl %eax, %eax", "xor %eax, %eax",
X86_FEATURE_XENPV
jz .Lsyscall_32_done
/* Opportunistic SYSEXIT */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index c320183..98893d9 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -121,7 +121,8 @@ sysenter_flags_fixed:
movq %rsp, %rdi
call do_fast_syscall_32
- testl %eax, %eax
+ /* XEN PV guests always use IRET path */
+ ALTERNATIVE "testl %eax, %eax", "xor %eax, %eax",
X86_FEATURE_XENPV
jz .Lsyscall_32_done
jmp sysret32_from_system_call
@@ -200,7 +201,8 @@ ENTRY(entry_SYSCALL_compat)
movq %rsp, %rdi
call do_fast_syscall_32
- testl %eax, %eax
+ /* XEN PV guests always use IRET path */
+ ALTERNATIVE "testl %eax, %eax", "xor %eax, %eax",
X86_FEATURE_XENPV
jz .Lsyscall_32_done
/* Opportunistic SYSRET */
diff --git a/arch/x86/include/asm/cpufeature.h
b/arch/x86/include/asm/cpufeature.h
index e4f8010..0e4fe5b 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -216,6 +216,7 @@
#define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */
#define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */
#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
+#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 5774800..d315151 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1886,8 +1886,10 @@ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
static void xen_set_cpu_features(struct cpuinfo_x86 *c)
{
- if (xen_pv_domain())
+ if (xen_pv_domain()) {
clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+ set_cpu_cap(c, X86_FEATURE_XENPV);
+ }
}
const struct hypervisor_x86 x86_hyper_xen = {
--
1.8.1.4
Boris Ostrovsky
2015-Nov-18 20:06 UTC
[PATCH 2/3] x86: irq_enable_sysexit pv op is no longer needed
Xen PV guests have been the only ones using it and now they don't.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky at oracle.com>
---
arch/x86/entry/entry_32.S | 8 ++------
arch/x86/include/asm/paravirt.h | 7 -------
arch/x86/include/asm/paravirt_types.h | 9 ---------
arch/x86/kernel/asm-offsets.c | 3 ---
arch/x86/kernel/paravirt.c | 7 -------
arch/x86/kernel/paravirt_patch_32.c | 2 --
arch/x86/kernel/paravirt_patch_64.c | 1 -
arch/x86/xen/enlighten.c | 3 ---
arch/x86/xen/xen-asm_32.S | 14 --------------
arch/x86/xen/xen-ops.h | 3 ---
10 files changed, 2 insertions(+), 55 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 901f186..e2158ae 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -329,7 +329,8 @@ sysenter_past_esp:
* Return back to the vDSO, which will pop ecx and edx.
* Don't bother with DS and ES (they already contain __USER_DS).
*/
- ENABLE_INTERRUPTS_SYSEXIT
+ sti
+ sysexit
.pushsection .fixup, "ax"
2: movl $0, PT_FS(%esp)
@@ -552,11 +553,6 @@ ENTRY(native_iret)
iret
_ASM_EXTABLE(native_iret, iret_exc)
END(native_iret)
-
-ENTRY(native_irq_enable_sysexit)
- sti
- sysexit
-END(native_irq_enable_sysexit)
#endif
ENTRY(overflow)
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 10d0596..c28518e 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -932,13 +932,6 @@ extern void default_banner(void);
push %ecx; push %edx; \
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
pop %edx; pop %ecx
-
-#define ENABLE_INTERRUPTS_SYSEXIT \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
- CLBR_NONE, \
- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
-
-
#else /* !CONFIG_X86_32 */
/*
diff --git a/arch/x86/include/asm/paravirt_types.h
b/arch/x86/include/asm/paravirt_types.h
index 31247b5..608bbf3 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -157,15 +157,6 @@ struct pv_cpu_ops {
u64 (*read_pmc)(int counter);
-#ifdef CONFIG_X86_32
- /*
- * Atomically enable interrupts and return to userspace. This
- * is only used in 32-bit kernels. 64-bit kernels use
- * usergs_sysret32 instead.
- */
- void (*irq_enable_sysexit)(void);
-#endif
-
/*
* Switch to usermode gs and return to 64-bit usermode using
* sysret. Only used in 64-bit kernels to return to 64-bit
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 439df97..84a7524 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -65,9 +65,6 @@ void common(void) {
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-#ifdef CONFIG_X86_32
- OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
-#endif
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
#endif
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c2130ae..c55f437 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -162,9 +162,6 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void
*insnbuf,
ret = paravirt_patch_ident_64(insnbuf, len);
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
-#ifdef CONFIG_X86_32
- type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
-#endif
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
/* If operation requires a jmp, then jmp */
@@ -220,7 +217,6 @@ static u64 native_steal_clock(int cpu)
/* These are in entry.S */
extern void native_iret(void);
-extern void native_irq_enable_sysexit(void);
extern void native_usergs_sysret32(void);
extern void native_usergs_sysret64(void);
@@ -379,9 +375,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.load_sp0 = native_load_sp0,
-#if defined(CONFIG_X86_32)
- .irq_enable_sysexit = native_irq_enable_sysexit,
-#endif
#ifdef CONFIG_X86_64
#ifdef CONFIG_IA32_EMULATION
.usergs_sysret32 = native_usergs_sysret32,
diff --git a/arch/x86/kernel/paravirt_patch_32.c
b/arch/x86/kernel/paravirt_patch_32.c
index c89f50a..158dc06 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -5,7 +5,6 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
DEF_NATIVE(pv_cpu_ops, iret, "iret");
-DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
@@ -46,7 +45,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, restore_fl);
PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_cpu_ops, iret);
- PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
PATCH_SITE(pv_mmu_ops, read_cr2);
PATCH_SITE(pv_mmu_ops, read_cr3);
PATCH_SITE(pv_mmu_ops, write_cr3);
diff --git a/arch/x86/kernel/paravirt_patch_64.c
b/arch/x86/kernel/paravirt_patch_64.c
index 8aa0558..17c00f8 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -13,7 +13,6 @@ DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg
(%rdi)");
DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
-DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "swapgs; sti; sysexit");
DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d315151..a068e36 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1229,10 +1229,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst =
{
.iret = xen_iret,
#ifdef CONFIG_X86_64
- .usergs_sysret32 = xen_sysret32,
.usergs_sysret64 = xen_sysret64,
-#else
- .irq_enable_sysexit = xen_sysexit,
#endif
.load_tr_desc = paravirt_nop,
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index fd92a64..feb6d40 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -35,20 +35,6 @@ check_events:
ret
/*
- * We can't use sysexit directly, because we're not running in ring0.
- * But we can easily fake it up using iret. Assuming xen_sysexit is
- * jumped to with a standard stack frame, we can just strip it back to
- * a standard iret frame and use iret.
- */
-ENTRY(xen_sysexit)
- movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */
- orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
- lea PT_EIP(%esp), %esp
-
- jmp xen_iret
-ENDPROC(xen_sysexit)
-
-/*
* This is run where a normal iret would be run, with the same stack setup:
* 8: eflags
* 4: cs
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 1399423..4140b07 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -139,9 +139,6 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long);
/* These are not functions, and cannot be called normally */
__visible void xen_iret(void);
-#ifdef CONFIG_X86_32
-__visible void xen_sysexit(void);
-#endif
__visible void xen_sysret32(void);
__visible void xen_sysret64(void);
__visible void xen_adjust_exception_frame(void);
--
1.8.1.4
Boris Ostrovsky
2015-Nov-18 20:06 UTC
[PATCH 3/3] x86: usergs_sysret32 pv op is no longer needed
Xen PV guests have been the only ones using it and now they don't.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky at oracle.com>
---
arch/x86/entry/entry_64_compat.S | 10 ++--------
arch/x86/include/asm/paravirt.h | 5 -----
arch/x86/include/asm/paravirt_types.h | 8 --------
arch/x86/kernel/asm-offsets_64.c | 1 -
arch/x86/kernel/paravirt.c | 5 -----
arch/x86/kernel/paravirt_patch_64.c | 2 --
arch/x86/xen/xen-asm_64.S | 19 -------------------
7 files changed, 2 insertions(+), 48 deletions(-)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 98893d9..82415dc 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -18,13 +18,6 @@
.section .entry.text, "ax"
-#ifdef CONFIG_PARAVIRT
-ENTRY(native_usergs_sysret32)
- swapgs
- sysretl
-ENDPROC(native_usergs_sysret32)
-#endif
-
/*
* 32-bit SYSENTER instruction entry.
*
@@ -238,7 +231,8 @@ sysret32_from_system_call:
xorq %r9, %r9
xorq %r10, %r10
movq RSP-ORIG_RAX(%rsp), %rsp
- USERGS_SYSRET32
+ swapgs
+ sysretl
END(entry_SYSCALL_compat)
/*
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c28518e..1b71c3a 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -922,11 +922,6 @@ extern void default_banner(void);
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-#define USERGS_SYSRET32 \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \
- CLBR_NONE, \
- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))
-
#ifdef CONFIG_X86_32
#define GET_CR0_INTO_EAX \
push %ecx; push %edx; \
diff --git a/arch/x86/include/asm/paravirt_types.h
b/arch/x86/include/asm/paravirt_types.h
index 608bbf3..702c8bd 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -165,14 +165,6 @@ struct pv_cpu_ops {
*/
void (*usergs_sysret64)(void);
- /*
- * Switch to usermode gs and return to 32-bit usermode using
- * sysret. Used to return to 32-on-64 compat processes.
- * Other usermode register state, including %esp, must already
- * be restored.
- */
- void (*usergs_sysret32)(void);
-
/* Normal iret. Jump to this with the standard iret stack
frame set up. */
void (*iret)(void);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index d8f42f9..f2edafb 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -23,7 +23,6 @@ int main(void)
{
#ifdef CONFIG_PARAVIRT
OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
- OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
BLANK();
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c55f437..8c19b4d 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -162,7 +162,6 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void
*insnbuf,
ret = paravirt_patch_ident_64(insnbuf, len);
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
- type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
/* If operation requires a jmp, then jmp */
ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
@@ -217,7 +216,6 @@ static u64 native_steal_clock(int cpu)
/* These are in entry.S */
extern void native_iret(void);
-extern void native_usergs_sysret32(void);
extern void native_usergs_sysret64(void);
static struct resource reserve_ioports = {
@@ -376,9 +374,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.load_sp0 = native_load_sp0,
#ifdef CONFIG_X86_64
-#ifdef CONFIG_IA32_EMULATION
- .usergs_sysret32 = native_usergs_sysret32,
-#endif
.usergs_sysret64 = native_usergs_sysret64,
#endif
.iret = native_iret,
diff --git a/arch/x86/kernel/paravirt_patch_64.c
b/arch/x86/kernel/paravirt_patch_64.c
index 17c00f8..e70087a 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -14,7 +14,6 @@ DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
-DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
DEF_NATIVE(, mov32, "mov %edi, %eax");
@@ -54,7 +53,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_irq_ops, irq_enable);
PATCH_SITE(pv_irq_ops, irq_disable);
- PATCH_SITE(pv_cpu_ops, usergs_sysret32);
PATCH_SITE(pv_cpu_ops, usergs_sysret64);
PATCH_SITE(pv_cpu_ops, swapgs);
PATCH_SITE(pv_mmu_ops, read_cr2);
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index f22667a..cc8acc4 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -68,25 +68,6 @@ ENTRY(xen_sysret64)
ENDPATCH(xen_sysret64)
RELOC(xen_sysret64, 1b+1)
-ENTRY(xen_sysret32)
- /*
- * We're already on the usermode stack at this point, but
- * still with the kernel gs, so we can easily switch back
- */
- movq %rsp, PER_CPU_VAR(rsp_scratch)
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-
- pushq $__USER32_DS
- pushq PER_CPU_VAR(rsp_scratch)
- pushq %r11
- pushq $__USER32_CS
- pushq %rcx
-
- pushq $0
-1: jmp hypercall_iret
-ENDPATCH(xen_sysret32)
-RELOC(xen_sysret32, 1b+1)
-
/*
* Xen handles syscall callbacks much like ordinary exceptions, which
* means we have:
--
1.8.1.4
Andy Lutomirski
2015-Nov-18 20:21 UTC
[PATCH 1/3] x86/xen: Avoid fast syscall path for Xen PV guests
On Wed, Nov 18, 2015 at 12:06 PM, Boris Ostrovsky <boris.ostrovsky at oracle.com> wrote:> After 32-bit syscall rewrite, and specifically after commit 5f310f739b4c > ("x86/entry/32: Re-implement SYSENTER using the new C path"), the stack > frame that is passed to xen_sysexit is no longer a "standard" one (i.e. > it's not pt_regs). > > Since we end up calling xen_iret from xen_sysexit we don't need to fix > up the stack and instead follow entry_SYSENTER_32's IRET path directly > to xen_iret. > > We can do the same thing for compat mode even though stack does not need > to be fixed. This will allow us to drop usergs_sysret32 paravirt op (in > the subsequent patch)Looks generally quite nice. Minor comments below:> --- a/arch/x86/entry/entry_32.S > +++ b/arch/x86/entry/entry_32.S > @@ -308,7 +308,8 @@ sysenter_past_esp: > > movl %esp, %eax > call do_fast_syscall_32 > - testl %eax, %eax > + /* XEN PV guests always use IRET path */ > + ALTERNATIVE "testl %eax, %eax", "xor %eax, %eax", X86_FEATURE_XENPV > jz .Lsyscall_32_doneCould we make this a little less subtle: ALTERNATIVE "testl %eax, %eax; lz .Lsyscall_32_done", "jmp .Lsyscasll_32_done", X86_FEATURE_XENPV Borislav, what do you think? Ditto for the others.> diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h > index e4f8010..0e4fe5b 100644 > --- a/arch/x86/include/asm/cpufeature.h > +++ b/arch/x86/include/asm/cpufeature.h > @@ -216,6 +216,7 @@ > #define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */ > #define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */ > #define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ > +#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */ >This bit is highly magical and I think we need Borislav's ack. --Andy
Andy Lutomirski
2015-Nov-18 20:23 UTC
[PATCH 2/3] x86: irq_enable_sysexit pv op is no longer needed
On Wed, Nov 18, 2015 at 12:06 PM, Boris Ostrovsky <boris.ostrovsky at oracle.com> wrote:> Xen PV guests have been the only ones using it and now they don't.Fantastic! --Andy
Andy Lutomirski
2015-Nov-18 20:26 UTC
[PATCH 3/3] x86: usergs_sysret32 pv op is no longer needed
On Wed, Nov 18, 2015 at 12:06 PM, Boris Ostrovsky <boris.ostrovsky at oracle.com> wrote:> Xen PV guests have been the only ones using it and now they don't.Yay! --Andy
Boris Ostrovsky
2015-Nov-18 20:34 UTC
[PATCH 2/3] x86: irq_enable_sysexit pv op is no longer needed
On 11/18/2015 03:11 PM, Konrad Rzeszutek Wilk wrote:> On Wed, Nov 18, 2015 at 03:06:18PM -0500, Boris Ostrovsky wrote: >> Xen PV guests have been the only ones using it and now they don't. > Could you elaborate on the 'now they don't' please? > > Is Xen not doing it? Did it do it in the past? > > Is it because the Linux code paths will never run to this in Xen PV mode? > Is that due to other patches? If so please mention the name > of the other patches.. (so if somebody is thinking to put the > other patches on the stable train they should also take these ones).Since this is due to the first patch in the series I don't know its commitID. Should I just refer to patch title? -boris
On Wed, Nov 18, 2015 at 03:06:16PM -0500, Boris Ostrovsky wrote:> The first patch fixes Xen PV regression introduced by 32-bit rewrite. Unlike the > earlier version it uses ALTERNATIVE instruction and avoids using xen_sysexit > (and sysret32 in compat mode) pv ops, as suggested by Andy. (I ended up patching > TEST with XOR to avoid extra NOPs, even though I said yesterday it would be > wrong. It's not wrong) > > As result of this patch irq_enable_sysexit and usergs_sysret32 pv ops are not > used anymore by anyone and so can be removed. > > Boris Ostrovsky (3): > x86/xen: Avoid fast syscall path for Xen PV guests > x86: irq_enable_sysexit pv op is no longer needed > x86: usergs_sysret32 pv op is no longer needed > > arch/x86/entry/entry_32.S | 11 ++++------- > arch/x86/entry/entry_64_compat.S | 16 ++++++---------- > arch/x86/include/asm/cpufeature.h | 1 + > arch/x86/include/asm/paravirt.h | 12 ------------ > arch/x86/include/asm/paravirt_types.h | 17 ----------------- > arch/x86/kernel/asm-offsets.c | 3 --- > arch/x86/kernel/asm-offsets_64.c | 1 - > arch/x86/kernel/paravirt.c | 12 ------------ > arch/x86/kernel/paravirt_patch_32.c | 2 -- > arch/x86/kernel/paravirt_patch_64.c | 3 --- > arch/x86/xen/enlighten.c | 7 +++---- > arch/x86/xen/xen-asm_32.S | 14 -------------- > arch/x86/xen/xen-asm_64.S | 19 ------------------- > arch/x86/xen/xen-ops.h | 3 --- > 14 files changed, 14 insertions(+), 107 deletions(-)Can't argue with that diffstat! :-) -- Regards/Gruss, Boris. SUSE Linux GmbH, GF: Felix Imend?rffer, Jane Smithard, Graham Norton, HRB 21284 (AG N?rnberg) --
Possibly Parallel Threads
- [PATCH 0/3] Fix and cleanup for 32-bit PV sysexit
- [PATCH v2 0/3] Fix and cleanup for 32-bit PV sysexit
- [PATCH v2 0/3] Fix and cleanup for 32-bit PV sysexit
- [PATCH 00/13] x86/paravirt: Make pv ops code generation more closely match reality
- [PATCH 00/13] x86/paravirt: Make pv ops code generation more closely match reality