Josh Poimboeuf
2017-Oct-04  15:58 UTC
[PATCH 00/13] x86/paravirt: Make pv ops code generation more closely match reality
This changes the pv ops code generation to more closely match reality.
For example, instead of:
  callq  *0xffffffff81e3a400 (pv_irq_ops.save_fl)
vmlinux will now show:
  pushfq
  pop    %rax
  nop
  nop
  nop
  nop
  nop
which is what the runtime version of the code will show in most cases.
This idea was suggested by Andy Lutomirski.
The benefits are:
- For the most common runtime cases (everything except Xen and vSMP),
  vmlinux disassembly now matches what the actual runtime code looks
  like.  This improves debuggability and kernel developer sanity (a
  precious resource).
- It fixes a KASAN warning in the ORC unwinder due to objtool not
  understanding the .parainstructions stuff.
- It's hopefully a first step in simplifying paravirt patching by
  getting rid of .parainstructions, pv ops, and apply_paravirt()
  completely.  (I think Xen can be changed to set CPU feature bits to
  specify which ops it needs during early boot, then those ops can be
  patched in using early alternatives.)
For more details, see the commit log in patch 11/13.
Josh Poimboeuf (13):
  x86/paravirt: remove wbinvd() paravirt interface
  x86/paravirt: Fix output constraint macro names
  x86/paravirt: Convert native patch assembly code strings to macros
  x86/paravirt: Convert DEF_NATIVE macro to GCC extended asm syntax
  x86/paravirt: Move paravirt asm macros to paravirt-asm.h
  x86/paravirt: Clean up paravirt-asm.h
  x86/paravirt: Simplify ____PVOP_CALL()
  x86/paravirt: Clean up paravirt_types.h
  x86/asm: Convert ALTERNATIVE*() assembler macros to preprocessor
    macros
  x86/alternative: Support indirect call replacement
  x86/paravirt: Add paravirt alternatives infrastructure
  objtool: Add support for new .pv_altinstructions section
  x86/paravirt: Convert natively patched pv ops to use paravirt
    alternatives
 arch/x86/entry/entry_32.S                |  13 +-
 arch/x86/entry/entry_64.S                |  12 +-
 arch/x86/entry/entry_64_compat.S         |   9 +-
 arch/x86/entry/vdso/vdso32/system_call.S |  10 +-
 arch/x86/include/asm/alternative-asm.h   |  71 ++++-----
 arch/x86/include/asm/alternative.h       |  12 +-
 arch/x86/include/asm/cpufeatures.h       |   1 +
 arch/x86/include/asm/paravirt-asm.h      | 142 ++++++++++++++++++
 arch/x86/include/asm/paravirt.h          | 174 ++++------------------
 arch/x86/include/asm/paravirt_types.h    | 243 ++++++++++++++++++++-----------
 arch/x86/include/asm/smap.h              |   4 +-
 arch/x86/include/asm/special_insns.h     |  31 +++-
 arch/x86/kernel/alternative.c            |  35 ++++-
 arch/x86/kernel/cpu/hypervisor.c         |   2 +
 arch/x86/kernel/head_64.S                |   2 +-
 arch/x86/kernel/module.c                 |  11 +-
 arch/x86/kernel/paravirt.c               |   1 -
 arch/x86/kernel/paravirt_patch_32.c      |  21 +--
 arch/x86/kernel/paravirt_patch_64.c      |  31 ++--
 arch/x86/kernel/vmlinux.lds.S            |   6 +
 arch/x86/lib/copy_page_64.S              |   2 +-
 arch/x86/lib/memcpy_64.S                 |   4 +-
 arch/x86/lib/memmove_64.S                |   3 +-
 arch/x86/lib/memset_64.S                 |   4 +-
 arch/x86/xen/enlighten_pv.c              |   3 +-
 tools/objtool/special.c                  |  10 ++
 26 files changed, 516 insertions(+), 341 deletions(-)
 create mode 100644 arch/x86/include/asm/paravirt-asm.h
-- 
2.13.6
Josh Poimboeuf
2017-Oct-04  15:58 UTC
[PATCH 01/13] x86/paravirt: remove wbinvd() paravirt interface
Since lguest was removed, only the native version of wbinvd() is used.
The paravirt interface is no longer needed.
Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>
---
 arch/x86/include/asm/paravirt.h       | 5 -----
 arch/x86/include/asm/paravirt_types.h | 1 -
 arch/x86/include/asm/special_insns.h  | 7 +------
 arch/x86/kernel/paravirt.c            | 1 -
 arch/x86/kernel/paravirt_patch_64.c   | 2 --
 arch/x86/xen/enlighten_pv.c           | 2 --
 6 files changed, 1 insertion(+), 17 deletions(-)
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 12deec722cf0..2f51fbf175da 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -98,11 +98,6 @@ static inline void halt(void)
 	PVOP_VCALL0(pv_irq_ops.halt);
 }
 
-static inline void wbinvd(void)
-{
-	PVOP_VCALL0(pv_cpu_ops.wbinvd);
-}
-
 #define get_kernel_rpl()  (pv_info.kernel_rpl)
 
 static inline u64 paravirt_read_msr(unsigned msr)
diff --git a/arch/x86/include/asm/paravirt_types.h
b/arch/x86/include/asm/paravirt_types.h
index 280d94c36dad..0e112f279514 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -137,7 +137,6 @@ struct pv_cpu_ops {
 
 	void (*set_iopl_mask)(unsigned mask);
 
-	void (*wbinvd)(void);
 	void (*io_delay)(void);
 
 	/* cpuid emulation, mostly so that caps bits can be disabled */
diff --git a/arch/x86/include/asm/special_insns.h
b/arch/x86/include/asm/special_insns.h
index a24dfcf79f4a..ac402c6fc24b 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -128,7 +128,7 @@ static inline void __write_pkru(u32 pkru)
 }
 #endif
 
-static inline void native_wbinvd(void)
+static inline void wbinvd(void)
 {
 	asm volatile("wbinvd": : :"memory");
 }
@@ -183,11 +183,6 @@ static inline void __write_cr4(unsigned long x)
 	native_write_cr4(x);
 }
 
-static inline void wbinvd(void)
-{
-	native_wbinvd();
-}
-
 #ifdef CONFIG_X86_64
 
 static inline unsigned long read_cr8(void)
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 19a3e8f961c7..3fead3a50723 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -332,7 +332,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
 	.read_cr8 = native_read_cr8,
 	.write_cr8 = native_write_cr8,
 #endif
-	.wbinvd = native_wbinvd,
 	.read_msr = native_read_msr,
 	.write_msr = native_write_msr,
 	.read_msr_safe = native_read_msr_safe,
diff --git a/arch/x86/kernel/paravirt_patch_64.c
b/arch/x86/kernel/paravirt_patch_64.c
index 11aaf1eaa0e4..0a1ba3f80cbf 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
 DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
-DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
 
 DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
 DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
@@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_mmu_ops, read_cr3);
 		PATCH_SITE(pv_mmu_ops, write_cr3);
 		PATCH_SITE(pv_mmu_ops, flush_tlb_single);
-		PATCH_SITE(pv_cpu_ops, wbinvd);
 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
 		case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
 			if (pv_is_native_spin_unlock()) {
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 73f809a6ca87..c0cb5c2bfd92 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1045,8 +1045,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.write_cr8 = xen_write_cr8,
 #endif
 
-	.wbinvd = native_wbinvd,
-
 	.read_msr = xen_read_msr,
 	.write_msr = xen_write_msr,
 
-- 
2.13.6
Josh Poimboeuf
2017-Oct-04  15:58 UTC
[PATCH 02/13] x86/paravirt: Fix output constraint macro names
Some of the paravirt '*_CLOBBERS' macros refer to output constraints instead of clobbers, which makes the code extra confusing. Rename the output constraint related macros to '*_OUTPUTS'. Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com> --- arch/x86/include/asm/paravirt_types.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 0e112f279514..e99e5ac3e036 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -466,12 +466,12 @@ int paravirt_disable_iospace(void); #define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x)) #define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x)) -#define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ +#define PVOP_VCALL_OUTPUTS "=a" (__eax), "=d" (__edx), \ "=c" (__ecx) -#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS +#define PVOP_CALL_OUTPUTS PVOP_VCALL_OUTPUTS -#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx) -#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS +#define PVOP_VCALLEE_OUTPUTS "=a" (__eax), "=d" (__edx) +#define PVOP_CALLEE_OUTPUTS PVOP_VCALLEE_OUTPUTS #define EXTRA_CLOBBERS #define VEXTRA_CLOBBERS @@ -488,14 +488,14 @@ int paravirt_disable_iospace(void); #define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x)) #define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x)) -#define PVOP_VCALL_CLOBBERS "=D" (__edi), \ +#define PVOP_VCALL_OUTPUTS "=D" (__edi), \ "=S" (__esi), "=d" (__edx), \ "=c" (__ecx) -#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) +#define PVOP_CALL_OUTPUTS PVOP_VCALL_OUTPUTS, "=a" (__eax) /* void functions are still allowed [re]ax for scratch */ -#define PVOP_VCALLEE_CLOBBERS "=a" (__eax) -#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS +#define PVOP_VCALLEE_OUTPUTS "=a" (__eax) +#define PVOP_CALLEE_OUTPUTS PVOP_VCALLEE_OUTPUTS #define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" #define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" @@ -552,12 +552,12 @@ int paravirt_disable_iospace(void); }) #define __PVOP_CALL(rettype, op, pre, post, ...) \ - ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \ + ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_OUTPUTS, \ EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__) #define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \ ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ - PVOP_CALLEE_CLOBBERS, , \ + PVOP_CALLEE_OUTPUTS, , \ pre, post, ##__VA_ARGS__) @@ -576,13 +576,13 @@ int paravirt_disable_iospace(void); }) #define __PVOP_VCALL(op, pre, post, ...) \ - ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ + ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_OUTPUTS, \ VEXTRA_CLOBBERS, \ pre, post, ##__VA_ARGS__) #define __PVOP_VCALLEESAVE(op, pre, post, ...) \ ____PVOP_VCALL(op.func, CLBR_RET_REG, \ - PVOP_VCALLEE_CLOBBERS, , \ + PVOP_VCALLEE_OUTPUTS, , \ pre, post, ##__VA_ARGS__) -- 2.13.6
Josh Poimboeuf
2017-Oct-04  15:58 UTC
[PATCH 03/13] x86/paravirt: Convert native patch assembly code strings to macros
Convert the hard-coded native patch assembly code strings to macros to
facilitate sharing common code between 32-bit and 64-bit.
These macros will also be used by a future patch which requires the GCC
extended asm syntax of two '%' characters instead of one when specifying
a register name.
Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>
---
 arch/x86/include/asm/special_insns.h | 24 ++++++++++++++++++++++++
 arch/x86/kernel/paravirt_patch_32.c  | 21 +++++++++++----------
 arch/x86/kernel/paravirt_patch_64.c  | 29 +++++++++++++++--------------
 3 files changed, 50 insertions(+), 24 deletions(-)
diff --git a/arch/x86/include/asm/special_insns.h
b/arch/x86/include/asm/special_insns.h
index ac402c6fc24b..0549c5f2c1b3 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -6,6 +6,30 @@
 
 #include <asm/nops.h>
 
+#ifdef CONFIG_X86_64
+# define _REG_ARG1			"%rdi"
+# define NATIVE_IDENTITY_32		"mov %edi, %eax"
+# define NATIVE_USERGS_SYSRET64		"swapgs; sysretq"
+#else
+# define _REG_ARG1			"%eax"
+#endif
+
+#define _REG_RET			"%" _ASM_AX
+
+#define NATIVE_ZERO			"xor " _REG_ARG1 ", " _REG_ARG1
+#define NATIVE_IDENTITY			"mov " _REG_ARG1 ", " _REG_RET
+#define NATIVE_SAVE_FL			"pushf; pop " _REG_RET
+#define NATIVE_RESTORE_FL		"push " _REG_ARG1 "; popf"
+#define NATIVE_IRQ_DISABLE		"cli"
+#define NATIVE_IRQ_ENABLE		"sti"
+#define NATIVE_READ_CR2			"mov %cr2, " _REG_RET
+#define NATIVE_READ_CR3			"mov %cr3, " _REG_RET
+#define NATIVE_WRITE_CR3		"mov " _REG_ARG1 ", %cr3"
+#define NATIVE_FLUSH_TLB_SINGLE		"invlpg (" _REG_ARG1 ")"
+#define NATIVE_SWAPGS			"swapgs"
+#define NATIVE_IRET			"iret"
+#define NATIVE_QUEUED_SPIN_UNLOCK	"movb $0, (" _REG_ARG1
")"
+
 /*
  * Volatile isn't enough to prevent the compiler from reordering the
  * read/write functions for the control registers and messing everything up.
diff --git a/arch/x86/kernel/paravirt_patch_32.c
b/arch/x86/kernel/paravirt_patch_32.c
index 553acbbb4d32..c9c6106ae714 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -1,17 +1,18 @@
 #include <asm/paravirt.h>
+#include <asm/special_insns.h>
 
-DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
-DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
-DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
-DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
-DEF_NATIVE(pv_cpu_ops, iret, "iret");
-DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
-DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
-DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
+DEF_NATIVE(pv_irq_ops,	save_fl,		NATIVE_SAVE_FL);
+DEF_NATIVE(pv_irq_ops,	restore_fl,		NATIVE_RESTORE_FL);
+DEF_NATIVE(pv_irq_ops,	irq_disable,		NATIVE_IRQ_DISABLE);
+DEF_NATIVE(pv_irq_ops,	irq_enable,		NATIVE_IRQ_ENABLE);
+DEF_NATIVE(pv_cpu_ops,	iret,			NATIVE_IRET);
+DEF_NATIVE(pv_mmu_ops,	read_cr2,		NATIVE_READ_CR2);
+DEF_NATIVE(pv_mmu_ops,	read_cr3,		NATIVE_READ_CR3);
+DEF_NATIVE(pv_mmu_ops,	write_cr3,		NATIVE_WRITE_CR3);
 
 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
-DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
-DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax");
+DEF_NATIVE(pv_lock_ops,	queued_spin_unlock,	NATIVE_QUEUED_SPIN_UNLOCK);
+DEF_NATIVE(pv_lock_ops,	vcpu_is_preempted,	NATIVE_ZERO);
 #endif
 
 unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
diff --git a/arch/x86/kernel/paravirt_patch_64.c
b/arch/x86/kernel/paravirt_patch_64.c
index 0a1ba3f80cbf..0aa232edd670 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -1,25 +1,26 @@
 #include <asm/paravirt.h>
 #include <asm/asm-offsets.h>
+#include <asm/special_insns.h>
 #include <linux/stringify.h>
 
-DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
-DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
-DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq");
-DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
-DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
-DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
-DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
+DEF_NATIVE(pv_irq_ops,	save_fl,		NATIVE_SAVE_FL);
+DEF_NATIVE(pv_irq_ops,	restore_fl,		NATIVE_RESTORE_FL);
+DEF_NATIVE(pv_irq_ops,	irq_disable,		NATIVE_IRQ_DISABLE);
+DEF_NATIVE(pv_irq_ops,	irq_enable,		NATIVE_IRQ_ENABLE);
+DEF_NATIVE(pv_mmu_ops,	read_cr2,		NATIVE_READ_CR2);
+DEF_NATIVE(pv_mmu_ops,	read_cr3,		NATIVE_READ_CR3);
+DEF_NATIVE(pv_mmu_ops,	write_cr3,		NATIVE_WRITE_CR3);
+DEF_NATIVE(pv_mmu_ops,	flush_tlb_single,	NATIVE_FLUSH_TLB_SINGLE);
 
-DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
-DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
+DEF_NATIVE(pv_cpu_ops,	usergs_sysret64,	NATIVE_USERGS_SYSRET64);
+DEF_NATIVE(pv_cpu_ops,	swapgs,			NATIVE_SWAPGS);
 
-DEF_NATIVE(, mov32, "mov %edi, %eax");
-DEF_NATIVE(, mov64, "mov %rdi, %rax");
+DEF_NATIVE(,		mov32,			NATIVE_IDENTITY_32);
+DEF_NATIVE(,		mov64,			NATIVE_IDENTITY);
 
 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
-DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
-DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %rax, %rax");
+DEF_NATIVE(pv_lock_ops,	queued_spin_unlock,	NATIVE_QUEUED_SPIN_UNLOCK);
+DEF_NATIVE(pv_lock_ops,	vcpu_is_preempted,	NATIVE_ZERO);
 #endif
 
 unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
-- 
2.13.6
Josh Poimboeuf
2017-Oct-04  15:58 UTC
[PATCH 04/13] x86/paravirt: Convert DEF_NATIVE macro to GCC extended asm syntax
In a future patch, the NATIVE_* instruction string macros will be used
in GCC extended inline asm, which requires registers to have two '%'
instead of one in the asm template string.  Convert the DEF_NATIVE macro
to the GCC extended asm syntax so the NATIVE_* macros can be shared more
broadly.
Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>
---
 arch/x86/include/asm/paravirt_types.h | 10 +++++++---
 arch/x86/include/asm/special_insns.h  | 14 +++++++-------
 2 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/arch/x86/include/asm/paravirt_types.h
b/arch/x86/include/asm/paravirt_types.h
index e99e5ac3e036..ab7aabe6b668 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -359,11 +359,15 @@ extern struct pv_lock_ops pv_lock_ops;
 	_paravirt_alt(insn_string, "%c[paravirt_typenum]",
"%c[paravirt_clobber]")
 
 /* Simple instruction patching code. */
-#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b
"\n" a #x "_" #b ":\n\t"
+#define NATIVE_LABEL(a,x,b) "\n" a #x "_" #b
":\n\t"
 
 #define DEF_NATIVE(ops, name, code)					\
-	__visible extern const char start_##ops##_##name[], end_##ops##_##name[];	\
-	asm(NATIVE_LABEL("start_", ops, name) code
NATIVE_LABEL("end_", ops, name))
+static inline void __used __native_ ## name ## _insns(void) {		\
+	asm volatile(NATIVE_LABEL("start_", ops, name)			\
+		     code						\
+		     NATIVE_LABEL("end_", ops, name) : );		\
+} \
+__visible extern const char start_##ops##_##name[], end_##ops##_##name[];
 
 unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
 unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
diff --git a/arch/x86/include/asm/special_insns.h
b/arch/x86/include/asm/special_insns.h
index 0549c5f2c1b3..4b89668f2862 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -7,14 +7,14 @@
 #include <asm/nops.h>
 
 #ifdef CONFIG_X86_64
-# define _REG_ARG1			"%rdi"
-# define NATIVE_IDENTITY_32		"mov %edi, %eax"
+# define _REG_ARG1			"%%rdi"
+# define NATIVE_IDENTITY_32		"mov %%edi, %%eax"
 # define NATIVE_USERGS_SYSRET64		"swapgs; sysretq"
 #else
-# define _REG_ARG1			"%eax"
+# define _REG_ARG1			"%%eax"
 #endif
 
-#define _REG_RET			"%" _ASM_AX
+#define _REG_RET			"%%" _ASM_AX
 
 #define NATIVE_ZERO			"xor " _REG_ARG1 ", " _REG_ARG1
 #define NATIVE_IDENTITY			"mov " _REG_ARG1 ", " _REG_RET
@@ -22,9 +22,9 @@
 #define NATIVE_RESTORE_FL		"push " _REG_ARG1 "; popf"
 #define NATIVE_IRQ_DISABLE		"cli"
 #define NATIVE_IRQ_ENABLE		"sti"
-#define NATIVE_READ_CR2			"mov %cr2, " _REG_RET
-#define NATIVE_READ_CR3			"mov %cr3, " _REG_RET
-#define NATIVE_WRITE_CR3		"mov " _REG_ARG1 ", %cr3"
+#define NATIVE_READ_CR2			"mov %%cr2, " _REG_RET
+#define NATIVE_READ_CR3			"mov %%cr3, " _REG_RET
+#define NATIVE_WRITE_CR3		"mov " _REG_ARG1 ", %%cr3"
 #define NATIVE_FLUSH_TLB_SINGLE		"invlpg (" _REG_ARG1 ")"
 #define NATIVE_SWAPGS			"swapgs"
 #define NATIVE_IRET			"iret"
-- 
2.13.6
Josh Poimboeuf
2017-Oct-04  15:58 UTC
[PATCH 05/13] x86/paravirt: Move paravirt asm macros to paravirt-asm.h
The paravirt.h file is quite big and the asm interfaces for paravirt
don't need to be in the same file as the C interfaces.  Move the asm
interfaces to a dedicated header file.
Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>
---
 arch/x86/entry/entry_32.S           |   1 +
 arch/x86/entry/entry_64.S           |   2 +-
 arch/x86/entry/entry_64_compat.S    |   1 +
 arch/x86/include/asm/paravirt-asm.h | 126 ++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/paravirt.h     | 132 +++---------------------------------
 arch/x86/kernel/head_64.S           |   2 +-
 6 files changed, 138 insertions(+), 126 deletions(-)
 create mode 100644 arch/x86/include/asm/paravirt-asm.h
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 8a13d468635a..21d1197779a4 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -40,6 +40,7 @@
 #include <asm/irq_vectors.h>
 #include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
+#include <asm/paravirt-asm.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
 #include <asm/frame.h>
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 49167258d587..c7c85724d7e0 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -30,7 +30,7 @@
 #include <asm/hw_irq.h>
 #include <asm/page_types.h>
 #include <asm/irqflags.h>
-#include <asm/paravirt.h>
+#include <asm/paravirt-asm.h>
 #include <asm/percpu.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index e26c25ca7756..4d9385529c39 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -13,6 +13,7 @@
 #include <asm/irqflags.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
+#include <asm/paravirt-asm.h>
 #include <linux/linkage.h>
 #include <linux/err.h>
 
diff --git a/arch/x86/include/asm/paravirt-asm.h
b/arch/x86/include/asm/paravirt-asm.h
new file mode 100644
index 000000000000..add8a190fdac
--- /dev/null
+++ b/arch/x86/include/asm/paravirt-asm.h
@@ -0,0 +1,126 @@
+#ifndef _ASM_X86_PARAVIRT_ASM_H
+#define _ASM_X86_PARAVIRT_ASM_H
+
+#ifdef CONFIG_PARAVIRT
+#ifdef __ASSEMBLY__
+
+#include <asm/asm.h>
+#include <asm/paravirt_types.h>
+
+#define _PVSITE(ptype, clobbers, ops, word, algn)	\
+771:;						\
+	ops;					\
+772:;						\
+	.pushsection .parainstructions,"a";	\
+	 .align	algn;				\
+	 word 771b;				\
+	 .byte ptype;				\
+	 .byte 772b-771b;			\
+	 .short clobbers;			\
+	.popsection
+
+
+#define COND_PUSH(set, mask, reg)			\
+	.if ((~(set)) & mask); push %reg; .endif
+#define COND_POP(set, mask, reg)			\
+	.if ((~(set)) & mask); pop %reg; .endif
+
+#ifdef CONFIG_X86_64
+
+#define PV_SAVE_REGS(set)			\
+	COND_PUSH(set, CLBR_RAX, rax);		\
+	COND_PUSH(set, CLBR_RCX, rcx);		\
+	COND_PUSH(set, CLBR_RDX, rdx);		\
+	COND_PUSH(set, CLBR_RSI, rsi);		\
+	COND_PUSH(set, CLBR_RDI, rdi);		\
+	COND_PUSH(set, CLBR_R8, r8);		\
+	COND_PUSH(set, CLBR_R9, r9);		\
+	COND_PUSH(set, CLBR_R10, r10);		\
+	COND_PUSH(set, CLBR_R11, r11)
+#define PV_RESTORE_REGS(set)			\
+	COND_POP(set, CLBR_R11, r11);		\
+	COND_POP(set, CLBR_R10, r10);		\
+	COND_POP(set, CLBR_R9, r9);		\
+	COND_POP(set, CLBR_R8, r8);		\
+	COND_POP(set, CLBR_RDI, rdi);		\
+	COND_POP(set, CLBR_RSI, rsi);		\
+	COND_POP(set, CLBR_RDX, rdx);		\
+	COND_POP(set, CLBR_RCX, rcx);		\
+	COND_POP(set, CLBR_RAX, rax)
+
+#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
+#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
+#define PARA_INDIRECT(addr)	*addr(%rip)
+#else
+#define PV_SAVE_REGS(set)			\
+	COND_PUSH(set, CLBR_EAX, eax);		\
+	COND_PUSH(set, CLBR_EDI, edi);		\
+	COND_PUSH(set, CLBR_ECX, ecx);		\
+	COND_PUSH(set, CLBR_EDX, edx)
+#define PV_RESTORE_REGS(set)			\
+	COND_POP(set, CLBR_EDX, edx);		\
+	COND_POP(set, CLBR_ECX, ecx);		\
+	COND_POP(set, CLBR_EDI, edi);		\
+	COND_POP(set, CLBR_EAX, eax)
+
+#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 4)
+#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
+#define PARA_INDIRECT(addr)	*%cs:addr
+#endif
+
+#define INTERRUPT_RETURN						\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,	\
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
+
+#define DISABLE_INTERRUPTS(clobbers)					\
+	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
+		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
+		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);	\
+		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
+
+#define ENABLE_INTERRUPTS(clobbers)					\
+	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,	\
+		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
+		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
+		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
+
+#ifdef CONFIG_X86_32
+#define GET_CR0_INTO_EAX				\
+	push %ecx; push %edx;				\
+	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0);	\
+	pop %edx; pop %ecx
+#else	/* !CONFIG_X86_32 */
+
+/*
+ * If swapgs is used while the userspace stack is still current,
+ * there's no way to call a pvop.  The PV replacement *must* be
+ * inlined, or the swapgs instruction must be trapped and emulated.
+ */
+#define SWAPGS_UNSAFE_STACK						\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
+		  swapgs)
+
+/*
+ * Note: swapgs is very special, and in practise is either going to be
+ * implemented with a single "swapgs" instruction or something very
+ * special.  Either way, we don't need to save any registers for
+ * it.
+ */
+#define SWAPGS								\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
+		  call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs)		\
+		 )
+
+#define GET_CR2_INTO_RAX				\
+	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
+
+#define USERGS_SYSRET64							\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
+		  CLBR_NONE,						\
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+#endif	/* CONFIG_X86_32 */
+
+#endif  /*  __ASSEMBLY__  */
+#endif /* CONFIG_PARAVIRT */
+
+#endif /* _ASM_X86_PARAVIRT_ASM_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 2f51fbf175da..bfd02c3335cb 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -3,17 +3,16 @@
 /* Various instructions on x86 need to be replaced for
  * para-virtualization: those hooks are defined here. */
 
-#ifdef CONFIG_PARAVIRT
-#include <asm/pgtable_types.h>
-#include <asm/asm.h>
+#ifndef __ASSEMBLY__
 
-#include <asm/paravirt_types.h>
+#ifdef CONFIG_PARAVIRT
 
-#ifndef __ASSEMBLY__
 #include <linux/bug.h>
 #include <linux/types.h>
 #include <linux/cpumask.h>
 #include <asm/frame.h>
+#include <asm/pgtable_types.h>
+#include <asm/paravirt_types.h>
 
 static inline void load_sp0(struct tss_struct *tss,
 			     struct thread_struct *thread)
@@ -809,125 +808,8 @@ static inline notrace unsigned long
arch_local_irq_save(void)
 
 extern void default_banner(void);
 
-#else  /* __ASSEMBLY__ */
-
-#define _PVSITE(ptype, clobbers, ops, word, algn)	\
-771:;						\
-	ops;					\
-772:;						\
-	.pushsection .parainstructions,"a";	\
-	 .align	algn;				\
-	 word 771b;				\
-	 .byte ptype;				\
-	 .byte 772b-771b;			\
-	 .short clobbers;			\
-	.popsection
-
-
-#define COND_PUSH(set, mask, reg)			\
-	.if ((~(set)) & mask); push %reg; .endif
-#define COND_POP(set, mask, reg)			\
-	.if ((~(set)) & mask); pop %reg; .endif
-
-#ifdef CONFIG_X86_64
-
-#define PV_SAVE_REGS(set)			\
-	COND_PUSH(set, CLBR_RAX, rax);		\
-	COND_PUSH(set, CLBR_RCX, rcx);		\
-	COND_PUSH(set, CLBR_RDX, rdx);		\
-	COND_PUSH(set, CLBR_RSI, rsi);		\
-	COND_PUSH(set, CLBR_RDI, rdi);		\
-	COND_PUSH(set, CLBR_R8, r8);		\
-	COND_PUSH(set, CLBR_R9, r9);		\
-	COND_PUSH(set, CLBR_R10, r10);		\
-	COND_PUSH(set, CLBR_R11, r11)
-#define PV_RESTORE_REGS(set)			\
-	COND_POP(set, CLBR_R11, r11);		\
-	COND_POP(set, CLBR_R10, r10);		\
-	COND_POP(set, CLBR_R9, r9);		\
-	COND_POP(set, CLBR_R8, r8);		\
-	COND_POP(set, CLBR_RDI, rdi);		\
-	COND_POP(set, CLBR_RSI, rsi);		\
-	COND_POP(set, CLBR_RDX, rdx);		\
-	COND_POP(set, CLBR_RCX, rcx);		\
-	COND_POP(set, CLBR_RAX, rax)
-
-#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
-#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
-#define PARA_INDIRECT(addr)	*addr(%rip)
-#else
-#define PV_SAVE_REGS(set)			\
-	COND_PUSH(set, CLBR_EAX, eax);		\
-	COND_PUSH(set, CLBR_EDI, edi);		\
-	COND_PUSH(set, CLBR_ECX, ecx);		\
-	COND_PUSH(set, CLBR_EDX, edx)
-#define PV_RESTORE_REGS(set)			\
-	COND_POP(set, CLBR_EDX, edx);		\
-	COND_POP(set, CLBR_ECX, ecx);		\
-	COND_POP(set, CLBR_EDI, edi);		\
-	COND_POP(set, CLBR_EAX, eax)
-
-#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 4)
-#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
-#define PARA_INDIRECT(addr)	*%cs:addr
-#endif
-
-#define INTERRUPT_RETURN						\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,	\
-		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
-
-#define DISABLE_INTERRUPTS(clobbers)					\
-	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
-		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
-		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);	\
-		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-
-#define ENABLE_INTERRUPTS(clobbers)					\
-	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,	\
-		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
-		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
-		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-
-#ifdef CONFIG_X86_32
-#define GET_CR0_INTO_EAX				\
-	push %ecx; push %edx;				\
-	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0);	\
-	pop %edx; pop %ecx
-#else	/* !CONFIG_X86_32 */
-
-/*
- * If swapgs is used while the userspace stack is still current,
- * there's no way to call a pvop.  The PV replacement *must* be
- * inlined, or the swapgs instruction must be trapped and emulated.
- */
-#define SWAPGS_UNSAFE_STACK						\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
-		  swapgs)
-
-/*
- * Note: swapgs is very special, and in practise is either going to be
- * implemented with a single "swapgs" instruction or something very
- * special.  Either way, we don't need to save any registers for
- * it.
- */
-#define SWAPGS								\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
-		  call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs)		\
-		 )
-
-#define GET_CR2_INTO_RAX				\
-	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
-
-#define USERGS_SYSRET64							\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
-		  CLBR_NONE,						\
-		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
-#endif	/* CONFIG_X86_32 */
-
-#endif /* __ASSEMBLY__ */
-#else  /* CONFIG_PARAVIRT */
+#else  /* !CONFIG_PARAVIRT */
 # define default_banner x86_init_noop
-#ifndef __ASSEMBLY__
 static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm,
 					  struct mm_struct *mm)
 {
@@ -936,6 +818,8 @@ static inline void paravirt_arch_dup_mmap(struct mm_struct
*oldmm,
 static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
 {
 }
-#endif /* __ASSEMBLY__ */
 #endif /* !CONFIG_PARAVIRT */
+
+#endif /* __ASSEMBLY__ */
+
 #endif /* _ASM_X86_PARAVIRT_H */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 42e32c2e51bb..5bf2faec41b9 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -25,7 +25,7 @@
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/asm-offsets.h>
-#include <asm/paravirt.h>
+#include <asm/paravirt-asm.h>
 #define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg
 #else
 #define GET_CR2_INTO(reg) movq %cr2, reg
-- 
2.13.6
Some cleanup to make the code easier to read and understand: - Use the common "PV_" prefix - Simplify the PV_SITE macro interface - Improve whitespace Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com> --- arch/x86/include/asm/paravirt-asm.h | 95 +++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 46 deletions(-) diff --git a/arch/x86/include/asm/paravirt-asm.h b/arch/x86/include/asm/paravirt-asm.h index add8a190fdac..8bdd50ee4bf3 100644 --- a/arch/x86/include/asm/paravirt-asm.h +++ b/arch/x86/include/asm/paravirt-asm.h @@ -7,16 +7,18 @@ #include <asm/asm.h> #include <asm/paravirt_types.h> -#define _PVSITE(ptype, clobbers, ops, word, algn) \ -771:; \ - ops; \ -772:; \ - .pushsection .parainstructions,"a"; \ - .align algn; \ - word 771b; \ - .byte ptype; \ - .byte 772b-771b; \ - .short clobbers; \ +#define PV_TYPE(ops, off) ((PARAVIRT_PATCH_##ops + (off)) / __ASM_SEL(4, 8)) + +#define PV_SITE(insns, ops, off, clobbers) \ +771:; \ + insns; \ +772:; \ + .pushsection .parainstructions, "a"; \ + _ASM_ALIGN; \ + _ASM_PTR 771b; \ + .byte PV_TYPE(ops, off); \ + .byte 772b-771b; \ + .short clobbers; \ .popsection @@ -33,62 +35,65 @@ COND_PUSH(set, CLBR_RDX, rdx); \ COND_PUSH(set, CLBR_RSI, rsi); \ COND_PUSH(set, CLBR_RDI, rdi); \ - COND_PUSH(set, CLBR_R8, r8); \ - COND_PUSH(set, CLBR_R9, r9); \ + COND_PUSH(set, CLBR_R8, r8); \ + COND_PUSH(set, CLBR_R9, r9); \ COND_PUSH(set, CLBR_R10, r10); \ COND_PUSH(set, CLBR_R11, r11) + #define PV_RESTORE_REGS(set) \ COND_POP(set, CLBR_R11, r11); \ COND_POP(set, CLBR_R10, r10); \ - COND_POP(set, CLBR_R9, r9); \ - COND_POP(set, CLBR_R8, r8); \ + COND_POP(set, CLBR_R9, r9); \ + COND_POP(set, CLBR_R8, r8); \ COND_POP(set, CLBR_RDI, rdi); \ COND_POP(set, CLBR_RSI, rsi); \ COND_POP(set, CLBR_RDX, rdx); \ COND_POP(set, CLBR_RCX, rcx); \ COND_POP(set, CLBR_RAX, rax) -#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) -#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) -#define PARA_INDIRECT(addr) *addr(%rip) -#else +#define PV_INDIRECT(addr) *addr(%rip) + +#else /* !CONFIG_X86_64 */ + #define PV_SAVE_REGS(set) \ COND_PUSH(set, CLBR_EAX, eax); \ COND_PUSH(set, CLBR_EDI, edi); \ COND_PUSH(set, CLBR_ECX, ecx); \ COND_PUSH(set, CLBR_EDX, edx) + #define PV_RESTORE_REGS(set) \ COND_POP(set, CLBR_EDX, edx); \ COND_POP(set, CLBR_ECX, ecx); \ COND_POP(set, CLBR_EDI, edi); \ COND_POP(set, CLBR_EAX, eax) -#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) -#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) -#define PARA_INDIRECT(addr) *%cs:addr -#endif +#define PV_INDIRECT(addr) *%cs:addr + +#endif /* !CONFIG_X86_64 */ #define INTERRUPT_RETURN \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret)) + PV_SITE(jmp PV_INDIRECT(pv_cpu_ops+PV_CPU_iret), \ + pv_cpu_ops, PV_CPU_iret, CLBR_NONE) #define DISABLE_INTERRUPTS(clobbers) \ - PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ - PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ - call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ - PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) + PV_SITE(PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ + call PV_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE), \ + pv_irq_ops, PV_IRQ_irq_disable, clobbers) #define ENABLE_INTERRUPTS(clobbers) \ - PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ - PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ - call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ - PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) + PV_SITE(PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ + call PV_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE), \ + pv_irq_ops, PV_IRQ_irq_enable, clobbers) #ifdef CONFIG_X86_32 -#define GET_CR0_INTO_EAX \ - push %ecx; push %edx; \ - call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ + +#define GET_CR0_INTO_EAX \ + push %ecx; push %edx; \ + call PV_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ pop %edx; pop %ecx + #else /* !CONFIG_X86_32 */ /* @@ -97,8 +102,7 @@ * inlined, or the swapgs instruction must be trapped and emulated. */ #define SWAPGS_UNSAFE_STACK \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ - swapgs) + PV_SITE(swapgs, pv_cpu_ops, PV_CPU_swapgs, CLBR_NONE) /* * Note: swapgs is very special, and in practise is either going to be @@ -107,18 +111,17 @@ * it. */ #define SWAPGS \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ - call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \ - ) + PV_SITE(call PV_INDIRECT(pv_cpu_ops+PV_CPU_swapgs), \ + pv_cpu_ops, PV_CPU_swapgs, CLBR_NONE) -#define GET_CR2_INTO_RAX \ - call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2) +#define GET_CR2_INTO_RAX \ + call PV_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2) #define USERGS_SYSRET64 \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ - CLBR_NONE, \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) -#endif /* CONFIG_X86_32 */ + PV_SITE(jmp PV_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64), \ + pv_cpu_ops, PV_CPU_usergs_sysret64, CLBR_NONE) + +#endif /* !CONFIG_X86_32 */ #endif /* __ASSEMBLY__ */ #endif /* CONFIG_PARAVIRT */ -- 2.13.6
Remove the inline asm duplication in ____PVOP_CALL().
Also add 'IS_ENABLED(CONFIG_X86_32)' to the return variable logic,
making the code clearer and rendering the comment unnecessary.
Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>
---
 arch/x86/include/asm/paravirt_types.h | 36 +++++++++++++----------------------
 1 file changed, 13 insertions(+), 23 deletions(-)
diff --git a/arch/x86/include/asm/paravirt_types.h
b/arch/x86/include/asm/paravirt_types.h
index ab7aabe6b668..01f9e10983c1 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -529,29 +529,19 @@ int paravirt_disable_iospace(void);
 		rettype __ret;						\
 		PVOP_CALL_ARGS;						\
 		PVOP_TEST_NULL(op);					\
-		/* This is 32-bit specific, but is okay in 64-bit */	\
-		/* since this condition will never hold */		\
-		if (sizeof(rettype) > sizeof(unsigned long)) {		\
-			asm volatile(pre				\
-				     paravirt_alt(PARAVIRT_CALL)	\
-				     post				\
-				     : call_clbr, ASM_CALL_CONSTRAINT	\
-				     : paravirt_type(op),		\
-				       paravirt_clobber(clbr),		\
-				       ##__VA_ARGS__			\
-				     : "memory", "cc" extra_clbr);	\
-			__ret = (rettype)((((u64)__edx) << 32) | __eax); \
-		} else {						\
-			asm volatile(pre				\
-				     paravirt_alt(PARAVIRT_CALL)	\
-				     post				\
-				     : call_clbr, ASM_CALL_CONSTRAINT	\
-				     : paravirt_type(op),		\
-				       paravirt_clobber(clbr),		\
-				       ##__VA_ARGS__			\
-				     : "memory", "cc" extra_clbr);	\
-			__ret = (rettype)(__eax & PVOP_RETMASK(rettype));	\
-		}							\
+		asm volatile(pre					\
+			     paravirt_alt(PARAVIRT_CALL)		\
+			     post					\
+			     : call_clbr, ASM_CALL_CONSTRAINT		\
+			     : paravirt_type(op),			\
+			       paravirt_clobber(clbr),			\
+			       ##__VA_ARGS__				\
+			     : "memory", "cc" extra_clbr);		\
+		if (IS_ENABLED(CONFIG_X86_32) &&			\
+		    sizeof(rettype) > sizeof(unsigned long))		\
+			__ret = (rettype)((((u64)__edx) << 32) | __eax);\
+		else							\
+			__ret = (rettype)(__eax & PVOP_RETMASK(rettype));\
 		__ret;							\
 	})
 
-- 
2.13.6
Josh Poimboeuf
2017-Oct-04  15:59 UTC
[PATCH 08/13] x86/paravirt: Clean up paravirt_types.h
Make paravirt_types.h more understandable:
- Use more consistent and logical naming
- Simplify interfaces
- Put related macros together
- Improve whitespace
Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>
---
 arch/x86/include/asm/paravirt_types.h | 104 ++++++++++++++++++----------------
 1 file changed, 54 insertions(+), 50 deletions(-)
diff --git a/arch/x86/include/asm/paravirt_types.h
b/arch/x86/include/asm/paravirt_types.h
index 01f9e10983c1..5656aea79412 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -331,33 +331,6 @@ extern struct pv_irq_ops pv_irq_ops;
 extern struct pv_mmu_ops pv_mmu_ops;
 extern struct pv_lock_ops pv_lock_ops;
 
-#define PARAVIRT_PATCH(x)					\
-	(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
-
-#define paravirt_type(op)				\
-	[paravirt_typenum] "i" (PARAVIRT_PATCH(op)),	\
-	[paravirt_opptr] "i" (&(op))
-#define paravirt_clobber(clobber)		\
-	[paravirt_clobber] "i" (clobber)
-
-/*
- * Generate some code, and mark it as patchable by the
- * apply_paravirt() alternate instruction patcher.
- */
-#define _paravirt_alt(insn_string, type, clobber)	\
-	"771:\n\t" insn_string "\n" "772:\n"		\
-	".pushsection .parainstructions,\"a\"\n"	\
-	_ASM_ALIGN "\n"					\
-	_ASM_PTR " 771b\n"				\
-	"  .byte " type "\n"				\
-	"  .byte 772b-771b\n"				\
-	"  .short " clobber "\n"			\
-	".popsection\n"
-
-/* Generate patchable code, with the default asm parameters. */
-#define paravirt_alt(insn_string)					\
-	_paravirt_alt(insn_string, "%c[paravirt_typenum]",
"%c[paravirt_clobber]")
-
 /* Simple instruction patching code. */
 #define NATIVE_LABEL(a,x,b) "\n" a #x "_" #b
":\n\t"
 
@@ -388,13 +361,46 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 
 int paravirt_disable_iospace(void);
 
+
 /*
- * This generates an indirect call based on the operation type number.
- * The type number, computed in PARAVIRT_PATCH, is derived from the
- * offset into the paravirt_patch_template structure, and can therefore be
- * freely converted back into a structure offset.
+ * Generate some code, and mark it as patchable by apply_paravirt().
  */
-#define PARAVIRT_CALL	"call *%c[paravirt_opptr];"
+#define _PV_SITE(insn_string, type, clobber)				\
+	"771:\n\t" insn_string "\n" "772:\n"				\
+	".pushsection .parainstructions,\"a\"\n"			\
+	_ASM_ALIGN "\n"							\
+	_ASM_PTR " 771b\n"						\
+	"  .byte " type "\n"						\
+	"  .byte 772b-771b\n"						\
+	"  .short " clobber "\n"					\
+	".popsection\n"
+
+#define PARAVIRT_PATCH(x)						\
+	(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
+
+#define PV_STRINGIFY(constraint)	"%c[" __stringify(constraint)
"]"
+
+#define PV_CALL_CONSTRAINT		pv_op_ptr
+#define PV_TYPE_CONSTRAINT		pv_typenum
+#define PV_CLBR_CONSTRAINT		pv_clobber
+
+#define PV_CALL_CONSTRAINT_STR		PV_STRINGIFY(PV_CALL_CONSTRAINT)
+#define PV_TYPE_CONSTRAINT_STR		PV_STRINGIFY(PV_TYPE_CONSTRAINT)
+#define PV_CLBR_CONSTRAINT_STR		PV_STRINGIFY(PV_CLBR_CONSTRAINT)
+
+#define PV_CALL_STR			"call *" PV_CALL_CONSTRAINT_STR ";"
+
+#define PV_INPUT_CONSTRAINTS(op, clobber)				\
+	[PV_TYPE_CONSTRAINT] "i" (PARAVIRT_PATCH(op)),			\
+	[PV_CALL_CONSTRAINT] "i" (&(op)),				\
+	[PV_CLBR_CONSTRAINT] "i" (clobber)
+
+#define PV_SITE(insn_string)						\
+	_PV_SITE(insn_string, PV_TYPE_CONSTRAINT_STR, PV_CLBR_CONSTRAINT_STR)
+
+#define PV_ALT_SITE(oldinstr, newinstr)					\
+	_PV_ALT_SITE(oldinstr, newinstr, PV_TYPE_CONSTRAINT_STR,	\
+		     PV_CLBR_CONSTRAINT_STR)
 
 /*
  * These macros are intended to wrap calls through one of the paravirt
@@ -525,25 +531,24 @@ int paravirt_disable_iospace(void);
 
 #define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr,		\
 		      pre, post, ...)					\
-	({								\
-		rettype __ret;						\
-		PVOP_CALL_ARGS;						\
-		PVOP_TEST_NULL(op);					\
+({									\
+	rettype __ret;							\
+	PVOP_CALL_ARGS;							\
+	PVOP_TEST_NULL(op);						\
 		asm volatile(pre					\
-			     paravirt_alt(PARAVIRT_CALL)		\
+			     PV_SITE(PV_CALL_STR)			\
 			     post					\
 			     : call_clbr, ASM_CALL_CONSTRAINT		\
-			     : paravirt_type(op),			\
-			       paravirt_clobber(clbr),			\
+			     : PV_INPUT_CONSTRAINTS(op, clbr),		\
 			       ##__VA_ARGS__				\
 			     : "memory", "cc" extra_clbr);		\
-		if (IS_ENABLED(CONFIG_X86_32) &&			\
-		    sizeof(rettype) > sizeof(unsigned long))		\
-			__ret = (rettype)((((u64)__edx) << 32) | __eax);\
-		else							\
-			__ret = (rettype)(__eax & PVOP_RETMASK(rettype));\
-		__ret;							\
-	})
+	if (IS_ENABLED(CONFIG_X86_32) &&				\
+	    sizeof(rettype) > sizeof(unsigned long))			\
+		__ret = (rettype)((((u64)__edx) << 32) | __eax);	\
+	else								\
+		__ret = (rettype)(__eax & PVOP_RETMASK(rettype));	\
+	__ret;								\
+})
 
 #define __PVOP_CALL(rettype, op, pre, post, ...)			\
 	____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_OUTPUTS,		\
@@ -560,11 +565,10 @@ int paravirt_disable_iospace(void);
 		PVOP_VCALL_ARGS;					\
 		PVOP_TEST_NULL(op);					\
 		asm volatile(pre					\
-			     paravirt_alt(PARAVIRT_CALL)		\
+			     PV_SITE(PV_CALL_STR)			\
 			     post					\
 			     : call_clbr, ASM_CALL_CONSTRAINT		\
-			     : paravirt_type(op),			\
-			       paravirt_clobber(clbr),			\
+			     : PV_INPUT_CONSTRAINTS(op, clbr),		\
 			       ##__VA_ARGS__				\
 			     : "memory", "cc" extra_clbr);		\
 	})
@@ -671,7 +675,7 @@ u64 _paravirt_ident_64(u64);
 
 /* These all sit in the .parainstructions section to tell us what to patch. */
 struct paravirt_patch_site {
-	u8 *instr; 		/* original instructions */
+	u8 *instr;		/* original instructions */
 	u8 instrtype;		/* type of this instruction */
 	u8 len;			/* length of original instruction */
 	u16 clobbers;		/* what registers you may clobber */
-- 
2.13.6
Josh Poimboeuf
2017-Oct-04  15:59 UTC
[PATCH 09/13] x86/asm: Convert ALTERNATIVE*() assembler macros to preprocessor macros
The ALTERNATIVE() and ALTERNATIVE_2() macros are GNU assembler macros, which makes them quite inflexible for future changes. Convert them to preprocessor macros. Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com> --- arch/x86/entry/entry_32.S | 12 +++--- arch/x86/entry/entry_64.S | 10 ++--- arch/x86/entry/entry_64_compat.S | 8 ++-- arch/x86/entry/vdso/vdso32/system_call.S | 10 ++--- arch/x86/include/asm/alternative-asm.h | 68 +++++++++++++++----------------- arch/x86/include/asm/smap.h | 4 +- arch/x86/lib/copy_page_64.S | 2 +- arch/x86/lib/memcpy_64.S | 4 +- arch/x86/lib/memmove_64.S | 3 +- arch/x86/lib/memset_64.S | 4 +- 10 files changed, 59 insertions(+), 66 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 21d1197779a4..338dc838a9a8 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -443,8 +443,8 @@ ENTRY(entry_SYSENTER_32) movl %esp, %eax call do_fast_syscall_32 /* XEN PV guests always use IRET path */ - ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ - "jmp .Lsyscall_32_done", X86_FEATURE_XENPV + #define JMP_IF_IRET testl %eax, %eax; jz .Lsyscall_32_done + ALTERNATIVE(JMP_IF_IRET, jmp .Lsyscall_32_done, X86_FEATURE_XENPV) /* Opportunistic SYSEXIT */ TRACE_IRQS_ON /* User mode traces as IRQs on. */ @@ -536,7 +536,7 @@ restore_all: TRACE_IRQS_IRET .Lrestore_all_notrace: #ifdef CONFIG_X86_ESPFIX32 - ALTERNATIVE "jmp .Lrestore_nocheck", "", X86_BUG_ESPFIX + ALTERNATIVE(jmp .Lrestore_nocheck, , X86_BUG_ESPFIX) movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS /* @@ -692,9 +692,9 @@ ENTRY(simd_coprocessor_error) pushl $0 #ifdef CONFIG_X86_INVD_BUG /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ - ALTERNATIVE "pushl $do_general_protection", \ - "pushl $do_simd_coprocessor_error", \ - X86_FEATURE_XMM + ALTERNATIVE(pushl $do_general_protection, + pushl $do_simd_coprocessor_error, + X86_FEATURE_XMM) #else pushl $do_simd_coprocessor_error #endif diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index c7c85724d7e0..49733c72619a 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -925,7 +925,7 @@ ENTRY(native_load_gs_index) SWAPGS .Lgs_change: movl %edi, %gs -2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE +2: ALTERNATIVE(, mfence, X86_BUG_SWAPGS_FENCE) SWAPGS popfq FRAME_END @@ -938,12 +938,8 @@ EXPORT_SYMBOL(native_load_gs_index) /* running with kernelgs */ bad_gs: SWAPGS /* switch back to user gs */ -.macro ZAP_GS - /* This can't be a string because the preprocessor needs to see it. */ - movl $__USER_DS, %eax - movl %eax, %gs -.endm - ALTERNATIVE "", "ZAP_GS", X86_BUG_NULL_SEG + #define ZAP_GS movl $__USER_DS, %eax; movl %eax, %gs + ALTERNATIVE(, ZAP_GS, X86_BUG_NULL_SEG) xorl %eax, %eax movl %eax, %gs jmp 2b diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 4d9385529c39..16e82b5103b5 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -124,8 +124,8 @@ ENTRY(entry_SYSENTER_compat) movq %rsp, %rdi call do_fast_syscall_32 /* XEN PV guests always use IRET path */ - ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ - "jmp .Lsyscall_32_done", X86_FEATURE_XENPV + #define JMP_IF_IRET testl %eax, %eax; jz .Lsyscall_32_done + ALTERNATIVE(JMP_IF_IRET, jmp .Lsyscall_32_done, X86_FEATURE_XENPV) jmp sysret32_from_system_call .Lsysenter_fix_flags: @@ -224,8 +224,8 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe) movq %rsp, %rdi call do_fast_syscall_32 /* XEN PV guests always use IRET path */ - ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ - "jmp .Lsyscall_32_done", X86_FEATURE_XENPV + ALTERNATIVE(JMP_IF_IRET, + jmp .Lsyscall_32_done, X86_FEATURE_XENPV) /* Opportunistic SYSRET */ sysret32_from_system_call: diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index ed4bc9731cbb..a0c5f9e8226c 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -48,15 +48,15 @@ __kernel_vsyscall: CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ebp, 0 - #define SYSENTER_SEQUENCE "movl %esp, %ebp; sysenter" - #define SYSCALL_SEQUENCE "movl %ecx, %ebp; syscall" + #define SYSENTER_SEQUENCE movl %esp, %ebp; sysenter + #define SYSCALL_SEQUENCE movl %ecx, %ebp; syscall #ifdef CONFIG_X86_64 /* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */ - ALTERNATIVE_2 "", SYSENTER_SEQUENCE, X86_FEATURE_SYSENTER32, \ - SYSCALL_SEQUENCE, X86_FEATURE_SYSCALL32 + ALTERNATIVE_2(, SYSENTER_SEQUENCE, X86_FEATURE_SYSENTER32, + SYSCALL_SEQUENCE, X86_FEATURE_SYSCALL32) #else - ALTERNATIVE "", SYSENTER_SEQUENCE, X86_FEATURE_SEP + ALTERNATIVE(, SYSENTER_SEQUENCE, X86_FEATURE_SEP) #endif /* Enter using int $0x80 */ diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index e7636bac7372..60073947350d 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -39,23 +39,21 @@ * @newinstr. ".skip" directive takes care of proper instruction padding * in case @newinstr is longer than @oldinstr. */ -.macro ALTERNATIVE oldinstr, newinstr, feature -140: - \oldinstr -141: - .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 -142: - - .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b - .popsection - - .pushsection .altinstr_replacement,"ax" -143: - \newinstr -144: +#define ALTERNATIVE(oldinstr, newinstr, feature) \ +140:; \ + oldinstr; \ +141:; \ + .skip -(((144f-143f)-(141b-140b)) > 0) * \ + ((144f-143f)-(141b-140b)),0x90; \ +142:; \ + .pushsection .altinstructions, "a"; \ + altinstruction_entry 140b,143f,feature,142b-140b,144f-143f,142b-141b;\ + .popsection; \ + .pushsection .altinstr_replacement, "ax"; \ +143:; \ + newinstr; \ +144:; \ .popsection -.endm #define old_len 141b-140b #define new_len1 144f-143f @@ -73,27 +71,25 @@ * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has * @feature2, it replaces @oldinstr with @feature2. */ -.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 -140: - \oldinstr -141: - .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ - (alt_max_short(new_len1, new_len2) - (old_len)),0x90 -142: - - .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b - altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b - .popsection - - .pushsection .altinstr_replacement,"ax" -143: - \newinstr1 -144: - \newinstr2 -145: +#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, \ + newinstr2, feature2) \ +140:; \ + oldinstr; \ +141:; \ + .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ + (alt_max_short(new_len1, new_len2) - (old_len)),0x90; \ +142:; \ + .pushsection .altinstructions, "a"; \ + altinstruction_entry 140b,143f,feature1,142b-140b,144f-143f,142b-141b; \ + altinstruction_entry 140b,144f,feature2,142b-140b,145f-144f,142b-141b; \ + .popsection; \ + .pushsection .altinstr_replacement, "ax"; \ +143:; \ + newinstr1; \ +144:; \ + newinstr2; \ +145:; \ .popsection -.endm #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index db333300bd4b..b1264cff8906 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -28,10 +28,10 @@ #ifdef CONFIG_X86_SMAP #define ASM_CLAC \ - ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP + ALTERNATIVE(, __ASM_CLAC, X86_FEATURE_SMAP) #define ASM_STAC \ - ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP + ALTERNATIVE(, __ASM_STAC, X86_FEATURE_SMAP) #else /* CONFIG_X86_SMAP */ diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index e8508156c99d..0611dee51760 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S @@ -13,7 +13,7 @@ */ ALIGN ENTRY(copy_page) - ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD + ALTERNATIVE(jmp copy_page_regs, , X86_FEATURE_REP_GOOD) movl $4096/8, %ecx rep movsq ret diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 9a53a06e5a3e..7ada0513864b 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -28,8 +28,8 @@ */ ENTRY(__memcpy) ENTRY(memcpy) - ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ - "jmp memcpy_erms", X86_FEATURE_ERMS + ALTERNATIVE_2(jmp memcpy_orig, , X86_FEATURE_REP_GOOD, + jmp memcpy_erms, X86_FEATURE_ERMS) movq %rdi, %rax movq %rdx, %rcx diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 15de86cd15b0..ca6c39effa2f 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -42,7 +42,8 @@ ENTRY(__memmove) jg 2f .Lmemmove_begin_forward: - ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS + #define ERMS_MOVSB_RET movq %rdx, %rcx; rep movsb; retq + ALTERNATIVE(, ERMS_MOVSB_RET, X86_FEATURE_ERMS) /* * movsq instruction have many startup latency diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 55b95db30a61..d86825a11724 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -26,8 +26,8 @@ ENTRY(__memset) * * Otherwise, use original memset function. */ - ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \ - "jmp memset_erms", X86_FEATURE_ERMS + ALTERNATIVE_2(jmp memset_orig, , X86_FEATURE_REP_GOOD, + jmp memset_erms, X86_FEATURE_ERMS) movq %rdi,%r9 movq %rdx,%rcx -- 2.13.6
Josh Poimboeuf
2017-Oct-04  15:59 UTC
[PATCH 10/13] x86/alternative: Support indirect call replacement
Add alternative patching support for replacing an instruction with an
indirect call.  This will be needed for the paravirt alternatives.
Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>
---
 arch/x86/kernel/alternative.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 3344d3382e91..81c577c7deba 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -410,20 +410,28 @@ void __init_or_module noinline apply_alternatives(struct
alt_instr *start,
 		insnbuf_sz = a->replacementlen;
 
 		/*
-		 * 0xe8 is a relative jump; fix the offset.
-		 *
-		 * Instruction length is checked before the opcode to avoid
-		 * accessing uninitialized bytes for zero-length replacements.
+		 * Fix the address offsets for call and jump instructions which
+		 * use PC-relative addressing.
 		 */
 		if (a->replacementlen == 5 && *insnbuf == 0xe8) {
+			/* direct call */
 			*(s32 *)(insnbuf + 1) += replacement - instr;
-			DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
+			DPRINTK("Fix direct CALL offset: 0x%x, CALL 0x%lx",
 				*(s32 *)(insnbuf + 1),
 				(unsigned long)instr + *(s32 *)(insnbuf + 1) + 5);
-		}
 
-		if (a->replacementlen && is_jmp(replacement[0]))
+		} else if (a->replacementlen == 6 && *insnbuf == 0xff &&
+			   *(insnbuf+1) == 0x15) {
+			/* indirect call */
+			*(s32 *)(insnbuf + 2) += replacement - instr;
+			DPRINTK("Fix indirect CALL offset: 0x%x, CALL *0x%lx",
+				*(s32 *)(insnbuf + 2),
+				(unsigned long)instr + *(s32 *)(insnbuf + 2) + 6);
+
+		} else if (a->replacementlen && is_jmp(replacement[0])) {
+			/* direct jump */
 			recompute_jump(a, instr, replacement, insnbuf);
+		}
 
 		if (a->instrlen > a->replacementlen) {
 			add_nops(insnbuf + a->replacementlen,
-- 
2.13.6
Josh Poimboeuf
2017-Oct-04  15:59 UTC
[PATCH 11/13] x86/paravirt: Add paravirt alternatives infrastructure
With CONFIG_PARAVIRT, the kernel .text is littered with a bunch of calls
to pv_irq_ops function pointers, like:
  callq  *0xffffffff81e3a400 (pv_irq_ops.save_fl)
In non-Xen paravirt environments -- including native, KVM, Hyper-V, and
VMware -- the above code gets patched by native_patch() to look like
this instead:
   pushfq
   pop    %rax
   nopl   0x0(%rax,%rax,1)
So in most scenarios, there's a mismatch between what vmlinux shows and
the actual runtime code.  This mismatch hurts debuggability and makes
the assembly code harder to understand.
It also causes the ORC unwinder to produce KASAN warnings like:
  BUG: KASAN: stack-out-of-bounds in deref_stack_reg+0x123/0x140
This warning is due to the fact that objtool doesn't know about
parainstructions, so it doesn't know about the "pushfq; pop %rax"
sequence above.
Prepare to fix both of these issues (debuggability and ORC KASAN
warnings) by adding a paravirt alternatives infrastructure to put the
native instructions in .text by default.  Then, when booting on a
hypervisor, replace the native instructions with pv ops calls.
The pv ops calls need to be available much earlier than when
alternatives are normally applied.  So put these alternatives in a
dedicated ".pv_alternatives" section.
So now these instructions may be patched twice:
- in apply_pv_alternatives(), to allow the kernel to boot in the
  virtualized environment;
- and again in apply_paravirt(), to enable performance improvements
  (e.g., replacing an indirect call with a direct call).
That's a bit more complex, but overall this approach should cause less
confusion than before because the vmlinux code is now much more likely
to represent the actual runtime state of the code in the most common
paravirt cases (everything except Xen and vSMP).
It could be simplified by redesigning the paravirt patching code such
that it uses alternatives for all of its patching.  Instead of using pv
ops to specify which functions they need, they would instead set CPU
feature bits, which would then be used by the alternatives to decide
what to replace the native code with.  Then each site would only be
patched once.
But that's going to be a bit more work.  At least this patch creates a
good foundation for eventually getting rid of .parainstructions and pv
ops completely.
Suggested-by: Andy Lutomirski <luto at kernel.org>
Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>
---
 arch/x86/include/asm/alternative-asm.h |  9 +++-
 arch/x86/include/asm/alternative.h     | 12 +++--
 arch/x86/include/asm/cpufeatures.h     |  1 +
 arch/x86/include/asm/paravirt-asm.h    | 10 ++++
 arch/x86/include/asm/paravirt_types.h  | 84 ++++++++++++++++++++++++++++++++++
 arch/x86/kernel/alternative.c          | 13 ++++++
 arch/x86/kernel/cpu/hypervisor.c       |  2 +
 arch/x86/kernel/module.c               | 11 ++++-
 arch/x86/kernel/vmlinux.lds.S          |  6 +++
 arch/x86/xen/enlighten_pv.c            |  1 +
 10 files changed, 141 insertions(+), 8 deletions(-)
diff --git a/arch/x86/include/asm/alternative-asm.h
b/arch/x86/include/asm/alternative-asm.h
index 60073947350d..0ced2e3d0a30 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -39,14 +39,14 @@
  * @newinstr. ".skip" directive takes care of proper instruction
padding
  * in case @newinstr is longer than @oldinstr.
  */
-#define ALTERNATIVE(oldinstr, newinstr, feature)			\
+#define __ALTERNATIVE(section, oldinstr, newinstr, feature)		\
 140:;									\
 	oldinstr;							\
 141:;									\
 	.skip -(((144f-143f)-(141b-140b)) > 0) *			\
 		((144f-143f)-(141b-140b)),0x90;				\
 142:;									\
-	.pushsection .altinstructions, "a";				\
+	.pushsection section, "a";					\
 	altinstruction_entry 140b,143f,feature,142b-140b,144f-143f,142b-141b;\
 	.popsection;							\
 	.pushsection .altinstr_replacement, "ax";			\
@@ -55,6 +55,11 @@
 144:;									\
 	.popsection
 
+#define ARGS(args...) args
+
+#define ALTERNATIVE(oldinstr, newinstr, feature)			\
+	__ALTERNATIVE(.altinstructions, ARGS(oldinstr), ARGS(newinstr), feature)
+
 #define old_len			141b-140b
 #define new_len1		144f-143f
 #define new_len2		145f-144f
diff --git a/arch/x86/include/asm/alternative.h
b/arch/x86/include/asm/alternative.h
index c096624137ae..8482f90d5078 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -61,6 +61,7 @@ extern int alternatives_patched;
 
 extern void alternative_instructions(void);
 extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+extern void apply_pv_alternatives(void);
 
 struct module;
 
@@ -132,14 +133,17 @@ static inline int alternatives_text_reserved(void *start,
void *end)
 	b_replacement(num)":\n\t" newinstr "\n" e_replacement(num)
":\n\t"
 
 /* alternative assembly primitive: */
-#define ALTERNATIVE(oldinstr, newinstr, feature)			\
+#define __ALTERNATIVE(section, oldinstr, newinstr, feature)		\
 	OLDINSTR(oldinstr, 1)						\
-	".pushsection .altinstructions,\"a\"\n"				\
+	".pushsection " section ",\"a\"\n"				\
 	ALTINSTR_ENTRY(feature, 1)					\
 	".popsection\n"							\
 	".pushsection .altinstr_replacement, \"ax\"\n"			\
 	ALTINSTR_REPLACEMENT(newinstr, feature, 1)			\
-	".popsection"
+	".popsection\n"
+
+#define ALTERNATIVE(oldinstr, newinstr, feature)			\
+	__ALTERNATIVE(".altinstructions", oldinstr, newinstr, feature)
 
 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
 	OLDINSTR_2(oldinstr, 1, 2)					\
@@ -150,7 +154,7 @@ static inline int alternatives_text_reserved(void *start,
void *end)
 	".pushsection .altinstr_replacement, \"ax\"\n"			\
 	ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)			\
 	ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)			\
-	".popsection"
+	".popsection\n"
 
 /*
  * Alternative instructions for different CPU types or capabilities.
diff --git a/arch/x86/include/asm/cpufeatures.h
b/arch/x86/include/asm/cpufeatures.h
index 2519c6c801c9..1be45a2fc00d 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -214,6 +214,7 @@
 
 #define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
 #define X86_FEATURE_XENPV       ( 8*32+16) /* "" Xen paravirtual
guest */
+#define X86_FEATURE_PV_OPS      ( 8*32+17) /* Use pv ops alternatives */
 
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
diff --git a/arch/x86/include/asm/paravirt-asm.h
b/arch/x86/include/asm/paravirt-asm.h
index 8bdd50ee4bf3..a8139ea27cc1 100644
--- a/arch/x86/include/asm/paravirt-asm.h
+++ b/arch/x86/include/asm/paravirt-asm.h
@@ -21,6 +21,16 @@
 	 .short clobbers;						\
 	.popsection
 
+#define PV_ALT_SITE(oldinstr, newinstr, ops, off, clobbers)		\
+	__ALTERNATIVE(.pv_altinstructions, oldinstr, newinstr,		\
+		      X86_FEATURE_PV_OPS);				\
+	.pushsection .parainstructions, "a";				\
+	_ASM_ALIGN;							\
+	_ASM_PTR 140b;							\
+	.byte PV_TYPE(ops, off);					\
+	.byte 142b-140b;						\
+	.short clobbers;						\
+	.popsection
 
 #define COND_PUSH(set, mask, reg)			\
 	.if ((~(set)) & mask); push %reg; .endif
diff --git a/arch/x86/include/asm/paravirt_types.h
b/arch/x86/include/asm/paravirt_types.h
index 5656aea79412..b3a73d6d8908 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -375,6 +375,33 @@ int paravirt_disable_iospace(void);
 	"  .short " clobber "\n"					\
 	".popsection\n"
 
+/*
+ * Generate some native code, which, if running on a hypervisor, is replaced
+ * *twice*:
+ *
+ * - The first patch is done in early boot by apply_pv_alternatives(), to
+ *   enable the patch to boot in the virtualized environment.  It replaces the
+ *   native code with a call to the pv ops struct function pointer.
+ *
+ * - The second patch is done later by apply_paravirt(), for performance
+ *   reasons.  In most cases it converts the indirect call to a direct call in
+ *   order to improve CPU branch prediction.
+ *
+ * This is done for debugging improvement purposes, so that instructions listed
+ * in the kernel disassembly will match up with the most common runtime case
+ * (native instructions).
+ */
+#define _PV_ALT_SITE(oldinstr, newinstr, type, clobber)			\
+	__ALTERNATIVE(".pv_altinstructions", oldinstr, newinstr,	\
+		      X86_FEATURE_PV_OPS)				\
+	".pushsection .parainstructions,\"a\"\n"			\
+	_ASM_ALIGN "\n"							\
+	_ASM_PTR " 661b\n"						\
+	".byte " type "\n"						\
+	".byte " alt_total_slen "\n"					\
+	".short " clobber "\n"						\
+	".popsection\n"							\
+
 #define PARAVIRT_PATCH(x)						\
 	(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
 
@@ -559,6 +586,33 @@ int paravirt_disable_iospace(void);
 		      PVOP_CALLEE_OUTPUTS, ,				\
 		      pre, post, ##__VA_ARGS__)
 
+#define ____PVOP_ALT_CALL(rettype, native, op, clbr, call_clbr,	\
+			     extra_clbr, ...)				\
+({									\
+	rettype __ret;							\
+	PVOP_CALL_ARGS;							\
+	PVOP_TEST_NULL(op);						\
+	asm volatile(PV_ALT_SITE(native, PV_CALL_STR)			\
+		     : call_clbr, ASM_CALL_CONSTRAINT			\
+		     : PV_INPUT_CONSTRAINTS(op, clbr),			\
+		       ##__VA_ARGS__					\
+		     : "memory", "cc" extra_clbr);			\
+	if (IS_ENABLED(CONFIG_X86_32) &&				\
+	    sizeof(rettype) > sizeof(unsigned long))			\
+		__ret = (rettype)((((u64)__edx) << 32) | __eax);	\
+	else								\
+		__ret = (rettype)(__eax & PVOP_RETMASK(rettype));	\
+	__ret;								\
+})
+
+#define __PVOP_ALT_CALL(rettype, native, op, ...)			\
+	____PVOP_ALT_CALL(rettype, native, op, CLBR_ANY,		\
+			     PVOP_CALL_OUTPUTS, EXTRA_CLOBBERS,		\
+			     ##__VA_ARGS__)
+
+#define __PVOP_ALT_CALLEESAVE(rettype, native, op, ...)			\
+	____PVOP_ALT_CALL(rettype, native, op.func, CLBR_RET_REG,	\
+			     PVOP_CALLEE_OUTPUTS, , ##__VA_ARGS__)
 
 #define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...)	\
 	({								\
@@ -583,28 +637,58 @@ int paravirt_disable_iospace(void);
 		      PVOP_VCALLEE_OUTPUTS, ,				\
 		      pre, post, ##__VA_ARGS__)
 
+#define ____PVOP_ALT_VCALL(native, op, clbr, call_clbr, extra_clbr,	\
+			      ...)					\
+({									\
+	PVOP_VCALL_ARGS;						\
+	PVOP_TEST_NULL(op);						\
+	asm volatile(PV_ALT_SITE(native, PV_CALL_STR)			\
+		     : call_clbr, ASM_CALL_CONSTRAINT			\
+		     : PV_INPUT_CONSTRAINTS(op, clbr),			\
+		       ##__VA_ARGS__					\
+		     : "memory", "cc" extra_clbr);			\
+})
+
+#define __PVOP_ALT_VCALL(native, op, ...)				\
+	____PVOP_ALT_VCALL(native, op, CLBR_ANY,			\
+			      PVOP_VCALL_OUTPUTS, VEXTRA_CLOBBERS,	\
+			      ##__VA_ARGS__)
+
+#define __PVOP_ALT_VCALLEESAVE(native, op, ...)				\
+	____PVOP_ALT_VCALL(native, op.func, CLBR_RET_REG,		\
+			      PVOP_VCALLEE_OUTPUTS, , ##__VA_ARGS__)
 
 
 #define PVOP_CALL0(rettype, op)						\
 	__PVOP_CALL(rettype, op, "", "")
+#define PVOP_ALT_CALL0(rettype, native, op)				\
+	__PVOP_ALT_CALL(rettype, native, op)
 #define PVOP_VCALL0(op)							\
 	__PVOP_VCALL(op, "", "")
 
 #define PVOP_CALLEE0(rettype, op)					\
 	__PVOP_CALLEESAVE(rettype, op, "", "")
+#define PVOP_ALT_CALLEE0(rettype, native, op)				\
+	__PVOP_ALT_CALLEESAVE(rettype, native, op)
 #define PVOP_VCALLEE0(op)						\
 	__PVOP_VCALLEESAVE(op, "", "")
+#define PVOP_ALT_VCALLEE0(native, op)					\
+	__PVOP_ALT_VCALLEESAVE(native, op)
 
 
 #define PVOP_CALL1(rettype, op, arg1)					\
 	__PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
 #define PVOP_VCALL1(op, arg1)						\
 	__PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1))
+#define PVOP_ALT_VCALL1(native, op, arg1)				\
+	__PVOP_ALT_VCALL(native, op, PVOP_CALL_ARG1(arg1))
 
 #define PVOP_CALLEE1(rettype, op, arg1)					\
 	__PVOP_CALLEESAVE(rettype, op, "", "",
PVOP_CALL_ARG1(arg1))
 #define PVOP_VCALLEE1(op, arg1)						\
 	__PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1))
+#define PVOP_ALT_VCALLEE1(native, op, arg1)				\
+	__PVOP_ALT_VCALLEESAVE(native, op, PVOP_CALL_ARG1(arg1))
 
 
 #define PVOP_CALL2(rettype, op, arg1, arg2)				\
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 81c577c7deba..2d13c1af76ac 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -21,6 +21,7 @@
 #include <asm/tlbflush.h>
 #include <asm/io.h>
 #include <asm/fixmap.h>
+#include <asm/cpufeature.h>
 
 int __read_mostly alternatives_patched;
 
@@ -269,6 +270,7 @@ static void __init_or_module add_nops(void *insns, unsigned
int len)
 }
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+extern struct alt_instr __pv_alt_instructions[], __pv_alt_instructions_end[];
 extern s32 __smp_locks[], __smp_locks_end[];
 void *text_poke_early(void *addr, const void *opcode, size_t len);
 
@@ -598,6 +600,17 @@ int alternatives_text_reserved(void *start, void *end)
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_PARAVIRT
+/*
+ * Paravirt alternatives are applied much earlier than normal alternatives.
+ * They are only applied when running on a hypervisor.  They replace some
+ * native instructions with calls to pv ops.
+ */
+void __init apply_pv_alternatives(void)
+{
+	setup_force_cpu_cap(X86_FEATURE_PV_OPS);
+	apply_alternatives(__pv_alt_instructions, __pv_alt_instructions_end);
+}
+
 void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
 				     struct paravirt_patch_site *end)
 {
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 4fa90006ac68..17243fe0f5ce 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -71,6 +71,8 @@ void __init init_hypervisor_platform(void)
 	if (!x86_hyper)
 		return;
 
+	apply_pv_alternatives();
+
 	if (x86_hyper->init_platform)
 		x86_hyper->init_platform();
 }
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 62e7d70aadd5..34ec137e302a 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -213,8 +213,8 @@ int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs,
 		    struct module *me)
 {
-	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
-		*para = NULL, *orc = NULL, *orc_ip = NULL;
+	const Elf_Shdr *s, *text = NULL, *alt = NULL, *pv_alt = NULL,
+		*locks = NULL, *para = NULL, *orc = NULL, *orc_ip = NULL;
 	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 
 	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
@@ -222,6 +222,8 @@ int module_finalize(const Elf_Ehdr *hdr,
 			text = s;
 		if (!strcmp(".altinstructions", secstrings + s->sh_name))
 			alt = s;
+		if (!strcmp(".pv_altinstructions", secstrings + s->sh_name))
+			pv_alt = s;
 		if (!strcmp(".smp_locks", secstrings + s->sh_name))
 			locks = s;
 		if (!strcmp(".parainstructions", secstrings + s->sh_name))
@@ -237,6 +239,11 @@ int module_finalize(const Elf_Ehdr *hdr,
 		void *aseg = (void *)alt->sh_addr;
 		apply_alternatives(aseg, aseg + alt->sh_size);
 	}
+	if (pv_alt) {
+		/* patch .altinstructions */
+		void *seg = (void *)pv_alt->sh_addr;
+		apply_alternatives(seg, seg + pv_alt->sh_size);
+	}
 	if (locks && text) {
 		void *lseg = (void *)locks->sh_addr;
 		void *tseg = (void *)text->sh_addr;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index f05f00acac89..94537de39109 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -250,6 +250,12 @@ SECTIONS
 		*(.altinstructions)
 		__alt_instructions_end = .;
 	}
+	. = ALIGN(8);
+	.pv_altinstructions : AT(ADDR(.pv_altinstructions) - LOAD_OFFSET) {
+		__pv_alt_instructions = .;
+		*(.pv_altinstructions)
+		__pv_alt_instructions_end = .;
+	}
 
 	/*
 	 * And here are the replacement instructions. The linker sticks
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index c0cb5c2bfd92..874953d8c360 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1224,6 +1224,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	pv_info = xen_info;
 	pv_init_ops.patch = paravirt_patch_default;
 	pv_cpu_ops = xen_cpu_ops;
+	apply_pv_alternatives();
 
 	x86_platform.get_nmi_reason = xen_get_nmi_reason;
 
-- 
2.13.6
Josh Poimboeuf
2017-Oct-04  15:59 UTC
[PATCH 12/13] objtool: Add support for new .pv_altinstructions section
Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>
---
 tools/objtool/special.c | 10 ++++++++++
 1 file changed, 10 insertions(+)
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index 84f001d52322..dc15a3564fc9 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -63,6 +63,16 @@ struct special_entry entries[] = {
 		.feature = ALT_FEATURE_OFFSET,
 	},
 	{
+		.sec = ".pv_altinstructions",
+		.group = true,
+		.size = ALT_ENTRY_SIZE,
+		.orig = ALT_ORIG_OFFSET,
+		.orig_len = ALT_ORIG_LEN_OFFSET,
+		.new = ALT_NEW_OFFSET,
+		.new_len = ALT_NEW_LEN_OFFSET,
+		.feature = ALT_FEATURE_OFFSET,
+	},
+	{
 		.sec = "__jump_table",
 		.jump_or_nop = true,
 		.size = JUMP_ENTRY_SIZE,
-- 
2.13.6
Josh Poimboeuf
2017-Oct-04  15:59 UTC
[PATCH 13/13] x86/paravirt: Convert natively patched pv ops to use paravirt alternatives
Now that the paravirt alternatives infrastructure is in place, use it
for all natively patched pv ops.
This fixes KASAN warnings in the ORC unwinder like the following:
  BUG: KASAN: stack-out-of-bounds in deref_stack_reg+0x123/0x140
This also improves debuggability by making vmlinux more likely to match
reality.
Reported-by: Sasha Levin <alexander.levin at verizon.com>
Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>
---
 arch/x86/include/asm/paravirt-asm.h | 23 +++++++++++++----------
 arch/x86/include/asm/paravirt.h     | 37 +++++++++++++++++++++----------------
 2 files changed, 34 insertions(+), 26 deletions(-)
diff --git a/arch/x86/include/asm/paravirt-asm.h
b/arch/x86/include/asm/paravirt-asm.h
index a8139ea27cc1..b051f9254ace 100644
--- a/arch/x86/include/asm/paravirt-asm.h
+++ b/arch/x86/include/asm/paravirt-asm.h
@@ -86,16 +86,18 @@
 		pv_cpu_ops, PV_CPU_iret, CLBR_NONE)
 
 #define DISABLE_INTERRUPTS(clobbers)					\
-	PV_SITE(PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
-		call PV_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);	\
-		PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE),		\
-		pv_irq_ops, PV_IRQ_irq_disable, clobbers)
+	PV_ALT_SITE(cli,						\
+		    PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
+		    call PV_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);	\
+		    PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE),	\
+		    pv_irq_ops, PV_IRQ_irq_disable, clobbers)
 
 #define ENABLE_INTERRUPTS(clobbers)					\
-	PV_SITE(PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
-		call PV_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);		\
-		PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE),		\
-		pv_irq_ops, PV_IRQ_irq_enable, clobbers)
+	PV_ALT_SITE(sti,						\
+		    PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
+		    call PV_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
+		    PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE),	\
+		    pv_irq_ops, PV_IRQ_irq_enable, clobbers)
 
 #ifdef CONFIG_X86_32
 
@@ -128,8 +130,9 @@
 	call PV_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
 
 #define USERGS_SYSRET64							\
-	PV_SITE(jmp PV_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64),	\
-		pv_cpu_ops, PV_CPU_usergs_sysret64, CLBR_NONE)
+	PV_ALT_SITE(swapgs; sysret,					\
+		    jmp PV_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64),	\
+		    pv_cpu_ops, PV_CPU_usergs_sysret64, CLBR_NONE)
 
 #endif	/* !CONFIG_X86_32 */
 
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index bfd02c3335cb..4216a3b02832 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -13,6 +13,7 @@
 #include <asm/frame.h>
 #include <asm/pgtable_types.h>
 #include <asm/paravirt_types.h>
+#include <asm/special_insns.h>
 
 static inline void load_sp0(struct tss_struct *tss,
 			     struct thread_struct *thread)
@@ -50,9 +51,10 @@ static inline void write_cr0(unsigned long x)
 	PVOP_VCALL1(pv_cpu_ops.write_cr0, x);
 }
 
-static inline unsigned long read_cr2(void)
+static __always_inline unsigned long read_cr2(void)
 {
-	return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2);
+	return PVOP_ALT_CALL0(unsigned long, NATIVE_READ_CR2,
+			      pv_mmu_ops.read_cr2);
 }
 
 static inline void write_cr2(unsigned long x)
@@ -60,14 +62,15 @@ static inline void write_cr2(unsigned long x)
 	PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
 }
 
-static inline unsigned long __read_cr3(void)
+static __always_inline unsigned long __read_cr3(void)
 {
-	return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
+	return PVOP_ALT_CALL0(unsigned long, NATIVE_READ_CR3,
+			      pv_mmu_ops.read_cr3);
 }
 
-static inline void write_cr3(unsigned long x)
+static __always_inline void write_cr3(unsigned long x)
 {
-	PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
+	PVOP_ALT_VCALL1(NATIVE_WRITE_CR3, pv_mmu_ops.write_cr3, x);
 }
 
 static inline void __write_cr4(unsigned long x)
@@ -291,9 +294,10 @@ static inline void __flush_tlb_global(void)
 {
 	PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
 }
-static inline void __flush_tlb_single(unsigned long addr)
+static __always_inline void __flush_tlb_single(unsigned long addr)
 {
-	PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
+	PVOP_ALT_VCALL1(NATIVE_FLUSH_TLB_SINGLE, pv_mmu_ops.flush_tlb_single,
+			addr);
 }
 
 static inline void flush_tlb_others(const struct cpumask *cpumask,
@@ -761,24 +765,25 @@ static __always_inline bool pv_vcpu_is_preempted(long cpu)
 #define __PV_IS_CALLEE_SAVE(func)			\
 	((struct paravirt_callee_save) { func })
 
-static inline notrace unsigned long arch_local_save_flags(void)
+static __always_inline unsigned long arch_local_save_flags(void)
 {
-	return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
+	return PVOP_ALT_CALLEE0(unsigned long, NATIVE_SAVE_FL,
+				pv_irq_ops.save_fl);
 }
 
-static inline notrace void arch_local_irq_restore(unsigned long f)
+static __always_inline void arch_local_irq_restore(unsigned long f)
 {
-	PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
+	PVOP_ALT_VCALLEE1(NATIVE_RESTORE_FL, pv_irq_ops.restore_fl, f);
 }
 
-static inline notrace void arch_local_irq_disable(void)
+static __always_inline void arch_local_irq_disable(void)
 {
-	PVOP_VCALLEE0(pv_irq_ops.irq_disable);
+	PVOP_ALT_VCALLEE0(NATIVE_IRQ_DISABLE, pv_irq_ops.irq_disable);
 }
 
-static inline notrace void arch_local_irq_enable(void)
+static __always_inline void arch_local_irq_enable(void)
 {
-	PVOP_VCALLEE0(pv_irq_ops.irq_enable);
+	PVOP_ALT_VCALLEE0(NATIVE_IRQ_ENABLE, pv_irq_ops.irq_enable);
 }
 
 static inline notrace unsigned long arch_local_irq_save(void)
-- 
2.13.6
Boris Ostrovsky
2017-Oct-05  20:35 UTC
[PATCH 11/13] x86/paravirt: Add paravirt alternatives infrastructure
> #ifdef CONFIG_PARAVIRT > +/* > + * Paravirt alternatives are applied much earlier than normal alternatives. > + * They are only applied when running on a hypervisor. They replace some > + * native instructions with calls to pv ops. > + */ > +void __init apply_pv_alternatives(void) > +{ > + setup_force_cpu_cap(X86_FEATURE_PV_OPS);Not for Xen HVM guests.> + apply_alternatives(__pv_alt_instructions, __pv_alt_instructions_end); > +}This is a problem (at least for Xen PV guests): apply_alternatives()->text_poke_early()->local_irq_save()->...'cli'->death. It might be possible not to turn off/on the interrupts in this particular case since the guest probably won't be able to handle an interrupt at this point anyway.> + > void __init_or_module apply_paravirt(struct paravirt_patch_site *start, > struct paravirt_patch_site *end) > { > diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c > index 4fa90006ac68..17243fe0f5ce 100644 > --- a/arch/x86/kernel/cpu/hypervisor.c > +++ b/arch/x86/kernel/cpu/hypervisor.c > @@ -71,6 +71,8 @@ void __init init_hypervisor_platform(void) > if (!x86_hyper) > return; > > + apply_pv_alternatives();Not for Xen PV guests who have already done this. -boris> + > if (x86_hyper->init_platform) > x86_hyper->init_platform(); > } >
Vitaly Kuznetsov
2017-Oct-06  07:35 UTC
[Xen-devel] [PATCH 00/13] x86/paravirt: Make pv ops code generation more closely match reality
Josh Poimboeuf <jpoimboe at redhat.com> writes:> - For the most common runtime cases (everything except Xen and vSMP), > vmlinux disassembly now matches what the actual runtime code looks > like. This improves debuggability and kernel developer sanity (a > precious resource). > > ... > > - It's hopefully a first step in simplifying paravirt patching by > getting rid of .parainstructions, pv ops, and apply_paravirt() > completely. (I think Xen can be changed to set CPU feature bits to > specify which ops it needs during early boot, then those ops can be > patched in using early alternatives.)JFYI starting 4.14 Xen PV is not the only user of pv_mmu_ops, Hyper-V uses it for TLB shootdown now. -- Vitaly
Josh Poimboeuf
2017-Oct-06  14:36 UTC
[Xen-devel] [PATCH 00/13] x86/paravirt: Make pv ops code generation more closely match reality
On Fri, Oct 06, 2017 at 09:35:16AM +0200, Vitaly Kuznetsov wrote:> Josh Poimboeuf <jpoimboe at redhat.com> writes: > > > - For the most common runtime cases (everything except Xen and vSMP), > > vmlinux disassembly now matches what the actual runtime code looks > > like. This improves debuggability and kernel developer sanity (a > > precious resource). > > > > ... > > > > - It's hopefully a first step in simplifying paravirt patching by > > getting rid of .parainstructions, pv ops, and apply_paravirt() > > completely. (I think Xen can be changed to set CPU feature bits to > > specify which ops it needs during early boot, then those ops can be > > patched in using early alternatives.) > > JFYI starting 4.14 Xen PV is not the only user of pv_mmu_ops, Hyper-V > uses it for TLB shootdown now.Yeah, I saw that. It should be fine because the pv_alternatives get patched before the Hyper-V code sets up pv_mmu_ops. -- Josh
Juergen Gross
2017-Oct-24  13:17 UTC
[PATCH 01/13] x86/paravirt: remove wbinvd() paravirt interface
On 04/10/17 17:58, Josh Poimboeuf wrote:> Since lguest was removed, only the native version of wbinvd() is used. > The paravirt interface is no longer needed. > > Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>Reviewed-by: Juergen Gross <jgross at suse.com> Juergen
Juergen Gross
2017-Oct-25  09:33 UTC
[PATCH 02/13] x86/paravirt: Fix output constraint macro names
On 04/10/17 17:58, Josh Poimboeuf wrote:> Some of the paravirt '*_CLOBBERS' macros refer to output constraints > instead of clobbers, which makes the code extra confusing. Rename the > output constraint related macros to '*_OUTPUTS'. > > Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>I'm fine with the changes, but you might want to rename the "call_clbr" parameter of ____PVOP_[V]CALL, too, e.g. to "outputs". You could then drop the "CALL_" from the macros, too. Juergen
Juergen Gross
2017-Oct-25  09:46 UTC
[PATCH 03/13] x86/paravirt: Convert native patch assembly code strings to macros
On 04/10/17 17:58, Josh Poimboeuf wrote:> Convert the hard-coded native patch assembly code strings to macros to > facilitate sharing common code between 32-bit and 64-bit. > > These macros will also be used by a future patch which requires the GCC > extended asm syntax of two '%' characters instead of one when specifying > a register name. > > Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>Reviewed-by: Juergen Gross <jgross at suse.com> Mind adding another patch to merge the now nearly identical paravirt_patch_32.c and paravirt_patch_64.c either into paravirt.c or paravirt_patch.c? This would require only very few #ifdef. Juergen> --- > arch/x86/include/asm/special_insns.h | 24 ++++++++++++++++++++++++ > arch/x86/kernel/paravirt_patch_32.c | 21 +++++++++++---------- > arch/x86/kernel/paravirt_patch_64.c | 29 +++++++++++++++-------------- > 3 files changed, 50 insertions(+), 24 deletions(-) > > diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h > index ac402c6fc24b..0549c5f2c1b3 100644 > --- a/arch/x86/include/asm/special_insns.h > +++ b/arch/x86/include/asm/special_insns.h > @@ -6,6 +6,30 @@ > > #include <asm/nops.h> > > +#ifdef CONFIG_X86_64 > +# define _REG_ARG1 "%rdi" > +# define NATIVE_IDENTITY_32 "mov %edi, %eax" > +# define NATIVE_USERGS_SYSRET64 "swapgs; sysretq" > +#else > +# define _REG_ARG1 "%eax" > +#endif > + > +#define _REG_RET "%" _ASM_AX > + > +#define NATIVE_ZERO "xor " _REG_ARG1 ", " _REG_ARG1 > +#define NATIVE_IDENTITY "mov " _REG_ARG1 ", " _REG_RET > +#define NATIVE_SAVE_FL "pushf; pop " _REG_RET > +#define NATIVE_RESTORE_FL "push " _REG_ARG1 "; popf" > +#define NATIVE_IRQ_DISABLE "cli" > +#define NATIVE_IRQ_ENABLE "sti" > +#define NATIVE_READ_CR2 "mov %cr2, " _REG_RET > +#define NATIVE_READ_CR3 "mov %cr3, " _REG_RET > +#define NATIVE_WRITE_CR3 "mov " _REG_ARG1 ", %cr3" > +#define NATIVE_FLUSH_TLB_SINGLE "invlpg (" _REG_ARG1 ")" > +#define NATIVE_SWAPGS "swapgs" > +#define NATIVE_IRET "iret" > +#define NATIVE_QUEUED_SPIN_UNLOCK "movb $0, (" _REG_ARG1 ")" > + > /* > * Volatile isn't enough to prevent the compiler from reordering the > * read/write functions for the control registers and messing everything up. > diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c > index 553acbbb4d32..c9c6106ae714 100644 > --- a/arch/x86/kernel/paravirt_patch_32.c > +++ b/arch/x86/kernel/paravirt_patch_32.c > @@ -1,17 +1,18 @@ > #include <asm/paravirt.h> > +#include <asm/special_insns.h> > > -DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); > -DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); > -DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); > -DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); > -DEF_NATIVE(pv_cpu_ops, iret, "iret"); > -DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); > -DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); > -DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); > +DEF_NATIVE(pv_irq_ops, save_fl, NATIVE_SAVE_FL); > +DEF_NATIVE(pv_irq_ops, restore_fl, NATIVE_RESTORE_FL); > +DEF_NATIVE(pv_irq_ops, irq_disable, NATIVE_IRQ_DISABLE); > +DEF_NATIVE(pv_irq_ops, irq_enable, NATIVE_IRQ_ENABLE); > +DEF_NATIVE(pv_cpu_ops, iret, NATIVE_IRET); > +DEF_NATIVE(pv_mmu_ops, read_cr2, NATIVE_READ_CR2); > +DEF_NATIVE(pv_mmu_ops, read_cr3, NATIVE_READ_CR3); > +DEF_NATIVE(pv_mmu_ops, write_cr3, NATIVE_WRITE_CR3); > > #if defined(CONFIG_PARAVIRT_SPINLOCKS) > -DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)"); > -DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax"); > +DEF_NATIVE(pv_lock_ops, queued_spin_unlock, NATIVE_QUEUED_SPIN_UNLOCK); > +DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, NATIVE_ZERO); > #endif > > unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) > diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c > index 0a1ba3f80cbf..0aa232edd670 100644 > --- a/arch/x86/kernel/paravirt_patch_64.c > +++ b/arch/x86/kernel/paravirt_patch_64.c > @@ -1,25 +1,26 @@ > #include <asm/paravirt.h> > #include <asm/asm-offsets.h> > +#include <asm/special_insns.h> > #include <linux/stringify.h> > > -DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); > -DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); > -DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq"); > -DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); > -DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); > -DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); > -DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); > -DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); > +DEF_NATIVE(pv_irq_ops, save_fl, NATIVE_SAVE_FL); > +DEF_NATIVE(pv_irq_ops, restore_fl, NATIVE_RESTORE_FL); > +DEF_NATIVE(pv_irq_ops, irq_disable, NATIVE_IRQ_DISABLE); > +DEF_NATIVE(pv_irq_ops, irq_enable, NATIVE_IRQ_ENABLE); > +DEF_NATIVE(pv_mmu_ops, read_cr2, NATIVE_READ_CR2); > +DEF_NATIVE(pv_mmu_ops, read_cr3, NATIVE_READ_CR3); > +DEF_NATIVE(pv_mmu_ops, write_cr3, NATIVE_WRITE_CR3); > +DEF_NATIVE(pv_mmu_ops, flush_tlb_single, NATIVE_FLUSH_TLB_SINGLE); > > -DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); > -DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); > +DEF_NATIVE(pv_cpu_ops, usergs_sysret64, NATIVE_USERGS_SYSRET64); > +DEF_NATIVE(pv_cpu_ops, swapgs, NATIVE_SWAPGS); > > -DEF_NATIVE(, mov32, "mov %edi, %eax"); > -DEF_NATIVE(, mov64, "mov %rdi, %rax"); > +DEF_NATIVE(, mov32, NATIVE_IDENTITY_32); > +DEF_NATIVE(, mov64, NATIVE_IDENTITY); > > #if defined(CONFIG_PARAVIRT_SPINLOCKS) > -DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)"); > -DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %rax, %rax"); > +DEF_NATIVE(pv_lock_ops, queued_spin_unlock, NATIVE_QUEUED_SPIN_UNLOCK); > +DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, NATIVE_ZERO); > #endif > > unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) >
Juergen Gross
2017-Oct-25  10:03 UTC
[PATCH 04/13] x86/paravirt: Convert DEF_NATIVE macro to GCC extended asm syntax
On 04/10/17 17:58, Josh Poimboeuf wrote:> In a future patch, the NATIVE_* instruction string macros will be used > in GCC extended inline asm, which requires registers to have two '%' > instead of one in the asm template string. Convert the DEF_NATIVE macro > to the GCC extended asm syntax so the NATIVE_* macros can be shared more > broadly. > > Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>Reviewed-by: Juergen Gross <jgross at suse.com> Juergen
Juergen Gross
2017-Oct-25  10:32 UTC
[PATCH 05/13] x86/paravirt: Move paravirt asm macros to paravirt-asm.h
On 04/10/17 17:58, Josh Poimboeuf wrote:> The paravirt.h file is quite big and the asm interfaces for paravirt > don't need to be in the same file as the C interfaces. Move the asm > interfaces to a dedicated header file. > > Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>Reviewed-by: Juergen Gross <jgross at suse.com> Juergen
On 04/10/17 17:58, Josh Poimboeuf wrote:> Some cleanup to make the code easier to read and understand: > > - Use the common "PV_" prefix > - Simplify the PV_SITE macro interface > - Improve whitespace > > Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>Reviewed-by: Juergen Gross <jgross at suse.com> Juergen
On 04/10/17 17:58, Josh Poimboeuf wrote:> Remove the inline asm duplication in ____PVOP_CALL(). > > Also add 'IS_ENABLED(CONFIG_X86_32)' to the return variable logic, > making the code clearer and rendering the comment unnecessary. > > Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>Reviewed-by: Juergen Gross <jgross at suse.com> Juergen
On 04/10/17 17:58, Josh Poimboeuf wrote:> Make paravirt_types.h more understandable: > > - Use more consistent and logical naming > - Simplify interfaces > - Put related macros together > - Improve whitespace > > Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>Reviewed-by: Juergen Gross <jgross at suse.com> Juergen
Juergen Gross
2017-Oct-25  11:14 UTC
[PATCH 09/13] x86/asm: Convert ALTERNATIVE*() assembler macros to preprocessor macros
On 04/10/17 17:58, Josh Poimboeuf wrote:> The ALTERNATIVE() and ALTERNATIVE_2() macros are GNU assembler macros, > which makes them quite inflexible for future changes. Convert them to > preprocessor macros. > > Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com>Reviewed-by: Juergen Gross <jgross at suse.com> Juergen
Juergen Gross
2017-Oct-25  11:25 UTC
[PATCH 10/13] x86/alternative: Support indirect call replacement
On 04/10/17 17:58, Josh Poimboeuf wrote:> Add alternative patching support for replacing an instruction with an > indirect call. This will be needed for the paravirt alternatives. > > Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com> > --- > arch/x86/kernel/alternative.c | 22 +++++++++++++++------- > 1 file changed, 15 insertions(+), 7 deletions(-) > > diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c > index 3344d3382e91..81c577c7deba 100644 > --- a/arch/x86/kernel/alternative.c > +++ b/arch/x86/kernel/alternative.c > @@ -410,20 +410,28 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, > insnbuf_sz = a->replacementlen; > > /* > - * 0xe8 is a relative jump; fix the offset. > - * > - * Instruction length is checked before the opcode to avoid > - * accessing uninitialized bytes for zero-length replacements. > + * Fix the address offsets for call and jump instructions which > + * use PC-relative addressing. > */ > if (a->replacementlen == 5 && *insnbuf == 0xe8) { > + /* direct call */ > *(s32 *)(insnbuf + 1) += replacement - instr; > - DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx", > + DPRINTK("Fix direct CALL offset: 0x%x, CALL 0x%lx", > *(s32 *)(insnbuf + 1), > (unsigned long)instr + *(s32 *)(insnbuf + 1) + 5); > - } > > - if (a->replacementlen && is_jmp(replacement[0])) > + } else if (a->replacementlen == 6 && *insnbuf == 0xff && > + *(insnbuf+1) == 0x15) { > + /* indirect call */ > + *(s32 *)(insnbuf + 2) += replacement - instr; > + DPRINTK("Fix indirect CALL offset: 0x%x, CALL *0x%lx", > + *(s32 *)(insnbuf + 2), > + (unsigned long)instr + *(s32 *)(insnbuf + 2) + 6); > + > + } else if (a->replacementlen && is_jmp(replacement[0])) {Is this correct? Without your patch this was: if (*insnbuf == 0xe8) ... if (is_jmp(replacement[0])) ... Now you have: if (*insnbuf == 0xe8) ... else if (*insnbuf == 0xff15) ... else if (is_jmp(replacement[0])) ... So only one or none of the three variants will be executed. In the past it could be none, one or both. Juergen
Borislav Petkov
2017-Nov-17  14:39 UTC
[PATCH 01/13] x86/paravirt: remove wbinvd() paravirt interface
On Wed, Oct 04, 2017 at 10:58:22AM -0500, Josh Poimboeuf wrote:> Since lguest was removed, only the native version of wbinvd() is used. > The paravirt interface is no longer needed. > > Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com> > --- > arch/x86/include/asm/paravirt.h | 5 ----- > arch/x86/include/asm/paravirt_types.h | 1 - > arch/x86/include/asm/special_insns.h | 7 +------ > arch/x86/kernel/paravirt.c | 1 - > arch/x86/kernel/paravirt_patch_64.c | 2 -- > arch/x86/xen/enlighten_pv.c | 2 -- > 6 files changed, 1 insertion(+), 17 deletions(-)Reviewed-by: Borislav Petkov <bp at suse.de> -- Regards/Gruss, Boris. Good mailing practices for 400: avoid top-posting and trim the reply.
Borislav Petkov
2017-Nov-17  18:07 UTC
[PATCH 03/13] x86/paravirt: Convert native patch assembly code strings to macros
On Wed, Oct 04, 2017 at 10:58:24AM -0500, Josh Poimboeuf wrote:> Convert the hard-coded native patch assembly code strings to macros to > facilitate sharing common code between 32-bit and 64-bit. > > These macros will also be used by a future patch which requires the GCC > extended asm syntax of two '%' characters instead of one when specifying > a register name. > > Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com> > --- > arch/x86/include/asm/special_insns.h | 24 ++++++++++++++++++++++++ > arch/x86/kernel/paravirt_patch_32.c | 21 +++++++++++---------- > arch/x86/kernel/paravirt_patch_64.c | 29 +++++++++++++++-------------- > 3 files changed, 50 insertions(+), 24 deletions(-) > > diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h > index ac402c6fc24b..0549c5f2c1b3 100644 > --- a/arch/x86/include/asm/special_insns.h > +++ b/arch/x86/include/asm/special_insns.h > @@ -6,6 +6,30 @@ > > #include <asm/nops.h> > > +#ifdef CONFIG_X86_64 > +# define _REG_ARG1 "%rdi" > +# define NATIVE_IDENTITY_32 "mov %edi, %eax"Yeah, that "identity" looks strange. How about NATIVE_NOOP and NATIVE_NOOP_32 ?> +# define NATIVE_USERGS_SYSRET64 "swapgs; sysretq" > +#else > +# define _REG_ARG1 "%eax" > +#endif > + > +#define _REG_RET "%" _ASM_AX > + > +#define NATIVE_ZERO "xor " _REG_ARG1 ", " _REG_ARG1NATIVE_ZERO_OUT I guess. NATIVE_ZERO reads like the native representation of 0 :-) ... -- Regards/Gruss, Boris. Good mailing practices for 400: avoid top-posting and trim the reply.
H. Peter Anvin
2017-Nov-17  19:52 UTC
[PATCH 10/13] x86/alternative: Support indirect call replacement
On 10/04/17 08:58, Josh Poimboeuf wrote:> Add alternative patching support for replacing an instruction with an > indirect call. This will be needed for the paravirt alternatives.I have a patchset that generalizes the alternatives in what I think is a more robust way. I really, really want to get rid of these hacks. Let me clean it up an post it... -hpa
Borislav Petkov
2017-Nov-22  16:35 UTC
[PATCH 07/13] x86/paravirt: Simplify ____PVOP_CALL()
On Wed, Oct 04, 2017 at 10:58:28AM -0500, Josh Poimboeuf wrote:> Remove the inline asm duplication in ____PVOP_CALL(). > > Also add 'IS_ENABLED(CONFIG_X86_32)' to the return variable logic, > making the code clearer and rendering the comment unnecessary. > > Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com> > --- > arch/x86/include/asm/paravirt_types.h | 36 +++++++++++++---------------------- > 1 file changed, 13 insertions(+), 23 deletions(-) > > diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h > index ab7aabe6b668..01f9e10983c1 100644 > --- a/arch/x86/include/asm/paravirt_types.h > +++ b/arch/x86/include/asm/paravirt_types.h > @@ -529,29 +529,19 @@ int paravirt_disable_iospace(void); > rettype __ret; \ > PVOP_CALL_ARGS; \ > PVOP_TEST_NULL(op); \Newline here...> - /* This is 32-bit specific, but is okay in 64-bit */ \ > - /* since this condition will never hold */ \ > - if (sizeof(rettype) > sizeof(unsigned long)) { \ > - asm volatile(pre \ > - paravirt_alt(PARAVIRT_CALL) \ > - post \ > - : call_clbr, ASM_CALL_CONSTRAINT \ > - : paravirt_type(op), \ > - paravirt_clobber(clbr), \ > - ##__VA_ARGS__ \ > - : "memory", "cc" extra_clbr); \ > - __ret = (rettype)((((u64)__edx) << 32) | __eax); \ > - } else { \ > - asm volatile(pre \ > - paravirt_alt(PARAVIRT_CALL) \ > - post \ > - : call_clbr, ASM_CALL_CONSTRAINT \ > - : paravirt_type(op), \ > - paravirt_clobber(clbr), \ > - ##__VA_ARGS__ \ > - : "memory", "cc" extra_clbr); \ > - __ret = (rettype)(__eax & PVOP_RETMASK(rettype)); \ > - } \ > + asm volatile(pre \ > + paravirt_alt(PARAVIRT_CALL) \ > + post \ > + : call_clbr, ASM_CALL_CONSTRAINT \ > + : paravirt_type(op), \ > + paravirt_clobber(clbr), \ > + ##__VA_ARGS__ \ > + : "memory", "cc" extra_clbr); \... and here goes a long way towards readability. :)> + if (IS_ENABLED(CONFIG_X86_32) && \ > + sizeof(rettype) > sizeof(unsigned long)) \ > + __ret = (rettype)((((u64)__edx) << 32) | __eax);\ > + else \ > + __ret = (rettype)(__eax & PVOP_RETMASK(rettype));\ > __ret; \ > })-- Regards/Gruss, Boris. Good mailing practices for 400: avoid top-posting and trim the reply.
Borislav Petkov
2017-Nov-22  20:46 UTC
[PATCH 08/13] x86/paravirt: Clean up paravirt_types.h
On Wed, Oct 04, 2017 at 10:58:29AM -0500, Josh Poimboeuf wrote:> Make paravirt_types.h more understandable: > > - Use more consistent and logical naming > - Simplify interfaces > - Put related macros together > - Improve whitespace > > Signed-off-by: Josh Poimboeuf <jpoimboe at redhat.com> > --- > arch/x86/include/asm/paravirt_types.h | 104 ++++++++++++++++++---------------- > 1 file changed, 54 insertions(+), 50 deletions(-) > > diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h > index 01f9e10983c1..5656aea79412 100644 > --- a/arch/x86/include/asm/paravirt_types.h > +++ b/arch/x86/include/asm/paravirt_types.h...> @@ -388,13 +361,46 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, > > int paravirt_disable_iospace(void); > > + > /* > - * This generates an indirect call based on the operation type number. > - * The type number, computed in PARAVIRT_PATCH, is derived from the > - * offset into the paravirt_patch_template structure, and can therefore be > - * freely converted back into a structure offset. > + * Generate some code, and mark it as patchable by apply_paravirt(). > */ > -#define PARAVIRT_CALL "call *%c[paravirt_opptr];" > +#define _PV_SITE(insn_string, type, clobber) \ > + "771:\n\t" insn_string "\n" "772:\n" \You can merge the last two strings into one. Also, s/insn_string/insns/ so that it is the same as in paravirt-asm.h PV_SITE definition. Ditto for s/clobber/clobbers/ I.e., in general, have the same parameter names in the respective macros so that they can be parsed visually quickly and reader can say, aha, I see, that's that param from the asm version of the macro.> + ".pushsection .parainstructions,\"a\"\n" \ > + _ASM_ALIGN "\n" \ > + _ASM_PTR " 771b\n" \ > + " .byte " type "\n" \ > + " .byte 772b-771b\n" \ > + " .short " clobber "\n" \ > + ".popsection\n" > + > +#define PARAVIRT_PATCH(x) \Btw, that macro's name doesn't tell me anything: it should be PV_INSN_TYPE or so.> + (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) > + > +#define PV_STRINGIFY(constraint) "%c[" __stringify(constraint) "]" > + > +#define PV_CALL_CONSTRAINT pv_op_ptr > +#define PV_TYPE_CONSTRAINT pv_typenum > +#define PV_CLBR_CONSTRAINT pv_clobber > + > +#define PV_CALL_CONSTRAINT_STR PV_STRINGIFY(PV_CALL_CONSTRAINT) > +#define PV_TYPE_CONSTRAINT_STR PV_STRINGIFY(PV_TYPE_CONSTRAINT) > +#define PV_CLBR_CONSTRAINT_STR PV_STRINGIFY(PV_CLBR_CONSTRAINT) > + > +#define PV_CALL_STR "call *" PV_CALL_CONSTRAINT_STR ";" > + > +#define PV_INPUT_CONSTRAINTS(op, clobber) \ > + [PV_TYPE_CONSTRAINT] "i" (PARAVIRT_PATCH(op)), \ > + [PV_CALL_CONSTRAINT] "i" (&(op)), \ > + [PV_CLBR_CONSTRAINT] "i" (clobber) > + > +#define PV_SITE(insn_string) \ > + _PV_SITE(insn_string, PV_TYPE_CONSTRAINT_STR, PV_CLBR_CONSTRAINT_STR) > + > +#define PV_ALT_SITE(oldinstr, newinstr) \ > + _PV_ALT_SITE(oldinstr, newinstr, PV_TYPE_CONSTRAINT_STR, \ > + PV_CLBR_CONSTRAINT_STR) > > /* > * These macros are intended to wrap calls through one of the paravirt > @@ -525,25 +531,24 @@ int paravirt_disable_iospace(void); > > #define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ > pre, post, ...) \ > - ({ \ > - rettype __ret; \ > - PVOP_CALL_ARGS; \ > - PVOP_TEST_NULL(op); \ > +({ \ > + rettype __ret; \ > + PVOP_CALL_ARGS; \ > + PVOP_TEST_NULL(op); \ > asm volatile(pre \ > - paravirt_alt(PARAVIRT_CALL) \ > + PV_SITE(PV_CALL_STR) \ > post \ > : call_clbr, ASM_CALL_CONSTRAINT \ > - : paravirt_type(op), \ > - paravirt_clobber(clbr), \ > + : PV_INPUT_CONSTRAINTS(op, clbr), \ > ##__VA_ARGS__ \ > : "memory", "cc" extra_clbr); \ > - if (IS_ENABLED(CONFIG_X86_32) && \ > - sizeof(rettype) > sizeof(unsigned long)) \ > - __ret = (rettype)((((u64)__edx) << 32) | __eax);\ > - else \ > - __ret = (rettype)(__eax & PVOP_RETMASK(rettype));\ > - __ret; \ > - })<---- newline here.> + if (IS_ENABLED(CONFIG_X86_32) && \ > + sizeof(rettype) > sizeof(unsigned long)) \ > + __ret = (rettype)((((u64)__edx) << 32) | __eax); \ > + else \ > + __ret = (rettype)(__eax & PVOP_RETMASK(rettype)); \ > + __ret; \ > +})-- Regards/Gruss, Boris. Good mailing practices for 400: avoid top-posting and trim the reply.
Apparently Analagous Threads
- [PATCH 03/13] x86/paravirt: Convert native patch assembly code strings to macros
- [PATCH 04/13] x86/paravirt: Convert DEF_NATIVE macro to GCC extended asm syntax
- [PATCH 03/13] x86/paravirt: Convert native patch assembly code strings to macros
- [PATCH 03/13] x86/paravirt: Convert native patch assembly code strings to macros
- [PATCH 03/13] x86/paravirt: Convert native patch assembly code strings to macros