This patch series enables the Last Branch Recording feature for the
guest. Instead of trapping each LBR stack MSR access, the MSRs are
passed through to the guest. Those MSRs are switched (i.e. loaded and
saved) on VMExit and VMEntry.

Test:
Try "perf record -b ./test_program" on guest.

Wei Wang (4):
  KVM/vmx: re-write the msr auto switch feature
  KVM/vmx: auto switch MSR_IA32_DEBUGCTLMSR
  perf/x86: add a function to get the lbr stack
  KVM/vmx: enable lbr for the guest

 arch/x86/events/intel/lbr.c       |  23 +++++++
 arch/x86/include/asm/perf_event.h |  14 ++++
 arch/x86/kvm/vmx.c                | 135 +++++++++++++++++++++++++++++++++-----
 3 files changed, 154 insertions(+), 18 deletions(-)

-- 
2.7.4
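For readers who have not dug into the VMX MSR auto-switch machinery this
series builds on: the VMCS MSR-load/store areas the patches point at are
plain arrays of 16-byte entries (MSR index, a reserved word, and the
64-bit value) that the CPU walks on VMEntry and VMExit. A minimal sketch
of that layout follows; the msr_list_add() helper is purely illustrative
and is not part of the series.

#include <linux/types.h>

/*
 * Each entry in a VMX MSR-load/store area is 16 bytes: the MSR index,
 * a reserved word that must be zero, and the 64-bit MSR value
 * (Intel SDM Vol. 3, VM-exit/VM-entry MSR areas).
 */
struct vmx_msr_entry {
	u32 index;
	u32 reserved;
	u64 value;
};

/* Hypothetical helper: append one MSR to an auto-switch list. */
static inline void msr_list_add(struct vmx_msr_entry *list, unsigned int *nr,
				u32 msr, u64 val)
{
	list[*nr].index = msr;
	list[*nr].reserved = 0;
	list[*nr].value = val;
	(*nr)++;
}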
Wei Wang
2017-Sep-25 04:44 UTC
[PATCH v1 1/4] KVM/vmx: re-write the msr auto switch feature
This patch clarifies a vague statement in the SDM: the recommended
maximum number of MSRs that can be atomically switched by the CPU
during VMExit and VMEntry is 512, rather than 512 Bytes of MSRs.
Depending on the CPU implementation, more than 512 MSRs may also be
supported for auto switching. This can be calculated by
(MSR_IA32_VMX_MISC[27:25] + 1) * 512.

Signed-off-by: Wei Wang <wei.w.wang at intel.com>
---
 arch/x86/kvm/vmx.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 63 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0726ca7..8434fc8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -158,6 +158,7 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
 #define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
 #define KVM_VMX_DEFAULT_PLE_WINDOW_MAX    \
 		INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW
+#define KVM_VMX_DEFAULT_MSR_AUTO_LOAD_COUNT 512
 
 static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
 module_param(ple_gap, int, S_IRUGO);
@@ -178,9 +179,10 @@ static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
 static int ple_window_max        = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
 module_param(ple_window_max, int, S_IRUGO);
 
+static int msr_autoload_count_max = KVM_VMX_DEFAULT_MSR_AUTO_LOAD_COUNT;
+
 extern const ulong vmx_return;
 
-#define NR_AUTOLOAD_MSRS 8
 #define VMCS02_POOL_SIZE 1
 
 struct vmcs {
@@ -588,8 +590,8 @@ struct vcpu_vmx {
 	bool      __launched; /* temporary, used in vmx_vcpu_run */
 	struct msr_autoload {
 		unsigned nr;
-		struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
-		struct vmx_msr_entry host[NR_AUTOLOAD_MSRS];
+		struct vmx_msr_entry *guest;
+		struct vmx_msr_entry *host;
 	} msr_autoload;
 	struct {
 		int           loaded;
@@ -1942,6 +1944,7 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
 	m->host[i] = m->host[m->nr];
 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
+	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, m->nr);
 }
 
 static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
@@ -1997,7 +2000,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 		if (m->guest[i].index == msr)
 			break;
 
-	if (i == NR_AUTOLOAD_MSRS) {
+	if (i == msr_autoload_count_max) {
 		printk_once(KERN_WARNING "Not enough msr switch entries. "
 				"Can't add msr %x\n", msr);
 		return;
@@ -2005,6 +2008,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 		++m->nr;
 		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
 		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
+		vmcs_write32(VM_EXIT_MSR_STORE_COUNT, m->nr);
 	}
 
 	m->guest[i].index = msr;
@@ -5501,6 +5505,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
 	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
+	vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autoload.guest));
 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
 	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
 
@@ -6670,6 +6675,21 @@ static void update_ple_window_actual_max(void)
 			ple_window_grow, INT_MIN);
 }
 
+static void update_msr_autoload_count_max(void)
+{
+	u64 vmx_msr;
+	int n;
+
+	/*
+	 * According to the Intel SDM, if Bits 27:25 of MSR_IA32_VMX_MISC is
+	 * n, then (n + 1) * 512 is the recommended max number of MSRs to be
+	 * included in the VMExit and VMEntry MSR auto switch list.
+	 */
+	rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
+	n = ((vmx_msr & 0xe000000) >> 25) + 1;
+	msr_autoload_count_max = n * KVM_VMX_DEFAULT_MSR_AUTO_LOAD_COUNT;
+}
+
 /*
  * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
  */
@@ -6837,6 +6857,7 @@ static __init int hardware_setup(void)
 		kvm_disable_tdp();
 
 	update_ple_window_actual_max();
+	update_msr_autoload_count_max();
 
 	/*
 	 * Only enable PML when hardware supports PML feature, and both EPT
@@ -9248,6 +9269,19 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
 	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
 }
 
+/*
+ * Currently, the CPU does not support the auto save of MSRs on VMEntry, so we
+ * save the MSRs for the host before entering into guest.
+ */
+static void vmx_save_host_msrs(struct msr_autoload *m)
+
+{
+	u32 i;
+
+	for (i = 0; i < m->nr; i++)
+		m->host[i].value = __rdmsr(m->host[i].index);
+}
+
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -9304,6 +9338,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		vmx_arm_hv_timer(vcpu);
 
 	vmx->__launched = vmx->loaded_vmcs->launched;
+
+	vmx_save_host_msrs(&vmx->msr_autoload);
 	asm(
 		/* Store host registers */
 		"push %%" _ASM_DX "; push %%" _ASM_BP ";"
@@ -9504,6 +9540,7 @@ static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	size_t bytes = msr_autoload_count_max * sizeof(struct vmx_msr_entry);
 
 	if (enable_pml)
 		vmx_destroy_pml_buffer(vmx);
@@ -9512,15 +9549,17 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 	vmx_free_vcpu_nested(vcpu);
 	free_loaded_vmcs(vmx->loaded_vmcs);
 	kfree(vmx->guest_msrs);
+	free_pages_exact(vmx->msr_autoload.host, bytes);
+	free_pages_exact(vmx->msr_autoload.guest, bytes);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vmx);
 }
 
 static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 {
-	int err;
+	int err, cpu;
+	size_t bytes;
 	struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
-	int cpu;
 
 	if (!vmx)
 		return ERR_PTR(-ENOMEM);
@@ -9559,6 +9598,17 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 		goto free_msrs;
 	loaded_vmcs_init(vmx->loaded_vmcs);
 
+	bytes = msr_autoload_count_max * sizeof(struct vmx_msr_entry);
+	vmx->msr_autoload.guest = alloc_pages_exact(bytes,
+						    GFP_KERNEL | __GFP_ZERO);
+	if (!vmx->msr_autoload.guest)
+		goto free_vmcs;
+
+	vmx->msr_autoload.host = alloc_pages_exact(bytes,
+						   GFP_KERNEL | __GFP_ZERO);
+	if (!vmx->msr_autoload.host)
+		goto free_autoload_guest;
+
 	cpu = get_cpu();
 	vmx_vcpu_load(&vmx->vcpu, cpu);
 	vmx->vcpu.cpu = cpu;
@@ -9566,11 +9616,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	vmx_vcpu_put(&vmx->vcpu);
 	put_cpu();
 	if (err)
-		goto free_vmcs;
+		goto free_autoload_host;
 
 	if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
 		err = alloc_apic_access_page(kvm);
 		if (err)
-			goto free_vmcs;
+			goto free_autoload_host;
 	}
 
 	if (enable_ept) {
@@ -9579,7 +9629,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 			VMX_EPT_IDENTITY_PAGETABLE_ADDR;
 		err = init_rmode_identity_map(kvm);
 		if (err)
-			goto free_vmcs;
+			goto free_autoload_host;
 	}
 
 	if (nested) {
@@ -9594,6 +9644,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	return &vmx->vcpu;
 
+free_autoload_host:
+	free_pages_exact(vmx->msr_autoload.host, bytes);
+free_autoload_guest:
+	free_pages_exact(vmx->msr_autoload.guest, bytes);
 free_vmcs:
 	free_vpid(vmx->nested.vpid02);
 	free_loaded_vmcs(vmx->loaded_vmcs);
-- 
2.7.4
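A quick sizing sanity check for the per-vCPU lists allocated in
vmx_create_vcpu() above, assuming MSR_IA32_VMX_MISC[27:25] reads as 0
(the common case). The standalone program below only illustrates the
arithmetic of the commit message and is not part of the patch.

#include <stdio.h>

int main(void)
{
	unsigned int misc_bits_27_25 = 0;	/* assumed MSR_IA32_VMX_MISC[27:25] */
	unsigned int max_entries = (misc_bits_27_25 + 1) * 512;
	unsigned int entry_bytes = 16;		/* sizeof(struct vmx_msr_entry) */
	unsigned int list_bytes = max_entries * entry_bytes;

	/*
	 * 512 entries * 16 bytes = 8192 bytes, i.e. two 4 KiB pages per
	 * list; each vCPU keeps one guest list and one host list, so the
	 * series allocates four pages per vCPU in the default case.
	 */
	printf("%u entries, %u bytes (%u pages) per list\n",
	       max_entries, list_bytes, list_bytes / 4096);
	return 0;
}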
Passthrough the MSR_IA32_DEBUGCTLMSR to the guest, and take advantage of
the hardware VT-x feature to auto switch the msr upon VMExit and VMEntry.

Signed-off-by: Wei Wang <wei.w.wang at intel.com>
---
 arch/x86/kvm/vmx.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8434fc8..5f5c2f1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5502,13 +5502,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	if (cpu_has_vmx_vmfunc())
 		vmcs_write64(VM_FUNCTION_CONTROL, 0);
 
-	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
-	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
 	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
 	vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autoload.guest));
-	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
 	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
 
+	add_atomic_switch_msr(vmx, MSR_IA32_DEBUGCTLMSR, 0, 0);
+
 	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
 		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
 
@@ -6821,6 +6820,7 @@ static __init int hardware_setup(void)
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_DEBUGCTLMSR, false);
 
 	memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
 			vmx_msr_bitmap_legacy, PAGE_SIZE);
@@ -9285,7 +9285,7 @@ static void vmx_save_host_msrs(struct msr_autoload *m)
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long debugctlmsr, cr3, cr4;
+	unsigned long cr3, cr4;
 
 	/* Don't enter VMX if guest state is invalid, let the exit handler
 	   start emulation until we arrive back to a valid state */
@@ -9333,7 +9333,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		__write_pkru(vcpu->arch.pkru);
 
 	atomic_switch_perf_msrs(vmx);
-	debugctlmsr = get_debugctlmsr();
 
 	vmx_arm_hv_timer(vcpu);
 
@@ -9445,10 +9444,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
 	      );
 
-	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
-	if (debugctlmsr)
-		update_debugctlmsr(debugctlmsr);
-
 #ifndef CONFIG_X86_64
 	/*
 	 * The sysexit path does not restore ds/es, so we must set them to
-- 
2.7.4
Wei Wang
2017-Sep-25 04:44 UTC
[PATCH v1 3/4] perf/x86: add a function to get the lbr stack
The LBR stack MSRs are architecturally specific. The perf subsystem has
already assigned the abstracted MSR values based on the CPU
architecture. This patch enables a caller outside the perf subsystem to
get the LBR stack info. This is useful for hypervisors to prepare the
lbr feature for the guest.

Signed-off-by: Wei Wang <wei.w.wang at intel.com>
---
 arch/x86/events/intel/lbr.c       | 23 +++++++++++++++++++++++
 arch/x86/include/asm/perf_event.h | 14 ++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 8a6bbac..ea547ec 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1230,3 +1230,26 @@ void intel_pmu_lbr_init_knl(void)
 	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
 	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
 }
+
+/**
+ * perf_get_lbr_stack - get the lbr stack related MSRs
+ *
+ * @stack: the caller's memory to get the lbr stack
+ *
+ * Returns: 0 indicates that the lbr stack has been successfully obtained.
+ */
+int perf_get_lbr_stack(struct perf_lbr_stack *stack)
+{
+	stack->lbr_nr = x86_pmu.lbr_nr;
+	stack->lbr_tos = x86_pmu.lbr_tos;
+	stack->lbr_from = x86_pmu.lbr_from;
+	stack->lbr_to = x86_pmu.lbr_to;
+
+	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+		stack->lbr_info = MSR_LBR_INFO_0;
+	else
+		stack->lbr_info = 0;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(perf_get_lbr_stack);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f353061..c098462 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -266,7 +266,16 @@ struct perf_guest_switch_msr {
 	u64 host, guest;
 };
 
+struct perf_lbr_stack {
+	int		lbr_nr;
+	unsigned long	lbr_tos;
+	unsigned long	lbr_from;
+	unsigned long	lbr_to;
+	unsigned long	lbr_info;
+};
+
 extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
+extern int perf_get_lbr_stack(struct perf_lbr_stack *stack);
 extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
 extern void perf_check_microcode(void);
 #else
@@ -276,6 +285,11 @@ static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
 	return NULL;
 }
 
+static inline int perf_get_lbr_stack(struct perf_lbr_stack *stack)
+{
+	return -1;
+}
+
 static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
 {
 	memset(cap, 0, sizeof(*cap));
-- 
2.7.4
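A sketch of how a caller outside perf might consume the new interface;
the enumerate_lbr_msrs() wrapper is hypothetical, but the pattern
mirrors what patch 4/4 does in auto_switch_lbr_msrs() and
vmx_passthrough_lbr_msrs().

#include <linux/kernel.h>
#include <linux/errno.h>
#include <asm/perf_event.h>

static int enumerate_lbr_msrs(void)
{
	struct perf_lbr_stack lbr_stack;
	int i;

	if (perf_get_lbr_stack(&lbr_stack) < 0)
		return -ENODEV;	/* no LBR support reported by perf */

	pr_info("LBR: %d entries, TOS msr 0x%lx\n",
		lbr_stack.lbr_nr, lbr_stack.lbr_tos);

	for (i = 0; i < lbr_stack.lbr_nr; i++) {
		/* MSR_LBR_FROM_i/MSR_LBR_TO_i pairs, plus LBR_INFO if present */
		pr_info("  from 0x%lx to 0x%lx\n",
			lbr_stack.lbr_from + i, lbr_stack.lbr_to + i);
	}
	return 0;
}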
Passthrough the LBR stack to the guest, and auto switch the stack MSRs
upon VMEntry and VMExit.

Signed-off-by: Wei Wang <wei.w.wang at intel.com>
---
 arch/x86/kvm/vmx.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5f5c2f1..35e02a7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -107,6 +107,9 @@ static u64 __read_mostly host_xss;
 static bool __read_mostly enable_pml = 1;
 module_param_named(pml, enable_pml, bool, S_IRUGO);
 
+static bool __read_mostly enable_lbrv = 1;
+module_param_named(lbrv, enable_lbrv, bool, 0444);
+
 #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
 
 /* Guest_tsc -> host_tsc conversion requires 64-bit division.  */
@@ -5428,6 +5431,25 @@ static void ept_set_mmio_spte_mask(void)
 				   VMX_EPT_MISCONFIG_WX_VALUE);
 }
 
+static void auto_switch_lbr_msrs(struct vcpu_vmx *vmx)
+{
+	int i;
+	struct perf_lbr_stack lbr_stack;
+
+	perf_get_lbr_stack(&lbr_stack);
+
+	add_atomic_switch_msr(vmx, MSR_LBR_SELECT, 0, 0);
+	add_atomic_switch_msr(vmx, lbr_stack.lbr_tos, 0, 0);
+
+	for (i = 0; i < lbr_stack.lbr_nr; i++) {
+		add_atomic_switch_msr(vmx, lbr_stack.lbr_from + i, 0, 0);
+		add_atomic_switch_msr(vmx, lbr_stack.lbr_to + i, 0, 0);
+		if (lbr_stack.lbr_info)
+			add_atomic_switch_msr(vmx, lbr_stack.lbr_info + i, 0,
+					      0);
+	}
+}
+
 #define VMX_XSS_EXIT_BITMAP 0
 /*
  * Sets up the vmcs for emulated real mode.
@@ -5508,6 +5530,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
 	add_atomic_switch_msr(vmx, MSR_IA32_DEBUGCTLMSR, 0, 0);
 
+	if (enable_lbrv)
+		auto_switch_lbr_msrs(vmx);
+
 	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
 		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
 
@@ -6721,6 +6746,28 @@ void vmx_enable_tdp(void)
 	kvm_enable_tdp();
 }
 
+static void vmx_passthrough_lbr_msrs(void)
+{
+	int i;
+	struct perf_lbr_stack lbr_stack;
+
+	if (perf_get_lbr_stack(&lbr_stack) < 0) {
+		enable_lbrv = false;
+		return;
+	}
+
+	vmx_disable_intercept_for_msr(MSR_LBR_SELECT, false);
+	vmx_disable_intercept_for_msr(lbr_stack.lbr_tos, false);
+
+	for (i = 0; i < lbr_stack.lbr_nr; i++) {
+		vmx_disable_intercept_for_msr(lbr_stack.lbr_from + i, false);
+		vmx_disable_intercept_for_msr(lbr_stack.lbr_to + i, false);
+		if (lbr_stack.lbr_info)
+			vmx_disable_intercept_for_msr(lbr_stack.lbr_info + i,
+						      false);
+	}
+}
+
 static __init int hardware_setup(void)
 {
 	int r = -ENOMEM, i, msr;
@@ -6822,6 +6869,9 @@ static __init int hardware_setup(void)
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_DEBUGCTLMSR, false);
 
+	if (enable_lbrv)
+		vmx_passthrough_lbr_msrs();
+
 	memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
 			vmx_msr_bitmap_legacy, PAGE_SIZE);
 	memcpy(vmx_msr_bitmap_longmode_x2apic_apicv,
-- 
2.7.4
On 25/09/2017 06:44, Wei Wang wrote:
> Passthrough the LBR stack to the guest, and auto switch the stack MSRs
> upon VMEntry and VMExit.
> 
> Signed-off-by: Wei Wang <wei.w.wang at intel.com>

This has to be enabled separately for each guest, because it may
prevent live migration to hosts with a different family/model.

Paolo

> ---
>  arch/x86/kvm/vmx.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 50 insertions(+)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 5f5c2f1..35e02a7 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -107,6 +107,9 @@ static u64 __read_mostly host_xss;
>  static bool __read_mostly enable_pml = 1;
>  module_param_named(pml, enable_pml, bool, S_IRUGO);
>  
> +static bool __read_mostly enable_lbrv = 1;
> +module_param_named(lbrv, enable_lbrv, bool, 0444);
> +
>  #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
>  
>  /* Guest_tsc -> host_tsc conversion requires 64-bit division.  */
> @@ -5428,6 +5431,25 @@ static void ept_set_mmio_spte_mask(void)
>  				   VMX_EPT_MISCONFIG_WX_VALUE);
>  }
>  
> +static void auto_switch_lbr_msrs(struct vcpu_vmx *vmx)
> +{
> +	int i;
> +	struct perf_lbr_stack lbr_stack;
> +
> +	perf_get_lbr_stack(&lbr_stack);
> +
> +	add_atomic_switch_msr(vmx, MSR_LBR_SELECT, 0, 0);
> +	add_atomic_switch_msr(vmx, lbr_stack.lbr_tos, 0, 0);
> +
> +	for (i = 0; i < lbr_stack.lbr_nr; i++) {
> +		add_atomic_switch_msr(vmx, lbr_stack.lbr_from + i, 0, 0);
> +		add_atomic_switch_msr(vmx, lbr_stack.lbr_to + i, 0, 0);
> +		if (lbr_stack.lbr_info)
> +			add_atomic_switch_msr(vmx, lbr_stack.lbr_info + i, 0,
> +					      0);
> +	}
> +}
> +
>  #define VMX_XSS_EXIT_BITMAP 0
>  /*
>   * Sets up the vmcs for emulated real mode.
> @@ -5508,6 +5530,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>  
>  	add_atomic_switch_msr(vmx, MSR_IA32_DEBUGCTLMSR, 0, 0);
>  
> +	if (enable_lbrv)
> +		auto_switch_lbr_msrs(vmx);
> +
>  	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
>  		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
>  
> @@ -6721,6 +6746,28 @@ void vmx_enable_tdp(void)
>  	kvm_enable_tdp();
>  }
>  
> +static void vmx_passthrough_lbr_msrs(void)
> +{
> +	int i;
> +	struct perf_lbr_stack lbr_stack;
> +
> +	if (perf_get_lbr_stack(&lbr_stack) < 0) {
> +		enable_lbrv = false;
> +		return;
> +	}
> +
> +	vmx_disable_intercept_for_msr(MSR_LBR_SELECT, false);
> +	vmx_disable_intercept_for_msr(lbr_stack.lbr_tos, false);
> +
> +	for (i = 0; i < lbr_stack.lbr_nr; i++) {
> +		vmx_disable_intercept_for_msr(lbr_stack.lbr_from + i, false);
> +		vmx_disable_intercept_for_msr(lbr_stack.lbr_to + i, false);
> +		if (lbr_stack.lbr_info)
> +			vmx_disable_intercept_for_msr(lbr_stack.lbr_info + i,
> +						      false);
> +	}
> +}
> +
>  static __init int hardware_setup(void)
>  {
>  	int r = -ENOMEM, i, msr;
> @@ -6822,6 +6869,9 @@ static __init int hardware_setup(void)
>  	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
>  	vmx_disable_intercept_for_msr(MSR_IA32_DEBUGCTLMSR, false);
>  
> +	if (enable_lbrv)
> +		vmx_passthrough_lbr_msrs();
> +
>  	memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
>  			vmx_msr_bitmap_legacy, PAGE_SIZE);
>  	memcpy(vmx_msr_bitmap_longmode_x2apic_apicv,
> 
Paolo Bonzini
2017-Sep-25 11:54 UTC
[PATCH v1 1/4] KVM/vmx: re-write the msr auto switch feature
On 25/09/2017 06:44, Wei Wang wrote:
> 
> +static void update_msr_autoload_count_max(void)
> +{
> +	u64 vmx_msr;
> +	int n;
> +
> +	/*
> +	 * According to the Intel SDM, if Bits 27:25 of MSR_IA32_VMX_MISC is
> +	 * n, then (n + 1) * 512 is the recommended max number of MSRs to be
> +	 * included in the VMExit and VMEntry MSR auto switch list.
> +	 */
> +	rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
> +	n = ((vmx_msr & 0xe000000) >> 25) + 1;
> +	msr_autoload_count_max = n * KVM_VMX_DEFAULT_MSR_AUTO_LOAD_COUNT;
> +}
> +

Any reasons to do this if it's unlikely that we'll ever update more
than 512 MSRs?

Paolo
Paolo Bonzini
2017-Sep-25 11:57 UTC
[PATCH v1 2/4] KVM/vmx: auto switch MSR_IA32_DEBUGCTLMSR
On 25/09/2017 06:44, Wei Wang wrote:
> Passthrough the MSR_IA32_DEBUGCTLMSR to the guest, and take advantage of
> the hardware VT-x feature to auto switch the msr upon VMExit and VMEntry.

I think most bits in the MSR should not be passed through (for example
FREEZE_WHILE_SMM_EN, FREEZE_LBRS_ON_PMI etc.).  Using auto-switch of
course is fine instead.

Paolo
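One possible shape of what Paolo suggests, sketched under the assumption
that DEBUGCTL stays intercepted and only a whitelist of bits is writable
by the guest; kvm_set_debugctl() and GUEST_DEBUGCTL_ALLOWED are made-up
names for illustration, not existing KVM code.

#include <linux/types.h>
#include <asm/msr-index.h>

/* Bits the guest may toggle; FREEZE_LBRS_ON_PMI, FREEZE_IN_SMM, BTS
 * and friends stay under host control in this sketch. */
#define GUEST_DEBUGCTL_ALLOWED	(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)

static int kvm_set_debugctl(u64 *shadow, u64 data)
{
	if (data & ~GUEST_DEBUGCTL_ALLOWED)
		return 1;	/* caller injects #GP for disallowed bits */

	*shadow = data;		/* value loaded on the next VMEntry */
	return 0;
}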
> +static void auto_switch_lbr_msrs(struct vcpu_vmx *vmx)
> +{
> +	int i;
> +	struct perf_lbr_stack lbr_stack;
> +
> +	perf_get_lbr_stack(&lbr_stack);
> +
> +	add_atomic_switch_msr(vmx, MSR_LBR_SELECT, 0, 0);
> +	add_atomic_switch_msr(vmx, lbr_stack.lbr_tos, 0, 0);
> +
> +	for (i = 0; i < lbr_stack.lbr_nr; i++) {
> +		add_atomic_switch_msr(vmx, lbr_stack.lbr_from + i, 0, 0);
> +		add_atomic_switch_msr(vmx, lbr_stack.lbr_to + i, 0, 0);
> +		if (lbr_stack.lbr_info)
> +			add_atomic_switch_msr(vmx, lbr_stack.lbr_info + i, 0,
> +					      0);
> +	}

That will be really expensive and add a lot of overhead to every
entry/exit.

perf can already context switch the LBRs on task context switch. With
that you can just switch LBR_SELECT, which is *much* cheaper because
there are far fewer context switches than exits/entries.

It implies that when KVM is running it needs to prevent perf from
enabling LBRs in the context of KVM, but that should be straightforward.

-Andi
On Mon, Sep 25, 2017 at 12:44:52PM +0800, Wei Wang wrote:
> This patch series enables the Last Branch Recording feature for the
> guest. Instead of trapping each LBR stack MSR access, the MSRs are
> passed through to the guest. Those MSRs are switched (i.e. loaded and
> saved) on VMExit and VMEntry.
> 
> Test:
> Try "perf record -b ./test_program" on guest.

I don't see where you expose the PERF capabilities MSR?

That's normally needed for LBR too to report the version number.

-Andi
On 09/25/2017 10:59 PM, Andi Kleen wrote:
> On Mon, Sep 25, 2017 at 12:44:52PM +0800, Wei Wang wrote:
>> This patch series enables the Last Branch Recording feature for the
>> guest. Instead of trapping each LBR stack MSR access, the MSRs are
>> passed through to the guest. Those MSRs are switched (i.e. loaded and
>> saved) on VMExit and VMEntry.
>>
>> Test:
>> Try "perf record -b ./test_program" on guest.
> I don't see where you expose the PERF capabilities MSR?
>
> That's normally needed for LBR too to report the version
> number.
>

It was missed, thanks for pointing it out. I also found KVM/QEMU doesn't
expose CPUID.PDCM, will add that too.

Since for now we are enabling LBR, I plan to expose only "PERF_CAP &
0x3f" to the guest, which reports the LBR format only.

On the other hand, it seems that the (guest) kernel driver also works
without the above being supported. Should we change it to report an
error and stop using the PMU features when the check of the above two
fails (at intel_pmu_init())?

Best,
Wei
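A sketch of the "PERF_CAP & 0x3f" idea above: bits 5:0 of
IA32_PERF_CAPABILITIES encode the LBR record format, which is all a
guest needs for plain LBR. The mask name and helper below are
illustrative only, not code from the series.

#include <linux/types.h>
#include <asm/msr.h>
#include <asm/msr-index.h>

#define PERF_CAP_LBR_FMT_MASK	0x3fULL	/* IA32_PERF_CAPABILITIES[5:0] */

static u64 guest_perf_capabilities(void)
{
	u64 host_cap;

	rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_cap);

	/* expose only the LBR format field; hide PEBS and other bits */
	return host_cap & PERF_CAP_LBR_FMT_MASK;
}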